diff --git a/core/_sklearn.py b/core/_sklearn.py
index eaa2d16..39e5273 100644
--- a/core/_sklearn.py
+++ b/core/_sklearn.py
@@ -688,15 +688,18 @@ def calculate_metrics(self, caller="external"):
             ["model_name", "true_label", "pred_label", "count"]]

         if self.model.calc_feature_importances:
+            # Fill null values in the test set according to the model settings
+            X_test = utils.fillna(self.X_test, method=self.model.missing)
+
             # Calculate model agnostic feature importances using the skater library
-            interpreter = Interpretation(self.X_test, feature_names=self.model.features_df.index.tolist())
+            interpreter = Interpretation(X_test, feature_names=self.model.features_df.index.tolist())

             try:
                 # We use the predicted probabilities from the estimator if available
-                imm = InMemoryModel(self.model.pipe.predict_proba, examples = self.X_test[:10], model_type="classifier")
+                imm = InMemoryModel(self.model.pipe.predict_proba, examples = X_test[:10], model_type="classifier")
             except AttributeError:
                 # Otherwise we simply use the predict method
-                imm = InMemoryModel(self.model.pipe.predict, examples = self.X_test[:10], model_type="classifier", \
+                imm = InMemoryModel(self.model.pipe.predict, examples = X_test[:10], model_type="classifier", \
                 unique_values = self.model.pipe.classes_)

             # Add the feature importances to the model as a sorted data frame
@@ -718,8 +721,7 @@ def calculate_metrics(self, caller="external"):
             metrics_df.loc[:,"median_absolute_error"] = metrics.median_absolute_error(self.y_test, self.y_pred)

             # Get the explained variance score
-            metrics_df.loc[:,"explained_variance_score"] = metrics.explained_variance_score(self.y_test, self.y_pred,\
-            metric_args)
+            metrics_df.loc[:,"explained_variance_score"] = metrics.explained_variance_score(self.y_test, self.y_pred, **metric_args)

             # Finalize the structure of the result DataFrame
             metrics_df.loc[:,"model_name"] = self.model.name
@@ -727,11 +729,14 @@ def calculate_metrics(self, caller="external"):
             "median_absolute_error", "explained_variance_score"]]

         if self.model.calc_feature_importances:
+            # Fill null values in the test set according to the model settings
+            X_test = utils.fillna(self.X_test, method=self.model.missing)
+
             # Calculate model agnostic feature importances using the skater library
-            interpreter = Interpretation(self.X_test, feature_names=self.model.features_df.index.tolist())
+            interpreter = Interpretation(X_test, feature_names=self.model.features_df.index.tolist())

             # Set up a skater InMemoryModel to calculate feature importances using the predict method
-            imm = InMemoryModel(self.model.pipe.predict, examples = self.X_test[:10], model_type="regressor")
+            imm = InMemoryModel(self.model.pipe.predict, examples = X_test[:10], model_type="regressor")

             # Add the feature importances to the model as a sorted data frame
             self.model.importances = interpreter.feature_importance.feature_importance(imm, progressbar=False, ascending=False)
diff --git a/core/_utils.py b/core/_utils.py
index 779b1da..8658ba7 100644
--- a/core/_utils.py
+++ b/core/_utils.py
@@ -53,6 +53,9 @@ def get_response_rows(response, template):
         for col in row:
             # Convert values to type SSE.Dual according to the template list
             if template[i] == "str":
+                if type(col) is not str:
+                    col = "{0:.5f}".format(col)
+
                 this_row.append(SSE.Dual(strData=col))
             elif template[i] == "num":
                 this_row.append(SSE.Dual(numData=col))
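
Notes on the patch:

Both feature-importance branches now fill nulls in X_test before handing it to skater, presumably because the interpreter cannot work with missing values. The fillna helper itself is outside this diff; a minimal sketch of the shape such a helper might take, assuming pandas DataFrames (the method names here are illustrative assumptions, not the project's actual options):

    import pandas as pd

    def fillna(df, method="zeros"):
        # Hypothetical sketch only; the real helper in core/_utils.py may
        # support a different set of methods.
        if method == "zeros":
            return df.fillna(0)
        if method == "mean":
            return df.fillna(df.mean())
        if method == "median":
            return df.fillna(df.median())
        if method == "mode":
            return df.fillna(df.mode().iloc[0])
        if method == "none":
            return df
        raise ValueError("Unknown missing-data method: {0}".format(method))

The explained_variance_score fix replaces a positional pass of metric_args, which would bind the whole dict to the function's next positional parameter, with keyword unpacking, which forwards each key/value pair as a named argument:

    from sklearn import metrics

    y_test = [3.0, -0.5, 2.0, 7.0]
    y_pred = [2.5, 0.0, 2.0, 8.0]
    metric_args = {"multioutput": "uniform_average"}

    # Passing metric_args positionally would misbind the dict;
    # ** unpacking forwards it as keyword arguments instead.
    score = metrics.explained_variance_score(y_test, y_pred, **metric_args)
    print(score)  # approximately 0.957

Finally, the core/_utils.py hunk formats non-string values to five decimal places before wrapping them in SSE.Dual(strData=...), so numeric results no longer raise a TypeError when the response template expects a string.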