Bug fixes for regression
Minor fixes for prediction and feature importances when using scikit-learn regression algorithms
Nabeel committed Oct 7, 2018
1 parent 8be723f commit a01f6b4
Showing 2 changed files with 15 additions and 7 deletions.
19 changes: 12 additions & 7 deletions core/_sklearn.py
@@ -688,15 +688,18 @@ def calculate_metrics(self, caller="external"):
                 ["model_name", "true_label", "pred_label", "count"]]
 
         if self.model.calc_feature_importances:
+            # Fill null values in the test set according to the model settings
+            X_test = utils.fillna(self.X_test, method=self.model.missing)
+
             # Calculate model agnostic feature importances using the skater library
-            interpreter = Interpretation(self.X_test, feature_names=self.model.features_df.index.tolist())
+            interpreter = Interpretation(X_test, feature_names=self.model.features_df.index.tolist())
 
             try:
                 # We use the predicted probabilities from the estimator if available
-                imm = InMemoryModel(self.model.pipe.predict_proba, examples = self.X_test[:10], model_type="classifier")
+                imm = InMemoryModel(self.model.pipe.predict_proba, examples = X_test[:10], model_type="classifier")
             except AttributeError:
                 # Otherwise we simply use the predict method
-                imm = InMemoryModel(self.model.pipe.predict, examples = self.X_test[:10], model_type="classifier", \
+                imm = InMemoryModel(self.model.pipe.predict, examples = X_test[:10], model_type="classifier", \
                     unique_values = self.model.pipe.classes_)
 
             # Add the feature importances to the model as a sorted data frame
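For context, the pattern this hunk converges on looks roughly like the sketch below. It is a hypothetical, self-contained illustration rather than the project's code: the toy data, the LogisticRegression pipe and the local fillna helper are assumptions standing in for the model settings and utils.fillna. It shows the filled copy of the test set being passed to both Interpretation and InMemoryModel, with the predict_proba path falling back to predict when the estimator does not expose probabilities.

# Hypothetical sketch only; assumes skater, scikit-learn and pandas are installed.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from skater.core.explanations import Interpretation
from skater.model import InMemoryModel

def fillna(df, method="zeros"):
    # Simplified stand-in for utils.fillna: replace missing values before interpretation
    return df.fillna(0) if method == "zeros" else df.fillna(df.mean())

X_test = pd.DataFrame({"a": [1.0, None, 3.0, 4.0], "b": [0.5, 0.1, None, 0.9]})
y_test = pd.Series([0, 1, 0, 1])
pipe = LogisticRegression().fit(fillna(X_test), y_test)

# Fill null values first, then hand the filled frame to skater
X_filled = fillna(X_test)
interpreter = Interpretation(X_filled, feature_names=X_filled.columns.tolist())

try:
    # Use predicted probabilities from the estimator if available
    imm = InMemoryModel(pipe.predict_proba, examples=X_filled[:10], model_type="classifier")
except AttributeError:
    # Otherwise fall back to the predict method
    imm = InMemoryModel(pipe.predict, examples=X_filled[:10], model_type="classifier",
                        unique_values=pipe.classes_)

importances = interpreter.feature_importance.feature_importance(imm, progressbar=False, ascending=False)
print(importances)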
@@ -718,20 +721,22 @@ def calculate_metrics(self, caller="external"):
             metrics_df.loc[:,"median_absolute_error"] = metrics.median_absolute_error(self.y_test, self.y_pred)
 
             # Get the explained variance score
-            metrics_df.loc[:,"explained_variance_score"] = metrics.explained_variance_score(self.y_test, self.y_pred,\
-                metric_args)
+            metrics_df.loc[:,"explained_variance_score"] = metrics.explained_variance_score(self.y_test, self.y_pred, **metric_args)
 
             # Finalize the structure of the result DataFrame
             metrics_df.loc[:,"model_name"] = self.model.name
             metrics_df = metrics_df.loc[:,["model_name", "r2_score", "mean_squared_error", "mean_absolute_error",\
                 "median_absolute_error", "explained_variance_score"]]
 
         if self.model.calc_feature_importances:
+            # Fill null values in the test set according to the model settings
+            X_test = utils.fillna(self.X_test, method=self.model.missing)
+
             # Calculate model agnostic feature importances using the skater library
-            interpreter = Interpretation(self.X_test, feature_names=self.model.features_df.index.tolist())
+            interpreter = Interpretation(X_test, feature_names=self.model.features_df.index.tolist())
 
             # Set up a skater InMemoryModel to calculate feature importances using the predict method
-            imm = InMemoryModel(self.model.pipe.predict, examples = self.X_test[:10], model_type="regressor")
+            imm = InMemoryModel(self.model.pipe.predict, examples = X_test[:10], model_type="regressor")
 
             # Add the feature importances to the model as a sorted data frame
             self.model.importances = interpreter.feature_importance.feature_importance(imm, progressbar=False, ascending=False)
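The first change in this hunk replaces a positional metric_args with keyword unpacking. Below is a minimal, hypothetical illustration of why that matters; the metric_args dict shown here is an assumption, while the real one comes from the model settings. Passed positionally, the dict would be misread as the sample_weight argument (or rejected outright in newer scikit-learn releases where the parameter is keyword-only), whereas **metric_args forwards each entry as a keyword argument.

# Hypothetical sketch of the **metric_args fix; not the project's code.
import numpy as np
from sklearn import metrics

y_test = np.array([2.5, 0.0, 2.1, 7.8])
y_pred = np.array([3.0, -0.5, 2.0, 8.0])

# Assumed example of metric keyword arguments
metric_args = {"multioutput": "uniform_average"}

# Unpacking forwards multioutput="uniform_average" as a keyword argument;
# passing metric_args positionally would not reach the intended parameter.
score = metrics.explained_variance_score(y_test, y_pred, **metric_args)
print("explained variance:", score)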
3 changes: 3 additions & 0 deletions core/_utils.py
@@ -53,6 +53,9 @@ def get_response_rows(response, template):
         for col in row:
             # Convert values to type SSE.Dual according to the template list
             if template[i] == "str":
+                if type(col) is not str:
+                    col = "{0:.5f}".format(col)
+
                 this_row.append(SSE.Dual(strData=col))
             elif template[i] == "num":
                 this_row.append(SSE.Dual(numData=col))
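The added guard formats non-string values before they are wrapped in SSE.Dual's strData field, presumably because regression predictions arrive as floats rather than strings. A rough, hypothetical sketch of the same logic, using a plain stand-in class in place of the generated SSE protobuf type so it can run on its own:

# Stand-in for SSE.Dual, only so this sketch is self-contained and runnable.
class Dual:
    def __init__(self, strData=None, numData=None):
        self.strData = strData
        self.numData = numData

def to_dual(col, template_type):
    # Mirror the fixed branch: a "str" template expects string data,
    # so numeric values are formatted to five decimal places first.
    if template_type == "str":
        if type(col) is not str:
            col = "{0:.5f}".format(col)
        return Dual(strData=col)
    elif template_type == "num":
        return Dual(numData=col)

print(to_dual(3.14159265, "str").strData)   # "3.14159"
print(to_dual("setosa", "str").strData)     # unchanged
print(to_dual(42, "num").numData)           # stays numeric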
