From d7335084d63f45ddb94f0063923d31aa3da6d4c1 Mon Sep 17 00:00:00 2001 From: Ryan Hausen Date: Mon, 26 Aug 2024 10:12:43 -0400 Subject: [PATCH] added clarifying comments --- treeple/stats/forest.py | 7 +++---- treeple/stats/tests/test_forest.py | 2 ++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/treeple/stats/forest.py b/treeple/stats/forest.py index 8c4de9b3..9d86532d 100644 --- a/treeple/stats/forest.py +++ b/treeple/stats/forest.py @@ -171,13 +171,12 @@ def build_coleman_forest( ) # if we are returning the posteriors, then we need to replace the - # sparse indices and values with an array + # sparse indices and values with an array. We convert the sparse data + # to dense data, so that the function returns results in a consistent format. if return_posteriors: n_trees = y_pred_proba_orig_perm.shape[0] // 2 n_samples = y_pred_proba_orig_perm.shape[1] - # slicing a csc matrix this way is not efficient, but it is - # it is only done once, so I am not sure if it is worth it to - # optimize this. + to_coords_data = lambda x: (x.row.astype(int), x.col.astype(int), x.data) row, col, data = to_coords_data(y_pred_proba_orig_perm[:n_trees, :].tocoo()) diff --git a/treeple/stats/tests/test_forest.py b/treeple/stats/tests/test_forest.py index e32070e5..c317d217 100644 --- a/treeple/stats/tests/test_forest.py +++ b/treeple/stats/tests/test_forest.py @@ -250,6 +250,8 @@ def test_build_coleman_forest(use_bottleneck: bool, use_sparse: bool): elif use_bottleneck and utils.DISABLE_BN_ENV_VAR in os.environ: del os.environ[utils.DISABLE_BN_ENV_VAR] + # We need to reload the modules after changing the environment variable + # because an environment variable is used to disable bottleneck importlib.reload(utils) importlib.reload(stats)