Change ShapExplainer to ShapModel and add release notes (v0.1.6)
breimanntools committed Jun 30, 2024
1 parent d0acd19 commit bd39dc7
Showing 14 changed files with 289 additions and 265 deletions.
8 changes: 4 additions & 4 deletions aaanalysis/__init__.py
@@ -34,7 +34,7 @@
"SeqMut",
"SeqMutPlot",
"TreeModel",
# "ShapExplainer" # SHAP
# "ShapModel" # SHAP
"plot_get_clist",
"plot_get_cmap",
"plot_get_cdict",
@@ -50,11 +50,11 @@

# Import of professional (pro) version features if dependencies are available
try:
from .explainable_ai_pro import ShapExplainer
from .explainable_ai_pro import ShapModel
from .data_handling_pro import comp_seq_sim, filter_seq
from .show_html import display_df
# Extend the __all__ list with pro features if successful
__all__.extend(["ShapExplainer",
__all__.extend(["ShapModel",
"display_df",
"comp_seq_sim",
"filter_seq"])
@@ -76,7 +76,7 @@ def __call__(self, *args, **kwargs):
return UnavailableFeature

# Use the factory function to create placeholders for pro features
ShapExplainer = make_pro_feature("ShapExplainer")
ShapModel = make_pro_feature("ShapModel")
display_df = make_pro_feature("display_df")
comp_seq_sim = make_pro_feature("comp_seq_sim")
filter_seq = make_pro_feature("filter_seq")
4 changes: 2 additions & 2 deletions aaanalysis/explainable_ai_pro/__init__.py
@@ -1,5 +1,5 @@
from ._shap_explainer import ShapExplainer
from ._shap_model import ShapModel

__all__ = [
"ShapExplainer",
"ShapModel",
]
@@ -1,13 +1,13 @@
"""
This is a script for the backend of the ShapExplainer.fit() method.
This is a script for the backend of the ShapModel.fit() method.
"""
import numpy as np
import shap

import aaanalysis.utils as ut


LIST_VERBOSE_SHAP_EXPLAINERS = [shap.KernelExplainer]
LIST_VERBOSE_SHAP_MODELS = [shap.KernelExplainer]


# I Helper Functions
@@ -105,12 +105,12 @@ def monte_carlo_shap_estimation(X, labels=None, list_model_classes=None, list_mo
# Compute SHAP values
list_expected_value = []
if verbose:
ut.print_start_progress(start_message=f"ShapExplainer starts Monte Carlo estimation of SHAP values over {n_rounds} rounds.")
ut.print_start_progress(start_message=f"ShapModel starts Monte Carlo estimation of SHAP values over {n_rounds} rounds.")
for i in range(n_rounds):
for j, selected_features in enumerate(is_selected):
if verbose:
pct_progress = j / len(is_selected)
add_new_line = explainer_class in LIST_VERBOSE_SHAP_EXPLAINERS
add_new_line = explainer_class in LIST_VERBOSE_SHAP_MODELS
ut.print_progress(i=i+pct_progress, n=n_rounds, add_new_line=add_new_line)
# Adjust fuzzy labels (labels between 0 and 1, e.g., 0.5 -> 50% 1 and 50% 0)
if fuzzy_labeling:
@@ -127,7 +127,7 @@ def monte_carlo_shap_estimation(X, labels=None, list_model_classes=None, list_mo
list_expected_value.append(_exp_val)
# Averaging over rounds and selections
if verbose:
ut.print_end_progress(end_message=f"ShapExplainer finished Monte Carlo estimation and saved results.")
ut.print_end_progress(end_message=f"ShapModel finished Monte Carlo estimation and saved results.")
shap_values = np.mean(mc_shap_values, axis=(2, 3))
exp_val = np.mean(list_expected_value)
return shap_values, exp_val
@@ -1,5 +1,5 @@
"""
This is a script for the backend of the ShapExplainer.add_feat_impact() and ShapExplainer.add_feat_importance() methods.
This is a script for the backend of the ShapModel.add_feat_impact() and ShapModel.add_feat_importance() methods.
"""
import numpy as np
import pandas as pd
@@ -1,4 +1,4 @@
"""This is the backend for the ShapExplainer().add_sample_mean_dif() method"""
"""This is the backend for the ShapModel().add_sample_mean_dif() method"""
import numpy as np
import pandas as pd
import aaanalysis.utils as ut
@@ -1,7 +1,7 @@
"""
This is a script for the frontend of the ShapExplainer class used to obtain Mote Carlo estimates of feature impact.
This is a script for the frontend of the ShapModel class used to obtain Monte Carlo estimates of feature impact.
"""
from typing import Optional, Dict, List, Tuple, Type, Union, Callable
from typing import Optional, List, Type, Union, Callable
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator
@@ -12,17 +12,17 @@
import aaanalysis.utils as ut
from ._backend.check_models import (check_match_labels_X,
check_match_X_is_selected)
from ._backend.shap_explainer.shap_explainer_fit import monte_carlo_shap_estimation
from ._backend.shap_explainer.se_add_feat_impact import (comp_shap_feature_importance,
insert_shap_feature_importance,
comp_shap_feature_impact,
insert_shap_feature_impact)
from ._backend.shap_explainer.se_add_sample_mean_dif import add_sample_mean_dif_
from ._backend.shap_model.shap_model_fit import monte_carlo_shap_estimation
from ._backend.shap_model.sm_add_feat_impact import (comp_shap_feature_importance,
insert_shap_feature_importance,
comp_shap_feature_impact,
insert_shap_feature_impact)
from ._backend.shap_model.sm_add_sample_mean_dif import add_sample_mean_dif_


# I Helper Functions
# Check init
def check_shap_explainer(explainer_class=None, explainer_kwargs=None):
def check_shap_model(explainer_class=None, explainer_kwargs=None):
"""Check if explainer class is a valid shap explainer"""
ut.check_mode_class(model_class=explainer_class)
ut.check_dict(name="explainer_kwargs", val=explainer_kwargs, accept_none=True)
@@ -73,7 +73,7 @@ def check_match_class_explainer_and_models(explainer_class=None, explainer_kwarg
def check_shap_values(shap_values=None):
"""Check if shap values are properly set"""
if shap_values is None:
raise ValueError("'shape_values' are None. Use 'ShapExplainer().fit()' to compute them.")
raise ValueError("'shape_values' are None. Use 'ShapModel().fit()' to compute them.")
_ = ut.check_array_like(name="shap_values", val=shap_values, dtype="numeric")


@@ -219,7 +219,7 @@ def check_match_sample_positions_group_average(sample_positions=None, group_aver
def check_match_df_feat_shap_values(df_feat=None, shap_values=None, drop=False, shap_feat_importance=False):
"""Check if df_feat matches with importance values"""
if shap_values is None:
raise ValueError(f"'shap_values' is None. Please fit ShapExplainer before adding feature impact.")
raise ValueError(f"'shap_values' is None. Please fit ShapModel before adding feature impact.")
n_feat = len(df_feat)
n_samples, n_feat_imp = shap_values.shape
if n_feat != n_feat_imp:
@@ -237,9 +237,9 @@ def check_match_df_feat_shap_values(df_feat=None, shap_values=None, drop=False,


# II Main Functions
class ShapExplainer:
class ShapModel:
"""
SHAP Explainer class: A wrapper for SHAP (SHapley Additive exPlanations) explainers to obtain Monte Carlo estimates
SHAP Model class: A wrapper for SHAP (SHapley Additive exPlanations) explainers to obtain Monte Carlo estimates
for feature impact [Breimann24c]_.
`SHAP <https://shap.readthedocs.io/en/latest/index.html>`_ is an explainable Artificial Intelligence (AI) framework
@@ -287,7 +287,7 @@ def __init__(self,
Notes
-----
* All attributes are set during fitting via the :meth:`ShapExplainer.fit` method and can be directly accessed.
* All attributes are set during fitting via the :meth:`ShapModel.fit` method and can be directly accessed.
* The Explainer models should be provided from the `SHAP <https://shap.readthedocs.io/en/latest/index.html>`_
package.
* SHAP model fitting messages appear in red and are not controlled by ``verbose``, unlike AAanalysis progress
@@ -314,15 +314,15 @@ def __init__(self,
--------
* :class:`sklearn.ensemble.RandomForestClassifier` for random forest model.
* :class:`sklearn.ensemble.ExtraTreesClassifier` for extra trees model.
* :meth:`ShapExplainer.add_feat_impact` for details on feature impact and SHAP value-based feature importance.
* :meth:`ShapModel.add_feat_impact` for details on feature impact and SHAP value-based feature importance.
Warnings
--------
* This class requires `SHAP`, which is automatically installed via `pip install aaanalysis[pro]`.
Examples
--------
.. include:: examples/shap_explainer.rst
.. include:: examples/shap_model.rst
"""
# Global parameters
verbose = ut.check_verbose(verbose)
@@ -331,7 +331,7 @@ def __init__(self,
if explainer_class is None:
explainer_class = shap.TreeExplainer
explainer_kwargs = explainer_kwargs or dict(model_output="probability")
explainer_kwargs = check_shap_explainer(explainer_class=explainer_class, explainer_kwargs=explainer_kwargs)
explainer_kwargs = check_shap_model(explainer_class=explainer_class, explainer_kwargs=explainer_kwargs)
# Check model parameters
if list_model_classes is None:
list_model_classes = [RandomForestClassifier, ExtraTreesClassifier]
@@ -377,7 +377,7 @@ def fit(self,
is_selected: ut.ArrayLike2D = None,
fuzzy_labeling: bool = False,
n_background_data: Optional[int] = None,
) -> "ShapExplainer":
) -> "ShapModel":
"""
Obtain SHAP values aggregated across prediction models and training rounds.
@@ -403,8 +403,8 @@
Returns
-------
ShapExplainer
The fitted ShapExplainer model instance.
ShapModel
The fitted ShapModel instance.
Notes
-----
2 changes: 1 addition & 1 deletion aaanalysis/feature_engineering/_cpp_plot.py
@@ -239,7 +239,7 @@ def __init__(self,
See Also
--------
* :class:`CPP`: the respective computation class for the **CPP Analysis**.
* :class:`ShapExplainer`: the class combining CPP with the `SHAP <https://shap.readthedocs.io/en/latest/index.html>`_
* :class:`ShapModel`: the class combining CPP with the `SHAP <https://shap.readthedocs.io/en/latest/index.html>`_
explainable Artificial Intelligence (AI) framework.
* `Anatomy of a figure <https://matplotlib.org/stable/gallery/showcase/anatomy.html>`_ matplotlib guide on
figure elements.
24 changes: 24 additions & 0 deletions docs/source/index/release_notes.rst
@@ -4,6 +4,30 @@ Release notes
Version 0.1 (Beta Version)
--------------------------

NOT RELEASED v0.1.6 (2024-07-XX)
--------------------------------

Added
~~~~~
- **SequencePreprocessor**: A utility data preprocessing class (data handling module).
- **comp_seq_sim**: A function for computing pairwise sequence similarity (data handling module).
- **filter_seq**: A function for redundancy-reduction of sequences (data handling module).
- **options**: The Juxta Middle Domain (JMD) length can now be globally adjusted using the **jmd_n/c_len** options (see the sketch below).
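
A minimal sketch of the new global options (the ``aa.options`` dictionary and the exact
``jmd_n_len``/``jmd_c_len`` key names are assumptions based on the option names above):

.. code-block:: python

    import aaanalysis as aa

    # Set the global N- and C-terminal JMD lengths once; subsequent feature
    # engineering calls are assumed to pick these values up as their defaults.
    aa.options["jmd_n_len"] = 12
    aa.options["jmd_c_len"] = 12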

Changed
~~~~~~~
- **ShapModel**: The **ShapExplainer** class has been renamed to **ShapModel** for consistency with the **TreeModel**
class and to avoid confusion with the explainer classes from the
`SHAP <https://shap.readthedocs.io/en/latest/index.html>`_ package (see the sketch below).
- **Requirements**: Biopython is now a required dependency only for the **aaanalysis[pro]** version.
- **Module Renaming**: The **Perturbation** module has been renamed to the **Protein Design** module
to better reflect its broad functionality.
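
A minimal usage sketch of the renamed class (requires ``pip install aaanalysis[pro]``; the toy data
and the ``fit`` argument names are assumptions for illustration):

.. code-block:: python

    import numpy as np
    import aaanalysis as aa

    # Toy data: 50 samples with 20 features and binary labels.
    X = np.random.rand(50, 20)
    labels = np.random.randint(0, 2, size=50)

    # Code that previously instantiated aa.ShapExplainer() now uses aa.ShapModel();
    # only the class name changes, the workflow stays the same.
    sm = aa.ShapModel()
    sm.fit(X, labels=labels)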

Fixed
~~~~~
- **Multiprocessing**: Now supported when aaanalysis routines are called directly at the top level of a
script (global namespace), outside of any functions or classes (see the sketch below).
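
A minimal sketch of what this fix enables (the dataset name and the ``filter_seq`` arguments are
assumptions for illustration):

.. code-block:: python

    import aaanalysis as aa

    # Called directly in the global namespace of a script, i.e., not wrapped in a
    # function, class, or ``if __name__ == "__main__":`` guard.
    df_seq = aa.load_dataset(name="SEQ_AMYLO")
    df_filtered = aa.filter_seq(df_seq=df_seq)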

v0.1.5 (2024-04-18)
-------------------

File renamed without changes.
