From 7a062332a22ac95e25907ce3b612e040c01c2cc6 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Mon, 15 May 2023 20:39:12 +0200
Subject: [PATCH 01/22] BUILD: change flake8 repo to GitHub

---
 .pre-commit-config.yaml | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7b19cd7f1..dc0cd9cb5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,14 +10,16 @@ repos:
       - id: black
         language_version: python3
 
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.0
+  - repo: https://github.com/pycqa/flake8
+    rev: 5.0.4
     hooks:
       - id: flake8
         name: flake8
         entry: flake8 --config tox.ini
-        language: python_venv
-        additional_dependencies: [ flake8-comprehensions, flake8-import-order ]
+        language: python
+        language_version: python39
+        additional_dependencies:
+          - flake8-comprehensions ~= 3.10
         types: [ python ]
 
   - repo: https://github.com/pre-commit/pre-commit-hooks

From 011d472eb781c8749e28589cca23779e1b65eb0c Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Mon, 15 May 2023 21:28:51 +0200
Subject: [PATCH 02/22] BUILD: bump flake8 to ~=5.0

---
 azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 9b858e013..2f52a485a 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -76,7 +76,7 @@ stages:
               versionSpec: '3.7.*'
             displayName: 'use Python 3.7'
           - script: |
-              python -m pip install flake8==3.9.0 flake8-comprehensions flake8-import-order
+              python -m pip install flake8~=5.0 flake8-comprehensions~=3.10
               python -m flake8 --config tox.ini -v .
             displayName: 'Run flake8'
 

From b481df057226ecbc45f108e93863c6ccb4ff3d68 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Mon, 15 May 2023 21:40:29 +0200
Subject: [PATCH 03/22] BUILD: disallow numpy >=1.24, which is incompatible
 with shap <=0.40

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ab020ed43..03469d806 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ requires = [
     # direct requirements of gamma-facet
     "gamma-pytools  ~=1.2,>=1.2.1",
     "matplotlib     ~=3.0,<3.6a",
-    "numpy          >=1.17,<2a",
+    "numpy          >=1.17,<1.24a",
     "packaging      >=20",
     "pandas         >=0.24,<2a",
     "scipy          ~=1.2,<1.9a",
@@ -94,7 +94,7 @@ typing-extensions = "<4.2"
 # direct requirements of gamma-facet
 gamma-pytools  = "~=1.2,>=1.2.1"
 matplotlib     = "~=3.5.2"
-numpy          = ">=1.22,<2a"
+numpy          = ">=1.22,<1.24a"
 packaging      = ">=20.9"
 pandas         = "~=1.4"
 python         = "~=3.8"

From 1f0881ea096d546df093f927b19fb2ad5b1e0935 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Wed, 17 May 2023 10:49:40 +0200
Subject: [PATCH 04/22] BUILD: update version to 2.0.1

---
 src/facet/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/facet/__init__.py b/src/facet/__init__.py
index e59d00761..1843b247b 100644
--- a/src/facet/__init__.py
+++ b/src/facet/__init__.py
@@ -6,7 +6,7 @@
 """
 
 
-__version__ = "2.0.0"
+__version__ = "2.0.1"
 
 __logo__ = (
     r"""

From 1bd64edaffab95f200d813335114177e21aba9b8 Mon Sep 17 00:00:00 2001
From: Jan Ittner <ittner.jan@bcg.com>
Date: Wed, 24 May 2023 07:17:05 +0200
Subject: [PATCH 05/22] BUILD: use mamba for faster conda builds (#365)

---
 azure-pipelines.yml | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 4fe6aab88..4ee65fe20 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -243,7 +243,15 @@ stages:
           - script: dir $(Build.SourcesDirectory)
 
           - script: |
-              conda install -y -c anaconda conda-build~=3.21 conda-verify~=3.4 toml~=0.10 flit~=3.6 packaging~=20.9
+              # install micromamba
+              curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
+              export MAMBA_ROOT_PREFIX=~/micromamba
+              eval "$(./bin/micromamba shell hook -s posix)"
+              
+              # create and activate a build environment, then install the tools we need
+              micromamba create -n build
+              micromamba activate build
+              micromamba install -y -c conda-forge boa~=0.14 toml~=0.10 flit~=3.6 packaging~=20.9
             displayName: 'Install conda-build, flit, toml'
             condition: eq(variables['BUILD_SYSTEM'], 'conda')
 
@@ -261,7 +269,11 @@ stages:
               targetType: 'inline'
               script: |
                 set -eux
-                if [ "$BUILD_SYSTEM" = "conda" ] ; then eval "$(conda shell.bash hook)" ; fi
+                if [ "$BUILD_SYSTEM" = "conda" ] ; then
+                  export MAMBA_ROOT_PREFIX=~/micromamba
+                  eval "$(./bin/micromamba shell hook -s posix)"
+                  micromamba activate build
+                fi
                 export RUN_PACKAGE_VERSION_TEST=$(project_name)
 
                 cd $(Build.SourcesDirectory)/$(project_root)
@@ -348,7 +360,11 @@ stages:
               targetType: 'inline'
               script: |
                 set -eux
-                if [ "$BUILD_SYSTEM" = "conda" ] ; then eval "$(conda shell.bash hook)" ; fi
+                if [ "$BUILD_SYSTEM" = "conda" ] ; then
+                  export MAMBA_ROOT_PREFIX=~/micromamba
+                  eval "$(./bin/micromamba shell hook -s posix)"
+                  micromamba activate build
+                fi
                 export RUN_PACKAGE_VERSION_TEST=$(project_name)
 
                 cd $(Build.SourcesDirectory)/$(project_root)

From ea41a47c326ecdcaf591f4a51f3dc83f98f7cbec Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Wed, 24 May 2023 18:26:27 +0200
Subject: [PATCH 06/22] BUILD: simplify development environment

---
 environment.yml | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/environment.yml b/environment.yml
index 2ffe73f95..501f6843a 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,12 +4,11 @@ channels:
   - bcg_gamma
 dependencies:
   # run
-  - boruta_py         ~= 0.3
   - gamma-pytools     ~= 2.1
   - joblib            ~= 1.2
   - lightgbm          ~= 3.3
   - matplotlib        ~= 3.7
-  - numpy             ~= 1.23
+  - numpy             ~= 1.24
   - pandas            ~= 2.0
   - python            ~= 3.9
   - scikit-learn      ~= 1.2.0
@@ -17,22 +16,13 @@ dependencies:
   - shap              ~= 0.41
   - sklearndf         ~= 2.2
   - typing_extensions ~= 4.3
-  # build/test
-  - conda-build  ~= 3.23.3
-  - conda-verify ~= 3.1.1
-  - docutils     ~= 0.17.1
-  - flit         ~= 3.8.0
-  - jinja2       ~= 2.11.3
-  - markupsafe   ~= 2.0.1  # markupsafe 2.1 breaks support for jinja2
-  - m2r          ~= 0.3.1
-  - pluggy       ~= 0.13.1
-  - pre-commit   ~= 2.21.0
-  - pytest       ~= 7.2.1
-  - pytest-cov   ~= 2.12.1
-  - pyyaml       ~= 5.4.1
-  - toml         ~= 0.10.2
-  - tox          ~= 3.27.1
-  - yaml         ~= 0.2.5
+  # additional packages for notebooks etc.
+  - pip     ~= 23.0
+  - pip:
+     - arfs ~= 1.1
+  # test
+  - pytest     ~= 7.2.1
+  - pytest-cov ~= 2.12.1
   # sphinx
   - nbsphinx                 ~= 0.8.9
   - sphinx                   ~= 4.5.0

From b3893932d595c4915cf87a8f30c6d7b0fdb856db Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Thu, 25 May 2023 10:34:49 +0200
Subject: [PATCH 07/22] FIX: install micromamba for nightly builds

---
 azure-pipelines.yml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 4ee65fe20..a0a123338 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -342,7 +342,15 @@ stages:
           - script: dir $(Build.SourcesDirectory)
 
           - script: |
-              conda install -y -c anaconda conda-build~=3.21 conda-verify~=3.4 toml~=0.10 flit~=3.6 packaging~=20.9
+              # install micromamba
+              curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
+              export MAMBA_ROOT_PREFIX=~/micromamba
+              eval "$(./bin/micromamba shell hook -s posix)"
+              
+              # create and activate a build environment, then install the tools we need
+              micromamba create -n build
+              micromamba activate build
+              micromamba install -y -c conda-forge boa~=0.14 toml~=0.10 flit~=3.6 packaging~=20.9
             displayName: 'Install conda-build, flit, toml'
             condition: eq(variables['BUILD_SYSTEM'], 'conda')
 

From 6749064167fd8b3b8fbfa115fe41a0c8d5732e7f Mon Sep 17 00:00:00 2001
From: Jan Ittner <ittner.jan@bcg.com>
Date: Wed, 5 Jul 2023 06:28:00 +0200
Subject: [PATCH 08/22] FIX: refer to arg model (not pipeline) in
 LearnerInspector exceptions (#367)

---
 src/facet/inspection/_learner_inspector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/facet/inspection/_learner_inspector.py b/src/facet/inspection/_learner_inspector.py
index 28412f231..500c9a345 100644
--- a/src/facet/inspection/_learner_inspector.py
+++ b/src/facet/inspection/_learner_inspector.py
@@ -102,7 +102,7 @@ def __init__(
         """
 
         if not model.is_fitted:
-            raise ValueError("arg pipeline must be fitted")
+            raise ValueError("arg model must be fitted")
 
         final_estimator: T_SupervisedLearnerDF = model.final_estimator
         if is_classifier(final_estimator):
@@ -119,7 +119,7 @@ def __init__(
                     )
         elif not is_regressor(final_estimator):
             raise TypeError(
-                "learner in arg pipeline must be a classifier or a regressor,"
+                "learner in arg model must be a classifier or a regressor,"
                 f"but is a {type(final_estimator).__name__}"
             )
 

From b9f6b73e7eca0d568e8ceb7c376f851cae1dc073 Mon Sep 17 00:00:00 2001
From: Jan Ittner <ittner.jan@bcg.com>
Date: Wed, 5 Jul 2023 13:37:13 +0200
Subject: [PATCH 09/22] API: support simple (non-pipeline) learners in
 LearnerInspector (#368)

* API: support simple (non-pipeline) learners in LearnerInspector

* API: raise a TypeError if arg model is an unexpected type

* TEST: test LearnerInspector with a simple classifier

* FIX: correctly handle simple learners throughout LearnerInspector

* DOC: improve parameter documentation of LearnerInspector
---
 RELEASE_NOTES.rst                          |  8 ++++
 src/facet/inspection/_learner_inspector.py | 51 +++++++++++++++++-----
 test/test/facet/test_inspection.py         |  9 ++--
 3 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
index 4944dccb8..4798cd01f 100644
--- a/RELEASE_NOTES.rst
+++ b/RELEASE_NOTES.rst
@@ -18,6 +18,14 @@ visualizations.
 FACET 2.0 requires :mod:`pytools` |nbsp| 2.0 and :mod:`sklearndf` |nbsp| 2.2, and is now
 fully type-checked by |mypy|.
 
+2.0.1
+~~~~~
+
+- API: class :class:`.LearnerInspector` now supports inspecting individual regressors
+  and classifiers; it is no longer necessary to wrap them into a
+  :class:`.RegressorPipelineDF` or :class:`.ClassifierPipelineDF` instance with empty
+  preprocessing
+
 
 2.0.0
 ~~~~~
diff --git a/src/facet/inspection/_learner_inspector.py b/src/facet/inspection/_learner_inspector.py
index 500c9a345..7fb496fad 100644
--- a/src/facet/inspection/_learner_inspector.py
+++ b/src/facet/inspection/_learner_inspector.py
@@ -76,6 +76,12 @@ class LearnerInspector(
     #: The factory instance used to create the explainer for the learner.
     explainer_factory: ExplainerFactory[NativeSupervisedLearner]
 
+    #: The learner being inspected.
+    #:
+    #: If the model is a pipeline, this is the final estimator in the pipeline;
+    #: otherwise, it is the model itself.
+    learner: SupervisedLearnerDF
+
     # defined in superclass, repeated here for Sphinx:
     model: T_SupervisedLearnerDF
     shap_interaction: bool
@@ -86,7 +92,7 @@ class LearnerInspector(
 
     def __init__(
         self,
-        model: SupervisedLearnerPipelineDF[T_SupervisedLearnerDF],
+        model: T_SupervisedLearnerDF,
         *,
         explainer_factory: Optional[ExplainerFactory[NativeSupervisedLearner]] = None,
         shap_interaction: bool = True,
@@ -96,7 +102,11 @@ def __init__(
         verbose: Optional[int] = None,
     ) -> None:
         """
-        :param model: the learner pipeline to inspect
+        :param model: the learner or learner pipeline to inspect (typically, one of
+            a :class:`~sklearndf.pipeline.ClassifierPipelineDF`,
+            :class:`~sklearndf.pipeline.RegressorPipelineDF`,
+            :class:`~sklearndf.classification.ClassifierDF`, or
+            :class:`~sklearndf.regression.RegressorDF`)
         :param explainer_factory: optional function that creates a shap Explainer
             (default: ``TreeExplainerFactory``)
         """
@@ -104,10 +114,22 @@ def __init__(
         if not model.is_fitted:
             raise ValueError("arg model must be fitted")
 
-        final_estimator: T_SupervisedLearnerDF = model.final_estimator
-        if is_classifier(final_estimator):
+        learner: SupervisedLearnerDF
+
+        if isinstance(model, SupervisedLearnerPipelineDF):
+            learner = model.final_estimator
+        elif isinstance(model, SupervisedLearnerDF):
+            learner = model
+        else:
+            raise TypeError(
+                "arg model must be a SupervisedLearnerPipelineDF or a "
+                f"SupervisedLearnerDF, but is a {type(model).__name__}"
+            )
+        self.learner = learner
+
+        if is_classifier(learner):
             try:
-                n_outputs = final_estimator.n_outputs_
+                n_outputs = learner.n_outputs_
             except AttributeError:
                 pass
             else:
@@ -115,12 +137,12 @@ def __init__(
                     raise ValueError(
                         "only single-target classifiers (binary or multi-class) are "
                         "supported, but the given classifier has been fitted on "
-                        f"multiple targets: {', '.join(final_estimator.output_names_)}"
+                        f"multiple targets: {', '.join(learner.output_names_)}"
                     )
-        elif not is_regressor(final_estimator):
+        elif not is_regressor(learner):
             raise TypeError(
                 "learner in arg model must be a classifier or a regressor,"
-                f"but is a {type(final_estimator).__name__}"
+                f"but is a {type(learner).__name__}"
             )
 
         if explainer_factory:
@@ -162,14 +184,19 @@ def feature_names(self) -> List[str]:
         """[see superclass]"""
         return cast(
             List[str],
-            self.model.final_estimator.feature_names_in_.to_list(),
+            self.learner.feature_names_in_.to_list(),
         )
 
     def preprocess_features(
         self, features: Union[pd.DataFrame, pd.Series]
     ) -> pd.DataFrame:
         """[see superclass]"""
-        return self.model.preprocess(features)
+        if self.model is self.learner:
+            # we have a simple learner: no preprocessing needed
+            return features
+        else:
+            # we have a pipeline: preprocess features
+            return self.model.preprocess(features)
 
     @property
     def shap_calculator(self) -> LearnerShapCalculator[Any]:
@@ -178,10 +205,10 @@ def shap_calculator(self) -> LearnerShapCalculator[Any]:
         if self._shap_calculator is not None:
             return self._shap_calculator
 
-        learner: SupervisedLearnerDF = self.model.final_estimator
+        learner: SupervisedLearnerDF = self.learner
 
         shap_calculator_params: Dict[str, Any] = dict(
-            model=self.model.final_estimator.native_estimator,
+            model=self.learner.native_estimator,
             interaction_values=self.shap_interaction,
             explainer_factory=self.explainer_factory,
             n_jobs=self.n_jobs,
diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py
index 23fab0e45..04c0c6ec6 100644
--- a/test/test/facet/test_inspection.py
+++ b/test/test/facet/test_inspection.py
@@ -161,7 +161,6 @@ def test_binary_classifier_ranking(
         ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV
     ]
 ) -> None:
-
     expected_learner_scores = [0.938, 0.936, 0.936, 0.929]
 
     ranking = iris_classifier_selector_binary.summary_report()
@@ -185,7 +184,6 @@ def test_model_inspection_classifier_binary(
     iris_sample_binary: Sample,
     n_jobs: int,
 ) -> None:
-
     model_inspector = LearnerInspector(
         model=iris_classifier_binary,
         shap_interaction=False,
@@ -261,7 +259,9 @@ def test_model_inspection_classifier_binary_single_shap_output(n_jobs: int) -> N
 
 # noinspection DuplicatedCode
 def test_model_inspection_classifier_multi_class(
-    iris_inspector_multi_class: LearnerInspector[RandomForestClassifierDF],
+    iris_inspector_multi_class: LearnerInspector[
+        ClassifierPipelineDF[RandomForestClassifierDF]
+    ],
 ) -> None:
     iris_classifier = iris_inspector_multi_class.model
     iris_sample = iris_inspector_multi_class.sample_
@@ -382,7 +382,6 @@ def test_model_inspection_classifier_multi_class(
 def _validate_shap_values_against_predictions(
     shap_values: pd.DataFrame, model: ClassifierDF, sample: Sample
 ) -> None:
-
     # calculate the matching predictions, so we can check if the SHAP values add up
     # correctly
     predicted_probabilities: pd.DataFrame = model.predict_proba(sample.features)
@@ -447,7 +446,7 @@ def test_model_inspection_classifier_interaction(
     warnings.filterwarnings("ignore", message="You are accessing a training score")
 
     model_inspector = LearnerInspector(
-        model=iris_classifier_binary,
+        model=iris_classifier_binary.final_estimator,
         explainer_factory=TreeExplainerFactory(
             feature_perturbation="tree_path_dependent", uses_background_dataset=True
         ),

From 229d32390a7b9a9fc220ad5ddcdf2897ab973154 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Wed, 5 Jul 2023 15:36:09 +0200
Subject: [PATCH 10/22] BUILD: change version to 2.1rc0

---
 src/facet/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/facet/__init__.py b/src/facet/__init__.py
index 1843b247b..6fa5211f5 100644
--- a/src/facet/__init__.py
+++ b/src/facet/__init__.py
@@ -6,7 +6,7 @@
 """
 
 
-__version__ = "2.0.1"
+__version__ = "2.1rc0"
 
 __logo__ = (
     r"""

From b9489d826b47053cfd1ec22ae6d89266707175dd Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Wed, 5 Jul 2023 15:43:51 +0200
Subject: [PATCH 11/22] DOC: update release notes

---
 RELEASE_NOTES.rst | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
index 4798cd01f..477230c01 100644
--- a/RELEASE_NOTES.rst
+++ b/RELEASE_NOTES.rst
@@ -1,4 +1,4 @@
-Release Notes
+Release Notes
 =============
 
 .. |mypy| replace:: :external+mypy:doc:`mypy <index>`
@@ -6,6 +6,25 @@ Release Notes
 .. |nbsp| unicode:: 0xA0
    :trim:
 
+FACET 2.1
+---------
+
+FACET 2.1 introduces the :class:`.NativeLearnerInspector` for inspecting native
+*scikit-learn* models and pipelines.
+
+We still recommend using *sklearndf* models and learner pipelines and FACET's
+:class:`.LearnerSelector` for hyperparameter tuning; however, the new
+:class:`.NativeLearnerInspector` can be useful for inspecting models that have been
+trained using *scikit-learn* directly.
+
+2.1.0
+~~~~~
+
+- API: new :class:`.NativeLearnerInspector` class for inspecting native *scikit-learn*
+  regressors, classifiers, and pipelines with a regressor or classifier as the final
+  estimator
+
+
 FACET 2.0
 ---------
 

From 5fe3110b010fff98c8944e317325553227ed853c Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Wed, 5 Jul 2023 15:48:27 +0200
Subject: [PATCH 12/22] REFACTOR: reformat code with newest `black`

---
 src/facet/explanation/_explanation.py         | 1 +
 src/facet/inspection/shap/_shap.py            | 1 -
 src/facet/inspection/shap/sklearn/_sklearn.py | 2 --
 src/facet/selection/_parameters.py            | 3 ---
 src/facet/validation/_validation.py           | 2 --
 5 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/facet/explanation/_explanation.py b/src/facet/explanation/_explanation.py
index 9da485a88..25116f6cb 100644
--- a/src/facet/explanation/_explanation.py
+++ b/src/facet/explanation/_explanation.py
@@ -344,6 +344,7 @@ def to_expression(self) -> Expression:
 # Exact explainer factory
 #
 
+
 # noinspection PyPep8Naming
 class _ExactExplainer(
     shap.explainers.Exact,  # type: ignore
diff --git a/src/facet/inspection/shap/_shap.py b/src/facet/inspection/shap/_shap.py
index dfd3a4ac5..671821a85 100644
--- a/src/facet/inspection/shap/_shap.py
+++ b/src/facet/inspection/shap/_shap.py
@@ -290,7 +290,6 @@ def _reset_fit(self) -> None:
         self.output_names_ = None
 
     def _make_explainer(self, features: pd.DataFrame) -> BaseExplainer:
-
         # prepare the background dataset
 
         background_dataset: Optional[pd.DataFrame]
diff --git a/src/facet/inspection/shap/sklearn/_sklearn.py b/src/facet/inspection/shap/sklearn/_sklearn.py
index 80b5aaa02..ea6f1b2fa 100644
--- a/src/facet/inspection/shap/sklearn/_sklearn.py
+++ b/src/facet/inspection/shap/sklearn/_sklearn.py
@@ -241,7 +241,6 @@ def _convert_shap_tensors_to_list(
         shap_tensors: Union[npt.NDArray[np.float_], List[npt.NDArray[np.float_]]],
         n_outputs: int,
     ) -> List[npt.NDArray[np.float_]]:
-
         if n_outputs == 1 and isinstance(shap_tensors, list) and len(shap_tensors) == 2:
             # in the binary classification case, we will proceed with SHAP values
             # for class 0 only, since values for class 1 will just be the same
@@ -273,7 +272,6 @@ def _convert_shap_to_df(
         observation_idx: pd.Index,
         feature_idx: pd.Index,
     ) -> List[pd.DataFrame]:
-
         if self.interaction_values:
             # return a list of data frame [(obs x features) x features],
             # one for each of the outputs
diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py
index 778d0e02d..a872aefce 100644
--- a/src/facet/selection/_parameters.py
+++ b/src/facet/selection/_parameters.py
@@ -185,7 +185,6 @@ def get_parameters(self, prefix: Optional[str] = None) -> ParameterDict:
         }
 
     def _validate_parameter(self, name: str, value: ParameterSet) -> None:
-
         if name not in self._params:
             raise AttributeError(
                 f"unknown parameter name for "
@@ -222,7 +221,6 @@ def __dir__(self) -> Iterable[str]:
 
     def __getattr__(self, key: str) -> Any:
         if not key.startswith("_"):
-
             result: Union[ParameterSpace[Any], ParameterSet, None]
 
             result = self._children.get(key, None)
@@ -241,7 +239,6 @@ def __iter__(self) -> Iterator[Tuple[List[str], ParameterSet]]:
     def _iter_parameters(
         self, path_prefix: List[str]
     ) -> Iterator[Tuple[List[str], ParameterSet]]:
-
         yield from (
             ([*path_prefix, name], value) for name, value in self._values.items()
         )
diff --git a/src/facet/validation/_validation.py b/src/facet/validation/_validation.py
index ffe927e60..4c45fef83 100644
--- a/src/facet/validation/_validation.py
+++ b/src/facet/validation/_validation.py
@@ -115,7 +115,6 @@ def split(
                 test: npt.NDArray[np.int_] = indices[test_mask]
                 # make sure test is not empty, else sample another train set
                 if len(test) > 0:
-
                     yield train, test
                     break
 
@@ -254,7 +253,6 @@ def _select_train_indices(
         random_state: np.random.RandomState,
         y: Union[npt.NDArray[Any], pd.Series, pd.DataFrame, None],
     ) -> npt.NDArray[np.int_]:
-
         mean_block_size = self.mean_block_size
         if mean_block_size < 1:
             # if mean block size was set as a percentage, calculate the actual mean

From bfa5da709b3977a715520a7db41a6de6134935c0 Mon Sep 17 00:00:00 2001
From: Jan Ittner <ittner.jan@bcg.com>
Date: Mon, 10 Jul 2023 08:49:29 +0200
Subject: [PATCH 13/22] API: add support for inspecting native scikit-learn
 learners and learner pipelines (#369)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* API: preserve row order in ShapCalculator output

* TEST: suppress numba debug messages

* API: add class NativeLearnerInspector for native scikit-learn learners

* REFACTOR: pull learner inspector initializer up to base class

* REFACTOR: remove obsolete LearnerInspector.shap_calculator()

* REFACTOR: remove duplicated shap_calculator method (#370)

---------

Co-authored-by: Mateusz Sokół <8431159+mtsokol@users.noreply.github.com>
---
 src/facet/inspection/_learner_inspector.py | 346 +++++++++++++++++----
 src/facet/inspection/shap/_shap.py         |   2 +-
 test/test/conftest.py                      |   3 +-
 test/test/facet/test_inspection.py         | 101 +++++-
 4 files changed, 384 insertions(+), 68 deletions(-)

diff --git a/src/facet/inspection/_learner_inspector.py b/src/facet/inspection/_learner_inspector.py
index 7fb496fad..a3f0e6818 100644
--- a/src/facet/inspection/_learner_inspector.py
+++ b/src/facet/inspection/_learner_inspector.py
@@ -3,10 +3,12 @@
 """
 import logging
 import re
+from abc import ABCMeta, abstractmethod
 from typing import Any, Dict, Generic, List, Optional, TypeVar, Union, cast
 
 import pandas as pd
-from sklearn.base import is_classifier, is_regressor
+from sklearn.base import BaseEstimator, is_classifier, is_regressor
+from sklearn.pipeline import Pipeline
 
 from pytools.api import AllTracker, inheritdoc, subsdoc
 from sklearndf import SupervisedLearnerDF
@@ -26,6 +28,7 @@
 
 __all__ = [
     "LearnerInspector",
+    "NativeLearnerInspector",
 ]
 
 
@@ -34,7 +37,9 @@
 #
 
 T_SupervisedLearnerDF = TypeVar("T_SupervisedLearnerDF", bound=SupervisedLearnerDF)
-
+T_SupervisedLearner = TypeVar(
+    "T_SupervisedLearner", bound=Union[NativeSupervisedLearner, Pipeline]
+)
 
 #
 # Ensure all symbols introduced below are included in __all__
@@ -61,8 +66,8 @@
     replacement="Explain a regressor or classifier based on SHAP",
 )
 @inheritdoc(match="""[see superclass]""")
-class LearnerInspector(
-    ModelInspector[T_SupervisedLearnerDF], Generic[T_SupervisedLearnerDF]
+class _BaseLearnerInspector(
+    ModelInspector[T_SupervisedLearner], Generic[T_SupervisedLearner], metaclass=ABCMeta
 ):
     """[see superclass]"""
 
@@ -76,23 +81,17 @@ class LearnerInspector(
     #: The factory instance used to create the explainer for the learner.
     explainer_factory: ExplainerFactory[NativeSupervisedLearner]
 
-    #: The learner being inspected.
-    #:
-    #: If the model is a pipeline, this is the final estimator in the pipeline;
-    #: otherwise, it is the model itself.
-    learner: SupervisedLearnerDF
+    #: the supervised learner to inspect; this is either identical with
+    #: :attr:`model`, or the final estimator of :attr:`model` if :attr:`model`
+    #: is a pipeline
+    learner: NativeSupervisedLearner
 
-    # defined in superclass, repeated here for Sphinx:
-    model: T_SupervisedLearnerDF
-    shap_interaction: bool
-    n_jobs: Optional[int]
-    shared_memory: Optional[bool]
-    pre_dispatch: Optional[Union[str, int]]
-    verbose: Optional[int]
+    # the SHAP calculator used by this inspector
+    _shap_calculator: Optional[LearnerShapCalculator[Any]]
 
     def __init__(
         self,
-        model: T_SupervisedLearnerDF,
+        model: T_SupervisedLearner,
         *,
         explainer_factory: Optional[ExplainerFactory[NativeSupervisedLearner]] = None,
         shap_interaction: bool = True,
@@ -102,33 +101,20 @@ def __init__(
         verbose: Optional[int] = None,
     ) -> None:
         """
-        :param model: the learner or learner pipeline to inspect (typically, one of
-            a :class:`~sklearndf.pipeline.ClassifierPipelineDF`,
-            :class:`~sklearndf.pipeline.RegressorPipelineDF`,
-            :class:`~sklearndf.classification.ClassifierDF`, or
-            :class:`~sklearndf.regression.RegressorDF`)
+        :param model: the learner or learner pipeline to inspect
         :param explainer_factory: optional function that creates a shap Explainer
             (default: ``TreeExplainerFactory``)
         """
 
-        if not model.is_fitted:
+        fitted = self._is_model_fitted(model)
+        if not fitted:
             raise ValueError("arg model must be fitted")
 
-        learner: SupervisedLearnerDF
-
-        if isinstance(model, SupervisedLearnerPipelineDF):
-            learner = model.final_estimator
-        elif isinstance(model, SupervisedLearnerDF):
-            learner = model
-        else:
-            raise TypeError(
-                "arg model must be a SupervisedLearnerPipelineDF or a "
-                f"SupervisedLearnerDF, but is a {type(model).__name__}"
-            )
-        self.learner = learner
+        learner = self._get_learner(model)
 
         if is_classifier(learner):
             try:
+                # noinspection PyUnresolvedReferences
                 n_outputs = learner.n_outputs_
             except AttributeError:
                 pass
@@ -137,7 +123,7 @@ def __init__(
                     raise ValueError(
                         "only single-target classifiers (binary or multi-class) are "
                         "supported, but the given classifier has been fitted on "
-                        f"multiple targets: {', '.join(learner.output_names_)}"
+                        f"multiple targets: {', '.join(model.output_names_)}"
                     )
         elif not is_regressor(learner):
             raise TypeError(
@@ -173,31 +159,31 @@ def __init__(
         )
 
         self.explainer_factory = explainer_factory
+        self.learner = learner
         self._shap_calculator: Optional[LearnerShapCalculator[Any]] = None
 
     __init__.__doc__ = str(__init__.__doc__) + re.sub(
         r"(?m)^\s*:param model:\s+.*$", "", str(ModelInspector.__init__.__doc__)
     )
 
+    @property
+    @abstractmethod
+    def native_learner(self) -> NativeSupervisedLearner:
+        """
+        The native learner to inspect.
+        """
+
     @property
     def feature_names(self) -> List[str]:
         """[see superclass]"""
+        # noinspection PyUnresolvedReferences
         return cast(
             List[str],
-            self.learner.feature_names_in_.to_list(),
+            # feature_names_in_ is a pandas index (sklearndf) or an ndarray (sklearn);
+            # we convert it to a list
+            self.learner.feature_names_in_.tolist(),
         )
 
-    def preprocess_features(
-        self, features: Union[pd.DataFrame, pd.Series]
-    ) -> pd.DataFrame:
-        """[see superclass]"""
-        if self.model is self.learner:
-            # we have a simple learner: no preprocessing needed
-            return features
-        else:
-            # we have a pipeline: preprocess features
-            return self.model.preprocess(features)
-
     @property
     def shap_calculator(self) -> LearnerShapCalculator[Any]:
         """[see superclass]"""
@@ -205,10 +191,10 @@ def shap_calculator(self) -> LearnerShapCalculator[Any]:
         if self._shap_calculator is not None:
             return self._shap_calculator
 
-        learner: SupervisedLearnerDF = self.learner
+        native_learner = self.native_learner
 
         shap_calculator_params: Dict[str, Any] = dict(
-            model=self.learner.native_estimator,
+            model=native_learner,
             interaction_values=self.shap_interaction,
             explainer_factory=self.explainer_factory,
             n_jobs=self.n_jobs,
@@ -218,15 +204,271 @@ def shap_calculator(self) -> LearnerShapCalculator[Any]:
         )
 
         shap_calculator: LearnerShapCalculator[Any]
-        if is_classifier(learner):
+        if is_classifier(native_learner):
             shap_calculator = ClassifierShapCalculator(**shap_calculator_params)
         else:
             shap_calculator = RegressorShapCalculator(
-                **shap_calculator_params, output_names=learner.output_names_
+                **shap_calculator_params, output_names=self._learner_output_names
             )
 
         self._shap_calculator = shap_calculator
         return shap_calculator
 
+    @property
+    @abstractmethod
+    def _learner_output_names(self) -> List[str]:
+        """
+        The names of the outputs of the learner.
+        """
+        pass
+
+    @staticmethod
+    @abstractmethod
+    def _is_model_fitted(model: T_SupervisedLearner) -> bool:
+        # return True if the model is fitted, False otherwise
+        pass
+
+    @staticmethod
+    @abstractmethod
+    def _get_learner(model: T_SupervisedLearner) -> NativeSupervisedLearner:
+        # get the learner from the model, which may be a pipeline
+        # that includes additional preprocessing steps
+        pass
+
+
+@inheritdoc(match="""[see superclass]""")
+class LearnerInspector(
+    _BaseLearnerInspector[T_SupervisedLearnerDF], Generic[T_SupervisedLearnerDF]
+):
+    """[see superclass]"""
+
+    # defined in superclass, repeated here for Sphinx:
+    model: T_SupervisedLearnerDF
+    shap_interaction: bool
+    n_jobs: Optional[int]
+    shared_memory: Optional[bool]
+    pre_dispatch: Optional[Union[str, int]]
+    verbose: Optional[int]
+    explainer_factory: ExplainerFactory[NativeSupervisedLearner]
+    learner: SupervisedLearnerDF
+
+    @subsdoc(
+        pattern=r"(?m)^(\s*:param model:\s+.*)$",
+        replacement=r"""\1 (typically, one of
+    a :class:`~sklearndf.pipeline.ClassifierPipelineDF`,
+    :class:`~sklearndf.pipeline.RegressorPipelineDF`,
+    :class:`~sklearndf.classification.ClassifierDF`, or
+    :class:`~sklearndf.regression.RegressorDF`)""",
+        using=_BaseLearnerInspector.__init__,
+    )
+    def __init__(
+        self,
+        model: T_SupervisedLearnerDF,
+        *,
+        explainer_factory: Optional[ExplainerFactory[NativeSupervisedLearner]] = None,
+        shap_interaction: bool = True,
+        n_jobs: Optional[int] = None,
+        shared_memory: Optional[bool] = None,
+        pre_dispatch: Optional[Union[str, int]] = None,
+        verbose: Optional[int] = None,
+    ) -> None:
+        super().__init__(
+            model=model,
+            explainer_factory=explainer_factory,
+            shap_interaction=shap_interaction,
+            n_jobs=n_jobs,
+            shared_memory=shared_memory,
+            pre_dispatch=pre_dispatch,
+            verbose=verbose,
+        )
+
+    @property
+    def native_learner(self) -> NativeSupervisedLearner:
+        """[see superclass]"""
+        return cast(NativeSupervisedLearner, self.learner.native_estimator)
+
+    @property
+    def _learner_output_names(self) -> List[str]:
+        """[see superclass]"""
+        return self.learner.output_names_
+
+    def preprocess_features(
+        self, features: Union[pd.DataFrame, pd.Series]
+    ) -> pd.DataFrame:
+        """[see superclass]"""
+        if self.model is self.learner:
+            # we have a simple learner: no preprocessing needed
+            return features
+        else:
+            # we have a pipeline: preprocess features
+            return self.model.preprocess(features)
+
+    @staticmethod
+    def _is_model_fitted(model: T_SupervisedLearnerDF) -> bool:
+        return model.is_fitted
+
+    @staticmethod
+    def _get_learner(model: T_SupervisedLearnerDF) -> SupervisedLearnerDF:
+        if isinstance(model, SupervisedLearnerPipelineDF):
+            return cast(SupervisedLearnerDF, model.final_estimator)
+        elif isinstance(model, SupervisedLearnerDF):
+            return model
+        else:
+            raise TypeError(
+                "arg model must be a SupervisedLearnerPipelineDF or a "
+                f"SupervisedLearnerDF, but is a {type(model).__name__}"
+            )
+
+
+@inheritdoc(match="""[see superclass]""")
+class NativeLearnerInspector(
+    _BaseLearnerInspector[T_SupervisedLearner], Generic[T_SupervisedLearner]
+):
+    """[see superclass]"""
+
+    #: The default explainer factory used by this inspector.
+    #: This is a tree explainer using the tree_path_dependent method for
+    #: feature perturbation, so we can calculate SHAP interaction values.
+    DEFAULT_EXPLAINER_FACTORY = TreeExplainerFactory(
+        feature_perturbation="tree_path_dependent", uses_background_dataset=False
+    )
+
+    # defined in superclass, repeated here for Sphinx:
+    model: T_SupervisedLearner
+    shap_interaction: bool
+    n_jobs: Optional[int]
+    shared_memory: Optional[bool]
+    pre_dispatch: Optional[Union[str, int]]
+    verbose: Optional[int]
+    explainer_factory: ExplainerFactory[NativeSupervisedLearner]
+    learner: NativeSupervisedLearner
+
+    @subsdoc(
+        pattern=r"(?m)^(\s*:param model:\s+.*)$",
+        replacement=r"""\1 (either a scikit-learn :class:`~sklearn.pipeline.Pipeline`,
+        or a regressor or classifier that implements the scikit-learn API)""",
+        using=_BaseLearnerInspector.__init__,
+    )
+    def __init__(
+        self,
+        model: T_SupervisedLearner,
+        *,
+        explainer_factory: Optional[ExplainerFactory[NativeSupervisedLearner]] = None,
+        shap_interaction: bool = True,
+        n_jobs: Optional[int] = None,
+        shared_memory: Optional[bool] = None,
+        pre_dispatch: Optional[Union[str, int]] = None,
+        verbose: Optional[int] = None,
+    ) -> None:
+        super().__init__(
+            model=model,
+            explainer_factory=explainer_factory,
+            shap_interaction=shap_interaction,
+            n_jobs=n_jobs,
+            shared_memory=shared_memory,
+            pre_dispatch=pre_dispatch,
+            verbose=verbose,
+        )
+
+    @property
+    def native_learner(self) -> NativeSupervisedLearner:
+        return self.learner
+
+    @property
+    def _learner_output_names(self) -> List[str]:
+        # we try to get the number of outputs from the learner; if that fails,
+        # we assume that the learner was fitted on a single target
+        n_outputs = getattr(self.learner, "n_outputs_", 1)
+        if n_outputs == 1:
+            return ["y"]
+        else:
+            return [f"y_{i}" for i in range(n_outputs)]
+
+    def preprocess_features(
+        self, features: Union[pd.DataFrame, pd.Series]
+    ) -> pd.DataFrame:
+        """[see superclass]"""
+        if self.learner is self.model:
+            # we have a single learner: do not preprocess
+            return features
+        else:
+            # we have a pipeline: preprocessing is the first part of the pipeline
+            preprocessing = self.model[:-1]
+            return pd.DataFrame(
+                preprocessing.transform(features),
+                index=features.index,
+                columns=preprocessing.get_feature_names_out(),
+            )
+
+    @staticmethod
+    def _is_model_fitted(model: T_SupervisedLearner) -> bool:
+        return is_fitted(model)
+
+    @staticmethod
+    def _get_learner(model: T_SupervisedLearner) -> NativeSupervisedLearner:
+        if isinstance(model, Pipeline):
+            try:
+                return model[-1]
+            except IndexError:
+                raise ValueError("arg model is an empty pipeline")
+        else:
+            return model
+
 
 __tracker.validate()
+
+
+#
+# Private auxiliary functions
+#
+
+
+def is_fitted(estimator: BaseEstimator) -> bool:
+    """
+    Check if the estimator is fitted.
+
+    :param estimator: a scikit-learn estimator instance
+    :return: ``True`` if the estimator is fitted; ``False`` otherwise
+    """
+
+    if not isinstance(estimator, BaseEstimator):
+        raise TypeError(
+            "arg estimator must be a scikit-learn estimator, but is a "
+            f"{type(estimator).__name__}"
+        )
+
+    # names of public properties declared on the estimator's own class
+    fitted_properties = {
+        name
+        for name, value in vars(type(estimator)).items()
+        if (
+            # we're only interested in properties named like scikit-learn's
+            # fitted attributes, i.e., with a trailing underscore
+            name.endswith("_")
+            and not name.startswith("_")
+            and isinstance(value, property)
+        )
+    }
+
+    # get all instance attributes ending with an underscore - these are only set
+    # when an estimator is fitted
+    fitted_attributes = [
+        name
+        for name in vars(estimator)
+        if name not in fitted_properties
+        and name.endswith("_")
+        and not name.startswith("_")
+    ]
+
+    if fitted_attributes:
+        # we have at least one fitted attribute: the estimator is fitted
+        return True
+
+    # check whether at least one of the fitted properties can be accessed
+    for p in fitted_properties:
+        if hasattr(estimator, p):
+            return True
+
+    # the estimator has no fitted attributes and no fitted properties:
+    # it is not fitted
+    return False
diff --git a/src/facet/inspection/shap/_shap.py b/src/facet/inspection/shap/_shap.py
index 671821a85..4a91b6763 100644
--- a/src/facet/inspection/shap/_shap.py
+++ b/src/facet/inspection/shap/_shap.py
@@ -205,7 +205,7 @@ def shap_values(self) -> pd.DataFrame:
 
         assert self.shap_ is not None, ASSERTION__CALCULATOR_IS_FITTED
         if self.interaction_values:
-            return self.shap_.groupby(level=0).sum()
+            return self.shap_.groupby(level=0, sort=False).sum()
         else:
             return self.shap_
 
diff --git a/test/test/conftest.py b/test/test/conftest.py
index bbfcb15b8..cbeb1826c 100644
--- a/test/test/conftest.py
+++ b/test/test/conftest.py
@@ -41,8 +41,9 @@
 # print the FACET logo
 print(facet.__logo__)
 
-# disable SHAP debugging messages
+# disable 3rd party debugging messages
 logging.getLogger("shap").setLevel(logging.WARNING)
+logging.getLogger("numba").setLevel(logging.WARNING)
 
 # configure pandas text output
 
diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py
index 04c0c6ec6..c2af4c603 100644
--- a/test/test/facet/test_inspection.py
+++ b/test/test/facet/test_inspection.py
@@ -4,15 +4,19 @@
 import logging
 import platform
 import warnings
-from typing import Any, Dict, List, Optional, Type, TypeVar, cast
+from typing import Any, Dict, List, Optional, Set, Type, TypeVar, Union, cast
 
 import numpy as np
 import pandas as pd
 import pytest
 from numpy.testing import assert_allclose
+from pandas._testing import assert_index_equal
 from pandas.testing import assert_frame_equal, assert_series_equal
+from sklearn.base import BaseEstimator
 from sklearn.datasets import make_classification
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import GridSearchCV
+from sklearn.pipeline import Pipeline
 
 from pytools.data import LinkageTree, Matrix
 from pytools.viz.dendrogram import DendrogramDrawer, DendrogramReportStyle
@@ -33,7 +37,7 @@
     TreeExplainerFactory,
 )
 from facet.explanation.base import ExplainerFactory
-from facet.inspection import FunctionInspector, LearnerInspector
+from facet.inspection import FunctionInspector, LearnerInspector, NativeLearnerInspector
 from facet.selection import LearnerSelector
 
 # noinspection PyMissingOrEmptyDocstring
@@ -80,20 +84,56 @@ def test_regressor_selector(
         (PermutationExplainerFactory, {}),
     ],
 )
+@pytest.mark.parametrize(  # type: ignore
+    argnames="native",
+    argvalues=(False, True),
+)
 def test_model_inspection(
     explainer_factory_cls: Type[ExplainerFactory[LGBMRegressorDF]],
     explainer_factory_args: Dict[str, Any],
     best_lgbm_model: RegressorPipelineDF[LGBMRegressorDF],
     sample: Sample,
     n_jobs: int,
+    native: bool,
 ) -> None:
     # test the ModelInspector with the given explainer factory:
 
-    inspector = LearnerInspector(
-        model=best_lgbm_model,
-        explainer_factory=explainer_factory_cls(**explainer_factory_args),
-        n_jobs=n_jobs,
-    ).fit(sample)
+    explainer_factory: ExplainerFactory[LGBMRegressorDF] = explainer_factory_cls(
+        **explainer_factory_args
+    )
+
+    inspector: Union[
+        LearnerInspector[RegressorPipelineDF[LGBMRegressorDF]],
+        NativeLearnerInspector[Pipeline],
+    ]
+
+    if native:
+        assert (
+            best_lgbm_model.preprocessing is not None
+        ), "preprocessing step must be defined"
+        # noinspection PyTypeChecker
+        inspector = NativeLearnerInspector(
+            model=(
+                # create and fit a native pipeline from the regressor pipeline
+                Pipeline(
+                    steps=[
+                        (
+                            "preprocessing",
+                            best_lgbm_model.preprocessing.native_estimator,
+                        ),
+                        ("regressor", best_lgbm_model.regressor.native_estimator),
+                    ]
+                ).fit(X=sample.features, y=sample.target)
+            ),
+            explainer_factory=explainer_factory,
+            n_jobs=n_jobs,
+        ).fit(sample)
+    else:
+        inspector = LearnerInspector(
+            model=best_lgbm_model,
+            explainer_factory=explainer_factory,
+            n_jobs=n_jobs,
+        ).fit(sample)
 
     shap_values: pd.DataFrame = inspector.shap_values()
 
@@ -106,9 +146,22 @@ def test_model_inspection(
     assert shap_values.columns.names == [Sample.IDX_FEATURE]
 
     # column index
-    assert set(shap_values.columns) == set(
-        inspector.model.final_estimator.feature_names_in_
-    )
+    regressor: BaseEstimator
+    if native:
+        regressor = inspector.model[-1]
+    else:
+        regressor = inspector.model.regressor
+
+    regressor_feature_names: Set[str]
+    if native:
+        regressor_feature_names = set(inspector.model[:-1].get_feature_names_out())
+    else:
+        regressor_feature_names = set(regressor.feature_names_in_)
+
+    assert set(shap_values.columns) == set(regressor_feature_names)
+
+    # check that the row order has been preserved
+    assert_index_equal(shap_values.index, sample.index)
 
     # check that the SHAP values add up to the predictions
     shap_totals = shap_values.sum(axis=1)
@@ -121,6 +174,7 @@ def test_model_inspection(
     assert (
         round((shap_minus_pred - shap_minus_pred.mean()).abs().mean(), 12) == 0.0
     ), "predictions matching total SHAP"
+
     # validate the linkage tree of the resulting inspector
 
     # if the inspector supports interaction values, test the redundancy linkage
@@ -438,23 +492,42 @@ def _check_probabilities(
 
 
 # noinspection DuplicatedCode
+@pytest.mark.parametrize(  # type: ignore
+    argnames="native",
+    argvalues=(False, True),
+)
 def test_model_inspection_classifier_interaction(
     iris_classifier_binary: ClassifierPipelineDF[RandomForestClassifierDF],
     iris_sample_binary: Sample,
     n_jobs: int,
+    native: bool,
 ) -> None:
     warnings.filterwarnings("ignore", message="You are accessing a training score")
 
-    model_inspector = LearnerInspector(
-        model=iris_classifier_binary.final_estimator,
+    cls_inspector: Type[
+        Union[
+            LearnerInspector[RandomForestClassifierDF],
+            NativeLearnerInspector[RandomForestClassifier],
+        ]
+    ]
+    learner: Union[RandomForestClassifierDF, RandomForestClassifier]
+    if native:
+        cls_inspector = NativeLearnerInspector[RandomForestClassifier]
+        learner = iris_classifier_binary.final_estimator.native_estimator
+    else:
+        cls_inspector = LearnerInspector[RandomForestClassifierDF]
+        learner = iris_classifier_binary.final_estimator
+
+    model_inspector = cls_inspector(
+        model=learner,
         explainer_factory=TreeExplainerFactory(
             feature_perturbation="tree_path_dependent", uses_background_dataset=True
         ),
         n_jobs=n_jobs,
     ).fit(iris_sample_binary)
 
-    model_inspector_no_interaction = LearnerInspector(
-        model=iris_classifier_binary,
+    model_inspector_no_interaction = cls_inspector(
+        model=learner,
         shap_interaction=False,
         explainer_factory=TreeExplainerFactory(
             feature_perturbation="tree_path_dependent", uses_background_dataset=True

From b67f376fe536c9993cff14fa887e44cdab9691e7 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Mon, 10 Jul 2023 08:56:05 +0200
Subject: [PATCH 14/22] REFACTOR: reformat code with newest `black`

---
 src/facet/explanation/_explanation.py         | 1 +
 src/facet/inspection/shap/_shap.py            | 1 -
 src/facet/inspection/shap/sklearn/_sklearn.py | 2 --
 src/facet/selection/_parameters.py            | 3 ---
 src/facet/validation/_validation.py           | 2 --
 test/test/facet/test_partition.py             | 3 ---
 test/test/facet/test_selection.py             | 1 -
 test/test/facet/test_simulation.py            | 5 -----
 test/test/facet/test_validation.py            | 1 -
 9 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/src/facet/explanation/_explanation.py b/src/facet/explanation/_explanation.py
index 9da485a88..25116f6cb 100644
--- a/src/facet/explanation/_explanation.py
+++ b/src/facet/explanation/_explanation.py
@@ -344,6 +344,7 @@ def to_expression(self) -> Expression:
 # Exact explainer factory
 #
 
+
 # noinspection PyPep8Naming
 class _ExactExplainer(
     shap.explainers.Exact,  # type: ignore
diff --git a/src/facet/inspection/shap/_shap.py b/src/facet/inspection/shap/_shap.py
index dfd3a4ac5..671821a85 100644
--- a/src/facet/inspection/shap/_shap.py
+++ b/src/facet/inspection/shap/_shap.py
@@ -290,7 +290,6 @@ def _reset_fit(self) -> None:
         self.output_names_ = None
 
     def _make_explainer(self, features: pd.DataFrame) -> BaseExplainer:
-
         # prepare the background dataset
 
         background_dataset: Optional[pd.DataFrame]
diff --git a/src/facet/inspection/shap/sklearn/_sklearn.py b/src/facet/inspection/shap/sklearn/_sklearn.py
index 80b5aaa02..ea6f1b2fa 100644
--- a/src/facet/inspection/shap/sklearn/_sklearn.py
+++ b/src/facet/inspection/shap/sklearn/_sklearn.py
@@ -241,7 +241,6 @@ def _convert_shap_tensors_to_list(
         shap_tensors: Union[npt.NDArray[np.float_], List[npt.NDArray[np.float_]]],
         n_outputs: int,
     ) -> List[npt.NDArray[np.float_]]:
-
         if n_outputs == 1 and isinstance(shap_tensors, list) and len(shap_tensors) == 2:
             # in the binary classification case, we will proceed with SHAP values
             # for class 0 only, since values for class 1 will just be the same
@@ -273,7 +272,6 @@ def _convert_shap_to_df(
         observation_idx: pd.Index,
         feature_idx: pd.Index,
     ) -> List[pd.DataFrame]:
-
         if self.interaction_values:
             # return a list of data frame [(obs x features) x features],
             # one for each of the outputs
diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py
index 778d0e02d..a872aefce 100644
--- a/src/facet/selection/_parameters.py
+++ b/src/facet/selection/_parameters.py
@@ -185,7 +185,6 @@ def get_parameters(self, prefix: Optional[str] = None) -> ParameterDict:
         }
 
     def _validate_parameter(self, name: str, value: ParameterSet) -> None:
-
         if name not in self._params:
             raise AttributeError(
                 f"unknown parameter name for "
@@ -222,7 +221,6 @@ def __dir__(self) -> Iterable[str]:
 
     def __getattr__(self, key: str) -> Any:
         if not key.startswith("_"):
-
             result: Union[ParameterSpace[Any], ParameterSet, None]
 
             result = self._children.get(key, None)
@@ -241,7 +239,6 @@ def __iter__(self) -> Iterator[Tuple[List[str], ParameterSet]]:
     def _iter_parameters(
         self, path_prefix: List[str]
     ) -> Iterator[Tuple[List[str], ParameterSet]]:
-
         yield from (
             ([*path_prefix, name], value) for name, value in self._values.items()
         )
diff --git a/src/facet/validation/_validation.py b/src/facet/validation/_validation.py
index ffe927e60..4c45fef83 100644
--- a/src/facet/validation/_validation.py
+++ b/src/facet/validation/_validation.py
@@ -115,7 +115,6 @@ def split(
                 test: npt.NDArray[np.int_] = indices[test_mask]
                 # make sure test is not empty, else sample another train set
                 if len(test) > 0:
-
                     yield train, test
                     break
 
@@ -254,7 +253,6 @@ def _select_train_indices(
         random_state: np.random.RandomState,
         y: Union[npt.NDArray[Any], pd.Series, pd.DataFrame, None],
     ) -> npt.NDArray[np.int_]:
-
         mean_block_size = self.mean_block_size
         if mean_block_size < 1:
             # if mean block size was set as a percentage, calculate the actual mean
diff --git a/test/test/facet/test_partition.py b/test/test/facet/test_partition.py
index cd2cf0113..2f47345c6 100644
--- a/test/test/facet/test_partition.py
+++ b/test/test/facet/test_partition.py
@@ -18,7 +18,6 @@ def test_discrete_partitioning() -> None:
     np.random.seed(42)
 
     for _ in range(10):
-
         values = np.random.randint(
             low=0, high=10000, size=np.random.randint(low=100, high=200)
         )
@@ -51,7 +50,6 @@ def test_continuous_partitioning() -> None:
     np.random.seed(42)
 
     for _ in range(10):
-
         values = np.random.normal(
             loc=3.0, scale=8.0, size=np.random.randint(low=2000, high=4000)
         )
@@ -100,7 +98,6 @@ def test_category_partitioning() -> None:
 
 
 def test_partition_with_invalid_values() -> None:
-
     arr_empty = np.array([])
     arr_single = np.array([1])
     arr_multi = np.array([1, 1, 1, 10, 1])
diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py
index e28143fe3..e54b8c930 100644
--- a/test/test/facet/test_selection.py
+++ b/test/test/facet/test_selection.py
@@ -40,7 +40,6 @@ def test_learner_selector(
     sample: Sample,
     n_jobs: int,
 ) -> None:
-
     expected_scores = [
         0.669,
         0.649,
diff --git a/test/test/facet/test_simulation.py b/test/test/facet/test_simulation.py
index a68b3d2a4..701969cf9 100644
--- a/test/test/facet/test_simulation.py
+++ b/test/test/facet/test_simulation.py
@@ -72,7 +72,6 @@ def uplift_simulator(
 def test_univariate_target_simulation(
     target_simulator: UnivariateTargetSimulator,
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
@@ -135,7 +134,6 @@ def test_univariate_target_simulation(
 def test_univariate_target_subsample_simulation_80(
     model: RegressorPipelineDF[LGBMRegressorDF], subsample: Sample, n_jobs: int
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
@@ -201,7 +199,6 @@ def test_univariate_target_subsample_simulation_80(
 def test_univariate_uplift_subsample_simulation_95(
     model: RegressorPipelineDF[LGBMRegressorDF], subsample: Sample, n_jobs: int
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
@@ -266,7 +263,6 @@ def test_univariate_uplift_subsample_simulation_95(
 def test_univariate_uplift_simulation(
     uplift_simulator: UnivariateUpliftSimulator,
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
@@ -329,7 +325,6 @@ def test_univariate_uplift_simulation(
 def test_univariate_uplift_subsample_simulation(
     model: RegressorPipelineDF[LGBMRegressorDF], subsample: Sample, n_jobs: int
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
diff --git a/test/test/facet/test_validation.py b/test/test/facet/test_validation.py
index a183eae00..51bde3d7b 100644
--- a/test/test/facet/test_validation.py
+++ b/test/test/facet/test_validation.py
@@ -30,7 +30,6 @@ def test_bootstrap_cv_init() -> None:
 
 
 def test_get_train_test_splits_as_indices() -> None:
-
     n_test_splits = 200
     test_x = np.arange(0, 1000, 1)
 

From 1833b3ceb35e697cdc78111e7aeec648f07893c9 Mon Sep 17 00:00:00 2001
From: Jan Ittner <ittner.jan@bcg.com>
Date: Mon, 10 Jul 2023 17:39:08 +0200
Subject: [PATCH 15/22] FIX: test native learner inspectors w/o preprocessing
 for sklearn 1.0.x (#372)

* REFACTOR: reformat code with newest `black`

* BUILD: drop support for scikit-learn 0.x

* FIX: suppress numpy warnings when patching to support legacy types

* FIX: test native learner inspectors w/o preprocessing for sklearn 1.0.x
---
 pyproject.toml                      |  5 ++-
 src/facet/explanation/base/_base.py | 12 +++---
 test/test/facet/test_inspection.py  | 62 ++++++++++++++++++-----------
 test/test/facet/test_partition.py   |  3 --
 test/test/facet/test_selection.py   |  1 -
 test/test/facet/test_simulation.py  |  5 ---
 test/test/facet/test_validation.py  |  1 -
 7 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f410a9c60..cf3c4b122 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ requires = [
     "pandas            >=1.0",
     "scipy             ~=1.2",
     "shap              >=0.39",
+    "scikit-learn      ~=1.0",
     "sklearndf         ~=2.2",
     "typing_extensions ~=4.0",
 ]
@@ -85,7 +86,7 @@ typing_extensions = "~=4.0.0"
 # additional minimum requirements of sklearndf
 boruta            = "~=0.3.0"
 lightgbm          = "~=3.0.0"
-scikit-learn      = "~=0.24.2"
+scikit-learn      = "~=1.0.2"
 xgboost           = "~=1.5"
 # additional minimum requirements of gamma-pytools
 joblib            = "~=0.14.1"
@@ -109,7 +110,7 @@ typing_extensions = "~=4.3"
 # additional maximum requirements of sklearndf
 boruta            = "~=0.3"
 lightgbm          = "~=3.3"
-scikit-learn      = "~=1.1"
+scikit-learn      = "~=1.2"
 xgboost           = "~=1.5"
 # additional maximum requirements of gamma-pytools
 joblib            = "~=1.1"
diff --git a/src/facet/explanation/base/_base.py b/src/facet/explanation/base/_base.py
index 2b8a0017d..f5a0ca180 100644
--- a/src/facet/explanation/base/_base.py
+++ b/src/facet/explanation/base/_base.py
@@ -7,6 +7,7 @@
 
 import numpy as np
 import pandas as pd
+from packaging.version import Version
 from shap import Explainer, Explanation
 
 from pytools.api import AllTracker
@@ -26,14 +27,13 @@
 # shap relies on the np.bool, np.int, and np.float types, which were deprecated in
 # numpy 1.20 and removed in numpy 1.24.
 #
-# We check if the types are defined and, if not, define them as an alias
-# for the corresponding type with a trailing underscore.
+# We define these types as an alias for the corresponding type with a trailing
+# underscore.
 
-
-for __attr in ("bool", "int", "float"):
-    if not hasattr(np, __attr):
+if Version(np.__version__) >= Version("1.20"):
+    for __attr in ("bool", "int", "float"):
         setattr(np, __attr, getattr(np, f"{__attr}_"))
-del __attr
+    del __attr
 
 
 #
diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py
index c2af4c603..5c2985f46 100644
--- a/test/test/facet/test_inspection.py
+++ b/test/test/facet/test_inspection.py
@@ -12,7 +12,6 @@
 from numpy.testing import assert_allclose
 from pandas._testing import assert_index_equal
 from pandas.testing import assert_frame_equal, assert_series_equal
-from sklearn.base import BaseEstimator
 from sklearn.datasets import make_classification
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import GridSearchCV
@@ -107,23 +106,49 @@ def test_model_inspection(
         NativeLearnerInspector[Pipeline],
     ]
 
+    regressor_feature_names: Set[str]  # column index names
+
     if native:
         assert (
             best_lgbm_model.preprocessing is not None
         ), "preprocessing step must be defined"
+
+        regressor = best_lgbm_model.regressor.native_estimator
+
+        if __sklearn_version__ < __sklearn_1_1__:
+            # scikit-learn 1.0.x does not support output feature names in simple
+            # imputers, so we cannot use this for preprocessing
+            log.warning(
+                f"scikit-learn {__sklearn_version__} does not support output "
+                "feature names in simple imputers, so we will test the native learner "
+                "inspector without preprocessing"
+            )
+            assert (
+                sample.features.notna().all().all()
+            ), "observations must not contain missing values"
+            model = regressor.fit(X=sample.features, y=sample.target)
+            regressor_feature_names = set(sample.feature_names)
+
+        else:
+            # scikit-learn 1.1.x supports output feature names in simple imputers,
+            # so we can use this for preprocessing
+            model = Pipeline(
+                # create a native pipeline from the regressor pipeline
+                steps=[
+                    (
+                        "preprocessing",
+                        best_lgbm_model.preprocessing.native_estimator,
+                    ),
+                    ("regressor", regressor),
+                ]
+            ).fit(X=sample.features, y=sample.target)
+            regressor_feature_names = set(model[:-1].get_feature_names_out())
+
         # noinspection PyTypeChecker
         inspector = NativeLearnerInspector(
             model=(
-                # create and fit a native pipeline from the regressor pipeline
-                Pipeline(
-                    steps=[
-                        (
-                            "preprocessing",
-                            best_lgbm_model.preprocessing.native_estimator,
-                        ),
-                        ("regressor", best_lgbm_model.regressor.native_estimator),
-                    ]
-                ).fit(X=sample.features, y=sample.target)
+                # fit the model on the sample
+                model.fit(X=sample.features, y=sample.target)
             ),
             explainer_factory=explainer_factory,
             n_jobs=n_jobs,
@@ -134,6 +159,7 @@ def test_model_inspection(
             explainer_factory=explainer_factory,
             n_jobs=n_jobs,
         ).fit(sample)
+        regressor_feature_names = set(best_lgbm_model.regressor.feature_names_in_)
 
     shap_values: pd.DataFrame = inspector.shap_values()
 
@@ -145,19 +171,7 @@ def test_model_inspection(
     assert shap_values.index.names == [Sample.IDX_OBSERVATION]
     assert shap_values.columns.names == [Sample.IDX_FEATURE]
 
-    # column index
-    regressor: BaseEstimator
-    if native:
-        regressor = inspector.model[-1]
-    else:
-        regressor = inspector.model.regressor
-
-    regressor_feature_names: Set[str]
-    if native:
-        regressor_feature_names = set(inspector.model[:-1].get_feature_names_out())
-    else:
-        regressor_feature_names = set(regressor.feature_names_in_)
-
+    # check that the column names are the same as the feature names
     assert set(shap_values.columns) == set(regressor_feature_names)
 
     # check that the row order has been preserved
diff --git a/test/test/facet/test_partition.py b/test/test/facet/test_partition.py
index cd2cf0113..2f47345c6 100644
--- a/test/test/facet/test_partition.py
+++ b/test/test/facet/test_partition.py
@@ -18,7 +18,6 @@ def test_discrete_partitioning() -> None:
     np.random.seed(42)
 
     for _ in range(10):
-
         values = np.random.randint(
             low=0, high=10000, size=np.random.randint(low=100, high=200)
         )
@@ -51,7 +50,6 @@ def test_continuous_partitioning() -> None:
     np.random.seed(42)
 
     for _ in range(10):
-
         values = np.random.normal(
             loc=3.0, scale=8.0, size=np.random.randint(low=2000, high=4000)
         )
@@ -100,7 +98,6 @@ def test_category_partitioning() -> None:
 
 
 def test_partition_with_invalid_values() -> None:
-
     arr_empty = np.array([])
     arr_single = np.array([1])
     arr_multi = np.array([1, 1, 1, 10, 1])
diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py
index e28143fe3..e54b8c930 100644
--- a/test/test/facet/test_selection.py
+++ b/test/test/facet/test_selection.py
@@ -40,7 +40,6 @@ def test_learner_selector(
     sample: Sample,
     n_jobs: int,
 ) -> None:
-
     expected_scores = [
         0.669,
         0.649,
diff --git a/test/test/facet/test_simulation.py b/test/test/facet/test_simulation.py
index a68b3d2a4..701969cf9 100644
--- a/test/test/facet/test_simulation.py
+++ b/test/test/facet/test_simulation.py
@@ -72,7 +72,6 @@ def uplift_simulator(
 def test_univariate_target_simulation(
     target_simulator: UnivariateTargetSimulator,
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
@@ -135,7 +134,6 @@ def test_univariate_target_simulation(
 def test_univariate_target_subsample_simulation_80(
     model: RegressorPipelineDF[LGBMRegressorDF], subsample: Sample, n_jobs: int
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
@@ -201,7 +199,6 @@ def test_univariate_target_subsample_simulation_80(
 def test_univariate_uplift_subsample_simulation_95(
     model: RegressorPipelineDF[LGBMRegressorDF], subsample: Sample, n_jobs: int
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
@@ -266,7 +263,6 @@ def test_univariate_uplift_subsample_simulation_95(
 def test_univariate_uplift_simulation(
     uplift_simulator: UnivariateUpliftSimulator,
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
@@ -329,7 +325,6 @@ def test_univariate_uplift_simulation(
 def test_univariate_uplift_subsample_simulation(
     model: RegressorPipelineDF[LGBMRegressorDF], subsample: Sample, n_jobs: int
 ) -> None:
-
     parameterized_feature = "HouseAge"
     partitioner = ContinuousRangePartitioner(max_partitions=10)
 
diff --git a/test/test/facet/test_validation.py b/test/test/facet/test_validation.py
index a183eae00..51bde3d7b 100644
--- a/test/test/facet/test_validation.py
+++ b/test/test/facet/test_validation.py
@@ -30,7 +30,6 @@ def test_bootstrap_cv_init() -> None:
 
 
 def test_get_train_test_splits_as_indices() -> None:
-
     n_test_splits = 200
     test_x = np.arange(0, 1000, 1)
 

From c075e10a218f68e7055c8d2f6e927b93032cdcff Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Mon, 10 Jul 2023 23:31:59 +0200
Subject: [PATCH 16/22] BUILD: update min and max matrix test package
 dependencies

---
 pyproject.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index cf3c4b122..31cca6c31 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -87,13 +87,13 @@ typing_extensions = "~=4.0.0"
 boruta            = "~=0.3.0"
 lightgbm          = "~=3.0.0"
 scikit-learn      = "~=1.0.2"
-xgboost           = "~=1.5"
+xgboost           = "~=1.5.0"
 # additional minimum requirements of gamma-pytools
 joblib            = "~=0.14.1"
 typing_inspect    = "~=0.4.0"
 # additional minimum requirements of shap
 ipython           = "==7.0"
-numba             = "~=0.55"  # required to support numpy 1.21
+numba             = "~=0.55.2"  # required to support numpy 1.21
 
 [build.matrix.max]
 # direct requirements of gamma-facet
@@ -117,7 +117,7 @@ joblib            = "~=1.1"
 typing_inspect    = "~=0.7"
 # additional maximum requirements of shap
 ipython           = ">=7"
-numba             = ">=0.55.2"  # required to support numpy 1.22
+numba             = "~=0.56"
 
 [tool.black]
 # quiet = "True"

From 921b385066e7b659d102168e6c9282e5a1e489d1 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Mon, 10 Jul 2023 23:59:12 +0200
Subject: [PATCH 17/22] BUILD: require zipp<3.16 for min test with Python 3.7

---
 pyproject.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 31cca6c31..90b9a64fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,6 +94,8 @@ typing_inspect    = "~=0.4.0"
 # additional minimum requirements of shap
 ipython           = "==7.0"
 numba             = "~=0.55.2"  # required to support numpy 1.21
+# additional minimum requirements
+zipp              = "<3.16"     # required to support python 3.7
 
 [build.matrix.max]
 # direct requirements of gamma-facet

From e880e6fbdfbf5078dc96092a24f59bb34801a1b6 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Tue, 11 Jul 2023 00:08:59 +0200
Subject: [PATCH 18/22] TEST: only re-fit the model where needed

---
 test/test/facet/test_inspection.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py
index 5c2985f46..f59a8eb6e 100644
--- a/test/test/facet/test_inspection.py
+++ b/test/test/facet/test_inspection.py
@@ -126,7 +126,7 @@ def test_model_inspection(
             assert (
                 sample.features.notna().all().all()
             ), "observations must not contain missing values"
-            model = regressor.fit(X=sample.features, y=sample.target)
+            model = regressor
             regressor_feature_names = set(sample.feature_names)
 
         else:
@@ -146,10 +146,7 @@ def test_model_inspection(
 
         # noinspection PyTypeChecker
         inspector = NativeLearnerInspector(
-            model=(
-                # fit the model on the sample
-                model.fit(X=sample.features, y=sample.target)
-            ),
+            model=model,
             explainer_factory=explainer_factory,
             n_jobs=n_jobs,
         ).fit(sample)

From 03fc727913252f52f4554876bcf43999c81a52a3 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Tue, 11 Jul 2023 10:04:27 +0200
Subject: [PATCH 19/22] BUILD: require zipp<3.16 for min conda test with Python
 3.7

---
 condabuild/meta.yaml | 2 ++
 pyproject.toml       | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/condabuild/meta.yaml b/condabuild/meta.yaml
index 89b991af3..eb09a179e 100644
--- a/condabuild/meta.yaml
+++ b/condabuild/meta.yaml
@@ -47,6 +47,8 @@ test:
     # additional requirements of shap
     - ipython        {{ environ.get('FACET_V_IPYTHON', '[False]') }}
     - numba          {{ environ.get('FACET_V_NUMBA', '[False]') }}
+    # additional requirements for testing
+    - zipp           {{ environ.get('FACET_V_ZIPP', '[False]') }}
   commands:
     - conda list
     - python -c 'import facet;
diff --git a/pyproject.toml b/pyproject.toml
index 90b9a64fb..2837b66a4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,7 +94,7 @@ typing_inspect    = "~=0.4.0"
 # additional minimum requirements of shap
 ipython           = "==7.0"
 numba             = "~=0.55.2"  # required to support numpy 1.21
-# additional minimum requirements
+# additional requirements for testing
 zipp              = "<3.16"     # required to support python 3.7
 
 [build.matrix.max]

From 4ee81f600ae19bc5cddc308a1f5093bcd85b8027 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Tue, 11 Jul 2023 14:57:25 +0200
Subject: [PATCH 20/22] FIX: check properties of superclasses in is_fitted()

---
 src/facet/inspection/_learner_inspector.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/facet/inspection/_learner_inspector.py b/src/facet/inspection/_learner_inspector.py
index a3f0e6818..65bf9b4aa 100644
--- a/src/facet/inspection/_learner_inspector.py
+++ b/src/facet/inspection/_learner_inspector.py
@@ -440,7 +440,10 @@ def is_fitted(estimator: BaseEstimator) -> bool:
     # get all properties of the estimator (instances of class ``property``)
     fitted_properties = {
         name
-        for name, value in vars(type(estimator)).items()
+        for cls in reversed(type(estimator).mro())
+        # scan every class in the MRO so that properties inherited from
+        # superclasses are included; names go into a set, so order is irrelevant
+        for name, value in vars(cls).items()
         if (
             # we're only interested in properties that scikit-learn
             # sets when fitting a learner

From 118a8e301580b457297ebfdc07e394e75341364c Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Tue, 11 Jul 2023 15:42:36 +0200
Subject: [PATCH 21/22] DOC: tweak docstrings for learner inspectors

---
 src/facet/inspection/_learner_inspector.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/facet/inspection/_learner_inspector.py b/src/facet/inspection/_learner_inspector.py
index 65bf9b4aa..24adf589a 100644
--- a/src/facet/inspection/_learner_inspector.py
+++ b/src/facet/inspection/_learner_inspector.py
@@ -61,10 +61,6 @@
     ),
     replacement="\n\n",
 )
-@subsdoc(
-    pattern="Explain a model based on SHAP",
-    replacement="Explain a regressor or classifier based on SHAP",
-)
 @inheritdoc(match="""[see superclass]""")
 class _BaseLearnerInspector(
     ModelInspector[T_SupervisedLearner], Generic[T_SupervisedLearner], metaclass=ABCMeta
@@ -236,6 +232,10 @@ def _get_learner(model: T_SupervisedLearner) -> NativeSupervisedLearner:
         pass
 
 
+@subsdoc(
+    pattern=r"Explain a model",
+    replacement=r"Explain an :mod:`sklearndf` regressor or classifier",
+)
 @inheritdoc(match="""[see superclass]""")
 class LearnerInspector(
     _BaseLearnerInspector[T_SupervisedLearnerDF], Generic[T_SupervisedLearnerDF]
@@ -320,6 +320,10 @@ def _get_learner(model: T_SupervisedLearnerDF) -> SupervisedLearnerDF:
             )
 
 
+@subsdoc(
+    pattern=r"Explain a model",
+    replacement=r"Explain a native scikit-learn regressor or classifier",
+)
 @inheritdoc(match="""[see superclass]""")
 class NativeLearnerInspector(
     _BaseLearnerInspector[T_SupervisedLearner], Generic[T_SupervisedLearner]

From e9efa29b2661e57d8468882980dddd333df317b3 Mon Sep 17 00:00:00 2001
From: j-ittner <ittner.jan@bcg.com>
Date: Tue, 11 Jul 2023 15:56:46 +0200
Subject: [PATCH 22/22] DOC: tweak release notes

---
 RELEASE_NOTES.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
index 477230c01..8bdd58081 100644
--- a/RELEASE_NOTES.rst
+++ b/RELEASE_NOTES.rst
@@ -12,7 +12,7 @@ FACET 2.1
 FACET 2.1 introduces the :class:`.NativeLearnerInspector` for inspecting native
 *scikit-learn* models and pipelines.
 
-We still recommend using *sklearndf* models and learner pipelines and FACET's
+We still recommend using :mod:`sklearndf` models and learner pipelines along with FACET's
 :class:`.LearnerSelector` for hyperparameter tuning; however the new
 :class:`.NativeLearnerInspector` can be useful for inspecting models that have been
 trained using *scikit-learn* directly.