diff --git a/doc/api.rst b/doc/api.rst
index 59238d77a..12182469e 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -150,6 +150,19 @@ tree models.
    PermutationForestClassifier
    PermutationForestRegressor
 
+Datasets
+------------------------------
+We provide some convenience functions for simulating datasets beyond
+those offered in scikit-learn.
+
+.. currentmodule:: sktree.datasets
+.. autosummary::
+   :toctree: generated/
+
+   make_gaussian_mixture
+   make_joint_factor_model
+   make_quadratic_classification
+
 Experimental Functionality
 --------------------------
 
@@ -160,6 +173,7 @@ We also include experimental functionality that is works in progress.
    :toctree: generated/
 
    mutual_info_ksg
+   conditional_resample
 
 We also include functions that help simulate and evaluate mutual information (MI)
 and conditional mutual information (CMI) estimators. Specifically, functions that
diff --git a/doc/conf.py b/doc/conf.py
index ac752d66a..6e2a78d23 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -244,6 +244,7 @@
     "fit",
     "apply",
     "TreeBuilder",
+    "joint_rank",
 }
 
 # validation
diff --git a/doc/whats_new/v0.4.rst b/doc/whats_new/v0.4.rst
index b7d489029..8e359a89f 100644
--- a/doc/whats_new/v0.4.rst
+++ b/doc/whats_new/v0.4.rst
@@ -16,6 +16,7 @@ Changelog
 - |API| ``FeatureImportanceForest*`` now has a hyperparameter to control the number of permutations is done per forest ``permute_per_forest_fraction``, by `Adam Li`_ (:pr:`145`)
 - |Enhancement| Add dataset generators for regression and classification and hypothesis testing, by `Adam Li`_ (:pr:`169`)
 - |Fix| Fixes a bug where ``FeatureImportanceForest*`` was unable to be run when calling ``statistic`` with ``covariate_index`` defined for MI, AUC metrics, by `Adam Li`_ (:pr:`164`)
+- |Enhancement| Add :func:`sktree.experimental.conditional_resample`, which allows conditional resampling of rows based on nearest-neighbors defined via a feature set, by `Adam Li`_ (:pr:`170`)
 
 Code and Documentation Contributors
 -----------------------------------
diff --git a/pyproject.toml b/pyproject.toml
index e03c45e1a..a5c2b6fdd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -218,7 +218,7 @@ profile = 'black'
 multi_line_output = 3
 line_length = 100
 py_version = 38
-extend_skip_glob = ['sktree/__init__.py', 'sktree/_lib/*', '.asv/*', 'env/*']
+extend_skip_glob = ['sktree/__init__.py', 'sktree/_lib/*', '.asv/*', 'env/*', 'build-install/*']
 
 [tool.pydocstyle]
 convention = 'numpy'
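As context for the ``conditional_resample`` entries added to the API docs and changelog above, here is a minimal usage sketch. The toy data, the choice of the first two columns as the conditioning set, and the 5-neighbor setting are illustrative assumptions rather than part of the changeset::

    from sklearn.datasets import make_classification
    from sklearn.neighbors import NearestNeighbors

    from sktree.experimental import conditional_resample

    # toy data: X/y are the arrays to resample, Z is the conditioning feature set
    X, y = make_classification(n_samples=200, n_features=5, random_state=0)
    Z = X[:, :2]

    # rows are resampled among their nearest neighbors in Z, so the conditional
    # structure of (X, y) given Z is approximately preserved in the bootstrap
    X_boot, y_boot = conditional_resample(
        Z, X, y, nn_estimator=NearestNeighbors(n_neighbors=5), random_state=0
    )
    print(X_boot.shape, y_boot.shape)  # (200, 5) (200,)
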
diff --git a/sktree/experimental/__init__.py b/sktree/experimental/__init__.py
index cdf4b4295..2934888fc 100644
--- a/sktree/experimental/__init__.py
+++ b/sktree/experimental/__init__.py
@@ -1,4 +1,5 @@
 from . import mutual_info, sdf, simulate
+from .monte_carlo import conditional_resample
 from .mutual_info import (
     cmi_from_entropy,
     cmi_gaussian,
diff --git a/sktree/experimental/meson.build b/sktree/experimental/meson.build
index 2be7ae9e8..2d1dde609 100644
--- a/sktree/experimental/meson.build
+++ b/sktree/experimental/meson.build
@@ -3,6 +3,7 @@ python_sources = [
     'mutual_info.py',
     'simulate.py',
     'sdf.py',
+    'monte_carlo.py',
 ]
 
 py3.install_sources(
diff --git a/sktree/experimental/monte_carlo.py b/sktree/experimental/monte_carlo.py
new file mode 100644
index 000000000..aab6a25cd
--- /dev/null
+++ b/sktree/experimental/monte_carlo.py
@@ -0,0 +1,218 @@
+from typing import Optional
+
+import numpy as np
+from numpy.typing import ArrayLike
+from scipy.sparse import issparse
+from sklearn.neighbors import NearestNeighbors
+from sklearn.utils import _approximate_mode, _safe_indexing, check_array, check_consistent_length
+
+
+def _conditional_shuffle(nbrs: ArrayLike, replace: bool = False, seed=None) -> ArrayLike:
+    """Compute a permutation of neighbors with restrictions.
+
+    Parameters
+    ----------
+    nbrs : ArrayLike of shape (n_samples, k)
+        The k-nearest-neighbors for each sample index. Each row corresponds to the
+        original sample. Each element corresponds to another sample index that is deemed
+        as the k-nearest neighbors with respect to the original sample.
+    replace : bool, optional
+        Whether or not to allow replacement of samples, by default False.
+    seed : int, optional
+        Random seed, by default None.
+
+    Returns
+    -------
+    restricted_perm : ArrayLike of shape (n_samples)
+        The final permutation order of the sample indices. There may be
+        repeating samples. See Notes for details.
+
+    Notes
+    -----
+    Restricted permutation goes through random samples and looks at the k-nearest
+    neighbors (columns of ``nbrs``) and shuffles the closest neighbor index only
+    if it has not been used to permute another sample. If it has been, then the
+    algorithm looks at the next nearest-neighbor and so on. If all k-nearest
+    neighbors of a sample have been checked, then a random neighbor is chosen. In this
+    manner, the algorithm tries to perform permutation without replacement, but
+    if necessary, will choose a repeating neighbor sample.
+    """
+    n_samples, k_dims = nbrs.shape
+    rng = np.random.default_rng(seed=seed)
+
+    # initialize the final permutation order
+    restricted_perm = np.zeros((n_samples,), dtype=np.intp)
+
+    # generate a random order of samples to go through
+    random_order = rng.permutation(n_samples)
+
+    # keep track of values we have already used
+    used = set()
+
+    # go through the random order
+    for idx in random_order:
+        if replace:
+            possible_nbrs = nbrs[idx, :]
+            restricted_perm[idx] = rng.choice(possible_nbrs, size=1).squeeze()
+        else:
+            m = 0
+            use_idx = nbrs[idx, m]
+
+            # if the current nbr is already used, continue incrementing
+            # until we have either found a new sample to use, or if
+            # we have reached the maximum number of shuffles to consider
+            while (use_idx in used) and (m < k_dims - 1):
+                m += 1
+                use_idx = nbrs[idx, m]
+
+            # check whether or not we have exhaustively checked all kNN
+            if use_idx in used and m == k_dims - 1:
+                # XXX: Note this step is not in the original paper
+                # choose a random neighbor to permute
+                restricted_perm[idx] = rng.choice(nbrs[idx, :], size=1).squeeze()
+            else:
+                # permute with the existing neighbor
+                restricted_perm[idx] = use_idx
+                used.add(use_idx)
+    return restricted_perm
+
+
+def conditional_resample(
+    conditional_array: ArrayLike,
+    *arrays,
+    nn_estimator=None,
+    replace: bool = True,
+    replace_nbrs: bool = True,
+    n_samples: Optional[int] = None,
+    random_state: Optional[int] = None,
+    stratify: Optional[ArrayLike] = None,
+):
+    """Conditionally resample arrays or sparse matrices in a consistent way.
+
+    The default strategy implements one step of the bootstrapping
+    procedure. Conditional resampling is a modification of the bootstrap
+    technique that preserves the conditional distribution of the data. This
+    is done by fitting a nearest neighbors estimator on the conditional array
+    and then resampling the nearest neighbors of each sample.
+
+    Parameters
+    ----------
+    conditional_array : array-like of shape (n_samples, n_features)
+        The array whose conditional distribution is preserved.
+
+    *arrays : sequence of array-like of shape (n_samples,) or \
+            (n_samples, n_outputs)
+        Indexable data-structures can be arrays, lists, dataframes or scipy
+        sparse matrices with consistent first dimension.
+
+    nn_estimator : estimator object, default=None
+        The nearest neighbors estimator to use. If None, then a
+        :class:`sklearn.neighbors.NearestNeighbors` instance is used.
+
+    replace : bool, default=True
+        Implements resampling with replacement. If False, this will implement
+        (sliced) random permutations. The replacement will take place at the level
+        of the sample index.
+
+    replace_nbrs : bool, default=True
+        Implements resampling with replacement at the level of the nearest neighbors.
+
+    n_samples : int, default=None
+        Number of samples to generate. If left to None this is
+        automatically set to the first dimension of the arrays.
+        If replace is False it should not be larger than the length of
+        arrays.
+
+    random_state : int, RandomState instance or None, default=None
+        Determines random number generation for shuffling
+        the data.
+        Pass an int for reproducible results across multiple function calls.
+        See :term:`Glossary <random_state>`.
+
+    stratify : array-like of shape (n_samples,) or (n_samples, n_outputs), \
+            default=None
+        If not None, data is split in a stratified fashion, using this as
+        the class labels.
+
+    Returns
+    -------
+    resampled_arrays : sequence of array-like of shape (n_samples,) or \
+            (n_samples, n_outputs)
+        Sequence of resampled copies of the collections.
+        The original arrays are not impacted.
+    """
+    max_n_samples = n_samples
+    rng = np.random.default_rng(random_state)
+
+    if len(arrays) == 0:
+        return None
+
+    first = arrays[0]
+    n_samples = first.shape[0] if hasattr(first, "shape") else len(first)
+
+    if max_n_samples is None:
+        max_n_samples = n_samples
+    elif (max_n_samples > n_samples) and (not replace):
+        raise ValueError(
+            f"Cannot sample {max_n_samples} out of arrays with dim "
+            f"{n_samples} when replace is False"
+        )
+
+    check_consistent_length(conditional_array, *arrays)
+
+    # fit nearest neighbors onto the conditional array
+    if nn_estimator is None:
+        nn_estimator = NearestNeighbors()
+    nn_estimator.fit(conditional_array)
+
+    if stratify is None:
+        if replace:
+            indices = rng.integers(0, n_samples, size=(max_n_samples,))
+        else:
+            indices = np.arange(n_samples)
+            rng.shuffle(indices)
+            indices = indices[:max_n_samples]
+    else:
+        # Code adapted from StratifiedShuffleSplit()
+        y = check_array(stratify, ensure_2d=False, dtype=None)
+        if y.ndim == 2:
+            # for multi-label y, map each distinct row to a string repr
+            # using join because str(row) uses an ellipsis if len(row) > 1000
+            y = np.array([" ".join(row.astype("str")) for row in y])
+
+        classes, y_indices = np.unique(y, return_inverse=True)
+        n_classes = classes.shape[0]
+
+        class_counts = np.bincount(y_indices)
+
+        # Find the sorted list of instances for each class:
+        # (np.unique above performs a sort, so code is O(n logn) already)
+        class_indices = np.split(
+            np.argsort(y_indices, kind="mergesort"), np.cumsum(class_counts)[:-1]
+        )
+
+        n_i = _approximate_mode(class_counts, max_n_samples, random_state)
+
+        indices = []
+
+        for i in range(n_classes):
+            indices_i = rng.choice(class_indices[i], n_i[i], replace=replace)
+            indices.extend(indices_i)
+
+        indices = rng.permutation(indices)
+
+    # now get the kNN indices for each sample (n_samples, n_neighbors)
+    sample_nbrs = nn_estimator.kneighbors(X=conditional_array[indices, :], return_distance=False)
+
+    # actually sample the indices using a conditional permutation
+    indices = _conditional_shuffle(sample_nbrs, replace=replace_nbrs, seed=rng)
+
+    # convert sparse matrices to CSR for row-based indexing
+    arrays_ = [a.tocsr() if issparse(a) else a for a in arrays]
+    resampled_arrays = [_safe_indexing(a, indices) for a in arrays_]
+
+    if len(resampled_arrays) == 1:
+        # syntactic sugar for the unit argument case
+        return resampled_arrays[0]
+    else:
+        return resampled_arrays
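To make the restricted-permutation behaviour documented in ``_conditional_shuffle`` concrete, here is a small sketch on made-up data. ``_conditional_shuffle`` is a private helper, so this is purely expository; the cluster layout and the 3-neighbor setting are invented for illustration::

    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    from sktree.experimental.monte_carlo import _conditional_shuffle

    # toy conditioning data: two well-separated clusters of three samples each
    Z = np.array([[0.0], [0.1], [0.2], [5.0], [5.1], [5.2]])

    # each row lists a sample's 3 nearest neighbors (including itself); this is
    # the `nbrs` matrix that the restricted permutation operates on
    nbrs = NearestNeighbors(n_neighbors=3).fit(Z).kneighbors(Z, return_distance=False)

    # every permuted index stays within its sample's own cluster, so structure
    # encoded by the conditioning features is preserved
    perm = _conditional_shuffle(nbrs, replace=False, seed=0)
    print(perm)  # a permutation of 0-2 in the first three slots, 3-5 in the last three
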
diff --git a/sktree/experimental/tests/meson.build b/sktree/experimental/tests/meson.build
index b5d1ef79c..c3fdd07c4 100644
--- a/sktree/experimental/tests/meson.build
+++ b/sktree/experimental/tests/meson.build
@@ -3,6 +3,7 @@ python_sources = [
     'test_mutual_info.py',
     'test_simulate.py',
     'test_sdf.py',
+    'test_monte_carlo.py',
 ]
 
 py3.install_sources(
diff --git a/sktree/experimental/tests/test_monte_carlo.py b/sktree/experimental/tests/test_monte_carlo.py
new file mode 100644
index 000000000..b76d59fb4
--- /dev/null
+++ b/sktree/experimental/tests/test_monte_carlo.py
@@ -0,0 +1,196 @@
+import numpy as np
+import pytest
+from scipy.sparse import csr_matrix
+from sklearn.datasets import make_classification
+from sklearn.neighbors import NearestNeighbors
+
+from sktree.experimental import conditional_resample
+
+
+def test_conditional_resample_with_default_params():
+    # Create a simple example dataset for testing
+    X, y = make_classification(n_samples=100, n_features=5, random_state=42)
+
+    # Create conditional array
+    nn = NearestNeighbors(n_neighbors=5)
+    nn.fit(X)
+    conditional_array = nn.kneighbors_graph(X).toarray()
+
+    # Test conditional resampling with default parameters
+    resampled_arrays = conditional_resample(
+        conditional_array, X, y, nn_estimator=NearestNeighbors()
+    )
+
+    # Check if the number of samples in resampled_arrays is the same as the input arrays
+    assert len(resampled_arrays) == 2
+    assert len(resampled_arrays[0]) == len(X)
+    assert len(resampled_arrays[1]) == len(y)
+
+
+def test_conditional_resample_without_replacement():
+    # Create a simple example dataset for testing
+    X, y = make_classification(n_samples=100, n_features=5, random_state=42)
+
+    # Create conditional array
+    nn = NearestNeighbors(n_neighbors=5)
+    nn.fit(X)
+    conditional_array = nn.kneighbors_graph(X).toarray()
+
+    # Test conditional resampling without replacement
+    resampled_arrays = conditional_resample(
+        conditional_array, X, y, nn_estimator=NearestNeighbors(), replace=False
+    )
+
+    # Check if the number of samples in resampled_arrays is the same as the input arrays
+    assert len(resampled_arrays) == 2
+    assert len(resampled_arrays[0]) == len(X)
+    assert len(resampled_arrays[1]) == len(y)
+
+    # Check if the samples are unique (no replacement)
+    assert len(np.unique(resampled_arrays[1])) == len(
+        np.unique(y)
+    ), f"{len(np.unique(resampled_arrays[1]))} != {len(y)}"
+
+
+def test_conditional_resample_with_sparse_matrix():
+    # Create a simple example dataset for testing
+    X, y = make_classification(n_samples=100, n_features=5, random_state=42)
+    X_sparse = csr_matrix(X)  # Convert X to a sparse matrix
+
+    # Create conditional array
+    nn = NearestNeighbors(n_neighbors=5)
+    nn.fit(X)
+    conditional_array = nn.kneighbors_graph(X).toarray()
+
+    # Test conditional resampling with a sparse matrix
+    resampled_arrays = conditional_resample(
+        conditional_array, X_sparse, y, nn_estimator=NearestNeighbors()
+    )
+
+    # Check if the number of samples in resampled_arrays is the same as the input arrays
+    assert len(resampled_arrays) == 2
+    assert resampled_arrays[0].shape[0] == len(X)
+    assert len(resampled_arrays[1]) == len(y)
+
+
+def test_conditional_resample_with_stratify():
+    # Create a simple example dataset for testing
+    X, y = make_classification(n_samples=100, n_features=5, random_state=42)
+
+    # Create conditional array
+    nn = NearestNeighbors(n_neighbors=5)
+    nn.fit(X)
+    conditional_array = nn.kneighbors_graph(X).toarray()
+
+    # Define a custom stratify function
+    def custom_stratify(y, category):
+        # Create an array where each entry is True if it belongs to the specified category,
+        # False otherwise
+        stratify_array = y == category
+        return stratify_array
+
+    category_to_stratify = 1  # Change this to the category you want to stratify
+
+    # Get the distribution of the specified category before resampling
+    category_distribution_before = np.sum(y == category_to_stratify)
+
+    # Test conditional resampling with the custom stratify function
+    stratify = custom_stratify(y, category_to_stratify)
+    resampled_arrays = conditional_resample(
+        conditional_array, X, y, nn_estimator=NearestNeighbors(), stratify=stratify, random_state=0
+    )
+
+    # Get the distribution of the specified category after resampling
+    category_distribution_after = np.sum(resampled_arrays[1] == category_to_stratify)
+
+    # Check if the distribution of the specified category is preserved
+    assert category_distribution_before == category_distribution_after, (
+        f"Expected {category_distribution_before} samples, got "
+        f"{category_distribution_after} samples"
+    )
+
+
+def test_conditional_resample_with_replace_nbrs():
+    # Create a simple example dataset for testing
+    X, y = make_classification(n_samples=100, n_features=5, random_state=42)
+
+    # Create conditional array
+    nn = NearestNeighbors(n_neighbors=5)
+    nn.fit(X)
+    conditional_array = nn.kneighbors_graph(X).toarray()
+
+    # Test conditional resampling with replace_nbrs=False
+    resampled_arrays = conditional_resample(
+        conditional_array, X, y, nn_estimator=NearestNeighbors(), replace_nbrs=False
+    )
+
+    # Check if the number of samples in resampled_arrays is the same as the input arrays
+    assert len(resampled_arrays) == 2, f"Expected 2 arrays, got {len(resampled_arrays)} arrays"
+    assert len(resampled_arrays[0]) == len(X)
+    assert len(resampled_arrays[1]) == len(y)
+
+
+def test_conditional_resample_errors():
+    # 01: Test with invalid number of samples
+    # Create a simple example dataset for testing
+    X, y = make_classification(n_samples=100, n_features=5, random_state=42)
+
+    # Test conditional resampling with an invalid stratify array (should raise an error)
+    with pytest.raises(ValueError, match="Cannot sample"):
+        conditional_resample(X, y, nn_estimator=NearestNeighbors(), replace=False, n_samples=1000)
+
+    # 02: Test inconsistent_length
+    # Create an additional array with a different number of samples
+    additional_array = np.random.rand(80, 5)
+
+    # Test conditional resampling with inconsistent length of input arrays (should raise an error)
+    with pytest.raises(ValueError):
+        conditional_resample(X, y, additional_array, nn_estimator=NearestNeighbors())
+
+    # 03: Test with invalid sample size when replace=False
+    # Test conditional resampling with n_samples larger than the input arrays
+    # (should raise an error)
+    with pytest.raises(ValueError):
+        conditional_resample(X, y, nn_estimator=NearestNeighbors(), n_samples=200, replace=False)
+
+
+def test_conditional_resample():
+    # Generate synthetic data
+    X, y = make_classification(n_samples=100, n_features=5, random_state=42)
+
+    # Convert X to sparse matrix
+    X_sparse = csr_matrix(X)
+
+    # Create conditional array
+    nn = NearestNeighbors(n_neighbors=5)
+    nn.fit(X)
+    conditional_array = nn.kneighbors_graph(X).toarray()
+
+    # Perform conditional resampling
+    resampled_X = conditional_resample(conditional_array, X, replace=False, replace_nbrs=False)
+    resampled_X_sparse = conditional_resample(
+        conditional_array, X_sparse, replace=False, replace_nbrs=False
+    )
+
+    # Check that the resampled arrays have the correct shape
+    assert resampled_X.shape == X.shape
+    assert resampled_X_sparse.shape == X_sparse.shape
+
+    # Check that the resampled arrays have the correct number of unique samples
+    assert len(np.unique(resampled_X, axis=0)) == X.shape[0]
+    assert len(np.unique(resampled_X_sparse.toarray(), axis=0)) == X_sparse.shape[0]
+
+    # Check that the conditional distribution is preserved
+    for i in range(X.shape[1]):
+        unique_values, counts = np.unique(resampled_X[:, i], return_counts=True)
+        original_values, original_counts = np.unique(X[:, i], return_counts=True)
+
+        assert np.all(unique_values == original_values)
+        assert np.all(counts == original_counts)
+
+        unique_values_sparse, counts_sparse = np.unique(
+            resampled_X_sparse[:, i].toarray(), return_counts=True
+        )
+        original_values_sparse, original_counts_sparse = np.unique(
+            X_sparse[:, i].toarray(), return_counts=True
+        )
+
+        assert np.all(unique_values_sparse == original_values_sparse)
+        assert np.all(counts_sparse == original_counts_sparse)
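The final test above leans on the fact that with ``replace=False`` and ``replace_nbrs=False`` the output is a row permutation of the input, so per-column value counts are unchanged. A standalone sketch of that invariant, using toy data independent of sktree::

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(10, 3))

    # a pure row permutation leaves every column's multiset of values unchanged,
    # which is what the per-column np.unique comparisons in the test verify
    X_perm = X[rng.permutation(10)]

    for j in range(X.shape[1]):
        vals_p, counts_p = np.unique(X_perm[:, j], return_counts=True)
        vals_o, counts_o = np.unique(X[:, j], return_counts=True)
        assert np.array_equal(vals_p, vals_o) and np.array_equal(counts_p, counts_o)
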
diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index b21ebd3aa..abf2a4bad 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -382,11 +382,15 @@ def statistic(
             self.permuted_estimator_ = self._get_estimator()
             estimator = self.permuted_estimator_
 
-            if not hasattr(self, "estimator_") or self.estimator_ is None:
+            if not hasattr(self, "estimator_"):
                 self.estimator_ = self._get_estimator()
 
-                # Ensure that the estimator_ is fitted at least
-                if not _is_fitted(self.estimator_) and is_classifier(self.estimator_):
+                # XXX: this can be improved as an extra fit can be avoided, by
+                # just doing error-checking
+                # and then setting the internal meta data structures
+                # first run a dummy fit on the samples to initialize the
+                # internal data structure of the forest
+                if is_classifier(self.estimator_):
                     _unique_y = []
                     for axis in range(y.shape[1]):
                         _unique_y.append(np.unique(y[:, axis]))
@@ -395,20 +399,20 @@
                     unique_y = unique_y.ravel()
                     X_dummy = np.zeros((unique_y.shape[0], X.shape[1]))
                     self.estimator_.fit(X_dummy, unique_y)
-                elif not _is_fitted(estimator):
+                else:
                     if y.ndim > 1 and y.shape[1] == 1:
                         self.estimator_.fit(X[:2], y[:2].ravel())
                     else:
                         self.estimator_.fit(X[:2], y[:2])
 
         # Store a cache of the y variable
-        if is_classifier(self._get_estimator()):
+        if is_classifier(estimator):
            self._y = y.copy()
 
-        # XXX: this can be improved as an extra fit can be avoided, by just doing error-checking
-        # and then setting the internal meta data structures
-        # first run a dummy fit on the samples to initialize the
-        # internal data structure of the forest
+        # # XXX: this can be improved as an extra fit can be avoided, by just doing error-checking
+        # # and then setting the internal meta data structures
+        # # first run a dummy fit on the samples to initialize the
+        # # internal data structure of the forest
         if not _is_fitted(estimator) and is_classifier(estimator):
             _unique_y = []
             for axis in range(y.shape[1]):
diff --git a/sktree/stats/tests/test_coleman.py b/sktree/stats/tests/test_coleman.py
index 843db417d..23904f0d5 100644
--- a/sktree/stats/tests/test_coleman.py
+++ b/sktree/stats/tests/test_coleman.py
@@ -79,10 +79,11 @@
         {
             "estimator": RandomForestRegressor(
                 max_features="sqrt",
-                n_estimators=125,
+                n_estimators=250,
                 n_jobs=-1,
+                random_state=rng.integers(0, 1000),
             ),
-            # "random_state": seed,
+            "random_state": seed,
             "permute_forest_fraction": 0.5,
             "sample_dataset_per_tree": False,
         },
@@ -167,6 +168,7 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size)
                 n_jobs=-1,
             ),
             "sample_dataset_per_tree": False,
+            "random_state": seed,
         },
         600,  # n_samples
         1000,  # n_repeats
@@ -197,9 +199,11 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size)
                 max_features="sqrt",
                 n_estimators=200,
                 n_jobs=-1,
+                random_state=seed,
             ),
             "permute_forest_fraction": 0.5,
             "sample_dataset_per_tree": False,
+            "random_state": seed,
         },
         600,  # n_samples
         1000,  # n_repeats
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index da5285685..74e446cf3 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -58,8 +58,11 @@ def test_featureimportance_forest_permute_pertree(sample_dataset_per_tree):
     ), f"{len(est.train_test_samples_[0][1])} {n_samples * est.test_size}"
     assert len(est.train_test_samples_[0][0]) == est._n_samples_ - n_samples * est.test_size
 
+    # covariate index should work with mse
+    est.reset()
+    est.statistic(iris_X[:n_samples], iris_y[:n_samples], covariate_index=[1], metric="mse")
     with pytest.raises(RuntimeError, match="Metric must be"):
-        est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")
+        est.statistic(iris_X[:n_samples], iris_y[:n_samples], covariate_index=[1], metric="mi")
 
     # covariate index should work with mse
     est.reset()
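For context on the test change above, this is roughly the calling pattern it exercises. The estimator choice and its default construction are assumptions made for illustration; only the ``statistic``/``reset`` calls with ``covariate_index`` and the expected ``RuntimeError`` mirror the diff::

    import pytest
    from sklearn.datasets import load_iris

    from sktree.stats import FeatureImportanceForestRegressor  # assumed estimator for this sketch

    iris_X, iris_y = load_iris(return_X_y=True)
    est = FeatureImportanceForestRegressor()  # default hyperparameters, illustrative only

    # restricting the permutation to covariate index 1 works with an "mse" metric
    est.statistic(iris_X, iris_y, covariate_index=[1], metric="mse")

    # after a reset, an "mi" metric is rejected for this estimator,
    # which is what the updated test asserts
    est.reset()
    with pytest.raises(RuntimeError, match="Metric must be"):
        est.statistic(iris_X, iris_y, covariate_index=[1], metric="mi")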