From ba6a41bbcac26c75b926503df93b0b5eb54ba2d2 Mon Sep 17 00:00:00 2001
From: Phil Schaf <flying-sheep@web.de>
Date: Tue, 7 Apr 2026 17:06:26 +0200
Subject: [PATCH] chore!: remove `use_highly_variable`

---
 src/scanpy/experimental/pp/_normalization.py | 25 ++++----
 src/scanpy/preprocessing/_docs.py            |  9 +--
 src/scanpy/preprocessing/_pca/__init__.py    | 63 +++-----------------
 src/scanpy/tools/_ingest.py                  | 15 +++--
 src/testing/scanpy/_pytest/marks.py          |  8 ---
 tests/test_deprecations.py                   |  9 ---
 tests/test_highly_variable_genes.py          |  5 +-
 tests/test_normalization.py                  | 27 +++------
 tests/test_pca.py                            | 18 ++----
 9 files changed, 44 insertions(+), 135 deletions(-)

diff --git a/src/scanpy/experimental/pp/_normalization.py b/src/scanpy/experimental/pp/_normalization.py
index cd6229d7cf..551df22be4 100644
--- a/src/scanpy/experimental/pp/_normalization.py
+++ b/src/scanpy/experimental/pp/_normalization.py
@@ -20,9 +20,9 @@
     doc_layer,
     doc_pca_chunk,
 )
-from ...get import _get_obs_rep, _set_obs_rep
-from ...preprocessing._docs import doc_mask_var_hvg
-from ...preprocessing._pca import _handle_mask_var, pca
+from ...get import _check_mask, _get_obs_rep, _set_obs_rep
+from ...preprocessing._docs import doc_mask_var
+from ...preprocessing._pca import pca
 
 if TYPE_CHECKING:
     from collections.abc import Mapping
@@ -158,7 +158,7 @@ def normalize_pearson_residuals(
     adata=doc_adata,
     dist_params=doc_dist_params,
     pca_chunk=doc_pca_chunk,
-    mask_var_hvg=doc_mask_var_hvg,
+    mask_var=doc_mask_var,
     check_values=doc_check_values,
     inplace=doc_inplace,
 )
@@ -171,8 +171,9 @@ def normalize_pearson_residuals_pca(
     n_comps: int | None = 50,
     rng: SeedLike | RNGLike | None = None,
     kwargs_pca: Mapping[str, Any] = MappingProxyType({}),
-    mask_var: np.ndarray | str | None | Default = Default("'highly_variable'"),
-    use_highly_variable: bool | None = None,
+    mask_var: np.ndarray | str | None | Default = Default(
+        "adata.var.get('highly_variable')"
+    ),
     check_values: bool = True,
     inplace: bool = True,
 ) -> AnnData | None:
@@ -190,7 +191,7 @@ def normalize_pearson_residuals_pca(
     {adata}
     {dist_params}
     {pca_chunk}
-    {mask_var_hvg}
+    {mask_var}
     {check_values}
     {inplace}
 
@@ -211,7 +212,7 @@ def normalize_pearson_residuals_pca(
         residual normalization.
     `.varm['PCs']`
         The principal components containing the loadings. When `inplace=True` and
-        `use_highly_variable=True`, this will contain empty rows for the genes not
+        `mask_var is not None`, this will contain empty rows for the genes not
         selected.
     `.uns['pca']['variance_ratio']`
         Ratio of explained variance.
@@ -219,11 +220,9 @@ def normalize_pearson_residuals_pca(
         Explained variance, equivalent to the eigenvalues of the covariance matrix.
 
     """
-    # Unify new mask argument and deprecated use_highly_varible argument
-    _, mask_var = _handle_mask_var(
-        adata, mask_var, use_highly_variable=use_highly_variable
-    )
-    del use_highly_variable
+    if isinstance(mask_var, Default):
+        mask_var = "highly_variable" if "highly_variable" in adata.var else None
+    mask_var = _check_mask(adata, mask_var, "var")
 
     if mask_var is not None:
         adata_sub = adata[:, mask_var].copy()
diff --git a/src/scanpy/preprocessing/_docs.py b/src/scanpy/preprocessing/_docs.py
index aa1378bd67..132284c104 100644
--- a/src/scanpy/preprocessing/_docs.py
+++ b/src/scanpy/preprocessing/_docs.py
@@ -15,18 +15,11 @@
     If True, use `adata.raw.X` for expression values instead of `adata.X`.\
 """
 
-doc_mask_var_hvg = """\
+doc_mask_var = """\
 mask_var
     To run only on a certain set of genes given by a boolean array
     or a string referring to an array in :attr:`~anndata.AnnData.var`.
     By default, uses `.var['highly_variable']` if available, else everything.
-use_highly_variable
-    Whether to use highly variable genes only, stored in
-    `.var['highly_variable']`.
-    By default uses them if they have been determined beforehand.
-
-    .. deprecated:: 1.10.0
-       Use `mask_var` instead
 """
 
 doc_obs_qc_args = """\
diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py
index 1cdb20bf1a..70511c5373 100644
--- a/src/scanpy/preprocessing/_pca/__init__.py
+++ b/src/scanpy/preprocessing/_pca/__init__.py
@@ -13,7 +13,7 @@
 from ..._utils import _doc_params, get_literal_vals, is_backed_type
 from ..._utils.random import _accepts_legacy_random_state, _legacy_random_state
 from ...get import _check_mask, _get_obs_rep
-from .._docs import doc_mask_var_hvg
+from .._docs import doc_mask_var
 from ._compat import _pca_compat_sparse
 
 if TYPE_CHECKING:
@@ -51,7 +51,7 @@
 type SvdSolver = SvdSolvDaskML | SvdSolvSkearn | SvdSolvPCACustom
 
 
-@_doc_params(mask_var_hvg=doc_mask_var_hvg, rng=doc_rng)
+@_doc_params(mask_var=doc_mask_var, rng=doc_rng)
 @_accepts_legacy_random_state(0)
 def pca(  # noqa: PLR0912, PLR0913, PLR0915
     data: AnnData | np.ndarray | CSBase,
@@ -65,8 +65,9 @@ def pca(  # noqa: PLR0912, PLR0913, PLR0915
     chunk_size: int | None = None,
     rng: SeedLike | RNGLike | None = None,
     return_info: bool = False,
-    mask_var: NDArray[np.bool] | str | None | Default = Default("'highly_variable'"),
-    use_highly_variable: bool | None = None,
+    mask_var: NDArray[np.bool] | str | None | Default = Default(
+        "adata.var.get('highly_variable')"
+    ),
     dtype: DTypeLike = "float32",
     key_added: str | None | Default = Default(preset=("pca", "key_added")),
     copy: bool = False,
@@ -160,7 +161,7 @@ def pca(  # noqa: PLR0912, PLR0913, PLR0915
     return_info
         Only relevant when not passing an :class:`~anndata.AnnData`:
         see “Returns”.
-    {mask_var_hvg}
+    {mask_var}
     layer
         Layer of `adata` to use as expression values.
     dtype
@@ -224,11 +225,15 @@ def pca(  # noqa: PLR0912, PLR0913, PLR0915
     else:
         adata = AnnData(data)
 
-    # Unify new mask argument and deprecated use_highly_varible argument
-    mask_var_param, mask_var = _handle_mask_var(
-        adata, mask_var, obsm=obsm, use_highly_variable=use_highly_variable
-    )
-    del use_highly_variable
+    if obsm:
+        # A `.var` mask cannot be combined with `obsm`; skip the HVG default too.
+        if not isinstance(mask_var, Default) and mask_var is not None:
+            msg = "Argument `mask_var` is incompatible with `obsm`."
+            raise ValueError(msg)
+        mask_var = None
+    elif isinstance(mask_var, Default):
+        mask_var = "highly_variable" if "highly_variable" in adata.var else None
+    mask_var_param, mask_var = mask_var, _check_mask(adata, mask_var, "var")
     adata_comp = adata[:, mask_var] if mask_var is not None else adata
 
     if n_comps is None:
@@ -355,7 +360,6 @@ def pca(  # noqa: PLR0912, PLR0913, PLR0915
         adata.uns[key_uns] = dict(
             params=dict(
                 zero_center=zero_center,
-                use_highly_variable=mask_var_param == "highly_variable",
                 mask_var=mask_var_param,
                 **(dict(layer=layer) if layer is not None else {}),
                 **(dict(obsm=obsm) if obsm is not None else {}),
@@ -387,49 +391,6 @@ def pca(  # noqa: PLR0912, PLR0913, PLR0915
             return x_pca
 
 
-def _handle_mask_var(
-    adata: AnnData,
-    mask_var: NDArray[np.bool] | str | None | Default,
-    *,
-    obsm: str | None = None,
-    use_highly_variable: bool | None,
-) -> tuple[np.ndarray | str | None, np.ndarray | None]:
-    """Unify new mask argument and deprecated use_highly_varible argument.
-
-    Returns both the normalized mask parameter and the validated mask array.
-    """
-    if obsm:
-        if not isinstance(mask_var, Default) and mask_var is not None:
-            msg = "Argument `mask_var` is incompatible with `obsm`."
-            raise ValueError(msg)
-        return None, None
-
-    # First, verify and possibly warn
-    if use_highly_variable is not None:
-        hint = (
-            'Use_highly_variable=True can be called through mask_var="highly_variable". '
-            "Use_highly_variable=False can be called through mask_var=None"
-        )
-        msg = f"Argument `use_highly_variable` is deprecated, consider using the mask argument. {hint}"
-        warn(msg, FutureWarning)
-        if not isinstance(mask_var, Default):
-            msg = f"These arguments are incompatible. {hint}"
-            raise ValueError(msg)
-
-    # Handle default case and explicit use_highly_variable=True
-    if use_highly_variable or (
-        use_highly_variable is None
-        and isinstance(mask_var, Default)
-        and "highly_variable" in adata.var.columns
-    ):
-        mask_var = "highly_variable"
-
-    # Without highly variable genes, we don’t use a mask by default
-    if isinstance(mask_var, Default) or mask_var is None:
-        return None, None
-    return mask_var, _check_mask(adata, mask_var, "var")
-
-
 @overload
 def _handle_dask_ml_args(
     svd_solver: str | None, method: type[dmld.PCA | dmld.IncrementalPCA]
diff --git a/src/scanpy/tools/_ingest.py b/src/scanpy/tools/_ingest.py
index 54556f229d..02566383d5 100644
--- a/src/scanpy/tools/_ingest.py
+++ b/src/scanpy/tools/_ingest.py
@@ -231,7 +231,7 @@ class Ingest:
     _umap: UMAP
     # pca
     _pca_centered: bool
-    _pca_use_hvg: bool
+    _pca_mask: str | None
     _pca_basis: np.ndarray
     # adata
     _adata_ref: AnnData
@@ -338,15 +338,14 @@ def _init_neighbors(self, adata: AnnData, neighbors_key: str | None) -> None:
 
     def _init_pca(self, adata: AnnData) -> None:
         self._pca_centered = adata.uns["pca"]["params"]["zero_center"]
-        self._pca_use_hvg = adata.uns["pca"]["params"]["use_highly_variable"]
+        self._pca_mask = adata.uns["pca"]["params"]["mask_var"]
 
-        mask = "highly_variable"
-        if self._pca_use_hvg and mask not in adata.var.columns:
-            msg = f"Did not find `adata.var[{mask!r}']`."
+        if self._pca_mask and self._pca_mask not in adata.var.columns:
+            msg = f"Did not find `adata.var[{self._pca_mask!r}]`."
             raise ValueError(msg)
 
-        if self._pca_use_hvg:
-            self._pca_basis = adata.varm["PCs"][adata.var[mask]]
+        if self._pca_mask:
+            self._pca_basis = adata.varm["PCs"][adata.var[self._pca_mask]]
         else:
             self._pca_basis = adata.varm["PCs"]
 
@@ -402,7 +401,8 @@ def __init__(
     def _pca(self, n_pcs=None):
         x = self._adata_new.X
         x = x.toarray() if isinstance(x, CSBase) else x.copy()
-        if self._pca_use_hvg:
-            x = x[:, self._adata_ref.var["highly_variable"]]
+        if self._pca_mask:
+            # Subset with the same `.var` column `_init_pca` used for the PCs.
+            x = x[:, self._adata_ref.var[self._pca_mask]]
         if self._pca_centered:
             x -= x.mean(axis=0)
diff --git a/src/testing/scanpy/_pytest/marks.py b/src/testing/scanpy/_pytest/marks.py
index 8b25f0457d..1e83404614 100644
--- a/src/testing/scanpy/_pytest/marks.py
+++ b/src/testing/scanpy/_pytest/marks.py
@@ -1,11 +1,9 @@
 from __future__ import annotations
 
 from enum import Enum, auto
-from importlib.metadata import version
 from importlib.util import find_spec
 
 import pytest
-from packaging.version import Version
 
 
 class QuietMarkDecorator(pytest.MarkDecorator):
@@ -71,9 +69,3 @@ def skip_reason(self) -> str | None:
         if self._name_.casefold() != self.mod.casefold().replace("-", "_"):
             reason = f"{reason} (`pip install {self.mod}`)"
         return reason
-
-
-# TODO: remove once https://github.com/numba/numba/issues/10319 is fixed
-skip_numba_0_63 = pytest.mark.skipif(
-    Version(version=version("numba")) >= Version("0.63b0"), reason="numba 0.63 bug"
-)
diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py
index 925979bcb6..3a46b6b7e3 100644
--- a/tests/test_deprecations.py
+++ b/tests/test_deprecations.py
@@ -19,12 +19,3 @@ def test_deprecate_multicore_tsne() -> None:
         pytest.warns(ImportWarning, match=r"MulticoreTSNE"),
     ):
         sc.tl.tsne(pbmc, use_fast_tsne=True)
-
-
-def test_deprecate_use_highly_variable_genes():
-    pbmc = pbmc68k_reduced()
-
-    with pytest.warns(
-        FutureWarning, match="Argument `use_highly_variable` is deprecated"
-    ):
-        sc.pp.pca(pbmc, use_highly_variable=True)
diff --git a/tests/test_highly_variable_genes.py b/tests/test_highly_variable_genes.py
index c07c6e6a23..f021ee8584 100644
--- a/tests/test_highly_variable_genes.py
+++ b/tests/test_highly_variable_genes.py
@@ -18,7 +18,7 @@
 from scanpy._compat import CSRBase
 from testing.scanpy._helpers import _check_check_values_warnings
 from testing.scanpy._helpers.data import pbmc3k, pbmc68k_reduced
-from testing.scanpy._pytest.marks import needs, skip_numba_0_63
+from testing.scanpy._pytest.marks import needs
 from testing.scanpy._pytest.params import ARRAY_TYPES
 
 if TYPE_CHECKING:
@@ -165,7 +165,6 @@ def _check_pearson_hvg_columns(output_df: pd.DataFrame, n_top_genes: int):
     assert np.nanmax(output_df["highly_variable_rank"].to_numpy()) <= n_top_genes - 1
 
 
-@skip_numba_0_63
 def test_pearson_residuals_inputchecks(
     pbmc3k_parametrized_small: Callable[[], AnnData],
 ) -> None:
@@ -202,7 +201,6 @@ def test_pearson_residuals_inputchecks(
         )
 
 
-@skip_numba_0_63
 @pytest.mark.parametrize("subset", [True, False], ids=["subset", "full"])
 @pytest.mark.parametrize(
     "clip", [None, np.inf, 30], ids=["noclip", "infclip", "30clip"]
@@ -296,7 +294,6 @@ def test_pearson_residuals_general(
     _check_pearson_hvg_columns(output_df, n_top_genes)
 
 
-@skip_numba_0_63
 @pytest.mark.parametrize("subset", [True, False], ids=["subset", "full"])
 @pytest.mark.parametrize("n_top_genes", [100, 200], ids=["100n", "200n"])
 def test_pearson_residuals_batch(
diff --git a/tests/test_normalization.py b/tests/test_normalization.py
index 452b628e58..50347c8ab9 100644
--- a/tests/test_normalization.py
+++ b/tests/test_normalization.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-from contextlib import nullcontext
 from functools import partial
 from typing import TYPE_CHECKING
 
@@ -18,7 +17,6 @@
     check_rep_mutation,
     check_rep_results,
 )
-from testing.scanpy._pytest.marks import skip_numba_0_63
 
 # TODO: Add support for sparse-in-dask
 from testing.scanpy._pytest.params import ARRAY_TYPES, ARRAY_TYPES_DENSE
@@ -210,7 +208,6 @@ def _check_pearson_pca_fields(ad, n_cells, n_comps):
     ), "Wrong shape of PCA output in `X_pca`"
 
 
-@skip_numba_0_63
 @pytest.mark.parametrize("n_hvgs", [100, 200])
 @pytest.mark.parametrize("n_comps", [30, 50])
 @pytest.mark.parametrize(
@@ -218,9 +215,7 @@ def _check_pearson_pca_fields(ad, n_cells, n_comps):
     [
         pytest.param(False, dict(), "n_genes", id="no_hvg"),
         pytest.param(True, dict(), "n_hvgs", id="hvg_default"),
-        pytest.param(
-            True, dict(use_highly_variable=False), "n_genes", id="hvg_opt_out"
-        ),
+        pytest.param(True, dict(mask_var=None), "n_genes", id="hvg_opt_out"),
         pytest.param(False, dict(mask_var="test_mask"), "n_unmasked", id="mask"),
     ],
 )
@@ -247,19 +242,14 @@ def test_normalize_pearson_residuals_pca(
             adata, flavor="pearson_residuals", n_top_genes=n_hvgs
         )
 
-    ctx = (
-        pytest.warns(FutureWarning, match=r"use_highly_variable.*deprecated")
-        if "use_highly_variable" in params
-        else nullcontext()
+    # inplace=False
+    adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca(
+        adata.copy(), inplace=False, n_comps=n_comps, **params
+    )
+    # inplace=True modifies the input adata object
+    sc.experimental.pp.normalize_pearson_residuals_pca(
+        adata, inplace=True, n_comps=n_comps, **params
     )
-    with ctx:  # inplace=False
-        adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca(
-            adata.copy(), inplace=False, n_comps=n_comps, **params
-        )
-    with ctx:  # inplace=True modifies the input adata object
-        sc.experimental.pp.normalize_pearson_residuals_pca(
-            adata, inplace=True, n_comps=n_comps, **params
-        )
 
     for ad, n_var_ret in (
         (adata_pca, n_var_copy),
@@ -283,7 +273,6 @@ def test_normalize_pearson_residuals_pca(
     np.testing.assert_array_equal(adata.obsm["X_pca"], adata_pca.obsm["X_pca"])
 
 
-@skip_numba_0_63
 @pytest.mark.parametrize("n_hvgs", [100, 200])
 @pytest.mark.parametrize("n_comps", [30, 50])
 def test_normalize_pearson_residuals_recipe(
diff --git a/tests/test_pca.py b/tests/test_pca.py
index 7444a737bd..0acd78b860 100644
--- a/tests/test_pca.py
+++ b/tests/test_pca.py
@@ -403,20 +403,14 @@ def test_pca_n_pcs():
 # We use all possible array types here since this error should be raised before
 # PCA can realize that it got a Dask array
 @pytest.mark.parametrize("array_type", ARRAY_TYPES_ALL)
-def test_mask_highly_var_error(array_type):
-    """Check if use_highly_variable=True throws an error if the annotation is missing."""
+def test_mask_var_error(array_type):
+    """Check if mask_var="..." throws an error if the annotation is missing."""
     adata = AnnData(array_type(A_list).astype("float32"))
-    with (
-        pytest.warns(
-            FutureWarning,
-            match=r"Argument `use_highly_variable` is deprecated, consider using the mask argument\.",
-        ),
-        pytest.raises(
-            ValueError,
-            match=r"Did not find `adata\.var\['highly_variable'\]`\.",
-        ),
+    with pytest.raises(
+        ValueError,
+        match=r"Did not find `adata\.var\['highly_variable'\]`\.",
     ):
-        sc.pp.pca(adata, use_highly_variable=True)
+        sc.pp.pca(adata, mask_var="highly_variable")
 
 
 def test_mask_length_error():