From ba6a41bbcac26c75b926503df93b0b5eb54ba2d2 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Tue, 7 Apr 2026 17:06:26 +0200 Subject: [PATCH] chore!: remove `use_highly_variable` --- src/scanpy/experimental/pp/_normalization.py | 25 ++++---- src/scanpy/preprocessing/_docs.py | 9 +-- src/scanpy/preprocessing/_pca/__init__.py | 63 +++----------------- src/scanpy/tools/_ingest.py | 15 +++-- src/testing/scanpy/_pytest/marks.py | 8 --- tests/test_deprecations.py | 9 --- tests/test_highly_variable_genes.py | 5 +- tests/test_normalization.py | 27 +++------ tests/test_pca.py | 18 ++---- 9 files changed, 44 insertions(+), 135 deletions(-) diff --git a/src/scanpy/experimental/pp/_normalization.py b/src/scanpy/experimental/pp/_normalization.py index cd6229d7cf..551df22be4 100644 --- a/src/scanpy/experimental/pp/_normalization.py +++ b/src/scanpy/experimental/pp/_normalization.py @@ -20,9 +20,9 @@ doc_layer, doc_pca_chunk, ) -from ...get import _get_obs_rep, _set_obs_rep -from ...preprocessing._docs import doc_mask_var_hvg -from ...preprocessing._pca import _handle_mask_var, pca +from ...get import _check_mask, _get_obs_rep, _set_obs_rep +from ...preprocessing._docs import doc_mask_var +from ...preprocessing._pca import pca if TYPE_CHECKING: from collections.abc import Mapping @@ -158,7 +158,7 @@ def normalize_pearson_residuals( adata=doc_adata, dist_params=doc_dist_params, pca_chunk=doc_pca_chunk, - mask_var_hvg=doc_mask_var_hvg, + mask_var=doc_mask_var, check_values=doc_check_values, inplace=doc_inplace, ) @@ -171,8 +171,9 @@ def normalize_pearson_residuals_pca( n_comps: int | None = 50, rng: SeedLike | RNGLike | None = None, kwargs_pca: Mapping[str, Any] = MappingProxyType({}), - mask_var: np.ndarray | str | None | Default = Default("'highly_variable'"), - use_highly_variable: bool | None = None, + mask_var: np.ndarray | str | None | Default = Default( + "adata.var.get('highly_variable')" + ), check_values: bool = True, inplace: bool = True, ) -> AnnData | None: @@ -190,7 
+191,7 @@ def normalize_pearson_residuals_pca( {adata} {dist_params} {pca_chunk} - {mask_var_hvg} + {mask_var} {check_values} {inplace} @@ -211,7 +212,7 @@ def normalize_pearson_residuals_pca( residual normalization. `.varm['PCs']` The principal components containing the loadings. When `inplace=True` and - `use_highly_variable=True`, this will contain empty rows for the genes not + `mask_var is not None`, this will contain empty rows for the genes not selected. `.uns['pca']['variance_ratio']` Ratio of explained variance. @@ -219,11 +220,9 @@ def normalize_pearson_residuals_pca( Explained variance, equivalent to the eigenvalues of the covariance matrix. """ - # Unify new mask argument and deprecated use_highly_varible argument - _, mask_var = _handle_mask_var( - adata, mask_var, use_highly_variable=use_highly_variable - ) - del use_highly_variable + if isinstance(mask_var, Default): + mask_var = "highly_variable" if "highly_variable" in adata.var else None + mask_var = _check_mask(adata, mask_var, "var") if mask_var is not None: adata_sub = adata[:, mask_var].copy() diff --git a/src/scanpy/preprocessing/_docs.py b/src/scanpy/preprocessing/_docs.py index aa1378bd67..132284c104 100644 --- a/src/scanpy/preprocessing/_docs.py +++ b/src/scanpy/preprocessing/_docs.py @@ -15,18 +15,11 @@ If True, use `adata.raw.X` for expression values instead of `adata.X`.\ """ -doc_mask_var_hvg = """\ +doc_mask_var = """\ mask_var To run only on a certain set of genes given by a boolean array or a string referring to an array in :attr:`~anndata.AnnData.var`. By default, uses `.var['highly_variable']` if available, else everything. -use_highly_variable - Whether to use highly variable genes only, stored in - `.var['highly_variable']`. - By default uses them if they have been determined beforehand. - - .. 
deprecated:: 1.10.0 - Use `mask_var` instead """ doc_obs_qc_args = """\ diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py index 1cdb20bf1a..70511c5373 100644 --- a/src/scanpy/preprocessing/_pca/__init__.py +++ b/src/scanpy/preprocessing/_pca/__init__.py @@ -13,7 +13,7 @@ from ..._utils import _doc_params, get_literal_vals, is_backed_type from ..._utils.random import _accepts_legacy_random_state, _legacy_random_state from ...get import _check_mask, _get_obs_rep -from .._docs import doc_mask_var_hvg +from .._docs import doc_mask_var from ._compat import _pca_compat_sparse if TYPE_CHECKING: @@ -51,7 +51,7 @@ type SvdSolver = SvdSolvDaskML | SvdSolvSkearn | SvdSolvPCACustom -@_doc_params(mask_var_hvg=doc_mask_var_hvg, rng=doc_rng) +@_doc_params(mask_var=doc_mask_var, rng=doc_rng) @_accepts_legacy_random_state(0) def pca( # noqa: PLR0912, PLR0913, PLR0915 data: AnnData | np.ndarray | CSBase, @@ -65,8 +65,9 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 chunk_size: int | None = None, rng: SeedLike | RNGLike | None = None, return_info: bool = False, - mask_var: NDArray[np.bool] | str | None | Default = Default("'highly_variable'"), - use_highly_variable: bool | None = None, + mask_var: NDArray[np.bool] | str | None | Default = Default( + "adata.var.get('highly_variable')" + ), dtype: DTypeLike = "float32", key_added: str | None | Default = Default(preset=("pca", "key_added")), copy: bool = False, @@ -160,7 +161,7 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 return_info Only relevant when not passing an :class:`~anndata.AnnData`: see “Returns”. - {mask_var_hvg} + {mask_var} layer Layer of `adata` to use as expression values. 
dtype @@ -224,11 +225,9 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 else: adata = AnnData(data) - # Unify new mask argument and deprecated use_highly_varible argument - mask_var_param, mask_var = _handle_mask_var( - adata, mask_var, obsm=obsm, use_highly_variable=use_highly_variable - ) - del use_highly_variable + if isinstance(mask_var, Default): + mask_var = "highly_variable" if "highly_variable" in adata.var else None + mask_var_param, mask_var = mask_var, _check_mask(adata, mask_var, "var") adata_comp = adata[:, mask_var] if mask_var is not None else adata if n_comps is None: @@ -355,7 +354,6 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 adata.uns[key_uns] = dict( params=dict( zero_center=zero_center, - use_highly_variable=mask_var_param == "highly_variable", mask_var=mask_var_param, **(dict(layer=layer) if layer is not None else {}), **(dict(obsm=obsm) if obsm is not None else {}), @@ -387,49 +385,6 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 return x_pca -def _handle_mask_var( - adata: AnnData, - mask_var: NDArray[np.bool] | str | None | Default, - *, - obsm: str | None = None, - use_highly_variable: bool | None, -) -> tuple[np.ndarray | str | None, np.ndarray | None]: - """Unify new mask argument and deprecated use_highly_varible argument. - - Returns both the normalized mask parameter and the validated mask array. - """ - if obsm: - if not isinstance(mask_var, Default) and mask_var is not None: - msg = "Argument `mask_var` is incompatible with `obsm`." - raise ValueError(msg) - return None, None - - # First, verify and possibly warn - if use_highly_variable is not None: - hint = ( - 'Use_highly_variable=True can be called through mask_var="highly_variable". ' - "Use_highly_variable=False can be called through mask_var=None" - ) - msg = f"Argument `use_highly_variable` is deprecated, consider using the mask argument. {hint}" - warn(msg, FutureWarning) - if not isinstance(mask_var, Default): - msg = f"These arguments are incompatible. 
{hint}" - raise ValueError(msg) - - # Handle default case and explicit use_highly_variable=True - if use_highly_variable or ( - use_highly_variable is None - and isinstance(mask_var, Default) - and "highly_variable" in adata.var.columns - ): - mask_var = "highly_variable" - - # Without highly variable genes, we don’t use a mask by default - if isinstance(mask_var, Default) or mask_var is None: - return None, None - return mask_var, _check_mask(adata, mask_var, "var") - - @overload def _handle_dask_ml_args( svd_solver: str | None, method: type[dmld.PCA | dmld.IncrementalPCA] diff --git a/src/scanpy/tools/_ingest.py b/src/scanpy/tools/_ingest.py index 54556f229d..02566383d5 100644 --- a/src/scanpy/tools/_ingest.py +++ b/src/scanpy/tools/_ingest.py @@ -231,7 +231,7 @@ class Ingest: _umap: UMAP # pca _pca_centered: bool - _pca_use_hvg: bool + _pca_mask: str | None _pca_basis: np.ndarray # adata _adata_ref: AnnData @@ -338,15 +338,14 @@ def _init_neighbors(self, adata: AnnData, neighbors_key: str | None) -> None: def _init_pca(self, adata: AnnData) -> None: self._pca_centered = adata.uns["pca"]["params"]["zero_center"] - self._pca_use_hvg = adata.uns["pca"]["params"]["use_highly_variable"] + self._pca_mask = adata.uns["pca"]["params"]["mask_var"] - mask = "highly_variable" - if self._pca_use_hvg and mask not in adata.var.columns: - msg = f"Did not find `adata.var[{mask!r}']`." + if self._pca_mask and self._pca_mask not in adata.var.columns: + msg = f"Did not find `adata.var[{self._pca_mask!r}']`." 
raise ValueError(msg) - if self._pca_use_hvg: - self._pca_basis = adata.varm["PCs"][adata.var[mask]] + if self._pca_mask: + self._pca_basis = adata.varm["PCs"][adata.var[self._pca_mask]] else: self._pca_basis = adata.varm["PCs"] @@ -402,7 +401,7 @@ def __init__( def _pca(self, n_pcs=None): x = self._adata_new.X x = x.toarray() if isinstance(x, CSBase) else x.copy() - if self._pca_use_hvg: - x = x[:, self._adata_ref.var["highly_variable"]] + if self._pca_mask: + x = x[:, self._adata_ref.var[self._pca_mask]] if self._pca_centered: x -= x.mean(axis=0) diff --git a/src/testing/scanpy/_pytest/marks.py b/src/testing/scanpy/_pytest/marks.py index 8b25f0457d..1e83404614 100644 --- a/src/testing/scanpy/_pytest/marks.py +++ b/src/testing/scanpy/_pytest/marks.py @@ -1,11 +1,9 @@ from __future__ import annotations from enum import Enum, auto -from importlib.metadata import version from importlib.util import find_spec import pytest -from packaging.version import Version class QuietMarkDecorator(pytest.MarkDecorator): @@ -71,9 +69,3 @@ def skip_reason(self) -> str | None: if self._name_.casefold() != self.mod.casefold().replace("-", "_"): reason = f"{reason} (`pip install {self.mod}`)" return reason - - -# TODO: remove once https://github.com/numba/numba/issues/10319 is fixed -skip_numba_0_63 = pytest.mark.skipif( - Version(version=version("numba")) >= Version("0.63b0"), reason="numba 0.63 bug" -) diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py index 925979bcb6..3a46b6b7e3 100644 --- a/tests/test_deprecations.py +++ b/tests/test_deprecations.py @@ -19,12 +19,3 @@ def test_deprecate_multicore_tsne() -> None: pytest.warns(ImportWarning, match=r"MulticoreTSNE"), ): sc.tl.tsne(pbmc, use_fast_tsne=True) - - -def test_deprecate_use_highly_variable_genes(): - pbmc = pbmc68k_reduced() - - with pytest.warns( - FutureWarning, match="Argument `use_highly_variable` is deprecated" - ): - sc.pp.pca(pbmc, use_highly_variable=True) diff --git a/tests/test_highly_variable_genes.py b/tests/test_highly_variable_genes.py index
c07c6e6a23..f021ee8584 100644 --- a/tests/test_highly_variable_genes.py +++ b/tests/test_highly_variable_genes.py @@ -18,7 +18,7 @@ from scanpy._compat import CSRBase from testing.scanpy._helpers import _check_check_values_warnings from testing.scanpy._helpers.data import pbmc3k, pbmc68k_reduced -from testing.scanpy._pytest.marks import needs, skip_numba_0_63 +from testing.scanpy._pytest.marks import needs from testing.scanpy._pytest.params import ARRAY_TYPES if TYPE_CHECKING: @@ -165,7 +165,6 @@ def _check_pearson_hvg_columns(output_df: pd.DataFrame, n_top_genes: int): assert np.nanmax(output_df["highly_variable_rank"].to_numpy()) <= n_top_genes - 1 -@skip_numba_0_63 def test_pearson_residuals_inputchecks( pbmc3k_parametrized_small: Callable[[], AnnData], ) -> None: @@ -202,7 +201,6 @@ def test_pearson_residuals_inputchecks( ) -@skip_numba_0_63 @pytest.mark.parametrize("subset", [True, False], ids=["subset", "full"]) @pytest.mark.parametrize( "clip", [None, np.inf, 30], ids=["noclip", "infclip", "30clip"] @@ -296,7 +294,6 @@ def test_pearson_residuals_general( _check_pearson_hvg_columns(output_df, n_top_genes) -@skip_numba_0_63 @pytest.mark.parametrize("subset", [True, False], ids=["subset", "full"]) @pytest.mark.parametrize("n_top_genes", [100, 200], ids=["100n", "200n"]) def test_pearson_residuals_batch( diff --git a/tests/test_normalization.py b/tests/test_normalization.py index 452b628e58..50347c8ab9 100644 --- a/tests/test_normalization.py +++ b/tests/test_normalization.py @@ -1,6 +1,5 @@ from __future__ import annotations -from contextlib import nullcontext from functools import partial from typing import TYPE_CHECKING @@ -18,7 +17,6 @@ check_rep_mutation, check_rep_results, ) -from testing.scanpy._pytest.marks import skip_numba_0_63 # TODO: Add support for sparse-in-dask from testing.scanpy._pytest.params import ARRAY_TYPES, ARRAY_TYPES_DENSE @@ -210,7 +208,6 @@ def _check_pearson_pca_fields(ad, n_cells, n_comps): ), "Wrong shape of PCA output in `X_pca`" 
-@skip_numba_0_63 @pytest.mark.parametrize("n_hvgs", [100, 200]) @pytest.mark.parametrize("n_comps", [30, 50]) @pytest.mark.parametrize( @@ -218,9 +215,7 @@ def _check_pearson_pca_fields(ad, n_cells, n_comps): [ pytest.param(False, dict(), "n_genes", id="no_hvg"), pytest.param(True, dict(), "n_hvgs", id="hvg_default"), - pytest.param( - True, dict(use_highly_variable=False), "n_genes", id="hvg_opt_out" - ), + pytest.param(True, dict(mask_var=None), "n_genes", id="hvg_opt_out"), pytest.param(False, dict(mask_var="test_mask"), "n_unmasked", id="mask"), ], ) @@ -247,19 +242,14 @@ def test_normalize_pearson_residuals_pca( adata, flavor="pearson_residuals", n_top_genes=n_hvgs ) - ctx = ( - pytest.warns(FutureWarning, match=r"use_highly_variable.*deprecated") - if "use_highly_variable" in params - else nullcontext() + # inplace=False + adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca( + adata.copy(), inplace=False, n_comps=n_comps, **params + ) + # inplace=True modifies the input adata object + sc.experimental.pp.normalize_pearson_residuals_pca( + adata, inplace=True, n_comps=n_comps, **params ) - with ctx: # inplace=False - adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca( - adata.copy(), inplace=False, n_comps=n_comps, **params - ) - with ctx: # inplace=True modifies the input adata object - sc.experimental.pp.normalize_pearson_residuals_pca( - adata, inplace=True, n_comps=n_comps, **params - ) for ad, n_var_ret in ( (adata_pca, n_var_copy), @@ -283,7 +273,6 @@ def test_normalize_pearson_residuals_pca( np.testing.assert_array_equal(adata.obsm["X_pca"], adata_pca.obsm["X_pca"]) -@skip_numba_0_63 @pytest.mark.parametrize("n_hvgs", [100, 200]) @pytest.mark.parametrize("n_comps", [30, 50]) def test_normalize_pearson_residuals_recipe( diff --git a/tests/test_pca.py b/tests/test_pca.py index 7444a737bd..0acd78b860 100644 --- a/tests/test_pca.py +++ b/tests/test_pca.py @@ -403,20 +403,14 @@ def test_pca_n_pcs(): # We use all possible array types 
here since this error should be raised before # PCA can realize that it got a Dask array @pytest.mark.parametrize("array_type", ARRAY_TYPES_ALL) -def test_mask_highly_var_error(array_type): - """Check if use_highly_variable=True throws an error if the annotation is missing.""" +def test_mask_var_error(array_type): + """Check if mask_var="..." throws an error if the annotation is missing.""" adata = AnnData(array_type(A_list).astype("float32")) - with ( - pytest.warns( - FutureWarning, - match=r"Argument `use_highly_variable` is deprecated, consider using the mask argument\.", - ), - pytest.raises( - ValueError, - match=r"Did not find `adata\.var\['highly_variable'\]`\.", - ), + with pytest.raises( + ValueError, + match=r"Did not find `adata\.var\['highly_variable'\]`\.", ): - sc.pp.pca(adata, use_highly_variable=True) + sc.pp.pca(adata, mask_var="highly_variable") def test_mask_length_error():