Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions src/scanpy/experimental/pp/_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
doc_layer,
doc_pca_chunk,
)
from ...get import _get_obs_rep, _set_obs_rep
from ...preprocessing._docs import doc_mask_var_hvg
from ...preprocessing._pca import _handle_mask_var, pca
from ...get import _check_mask, _get_obs_rep, _set_obs_rep
from ...preprocessing._docs import doc_mask_var
from ...preprocessing._pca import pca

if TYPE_CHECKING:
from collections.abc import Mapping
Expand Down Expand Up @@ -158,7 +158,7 @@ def normalize_pearson_residuals(
adata=doc_adata,
dist_params=doc_dist_params,
pca_chunk=doc_pca_chunk,
mask_var_hvg=doc_mask_var_hvg,
mask_var=doc_mask_var,
check_values=doc_check_values,
inplace=doc_inplace,
)
Expand All @@ -171,8 +171,9 @@ def normalize_pearson_residuals_pca(
n_comps: int | None = 50,
rng: SeedLike | RNGLike | None = None,
kwargs_pca: Mapping[str, Any] = MappingProxyType({}),
mask_var: np.ndarray | str | None | Default = Default("'highly_variable'"),
use_highly_variable: bool | None = None,
mask_var: np.ndarray | str | None | Default = Default(
"adata.var.get('highly_variable')"
),
check_values: bool = True,
inplace: bool = True,
) -> AnnData | None:
Expand All @@ -190,7 +191,7 @@ def normalize_pearson_residuals_pca(
{adata}
{dist_params}
{pca_chunk}
{mask_var_hvg}
{mask_var}
{check_values}
{inplace}

Expand All @@ -211,19 +212,17 @@ def normalize_pearson_residuals_pca(
residual normalization.
`.varm['PCs']`
The principal components containing the loadings. When `inplace=True` and
`use_highly_variable=True`, this will contain empty rows for the genes not
`mask_var is not None`, this will contain empty rows for the genes not
selected.
`.uns['pca']['variance_ratio']`
Ratio of explained variance.
`.uns['pca']['variance']`
Explained variance, equivalent to the eigenvalues of the covariance matrix.

"""
# Unify new mask argument and deprecated use_highly_varible argument
_, mask_var = _handle_mask_var(
adata, mask_var, use_highly_variable=use_highly_variable
)
del use_highly_variable
if isinstance(mask_var, Default):
mask_var = "highly_variable" if "highly_variable" in adata.var else None
mask_var = _check_mask(adata, mask_var, "var")

if mask_var is not None:
adata_sub = adata[:, mask_var].copy()
Expand Down
9 changes: 1 addition & 8 deletions src/scanpy/preprocessing/_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,11 @@
If True, use `adata.raw.X` for expression values instead of `adata.X`.\
"""

doc_mask_var_hvg = """\
doc_mask_var = """\
mask_var
To run only on a certain set of genes given by a boolean array
or a string referring to an array in :attr:`~anndata.AnnData.var`.
By default, uses `.var['highly_variable']` if available, else everything.
use_highly_variable
Whether to use highly variable genes only, stored in
`.var['highly_variable']`.
By default uses them if they have been determined beforehand.

.. deprecated:: 1.10.0
Use `mask_var` instead
"""

doc_obs_qc_args = """\
Expand Down
63 changes: 9 additions & 54 deletions src/scanpy/preprocessing/_pca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ..._utils import _doc_params, get_literal_vals, is_backed_type
from ..._utils.random import _accepts_legacy_random_state, _legacy_random_state
from ...get import _check_mask, _get_obs_rep
from .._docs import doc_mask_var_hvg
from .._docs import doc_mask_var
from ._compat import _pca_compat_sparse

if TYPE_CHECKING:
Expand Down Expand Up @@ -51,7 +51,7 @@
type SvdSolver = SvdSolvDaskML | SvdSolvSkearn | SvdSolvPCACustom


@_doc_params(mask_var_hvg=doc_mask_var_hvg, rng=doc_rng)
@_doc_params(mask_var=doc_mask_var, rng=doc_rng)
@_accepts_legacy_random_state(0)
def pca( # noqa: PLR0912, PLR0913, PLR0915
data: AnnData | np.ndarray | CSBase,
Expand All @@ -65,8 +65,9 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
chunk_size: int | None = None,
rng: SeedLike | RNGLike | None = None,
return_info: bool = False,
mask_var: NDArray[np.bool] | str | None | Default = Default("'highly_variable'"),
use_highly_variable: bool | None = None,
mask_var: NDArray[np.bool] | str | None | Default = Default(
"adata.var.get('highly_variable')"
),
dtype: DTypeLike = "float32",
key_added: str | None | Default = Default(preset=("pca", "key_added")),
copy: bool = False,
Expand Down Expand Up @@ -160,7 +161,7 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
return_info
Only relevant when not passing an :class:`~anndata.AnnData`:
see “Returns”.
{mask_var_hvg}
{mask_var}
layer
Layer of `adata` to use as expression values.
dtype
Expand Down Expand Up @@ -224,11 +225,9 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
else:
adata = AnnData(data)

# Unify new mask argument and deprecated use_highly_varible argument
mask_var_param, mask_var = _handle_mask_var(
adata, mask_var, obsm=obsm, use_highly_variable=use_highly_variable
)
del use_highly_variable
if isinstance(mask_var, Default):
mask_var = "highly_variable" if "highly_variable" in adata.var else None
mask_var_param, mask_var = mask_var, _check_mask(adata, mask_var, "var")
adata_comp = adata[:, mask_var] if mask_var is not None else adata

if n_comps is None:
Expand Down Expand Up @@ -355,7 +354,6 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
adata.uns[key_uns] = dict(
params=dict(
zero_center=zero_center,
use_highly_variable=mask_var_param == "highly_variable",
mask_var=mask_var_param,
**(dict(layer=layer) if layer is not None else {}),
**(dict(obsm=obsm) if obsm is not None else {}),
Expand Down Expand Up @@ -387,49 +385,6 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
return x_pca


def _handle_mask_var(
    adata: AnnData,
    mask_var: NDArray[np.bool] | str | None | Default,
    *,
    obsm: str | None = None,
    use_highly_variable: bool | None,
) -> tuple[np.ndarray | str | None, np.ndarray | None]:
    """Reconcile `mask_var` with the deprecated `use_highly_variable` flag.

    Parameters
    ----------
    adata
        Object whose `.var` columns are consulted for `'highly_variable'`.
    mask_var
        Mask as passed by the caller; a `Default` instance means “not specified”.
    obsm
        If truthy, no variable mask applies and an explicit `mask_var` is rejected.
    use_highly_variable
        Deprecated flag; any value other than `None` emits a `FutureWarning`.

    Returns
    -------
    A pair `(mask_param, mask)`: the unified mask parameter and the result of
    `_check_mask` for it. Both are `None` when no mask is used.

    Raises
    ------
    ValueError
        If an explicit `mask_var` is combined with `obsm` or `use_highly_variable`.
    """
    mask_unset = isinstance(mask_var, Default)

    if obsm:
        if mask_unset or mask_var is None:
            return None, None
        msg = "Argument `mask_var` is incompatible with `obsm`."
        raise ValueError(msg)

    if use_highly_variable is None:
        # Nothing requested explicitly: fall back to HVGs only if they exist
        want_hvg = mask_unset and "highly_variable" in adata.var.columns
    else:
        # Deprecated path: warn first, then reject a conflicting `mask_var`
        hint = (
            'Use_highly_variable=True can be called through mask_var="highly_variable". '
            "Use_highly_variable=False can be called through mask_var=None"
        )
        msg = f"Argument `use_highly_variable` is deprecated, consider using the mask argument. {hint}"
        warn(msg, FutureWarning)
        if not mask_unset:
            msg = f"These arguments are incompatible. {hint}"
            raise ValueError(msg)
        want_hvg = bool(use_highly_variable)

    if want_hvg:
        mask_var = "highly_variable"
    elif mask_unset or mask_var is None:
        # Without highly variable genes, we don’t use a mask by default
        return None, None
    return mask_var, _check_mask(adata, mask_var, "var")


@overload
def _handle_dask_ml_args(
svd_solver: str | None, method: type[dmld.PCA | dmld.IncrementalPCA]
Expand Down
15 changes: 7 additions & 8 deletions src/scanpy/tools/_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ class Ingest:
_umap: UMAP
# pca
_pca_centered: bool
_pca_use_hvg: bool
_pca_mask: str | None
_pca_basis: np.ndarray
# adata
_adata_ref: AnnData
Expand Down Expand Up @@ -338,15 +338,14 @@ def _init_neighbors(self, adata: AnnData, neighbors_key: str | None) -> None:

def _init_pca(self, adata: AnnData) -> None:
    """Cache the reference PCA settings and loadings found on `adata`.

    Reads `adata.uns['pca']['params']` and `adata.varm['PCs']` and sets
    `_pca_centered`, `_pca_mask` and `_pca_basis` on the instance.

    Raises
    ------
    ValueError
        If the stored mask is not a column name, or if it names a column
        that is missing from `adata.var`.
    """
    params = adata.uns["pca"]["params"]
    self._pca_centered = params["zero_center"]

    mask = params.get("mask_var")
    # NOTE(review): presumably results stored by older versions only carry the
    # legacy `use_highly_variable` flag — honour it instead of raising KeyError.
    if mask is None and params.get("use_highly_variable"):
        mask = "highly_variable"

    # `pca` records the mask argument as given, so it may be a boolean array;
    # only column names can be re-applied here, so fail with a clear message
    # rather than an "ambiguous truth value" error further down.
    if mask is not None and not isinstance(mask, str):
        msg = (
            "Ingest requires the PCA `mask_var` to be a column name in "
            f"`adata.var`, got {type(mask).__name__}."
        )
        raise ValueError(msg)
    self._pca_mask = mask

    if not mask:
        self._pca_basis = adata.varm["PCs"]
        return

    if mask not in adata.var.columns:
        msg = f"Did not find `adata.var[{mask!r}]`."
        raise ValueError(msg)
    # Keep only the loadings of the variables the PCA was computed on
    self._pca_basis = adata.varm["PCs"][adata.var[mask]]

Expand Down Expand Up @@ -402,7 +401,7 @@ def __init__(
def _pca(self, n_pcs=None):
x = self._adata_new.X
x = x.toarray() if isinstance(x, CSBase) else x.copy()
if self._pca_use_hvg:
if self._pca_mask:
x = x[:, self._adata_ref.var["highly_variable"]]
if self._pca_centered:
x -= x.mean(axis=0)
Expand Down
8 changes: 0 additions & 8 deletions src/testing/scanpy/_pytest/marks.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
from __future__ import annotations

from enum import Enum, auto
from importlib.metadata import version
from importlib.util import find_spec

import pytest
from packaging.version import Version


class QuietMarkDecorator(pytest.MarkDecorator):
Expand Down Expand Up @@ -71,9 +69,3 @@ def skip_reason(self) -> str | None:
if self._name_.casefold() != self.mod.casefold().replace("-", "_"):
reason = f"{reason} (`pip install {self.mod}`)"
return reason


# TODO: remove once https://github.com/numba/numba/issues/10319 is fixed
# Skip mark for tests hitting that issue: applies when the installed numba
# distribution reports version 0.63b0 or newer. The version lookup runs once,
# when this module is imported.
skip_numba_0_63 = pytest.mark.skipif(
    Version(version=version("numba")) >= Version("0.63b0"), reason="numba 0.63 bug"
)
9 changes: 0 additions & 9 deletions tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,3 @@ def test_deprecate_multicore_tsne() -> None:
pytest.warns(ImportWarning, match=r"MulticoreTSNE"),
):
sc.tl.tsne(pbmc, use_fast_tsne=True)


def test_deprecate_use_highly_variable_genes():
    """Passing `use_highly_variable` to `sc.pp.pca` emits a `FutureWarning`."""
    adata = pbmc68k_reduced()
    expected_message = "Argument `use_highly_variable` is deprecated"

    with pytest.warns(FutureWarning, match=expected_message):
        sc.pp.pca(adata, use_highly_variable=True)
5 changes: 1 addition & 4 deletions tests/test_highly_variable_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from scanpy._compat import CSRBase
from testing.scanpy._helpers import _check_check_values_warnings
from testing.scanpy._helpers.data import pbmc3k, pbmc68k_reduced
from testing.scanpy._pytest.marks import needs, skip_numba_0_63
from testing.scanpy._pytest.marks import needs
from testing.scanpy._pytest.params import ARRAY_TYPES

if TYPE_CHECKING:
Expand Down Expand Up @@ -165,7 +165,6 @@ def _check_pearson_hvg_columns(output_df: pd.DataFrame, n_top_genes: int):
assert np.nanmax(output_df["highly_variable_rank"].to_numpy()) <= n_top_genes - 1


@skip_numba_0_63
def test_pearson_residuals_inputchecks(
pbmc3k_parametrized_small: Callable[[], AnnData],
) -> None:
Expand Down Expand Up @@ -202,7 +201,6 @@ def test_pearson_residuals_inputchecks(
)


@skip_numba_0_63
@pytest.mark.parametrize("subset", [True, False], ids=["subset", "full"])
@pytest.mark.parametrize(
"clip", [None, np.inf, 30], ids=["noclip", "infclip", "30clip"]
Expand Down Expand Up @@ -296,7 +294,6 @@ def test_pearson_residuals_general(
_check_pearson_hvg_columns(output_df, n_top_genes)


@skip_numba_0_63
@pytest.mark.parametrize("subset", [True, False], ids=["subset", "full"])
@pytest.mark.parametrize("n_top_genes", [100, 200], ids=["100n", "200n"])
def test_pearson_residuals_batch(
Expand Down
27 changes: 8 additions & 19 deletions tests/test_normalization.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from contextlib import nullcontext
from functools import partial
from typing import TYPE_CHECKING

Expand All @@ -18,7 +17,6 @@
check_rep_mutation,
check_rep_results,
)
from testing.scanpy._pytest.marks import skip_numba_0_63

# TODO: Add support for sparse-in-dask
from testing.scanpy._pytest.params import ARRAY_TYPES, ARRAY_TYPES_DENSE
Expand Down Expand Up @@ -210,17 +208,14 @@ def _check_pearson_pca_fields(ad, n_cells, n_comps):
), "Wrong shape of PCA output in `X_pca`"


@skip_numba_0_63
@pytest.mark.parametrize("n_hvgs", [100, 200])
@pytest.mark.parametrize("n_comps", [30, 50])
@pytest.mark.parametrize(
("do_hvg", "params", "n_var_copy_name"),
[
pytest.param(False, dict(), "n_genes", id="no_hvg"),
pytest.param(True, dict(), "n_hvgs", id="hvg_default"),
pytest.param(
True, dict(use_highly_variable=False), "n_genes", id="hvg_opt_out"
),
pytest.param(True, dict(mask_var=None), "n_genes", id="hvg_opt_out"),
pytest.param(False, dict(mask_var="test_mask"), "n_unmasked", id="mask"),
],
)
Expand All @@ -247,19 +242,14 @@ def test_normalize_pearson_residuals_pca(
adata, flavor="pearson_residuals", n_top_genes=n_hvgs
)

ctx = (
pytest.warns(FutureWarning, match=r"use_highly_variable.*deprecated")
if "use_highly_variable" in params
else nullcontext()
# inplace=False
adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca(
adata.copy(), inplace=False, n_comps=n_comps, **params
)
# inplace=True modifies the input adata object
sc.experimental.pp.normalize_pearson_residuals_pca(
adata, inplace=True, n_comps=n_comps, **params
)
with ctx: # inplace=False
adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca(
adata.copy(), inplace=False, n_comps=n_comps, **params
)
with ctx: # inplace=True modifies the input adata object
sc.experimental.pp.normalize_pearson_residuals_pca(
adata, inplace=True, n_comps=n_comps, **params
)

for ad, n_var_ret in (
(adata_pca, n_var_copy),
Expand All @@ -283,7 +273,6 @@ def test_normalize_pearson_residuals_pca(
np.testing.assert_array_equal(adata.obsm["X_pca"], adata_pca.obsm["X_pca"])


@skip_numba_0_63
@pytest.mark.parametrize("n_hvgs", [100, 200])
@pytest.mark.parametrize("n_comps", [30, 50])
def test_normalize_pearson_residuals_recipe(
Expand Down
Loading
Loading