diff --git a/.travis.yml b/.travis.yml index d5ddc0e..fd29a2f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,8 +4,12 @@ cache: pip: true directories: - /home/travis/R/Library + - /home/travis/.cache/ExperimentHub env: - R_LIBS_SITE=/home/travis/R/Library +branches: + only: + - master # All other branches should become (draft) PRs and be built that way # matrix python: @@ -17,7 +21,7 @@ python: addons: apt: sources: - - sourceline: 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' + - sourceline: 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' key_url: 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9' update: true packages: @@ -26,7 +30,8 @@ addons: before_install: - sudo R --slave -e 'install.packages("BiocManager", repos="https://cloud.r-project.org/")' - sudo R --slave -e 'options(warn = 2); BiocManager::install(c("SingleCellExperiment", "scRNAseq"))' -- pip install flit get_version rpy2 pygments h5py==2.9.0 # h5py because of https://github.com/theislab/scanpy/issues/832 +- mkdir -p ~/.cache/ExperimentHub +- pip install flit get_version 'rpy2>=3.4' pygments # install package install: - python -m rpy2.situation diff --git a/anndata2ri/conv.py b/anndata2ri/conv.py index 6ee6fc4..d53f9ae 100644 --- a/anndata2ri/conv.py +++ b/anndata2ri/conv.py @@ -1,6 +1,6 @@ from typing import Optional -from rpy2.robjects import conversion, numpy2ri, pandas2ri, default_converter, SexpVector +from rpy2.robjects import conversion, numpy2ri, pandas2ri from rpy2.robjects.conversion import overlay_converter from . import scipy2ri @@ -9,9 +9,7 @@ original_converter: Optional[conversion.Converter] = None converter = conversion.Converter("original anndata conversion") -mat_converter = default_converter + numpy2ri.converter + scipy2ri.converter -# default_converter has SexpVector registered, so we need to overwrite it. 
-mat_converter.rpy2py.register(SexpVector, numpy2ri.rpy2py_sexp) +mat_converter = numpy2ri.converter + scipy2ri.converter def full_converter() -> conversion.Converter: diff --git a/anndata2ri/r2py.py b/anndata2ri/r2py.py index d79fae1..47ac06c 100644 --- a/anndata2ri/r2py.py +++ b/anndata2ri/r2py.py @@ -1,9 +1,10 @@ from typing import Optional, Union +import numpy as np import pandas as pd from anndata import AnnData -from rpy2.rinterface import NULLType, SexpS4 +from rpy2.rinterface import NULLType, Sexp, SexpS4, IntSexpVector, baseenv from rpy2.robjects import default_converter, pandas2ri from rpy2.robjects.conversion import localconverter from rpy2.robjects.robject import RSlots @@ -31,13 +32,30 @@ def rpy2py_s4(obj: SexpS4) -> Optional[Union[pd.DataFrame, AnnData]]: return default_converter.rpy2py(obj) +def rpy2py_vector(v): + """ + Converts vectors. Also handles NA in int vectors: https://github.com/rpy2/rpy2/issues/376 + """ + if not isinstance(v, Sexp): + return v + if isinstance(v, IntSexpVector): + assert v._R_SIZEOF_ELT == 4, "R integer size changed away from 32 bit" + if "factor" in v.rclass: + r = pandas2ri.rpy2py(v) + else: + r = pd.array(v, dtype=pd.Int32Dtype()) + r[np.array(baseenv["is.na"](v), dtype=bool)] = pd.NA + return r + return pandas2ri.rpy2py(v) + + def rpy2py_data_frame(obj: SexpS4) -> pd.DataFrame: """ S4 DataFrame class, not data.frame """ - with localconverter(default_converter + pandas2ri.converter): - slots = RSlots(obj) - columns = dict(slots["listData"].items()) + slots = RSlots(obj) + with localconverter(default_converter): + columns = {k: rpy2py_vector(v) for k, v in slots["listData"].items()} rownames = slots["rownames"] if isinstance(rownames, NULLType): rownames = pd.RangeIndex(slots["nrows"][0]) diff --git a/anndata2ri/test_utils.py b/anndata2ri/test_utils.py index 6ae9550..e4f23c1 100644 --- a/anndata2ri/test_utils.py +++ b/anndata2ri/test_utils.py @@ -49,6 +49,11 @@ def conversion_py2rpy_activate(conv_mod: ConversionModule, 
dataset: Any) -> Sexp ] +@pytest.fixture(params=conversions_py2rpy) +def py2r(request) -> Callable[[ConversionModule, Any], Sexp]: + return request.param + + def conversion_rpy2py_manual(conv_mod: ConversionModule, dataset: Callable[[], Sexp]) -> Any: return conv_mod.converter.rpy2py(dataset()) @@ -73,3 +78,8 @@ def conversion_rpy2py_activate(conv_mod: ConversionModule, dataset: Callable[[], pytest.param(conversion_rpy2py_local, id="local"), pytest.param(conversion_rpy2py_activate, id="activate"), ] + + +@pytest.fixture(params=conversions_rpy2py) +def r2py(request) -> Callable[[ConversionModule, Callable[[], Sexp]], Any]: + return request.param diff --git a/pyproject.toml b/pyproject.toml index 6ca77ef..432fa14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ requires-python = '>= 3.6' requires = [ 'get_version', - 'rpy2 >= 3.0.1', + 'rpy2 >= 3.4', 'tzlocal', # for pandas2ri 'anndata', ] diff --git a/tests/test_py2rpy.py b/tests/test_py2rpy.py index bc3c437..02e1f0f 100644 --- a/tests/test_py2rpy.py +++ b/tests/test_py2rpy.py @@ -9,7 +9,7 @@ import anndata2ri from anndata2ri.rpy2_ext import importr -from anndata2ri.test_utils import conversions_py2rpy +from anndata2ri.test_utils import py2r # noqa def mk_ad_simple(): @@ -39,14 +39,13 @@ def check_pca(ex): ] -@pytest.mark.parametrize("conversion", conversions_py2rpy) @pytest.mark.parametrize("check,shape,dataset", datasets) -def test_py2rpy(conversion, check, shape, dataset): +def test_py2rpy(py2r, check, shape, dataset): if dataset is sc.datasets.krumsiek11: with pytest.warns(UserWarning, match=r"Duplicated obs_names"): - ex = conversion(anndata2ri, dataset()) + ex = py2r(anndata2ri, dataset()) else: - ex = conversion(anndata2ri, dataset()) + ex = py2r(anndata2ri, dataset()) assert tuple(baseenv["dim"](ex)[::-1]) == shape check(ex) diff --git a/tests/test_rpy2py.py b/tests/test_rpy2py.py index 7212e7e..2882650 100644 --- a/tests/test_rpy2py.py +++ b/tests/test_rpy2py.py @@ -1,21 
+1,31 @@ +from pathlib import Path + import pytest import pandas as pd from anndata import AnnData from rpy2.robjects import r, conversion import anndata2ri -from anndata2ri.rpy2_ext import importr, data -from anndata2ri.test_utils import conversions_rpy2py +from anndata2ri.rpy2_ext import importr +from anndata2ri.test_utils import r2py # noqa + as_ = getattr(importr("methods"), "as") se = importr("SummarizedExperiment") sce = importr("SingleCellExperiment") -sumex_allen = data("scRNAseq", "allen")["allen"] +eh = importr("ExperimentHub") +seq = importr("scRNAseq") + + +# avoid prompt +Path(eh.getExperimentHubOption("CACHE")[0]).mkdir(parents=True, exist_ok=True) def check_allen(adata): assert adata.uns.keys() == {"SuppInfo", "which_qc"} assert set(adata.obs.keys()) > {"NREADS", "NALIGNED", "Animal.ID", "passes_qc_checks_s"} + assert adata.obs["Secondary.Type"][:4].tolist() == ["L4 Ctxn3", "", "L5a Batf3", None], "NAs not conserved?" + assert adata.obs["Animal.ID"][:4].tolist() == [133632, 133632, 151560, pd.NA], "NAs not conserved?" 
def check_example(adata): @@ -37,38 +47,40 @@ def check_example(adata): """ expression_sets = [ - pytest.param(check_allen, (379, 20908), lambda: as_(sumex_allen, "SingleCellExperiment"), id="allen"), + pytest.param( + check_allen, + (379, 20816), + lambda: as_(seq.ReprocessedAllenData(assays="tophat_counts"), "SingleCellExperiment"), + id="allen", + ), pytest.param(lambda x: None, (0, 0), sce.SingleCellExperiment, id="empty"), pytest.param(check_example, (100, 200), lambda: r(code_example), id="example"), ] -@pytest.mark.parametrize("convert", conversions_rpy2py) @pytest.mark.parametrize("check,shape,dataset", expression_sets) -def test_convert_manual(convert, check, shape, dataset): - ad = convert(anndata2ri, dataset) +def test_convert_manual(r2py, check, shape, dataset): + ad = r2py(anndata2ri, dataset) assert isinstance(ad, AnnData) assert ad.shape == shape check(ad) -@pytest.mark.parametrize("convert", conversions_rpy2py) -def test_convert_empty_df_with_rows(convert): +def test_convert_empty_df_with_rows(r2py): df = r("S4Vectors::DataFrame(a=1:10)[, -1]") assert df.slots["nrows"][0] == 10 - df_py = convert(anndata2ri, lambda: conversion.rpy2py(df)) + df_py = r2py(anndata2ri, lambda: conversion.rpy2py(df)) assert isinstance(df_py, pd.DataFrame) -@pytest.mark.parametrize("convert", conversions_rpy2py) -def test_convert_factor(convert): +def test_convert_factor(r2py): code = """ SingleCellExperiment::SingleCellExperiment( assays = list(counts = matrix(rpois(6*4, 5), ncol=4)), - colData = S4Vectors::DataFrame(a_factor = factor(c(rep('A', 3), rep('B', 1)))) + colData = S4Vectors::DataFrame(a_factor = factor(c(rep('A', 2), NA, rep('B', 1)))) ) """ - ad = convert(anndata2ri, lambda: r(code)) + ad = r2py(anndata2ri, lambda: r(code)) assert isinstance(ad.obs["a_factor"].values, pd.Categorical) - assert all(ad.obs["a_factor"].values == pd.Categorical.from_codes([0, 0, 0, 1], ["A", "B"])) + assert ad.obs["a_factor"].values.tolist() == pd.Categorical.from_codes([0, 0, -1, 
1], ["A", "B"]).tolist()