Skip to content

Commit

Permalink
Use new scRNAseq package and fix rpy2 3.4 compat (#60)
Browse files Browse the repository at this point in the history
  • Loading branch information
flying-sheep authored Jan 10, 2021
1 parent ea266ab commit 8a1fdd4
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 31 deletions.
9 changes: 7 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@ cache:
pip: true
directories:
- /home/travis/R/Library
- /home/travis/.cache/ExperimentHub
env:
- R_LIBS_SITE=/home/travis/R/Library
branches:
only:
- master # All other branches should become (draft) PRs and be built that way

# matrix
python:
Expand All @@ -17,7 +21,7 @@ python:
addons:
apt:
sources:
- sourceline: 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/'
- sourceline: 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/'
key_url: 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9'
update: true
packages:
Expand All @@ -26,7 +30,8 @@ addons:
before_install:
- sudo R --slave -e 'install.packages("BiocManager", repos="https://cloud.r-project.org/")'
- sudo R --slave -e 'options(warn = 2); BiocManager::install(c("SingleCellExperiment", "scRNAseq"))'
- pip install flit get_version rpy2 pygments h5py==2.9.0 # h5py because of https://github.com/theislab/scanpy/issues/832
- mkdir -p ~/.cache/ExperimentHub
- pip install flit get_version 'rpy2>=3.4' pygments
# install package
install:
- python -m rpy2.situation
Expand Down
6 changes: 2 additions & 4 deletions anndata2ri/conv.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Optional

from rpy2.robjects import conversion, numpy2ri, pandas2ri, default_converter, SexpVector
from rpy2.robjects import conversion, numpy2ri, pandas2ri
from rpy2.robjects.conversion import overlay_converter

from . import scipy2ri
Expand All @@ -9,9 +9,7 @@
original_converter: Optional[conversion.Converter] = None
converter = conversion.Converter("original anndata conversion")

mat_converter = default_converter + numpy2ri.converter + scipy2ri.converter
# default_converter has SexpVector registered, so we need to overwrite it.
mat_converter.rpy2py.register(SexpVector, numpy2ri.rpy2py_sexp)
mat_converter = numpy2ri.converter + scipy2ri.converter


def full_converter() -> conversion.Converter:
Expand Down
26 changes: 22 additions & 4 deletions anndata2ri/r2py.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import Optional, Union

import numpy as np
import pandas as pd
from anndata import AnnData

from rpy2.rinterface import NULLType, SexpS4
from rpy2.rinterface import NULLType, Sexp, SexpS4, IntSexpVector, baseenv
from rpy2.robjects import default_converter, pandas2ri
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.robject import RSlots
Expand Down Expand Up @@ -31,13 +32,30 @@ def rpy2py_s4(obj: SexpS4) -> Optional[Union[pd.DataFrame, AnnData]]:
return default_converter.rpy2py(obj)


def rpy2py_vector(v):
"""
Converts vectors. Also handles NA in int vectors: https://github.com/rpy2/rpy2/issues/376
"""
# NOTE(review): indentation was lost in this rendering of the diff, so the exact
# nesting of the statements below cannot be confirmed from here -- in particular
# whether the is.na masking runs only for plain integer vectors or for factors too.
# Verify against the real anndata2ri/r2py.py before relying on these comments.
#
# Anything that is not an rpy2 Sexp is handed back unchanged.
if not isinstance(v, Sexp):
return v
if isinstance(v, IntSexpVector):
# Guard for the Int32Dtype used below: R integers are expected to be 4 bytes wide.
assert v._R_SIZEOF_ELT == 4, "R integer size changed away from 32 bit"
# Integer vectors whose R class includes "factor" are converted by pandas2ri.
if "factor" in v.rclass:
r = pandas2ri.rpy2py(v)
else:
# Other integer vectors become a pandas array with the nullable Int32 dtype.
r = pd.array(v, dtype=pd.Int32Dtype())
# Call R's is.na on the vector, turn the result into a boolean mask,
# and write pd.NA at the masked positions.
r[np.array(baseenv["is.na"](v), dtype=bool)] = pd.NA
return r
# Every remaining Sexp is delegated to pandas2ri.
return pandas2ri.rpy2py(v)


def rpy2py_data_frame(obj: SexpS4) -> pd.DataFrame:
"""
S4 DataFrame class, not data.frame
"""
with localconverter(default_converter + pandas2ri.converter):
slots = RSlots(obj)
columns = dict(slots["listData"].items())
slots = RSlots(obj)
with localconverter(default_converter):
columns = {k: rpy2py_vector(v) for k, v in slots["listData"].items()}
rownames = slots["rownames"]
if isinstance(rownames, NULLType):
rownames = pd.RangeIndex(slots["nrows"][0])
Expand Down
10 changes: 10 additions & 0 deletions anndata2ri/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ def conversion_py2rpy_activate(conv_mod: ConversionModule, dataset: Any) -> Sexp
]


@pytest.fixture(params=conversions_py2rpy)
def py2r(request) -> Callable[[ConversionModule, Any], Sexp]:
    """Fixture parametrized over ``conversions_py2rpy``; returns the current parameter."""
    current_conversion = request.param
    return current_conversion


def conversion_rpy2py_manual(conv_mod: ConversionModule, dataset: Callable[[], Sexp]) -> Any:
    """Create the R object with ``dataset()`` and convert it by calling ``conv_mod.converter.rpy2py`` directly."""
    # Look up the converter first, then build the dataset: same evaluation order as a single call expression.
    to_python = conv_mod.converter.rpy2py
    r_object = dataset()
    return to_python(r_object)

Expand All @@ -73,3 +78,8 @@ def conversion_rpy2py_activate(conv_mod: ConversionModule, dataset: Callable[[],
pytest.param(conversion_rpy2py_local, id="local"),
pytest.param(conversion_rpy2py_activate, id="activate"),
]


@pytest.fixture(params=conversions_rpy2py)
def r2py(request) -> Callable[[ConversionModule, Callable[[], Sexp]], Any]:
    """Fixture parametrized over ``conversions_rpy2py``; returns the current parameter."""
    current_conversion = request.param
    return current_conversion
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ classifiers = [
requires-python = '>= 3.6'
requires = [
'get_version',
'rpy2 >= 3.0.1',
'rpy2 >= 3.4',
'tzlocal', # for pandas2ri
'anndata',
]
Expand Down
9 changes: 4 additions & 5 deletions tests/test_py2rpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import anndata2ri
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import conversions_py2rpy
from anndata2ri.test_utils import py2r # noqa


def mk_ad_simple():
Expand Down Expand Up @@ -39,14 +39,13 @@ def check_pca(ex):
]


@pytest.mark.parametrize("conversion", conversions_py2rpy)
@pytest.mark.parametrize("check,shape,dataset", datasets)
def test_py2rpy(conversion, check, shape, dataset):
def test_py2rpy(py2r, check, shape, dataset):
if dataset is sc.datasets.krumsiek11:
with pytest.warns(UserWarning, match=r"Duplicated obs_names"):
ex = conversion(anndata2ri, dataset())
ex = py2r(anndata2ri, dataset())
else:
ex = conversion(anndata2ri, dataset())
ex = py2r(anndata2ri, dataset())
assert tuple(baseenv["dim"](ex)[::-1]) == shape
check(ex)

Expand Down
42 changes: 27 additions & 15 deletions tests/test_rpy2py.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,31 @@
from pathlib import Path

import pytest
import pandas as pd
from anndata import AnnData
from rpy2.robjects import r, conversion

import anndata2ri
from anndata2ri.rpy2_ext import importr, data
from anndata2ri.test_utils import conversions_rpy2py
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import r2py # noqa


as_ = getattr(importr("methods"), "as")
se = importr("SummarizedExperiment")
sce = importr("SingleCellExperiment")
sumex_allen = data("scRNAseq", "allen")["allen"]
eh = importr("ExperimentHub")
seq = importr("scRNAseq")


# avoid prompt
Path(eh.getExperimentHubOption("CACHE")[0]).mkdir(parents=True, exist_ok=True)


def check_allen(adata):
    """Assert that ``adata`` has the expected ``uns`` keys, ``obs`` columns and leading ``obs`` values (including missing ones)."""
    expected_uns_keys = {"SuppInfo", "which_qc"}
    assert adata.uns.keys() == expected_uns_keys

    # The obs columns must be a strict superset of these names.
    required_obs_columns = {"NREADS", "NALIGNED", "Animal.ID", "passes_qc_checks_s"}
    obs_columns = set(adata.obs.keys())
    assert obs_columns > required_obs_columns

    # First four entries of a string column; the fourth is expected to be missing (None).
    secondary_head = adata.obs["Secondary.Type"][:4].tolist()
    assert secondary_head == ["L4 Ctxn3", "", "L5a Batf3", None], "NAs not conserved?"

    # First four entries of an integer column; the fourth is expected to be pd.NA.
    animal_head = adata.obs["Animal.ID"][:4].tolist()
    assert animal_head == [133632, 133632, 151560, pd.NA], "NAs not conserved?"


def check_example(adata):
Expand All @@ -37,38 +47,40 @@ def check_example(adata):
"""

expression_sets = [
pytest.param(check_allen, (379, 20908), lambda: as_(sumex_allen, "SingleCellExperiment"), id="allen"),
pytest.param(
check_allen,
(379, 20816),
lambda: as_(seq.ReprocessedAllenData(assays="tophat_counts"), "SingleCellExperiment"),
id="allen",
),
pytest.param(lambda x: None, (0, 0), sce.SingleCellExperiment, id="empty"),
pytest.param(check_example, (100, 200), lambda: r(code_example), id="example"),
]


@pytest.mark.parametrize("convert", conversions_rpy2py)
@pytest.mark.parametrize("check,shape,dataset", expression_sets)
def test_convert_manual(convert, check, shape, dataset):
ad = convert(anndata2ri, dataset)
def test_convert_manual(r2py, check, shape, dataset):
ad = r2py(anndata2ri, dataset)
assert isinstance(ad, AnnData)
assert ad.shape == shape
check(ad)


@pytest.mark.parametrize("convert", conversions_rpy2py)
def test_convert_empty_df_with_rows(convert):
def test_convert_empty_df_with_rows(r2py):
df = r("S4Vectors::DataFrame(a=1:10)[, -1]")
assert df.slots["nrows"][0] == 10

df_py = convert(anndata2ri, lambda: conversion.rpy2py(df))
df_py = r2py(anndata2ri, lambda: conversion.rpy2py(df))
assert isinstance(df_py, pd.DataFrame)


@pytest.mark.parametrize("convert", conversions_rpy2py)
def test_convert_factor(convert):
def test_convert_factor(r2py):
code = """
SingleCellExperiment::SingleCellExperiment(
assays = list(counts = matrix(rpois(6*4, 5), ncol=4)),
colData = S4Vectors::DataFrame(a_factor = factor(c(rep('A', 3), rep('B', 1))))
colData = S4Vectors::DataFrame(a_factor = factor(c(rep('A', 2), NA, rep('B', 1))))
)
"""
ad = convert(anndata2ri, lambda: r(code))
ad = r2py(anndata2ri, lambda: r(code))
assert isinstance(ad.obs["a_factor"].values, pd.Categorical)
assert all(ad.obs["a_factor"].values == pd.Categorical.from_codes([0, 0, 0, 1], ["A", "B"]))
assert ad.obs["a_factor"].values.tolist() == pd.Categorical.from_codes([0, 0, -1, 1], ["A", "B"]).tolist()

0 comments on commit 8a1fdd4

Please sign in to comment.