Skip to content

Commit

Permalink
Support query of dense arrays in assays (#80)
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche authored Jan 22, 2025
1 parent e38e484 commit 36ae287
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 4 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Changelog

## Version 0.5.3
## Version 0.5.3 - 0.5.4

- 'config' is renamed to 'context_or_config' to accept either a TileDB context or a TileDB config object. This supports in-memory TileDB contexts from [Phil](https://github.com/hanslovsky) [[#79](https://github.com/TileOme/cellarr/pull/79)].
- Also fixes the condition for path to assay groups.
- Fixes the condition for path to assay groups.
- Supports querying of dense arrays with the `CellArrDataset` class.

## Version 0.5.1 - 0.5.2

Expand Down
9 changes: 7 additions & 2 deletions src/cellarr/queryutils_tiledb_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def subset_array(
row_subset: Union[slice, list, tuple],
column_subset: Union[slice, list, tuple],
shape: tuple,
) -> sp.csr_matrix:
) -> Union[np.ndarray, sp.csr_matrix]:
"""Subset a TileDB storing array data.
Uses multi_index to slice.
Expand All @@ -127,8 +127,13 @@ def subset_array(
Shape of the entire matrix.
Returns:
A sparse array in a csr format.
if the TileDB object is sparse, returns a sparse array in a coo format
otherwise a numpy object.
"""

if not tiledb_obj.schema.sparse:
return tiledb_obj[row_subset, column_subset]["data"]

data = tiledb_obj.multi_index[row_subset, column_subset]

# Fallback just in case
Expand Down
42 changes: 42 additions & 0 deletions tests/test_qutils_dense.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import tempfile

import anndata
import numpy as np
import pandas as pd
import pytest
import tiledb
from cellarr.queryutils_tiledb_frame import subset_array

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"

def create_dense_matrix():
    """Write a small dense TileDB array to a new temporary directory.

    The array has two int32 dimensions, ``cell_index`` and ``gene_index``,
    each spanning 0..3 with a tile extent of 2, and a single int32
    attribute named ``data``. It is filled row by row with the values
    1 through 16.

    Returns:
        Path of the temporary directory that holds the array.
    """
    uri = tempfile.mkdtemp()

    # Both axes share the same domain, tiling and dtype; only the name differs.
    dims = [
        tiledb.Dim(name=dim_name, domain=(0, 3), tile=2, dtype=np.int32)
        for dim_name in ("cell_index", "gene_index")
    ]
    schema = tiledb.ArraySchema(
        domain=tiledb.Domain(*dims),
        sparse=False,
        attrs=[tiledb.Attr(name="data", dtype=np.int32)],
    )
    tiledb.Array.create(uri, schema)

    # 4x4 grid holding 1..16 in row-major order.
    values = np.arange(1, 17, dtype=np.int32).reshape(4, 4)
    with tiledb.open(uri, "w") as arr:
        arr[:] = values

    return uri


def test_query_cellarrdataset():
    """Check that ``subset_array`` slices a dense TileDB array correctly.

    Builds the 4x4 dense fixture, queries it with ``slice(0, 2)`` on the rows
    and all columns, and verifies both the shape and the values returned.
    """
    # Imported locally so this block does not rely on the module's import list.
    import shutil

    array_uri = create_dense_matrix()
    try:
        # The context manager closes the array handle even if the query raises.
        with tiledb.open(array_uri, "r") as tdb:
            res = subset_array(
                tdb,
                row_subset=slice(0, 2),
                column_subset=slice(None),
                shape=(4, 4),
            )

        assert res.shape == (2, 4)
        assert np.array_equal(res, np.array([[1, 2, 3, 4], [5, 6, 7, 8]]))
    finally:
        # The fixture directory comes from mkdtemp() and is not removed otherwise.
        shutil.rmtree(array_uri, ignore_errors=True)

0 comments on commit 36ae287

Please sign in to comment.