Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: 5.1 testing #7093

Closed
wants to merge 18 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
merge: main->tsmith/5.1
Bento007 committed May 28, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit 2618fa72d9bc62f8ffdb73de9d70ea12833ca15b
6 changes: 2 additions & 4 deletions backend/layers/processing/h5ad_data_file.py
Original file line number Diff line number Diff line change
@@ -54,9 +54,7 @@ def __init__(

self.validate_anndata()

def to_cxg(
self, output_cxg_directory, sparse_threshold, dataset_version_id, convert_anndata_colors_to_cxg_colors=True
):
def to_cxg(self, output_cxg_directory, sparse_threshold, convert_anndata_colors_to_cxg_colors=True):
"""
Writes the following attributes of the anndata to CXG: 1) the metadata as metadata attached to an empty
DenseArray, 2) the obs DataFrame as a DenseArray, 3) the var DataFrame as a DenseArray, 4) all valid
@@ -82,7 +80,7 @@ def to_cxg(
convert_dataframe_to_cxg_array(output_cxg_directory, "var", self.var, self.var_index_column_name, ctx)
logging.info("\t...dataset var dataframe saved")

convert_uns_to_cxg_group(output_cxg_directory, self.anndata.uns, dataset_version_id, "uns", ctx)
convert_uns_to_cxg_group(output_cxg_directory, self.anndata.uns, "uns", ctx)
logging.info("\t...dataset uns dataframe saved")

self.write_anndata_embeddings_to_cxg(output_cxg_directory, ctx)
4 changes: 2 additions & 2 deletions backend/layers/processing/process_cxg.py
Original file line number Diff line number Diff line change
@@ -67,15 +67,15 @@ def process(
self.process_cxg(labeled_h5ad_filename, dataset_version_id, cellxgene_bucket, current_artifacts)

@logit
def make_cxg(self, local_filename, dataset_version_id):
def make_cxg(self, local_filename):
"""
Convert the uploaded H5AD file to the CXG format servicing the cellxgene Explorer.
"""

cxg_output_container = local_filename.replace(".h5ad", ".cxg")
try:
h5ad_data_file = H5ADDataFile(local_filename, var_index_column_name="feature_name")
h5ad_data_file.to_cxg(cxg_output_container, sparse_threshold=25.0, dataset_version_id=dataset_version_id.id)
h5ad_data_file.to_cxg(cxg_output_container, sparse_threshold=25.0)
except Exception as ex:
# TODO use a specialized exception
msg = "CXG conversion failed."
2 changes: 1 addition & 1 deletion backend/layers/processing/process_logic.py
Original file line number Diff line number Diff line change
@@ -114,7 +114,7 @@ def convert_file(
start = datetime.now()
try:
self.update_processing_status(dataset_version_id, processing_status_key, DatasetConversionStatus.CONVERTING)
file_dir = converter(local_filename, dataset_version_id)
file_dir = converter(local_filename)
self.update_processing_status(dataset_version_id, processing_status_key, DatasetConversionStatus.CONVERTED)
self.logger.info(f"Finished converting {converter} in {datetime.now() - start}")
except Exception:
2 changes: 1 addition & 1 deletion backend/layers/processing/process_seurat.py
Original file line number Diff line number Diff line change
@@ -96,7 +96,7 @@ def process(self, dataset_version_id: DatasetVersionId, artifact_bucket: str, da
)

@logit
def make_seurat(self, local_filename, *args, **kwargs):
def make_seurat(self, local_filename):
"""
Create a Seurat rds file from the AnnData file.
"""
4 changes: 2 additions & 2 deletions backend/layers/processing/utils/cxg_generation_utils.py
Original file line number Diff line number Diff line change
@@ -34,7 +34,7 @@ def convert_dictionary_to_cxg_group(cxg_container, metadata_dict, group_metadata
metadata_array.meta[key] = value


def convert_uns_to_cxg_group(cxg_container, metadata_dict, dataset_version_id, group_metadata_name="uns", ctx=None):
def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="uns", ctx=None):
"""
Convert uns (unstructured) metadata to CXG output directory specified
Generate deep zoom assets for spatial data
@@ -53,7 +53,7 @@ def convert_uns_to_cxg_group(cxg_container, metadata_dict, dataset_version_id, g
for object_id, content in value.items():
if object_id not in SPATIAL_KEYS_EXCLUDE:
object_filtered = spatial_processor.filter_spatial_data(content, object_id)
spatial_processor.create_deep_zoom_assets(cxg_container, content, dataset_version_id)
spatial_processor.create_deep_zoom_assets(cxg_container, content)

metadata_array.meta[key] = pickle.dumps(object_filtered)

11 changes: 4 additions & 7 deletions backend/layers/processing/utils/spatial.py
Original file line number Diff line number Diff line change
@@ -116,26 +116,23 @@ def _generate_deep_zoom_assets(self, image_array, assets_folder):
image = pyvips.Image.new_from_memory(linear.data, w, h, bands, "uchar")
image.dzsave(os.path.join(assets_folder, "spatial"), suffix=".jpeg")

def _upload_assets(self, assets_folder, dataset_version_id):
def _upload_assets(self, assets_folder):
"""
Upload the deep zoom assets to the S3 bucket.

Args:
assets_folder (str): The folder containing the assets.
dataset_version_id (str): The UUID uniquely identifying the dataset version.
"""
version_id = dataset_version_id.replace(".cxg", "")
s3_uri = f"s3://{self.bucket_name}/{self.asset_directory}/{version_id}"
s3_uri = f"s3://{self.bucket_name}/{self.asset_directory}/{os.path.basename(assets_folder)}"
self.s3_provider.upload_directory(assets_folder, s3_uri)

def create_deep_zoom_assets(self, container_name, content, dataset_version_id):
def create_deep_zoom_assets(self, container_name, content):
"""
Create deep zoom assets for a container.

Args:
container_name (str): The name of the container.
content (dict): The content dictionary containing the image array.
dataset_version_id (str): The UUID uniquely identifying the dataset version.
"""
try:
with tempfile.TemporaryDirectory() as temp_dir:
@@ -145,7 +142,7 @@ def create_deep_zoom_assets(self, container_name, content, dataset_version_id):
image_array, _ = self._fetch_image(content)
processed_image = self._process_and_flip_image(image_array)
self._generate_deep_zoom_assets(processed_image, assets_folder)
self._upload_assets(assets_folder, dataset_version_id)
self._upload_assets(assets_folder)
except Exception as e:
logger.exception(f"Failed to create and upload deep zoom assets: {e}")
raise
15 changes: 7 additions & 8 deletions tests/unit/processing/test_h5ad_data_file.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,6 @@ def setUp(self):
self.sample_h5ad_filename = self._write_anndata_to_file(self.sample_anndata)

self.sample_output_directory = path.splitext(self.sample_h5ad_filename)[0] + ".cxg"
self.dataset_version_id = "test_dataset_version_id"

def tearDown(self):
if self.sample_h5ad_filename:
@@ -109,31 +108,31 @@ def test__create_h5ad_data_file__obs_and_var_index_names_specified_doesnt_exist_

def test__to_cxg__simple_anndata_no_corpora_and_sparse(self):
h5ad_file = H5ADDataFile(self.sample_h5ad_filename)
h5ad_file.to_cxg(self.sample_output_directory, 100, self.dataset_version_id)
h5ad_file.to_cxg(self.sample_output_directory, 100)

self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, True)

def test__to_cxg__simple_anndata_with_corpora_and_sparse(self):
h5ad_file = H5ADDataFile(self.sample_h5ad_filename)
h5ad_file.to_cxg(self.sample_output_directory, 100, self.dataset_version_id)
h5ad_file.to_cxg(self.sample_output_directory, 100)

self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, True)

def test__to_cxg__simple_anndata_no_corpora_and_dense(self):
h5ad_file = H5ADDataFile(self.sample_h5ad_filename)
h5ad_file.to_cxg(self.sample_output_directory, 0, self.dataset_version_id)
h5ad_file.to_cxg(self.sample_output_directory, 0)

self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, False)

def test__to_cxg__simple_anndata_with_corpora_and_dense(self):
h5ad_file = H5ADDataFile(self.sample_h5ad_filename)
h5ad_file.to_cxg(self.sample_output_directory, 0, self.dataset_version_id)
h5ad_file.to_cxg(self.sample_output_directory, 0)

self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, False)

def test__to_cxg__simple_anndata_with_corpora_and_dense_using_feature_name_var_index(self):
h5ad_file = H5ADDataFile(self.sample_h5ad_filename, var_index_column_name="feature_name")
h5ad_file.to_cxg(self.sample_output_directory, 0, self.dataset_version_id)
h5ad_file.to_cxg(self.sample_output_directory, 0)

self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, False)
self._validate_cxg_var_index_column_match(
@@ -143,7 +142,7 @@ def test__to_cxg__simple_anndata_with_corpora_and_dense_using_feature_name_var_i

def test__to_cxg__simple_anndata_with_different_var_index_than_h5ad(self):
h5ad_file = H5ADDataFile(self.sample_h5ad_filename, var_index_column_name="int_category")
h5ad_file.to_cxg(self.sample_output_directory, 0, self.dataset_version_id)
h5ad_file.to_cxg(self.sample_output_directory, 0)

self._validate_cxg_var_index_column_match(
self.sample_output_directory,
@@ -156,7 +155,7 @@ def test__to_cxg__with_sparse_column_encoding(self):
sparse_with_column_shift_filename = self._write_anndata_to_file(anndata)

h5ad_file = H5ADDataFile(sparse_with_column_shift_filename)
h5ad_file.to_cxg(self.sample_output_directory, 50, self.dataset_version_id)
h5ad_file.to_cxg(self.sample_output_directory, 50)

self._validate_cxg_and_h5ad_content_match(
sparse_with_column_shift_filename, self.sample_output_directory, False, has_column_encoding=True
4 changes: 3 additions & 1 deletion tests/unit/processing/test_spatial_assets_utils.py
Original file line number Diff line number Diff line change
@@ -248,7 +248,9 @@ def test__upload_assets_failure(spatial_processor, asset_folder, dataset_version
mock_upload.assert_called_once_with(asset_folder, expected_s3_uri)


def test__create_deep_zoom_assets(spatial_processor, cxg_container, valid_spatial_data, mocker, tmpdir):
def test__create_deep_zoom_assets(
spatial_processor, cxg_container, valid_spatial_data, dataset_version_id, mocker, tmpdir
):
mock_fetch_image = mocker.patch.object(spatial_processor, "_fetch_image")
mock_process_and_flip_image = mocker.patch.object(spatial_processor, "_process_and_flip_image")
mock_generate_deep_zoom_assets = mocker.patch.object(spatial_processor, "_generate_deep_zoom_assets")
You are viewing a condensed version of this merge commit. You can view the full changes here.