merge: main->tsmith/5.1

chanzuckerberg · Bento007 · May 23, 2024 · May 23, 2024 · May 24, 2024 · May 28, 2024
commit 2618fa72d9bc62f8ffdb73de9d70ea12833ca15b
diff --git a/backend/layers/processing/h5ad_data_file.py b/backend/layers/processing/h5ad_data_file.py
@@ -54,9 +54,7 @@ def __init__(
 
         self.validate_anndata()
 
-    def to_cxg(
-        self, output_cxg_directory, sparse_threshold, dataset_version_id, convert_anndata_colors_to_cxg_colors=True
-    ):
+    def to_cxg(self, output_cxg_directory, sparse_threshold, convert_anndata_colors_to_cxg_colors=True):
         """
         Writes the following attributes of the anndata to CXG: 1) the metadata as metadata attached to an empty
         DenseArray, 2) the obs DataFrame as a DenseArray, 3) the var DataFrame as a DenseArray, 4) all valid
@@ -82,7 +80,7 @@ def to_cxg(
         convert_dataframe_to_cxg_array(output_cxg_directory, "var", self.var, self.var_index_column_name, ctx)
         logging.info("\t...dataset var dataframe saved")
 
-        convert_uns_to_cxg_group(output_cxg_directory, self.anndata.uns, dataset_version_id, "uns", ctx)
+        convert_uns_to_cxg_group(output_cxg_directory, self.anndata.uns, "uns", ctx)
         logging.info("\t...dataset uns dataframe saved")
 
         self.write_anndata_embeddings_to_cxg(output_cxg_directory, ctx)

diff --git a/backend/layers/processing/process_cxg.py b/backend/layers/processing/process_cxg.py
@@ -67,15 +67,15 @@ def process(
         self.process_cxg(labeled_h5ad_filename, dataset_version_id, cellxgene_bucket, current_artifacts)
 
     @logit
-    def make_cxg(self, local_filename, dataset_version_id):
+    def make_cxg(self, local_filename):
         """
         Convert the uploaded H5AD file to the CXG format servicing the cellxgene Explorer.
         """
 
         cxg_output_container = local_filename.replace(".h5ad", ".cxg")
         try:
             h5ad_data_file = H5ADDataFile(local_filename, var_index_column_name="feature_name")
-            h5ad_data_file.to_cxg(cxg_output_container, sparse_threshold=25.0, dataset_version_id=dataset_version_id.id)
+            h5ad_data_file.to_cxg(cxg_output_container, sparse_threshold=25.0)
         except Exception as ex:
             # TODO use a specialized exception
             msg = "CXG conversion failed."

diff --git a/backend/layers/processing/process_logic.py b/backend/layers/processing/process_logic.py
@@ -114,7 +114,7 @@ def convert_file(
         start = datetime.now()
         try:
             self.update_processing_status(dataset_version_id, processing_status_key, DatasetConversionStatus.CONVERTING)
-            file_dir = converter(local_filename, dataset_version_id)
+            file_dir = converter(local_filename)
             self.update_processing_status(dataset_version_id, processing_status_key, DatasetConversionStatus.CONVERTED)
             self.logger.info(f"Finished converting {converter} in {datetime.now() - start}")
         except Exception:

diff --git a/backend/layers/processing/process_seurat.py b/backend/layers/processing/process_seurat.py
@@ -96,7 +96,7 @@ def process(self, dataset_version_id: DatasetVersionId, artifact_bucket: str, da
         )
 
     @logit
-    def make_seurat(self, local_filename, *args, **kwargs):
+    def make_seurat(self, local_filename):
         """
         Create a Seurat rds file from the AnnData file.
         """

diff --git a/backend/layers/processing/utils/cxg_generation_utils.py b/backend/layers/processing/utils/cxg_generation_utils.py
@@ -34,7 +34,7 @@ def convert_dictionary_to_cxg_group(cxg_container, metadata_dict, group_metadata
             metadata_array.meta[key] = value
 
 
-def convert_uns_to_cxg_group(cxg_container, metadata_dict, dataset_version_id, group_metadata_name="uns", ctx=None):
+def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="uns", ctx=None):
     """
     Convert uns (unstructured) metadata to CXG output directory specified
     Generate deep zoom assets for spatial data
@@ -53,7 +53,7 @@ def convert_uns_to_cxg_group(cxg_container, metadata_dict, dataset_version_id, g
                 for object_id, content in value.items():
                     if object_id not in SPATIAL_KEYS_EXCLUDE:
                         object_filtered = spatial_processor.filter_spatial_data(content, object_id)
-                        spatial_processor.create_deep_zoom_assets(cxg_container, content, dataset_version_id)
+                        spatial_processor.create_deep_zoom_assets(cxg_container, content)
 
                 metadata_array.meta[key] = pickle.dumps(object_filtered)
 

diff --git a/backend/layers/processing/utils/spatial.py b/backend/layers/processing/utils/spatial.py
@@ -116,26 +116,23 @@ def _generate_deep_zoom_assets(self, image_array, assets_folder):
         image = pyvips.Image.new_from_memory(linear.data, w, h, bands, "uchar")
         image.dzsave(os.path.join(assets_folder, "spatial"), suffix=".jpeg")
 
-    def _upload_assets(self, assets_folder, dataset_version_id):
+    def _upload_assets(self, assets_folder):
         """
         Upload the deep zoom assets to the S3 bucket.
 
         Args:
             assets_folder (str): The folder containing the assets.
-            dataset_version_id (str): The UUID uniquely identifying the dataset version.
         """
-        version_id = dataset_version_id.replace(".cxg", "")
-        s3_uri = f"s3://{self.bucket_name}/{self.asset_directory}/{version_id}"
+        s3_uri = f"s3://{self.bucket_name}/{self.asset_directory}/{os.path.basename(assets_folder)}"
         self.s3_provider.upload_directory(assets_folder, s3_uri)
 
-    def create_deep_zoom_assets(self, container_name, content, dataset_version_id):
+    def create_deep_zoom_assets(self, container_name, content):
         """
         Create deep zoom assets for a container.
 
         Args:
             container_name (str): The name of the container.
             content (dict): The content dictionary containing the image array.
-            dataset_version_id (str): The UUID uniquely identifying the dataset version.
         """
         try:
             with tempfile.TemporaryDirectory() as temp_dir:
@@ -145,7 +142,7 @@ def create_deep_zoom_assets(self, container_name, content, dataset_version_id):
                 image_array, _ = self._fetch_image(content)
                 processed_image = self._process_and_flip_image(image_array)
                 self._generate_deep_zoom_assets(processed_image, assets_folder)
-                self._upload_assets(assets_folder, dataset_version_id)
+                self._upload_assets(assets_folder)
         except Exception as e:
             logger.exception(f"Failed to create and upload deep zoom assets: {e}")
             raise

diff --git a/tests/unit/processing/test_h5ad_data_file.py b/tests/unit/processing/test_h5ad_data_file.py
@@ -20,7 +20,6 @@ def setUp(self):
         self.sample_h5ad_filename = self._write_anndata_to_file(self.sample_anndata)
 
         self.sample_output_directory = path.splitext(self.sample_h5ad_filename)[0] + ".cxg"
-        self.dataset_version_id = "test_dataset_version_id"
 
     def tearDown(self):
         if self.sample_h5ad_filename:
@@ -109,31 +108,31 @@ def test__create_h5ad_data_file__obs_and_var_index_names_specified_doesnt_exist_
 
     def test__to_cxg__simple_anndata_no_corpora_and_sparse(self):
         h5ad_file = H5ADDataFile(self.sample_h5ad_filename)
-        h5ad_file.to_cxg(self.sample_output_directory, 100, self.dataset_version_id)
+        h5ad_file.to_cxg(self.sample_output_directory, 100)
 
         self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, True)
 
     def test__to_cxg__simple_anndata_with_corpora_and_sparse(self):
         h5ad_file = H5ADDataFile(self.sample_h5ad_filename)
-        h5ad_file.to_cxg(self.sample_output_directory, 100, self.dataset_version_id)
+        h5ad_file.to_cxg(self.sample_output_directory, 100)
 
         self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, True)
 
     def test__to_cxg__simple_anndata_no_corpora_and_dense(self):
         h5ad_file = H5ADDataFile(self.sample_h5ad_filename)
-        h5ad_file.to_cxg(self.sample_output_directory, 0, self.dataset_version_id)
+        h5ad_file.to_cxg(self.sample_output_directory, 0)
 
         self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, False)
 
     def test__to_cxg__simple_anndata_with_corpora_and_dense(self):
         h5ad_file = H5ADDataFile(self.sample_h5ad_filename)
-        h5ad_file.to_cxg(self.sample_output_directory, 0, self.dataset_version_id)
+        h5ad_file.to_cxg(self.sample_output_directory, 0)
 
         self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, False)
 
     def test__to_cxg__simple_anndata_with_corpora_and_dense_using_feature_name_var_index(self):
         h5ad_file = H5ADDataFile(self.sample_h5ad_filename, var_index_column_name="feature_name")
-        h5ad_file.to_cxg(self.sample_output_directory, 0, self.dataset_version_id)
+        h5ad_file.to_cxg(self.sample_output_directory, 0)
 
         self._validate_cxg_and_h5ad_content_match(self.sample_h5ad_filename, self.sample_output_directory, False)
         self._validate_cxg_var_index_column_match(
@@ -143,7 +142,7 @@ def test__to_cxg__simple_anndata_with_corpora_and_dense_using_feature_name_var_i
 
     def test__to_cxg__simple_anndata_with_different_var_index_than_h5ad(self):
         h5ad_file = H5ADDataFile(self.sample_h5ad_filename, var_index_column_name="int_category")
-        h5ad_file.to_cxg(self.sample_output_directory, 0, self.dataset_version_id)
+        h5ad_file.to_cxg(self.sample_output_directory, 0)
 
         self._validate_cxg_var_index_column_match(
             self.sample_output_directory,
@@ -156,7 +155,7 @@ def test__to_cxg__with_sparse_column_encoding(self):
         sparse_with_column_shift_filename = self._write_anndata_to_file(anndata)
 
         h5ad_file = H5ADDataFile(sparse_with_column_shift_filename)
-        h5ad_file.to_cxg(self.sample_output_directory, 50, self.dataset_version_id)
+        h5ad_file.to_cxg(self.sample_output_directory, 50)
 
         self._validate_cxg_and_h5ad_content_match(
             sparse_with_column_shift_filename, self.sample_output_directory, False, has_column_encoding=True

diff --git a/tests/unit/processing/test_spatial_assets_utils.py b/tests/unit/processing/test_spatial_assets_utils.py
@@ -248,7 +248,9 @@ def test__upload_assets_failure(spatial_processor, asset_folder, dataset_version
     mock_upload.assert_called_once_with(asset_folder, expected_s3_uri)
 
 
-def test__create_deep_zoom_assets(spatial_processor, cxg_container, valid_spatial_data, mocker, tmpdir):
+def test__create_deep_zoom_assets(
+    spatial_processor, cxg_container, valid_spatial_data, dataset_version_id, mocker, tmpdir
+):
     mock_fetch_image = mocker.patch.object(spatial_processor, "_fetch_image")
     mock_process_and_flip_image = mocker.patch.object(spatial_processor, "_process_and_flip_image")
     mock_generate_deep_zoom_assets = mocker.patch.object(spatial_processor, "_generate_deep_zoom_assets")