From 6fd6191eb63b3f91bac412d165a1c080a431c121 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Mon, 27 Nov 2023 11:44:28 -0800 Subject: [PATCH 1/3] feat: Schema 4.0.0 enrichment --- backend/portal/api/enrichment.py | 24 +++++++ .../backend/layers/api/test_portal_api.py | 67 ++++++++++++++++++- 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/backend/portal/api/enrichment.py b/backend/portal/api/enrichment.py index e9d2cce53b2b0..9ac9e58d6f635 100644 --- a/backend/portal/api/enrichment.py +++ b/backend/portal/api/enrichment.py @@ -5,6 +5,8 @@ from collections import OrderedDict +from backend.common.feature_flag import FeatureFlagService, FeatureFlagValues + def enrich_dataset_with_ancestors(dataset, key, ontology_mapping): """ @@ -15,6 +17,16 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping): terms = [e["ontology_term_id"] for e in dataset[key]] + is_schema_4 = FeatureFlagService.is_enabled(FeatureFlagValues.SCHEMA_4) + is_tissue = key == "tissue" + if is_tissue and is_schema_4: + # TODO remove is_schema_4 condition once Schema 4 is rolled out and + # feature flag is removed (#6266). "tissue" must include "tissue_type" + # when generating ancestors; "cell_type" and "development_stage" do not. + terms = [generate_tagged_ontology_id(e) for e in dataset[key]] + else: + terms = [e["ontology_term_id"] for e in dataset[key]] + if not terms: return @@ -23,3 +35,15 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping): unique_ancestors = list(OrderedDict.fromkeys(flattened_ancestors)) if unique_ancestors: dataset[f"{key}_ancestors"] = unique_ancestors + +def generate_tagged_ontology_id(tissue): + """ + Generate ontology ID tagged with tissue_type for the given tissue. For + example, UBERON:1234567 (organoid). + """ + tissue_id = tissue["ontology_term_id"] + tissue_type = tissue["tissue_type"] + # TODO(cc) revisit None here, is this possible during migration only? + if ( tissue_type is None or tissue_type == "tissue" ): + return tissue_id + return f"{tissue_id} ({tissue_type})" \ No newline at end of file diff --git a/tests/unit/backend/layers/api/test_portal_api.py b/tests/unit/backend/layers/api/test_portal_api.py index bfe62d6cdb2d2..0b84590a4fc2f 100644 --- a/tests/unit/backend/layers/api/test_portal_api.py +++ b/tests/unit/backend/layers/api/test_portal_api.py @@ -1727,12 +1727,15 @@ def test__get_all_user_datasets_for_index_requires_auth(self): self.assertEqual(response.status_code, 401) # ✅ - def test__get_all_datasets_for_index_with_ontology_expansion(self): + def test__get_all_datasets_for_index_with_ontology_expansion_deprecated(self): + # TODO deprecated - remove with #6266. Keeping temporarily to ensure + # backwards compatibility while running both 3.0.0 and 4.0.0 (behind + # a feature flag) versions of the code. import copy modified_metadata = copy.deepcopy(self.sample_dataset_metadata) modified_metadata.development_stage = [OntologyTermId("Test", "HsapDv:0000008")] - modified_metadata.tissue = [TissueOntologyTermId("Test", "UBERON:0002048", "cell culture")] + modified_metadata.tissue = [TissueOntologyTermId("Test", "UBERON:0002048")] modified_metadata.cell_type = [OntologyTermId("Test", "CL:0000738")] dataset = self.generate_dataset(metadata=modified_metadata, publish=True) @@ -1797,6 +1800,66 @@ def convert_ontology(ontologies): ], ) + def test__get_all_datasets_for_index_with_ontology_expansion(self): + # Schema 4.0.0 version of + # test__get_all_datasets_for_index_with_ontology_expansion_deprecated + # above. Remove this comment with #6266. + import copy + + modified_metadata = copy.deepcopy(self.sample_dataset_metadata) + modified_metadata.development_stage = [OntologyTermId("Test", "HsapDv:0000008")] + modified_metadata.tissue = [TissueOntologyTermId("Test", "UBERON:0000995", "organoid")] + modified_metadata.cell_type = [OntologyTermId("Test", "CL:0000738")] + + dataset = self.generate_dataset(metadata=modified_metadata, publish=True) + + test_url = furl(path="/dp/v1/datasets/index") + + headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} + response = self.app.get(test_url.url, headers=headers) + self.assertEqual(200, response.status_code) + body = json.loads(response.data) + + actual_dataset = None + for d in body: + if d["id"] == dataset.dataset_version_id: + actual_dataset = d + self.assertIsNotNone(actual_dataset) + + def convert_ontology(ontologies): + return [dataclasses.asdict(o) for o in ontologies] + + if actual_dataset is not None: # pylance + self.assertEqual(actual_dataset["development_stage"], convert_ontology(modified_metadata.development_stage)) + self.assertEqual( + actual_dataset["development_stage_ancestors"], + ["HsapDv:0000008", "HsapDv:0000006", "HsapDv:0000002", "HsapDv:0000045", "HsapDv:0000001"], + ) + + self.assertEqual(actual_dataset["tissue"], convert_ontology(modified_metadata.tissue)) + # TODO update with fix for #6192. + self.assertCountEqual( + actual_dataset["tissue_ancestors"], + [ + "UBERON:0000995 (organoid)" + ], + ) + + self.assertEqual(actual_dataset["cell_type"], convert_ontology(modified_metadata.cell_type)) + self.assertCountEqual( + actual_dataset["cell_type_ancestors"], + [ + "CL:0000255", + "CL:0002371", + "CL:0000988", + "CL:0000738", + "CL:0000548", + "CL:0000219", + "CL:0000003", + "CL:0002242", + ], + ) + # ✅ def test__get_dataset_assets(self): # TODO: I don't think `filename` is relevant - review From 66868b16cf8b47c2002231164fa17f641a964c91 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Mon, 27 Nov 2023 15:12:59 -0800 Subject: [PATCH 2/3] Linting --- backend/portal/api/enrichment.py | 10 ++++++---- tests/unit/backend/layers/api/test_portal_api.py | 4 +--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/portal/api/enrichment.py b/backend/portal/api/enrichment.py index 9ac9e58d6f635..4b16a08c1c7ff 100644 --- a/backend/portal/api/enrichment.py +++ b/backend/portal/api/enrichment.py @@ -36,14 +36,16 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping): if unique_ancestors: dataset[f"{key}_ancestors"] = unique_ancestors + def generate_tagged_ontology_id(tissue): """ Generate ontology ID tagged with tissue_type for the given tissue. For example, UBERON:1234567 (organoid). """ tissue_id = tissue["ontology_term_id"] - tissue_type = tissue["tissue_type"] - # TODO(cc) revisit None here, is this possible during migration only? - if ( tissue_type is None or tissue_type == "tissue" ): + # Handle possible None for tissue_type (possible during migration): default + # to "tissue". + tissue_type = tissue["tissue_type"] or "tissue" + if tissue_type == "tissue": return tissue_id - return f"{tissue_id} ({tissue_type})" \ No newline at end of file + return f"{tissue_id} ({tissue_type})" diff --git a/tests/unit/backend/layers/api/test_portal_api.py b/tests/unit/backend/layers/api/test_portal_api.py index 0b84590a4fc2f..da849a462f840 100644 --- a/tests/unit/backend/layers/api/test_portal_api.py +++ b/tests/unit/backend/layers/api/test_portal_api.py @@ -1840,9 +1840,7 @@ def convert_ontology(ontologies): # TODO update with fix for #6192. self.assertCountEqual( actual_dataset["tissue_ancestors"], - [ - "UBERON:0000995 (organoid)" - ], + ["UBERON:0000995 (organoid)"], ) self.assertEqual(actual_dataset["cell_type"], convert_ontology(modified_metadata.cell_type)) From 57d26716da7c17dad6e72572f21a3a8b62e77549 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Tue, 28 Nov 2023 11:37:00 -0800 Subject: [PATCH 3/3] Updated generate_tagged_tissue_ontology_id naming. --- backend/portal/api/enrichment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/portal/api/enrichment.py b/backend/portal/api/enrichment.py index 4b16a08c1c7ff..254ad59a249f4 100644 --- a/backend/portal/api/enrichment.py +++ b/backend/portal/api/enrichment.py @@ -23,7 +23,7 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping): # TODO remove is_schema_4 condition once Schema 4 is rolled out and # feature flag is removed (#6266). "tissue" must include "tissue_type" # when generating ancestors; "cell_type" and "development_stage" do not. - terms = [generate_tagged_ontology_id(e) for e in dataset[key]] + terms = [generate_tagged_tissue_ontology_id(e) for e in dataset[key]] else: terms = [e["ontology_term_id"] for e in dataset[key]] @@ -37,7 +37,7 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping): dataset[f"{key}_ancestors"] = unique_ancestors -def generate_tagged_ontology_id(tissue): +def generate_tagged_tissue_ontology_id(tissue): """ Generate ontology ID tagged with tissue_type for the given tissue. For example, UBERON:1234567 (organoid).