From 6fd6191eb63b3f91bac412d165a1c080a431c121 Mon Sep 17 00:00:00 2001
From: MillenniumFalconMechanic <mim@clevercanary.com>
Date: Mon, 27 Nov 2023 11:44:28 -0800
Subject: [PATCH 1/3] feat: Schema 4.0.0 enrichment

---
 backend/portal/api/enrichment.py              | 24 +++++++
 .../backend/layers/api/test_portal_api.py     | 67 ++++++++++++++++++-
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/backend/portal/api/enrichment.py b/backend/portal/api/enrichment.py
index e9d2cce53b2b0..9ac9e58d6f635 100644
--- a/backend/portal/api/enrichment.py
+++ b/backend/portal/api/enrichment.py
@@ -5,6 +5,8 @@
 
 from collections import OrderedDict
 
+from backend.common.feature_flag import FeatureFlagService, FeatureFlagValues
+
 
 def enrich_dataset_with_ancestors(dataset, key, ontology_mapping):
     """
@@ -15,6 +17,16 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping):
 
     terms = [e["ontology_term_id"] for e in dataset[key]]
 
+    is_schema_4 = FeatureFlagService.is_enabled(FeatureFlagValues.SCHEMA_4)
+    is_tissue = key == "tissue"
+    if is_tissue and is_schema_4:
+        # TODO remove is_schema_4 condition once Schema 4 is rolled out and
+        # feature flag is removed (#6266). "tissue" must include "tissue_type"
+        # when generating ancestors; "cell_type" and "development_stage" do not.
+        terms = [generate_tagged_ontology_id(e) for e in dataset[key]]
+    else:
+        terms = [e["ontology_term_id"] for e in dataset[key]]
+
     if not terms:
         return
 
@@ -23,3 +35,15 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping):
     unique_ancestors = list(OrderedDict.fromkeys(flattened_ancestors))
     if unique_ancestors:
         dataset[f"{key}_ancestors"] = unique_ancestors
+
+def generate_tagged_ontology_id(tissue):
+    """
+    Generate ontology ID tagged with tissue_type for the given tissue. For
+    example, UBERON:1234567 (organoid).
+    """
+    tissue_id = tissue["ontology_term_id"]
+    tissue_type = tissue["tissue_type"]
+    # TODO(cc) revisit None here, is this possible during migration only? 
+    if ( tissue_type is None or tissue_type == "tissue" ):
+        return tissue_id
+    return f"{tissue_id} ({tissue_type})"
\ No newline at end of file
diff --git a/tests/unit/backend/layers/api/test_portal_api.py b/tests/unit/backend/layers/api/test_portal_api.py
index bfe62d6cdb2d2..0b84590a4fc2f 100644
--- a/tests/unit/backend/layers/api/test_portal_api.py
+++ b/tests/unit/backend/layers/api/test_portal_api.py
@@ -1727,12 +1727,15 @@ def test__get_all_user_datasets_for_index_requires_auth(self):
         self.assertEqual(response.status_code, 401)
 
     # ✅
-    def test__get_all_datasets_for_index_with_ontology_expansion(self):
+    def test__get_all_datasets_for_index_with_ontology_expansion_deprecated(self):
+        # TODO deprecated - remove with #6266. Keeping temporarily to ensure
+        # backwards compatibility while running both 3.0.0 and 4.0.0 (behind
+        # a feature flag) versions of the code.
         import copy
 
         modified_metadata = copy.deepcopy(self.sample_dataset_metadata)
         modified_metadata.development_stage = [OntologyTermId("Test", "HsapDv:0000008")]
-        modified_metadata.tissue = [TissueOntologyTermId("Test", "UBERON:0002048", "cell culture")]
+        modified_metadata.tissue = [TissueOntologyTermId("Test", "UBERON:0002048")]
         modified_metadata.cell_type = [OntologyTermId("Test", "CL:0000738")]
 
         dataset = self.generate_dataset(metadata=modified_metadata, publish=True)
@@ -1797,6 +1800,66 @@ def convert_ontology(ontologies):
                 ],
             )
 
+    def test__get_all_datasets_for_index_with_ontology_expansion(self):
+        # Schema 4.0.0 version of
+        # test__get_all_datasets_for_index_with_ontology_expansion_deprecated
+        # above. Remove this comment with #6266.
+        import copy
+
+        modified_metadata = copy.deepcopy(self.sample_dataset_metadata)
+        modified_metadata.development_stage = [OntologyTermId("Test", "HsapDv:0000008")]
+        modified_metadata.tissue = [TissueOntologyTermId("Test", "UBERON:0000995", "organoid")]
+        modified_metadata.cell_type = [OntologyTermId("Test", "CL:0000738")]
+
+        dataset = self.generate_dataset(metadata=modified_metadata, publish=True)
+
+        test_url = furl(path="/dp/v1/datasets/index")
+
+        headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()}
+        response = self.app.get(test_url.url, headers=headers)
+        self.assertEqual(200, response.status_code)
+        body = json.loads(response.data)
+
+        actual_dataset = None
+        for d in body:
+            if d["id"] == dataset.dataset_version_id:
+                actual_dataset = d
+        self.assertIsNotNone(actual_dataset)
+
+        def convert_ontology(ontologies):
+            return [dataclasses.asdict(o) for o in ontologies]
+
+        if actual_dataset is not None:  # pylance
+            self.assertEqual(actual_dataset["development_stage"], convert_ontology(modified_metadata.development_stage))
+            self.assertEqual(
+                actual_dataset["development_stage_ancestors"],
+                ["HsapDv:0000008", "HsapDv:0000006", "HsapDv:0000002", "HsapDv:0000045", "HsapDv:0000001"],
+            )
+
+            self.assertEqual(actual_dataset["tissue"], convert_ontology(modified_metadata.tissue))
+            # TODO update with fix for #6192.
+            self.assertCountEqual(
+                actual_dataset["tissue_ancestors"],
+                [
+                    "UBERON:0000995 (organoid)"
+                ],
+            )
+
+            self.assertEqual(actual_dataset["cell_type"], convert_ontology(modified_metadata.cell_type))
+            self.assertCountEqual(
+                actual_dataset["cell_type_ancestors"],
+                [
+                    "CL:0000255",
+                    "CL:0002371",
+                    "CL:0000988",
+                    "CL:0000738",
+                    "CL:0000548",
+                    "CL:0000219",
+                    "CL:0000003",
+                    "CL:0002242",
+                ],
+            )
+
     # ✅
     def test__get_dataset_assets(self):
         # TODO: I don't think `filename` is relevant - review

From 66868b16cf8b47c2002231164fa17f641a964c91 Mon Sep 17 00:00:00 2001
From: MillenniumFalconMechanic <mim@clevercanary.com>
Date: Mon, 27 Nov 2023 15:12:59 -0800
Subject: [PATCH 2/3] Linting

---
 backend/portal/api/enrichment.py                 | 10 ++++++----
 tests/unit/backend/layers/api/test_portal_api.py |  4 +---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/backend/portal/api/enrichment.py b/backend/portal/api/enrichment.py
index 9ac9e58d6f635..4b16a08c1c7ff 100644
--- a/backend/portal/api/enrichment.py
+++ b/backend/portal/api/enrichment.py
@@ -36,14 +36,16 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping):
     if unique_ancestors:
         dataset[f"{key}_ancestors"] = unique_ancestors
 
+
 def generate_tagged_ontology_id(tissue):
     """
     Generate ontology ID tagged with tissue_type for the given tissue. For
     example, UBERON:1234567 (organoid).
     """
     tissue_id = tissue["ontology_term_id"]
-    tissue_type = tissue["tissue_type"]
-    # TODO(cc) revisit None here, is this possible during migration only? 
-    if ( tissue_type is None or tissue_type == "tissue" ):
+    # tissue_type may be None (e.g. during migration); treat a missing value
+    # as "tissue".
+    tissue_type = tissue["tissue_type"] or "tissue"
+    if tissue_type == "tissue":
         return tissue_id
-    return f"{tissue_id} ({tissue_type})"
\ No newline at end of file
+    return f"{tissue_id} ({tissue_type})"
diff --git a/tests/unit/backend/layers/api/test_portal_api.py b/tests/unit/backend/layers/api/test_portal_api.py
index 0b84590a4fc2f..da849a462f840 100644
--- a/tests/unit/backend/layers/api/test_portal_api.py
+++ b/tests/unit/backend/layers/api/test_portal_api.py
@@ -1840,9 +1840,7 @@ def convert_ontology(ontologies):
             # TODO update with fix for #6192.
             self.assertCountEqual(
                 actual_dataset["tissue_ancestors"],
-                [
-                    "UBERON:0000995 (organoid)"
-                ],
+                ["UBERON:0000995 (organoid)"],
             )
 
             self.assertEqual(actual_dataset["cell_type"], convert_ontology(modified_metadata.cell_type))

From 57d26716da7c17dad6e72572f21a3a8b62e77549 Mon Sep 17 00:00:00 2001
From: MillenniumFalconMechanic <mim@clevercanary.com>
Date: Tue, 28 Nov 2023 11:37:00 -0800
Subject: [PATCH 3/3] Rename generate_tagged_ontology_id to generate_tagged_tissue_ontology_id.

---
 backend/portal/api/enrichment.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/portal/api/enrichment.py b/backend/portal/api/enrichment.py
index 4b16a08c1c7ff..254ad59a249f4 100644
--- a/backend/portal/api/enrichment.py
+++ b/backend/portal/api/enrichment.py
@@ -23,7 +23,7 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping):
         # TODO remove is_schema_4 condition once Schema 4 is rolled out and
         # feature flag is removed (#6266). "tissue" must include "tissue_type"
         # when generating ancestors; "cell_type" and "development_stage" do not.
-        terms = [generate_tagged_ontology_id(e) for e in dataset[key]]
+        terms = [generate_tagged_tissue_ontology_id(e) for e in dataset[key]]
     else:
         terms = [e["ontology_term_id"] for e in dataset[key]]
 
@@ -37,7 +37,7 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping):
         dataset[f"{key}_ancestors"] = unique_ancestors
 
 
-def generate_tagged_ontology_id(tissue):
+def generate_tagged_tissue_ontology_id(tissue):
     """
     Generate ontology ID tagged with tissue_type for the given tissue. For
     example, UBERON:1234567 (organoid).