pepkit · donaldcampbelljr · Jan 24, 2024 · Jan 24, 2024 · Jan 24, 2024
diff --git a/pipestat/backends/db_backend/db_parsed_schema.py b/pipestat/backends/db_backend/db_parsed_schema.py
@@ -151,6 +151,7 @@
         # TODO: parse "required" ?
         defs = {}
         for name, subdata in data.items():
+            result_indexed = False
             try:
                 typename = subdata[SCHEMA_TYPE_KEY]
             except KeyError:
@@ -162,18 +163,25 @@
             else:
                 data_type = self._get_data_type(typename)
             if data_type == CLASSES_BY_TYPE["object"] or data_type == CLASSES_BY_TYPE["array"]:
+                if "index" in subdata and subdata["index"] is True:
+                    _LOGGER.warning(f"Cannot index JSONB Column, ignoring index: True for {name} ")
                 defs[name] = (
                     data_type,
-                    Field(sa_column=Column(JSONB), default=null()),
+                    Field(
+                        sa_column=Column(JSONB),
+                        default=null(),
+                    ),
                 )
             else:
+                if "index" in subdata:
+                    result_indexed = subdata["index"]
                 defs[name] = (
-                    # Optional[subdata[SCHEMA_TYPE_KEY]],
-                    # subdata[SCHEMA_TYPE_KEY],
-                    # Optional[str],
-                    # CLASSES_BY_TYPE[subdata[SCHEMA_TYPE_KEY]],
                     data_type,
-                    Field(default=subdata.get("default"), nullable=True),
+                    Field(
+                        default=subdata.get("default"),
+                        nullable=True,
+                        index=result_indexed,
+                    ),
                 )
         return defs
 

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -112,6 +112,11 @@ def output_schema_as_JSON_schema():
     return get_data_file_path("output_schema_as_JSON_schema.yaml")
 
 
+@pytest.fixture
+def output_schema_with_index():  # path to the sample schema containing "index: True" results
+    return get_data_file_path("sample_output_schema_with_index.yaml")
+
+
 @pytest.fixture
 def output_schema_no_refs():
     return get_data_file_path("output_schema.yaml")

diff --git a/tests/data/sample_output_schema_with_index.yaml b/tests/data/sample_output_schema_with_index.yaml
@@ -0,0 +1,87 @@
+title: An example Pipestat output schema
+description: A pipeline that uses pipestat to report sample and project level results.
+type: object
+properties:
+  pipeline_name: "default_pipeline_name"
+  samples:
+    type: object
+    properties:
+      number_of_things:
+        type: integer
+        description: "Number of things"
+      percentage_of_things:
+        type: number
+        description: "Percentage of things"
+      name_of_something:
+        type: string
+        description: "Name of something"
+      switch_value:
+        type: boolean
+        description: "Is the switch on or off"
+      md5sum:
+        type: string
+        description: "MD5SUM of an object"
+        highlight: true
+        index: True
+      collection_of_images:
+        description: "This stores a collection of values or objects"
+        type: array
+        items:
+          properties:
+              prop1:
+                description: "This is an example file"
+                $ref: "#/$defs/file"
+      output_file_in_object:
+        type: object
+        index: True
+        properties:
+          prop1:
+            description: "This is an example file"
+            $ref: "#/$defs/file"
+          prop2:
+            description: "This is an example image"
+            $ref: "#/$defs/image"
+        description: "Object output"
+      output_file_in_object_nested:
+        type: object
+        description: First Level
+        properties:
+          prop1:
+            type: object
+            description: Second Level
+            properties:
+              prop2:
+                type: integer
+                description: Third Level
+      output_file:
+        $ref: "#/$defs/file"
+        description: "This is a path to the output file"
+      output_image:
+        $ref: "#/$defs/image"
+        description: "This is a path to the output image"
+$defs:
+  image:
+    type: object
+    object_type: image
+    properties:
+      path:
+        type: string
+      thumbnail_path:
+        type: string
+      title:
+        type: string
+    required:
+      - path
+      - thumbnail_path
+      - title
+  file:
+    type: object
+    object_type: file
+    properties:
+      path:
+        type: string
+      title:
+        type: string
+    required:
+      - path
+      - title
diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py
@@ -2241,3 +2241,43 @@ def test_multi_results_summarize(
             psm.summarize()
             data = YAMLConfigManager(filepath=os.path.join(temp_dir, "aggregate_results.yaml"))
             assert r_id in data[psm.pipeline_name][psm.pipeline_type].keys()
+
+
+@pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available")
+class TestSetIndexTrue:
+    """Schema results flagged ``index: True`` should be indexed on the DB model."""
+
+    @pytest.mark.parametrize(
+        ["rec_id", "val"], [("sample1", {"name_of_something": "test_name"})]
+    )
+    @pytest.mark.parametrize("backend", ["db"])
+    def test_set_index(
+        self,
+        rec_id,
+        val,
+        config_file_path,
+        output_schema_with_index,
+        results_file_path,
+        backend,
+        range_values,
+    ):
+        # rec_id/val and the results_file_path fixture value are not used below.
+        with NamedTemporaryFile() as tmp, ContextManagerDBTesting(DB_URL):
+            if backend == "db":
+                backend_kwargs = {"config_file": config_file_path}
+            else:
+                backend_kwargs = {"results_file_path": tmp.name}
+            psm = SamplePipestatManager(
+                schema_path=output_schema_with_index,
+                database_only=False,
+                **backend_kwargs,
+            )
+
+            # Report the first ten entries of range_values.
+            for entry in range_values[:10]:
+                psm.report(record_identifier=entry[0], values=entry[1], force_overwrite=True)
+
+            # md5sum sets index: True in the schema; number_of_things sets no index.
+            model = psm.backend.get_model(table_name=psm.backend.table_name)
+            assert model.md5sum.index is True
+            assert model.number_of_things.index is False