Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace btree with btree_gin indexes for PostgreSQL to allow larger metadata and faster nested search #588

Merged
merged 14 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,13 @@ jobs:
shell: bash -l {0}
run: |
set -vxeuo pipefail
coverage run -m pytest -v -m "not slow"
coverage report
env:
# Provide test suite with a PostgreSQL database to use.
export TILED_TEST_POSTGRESQL_URI=postgresql+asyncpg://postgres:secret@localhost:5432
TILED_TEST_POSTGRESQL_URI: postgresql+asyncpg://postgres:secret@localhost:5432
# Opt in to LDAPAuthenticator tests.
export TILED_TEST_LDAP=1
coverage run -m pytest -v
coverage report
TILED_TEST_LDAP: 1

windows_checks:
runs-on: windows-latest
Expand Down
3 changes: 3 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ log_cli = 1
log_cli_level = WARNING
log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)
log_cli_date_format=%Y-%m-%d %H:%M:%S
addopts = --strict-markers -m 'not slow'
markers =
slow: marks tests as slow (deselect with '-m "not slow"')
3 changes: 2 additions & 1 deletion tiled/_tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,10 @@ async def test_search(a):
assert await d.search(Eq("number", 12)).keys_range(0, 5) == ["c"]


@pytest.mark.slow
@pytest.mark.asyncio
async def test_metadata_index_is_used(a):
for i in range(10):
for i in range(10000):
await a.create_node(
metadata={
"number": i,
Expand Down
36 changes: 34 additions & 2 deletions tiled/catalog/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import shutil
import sys
import uuid
from functools import partial
from functools import partial, reduce
from pathlib import Path
from urllib.parse import quote_plus, urlparse

Expand Down Expand Up @@ -938,9 +938,18 @@ def _prepare_structure(structure_family, structure):

def binary_op(query, tree, operation):
    """Build a SQL condition for a binary comparison query (Eq, Lt, ...).

    Parameters
    ----------
    query :
        A query object with a dotted ``key`` (path into the metadata JSON)
        and a ``value`` to compare against.
    tree :
        The catalog adapter; supplies the SQL dialect and accumulates
        conditions.
    operation :
        A binary callable from the ``operator`` module (e.g. ``operator.eq``).

    Returns
    -------
    A new variation of ``tree`` with the condition appended.
    """
    dialect_name = tree.engine.url.get_dialect().name
    keys = query.key.split(".")
    attr = orm.Node.metadata_[keys]
    if dialect_name == "sqlite":
        condition = operation(_get_value(attr, type(query.value)), query.value)
    # Specific case where the GIN-optimized index can be used to speed up
    # PostgreSQL equality queries: JSONB containment (@>) of a nested object.
    elif (dialect_name == "postgresql") and (operation == operator.eq):
        condition = orm.Node.metadata_.op("@>")(
            type_coerce(
                key_array_to_json(keys, query.value),
                orm.Node.metadata_.type,
            )
        )
    else:
        condition = operation(attr, type_coerce(query.value, orm.Node.metadata_.type))
    return tree.new_variation(conditions=tree.conditions + [condition])
Expand Down Expand Up @@ -1097,6 +1106,29 @@ def json_serializer(obj):
return safe_json_dump(obj).decode()


def key_array_to_json(keys, value):
    """Build a nested JSON object from a sequence of keys and a value.

    Used to construct the right-hand side of a PostgreSQL JSONB
    containment (``@>``) query.

    Parameters
    ----------
    keys : sequence
        Keys to nest in the object, outermost first.
    value :
        JSON-serializable value assigned to the innermost key.

    Returns
    -------
    dict
        Nested JSON object for use in PostgreSQL queries. If ``keys`` is
        empty, ``value`` itself is returned unchanged.

    Examples
    --------
    >>> key_array_to_json(['x', 'y', 'z'], 1)
    {'x': {'y': {'z': 1}}}
    """
    # Fold from the innermost key outward:
    # 1 -> {'z': 1} -> {'y': {'z': 1}} -> {'x': {'y': {'z': 1}}}
    return reduce(lambda nested, key: {key: nested}, reversed(keys), value)


STRUCTURES = {
StructureFamily.container: CatalogContainerAdapter,
StructureFamily.array: CatalogArrayAdapter,
Expand Down
7 changes: 5 additions & 2 deletions tiled/catalog/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@

# This is the alembic revision ID of the database revision
# required by this version of Tiled.
REQUIRED_REVISION = "0b033e7fbe30"
REQUIRED_REVISION = "3db11ff95b6c"

# This is list of all valid revisions (from current to oldest).
ALL_REVISIONS = ["0b033e7fbe30", "83889e049ddc", "6825c778aa3c"]
ALL_REVISIONS = ["3db11ff95b6c", "0b033e7fbe30", "83889e049ddc", "6825c778aa3c"]


async def initialize_database(engine):
# The definitions in .orm alter Base.metadata.
from . import orm # noqa: F401

async with engine.connect() as connection:
# Install extensions
if engine.dialect.name == "postgresql":
await connection.execute(text("create extension btree_gin;"))
# Create all tables.
await connection.run_sync(Base.metadata.create_all)
if engine.dialect.name == "sqlite":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Changing top_level_metadata to btree_gin

Revision ID: 3db11ff95b6c
Revises: 0b033e7fbe30
Create Date: 2023-11-01 15:16:48.554420

"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "3db11ff95b6c"
down_revision = "0b033e7fbe30"
branch_labels = None
depends_on = None


def upgrade():
    """Replace the btree 'top_level_metadata' index with a btree_gin index.

    PostgreSQL only; other dialects (e.g. SQLite) are a no-op because they
    do not support GIN indexes.
    """
    connection = op.get_bind()
    if connection.engine.dialect.name == "postgresql":
        # CREATE EXTENSION cannot run inside the migration's surrounding
        # transaction, hence the autocommit block.
        # NOTE(review): assumes drop_index/create_index are also intended to
        # run inside this autocommit block — indentation was ambiguous; confirm.
        with op.get_context().autocommit_block():
            # btree_gin allows scalar (btree-able) columns such as
            # 'ancestors', 'time_created', and 'id' to participate in a
            # GIN index alongside the JSONB 'metadata' column.
            op.execute(sa.text("create extension IF NOT EXISTS btree_gin;"))
            op.drop_index("top_level_metadata", table_name="nodes")
            op.create_index(
                "top_level_metadata",
                "nodes",
                ["ancestors", "time_created", "id", "metadata"],
                postgresql_using="gin",
            )


def downgrade():
    """Downgrade is intentionally unsupported.

    This _could_ be implemented, but we will wait for a need since we are
    still in alpha releases.
    """
    raise NotImplementedError
2 changes: 1 addition & 1 deletion tiled/catalog/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class Node(Timestamped, Base):
"time_created",
"id",
"metadata",
postgresql_using="btree",
postgresql_using="gin",
),
# This is used by ORDER BY with the default sorting.
# Index("ancestors_time_created", "ancestors", "time_created"),
Expand Down