diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47dd13ffc..1601c1c6d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,12 +63,13 @@ jobs: shell: bash -l {0} run: | set -vxeuo pipefail + coverage run -m pytest -v -m "not slow" + coverage report + env: # Provide test suite with a PostgreSQL database to use. - export TILED_TEST_POSTGRESQL_URI=postgresql+asyncpg://postgres:secret@localhost:5432 + TILED_TEST_POSTGRESQL_URI: postgresql+asyncpg://postgres:secret@localhost:5432 # Opt in to LDAPAuthenticator tests. - export TILED_TEST_LDAP=1 - coverage run -m pytest -v - coverage report + TILED_TEST_LDAP: 1 windows_checks: runs-on: windows-latest diff --git a/pytest.ini b/pytest.ini index 18a858942..61306c81b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -5,3 +5,6 @@ log_cli = 1 log_cli_level = WARNING log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s) log_cli_date_format=%Y-%m-%d %H:%M:%S +addopts = --strict-markers -m 'not slow' +markers = + slow: marks tests as slow (deselect with '-m "not slow"') diff --git a/tiled/_tests/test_catalog.py b/tiled/_tests/test_catalog.py index 4ae3eddd3..53559ee65 100644 --- a/tiled/_tests/test_catalog.py +++ b/tiled/_tests/test_catalog.py @@ -156,9 +156,10 @@ async def test_search(a): assert await d.search(Eq("number", 12)).keys_range(0, 5) == ["c"] +@pytest.mark.slow @pytest.mark.asyncio async def test_metadata_index_is_used(a): - for i in range(10): + for i in range(10000): await a.create_node( metadata={ "number": i, diff --git a/tiled/catalog/adapter.py b/tiled/catalog/adapter.py index 52f58df72..0e13e8021 100644 --- a/tiled/catalog/adapter.py +++ b/tiled/catalog/adapter.py @@ -6,7 +6,7 @@ import shutil import sys import uuid -from functools import partial +from functools import partial, reduce from pathlib import Path from urllib.parse import quote_plus, urlparse @@ -938,9 +938,18 @@ def _prepare_structure(structure_family, structure): def 
binary_op(query, tree, operation): dialect_name = tree.engine.url.get_dialect().name - attr = orm.Node.metadata_[query.key.split(".")] + keys = query.key.split(".") + attr = orm.Node.metadata_[keys] if dialect_name == "sqlite": condition = operation(_get_value(attr, type(query.value)), query.value) + # specific case where GIN optimized index can be used to speed up POSTGRES equals queries + elif (dialect_name == "postgresql") and (operation == operator.eq): + condition = orm.Node.metadata_.op("@>")( + type_coerce( + key_array_to_json(keys, query.value), + orm.Node.metadata_.type, + ) + ) else: condition = operation(attr, type_coerce(query.value, orm.Node.metadata_.type)) return tree.new_variation(conditions=tree.conditions + [condition]) @@ -1097,6 +1106,29 @@ def json_serializer(obj): return safe_json_dump(obj).decode() +def key_array_to_json(keys, value): + """Take JSON accessor information as an array of keys and value + + Parameters + ---------- + keys : iterable + An array of keys to be created in the object. + value : object + Value assigned to the final key. + + Returns + ------- + json + JSON object for use in postgresql queries. + + Examples + -------- + >>> key_array_to_json(['x','y','z'], 1) + {'x': {'y': {'z': 1}}} + """ + return {keys[0]: reduce(lambda x, y: {y: x}, keys[1:][::-1], value)} + + STRUCTURES = { StructureFamily.container: CatalogContainerAdapter, StructureFamily.array: CatalogArrayAdapter, diff --git a/tiled/catalog/core.py b/tiled/catalog/core.py index bdeba6e58..3cf202510 100644 --- a/tiled/catalog/core.py +++ b/tiled/catalog/core.py @@ -5,10 +5,10 @@ # This is the alembic revision ID of the database revision # required by this version of Tiled. -REQUIRED_REVISION = "0b033e7fbe30" +REQUIRED_REVISION = "3db11ff95b6c" # This is list of all valid revisions (from current to oldest).
-ALL_REVISIONS = ["0b033e7fbe30", "83889e049ddc", "6825c778aa3c"] +ALL_REVISIONS = ["3db11ff95b6c", "0b033e7fbe30", "83889e049ddc", "6825c778aa3c"] async def initialize_database(engine): @@ -16,6 +16,9 @@ async def initialize_database(engine): from . import orm # noqa: F401 async with engine.connect() as connection: + # Install extensions + if engine.dialect.name == "postgresql": + await connection.execute(text("create extension btree_gin;")) # Create all tables. await connection.run_sync(Base.metadata.create_all) if engine.dialect.name == "sqlite": diff --git a/tiled/catalog/migrations/versions/3db11ff95b6c_changing_top_level_metadata_to_btree_gin.py b/tiled/catalog/migrations/versions/3db11ff95b6c_changing_top_level_metadata_to_btree_gin.py new file mode 100644 index 000000000..9a8297e23 --- /dev/null +++ b/tiled/catalog/migrations/versions/3db11ff95b6c_changing_top_level_metadata_to_btree_gin.py @@ -0,0 +1,35 @@ +"""Changing top_level_metadata to btree_gin + +Revision ID: 3db11ff95b6c +Revises: 0b033e7fbe30 +Create Date: 2023-11-01 15:16:48.554420 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "3db11ff95b6c" +down_revision = "0b033e7fbe30" +branch_labels = None +depends_on = None + + +def upgrade(): + connection = op.get_bind() + if connection.engine.dialect.name == "postgresql": + with op.get_context().autocommit_block(): + op.execute(sa.text("create extension IF NOT EXISTS btree_gin;")) + op.drop_index("top_level_metadata", table_name="nodes") + op.create_index( + "top_level_metadata", + "nodes", + ["ancestors", "time_created", "id", "metadata"], + postgresql_using="gin", + ) + + +def downgrade(): + # This _could_ be implemented but we will wait for a need since we are + # still in alpha releases. 
+ raise NotImplementedError diff --git a/tiled/catalog/orm.py b/tiled/catalog/orm.py index bb7aeb116..6822dedc7 100644 --- a/tiled/catalog/orm.py +++ b/tiled/catalog/orm.py @@ -90,7 +90,7 @@ class Node(Timestamped, Base): "time_created", "id", "metadata", - postgresql_using="btree", + postgresql_using="gin", ), # This is used by ORDER BY with the default sorting. # Index("ancestors_time_created", "ancestors", "time_created"),