From f5efa2866132d90546d262ce9f4f3b9505ea1289 Mon Sep 17 00:00:00 2001 From: pesap Date: Thu, 5 Mar 2026 13:23:39 -0700 Subject: [PATCH 1/4] perf: Adding performance checks --- benchmarks/README.md | 15 +++ benchmarks/conftest.py | 99 +++++++++++++++++++ .../test_add_memberships_from_records.py | 95 ++++++++++++++++++ pyproject.toml | 2 + uv.lock | 24 +++++ 5 files changed, 235 insertions(+) create mode 100644 benchmarks/README.md create mode 100644 benchmarks/conftest.py create mode 100644 benchmarks/test_add_memberships_from_records.py diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..1471983 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,15 @@ +# Benchmark Tests + +This folder contains `pytest-benchmark` performance tests for hot paths. + +Run all benchmark tests: + +```bash +uv run pytest benchmarks --benchmark-only --no-cov +``` + +Run only membership bulk insert benchmark: + +```bash +uv run pytest benchmarks/test_add_memberships_from_records.py --benchmark-only --no-cov +``` diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py new file mode 100644 index 0000000..c5ea466 --- /dev/null +++ b/benchmarks/conftest.py @@ -0,0 +1,99 @@ +"""Shared fixtures for benchmark tests.""" + +from __future__ import annotations + +import uuid +from collections.abc import Generator + +import pytest + +from plexosdb import PlexosDB + + +@pytest.fixture(scope="function") +def db_instance_with_schema() -> Generator[PlexosDB, None, None]: + """Create a minimal schema-backed database for benchmark runs.""" + db = PlexosDB() + db.create_schema() + with db._db.transaction(): + db._db.execute( + "INSERT INTO t_class(class_id, name, description) VALUES (1, 'System', 'System class')" + ) + db._db.execute( + "INSERT INTO t_class(class_id, name, description) VALUES (2, 'Generator', 'Generator class')" + ) + db._db.execute("INSERT INTO t_class(class_id, name, description) VALUES (3, 'Node', 'Node class')") + db._db.execute( + "INSERT INTO t_class(class_id, name, description) VALUES (4, 'Scenario', 'Scenario class')" + ) + db._db.execute( + "INSERT INTO t_class(class_id, name, description) VALUES (5, 'DataFile', 'DataFile class')" + ) + db._db.execute( + "INSERT INTO t_class(class_id, name, description) VALUES (6, 'Storage', 'Storage class')" + ) + db._db.execute( + "INSERT INTO t_class(class_id, name, description) VALUES (7, 'Report', 'Report class')" + ) + db._db.execute("INSERT INTO t_class(class_id, name, description) VALUES (8, 'Model', 'Model class')") + db._db.execute( + "INSERT INTO t_object(object_id, name, class_id, GUID) VALUES (1, 'System', 1, ?)", + (str(uuid.uuid4()),), + ) + db._db.execute( + "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) " + "VALUES (1, 1, 2, 'Generators')" + ) + db._db.execute( + "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) " + "VALUES (2, 1, 3, 'Nodes')" + ) + db._db.execute( + "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) " + "VALUES (3, 2, 3, 'Nodes')" + ) + db._db.execute( + "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) " + "VALUES (4, 1, 4, 'Scenarios')" + ) + db._db.execute( + "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) " + "VALUES (5, 1, 6, 'Storages')" + ) + db._db.execute( + "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) " + "VALUES (6, 1, 8, 'Models')" + ) + db._db.execute( + "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) " + "VALUES (7, 8, 7, 'Models')" + ) + db._db.execute( + "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) " + "VALUES (8, 1, 7, 'Reports')" + ) + db._db.execute("INSERT INTO t_unit(unit_id, value) VALUES (1,'MW')") + db._db.execute("INSERT INTO t_unit(unit_id, value) VALUES (2,'MWh')") + db._db.execute("INSERT INTO t_unit(unit_id, value) VALUES (3,'%')") + db._db.execute( + "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) " + "VALUES (?, ?, ?, ?)", + (9, 8, 4, "Scenarios"), + ) + db._db.execute( + "INSERT INTO t_property(property_id, collection_id, unit_id, name) VALUES (1,1,1, 'Max Capacity')" + ) + db._db.execute( + "INSERT INTO t_property(property_id, collection_id, unit_id, name) VALUES (2,1,2, 'Max Energy')" + ) + db._db.execute( + "INSERT INTO t_property(property_id, collection_id, unit_id, name) " + "VALUES (3,1,1, 'Rating Factor')" + ) + db._db.execute("INSERT INTO t_config(element, value) VALUES ('Version', '9.2')") + db._db.execute("INSERT INTO t_attribute(attribute_id, class_id, name) VALUES( 1, 2, 'Latitude')") + db._db.execute( + "INSERT INTO t_property_report(property_id, collection_id, name) VALUES (1, 1, 'Units')" + ) + yield db + db._db.close() diff --git a/benchmarks/test_add_memberships_from_records.py b/benchmarks/test_add_memberships_from_records.py new file mode 100644 index 0000000..ff98e0f --- /dev/null +++ b/benchmarks/test_add_memberships_from_records.py @@ -0,0 +1,95 @@ +"""Benchmark coverage for bulk membership insertion.""" + +from __future__ import annotations + +import uuid + +import pytest + +from plexosdb import ClassEnum, CollectionEnum, PlexosDB + + +def _insert_objects( + db: PlexosDB, + *, + class_id: int, + count: int, + prefix: str, + start_id: int, +) -> list[int]: + object_ids = [start_id + idx for idx in range(count)] + params = [ + (object_id, f"{prefix}_{idx}", class_id, str(uuid.uuid4())) + for idx, object_id in enumerate(object_ids) + ] + db._db.executemany("INSERT INTO t_object(object_id, name, class_id, GUID) VALUES (?, ?, ?, ?)", params) + return object_ids + + +@pytest.mark.benchmark +@pytest.mark.parametrize( + ("record_count", "chunksize"), + [ + pytest.param(100, 100, id="small"), + pytest.param(1_000, 1_000, id="medium"), + pytest.param(10_000, 10_000, id="large"), + ], +) +def test_add_memberships_from_records_benchmark( + benchmark, + db_instance_with_schema: PlexosDB, + record_count: int, + chunksize: int, +) -> None: + """Benchmark `add_memberships_from_records` across different payload sizes.""" + db = db_instance_with_schema + parent_class_id = db.get_class_id(ClassEnum.Generator) + child_class_id = db.get_class_id(ClassEnum.Node) + collection_id = db.get_collection_id( + CollectionEnum.Nodes, + parent_class_enum=ClassEnum.Generator, + child_class_enum=ClassEnum.Node, + ) + parent_ids = _insert_objects( + db, + class_id=parent_class_id, + count=record_count, + prefix=f"benchmark_parent_{record_count}", + start_id=10_000, + ) + child_ids = _insert_objects( + db, + class_id=child_class_id, + count=record_count, + prefix=f"benchmark_child_{record_count}", + start_id=50_000, + ) + records = [ + { + "parent_class_id": parent_class_id, + "parent_object_id": parent_id, + "collection_id": collection_id, + "child_class_id": child_class_id, + "child_object_id": child_id, + } + for parent_id, child_id in zip(parent_ids, child_ids) + ] + + def _reset_memberships() -> None: + db._db.execute( + ( + "DELETE FROM t_membership " + "WHERE collection_id = ? AND parent_class_id = ? AND child_class_id = ?" + ), + (collection_id, parent_class_id, child_class_id), + ) + + result = benchmark.pedantic( + db.add_memberships_from_records, + args=(records,), + kwargs={"chunksize": chunksize}, + setup=_reset_memberships, + rounds=10, + iterations=1, + ) + assert result is True diff --git a/pyproject.toml b/pyproject.toml index c244d1d..eda50e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ dev = [ "mypy>=1.15.0", "pre-commit>=4.2.0", "pytest>=8.3.5", + "pytest-benchmark>=5.1.0", "pytest-coverage>=0.0", "ruff>=0.11.5", ] @@ -157,6 +158,7 @@ markers = [ "checks: Functions that check existence of database entities", "getters: Functions that get data", "adders: Functions that add data", + "benchmark: Performance benchmark tests", "empty_database: Functions for test empty database", "export: Functions that export the database.", "listing: Functions that list elements of the database.", diff --git a/uv.lock b/uv.lock index e7b7044..01bcc3c 100644 --- a/uv.lock +++ b/uv.lock @@ -718,6 +718,7 @@ dev = [ { name = "mypy" }, { name = "pre-commit" }, { name = "pytest" }, + { name = "pytest-benchmark" }, { name = "pytest-coverage" }, { name = "ruff" }, ] @@ -744,6 +745,7 @@ dev = [ { name = "mypy", specifier = ">=1.15.0" }, { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = ">=8.3.5" }, + { name = "pytest-benchmark", specifier = ">=5.1.0" }, { name = "pytest-coverage", specifier = ">=0.0" }, { name = "ruff", specifier = ">=0.11.5" }, ] @@ -816,6 +818,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, ] +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, +] + [[package]] name = "pydata-sphinx-theme" version = "0.15.4" @@ -860,6 +871,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] +[[package]] +name = "pytest-benchmark" +version = "5.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "py-cpuinfo" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/34/9f732b76456d64faffbef6232f1f9dbec7a7c4999ff46282fa418bd1af66/pytest_benchmark-5.2.3.tar.gz", hash = "sha256:deb7317998a23c650fd4ff76e1230066a76cb45dcece0aca5607143c619e7779", size = 341340, upload-time = "2025-11-09T18:48:43.215Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/29/e756e715a48959f1c0045342088d7ca9762a2f509b945f362a316e9412b7/pytest_benchmark-5.2.3-py3-none-any.whl", hash = "sha256:bc839726ad20e99aaa0d11a127445457b4219bdb9e80a1afc4b51da7f96b0803", size = 45255, upload-time = "2025-11-09T18:48:39.765Z" }, +] + [[package]] name = "pytest-cov" version = "6.2.1" From 96fc22cccf90cba60a812d6d9982e008ca50f77d Mon Sep 17 00:00:00 2001 From: pesap Date: Thu, 5 Mar 2026 13:34:28 -0700 Subject: [PATCH 2/4] fix: Adding correct bulk insert for memberships --- benchmarks/README.md | 6 ++ .../test_add_memberships_from_records.py | 18 +++--- src/plexosdb/db.py | 51 ++++++++++++--- tests/test_plexosdb_from_records.py | 62 +++++++++++++++++++ 4 files changed, 121 insertions(+), 16 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 1471983..f549276 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -13,3 +13,9 @@ Run only membership bulk insert benchmark: ```bash uv run pytest benchmarks/test_add_memberships_from_records.py --benchmark-only --no-cov ``` + +Run the `300k` case only: + +```bash +uv run pytest benchmarks/test_add_memberships_from_records.py -k xlarge_300k --benchmark-only --no-cov +``` diff --git a/benchmarks/test_add_memberships_from_records.py b/benchmarks/test_add_memberships_from_records.py index ff98e0f..639af87 100644 --- a/benchmarks/test_add_memberships_from_records.py +++ b/benchmarks/test_add_memberships_from_records.py @@ -28,11 +28,12 @@ def _insert_objects( @pytest.mark.benchmark @pytest.mark.parametrize( - ("record_count", "chunksize"), + ("record_count", "chunksize", "rounds"), [ - pytest.param(100, 100, id="small"), - pytest.param(1_000, 1_000, id="medium"), - pytest.param(10_000, 10_000, id="large"), + pytest.param(100, 100, 10, id="small"), + pytest.param(1_000, 1_000, 10, id="medium"), + pytest.param(10_000, 10_000, 10, id="large"), + pytest.param(300_000, 10_000, 2, id="xlarge_300k"), ], ) def test_add_memberships_from_records_benchmark( @@ -40,6 +41,7 @@ def test_add_memberships_from_records_benchmark( db_instance_with_schema: PlexosDB, record_count: int, chunksize: int, + rounds: int, ) -> None: """Benchmark `add_memberships_from_records` across different payload sizes.""" db = db_instance_with_schema @@ -53,7 +55,7 @@ def test_add_memberships_from_records_benchmark( parent_ids = _insert_objects( db, class_id=parent_class_id, - count=record_count, + count=1, prefix=f"benchmark_parent_{record_count}", start_id=10_000, ) @@ -67,12 +69,12 @@ def test_add_memberships_from_records_benchmark( records = [ { "parent_class_id": parent_class_id, - "parent_object_id": parent_id, + "parent_object_id": parent_ids[0], "collection_id": collection_id, "child_class_id": child_class_id, "child_object_id": child_id, } - for parent_id, child_id in zip(parent_ids, child_ids) + for child_id in child_ids ] def _reset_memberships() -> None: @@ -89,7 +91,7 @@ def _reset_memberships() -> None: args=(records,), kwargs={"chunksize": chunksize}, setup=_reset_memberships, - rounds=10, + rounds=rounds, iterations=1, ) assert result is True diff --git a/src/plexosdb/db.py b/src/plexosdb/db.py index 5ff7f6f..f3b6fe1 100644 --- a/src/plexosdb/db.py +++ b/src/plexosdb/db.py @@ -13,7 +13,6 @@ from loguru import logger -from .checks import check_memberships_from_records from .db_manager import SQLiteManager from .enums import ( ClassEnum, @@ -31,6 +30,7 @@ ) from .utils import ( apply_scenario_tags, + batched, create_membership_record, get_system_object_name, insert_property_texts, @@ -585,18 +585,53 @@ def add_memberships_from_records( >>> db.add_memberships_from_records(records) True """ - if not check_memberships_from_records(records): - msg = "Some of the records do not have all the required fields. " - msg += "Check construction of records." - raise KeyError(msg) + if not records: + logger.debug("No membership records provided") + return True + + if chunksize < 1: + msg = f"chunksize must be >= 1, received {chunksize}" + raise ValueError(msg) + query = f""" INSERT INTO {Schema.Memberships.name} (parent_class_id,parent_object_id, collection_id, child_class_id, child_object_id) VALUES - (:parent_class_id, :parent_object_id, :collection_id, :child_class_id, :child_object_id) + (?, ?, ?, ?, ?) """ - query_status = self._db.executemany(query, records) - assert query_status + error_msg = "Some of the records do not have all the required fields. Check construction of records." + + def prepare_batch( + batch_records: Sequence[dict[str, int]], + ) -> list[tuple[int, int, int, int, int]]: + params: list[tuple[int, int, int, int, int]] = [] + for record in batch_records: + # Keep strict validation semantics: exact keys, no missing or extra fields. + if len(record) != 5: + raise KeyError(error_msg) + try: + params.append( + ( + record["parent_class_id"], + record["parent_object_id"], + record["collection_id"], + record["child_class_id"], + record["child_object_id"], + ) + ) + except KeyError as exc: + raise KeyError(error_msg) from exc + return params + + with self._db.transaction(): + if chunksize >= len(records): + query_status = self._db.executemany(query, prepare_batch(records)) + assert query_status + else: + for batch in batched(records, chunksize): + query_status = self._db.executemany(query, prepare_batch(batch)) + assert query_status + logger.debug("Added {} memberships.", len(records)) return True diff --git a/tests/test_plexosdb_from_records.py b/tests/test_plexosdb_from_records.py index 3bd7a65..9ea0c6f 100644 --- a/tests/test_plexosdb_from_records.py +++ b/tests/test_plexosdb_from_records.py @@ -189,6 +189,68 @@ def test_bulk_insert_memberships_from_records(db_base: PlexosDB): _ = db.add_memberships_from_records(memberships) +def test_bulk_insert_memberships_from_records_respects_chunksize( + db_instance_with_schema: PlexosDB, + monkeypatch: pytest.MonkeyPatch, +): + from plexosdb import ClassEnum, CollectionEnum + + db = db_instance_with_schema + parent_names = [f"ChunkGen_{idx}" for idx in range(5)] + child_names = [f"ChunkNode_{idx}" for idx in range(5)] + + db.add_objects(ClassEnum.Generator, *parent_names) + db.add_objects(ClassEnum.Node, *child_names) + + parent_object_ids = db.get_objects_id(parent_names, class_enum=ClassEnum.Generator) + child_object_ids = db.get_objects_id(child_names, class_enum=ClassEnum.Node) + parent_class_id = db.get_class_id(ClassEnum.Generator) + child_class_id = db.get_class_id(ClassEnum.Node) + collection_id = db.get_collection_id( + CollectionEnum.Nodes, + parent_class_enum=ClassEnum.Generator, + child_class_enum=ClassEnum.Node, + ) + memberships = [ + { + "collection_id": collection_id, + "parent_class_id": parent_class_id, + "child_class_id": child_class_id, + "child_object_id": child_id, + "parent_object_id": parent_id, + } + for parent_id, child_id in zip(parent_object_ids, child_object_ids) + ] + + observed_batch_sizes: list[int] = [] + original_executemany = db._db.executemany + + def spy_executemany(query, params_seq): + observed_batch_sizes.append(len(params_seq)) + return original_executemany(query, params_seq) + + monkeypatch.setattr(db._db, "executemany", spy_executemany) + db.add_memberships_from_records(memberships, chunksize=2) + + assert observed_batch_sizes == [2, 2, 1] + + +def test_bulk_insert_memberships_from_records_rejects_non_positive_chunksize( + db_instance_with_schema: PlexosDB, +): + db = db_instance_with_schema + membership = { + "parent_class_id": 2, + "parent_object_id": 1, + "collection_id": 3, + "child_class_id": 3, + "child_object_id": 1, + } + + with pytest.raises(ValueError, match="chunksize must be >= 1"): + db.add_memberships_from_records([membership], chunksize=0) + + def test_add_properties_from_records_no_records(db_instance_with_schema: PlexosDB, caplog): """Gracefully handle empty payload.""" from plexosdb import ClassEnum, CollectionEnum From ed2d69926a4fc3d921b8680678cbb32dc10cc99c Mon Sep 17 00:00:00 2001 From: pesap Date: Thu, 5 Mar 2026 13:37:54 -0700 Subject: [PATCH 3/4] test: Adding more coverage --- tests/test_checks.py | 31 +++++++++++++++++++++++++++++ tests/test_plexosdb_from_records.py | 24 ++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 tests/test_checks.py diff --git a/tests/test_checks.py b/tests/test_checks.py new file mode 100644 index 0000000..db6240d --- /dev/null +++ b/tests/test_checks.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from plexosdb.checks import check_memberships_from_records + + +def test_check_memberships_from_records_valid_payload() -> None: + records = [ + { + "parent_class_id": 1, + "parent_object_id": 2, + "collection_id": 3, + "child_class_id": 4, + "child_object_id": 5, + } + ] + + assert check_memberships_from_records(records) is True + + +def test_check_memberships_from_records_invalid_payload() -> None: + records = [ + { + "parent_class_id": 1, + "parent_object_id": 2, + "collection_id": 3, + "child_class_id": 4, + "bad_key": 5, + } + ] + + assert check_memberships_from_records(records) is False diff --git a/tests/test_plexosdb_from_records.py b/tests/test_plexosdb_from_records.py index 9ea0c6f..225ded2 100644 --- a/tests/test_plexosdb_from_records.py +++ b/tests/test_plexosdb_from_records.py @@ -251,6 +251,30 @@ def test_bulk_insert_memberships_from_records_rejects_non_positive_chunksize( db.add_memberships_from_records([membership], chunksize=0) +def test_bulk_insert_memberships_from_records_accepts_empty_records( + db_instance_with_schema: PlexosDB, +): + db = db_instance_with_schema + + assert db.add_memberships_from_records([]) is True + + +def test_bulk_insert_memberships_from_records_invalid_keys_shape( + db_instance_with_schema: PlexosDB, +): + db = db_instance_with_schema + invalid_membership = { + "parent_class_id": 2, + "parent_object_id": 1, + "collection_id": 3, + "child_class_id": 3, + "bad_key": 1, + } + + with pytest.raises(KeyError, match="Some of the records do not have all the required fields"): + db.add_memberships_from_records([invalid_membership]) + + def test_add_properties_from_records_no_records(db_instance_with_schema: PlexosDB, caplog): """Gracefully handle empty payload.""" from plexosdb import ClassEnum, CollectionEnum From 643bf45e9850f1748460ba93f2d567a7df45ed0b Mon Sep 17 00:00:00 2001 From: pesap Date: Thu, 5 Mar 2026 15:11:08 -0700 Subject: [PATCH 4/4] fix: add docstring for membership batch helper --- src/plexosdb/db.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plexosdb/db.py b/src/plexosdb/db.py index f3b6fe1..9184809 100644 --- a/src/plexosdb/db.py +++ b/src/plexosdb/db.py @@ -604,6 +604,7 @@ def add_memberships_from_records( def prepare_batch( batch_records: Sequence[dict[str, int]], ) -> list[tuple[int, int, int, int, int]]: + """Validate records and map each membership dict to positional SQL parameters.""" params: list[tuple[int, int, int, int, int]] = [] for record in batch_records: # Keep strict validation semantics: exact keys, no missing or extra fields.