Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Benchmark Tests

This folder contains `pytest-benchmark` performance tests for hot paths.

Run all benchmark tests:

```bash
uv run pytest benchmarks --benchmark-only --no-cov
```

Run only the membership bulk-insert benchmark:

```bash
uv run pytest benchmarks/test_add_memberships_from_records.py --benchmark-only --no-cov
```

Run the `300k` case only:

```bash
uv run pytest benchmarks/test_add_memberships_from_records.py -k xlarge_300k --benchmark-only --no-cov
```
99 changes: 99 additions & 0 deletions benchmarks/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Shared fixtures for benchmark tests."""

from __future__ import annotations

import uuid
from collections.abc import Generator

import pytest

from plexosdb import PlexosDB


@pytest.fixture(scope="function")
def db_instance_with_schema() -> Generator[PlexosDB, None, None]:
    """Yield a minimal schema-backed database for benchmark runs.

    The schema is created and then seeded, inside a single transaction, with
    eight classes, the ``System`` object, nine collections, three units,
    three properties, the ``Version`` config entry, one attribute and one
    property report.

    Yields
    ------
    PlexosDB
        The seeded database instance.

    Notes
    -----
    The underlying connection is closed in a ``finally`` clause, so it is
    released after the test finishes and also when schema creation or
    seeding raises before the test ever starts.
    """
    # Class ids are assigned 1..8 in this order; descriptions are "<name> class".
    class_names = (
        "System",
        "Generator",
        "Node",
        "Scenario",
        "DataFile",
        "Storage",
        "Report",
        "Model",
    )
    # (collection_id, parent_class_id, child_class_id, name)
    collections = (
        (1, 1, 2, "Generators"),
        (2, 1, 3, "Nodes"),
        (3, 2, 3, "Nodes"),
        (4, 1, 4, "Scenarios"),
        (5, 1, 6, "Storages"),
        (6, 1, 8, "Models"),
        (7, 8, 7, "Models"),
        (8, 1, 7, "Reports"),
        (9, 8, 4, "Scenarios"),
    )
    # (unit_id, value)
    units = ((1, "MW"), (2, "MWh"), (3, "%"))
    # (property_id, collection_id, unit_id, name)
    properties = (
        (1, 1, 1, "Max Capacity"),
        (2, 1, 2, "Max Energy"),
        (3, 1, 1, "Rating Factor"),
    )

    db = PlexosDB()
    try:
        db.create_schema()
        with db._db.transaction():
            for class_id, class_name in enumerate(class_names, start=1):
                db._db.execute(
                    "INSERT INTO t_class(class_id, name, description) VALUES (?, ?, ?)",
                    (class_id, class_name, f"{class_name} class"),
                )
            db._db.execute(
                "INSERT INTO t_object(object_id, name, class_id, GUID) VALUES (1, 'System', 1, ?)",
                (str(uuid.uuid4()),),
            )
            for collection_row in collections:
                db._db.execute(
                    "INSERT INTO t_collection(collection_id, parent_class_id, child_class_id, name) "
                    "VALUES (?, ?, ?, ?)",
                    collection_row,
                )
            for unit_row in units:
                db._db.execute("INSERT INTO t_unit(unit_id, value) VALUES (?, ?)", unit_row)
            for property_row in properties:
                db._db.execute(
                    "INSERT INTO t_property(property_id, collection_id, unit_id, name) "
                    "VALUES (?, ?, ?, ?)",
                    property_row,
                )
            db._db.execute("INSERT INTO t_config(element, value) VALUES ('Version', '9.2')")
            db._db.execute(
                "INSERT INTO t_attribute(attribute_id, class_id, name) VALUES( 1, 2, 'Latitude')"
            )
            db._db.execute(
                "INSERT INTO t_property_report(property_id, collection_id, name) VALUES (1, 1, 'Units')"
            )
        yield db
    finally:
        # Runs on normal teardown and when setup fails, so the connection never leaks.
        db._db.close()
97 changes: 97 additions & 0 deletions benchmarks/test_add_memberships_from_records.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Benchmark coverage for bulk membership insertion."""

from __future__ import annotations

import uuid

import pytest

from plexosdb import ClassEnum, CollectionEnum, PlexosDB


def _insert_objects(
    db: PlexosDB,
    *,
    class_id: int,
    count: int,
    prefix: str,
    start_id: int,
) -> list[int]:
    """Bulk-insert ``count`` rows into ``t_object`` and return their object ids.

    Ids are consecutive, starting at ``start_id``. Each row is named
    ``"<prefix>_<offset>"`` (offset counted from zero), carries ``class_id``
    and gets a freshly generated UUID4 string as its GUID.
    """
    insert_sql = "INSERT INTO t_object(object_id, name, class_id, GUID) VALUES (?, ?, ?, ?)"
    rows = [
        (start_id + offset, f"{prefix}_{offset}", class_id, str(uuid.uuid4()))
        for offset in range(count)
    ]
    db._db.executemany(insert_sql, rows)
    return [row[0] for row in rows]


@pytest.mark.benchmark
@pytest.mark.parametrize(
    ("record_count", "chunksize", "rounds"),
    [
        pytest.param(100, 100, 10, id="small"),
        pytest.param(1_000, 1_000, 10, id="medium"),
        pytest.param(10_000, 10_000, 10, id="large"),
        pytest.param(300_000, 10_000, 2, id="xlarge_300k"),
    ],
)
def test_add_memberships_from_records_benchmark(
    benchmark,
    db_instance_with_schema: PlexosDB,
    record_count: int,
    chunksize: int,
    rounds: int,
) -> None:
    """Time `add_memberships_from_records` for several payload sizes.

    A single Generator parent is linked to ``record_count`` Node children
    through the Generator -> Node ``Nodes`` collection. The delete statement
    for those memberships is handed to ``benchmark.pedantic`` as its ``setup``
    hook, and the call is expected to return ``True``.
    """
    plexos = db_instance_with_schema
    generator_class_id = plexos.get_class_id(ClassEnum.Generator)
    node_class_id = plexos.get_class_id(ClassEnum.Node)
    nodes_collection_id = plexos.get_collection_id(
        CollectionEnum.Nodes,
        parent_class_enum=ClassEnum.Generator,
        child_class_enum=ClassEnum.Node,
    )

    # Exactly one parent object is created, so unpack the single id directly.
    (parent_id,) = _insert_objects(
        plexos,
        class_id=generator_class_id,
        count=1,
        prefix=f"benchmark_parent_{record_count}",
        start_id=10_000,
    )
    node_ids = _insert_objects(
        plexos,
        class_id=node_class_id,
        count=record_count,
        prefix=f"benchmark_child_{record_count}",
        start_id=50_000,
    )

    # Fields shared by every record; only the child object id varies.
    shared_fields = {
        "parent_class_id": generator_class_id,
        "parent_object_id": parent_id,
        "collection_id": nodes_collection_id,
        "child_class_id": node_class_id,
    }
    payload = [{**shared_fields, "child_object_id": node_id} for node_id in node_ids]

    delete_sql = (
        "DELETE FROM t_membership "
        "WHERE collection_id = ? AND parent_class_id = ? AND child_class_id = ?"
    )
    delete_params = (nodes_collection_id, generator_class_id, node_class_id)

    def _clear_memberships() -> None:
        """Remove the memberships this benchmark inserts."""
        plexos._db.execute(delete_sql, delete_params)

    outcome = benchmark.pedantic(
        plexos.add_memberships_from_records,
        args=(payload,),
        kwargs={"chunksize": chunksize},
        setup=_clear_memberships,
        rounds=rounds,
        iterations=1,
    )
    assert outcome is True
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ dev = [
"mypy>=1.15.0",
"pre-commit>=4.2.0",
"pytest>=8.3.5",
"pytest-benchmark>=5.1.0",
"pytest-coverage>=0.0",
"ruff>=0.11.5",
]
Expand Down Expand Up @@ -157,6 +158,7 @@ markers = [
"checks: Functions that check existence of database entities",
"getters: Functions that get data",
"adders: Functions that add data",
"benchmark: Performance benchmark tests",
"empty_database: Functions for test empty database",
"export: Functions that export the database.",
"listing: Functions that list elements of the database.",
Expand Down
52 changes: 44 additions & 8 deletions src/plexosdb/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

from loguru import logger

from .checks import check_memberships_from_records
from .db_manager import SQLiteManager
from .enums import (
ClassEnum,
Expand All @@ -31,6 +30,7 @@
)
from .utils import (
apply_scenario_tags,
batched,
create_membership_record,
get_system_object_name,
insert_property_texts,
Expand Down Expand Up @@ -585,18 +585,54 @@ def add_memberships_from_records(
>>> db.add_memberships_from_records(records)
True
"""
if not check_memberships_from_records(records):
msg = "Some of the records do not have all the required fields. "
msg += "Check construction of records."
raise KeyError(msg)
if not records:
logger.debug("No membership records provided")
return True

if chunksize < 1:
msg = f"chunksize must be >= 1, received {chunksize}"
raise ValueError(msg)

query = f"""
INSERT INTO {Schema.Memberships.name}
(parent_class_id,parent_object_id, collection_id, child_class_id, child_object_id)
VALUES
(:parent_class_id, :parent_object_id, :collection_id, :child_class_id, :child_object_id)
(?, ?, ?, ?, ?)
"""
query_status = self._db.executemany(query, records)
assert query_status
error_msg = "Some of the records do not have all the required fields. Check construction of records."

def prepare_batch(
batch_records: Sequence[dict[str, int]],
) -> list[tuple[int, int, int, int, int]]:
"""Validate records and map each membership dict to positional SQL parameters."""
params: list[tuple[int, int, int, int, int]] = []
for record in batch_records:
# Keep strict validation semantics: exact keys, no missing or extra fields.
if len(record) != 5:
raise KeyError(error_msg)
try:
params.append(
(
record["parent_class_id"],
record["parent_object_id"],
record["collection_id"],
record["child_class_id"],
record["child_object_id"],
)
)
except KeyError as exc:
raise KeyError(error_msg) from exc
return params

with self._db.transaction():
if chunksize >= len(records):
query_status = self._db.executemany(query, prepare_batch(records))
assert query_status
else:
for batch in batched(records, chunksize):
query_status = self._db.executemany(query, prepare_batch(batch))
assert query_status

logger.debug("Added {} memberships.", len(records))
return True

Expand Down
31 changes: 31 additions & 0 deletions tests/test_checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import annotations

from plexosdb.checks import check_memberships_from_records


def test_check_memberships_from_records_valid_payload() -> None:
    """The check returns ``True`` for a single record with all five membership keys."""
    complete_record = {
        "parent_class_id": 1,
        "parent_object_id": 2,
        "collection_id": 3,
        "child_class_id": 4,
        "child_object_id": 5,
    }

    outcome = check_memberships_from_records([complete_record])

    assert outcome is True


def test_check_memberships_from_records_invalid_payload() -> None:
    """The check returns ``False`` when a record carries ``bad_key`` instead of ``child_object_id``."""
    malformed_record = {
        "parent_class_id": 1,
        "parent_object_id": 2,
        "collection_id": 3,
        "child_class_id": 4,
        "bad_key": 5,
    }

    outcome = check_memberships_from_records([malformed_record])

    assert outcome is False
Loading