From e7bff7b4f0f7f3e1c33941b7cad03d9e04b951b3 Mon Sep 17 00:00:00 2001 From: Steven Bal Date: Mon, 3 Mar 2025 10:02:51 +0100 Subject: [PATCH 1/3] :heavy_plus_sign: [#538] Add django-silk for performance profiling --- requirements/dev.in | 2 ++ requirements/dev.txt | 11 +++++++++++ src/objects/conf/dev.py | 9 +++++++++ src/objects/urls.py | 4 ++++ 4 files changed, 26 insertions(+) diff --git a/requirements/dev.in b/requirements/dev.in index 2eb0f09b..15c253b5 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -8,6 +8,8 @@ bump-my-version # Debug tooling django-debug-toolbar django-extensions +django-silk +whitenoise # Documentation sphinx diff --git a/requirements/dev.txt b/requirements/dev.txt index 5cddfd21..c52e63a6 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -42,6 +42,8 @@ attrs==20.3.0 # -r requirements/ci.txt # glom # jsonschema +autopep8==2.3.2 + # via django-silk babel==2.16.0 # via # -c requirements/ci.txt @@ -206,6 +208,7 @@ django==4.2.19 # django-sendfile2 # django-sessionprofile # django-setup-configuration + # django-silk # django-simple-certmanager # django-solo # django-two-factor-auth @@ -326,6 +329,8 @@ django-setup-configuration==0.7.1 # -c requirements/ci.txt # -r requirements/ci.txt # open-api-framework +django-silk==5.3.2 + # via -r requirements/dev.in django-simple-certmanager==1.4.1 # via # -c requirements/ci.txt @@ -448,6 +453,8 @@ glom==23.5.0 # -c requirements/ci.txt # -r requirements/ci.txt # mozilla-django-oidc-db +gprof2dot==2024.6.6 + # via django-silk h11==0.14.0 # via httpcore httpcore==1.0.7 @@ -637,6 +644,7 @@ pycodestyle==2.12.1 # via # -c requirements/ci.txt # -r requirements/ci.txt + # autopep8 # flake8 pycparser==2.20 # via @@ -881,6 +889,7 @@ sqlparse==0.5.0 # -r requirements/ci.txt # django # django-debug-toolbar + # django-silk tblib==1.7.0 # via # -c requirements/ci.txt @@ -975,6 +984,8 @@ webtest==2.0.35 # django-webtest wheel==0.42.0 # via pip-tools +whitenoise==6.9.0 + # via -r requirements/dev.in wrapt==1.14.1 # via # -c requirements/ci.txt diff --git a/src/objects/conf/dev.py b/src/objects/conf/dev.py index 0c96e2f7..9f2b0adf 100644 --- a/src/objects/conf/dev.py +++ b/src/objects/conf/dev.py @@ -102,6 +102,15 @@ r"django\.db\.models\.fields", ) +# +# DJANGO-SILK +# +if config("PROFILE", default=False, add_to_docs=False): + INSTALLED_APPS += ["silk"] + MIDDLEWARE = ["silk.middleware.SilkyMiddleware"] + MIDDLEWARE + security_index = MIDDLEWARE.index("django.middleware.security.SecurityMiddleware") + MIDDLEWARE.insert(security_index + 1, "whitenoise.middleware.WhiteNoiseMiddleware") + if "test" in sys.argv: NOTIFICATIONS_DISABLED = True diff --git a/src/objects/urls.py b/src/objects/urls.py index 7350aba6..20c80b78 100644 --- a/src/objects/urls.py +++ b/src/objects/urls.py @@ -79,3 +79,7 @@ urlpatterns = [ path("__debug__/", include(debug_toolbar.urls)), ] + urlpatterns + + +if apps.is_installed("silk"): + urlpatterns += [path(r"silk/", include("silk.urls", namespace="silk"))] From 9c8d3dba36d68ea9d85d22230b342f59bf66018d Mon Sep 17 00:00:00 2001 From: Steven Bal Date: Tue, 4 Mar 2025 12:01:24 +0100 Subject: [PATCH 2/3] :construction_worker: [#538] Add performance test for objects API list --- .github/workflows/ci.yml | 22 ++++++++++++++++++++ performance_test/conftest.py | 13 ++++++++++++ performance_test/create_data.py | 17 ++++++++++++++++ performance_test/test_objects_list.py | 29 +++++++++++++++++++++++++++ requirements/ci.txt | 6 ++++++ requirements/dev.txt | 10 +++++++++ requirements/test-tools.in | 1 + 7 files changed, 98 insertions(+) create mode 100644 performance_test/conftest.py create mode 100644 performance_test/create_data.py create mode 100644 performance_test/test_objects_list.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55eb4174..ae2c2473 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,6 +49,28 @@ jobs: - name: Publish coverage report uses: codecov/codecov-action@v4 + performance-tests: + name: Run the performance test suite + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Bring up docker compose and load data + run: | + docker compose up -d --build || ( docker compose logs >&2 && exit 1; ) + until docker compose logs web | grep -q "spawned uWSGI worker"; do + echo "uWSGI not running yet, waiting..." + sleep 3 + done + docker compose exec --user root web pip install factory-boy + cat performance_test/create_data.py | docker compose exec -T web src/manage.py shell + + - name: Run tests + run: | + pip install -r requirements/ci.txt + pytest performance_test/ --benchmark-json output.json + docs: runs-on: ubuntu-latest name: Documentation build diff --git a/performance_test/conftest.py b/performance_test/conftest.py new file mode 100644 index 00000000..02cff041 --- /dev/null +++ b/performance_test/conftest.py @@ -0,0 +1,13 @@ +import pytest + + +@pytest.fixture +def benchmark_assertions(benchmark): + def wrapper(**kwargs): + stats = benchmark.stats["stats"] + for name, value in kwargs.items(): + assert ( + getattr(stats, name) < value + ), f"{name} {getattr(stats, name)}s exceeded {value}s" + + return wrapper diff --git a/performance_test/create_data.py b/performance_test/create_data.py new file mode 100644 index 00000000..b58d79a7 --- /dev/null +++ b/performance_test/create_data.py @@ -0,0 +1,17 @@ +from objects.core.tests.factories import ObjectRecordFactory, ObjectTypeFactory +from objects.token.tests.factories import TokenAuthFactory + +object_type = ObjectTypeFactory.create( + service__api_root="http://localhost:8001/api/v2/", + uuid="f1220670-8ab7-44f1-a318-bd0782e97662", +) + +token = TokenAuthFactory(token="secret", is_superuser=True) + +ObjectRecordFactory.create_batch( + 5000, + object__object_type=object_type, + start_at="2020-01-01", + version=1, + data={"kiemjaar": "1234"}, +) diff --git a/performance_test/test_objects_list.py b/performance_test/test_objects_list.py new file mode 100644 index 00000000..8cfc0676 --- /dev/null +++ b/performance_test/test_objects_list.py @@ -0,0 +1,29 @@ +import pytest +import requests +from furl import furl + +BASE_URL = furl("http://localhost:8000/api/v2/") +AUTH_HEADERS = {"Authorization": "Token secret"} + + +@pytest.mark.benchmark(max_time=60, min_rounds=5) +def test_objects_api_list(benchmark, benchmark_assertions): + """ + Regression test for maykinmedia/objects-api#538 + """ + params = { + "pageSize": 1000, + "type": "http://localhost:8001/api/v2/objecttypes/f1220670-8ab7-44f1-a318-bd0782e97662", + "data_attrs": "kiemjaar__exact__1234", + "ordering": "-record__data__contactmoment__datumContact", + } + + def make_request(): + return requests.get((BASE_URL / "objects").set(params), headers=AUTH_HEADERS) + + result = benchmark(make_request) + + assert result.status_code == 200 + assert result.json()["count"] == 5000 + + benchmark_assertions(mean=1, max=1) diff --git a/requirements/ci.txt b/requirements/ci.txt index ddbb5d65..9a98178b 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -534,6 +534,8 @@ psycopg2==2.9.9 # -c requirements/base.txt # -r requirements/base.txt # open-api-framework +py-cpuinfo==9.0.0 + # via pytest-benchmark pycodestyle==2.12.1 # via flake8 pycparser==2.20 @@ -584,6 +586,10 @@ pyrsistent==0.17.3 # -r requirements/base.txt # jsonschema pytest==8.3.3 + # via + # -r requirements/test-tools.in + # pytest-benchmark +pytest-benchmark==5.1.0 # via -r requirements/test-tools.in python-dateutil==2.9.0.post0 # via diff --git a/requirements/dev.txt b/requirements/dev.txt index c52e63a6..9fa75c9c 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -640,6 +640,11 @@ psycopg2==2.9.9 # -c requirements/ci.txt # -r requirements/ci.txt # open-api-framework +py-cpuinfo==9.0.0 + # via + # -c requirements/ci.txt + # -r requirements/ci.txt + # pytest-benchmark pycodestyle==2.12.1 # via # -c requirements/ci.txt @@ -711,6 +716,11 @@ pytest==8.3.3 # via # -c requirements/ci.txt # -r requirements/ci.txt + # pytest-benchmark +pytest-benchmark==5.1.0 + # via + # -c requirements/ci.txt + # -r requirements/ci.txt python-dateutil==2.9.0.post0 # via # -c requirements/ci.txt diff --git a/requirements/test-tools.in b/requirements/test-tools.in index 09d0e9cf..120cdd00 100644 --- a/requirements/test-tools.in +++ b/requirements/test-tools.in @@ -4,6 +4,7 @@ # Dependencies only relevant for (unit) testing codecov pytest +pytest-benchmark coverage < 5.0 django-webtest factory-boy From 39b01fab943491aa34ed0d544570e1fb5c76386d Mon Sep 17 00:00:00 2001 From: Steven Bal Date: Tue, 4 Mar 2025 13:28:58 +0100 Subject: [PATCH 3/3] :zap: [#538] Optimize objects list performance the filter to only include the objectrecords with the highest index per object was causing major performance degradations, especially in combination with filters on data_attrs. Instead of using `Max(...)` together with GROUP BY, we now use Window to figure out the max index per object which is more efficient for larger datasets --- src/objects/core/query.py | 22 +++++++++----- src/objects/tests/v2/test_object_api.py | 38 +++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/src/objects/core/query.py b/src/objects/core/query.py index bd1116fe..c6ee0d58 100644 --- a/src/objects/core/query.py +++ b/src/objects/core/query.py @@ -1,4 +1,6 @@ from django.db import models +from django.db.models import F, Window +from django.db.models.functions import RowNumber from vng_api_common.utils import get_uuid_from_path from zgw_consumers.models import Service @@ -43,14 +45,20 @@ def keep_max_record_per_object(self): """ Return records with the largest index for the object """ - filtered_records = self.order_by() - grouped_records = ( - filtered_records.filter(object=models.OuterRef("object")) - .values("object") - .annotate(max_index=models.Max("index")) - .values("max_index") + filtered_records = ( + self.filter(object=models.OuterRef("object")) + .annotate( + row_number=Window( + expression=RowNumber(), + partition_by=[F("object")], + order_by=F("index").desc(), + ) + ) + .filter(row_number=1) + .values("index") ) - return self.filter(index=models.Subquery(grouped_records)) + + return self.filter(index__in=filtered_records) def filter_for_date(self, date): """ diff --git a/src/objects/tests/v2/test_object_api.py b/src/objects/tests/v2/test_object_api.py index ba572946..7c28f1eb 100644 --- a/src/objects/tests/v2/test_object_api.py +++ b/src/objects/tests/v2/test_object_api.py @@ -594,6 +594,44 @@ def test_list_available_today(self): self.assertEqual(object_data["uuid"], str(self.object.uuid)) self.assertEqual(object_data["record"]["data"], {"name": "new"}) + @freeze_time("2024-08-31") + def test_only_show_latest_index(self): + """ + In the list endpoint, only the latest record that existed at the given date + should show up + """ + object_url = reverse("object-detail", kwargs={"uuid": self.object.uuid}) + object2 = ObjectFactory.create(object_type=self.object_type) + object2_url = reverse("object-detail", kwargs={"uuid": object2.uuid}) + ObjectRecordFactory.create( + object=object2, + index=1, + data={"name": "old"}, + start_at="2024-08-01", + end_at="2024-08-28", + registration_at="2024-08-02", + ) + ObjectRecordFactory.create( + object=object2, + index=2, + data={"name": "new"}, + start_at="2024-08-28", + end_at="2024-09-30", + registration_at="2024-08-02", + ) + + response = self.client.get(self.url, {"date": "2024-08-30"}) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + + data = response.json() + + self.assertEqual(data["count"], 2) + self.assertEqual(data["results"][0]["record"]["index"], 2) + self.assertEqual(data["results"][0]["url"], f"http://testserver{object2_url}") + self.assertEqual(data["results"][1]["record"]["index"], 1) + self.assertEqual(data["results"][1]["url"], f"http://testserver{object_url}") + def test_list_available_for_date(self): with self.subTest("filter on old name"): response = self.client.get(