Skip to content

Commit

Permalink
Merge pull request #1035 from thunderstore-io/chunky
Browse files Browse the repository at this point in the history
Chunked package lists
  • Loading branch information
MythicManiac authored Sep 19, 2024
2 parents 39e4f1c + 01b309d commit 73385c9
Show file tree
Hide file tree
Showing 13 changed files with 686 additions and 14 deletions.
1 change: 1 addition & 0 deletions django/thunderstore/core/tests/test_celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_task():
"thunderstore.usermedia.tasks.celery_cleanup_expired_uploads",
"thunderstore.schema_import.tasks.sync_ecosystem_schema",
"thunderstore.repository.tasks.files.extract_package_version_file_tree",
"thunderstore.repository.tasks.update_chunked_package_caches",
"thunderstore.repository.tasks.update_experimental_package_index",
"thunderstore.repository.tasks.process_package_submission",
"thunderstore.repository.tasks.cleanup_package_submissions",
Expand Down
12 changes: 11 additions & 1 deletion django/thunderstore/repository/api/v1/tasks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from thunderstore.community.models import Community, CommunitySite
from thunderstore.core.utils import capture_exception
from thunderstore.repository.api.v1.viewsets import serialize_package_list_for_community
from thunderstore.repository.models.cache import APIV1PackageCache
from thunderstore.repository.models import APIV1ChunkedPackageCache, APIV1PackageCache


def update_api_v1_caches() -> None:
Expand Down Expand Up @@ -30,3 +30,13 @@ def update_api_v1_indexes() -> None:
except Exception as e: # pragma: no cover
capture_exception(e)
APIV1PackageCache.drop_stale_cache()


def update_api_v1_chunked_package_caches() -> None:
    """
    Rebuild the chunked API v1 package cache of every community.

    A failure while updating one community is reported through
    ``capture_exception`` and does not prevent the remaining communities
    from being processed. Stale caches are dropped once the loop is done.
    """
    communities = Community.objects.iterator()
    for target in communities:
        try:
            APIV1ChunkedPackageCache.update_for_community(target)
        except Exception as error:  # pragma: no cover
            # Best effort: report the problem and carry on with the next one.
            capture_exception(error)

    APIV1ChunkedPackageCache.drop_stale_cache()
21 changes: 20 additions & 1 deletion django/thunderstore/repository/api/v1/tests/test_api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
from thunderstore.core.factories import UserFactory
from thunderstore.repository.api.v1.tasks import update_api_v1_caches
from thunderstore.repository.api.v1.viewsets import PACKAGE_SERIALIZER
from thunderstore.repository.models.cache import APIV1PackageCache
from thunderstore.repository.models.cache import (
APIV1ChunkedPackageCache,
APIV1PackageCache,
)


@pytest.mark.django_db
Expand Down Expand Up @@ -240,3 +243,19 @@ def test_api_v1_package_listing_serializer_donation_link_omission(
assert result[0]["donation_link"] == donation_link
else:
assert "donation_link" not in result[0]


@pytest.mark.django_db
@pytest.mark.parametrize("has_cache", (False, True))
def test_api_v1_community_package_listing_index__depending_on_cache__returns_302_or_503(
    api_client: APIClient,
    community_site: CommunitySite,
    has_cache: bool,
) -> None:
    """
    The index endpoint answers 302 when a chunked cache exists for the
    community and 503 when it does not.
    """
    community = community_site.community
    if has_cache:
        APIV1ChunkedPackageCache.update_for_community(community)
    expected_status = 302 if has_cache else 503

    response = api_client.get(
        f"/c/{community.identifier}/api/v1/package-listing-index/",
    )

    assert response.status_code == expected_status
92 changes: 90 additions & 2 deletions django/thunderstore/repository/api/v1/tests/test_caches.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import gzip
import json
from datetime import timedelta
from random import shuffle
from typing import Any

import pytest
Expand All @@ -11,8 +13,12 @@
SiteFactory,
)
from thunderstore.community.models import Community, CommunitySite, PackageListing
from thunderstore.repository.api.v1.tasks import update_api_v1_caches
from thunderstore.repository.models import APIV1PackageCache
from thunderstore.repository.api.v1.tasks import (
update_api_v1_caches,
update_api_v1_chunked_package_caches,
)
from thunderstore.repository.models import APIV1ChunkedPackageCache, APIV1PackageCache
from thunderstore.repository.models.cache import get_package_listing_chunk


@pytest.mark.django_db
Expand Down Expand Up @@ -139,3 +145,85 @@ def test_api_v1_cache_building_package_url_simple(
with gzip.GzipFile(fileobj=cache.data, mode="r") as f:
result = json.loads(f.read())
assert result[0]["package_url"].startswith(expected_prefix)


@pytest.mark.django_db
def test_api_v1_chunked_package_cache__builds_index_and_chunks(
    community: Community,
    settings: Any,
) -> None:
    """
    Running the cache update task creates a cache for the community whose
    index blob lists one URL per stored chunk.
    """
    PackageListingFactory(community_=community)
    # Nothing has been built before the task runs.
    assert APIV1ChunkedPackageCache.get_latest_for_community(community) is None

    update_api_v1_chunked_package_caches()

    latest = APIV1ChunkedPackageCache.get_latest_for_community(community)
    assert latest is not None
    assert latest.index.data_url.startswith(settings.AWS_S3_ENDPOINT_URL)

    chunk_urls = APIV1ChunkedPackageCache.get_blob_content(latest.index)
    assert isinstance(chunk_urls, list)
    assert len(chunk_urls) == latest.chunks.entries.count()
    first_url = chunk_urls[0]
    assert first_url.startswith(settings.AWS_S3_ENDPOINT_URL)


@pytest.mark.django_db
def test_api_v1_chunked_package_cache__drops_stale_caches() -> None:
    """
    Caches are currently only soft deleted.

    Walks two caches of a single community through the stale-cache cleanup:
    a lone cache is kept, a recent older cache is kept, an older cache past
    the cutoff is flagged as deleted, and the latest cache is kept even when
    it is past the cutoff.

    ``drop_stale_cache()`` is a class-level call that is never handed the
    instances held by this test, so each instance is reloaded from the
    database before its ``is_deleted`` flag is asserted.
    """
    PackageListingFactory()
    assert not APIV1ChunkedPackageCache.objects.exists()

    update_api_v1_chunked_package_caches()
    first_cache = APIV1ChunkedPackageCache.objects.get()
    assert not first_cache.is_deleted

    # Only one cache for the community exists, so it won't be dropped.
    APIV1ChunkedPackageCache.drop_stale_cache()
    first_cache.refresh_from_db()
    assert not first_cache.is_deleted

    # Two caches exist, but neither is beyond the cutoff period.
    update_api_v1_chunked_package_caches()
    APIV1ChunkedPackageCache.drop_stale_cache()
    second_cache = APIV1ChunkedPackageCache.get_latest_for_community(
        first_cache.community,
    )
    assert APIV1ChunkedPackageCache.objects.count() == 2
    assert second_cache
    assert second_cache.pk != first_cache.pk
    # second_cache was fetched after the cleanup; first_cache needs a reload.
    first_cache.refresh_from_db()
    assert not first_cache.is_deleted
    assert not second_cache.is_deleted

    # The older cache should be dropped after the cutoff period.
    cutoff = timedelta(hours=APIV1ChunkedPackageCache.CACHE_CUTOFF_HOURS)
    first_cache.created_at = first_cache.created_at - cutoff
    first_cache.save()
    APIV1ChunkedPackageCache.drop_stale_cache()
    first_cache.refresh_from_db()
    second_cache.refresh_from_db()
    assert first_cache.is_deleted
    assert not second_cache.is_deleted

    # The latest cache should not be dropped even if older than the cutoff period.
    second_cache.created_at = second_cache.created_at - cutoff
    second_cache.save()
    APIV1ChunkedPackageCache.drop_stale_cache()
    first_cache.refresh_from_db()
    second_cache.refresh_from_db()
    assert first_cache.is_deleted
    assert not second_cache.is_deleted


@pytest.mark.django_db
@pytest.mark.parametrize("count", (0, 1, 2, 3, 5, 8, 13))
def test_get_package_listing_chunk__retains_received_ordering(count: int) -> None:
    """
    ``get_package_listing_chunk`` yields listings in exactly the order of
    the ids it was given, for several listing counts including zero.
    """
    assert not PackageListing.objects.exists()
    for _ in range(count):
        PackageListingFactory()

    ordering = list(PackageListing.objects.all().values_list("id", flat=True))
    shuffle(ordering)
    listings = get_package_listing_chunk(ordering)

    # Compare the whole sequences: checking only the items that happen to be
    # returned would let an empty or truncated result pass unnoticed.
    assert [listing.id for listing in listings] == ordering
6 changes: 6 additions & 0 deletions django/thunderstore/repository/api/v1/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from rest_framework import routers

from thunderstore.repository.api.v1.views.deprecate import DeprecateModApiView
from thunderstore.repository.api.v1.views.listing_index import PackageListingIndex
from thunderstore.repository.api.v1.views.metrics import (
PackageMetricsApiView,
PackageVersionMetricsApiView,
Expand All @@ -15,6 +16,11 @@

community_urls = [
path("", include(v1_router.urls)),
path(
"package-listing-index/",
PackageListingIndex.as_view(),
name="package-listing-index",
),
]
communityless_urls = [
path("current-user/info/", CurrentUserInfoView.as_view(), name="current-user.info"),
Expand Down
33 changes: 33 additions & 0 deletions django/thunderstore/repository/api/v1/views/listing_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from django.shortcuts import get_object_or_404, redirect
from drf_yasg.utils import swagger_auto_schema # type: ignore
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.views import APIView

from thunderstore.community.models import Community
from thunderstore.repository.models import APIV1ChunkedPackageCache


class PackageListingIndex(APIView):
    """
    Return a blob file containing URLs to package listing chunks.
    Client needs to gunzip and JSON parse the blob contents.
    /c/{community_id}/api/v1/package-listing-index/
    """

    @swagger_auto_schema(
        tags=["api"],
        auto_schema=None,  # Hide from API docs for now.
    )
    def get(self, request: Request, community_identifier: str):
        # Communities outside the listed set produce a 404.
        listed_communities = Community.objects.listed()
        community = get_object_or_404(
            listed_communities,
            identifier=community_identifier,
        )

        latest = APIV1ChunkedPackageCache.get_latest_for_community(community)
        if not latest:
            # No cache to point at for this community: report unavailability.
            return Response({"error": "No cache available"}, status=503)

        # Send the client to the stored index blob instead of serving it here.
        index_url = request.build_absolute_uri(latest.index.data_url)
        return redirect(index_url)
14 changes: 11 additions & 3 deletions django/thunderstore/repository/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,19 @@ def order_package_listing_queryset(
)


def get_package_listing_base_queryset(
    community_identifier: str,
) -> QuerySet[PackageListing]:
    """
    Build the unordered, un-prefetched listing queryset of one community.

    Starts from ``PackageListing.objects.active()``, applies
    ``filter_by_community_approval_rule()`` and excludes every listing whose
    community identifier differs from ``community_identifier``.
    """
    outside_community = ~Q(community__identifier=community_identifier)
    eligible = PackageListing.objects.active().filter_by_community_approval_rule()
    return eligible.exclude(outside_community)


def get_package_listing_queryset(community_identifier: str) -> QuerySet[PackageListing]:
    """
    Return the listing queryset of one community after it has been passed
    through ``prefetch_package_listing_queryset`` and
    ``order_package_listing_queryset``.

    The community filtering lives in ``get_package_listing_base_queryset``;
    the inline copy of that same expression is not repeated here, so the
    prefetch helper receives exactly one queryset.
    """
    base = get_package_listing_base_queryset(community_identifier)
    return order_package_listing_queryset(
        prefetch_package_listing_queryset(base),
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Generated by Django 3.1.7 on 2024-05-21 12:09

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``APIV1ChunkedPackageCache`` model."""

    dependencies = [
        ("community", "0028_add_cover_image_fields"),
        ("storage", "0002_add_group"),
        ("repository", "0051_bigint_file_size"),
    ]

    operations = [
        migrations.CreateModel(
            name="APIV1ChunkedPackageCache",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Soft-delete flag and creation timestamp.
                ("is_deleted", models.BooleanField(default=False)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                # Blob group referenced by the cache; PROTECT blocks deleting
                # the group while a cache row still points at it.
                (
                    "chunks",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="chunked_package_list_cache",
                        to="storage.datablobgroup",
                    ),
                ),
                # Cache rows are removed together with their community.
                (
                    "community",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="chunked_package_list_cache",
                        to="community.community",
                    ),
                ),
                # Index blob of the cache, one-to-one at this point.
                (
                    "index",
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="chunked_package_indexes",
                        to="storage.datablob",
                    ),
                ),
            ],
            options={
                "get_latest_by": "created_at",
            },
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Generated by Django 3.1.7 on 2024-05-21 12:10

import pytz
from django.db import migrations

TASK = "thunderstore.repository.tasks.update_chunked_package_caches"


def forwards(apps, schema_editor):
    """
    Register the periodic task named "Update APIV1ChunkedPackageCache".

    Reuses or creates a UTC crontab schedule that fires at minute 0 of
    every hour, then reuses or creates the periodic task bound to it.
    """
    PeriodicTask = apps.get_model("django_celery_beat", "PeriodicTask")
    CrontabSchedule = apps.get_model("django_celery_beat", "CrontabSchedule")

    hourly_utc = {
        "minute": "0",
        "hour": "*",
        "day_of_week": "*",
        "day_of_month": "*",
        "month_of_year": "*",
        "timezone": pytz.timezone("UTC"),
    }
    # get_or_create returns (object, created); only the object is needed.
    schedule = CrontabSchedule.objects.get_or_create(**hourly_utc)[0]

    PeriodicTask.objects.get_or_create(
        crontab=schedule,
        name="Update APIV1ChunkedPackageCache",
        task=TASK,
        expire_seconds=300,
    )


def backwards(apps, schema_editor):
    """Delete every periodic task whose ``task`` field equals ``TASK``."""
    task_model = apps.get_model("django_celery_beat", "PeriodicTask")
    scheduled = task_model.objects.filter(task=TASK)
    scheduled.delete()


class Migration(migrations.Migration):
    """Data migration that schedules the chunked package cache task."""

    dependencies = [
        ("repository", "0052_add_chunked_package_cache"),
        ("django_celery_beat", "0014_remove_clockedschedule_enabled"),
    ]

    operations = [
        # Reversible: `backwards` undoes what `forwards` registers.
        migrations.RunPython(code=forwards, reverse_code=backwards),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 3.1.7 on 2024-05-22 09:01

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``APIV1ChunkedPackageCache.index`` to a plain ``ForeignKey``."""

    dependencies = [
        ("storage", "0002_add_group"),
        ("repository", "0053_schedule_chunked_package_caching"),
    ]

    operations = [
        migrations.AlterField(
            model_name="apiv1chunkedpackagecache",
            name="index",
            # Same target, related name and PROTECT rule; only the field
            # class is stated as ForeignKey here.
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.PROTECT,
                related_name="chunked_package_indexes",
                to="storage.datablob",
            ),
        ),
    ]
Loading

0 comments on commit 73385c9

Please sign in to comment.