Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chunked package lists #1035

Merged
merged 6 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions django/thunderstore/core/tests/test_celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_task():
"thunderstore.usermedia.tasks.celery_cleanup_expired_uploads",
"thunderstore.schema_import.tasks.sync_ecosystem_schema",
"thunderstore.repository.tasks.files.extract_package_version_file_tree",
"thunderstore.repository.tasks.update_chunked_package_caches",
"thunderstore.repository.tasks.update_experimental_package_index",
"thunderstore.repository.tasks.process_package_submission",
"thunderstore.repository.tasks.cleanup_package_submissions",
Expand Down
12 changes: 11 additions & 1 deletion django/thunderstore/repository/api/v1/tasks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from thunderstore.community.models import Community, CommunitySite
from thunderstore.core.utils import capture_exception
from thunderstore.repository.api.v1.viewsets import serialize_package_list_for_community
from thunderstore.repository.models.cache import APIV1PackageCache
from thunderstore.repository.models import APIV1ChunkedPackageCache, APIV1PackageCache


def update_api_v1_caches() -> None:
Expand Down Expand Up @@ -30,3 +30,13 @@ def update_api_v1_indexes() -> None:
except Exception as e: # pragma: no cover
capture_exception(e)
APIV1PackageCache.drop_stale_cache()


def update_api_v1_chunked_package_caches() -> None:
    """
    Rebuild the chunked API v1 package cache for every community.

    An exception raised while updating one community is reported through
    ``capture_exception`` and the loop moves on to the next community.
    Stale caches are dropped once all communities have been visited.
    """
    for community in Community.objects.iterator():
        try:
            APIV1ChunkedPackageCache.update_for_community(community)
        except Exception as e:  # pragma: no cover
            # Best effort: one failing community must not block the rest.
            capture_exception(e)

    APIV1ChunkedPackageCache.drop_stale_cache()
21 changes: 20 additions & 1 deletion django/thunderstore/repository/api/v1/tests/test_api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
from thunderstore.core.factories import UserFactory
from thunderstore.repository.api.v1.tasks import update_api_v1_caches
from thunderstore.repository.api.v1.viewsets import PACKAGE_SERIALIZER
from thunderstore.repository.models.cache import APIV1PackageCache
from thunderstore.repository.models.cache import (
APIV1ChunkedPackageCache,
APIV1PackageCache,
)


@pytest.mark.django_db
Expand Down Expand Up @@ -240,3 +243,19 @@ def test_api_v1_package_listing_serializer_donation_link_omission(
assert result[0]["donation_link"] == donation_link
else:
assert "donation_link" not in result[0]


@pytest.mark.django_db
@pytest.mark.parametrize("has_cache", (False, True))
def test_api_v1_community_package_listing_index__depending_on_cache__returns_302_or_503(
    api_client: APIClient,
    community_site: CommunitySite,
    has_cache: bool,
) -> None:
    """
    The index endpoint answers 302 when a chunked cache exists for the
    community and 503 when it does not.
    """
    community = community_site.community
    expected_status = 302 if has_cache else 503

    if has_cache:
        APIV1ChunkedPackageCache.update_for_community(community)

    response = api_client.get(
        f"/c/{community.identifier}/api/v1/package-listing-index/",
    )

    assert response.status_code == expected_status
92 changes: 90 additions & 2 deletions django/thunderstore/repository/api/v1/tests/test_caches.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import gzip
import json
from datetime import timedelta
from random import shuffle
from typing import Any

import pytest
Expand All @@ -11,8 +13,12 @@
SiteFactory,
)
from thunderstore.community.models import Community, CommunitySite, PackageListing
from thunderstore.repository.api.v1.tasks import update_api_v1_caches
from thunderstore.repository.models import APIV1PackageCache
from thunderstore.repository.api.v1.tasks import (
update_api_v1_caches,
update_api_v1_chunked_package_caches,
)
from thunderstore.repository.models import APIV1ChunkedPackageCache, APIV1PackageCache
from thunderstore.repository.models.cache import get_package_listing_chunk


@pytest.mark.django_db
Expand Down Expand Up @@ -139,3 +145,85 @@ def test_api_v1_cache_building_package_url_simple(
with gzip.GzipFile(fileobj=cache.data, mode="r") as f:
result = json.loads(f.read())
assert result[0]["package_url"].startswith(expected_prefix)


@pytest.mark.django_db
def test_api_v1_chunked_package_cache__builds_index_and_chunks(
    community: Community,
    settings: Any,
) -> None:
    """
    Running the update task creates a cache whose index blob is a list with
    one entry per chunk, each entry starting with the S3 endpoint URL.
    """
    PackageListingFactory(community_=community)
    assert APIV1ChunkedPackageCache.get_latest_for_community(community) is None

    update_api_v1_chunked_package_caches()

    latest = APIV1ChunkedPackageCache.get_latest_for_community(community)
    assert latest is not None
    assert latest.index.data_url.startswith(settings.AWS_S3_ENDPOINT_URL)

    chunk_urls = APIV1ChunkedPackageCache.get_blob_content(latest.index)
    assert isinstance(chunk_urls, list)
    assert len(chunk_urls) == latest.chunks.entries.count()
    assert chunk_urls[0].startswith(settings.AWS_S3_ENDPOINT_URL)


@pytest.mark.django_db
def test_api_v1_chunked_package_cache__drops_stale_caches() -> None:
    """
    Caches are currently only soft deleted.

    Cache instances are reloaded from the database before every
    ``is_deleted`` assertion that follows ``drop_stale_cache()``; otherwise
    the assertion would only inspect the in-memory copy and could never fail.
    """
    PackageListingFactory()
    assert not APIV1ChunkedPackageCache.objects.exists()

    update_api_v1_chunked_package_caches()
    first_cache = APIV1ChunkedPackageCache.objects.get()
    assert not first_cache.is_deleted

    # Only one cache for the community exists, so it won't be dropped.
    APIV1ChunkedPackageCache.drop_stale_cache()
    first_cache.refresh_from_db()
    assert not first_cache.is_deleted

    # Two caches exist, but neither is beyond the cutoff period.
    update_api_v1_chunked_package_caches()
    APIV1ChunkedPackageCache.drop_stale_cache()
    second_cache = APIV1ChunkedPackageCache.get_latest_for_community(
        first_cache.community,
    )
    assert APIV1ChunkedPackageCache.objects.count() == 2
    assert second_cache
    assert second_cache.pk != first_cache.pk
    first_cache.refresh_from_db()
    second_cache.refresh_from_db()
    assert not first_cache.is_deleted
    assert not second_cache.is_deleted

    # The older cache should be dropped after the cutoff period.
    cutoff = timedelta(hours=APIV1ChunkedPackageCache.CACHE_CUTOFF_HOURS)
    first_cache.created_at = first_cache.created_at - cutoff
    first_cache.save()
    APIV1ChunkedPackageCache.drop_stale_cache()
    first_cache.refresh_from_db()
    second_cache.refresh_from_db()
    assert first_cache.is_deleted
    assert not second_cache.is_deleted

    # The latest cache should not be dropped even if older than the cutoff period.
    second_cache.created_at = second_cache.created_at - cutoff
    second_cache.save()
    APIV1ChunkedPackageCache.drop_stale_cache()
    first_cache.refresh_from_db()
    second_cache.refresh_from_db()
    assert first_cache.is_deleted
    assert not second_cache.is_deleted


@pytest.mark.django_db
@pytest.mark.parametrize("count", (0, 1, 2, 3, 5, 8, 13))
def test_get_package_listing_chunk__retains_received_ordering(count: int) -> None:
    """
    ``get_package_listing_chunk`` returns the listings in exactly the order
    of the ids it was given.
    """
    assert not PackageListing.objects.exists()
    for _ in range(count):
        PackageListingFactory()

    ordering = list(PackageListing.objects.all().values_list("id", flat=True))
    shuffle(ordering)
    listings = get_package_listing_chunk(ordering)

    # Compare the whole sequences: a per-item loop over the result would
    # pass vacuously if the helper returned fewer listings than requested.
    assert [listing.id for listing in listings] == ordering
6 changes: 6 additions & 0 deletions django/thunderstore/repository/api/v1/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from rest_framework import routers

from thunderstore.repository.api.v1.views.deprecate import DeprecateModApiView
from thunderstore.repository.api.v1.views.listing_index import PackageListingIndex
from thunderstore.repository.api.v1.views.metrics import (
PackageMetricsApiView,
PackageVersionMetricsApiView,
Expand All @@ -15,6 +16,11 @@

# Routes mounted under a community-scoped prefix.
community_urls = [
    path("", include(v1_router.urls)),
    path(
        "package-listing-index/",
        PackageListingIndex.as_view(),
        name="package-listing-index",
    ),
]
communityless_urls = [
path("current-user/info/", CurrentUserInfoView.as_view(), name="current-user.info"),
Expand Down
33 changes: 33 additions & 0 deletions django/thunderstore/repository/api/v1/views/listing_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from django.shortcuts import get_object_or_404, redirect
from drf_yasg.utils import swagger_auto_schema # type: ignore
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.views import APIView

from thunderstore.community.models import Community
from thunderstore.repository.models import APIV1ChunkedPackageCache


class PackageListingIndex(APIView):
    """
    Redirect to a blob file containing URLs to package listing chunks.
    Client needs to gunzip and JSON parse the blob contents.

    /c/{community_id}/api/v1/package-listing-index/
    """

    @swagger_auto_schema(
        tags=["api"],
        auto_schema=None,  # Hide from API docs for now.
    )
    def get(self, request: Request, community_identifier: str):
        # Communities outside the ``listed()`` queryset result in a 404.
        listed_communities = Community.objects.listed()
        community = get_object_or_404(
            listed_communities,
            identifier=community_identifier,
        )
        latest = APIV1ChunkedPackageCache.get_latest_for_community(community)

        if not latest:
            return Response({"error": "No cache available"}, status=503)

        index_url = request.build_absolute_uri(latest.index.data_url)
        return redirect(index_url)
14 changes: 11 additions & 3 deletions django/thunderstore/repository/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,19 @@ def order_package_listing_queryset(
)


def get_package_listing_base_queryset(
    community_identifier: str,
) -> QuerySet[PackageListing]:
    """
    Return active listings that pass the community approval rule, excluding
    every listing whose community identifier is not ``community_identifier``.
    No prefetching or ordering is applied here.
    """
    not_in_community = ~Q(community__identifier=community_identifier)
    candidates = PackageListing.objects.active().filter_by_community_approval_rule()
    return candidates.exclude(not_in_community)


def get_package_listing_queryset(community_identifier: str) -> QuerySet[PackageListing]:
    """
    Return the community's package listings with prefetching and ordering
    applied on top of ``get_package_listing_base_queryset``.
    """
    return order_package_listing_queryset(
        prefetch_package_listing_queryset(
            get_package_listing_base_queryset(community_identifier),
        ),
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Generated by Django 3.1.7 on 2024-05-21 12:09

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``APIV1ChunkedPackageCache`` model."""

    dependencies = [
        ("community", "0028_add_cover_image_fields"),
        ("storage", "0002_add_group"),
        ("repository", "0051_bigint_file_size"),
    ]

    operations = [
        migrations.CreateModel(
            name="APIV1ChunkedPackageCache",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Soft-delete flag; rows default to not deleted.
                ("is_deleted", models.BooleanField(default=False)),
                # Set automatically on insert; also the "get_latest_by" field below.
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    # Blob group referenced by the cache; PROTECT blocks
                    # deleting the group while a cache row points at it.
                    "chunks",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="chunked_package_list_cache",
                        to="storage.datablobgroup",
                    ),
                ),
                (
                    # Owning community; cache rows are removed with it (CASCADE).
                    "community",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="chunked_package_list_cache",
                        to="community.community",
                    ),
                ),
                (
                    # Index blob, one-to-one with the cache row in this migration.
                    "index",
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="chunked_package_indexes",
                        to="storage.datablob",
                    ),
                ),
            ],
            options={
                "get_latest_by": "created_at",
            },
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Generated by Django 3.1.7 on 2024-05-21 12:10

import pytz
from django.db import migrations

TASK = "thunderstore.repository.tasks.update_chunked_package_caches"


def forwards(apps, schema_editor):
    """
    Register the periodic task named by ``TASK`` on a crontab that fires at
    minute 0 of every hour (UTC), reusing an existing schedule or task row
    when a matching one is already present.
    """
    CrontabSchedule = apps.get_model("django_celery_beat", "CrontabSchedule")
    PeriodicTask = apps.get_model("django_celery_beat", "PeriodicTask")

    hourly_schedule, _created = CrontabSchedule.objects.get_or_create(
        timezone=pytz.timezone("UTC"),
        minute="0",
        hour="*",
        day_of_week="*",
        day_of_month="*",
        month_of_year="*",
    )

    PeriodicTask.objects.get_or_create(
        name="Update APIV1ChunkedPackageCache",
        task=TASK,
        crontab=hourly_schedule,
        expire_seconds=300,
    )


def backwards(apps, schema_editor):
    """Delete every periodic task whose ``task`` field equals ``TASK``."""
    periodic_task_model = apps.get_model("django_celery_beat", "PeriodicTask")
    scheduled_tasks = periodic_task_model.objects.filter(task=TASK)
    scheduled_tasks.delete()


class Migration(migrations.Migration):
    """Data migration that runs ``forwards`` on apply and ``backwards`` on rollback."""

    dependencies = [
        ("repository", "0052_add_chunked_package_cache"),
        ("django_celery_beat", "0014_remove_clockedschedule_enabled"),
    ]

    operations = [
        # Reversible: the second callable is used when unapplying.
        migrations.RunPython(forwards, backwards),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 3.1.7 on 2024-05-22 09:01

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``APIV1ChunkedPackageCache.index`` to be a ``ForeignKey`` to ``storage.datablob``."""

    dependencies = [
        ("storage", "0002_add_group"),
        ("repository", "0053_schedule_chunked_package_caching"),
    ]

    operations = [
        migrations.AlterField(
            model_name="apiv1chunkedpackagecache",
            name="index",
            # PROTECT blocks deleting a blob that a cache row still references.
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.PROTECT,
                related_name="chunked_package_indexes",
                to="storage.datablob",
            ),
        ),
    ]
Loading
Loading