def update_api_v1_chunked_package_caches() -> None:
    """
    Rebuild the chunked package cache of every community, then drop the
    caches that have become stale.

    A failure while processing one community is reported through
    capture_exception and does not prevent the remaining communities
    from being processed.
    """
    all_communities = Community.objects.iterator()

    for current in all_communities:
        try:
            APIV1ChunkedPackageCache.update_for_community(current)
        except Exception as error:  # pragma: no cover
            capture_exception(error)

    # Runs once, after every community has been attempted.
    APIV1ChunkedPackageCache.drop_stale_cache()
@pytest.mark.django_db
@pytest.mark.parametrize("has_cache", (False, True))
def test_api_v1_community_package_listing_index__depending_on_cache__returns_302_or_503(
    api_client: APIClient,
    community_site: CommunitySite,
    has_cache: bool,
) -> None:
    """
    The index endpoint redirects (302) when a chunked cache exists for
    the community and answers 503 when none has been built yet.
    """
    community = community_site.community

    if has_cache:
        APIV1ChunkedPackageCache.update_for_community(community)
        expected_status = 302
    else:
        expected_status = 503

    response = api_client.get(
        f"/c/{community.identifier}/api/v1/package-listing-index/",
    )

    assert response.status_code == expected_status
@pytest.mark.django_db
def test_api_v1_chunked_package_cache__builds_index_and_chunks(
    community: Community,
    settings: Any,
) -> None:
    """
    Running the cache update task creates a cache whose index blob lists
    one URL per chunk, with both index and chunk URLs served from S3.
    """
    s3_prefix = settings.AWS_S3_ENDPOINT_URL
    PackageListingFactory(community_=community)
    assert APIV1ChunkedPackageCache.get_latest_for_community(community) is None

    update_api_v1_chunked_package_caches()

    latest = APIV1ChunkedPackageCache.get_latest_for_community(community)
    assert latest is not None
    assert latest.index.data_url.startswith(s3_prefix)

    chunk_urls = APIV1ChunkedPackageCache.get_blob_content(latest.index)
    assert isinstance(chunk_urls, list)
    assert len(chunk_urls) == latest.chunks.entries.count()
    assert chunk_urls[0].startswith(s3_prefix)
@pytest.mark.django_db
@pytest.mark.parametrize("count", (0, 1, 2, 3, 5, 8, 13))
def test_get_package_listing_chunk__retains_received_ordering(count: int) -> None:
    """
    get_package_listing_chunk must return exactly the requested listings,
    in the same order as the id list it was given.
    """
    assert not PackageListing.objects.exists()
    for _ in range(count):
        PackageListingFactory()

    ordering = list(PackageListing.objects.all().values_list("id", flat=True))
    assert len(ordering) == count
    shuffle(ordering)
    listings = get_package_listing_chunk(ordering)

    # Compare the whole sequences: an element-by-element loop over the
    # result would pass vacuously if rows were missing from it.
    assert [listing.id for listing in listings] == ordering
class PackageListingIndex(APIView):
    """
    Return a blob file containing URLs to package listing chunks.
    Client needs to gunzip and JSON parse the blob contents.

    /c/{community_id}/api/v1/package-listing-index/
    """

    @swagger_auto_schema(
        tags=["api"],
        auto_schema=None,  # Hide from API docs for now.
    )
    def get(self, request: Request, community_identifier: str):
        """
        Redirect to the index blob of the community's latest chunked
        cache, or respond with 503 if no cache has been built yet.
        Communities that are not listed result in a 404.
        """
        listed_communities = Community.objects.listed()
        community = get_object_or_404(
            listed_communities,
            identifier=community_identifier,
        )
        latest_cache = APIV1ChunkedPackageCache.get_latest_for_community(community)

        # Guard clause: nothing to redirect to until the first cache exists.
        if not latest_cache:
            return Response({"error": "No cache available"}, status=503)

        index_url = request.build_absolute_uri(latest_cache.index.data_url)
        return redirect(index_url)
def forwards(apps, schema_editor):
    """
    Schedule the chunked package cache update task to run at the top of
    every hour (UTC).

    Safe to re-run: a matching crontab row is reused when one exists and
    a periodic task already registered under the same name is left as is.
    """
    CrontabSchedule = apps.get_model("django_celery_beat", "CrontabSchedule")
    PeriodicTask = apps.get_model("django_celery_beat", "PeriodicTask")

    schedule_spec = {
        "minute": "0",
        "hour": "*",
        "day_of_week": "*",
        "day_of_month": "*",
        "month_of_year": "*",
        "timezone": pytz.timezone("UTC"),
    }
    # Nothing visible here guarantees that at most one crontab row matches
    # the spec, and get_or_create() raises MultipleObjectsReturned when
    # several do. Reuse the oldest match instead of failing the migration.
    schedule = CrontabSchedule.objects.filter(**schedule_spec).order_by("pk").first()
    if schedule is None:
        schedule = CrontabSchedule.objects.create(**schedule_spec)

    # Look the task up by name only and pass the rest as creation defaults,
    # so a pre-existing task with e.g. an edited schedule does not lead to
    # an attempted duplicate insert.
    # NOTE(review): django_celery_beat declares PeriodicTask.name unique;
    # confirm against the installed version.
    PeriodicTask.objects.get_or_create(
        name="Update APIV1ChunkedPackageCache",
        defaults={
            "crontab": schedule,
            "task": TASK,
            "expire_seconds": 300,
        },
    )
PeriodicTask.objects.filter(task=TASK).delete() + + +class Migration(migrations.Migration): + dependencies = [ + ("repository", "0052_add_chunked_package_cache"), + ("django_celery_beat", "0014_remove_clockedschedule_enabled"), + ] + + operations = [ + migrations.RunPython(forwards, backwards), + ] diff --git a/django/thunderstore/repository/migrations/0054_alter_chunked_package_cache_index.py b/django/thunderstore/repository/migrations/0054_alter_chunked_package_cache_index.py new file mode 100644 index 000000000..9295badb3 --- /dev/null +++ b/django/thunderstore/repository/migrations/0054_alter_chunked_package_cache_index.py @@ -0,0 +1,24 @@ +# Generated by Django 3.1.7 on 2024-05-22 09:01 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("storage", "0002_add_group"), + ("repository", "0053_schedule_chunked_package_caching"), + ] + + operations = [ + migrations.AlterField( + model_name="apiv1chunkedpackagecache", + name="index", + field=models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="chunked_package_indexes", + to="storage.datablob", + ), + ), + ] diff --git a/django/thunderstore/repository/models/cache.py b/django/thunderstore/repository/models/cache.py index 27c1f61b9..6a0f2947a 100644 --- a/django/thunderstore/repository/models/cache.py +++ b/django/thunderstore/repository/models/cache.py @@ -1,14 +1,21 @@ import gzip import io +import json from datetime import timedelta -from typing import Optional +from typing import Any, Iterable, List, Optional from django.core.files.base import ContentFile from django.db import models from django.utils import timezone -from thunderstore.community.models import Community -from thunderstore.core.mixins import S3FileMixin +from thunderstore.community.models import Community, PackageListing +from thunderstore.core.mixins import S3FileMixin, SafeDeleteMixin +from thunderstore.repository.cache import 
class APIV1ChunkedPackageCache(SafeDeleteMixin):
    """
    Snapshot of a community's package listings, stored as gzipped JSON
    chunk blobs plus an index blob that lists the URLs of the chunks.
    """

    community: Community = models.ForeignKey(
        "community.Community",
        related_name="chunked_package_list_cache",
        on_delete=models.CASCADE,
    )
    index: DataBlob = models.ForeignKey(
        "storage.DataBlob",
        related_name="chunked_package_indexes",
        on_delete=models.PROTECT,
    )
    chunks: DataBlobGroup = models.ForeignKey(
        "storage.DataBlobGroup",
        related_name="chunked_package_list_cache",
        on_delete=models.PROTECT,
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        get_latest_by = "created_at"

    CACHE_CUTOFF_HOURS = 3
    UNCOMPRESSED_CHUNK_LIMIT = 14000000  # 14MB compresses into ~1MB files.

    @classmethod
    def get_latest_for_community(
        cls,
        community: Community,
    ) -> Optional["APIV1ChunkedPackageCache"]:
        """
        Return the most recently created cache of the community, or None
        if the community has no cache.
        """
        try:
            return cls.objects.filter(community=community.pk).latest()
        except APIV1ChunkedPackageCache.DoesNotExist:
            return None

    @classmethod
    def update_for_community(
        cls,
        community: Community,
        chunk_size_limit: Optional[int] = None,
    ) -> None:
        """
        Chunk community's PackageListings into blob files and create an
        index blob that points to URLs of the chunks.

        :param community: community whose listings are cached.
        :param chunk_size_limit: maximum uncompressed size of a chunk in
            bytes; a falsy value selects UNCOMPRESSED_CHUNK_LIMIT. A
            single listing larger than the limit still gets a chunk of
            its own.
        """
        uncompressed_blob_size = chunk_size_limit or cls.UNCOMPRESSED_CHUNK_LIMIT
        group = DataBlobGroup.objects.create(
            name=f"Chunked package list: {community.identifier}",
        )
        chunk_content = bytearray()

        def finalize_blob(content: bytearray) -> None:
            # Wrap the comma-separated listings into a JSON array. mtime=0
            # keeps the gzip output independent of the time of compression.
            group.add_entry(
                gzip.compress(b"[" + content + b"]", mtime=0),
                name=f"Package list chunk for {community.identifier}",
                content_type="application/json",
                content_encoding="gzip",
            )

        for listing_ids in get_package_listing_ids(community):
            for listing in get_package_listing_chunk(listing_ids):
                listing_bytes = listing_to_json(listing)

                # Always add the first listing regardless of the size limit.
                if not chunk_content:
                    chunk_content.extend(listing_bytes)
                    continue

                # Start a new blob if adding the current listing would
                # exceed the size limit. +3 covers the separating comma
                # and the opening and closing brackets.
                projected_size = len(chunk_content) + len(listing_bytes) + 3
                if projected_size > uncompressed_blob_size:
                    finalize_blob(chunk_content)
                    chunk_content = bytearray(listing_bytes)
                else:
                    chunk_content.extend(b"," + listing_bytes)

        # Flush the last partial chunk. A community without listings still
        # gets a single chunk containing an empty array.
        if chunk_content or not group.entries.exists():
            finalize_blob(chunk_content)

        group.set_complete()
        index = get_index_blob(group)
        cls.objects.create(community=community, index=index, chunks=group)

    @classmethod
    def drop_stale_cache(cls) -> None:
        """
        Soft delete caches that are older than the community's latest
        cache by at least CACHE_CUTOFF_HOURS. The latest cache of a
        community is never dropped. The cutoff period is used to ensure
        blobs still referenced by cached data are not dropped
        prematurely.

        TODO: only soft deletes the parent object until we've figured
        out how to safely delete the blobs from the main and mirror
        storages. When the hard deletes are implemented, acknowledge
        that identical (e.g. empty) index/package chunk blobs are
        shared between the caches. Therefore a blob can't be deleted
        just because it's no longer used by *a* cache.
        """
        for community in Community.objects.iterator():
            latest = cls.get_latest_for_community(community)
            if latest is None:
                continue

            # The cutoff is relative to the latest cache, not to the
            # current time.
            cutoff = latest.created_at - timedelta(hours=cls.CACHE_CUTOFF_HOURS)
            cls.objects.filter(created_at__lte=cutoff, community=community).update(
                is_deleted=True,
            )

    @classmethod
    def get_blob_content(cls, blob: DataBlob) -> List[Any]:
        """
        QoL method for returning the content of either index or chunk blob.

        Gunzips blob.data and parses the result as JSON.
        """
        with gzip.open(blob.data, "rb") as f:
            return json.loads(f.read())
def _version_to_dict(version) -> dict:
    """Build the JSON-serializable representation of one package version."""
    return {
        "name": version.name,
        "full_name": version.full_version_name,
        "description": version.description,
        "icon": version.icon.url,
        "version_number": version.version_number,
        "dependencies": [
            dependency.full_version_name for dependency in version.dependencies.all()
        ],
        "download_url": version.full_download_url,
        "downloads": version.downloads,
        "date_created": version.date_created.isoformat(),
        "website_url": version.website_url,
        # TODO: what is this needed for, inactive ones have been filtered out anyway?
        "is_active": version.is_active,
        "uuid4": str(version.uuid4),
        "file_size": version.file_size,
    }


def listing_to_json(listing: PackageListing) -> bytes:
    """
    Serialize a package listing, including the available versions of its
    package, into an encoded JSON object.
    """
    package = listing.package
    owner = package.owner

    # Keys are inserted in the order they appear in the serialized output.
    payload = {
        "name": package.name,
        "full_name": package.full_package_name,
        "owner": owner.name,
        "package_url": listing.get_full_url(),
        "donation_link": owner.donation_link,
        "date_created": package.date_created.isoformat(),
        "date_updated": package.date_updated.isoformat(),
        "uuid4": str(package.uuid4),
        "rating_score": listing.rating_score,
        "is_pinned": package.is_pinned,
        "is_deprecated": package.is_deprecated,
        "has_nsfw_content": listing.has_nsfw_content,
    }
    payload["categories"] = [category.name for category in listing.categories.all()]
    # TODO: this generates awfully lot of database hits
    payload["versions"] = [
        _version_to_dict(version) for version in package.available_versions
    ]

    serialized = json.dumps(payload)
    return serialized.encode()
@pytest.mark.django_db
def test_api_v1_chunked_package_cache__when_many_cache__get_latest_returns_latest(
    community: Community,
) -> None:
    """
    With several caches for one community, get_latest_for_community
    returns the one created last.
    """
    for _ in range(3):
        APIV1ChunkedPackageCache.update_for_community(community)

    newest = APIV1ChunkedPackageCache.objects.order_by("-created_at").first()
    assert APIV1ChunkedPackageCache.objects.count() == 3

    found = APIV1ChunkedPackageCache.get_latest_for_community(community)
    assert found.pk == newest.pk
@pytest.mark.django_db
def test_api_v1_chunked_package_cache__when_community_has_one_package__creates_proper_chunk(
    community: Community,
) -> None:
    """
    A community with a single listing gets a single chunk that contains
    that listing together with its one active version.
    """
    listing = PackageListingFactory(
        community_=community,
        package_version_kwargs={"is_active": True},
    )
    assert community.package_listings.count() == 1

    APIV1ChunkedPackageCache.update_for_community(community)

    cache = APIV1ChunkedPackageCache.objects.get(community=community)
    assert cache.chunks.entries.count() == 1

    only_blob = cache.chunks.entries.get().blob
    chunk = APIV1ChunkedPackageCache.get_blob_content(only_blob)
    assert isinstance(chunk, list)
    assert len(chunk) == 1

    serialized = chunk[0]
    assert serialized["name"] == listing.package.name

    versions = serialized["versions"]
    assert isinstance(versions, list)
    assert len(versions) == 1
    assert versions[0]["full_name"] == listing.package.latest.full_version_name
+TEST_PACKAGE_BYTES = 1000 + + +@pytest.mark.django_db +@pytest.mark.parametrize( + ( + "listing_count", + "chunk_size_limit", + "expected_chunk_count", + ), + ( + (0, 1, 1), + (1, 1, 1), + (2, 1, 2), + (3, 1, 3), + (0, TEST_PACKAGE_BYTES, 1), + (1, TEST_PACKAGE_BYTES, 1), + (2, TEST_PACKAGE_BYTES, 2), + (3, TEST_PACKAGE_BYTES, 3), + (0, TEST_PACKAGE_BYTES * 2, 1), + (1, TEST_PACKAGE_BYTES * 2, 1), + (2, TEST_PACKAGE_BYTES * 2, 1), + (3, TEST_PACKAGE_BYTES * 2, 2), + (4, TEST_PACKAGE_BYTES * 2, 2), + (5, TEST_PACKAGE_BYTES * 2, 3), + (0, TEST_PACKAGE_BYTES * 3, 1), + (1, TEST_PACKAGE_BYTES * 3, 1), + (2, TEST_PACKAGE_BYTES * 3, 1), + (3, TEST_PACKAGE_BYTES * 3, 1), + (4, TEST_PACKAGE_BYTES * 3, 2), + ), +) +def test_api_v1_chunked_package_cache__when_multiple_packages__creates_correct_amount_of_chunks( + community: Community, + listing_count: int, + chunk_size_limit: int, + expected_chunk_count: int, +) -> None: + for _ in range(listing_count): + PackageListingFactory( + community_=community, + package_version_kwargs={"is_active": True}, + ) + assert community.package_listings.count() == listing_count + + APIV1ChunkedPackageCache.update_for_community(community, chunk_size_limit) + cache = APIV1ChunkedPackageCache.objects.get(community=community) + assert cache.chunks.entries.count() == expected_chunk_count + + # Will throw if json.loads fails to parse the data. 
@pytest.mark.django_db
def test_api_v1_chunked_package_cache__when_no_changes_in_packages__reuses_old_blobs(
    community: Community,
) -> None:
    """
    Rebuilding the cache without any package changes creates a new cache
    and a new blob group, but reuses the existing index and chunk blobs.
    """
    PackageListingFactory(
        community_=community,
        package_version_kwargs={"is_active": True},
    )
    assert community.package_listings.count() == 1
    for model in (APIV1ChunkedPackageCache, DataBlob, DataBlobGroup):
        assert not model.objects.exists()

    APIV1ChunkedPackageCache.update_for_community(community)
    older = APIV1ChunkedPackageCache.get_latest_for_community(community=community)
    APIV1ChunkedPackageCache.update_for_community(community)
    newer = APIV1ChunkedPackageCache.get_latest_for_community(community=community)

    assert older is not None
    assert newer is not None
    assert APIV1ChunkedPackageCache.objects.count() == 2
    assert older.pk != newer.pk

    # One index blob, one chunk blob
    assert DataBlob.objects.count() == 2
    assert older.index.pk == newer.index.pk
    older_chunk_blob = older.chunks.entries.get().blob
    newer_chunk_blob = newer.chunks.entries.get().blob
    assert older_chunk_blob.pk == newer_chunk_blob.pk

    # While blobs are shared, groups are not
    assert DataBlobGroup.objects.count() == 2
    assert older.chunks.pk != newer.chunks.pk