From 3a1b5088a16b40cc752a6aef28d27f713779c333 Mon Sep 17 00:00:00 2001 From: Mythic Date: Mon, 25 Sep 2023 15:31:54 +0300 Subject: [PATCH] Add automatic file tree extraction on upload Automatically extract the file tree of new package uploads as they're processed. This can make submission processing take much longer than before, and because the extraction is handled synchronously, it could increase submission failures for large packages. There's no way to fix the above issue without moving to entirely asynchronous submission processing, which is a task too large to take on currently. --- django/thunderstore/repository/filetree.py | 22 +++++++++++++++++++ .../thunderstore/repository/package_upload.py | 7 ++++++ django/thunderstore/repository/tasks/files.py | 20 +++++------------ .../repository/tests/test_package_upload.py | 4 ++++ 4 files changed, 38 insertions(+), 15 deletions(-) create mode 100644 django/thunderstore/repository/filetree.py diff --git a/django/thunderstore/repository/filetree.py b/django/thunderstore/repository/filetree.py new file mode 100644 index 000000000..ee06ef0ca --- /dev/null +++ b/django/thunderstore/repository/filetree.py @@ -0,0 +1,22 @@ +import logging +from typing import IO, Any +from zipfile import ZipFile + +from thunderstore.storage.models import DataBlobGroup + +logger = logging.getLogger(__name__) + + +def create_file_tree_from_zip_data( + name: str, + zip_data: IO[Any], +) -> DataBlobGroup: + with ZipFile(zip_data) as unzip: + group: DataBlobGroup = DataBlobGroup.objects.create(name=name) + for entry in unzip.infolist(): + logger.info(f"Processing {entry.filename}") + if entry.is_dir(): + continue + group.add_entry(unzip.read(entry), entry.filename) + group.set_complete() + return group diff --git a/django/thunderstore/repository/package_upload.py b/django/thunderstore/repository/package_upload.py index 3216f5808..c47b4046f 100644 --- a/django/thunderstore/repository/package_upload.py +++ 
b/django/thunderstore/repository/package_upload.py @@ -9,6 +9,7 @@ from thunderstore.community.models import Community, PackageCategory from thunderstore.core.types import UserType +from thunderstore.repository.filetree import create_file_tree_from_zip_data from thunderstore.repository.models import Package, PackageVersion, Team from thunderstore.repository.package_formats import PackageFormats from thunderstore.repository.validation.categories import clean_community_categories @@ -166,6 +167,7 @@ def save(self, *args, **kwargs): self.instance.changelog = self.changelog self.instance.file_size = self.file_size self.instance.format_spec = self.format_spec + team = self.cleaned_data["team"] team.ensure_can_upload_package(self.user) # We just take the namespace with team name for now @@ -174,6 +176,11 @@ def save(self, *args, **kwargs): owner=team, name=self.instance.name, namespace=namespace )[0] + self.instance.file_tree = create_file_tree_from_zip_data( + name=f"File tree of package: {self.instance.full_version_name}", + zip_data=self.cleaned_data["file"], + ) + community_categories = self.cleaned_data.get("community_categories", {}) for community in self.cleaned_data.get("communities", []): categories = community_categories.get(community.identifier, []) diff --git a/django/thunderstore/repository/tasks/files.py b/django/thunderstore/repository/tasks/files.py index dfc3a1af0..8b4c51149 100644 --- a/django/thunderstore/repository/tasks/files.py +++ b/django/thunderstore/repository/tasks/files.py @@ -1,12 +1,11 @@ import logging import tempfile -from zipfile import ZipFile from celery import shared_task from thunderstore.core.settings import CeleryQueues +from thunderstore.repository.filetree import create_file_tree_from_zip_data from thunderstore.repository.models import PackageVersion -from thunderstore.storage.models import DataBlobGroup logger = logging.getLogger(__name__) @@ -28,19 +27,10 @@ def extract_package_version_file_tree( local_copy.write(chunk) 
local_copy.seek(0) - with ZipFile(local_copy) as unzip: - group: DataBlobGroup = DataBlobGroup.objects.create( - name=f"File tree of package: {package_version.full_version_name}" - ) - for entry in unzip.infolist(): - logger.info(f"Processing {entry.filename}") - if entry.is_dir(): - continue - group.add_entry( - unzip.read(entry), - entry.filename, - ) - group.set_complete() + group = create_file_tree_from_zip_data( + name=f"File tree of package: {package_version.full_version_name}", + zip_data=local_copy, + ) package_version.file_tree = group package_version.save(update_fields=("file_tree",)) diff --git a/django/thunderstore/repository/tests/test_package_upload.py b/django/thunderstore/repository/tests/test_package_upload.py index bca3260e8..842da4744 100644 --- a/django/thunderstore/repository/tests/test_package_upload.py +++ b/django/thunderstore/repository/tests/test_package_upload.py @@ -56,6 +56,8 @@ def test_package_upload(user, manifest_v1_data, community, changelog): assert version.format_spec == PackageFormats.get_active_format() assert version.package.namespace == team.get_namespace() assert version.package.namespace.name == team.name + assert version.file_tree is not None + assert version.file_tree.entries.count() == (3 if changelog is None else 4) @pytest.mark.django_db @@ -112,3 +114,5 @@ def test_package_upload_with_extra_data(user, community, manifest_v1_data, chang assert listing.categories.count() == 1 assert listing.categories.first() == category assert listing.has_nsfw_content is True + assert version.file_tree is not None + assert version.file_tree.entries.count() == (3 if changelog is None else 4)