Skip to content

Commit

Permalink
Add automatic file tree extraction on upload
Browse files Browse the repository at this point in the history
Automatically extract the file tree of new package uploads as they're
processed.

This can make submission processing take much longer than before, and
because extraction is handled synchronously, it could increase
submission failures for large packages.

There's no way to fix the above issue without moving to entirely
asynchronous submission processing, which is too large a task to take on
currently.
  • Loading branch information
MythicManiac committed Sep 25, 2023
1 parent 517b358 commit 3a1b508
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 15 deletions.
22 changes: 22 additions & 0 deletions django/thunderstore/repository/filetree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import logging
from typing import IO, Any
from zipfile import ZipFile

from thunderstore.storage.models import DataBlobGroup

logger = logging.getLogger(__name__)


def create_file_tree_from_zip_data(
    name: str,
    zip_data: IO[Any],
) -> DataBlobGroup:
    """Build a ``DataBlobGroup`` from the files contained in a zip archive.

    A new group called ``name`` is created, every non-directory member of
    the archive is read and handed to ``add_entry`` together with its path
    inside the archive, and the group is marked complete before it is
    returned.

    :param name: Name given to the newly created ``DataBlobGroup``.
    :param zip_data: File-like object holding the zip archive.
    :return: The created group, after ``set_complete()`` has been called.
    """
    with ZipFile(zip_data) as archive:
        # The group is created only once the archive has been opened.
        tree: DataBlobGroup = DataBlobGroup.objects.create(name=name)
        for member in archive.infolist():
            logger.info(f"Processing {member.filename}")
            # NOTE(review): Codecov reported the directory-skip path as not
            # covered by tests for this commit.
            if not member.is_dir():
                content = archive.read(member)
                tree.add_entry(content, member.filename)
        tree.set_complete()
    return tree
7 changes: 7 additions & 0 deletions django/thunderstore/repository/package_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from thunderstore.community.models import Community, PackageCategory
from thunderstore.core.types import UserType
from thunderstore.repository.filetree import create_file_tree_from_zip_data
from thunderstore.repository.models import Package, PackageVersion, Team
from thunderstore.repository.package_formats import PackageFormats
from thunderstore.repository.validation.categories import clean_community_categories
Expand Down Expand Up @@ -166,6 +167,7 @@ def save(self, *args, **kwargs):
self.instance.changelog = self.changelog
self.instance.file_size = self.file_size
self.instance.format_spec = self.format_spec

team = self.cleaned_data["team"]
team.ensure_can_upload_package(self.user)
# We just take the namespace with team name for now
Expand All @@ -174,6 +176,11 @@ def save(self, *args, **kwargs):
owner=team, name=self.instance.name, namespace=namespace
)[0]

self.instance.file_tree = create_file_tree_from_zip_data(
name=f"File tree of package: {self.instance.full_version_name}",
zip_data=self.cleaned_data["file"],
)

community_categories = self.cleaned_data.get("community_categories", {})
for community in self.cleaned_data.get("communities", []):
categories = community_categories.get(community.identifier, [])
Expand Down
20 changes: 5 additions & 15 deletions django/thunderstore/repository/tasks/files.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import logging
import tempfile
from zipfile import ZipFile

from celery import shared_task

from thunderstore.core.settings import CeleryQueues
from thunderstore.repository.filetree import create_file_tree_from_zip_data
from thunderstore.repository.models import PackageVersion
from thunderstore.storage.models import DataBlobGroup

logger = logging.getLogger(__name__)

Expand All @@ -28,19 +27,10 @@ def extract_package_version_file_tree(
local_copy.write(chunk)
local_copy.seek(0)

with ZipFile(local_copy) as unzip:
group: DataBlobGroup = DataBlobGroup.objects.create(
name=f"File tree of package: {package_version.full_version_name}"
)
for entry in unzip.infolist():
logger.info(f"Processing {entry.filename}")
if entry.is_dir():
continue
group.add_entry(
unzip.read(entry),
entry.filename,
)
group.set_complete()
group = create_file_tree_from_zip_data(
name=f"File tree of package: {package_version.full_version_name}",
zip_data=local_copy,
)

package_version.file_tree = group
package_version.save(update_fields=("file_tree",))
Expand Down
4 changes: 4 additions & 0 deletions django/thunderstore/repository/tests/test_package_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def test_package_upload(user, manifest_v1_data, community, changelog):
assert version.format_spec == PackageFormats.get_active_format()
assert version.package.namespace == team.get_namespace()
assert version.package.namespace.name == team.name
assert version.file_tree is not None
# Parenthesised so the conditional selects the expected count; without the
# parentheses the whole comparison is the "if" branch and the "else" branch
# asserts the constant 4, which is always truthy.
assert version.file_tree.entries.count() == (3 if changelog is None else 4)


@pytest.mark.django_db
Expand Down Expand Up @@ -112,3 +114,5 @@ def test_package_upload_with_extra_data(user, community, manifest_v1_data, chang
assert listing.categories.count() == 1
assert listing.categories.first() == category
assert listing.has_nsfw_content is True
assert version.file_tree is not None
# Parenthesised so the conditional selects the expected count; without the
# parentheses the whole comparison is the "if" branch and the "else" branch
# asserts the constant 4, which is always truthy.
assert version.file_tree.entries.count() == (3 if changelog is None else 4)

0 comments on commit 3a1b508

Please sign in to comment.