From 74b5de1f8fab50b150d3fc00f3eabcfacfe4fdd0 Mon Sep 17 00:00:00 2001 From: Quirin Pamp Date: Wed, 10 Aug 2022 16:43:07 +0200 Subject: [PATCH] fixup! Add optimize mode for sync tasks [noissue] --- .../app/serializers/repository_serializers.py | 10 +++++- pulp_deb/app/tasks/synchronizing.py | 34 ++++++++++++------- requirements.txt | 2 +- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/pulp_deb/app/serializers/repository_serializers.py b/pulp_deb/app/serializers/repository_serializers.py index b13a19d31..ab9211fd0 100644 --- a/pulp_deb/app/serializers/repository_serializers.py +++ b/pulp_deb/app/serializers/repository_serializers.py @@ -28,7 +28,15 @@ class AptRepositorySyncURLSerializer(RepositorySyncURLSerializer): """ optimize = serializers.BooleanField( - help_text=_("Whether or not to optimize sync."), required=False, default=True + help_text=_( + "Using optimize sync, will skip the processing of metadata if the checksum has not " + "changed since the last sync. This greately improves re-sync performance in such " + "situations. If you feel the sync is missing something that has changed about the " + "remote repository you are syncing, try using optimize=False for a full re-sync. " + "Consider opening an issue on why we should not optimize in your use case." + ), + required=False, + default=True, ) diff --git a/pulp_deb/app/tasks/synchronizing.py b/pulp_deb/app/tasks/synchronizing.py index 815882132..06ff4258f 100644 --- a/pulp_deb/app/tasks/synchronizing.py +++ b/pulp_deb/app/tasks/synchronizing.py @@ -160,7 +160,7 @@ def synchronize(remote_pk, repository_pk, mirror, optimize): if not remote.url: raise ValueError(_("A remote must have a url specified to synchronize.")) - first_stage = DebFirstStage(remote, optimize, previous_repo_version) + first_stage = DebFirstStage(remote, optimize, mirror, previous_repo_version) DebDeclarativeVersion(first_stage, repository, mirror=mirror).create() @@ -501,7 +501,7 @@ class DebFirstStage(Stage): The first stage of a pulp_deb sync pipeline. """ - def __init__(self, remote, optimize, previous_repo_version, *args, **kwargs): + def __init__(self, remote, optimize, mirror, previous_repo_version, *args, **kwargs): """ The first stage of a pulp_deb sync pipeline. @@ -517,9 +517,15 @@ def __init__(self, remote, optimize, previous_repo_version, *args, **kwargs): self.previous_sync_info = defaultdict(dict, previous_repo_version.info) self.sync_info = defaultdict() self.sync_info["remote_options"] = self._gen_remote_options() + self.sync_info["sync_options"] = { + "optimize": optimize, + "mirror": mirror, + } self.parsed_url = urlparse(remote.url) - self.unchanged_remote = ( + self.sync_options_unchanged = ( self.previous_sync_info["remote_options"] == self.sync_info["remote_options"] + and self.previous_sync_info["sync_options"]["mirror"] + == self.sync_info["sync_options"]["mirror"] ) async def run(self): @@ -556,6 +562,11 @@ def _gen_remote_options(self): "components": self.remote.components, "architectures": self.remote.architectures, "policy": self.remote.policy, + "sync_sources": self.remote.sync_sources, + "sync_udebs": self.remote.sync_udebs, + "sync_installer": self.remote.sync_installer, + "gpgkey": self.remote.gpgkey, + "ignore_missing_package_indices": self.remote.ignore_missing_package_indices, } async def _handle_distribution(self, distribution): @@ -575,7 +586,7 @@ async def _handle_distribution(self, distribution): release_file = await self._create_unit(release_file_dc) if release_file is None: return - if self.optimize and self.unchanged_remote: + if self.optimize and self.sync_options_unchanged: previous_release_file = await _get_previous_release_file( self.previous_repo_version, distribution ) @@ -771,14 +782,6 @@ async def _handle_flat_repo(self, file_references, release_file, release): # Await all tasks await asyncio.gather(*pending_tasks) - def _nested_defaultdict(self, existing=None, **kwargs): - if existing is None: - existing = {} - if not isinstance(existing, dict): - return existing - existing = {key: self._nested_defaultdict(val) for key, val in existing.items()} - return defaultdict(self._nested_defaultdict, existing, **kwargs) - async def _handle_package_index( self, release_file, @@ -845,7 +848,7 @@ async def _handle_package_index( else: raise NoPackageIndexFile(relative_dir=package_index_dir) - if self.optimize and self.unchanged_remote: + if self.optimize and self.sync_options_unchanged: previous_package_index = await _get_previous_package_index( self.previous_repo_version, relative_path ) @@ -1084,6 +1087,11 @@ def _readd_previous_package_indices(previous_version, new_version, distribution) PackageIndex.objects.filter(relative_path__contains=distribution) ) ) + new_version.add_content( + previous_version.get_content( + InstallerFileIndex.objects.filter(relative_path__contains=distribution) + ) + ) @sync_to_async diff --git a/requirements.txt b/requirements.txt index 077f1f215..0de05f607 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -pulpcore>=3.20,<3.25 +pulpcore>=3.21.0.dev,<3.25 python-debian>=0.1.44,<0.2.0