From 6cf3a84111133cb2c2b10c11bfd0ea34272efdf5 Mon Sep 17 00:00:00 2001 From: Tobias Grigo Date: Wed, 29 Jun 2022 00:00:59 +0200 Subject: [PATCH] Add optimize mode for sync tasks This will change the default behavior of synchronize to always use optimize unless it is explicitly set to False. closes #564 --- CHANGES/564.feature | 1 + .../0020_aptrepository_last_sync_details.py | 18 ++ pulp_deb/app/models/repository.py | 3 + pulp_deb/app/serializers/__init__.py | 6 +- .../app/serializers/repository_serializers.py | 16 +- pulp_deb/app/tasks/synchronizing.py | 72 +++++++- pulp_deb/app/viewsets/repository.py | 13 +- .../functional/api/test_sync_optimize.py | 161 ++++++++++++++++++ pulp_deb/tests/functional/constants.py | 9 + 9 files changed, 285 insertions(+), 14 deletions(-) create mode 100644 CHANGES/564.feature create mode 100644 pulp_deb/app/migrations/0020_aptrepository_last_sync_details.py create mode 100644 pulp_deb/tests/functional/api/test_sync_optimize.py diff --git a/CHANGES/564.feature b/CHANGES/564.feature new file mode 100644 index 000000000..dbde64dff --- /dev/null +++ b/CHANGES/564.feature @@ -0,0 +1 @@ +Added optimize mode to repository synchronization. diff --git a/pulp_deb/app/migrations/0020_aptrepository_last_sync_details.py b/pulp_deb/app/migrations/0020_aptrepository_last_sync_details.py new file mode 100644 index 000000000..24f581ca2 --- /dev/null +++ b/pulp_deb/app/migrations/0020_aptrepository_last_sync_details.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.13 on 2022-06-28 16:09 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('deb', '0019_immutable_metadata_constraints'), + ] + + operations = [ + migrations.AddField( + model_name='aptrepository', + name='last_sync_details', + field=models.JSONField(default=dict), + ), + ] diff --git a/pulp_deb/app/models/repository.py b/pulp_deb/app/models/repository.py index 80ebbed89..35e8d951f 100644 --- a/pulp_deb/app/models/repository.py +++ b/pulp_deb/app/models/repository.py @@ -1,3 +1,4 @@ +from django.db import models from pulpcore.plugin.models import Repository from pulpcore.plugin.repo_version_utils import remove_duplicates, validate_repo_version @@ -39,6 +40,8 @@ class AptRepository(Repository): AptRemote, ] + last_sync_details = models.JSONField(default=dict) + class Meta: default_related_name = "%(app_label)s_%(model_name)s" diff --git a/pulp_deb/app/serializers/__init__.py b/pulp_deb/app/serializers/__init__.py index b792264f3..83cf769d9 100644 --- a/pulp_deb/app/serializers/__init__.py +++ b/pulp_deb/app/serializers/__init__.py @@ -23,4 +23,8 @@ from .remote_serializers import AptRemoteSerializer -from .repository_serializers import AptRepositorySerializer, CopySerializer +from .repository_serializers import ( + AptRepositorySerializer, + AptRepositorySyncURLSerializer, + CopySerializer, +) diff --git a/pulp_deb/app/serializers/repository_serializers.py b/pulp_deb/app/serializers/repository_serializers.py index 1b08daa18..b13a19d31 100644 --- a/pulp_deb/app/serializers/repository_serializers.py +++ b/pulp_deb/app/serializers/repository_serializers.py @@ -1,5 +1,9 @@ from gettext import gettext as _ -from pulpcore.plugin.serializers import RepositorySerializer, validate_unknown_fields +from pulpcore.plugin.serializers import ( + RepositorySerializer, + RepositorySyncURLSerializer, + validate_unknown_fields, +) from pulp_deb.app.models import AptRepository @@ -18,6 +22,16 @@ class Meta: model = AptRepository +class AptRepositorySyncURLSerializer(RepositorySyncURLSerializer): + """ + A Serializer for AptRepository Sync. + """ + + optimize = serializers.BooleanField( + help_text=_("Whether or not to optimize sync."), required=False, default=True + ) + + class CopySerializer(serializers.Serializer): """ A serializer for Content Copy API. diff --git a/pulp_deb/app/tasks/synchronizing.py b/pulp_deb/app/tasks/synchronizing.py index 24ed440e0..7b4939437 100644 --- a/pulp_deb/app/tasks/synchronizing.py +++ b/pulp_deb/app/tasks/synchronizing.py @@ -22,7 +22,6 @@ Artifact, ProgressReport, Remote, - Repository, ) from pulpcore.plugin.stages import ( @@ -51,6 +50,7 @@ PackageReleaseComponent, InstallerPackage, AptRemote, + AptRepository, ) from pulp_deb.app.serializers import ( @@ -137,7 +137,7 @@ def __init__(self, release_file_path, unknown_value, *args, **kwargs): pass -def synchronize(remote_pk, repository_pk, mirror): +def synchronize(remote_pk, repository_pk, mirror, optimize): """ Sync content from the remote repository. @@ -147,18 +147,23 @@ def synchronize(remote_pk, repository_pk, mirror): remote_pk (str): The remote PK. repository_pk (str): The repository PK. mirror (bool): True for mirror mode, False for additive. + optimize (bool): Optimize mode. Raises: ValueError: If the remote does not specify a URL to sync """ remote = AptRemote.objects.get(pk=remote_pk) - repository = Repository.objects.get(pk=repository_pk) + repository = AptRepository.objects.get(pk=repository_pk) if not remote.url: raise ValueError(_("A remote must have a url specified to synchronize.")) - first_stage = DebFirstStage(remote) + last_sync_details = {} + if optimize: + last_sync_details = repository.last_sync_details + + first_stage = DebFirstStage(remote, optimize, last_sync_details) DebDeclarativeVersion(first_stage, repository, mirror=mirror).create() @@ -498,7 +503,7 @@ class DebFirstStage(Stage): The first stage of a pulp_deb sync pipeline. """ - def __init__(self, remote, *args, **kwargs): + def __init__(self, remote, optimize, last_sync_details, *args, **kwargs): """ The first stage of a pulp_deb sync pipeline. @@ -508,6 +513,8 @@ def __init__(self, remote, *args, **kwargs): """ super().__init__(*args, **kwargs) self.remote = remote + self.optimize = optimize + self.last_sync_details = last_sync_details self.parsed_url = urlparse(remote.url) async def run(self): @@ -536,6 +543,16 @@ def _to_d_artifact(self, relative_path, data=None): deferred_download=False, ) + def _has_remote_changed(self, distribution, components, architectures): + compare = { + "components": components, + "architectures": architectures, + } + for key in self.last_sync_details[distribution]["remote_options"]: + if self.last_sync_details[distribution]["remote_options"][key] != compare[key]: + return True + return False + async def _handle_distribution(self, distribution): log.info(_('Downloading Release file for distribution: "{}"').format(distribution)) # Create release_file @@ -553,7 +570,31 @@ async def _handle_distribution(self, distribution): release_file = await self._create_unit(release_file_dc) if release_file is None: return - # Create release object + if self.optimize and distribution in self.last_sync_details: + if not self._has_remote_changed( + distribution, self.remote.components, self.remote.architectures + ): + if ( + self.last_sync_details[distribution]["release_file_artifact_set_sha256"] + == release_file.artifact_set_sha256 + ): + log.info( + _(f"ReleaseFile has not changed for distribution {distribution}. Skip sync") + ) + async with ProgressReport( + message="Skipping ReleaseFile sync (no change from previous sync)", + code="sync.release_file.was_skipped", + ) as pb: + await pb.aincrement() + return + self.last_sync_details[distribution] = { + "release_file_artifact_set_sha256": release_file.artifact_set_sha256, + "remote_options": { + "components": self.remote.components, + "architectures": self.remote.architectures, + }, + "package_indices": [], + } release_unit = Release( codename=release_file.codename, suite=release_file.suite, distribution=distribution ) @@ -799,6 +840,25 @@ async def _handle_package_index( else: raise NoPackageIndexFile(relative_dir=package_index_dir) + if self.optimize: + for package in self.last_sync_details[release_file.distribution]["package_indices"]: + if package_index.artifact_set_sha256 == package["artifact_set_sha256"]: + log.warning( + _(f"PackageIndex has not changed for path: {relative_path}. Skip sync.") + ) + async with ProgressReport( + message="Skipping PackageIndex sync (no change from previous sync)", + code="sync.package_index.was_skipped", + ) as pb: + await pb.aincrement() + return + self.last_sync_details[release_file.distribution]["package_indices"].append( + { + "relative_path": package_index.relative_path, + "artifact_set_sha256": package_index.artifact_set_sha256, + } + ) + # Interpret policy to download Artifacts or not deferred_download = self.remote.policy != Remote.IMMEDIATE # parse package_index diff --git a/pulp_deb/app/viewsets/repository.py b/pulp_deb/app/viewsets/repository.py index c18b86a26..2d62c809f 100644 --- a/pulp_deb/app/viewsets/repository.py +++ b/pulp_deb/app/viewsets/repository.py @@ -5,11 +5,10 @@ from rest_framework import viewsets from rest_framework.serializers import ValidationError as DRFValidationError +from pulp_deb.app.serializers import AptRepositorySyncURLSerializer + from pulpcore.plugin.actions import ModifyRepositoryActionMixin -from pulpcore.plugin.serializers import ( - AsyncOperationResponseSerializer, - RepositorySyncURLSerializer, -) +from pulpcore.plugin.serializers import AsyncOperationResponseSerializer from pulpcore.plugin.models import RepositoryVersion from pulpcore.plugin.tasking import dispatch from pulpcore.plugin.viewsets import ( @@ -42,13 +41,13 @@ class AptRepositoryViewSet(RepositoryViewSet, ModifyRepositoryActionMixin): summary="Sync from remote", responses={202: AsyncOperationResponseSerializer}, ) - @action(detail=True, methods=["post"], serializer_class=RepositorySyncURLSerializer) + @action(detail=True, methods=["post"], serializer_class=AptRepositorySyncURLSerializer) def sync(self, request, pk): """ Dispatches a sync task. """ repository = self.get_object() - serializer = RepositorySyncURLSerializer( + serializer = AptRepositorySyncURLSerializer( data=request.data, context={"request": request, "repository_pk": pk} ) @@ -56,6 +55,7 @@ def sync(self, request, pk): serializer.is_valid(raise_exception=True) remote = serializer.validated_data.get("remote", repository.remote) mirror = serializer.validated_data.get("mirror", True) + optimize = serializer.validated_data.get("optimize") result = dispatch( func=tasks.synchronize, @@ -65,6 +65,7 @@ def sync(self, request, pk): "remote_pk": remote.pk, "repository_pk": repository.pk, "mirror": mirror, + "optimize": optimize, }, ) return OperationPostponedResponse(result, request) diff --git a/pulp_deb/tests/functional/api/test_sync_optimize.py b/pulp_deb/tests/functional/api/test_sync_optimize.py new file mode 100644 index 000000000..1a550fbdf --- /dev/null +++ b/pulp_deb/tests/functional/api/test_sync_optimize.py @@ -0,0 +1,161 @@ +from pulp_smash.pulp3.bindings import monitor_task +from pulp_smash.pulp3.utils import gen_repo +import pytest + +from pulp_deb.tests.functional.constants import ( + DEB_FIXTURE_ARCH, + DEB_FIXTURE_ARCH_UPDATE, + DEB_FIXTURE_COMPONENT, + DEB_FIXTURE_COMPONENT_UPDATE, + DEB_FIXTURE_SINGLE_DIST, + DEB_FIXTURE_URL, + DEB_FIXTURE_DISTRIBUTIONS, + DEB_FIXTURE_URL_UPDATE, + DEB_REPORT_CODE_SKIP_PACKAGE, + DEB_REPORT_CODE_SKIP_RELEASE, +) +from pulp_deb.tests.functional.utils import gen_deb_remote + +from pulpcore.client.pulp_deb import RepositorySyncURL + + +def test_sync_optimize_skip_unchanged_release_file( + gen_remote, gen_repository, has_task_skipped, synchronize_and_get_task +): + """Test whether synchronization is skipped when the ReleaseFile of a remote + has not been changed. + + 1. Create a repository and a remote. + 2. Sync the repository. + 3. Assert that the sync was not skipped. + 4. Sync the repository again. + 5. Assert that this time the sync was skipped. + """ + + repo = gen_repository() + remote = gen_remote() + task = synchronize_and_get_task(remote, repo) + + assert not has_task_skipped(task) + + task_skip = synchronize_and_get_task(remote, repo) + + assert has_task_skipped(task_skip) + + +@pytest.mark.parametrize( + "remote_params, remote_diff_params", + [ + ( + [DEB_FIXTURE_URL, DEB_FIXTURE_DISTRIBUTIONS, None, None], + [DEB_FIXTURE_URL_UPDATE, DEB_FIXTURE_DISTRIBUTIONS, None, None], + ), + ( + [DEB_FIXTURE_URL, DEB_FIXTURE_SINGLE_DIST, DEB_FIXTURE_COMPONENT, None], + [DEB_FIXTURE_URL, DEB_FIXTURE_SINGLE_DIST, DEB_FIXTURE_COMPONENT_UPDATE, None], + ), + ( + [DEB_FIXTURE_URL, DEB_FIXTURE_SINGLE_DIST, None, DEB_FIXTURE_ARCH], + [DEB_FIXTURE_URL, DEB_FIXTURE_SINGLE_DIST, None, DEB_FIXTURE_ARCH_UPDATE], + ), + ( + [DEB_FIXTURE_URL, DEB_FIXTURE_SINGLE_DIST, DEB_FIXTURE_COMPONENT, None], + [DEB_FIXTURE_URL_UPDATE, DEB_FIXTURE_SINGLE_DIST, DEB_FIXTURE_COMPONENT_UPDATE, None], + ), + ], +) +def test_sync_optimize_no_skip( + gen_remote, + gen_repository, + has_task_skipped, + remote_params, + remote_diff_params, + synchronize_and_get_task, +): + """Test whether repository synchronizations have not been skipped for certain conditions. + + The following cases are tested: + + * `Sync a repository with updated ReleaseFile.`_ + * `Sync a repository with same ReleaseFile but updated Components.`_ + * `Sync a repository with same ReleaseFile but updated Architectures.`_ + * `Sync a repository with updated ReleaseFile and updated Components.`_ + + 1. Create a repository and a remote. + 2. Synchronize the repository. + 3. Assert that the synchronization was not skipped. + 4. Create a new remote with different conditions. + 5. Synchronize the repository with the new remote. + 6. Assert that the synchronization was not skipped. + """ + + # TODO: First case needs still some investigation as it still fails. + repo = gen_repository() + remote = gen_remote( + url=remote_params[0], + distributions=remote_params[1], + components=remote_params[2], + architectures=remote_params[3], + ) + task = synchronize_and_get_task(remote, repo) + assert not has_task_skipped(task) + remote_diff = gen_remote( + url=remote_diff_params[0], + distributions=remote_diff_params[1], + components=remote_diff_params[2], + architectures=remote_diff_params[3], + ) + task_diff = synchronize_and_get_task(remote_diff, repo) + assert not has_task_skipped(task_diff) + + +@pytest.fixture +def has_task_skipped(): + """Checks if a given task has skipped either ReleaseFile or + PackageIndex sync.""" + + def _has_task_skipped(task): + for report in task.progress_reports: + if ( + report.code == DEB_REPORT_CODE_SKIP_RELEASE + or report.code == DEB_REPORT_CODE_SKIP_PACKAGE + ): + return True + return False + + return _has_task_skipped + + +@pytest.fixture +def synchronize_and_get_task(apt_repository_api): + """Synchronizes a given repository with a given remote and + returns the monitored task.""" + + def _synchronize_and_get_task(remote, repo): + repository_sync_data = RepositorySyncURL(remote=remote.pulp_href) + sync_response = apt_repository_api.sync(repo.pulp_href, repository_sync_data) + return monitor_task(sync_response.task) + + return _synchronize_and_get_task + + +@pytest.fixture +def gen_repository(apt_repository_api, gen_object_with_cleanup): + """Generates a semi-random repository with cleanup.""" + + def _gen_repository(): + return gen_object_with_cleanup(apt_repository_api, gen_repo()) + + return _gen_repository + + +@pytest.fixture +def gen_remote(apt_remote_api, gen_object_with_cleanup): + """Generates a remote with cleanup. Also allows for parameters to be set manually.""" + + def _gen_remote(url=DEB_FIXTURE_URL, distributions=DEB_FIXTURE_DISTRIBUTIONS, **kwargs): + return gen_object_with_cleanup( + apt_remote_api, gen_deb_remote(url=url, distributions=distributions, **kwargs) + ) + + return _gen_remote diff --git a/pulp_deb/tests/functional/constants.py b/pulp_deb/tests/functional/constants.py index d53cb2daf..8546137b4 100644 --- a/pulp_deb/tests/functional/constants.py +++ b/pulp_deb/tests/functional/constants.py @@ -51,7 +51,13 @@ def _clean_dict(d): DEB_SINGLE_REQUEST_UPLOAD_PATH = urljoin(BASE_PATH, "deb/upload/") DEB_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "debian/") +DEB_FIXTURE_URL_UPDATE = urljoin(PULP_FIXTURES_BASE_URL, "debian_update/") DEB_FIXTURE_DISTRIBUTIONS = "ragnarok nosuite" +DEB_FIXTURE_SINGLE_DIST = "ragnarok" +DEB_FIXTURE_COMPONENT = "asgard" +DEB_FIXTURE_COMPONENT_UPDATE = "jotunheimr" +DEB_FIXTURE_ARCH = "ppc64" +DEB_FIXTURE_ARCH_UPDATE = "armeb" DEB_FIXTURE_SUMMARY = _clean_dict( { @@ -85,6 +91,9 @@ def _clean_dict(d): DEB_FIXTURE_PACKAGE_COUNT = DEB_FIXTURE_SUMMARY.get(DEB_PACKAGE_NAME, 0) +DEB_REPORT_CODE_SKIP_RELEASE = "sync.release_file.was_skipped" +DEB_REPORT_CODE_SKIP_PACKAGE = "sync.package_index.was_skipped" + DEB_PACKAGE_RELPATH = "pool/asgard/o/odin/odin_1.0_ppc64.deb" DEB_PACKAGE_URL = urljoin(DEB_FIXTURE_URL, DEB_PACKAGE_RELPATH) DEB_GENERIC_CONTENT_RELPATH = "dists/ragnarok/asgard/binary-armeb/Release"