From cb14c932142a2e2dae814c86ec4810da32af9328 Mon Sep 17 00:00:00 2001 From: Grant Gainey Date: Fri, 7 Jun 2024 12:44:35 -0400 Subject: [PATCH] Dropped repo_concurrency in favor of a setting. Defaults to 5-workers. --- pulp_rpm/app/serializers/prune.py | 11 ----------- pulp_rpm/app/settings.py | 1 + pulp_rpm/app/tasks/prune.py | 17 +++++++++++++++-- pulp_rpm/app/viewsets/prune.py | 1 - 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pulp_rpm/app/serializers/prune.py b/pulp_rpm/app/serializers/prune.py index 4b3e09f30..d59781a09 100644 --- a/pulp_rpm/app/serializers/prune.py +++ b/pulp_rpm/app/serializers/prune.py @@ -33,17 +33,6 @@ class PrunePackagesSerializer(serializers.Serializer, ValidateFieldsMixin): default=14, ) - repo_concurrency = serializers.IntegerField( - help_text=( - "Number of concurrent workers to use to do the pruning. " - "If not set then the default value will be used." - ), - allow_null=True, - required=False, - min_value=1, - default=10, - ) - dry_run = serializers.BooleanField( help_text=_( "Determine what would-be-pruned and log the list of packages. 
" diff --git a/pulp_rpm/app/settings.py b/pulp_rpm/app/settings.py index 02186b359..79add69ca 100644 --- a/pulp_rpm/app/settings.py +++ b/pulp_rpm/app/settings.py @@ -16,3 +16,4 @@ SOLVER_DEBUG_LOGS = True RPM_METADATA_USE_REPO_PACKAGE_TIME = False NOCACHE_LIST = ["repomd.xml", "repomd.xml.asc", "repomd.xml.key"] +PRUNE_WORKERS_MAX = 5 diff --git a/pulp_rpm/app/tasks/prune.py b/pulp_rpm/app/tasks/prune.py index 6d371e1da..606924bc6 100644 --- a/pulp_rpm/app/tasks/prune.py +++ b/pulp_rpm/app/tasks/prune.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta from logging import getLogger, DEBUG +from django.conf import settings from django.db.models import F, Subquery from django.utils import timezone @@ -105,7 +106,6 @@ def prune_repo_packages(repo_pk, keep_days, dry_run): def prune_packages( repo_pks, keep_days=14, - repo_concurrency=10, dry_run=False, ): """ @@ -126,6 +126,19 @@ def prune_packages( repos_to_prune = RpmRepository.objects.filter(pk__in=repo_pks) task_group = TaskGroup.current() + # We want to be able to limit the number of available-workers that prune will consume, + # so that pulp can continue to work while doing a prune. We accomplish this by creating + # a reserved-resource string for each repo-prune-task based on that repo's index in + # the dispatch loop, mod number-of-workers-to-consume. + # + # By default, prune will consume up to 5 workers. + # + # (This comment and the code below are based on + # https://github.com/pulp/pulpcore/blob/main/pulpcore/app/tasks/importer.py#L503-L512 + # When we have a generic-approach to throttling mass-task-spawning, both places should + # be refactored to take advantage thereof.) + prune_workers = int(settings.get("PRUNE_WORKERS_MAX", 5)) + gpr = GroupProgressReport( message="Pruning old Packages", code="rpm.package.prune", @@ -140,7 +153,7 @@ def prune_packages( # This will keep an "all repositories" prune from locking up all the workers # until all repositories are completed. 
for index, a_repo in enumerate(repos_to_prune): - worker_rsrc = f"rpm-prune-worker-{index % repo_concurrency}" + worker_rsrc = f"rpm-prune-worker-{index % prune_workers}" exclusive_resources = [worker_rsrc, a_repo] dispatch( diff --git a/pulp_rpm/app/viewsets/prune.py b/pulp_rpm/app/viewsets/prune.py index 4080b5738..1037871e1 100644 --- a/pulp_rpm/app/viewsets/prune.py +++ b/pulp_rpm/app/viewsets/prune.py @@ -66,7 +66,6 @@ def prune_packages(self, request): kwargs={ "repo_pks": repos_to_prune_pks, "keep_days": serializer.validated_data["keep_days"], - "repo_concurrency": serializer.validated_data["repo_concurrency"], "dry_run": serializer.validated_data["dry_run"], }, )