
Commit cb14c93
Dropped repo_concurrency in favor of a setting.
Defaults to 5 workers.
ggainey committed Jun 7, 2024
1 parent 4d17deb commit cb14c93
Showing 4 changed files with 16 additions and 14 deletions.
11 changes: 0 additions & 11 deletions pulp_rpm/app/serializers/prune.py
@@ -33,17 +33,6 @@ class PrunePackagesSerializer(serializers.Serializer, ValidateFieldsMixin):
         default=14,
     )
 
-    repo_concurrency = serializers.IntegerField(
-        help_text=(
-            "Number of concurrent workers to use to do the pruning. "
-            "If not set then the default value will be used."
-        ),
-        allow_null=True,
-        required=False,
-        min_value=1,
-        default=10,
-    )
-
     dry_run = serializers.BooleanField(
         help_text=_(
             "Determine what would-be-pruned and log the list of packages. "
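
With repo_concurrency dropped from the serializer, a prune request now carries only the repository selector, keep_days, and dry_run. Below is a minimal sketch of such a call; the /pulp/api/v3/rpm/prune/ route, the repo_hrefs selector, and the credentials are assumptions about pulp_rpm's prune API rather than details shown in this diff:

# Hypothetical request against a Pulp instance; URL, credentials, and the
# repo_hrefs selector are assumptions, not part of this commit.
import requests

payload = {
    "repo_hrefs": ["*"],  # assumed selector meaning "all RPM repositories"
    "keep_days": 14,      # matches the serializer default above
    "dry_run": True,      # only report what would be pruned
    # "repo_concurrency" is no longer accepted; fan-out now comes from PRUNE_WORKERS_MAX
}
response = requests.post(
    "https://pulp.example.com/pulp/api/v3/rpm/prune/",
    json=payload,
    auth=("admin", "password"),
)
response.raise_for_status()
print(response.json())  # task-group href for the dispatched prune tasks
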
1 change: 1 addition & 0 deletions pulp_rpm/app/settings.py
@@ -16,3 +16,4 @@
 SOLVER_DEBUG_LOGS = True
 RPM_METADATA_USE_REPO_PACKAGE_TIME = False
 NOCACHE_LIST = ["repomd.xml", "repomd.xml.asc", "repomd.xml.key"]
+PRUNE_WORKERS_MAX = 5
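
Because Pulp reads its settings through dynaconf, deployments can raise or lower this ceiling without a code change. A minimal sketch of an override, assuming the conventional /etc/pulp/settings.py location (an equivalent PULP_PRUNE_WORKERS_MAX environment variable should also work); the value 8 is just an example:

# /etc/pulp/settings.py -- hypothetical deployment override, not part of this commit.
# Let the prune task-group spread across up to 8 workers instead of the default 5.
PRUNE_WORKERS_MAX = 8
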
17 changes: 15 additions & 2 deletions pulp_rpm/app/tasks/prune.py
@@ -1,6 +1,7 @@
 from datetime import datetime, timedelta
 from logging import getLogger, DEBUG
 
+from django.conf import settings
 from django.db.models import F, Subquery
 from django.utils import timezone
 
@@ -105,7 +106,6 @@ def prune_repo_packages(repo_pk, keep_days, dry_run):
 def prune_packages(
     repo_pks,
     keep_days=14,
-    repo_concurrency=10,
     dry_run=False,
 ):
     """
@@ -126,6 +126,19 @@ def prune_packages(
     repos_to_prune = RpmRepository.objects.filter(pk__in=repo_pks)
     task_group = TaskGroup.current()
 
+    # We want to be able to limit the number of available-workers that prune will consume,
+    # so that pulp can continue to work while doing a prune. We accomplish this by creating
+    # a reserved-resource string for each repo-prune-task based on that repo's index in
+    # the dispatch loop, mod number-of-workers-to-consume.
+    #
+    # By default, prune will consume up to 5 workers.
+    #
+    # (This comment and code below based on
+    # https://github.com/pulp/pulpcore/blob/main/pulpcore/app/tasks/importer.py#L503-L512
+    # When we have a generic-approach to throttling mass-task-spawning, both places should
+    # be refactored to take advantage thereof.)
+    prune_workers = int(settings.get("PRUNE_WORKERS_MAX", 5))
+
     gpr = GroupProgressReport(
         message="Pruning old Packages",
         code="rpm.package.prune",
@@ -140,7 +153,7 @@ def prune_packages(
     # This will keep an "all repositories" prune from locking up all the workers
     # until all repositories are completed.
     for index, a_repo in enumerate(repos_to_prune):
-        worker_rsrc = f"rpm-prune-worker-{index % repo_concurrency}"
+        worker_rsrc = f"rpm-prune-worker-{index % prune_workers}"
         exclusive_resources = [worker_rsrc, a_repo]
 
         dispatch(
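
The throttling above relies only on resource-name arithmetic: tasks dispatched with the same exclusive resource run one at a time, so folding the repo index mod prune_workers yields at most that many concurrent prune tasks. A standalone sketch of the bucketing (plain Python, hypothetical repo names, no Pulp imports):

# Illustration of the modulo bucketing; not part of the commit.
prune_workers = 5  # mirrors the PRUNE_WORKERS_MAX default
repos = [f"repo-{n}" for n in range(12)]  # hypothetical repositories

buckets = {}
for index, repo in enumerate(repos):
    worker_rsrc = f"rpm-prune-worker-{index % prune_workers}"
    buckets.setdefault(worker_rsrc, []).append(repo)

for rsrc, members in sorted(buckets.items()):
    # Every repo in a bucket shares one reserved-resource string, so its prune
    # tasks are serialized: five buckets means at most five tasks in flight.
    print(rsrc, members)
# rpm-prune-worker-0 ['repo-0', 'repo-5', 'repo-10']
# rpm-prune-worker-1 ['repo-1', 'repo-6', 'repo-11']
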
1 change: 0 additions & 1 deletion pulp_rpm/app/viewsets/prune.py
@@ -66,7 +66,6 @@ def prune_packages(self, request):
             kwargs={
                 "repo_pks": repos_to_prune_pks,
                 "keep_days": serializer.validated_data["keep_days"],
-                "repo_concurrency": serializer.validated_data["repo_concurrency"],
                 "dry_run": serializer.validated_data["dry_run"],
             },
         )
