Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.21] Publish perf backports #3228

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/3225.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Load one Artifact checksum type taken package_checksum_type during publish metadata generation.
1 change: 1 addition & 0 deletions CHANGES/3226.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ContentArtifact query performance impovement. Use values() instead of select_related().
56 changes: 27 additions & 29 deletions pulp_rpm/app/tasks/publishing.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,8 @@ def publish_artifacts(self, content, prefix=""):
published_artifacts = []

# Special case for Packages
contentartifact_qs = (
ContentArtifact.objects.filter(content__in=content)
.filter(content__pulp_type=Package.get_pulp_type())
.select_related("content__rpm_package__time_build")
contentartifact_qs = ContentArtifact.objects.filter(content__in=content).filter(
content__pulp_type=Package.get_pulp_type()
)

rel_path_mapping = defaultdict(list)
Expand Down Expand Up @@ -470,8 +468,6 @@ def generate_repo_metadata(
fil_db = cr.FilelistsSqlite(fil_db_path)
oth_db = cr.OtherSqlite(oth_db_path)

packages = Package.objects.filter(pk__in=content)

# We want to support publishing with a different checksum type than the one built-in to the
# package itself, so we need to get the correct checksums somehow if there is an override.
# We must also take into consideration that if the package has not been downloaded the only
Expand All @@ -482,45 +478,47 @@ def generate_repo_metadata(
# simple foreign keys and avoid messing with the many-to-many relationship, which doesn't
# work with select_related() and performs poorly with prefetch_related(). This is fine
# because we know that Packages should only ever have one artifact per content.
contentartifact_qs = (
ContentArtifact.objects.filter(content__in=packages.only("pk"))
.select_related(
# content__rpm_package is a bit of a hack, exploiting the way django sets up model
# inheritance, but it works and is unlikely to break. All content artifacts being
# accessed here have an associated Package since they originally came from the
# Package queryset.
"artifact",
"content__rpm_package",
)
.only("artifact", "content__rpm_package__checksum_type", "content__rpm_package__pkgId")
)
fields = [
"content_id",
"content__rpm_package__checksum_type",
"content__rpm_package__pkgId",
]
artifact_checksum = None
if package_checksum_type:
package_checksum_type = package_checksum_type.lower()
artifact_checksum = f"artifact__{package_checksum_type}"
fields.append(artifact_checksum)

contentartifact_qs = ContentArtifact.objects.filter(
content__in=content, content__pulp_type=Package.get_pulp_type()
).values(*fields)

pkg_to_hash = {}
for ca in contentartifact_qs.iterator():
if package_checksum_type:
package_checksum_type = package_checksum_type.lower()
pkgid = getattr(ca.artifact, package_checksum_type, None)
pkgid = ca.get(artifact_checksum, None)

if not package_checksum_type or not pkgid:
if ca.content.rpm_package.checksum_type not in settings.ALLOWED_CONTENT_CHECKSUMS:
if ca["content__rpm_package__checksum_type"] not in settings.ALLOWED_CONTENT_CHECKSUMS:
raise ValueError(
"Package {} as content unit {} contains forbidden checksum type '{}', "
"thus can't be published. {}".format(
ca.content.rpm_package.nevra,
ca.content.pk,
ca.content.rpm_package.checksum_type,
"Package with pkgId {} as content unit {} contains forbidden checksum type "
"'{}', thus can't be published. {}".format(
ca["content__rpm_package__pkgId"],
ca["content_id"],
ca["content__rpm_package__checksum_type"],
ALLOWED_CHECKSUM_ERROR_MSG,
)
)
package_checksum_type = ca.content.rpm_package.checksum_type
pkgid = ca.content.rpm_package.pkgId
package_checksum_type = ca["content__rpm_package__checksum_type"]
pkgid = ca["content__rpm_package__pkgId"]

pkg_to_hash[ca.content_id] = (package_checksum_type, pkgid)
pkg_to_hash[ca["content_id"]] = (package_checksum_type, pkgid)

# TODO: this is meant to be a !! *temporary* !! fix for
# https://github.com/pulp/pulp_rpm/issues/2407
pkg_pks_to_ignore = set()
latest_build_time_by_nevra = defaultdict(list)
packages = Package.objects.filter(pk__in=content)
for pkg in packages.only(
"pk", "name", "epoch", "version", "release", "arch", "time_build"
).iterator():
Expand Down
14 changes: 5 additions & 9 deletions pulp_rpm/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,19 +173,15 @@ def add_metadata_to_publication(publication, version, prefix=""):
published_artifacts = []

# Handle packages
pkg_data = (
ContentArtifact.objects.filter(
content__in=version.content, content__pulp_type=Package.get_pulp_type()
)
.select_related("content__rpm_package")
.only("pk", "artifact", "content", "content__rpm_package__pkgId")
)
pkg_data = ContentArtifact.objects.filter(
content__in=version.content, content__pulp_type=Package.get_pulp_type()
).values("pk", "content__rpm_package__pkgId")
for ca in pkg_data.iterator():
for relative_path in pkgid_to_location_href[str(version.repository.pk)][
ca.content.rpm_package.pkgId
ca["content__rpm_package__pkgId"]
]:
pa = PublishedArtifact(
content_artifact=ca,
content_artifact_id=ca["pk"],
relative_path=os.path.join(prefix, relative_path),
publication=publication,
)
Expand Down
Loading