Skip to content

Commit 43a4de6

Browse files
committed
Add workaround for applying backport from PR-6064 with django-command
1 parent 18cb45e commit 43a4de6

File tree

9 files changed

+149
-32
lines changed

9 files changed

+149
-32
lines changed

.github/workflows/scripts/script.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ cmd_user_prefix bash -c "django-admin makemigrations core --check --dry-run"
123123
cmd_user_prefix bash -c "django-admin makemigrations file --check --dry-run"
124124
cmd_user_prefix bash -c "django-admin makemigrations certguard --check --dry-run"
125125

126+
# See pulpcore.app.util.ENABLE_6064_BACKPORT_WORKAROUND for context.
127+
# This needs to be set here because it relies on service init.
128+
# It is only run in one test scenario so that both cases (with and without the patch) are covered.
129+
if [[ "$TEST" == "s3" ]]; then
130+
cmd_prefix pulpcore-manager backport-patch-6064
131+
fi
132+
126133
# Run unit tests.
127134
cmd_user_prefix bash -c "PULP_DATABASES__default__USER=postgres pytest -v -r sx --color=yes --suppress-no-test-exit-code -p no:pulpcore --pyargs pulpcore.tests.unit"
128135
cmd_user_prefix bash -c "PULP_DATABASES__default__USER=postgres pytest -v -r sx --color=yes --suppress-no-test-exit-code -p no:pulpcore --pyargs pulp_file.tests.unit"

CHANGES/5725.bugfix

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@ On a request for on-demand content in the content app, a corrupted Remote that
22
contains the wrong binary (for that content) prevented other Remotes from being
33
attempted on future requests. Now the last failed Remotes are temporarily ignored
44
and others may be picked.
5+
6+
Because the [original](https://github.com/pulp/pulpcore/pull/6064) contains a migration,
7+
this is backported here as an optional patch which can be enabled by running the
8+
pulpcore-manager command: `backport-patch-6064`.

pulpcore/app/apps.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,15 @@ def ready(self):
250250
super().ready()
251251
from . import checks # noqa
252252

253+
# Backport workaround for https://github.com/pulp/pulpcore/pull/6064
254+
from pulpcore.app.models import RemoteArtifact
255+
from django.db import models
256+
import pulpcore.app.util
257+
258+
if pulpcore.app.util.failed_at_exists(connection, RemoteArtifact):
259+
pulpcore.app.util.ENABLE_6064_BACKPORT_WORKAROUND = True
260+
RemoteArtifact.add_to_class("failed_at", models.DateTimeField(null=True))
261+
253262
post_migrate.connect(
254263
_ensure_default_domain, sender=self, dispatch_uid="ensure_default_domain"
255264
)
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from django.core.management.base import BaseCommand
2+
from gettext import gettext as _
3+
from django.db import connection
4+
from pulpcore.app.models import RemoteArtifact
5+
6+
7+
# Counts the columns matching (table_name, column_name), passed as query
# parameters. The lookup is limited to the schemas on the current search path
# so that a same-named table living in an unrelated schema of the same
# database cannot produce a false positive.
CHECK_COL_QUERY = """
    SELECT COUNT(*)
    FROM information_schema.columns
    WHERE table_name = %s
    AND column_name = %s
    AND table_schema = ANY (current_schemas(false));
"""

# DDL template; the placeholders are filled with the table name and the column
# name (identifiers cannot be passed as query parameters).
MODIFY_QUERY_TMPL = """
    ALTER TABLE {}
    ADD COLUMN {} TIMESTAMPTZ DEFAULT NULL;
"""

# Text shown by `pulpcore-manager backport-patch-6064 --help`.
HELP = _(
    """
    Enables patch backport of #6064 (https://github.com/pulp/pulpcore/pull/6064).

    The fix prevents corrupted remotes from making content unreachable by adding
    a cooldown time, which is tracked by a new field, 'RemoteArtifact.failed_at'.
    This command adds the field to the appropriate table.
    """
)
28+
29+
30+
class Command(BaseCommand):
    """Add the 'failed_at' column to the RemoteArtifact table.

    Running this command opts a deployment into the backport of
    https://github.com/pulp/pulpcore/pull/6064. The command is idempotent:
    when the column is already present it reports so and changes nothing.
    """

    help = HELP

    def add_arguments(self, parser):
        """Register the optional ``--dry-run`` flag."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Run the migration in dry-run mode without saving changes",
        )

    def handle(self, *args, **options):
        """Check for the column and add it when missing.

        With ``--dry-run`` the ALTER TABLE statement is printed instead of
        being executed. Any exception is reported on stderr and re-raised.
        """
        dry_run = options.get("dry_run", False)
        table_name = RemoteArtifact._meta.db_table
        field_name = "failed_at"
        try:
            with connection.cursor() as cursor:
                # Check if column already exists
                cursor.execute(CHECK_COL_QUERY, [table_name, field_name])
                if cursor.fetchone()[0] > 0:
                    self._print_success(f"Field '{table_name}.{field_name}' already exists.")
                    self._print_success("Nothing to be done")
                    return

                # Add field to table
                self._print_info(f"Adding {field_name!r} column to {table_name!r}...")
                modify_query = MODIFY_QUERY_TMPL.format(table_name, field_name)
                if dry_run:
                    self._print_warn("[DRY-RUN] SQL that would be executed:")
                    self._print_info(modify_query)
                else:
                    cursor.execute(modify_query)
                    self._print_success("Done")
        except Exception as e:
            # Top-level boundary of the command: report the failure, then let
            # the exception propagate.
            self._print_error(f"Migration failed: {e}")
            raise

    def _print_info(self, msg):
        """Write an unstyled message to stdout."""
        self.stdout.write(msg)

    def _print_success(self, msg):
        """Write a success-styled message to stdout."""
        self.stdout.write(self.style.SUCCESS(msg))

    def _print_error(self, msg):
        """Write an error message to stderr.

        The command's stderr wrapper applies the error style itself, so the
        message is passed through unstyled.
        """
        self.stderr.write(msg)

    def _print_warn(self, msg):
        """Write a warning-styled message to stdout."""
        self.stdout.write(self.style.WARNING(msg))

pulpcore/app/migrations/0126_remoteartifact_failed_at.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

pulpcore/app/models/content.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,6 @@ class RemoteArtifact(BaseModel, QueryMixin):
722722
sha256 = models.CharField(max_length=64, null=True, db_index=True)
723723
sha384 = models.CharField(max_length=96, null=True, db_index=True)
724724
sha512 = models.CharField(max_length=128, null=True, db_index=True)
725-
failed_at = models.DateTimeField(null=True)
726725

727726
content_artifact = models.ForeignKey(ContentArtifact, on_delete=models.CASCADE)
728727
remote = models.ForeignKey("Remote", on_delete=models.CASCADE)

pulpcore/app/util.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,28 @@
3030
from pulpcore.exceptions.validation import InvalidSignatureError
3131

3232

33+
# Backport workaround for https://github.com/pulp/pulpcore/pull/6064
34+
# If 'pulpcore-manager backport-patch-6064' was run, the field
35+
# RemoteArtifact.failed_at will be set and the backport will take effect.
36+
ENABLE_6064_BACKPORT_WORKAROUND = False
37+
38+
39+
def failed_at_exists(connection, ra_class) -> bool:
    """Return whether the 'failed_at' column exists in the model's database table.

    Args:
        connection: Database connection object providing ``cursor()`` as a
            context manager.
        ra_class: Model class whose ``_meta.db_table`` names the table to inspect.

    Returns:
        True if the table has a 'failed_at' column, False otherwise.
    """
    table_name = ra_class._meta.db_table
    field_name = "failed_at"
    # Restrict the lookup to the schemas on the current search path so that a
    # same-named table in an unrelated schema does not yield a false positive,
    # which would enable the workaround against a table lacking the column.
    check_col_query = """
        SELECT COUNT(*)
        FROM information_schema.columns
        WHERE table_name = %s
        AND column_name = %s
        AND table_schema = ANY (current_schemas(false));
    """
    with connection.cursor() as cursor:
        cursor.execute(check_col_query, [table_name, field_name])
        return cursor.fetchone()[0] > 0
53+
54+
3355
# a little cache so viewset_for_model doesn't have to iterate over every app every time
3456
_model_viewset_cache = {}
3557
STRIPPED_API_ROOT = settings.API_ROOT.strip("/")

pulpcore/content/handler.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
MetricsEmitter,
6161
get_domain,
6262
cache_key,
63+
ENABLE_6064_BACKPORT_WORKAROUND,
6364
)
6465

6566
from pulpcore.exceptions import ( # noqa: E402
@@ -852,12 +853,15 @@ async def _stream_content_artifact(self, request, response, content_artifact):
852853
ClientConnectionError,
853854
)
854855

855-
protection_time = settings.REMOTE_CONTENT_FETCH_FAILURE_COOLDOWN
856-
remote_artifacts = (
857-
content_artifact.remoteartifact_set.select_related("remote")
858-
.order_by_acs()
859-
.exclude(failed_at__gte=timezone.now() - timedelta(seconds=protection_time))
860-
)
856+
remote_artifacts = content_artifact.remoteartifact_set.select_related(
857+
"remote"
858+
).order_by_acs()
859+
860+
if ENABLE_6064_BACKPORT_WORKAROUND:
861+
protection_time = settings.REMOTE_CONTENT_FETCH_FAILURE_COOLDOWN
862+
remote_artifacts = remote_artifacts.exclude(
863+
failed_at__gte=timezone.now() - timedelta(seconds=protection_time)
864+
)
861865
async for remote_artifact in remote_artifacts:
862866
try:
863867
response = await self._stream_remote_artifact(request, response, remote_artifact)
@@ -1166,18 +1170,25 @@ async def finalize():
11661170
try:
11671171
download_result = await downloader.run()
11681172
except DigestValidationError:
1169-
remote_artifact.failed_at = timezone.now()
1170-
await remote_artifact.asave()
1173+
COOLDOWN_MSG = ""
1174+
if ENABLE_6064_BACKPORT_WORKAROUND:
1175+
remote_artifact.failed_at = timezone.now()
1176+
await remote_artifact.asave()
1177+
REMOTE_CONTENT_FETCH_FAILURE_COOLDOWN = (
1178+
settings.REMOTE_CONTENT_FETCH_FAILURE_COOLDOWN
1179+
)
1180+
COOLDOWN_MSG = (
1181+
"- Marking this Remote to be ignored for "
1182+
f"{REMOTE_CONTENT_FETCH_FAILURE_COOLDOWN=}s.\n\n"
1183+
)
11711184
await downloader.session.close()
11721185
close_tcp_connection(request.transport._sock)
1173-
REMOTE_CONTENT_FETCH_FAILURE_COOLDOWN = settings.REMOTE_CONTENT_FETCH_FAILURE_COOLDOWN
11741186
raise RuntimeError(
11751187
f"Pulp tried streaming {remote_artifact.url!r} to "
11761188
"the client, but it failed checksum validation.\n\n"
11771189
"We can't recover from wrong data already sent so we are:\n"
11781190
"- Forcing the connection to close.\n"
1179-
"- Marking this Remote to be ignored for "
1180-
f"{REMOTE_CONTENT_FETCH_FAILURE_COOLDOWN=}s.\n\n"
1191+
f"{COOLDOWN_MSG}"
11811192
"If the Remote is known to be fixed, try resyncing the associated repository.\n"
11821193
"If the Remote is known to be permanently corrupted, try removing "
11831194
"affected Pulp Remote, adding a good one and resyncing.\n"

pulpcore/tests/functional/api/using_plugin/test_content_delivery.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,5 +236,11 @@ def download_from_distribution(content, distribution):
236236
download_from_distribution(content_name, distribution)
237237

238238
# WHEN/THEN (second request)
239-
actual_checksum = download_from_distribution(content_name, distribution)
240-
assert actual_checksum == expected_checksum
239+
from pulpcore.app.util import ENABLE_6064_BACKPORT_WORKAROUND
240+
241+
if ENABLE_6064_BACKPORT_WORKAROUND:
242+
actual_checksum = download_from_distribution(content_name, distribution)
243+
assert actual_checksum == expected_checksum
244+
else:
245+
with pytest.raises(ClientPayloadError, match="Response payload is not completed"):
246+
download_from_distribution(content_name, distribution)

0 commit comments

Comments
 (0)