Skip to content

Commit

Permalink
Made sync more tolerant of poorly configured webservers
Browse files Browse the repository at this point in the history
Treeinfo download now ignores responses that look like HTML, because some
webservers return HTTP 200 with an HTML error page rather than HTTP 404.

closes #3599

(cherry picked from commit 298f3a9)
  • Loading branch information
dralley authored and ggainey committed Jun 14, 2024
1 parent e6207b8 commit 846a141
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGES/3599.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Made sync more tolerant of poorly configured webservers.
6 changes: 3 additions & 3 deletions pulp_rpm/app/kickstart/treeinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ class PulpTreeInfo(TreeInfo):
"""

def load(self, f):
def loads(self, s):
"""
Load data from a file.
Load data from a string.
"""
try:
super().load(f)
super().loads(s)
except MissingSectionHeaderError:
raise TypeError(_("Treeinfo file should have INI format"))

Expand Down
17 changes: 15 additions & 2 deletions pulp_rpm/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,8 +404,9 @@ def get_treeinfo_data(remote, remote_url):

namespaces = [".treeinfo", "treeinfo"]
for namespace in namespaces:
treeinfo_url = urlpath_sanitize(remote_url, namespace)
downloader = remote.get_downloader(
url=urlpath_sanitize(remote_url, namespace),
url=treeinfo_url,
silence_errors_for_response_status_codes={403, 404},
)

Expand All @@ -415,7 +416,19 @@ def get_treeinfo_data(remote, remote_url):
continue

treeinfo = PulpTreeInfo()
treeinfo.load(f=result.path)
with open(result.path, "r") as f:
treeinfo_str = f.read()
# some impolitely configured webservers return HTTP 200 with an HTML error page
# when a resource isn't found, instead of returning an HTTP 404 code
if treeinfo_str.startswith("<"):
# in the event that the response looks like HTML rather than an INI file,
# let's just pretend it returned 404
log.debug(
f"Server returned 200 for {treeinfo_url}, but the result looks like HTML"
" rather than treeinfo. Ignoring it."
)
continue
treeinfo.loads(treeinfo_str)
sha256 = result.artifact_attributes["sha256"]
treeinfo_data = TreeinfoData(treeinfo.parsed_sections())

Expand Down

0 comments on commit 846a141

Please sign in to comment.