From 39e878f91ab3d2c546dde0b15ca9f8837f98f1f0 Mon Sep 17 00:00:00 2001 From: Tobias Grigo Date: Mon, 20 Jan 2025 14:38:36 +0100 Subject: [PATCH] Refactor DebUpdateReleaseFileAttributes and add unit tests [noissue] --- pulp_deb/app/tasks/synchronizing.py | 280 +++++++++++------- .../tests/unit/test_release_file_helpers.py | 220 ++++++++++++++ 2 files changed, 389 insertions(+), 111 deletions(-) create mode 100644 pulp_deb/tests/unit/test_release_file_helpers.py diff --git a/pulp_deb/app/tasks/synchronizing.py b/pulp_deb/app/tasks/synchronizing.py index 2b00a201..aa8f6678 100644 --- a/pulp_deb/app/tasks/synchronizing.py +++ b/pulp_deb/app/tasks/synchronizing.py @@ -12,7 +12,7 @@ from collections import defaultdict from tempfile import NamedTemporaryFile from debian import deb822 -from urllib.parse import quote, urlparse, urlunparse, urljoin +from urllib.parse import quote, urlparse, urlunparse from django.conf import settings from django.db.utils import IntegrityError @@ -351,133 +351,117 @@ def __init__(self, remote, *args, **kwargs): super().__init__(*args, **kwargs) self.remote = remote self.gpgkey = remote.gpgkey + self.gpg = None if self.gpgkey: gnupghome = os.path.join(os.getcwd(), "gpg-home") - os.makedirs(gnupghome) + os.makedirs(gnupghome, exist_ok=True) self.gpg = gnupg.GPG(gpgbinary="/usr/bin/gpg", gnupghome=gnupghome) import_res = self.gpg.import_keys(self.gpgkey) if import_res.count == 0: log.warning(_("Key import failed.")) - pass async def run(self): """ - Parse ReleaseFile content units. - - Update release content with information obtained from its artifact. + Parse ReleaseFile content units, verify GPG if needed, and update attributes. """ - dropped_count = 0 async with ProgressReport( message="Update ReleaseFile units", code="update.release_file" ) as pb: async for d_content in self.items(): - if isinstance(d_content.content, ReleaseFile): - release_file = d_content.content - da_names = { - os.path.basename(da.relative_path): da for da in d_content.d_artifacts - } - if "Release" in da_names: - if "Release.gpg" in da_names: - if self.gpgkey: - with NamedTemporaryFile() as tmp_file: - tmp_file.write(da_names["Release"].artifact.file.read()) - tmp_file.flush() - verified = self.gpg.verify_file( - da_names["Release.gpg"].artifact.file, tmp_file.name - ) - if verified.valid: - log.info(_("Verification of Release successful.")) - release_file_artifact = da_names["Release"].artifact - release_file.relative_path = da_names["Release"].relative_path - else: - log.warning(_("Verification of Release failed. Dropping it.")) - d_content.d_artifacts.remove(da_names.pop("Release")) - d_content.d_artifacts.remove(da_names.pop("Release.gpg")) - dropped_count += 1 - else: - release_file_artifact = da_names["Release"].artifact - release_file.relative_path = da_names["Release"].relative_path - else: - if self.gpgkey: - d_content.d_artifacts.remove(da_names["Release"]) - else: - release_file_artifact = da_names["Release"].artifact - release_file.relative_path = da_names["Release"].relative_path - else: - if "Release.gpg" in da_names: - # No need to keep the signature without "Release" - d_content.d_artifacts.remove(da_names.pop("Release.gpg")) - - if "InRelease" in da_names: - if self.gpgkey: - verified = self.gpg.verify_file(da_names["InRelease"].artifact.file) - if verified.valid: - log.info(_("Verification of InRelease successful.")) - release_file_artifact = da_names["InRelease"].artifact - release_file.relative_path = da_names["InRelease"].relative_path - else: - log.warning(_("Verification of InRelease failed. Dropping it.")) - d_content.d_artifacts.remove(da_names.pop("InRelease")) - dropped_count += 1 - else: - release_file_artifact = da_names["InRelease"].artifact - release_file.relative_path = da_names["InRelease"].relative_path + release_file = d_content.content + if not isinstance(release_file, ReleaseFile): + await self.put(d_content) + continue - if not d_content.d_artifacts: - # No (proper) artifacts left -> distribution not found - release_file_url = urljoin(self.remote.url, release_file.relative_path) - if dropped_count > 0: - raise NoValidSignatureForKey(url=release_file_url) - else: - raise NoReleaseFile(url=release_file_url) - - release_file.sha256 = release_file_artifact.sha256 - release_file.artifact_set_sha256 = _get_artifact_set_sha256( - d_content, ReleaseFile.SUPPORTED_ARTIFACTS - ) - release_file_dict = deb822.Release(release_file_artifact.file) - if "codename" in release_file_dict: - release_file.codename = release_file_dict["Codename"] - if "suite" in release_file_dict: - release_file.suite = release_file_dict["Suite"] - - if "components" in release_file_dict: - release_file.components = release_file_dict["Components"] - elif "component" in release_file_dict and settings.PERMISSIVE_SYNC: - release_file.components = release_file_dict["Component"] - elif release_file.distribution[-1] == "/": - message = ( - "The Release file for distribution '{}' contains no 'Components' " - "field, but since we are dealing with a flat repo, we can continue " - "regardless." - ) - log.warning(_(message).format(release_file.distribution)) - # TODO: Consider not setting the field at all (requires migrations). - release_file.components = "" - else: - raise MissingReleaseFileField(release_file.distribution, "Components") - - if "architectures" in release_file_dict: - release_file.architectures = release_file_dict["Architectures"] - elif "architecture" in release_file_dict and settings.PERMISSIVE_SYNC: - release_file.architectures = release_file_dict["Architecture"] - elif release_file.distribution[-1] == "/": - message = ( - "The Release file for distribution '{}' contains no 'Architectures' " - "field, but since we are dealing with a flat repo, we can extract them " - "from the repos single Package index later." - ) - log.warning(_(message).format(release_file.distribution)) - release_file.architectures = "" - else: - raise MissingReleaseFileField(release_file.distribution, "Architectures") - - log.debug(_("Codename: {}").format(release_file.codename)) - log.debug(_("Components: {}").format(release_file.components)) - log.debug(_("Architectures: {}").format(release_file.architectures)) - await pb.aincrement() + await self.process_release_file_d_content(d_content, pb) await self.put(d_content) + async def process_release_file_d_content(self, d_content, pb): + """ + Orchestrates the steps for a single ReleaseFile item. + """ + release_da, release_gpg_da, inrelease_da = _collect_release_artifacts(d_content) + + release_artifact = None + if self.gpg: + release_artifact = self.verify_gpg_artifacts( + d_content, release_da, release_gpg_da, inrelease_da + ) + else: + # If no gpgkey, pick main artifact in an order: InRelease => Release => None + if inrelease_da: + release_artifact = inrelease_da.artifact + d_content.content.relative_path = inrelease_da.relative_path + elif release_da: + release_artifact = release_da.artifact + d_content.content.relative_path = release_da.relative_path + + # If there isn't a valid release + if not release_artifact: + if release_da in d_content.d_artifacts: + d_content.d_artifacts.remove(release_da) + if release_gpg_da in d_content.d_artifacts: + d_content.d_artifacts.remove(release_gpg_da) + if inrelease_da in d_content.d_artifacts: + d_content.d_artifacts.remove(inrelease_da) + raise NoReleaseFile(url=os.path.join(self.remote.url, d_content.content.relative_path)) + + d_content.content.sha256 = release_artifact.sha256 + d_content.content.artifact_set_sha256 = _get_artifact_set_sha256( + d_content, ReleaseFile.SUPPORTED_ARTIFACTS + ) + _parse_release_file_attributes(d_content, release_artifact) + await pb.aincrement() + + def verify_gpg_artifacts(self, d_content, release_da, release_gpg_da, inrelease_da): + """ + Handle GPG verification. Returns the main artifact or raises and exception. + """ + if inrelease_da: + if self.verify_single_file(inrelease_da.artifact): + d_content.content.relative_path = inrelease_da.relative_path + return inrelease_da.artifact + else: + log.warning(_("Verification of InRelease failed. Removing it.")) + d_content.d_artifacts.remove(inrelease_da) + + if release_da and release_gpg_da: + if self.verify_detached_signature(release_da.artifact, release_gpg_da.artifact): + d_content.content.relative_path = release_da.relative_path + return release_da.artifact + else: + log.warning(_("Verification of Release + Release.gpg failed. Removing it.")) + d_content.d_artifacts.remove(release_da) + d_content.d_artifacts.remove(release_gpg_da) + elif release_da: + log.warning(_("Release found bu no signature and gpgkey was provided. Removing it.")) + d_content.d_artifacts.remove(release_da) + + raise NoValidSignatureForKey(url=os.path.join(self.remote.url, "Release")) + + def verify_single_file(self, artifact): + """ + Attempt to verify an inline-signed file (InRelease). + Returns True if valid, False otherwise. + """ + with artifact.file.open("rb") as inrelease_fh: + verified = self.gpg.verify_file(inrelease_fh) + return bool(verified.valid) + + def verify_detached_signature(self, release_artifact, release_gpg_artifact): + """ + Attempt to verify a detached signature with "Release" and "Release.gpg". + Return True if valid, False otherwise. + """ + with NamedTemporaryFile() as tmp_file: + with release_artifact.file.open("rb") as rel_fh: + tmp_file.write(rel_fh.read()) + tmp_file.flush() + + with release_gpg_artifact.file.open("rb") as detached_fh: + verified = self.gpg.verify_file(detached_fh, tmp_file.name) + return bool(verified.valid) + class DebUpdatePackageIndexAttributes(Stage): # TODO: Needs a new name """ @@ -1322,6 +1306,80 @@ def _save_artifact_blocking(d_artifact): d_artifact.artifact.touch() +def _collect_release_artifacts(d_content): + """ + Return (release_da, release_gpg_da, inrelease_da) from d_content.d_artifacts. + + Looks for items whose filename is exactly "Release", "Release.gpg", or "InRelease". + If not found, return None for that slot. + """ + release_da = None + release_gpg_da = None + inrelease_da = None + + da_names = {os.path.basename(da.relative_path): da for da in d_content.d_artifacts} + if "Release" in da_names: + release_da = da_names["Release"] + if "Release.gpg" in da_names: + release_gpg_da = da_names["Release.gpg"] + if "InRelease" in da_names: + inrelease_da = da_names["InRelease"] + + return release_da, release_gpg_da, inrelease_da + + +def _parse_release_file_attributes(d_content, main_artifact): + """ + Parse the contents of main_artifact as a 'Release' file and update + d_content.content.* fields accordingly (e.g. codename, suite, etc.). + """ + from debian import deb822 + + with main_artifact.file.open("rb") as fh: + release_file_data = fh.read() + release_dict = deb822.Release(release_file_data) + + if "codename" in release_dict: + d_content.content.codename = release_dict["Codename"] + if "suite" in release_dict: + d_content.content.suite = release_dict["Suite"] + if "components" in release_dict: + d_content.content.components = release_dict["Components"] + elif "component" in release_dict and settings.PERMISSIVE_SYNC: + d_content.content.components = release_dict["Component"] + elif d_content.content.distribution[-1] == "/": + message = ( + "The Release file for distribution '{}' contains no 'Components' " + "field, but since we are dealing with a flat repo, we can continue " + "regardless." + ) + log.warning(_(message).format(d_content.content.distribution)) + # TODO: Consider not setting the field at all (requires migrations). + d_content.content.components = "" + else: + raise MissingReleaseFileField(d_content.content.distribution, "Components") + + if "architectures" in release_dict: + d_content.content.architectures = release_dict["Architectures"] + elif "architecture" in release_dict and settings.PERMISSIVE_SYNC: + d_content.content.architectures = release_dict["Architecture"] + elif d_content.content.distribution[-1] == "/": + message = ( + "The Release file for distribution '{}' contains no 'Architectures' " + "field, but since we are dealing with a flat repo, we can extract them " + "from the repos single Package index later." + ) + log.warning(_(message).format(d_content.content.distribution)) + d_content.content.architectures = "" + else: + raise MissingReleaseFileField(d_content.content.distribution, "Architectures") + + log.debug(f"Codename: {d_content.content.codename}") + log.debug(f"Suite: {d_content.content.suite}") + log.debug(f"Components: {d_content.content.components}") + log.debug(f"Architectures: {d_content.content.architectures}") + + def _get_artifact_set_sha256(d_content, supported_artifacts): """ Get the checksum of checksums for a set of artifacts associated with a multi artifact diff --git a/pulp_deb/tests/unit/test_release_file_helpers.py b/pulp_deb/tests/unit/test_release_file_helpers.py new file mode 100644 index 00000000..e263ca51 --- /dev/null +++ b/pulp_deb/tests/unit/test_release_file_helpers.py @@ -0,0 +1,220 @@ +import pytest +from unittest.mock import MagicMock, patch +from django.test import override_settings + +from pulp_deb.app.tasks.synchronizing import ( + MissingReleaseFileField, + _collect_release_artifacts, + _parse_release_file_attributes, +) + + +@pytest.fixture +def mock_d_content(): + """ + Return a mock of the 'd_content' object. + """ + d_content = MagicMock() + d_content.content.distribution = "buster" + d_content.content.codename = None + d_content.content.suite = None + d_content.content.components = None + d_content.content.architectures = None + return d_content + + +@pytest.fixture +def mock_main_artifact(tmp_path): + """ + Create a fixture that has .file.open(...) returning a real file object. + We'll let the test function write the file content as needed. + """ + artifact = MagicMock() + fake_file_path = tmp_path / "Release" + artifact._fake_file_path = fake_file_path + + def _open(mode="rb"): + return open(fake_file_path, mode) + + artifact.file.open.side_effect = _open + + return artifact + + +def test_collect_release_artifacts_all_found(mock_d_content): + """ + Test if release artifacts paths are correctly collected. + """ + mock_d_artifact_release = MagicMock() + mock_d_artifact_release.relative_path = "some/Release" + mock_d_artifact_gpg = MagicMock() + mock_d_artifact_gpg.relative_path = "some/Release.gpg" + mock_d_artifact_inrelease = MagicMock() + mock_d_artifact_inrelease.relative_path = "some/InRelease" + + mock_d_content.d_artifacts = [ + mock_d_artifact_release, + mock_d_artifact_gpg, + mock_d_artifact_inrelease, + ] + + release_da, release_gpg_da, inrelease_da = _collect_release_artifacts(mock_d_content) + assert release_da == mock_d_artifact_release + assert release_gpg_da == mock_d_artifact_gpg + assert inrelease_da == mock_d_artifact_inrelease + + +def test_collect_release_artifacts_none_found(mock_d_content): + """ + Test that unrelated artifacts do not set the release files. + """ + # None of Release, Release.gpg or InRelease + mock_artifact_1 = MagicMock() + mock_artifact_1.relative_path = "something/else" + mock_artifact_2 = MagicMock() + mock_artifact_2.relative_path = "else/something" + + mock_d_content.d_artifacts = [mock_artifact_1, mock_artifact_2] + + release_da, release_gpg_da, inrelease_da = _collect_release_artifacts(mock_d_content) + assert release_da is None + assert release_gpg_da is None + assert inrelease_da is None + + +def test_collect_release_artifacts_some_found(mock_d_content): + """ + Test that the correct artifact gets returned if there are some missing. + """ + mock_artifact_release = MagicMock() + mock_artifact_release.relative_path = "dist/Release" + mock_d_content.d_artifacts = [mock_artifact_release] + + release_da, release_gpg_da, inrelease_da = _collect_release_artifacts(mock_d_content) + assert release_da == mock_artifact_release + assert release_gpg_da is None + assert inrelease_da is None + + +@pytest.mark.parametrize("dist_ends_slash", [False, True]) +def test_parse_release_file_attributes_normal(mock_d_content, mock_main_artifact, dist_ends_slash): + """ + Test if a valid Release file is returned. + """ + if dist_ends_slash: + mock_d_content.content.distribution = "buster/" + else: + mock_d_content.content.distribution = "buster" + + data = ( + "Codename: buster\n" + "Suite: stable\n" + "Components: main contrib\n" + "Architectures: amd64 i386\n" + ) + mock_main_artifact._fake_file_path.write_text(data) + + _parse_release_file_attributes(mock_d_content, mock_main_artifact) + + assert mock_d_content.content.codename == "buster" + assert mock_d_content.content.suite == "stable" + assert mock_d_content.content.components == "main contrib" + assert mock_d_content.content.architectures == "amd64 i386" + + +@pytest.mark.parametrize("field_name", ["Components", "Architectures"]) +def test_parse_release_file_attributes_missing_fields_nonflat( + field_name, mock_d_content, mock_main_artifact +): + """ + If a field is missing in a non flat repository, then we raise MissingReleaseFileField. + This applies for "components" or "architectures" if they are missing from the Release file. + """ + mock_d_content.content.distribution = "buster" + + if field_name == "Components": + data = "Codename: buster\n" "Suite: stable\n" "Architectures: amd64 i386\n" + else: + data = "Codename: buster\n" "Suite: stable\n" "Components: main contrib\n" + + mock_main_artifact._fake_file_path.write_text(data) + + with pytest.raises(MissingReleaseFileField) as exc: + _parse_release_file_attributes(mock_d_content, mock_main_artifact) + + assert field_name in str(exc.value) + + +def test_parse_release_file_attributes_missing_fields_flat_repo(mock_d_content, mock_main_artifact): + """ + If fields are missing in a flat repository, we log a warning and set them to "". + """ + mock_d_content.content.distribution = "buster/" + + data = "Codename: buster\n" "Suite: stable\n" + mock_main_artifact._fake_file_path.write_text(data) + + with patch("pulp_deb.app.tasks.synchronizing.log.warning") as mock_log_warn: + _parse_release_file_attributes(mock_d_content, mock_main_artifact) + + assert mock_d_content.content.components == "" + assert mock_d_content.content.architectures == "" + + assert mock_log_warn.call_count == 2 + calls = [c[0][0] for c in mock_log_warn.call_args_list] + assert "contains no 'Components' field" in calls[0] + assert "contains no 'Architectures' field" in calls[1] + + +@override_settings(PERMISSIVE_SYNC=True) +def test_parse_release_file_attributes_permissive_component(mock_d_content, mock_main_artifact): + """ + Test if PERMISSIVE_SYNC=True allows for the "Component" field in a Release file to be parsed. + """ + mock_d_content.content.distribution = "buster" + + data = "Codename: buster\n" "Suite: stable\n" "Component: main\n" "Architectures: amd64\n" + mock_main_artifact._fake_file_path.write_text(data) + + _parse_release_file_attributes(mock_d_content, mock_main_artifact) + + assert mock_d_content.content.components == "main" + assert mock_d_content.content.architectures is not None + + +@override_settings(PERMISSIVE_SYNC=True) +def test_parse_release_file_attributes_permissive_architecture(mock_d_content, mock_main_artifact): + """ + Test if PERMISSIVE_SYNC=True allows for the "Architecture" field in a Release file to be parsed. + """ + mock_d_content.content.distribution = "buster" + data = "Codename: buster\n" "Suite: stable\n" "Components: main\n" "Architecture: amd64 i386\n" + mock_main_artifact._fake_file_path.write_text(data) + + _parse_release_file_attributes(mock_d_content, mock_main_artifact) + + assert mock_d_content.content.components is not None + assert mock_d_content.content.architectures == "amd64 i386" + + +@override_settings(PERMISSIVE_SYNC=False) +@pytest.mark.parametrize("field_name", ["Components", "Architectures"]) +def test_parse_release_file_attributes_permissive_disabled_compoent( + mock_d_content, mock_main_artifact, field_name +): + """ + Test whether PERMISSIVE_SYNC=False correctly raises MissingReleaseFileField, + if fields in Release are called "Component" or "Architecture" and not their plurals. + """ + mock_d_content.content.distribution = "buster" + + if field_name == "Components": + data = "Codename: buster\n" "Suite: stable\n" "Component: main\n" "Architectures: amd64\n" + else: + data = "Codename: buster\n" "Suite: stable\n" "Components: main\n" "Architecture: amd64\n" + + mock_main_artifact._fake_file_path.write_text(data) + + with pytest.raises(MissingReleaseFileField) as exc: + _parse_release_file_attributes(mock_d_content, mock_main_artifact) + assert f"missing the required field '{field_name}'" in str(exc.value)