diff --git a/CHANGES/1767.feature b/CHANGES/1767.feature new file mode 100644 index 000000000..3ab279012 --- /dev/null +++ b/CHANGES/1767.feature @@ -0,0 +1 @@ +Added `architecture`, `os`, and `compressed_layers_size` fields to Manifest. diff --git a/pulp_container/app/management/commands/container-handle-image-data.py b/pulp_container/app/management/commands/container-handle-image-data.py index a8635f1e7..6f498156e 100644 --- a/pulp_container/app/management/commands/container-handle-image-data.py +++ b/pulp_container/app/management/commands/container-handle-image-data.py @@ -1,3 +1,4 @@ +import json from json.decoder import JSONDecodeError from gettext import gettext as _ @@ -42,7 +43,11 @@ def handle(self, *args, **options): manifests_v1 = Manifest.objects.filter(data__isnull=True, media_type=MEDIA_TYPE.MANIFEST_V1) manifests_updated_count += self.update_manifests(manifests_v1) - manifests_v2 = Manifest.objects.filter(Q(data__isnull=True) | Q(annotations={}, labels={})) + manifests_v2 = Manifest.objects.filter( + Q(data__isnull=True) + | Q(annotations={}, labels={}) + | Q(architecture__isnull=True, os__isnull=True, compressed_layers_size__isnull=True) + ) manifests_v2 = manifests_v2.exclude( media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI, MEDIA_TYPE.MANIFEST_V1] ) @@ -68,6 +73,17 @@ def handle(self, *args, **options): def update_manifests(self, manifests_qs): manifests_updated_count = 0 manifests_to_update = [] + fields_to_update = [ + "annotations", + "labels", + "is_bootable", + "is_flatpak", + "data", + "os", + "architecture", + "compressed_layers_size", + ] + for manifest in manifests_qs.iterator(): # suppress non-existing/already migrated artifacts and corrupted JSON files with suppress(ObjectDoesNotExist, JSONDecodeError): @@ -76,7 +92,6 @@ def update_manifests(self, manifests_qs): manifests_to_update.append(manifest) if len(manifests_to_update) > 1000: - fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"] manifests_qs.model.objects.bulk_update( manifests_to_update, fields_to_update, @@ -85,7 +100,6 @@ def update_manifests(self, manifests_qs): manifests_to_update.clear() if manifests_to_update: - fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"] manifests_qs.model.objects.bulk_update( manifests_to_update, fields_to_update, @@ -100,11 +114,24 @@ def init_manifest(self, manifest): manifest_data, raw_bytes_data = get_content_data(manifest_artifact) manifest.data = raw_bytes_data.decode("utf-8") - if not (manifest.annotations or manifest.labels): + if not ( + manifest.annotations + or manifest.labels + or manifest.architecture + or manifest.os + or manifest.compressed_layers_size + ): manifest.init_metadata(manifest_data) manifest._artifacts.clear() return True + elif manifest.media_type not in [MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI] and not ( + manifest.architecture or manifest.os or manifest.compressed_layers_size + ): + manifest_data = json.loads(manifest.data) + manifest.init_architecture_and_os(manifest_data) + manifest.init_compressed_layers_size(manifest_data) + return True return False diff --git a/pulp_container/app/migrations/0042_update_manifest_and_manifestlistmanifest.py b/pulp_container/app/migrations/0042_update_manifest_and_manifestlistmanifest.py new file mode 100644 index 000000000..c2cfb2e94 --- /dev/null +++ b/pulp_container/app/migrations/0042_update_manifest_and_manifestlistmanifest.py @@ -0,0 +1,49 @@ +# Generated by Django 4.2.16 on 2024-10-02 12:04 +import warnings + +from django.db import migrations, models + +def print_warning_for_updating_manifest_fields(apps, schema_editor): + warnings.warn( + "Run 'pulpcore-manager container-handle-image-data' to update the manifests' " + "os, architecture, and compressed_layers_size fields." + ) + +class Migration(migrations.Migration): + + dependencies = [ + ('container', '0041_add_pull_through_pull_permissions'), + ] + + operations = [ + migrations.AddField( + model_name='manifest', + name='architecture', + field=models.TextField(null=True), + ), + migrations.AddField( + model_name='manifest', + name='compressed_layers_size', + field=models.IntegerField(null=True), + ), + migrations.AddField( + model_name='manifest', + name='os', + field=models.TextField(null=True), + ), + migrations.AlterField( + model_name='manifestlistmanifest', + name='architecture', + field=models.TextField(blank=True, default=''), + ), + migrations.AlterField( + model_name='manifestlistmanifest', + name='os', + field=models.TextField(blank=True, default=''), + ), + migrations.RunPython( + print_warning_for_updating_manifest_fields, + reverse_code=migrations.RunPython.noop, + elidable=True, + ), + ] diff --git a/pulp_container/app/models.py b/pulp_container/app/models.py index b96701c92..73aafe969 100644 --- a/pulp_container/app/models.py +++ b/pulp_container/app/models.py @@ -77,6 +77,11 @@ class Manifest(Content): labels (models.JSONField): Metadata stored inside the image configuration. is_bootable (models.BooleanField): Indicates whether the image is bootable or not. is_flatpak (models.BooleanField): Indicates whether the image is a flatpak package or not. + architecture (models.TextField): CPU architecture for which the binaries in the image are + designed to run. + os (models.TextField): Operating System which the image is built to run on. + compressed_layers_size (models.IntegerField): Sum of the sizes, in bytes, of all compressed + layers. Relations: blobs (models.ManyToManyField): Many-to-many relationship with Blob. @@ -103,6 +108,9 @@ class Manifest(Content): annotations = models.JSONField(default=dict) labels = models.JSONField(default=dict) + architecture = models.TextField(null=True) + os = models.TextField(null=True) + compressed_layers_size = models.IntegerField(null=True) is_bootable = models.BooleanField(default=False) is_flatpak = models.BooleanField(default=False) @@ -124,6 +132,8 @@ def init_metadata(self, manifest_data=None): has_annotations = self.init_annotations(manifest_data) has_labels = self.init_labels() has_image_nature = self.init_image_nature() + self.init_architecture_and_os(manifest_data) + self.init_compressed_layers_size(manifest_data) return has_annotations or has_labels or has_image_nature def init_annotations(self, manifest_data=None): @@ -176,6 +186,31 @@ def init_manifest_nature(self): else: return False + def init_architecture_and_os(self, manifest_data): + # manifestv2 schema1 has the architecture definition in the Manifest (not in the ConfigBlob) + if architecture := manifest_data.get("architecture", None): + self.architecture = architecture + return + + manifest_config = manifest_data.get("config", None) + config_blob_sha256 = manifest_config.get("digest", None) + blob_artifact = Artifact.objects.get(sha256=config_blob_sha256.removeprefix("sha256:")) + config_blob, _ = get_content_data(blob_artifact) + self.architecture = config_blob.get("architecture", None) + self.os = config_blob.get("os", None) + + def init_compressed_layers_size(self, manifest_data): + # manifestv2 schema1 has only blobSum definition for each layer + if manifest_data.get("fsLayers", None): + self.compressed_layers_size = 0 + return + + layers = manifest_data.get("layers") + compressed_size = 0 + for layer in layers: + compressed_size += layer.get("size") + self.compressed_layers_size = compressed_size + def is_bootable_image(self): if ( self.annotations.get("containers.bootc") == "1" @@ -222,8 +257,9 @@ class ManifestListManifest(models.Model): manifest_list (models.ForeignKey): Many-to-one relationship with ManifestList. """ - architecture = models.TextField() - os = models.TextField() + # in oci-index spec, platform is an optional field + architecture = models.TextField(default="", blank=True) + os = models.TextField(default="", blank=True) os_version = models.TextField(default="", blank=True) os_features = models.TextField(default="", blank=True) features = models.TextField(default="", blank=True) diff --git a/pulp_container/app/registry.py b/pulp_container/app/registry.py index f907ef98d..0136f1c7f 100644 --- a/pulp_container/app/registry.py +++ b/pulp_container/app/registry.py @@ -455,9 +455,12 @@ async def init_pending_content(self, digest, manifest_data, media_type, raw_text data=raw_text_data, ) - # skip if media_type of schema1 + # if media_type of schema1 configure only manifest architecture and compressed layers size if media_type in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI): await sync_to_async(manifest.init_metadata)(manifest_data=manifest_data) + else: + await sync_to_async(manifest.init_architecture_and_os)(manifest_data=manifest_data) + await sync_to_async(manifest.init_compressed_layers_size)(manifest_data=manifest_data) try: await manifest.asave() diff --git a/pulp_container/app/serializers.py b/pulp_container/app/serializers.py index d2922bf18..b12d09230 100644 --- a/pulp_container/app/serializers.py +++ b/pulp_container/app/serializers.py @@ -103,6 +103,21 @@ class ManifestSerializer(NoArtifactContentSerializer): default=False, help_text=_("A boolean determining whether the image bundles a Flatpak application"), ) + architecture = serializers.CharField( + help_text="The CPU architecture which the binaries in this image are built to run on.", + required=False, + default=None, + ) + os = serializers.CharField( + help_text="The name of the operating system which the image is built to run on.", + required=False, + default=None, + ) + compressed_layers_size = serializers.IntegerField( + help_text="Specifies the sum of the sizes, in bytes, of all compressed layers", + required=False, + default=None, + ) class Meta: fields = NoArtifactContentSerializer.Meta.fields + ( @@ -116,6 +131,9 @@ class Meta: "labels", "is_bootable", "is_flatpak", + "architecture", + "os", + "compressed_layers_size", ) model = models.Manifest diff --git a/pulp_container/app/tasks/builder.py b/pulp_container/app/tasks/builder.py index 58daa3796..27beb53c2 100644 --- a/pulp_container/app/tasks/builder.py +++ b/pulp_container/app/tasks/builder.py @@ -13,7 +13,7 @@ Tag, ) from pulp_container.constants import MEDIA_TYPE -from pulp_container.app.utils import calculate_digest +from pulp_container.app.utils import calculate_digest, get_content_data from pulpcore.plugin.models import Artifact, ContentArtifact, Content @@ -83,11 +83,18 @@ def add_image_from_directory_to_repository(path, repository, tag): config_blob = get_or_create_blob(manifest_json["config"], manifest, path) manifest.config_blob = config_blob - manifest.save() + blob_artifact = Artifact.objects.get(sha256=config_blob.digest.removeprefix("sha256:")) + config_blob_dict, _ = get_content_data(blob_artifact) + manifest.architecture = config_blob_dict.get("architecture", None) + manifest.os = config_blob_dict.get("os", None) pks_to_add = [] + compressed_size = 0 for layer in manifest_json["layers"]: + compressed_size += layer.get("size") pks_to_add.append(get_or_create_blob(layer, manifest, path).pk) + manifest.compressed_layers_size = compressed_size + manifest.save() pks_to_add.extend([manifest.pk, tag.pk, config_blob.pk]) new_repo_version.add_content(Content.objects.filter(pk__in=pks_to_add)) diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 15ade2596..425af5143 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -332,12 +332,15 @@ async def handle_blobs(self, manifest_dc, content_data): Handle blobs. """ manifest_dc.extra_data["blob_dcs"] = [] + compressed_size = 0 for layer in content_data.get("layers") or content_data.get("fsLayers"): if not self._include_layer(layer): continue + compressed_size += layer.get("size", 0) blob_dc = self.create_blob(layer) manifest_dc.extra_data["blob_dcs"].append(blob_dc) await self.put(blob_dc) + manifest_dc.content.compressed_layers_size = compressed_size layer = content_data.get("config", None) if layer: blob_dc = self.create_blob(layer, deferred_download=False) @@ -390,8 +393,8 @@ def create_manifest(self, manifest_data, raw_text_data, media_type, digest=None) media_type=media_type, data=raw_text_data, annotations=manifest_data.get("annotations", {}), + architecture=manifest_data.get("architecture", None), ) - manifest_dc = DeclarativeContent(content=manifest) return manifest_dc @@ -470,13 +473,14 @@ async def create_listed_manifest(self, manifest_data): ) platform = {} - p = manifest_data["platform"] - platform["architecture"] = p["architecture"] - platform["os"] = p["os"] - platform["features"] = p.get("features", "") - platform["variant"] = p.get("variant", "") - platform["os.version"] = p.get("os.version", "") - platform["os.features"] = p.get("os.features", "") + # in oci-index spec, platform is an optional field + if p := manifest_data.get("platform", None): + platform["architecture"] = p["architecture"] + platform["os"] = p["os"] + platform["features"] = p.get("features", "") + platform["variant"] = p.get("variant", "") + platform["os.version"] = p.get("os.version", "") + platform["os.features"] = p.get("os.features", "") man_dc = DeclarativeContent(content=manifest) return {"manifest_dc": man_dc, "platform": platform, "content_data": content_data} @@ -626,19 +630,31 @@ def _post_save(self, batch): manifest_lists.append(dc.content) for listed_manifest in dc.extra_data["listed_manifests"]: manifest_dc = listed_manifest["manifest_dc"] - platform = listed_manifest["platform"] - manifest_list_manifests.append( - ManifestListManifest( - manifest_list=manifest_dc.content, - image_manifest=dc.content, - architecture=platform["architecture"], - os=platform["os"], - features=platform.get("features"), - variant=platform.get("variant"), - os_version=platform.get("os.version"), - os_features=platform.get("os.features"), - ) + manifest_list_manifest = ManifestListManifest( + manifest_list=manifest_dc.content, + image_manifest=dc.content, ) + if platform := listed_manifest.get("platform"): + manifest_list_manifest.architecture = platform["architecture"] + manifest_list_manifest.os = platform["os"] + manifest_list_manifest.features = platform.get("features") + manifest_list_manifest.variant = platform.get("variant") + manifest_list_manifest.os_version = platform.get("os.version") + manifest_list_manifest.os_features = platform.get("os.features") + manifest_list_manifests.append(manifest_list_manifest) + continue + + if "config_blob_dc" in dc.extra_data: + manifest_dc = dc.content + config_blob_sha256 = dc.extra_data["config_blob_dc"].content.digest + blob_artifact = Artifact.objects.get( + sha256=config_blob_sha256.removeprefix("sha256:") + ) + config_blob, _ = get_content_data(blob_artifact) + manifest_dc.architecture = config_blob.get("architecture", None) + manifest_dc.os = config_blob.get("os", None) + manifest_dc.save() + if blob_manifests: BlobManifest.objects.bulk_create(blob_manifests, ignore_conflicts=True) if manifest_list_manifests: diff --git a/pulp_container/tests/functional/api/test_build_images.py b/pulp_container/tests/functional/api/test_build_images.py index 5569dfa31..41ef67d93 100644 --- a/pulp_container/tests/functional/api/test_build_images.py +++ b/pulp_container/tests/functional/api/test_build_images.py @@ -31,6 +31,7 @@ def containerfile_name(): def test_build_image( pulpcore_bindings, + container_manifest_api, container_repository_api, container_distribution_api, gen_object_with_cleanup, @@ -61,3 +62,9 @@ def test_build_image( local_registry.pull(distribution.base_path) image = local_registry.inspect(distribution.base_path) assert image[0]["Config"]["Cmd"] == ["cat", "/tmp/inside-image.txt"] + + manifest = container_manifest_api.list(digest=image[0]["Digest"]) + manifest = manifest.to_dict()["results"][0] + assert manifest["architecture"] == "amd64" + assert manifest["os"] == "linux" + assert manifest["compressed_layers_size"] > 0 diff --git a/pulp_container/tests/functional/api/test_pull_through_cache.py b/pulp_container/tests/functional/api/test_pull_through_cache.py index f4f6ac01d..4679464bd 100644 --- a/pulp_container/tests/functional/api/test_pull_through_cache.py +++ b/pulp_container/tests/functional/api/test_pull_through_cache.py @@ -38,6 +38,7 @@ def _add_pull_through_entities_to_cleanup(path): def pull_and_verify( anonymous_user, add_pull_through_entities_to_cleanup, + container_manifest_api, container_pull_through_distribution_api, container_distribution_api, container_repository_api, @@ -59,6 +60,13 @@ def _pull_and_verify(images, pull_through_distribution): local_registry.pull(local_image_path) local_image = local_registry.inspect(local_image_path) + # 1.1. check pulp manifest model fields + manifest = container_manifest_api.list(digest=local_image[0]["Digest"]) + manifest = manifest.to_dict()["results"][0] + assert manifest["architecture"] == "amd64" + assert manifest["os"] == "linux" + assert manifest["compressed_layers_size"] > 0 + path, tag = local_image_path.split(":") tags_to_verify.append(tag) diff --git a/pulp_container/tests/functional/api/test_push_content.py b/pulp_container/tests/functional/api/test_push_content.py index baf659f62..a0d179af8 100644 --- a/pulp_container/tests/functional/api/test_push_content.py +++ b/pulp_container/tests/functional/api/test_push_content.py @@ -39,6 +39,7 @@ def test_push_using_registry_client_admin( add_to_cleanup, registry_client, local_registry, + container_manifest_api, container_namespace_api, ): """Test push with official registry client and logged in as admin.""" @@ -48,6 +49,15 @@ def test_push_using_registry_client_admin( registry_client.pull(image_path) local_registry.tag_and_push(image_path, local_url) local_registry.pull(local_url) + + # check pulp manifest model fields + local_image = local_registry.inspect(local_url) + manifest = container_manifest_api.list(digest=local_image[0]["Digest"]) + manifest = manifest.to_dict()["results"][0] + assert manifest["architecture"] == "amd64" + assert manifest["os"] == "linux" + assert manifest["compressed_layers_size"] > 0 + # ensure that same content can be pushed twice without permission errors local_registry.tag_and_push(image_path, local_url) diff --git a/pulp_container/tests/functional/api/test_sync.py b/pulp_container/tests/functional/api/test_sync.py index bfb088fdd..aa4a8f18b 100644 --- a/pulp_container/tests/functional/api/test_sync.py +++ b/pulp_container/tests/functional/api/test_sync.py @@ -3,7 +3,12 @@ import pytest from pulpcore.tests.functional.utils import PulpTaskError -from pulp_container.tests.functional.constants import PULP_FIXTURE_1, PULP_LABELED_FIXTURE +from pulp_container.constants import MEDIA_TYPE +from pulp_container.tests.functional.constants import ( + PULP_FIXTURE_1, + PULP_LABELED_FIXTURE, + PULP_HELLO_WORLD_LINUX_AMD64_DIGEST, +) from pulp_container.tests.functional.constants import ( REGISTRY_V2_FEED_URL, @@ -39,8 +44,14 @@ def _synced_container_repository_factory( @pytest.mark.parallel -def test_basic_sync(container_repo, container_remote, container_repository_api, container_sync): - container_sync(container_repo, container_remote) +def test_basic_sync( + container_repo, + container_remote, + container_repository_api, + container_sync, + container_manifest_api, +): + repo_version = container_sync(container_repo, container_remote).created_resources[0] repository = container_repository_api.read(container_repo.pulp_href) assert "versions/1/" in repository.latest_version_href @@ -53,6 +64,16 @@ def test_basic_sync(container_repo, container_remote, container_repository_api, assert repository.latest_version_href == latest_version_href + manifest = container_manifest_api.list( + repository_version=repo_version, + media_type=[MEDIA_TYPE.MANIFEST_V2], + digest=PULP_HELLO_WORLD_LINUX_AMD64_DIGEST, + ) + manifest = manifest.to_dict()["results"][0] + assert manifest["architecture"] == "amd64" + assert manifest["os"] == "linux" + assert manifest["compressed_layers_size"] > 0 + @pytest.mark.parallel def test_sync_labelled_image(