From fa0871161c3207ea0a2745718df6bddd578b70db Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Sat, 27 Jul 2024 09:23:34 -0700 Subject: [PATCH] Add in-toto format as hash of shard hashes This converts model serialization manifests that record every model file shard hash into an in-toto payload that can then be passed to Sigstore's `sign_intoto` for signing to generate a Sigstore `Bundle` (if using Sigstore). To identify the models, we compute a hash of all hashes of the file shards and use that as the subject. The individual file hashes are used as the payload and we would have the verifier check them as part of the verification process. Signed-off-by: Mihai Maruseac --- model_signing/signing/in_toto.py | 98 +++++++++++++++++++ model_signing/signing/in_toto_test.py | 65 ++++++++++++ .../deep_model_folder | 35 +++++++ .../empty_model_file | 14 +++ .../empty_model_folder | 14 +++ .../model_folder_with_empty_file | 14 +++ .../sample_model_file | 20 ++++ .../sample_model_folder | 65 ++++++++++++ .../symlink_model_folder | 20 ++++ 9 files changed, 345 insertions(+) create mode 100644 model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/deep_model_folder create mode 100644 model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_file create mode 100644 model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_folder create mode 100644 model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/model_folder_with_empty_file create mode 100644 model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_file create mode 100644 model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_folder create mode 100644 model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/symlink_model_folder diff --git a/model_signing/signing/in_toto.py b/model_signing/signing/in_toto.py index 547475b6..eca3eaf7 
100644 --- a/model_signing/signing/in_toto.py +++ b/model_signing/signing/in_toto.py @@ -254,3 +254,101 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self: predicate_top_level_name="files", ) return cls(statement) + + +class DigestOfShardDigestsIntotoPayload(IntotoPayload): + """In-toto payload where the subject is a digest of digests of file shards. + + This payload is supposed to be used for manifests where every file shard in + the model is matched with a digest. Because existing tooling only supports + established hashing algorithms, we record every such digest in the predicate + part and compute a hash for the subject by using sha256 on the concatenation + of the shard hashes. To ensure determinism, the hashes are sorted + by file shard (alphabetically by name, then ordered by start offset). + + Example: + ```json + { + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "18b5a4..." + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [ + { + "digest": "6efa14...", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f0:0:16" + }, + { + "digest": "a9bc14...", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f1:0:16" + }, + { + "digest": "5f597e...", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f2:0:16" + }, + { + "digest": "eaf677...", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f3:0:16" + } + ] + } + } + ``` + + A missing predicate, or a predicate for which an entry does not have a + valid name, digest, or algorithm should be considered invalid and fail + integrity verification. + + See also https://github.com/sigstore/sigstore-python/issues/1018. + """ + + predicate_type: Final[str] = ( + "https://model_signing/DigestOfShardDigests/v0.1" + ) + + def __init__(self, statement: statement.Statement): + """Builds an instance of this in-toto payload. 
+ + Don't call this directly in production. Use `from_manifest()` instead. + + Args: + statement: The DSSE statement representing this in-toto payload. + """ + self.statement = statement + + @classmethod + @override + def from_manifest(cls, manifest: manifest_module.Manifest) -> Self: + """Converts a manifest to the signing payload used for signing. + + The manifest must be one where every model shard is paired with its own + digest. Currently, this is only `ShardLevelManifest`. + + Args: + manifest: the manifest to convert to signing payload. + + Returns: + An instance of `DigestOfShardDigestsIntotoPayload`. + + Raises: + TypeError: If the manifest is not `ShardLevelManifest`. + """ + if not isinstance(manifest, manifest_module.ShardLevelManifest): + raise TypeError("Only ShardLevelManifest is supported") + + statement = _convert_descriptors_to_hashed_statement( + manifest, + predicate_type=cls.predicate_type, + predicate_top_level_name="shards", + ) + return cls(statement) diff --git a/model_signing/signing/in_toto_test.py b/model_signing/signing/in_toto_test.py index c854fad1..bed2c4be 100644 --- a/model_signing/signing/in_toto_test.py +++ b/model_signing/signing/in_toto_test.py @@ -32,6 +32,7 @@ from model_signing.hashing import memory from model_signing.manifest import manifest as manifest_module from model_signing.serialization import serialize_by_file +from model_signing.serialization import serialize_by_file_shard from model_signing.signing import in_toto @@ -143,3 +144,67 @@ def test_only_runs_on_expected_manifest_types(self): match="Only FileLevelManifest is supported", ): in_toto.DigestOfDigestsIntotoPayload.from_manifest(manifest) + + +class TestDigestOfShardDigestsIntotoPayload: + + def _hasher_factory( + self, path: pathlib.Path, start: int, end: int + ) -> file.ShardedFileHasher: + return file.ShardedFileHasher( + path, memory.SHA256(), start=start, end=end + ) + + @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models) + def 
test_known_models(self, request, model_fixture_name): + # Set up variables (arrange) + testdata_path = request.path.parent / "testdata" + test_path = testdata_path / "in_toto" + test_class_path = test_path / "TestDigestOfShardDigestsIntotoPayload" + golden_path = test_class_path / model_fixture_name + should_update = request.config.getoption("update_goldens") + model = request.getfixturevalue(model_fixture_name) + + # Compute payload (act) + serializer = serialize_by_file_shard.ManifestSerializer( + self._hasher_factory, allow_symlinks=True + ) + manifest = serializer.serialize(model) + payload = in_toto.DigestOfShardDigestsIntotoPayload.from_manifest( + manifest + ) + + # Compare with golden, or write to golden (approximately "assert") + if should_update: + with open(golden_path, "w", encoding="utf-8") as f: + f.write(f"{json_format.MessageToJson(payload.statement.pb)}\n") + else: + with open(golden_path, "r", encoding="utf-8") as f: + json_contents = f.read() + expected_proto = json_format.Parse( + json_contents, statement_pb2.Statement() + ) + + assert payload.statement.pb == expected_proto + + def test_produces_valid_statements(self, sample_model_folder): + serializer = serialize_by_file_shard.ManifestSerializer( + self._hasher_factory, allow_symlinks=True + ) + manifest = serializer.serialize(sample_model_folder) + + payload = in_toto.DigestOfShardDigestsIntotoPayload.from_manifest( + manifest + ) + + payload.statement.validate() + + def test_only_runs_on_expected_manifest_types(self): + digest = hashing.Digest("test", b"test_digest") + manifest = manifest_module.DigestManifest(digest) + + with pytest.raises( + TypeError, + match="Only ShardLevelManifest is supported", + ): + in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(manifest) diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/deep_model_folder b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/deep_model_folder new file mode 
100644 index 00000000..42b52a19 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/deep_model_folder @@ -0,0 +1,35 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "18b5a45fe7983f7194e8ffd96c80f5f0ec53191bf4a32b6aff293f043e816d7a" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [ + { + "digest": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f0:0:16" + }, + { + "digest": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f1:0:16" + }, + { + "digest": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f2:0:16" + }, + { + "digest": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f3:0:16" + } + ] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_file b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_file new file mode 100644 index 00000000..898052c3 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_file @@ -0,0 +1,14 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_folder b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_folder new file mode 100644 index 
00000000..898052c3 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_folder @@ -0,0 +1,14 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/model_folder_with_empty_file b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/model_folder_with_empty_file new file mode 100644 index 00000000..898052c3 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/model_folder_with_empty_file @@ -0,0 +1,14 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_file b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_file new file mode 100644 index 00000000..8dee2069 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_file @@ -0,0 +1,20 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [ + { + "digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b", + "algorithm": "file-sha256-1000000", + "name": ".:0:22" + } + ] + } +} diff --git 
a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_folder b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_folder new file mode 100644 index 00000000..461cee86 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_folder @@ -0,0 +1,65 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "143cc682e555951649f18e2761c3d526d2502996f5e32dc187ef7f8a614f8df7" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [ + { + "digest": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249", + "algorithm": "file-sha256-1000000", + "name": "d0/f00:0:23" + }, + { + "digest": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6", + "algorithm": "file-sha256-1000000", + "name": "d0/f01:0:23" + }, + { + "digest": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426", + "algorithm": "file-sha256-1000000", + "name": "d0/f02:0:23" + }, + { + "digest": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f", + "algorithm": "file-sha256-1000000", + "name": "d1/f10:0:23" + }, + { + "digest": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf", + "algorithm": "file-sha256-1000000", + "name": "d1/f11:0:23" + }, + { + "digest": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3", + "algorithm": "file-sha256-1000000", + "name": "d1/f12:0:23" + }, + { + "digest": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc", + "algorithm": "file-sha256-1000000", + "name": "f0:0:24" + }, + { + "digest": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae", + "algorithm": "file-sha256-1000000", + "name": "f1:0:24" + }, + { + "digest": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719", + "algorithm": "file-sha256-1000000", + "name": "f2:0:24" + }, 
+ { + "digest": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a", + "algorithm": "file-sha256-1000000", + "name": "f3:0:24" + } + ] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/symlink_model_folder b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/symlink_model_folder new file mode 100644 index 00000000..dcba2415 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/symlink_model_folder @@ -0,0 +1,20 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [ + { + "digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b", + "algorithm": "file-sha256-1000000", + "name": "symlink_file:0:22" + } + ] + } +}