class DigestOfShardDigestsIntotoPayload(IntotoPayload):
    """In-toto payload where the subject is a digest of digests of file shards.

    This payload is supposed to be used for manifests where every file shard in
    the model is matched with a digest. Because existing tooling only supports
    established hashing algorithms, we record every such digest in the predicate
    part and compute a hash for the subject by using sha256 on the concatenation
    of the shard hashes. To ensure determinism, the hashes are sorted
    by file shard (alphabetically by name, then ordered by start offset).

    Example:
    ```json
    {
      "_type": "https://in-toto.io/Statement/v1",
      "subject": [
        {
          "digest": {
            "sha256": "18b5a4..."
          }
        }
      ],
      "predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
      "predicate": {
        "shards": [
          {
            "digest": "6efa14...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f0:0:16"
          },
          {
            "digest": "a9bc14...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f1:0:16"
          },
          {
            "digest": "5f597e...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f2:0:16"
          },
          {
            "digest": "eaf677...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f3:0:16"
          }
        ]
      }
    }
    ```

    A missing predicate, or a predicate for which an entry does not have a
    valid name, digest, or algorithm should be considered invalid and fail
    integrity verification.

    See also https://github.com/sigstore/sigstore-python/issues/1018.
    """

    # Identifier recorded as the `predicateType` of every statement built by
    # this payload class.
    predicate_type: Final[str] = (
        "https://model_signing/DigestOfShardDigests/v0.1"
    )

    def __init__(self, statement: statement.Statement):
        """Builds an instance of this in-toto payload.

        Don't call this directly in production. Use `from_manifest()` instead.

        Args:
            statement: The DSSE statement representing this in-toto payload.
        """
        self.statement = statement

    @classmethod
    @override
    def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
        """Converts a manifest to the signing payload used for signing.

        The manifest must be one where every model shard is paired with its own
        digest. Currently, this is only `ShardLevelManifest`.

        Args:
            manifest: the manifest to convert to signing payload.

        Returns:
            An instance of `DigestOfShardDigestsIntotoPayload`.

        Raises:
            TypeError: If the manifest is not `ShardLevelManifest`.
        """
        # Reject every other manifest kind up front: the predicate written
        # below is keyed by "shards", so only shard-level manifests apply.
        if not isinstance(manifest, manifest_module.ShardLevelManifest):
            raise TypeError("Only ShardLevelManifest is supported")

        statement = _convert_descriptors_to_hashed_statement(
            manifest,
            predicate_type=cls.predicate_type,
            predicate_top_level_name="shards",
        )
        return cls(statement)
class TestDigestOfShardDigestsIntotoPayload:
    """Tests for `in_toto.DigestOfShardDigestsIntotoPayload`."""

    def _hasher_factory(
        self, path: pathlib.Path, start: int, end: int
    ) -> file.ShardedFileHasher:
        """Returns a SHA256-backed sharded hasher for `path`.

        Args:
            path: The file to hash.
            start: Forwarded as the `start` of the shard.
            end: Forwarded as the `end` of the shard.
        """
        content_hasher = memory.SHA256()
        return file.ShardedFileHasher(
            path, content_hasher, start=start, end=end
        )

    def _payload_for(self, model):
        """Serializes `model` by file shard and wraps it in the payload."""
        shard_serializer = serialize_by_file_shard.ManifestSerializer(
            self._hasher_factory, allow_symlinks=True
        )
        shard_manifest = shard_serializer.serialize(model)
        return in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(
            shard_manifest
        )

    @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
    def test_known_models(self, request, model_fixture_name):
        """Compares the generated statement against the stored golden file.

        When the `update_goldens` option is set, the golden file is rewritten
        instead of being compared.
        """
        # Arrange: locate the golden file and resolve the model fixture.
        golden_path = (
            request.path.parent
            / "testdata"
            / "in_toto"
            / "TestDigestOfShardDigestsIntotoPayload"
            / model_fixture_name
        )
        should_update = request.config.getoption("update_goldens")
        model = request.getfixturevalue(model_fixture_name)

        # Act: build the payload for the model.
        payload = self._payload_for(model)

        # Update mode: refresh the golden and skip the comparison.
        if should_update:
            golden_path.write_text(
                f"{json_format.MessageToJson(payload.statement.pb)}\n",
                encoding="utf-8",
            )
            return

        # Assert: the statement must match the golden exactly.
        golden_json = golden_path.read_text(encoding="utf-8")
        expected_proto = json_format.Parse(
            golden_json, statement_pb2.Statement()
        )
        assert payload.statement.pb == expected_proto

    def test_produces_valid_statements(self, sample_model_folder):
        """The statement built for a sample model passes its own validation."""
        payload = self._payload_for(sample_model_folder)

        payload.statement.validate()

    def test_only_runs_on_expected_manifest_types(self):
        """A manifest that is not shard-level is rejected with `TypeError`."""
        unsupported_manifest = manifest_module.DigestManifest(
            hashing.Digest("test", b"test_digest")
        )

        with pytest.raises(
            TypeError,
            match="Only ShardLevelManifest is supported",
        ):
            in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(
                unsupported_manifest
            )
a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/deep_model_folder b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/deep_model_folder new file mode 100644 index 00000000..42b52a19 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/deep_model_folder @@ -0,0 +1,35 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "18b5a45fe7983f7194e8ffd96c80f5f0ec53191bf4a32b6aff293f043e816d7a" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [ + { + "digest": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f0:0:16" + }, + { + "digest": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f1:0:16" + }, + { + "digest": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f2:0:16" + }, + { + "digest": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008", + "algorithm": "file-sha256-1000000", + "name": "d0/d1/d2/d3/d4/f3:0:16" + } + ] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_file b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_file new file mode 100644 index 00000000..898052c3 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_file @@ -0,0 +1,14 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [] + } +} diff --git 
a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_folder b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_folder new file mode 100644 index 00000000..898052c3 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/empty_model_folder @@ -0,0 +1,14 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/model_folder_with_empty_file b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/model_folder_with_empty_file new file mode 100644 index 00000000..898052c3 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/model_folder_with_empty_file @@ -0,0 +1,14 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_file b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_file new file mode 100644 index 00000000..8dee2069 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_file @@ -0,0 +1,20 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": 
{ + "shards": [ + { + "digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b", + "algorithm": "file-sha256-1000000", + "name": ".:0:22" + } + ] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_folder b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_folder new file mode 100644 index 00000000..461cee86 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/sample_model_folder @@ -0,0 +1,65 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "143cc682e555951649f18e2761c3d526d2502996f5e32dc187ef7f8a614f8df7" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [ + { + "digest": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249", + "algorithm": "file-sha256-1000000", + "name": "d0/f00:0:23" + }, + { + "digest": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6", + "algorithm": "file-sha256-1000000", + "name": "d0/f01:0:23" + }, + { + "digest": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426", + "algorithm": "file-sha256-1000000", + "name": "d0/f02:0:23" + }, + { + "digest": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f", + "algorithm": "file-sha256-1000000", + "name": "d1/f10:0:23" + }, + { + "digest": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf", + "algorithm": "file-sha256-1000000", + "name": "d1/f11:0:23" + }, + { + "digest": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3", + "algorithm": "file-sha256-1000000", + "name": "d1/f12:0:23" + }, + { + "digest": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc", + "algorithm": "file-sha256-1000000", + "name": "f0:0:24" + }, + { + "digest": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae", + "algorithm": 
"file-sha256-1000000", + "name": "f1:0:24" + }, + { + "digest": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719", + "algorithm": "file-sha256-1000000", + "name": "f2:0:24" + }, + { + "digest": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a", + "algorithm": "file-sha256-1000000", + "name": "f3:0:24" + } + ] + } +} diff --git a/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/symlink_model_folder b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/symlink_model_folder new file mode 100644 index 00000000..dcba2415 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestDigestOfShardDigestsIntotoPayload/symlink_model_folder @@ -0,0 +1,20 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "digest": { + "sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178" + } + } + ], + "predicateType": "https://model_signing/DigestOfShardDigests/v0.1", + "predicate": { + "shards": [ + { + "digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b", + "algorithm": "file-sha256-1000000", + "name": "symlink_file:0:22" + } + ] + } +}