Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add in-toto format as hash of shard hashes #265

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions model_signing/signing/in_toto.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,3 +254,101 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
predicate_top_level_name="files",
)
return cls(statement)


class DigestOfShardDigestsIntotoPayload(IntotoPayload):
"""In-toto payload where the subject is a digest of digests of file shards.

This payload is supposed to be used for manifests where every file shard in
the model is matched with a digest. Because existing tooling only supports
established hashing algorithms, we record every such digest in the predicate
part and compute a hash for the subject by using sha256 on the concatenation
of the shard hashes. To ensure determinism, the hashes are sorted
by file shard (alphabetically by name, then ordered by start offset).

Example:
```json
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "18b5a4..."
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious if the non-sharded version can be represented as a special case of this one, with the shard=0.
Not sure it's worth it to try to optimize it. Thinking more about it, I like that it's separate to avoid implementation problems :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, we can try that in the future.

"predicate": {
"shards": [
{
"digest": "6efa14...",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f0:0:16"
},
{
"digest": "a9bc14...",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f1:0:16"
},
{
"digest": "5f597e...",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f2:0:16"
},
{
"digest": "eaf677...",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f3:0:16"
}
]
}
}
```

A missing predicate, or a predicate for which an entry does not have a valid
name, digest, or algorithm, should be considered invalid and fail integrity
verification.

See also https://github.com/sigstore/sigstore-python/issues/1018.
"""

# Predicate type URI identifying this payload format. `from_manifest` passes
# it as `predicate_type` when building the statement, and it is the value
# shown under "predicateType" in the example statement in the class docstring.
predicate_type: Final[str] = (
"https://model_signing/DigestOfShardDigests/v0.1"
)

def __init__(self, statement: statement.Statement):
    """Wraps an already-built statement as this in-toto payload.

    Production code should not call this constructor directly; build
    instances through `from_manifest()` instead.

    Args:
        statement: The DSSE statement representing this in-toto payload.
    """
    # The statement is stored as given, on the public `statement` attribute.
    self.statement = statement

@classmethod
@override
def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
    """Converts a manifest to the signing payload used for signing.

    The manifest must be one where every model shard is paired with its own
    digest. Currently, this is only `ShardLevelManifest`.

    Args:
        manifest: The manifest to convert to a signing payload.

    Returns:
        An instance of `DigestOfShardDigestsIntotoPayload`.

    Raises:
        TypeError: If the manifest is not `ShardLevelManifest`.
    """
    # Reject unsupported manifest kinds up front, before building anything.
    if not isinstance(manifest, manifest_module.ShardLevelManifest):
        raise TypeError("Only ShardLevelManifest is supported")

    # The per-shard entries are recorded under the "shards" key of the
    # predicate; the statement itself is assembled by a module-level helper.
    statement = _convert_descriptors_to_hashed_statement(
        manifest,
        predicate_type=cls.predicate_type,
        predicate_top_level_name="shards",
    )
    return cls(statement)
65 changes: 65 additions & 0 deletions model_signing/signing/in_toto_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from model_signing.hashing import memory
from model_signing.manifest import manifest as manifest_module
from model_signing.serialization import serialize_by_file
from model_signing.serialization import serialize_by_file_shard
from model_signing.signing import in_toto


Expand Down Expand Up @@ -143,3 +144,67 @@ def test_only_runs_on_expected_manifest_types(self):
match="Only FileLevelManifest is supported",
):
in_toto.DigestOfDigestsIntotoPayload.from_manifest(manifest)


class TestDigestOfShardDigestsIntotoPayload:
    """Tests for `in_toto.DigestOfShardDigestsIntotoPayload`."""

    def _hasher_factory(
        self, path: pathlib.Path, start: int, end: int
    ) -> file.ShardedFileHasher:
        """Builds a SHA256 sharded hasher for `path` between `start` and `end`."""
        sharded_hasher = file.ShardedFileHasher(
            path, memory.SHA256(), start=start, end=end
        )
        return sharded_hasher

    def _payload_for(self, model):
        """Serializes `model` shard by shard and converts it to a payload."""
        shard_serializer = serialize_by_file_shard.ManifestSerializer(
            self._hasher_factory, allow_symlinks=True
        )
        shard_manifest = shard_serializer.serialize(model)
        return in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(
            shard_manifest
        )

    @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
    def test_known_models(self, request, model_fixture_name):
        """Compares each known model's payload with its golden file.

        When the `update_goldens` option is set, the golden file is rewritten
        from the computed payload instead of being compared against it.
        """
        # Arrange: locate the golden file and resolve the model fixture.
        golden_path = (
            request.path.parent
            / "testdata"
            / "in_toto"
            / "TestDigestOfShardDigestsIntotoPayload"
            / model_fixture_name
        )
        should_update = request.config.getoption("update_goldens")
        model = request.getfixturevalue(model_fixture_name)

        # Act: compute the payload for the model.
        payload = self._payload_for(model)

        # Update mode: overwrite the golden and skip the comparison.
        if should_update:
            with open(golden_path, "w", encoding="utf-8") as out:
                out.write(
                    f"{json_format.MessageToJson(payload.statement.pb)}\n"
                )
            return

        # Assert: the computed statement must equal the parsed golden.
        with open(golden_path, "r", encoding="utf-8") as golden_file:
            golden_json = golden_file.read()
        expected_proto = json_format.Parse(
            golden_json, statement_pb2.Statement()
        )

        assert payload.statement.pb == expected_proto

    def test_produces_valid_statements(self, sample_model_folder):
        """The generated statement passes its own `validate()` check."""
        payload = self._payload_for(sample_model_folder)

        payload.statement.validate()

    def test_only_runs_on_expected_manifest_types(self):
        """A `DigestManifest` is rejected with a `TypeError`."""
        unsupported_manifest = manifest_module.DigestManifest(
            hashing.Digest("test", b"test_digest")
        )

        with pytest.raises(
            TypeError,
            match="Only ShardLevelManifest is supported",
        ):
            in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(
                unsupported_manifest
            )
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "18b5a45fe7983f7194e8ffd96c80f5f0ec53191bf4a32b6aff293f043e816d7a"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": [
{
"digest": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f0:0:16"
},
{
"digest": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f1:0:16"
},
{
"digest": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f2:0:16"
},
{
"digest": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f3:0:16"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": []
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": []
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": []
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": [
{
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
"algorithm": "file-sha256-1000000",
"name": ".:0:22"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "143cc682e555951649f18e2761c3d526d2502996f5e32dc187ef7f8a614f8df7"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": [
{
"digest": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249",
"algorithm": "file-sha256-1000000",
"name": "d0/f00:0:23"
},
{
"digest": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6",
"algorithm": "file-sha256-1000000",
"name": "d0/f01:0:23"
},
{
"digest": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426",
"algorithm": "file-sha256-1000000",
"name": "d0/f02:0:23"
},
{
"digest": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f",
"algorithm": "file-sha256-1000000",
"name": "d1/f10:0:23"
},
{
"digest": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf",
"algorithm": "file-sha256-1000000",
"name": "d1/f11:0:23"
},
{
"digest": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3",
"algorithm": "file-sha256-1000000",
"name": "d1/f12:0:23"
},
{
"digest": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc",
"algorithm": "file-sha256-1000000",
"name": "f0:0:24"
},
{
"digest": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae",
"algorithm": "file-sha256-1000000",
"name": "f1:0:24"
},
{
"digest": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719",
"algorithm": "file-sha256-1000000",
"name": "f2:0:24"
},
{
"digest": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a",
"algorithm": "file-sha256-1000000",
"name": "f3:0:24"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": [
{
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
"algorithm": "file-sha256-1000000",
"name": "symlink_file:0:22"
}
]
}
}
Loading