Skip to content

Commit

Permalink
Add tests for serialize_v0 for directory models
Browse files Browse the repository at this point in the history
Signed-off-by: laurentsimon <laurentsimon@google.com>
  • Loading branch information
laurentsimon committed Oct 31, 2023
1 parent 41f0ada commit f9155a4
Show file tree
Hide file tree
Showing 2 changed files with 225 additions and 13 deletions.
8 changes: 4 additions & 4 deletions model_signing/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,13 +348,13 @@ def serialize_v0(path: Path, chunk: int, signature_path: Path,

hash = hashlib.sha256()
for child in children:
child_hash = Serializer._serialize_node(child, chunk, " ")
child_hash = Serializer._serialize_node(child, chunk, " ", ignorepaths)
hash.update(child_hash)
content = hash.digest()
return Hasher.root_folder(path, content)

@staticmethod
def _serialize_node(path: Path, chunk: int, indent="") -> bytes:
def _serialize_node(path: Path, chunk: int, indent="", ignorepaths: [Path] = []) -> bytes:
if not allow_symlinks and path.is_symlink():
raise ValueError(f"{str(path)} is a symlink")

Expand All @@ -364,14 +364,14 @@ def _serialize_node(path: Path, chunk: int, indent="") -> bytes:
if not path.is_dir():
raise ValueError(f"{str(path)} is not a dir")

children = sorted([x for x in path.iterdir()])
children = sorted([x for x in path.iterdir() if x not in ignorepaths])
# TODO: remove this special case?
if len(children) == 0:
return Hasher.node_folder(path, b"empty")

hash = hashlib.sha256()
for child in children:
child_hash = Serializer._serialize_node(child, chunk, indent + " ")
child_hash = Serializer._serialize_node(child, chunk, indent + " ", ignorepaths)
hash.update(child_hash)
content = hash.digest()
return Hasher.node_folder(path, content)
230 changes: 221 additions & 9 deletions model_signing/serialize_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,46 @@


# Utility functions.
def create_folder(name: str) -> Path:
def create_empty_folder(name: str) -> Path:
    """Create the folder `name` under the test data directory.

    The folder is created at <cwd>/<testdata_dir>/<name>, along with any
    missing parent directories. os.makedirs raises if the folder already
    exists.
    """
    target = os.path.join(os.getcwd(), testdata_dir, name)
    os.makedirs(target)
    return Path(target)


def create_random_folders(name: str) -> (Path, int, [Path], [Path]):
    """Create a randomly-shaped directory tree filled with random files.

    The tree is rooted at <cwd>/<testdata_dir>/<name>. One random byte drives
    the layout: for each of its 8 bits, a set bit nests a new directory inside
    the most recently recorded one, and a clear bit adds a sibling next to it.
    Each recorded directory then receives between 0 and 3 files, each holding
    28 random bytes.

    Returns:
        A tuple (root, file_size, dirs, files) where `root` is the tree root,
        `file_size` is the size in bytes of every created file, `dirs` is the
        sorted list of recorded directories (parents created implicitly by
        os.makedirs are not listed) and `files` is the sorted list of created
        files.
    """
    file_size = 28
    root = os.path.join(os.getcwd(), testdata_dir, name)

    layout = os.urandom(1)[0]
    dirs = [root]
    # Generate 8 directories, one per bit of the random layout byte.
    for i in range(8):
        dir_name = "dir_%d" % i
        if (layout >> i) & 1:
            # Add depth to the previously-recorded directory.
            dirs[-1] = os.path.join(dirs[-1], dir_name)
        else:
            # Add a directory next to the previous entry, but never outside
            # of the root folder.
            parent = os.path.dirname(dirs[-1])
            if Path(parent) == Path(root).parent:
                parent = root
            dirs.append(os.path.join(parent, dir_name))
    for d in dirs:
        os.makedirs(d)

    # Create at most 3 files in each directory.
    files = []
    for d in dirs:
        count = os.urandom(1)[0] & 3
        for i in range(count):
            # Name each file after its index (not the file count), so that
            # every file in a directory gets a distinct name.
            file_path = os.path.join(d, "file_%d" % i)
            with open(file_path, "wb") as f:
                f.write(os.urandom(file_size))
            files.append(file_path)

    return (
        Path(root),
        file_size,
        [Path(d) for d in sorted(dirs)],
        [Path(f) for f in sorted(files)],
    )


def create_symlinks(src: str, dst: str) -> Path:
psrc = os.path.join(os.getcwd(), testdata_dir, src)
pdst = os.path.join(os.getcwd(), testdata_dir, dst)
Expand Down Expand Up @@ -58,7 +92,7 @@ class Test_serialize_v0:
# symlink in root folder raises ValueError exception.
def test_symlink_root(self):
folder = "with_root_symlinks"
model = create_folder(folder)
model = create_empty_folder(folder)
sig = signature_path(model)
create_symlinks(".", os.path.join(folder, "root_link"))
with pytest.raises(ValueError):
Expand All @@ -67,9 +101,9 @@ def test_symlink_root(self):

# symlink in non-root folder raises ValueError exception.
def test_symlink_nonroot(self):
model = create_folder("with_nonroot_symlinks")
model = create_empty_folder("with_nonroot_symlinks")
sub_folder = model.joinpath("sub")
create_folder(str(sub_folder))
create_empty_folder(str(sub_folder))
sig = signature_path(model)
create_symlinks(".", os.path.join(sub_folder, "sub_link"))
with pytest.raises(ValueError):
Expand Down Expand Up @@ -100,7 +134,7 @@ def test_file_chuncks(self):
assert (r == result)
cleanup_model(model)

# File serialization raises error for negativ chunk values.
# File serialization raises error for negative chunk values.
def test_file_negative_chuncks(self):
file = "model_file"
data = b"hellow world content"
Expand All @@ -127,6 +161,25 @@ def test_different_filename(self):

assert (r0 == r1)

# Folder serialization works.
def test_known_folder(self):
    """Serializing a fixed folder layout yields a known digest."""
    model = create_empty_folder("some_folder")
    sig = signature_path(model)

    # Three sub-directories; dir2 is intentionally left empty.
    for sub in ("dir1", "dir2", "dir3"):
        os.mkdir(model.joinpath(sub))

    fixtures = (
        ("dir1", "f11", b"content f11"),
        ("dir1", "f12", b"content f12"),
        ("dir3", "f31", b"content f31"),
    )
    for sub, filename, data in fixtures:
        with open(model.joinpath(sub, filename), "wb") as f:
            f.write(data)

    expected = b's\xac\xf7\xbdC\x14\x97fv\x97\x9c\xd3\xe4=,\xe7\x99.d(oP\xff\xe2\xd8~\xa2\x9cS\xe2/\xd9'  # noqa: E501 ignore long line warning
    result = Serializer.serialize_v0(model, 0, sig)
    assert result == expected
    cleanup_model(model)

# File serialization returns a different result for different model
# contents.
def test_altered_file(self):
Expand All @@ -147,14 +200,173 @@ def test_altered_file(self):
cleanup_model(altered_model)
cleanup_model(model)

# TODO(#57): directory support.
# Folder serialization raises error for negative chunk values.
def test_folder_negative_chuncks(self):
    """A negative chunk size is rejected when serializing a folder."""
    model = create_empty_folder("model_dir")
    sig_path = signature_path(model)
    with pytest.raises(ValueError):
        Serializer.serialize_v0(model, -1, sig_path)
    cleanup_model(model)

# Folder serialization returns the same results for different folder names.
def test_different_dirname(self):
    """Renaming the model's root folder does not change the digest."""
    model = create_empty_folder("model_dir")
    sig = signature_path(model)

    for sub in ("dir1", "dir2", "dir3"):
        os.mkdir(model.joinpath(sub))
    fixtures = (
        ("dir1", "f11", b"content f11"),
        ("dir1", "f12", b"content f12"),
        ("dir3", "f31", b"content f31"),
    )
    for sub, filename, data in fixtures:
        with open(model.joinpath(sub, filename), "wb") as f:
            f.write(data)
    before = Serializer.serialize_v0(model, 0, sig)

    # Rename the folder and serialize it again under its new name.
    renamed_model = model.parent.joinpath("model_dir2")
    os.rename(model, renamed_model)
    sig_path = signature_path(renamed_model)
    after = Serializer.serialize_v0(renamed_model, 0, sig_path)
    cleanup_model(renamed_model)

    assert before == after

# Folder serialization returns the same results for different folder names
# that are ignored.
def test_different_ignored_paths(self):
    """Changes made beneath an ignored path must not change the digest.

    Three baseline digests are computed, each ignoring a different path
    (dir1, dir2, dir2/dir3). The content beneath the ignored path is then
    renamed, modified or extended, and the digest is recomputed with the
    same ignore list: it must be equal to the matching baseline. Each
    alteration is reverted before the next one is applied.
    """
    folder = "model_dir"
    model = create_empty_folder(folder)
    sig = signature_path(model)
    os.mkdir(model.joinpath("dir1"))
    os.mkdir(model.joinpath("dir2"))
    os.mkdir(model.joinpath("dir2/dir3"))
    with open(model.joinpath("dir1", "f11"), "wb") as f:
        f.write(b"content f11")
    with open(model.joinpath("dir2", "f21"), "wb") as f:
        f.write(b"content f21")
    with open(model.joinpath("dir2/dir3", "f31"), "wb") as f:
        f.write(b"content f31")
    r1 = Serializer.serialize_v0(model, 0, sig, [model.joinpath("dir1")])
    r2 = Serializer.serialize_v0(model, 0, sig, [model.joinpath("dir2")])
    r3 = Serializer.serialize_v0(model, 0, sig, [model.joinpath("dir2/dir3")])  # noqa: E501 ignore long line warning

    # Rename the file under dir1.
    new_file = model.joinpath("dir1/f11_altered")
    os.rename(model.joinpath("dir1/f11"), new_file)
    r11 = Serializer.serialize_v0(model, 0, sig, [model.joinpath("dir1")])
    assert (r11 == r1)
    os.rename(new_file, model.joinpath("dir1/f11"))

    # Update the file under dir1. The digest must be computed *after* the
    # content has been altered, otherwise the check is vacuous.
    with open(model.joinpath("dir1", "f11"), "wb") as f:
        f.write(b"content f11 altered")
    r11 = Serializer.serialize_v0(model, 0, sig, [model.joinpath("dir1")])
    assert (r11 == r1)
    with open(model.joinpath("dir1", "f11"), "wb") as f:
        f.write(b"content f11")

    # Rename the folder dir3 under dir2.
    new_dir = model.joinpath("dir2/dir3_altered")
    os.rename(model.joinpath("dir2/dir3"), new_dir)
    r22 = Serializer.serialize_v0(model, 0, sig, [model.joinpath("dir2")])
    assert (r22 == r2)
    os.rename(new_dir, model.joinpath("dir2/dir3"))

    # Add a file under dir2.
    with open(model.joinpath("dir2", "new_file"), "wb") as f:
        f.write(b"new file!!")
    r22 = Serializer.serialize_v0(model, 0, sig, [model.joinpath("dir2")])
    assert (r22 == r2)
    os.unlink(model.joinpath("dir2", "new_file"))

    # Update the content of f31 file.
    with open(model.joinpath("dir2/dir3", "f31"), "wb") as f:
        f.write(b"content f31 altered")
    r22 = Serializer.serialize_v0(model, 0, sig, [model.joinpath("dir2")])
    assert (r22 == r2)
    r33 = Serializer.serialize_v0(model, 0, sig, [model.joinpath("dir2/dir3")])  # noqa: E501 ignore long line warning
    assert (r33 == r3)
    with open(model.joinpath("dir2/dir3", "f31"), "wb") as f:
        f.write(b"content f31")

    cleanup_model(model)

# Folder serialization returns different values for different
# sub-directory names.
def test_random_folder_different_folder_names(self):
    """Renaming any sub-directory of a random tree changes the digest."""
    model, _, dirs, _ = create_random_folders("model_dir")
    sig_path = signature_path(model)
    baseline = Serializer.serialize_v0(model, 0, sig_path)
    # The model folder itself is skipped: only sub-directories are renamed.
    for original in (d for d in dirs if d != model):
        renamed = original.parent.joinpath(original.name + "_altered")
        os.rename(original, renamed)
        altered = Serializer.serialize_v0(model, 0, sig_path)
        # Restore the original name before checking the result.
        os.rename(renamed, original)
        assert altered != baseline
    cleanup_model(model)

# Folder serialization returns different values for different file names.
def test_random_folder_different_filenames(self):
    """Renaming any file of a random tree changes the digest."""
    model, _, _, files = create_random_folders("model_dir")
    sig_path = signature_path(model)
    baseline = Serializer.serialize_v0(model, 0, sig_path)
    for original in files:
        renamed = original.parent.joinpath(original.name + "_altered")
        os.rename(original, renamed)
        altered = Serializer.serialize_v0(model, 0, sig_path)
        # Restore the original name before checking the result.
        os.rename(renamed, original)
        assert altered != baseline
    cleanup_model(model)

# Folder serialization returns different values for different file contents.
def test_random_folder_different_file_content(self):
    """Flipping one bit in any byte of any file changes the digest."""
    model, _, _, files = create_random_folders("model_dir")
    sig_path = signature_path(model)
    baseline = Serializer.serialize_v0(model, 0, sig_path)
    for path in files:
        original = path.read_bytes()
        for pos, byte in enumerate(original):
            # Alter the file content, one byte at a time.
            mutated = original[:pos] + bytes([byte ^ 1]) + original[pos + 1:]
            path.write_bytes(mutated)
            altered = Serializer.serialize_v0(model, 0, sig_path)
            assert altered != baseline
        # Write the original content back to the file.
        path.write_bytes(original)
    cleanup_model(model)

# Folder serialization returns the same results for different chunk sizes.
def test_random_folder_different_chunks(self):
    """The digest of a random tree does not depend on the chunk size."""
    model, max_size, _, _ = create_random_folders("model_dir")
    sig_path = signature_path(model)
    baseline = Serializer.serialize_v0(model, 0, sig_path)
    # NOTE: the upper bound is max_size + 1 so that the range ends at
    # max_size itself, as in the original loop.
    for chunk in range(1, max_size + 1):
        chunked = Serializer.serialize_v0(model, chunk, sig_path)
        assert chunked == baseline
    cleanup_model(model)


class Test_serialize_v1:
# symlink in root folder raises ValueError exception.
def test_symlink_root(self):
folder = "with_root_symlinks"
model = create_folder(folder)
model = create_empty_folder(folder)
sig = signature_path(model)
create_symlinks(".", os.path.join(folder, "root_link"))
with pytest.raises(ValueError):
Expand All @@ -163,9 +375,9 @@ def test_symlink_root(self):

# symlink in non-root folder raises ValueError exception.
def test_symlink_nonroot(self):
model = create_folder("with_nonroot_symlinks")
model = create_empty_folder("with_nonroot_symlinks")
sub_folder = model.joinpath("sub")
create_folder(str(sub_folder))
create_empty_folder(str(sub_folder))
sig = signature_path(model)
create_symlinks(".", os.path.join(sub_folder, "sub_link"))
with pytest.raises(ValueError):
Expand Down

0 comments on commit f9155a4

Please sign in to comment.