Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions dissect/util/compression/snappy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# References:
# - https://github.com/google/snappy/blob/main/format_description.txt
from __future__ import annotations

import io
import struct
from typing import BinaryIO

# Pre-compiled little-endian struct formats for decoding fixed-width integers.
_H = struct.Struct("<H")  # unsigned 16-bit
_I = struct.Struct("<I")  # unsigned 32-bit


def varint(src: BinaryIO) -> int:
    """Decode a little-endian base-128 varint from a stream.

    Bytes are consumed one at a time. The low seven bits of every byte are
    payload (least significant group first); a byte with its high bit clear
    terminates the number.

    Args:
        src: File-like object positioned at the first byte of the varint.

    Returns:
        The decoded non-negative integer.

    Raises:
        EOFError: If the stream ends before a terminating byte is read.
    """
    number = 0
    bits = 0

    while True:
        chunk = src.read(1)
        if not chunk:
            raise EOFError("Unexpected EOF while reading varint")

        octet = chunk[0]
        number |= (octet & 0x7F) << bits
        if not octet & 0x80:
            # High bit clear: this was the final group.
            return number

        bits += 7


def _read_exact(src: BinaryIO, size: int, what: str) -> bytes:
    """Read exactly ``size`` bytes from ``src`` or raise ``EOFError`` naming ``what``."""
    data = src.read(size)
    if len(data) < size:
        raise EOFError(f"Unexpected EOF while reading {what}")
    return data


def decompress(src: bytes | BinaryIO) -> bytes:
    """Snappy decompress from a file-like object or bytes.

    The stream starts with a varint holding the uncompressed length, followed
    by a sequence of elements. Each element begins with a tag byte whose two
    low bits select the element type: ``0`` is a literal run, ``1``/``2``/``3``
    are back-references (copies) with a 1, 2 or 4 byte offset. Decoding stops
    once at least the stored uncompressed length has been produced.

    Args:
        src: File-like object or bytes to decompress.

    Returns:
        The decompressed data.

    Raises:
        EOFError: If the input ends in the middle of the preamble or an element.
        ValueError: If a copy element has a zero offset or points before the
            start of the output produced so far.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    # A bytearray lets copies be sliced directly out of the output so far.
    dst = bytearray()

    uncompressed_length = varint(src)

    while len(dst) < uncompressed_length:
        tag_byte = _read_exact(src, 1, "tag")[0]
        tag = tag_byte & 0b11

        if tag == 0:
            # Literal: the upper six bits hold (length - 1) for short runs;
            # values 60..63 mean the length is stored in the next 1..4 bytes.
            length = tag_byte >> 2
            if length < 60:
                length += 1
            else:
                raw = _read_exact(src, length - 59, "literal length")
                length = int.from_bytes(raw, "little") + 1

            dst += _read_exact(src, length, "literal")
            continue

        # Copy with 1, 2 or 4 byte offset
        if tag == 1:
            # 3-bit length (4..11); 11-bit offset whose top three bits live in
            # the top three bits of the tag byte.
            length = ((tag_byte >> 2) & 0b111) + 4
            offset = ((tag_byte & 0b11100000) << 3) | _read_exact(src, 1, "copy offset")[0]
        elif tag == 2:
            length = (tag_byte >> 2) + 1
            offset = _H.unpack(_read_exact(src, 2, "copy offset"))[0]
        else:
            length = (tag_byte >> 2) + 1
            offset = _I.unpack(_read_exact(src, 4, "copy offset"))[0]

        start = len(dst) - offset
        if offset == 0 or start < 0:
            # A negative start would silently wrap around in the slice below
            # and a zero offset yields nothing to repeat, so reject both.
            raise ValueError(f"Invalid copy offset {offset} at output position {len(dst)}")

        chunk = dst[start : start + length]
        if len(chunk) < length:
            # Overlapping copy (offset < length): the source run repeats.
            chunk = (chunk * (length // len(chunk) + 1))[:length]

        dst += chunk

    return bytes(dst)
37 changes: 7 additions & 30 deletions tests/compression/test_lz4.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pytest_benchmark.fixture import BenchmarkFixture


@pytest.mark.parametrize(
PARAMS = (
("data", "digest"),
[
pytest.param(
Expand Down Expand Up @@ -41,37 +41,14 @@
),
],
)
def test_lz4_decompress(lz4: ModuleType, data: str, digest: str) -> None:
assert hashlib.sha256(lz4.decompress(bytes.fromhex(data))).hexdigest() == digest


@pytest.mark.benchmark
def test_benchmark_lz4_decompress(lz4: ModuleType, benchmark: BenchmarkFixture) -> None:
buf = bytes.fromhex("ff0c4c5a3420636f6d7072657373696f6e207465737420737472696e671b00db507472696e67")
assert benchmark(lz4.decompress, buf) == b"LZ4 compression test string" * 10
@pytest.mark.parametrize(*PARAMS)
def test_lz4_decompress(lz4: ModuleType, data: str, digest: str) -> None:
assert hashlib.sha256(lz4.decompress(bytes.fromhex(data))).hexdigest() == digest


@pytest.mark.benchmark
def test_benchmark_large_lz4_decompress(lz4: ModuleType, benchmark: BenchmarkFixture) -> None:
buf = bytes.fromhex(
"ffffa94c6f72656d20697073756d20646f6c6f722073697420616d657420636f"
"6e73656374657475722061646970697363696e6720656c69742e205175697371"
"75652066617563696275732065782073617069656e2076697461652070656c6c"
"656e7465737175652073656d20706c6163657261742e20496e20696420637572"
"737573206d69207072657469756d2074656c6c7573206475697320636f6e7661"
"6c6c69732e2054656d707573206c656f2065752061656e65616e207365642064"
"69616d2075726e612074656d706f722e2050756c76696e617220766976616d75"
"73206672696e67696c6c61206c61637573206e6563206d657475732062696265"
"6e64756d20656765737461732e20496163756c6973206d61737361206e69736c"
"206d616c657375616461206c6163696e696120696e7465676572206e756e6320"
"706f73756572652e2055742068656e6472657269742073656d7065722076656c"
"20636c61737320617074656e742074616369746920736f63696f7371752e2041"
"64206c69746f726120746f727175656e742070657220636f6e75626961206e6f"
"7374726120696e636570746f732068696d656e61656f732e0a0ab701ffffffff"
"ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
"ffffffffffffffffffffffffffffffffff4550656f732e0a"
)
assert (
hashlib.sha256(benchmark(lz4.decompress, buf)).hexdigest()
== "73d3dd96ca2e2f0144a117019256d770ee7c6febeaee09b24956c723ae22b529"
)
@pytest.mark.parametrize(*PARAMS)
def test_benchmark_lz4_decompress(lz4: ModuleType, data: str, digest: str, benchmark: BenchmarkFixture) -> None:
assert hashlib.sha256(benchmark(lz4.decompress, bytes.fromhex(data))).hexdigest() == digest
41 changes: 7 additions & 34 deletions tests/compression/test_lzbitmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pytest_benchmark.fixture import BenchmarkFixture


@pytest.mark.parametrize(
PARAMS = (
("data", "digest"),
[
pytest.param(
Expand Down Expand Up @@ -49,41 +49,14 @@
),
],
)
def test_lzbitmap_decompress(data: str, digest: str) -> None:
assert hashlib.sha256(lzbitmap.decompress(bytes.fromhex(data))).hexdigest() == digest


@pytest.mark.benchmark
def test_benchmark_lzbitmap_decompress(benchmark: BenchmarkFixture) -> None:
buf = bytes.fromhex(
"5a424d092d0000a0000018000018000018000061616161616161617835ef"
"340f0000f10f00000000000000000000000000060000000000",
)
assert benchmark(lzbitmap.decompress, buf) == b"a" * 158 + b"xa"
@pytest.mark.parametrize(*PARAMS)
def test_lzbitmap_decompress(data: str, digest: str) -> None:
assert hashlib.sha256(lzbitmap.decompress(bytes.fromhex(data))).hexdigest() == digest


@pytest.mark.benchmark
def test_benchmark_large_lzbitmap_decompress(benchmark: BenchmarkFixture) -> None:
buf = bytes.fromhex(
"5a424d09d80100df36002301005001007601004c6f72656d20697073756d"
"20646f6c6f722073742061657420636e7365636574757220616469706973"
"696e67656c69742e517573716566617563627520657820736170656e7661"
"6570656c6c6e6573656d6c61637261496e20696475727573206d69707269"
"74656c757364206f76616c7354656d7075206c656f7561656e6561646469"
"6d206e2074656d706f72506c6e7276697675736672696c61206c636e636d"
"657473626962656e64756d67657461732e496c696d6173206e6c6d616c75"
"64616c696e616e746567726e6e63707365722e5574206864726572697473"
"6d7072766c637373207074656e74747469736f6f73712e41646c696f7261"
"6f7175657465727562696f74726370746f686d6f732e0a0a4c0d2c341b41"
"3e26113c5f6e805b6b7c65529d967ec4b310edb922ca7b1deca5faf4434a"
"fbfa52fb8272b2ffb7016f7fcbc1b9f1373af99e3eb4e94fa9b3bafe39d3"
"6959add6f36b55eecdb59d2ec3d1d029fc0055a9cbed016111718114103f"
"480147110116011040f125f3ffffffffffffffffffffffffffffffffffff"
"ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
"ffffffffffffffff1f002cf68f57d9c576d73bfd7cd3d756000006000000"
"0000",
)
assert (
hashlib.sha256(benchmark(lzbitmap.decompress, buf)).hexdigest()
== "73d3dd96ca2e2f0144a117019256d770ee7c6febeaee09b24956c723ae22b529"
)
@pytest.mark.parametrize(*PARAMS)
def test_benchmark_lzbitmap_decompress(data: str, digest: str, benchmark: BenchmarkFixture) -> None:
assert hashlib.sha256(benchmark(lzbitmap.decompress, bytes.fromhex(data))).hexdigest() == digest
36 changes: 7 additions & 29 deletions tests/compression/test_lzfse.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pytest_benchmark.fixture import BenchmarkFixture


@pytest.mark.parametrize(
PARAMS = (
("data", "digest"),
[
pytest.param(
Expand Down Expand Up @@ -40,36 +40,14 @@
),
],
)
def test_lzfse_decompress(data: str, digest: str) -> None:
assert hashlib.sha256(lzfse.decompress(bytes.fromhex(data))).hexdigest() == digest


@pytest.mark.benchmark
def test_benchmark_lzfse_decompress(benchmark: BenchmarkFixture) -> None:
buf = bytes.fromhex("6276786e2c01000013000000c803616263f0fff005e163060000000000000062767824")
assert benchmark(lzfse.decompress, buf) == b"abc" * 100
@pytest.mark.parametrize(*PARAMS)
def test_lzfse_decompress(data: str, digest: str) -> None:
assert hashlib.sha256(lzfse.decompress(bytes.fromhex(data))).hexdigest() == digest


@pytest.mark.benchmark
def test_benchmark_large_lzfse_decompress(benchmark: BenchmarkFixture) -> None:
buf = bytes.fromhex(
"62767832df360000a401200e000d0030b92c8bef56220070a50000003984"
"70085f00b0000000383e59c0f1090000005fc027710c0000fc031c1f03c7"
"700cc70000000000000000006c00003600000000005f07000000e7000000"
"00100060418004120000e06338f9061e8067629dacdf013e836fe041f807"
"be80fd14fc0e3fc29f704700000000000000000000000000000000000000"
"000000000000000000000000000000000000101288d24318d9f446277ac6"
"885a4b5dea360854e4c4616262d667f9f1ff53187e598fe2f5ddf7b768f4"
"bcc1f6441b9e55e0d1be84b4b91544337f11c4d0d615068c79817f5d19f2"
"09c83975cf9669b7f3d1024d9cc795e8ac449090696a7660585fac1a891c"
"40557bb46c1b62a35ab2608d574e82ba9f3956d0f811370c78d69b24240f"
"fd80ec4eccb6dc1e7f1c6f2f276a71e9c73183844c3dce83088eeed6c77c"
"3e35316f414db430fcd2e22d0c07998d601addd5907f852df080386fe69e"
"b78675198704b4bf5361caaf482e9333c6de0d46fbf87b4387fc6ac57116"
"0300000000000000000066b7fffffff3fffa3ff7ff1fd2273e1f85c5f04f"
"0f4945ab8462767824"
)
assert (
hashlib.sha256(benchmark(lzfse.decompress, buf)).hexdigest()
== "73d3dd96ca2e2f0144a117019256d770ee7c6febeaee09b24956c723ae22b529"
)
@pytest.mark.parametrize(*PARAMS)
def test_benchmark_lzfse_decompress(data: str, digest: str, benchmark: BenchmarkFixture) -> None:
assert hashlib.sha256(benchmark(lzfse.decompress, bytes.fromhex(data))).hexdigest() == digest
131 changes: 7 additions & 124 deletions tests/compression/test_lznt1.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pytest_benchmark.fixture import BenchmarkFixture


@pytest.mark.parametrize(
PARAMS = (
("data", "digest"),
[
pytest.param(
Expand Down Expand Up @@ -130,131 +130,14 @@
),
],
)
def test_lznt1_decompress(data: str, digest: str) -> None:
assert hashlib.sha256(lznt1.decompress(bytes.fromhex(data)).rstrip(b"\x00")).hexdigest() == digest


@pytest.mark.benchmark
def test_benchmark_lznt1_decompress(benchmark: BenchmarkFixture) -> None:
buf = bytes.fromhex(
"38b08846232000204720410010a24701a045204400084501507900c045200524"
"138805b4024a44ef0358028c091601484500be009e000401189000"
)
assert benchmark(lznt1.decompress, buf) == (
b"F# F# G A A G F# E D D E F# F# E E F# F# G A A "
b"G F# E D D E F# E D D E E F# D E F# G F# D E F# "
b"G F# E D E A F# F# G A A G F# E D D E F# E D D\x00"
)
@pytest.mark.parametrize(*PARAMS)
def test_lznt1_decompress(data: str, digest: str) -> None:
assert hashlib.sha256(lznt1.decompress(bytes.fromhex(data)).rstrip(b"\x00")).hexdigest() == digest


@pytest.mark.benchmark
def test_benchmark_large_lznt1_decompress(benchmark: BenchmarkFixture) -> None:
buf = bytes.fromhex(
"36b3004c6f72656d2069700073756d20646f6c6f00722073697420616d006574"
"20636f6e736500637465747572206100646970697363696e006720656c69742e"
"2000517569737175652000666175636962757300206578207361706900656e20"
"7669746165002070656c6c656e740a65024a7300c6706c616308657261007649"
"6e206940642063757273006e6d8069207072657469007f36740032001164005a"
"017c7661006c6c69732e205465046d7000166c656f206500752061656e65616e"
"002073656420646961006d2075726e6120740100226f722e2050756c2076696e"
"617200837661126d0034667200b2696c6c0c6120007d000f6e6563200300db00"
"09626962656e64810075656765737461803310496163750038206d6100737361"
"206e69736c8100056c6573756164021f40696e69612069006967006572206e75"
"6e632000706f73756572652e202055742068802672651672009e007670801076"
"656c0820636c802520617074018084207461636974694020736f63696f808a2e"
"902041642080a56f72805fa46f7200a66e74809a72017d84756200356e6f7374"
"800d00696e636570746f73002068696d656e6165e06f732e0a0a7fdbbf6dbf6d"
"fbbf6dbf6d758176c17b80cbbf6d7fdbb3c0e47fdb7320bf6dbf6d20bf6dce73"
"bf6dbf6dbf6d7175bf6dbf6def9fa49fa49fa49fa46cc0979fa49fa43c76699f"
"a49fa49fa49fa47373f99fa420749fa49fa47fdb7fdb7fdbff7fdb7fdb7fdb7f"
"db7fdb7fdbbf6de0757fbf6dbf6dbf6dbf6dbf6dbf6d2f8965fe742f896f1b2f"
"892f892f89b0352f89f7df362f892f89702f89d0052f892f89ff2f892f89118d"
"2f892f892f89df362f89ffdf362f89df369fa49fa49fa49fa49fa4ff9fa49fa4"
"2f899fa49fa49fa46f1b6f1bf9bf6d6d20df369fa49fa49fa49fa4fb9fa49fa4"
"69f0519fa42f899fa40fc03f0fc00fc0b0150fc00fc00fc02e207f0fc00fc0e0"
"940fc00fc00fc00fc067ff0fc0001d0fc00fc0bf6d0fc00fc0f097ff2f89c0bd"
"0fc00fc081039fa40fc00fc0ff7fdb4f524f524f524f524f52e0134f52df4f52"
"4f524f524f527fdb73df36df36cfb01a4f524f527fdb6e647fdb7fdbfb7fdb7f"
"db6edf36df36df36df36df363fdf369fa490c8df36df36eaf635b30073206475"
"69732063006f6e76616c6c6973002e2054656d70757300206c656f2065752000"
"61656e65616e2073006564206469616d204075726e61207400886f00722e2050"
"756c7669006e61722076697661026d00686672696e6769806c6c61206c616300"
"1e806e6563206d6574001200626962656e64756d802065676573746100ce0649"
"004200e0206d6173734061206e69736c000a6c206573756164023e696e406961"
"20696e7400307200206e756e6320706f0073756572652e205508742068004d72"
"6572692274009a6d70650082656c0820636c004b2061707400656e7420746163"
"6900746920736f63696f007371752e20416420206c69746f7200bf6f729c7175"
"01230138007d75620035106e6f7374800d696e63006570746f73206869006d65"
"6e61656f732e000a0a4c6f72656d20086970730060646f6c6f68722073803c61"
"006d819c7308656374007272206164206970697363808120653100352e205180"
"ad8034206620617563696200836578802073617069656e009900746165207065"
"6c6c9500446500506581642070009c00657261742e20496e0020696420637572"
"7301801b6d692070726574fa69803f74001980087fdbbf6dbf6d9f008ebf6dbf"
"6dbf6d7fdb73207fdbfdbf6d73407fdf36bf6dbf6dbf6dbf6df9bf6d6163bf6d"
"bf6d9fa49fa4a00c7f9fa49fa49fa49fa49fa49fa49fa467799fa476699fa49f"
"a47fdb7fdb737e207fdb7fdb7fdb7fdbe1d67fdb74fd40cb7281d37fdbbf6dbf"
"6dbf6dbf6de7bf6dbf6ddf367320bf6dbf6d2f89ffd0722f892f896f1b2f892f"
"892f892f89ff2f892f892f892f892f896f1b2f892f89f92f8974756f1b6f1b6f"
"1b6f1b6f1bff6f1b9fa49fa49fa49fa49fa49fa49fa4ff9fa49fa49fa4df36bf"
"6d9fa49fa4df36ff9fa49fa49fa49fa44f529fa44f52507aff2f899fa49fa40f"
"c00fc00fc00fc0e232ff106d2f894f520fc00fc00fc00fc00fc0bf0fc00fc04f"
"52bf6d0fc00fc065df36ff40bc0fc02f890fc02f894f527fdba034ff10177fdb"
"7fdb7fdb7fdb7fdb7fdb7fdbcfd0cb7fdb2f897fdb73737fdb7fdbcf7fdb0fc0"
"7fdb9fa465743156df365fdf36df36df36df367fdb6d40f36cde6cf019bf6dbf"
"6dbf6d70eff6b0040f2f89eff6eff67adb1fb3007375616461206c610063696e"
"696120696e007465676572206e75006e6320706f7375650072652e2055742068"
"00656e647265726974202073656d7000847665006c20636c6173732000617074"
"656e742074010084746920736f6369006f7371752e20416400206c69746f7261"
"20190008717501460170636f6e84756200d46e6f7374003600696e636570746f"
"73002068696d656e6165006f732e0a0a4c6f7200656d20697073756d0020646f"
"6c6f722073410079616d657420003e7300656374657475722040616469706973"
"00b967042065006a2e2051756901007a65206661756369006275732065782073"
"00617069656e207669007461652070656c6c4d008865022500c9207000f96500"
"7261742e20496e208069642063757273801b006d692070726574696d803f7400"
"19800864002d013e7600616c6c69732e2054030083000b6c656f206575802061"
"656e65616e808d0064206469616d20750c726e007b00116f722e200050756c76"
"696e617211804176616d001a667269606e67696c6c82bb80076e18656320806d"
"80046269624700b4803a80c2737461803349486163750038206d00b56120206e"
"69736c00056c65fb7fdbbf6d6ebf6dbf6dbf6dbf6d7fdbff7fdbbf6dbf6dbf6d"
"bf6dbf6dbf6dbf6df9bf6d7373bf6d9fa49fa49fa49fa46f9fa49fa49fa49fa4"
"209fa49fa476fe699fa49fa49fa47fdb7fdb40337fdb3f7fdb7fdbe0d37fdb40"
"677fdb7320df7fdbbf6d6f1bbf6d6f1b63a051bf6d7e20bf6dbf6d2f894f522f"
"892f8969bf2f89101b2f892f896f1b2f89202f89ff2f892f892f8960972f892f"
"892f892f89be202f892f892f892f892f89652f89ff4f529fa49fa49fa49fa49f"
"a4df369fa4fb9fa49fa4746f1bbf6d9fa49fa4df36e79fa4df364f526d708032"
"6f1b6f1bff6f1b6f1b6f1b6f1bbf6d80790fc00fc0ff0fc04f520fc00fc00fc0"
"0fc00fc0df36ffdf36df36df36df36bf6d0fc00fc06f1b6c73650fc00fc0200f"
"c00fc067f76f1b6f1b6f1b6edf36df36df36df36ffdf36df36df36df36df367f"
"dbbf6d7fdbff7fdb7fdbbf6d2f89806bdf366f1b7fdbfe736f1b7fdbbf6dbf6d"
"bf6dbf6dbf6d3fbf6dbf6deff6f161bf6db86d0bb300732e0a0a4c6f7265006d"
"20697073756d2000646f6c6f72207369007420616d65742063006f6e73656374"
"6574007572206164697069007363696e6720656c0069742e2051756973007175"
"6520666175630069627573206578200073617069656e20760069746165207065"
"6ca06c656e7465024a7300c680706c6163657261007600496e20696420637504"
"727300376d6920707268657469007f74003200116403005a017c76616c6c6973"
"402e2054656d7000166c00656f206575206165006e65616e2073656400206469"
"616d207572106e61207400226f722e002050756c76696e612272008376616d00"
"346672c100b2696c6c6120007d000f306e65632000db000962691062656e6480"
"3a65676508737461803349616375010038206d6173736120106e69736c00056c"
"657308756164021f696e69610420690069676572206e00756e6320706f737500"
"6572652e2055742062688026726572009e00767081801076656c20636c802510"
"206170748084207461006369746920736f6304696f808a2e204164204980a56f"
"72805f6f7200a66e4a74809a72017d756200356e086f7374800d696e63650070"
"746f732068696de0656e61656f7fdb804fbf6dffbf6dbf6dbf6dbf6d7fdb7fdb"
"407dbf6dfbbf6dbf6d20bf6dbf6dbf6d2069bf6d7fbf6dbf6d81759fa49fa4c0"
"48409a6dfe209fa49fa49fa49fa49fa49fa49fa4efe0b49fa49fa494a4001f00"
"3f043f04ff3f043f043f043f043f043f041f021f01ff1f011f011f011f011f01"
"1f011f011f01ff1f011f011f011f011f011f011f011f01ff1f011f011f011f01"
"1f011f011f011f01ff1f011f011f011f011f011f011f011f01ff1f011f011f01"
"1f011f011f011f011f01ff1f011f011f011f011f011f011f011f01ff1f011f01"
"1f011f011f011f011f011f01ff1f011f011f011f011f011f011f011f01ff1f01"
"1f011f011f011f011f011f011f01ff1f011f011f011f011f011f011f011f01ff"
"1f011f011f011f011f011f011f011f01ff1f011f011f011f011f011f011f011f"
"01ff1f011f011f011f011f011f011f011f017f1f011f011f011f011f011f011b"
"0103b00200fc0f03b00200fc0f03b00200fc0f03b00200fc0f03b00200fc0f03"
"b00200fc0f03b00200fc0f03b00200fc0f03b00200fc0f03b00200fc0f03b002"
"00fc0f03b00200fc0f0000000000000000000000000000000000000000000000"
)
assert hashlib.sha256(benchmark(lznt1.decompress, buf).rstrip(b"\x00")).hexdigest() == (
"73d3dd96ca2e2f0144a117019256d770ee7c6febeaee09b24956c723ae22b529"
)
@pytest.mark.parametrize(*PARAMS)
def test_benchmark_lznt1_decompress(data: str, digest: str, benchmark: BenchmarkFixture) -> None:
assert hashlib.sha256(benchmark(lznt1.decompress, bytes.fromhex(data)).rstrip(b"\x00")).hexdigest() == digest
Loading
Loading