From 53aee506725bc2e37aca2113fc22ac176b5e2501 Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Tue, 9 Sep 2025 10:02:58 +0200 Subject: [PATCH 1/7] Add Apple Sparse Image Format (ASIF) --- dissect/hypervisor/disk/asif.py | 270 ++++++++++++++++++++++++++++ dissect/hypervisor/disk/c_asif.py | 37 ++++ dissect/hypervisor/disk/c_asif.pyi | 69 +++++++ tests/_data/disk/asif/basic.asif.gz | Bin 0 -> 300859 bytes tests/conftest.py | 5 + tests/disk/test_asif.py | 17 ++ 6 files changed, 398 insertions(+) create mode 100644 dissect/hypervisor/disk/asif.py create mode 100644 dissect/hypervisor/disk/c_asif.py create mode 100644 dissect/hypervisor/disk/c_asif.pyi create mode 100644 tests/_data/disk/asif/basic.asif.gz create mode 100644 tests/disk/test_asif.py diff --git a/dissect/hypervisor/disk/asif.py b/dissect/hypervisor/disk/asif.py new file mode 100644 index 0000000..11175cf --- /dev/null +++ b/dissect/hypervisor/disk/asif.py @@ -0,0 +1,270 @@ +from __future__ import annotations + +import io +import plistlib +from functools import cached_property, lru_cache +from typing import Any, BinaryIO +from uuid import UUID + +from dissect.util.stream import AlignedStream + +from dissect.hypervisor.disk.c_asif import c_asif +from dissect.hypervisor.exceptions import InvalidSignature + + +class ASIF: + """Apple Sparse Image Format (ASIF) disk image. + + ASIF disk images are a virtual disk format introduced in macOS Tahoe. They can be used in Apple's Virtualization + framework, as well as through Disk Utility. + + An ASIF file is pretty straight forward. There's a small header which, among some other details, contains two + directory offsets. Each directory contains a list of tables, which in turn contain a list of data entries. Each data + entry points to a chunk of data in the ASIF file. The chunk size is defined in the header and is typically 1 MiB. + The chunk size is always a multiple of the block size, which is also defined in the header (typically 512 bytes). + Each directory has a version number, and the directory with the highest version number is the active directory. This + allows for atomic updates of the directory/table data. + + The maximum virtual disk size seems to be just under 4 PiB, with a small portion at the end reserved for metadata. + The actual size of the virtual disk is defined in the header, as well as the maximum size the disk can grow to. + + The offset to the metadata block is typically ``(4 PiB - 1 chunk)``, meaning it's within the reserved area. + The metadata block contains a small header and a plist. The plist should contain an ``internal metadata`` and + ``user metadata`` dictionary. Besides a "stable uuid", it's unclear what the metadata is used for or how to set it. + + Args: + fh: File-like object containing the ASIF image. + + Resources: + - Reversing ``diskimagescontroller`` + - https://developer.apple.com/documentation/virtualization/vzdiskimagestoragedeviceattachment/ + """ + + def __init__(self, fh: BinaryIO): + self.fh = fh + + self.header = c_asif.asif_header(fh) + if self.header.header_signature != c_asif.ASIF_HEADER_SIGNATURE: + raise InvalidSignature( + "Not a valid ASIF image " + f"(expected {c_asif.ASIF_HEADER_SIGNATURE:#x}, got {self.header.header_signature:#x})" + ) + + self.guid = UUID(bytes=self.header.guid) + self.block_size = self.header.block_size + self.chunk_size = self.header.chunk_size + self.size = self.header.sector_count * self.block_size + self.max_size = self.header.max_sector_count * self.block_size + + # This is taken from the assembly with some creative variable naming + self._blocks_per_chunk = self.chunk_size // self.block_size + + # Uncertain about these variable names, but the math is correct + reserved_size = 4 * self.chunk_size + self._num_reserved_table_entries = ( + 1 if reserved_size < self._blocks_per_chunk else reserved_size // self._blocks_per_chunk + ) + + self._max_table_entries = self.chunk_size >> 3 + self._num_table_entries = self._max_table_entries - ( + self._max_table_entries % (self._num_reserved_table_entries + 1) + ) + self._num_reserved_directory_entries = (self._num_reserved_table_entries + self._num_table_entries) // ( + self._num_reserved_table_entries + 1 + ) + self._usable_entry_count = self._num_table_entries - self._num_reserved_directory_entries + # This is the size in bytes of data covered by a single table + self._size_per_table = self._usable_entry_count * self.chunk_size + + max_size = self.block_size * self.header.max_sector_count + self._num_directory_entries = (self._size_per_table + max_size - 1) // self._size_per_table + + self._aligned_table_size = ( + (self.block_size + 8 * self._num_table_entries - 1) // self.block_size * self.block_size + ) + + self.directories = sorted( + (Directory(self, offset) for offset in self.header.directory_offsets), + key=lambda d: d.version, + reverse=True, + ) + self.active_directory = self.directories[0] + + self.metadata_header = None + self.metadata: dict[str, Any] | None = None + if self.header.metadata_chunk: + # Open the file in reserved mode to read from the reserved area + with self.open(reserved=True) as disk: + metadata_offset = self.header.metadata_chunk * self.chunk_size + disk.seek(metadata_offset) + self.metadata_header = c_asif.asif_meta_header(disk) + + if self.metadata_header.header_signature != c_asif.ASIF_META_HEADER_SIGNATURE: + raise InvalidSignature( + "Invalid a ASIF metadata header" + f"(expected {c_asif.ASIF_META_HEADER_SIGNATURE:#x}, got {self.metadata_header.header_signature:#x})" # noqa: E501 + ) + + disk.seek(metadata_offset + self.metadata_header.header_size) + buf = disk.read(self.metadata_header.data_size).strip(b"\x00") + self.metadata = plistlib.load(io.BytesIO(buf)) + + @property + def internal_metadata(self) -> dict[str, Any] | None: + """Get internal metadata from the ASIF image. + + Returns: + A dictionary containing the internal metadata. + """ + if not self.metadata: + return None + + return self.metadata.get("internal metadata") + + @property + def user_metadata(self) -> dict[str, Any] | None: + """Get user metadata from the ASIF image. + + Returns: + A dictionary containing the user metadata. + """ + if not self.metadata: + return None + + return self.metadata.get("user metadata") + + def open(self, reserved: bool = False) -> DataStream: + """Open a stream to read the ASIF image data. + + Args: + reserved: Whether to allow reading into the reserved area of the ASIF image. + + Returns: + A stream-like object that can be used to read the image data. + """ + return DataStream(self, reserved) + + +class Directory: + """ASIF Directory. + + A directory has a version (``uint64``) followed by a list of table entries (``uint64[]``). + Each table entry is a chunk number and points to a table in the ASIF image. + + Args: + asif: The ASIF image this directory belongs to. + offset: Offset of the directory in the ASIF image. + """ + + def __init__(self, asif: ASIF, offset: int): + self.asif = asif + self.offset = offset + + self.asif.fh.seek(offset) + self.version = c_asif.uint64(self.asif.fh) + + self.table = lru_cache(128)(self.table) + + def __repr__(self) -> str: + return f"" + + @cached_property + def entries(self) -> list[int]: + """List of table entries in the directory.""" + # Seek over the version + self.asif.fh.seek(self.offset + 8) + return c_asif.uint64[self.asif._num_directory_entries](self.asif.fh) + + def table(self, index: int) -> Table: + """Get a table from the directory. + + Args: + index: Index of the table in the directory. + """ + if index >= self.asif._num_directory_entries: + raise IndexError("Table index out of range") + return Table(self, index) + + +class Table: + """ASIF Table. + + A table contains a list of data entries (``uint64[]``). Each data entry is a chunk number and points to a chunk of + data in the ASIF image. Each table covers a fixed amount of data in the virtual disk. + + Data entries have 55 bits usable for the chunk number and 9 bits reserved for flags. + + .. rubric :: Encoding + .. code-block:: c + + 0b00000000 01111111 11111111 11111111 11111111 11111111 11111111 11111111 (chunk number) + 0b00111111 10000000 00000000 00000000 00000000 00000000 00000000 00000000 (reserved) + 0b01000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 (entry dirty) + 0b10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 (content dirty) + + Args: + directory: The directory this table belongs to. + index: Index of the table in the directory. + """ + + def __init__(self, directory: Directory, index: int): + self.asif = directory.asif + self.directory = directory + self.index = index + + self.offset = self.directory.entries[index] * self.asif.chunk_size + self.virtual_offset = index * self.asif._size_per_table + + def __repr__(self) -> str: + return f"" + + @cached_property + def entries(self) -> list[int]: + """List of data entries in the table.""" + self.asif.fh.seek(self.offset) + return c_asif.uint64[self.asif._num_table_entries](self.asif.fh) + + +class DataStream(AlignedStream): + """Stream to read data from an ASIF image. + + Args: + asif: The ASIF image to read from. + reserved: Whether to allow reading into the reserved area of the ASIF image. + """ + + def __init__(self, asif: ASIF, reserved: bool = False): + super().__init__(asif.max_size if reserved else asif.size, align=asif.chunk_size) + self.asif = asif + self.reserved = reserved + self.directory = asif.active_directory + + def _read(self, offset: int, length: int) -> bytes: + result = [] + while length: + table = self.directory.table(offset // self.asif._size_per_table) + relative_block_index = (offset // self.asif.block_size) - (table.virtual_offset // self.asif.block_size) + data_idx = ( + relative_block_index // self.asif._blocks_per_chunk + + relative_block_index // self.asif._blocks_per_chunk * self.asif._num_reserved_table_entries + ) // self.asif._num_reserved_table_entries + + entry = table.entries[data_idx] + # 0x8000000000000000 = content dirty bit + # 0x4000000000000000 = entry dirty bit + # 0x3F80000000000000 = reserved bits + + value = entry & 0x7FFFFFFFFFFFFF + raw_offset = value * self.asif.chunk_size + + read_length = min(length, self.asif.chunk_size) + if value == 0: + result.append(b"\x00" * read_length) + else: + self.asif.fh.seek(raw_offset) + result.append(self.asif.fh.read(read_length)) + + offset += read_length + length -= read_length + + return b"".join(result) diff --git a/dissect/hypervisor/disk/c_asif.py b/dissect/hypervisor/disk/c_asif.py new file mode 100644 index 0000000..1162223 --- /dev/null +++ b/dissect/hypervisor/disk/c_asif.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +asif_def = """ +#define ASIF_HEADER_SIGNATURE 0x73686477 // 'shdw' +#define ASIF_META_HEADER_SIGNATURE 0x6D657461 // 'meta' + +struct asif_header { + uint32 header_signature; + uint32 header_version; + uint32 header_size; + uint32 header_flags; + uint64 directory_offsets[2]; + char guid[16]; + uint64 sector_count; + uint64 max_sector_count; + uint32 chunk_size; + uint16 block_size; + uint16 total_segments; + uint64 metadata_chunk; + char unk_50[16]; + uint32 read_only_flags; + uint32 metadata_flags; + uint32 metadata_read_only_flags; +}; + +struct asif_meta_header { + uint32 header_signature; + uint32 header_version; + uint32 header_size; + uint64 data_size; + uint64 unk_14; +}; +""" + +c_asif = cstruct(endian=">").load(asif_def) diff --git a/dissect/hypervisor/disk/c_asif.pyi b/dissect/hypervisor/disk/c_asif.pyi new file mode 100644 index 0000000..be0e547 --- /dev/null +++ b/dissect/hypervisor/disk/c_asif.pyi @@ -0,0 +1,69 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, Literal, overload + +import dissect.cstruct as __cs__ +from typing_extensions import TypeAlias + +class _c_asif(__cs__.cstruct): + ASIF_HEADER_SIGNATURE: Literal[1936221303] = ... + ASIF_META_HEADER_SIGNATURE: Literal[1835365473] = ... + class asif_header(__cs__.Structure): + header_signature: _c_asif.uint32 + header_version: _c_asif.uint32 + header_size: _c_asif.uint32 + header_flags: _c_asif.uint32 + directory_offsets: __cs__.Array[_c_asif.uint64] + guid: __cs__.CharArray + sector_count: _c_asif.uint64 + max_sector_count: _c_asif.uint64 + chunk_size: _c_asif.uint32 + block_size: _c_asif.uint16 + total_segments: _c_asif.uint16 + metadata_chunk: _c_asif.uint64 + unk_50: __cs__.CharArray + read_only_flags: _c_asif.uint32 + metadata_flags: _c_asif.uint32 + metadata_read_only_flags: _c_asif.uint32 + @overload + def __init__( + self, + header_signature: _c_asif.uint32 | None = ..., + header_version: _c_asif.uint32 | None = ..., + header_size: _c_asif.uint32 | None = ..., + header_flags: _c_asif.uint32 | None = ..., + directory_offsets: __cs__.Array[_c_asif.uint64] | None = ..., + guid: __cs__.CharArray | None = ..., + sector_count: _c_asif.uint64 | None = ..., + max_sector_count: _c_asif.uint64 | None = ..., + chunk_size: _c_asif.uint32 | None = ..., + block_size: _c_asif.uint16 | None = ..., + total_segments: _c_asif.uint16 | None = ..., + metadata_chunk: _c_asif.uint64 | None = ..., + unk_50: __cs__.CharArray | None = ..., + read_only_flags: _c_asif.uint32 | None = ..., + metadata_flags: _c_asif.uint32 | None = ..., + metadata_read_only_flags: _c_asif.uint32 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class asif_meta_header(__cs__.Structure): + header_signature: _c_asif.uint32 + header_version: _c_asif.uint32 + header_size: _c_asif.uint32 + data_size: _c_asif.uint64 + unk_14: _c_asif.uint64 + @overload + def __init__( + self, + header_signature: _c_asif.uint32 | None = ..., + header_version: _c_asif.uint32 | None = ..., + header_size: _c_asif.uint32 | None = ..., + data_size: _c_asif.uint64 | None = ..., + unk_14: _c_asif.uint64 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + +# Technically `c_asif` is an instance of `_c_asif`, but then we can't use it in type hints +c_asif: TypeAlias = _c_asif diff --git a/tests/_data/disk/asif/basic.asif.gz b/tests/_data/disk/asif/basic.asif.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d53552c1f6c316d190df13f5c8ecf0554d62d87 GIT binary patch literal 300859 zcmeI*dzjXBnaA-#t<@UB+EUv@aj!ho1&-JmgFK;GASsMShKEup11HU~q)=?LgMqWI ztF9(GtEHtd!@(1vh)%0(f@BVrYaS|B(`R%toF54>v(HcF{>{hxd(YEf;%~3(-5+}& z#)9khz2_t1vn$AFZVp^lB3_c?^oaP_Qyti_~?^Q+4l>- z-{aeNUG{;GUby|#_g-|&frsw=#LeUGAO5X7{&>bWKXvSTuYY;f@~fwx^U+UqKe&F= zisfUkntlE&YwrKyxyLpibZPI>IdARz)5d$x`sFTLe~>ro|JJ{__uRU!d-fVq{Rzx% z?Q5U7>W%uD*pGX8UDv~-ht}2AcN{SP6VDy{+wA2fZ$JDk`B!(JaM0XcpR_-2o7cIw z?l_`+RuAP?b=$Q*diO0Y!@o5mf5)x=x%BJSZQJ_Sn@e|3B|C3^I)h$h^_%~pB|Fs} zyyFgsd}DL>bD#deFW}|fh;?84$B#a{cJo2sJ9X*@r*8hnS!bX9tf+UuB=G zez)q~Pkw&y$EvMZ6So5+u zHyw0R!^V5YOt`7%hM!)w`OFh8zop^q4g0h_HoE17yLZ2F+S&)t{Xo~%KR)TA&8I9s z{i4$^8+F~5YkxFj;^p7na@NXkesb)6&+qf2Q5St_<){sNUU2CxXWW#1uH%st$Gzvy zuCbG*&-j;>Pmdq{r+ufMaM{%xH=TR;9q+k%?etldy?(dL_pW`msdC+>EnUC7=H;Cy zc5M7%{Xu=3Hf%TO%WjJ={O#5}j)}us*5nU5wFGK?joU$|mO!nqaXaYL5~%gH>g_!A z>Td{b*ZOqdgU9Xp*^>u;SE1H_?{)jy;=jD@tBuwF_3AG*)=%F4fbR@B;`giWoOjw+ z9?h;;a_h{;Zks=9<)|N>aNDfMZkzS_vwIJl_VIOVm(S^b;)1(pe1FW8;VXK7^0vv} zThy`s_mv%+e?I2e;ma>R_Q*x&FS>sH!tCwu(0uQhV;0mR7Rv-uZe8W*fMdh(5n9$EifrM~&TF_RZ8f9Tj5i;tDM#R z%8Em$Eoqx_eA~^Pjg`}zdsZAVZCP96Z`vxI&6U~BTUH$M_~J9Be5CD1of9k5n>VZ& z|M=1~8gFlVtaDc7oaW6d4tspbi&MVdwz6|t<&5TyD~^18*^7-E+ctO3shro`H~ZkR zi}#o^tnGW9=dVBiH-}YD>G|{Q!^bY&qp_v!{?0E}&g}W?>_f*cxn@e=)M{}{95JQp1;jLGqrPAE9Y4%}vEqP)}Q`_yGCsbzjWV4UFYuOWx-EA8?zgd~n)4S>5Ig7_l*|V*s z^T^59rVR` ztU+n2U&wV{{j5Pp>zcct8+YlD`dWYV>;JWAHfjuEf) z&L?`zrKklE=Tn#rH0TUiq#Zh`$-t)2=@_xO_XkAJfJ}c^01-Wj4ZrFMNMyzB2u9Kt zIvpdn^j=8x3=Tx}BsN4(Kq9N+n~U2K6>AHfjuCym7Z5$>Qq%&7^C?UQ8gvFM(hi-} zWMGr)jL!eL=6hS~vhJ1l@3wm2Yi~%mfAvS@^USsAIcRih3Do*p32&!AeYVTikv`GW zmGH^<5OMTS`HsgLOs1qd{pqtq)=v6FPsc$=Dz-z%B%+E=q++{Vr$2q(oVAcX=AxAg zQVrju<3)5vM>Tve>9b?jMEZE2eCC2872Ba>5>Z7bQn8(+&s(xdq>s61<$_eh_vm;L zozYPZ-%I+`XAPu}_sM52C{nQhVRkwB08g^8ornG*)3}#eY{UT zb3u`c?a(oasG<|8*iO>tpR!4$kGW{&f>guz=y(yG(NPWGOZx1dHIP2uC!e{XNX2&O zm_$_3iBxPS>9a>RjPx-Vtz3|5_#PcEqBAGR&Ko%Bh7stYbCQn4L6CJ|M1A{E<7 z`s|ankUr+3l?zf0-=pJ2bVf%td@t#l*bW_&h$=dfitQwQKAg3XKIWp83sMc=qvJ(%Mn^S# zFX{8qtcmpTKKaZAMJl#K$0VYPPNZTxNuM#4$an)J_U?4T##TSEjkH_EYJ~*q?PnJJZmR?5}@jW3yM^1 zhmJ`^6`e@Mc9K3vWG$qRxoG8rRKxe^coCh^Q4QZq`g}5LB7MA1K662litW%biKwCz zsn|}^=Tq4v(#Kr1azU!$dvv^r&giIy?KHevvxu8hJcIcQyRMCl4Y$xgSxoi^YV=h{`Al2|aI$lI) zbX3Fll0Ki$8b}}Slh0gGq+&aCOd_i2L@KtE^f@sbM*5hGRxU_2e22qq< zPWmK3)dd$6sn`x3lZYxhk&5jkeWqtEq>s61<$_eh_vm;LozYPZ-%I*@DQhBqyiY!J zL6M5>&@qXqq7$juPSWS}Y!c~XE?T)D)$lz!UPNbfRKxd@J~Oih(#QMcGZz%8*bW_& zh$=dfitQwQ&d7$5KIWp83sMc=qvJ(%Mn^S#FX{8utos%|@8f~0GZz$~6f1NJil9L! zKq*#|K4)d?NS^{m8ZJmMk`|qWL>A}>M$$_9oSn6kJ_%5D!39MswnN7xqKZzWVmnEn zbFvoF$6T~>L8{?Tve=`%O$UdHErJWzG!f&!Fcg-$^cH0T5<#Y)oW+u1tOr+|@$3lfZ^MJFMV z1v-L}w30sG$=XSu1gN^;f+7{$p<@zJMJH0Routoqvli0FT(ojQs^NQdyok=|sD|$) zeZHSHkv`rhpShq&#dheJL{!mhVRkwB08g^8ornG`PZy_DWCW8K-HNG z3Q&p_It4}0pc9}JD@mV!%hr)T1&lOYkYFS&Iths^&=HKJmGt@dtex~pfT{~FC{nQ< zIwlcSbRreoN%~x#wU9pMqLm9$4d0{VMRZ0-HGD7W^B-9g>EnI!nG1?kY=@3XL=~M# z#deZDS7nn(A9K;l1*wMb(eWZWqoW$Wm-M+hYao5RPd;-&k&5lmF^Q<66RFrv(&w6N z80lj!TDc(A@I5+SL}zqV!}pRtty%XHKJVj!sxuc9pcE@~3W}gXCqOAyl0MgE>qwsh zMj9?iFp?IXghUqT2u9LM`usR+Cw&s2>VgZ3RBVTiNkkQ$NX2%NJ~w17q>s61<$_eh z_vm;LozYPZ-%I-ZG;1P#yiY!JL6M5>&@qXqq7$juPSWS5Y!c~XE?T)D)$lz!UPNbf zRKxd@J`1x3(#QMcGZz%8*bW_&h$=dfitQwQ+OlD!kGW{&f>guz=y(yG(NPWGOZqI% zx)<|#9}iTWxu5{0SfNu;1PwX?O0kmkS(>dQeF_+9xFEquT67W;S)d~rNh|4dOV&>M zBtX>#7Zj=34jq$-Dmsyh?Ie9}%UVbubJ5BLsfO>-@gh2-qZ+=K^tmHzB7MA1K662l zitW%biKwCzsn|}^XGJ!N^f4E$T##z`9vv^DGdil_dr6-=vj)<~`{Xkh6sgz_9g~PE zI+2R)Bz;z8!$==<(aHs>hVRkwB08g^8on=dI$BqEchA^k``!E>v2Ewod(W-wx@WH; z)gN_kYhU}sRonK5Pk*!mh@Vc1!!HX1lYtetGhmT+g-%E7z1{6ZPa;$R5j}|we?=OQ z$coz$jHE4eI$D3;-9+>Z4n*`MHbhTABCFz?i`x+uYYUx@*892}h#qq(Y5~Og6ea@= zIs+DIhfZoTuqkvpTK~Jddpf_@GeEPoD}aa|Pg9-I2~dg^It4}06gnNPzvyl!dJ>`n zi0Dadh@OB%R@{zYByFM7(YmI)iRc*|i0Dadh@OB%R>e0Lw<9Xn7CIfR4|O*XJ?2u> z0*Lb|Oa>Zs1}xGJoz!Gtlk1EgTAe#aJiOuBm#?fpdEjerNOuJv>#5<+G1tK1cb;nD zlu@TYc^=tt?YK*a)YtskzwTdrYP-6+zCJ!vI5;%nGq??Z0a(kXlscpH4-enl+L=B1 zMSj)u+SlVd{k0yiozQu$$7?5aUhDDNDc9)_o?kyh@VpkMlz*DwpE2=|WHaCCCK1&V z>huTCZ+}hj_!leTYY{yDk!huTCAHGZQ_!leTYY{yDk!huTCGt&tk|6(P4ErQ2ClFdApNkp}TI{m@(r{f47|6(P4ErQ2C zlFdApNkp}TI{m@({1F6?f3Xt27Qy2m$z~qQB%)eEo&MnI`Y^%cU#x_$Mez7XvYE#+ ziKv!Pr$2aJ+=t-tFIK|WB6$2G+00{^L{v+t(;qy4-i6@tFIK|WB6$2G+00{^L{v+t z(;qy4c{9P|U#x_$Mez7XvYE#+iKv!Pr$2aJ&Ys}^ACG&o622C}o8ZYx_+o@l@J_F;c&x!>@_h+)EPNg~d?Wv6 zKlgSed@hp5y-_jpSOrDUlvSsrwf7GTX6&*3Zo$ua8?ILGJ(qu}XbtbOo)7=B=it$J ztN^806*|@EdFD!@r+~4h0OFg=NTi}87)gsxLL!Sory4!aUq=QvU7% ze>obw)2pkpo^myMUc7|pNuI5&??nKE3NuG-6 z2}opB)>E!VPxb?%M_y`cTh{j?dXlFidIAzzmGzXX(X;77qDNk8Yg^X$B6^aiB66h?=cq|@kFo9b8{FE(y-n54FiLCf`k5p`jj!8rnok+!Ynm#)%Bz??9D;K01 zzDLK4=!}kP_+Haz=seQL`{Xkh6sgz_9g~PEI+2R)G=1Jalk_nctz3|5_#PcEqBA&@qXqq7$juPSfYzBS{}~(aHs>hVRkwB08g^8ot-`8D3BN zc%OXcf+7{$p<@zJMJH0RouTfsSA#t)|c3Ye=62sJh^S zA{E=AV-itCCsMJUrqBBpl0N35l?zf0-=pJ2bVf%te6Q&_ zkYFS&Iths^&=HKJ)$|#^hV)5*stYbCQn4L6CJ|M1A{Elh0gGq+&aCOd_i2L@Kt^^f_iG>0>Thxggc>Jvv@QXLMA<_nJP9 z6G$KLlh0gGq+&aCOd_i2L@Kt^^qD%6^f4E$T##z`9vv^DGdil_drhBd^`wvY$!9Jo zQn4L6CJ|M1A{E5~9e7hF)JVmow9 zBC6;_Dz?+~`SL>2$6T~>L8{?guz=y(yG(NPWGYx*=#Abq?~K662litW%biKwCzsn|}_=iHH`kGW{& zf>guz=y(yG(NPWGYx>NoCw;t6K662litW%biKwCzsn|}_=Yp==`Mi(8NacbABWck| zNMwPIU?i=k&xLD9p9HA7;DRC*+o59;QAH=AxAgQVrju<3)5vM>Tw} z=`(*G>EnI!nG1?kY=@3XL=~M##dexLm&_!6%tb2~q#C|Q$BXEUj%xT`)911Yq>uN> zXD%pGu^l=l5mj^|729e0Tse~TF&C{|kZSlI9WSCYI;!D&O`iqzq>uN>XD%pGu^l=l z5mj^|729e0{Abr~eBQ@kq;f%mk+kR}B(gw9Fp^f&=O=4Op9HA7;DRC*+o59;QAHTw}>9c4a>EnI!nG1?kY=@3XL=~M##dexLOJTw}>2vD@(#QMcGZz%8*bW_&h$=dfitRLgmX9QT%tb2~q#C|Q z$BXEUj%xT`)90>w(#QMcGZz%8*bW_&h$=dfitRLg?&-Rf&-)mRR4zy`k`|qWL>A}> zM$&5f{MQ=NCjqK1xS&YIcIcQyRMCl4Y^Uk-z(Uf;T(ojQs^NQdyok=|sD|$~eb&w+ zeY{UTb3u`c?a(oasG<|8*iO^u;hCh5xoG8rRKxe^coCh^Q4QZ~`mCQo`gosw=7J&> z+o59;QAH-@gh2-qZ+=~^m)9V^zlCV%mqa%wnN7xqKZzW zVmnQr-*?@@=Y0%DDi6xoG8rRKxe^coCh^ zQ4QZ~`fRBueY{UTb3u`c?a(oasG<|8*iO@D$P3H(ypO?1<$?qwY0*hYWPy%gB(0{; zo7R#(2~c&x1w|^hL&qedicX|rJ58UR7Lh*YqLm9$4d0{VMRZ0-HGHq>Gju-b<9+g( z3yM^1hmJ`^6`e@McA7qKpGEqZi&id3HGGeb7tt9V)$qNh&$}j)KHevvxu8hJcIcQy zRMCl4Y^Uk-?op(VxoG8rRKxe^coCh^Q4QZ~`V1dR`gosw=7J&>+o59;QAH+o59;QAHGjTrY<9+g(3yM^1 zhmJ`^6`e@McA7rN%p!fvMJpGi8oo!zi|CAwYWQB$r*R_b<9+g(3yM^1hmJ`^6`e@M zcA7p@N0C0}qLm9$4d0{VMRZ0-HGHq>Gi@m8<9+g(3yM^1hmJ`^6`e@McA7q?ys((h z`xuN=E=Vwv7M+Ad7U&2@(rWsgwwClsfT{~FC{nQ&@qXqq7$juuF&ad?Y*FT#va@6W?y+@>gv7c)^**p*O2OuKDV{6ed4M& z=BF|6qZUB?bOun04xNG`XwV5zidCUgjh+kJiJk(+ngWREF%qfh2u9MPlaR=w(5Xhx z51NP`q1oCNKtxYs!(WaDB(mam1S4q+ooe*VZys=mE&t>gIkI-yw3m~E=vEf%e0g0@*9l=Q2 zLZ=!%S2htng98ygi4D;ckjSd|=Hhll#o9ur8a)ddh#qq(Y5~Og6ea@=Is+DIhfZoT zu*r2s53SA}Bl@np;^iyrPagQ%8`53D$9ihGbIdhx`1ofnoHFWI^4xI6xJ!rB*ZkSP t?q7UrySlo*K0Z@8I5goixD9^+Sj#5Yd7(_7*4<^z`^J8?uI@8C{~sV#=(hj> literal 0 HcmV?d00001 diff --git a/tests/conftest.py b/tests/conftest.py index 6a7895e..9bb7818 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -108,6 +108,11 @@ def snapshot_qcow2() -> Iterator[BinaryIO]: yield from open_file_gz("_data/disk/qcow2/snapshot.qcow2.gz") +@pytest.fixture +def basic_asif() -> Iterator[BinaryIO]: + yield from open_file_gz("_data/disk/asif/basic.asif.gz") + + @pytest.fixture def envelope() -> Iterator[BinaryIO]: yield from open_file("_data/util/envelope/local.tgz.ve") diff --git a/tests/disk/test_asif.py b/tests/disk/test_asif.py new file mode 100644 index 0000000..915e1de --- /dev/null +++ b/tests/disk/test_asif.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from typing import BinaryIO + +from dissect.hypervisor.disk.asif import ASIF + + +def test_asif(basic_asif: BinaryIO) -> None: + """Test ASIF parsing.""" + asif = ASIF(basic_asif) + + assert asif.internal_metadata == {"stable uuid": "13db9632-b79f-4e95-aada-835d5ef97bba"} + assert asif.user_metadata == {} + + with asif.open() as stream: + for i in range(100): + assert stream.read(1024 * 1024).strip(bytes([i])) == b"", f"Mismatch at offset {i * 1024 * 1024:#x}" From 5e0ed3342051c17ab3470f7540d3051a4d289166 Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:39:34 +0200 Subject: [PATCH 2/7] Changes --- dissect/hypervisor/disk/asif.py | 37 +++++++++++++-------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/dissect/hypervisor/disk/asif.py b/dissect/hypervisor/disk/asif.py index 11175cf..aab9712 100644 --- a/dissect/hypervisor/disk/asif.py +++ b/dissect/hypervisor/disk/asif.py @@ -46,8 +46,8 @@ def __init__(self, fh: BinaryIO): self.header = c_asif.asif_header(fh) if self.header.header_signature != c_asif.ASIF_HEADER_SIGNATURE: raise InvalidSignature( - "Not a valid ASIF image " - f"(expected {c_asif.ASIF_HEADER_SIGNATURE:#x}, got {self.header.header_signature:#x})" + f"Not a valid ASIF image (expected {c_asif.ASIF_HEADER_SIGNATURE:#x}, " + f"got {self.header.header_signature:#x})" ) self.guid = UUID(bytes=self.header.guid) @@ -72,9 +72,9 @@ def __init__(self, fh: BinaryIO): self._num_reserved_directory_entries = (self._num_reserved_table_entries + self._num_table_entries) // ( self._num_reserved_table_entries + 1 ) - self._usable_entry_count = self._num_table_entries - self._num_reserved_directory_entries + self._num_usable_entries = self._num_table_entries - self._num_reserved_directory_entries # This is the size in bytes of data covered by a single table - self._size_per_table = self._usable_entry_count * self.chunk_size + self._size_per_table = self._num_usable_entries * self.chunk_size max_size = self.block_size * self.header.max_sector_count self._num_directory_entries = (self._size_per_table + max_size - 1) // self._size_per_table @@ -91,7 +91,7 @@ def __init__(self, fh: BinaryIO): self.active_directory = self.directories[0] self.metadata_header = None - self.metadata: dict[str, Any] | None = None + self.metadata: dict[str, Any] = {} if self.header.metadata_chunk: # Open the file in reserved mode to read from the reserved area with self.open(reserved=True) as disk: @@ -101,8 +101,8 @@ def __init__(self, fh: BinaryIO): if self.metadata_header.header_signature != c_asif.ASIF_META_HEADER_SIGNATURE: raise InvalidSignature( - "Invalid a ASIF metadata header" - f"(expected {c_asif.ASIF_META_HEADER_SIGNATURE:#x}, got {self.metadata_header.header_signature:#x})" # noqa: E501 + f"Invalid a ASIF metadata header (expected {c_asif.ASIF_META_HEADER_SIGNATURE:#x}, " + f"got {self.metadata_header.header_signature:#x})" ) disk.seek(metadata_offset + self.metadata_header.header_size) @@ -110,28 +110,22 @@ def __init__(self, fh: BinaryIO): self.metadata = plistlib.load(io.BytesIO(buf)) @property - def internal_metadata(self) -> dict[str, Any] | None: + def internal_metadata(self) -> dict[str, Any]: """Get internal metadata from the ASIF image. Returns: A dictionary containing the internal metadata. """ - if not self.metadata: - return None - - return self.metadata.get("internal metadata") + return self.metadata.get("internal metadata", {}) @property - def user_metadata(self) -> dict[str, Any] | None: + def user_metadata(self) -> dict[str, Any]: """Get user metadata from the ASIF image. Returns: A dictionary containing the user metadata. """ - if not self.metadata: - return None - - return self.metadata.get("user metadata") + return self.metadata.get("user metadata", {}) def open(self, reserved: bool = False) -> DataStream: """Open a stream to read the ASIF image data. @@ -149,6 +143,7 @@ class Directory: """ASIF Directory. A directory has a version (``uint64``) followed by a list of table entries (``uint64[]``). + The version number is used to determine the active directory, with the highest version being the active one. Each table entry is a chunk number and points to a table in the ASIF image. Args: @@ -249,16 +244,14 @@ def _read(self, offset: int, length: int) -> bytes: + relative_block_index // self.asif._blocks_per_chunk * self.asif._num_reserved_table_entries ) // self.asif._num_reserved_table_entries - entry = table.entries[data_idx] # 0x8000000000000000 = content dirty bit # 0x4000000000000000 = entry dirty bit # 0x3F80000000000000 = reserved bits - - value = entry & 0x7FFFFFFFFFFFFF - raw_offset = value * self.asif.chunk_size + chunk = table.entries[data_idx] & 0x7FFFFFFFFFFFFF + raw_offset = chunk * self.asif.chunk_size read_length = min(length, self.asif.chunk_size) - if value == 0: + if chunk == 0: result.append(b"\x00" * read_length) else: self.asif.fh.seek(raw_offset) From 876fe1e76869547cd07c3d8854cc3e431bb2cf4d Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:40:57 +0200 Subject: [PATCH 3/7] Improve comment --- dissect/hypervisor/disk/asif.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dissect/hypervisor/disk/asif.py b/dissect/hypervisor/disk/asif.py index aab9712..1241ab3 100644 --- a/dissect/hypervisor/disk/asif.py +++ b/dissect/hypervisor/disk/asif.py @@ -56,10 +56,10 @@ def __init__(self, fh: BinaryIO): self.size = self.header.sector_count * self.block_size self.max_size = self.header.max_sector_count * self.block_size - # This is taken from the assembly with some creative variable naming + # The following math is taken from the assembly with some creative variable naming + # It's possible that some of this can be simplified or the names improved self._blocks_per_chunk = self.chunk_size // self.block_size - # Uncertain about these variable names, but the math is correct reserved_size = 4 * self.chunk_size self._num_reserved_table_entries = ( 1 if reserved_size < self._blocks_per_chunk else reserved_size // self._blocks_per_chunk From 6c40a86c00898af97291cac356d58a0e26c2e7ed Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:57:34 +0200 Subject: [PATCH 4/7] Add comment --- dissect/hypervisor/disk/asif.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dissect/hypervisor/disk/asif.py b/dissect/hypervisor/disk/asif.py index 1241ab3..f6a9f8c 100644 --- a/dissect/hypervisor/disk/asif.py +++ b/dissect/hypervisor/disk/asif.py @@ -60,6 +60,7 @@ def __init__(self, fh: BinaryIO): # It's possible that some of this can be simplified or the names improved self._blocks_per_chunk = self.chunk_size // self.block_size + # This check doesn't really make sense, but keep it in for now reserved_size = 4 * self.chunk_size self._num_reserved_table_entries = ( 1 if reserved_size < self._blocks_per_chunk else reserved_size // self._blocks_per_chunk From 54ed442464eb7ee31441d2a07c45692e4e41082c Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:09:47 +0200 Subject: [PATCH 5/7] load -> loads --- dissect/hypervisor/disk/asif.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dissect/hypervisor/disk/asif.py b/dissect/hypervisor/disk/asif.py index f6a9f8c..1c23ef8 100644 --- a/dissect/hypervisor/disk/asif.py +++ b/dissect/hypervisor/disk/asif.py @@ -107,8 +107,7 @@ def __init__(self, fh: BinaryIO): ) disk.seek(metadata_offset + self.metadata_header.header_size) - buf = disk.read(self.metadata_header.data_size).strip(b"\x00") - self.metadata = plistlib.load(io.BytesIO(buf)) + self.metadata = plistlib.loads(disk.read(self.metadata_header.data_size).strip(b"\x00")) @property def internal_metadata(self) -> dict[str, Any]: From 2f5e55784e27ad608deb3f195aeef1adee94e40f Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:13:44 +0200 Subject: [PATCH 6/7] Lint --- dissect/hypervisor/disk/asif.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dissect/hypervisor/disk/asif.py b/dissect/hypervisor/disk/asif.py index 1c23ef8..0dbc94f 100644 --- a/dissect/hypervisor/disk/asif.py +++ b/dissect/hypervisor/disk/asif.py @@ -1,6 +1,5 @@ from __future__ import annotations -import io import plistlib from functools import cached_property, lru_cache from typing import Any, BinaryIO From d343fcede83c042bee0e0058845823f0f8938d6c Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:14:33 +0200 Subject: [PATCH 7/7] Add asif reimport --- dissect/hypervisor/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dissect/hypervisor/__init__.py b/dissect/hypervisor/__init__.py index 2fd706c..53294ba 100644 --- a/dissect/hypervisor/__init__.py +++ b/dissect/hypervisor/__init__.py @@ -1,8 +1,9 @@ from dissect.hypervisor.descriptor import hyperv, ovf, pvs, vbox, vmx -from dissect.hypervisor.disk import hdd, qcow2, vdi, vhd, vhdx, vmdk +from dissect.hypervisor.disk import asif, hdd, qcow2, vdi, vhd, vhdx, vmdk from dissect.hypervisor.util import envelope, vmtar __all__ = [ + "asif", "envelope", "hdd", "hyperv",