diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b4f63f --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +tests/_data/** filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index ffc1e26..7c60315 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # dissect.evidence -A Dissect module implementing a parsers for various forensic evidence file containers, currently: AD1, ASDF and EWF. For +A Dissect module implementing parsers for various forensic evidence file containers, currently: AD1, ADCRYPT, ASDF and EWF. For more information, please see [the documentation](https://docs.dissect.tools/en/latest/projects/dissect.evidence/index.html). ## Requirements diff --git a/dissect/evidence/__init__.py b/dissect/evidence/__init__.py index bf20eb6..64360ac 100644 --- a/dissect/evidence/__init__.py +++ b/dissect/evidence/__init__.py @@ -1,7 +1,11 @@ +from __future__ import annotations + +from dissect.evidence.ad1.ad1 import AD1 from dissect.evidence.asdf.asdf import AsdfSnapshot, AsdfStream -from dissect.evidence.ewf import EWF +from dissect.evidence.ewf.ewf import EWF __all__ = [ + "AD1", "EWF", "AsdfSnapshot", "AsdfStream", diff --git a/dissect/evidence/ad1.py b/dissect/evidence/ad1.py deleted file mode 100644 index fe4cce3..0000000 --- a/dissect/evidence/ad1.py +++ /dev/null @@ -1,252 +0,0 @@ -from __future__ import annotations - -import zlib -from typing import Any, BinaryIO - -from dissect.cstruct import cstruct -from dissect.util.stream import AlignedStream, RelativeStream - -ad1_def = """ -enum EntryType : uint32 { - File = 0x0, - Directory = 0x5 -}; - -enum MetaType : uint32 { - FileClass = 0x2, - FileSize = 0x3, - PhysicalSize = 0x4, - StartCluster = 0x6, - DateAccessed = 0x7, - DateCreated = 0x8, - DateModified = 0x9, - Unknown_1 = 0xd, - Unknown_2 = 0xe, - ActualFile = 0x1e, - Unknown_3 = 0x1002, - Unknown_4 = 0x1003, - Unknown_5 = 0x1004, - Unknown_6 = 0x1005, - MD5 = 0x5001, - SHA1 = 0x5002, - ClusterSize = 
0x9001, - ClusterCount = 0x9002, - FreeClusterCount = 0x9003, - VolumeSerialNumber = 0x9006 -}; - -typedef struct { - char magic[16]; - uint32 unk1; - uint32 unk2; - uint32 unk3; - uint32 unk4; - uint16 unk5; - uint16 version; - uint32 unk6; - uint64 logical_image_offset; -} SegmentedFileHeader; - -typedef struct { - char magic[16]; - uint32 unk1; - uint32 unk2; - uint32 chunk_size; // This is supposed to be uint64? But that doesn't seem right - uint32 unk3; - uint32 unk4; - uint64 entry_offset; - uint32 name_len; - uint32 unk5; - uint64 name_offset; - uint64 unk6; - uint64 unk7; - uint64 unk8; - uint64 unk9; - char name[name_len]; -} LogicalImageHeader; - -typedef struct { - uint64 next; - uint64 child; - uint64 meta; - uint64 unk1; - uint64 size; - EntryType type; - uint32 name_len; - char name[name_len]; - uint64 unk2; - uint64 num_chunks; - uint64 chunks[num_chunks]; -} FileEntry; - -typedef struct { - uint64 next; - uint32 category; - MetaType type; - uint32 len; - char data[len]; -} FileMeta; -""" -c_ad1 = cstruct().load(ad1_def) - -EntryType = c_ad1.EntryType -MetaType = c_ad1.MetaType - - -class AD1: - def __init__(self, fh: BinaryIO): - self.fh = fh - self.header = c_ad1.SegmentedFileHeader(fh) - - offset = self.header.logical_image_offset - self.logical_image = LogicalImage(RelativeStream(fh, offset)) - self.root = self.logical_image - - def __getattr__(self, k: str) -> Any: - if k in self.header.__class__.fields: - return getattr(self.header, k) - - return super().__getattr__(k) - - def entry(self, path: str = "") -> LogicalImage | FileEntry: - components = path.lstrip("/").split("/") - current = self.root - - if components[0] == "": - return current - - for c in components: - for item in current.children: - if item.name == c: - current = item - - if current.name == components[-1]: - return current - - raise IOError(f"Path not found: {path}") - - def listdir(self, path: str) -> list[FileEntry]: - return [e.name for e in self.entry(path).children] - - def 
get(self, path: str) -> LogicalImage | FileEntry: - return self.entry(path) - - def open(self, path: str) -> FileObject: - return self.entry(path).open() - - -class LogicalImage: - def __init__(self, fh: BinaryIO): - self.fh = fh - self.header = c_ad1.LogicalImageHeader(fh) - - self.children = [] - offset = self.header.entry_offset - while offset != 0: - child = FileEntry(self, offset) - offset = child.entry.next - self.children.append(child) - - def __repr__(self) -> str: - return f"" - - def __getattr__(self, k: str) -> Any: - if k in self.header.__class__.fields: - return getattr(self.header, k) - - return object.__getattribute__(self, k) - - -class FileEntry: - def __init__(self, image: LogicalImage, offset: int): - fh = image.fh - fh.seek(offset) - self.image = image - self.offset = offset - self.entry = c_ad1.FileEntry(fh) - self.size = self.entry.size - - self.meta = [] - offset = self.entry.meta - while offset != 0: - meta = FileMeta(image, offset) - offset = meta.entry.next - self.meta.append(meta) - - self.children = [] - offset = self.entry.child - while offset != 0: - child = FileEntry(image, offset) - offset = child.entry.next - self.children.append(child) - - def __repr__(self) -> str: - file_type = "Unknown type" - if self.is_file(): - file_type = "File" - elif self.is_dir(): - file_type = "Directory" - return f"<{file_type} name={self.entry.name}>" - - def __getattr__(self, k: str) -> Any: - if k in self.entry.__class__.fields: - return getattr(self.entry, k) - - return object.__getattribute__(self, k) - - def open(self) -> FileObject: - return FileObject(self) - - def is_file(self) -> bool: - return self.entry.type == EntryType.File - - def is_dir(self) -> bool: - return self.entry.type == EntryType.Directory - - -class FileMeta: - def __init__(self, image: LogicalImage, offset: int): - fh = image.fh - fh.seek(offset) - self.image = image - self.offset = offset - self.entry = c_ad1.FileMeta(fh) - - def __repr__(self) -> str: - return f"" - - def 
__getattr__(self, k: str) -> Any: - if k in self.entry.__class__.fields: - return getattr(self.entry, k) - - return object.__getattribute__(self, k) - - -class FileObject(AlignedStream): - def __init__(self, entry: FileEntry): - self.entry = entry - super().__init__(self.entry.size, self.entry.image.chunk_size) - - def _read(self, offset: int, length: int) -> bytes: - r = [] - fh = self.entry.image.fh - chunk_size = self.entry.image.chunk_size - - chunk = offset // chunk_size - chunk_count = (length + chunk_size - 1) // chunk_size - - chunk_offsets = self.entry.entry.chunks[chunk : chunk + chunk_count + 1] - if len(chunk_offsets) != chunk_count + 1: - chunk_offsets.append(self.entry.entry.meta) - - fh.seek(chunk_offsets[0]) - buf = fh.read(chunk_offsets[-1] - chunk_offsets[0]) - - prev_offset = chunk_offsets[0] - for offset in chunk_offsets[1:]: - chunk_size = offset - prev_offset - r.append(zlib.decompress(buf[:chunk_size])) - buf = buf[chunk_size:] - prev_offset = offset - - return b"".join(r) diff --git a/dissect/evidence/ad1/__init__.py b/dissect/evidence/ad1/__init__.py new file mode 100644 index 0000000..be96321 --- /dev/null +++ b/dissect/evidence/ad1/__init__.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from dissect.evidence.ad1.ad1 import AD1, FileEntry, FileMeta, LogicalImage, MetaType, SegmentFile +from dissect.evidence.ad1.stream import AD1Stream, FileStream +from dissect.evidence.exception import ( + Error, + FileNotFoundError, + NotADirectoryError, + NotASymlinkError, +) + +__all__ = [ + "AD1", + "AD1Stream", + "Error", + "FileEntry", + "FileMeta", + "FileNotFoundError", + "FileStream", + "LogicalImage", + "MetaType", + "NotADirectoryError", + "NotASymlinkError", + "SegmentFile", +] diff --git a/dissect/evidence/ad1/ad1.py b/dissect/evidence/ad1/ad1.py new file mode 100644 index 0000000..628ef79 --- /dev/null +++ b/dissect/evidence/ad1/ad1.py @@ -0,0 +1,446 @@ +from __future__ import annotations + +import re +from datetime import 
def find_files(path: Path) -> list[Path]:
    """Collect all segment files that belong to the same AD1 image as ``path``.

    The directory containing ``path`` is scanned for files that share its stem and
    carry an ``.ad<number>`` suffix (matched case-insensitively).

    Args:
        path: Path to any one segment of the image, e.g. ``image.ad1``.

    Returns:
        The matching segment paths, ordered by their numeric suffix.
    """

    def _is_segment(candidate: Path) -> bool:
        if candidate.stem != path.stem:
            return False
        return re.match(r"^\.ad[0-9]+$", candidate.suffix.lower()) is not None

    def _segment_number(segment: Path) -> int:
        # Suffix looks like ".ad12"; everything after ".ad" is the segment number
        return int(segment.suffix[3:])

    segments = {candidate for candidate in path.parent.iterdir() if _is_segment(candidate)}
    return sorted(segments, key=_segment_number)
    def __init__(self, fh: BinaryIO | list[BinaryIO]):
        """Open an AD1 container from one or more segments.

        Args:
            fh: A single segment or a list of segments, in segment order. Entries without a
                ``read`` attribute are treated as paths and opened lazily by :meth:`segment`.

        Raises:
            ValueError: If an empty list of segments is given.
        """
        # Normalize a single segment into a one-element list
        fhs = [fh] if not isinstance(fh, list) else fh
        self.fh = fhs
        self.root = VirtualEntry(self, "/")

        # Open segments by index, plus the access order used for eviction in `segment()`
        self._segments: dict[int, SegmentFile] = {}
        self._segment_lru: list[int] = []
        # Cumulative end offset of each segment, filled in by `_open_ad1()`
        self._segment_offsets: list[int] = []

        # These stay at their empty defaults until `_open_ad1()` has run
        self.size = 0
        self.stream: AD1Stream | None = None
        self.logical_image: LogicalImage | None = None

        if not self.fh:
            raise ValueError("No segment files provided for AD1 container")

        # Must be assigned before the first `segment()` call, which consults `is_adcrypt()`
        self.adcrypt: ADCrypt | None = None

        first_segment = self.segment(0)
        if is_adcrypt(first_segment.fh):
            # Encrypted container: parsing the AD1 structures is deferred until `unlock()`
            self.adcrypt = ADCrypt(first_segment.fh)
        else:
            self._open_ad1()
+ """ + # Poor mans LRU + if idx in self._segments: + self._segment_lru.remove(idx) + self._segment_lru.append(idx) + return self._segments[idx] + + if len(self._segment_lru) >= MAX_OPEN_SEGMENTS: + oldest_idx = self._segment_lru.pop(0) + oldest_segment = self._segments.pop(oldest_idx) + + # Don't close it if we received it as a file-like object + if not hasattr(self.fh[oldest_idx], "read"): + oldest_segment.fh.close() + + del oldest_segment + + fh = self.fh[idx] + if not hasattr(fh, "read"): + fh = fh.open("rb") if isinstance(fh, Path) else Path(fh).open("rb") # noqa: SIM115 + + if self.is_adcrypt() and not self.is_locked(): + fh = self.adcrypt.wrap(fh, idx) + + segment = SegmentFile(fh) + + self._segments[idx] = segment + self._segment_lru.append(idx) + + return segment + + def unlock(self, *, passphrase: str | bytes | None = None, private_key: Path | bytes | None = None) -> None: + """Unlock the ADCRYPT container with a given passphrase or private key. + + Args: + passphrase: The passphrase to unlock the container. + private_key: The private key to unlock the container. + + Raises: + RuntimeError: If required dependencies are missing. + ValueError: If unlocking failed. 
+ """ + self.adcrypt.unlock(passphrase=passphrase, private_key=private_key) + + # Reset LRU + self._segments = {} + self._segment_lru = [] + + # Open the AD1 + self._open_ad1() + + def _open_ad1(self) -> None: + self._segment_offsets = [] + + offset = 0 + for i in range(len(self.fh)): + segment = self.segment(i) + if segment.header.magic != c_ad1.ADSEGMENTEDFILE_MAGIC.encode(): + raise ValueError(f"Invalid AD1 segment file magic in segment {i}") + + if segment.number != i + 1: + raise ValueError(f"Invalid AD1 segment number in segment {i}, got {segment.number}, expected {i + 1}") + + offset += segment.size + self._segment_offsets.append(offset) + + self.size = offset + self.stream = AD1Stream(self) + + # The first .ad1 file contains a logical image header + first_segment = self.segment(0) + first_segment.fh.seek(first_segment.header.logical_image_offset) + self.logical_image = LogicalImage(first_segment.fh) + + # We need to create some fake entries for all parts leading up to `logical_image.name` + # This name commonly contains the full path each entry in the container is relative to + _hallicinate_root_entries(self) + + def entry(self, path: str, entry: FileEntry | None = None) -> FileEntry: + """Return a :class:`FileEntry` based on the given absolute ``path``. + + Args: + path: Absolute path within the AD1 container. + entry: The starting entry for relative paths. Defaults to the root entry. + + Raises: + ValueError: If the ADCRYPT container is locked. + FileNotFoundError if the given ``path`` is not found in the container. + + Returns: + :class:`FileEntry` when the given ``path`` is found. 
def _hallicinate_root_entries(ad1: AD1) -> None:
    """Build the chain of virtual directory entries that leads to the first real file entry.

    The chain is derived from the logical image name (or, for multi images, from the name of
    the single top-level entry) and attached below ``ad1.root``.

    Args:
        ad1: The opened container; ``ad1.logical_image`` and ``ad1.root`` must already be set.

    Raises:
        ValueError: If a ``Custom Content Image([Multi])`` does not have exactly one root child.
    """
    # We need to create some fake entries for all parts leading up to `logical_image.name`
    # This name commonly contains the full path each entry in the container is relative to
    # Not always though, so do some poor man's heuristics
    root_name = ad1.logical_image.name
    if root_name == "Custom Content Image([Multi])":
        # Point the root at the first file entry so `ad1.root.children` resolves real entries
        ad1.root.entry.child = ad1.logical_image.header.first_file_offset
        if len(ad1.root.children) != 1:
            raise ValueError("Unexpected number of root children for Custom Content Image([Multi])")

        # Use whatever follows the first ":" of the entry name as the path to recreate
        root_name = ad1.root.children[0].name.split(":", 1)[-1]
        first_file_entry = ad1.root.children[0].entry
        is_multi_image = True
    else:
        is_multi_image = False

    # Only treat the name as a Windows path when it has backslashes and no forward slashes
    root_path = PureWindowsPath(root_name) if "/" not in root_name and "\\" in root_name else PurePosixPath(root_name)
    parts = list(root_path.parts)
    parent = _create_root_entries(ad1, ad1.root, root_path, parts)

    if is_multi_image:
        # `name` is a cached property derived from `entry`; reading it and storing it back pins
        # the virtual name on the instance before `entry` is swapped out below
        parent.name = parent.name
        parent.offset = ad1.logical_image.header.first_file_offset
        parent.entry = first_file_entry
    else:
        # Add the first file offset as the first child offset of the last root part
        parent.entry.child = ad1.logical_image.header.first_file_offset
class SegmentFile:
    """Represents an AD1 segmented file.

    Args:
        fh: File-like object of a single segment, read from its start.
    """

    def __init__(self, fh: BinaryIO):
        self.fh = fh
        self.fh.seek(0)
        self.header = c_ad1.SegmentedFileHeader(self.fh)
        self.number = self.header.segment_number
        self.count = self.header.segment_count
        self.size = self.header.segment_size - 512  # Subtract header size

    def __repr__(self) -> str:
        # Previously an empty f-string, which made instances render as a blank string
        return f"<SegmentFile number={self.number} count={self.count} size={self.size}>"
result + + @cached_property + def size(self) -> int: + if meta := self.meta.get(MetaType.FileSize): + return int(meta.data) + return 0 + + @cached_property + def atime(self) -> datetime: + if meta := self.meta.get(MetaType.DateAccessed): + return convert_ts(meta.data) + return datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + + @cached_property + def ctime(self) -> datetime: + if meta := self.meta.get(MetaType.MFTFileDateChanged, self.meta.get(MetaType.DateModified)): + return convert_ts(meta.data) + return datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + + @cached_property + def mtime(self) -> datetime: + if meta := self.meta.get(MetaType.DateModified): + return convert_ts(meta.data) + return datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + + @cached_property + def btime(self) -> datetime: + if meta := self.meta.get(MetaType.DateCreated): + return convert_ts(meta.data) + return datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + + @cached_property + def md5(self) -> str | None: + if meta := self.meta.get(MetaType.MD5): + return meta.data.decode() + return None + + @cached_property + def sha1(self) -> str | None: + if meta := self.meta.get(MetaType.SHA1): + return meta.data.decode() + return None + + def is_file(self) -> bool: + return self.type in (EntryType.File, EntryType.Unknown_File) + + def is_dir(self) -> bool: + return self.type == EntryType.Directory + + def is_symlink(self) -> bool: + if meta := self.meta.get(MetaType.FileClass): + return int.from_bytes(meta.data, "little") == FileClassType.ReparsePoint + return False + + def listdir(self) -> list[str]: + if not self.is_dir(): + raise NotADirectoryError(self.name) + return [child.name for child in self.children] + + def iterdir(self) -> Iterator[FileEntry]: + if not self.is_dir(): + raise NotADirectoryError(self.name) + yield from self.children + + def readlink(self) -> str: + if not self.is_symlink(): + raise NotASymlinkError(self.name) + + # TODO: Investigate symlinks for unix-like 
class FileMeta:
    """Represents a single AD1 logical file metadata item found inside :class:`FileEntry`.

    Args:
        ad1: The container whose stitched stream holds the metadata item.
        offset: Offset of the metadata item within ``ad1.stream``.
    """

    def __init__(self, ad1: AD1, offset: int):
        self.ad1 = ad1
        self.offset = offset

        self.ad1.stream.seek(offset)
        self.entry = c_ad1.FileMeta(self.ad1.stream)

        self.category = self.entry.category
        self.type = self.entry.type
        self.data = self.entry.data

        # Offset of the next metadata item in the chain; callers stop iterating at 0
        self.next = self.entry.next

    def __repr__(self) -> str:
        # Previously an empty f-string, which made instances render as a blank string
        return f"<FileMeta category={self.category} type={self.type} data={self.data!r}>"
in value else "%Y%m%dT%H%M%S" + return datetime.strptime(value.decode(), fmt).replace(tzinfo=timezone.utc) diff --git a/dissect/evidence/ad1/c_ad1.py b/dissect/evidence/ad1/c_ad1.py new file mode 100644 index 0000000..35181bf --- /dev/null +++ b/dissect/evidence/ad1/c_ad1.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +ad1_def = """ +enum EntryType : uint32 { + File = 0x0, + Unknown_File = 0x1, + Deleted = 0x2, + Directory = 0x5, +}; + +enum FileClassType : uint32 { + File = 0x1, // b"1" + Directory = 0x3, // b"3" + ReparsePoint = 0x3131, // b"11" +}; + +enum MetaType : uint32 { + // Generic attributes + ItemContentHashes = 0x1, + FileClass = 0x2, + FileSize = 0x3, + PhysicalSize = 0x4, + Timestamps = 0x5, + StartCluster = 0x6, + DateAccessed = 0x7, + DateCreated = 0x8, + DateModified = 0x9, + // .. 0xa, 0xb, 0xc .. + Encrypted = 0xd, + Compressed = 0xe, + // .. 0xf .. + ActualFile = 0x1e, + StartSector = 0x1f, + ADSCount = 0x24, // Alternate Data Stream Count + + // DOS attributes + ShortFilename = 0x1001, + Hidden = 0x1002, + System = 0x1003, + ReadOnly = 0x1004, + Archive = 0x1005, + + // NTFS attributes + MFTRecordNumber = 0xa001, + MFTDateChanged = 0xa002, // Specifies the MFT record change timestamp of the file. + MFTIsResident = 0xa003, + MFTIsOffline = 0xa004, + MFTIsSparse = 0xa005, + MFTIsTemporary = 0xa006, + MFTOwnerSid = 0xa007, + MFTOwnerName = 0xa008, + MFTGroupSid = 0xa009, + MFTGroupName = 0xa00a, + + MFTFileDateCreated = 0xa01c, // According to the filename attribute in the MFT. + MFTFileDateModified = 0xa01d, + MFTFileDateAccessed = 0xa01e, + MFTFileDateChanged = 0xa01f, + MFTFileSize = 0xa020, + MFTFilePhysicalSize = 0xa021, + + // 8.3 MFT Filename + // 0xa022, + // 0xa023, + // 0xa024, + // 0xa025, + // 0xa026, + // 0xa027, + + IndxFilename = 0xa028, // According to the filename attribute in the $I30 INDX. 
+ IndxFileSize = 0xa029, + IndxPhysicalSize = 0xa02a, + IndxDateCreated = 0xa02b, + IndxDateModified = 0xa02c, + IndxDateAccessed = 0xa02d, + IndxDateChanged = 0xa02e, + + // 8.3 INDX + // 0xa02f, 0xa030, 0xa031, 0xa032, 0xa033, 0xa034, 0xa035 + + // NTFS Access Control Entry (0) + AceType = 0x1000001, + AceInheritable = 0x1000004, + AceSID = 0x1000005, // The Security ID of the user or group this ACE applies to. + AceName = 0x1000006, // The name of the user or roup this ACE applies to. + AceAccessMask = 0x1000007, // Raw bitmask specifying the actions this ACE controls. + AceExecuteFile = 0x1000008, + AceReadData = 0x1000009, + AceWriteData = 0x100000a, + AceAppendData = 0x100000b, + AceTraverseFolder = 0x100000c, + AceListFolder = 0x100000d, + AceCreateFiles = 0x100000e, + AceCreateFolders = 0x100000f, + AceDeleteChildren = 0x1000010, + AceDeleteSelf = 0x1000011, + AceReadPermissions = 0x1000012, + AceChangePermissions = 0x1000013, + AceTakeOwnership = 0x1000014, + // .. 0x10010XX - 0x10060XX .. + + // Verification hashes + MD5 = 0x5001, + SHA1 = 0x5002, + + // TODO: Clean up + ClusterSize = 0x9001, + ClusterCount = 0x9002, + FreeClusterCount = 0x9003, + VolumeSerialNumber = 0x9006, + PosixPermissions = 0x2001, +}; + +#define ADSEGMENTEDFILE_MAGIC ADSEGMENTEDFILE\00 + +typedef struct { + char magic[16]; // b"ADSEGMENTEDFILE" + padding + uint32 unk1; // 0x01 + uint32 unk2; // 0x02 + uint32 segment_number; // segment number starts at 0x01 + uint32 segment_count; // number of segments + uint64 segment_size; // off by 512 bytes + uint32 logical_image_offset; + char padding[468]; // 0x00 +} SegmentedFileHeader; + +typedef struct { + char magic[16]; // b"ADLOGICALIMAGE" + padding + uint32 version; // 0x03 or 0x04 + uint32 unk1; // 0x01 + uint32 chunk_size; // zlib chunk size (uint64?) 
+ uint64 metadata_offset; + uint64 first_file_offset; + uint32 name_len; + + // ADv4 (offset 48 contains name[name_len] in ADv3) + char unk_magic[4]; // b"AD" + (2 * 0x00) + uint64 name_offset; // 0x5c + uint64 attr_guid_offset; + uint64 unk2; // 0x00 + uint64 locs_guid_offset; + uint64 unk3; // 0x00 + // END ADv4 + + char name[name_len]; +} LogicalImageHeader; + +typedef struct { + uint64 next; // Next FileEntry in same hierarchy level + uint64 child; // Next FileEntry within this dir, 0x00 if file + uint64 meta; // Offset of first FileMeta entry + uint64 zlib_meta; // Offset of zlib chunk metadata + uint64 size; // Decompressed file size, 0x00 if no data + EntryType type; // 0x00 = file, 0x05 = directory + uint32 name_len; + char name[name_len]; + uint64 parent_index; // Parent folder index, 0x00 if at root +} FileEntry; + +typedef struct { + uint64 num_chunks; // only if FileEntry.size != 0x00 + uint64 chunks[num_chunks]; +} FileEntryChunks; + +typedef struct { + uint64 next; + uint32 category; + MetaType type; + uint32 len; + char data[len]; +} FileMeta; + +typedef struct { + char unk1[352]; // version 4 only +} Footer; + +typedef struct { + CHAR unknown[16]; + WCHAR link[EOF]; +} ReparsePoint; +""" + +c_ad1 = cstruct().load(ad1_def) diff --git a/dissect/evidence/ad1/c_ad1.pyi b/dissect/evidence/ad1/c_ad1.pyi new file mode 100644 index 0000000..a50418b --- /dev/null +++ b/dissect/evidence/ad1/c_ad1.pyi @@ -0,0 +1,218 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, Literal, TypeAlias, overload + +import dissect.cstruct as __cs__ + +class _c_ad1(__cs__.cstruct): + ADSEGMENTEDFILE_MAGIC: Literal["ADSEGMENTEDFILE\x00"] = ... + class EntryType(__cs__.Enum): + File = ... + Unknown_File = ... + Deleted = ... + Directory = ... + + class FileClassType(__cs__.Enum): + File = ... + Directory = ... + ReparsePoint = ... + + class MetaType(__cs__.Enum): + ItemContentHashes = ... + FileClass = ... + FileSize = ... + PhysicalSize = ... + Timestamps = ... 
+ StartCluster = ... + DateAccessed = ... + DateCreated = ... + DateModified = ... + Encrypted = ... + Compressed = ... + ActualFile = ... + StartSector = ... + ADSCount = ... + ShortFilename = ... + Hidden = ... + System = ... + ReadOnly = ... + Archive = ... + MFTRecordNumber = ... + MFTDateChanged = ... + MFTIsResident = ... + MFTIsOffline = ... + MFTIsSparse = ... + MFTIsTemporary = ... + MFTOwnerSid = ... + MFTOwnerName = ... + MFTGroupSid = ... + MFTGroupName = ... + MFTFileDateCreated = ... + MFTFileDateModified = ... + MFTFileDateAccessed = ... + MFTFileDateChanged = ... + MFTFileSize = ... + MFTFilePhysicalSize = ... + IndxFilename = ... + IndxFileSize = ... + IndxPhysicalSize = ... + IndxDateCreated = ... + IndxDateModified = ... + IndxDateAccessed = ... + IndxDateChanged = ... + AceType = ... + AceInheritable = ... + AceSID = ... + AceName = ... + AceAccessMask = ... + AceExecuteFile = ... + AceReadData = ... + AceWriteData = ... + AceAppendData = ... + AceTraverseFolder = ... + AceListFolder = ... + AceCreateFiles = ... + AceCreateFolders = ... + AceDeleteChildren = ... + AceDeleteSelf = ... + AceReadPermissions = ... + AceChangePermissions = ... + AceTakeOwnership = ... + MD5 = ... + SHA1 = ... + ClusterSize = ... + ClusterCount = ... + FreeClusterCount = ... + VolumeSerialNumber = ... + PosixPermissions = ... 
+ + class SegmentedFileHeader(__cs__.Structure): + magic: __cs__.CharArray + unk1: _c_ad1.uint32 + unk2: _c_ad1.uint32 + segment_number: _c_ad1.uint32 + segment_count: _c_ad1.uint32 + segment_size: _c_ad1.uint64 + logical_image_offset: _c_ad1.uint32 + padding: __cs__.CharArray + @overload + def __init__( + self, + magic: __cs__.CharArray | None = ..., + unk1: _c_ad1.uint32 | None = ..., + unk2: _c_ad1.uint32 | None = ..., + segment_number: _c_ad1.uint32 | None = ..., + segment_count: _c_ad1.uint32 | None = ..., + segment_size: _c_ad1.uint64 | None = ..., + logical_image_offset: _c_ad1.uint32 | None = ..., + padding: __cs__.CharArray | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class LogicalImageHeader(__cs__.Structure): + magic: __cs__.CharArray + version: _c_ad1.uint32 + unk1: _c_ad1.uint32 + chunk_size: _c_ad1.uint32 + metadata_offset: _c_ad1.uint64 + first_file_offset: _c_ad1.uint64 + name_len: _c_ad1.uint32 + unk_magic: __cs__.CharArray + name_offset: _c_ad1.uint64 + attr_guid_offset: _c_ad1.uint64 + unk2: _c_ad1.uint64 + locs_guid_offset: _c_ad1.uint64 + unk3: _c_ad1.uint64 + name: __cs__.CharArray + @overload + def __init__( + self, + magic: __cs__.CharArray | None = ..., + version: _c_ad1.uint32 | None = ..., + unk1: _c_ad1.uint32 | None = ..., + chunk_size: _c_ad1.uint32 | None = ..., + metadata_offset: _c_ad1.uint64 | None = ..., + first_file_offset: _c_ad1.uint64 | None = ..., + name_len: _c_ad1.uint32 | None = ..., + unk_magic: __cs__.CharArray | None = ..., + name_offset: _c_ad1.uint64 | None = ..., + attr_guid_offset: _c_ad1.uint64 | None = ..., + unk2: _c_ad1.uint64 | None = ..., + locs_guid_offset: _c_ad1.uint64 | None = ..., + unk3: _c_ad1.uint64 | None = ..., + name: __cs__.CharArray | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ + class FileEntry(__cs__.Structure): + next: _c_ad1.uint64 + child: _c_ad1.uint64 + meta: _c_ad1.uint64 + zlib_meta: _c_ad1.uint64 + size: _c_ad1.uint64 + type: _c_ad1.EntryType + name_len: _c_ad1.uint32 + name: __cs__.CharArray + parent_index: _c_ad1.uint64 + @overload + def __init__( + self, + next: _c_ad1.uint64 | None = ..., + child: _c_ad1.uint64 | None = ..., + meta: _c_ad1.uint64 | None = ..., + zlib_meta: _c_ad1.uint64 | None = ..., + size: _c_ad1.uint64 | None = ..., + type: _c_ad1.EntryType | None = ..., + name_len: _c_ad1.uint32 | None = ..., + name: __cs__.CharArray | None = ..., + parent_index: _c_ad1.uint64 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class FileEntryChunks(__cs__.Structure): + num_chunks: _c_ad1.uint64 + chunks: __cs__.Array[_c_ad1.uint64] + @overload + def __init__( + self, num_chunks: _c_ad1.uint64 | None = ..., chunks: __cs__.Array[_c_ad1.uint64] | None = ... + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class FileMeta(__cs__.Structure): + next: _c_ad1.uint64 + category: _c_ad1.uint32 + type: _c_ad1.MetaType + len: _c_ad1.uint32 + data: __cs__.CharArray + @overload + def __init__( + self, + next: _c_ad1.uint64 | None = ..., + category: _c_ad1.uint32 | None = ..., + type: _c_ad1.MetaType | None = ..., + len: _c_ad1.uint32 | None = ..., + data: __cs__.CharArray | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class Footer(__cs__.Structure): + unk1: __cs__.CharArray + @overload + def __init__(self, unk1: __cs__.CharArray | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class ReparsePoint(__cs__.Structure): + unknown: __cs__.CharArray + link: __cs__.WcharArray + @overload + def __init__(self, unknown: __cs__.CharArray | None = ..., link: __cs__.WcharArray | None = ...): ... 
class FileStream(AlignedStream):
    """Custom stream implementation for AD1 :class:`FileEntry` file contents.

    File data is stored as a series of zlib compressed chunks. The table of chunk offsets is
    read once on construction, and chunks are decompressed on demand in :meth:`_read`.

    Args:
        entry: The file entry whose contents should be exposed.
    """

    def __init__(self, entry: FileEntry):
        self.entry = entry
        self.stream = self.entry.ad1.stream
        self.chunk_size = self.entry.ad1.logical_image.chunk_size

        # Offset 0 is the "no entry" value for the other FileEntry offsets (next/child/meta),
        # so a zero chunk table offset is treated as "no chunks" instead of being parsed
        zlib_meta = self.entry.entry.zlib_meta
        if zlib_meta:
            self.stream.seek(zlib_meta)
            chunks = c_ad1.FileEntryChunks(self.stream).chunks
        else:
            chunks = []

        # The metadata offset is appended as the end boundary of the last compressed chunk
        self.chunks = [*chunks, self.entry.entry.meta]

        super().__init__(self.entry.size, self.chunk_size)

    def _read(self, offset: int, length: int) -> bytes:
        """Decompress and return the chunks covering ``length`` bytes starting at ``offset``."""
        result = []

        chunk, offset_in_chunk = divmod(offset, self.chunk_size)
        # Include the start offset inside the first chunk when counting the chunks to cover
        chunk_count = (offset_in_chunk + length + self.chunk_size - 1) // self.chunk_size
        chunk_offsets = self.chunks[chunk : chunk + chunk_count + 1]

        # Each pair of consecutive offsets delimits one compressed chunk
        for chunk_offset, next_chunk_offset in zip(chunk_offsets, chunk_offsets[1:]):
            self.stream.seek(chunk_offset)
            buf = zlib.decompress(self.stream.read(next_chunk_offset - chunk_offset))

            read_size = min(length, self.chunk_size - offset_in_chunk)
            result.append(buf[offset_in_chunk : offset_in_chunk + read_size])

            length -= read_size
            # Only the first chunk can start mid-chunk
            offset_in_chunk = 0

        return b"".join(result)
+ """ + fh.seek(0) + return fh.read(8) == c_adcrypt.ADCRYPT_MAGIC.encode() + + +class ADCrypt: + """Access Data ADCRYPT encrypted container implementation. + + Not particularly useful on its own, but used by other evidence types such as AD1. + Pass the first segment file handle to this class, then use :meth:`unlock` to unlock the container, + and :meth:`wrap` to wrap other segment file handles into decrypting streams. + + References: + - Reverse engineering ``adencrypt.dll`` + - https://github.com/libyal/libewf/blob/main/documentation/Expert%20Witness%20Compression%20Format%20(EWF).asciidoc#7-ad-encryption + - https://github.com/log2timeline/plaso/issues/2726#issuecomment-517444736 + """ + + def __init__(self, fh: BinaryIO) -> None: + self.fh = fh + self.fh.seek(0) + + try: + self.header: c_adcrypt.Header = c_adcrypt.Header(self.fh) + except EOFError: + raise ValueError("File handle is not an ADCRYPT container: Unable to read ADCRYPT header") + + if self.header.magic != c_adcrypt.ADCRYPT_MAGIC.encode(): + raise ValueError(f"File handle is not an ADCRYPT container: Unknown magic {self.header.magic!r}") + + if self.header.version != 1: + raise ValueError(f"Unsupported ADCRYPT container version {self.header.version!r}") + + self.key: bytes | None = None + + def is_locked(self) -> bool: + """Return whether the ADCRYPT container is locked.""" + return self.key is None + + def unlock(self, *, passphrase: str | bytes | None = None, private_key: Path | bytes | None = None) -> None: + """Unlock the ADCRYPT container with a given passphrase or private key. + + Args: + passphrase: The passphrase to unlock the container. + private_key: The private key to unlock the container. + + Raises: + RuntimeError: If required dependencies are missing. + ValueError: If unlocking failed. 
+ """ + if not HAS_CRYPTO: + raise RuntimeError("Missing required dependency 'pycryptodome' for ADCRYPT decryption") + + pkey = adcrypt_kdf( + passphrase, + private_key, + self.header.enc_salt, + self.header.key_len, + self.header.pbkdf2_count, + self.header.hash_algo.name.lower(), + ) + + # Verify the HMAC of EKEY using PKEY + hash algo, comparing with header HMAC + if hmac.digest(pkey, self.header.enc_key, self.header.hash_algo.name.lower()) != self.header.hmac_enc_key: + raise ValueError("Unable to unlock: HMAC verification of passphrase failed") + + # Decrypt EKEY using PKEY + ctr = Counter.new(128, initial_value=0, little_endian=True) + cipher = AES.new(pkey, AES.MODE_CTR, counter=ctr) + self.key = cipher.decrypt(self.header.enc_key) + + def wrap(self, fh: BinaryIO, index: int) -> ADCryptStream: + """Wrap a file handle into an :class:`ADCryptStream` for decryption. + + Args: + fh: The file handle to wrap. + index: The segment index. + + Raises: + ValueError: If the container is not unlocked. + """ + if self.is_locked(): + raise ValueError("ADCRYPT container is not unlocked") + + return ADCryptStream(fh, self.key, index) + + +def adcrypt_kdf( + passphrase: str | bytes | None, + private_key: Path | bytes | None, + salt: bytes, + key_len: int, + count: int, + algorithm: str, +) -> bytes: + """Derive the ADCRYPT decryption key. + + Args: + passphrase: The passphrase to unlock the container. + private_key: The private key to unlock the container. + salt: The salt used for key derivation. + key_len: The length of the derived key. + count: The number of iterations for PBKDF2. + algorithm: The hash algorithm to use. + + Returns: + The derived key as bytes. + """ + if isinstance(passphrase, str): + passphrase = passphrase.encode() + + # If a private key was used, the passphrase is empty. 
+ passphrase_hash = b"" + if passphrase is not None and private_key is None: + passphrase_hash = hashlib.new(algorithm, passphrase).digest() + + # If no private key was used, the "encrypted" salt is the plaintext salt as-is. + derived_salt = salt + + # Decrypt the salt if a private key was provided. + if private_key is not None: + rsa_key = RSA.import_key(private_key.read_bytes() if isinstance(private_key, Path) else private_key, passphrase) + if not (derived_salt := PKCS1_v1_5.new(rsa_key).decrypt(salt, sentinel=None, expected_pt_len=16)): + raise ValueError("Failed to decrypt salt using provided private key") + + return PBKDF2(passphrase_hash, derived_salt, key_len, count) diff --git a/dissect/evidence/adcrypt/c_adcrypt.py b/dissect/evidence/adcrypt/c_adcrypt.py new file mode 100644 index 0000000..650be9e --- /dev/null +++ b/dissect/evidence/adcrypt/c_adcrypt.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +adcrypt_def = """ +#define ADCRYPT_MAGIC ADCRYPT\00 + +enum EncAlgo : uint32 { + AES128 = 0x1, + AES192 = 0x2, + AES256 = 0x3, +}; + +enum HashAlgo : uint32 { + SHA256 = 0x1, + SHA512 = 0x2, +}; + +struct Header { + char magic[8]; // b"ADCRYPT\x00" + uint32 version; // 0x01 + uint32 header_size; // 0x200 aka offset enc data + int16 num_passwords; // always -0x1 ? + int16 num_raw_keys; // always -0x1 ? + int16 num_certificates; // always -0x1 ? 
+ char reserved[2]; // 00 00 + EncAlgo enc_algo; + HashAlgo hash_algo; + uint32 pbkdf2_count; + uint32 salt_len; + uint32 key_len; + uint32 hmac_len; + char enc_salt[salt_len]; + char enc_key[key_len]; + char hmac_enc_key[hmac_len]; + // padding until 0x200 +}; +""" + +c_adcrypt = cstruct().load(adcrypt_def) diff --git a/dissect/evidence/adcrypt/c_adcrypt.pyi b/dissect/evidence/adcrypt/c_adcrypt.pyi new file mode 100644 index 0000000..447fa79 --- /dev/null +++ b/dissect/evidence/adcrypt/c_adcrypt.pyi @@ -0,0 +1,58 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, Literal, TypeAlias, overload + +import dissect.cstruct as __cs__ + +class _c_adcrypt(__cs__.cstruct): + ADCRYPT_MAGIC: Literal["ADCRYPT\x00"] = ... + class EncAlgo(__cs__.Enum): + AES128 = ... + AES192 = ... + AES256 = ... + + class HashAlgo(__cs__.Enum): + SHA256 = ... + SHA512 = ... + + class Header(__cs__.Structure): + magic: __cs__.CharArray + version: _c_adcrypt.uint32 + header_size: _c_adcrypt.uint32 + num_passwords: _c_adcrypt.int16 + num_raw_keys: _c_adcrypt.int16 + num_certificates: _c_adcrypt.int16 + reserved: __cs__.CharArray + enc_algo: _c_adcrypt.EncAlgo + hash_algo: _c_adcrypt.HashAlgo + pbkdf2_count: _c_adcrypt.uint32 + salt_len: _c_adcrypt.uint32 + key_len: _c_adcrypt.uint32 + hmac_len: _c_adcrypt.uint32 + enc_salt: __cs__.CharArray + enc_key: __cs__.CharArray + hmac_enc_key: __cs__.CharArray + @overload + def __init__( + self, + magic: __cs__.CharArray | None = ..., + version: _c_adcrypt.uint32 | None = ..., + header_size: _c_adcrypt.uint32 | None = ..., + num_passwords: _c_adcrypt.int16 | None = ..., + num_raw_keys: _c_adcrypt.int16 | None = ..., + num_certificates: _c_adcrypt.int16 | None = ..., + reserved: __cs__.CharArray | None = ..., + enc_algo: _c_adcrypt.EncAlgo | None = ..., + hash_algo: _c_adcrypt.HashAlgo | None = ..., + pbkdf2_count: _c_adcrypt.uint32 | None = ..., + salt_len: _c_adcrypt.uint32 | None = ..., + key_len: _c_adcrypt.uint32 | None = ..., + hmac_len: 
_c_adcrypt.uint32 | None = ..., + enc_salt: __cs__.CharArray | None = ..., + enc_key: __cs__.CharArray | None = ..., + hmac_enc_key: __cs__.CharArray | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + +# Technically `c_adcrypt` is an instance of `_c_adcrypt`, but then we can't use it in type hints +c_adcrypt: TypeAlias = _c_adcrypt diff --git a/dissect/evidence/adcrypt/stream.py b/dissect/evidence/adcrypt/stream.py new file mode 100644 index 0000000..137bad4 --- /dev/null +++ b/dissect/evidence/adcrypt/stream.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import io +from typing import BinaryIO + +from dissect.util.stream import AlignedStream + +try: + from Crypto.Cipher import AES + from Crypto.Util import Counter + + HAS_CRYPTO = True +except ImportError: + HAS_CRYPTO = False + + +class ADCryptStream(AlignedStream): + def __init__(self, fh: BinaryIO, key: bytes, index: int): + if not HAS_CRYPTO: + raise RuntimeError("Missing required dependency 'pycryptodome' for ADCRYPT decryption") + + self.fh = fh + self.key = key + self.index = index + + self.fh.seek(0, io.SEEK_END) + size = self.fh.tell() - (512 if index == 0 else 0) # Skip ADCRYPT header + super().__init__(size) + + def _read(self, offset: int, length: int) -> bytes: + self.fh.seek(offset + (512 if self.index == 0 else 0)) # Skip ADCRYPT header + buf = self.fh.read(length) + + ctr = Counter.new( + 128, + initial_value=self.index << 64 | (offset // (128 // 8)), + little_endian=True, + ) + cipher = AES.new(self.key, AES.MODE_CTR, counter=ctr) + return cipher.decrypt(buf) diff --git a/dissect/evidence/asdf/asdf.py b/dissect/evidence/asdf/asdf.py index f160db8..7622316 100644 --- a/dissect/evidence/asdf/asdf.py +++ b/dissect/evidence/asdf/asdf.py @@ -11,12 +11,12 @@ from collections import defaultdict from typing import TYPE_CHECKING, BinaryIO -from dissect.cstruct import cstruct from dissect.util import ts from dissect.util.stream import 
AlignedStream, RangeStream -from dissect.evidence.asdf.streams import CompressedStream, Crc32Stream, HashedStream -from dissect.evidence.exceptions import ( +from dissect.evidence.asdf.c_asdf import c_asdf +from dissect.evidence.asdf.stream import CompressedStream, Crc32Stream, HashedStream +from dissect.evidence.exception import ( InvalidBlock, InvalidSnapshot, UnsupportedVersion, @@ -41,54 +41,6 @@ FOOTER_MAGIC = b"FT\xa5\xdf" SPARSE_BYTES = b"\xa5\xdf" -asdf_def = """ -flag FILE_FLAG : uint32 { - SHA256 = 0x01, -}; - -flag BLOCK_FLAG : uint8 { - CRC32 = 0x01, - COMPRESS = 0x02, -}; - -struct header { - char magic[4]; // File magic, must be "ASDF" - FILE_FLAG flags; // File flags - uint8 version; // File version - char reserved1[7]; // Reserved - uint64 timestamp; // Creation timestamp of the file - char reserved2[8]; // Reserved - char guid[16]; // GUID, should be unique per writer -}; - -struct block { - char magic[4]; // Block magic, must be "BL\\xa5\\xdf" - BLOCK_FLAG flags; // Block flags - uint8 idx; // Stream index, some reserved values have special meaning - char reserved[2]; // Reserved - uint64 offset; // Absolute offset of block in stream - uint64 size; // Size of block in stream -}; - -struct table_entry { - BLOCK_FLAG flags; // Block flags - uint8 idx; // Stream index, some reserved values have special meaning - char reserved[2]; // Reserved - uint64 offset; // Absolute offset of block in stream - uint64 size; // Size of block in stream - uint64 file_offset; // Absolute offset of block in file - uint64 file_size; // Size of block in file -}; - -struct footer { - char magic[4]; // Footer magic, must be "FT\\xa5\\xdf" - char reserved[4]; // Reserved - uint64 table_offset; // Offset in file to start of block table - char sha256[32]; // SHA256 of this file up until this hash -}; -""" -c_asdf = cstruct().load(asdf_def) - class AsdfWriter(io.RawIOBase): """ASDF file writer. 
diff --git a/dissect/evidence/asdf/c_asdf.py b/dissect/evidence/asdf/c_asdf.py new file mode 100644 index 0000000..8da56ce --- /dev/null +++ b/dissect/evidence/asdf/c_asdf.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +asdf_def = """ +flag FILE_FLAG : uint32 { + SHA256 = 0x01, +}; + +flag BLOCK_FLAG : uint8 { + CRC32 = 0x01, + COMPRESS = 0x02, +}; + +struct header { + char magic[4]; // File magic, must be "ASDF" + FILE_FLAG flags; // File flags + uint8 version; // File version + char reserved1[7]; // Reserved + uint64 timestamp; // Creation timestamp of the file + char reserved2[8]; // Reserved + char guid[16]; // GUID, should be unique per writer +}; + +struct block { + char magic[4]; // Block magic, must be "BL\\xa5\\xdf" + BLOCK_FLAG flags; // Block flags + uint8 idx; // Stream index, some reserved values have special meaning + char reserved[2]; // Reserved + uint64 offset; // Absolute offset of block in stream + uint64 size; // Size of block in stream +}; + +struct table_entry { + BLOCK_FLAG flags; // Block flags + uint8 idx; // Stream index, some reserved values have special meaning + char reserved[2]; // Reserved + uint64 offset; // Absolute offset of block in stream + uint64 size; // Size of block in stream + uint64 file_offset; // Absolute offset of block in file + uint64 file_size; // Size of block in file +}; + +struct footer { + char magic[4]; // Footer magic, must be "FT\\xa5\\xdf" + char reserved[4]; // Reserved + uint64 table_offset; // Offset in file to start of block table + char sha256[32]; // SHA256 of this file up until this hash +}; +""" + +c_asdf = cstruct().load(asdf_def) diff --git a/dissect/evidence/asdf/c_asdf.pyi b/dissect/evidence/asdf/c_asdf.pyi new file mode 100644 index 0000000..58c6f12 --- /dev/null +++ b/dissect/evidence/asdf/c_asdf.pyi @@ -0,0 +1,95 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, TypeAlias, overload + +import dissect.cstruct as __cs__ + +class 
_c_asdf(__cs__.cstruct): + class FILE_FLAG(__cs__.Flag): + SHA256 = ... + + class BLOCK_FLAG(__cs__.Flag): + CRC32 = ... + COMPRESS = ... + + class header(__cs__.Structure): + magic: __cs__.CharArray + flags: _c_asdf.FILE_FLAG + version: _c_asdf.uint8 + reserved1: __cs__.CharArray + timestamp: _c_asdf.uint64 + reserved2: __cs__.CharArray + guid: __cs__.CharArray + @overload + def __init__( + self, + magic: __cs__.CharArray | None = ..., + flags: _c_asdf.FILE_FLAG | None = ..., + version: _c_asdf.uint8 | None = ..., + reserved1: __cs__.CharArray | None = ..., + timestamp: _c_asdf.uint64 | None = ..., + reserved2: __cs__.CharArray | None = ..., + guid: __cs__.CharArray | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class block(__cs__.Structure): + magic: __cs__.CharArray + flags: _c_asdf.BLOCK_FLAG + idx: _c_asdf.uint8 + reserved: __cs__.CharArray + offset: _c_asdf.uint64 + size: _c_asdf.uint64 + @overload + def __init__( + self, + magic: __cs__.CharArray | None = ..., + flags: _c_asdf.BLOCK_FLAG | None = ..., + idx: _c_asdf.uint8 | None = ..., + reserved: __cs__.CharArray | None = ..., + offset: _c_asdf.uint64 | None = ..., + size: _c_asdf.uint64 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class table_entry(__cs__.Structure): + flags: _c_asdf.BLOCK_FLAG + idx: _c_asdf.uint8 + reserved: __cs__.CharArray + offset: _c_asdf.uint64 + size: _c_asdf.uint64 + file_offset: _c_asdf.uint64 + file_size: _c_asdf.uint64 + @overload + def __init__( + self, + flags: _c_asdf.BLOCK_FLAG | None = ..., + idx: _c_asdf.uint8 | None = ..., + reserved: __cs__.CharArray | None = ..., + offset: _c_asdf.uint64 | None = ..., + size: _c_asdf.uint64 | None = ..., + file_offset: _c_asdf.uint64 | None = ..., + file_size: _c_asdf.uint64 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ + class footer(__cs__.Structure): + magic: __cs__.CharArray + reserved: __cs__.CharArray + table_offset: _c_asdf.uint64 + sha256: __cs__.CharArray + @overload + def __init__( + self, + magic: __cs__.CharArray | None = ..., + reserved: __cs__.CharArray | None = ..., + table_offset: _c_asdf.uint64 | None = ..., + sha256: __cs__.CharArray | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + +# Technically `c_asdf` is an instance of `_c_asdf`, but then we can't use it in type hints +c_asdf: TypeAlias = _c_asdf diff --git a/dissect/evidence/asdf/streams.py b/dissect/evidence/asdf/stream.py similarity index 100% rename from dissect/evidence/asdf/streams.py rename to dissect/evidence/asdf/stream.py diff --git a/dissect/evidence/ewf/__init__.py b/dissect/evidence/ewf/__init__.py new file mode 100644 index 0000000..1be30e7 --- /dev/null +++ b/dissect/evidence/ewf/__init__.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from dissect.evidence.ewf.c_ewf import c_ewf +from dissect.evidence.ewf.ewf import ( + EWF, + EWFError, + EWFStream, + HeaderSection, + SectionDescriptor, + Segment, + TableSection, + VolumeSection, +) + +__all__ = [ + "EWF", + "EWFError", + "EWFStream", + "HeaderSection", + "SectionDescriptor", + "Segment", + "TableSection", + "VolumeSection", + "c_ewf", +] diff --git a/dissect/evidence/ewf/c_ewf.py b/dissect/evidence/ewf/c_ewf.py new file mode 100644 index 0000000..84c21ea --- /dev/null +++ b/dissect/evidence/ewf/c_ewf.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +ewf_def = """ +enum MediaType : uint8 { + Removable = 0x00, + Fixed = 0x01, + Optical = 0x03, + Logical = 0x0e, + RAM = 0x10 +}; + +enum MediaFlags : uint8 { + Image = 0x01, + Physical = 0x02, + Fastbloc = 0x04, + Tablaeu = 0x08 +}; + +enum CompressionLevel : uint8 { + None = 0x00, + Good = 0x01, + Best = 0x02 +}; + +typedef struct { + char signature[8]; + uint8 
fields_start; + uint16 segment_number; + uint16 fields_end; +} EWFHeader; + +typedef struct { + char type[16]; + uint64 next; + uint64 size; + uint8 pad[40]; + uint32 checksum; +} EWFSectionDescriptor; + +typedef struct { + uint32 reserved_1; + uint32 chunk_count; + uint32 sector_count; + uint32 sector_size; + uint32 total_sector_count; + uint8 reserved[20]; + uint8 pad[45]; + char signature[5]; + uint32 checksum; +} EWFVolumeSectionSpec; + +typedef struct { + MediaType media_type; + uint8 reserved_1[3]; + uint32 chunk_count; + uint32 sector_count; + uint32 sector_size; + uint64 total_sector_count; + uint32 num_cylinders; + uint32 num_heads; + uint32 num_sectors; + uint8 media_flags; + uint8 unknown_1[3]; + uint32 palm_start_sector; + uint32 unknown_2; + uint32 smart_start_sector; + CompressionLevel compression_level; + uint8 unknown_3[3]; + uint32 error_granularity; + uint32 unknown_4; + uint8 uuid[16]; + uint8 pad[963]; + char signature[5]; + uint32 checksum; +} EWFVolumeSection; + +typedef struct { + uint32 num_entries; + uint32 _; + uint64 base_offset; + uint32 _; + uint32 checksum; + uint32 entries[num_entries]; +} EWFTableSection; +""" + +c_ewf = cstruct().load(ewf_def) diff --git a/dissect/evidence/ewf/c_ewf.pyi b/dissect/evidence/ewf/c_ewf.pyi new file mode 100644 index 0000000..f100653 --- /dev/null +++ b/dissect/evidence/ewf/c_ewf.pyi @@ -0,0 +1,156 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, TypeAlias, overload + +import dissect.cstruct as __cs__ + +class _c_ewf(__cs__.cstruct): + class MediaType(__cs__.Enum): + Removable = ... + Fixed = ... + Optical = ... + Logical = ... + RAM = ... + + class MediaFlags(__cs__.Enum): + Image = ... + Physical = ... + Fastbloc = ... + Tablaeu = ... + + class CompressionLevel(__cs__.Enum): + # None = ... + Good = ... + Best = ... 
+ + class EWFHeader(__cs__.Structure): + signature: __cs__.CharArray + fields_start: _c_ewf.uint8 + segment_number: _c_ewf.uint16 + fields_end: _c_ewf.uint16 + @overload + def __init__( + self, + signature: __cs__.CharArray | None = ..., + fields_start: _c_ewf.uint8 | None = ..., + segment_number: _c_ewf.uint16 | None = ..., + fields_end: _c_ewf.uint16 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class EWFSectionDescriptor(__cs__.Structure): + type: __cs__.CharArray + next: _c_ewf.uint64 + size: _c_ewf.uint64 + pad: __cs__.Array[_c_ewf.uint8] + checksum: _c_ewf.uint32 + @overload + def __init__( + self, + type: __cs__.CharArray | None = ..., + next: _c_ewf.uint64 | None = ..., + size: _c_ewf.uint64 | None = ..., + pad: __cs__.Array[_c_ewf.uint8] | None = ..., + checksum: _c_ewf.uint32 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class EWFVolumeSectionSpec(__cs__.Structure): + reserved_1: _c_ewf.uint32 + chunk_count: _c_ewf.uint32 + sector_count: _c_ewf.uint32 + sector_size: _c_ewf.uint32 + total_sector_count: _c_ewf.uint32 + reserved: __cs__.Array[_c_ewf.uint8] + pad: __cs__.Array[_c_ewf.uint8] + signature: __cs__.CharArray + checksum: _c_ewf.uint32 + @overload + def __init__( + self, + reserved_1: _c_ewf.uint32 | None = ..., + chunk_count: _c_ewf.uint32 | None = ..., + sector_count: _c_ewf.uint32 | None = ..., + sector_size: _c_ewf.uint32 | None = ..., + total_sector_count: _c_ewf.uint32 | None = ..., + reserved: __cs__.Array[_c_ewf.uint8] | None = ..., + pad: __cs__.Array[_c_ewf.uint8] | None = ..., + signature: __cs__.CharArray | None = ..., + checksum: _c_ewf.uint32 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ + class EWFVolumeSection(__cs__.Structure): + media_type: _c_ewf.MediaType + reserved_1: __cs__.Array[_c_ewf.uint8] + chunk_count: _c_ewf.uint32 + sector_count: _c_ewf.uint32 + sector_size: _c_ewf.uint32 + total_sector_count: _c_ewf.uint64 + num_cylinders: _c_ewf.uint32 + num_heads: _c_ewf.uint32 + num_sectors: _c_ewf.uint32 + media_flags: _c_ewf.uint8 + unknown_1: __cs__.Array[_c_ewf.uint8] + palm_start_sector: _c_ewf.uint32 + unknown_2: _c_ewf.uint32 + smart_start_sector: _c_ewf.uint32 + compression_level: _c_ewf.CompressionLevel + unknown_3: __cs__.Array[_c_ewf.uint8] + error_granularity: _c_ewf.uint32 + unknown_4: _c_ewf.uint32 + uuid: __cs__.Array[_c_ewf.uint8] + pad: __cs__.Array[_c_ewf.uint8] + signature: __cs__.CharArray + checksum: _c_ewf.uint32 + @overload + def __init__( + self, + media_type: _c_ewf.MediaType | None = ..., + reserved_1: __cs__.Array[_c_ewf.uint8] | None = ..., + chunk_count: _c_ewf.uint32 | None = ..., + sector_count: _c_ewf.uint32 | None = ..., + sector_size: _c_ewf.uint32 | None = ..., + total_sector_count: _c_ewf.uint64 | None = ..., + num_cylinders: _c_ewf.uint32 | None = ..., + num_heads: _c_ewf.uint32 | None = ..., + num_sectors: _c_ewf.uint32 | None = ..., + media_flags: _c_ewf.uint8 | None = ..., + unknown_1: __cs__.Array[_c_ewf.uint8] | None = ..., + palm_start_sector: _c_ewf.uint32 | None = ..., + unknown_2: _c_ewf.uint32 | None = ..., + smart_start_sector: _c_ewf.uint32 | None = ..., + compression_level: _c_ewf.CompressionLevel | None = ..., + unknown_3: __cs__.Array[_c_ewf.uint8] | None = ..., + error_granularity: _c_ewf.uint32 | None = ..., + unknown_4: _c_ewf.uint32 | None = ..., + uuid: __cs__.Array[_c_ewf.uint8] | None = ..., + pad: __cs__.Array[_c_ewf.uint8] | None = ..., + signature: __cs__.CharArray | None = ..., + checksum: _c_ewf.uint32 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ + class EWFTableSection(__cs__.Structure): + num_entries: _c_ewf.uint32 + _: _c_ewf.uint32 + base_offset: _c_ewf.uint64 + checksum: _c_ewf.uint32 + entries: __cs__.Array[_c_ewf.uint32] + @overload + def __init__( + self, + num_entries: _c_ewf.uint32 | None = ..., + _: _c_ewf.uint32 | None = ..., + base_offset: _c_ewf.uint64 | None = ..., + checksum: _c_ewf.uint32 | None = ..., + entries: __cs__.Array[_c_ewf.uint32] | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + +# Technically `c_ewf` is an instance of `_c_ewf`, but then we can't use it in type hints +c_ewf: TypeAlias = _c_ewf diff --git a/dissect/evidence/ewf.py b/dissect/evidence/ewf/ewf.py similarity index 83% rename from dissect/evidence/ewf.py rename to dissect/evidence/ewf/ewf.py index a60fed2..e02ae41 100644 --- a/dissect/evidence/ewf.py +++ b/dissect/evidence/ewf/ewf.py @@ -8,99 +8,14 @@ from pathlib import Path from typing import BinaryIO -from dissect.cstruct import cstruct from dissect.util.stream import AlignedStream -from dissect.evidence.exceptions import EWFError +from dissect.evidence.ewf import c_ewf +from dissect.evidence.exception import EWFError log = logging.getLogger(__name__) log.setLevel(os.getenv("DISSECT_LOG_EWF", "CRITICAL")) -ewf_def = """ -enum MediaType : uint8 { - Removable = 0x00, - Fixed = 0x01, - Optical = 0x03, - Logical = 0x0e, - RAM = 0x10 -}; - -enum MediaFlags : uint8 { - Image = 0x01, - Physical = 0x02, - Fastbloc = 0x04, - Tablaeu = 0x08 -}; - -enum CompressionLevel : uint8 { - None = 0x00, - Good = 0x01, - Best = 0x02 -}; - -typedef struct { - char signature[8]; - uint8 fields_start; - uint16 segment_number; - uint16 fields_end; -} EWFHeader; - -typedef struct { - char type[16]; - uint64 next; - uint64 size; - uint8 pad[40]; - uint32 checksum; -} EWFSectionDescriptor; - -typedef struct { - uint32 reserved_1; - uint32 chunk_count; - uint32 sector_count; - uint32 sector_size; - uint32 total_sector_count; - 
uint8 reserved[20]; - uint8 pad[45]; - char signature[5]; - uint32 checksum; -} EWFVolumeSectionSpec; - -typedef struct { - MediaType media_type; - uint8 reserved_1[3]; - uint32 chunk_count; - uint32 sector_count; - uint32 sector_size; - uint64 total_sector_count; - uint32 num_cylinders; - uint32 num_heads; - uint32 num_sectors; - uint8 media_flags; - uint8 unknown_1[3]; - uint32 palm_start_sector; - uint32 unknown_2; - uint32 smart_start_sector; - CompressionLevel compression_level; - uint8 unknown_3[3]; - uint32 error_granularity; - uint32 unknown_4; - uint8 uuid[16]; - uint8 pad[963]; - char signature[5]; - uint32 checksum; -} EWFVolumeSection; - -typedef struct { - uint32 num_entries; - uint32 _; - uint64 base_offset; - uint32 _; - uint32 checksum; - uint32 entries[num_entries]; -} EWFTableSection; -""" - -c_ewf = cstruct().load(ewf_def) MAX_OPEN_SEGMENTS = 128 @@ -124,7 +39,7 @@ def find_files(path: str | Path) -> list[Path]: class EWF: """Expert Witness Disk Image Format.""" - def __init__(self, fh: BinaryIO | list[BinaryIO]): + def __init__(self, fh: BinaryIO | list[BinaryIO] | Path | list[Path]): fhs = [fh] if not isinstance(fh, list) else fh self.fh = fhs @@ -138,7 +53,7 @@ def __init__(self, fh: BinaryIO | list[BinaryIO]): for i in range(len(fhs)): try: - segment = self.open_segment(i) + segment = self.segment(i) except Exception: log.exception("Failed to parse as EWF file: %s", fh) continue @@ -162,12 +77,12 @@ def __init__(self, fh: BinaryIO | list[BinaryIO]): self.chunk_size = self.volume.sector_count * self.volume.sector_size max_size = self.volume.chunk_count * self.volume.sector_count * self.volume.sector_size - last_table = self.open_segment(len(self.fh) - 1).tables[-1] + last_table = self.segment(len(self.fh) - 1).tables[-1] last_chunk_size = len(last_table.read_chunk(last_table.num_entries - 1)) self.size = max_size - (self.chunk_size - last_chunk_size) - def open_segment(self, idx: int) -> Segment: + def segment(self, idx: int) -> Segment: # 
Poor mans LRU if idx in self._segments: self._segment_lru.remove(idx) @@ -226,7 +141,7 @@ def _read(self, offset: int, length: int) -> bytes: if segment_idx > len(self.ewf._segment_offsets): raise EWFError(f"Missing EWF file for segment index: {segment_idx}") - segment = self.ewf.open_segment(segment_idx) + segment = self.ewf.segment(segment_idx) segment_remaining_sectors = segment.sector_count - (sector_offset - segment.sector_offset) segment_sectors = min(segment_remaining_sectors, sector_count) diff --git a/dissect/evidence/exceptions.py b/dissect/evidence/exception.py similarity index 61% rename from dissect/evidence/exceptions.py rename to dissect/evidence/exception.py index dd8bc59..50130b0 100644 --- a/dissect/evidence/exceptions.py +++ b/dissect/evidence/exception.py @@ -1,6 +1,21 @@ class Error(Exception): - """Base class for exceptions for this module. - It is used to recognize errors specific to this module""" + pass + + +class FileNotFoundError(Error, FileNotFoundError): + pass + + +class IsADirectoryError(Error, IsADirectoryError): + pass + + +class NotADirectoryError(Error, NotADirectoryError): + pass + + +class NotASymlinkError(Error): + pass class EWFError(Error): diff --git a/dissect/evidence/tools/adcrypt.py b/dissect/evidence/tools/adcrypt.py new file mode 100644 index 0000000..189df3e --- /dev/null +++ b/dissect/evidence/tools/adcrypt.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import argparse +import logging +import shutil +from pathlib import Path + +from dissect.evidence.ad1.ad1 import find_files as find_ad1_files +from dissect.evidence.adcrypt.adcrypt import ADCrypt, is_adcrypt +from dissect.evidence.ewf.ewf import find_files as find_ewf_files +from dissect.evidence.tools.util import catch_sigpipe + +logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]\t%(message)s") +log = logging.getLogger(__name__) + + +@catch_sigpipe +def main() -> None: + help_formatter = argparse.ArgumentDefaultsHelpFormatter + 
parser = argparse.ArgumentParser( + prog="adcrypt", + description="Decrypt E01 or AD1 ADCRYPT encrypted segment files.", + fromfile_prefix_chars="@", + formatter_class=help_formatter, + ) + + parser.add_argument("input", type=Path, help="path to encrypted file") + parser.add_argument("-p", "--passphrase", type=str, help="user passphrase or certificate passphrase") + parser.add_argument("-c", "--certificate", type=Path, help="user certificate") + parser.add_argument("-o", "--output", type=Path, required=True, help="path to output directory") + + args = parser.parse_args() + + in_path: Path = args.input.resolve() + out_path: Path = args.output.resolve() + + if not in_path.exists(): + parser.exit(f"Input file does not exist: {in_path}") + + if not out_path.is_dir(): + parser.exit(f"Output directory does not exist: {out_path}") + + if in_path.parent == out_path: + parser.exit("Output directory cannot be the same as the input file directory") + + if not args.passphrase and not args.certificate: + parser.exit("No passphrase or certificate provided") + + segments = find_ad1_files(in_path) if in_path.suffix.lower() == ".ad1" else find_ewf_files(in_path) + if not segments: + parser.exit(f"No AD1 or E01 segment files found at: {in_path}") + + with segments[0].open("rb") as fh: + if not is_adcrypt(fh): + parser.exit(f"File is not an ADCRYPT container: {segments[0]}") + + adcrypt = ADCrypt(fh) + + try: + adcrypt.unlock(passphrase=args.passphrase, private_key=args.certificate) + except (ValueError, TypeError) as e: + log.exception(e, exc_info=False) # noqa: TRY401 + parser.exit(1) + + for i, segment in enumerate(segments): + with segment.open("rb") as fh_in, out_path.joinpath(segment.name).open("wb") as fh_out: + fh_crypt = adcrypt.wrap(fh_in, index=i) + log.info("Decrypting segment file %r (%s MB) ..", segment.name, fh_crypt.size // 1024 // 1024) + shutil.copyfileobj(fh_crypt, fh_out) + + log.info("Finished decrypting file(s), result saved to %s", out_path) diff --git 
a/dissect/evidence/tools/asdf/repair.py b/dissect/evidence/tools/asdf/repair.py index 096e774..e534ec7 100644 --- a/dissect/evidence/tools/asdf/repair.py +++ b/dissect/evidence/tools/asdf/repair.py @@ -8,7 +8,7 @@ from pathlib import Path from dissect.evidence.asdf import asdf -from dissect.evidence.asdf.streams import HashedStream +from dissect.evidence.asdf.stream import HashedStream def main() -> int: diff --git a/dissect/evidence/tools/util.py b/dissect/evidence/tools/util.py new file mode 100644 index 0000000..d62e1aa --- /dev/null +++ b/dissect/evidence/tools/util.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import errno +import os +import sys +from functools import wraps +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Callable + + +def catch_sigpipe(func: Callable) -> Callable: + """Catches ``KeyboardInterrupt`` and ``BrokenPipeError`` (``OSError 22`` on Windows).""" + + @wraps(func) + def wrapper(*args, **kwargs) -> int: + try: + return func(*args, **kwargs) + except KeyboardInterrupt: + print("Aborted!", file=sys.stderr) + return 1 + except OSError as e: + # Only catch BrokenPipeError or OSError 22 + if e.errno in (errno.EPIPE, errno.EINVAL): + devnull = os.open(os.devnull, os.O_WRONLY) + os.dup2(devnull, sys.stdout.fileno()) + return 1 + # Raise other exceptions + raise + + return wrapper diff --git a/pyproject.toml b/pyproject.toml index 25e573f..9a09d58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,9 @@ documentation = "https://docs.dissect.tools/en/latest/projects/dissect.evidence" repository = "https://github.com/fox-it/dissect.evidence" [project.optional-dependencies] +full = [ + "pycryptodome", +] dev = [ "dissect.cstruct>=4.0.dev,<5.0.dev", "dissect.util>=3.0.dev,<4.0.dev", @@ -44,6 +47,7 @@ dev = [ [dependency-groups] test = [ "pytest", + "pycryptodome", ] lint = [ "ruff==0.13.1", @@ -67,6 +71,8 @@ asdf-meta = "dissect.evidence.tools.asdf.meta:main" asdf-repair = 
"dissect.evidence.tools.asdf.repair:main" asdf-verify = "dissect.evidence.tools.asdf.verify:main" +adcrypt = "dissect.evidence.tools.adcrypt:main" + [tool.ruff] line-length = 120 required-version = ">=0.13.1" @@ -112,7 +118,8 @@ select = [ ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003"] [tool.ruff.lint.per-file-ignores] -"tests/docs/**" = ["INP001"] +"tests/_docs/**" = ["INP001"] +"*.pyi" = ["E", "F", "PYI"] [tool.ruff.lint.isort] known-first-party = ["dissect.evidence"] diff --git a/tests/_data/ad1/compressed.ad1 b/tests/_data/ad1/compressed.ad1 new file mode 100644 index 0000000..96c93c4 --- /dev/null +++ b/tests/_data/ad1/compressed.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d88b6186b732dd7be752df52ed863bd9d2c273b1c8b2b3520e9032bfa1018a7c +size 2197 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad1 b/tests/_data/ad1/encrypted-certificate/encrypted.ad1 new file mode 100644 index 0000000..e1db39c --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8b641de1a8490bd8dac6dab4b5252358d39ffd07ad71091581efe65e134091 +size 10486272 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad1.csv b/tests/_data/ad1/encrypted-certificate/encrypted.ad1.csv new file mode 100644 index 0000000..9e6a4f5 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad1.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c103df1e0fd8809e542902bec8bd47c139a953a7b1052f985c3bc393321ffb +size 5346 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad1.txt b/tests/_data/ad1/encrypted-certificate/encrypted.ad1.txt new file mode 100644 index 0000000..b9a2011 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad1.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:905b94bfb16a1a2abbcc84b6d096ed7d6543024bf420012c810b3c8e9e4df384 +size 1333 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad2 b/tests/_data/ad1/encrypted-certificate/encrypted.ad2 new file mode 100644 index 0000000..0e29206 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b55aa38b1309fa0919d886be0785d8732611a81b706e5e56278dda2c2c733e9a +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad3 b/tests/_data/ad1/encrypted-certificate/encrypted.ad3 new file mode 100644 index 0000000..b72aae7 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07cb83a405d770a55ae1822200d4a97c17825555e98efe46c9aef3a81d4b2471 +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad4 b/tests/_data/ad1/encrypted-certificate/encrypted.ad4 new file mode 100644 index 0000000..dfc462c --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00e980bc5874e0d04931bec2966f7972d0685b99dddda81b91a3e1924e10958 +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad5 b/tests/_data/ad1/encrypted-certificate/encrypted.ad5 new file mode 100644 index 0000000..5f6d8ad --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4094dc5068809feffa4a76726f559c0dc2ab83be0caf999b4dd01e55fec891a +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad6 b/tests/_data/ad1/encrypted-certificate/encrypted.ad6 new file mode 100644 index 0000000..eea05c2 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad6 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ab6ca8b068dc011bdce2b5d95e6773e1370c717cee6378102fe41a7ee9f81d27 +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad7 b/tests/_data/ad1/encrypted-certificate/encrypted.ad7 new file mode 100644 index 0000000..e20baa7 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad7 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ab919a239e060a95b0b7ae07b211d4141c3574c6da19054e4e135dd5492397 +size 4879306 diff --git a/tests/_data/ad1/encrypted-certificate/key b/tests/_data/ad1/encrypted-certificate/key new file mode 100644 index 0000000..55ec2fb --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/key @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25ae23283b944d75442d3e68251965360f332d90be0ff4a6e705f14172fb3e5 +size 1679 diff --git a/tests/_data/ad1/encrypted-certificate/key.pub b/tests/_data/ad1/encrypted-certificate/key.pub new file mode 100644 index 0000000..25c9382 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/key.pub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2726fe4fe1b5f94c427d320bcc5800c8ada22b5c68172477decbe04af79d6e9d +size 451 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad1 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1 new file mode 100644 index 0000000..a880bd8 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8126f55a545935a465a3b632bbced287b2843fae2a5f398c48d8a98e1bdbd26a +size 5243392 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.csv b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.csv new file mode 100644 index 0000000..fc1c322 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b35e282a4e3779daf0064ea2cc8151ce5595b4785553d9cae41c80cb88b39bc7 +size 4442 diff --git 
a/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.txt b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.txt new file mode 100644 index 0000000..5092ebd --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f365b39abd294b05f970a257bd5ce139422db8413b1aa160f4d396125c596756 +size 1591 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad10 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad10 new file mode 100644 index 0000000..2b6b155 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad10 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd03b931de0bc28660aa2ce48837483ec11d84ee40bacbaabc8128e1d10e6227 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad11 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad11 new file mode 100644 index 0000000..8ddd568 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad11 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5f556af11969a548d7885582424d10aa9c4419f59f5a82430c535df196fd78 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad12 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad12 new file mode 100644 index 0000000..d333422 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad12 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb8d89a1fc55d10afafeb4e547d950b24a21e299108bc3495c5efc66b81d516 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad13 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad13 new file mode 100644 index 0000000..8f9c02d --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad13 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79d0afc7c4c1be7d538caea99fcc076e8f70f6dd7f1fa7db8594d508ced8156 +size 4879997 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad2 
b/tests/_data/ad1/encrypted-passphrase/encrypted.ad2 new file mode 100644 index 0000000..e8a7ca4 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11fc38388348b8634a37d18076630b3b76b19ff06a6598cffe8ea8c6945ed53 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad3 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad3 new file mode 100644 index 0000000..8664b80 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2dc33da7512f67d3328c9fa324560e191bbb8e1d9e2d33b8ffcb7972715771 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad4 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad4 new file mode 100644 index 0000000..43437b3 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c407b3c6efb7b52f47a3c481c3386b2b9581cdd8bbde3bd239212199c68cd221 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad5 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad5 new file mode 100644 index 0000000..1cc85cf --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5486f180f12f40d608615acb61bcca4b707ecfb63786059eec6d43d37bcf8f +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad6 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad6 new file mode 100644 index 0000000..08bb7f9 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad6 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98843957581e49c0a11d946d01001eb34e4a733b2abfa6ed15f4af5f0e4ab5e0 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad7 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad7 new file mode 100644 index 
0000000..d977c17 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad7 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37671e847c1f69a284e338c27c7003907f3b25720c22b77bb744c0d32b3c8520 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad8 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad8 new file mode 100644 index 0000000..2253c79 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad8 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25e381827e9ce27902951453d78e0e9b315b539efa6da9f9411f76c4e5d1530 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad9 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad9 new file mode 100644 index 0000000..1227e24 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad9 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f15ca2877bb31870c68cf4997565a33150d0ceaed2aee731aaff72b24f74681 +size 5242880 diff --git a/tests/_data/ad1/long.ad1 b/tests/_data/ad1/long.ad1 new file mode 100644 index 0000000..2f7f46f --- /dev/null +++ b/tests/_data/ad1/long.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1245a140cfd79870781080d74aeec2f90c9b4530b2ac12e9a3b77c6015262b0f +size 2554 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad1 b/tests/_data/ad1/pcbje/text-and-pictures.ad1 new file mode 100644 index 0000000..434078e --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b48affafe6826f226bb4b3e0c97add2bc8766a6740ad992001515767d955ff8d +size 2097152 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad1.txt b/tests/_data/ad1/pcbje/text-and-pictures.ad1.txt new file mode 100644 index 0000000..2b8ed4a --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad1.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:24301f28955b835630b6ba7c026741b6bba307a1f6377ae567c9d4e230d26a93 +size 1051 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad2 b/tests/_data/ad1/pcbje/text-and-pictures.ad2 new file mode 100644 index 0000000..c381c10 --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bb53246dec28cf699f68656233138dc7842d789ca2aed7c712b281f19cbb062 +size 2097152 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad3 b/tests/_data/ad1/pcbje/text-and-pictures.ad3 new file mode 100644 index 0000000..0a0754b --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262db84b9d479b6e7ff1e68aafb89739ea55105f03225ef1d69298c72472d05b +size 2097152 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad4 b/tests/_data/ad1/pcbje/text-and-pictures.ad4 new file mode 100644 index 0000000..359f06b --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a50791bbb4a8bc374f386d6dbad702a153fa217df0df3b91b73bbf0a960ab8dd +size 1429862 diff --git a/tests/_data/ad1/test.ad1 b/tests/_data/ad1/test.ad1 new file mode 100644 index 0000000..66b0b05 --- /dev/null +++ b/tests/_data/ad1/test.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c7b2a1b296a75590fd3f31d2d595cdad6c2442c2f394251c57506e9c488481a +size 2264 diff --git a/tests/_data/ewf/ewf.E01 b/tests/_data/ewf/ewf.E01 new file mode 100644 index 0000000..964a591 --- /dev/null +++ b/tests/_data/ewf/ewf.E01 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9b150a1f40024c7b0c3cf3c09cf809a8636419cab5f55ab4d4f78c918c1e082 +size 7630 diff --git a/tests/docs/Makefile b/tests/_docs/Makefile similarity index 100% rename from tests/docs/Makefile rename to tests/_docs/Makefile diff --git a/dissect/evidence/aff4.py b/tests/_docs/__init__.py similarity index 100% 
rename from dissect/evidence/aff4.py rename to tests/_docs/__init__.py diff --git a/tests/docs/conf.py b/tests/_docs/conf.py similarity index 83% rename from tests/docs/conf.py rename to tests/_docs/conf.py index 7ef62d3..49e2f26 100644 --- a/tests/docs/conf.py +++ b/tests/_docs/conf.py @@ -1,3 +1,5 @@ +project = "dissect.evidence" + extensions = [ "autoapi.extension", "sphinx.ext.autodoc", @@ -32,3 +34,8 @@ autodoc_member_order = "groupwise" autosectionlabel_prefix_document = True + +suppress_warnings = [ + # https://github.com/readthedocs/sphinx-autoapi/issues/285 + "autoapi.python_import_resolution", +] diff --git a/tests/docs/index.rst b/tests/_docs/index.rst similarity index 100% rename from tests/docs/index.rst rename to tests/_docs/index.rst diff --git a/tests/_utils.py b/tests/_utils.py new file mode 100644 index 0000000..7ace382 --- /dev/null +++ b/tests/_utils.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from pathlib import Path + + +def absolute_path(filename: str) -> Path: + return Path(__file__).parent.joinpath(filename).resolve() diff --git a/tests/conftest.py b/tests/conftest.py index c8a7ea4..3cfa66f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ import pytest from dissect.evidence.asdf import AsdfWriter +from tests._utils import absolute_path if TYPE_CHECKING: from collections.abc import Iterator @@ -18,23 +19,65 @@ def open_data(name: str) -> Iterator[BinaryIO]: @pytest.fixture -def ad1_data() -> Iterator[BinaryIO]: - yield from open_data("data/ad1_test.ad1") +def ad1_basic() -> Iterator[BinaryIO]: + yield from open_data("_data/ad1/test.ad1") @pytest.fixture -def ad1_data_long() -> Iterator[BinaryIO]: - yield from open_data("data/ad1_long.ad1") +def ad1_long() -> Iterator[BinaryIO]: + yield from open_data("_data/ad1/long.ad1") @pytest.fixture -def ad1_data_compressed() -> Iterator[BinaryIO]: - yield from open_data("data/ad1_test_compressed.ad1") +def ad1_compressed() -> Iterator[BinaryIO]: + yield from 
open_data("_data/ad1/compressed.ad1") + + +@pytest.fixture +def ad1_segmented() -> list[Path]: + return [ + absolute_path("_data/ad1/pcbje/text-and-pictures.ad1"), + absolute_path("_data/ad1/pcbje/text-and-pictures.ad2"), + absolute_path("_data/ad1/pcbje/text-and-pictures.ad3"), + absolute_path("_data/ad1/pcbje/text-and-pictures.ad4"), + ] + + +@pytest.fixture +def ad1_encrypted_passphrase() -> list[Path]: + return [ + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad1"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad2"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad3"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad4"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad5"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad6"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad7"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad8"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad9"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad10"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad11"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad12"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad13"), + ] + + +@pytest.fixture +def ad1_encrypted_certificate() -> list[Path]: + return [ + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad1"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad2"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad3"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad4"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad5"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad6"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad7"), + ] @pytest.fixture def ewf_data() -> Iterator[BinaryIO]: - yield from open_data("data/ewf.E01") + yield from open_data("_data/ewf/ewf.E01") @pytest.fixture diff --git 
a/tests/data/ad1_long.ad1 b/tests/data/ad1_long.ad1 deleted file mode 100644 index db49960..0000000 Binary files a/tests/data/ad1_long.ad1 and /dev/null differ diff --git a/tests/data/ad1_test.ad1 b/tests/data/ad1_test.ad1 deleted file mode 100644 index c22486c..0000000 Binary files a/tests/data/ad1_test.ad1 and /dev/null differ diff --git a/tests/data/ad1_test_compressed.ad1 b/tests/data/ad1_test_compressed.ad1 deleted file mode 100644 index 7baa504..0000000 Binary files a/tests/data/ad1_test_compressed.ad1 and /dev/null differ diff --git a/tests/data/ewf.E01 b/tests/data/ewf.E01 deleted file mode 100644 index ba270b1..0000000 Binary files a/tests/data/ewf.E01 and /dev/null differ diff --git a/tests/test_ad1.py b/tests/test_ad1.py index 97ed03b..0393fc0 100644 --- a/tests/test_ad1.py +++ b/tests/test_ad1.py @@ -1,30 +1,56 @@ from __future__ import annotations import hashlib -from typing import BinaryIO +from datetime import datetime, timezone +from typing import TYPE_CHECKING, BinaryIO -from dissect.evidence import ad1 +import pytest +from dissect.evidence.ad1 import ad1 +from dissect.evidence.ad1.ad1 import EntryType, find_files +from tests._utils import absolute_path -def test_ad1(ad1_data: BinaryIO) -> None: - a = ad1.AD1(ad1_data) +if TYPE_CHECKING: + from pathlib import Path - assert a.header.magic == b"ADSEGMENTEDFILE\x00" - assert a.root.name == b"E:\\AD1_test" - assert len(a.root.children) == 2 - assert a.root.children[0].name == b"doc1.txt" - assert a.root.children[0].open().read() == b"Inhoud document 1" +def test_ad1(ad1_basic: BinaryIO) -> None: + """Test if we can parse a basic non-segmented AD1 file with no file hierarchy.""" -def test_ad1_long(ad1_data_long: BinaryIO) -> None: - a = ad1.AD1(ad1_data_long) + fs = ad1.AD1(ad1_basic) + assert fs.segment(0).header.magic == b"ADSEGMENTEDFILE\x00" - assert a.header.magic == b"ADSEGMENTEDFILE\x00" - assert a.root.name == b"E:\\testdatamap 2 met spaties en een heel stuk langer" - assert len(a.root.children) 
== 2 + assert fs.root.is_dir() + assert fs.root.listdir() == ["E:"] - entry = a.root.children[0] - assert entry.name == b"een lange filenaam 1 met spaties.txt" + file = fs.get("E:/AD1_test/doc1.txt") + assert file.is_file() + assert file.size == 17 + assert file.atime == datetime(2017, 3, 31, 18, 2, 31, 189682, tzinfo=timezone.utc) + assert file.open().read() == b"Inhoud document 1" + + +def test_ad1_long(ad1_long: BinaryIO) -> None: + """Test if we can parse a basic non-segmented AD1 file with long file names.""" + + fs = ad1.AD1(ad1_long) + + assert fs.segment(0).header.magic == b"ADSEGMENTEDFILE\x00" + assert fs.root.is_dir() + + assert [file.name for file in fs.root.children] == ["E:"] + + assert [file.name for file in fs.get("E:").children] == [ + "testdatamap 2 met spaties en een heel stuk langer", + ] + + assert [file.name for file in fs.get("E:/testdatamap 2 met spaties en een heel stuk langer").iterdir()] == [ + "een lange filenaam 1 met spaties.txt", + "Een nog langere bestandsnaam met nog meer tekens en 12345.txt", + ] + + entry = fs.get("E:/testdatamap 2 met spaties en een heel stuk langer").children[0] + assert entry.name == "een lange filenaam 1 met spaties.txt" assert entry.open().read() == ( b"masdhdslkfjasdfjlksadjflkjsda;lfj\r\nasdflk\r\na;lsdkf\r\n" b";lasdklf;lkasd\r\n;lk\r\nfask;ldkf\r\n;lka\r\nsd;lkf\r\n" @@ -33,10 +59,202 @@ def test_ad1_long(ad1_data_long: BinaryIO) -> None: b"'g'asldjg';askg\r\nkqe\r\n-[" ) md5sum = hashlib.md5(entry.open().read()) - assert md5sum.hexdigest().encode() == next(meta for meta in entry.meta if meta.type == ad1.MetaType.MD5).data + assert md5sum.hexdigest() == entry.md5 + + +def test_ad1_compressed(ad1_compressed: BinaryIO) -> None: + """Test if we can parse a non-segmented AD1 file with standard zlib compression.""" + + fs = ad1.AD1(ad1_compressed) + + assert fs.segment(0).header.magic == b"ADSEGMENTEDFILE\x00" + + assert fs.get("/").listdir() == ["E:"] + assert fs.get("E:/AD1_test").listdir() == ["doc1.txt", 
"doc2.txt"] + assert fs.get("E:/AD1_test/doc1.txt").open().read() == b"Inhoud document 1" + + +@pytest.mark.parametrize( + ("path", "expected_files"), + [ + pytest.param( + "_data/ad1/pcbje/text-and-pictures.ad1", + [ + "text-and-pictures.ad1", + "text-and-pictures.ad2", + "text-and-pictures.ad3", + "text-and-pictures.ad4", + ], + id="segmented-simple", + ), + pytest.param( + "_data/ad1/encrypted-passphrase/encrypted.ad1", + [ + "encrypted.ad1", + "encrypted.ad2", + "encrypted.ad3", + "encrypted.ad4", + "encrypted.ad5", + "encrypted.ad6", + "encrypted.ad7", + "encrypted.ad8", + "encrypted.ad9", + "encrypted.ad10", + "encrypted.ad11", + "encrypted.ad12", + "encrypted.ad13", + ], + id="segmented-natural-sorting", + ), + ], +) +def test_ad1_find_files(path: str, expected_files: list[str]) -> None: + """Test if we correctly find and order segmented AD1 files and do not find .txt or .csv artifact files.""" + + files = find_files(absolute_path(path)) + assert [file.name for file in files] == expected_files + + +def test_ad1_segmented(ad1_segmented: list[Path]) -> None: + """Test if we can parse segmented AD1 files. 
+ + References: + - https://github.com/pcbje/pyad1/tree/master/test_data + """ + + fs = ad1.AD1(ad1_segmented) + + assert len(fs.fh) == 4 + assert fs.segment(0).number == 1 + assert fs.segment(0).count == 4 + assert fs.segment(0).size == 0x200000 - 512 + + assert fs.logical_image.version == 4 + assert fs.logical_image.name == "C:\\Users\\pcbje\\Desktop\\Data" + + dir = fs.get("C:/Users/pcbje/Desktop/Data/Pictures") + assert dir.is_dir() + assert not dir.is_symlink() + assert not dir.is_file() + assert dir.name == "Pictures" + assert dir.type == EntryType.Directory + assert dir.size == 0 + assert dir.btime == datetime(2018, 5, 2, 7, 34, 11, 284926, tzinfo=timezone.utc) + assert dir.mtime == datetime(2018, 5, 2, 7, 42, 39, 841574, tzinfo=timezone.utc) + assert dir.ctime == datetime(2018, 5, 2, 7, 42, 39, 841574, tzinfo=timezone.utc) + assert dir.atime == datetime(2018, 5, 2, 7, 46, 59, 164650, tzinfo=timezone.utc) + assert dir.listdir() == [ + "0-0-581-Hydrangeas.jpg", + "1-0-858-Chrysanthemum.jpg", + "2-0-826-Desert.jpg", + "4-0-757-Jellyfish.jpg", + "5-0-762-Koala.jpg", + "6-0-548-Lighthouse.jpg", + "7-0-759-Penguins.jpg", + ] + + picture = fs.get("C:/Users/pcbje/Desktop/Data/Pictures/5-0-762-Koala.jpg") + assert picture.is_file() + assert not picture.is_dir() + assert not picture.is_symlink() + assert picture.btime == datetime(2018, 1, 28, 7, 18, 0, tzinfo=timezone.utc) + assert picture.mtime == datetime(2018, 5, 2, 7, 42, 34, 287014, tzinfo=timezone.utc) + assert picture.ctime == datetime(2018, 5, 2, 7, 42, 34, 287014, tzinfo=timezone.utc) + assert picture.atime == datetime(2018, 5, 2, 7, 42, 35, 611785, tzinfo=timezone.utc) + + buf = picture.open().read() + assert picture.name == "5-0-762-Koala.jpg" + assert picture.size == 780831 + assert len(buf) == 780831 + assert picture.sha1 == "9c3dcb1f9185a314ea25d51aed3b5881b32f420c" + assert hashlib.sha1(buf).hexdigest() == "9c3dcb1f9185a314ea25d51aed3b5881b32f420c" + + +def 
test_adcrypt_passphrase(ad1_encrypted_passphrase: list[Path]) -> None: + """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" + fs = ad1.AD1(ad1_encrypted_passphrase) + + assert fs.is_adcrypt() + assert fs.is_locked() + + with pytest.raises(ValueError, match="AD1 container is locked by ADCRYPT"): + fs.get("/") + + with pytest.raises(ValueError, match="Unable to unlock: HMAC verification of passphrase failed"): + fs.unlock(passphrase="asdf") + + fs.unlock(passphrase="password") + + assert fs.adcrypt.key.hex() == "9030a43f29689a045e815cf4f0ad82b68850063b414f2797f0897e188f98d7b4" + + assert fs.get("C:/Users/User/Downloads").listdir() == [ + "7z2501-x64.exe", + "desktop.ini", + "Exterro_FTK_Imager_(x64)-4.7.3.81.exe", + "hans-veth-8y--BAFlC9c-unsplash.jpg", + "marc-olivier-jodoin-tauPAnOIGvE-unsplash.jpg", + "marek-szturc-8Ou3EZmTMWA-unsplash.jpg", + "milo-weiler-1AIYdIb3O5M-unsplash.jpg", + ] + + for file in fs.get("C:/Users/User/Downloads").iterdir(): + buf = file.open().read() + assert len(buf) == file.size + assert hashlib.sha1(buf).hexdigest() == file.sha1 + + +def test_adcrypt_certificate(ad1_encrypted_certificate: list[Path]) -> None: + """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" + fs = ad1.AD1(ad1_encrypted_certificate) + + assert fs.is_adcrypt() + assert fs.is_locked() + + with pytest.raises(ValueError, match="AD1 container is locked by ADCRYPT"): + fs.get("/") + + with pytest.raises(ValueError, match="Unable to unlock: HMAC verification of passphrase failed"): + fs.unlock(passphrase="asdf") + + fs.unlock(private_key=absolute_path("_data/ad1/encrypted-certificate/key")) + + assert fs.adcrypt.key.hex() == "6cc0a9f94f944381cc51be474e5da6178059324bb457a87e0035b80f80ff9d4b" + + assert fs.get("C:/Users/User/Downloads").listdir() == [ + "desktop.ini", + "hans-veth-8y--BAFlC9c-unsplash.jpg", + "key.pem", + "marc-olivier-jodoin-tauPAnOIGvE-unsplash.jpg", + 
"marek-szturc-8Ou3EZmTMWA-unsplash.jpg", + "milo-weiler-1AIYdIb3O5M-unsplash.jpg", + "programs", + ] + + for file in fs.get("C:/Users/User/Downloads").iterdir(): + if file.is_dir(): + continue + + buf = file.open().read() + assert len(buf) == file.size + assert hashlib.sha1(buf).hexdigest() == file.sha1 + + assert fs.get("C:/Users/User/Downloads/programs").listdir() == [ + "7z2501-x64.exe", + "Exterro_FTK_Imager_(x64)-4.7.3.81.exe", + ] + + +def test_ad1_segment_lru(ad1_segmented: list[Path], monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(ad1, "MAX_OPEN_SEGMENTS", 2) + + fs = ad1.AD1(ad1_segmented) + assert fs._segment_lru == [3, 0] + fs.segment(2) + assert fs._segment_lru == [0, 2] -def test_ad1_compressed(ad1_data_compressed: BinaryIO) -> None: - a = ad1.AD1(ad1_data_compressed) + fs.segment(1) + assert fs._segment_lru == [2, 1] - assert a.root.children[0].open().read() == b"Inhoud document 1" + picture = fs.get("C:/Users/pcbje/Desktop/Data/Pictures/5-0-762-Koala.jpg") + assert hashlib.sha1(picture.open().read()).hexdigest() == "9c3dcb1f9185a314ea25d51aed3b5881b32f420c" diff --git a/tests/test_asdf.py b/tests/test_asdf.py index a303e13..d1b6e33 100644 --- a/tests/test_asdf.py +++ b/tests/test_asdf.py @@ -6,8 +6,8 @@ import pytest from dissect.evidence.asdf.asdf import AsdfSnapshot, AsdfWriter -from dissect.evidence.asdf.streams import CompressedStream, Crc32Stream, HashedStream -from dissect.evidence.exceptions import InvalidSnapshot +from dissect.evidence.asdf.stream import CompressedStream, Crc32Stream, HashedStream +from dissect.evidence.exception import InvalidSnapshot def test_asdf(asdf_writer: AsdfWriter) -> None: diff --git a/tests/test_ewf.py b/tests/test_ewf.py index 08a1da6..c8f3daa 100644 --- a/tests/test_ewf.py +++ b/tests/test_ewf.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, BinaryIO from unittest.mock import MagicMock, patch -from dissect.evidence import ewf +from dissect.evidence.ewf import ewf if TYPE_CHECKING: import 
pytest @@ -16,7 +16,7 @@ def test_ewf(ewf_data: BinaryIO) -> None: assert e.open().read(4097) == (b"\xde\xad\xbe\xef" * 1024) + b"\n" -@patch("dissect.evidence.ewf.Segment") +@patch("dissect.evidence.ewf.ewf.Segment") def test_ewf_open_segment(MockSegment: MagicMock, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr(ewf, "MAX_OPEN_SEGMENTS", 2) @@ -30,12 +30,12 @@ def test_ewf_open_segment(MockSegment: MagicMock, monkeypatch: pytest.MonkeyPatc assert e._segment_offsets == [2, 4, 6] assert e._segment_lru == [2, 3] - tmp = e.open_segment(0) + tmp = e.segment(0) assert tmp.offset == 0 assert tmp.sector_offset == 0 assert e._segment_lru == [3, 0] - tmp = e.open_segment(1) + tmp = e.segment(1) assert tmp.offset == 1024 assert tmp.sector_offset == 2 assert e._segment_lru == [0, 1] diff --git a/tests/test_exception.py b/tests/test_exception.py new file mode 100644 index 0000000..bcf5fdf --- /dev/null +++ b/tests/test_exception.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import pytest + +from dissect.evidence import exception + + +@pytest.mark.parametrize( + ("exc", "std"), + [ + (exception.FileNotFoundError, FileNotFoundError), + (exception.IsADirectoryError, IsADirectoryError), + (exception.NotADirectoryError, NotADirectoryError), + ], +) +def test_filesystem_error_subclass(exc: exception.Error, std: Exception) -> None: + assert issubclass(exc, std) + assert isinstance(exc(), std) + + with pytest.raises(std): + raise exc() diff --git a/tests/tools/__init__.py b/tests/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/tools/test_adcrypt.py b/tests/tools/test_adcrypt.py new file mode 100644 index 0000000..9d51a5d --- /dev/null +++ b/tests/tools/test_adcrypt.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import hashlib +import logging +from typing import TYPE_CHECKING + +from dissect.evidence.ad1.ad1 import find_files +from dissect.evidence.tools import adcrypt +from tests._utils import absolute_path + +if 
TYPE_CHECKING: + from pathlib import Path + + import pytest + + +def test_adcrypt_passphrase(tmp_path: Path, caplog: pytest.LogCaptureFixture, monkeypatch: pytest.MonkeyPatch) -> None: + """Test if we can decrypt ADCRYPT AD1 images using the adcrypt tool.""" + + with caplog.at_level(logging.DEBUG, adcrypt.log.name), monkeypatch.context() as m: + m.setattr( + "sys.argv", + [ + "adcrypt", + str(absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad1")), + "-p", + "password", + "-o", + str(tmp_path), + ], + ) + + adcrypt.main() + + for i in range(1, 14): + assert f"Decrypting segment file 'encrypted.ad{i}'" in caplog.text + + assert tmp_path.joinpath("encrypted.ad1").exists() + + ctx = hashlib.sha1() + for path in find_files(tmp_path.joinpath("encrypted.ad1")): + ctx.update(path.read_bytes()) + + assert ctx.hexdigest() == "3b7449fd09e5803006ce1b3aba5bb4c48c083f12" + + +def test_adcrypt_certificate(tmp_path: Path, caplog: pytest.LogCaptureFixture, monkeypatch: pytest.MonkeyPatch) -> None: + """Test if we can decrypt ADCRYPT AD1 images using the adcrypt tool.""" + + with caplog.at_level(logging.DEBUG, adcrypt.log.name), monkeypatch.context() as m: + m.setattr( + "sys.argv", + [ + "adcrypt", + str(absolute_path("_data/ad1/encrypted-certificate/encrypted.ad1")), + "-c", + str(absolute_path("_data/ad1/encrypted-certificate/key")), + "-o", + str(tmp_path), + ], + ) + + adcrypt.main() + + for i in range(1, 8): + assert f"Decrypting segment file 'encrypted.ad{i}'" in caplog.text + + assert tmp_path.joinpath("encrypted.ad1").exists() + + ctx = hashlib.sha1() + for path in find_files(tmp_path.joinpath("encrypted.ad1")): + ctx.update(path.read_bytes()) + + assert ctx.hexdigest() == "23cdf7c35327d5b24c81ff48b483ae805c27df6a" diff --git a/tox.ini b/tox.ini index 39e3968..9685232 100644 --- a/tox.ini +++ b/tox.ini @@ -60,12 +60,12 @@ deps = sphinx-design furo commands = - make -C tests/docs clean - make -C tests/docs html + make -C tests/_docs clean + make -C tests/_docs html 
[testenv:docs-linkcheck] allowlist_externals = make deps = {[testenv:docs-build]deps} commands = - make -C tests/docs clean - make -C tests/docs linkcheck + make -C tests/_docs clean + make -C tests/_docs linkcheck