From 78c6983aa49c86f956877cfe70be56e4dd730d83 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 1 Sep 2025 10:46:05 +0200 Subject: [PATCH 1/9] fix docs --- README.md | 2 +- pyproject.toml | 2 +- tests/{docs => _docs}/Makefile | 0 tests/_docs/__init__.py | 0 tests/{docs => _docs}/conf.py | 8 ++++++++ tests/{docs => _docs}/index.rst | 0 tox.ini | 8 ++++---- 7 files changed, 14 insertions(+), 6 deletions(-) rename tests/{docs => _docs}/Makefile (100%) create mode 100644 tests/_docs/__init__.py rename tests/{docs => _docs}/conf.py (81%) rename tests/{docs => _docs}/index.rst (100%) diff --git a/README.md b/README.md index ffc1e26..7c60315 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # dissect.evidence -A Dissect module implementing a parsers for various forensic evidence file containers, currently: AD1, ASDF and EWF. For +A Dissect module implementing parsers for various forensic evidence file containers, currently: AD1, ADCRYPT, ASDF and EWF. For more information, please see [the documentation](https://docs.dissect.tools/en/latest/projects/dissect.evidence/index.html). 
## Requirements diff --git a/pyproject.toml b/pyproject.toml index c03d40d..ecea7f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,7 +92,7 @@ select = [ ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003"] [tool.ruff.lint.per-file-ignores] -"tests/docs/**" = ["INP001"] +"tests/_docs/**" = ["INP001"] [tool.ruff.lint.isort] known-first-party = ["dissect.evidence"] diff --git a/tests/docs/Makefile b/tests/_docs/Makefile similarity index 100% rename from tests/docs/Makefile rename to tests/_docs/Makefile diff --git a/tests/_docs/__init__.py b/tests/_docs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/docs/conf.py b/tests/_docs/conf.py similarity index 81% rename from tests/docs/conf.py rename to tests/_docs/conf.py index 7ef62d3..faf8d58 100644 --- a/tests/docs/conf.py +++ b/tests/_docs/conf.py @@ -1,3 +1,5 @@ +project = "dissect.evidence" + extensions = [ "autoapi.extension", "sphinx.ext.autodoc", @@ -32,3 +34,9 @@ autodoc_member_order = "groupwise" autosectionlabel_prefix_document = True + +suppress_warnings = [ + # https://github.com/readthedocs/sphinx-autoapi/issues/285 + "autoapi.python_import_resolution", + "ref.python", +] diff --git a/tests/docs/index.rst b/tests/_docs/index.rst similarity index 100% rename from tests/docs/index.rst rename to tests/_docs/index.rst diff --git a/tox.ini b/tox.ini index e82fbf9..07a0873 100644 --- a/tox.ini +++ b/tox.ini @@ -62,12 +62,12 @@ deps = sphinx-design furo commands = - make -C tests/docs clean - make -C tests/docs html + make -C tests/_docs clean + make -C tests/_docs html [testenv:docs-linkcheck] allowlist_externals = make deps = {[testenv:docs-build]deps} commands = - make -C tests/docs clean - make -C tests/docs linkcheck + make -C tests/_docs clean + make -C tests/_docs linkcheck From 1473cb10f9cb8920490057d8fc68540bf60c7a3c Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 1 Sep 2025 11:15:10 
+0200 Subject: [PATCH 2/9] add ad1 support --- .gitattributes | 1 + dissect/evidence/__init__.py | 6 +- dissect/evidence/ad1.py | 252 -------------- dissect/evidence/ad1/__init__.py | 23 ++ dissect/evidence/ad1/ad1.py | 321 ++++++++++++++++++ dissect/evidence/ad1/c_ad1.py | 183 ++++++++++ dissect/evidence/ad1/c_ad1.pyi | 172 ++++++++++ dissect/evidence/aff4.py | 0 dissect/evidence/asdf/asdf.pyi | 64 ++++ dissect/evidence/ewf/__init__.py | 25 ++ dissect/evidence/ewf/c_ewf.py | 89 +++++ dissect/evidence/ewf/c_ewf.pyi | 102 ++++++ dissect/evidence/{ => ewf}/ewf.py | 87 +---- dissect/evidence/exceptions.py | 16 + tests/_data/ad1/compressed.ad1 | 3 + tests/_data/ad1/long.ad1 | 3 + tests/_data/ad1/pcbje/text-and-pictures.ad1 | 3 + .../_data/ad1/pcbje/text-and-pictures.ad1.txt | 3 + tests/_data/ad1/pcbje/text-and-pictures.ad2 | 3 + tests/_data/ad1/pcbje/text-and-pictures.ad3 | 3 + tests/_data/ad1/pcbje/text-and-pictures.ad4 | 3 + tests/_data/ad1/test.ad1 | 3 + tests/_data/ewf/ewf.E01 | 3 + tests/_utils.py | 7 + tests/conftest.py | 22 +- tests/data/ad1_long.ad1 | Bin 2554 -> 0 bytes tests/data/ad1_test.ad1 | Bin 2264 -> 0 bytes tests/data/ad1_test_compressed.ad1 | Bin 2197 -> 0 bytes tests/data/ewf.E01 | Bin 7630 -> 0 bytes tests/test_ad1.py | 110 +++++- tests/test_ewf.py | 4 +- 31 files changed, 1152 insertions(+), 359 deletions(-) create mode 100644 .gitattributes delete mode 100644 dissect/evidence/ad1.py create mode 100644 dissect/evidence/ad1/__init__.py create mode 100644 dissect/evidence/ad1/ad1.py create mode 100644 dissect/evidence/ad1/c_ad1.py create mode 100644 dissect/evidence/ad1/c_ad1.pyi delete mode 100644 dissect/evidence/aff4.py create mode 100644 dissect/evidence/asdf/asdf.pyi create mode 100644 dissect/evidence/ewf/__init__.py create mode 100644 dissect/evidence/ewf/c_ewf.py create mode 100644 dissect/evidence/ewf/c_ewf.pyi rename dissect/evidence/{ => ewf}/ewf.py (85%) create mode 100644 tests/_data/ad1/compressed.ad1 create mode 100644 
tests/_data/ad1/long.ad1 create mode 100644 tests/_data/ad1/pcbje/text-and-pictures.ad1 create mode 100644 tests/_data/ad1/pcbje/text-and-pictures.ad1.txt create mode 100644 tests/_data/ad1/pcbje/text-and-pictures.ad2 create mode 100644 tests/_data/ad1/pcbje/text-and-pictures.ad3 create mode 100644 tests/_data/ad1/pcbje/text-and-pictures.ad4 create mode 100644 tests/_data/ad1/test.ad1 create mode 100644 tests/_data/ewf/ewf.E01 create mode 100644 tests/_utils.py delete mode 100644 tests/data/ad1_long.ad1 delete mode 100644 tests/data/ad1_test.ad1 delete mode 100644 tests/data/ad1_test_compressed.ad1 delete mode 100644 tests/data/ewf.E01 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b4f63f --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +tests/_data/** filter=lfs diff=lfs merge=lfs -text diff --git a/dissect/evidence/__init__.py b/dissect/evidence/__init__.py index bf20eb6..64360ac 100644 --- a/dissect/evidence/__init__.py +++ b/dissect/evidence/__init__.py @@ -1,7 +1,11 @@ +from __future__ import annotations + +from dissect.evidence.ad1.ad1 import AD1 from dissect.evidence.asdf.asdf import AsdfSnapshot, AsdfStream -from dissect.evidence.ewf import EWF +from dissect.evidence.ewf.ewf import EWF __all__ = [ + "AD1", "EWF", "AsdfSnapshot", "AsdfStream", diff --git a/dissect/evidence/ad1.py b/dissect/evidence/ad1.py deleted file mode 100644 index fe4cce3..0000000 --- a/dissect/evidence/ad1.py +++ /dev/null @@ -1,252 +0,0 @@ -from __future__ import annotations - -import zlib -from typing import Any, BinaryIO - -from dissect.cstruct import cstruct -from dissect.util.stream import AlignedStream, RelativeStream - -ad1_def = """ -enum EntryType : uint32 { - File = 0x0, - Directory = 0x5 -}; - -enum MetaType : uint32 { - FileClass = 0x2, - FileSize = 0x3, - PhysicalSize = 0x4, - StartCluster = 0x6, - DateAccessed = 0x7, - DateCreated = 0x8, - DateModified = 0x9, - Unknown_1 = 0xd, - Unknown_2 = 0xe, - ActualFile = 0x1e, - Unknown_3 = 
0x1002, - Unknown_4 = 0x1003, - Unknown_5 = 0x1004, - Unknown_6 = 0x1005, - MD5 = 0x5001, - SHA1 = 0x5002, - ClusterSize = 0x9001, - ClusterCount = 0x9002, - FreeClusterCount = 0x9003, - VolumeSerialNumber = 0x9006 -}; - -typedef struct { - char magic[16]; - uint32 unk1; - uint32 unk2; - uint32 unk3; - uint32 unk4; - uint16 unk5; - uint16 version; - uint32 unk6; - uint64 logical_image_offset; -} SegmentedFileHeader; - -typedef struct { - char magic[16]; - uint32 unk1; - uint32 unk2; - uint32 chunk_size; // This is supposed to be uint64? But that doesn't seem right - uint32 unk3; - uint32 unk4; - uint64 entry_offset; - uint32 name_len; - uint32 unk5; - uint64 name_offset; - uint64 unk6; - uint64 unk7; - uint64 unk8; - uint64 unk9; - char name[name_len]; -} LogicalImageHeader; - -typedef struct { - uint64 next; - uint64 child; - uint64 meta; - uint64 unk1; - uint64 size; - EntryType type; - uint32 name_len; - char name[name_len]; - uint64 unk2; - uint64 num_chunks; - uint64 chunks[num_chunks]; -} FileEntry; - -typedef struct { - uint64 next; - uint32 category; - MetaType type; - uint32 len; - char data[len]; -} FileMeta; -""" -c_ad1 = cstruct().load(ad1_def) - -EntryType = c_ad1.EntryType -MetaType = c_ad1.MetaType - - -class AD1: - def __init__(self, fh: BinaryIO): - self.fh = fh - self.header = c_ad1.SegmentedFileHeader(fh) - - offset = self.header.logical_image_offset - self.logical_image = LogicalImage(RelativeStream(fh, offset)) - self.root = self.logical_image - - def __getattr__(self, k: str) -> Any: - if k in self.header.__class__.fields: - return getattr(self.header, k) - - return super().__getattr__(k) - - def entry(self, path: str = "") -> LogicalImage | FileEntry: - components = path.lstrip("/").split("/") - current = self.root - - if components[0] == "": - return current - - for c in components: - for item in current.children: - if item.name == c: - current = item - - if current.name == components[-1]: - return current - - raise IOError(f"Path not found: 
{path}") - - def listdir(self, path: str) -> list[FileEntry]: - return [e.name for e in self.entry(path).children] - - def get(self, path: str) -> LogicalImage | FileEntry: - return self.entry(path) - - def open(self, path: str) -> FileObject: - return self.entry(path).open() - - -class LogicalImage: - def __init__(self, fh: BinaryIO): - self.fh = fh - self.header = c_ad1.LogicalImageHeader(fh) - - self.children = [] - offset = self.header.entry_offset - while offset != 0: - child = FileEntry(self, offset) - offset = child.entry.next - self.children.append(child) - - def __repr__(self) -> str: - return f"" - - def __getattr__(self, k: str) -> Any: - if k in self.header.__class__.fields: - return getattr(self.header, k) - - return object.__getattribute__(self, k) - - -class FileEntry: - def __init__(self, image: LogicalImage, offset: int): - fh = image.fh - fh.seek(offset) - self.image = image - self.offset = offset - self.entry = c_ad1.FileEntry(fh) - self.size = self.entry.size - - self.meta = [] - offset = self.entry.meta - while offset != 0: - meta = FileMeta(image, offset) - offset = meta.entry.next - self.meta.append(meta) - - self.children = [] - offset = self.entry.child - while offset != 0: - child = FileEntry(image, offset) - offset = child.entry.next - self.children.append(child) - - def __repr__(self) -> str: - file_type = "Unknown type" - if self.is_file(): - file_type = "File" - elif self.is_dir(): - file_type = "Directory" - return f"<{file_type} name={self.entry.name}>" - - def __getattr__(self, k: str) -> Any: - if k in self.entry.__class__.fields: - return getattr(self.entry, k) - - return object.__getattribute__(self, k) - - def open(self) -> FileObject: - return FileObject(self) - - def is_file(self) -> bool: - return self.entry.type == EntryType.File - - def is_dir(self) -> bool: - return self.entry.type == EntryType.Directory - - -class FileMeta: - def __init__(self, image: LogicalImage, offset: int): - fh = image.fh - fh.seek(offset) - 
self.image = image - self.offset = offset - self.entry = c_ad1.FileMeta(fh) - - def __repr__(self) -> str: - return f"" - - def __getattr__(self, k: str) -> Any: - if k in self.entry.__class__.fields: - return getattr(self.entry, k) - - return object.__getattribute__(self, k) - - -class FileObject(AlignedStream): - def __init__(self, entry: FileEntry): - self.entry = entry - super().__init__(self.entry.size, self.entry.image.chunk_size) - - def _read(self, offset: int, length: int) -> bytes: - r = [] - fh = self.entry.image.fh - chunk_size = self.entry.image.chunk_size - - chunk = offset // chunk_size - chunk_count = (length + chunk_size - 1) // chunk_size - - chunk_offsets = self.entry.entry.chunks[chunk : chunk + chunk_count + 1] - if len(chunk_offsets) != chunk_count + 1: - chunk_offsets.append(self.entry.entry.meta) - - fh.seek(chunk_offsets[0]) - buf = fh.read(chunk_offsets[-1] - chunk_offsets[0]) - - prev_offset = chunk_offsets[0] - for offset in chunk_offsets[1:]: - chunk_size = offset - prev_offset - r.append(zlib.decompress(buf[:chunk_size])) - buf = buf[chunk_size:] - prev_offset = offset - - return b"".join(r) diff --git a/dissect/evidence/ad1/__init__.py b/dissect/evidence/ad1/__init__.py new file mode 100644 index 0000000..d4dd654 --- /dev/null +++ b/dissect/evidence/ad1/__init__.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from dissect.evidence.ad1.ad1 import AD1, AD1LogicalImage, AD1SegmentFile, FileEntry, FileMeta, FileObject, MetaType +from dissect.evidence.exceptions import ( + Error, + FileNotFoundError, + NotADirectoryError, + NotASymlinkError, +) + +__all__ = [ + "AD1", + "AD1LogicalImage", + "AD1SegmentFile", + "Error", + "FileEntry", + "FileMeta", + "FileNotFoundError", + "FileObject", + "MetaType", + "NotADirectoryError", + "NotASymlinkError", +] diff --git a/dissect/evidence/ad1/ad1.py b/dissect/evidence/ad1/ad1.py new file mode 100644 index 0000000..099b826 --- /dev/null +++ b/dissect/evidence/ad1/ad1.py @@ -0,0 +1,321 @@ 
def atoi(text: str) -> int | str:
    """Return ``text`` as an ``int`` when it consists solely of digits, otherwise return it unchanged."""
    return int(text) if text.isdigit() else text


def natural_keys(text: str | Path) -> list[int | str]:
    """Build a natural-sort key for ``text``.

    The string form of ``text`` is split on runs of digits. Digit runs become integers and the text in
    between stays a string, so ``file.ad2`` orders before ``file.ad10``.
    """
    return [atoi(c) for c in re.split(r"(\d+)", str(text))]


def find_files(path: Path) -> list[Path]:
    """Find all segment files that sit next to ``path`` and belong to the same image.

    A sibling qualifies when it has the same stem as ``path`` and a suffix of the form ``.ad<digits>``
    (matched case-insensitively).

    Args:
        path: Path to any one segment of the image, e.g. ``image.ad1``.

    Returns:
        The matching paths in natural (numeric-aware) order. This has always been a list, because
        ``sorted()`` returns one; the previous ``set[Path]`` annotation was incorrect.
    """
    stem = path.stem
    # ``iterdir()`` never yields the same path twice, so no intermediate set is needed. Collecting
    # into a list also keeps the order of equal sort keys tied to directory order instead of hash order.
    segments = [
        candidate
        for candidate in path.parent.iterdir()
        if candidate.stem == stem and re.match(r"^\.ad[0-9]+$", candidate.suffix.lower())
    ]
    return sorted(segments, key=natural_keys)
class AD1:
    """AccessData Logical Image (AD1v4) implementation.

    Supports ``zlib`` compressed images. Does not support encrypted (``b"ADCRYPT"``) images.

    Should be initialized using a list of segment files, e.g.::

        fs = AD1([Path("file.ad1").open("rb"), Path("file.ad2").open("rb")])

    Resources:
        - Reversing FTK Imager
        - https://github.com/pcbje/pyad1/blob/master/documentation/AccessData%20Format%20(AD1).asciidoc
        - https://github.com/al3ks1s/AD1-tools
        - https://web.archive.org/web/20231013073319/https://tmairi.github.io/posts/dissecting-the-ad1-file-format/
        - https://al3ks1s.fr/posts/adventures-part-1/
    """

    def __init__(self, fh: BinaryIO | list[BinaryIO]):
        """Parse the segment headers and the logical image header, and build the synthetic root tree.

        Args:
            fh: A single file-like object or a list of them, one per segment file. Segments are
                appended to the combined stream in the order given.

        Raises:
            ValueError: If no handle was given or a handle has no ``read`` attribute.
        """
        self.fhs: list[BinaryIO] = fh if isinstance(fh, list) else [fh]
        self.segments: list[AD1SegmentFile] = []
        self.stream = MappingStream()
        self.logical_image: AD1LogicalImage = None
        self.root: FileEntry = None

        if not self.fhs or not all(hasattr(handle, "read") for handle in self.fhs):
            raise ValueError(f"Invalid given file handles: {fh!r}")

        for segment_fh in self.fhs:
            # Each file contains a segment header
            segment = AD1SegmentFile(segment_fh)
            self.segments.append(segment)

            # Add the segment file handle to the mapping stream, minus the segment header.
            # TODO: Does this work as expected?
            self.stream.add(self.stream.size or 0, segment.header.segment_size - 512, segment_fh, 512)

        # The first .ad1 file contains an image header
        offset = self.segments[0].header.logical_image_offset
        self.logical_image = AD1LogicalImage(RelativeStream(self.fhs[0], offset))  # NOTE: Unnecessary RelativeStream?
        self.root = FileEntry(self, -1, is_root=True, root_name="/")

        # Add entries for all parts in logical_image.name
        root_name = self.logical_image.header.name.decode()
        root_path = (
            PureWindowsPath(root_name) if "/" not in root_name and "\\" in root_name else PurePosixPath(root_name)
        )

        # NOTE(review): for an absolute POSIX name the first part is "/", which nests a second "/"
        # entry below the synthetic root. Confirm this is intended.
        parent = self.root
        for part in root_path.parts:
            # A Windows anchor such as "C:\" is stored as just the drive, "C:".
            name = root_path.drive if root_path.drive and part == f"{root_path.drive}\\" else part
            node = FileEntry(self, -1, is_root=True, root_name=name)
            parent.children = [node]
            parent = node

        # Add the first children to the last root part. ``parent`` is still the synthetic root when
        # the image name has no parts; the previous code hit an unbound variable in that case.
        offset = self.logical_image.header.first_file_offset
        parent.children = []
        while offset != 0:
            child = FileEntry(self, offset)
            parent.children.append(child)
            offset = child.entry.next

    def entry(self, path: str) -> FileEntry:
        """Resolve a ``/``-separated path to its entry, starting at the synthetic root.

        Empty components are ignored, so ``""`` and ``"/"`` both return the root. Children of type
        ``EntryType.Deleted`` are skipped and the first matching child is taken at every level.

        Args:
            path: The path to look up.

        Raises:
            FileNotFoundError: If any component of ``path`` has no matching child. Previously an
                unmatched intermediate component was silently ignored and only the last name was
                checked, so ``missing/file.txt`` could resolve to an unrelated ``file.txt``.
        """
        current = self.root

        for component in (c for c in path.split("/") if c):
            for child in current.iterdir():
                if child.name == component and child.entry.type != EntryType.Deleted:
                    current = child
                    break
            else:
                raise FileNotFoundError(f"Path not found: {path}")

        return current

    def get(self, path: str) -> FileEntry:
        """Alias of :meth:`entry`."""
        return self.entry(path)

    def open(self, path: str) -> FileObject:
        """Resolve ``path`` and return the result of calling ``open()`` on that entry."""
        return self.entry(path).open()


class AD1SegmentFile:
    """Parsed segment header of a single AD1 segment file."""

    def __init__(self, fh: BinaryIO):
        self.fh = fh
        self.header = c_ad1.SegmentedFileHeader(fh)
        self.number = self.header.segment_number
        self.count = self.header.segment_count
        self.size = self.header.segment_size

    def __repr__(self) -> str:
        return f"<AD1SegmentFile number={self.number} count={self.count} size={self.size}>"


class AD1LogicalImage:
    """Parsed logical image header, read from the given file-like object."""

    def __init__(self, fh: BinaryIO):
        self.fh = fh
        self.header = c_ad1.LogicalImageHeader(fh)
        self.name = self.header.name
        self.version = self.header.version
        self.offset = self.header.first_file_offset
        self.chunk_size = self.header.chunk_size

    def __repr__(self) -> str:
        return f"<AD1LogicalImage name={self.name!r} version={self.version} chunk_size={self.chunk_size}>"
class FileEntry:
    """A single entry of an AD1 image: either parsed from the image stream or a synthetic root part."""

    def __init__(self, ad1: AD1, offset: int, is_root: bool = False, root_name: str | None = None):
        """Parse the entry at ``offset``, or build a synthetic directory entry when ``is_root`` is set.

        Args:
            ad1: The image this entry belongs to; its ``stream`` is used for parsing.
            offset: Offset of the entry in ``ad1.stream``. Not read for synthetic root entries.
            is_root: Build a synthetic directory entry instead of parsing one.
            root_name: Name of the synthetic entry; required when ``is_root`` is set.
        """
        self.ad1 = ad1
        self.offset = offset
        self.is_root = is_root

        self.entry = None
        self.type = None
        self.meta = []

        if is_root:
            self.entry = c_ad1.FileEntry(name=root_name.encode(), type=EntryType.Directory, size=0)
            # Keep ``type`` in line with ``entry.type``; it used to stay ``None`` for synthetic roots.
            self.type = self.entry.type
            return

        fh = ad1.stream
        fh.seek(offset)
        self.entry = c_ad1.FileEntry(fh)
        # Stored on the instance, so this value takes precedence over the ``size`` property below.
        self.size = self.entry.size
        self.type = self.entry.type

        # Metadata records form a linked list that ends with a ``next`` offset of 0.
        meta_offset = self.entry.meta
        while meta_offset != 0:
            meta = FileMeta(ad1.stream, meta_offset)
            meta_offset = meta.entry.next
            self.meta.append(meta)

    def __repr__(self) -> str:
        if self.is_symlink():
            file_type = "AD1Symlink"
        elif self.is_file():
            file_type = "AD1File"
        elif self.is_dir():
            file_type = "AD1Directory"
        else:
            file_type = "AD1UnknownType"
        return f"<{file_type} name={self.name!r} size={self.size!r}>"

    @cached_property
    def name(self) -> str:
        """The decoded entry name."""
        return self.entry.name.decode()

    @cached_property
    def children(self) -> list[FileEntry]:
        """Child entries, found by following ``entry.child`` and then each child's ``next`` offset."""
        children = []
        offset = self.entry.child
        while offset != 0:
            child = FileEntry(self.ad1, offset)
            children.append(child)
            offset = child.entry.next

        return children

    def open(self) -> FileObject:
        """Return a :class:`FileObject` for this entry."""
        return FileObject(self)

    def is_file(self) -> bool:
        return self.entry.type in (EntryType.File, EntryType.Unknown_File)

    def is_dir(self) -> bool:
        return self.entry.type == EntryType.Directory

    def is_symlink(self) -> bool:
        """Whether the ``FileClass`` metadata, read as a little-endian integer, equals ``ReparsePoint``."""
        if meta := self.get_meta(MetaType.FileClass):
            return int.from_bytes(meta.data, "little") == FileClassType.ReparsePoint
        return False

    def listdir(self) -> list[str]:
        """Return the names of all children.

        Raises:
            NotADirectoryError: If this entry is not a directory.
        """
        if not self.is_dir():
            raise NotADirectoryError(self.name)
        return [child.name for child in self.children]

    def iterdir(self) -> Iterator[FileEntry]:
        """Yield all children.

        Raises:
            NotADirectoryError: If this entry is not a directory (raised on first iteration).
        """
        if not self.is_dir():
            raise NotADirectoryError(self.name)
        yield from self.children

    def readlink(self) -> str:
        """Return the link target stored in this entry's data.

        Raises:
            NotASymlinkError: If this entry is not a symlink.
        """
        if not self.is_symlink():
            raise NotASymlinkError(self.name)

        # TODO: Investigate symlinks for unix-like filesystems.

        reparse_point = c_ad1.ReparsePoint(self.open())
        # The link field holds NUL-separated strings; the last one is returned.
        return reparse_point.link.strip("\00").split("\00")[-1]

    def get_meta(self, attr: int | c_ad1.MetaType) -> c_ad1.FileMeta | None:
        """Return the first metadata record whose type equals ``attr``, or ``None`` if there is none."""
        return next((m for m in self.meta if m.type == attr), None)

    def _timestamp(self, attr: int | c_ad1.MetaType) -> datetime:
        """Convert the metadata record of type ``attr``, falling back to the Unix epoch (UTC) if absent."""
        meta = self.get_meta(attr)
        return convert_ts(meta.data) if meta else datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc)

    @cached_property
    def size(self) -> int:
        # Only reached when ``__init__`` did not store a size on the instance (synthetic root entries).
        # NOTE(review): ``meta.data`` is the raw value read from the image, not an ``int``. Confirm
        # whether it needs converting before this branch becomes reachable.
        meta = self.get_meta(c_ad1.MetaType.FileSize)
        return meta.data if meta else 0

    @cached_property
    def atime(self) -> datetime:
        return self._timestamp(c_ad1.MetaType.DateAccessed)

    @cached_property
    def ctime(self) -> datetime:
        # We could use MetaType.MFTFileDateChanged here depending on the fs
        return self._timestamp(c_ad1.MetaType.DateModified)

    @cached_property
    def mtime(self) -> datetime:
        return self._timestamp(c_ad1.MetaType.DateModified)

    @cached_property
    def btime(self) -> datetime:
        return self._timestamp(c_ad1.MetaType.DateCreated)


def convert_ts(input: bytes) -> datetime:
    """Convert an AD1 timestamp to datetime object. Assuming this is UTC.

    Accepts ``YYYYmmddTHHMMSS`` with or without a ``.ffffff`` fractional part.
    """

    # DateCreated does not (always) have ``.%f`` precision.
    fmt = "%Y%m%dT%H%M%S.%f" if b"." in input else "%Y%m%dT%H%M%S"
    return datetime.strptime(input.decode(), fmt).replace(tzinfo=timezone.utc)
class FileMeta:
    """A single metadata record, parsed at ``offset`` of the given stream."""

    def __init__(self, stream: MappingStream, offset: int):
        self.stream = stream
        self.offset = offset

        fh = stream
        fh.seek(offset)
        self.entry = c_ad1.FileMeta(fh)

        self.type = self.entry.type
        self.data = self.entry.data

    def __repr__(self) -> str:
        return f"<FileMeta type={self.type} data={self.data!r}>"


# TODO: Can we just use ZlibStream from dissect.util.stream?
class FileObject(AlignedStream):
    """Stream over the decompressed content of a :class:`FileEntry`.

    The stream is aligned to the logical image's ``chunk_size`` and every chunk is decompressed
    with ``zlib`` on read.
    """

    def __init__(self, entry: FileEntry):
        self.entry = entry
        # Chunk offset table of the entry, parsed on first read (see ``_chunk_offsets``).
        self._chunk_table = None
        super().__init__(self.entry.size, self.entry.ad1.logical_image.chunk_size)

    def _chunk_offsets(self) -> list[int]:
        """Return the entry's chunk offset table, parsing it from the image stream on first use."""
        if self._chunk_table is None:
            stream = self.entry.ad1.stream
            stream.seek(self.entry.entry.zlib_meta)
            self._chunk_table = list(c_ad1.FileEntryChunks(stream).chunks)
        return self._chunk_table

    def _read(self, offset: int, length: int) -> bytes:
        """Read and decompress the chunks covering ``length`` bytes starting at ``offset``."""
        fh = self.entry.ad1.stream
        chunk_size = self.entry.ad1.logical_image.chunk_size

        first_chunk = offset // chunk_size
        chunk_count = (length + chunk_size - 1) // chunk_size

        # ``chunk_count`` chunks need ``chunk_count + 1`` boundaries. Slicing yields a new list, so
        # the cached table is never modified by the ``append`` below.
        boundaries = self._chunk_offsets()[first_chunk : first_chunk + chunk_count + 1]
        if len(boundaries) != chunk_count + 1:
            # The table has no boundary after the final chunk; the entry's metadata offset is used instead.
            boundaries.append(self.entry.entry.meta)

        fh.seek(boundaries[0])
        buf = fh.read(boundaries[-1] - boundaries[0])

        result = []
        pos = 0
        start = boundaries[0]
        for end in boundaries[1:]:
            compressed_size = end - start
            result.append(zlib.decompress(buf[pos : pos + compressed_size]))
            pos += compressed_size
            start = end

        return b"".join(result)
Generic attributes + ItemContentHashes = 0x1, + FileClass = 0x2, + FileSize = 0x3, + PhysicalSize = 0x4, + Timestamps = 0x5, + StartCluster = 0x6, + DateAccessed = 0x7, + DateCreated = 0x8, + DateModified = 0x9, + // .. 0xa, 0xb, 0xc .. + Encrypted = 0xd, + Compressed = 0xe, + // .. 0xf .. + ActualFile = 0x1e, + StartSector = 0x1f, + ADSCount = 0x24, // Alternate Data Stream Count + + // DOS attributes + ShortFilename = 0x1001, + Hidden = 0x1002, + System = 0x1003, + ReadOnly = 0x1004, + Archive = 0x1005, + + // NTFS attributes + MFTRecordNumber = 0xa001, + MFTDateChanged = 0xa002, // Specifies the MFT record change timestamp of the file. + MFTIsResident = 0xa003, + MFTIsOffline = 0xa004, + MFTIsSparse = 0xa005, + MFTIsTemporary = 0xa006, + MFTOwnerSid = 0xa007, + MFTOwnerName = 0xa008, + MFTGroupSid = 0xa009, + MFTGroupName = 0xa00a, + + MFTFileDateCreated = 0xa01c, // According to the filename attribute in the MFT. + MFTFileDateModified = 0xa01d, + MFTFileDateAccessed = 0xa01e, + MFTFileDateChanged = 0xa01f, + MFTFileSize = 0xa020, + MFTFilePhysicalSize = 0xa021, + + // 8.3 MFT Filename + // 0xa022, + // 0xa023, + // 0xa024, + // 0xa025, + // 0xa026, + // 0xa027, + + IndxFilename = 0xa028, // According to the filename attribute in the $I30 INDX. + IndxFileSize = 0xa029, + IndxPhysicalSize = 0xa02a, + IndxDateCreated = 0xa02b, + IndxDateModified = 0xa02c, + IndxDateAccessed = 0xa02d, + IndxDateChanged = 0xa02e, + + // 8.3 INDX + // 0xa02f, 0xa030, 0xa031, 0xa032, 0xa033, 0xa034, 0xa035 + + // NTFS Access Control Entry (0) + AceType = 0x1000001, + AceInheritable = 0x1000004, + AceSID = 0x1000005, // The Security ID of the user or group this ACE applies to. + AceName = 0x1000006, // The name of the user or roup this ACE applies to. + AceAccessMask = 0x1000007, // Raw bitmask specifying the actions this ACE controls. 
+ AceExecuteFile = 0x1000008, + AceReadData = 0x1000009, + AceWriteData = 0x100000a, + AceAppendData = 0x100000b, + AceTraverseFolder = 0x100000c, + AceListFolder = 0x100000d, + AceCreateFiles = 0x100000e, + AceCreateFolders = 0x100000f, + AceDeleteChildren = 0x1000010, + AceDeleteSelf = 0x1000011, + AceReadPermissions = 0x1000012, + AceChangePermissions = 0x1000013, + AceTakeOwnership = 0x1000014, + // .. 0x10010XX - 0x10060XX .. + + // Verification hashes + MD5 = 0x5001, + SHA1 = 0x5002, + + // TODO: Clean up + ClusterSize = 0x9001, + ClusterCount = 0x9002, + FreeClusterCount = 0x9003, + VolumeSerialNumber = 0x9006, + PosixPermissions = 0x2001, +}; + +typedef struct { + char magic[16]; // b"ADSEGMENTEDFILE" + padding + uint32 unk1; // 0x01 + uint32 unk2; // 0x02 + uint32 segment_number; // segment number starts at 0x01 + uint32 segment_count; // number of segments + uint64 segment_size; // off by 512 bytes + uint32 logical_image_offset; + char padding[468]; // 0x00 +} SegmentedFileHeader; + +typedef struct { + char magic[16]; // b"ADLOGICALIMAGE" + padding + uint32 version; // 0x03 or 0x04 + uint32 unk1; // 0x01 + uint32 chunk_size; // zlib chunk size (uint64?) 
+ uint64 metadata_offset; + uint64 first_file_offset; + uint32 name_len; + + // ADv4 (offset 48 contains name[name_len] in ADv3) + char unk_magic[4]; // b"AD" + (2 * 0x00) + uint64 name_offset; // 0x5c + uint64 attr_guid_offset; + uint64 unk2; // 0x00 + uint64 locs_guid_offset; + uint64 unk3; // 0x00 + // END ADv4 + + char name[name_len]; +} LogicalImageHeader; + +typedef struct { + uint64 next; // Next FileEntry in same hierarchy level + uint64 child; // Next FileEntry within this dir, 0x00 if file + uint64 meta; // Offset of first FileMeta entry + uint64 zlib_meta; // Offset of zlib chunk metadata + uint64 size; // Decompressed file size, 0x00 if no data + EntryType type; // 0x00 = file, 0x05 = directory + uint32 name_len; + char name[name_len]; + uint64 parent_index; // Parent folder index, 0x00 if at root +} FileEntry; + +typedef struct { + uint64 num_chunks; // only if FileEntry.size != 0x00 + uint64 chunks[num_chunks]; +} FileEntryChunks; + +typedef struct { + uint64 next; + uint32 category; + MetaType type; + uint32 len; + char data[len]; +} FileMeta; + +typedef struct { + char unk1[352]; // version 4 only +} Footer; + +typedef struct { + CHAR unknown[16]; + WCHAR link[EOF]; +} ReparsePoint; +""" + +c_ad1 = cstruct().load(ad1_def) diff --git a/dissect/evidence/ad1/c_ad1.pyi b/dissect/evidence/ad1/c_ad1.pyi new file mode 100644 index 0000000..5f60abe --- /dev/null +++ b/dissect/evidence/ad1/c_ad1.pyi @@ -0,0 +1,172 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, Literal, overload + +import dissect.cstruct as __cs__ +from typing_extensions import TypeAlias + +class _c_ad1(__cs__.cstruct): + class EntryType(__cs__.Enum): + File = ... + Unknown_File = ... + Deleted = ... + Directory = ... + + class FileClassType(__cs__.Enum): + File = ... + Directory = ... + ReparsePoint = ... + + class MetaType(__cs__.Enum): + ItemContentHashes = ... + FileClass = ... + FileSize = ... + PhysicalSize = ... + Timestamps = ... + StartCluster = ... 
+ DateAccessed = ... + DateCreated = ... + DateModified = ... + Encrypted = ... + Compressed = ... + ActualFile = ... + StartSector = ... + ADSCount = ... + ShortFilename = ... + Hidden = ... + System = ... + ReadOnly = ... + Archive = ... + MFTRecordNumber = ... + MFTDateChanged = ... + MFTIsResident = ... + MFTIsOffline = ... + MFTIsSparse = ... + MFTIsTemporary = ... + MFTOwnerSid = ... + MFTOwnerName = ... + MFTGroupSid = ... + MFTGroupName = ... + MFTFileDateCreated = ... + MFTFileDateModified = ... + MFTFileDateAccessed = ... + MFTFileDateChanged = ... + MFTFileSize = ... + MFTFilePhysicalSize = ... + IndxFilename = ... + IndxFileSize = ... + IndxPhysicalSize = ... + IndxDateCreated = ... + IndxDateModified = ... + IndxDateAccessed = ... + IndxDateChanged = ... + AceType = ... + AceInheritable = ... + AceSID = ... + AceName = ... + AceAccessMask = ... + AceExecuteFile = ... + AceReadData = ... + AceWriteData = ... + AceAppendData = ... + AceTraverseFolder = ... + AceListFolder = ... + AceCreateFiles = ... + AceCreateFolders = ... + AceDeleteChildren = ... + AceDeleteSelf = ... + AceReadPermissions = ... + AceChangePermissions = ... + AceTakeOwnership = ... + MD5 = ... + SHA1 = ... + ClusterSize = ... + ClusterCount = ... + FreeClusterCount = ... + VolumeSerialNumber = ... + PosixPermissions = ... + + class SegmentedFileHeader(__cs__.Structure): + magic: __cs__.CharArray + unk1: _c_ad1.uint32 + unk2: _c_ad1.uint32 + segment_number: _c_ad1.uint32 + segment_count: _c_ad1.uint32 + segment_size: _c_ad1.uint64 + logical_image_offset: _c_ad1.uint32 + padding: __cs__.CharArray + @overload + def __init__(self, magic: __cs__.CharArray | None = ..., unk1: _c_ad1.uint32 | None = ..., unk2: _c_ad1.uint32 | None = ..., segment_number: _c_ad1.uint32 | None = ..., segment_count: _c_ad1.uint32 | None = ..., segment_size: _c_ad1.uint64 | None = ..., logical_image_offset: _c_ad1.uint32 | None = ..., padding: __cs__.CharArray | None = ...): ... 
+ @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class LogicalImageHeader(__cs__.Structure): + magic: __cs__.CharArray + version: _c_ad1.uint32 + unk1: _c_ad1.uint32 + chunk_size: _c_ad1.uint32 + metadata_offset: _c_ad1.uint64 + first_file_offset: _c_ad1.uint64 + name_len: _c_ad1.uint32 + unk_magic: __cs__.CharArray + name_offset: _c_ad1.uint64 + attr_guid_offset: _c_ad1.uint64 + unk2: _c_ad1.uint64 + locs_guid_offset: _c_ad1.uint64 + unk3: _c_ad1.uint64 + name: __cs__.CharArray + @overload + def __init__(self, magic: __cs__.CharArray | None = ..., version: _c_ad1.uint32 | None = ..., unk1: _c_ad1.uint32 | None = ..., chunk_size: _c_ad1.uint32 | None = ..., metadata_offset: _c_ad1.uint64 | None = ..., first_file_offset: _c_ad1.uint64 | None = ..., name_len: _c_ad1.uint32 | None = ..., unk_magic: __cs__.CharArray | None = ..., name_offset: _c_ad1.uint64 | None = ..., attr_guid_offset: _c_ad1.uint64 | None = ..., unk2: _c_ad1.uint64 | None = ..., locs_guid_offset: _c_ad1.uint64 | None = ..., unk3: _c_ad1.uint64 | None = ..., name: __cs__.CharArray | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class FileEntry(__cs__.Structure): + next: _c_ad1.uint64 + child: _c_ad1.uint64 + meta: _c_ad1.uint64 + zlib_meta: _c_ad1.uint64 + size: _c_ad1.uint64 + type: _c_ad1.EntryType + name_len: _c_ad1.uint32 + name: __cs__.CharArray + parent_index: _c_ad1.uint64 + @overload + def __init__(self, next: _c_ad1.uint64 | None = ..., child: _c_ad1.uint64 | None = ..., meta: _c_ad1.uint64 | None = ..., zlib_meta: _c_ad1.uint64 | None = ..., size: _c_ad1.uint64 | None = ..., type: _c_ad1.EntryType | None = ..., name_len: _c_ad1.uint32 | None = ..., name: __cs__.CharArray | None = ..., parent_index: _c_ad1.uint64 | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ + class FileEntryChunks(__cs__.Structure): + num_chunks: _c_ad1.uint64 + chunks: __cs__.Array[_c_ad1.uint64] + @overload + def __init__(self, num_chunks: _c_ad1.uint64 | None = ..., chunks: __cs__.Array[_c_ad1.uint64] | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class FileMeta(__cs__.Structure): + next: _c_ad1.uint64 + category: _c_ad1.uint32 + type: _c_ad1.MetaType + len: _c_ad1.uint32 + data: __cs__.CharArray + @overload + def __init__(self, next: _c_ad1.uint64 | None = ..., category: _c_ad1.uint32 | None = ..., type: _c_ad1.MetaType | None = ..., len: _c_ad1.uint32 | None = ..., data: __cs__.CharArray | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class Footer(__cs__.Structure): + unk1: __cs__.CharArray + @overload + def __init__(self, unk1: __cs__.CharArray | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class ReparsePoint(__cs__.Structure): + unknown: __cs__.CharArray + link: __cs__.WcharArray + @overload + def __init__(self, unknown: __cs__.CharArray | None = ..., link: __cs__.WcharArray | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + +# Technically `c_ad1` is an instance of `_c_ad1`, but then we can't use it in type hints +c_ad1: TypeAlias = _c_ad1 diff --git a/dissect/evidence/aff4.py b/dissect/evidence/aff4.py deleted file mode 100644 index e69de29..0000000 diff --git a/dissect/evidence/asdf/asdf.pyi b/dissect/evidence/asdf/asdf.pyi new file mode 100644 index 0000000..420d56b --- /dev/null +++ b/dissect/evidence/asdf/asdf.pyi @@ -0,0 +1,64 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, Literal, overload + +import dissect.cstruct as __cs__ +from typing_extensions import TypeAlias + +class _c_asdf(__cs__.cstruct): + class FILE_FLAG(__cs__.Flag): + SHA256 = ... 
+ + class BLOCK_FLAG(__cs__.Flag): + CRC32 = ... + COMPRESS = ... + + class header(__cs__.Structure): + magic: __cs__.CharArray + flags: _c_asdf.FILE_FLAG + version: _c_asdf.uint8 + reserved1: __cs__.CharArray + timestamp: _c_asdf.uint64 + reserved2: __cs__.CharArray + guid: __cs__.CharArray + @overload + def __init__(self, magic: __cs__.CharArray | None = ..., flags: _c_asdf.FILE_FLAG | None = ..., version: _c_asdf.uint8 | None = ..., reserved1: __cs__.CharArray | None = ..., timestamp: _c_asdf.uint64 | None = ..., reserved2: __cs__.CharArray | None = ..., guid: __cs__.CharArray | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class block(__cs__.Structure): + magic: __cs__.CharArray + flags: _c_asdf.BLOCK_FLAG + idx: _c_asdf.uint8 + reserved: __cs__.CharArray + offset: _c_asdf.uint64 + size: _c_asdf.uint64 + @overload + def __init__(self, magic: __cs__.CharArray | None = ..., flags: _c_asdf.BLOCK_FLAG | None = ..., idx: _c_asdf.uint8 | None = ..., reserved: __cs__.CharArray | None = ..., offset: _c_asdf.uint64 | None = ..., size: _c_asdf.uint64 | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class table_entry(__cs__.Structure): + flags: _c_asdf.BLOCK_FLAG + idx: _c_asdf.uint8 + reserved: __cs__.CharArray + offset: _c_asdf.uint64 + size: _c_asdf.uint64 + file_offset: _c_asdf.uint64 + file_size: _c_asdf.uint64 + @overload + def __init__(self, flags: _c_asdf.BLOCK_FLAG | None = ..., idx: _c_asdf.uint8 | None = ..., reserved: __cs__.CharArray | None = ..., offset: _c_asdf.uint64 | None = ..., size: _c_asdf.uint64 | None = ..., file_offset: _c_asdf.uint64 | None = ..., file_size: _c_asdf.uint64 | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ + class footer(__cs__.Structure): + magic: __cs__.CharArray + reserved: __cs__.CharArray + table_offset: _c_asdf.uint64 + sha256: __cs__.CharArray + @overload + def __init__(self, magic: __cs__.CharArray | None = ..., reserved: __cs__.CharArray | None = ..., table_offset: _c_asdf.uint64 | None = ..., sha256: __cs__.CharArray | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + +# Technically `c_asdf` is an instance of `_c_asdf`, but then we can't use it in type hints +c_asdf: TypeAlias = _c_asdf diff --git a/dissect/evidence/ewf/__init__.py b/dissect/evidence/ewf/__init__.py new file mode 100644 index 0000000..1be30e7 --- /dev/null +++ b/dissect/evidence/ewf/__init__.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from dissect.evidence.ewf.c_ewf import c_ewf +from dissect.evidence.ewf.ewf import ( + EWF, + EWFError, + EWFStream, + HeaderSection, + SectionDescriptor, + Segment, + TableSection, + VolumeSection, +) + +__all__ = [ + "EWF", + "EWFError", + "EWFStream", + "HeaderSection", + "SectionDescriptor", + "Segment", + "TableSection", + "VolumeSection", + "c_ewf", +] diff --git a/dissect/evidence/ewf/c_ewf.py b/dissect/evidence/ewf/c_ewf.py new file mode 100644 index 0000000..84c21ea --- /dev/null +++ b/dissect/evidence/ewf/c_ewf.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +ewf_def = """ +enum MediaType : uint8 { + Removable = 0x00, + Fixed = 0x01, + Optical = 0x03, + Logical = 0x0e, + RAM = 0x10 +}; + +enum MediaFlags : uint8 { + Image = 0x01, + Physical = 0x02, + Fastbloc = 0x04, + Tablaeu = 0x08 +}; + +enum CompressionLevel : uint8 { + None = 0x00, + Good = 0x01, + Best = 0x02 +}; + +typedef struct { + char signature[8]; + uint8 fields_start; + uint16 segment_number; + uint16 fields_end; +} EWFHeader; + +typedef struct { + char type[16]; + uint64 next; + uint64 size; + uint8 pad[40]; + uint32 checksum; +} EWFSectionDescriptor; + +typedef 
struct { + uint32 reserved_1; + uint32 chunk_count; + uint32 sector_count; + uint32 sector_size; + uint32 total_sector_count; + uint8 reserved[20]; + uint8 pad[45]; + char signature[5]; + uint32 checksum; +} EWFVolumeSectionSpec; + +typedef struct { + MediaType media_type; + uint8 reserved_1[3]; + uint32 chunk_count; + uint32 sector_count; + uint32 sector_size; + uint64 total_sector_count; + uint32 num_cylinders; + uint32 num_heads; + uint32 num_sectors; + uint8 media_flags; + uint8 unknown_1[3]; + uint32 palm_start_sector; + uint32 unknown_2; + uint32 smart_start_sector; + CompressionLevel compression_level; + uint8 unknown_3[3]; + uint32 error_granularity; + uint32 unknown_4; + uint8 uuid[16]; + uint8 pad[963]; + char signature[5]; + uint32 checksum; +} EWFVolumeSection; + +typedef struct { + uint32 num_entries; + uint32 _; + uint64 base_offset; + uint32 _; + uint32 checksum; + uint32 entries[num_entries]; +} EWFTableSection; +""" + +c_ewf = cstruct().load(ewf_def) diff --git a/dissect/evidence/ewf/c_ewf.pyi b/dissect/evidence/ewf/c_ewf.pyi new file mode 100644 index 0000000..bf507e7 --- /dev/null +++ b/dissect/evidence/ewf/c_ewf.pyi @@ -0,0 +1,102 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, Literal, overload + +import dissect.cstruct as __cs__ +from typing_extensions import TypeAlias + +class _c_ewf(__cs__.cstruct): + class MediaType(__cs__.Enum): + Removable = ... + Fixed = ... + Optical = ... + Logical = ... + RAM = ... + + class MediaFlags(__cs__.Enum): + Image = ... + Physical = ... + Fastbloc = ... + Tablaeu = ... + + class CompressionLevel(__cs__.Enum): + None = ... + Good = ... + Best = ... 
+ + class EWFHeader(__cs__.Structure): + signature: __cs__.CharArray + fields_start: _c_ewf.uint8 + segment_number: _c_ewf.uint16 + fields_end: _c_ewf.uint16 + @overload + def __init__(self, signature: __cs__.CharArray | None = ..., fields_start: _c_ewf.uint8 | None = ..., segment_number: _c_ewf.uint16 | None = ..., fields_end: _c_ewf.uint16 | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class EWFSectionDescriptor(__cs__.Structure): + type: __cs__.CharArray + next: _c_ewf.uint64 + size: _c_ewf.uint64 + pad: __cs__.Array[_c_ewf.uint8] + checksum: _c_ewf.uint32 + @overload + def __init__(self, type: __cs__.CharArray | None = ..., next: _c_ewf.uint64 | None = ..., size: _c_ewf.uint64 | None = ..., pad: __cs__.Array[_c_ewf.uint8] | None = ..., checksum: _c_ewf.uint32 | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class EWFVolumeSectionSpec(__cs__.Structure): + reserved_1: _c_ewf.uint32 + chunk_count: _c_ewf.uint32 + sector_count: _c_ewf.uint32 + sector_size: _c_ewf.uint32 + total_sector_count: _c_ewf.uint32 + reserved: __cs__.Array[_c_ewf.uint8] + pad: __cs__.Array[_c_ewf.uint8] + signature: __cs__.CharArray + checksum: _c_ewf.uint32 + @overload + def __init__(self, reserved_1: _c_ewf.uint32 | None = ..., chunk_count: _c_ewf.uint32 | None = ..., sector_count: _c_ewf.uint32 | None = ..., sector_size: _c_ewf.uint32 | None = ..., total_sector_count: _c_ewf.uint32 | None = ..., reserved: __cs__.Array[_c_ewf.uint8] | None = ..., pad: __cs__.Array[_c_ewf.uint8] | None = ..., signature: __cs__.CharArray | None = ..., checksum: _c_ewf.uint32 | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ + class EWFVolumeSection(__cs__.Structure): + media_type: _c_ewf.MediaType + reserved_1: __cs__.Array[_c_ewf.uint8] + chunk_count: _c_ewf.uint32 + sector_count: _c_ewf.uint32 + sector_size: _c_ewf.uint32 + total_sector_count: _c_ewf.uint64 + num_cylinders: _c_ewf.uint32 + num_heads: _c_ewf.uint32 + num_sectors: _c_ewf.uint32 + media_flags: _c_ewf.uint8 + unknown_1: __cs__.Array[_c_ewf.uint8] + palm_start_sector: _c_ewf.uint32 + unknown_2: _c_ewf.uint32 + smart_start_sector: _c_ewf.uint32 + compression_level: _c_ewf.CompressionLevel + unknown_3: __cs__.Array[_c_ewf.uint8] + error_granularity: _c_ewf.uint32 + unknown_4: _c_ewf.uint32 + uuid: __cs__.Array[_c_ewf.uint8] + pad: __cs__.Array[_c_ewf.uint8] + signature: __cs__.CharArray + checksum: _c_ewf.uint32 + @overload + def __init__(self, media_type: _c_ewf.MediaType | None = ..., reserved_1: __cs__.Array[_c_ewf.uint8] | None = ..., chunk_count: _c_ewf.uint32 | None = ..., sector_count: _c_ewf.uint32 | None = ..., sector_size: _c_ewf.uint32 | None = ..., total_sector_count: _c_ewf.uint64 | None = ..., num_cylinders: _c_ewf.uint32 | None = ..., num_heads: _c_ewf.uint32 | None = ..., num_sectors: _c_ewf.uint32 | None = ..., media_flags: _c_ewf.uint8 | None = ..., unknown_1: __cs__.Array[_c_ewf.uint8] | None = ..., palm_start_sector: _c_ewf.uint32 | None = ..., unknown_2: _c_ewf.uint32 | None = ..., smart_start_sector: _c_ewf.uint32 | None = ..., compression_level: _c_ewf.CompressionLevel | None = ..., unknown_3: __cs__.Array[_c_ewf.uint8] | None = ..., error_granularity: _c_ewf.uint32 | None = ..., unknown_4: _c_ewf.uint32 | None = ..., uuid: __cs__.Array[_c_ewf.uint8] | None = ..., pad: __cs__.Array[_c_ewf.uint8] | None = ..., signature: __cs__.CharArray | None = ..., checksum: _c_ewf.uint32 | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ + class EWFTableSection(__cs__.Structure): + num_entries: _c_ewf.uint32 + _: _c_ewf.uint32 + base_offset: _c_ewf.uint64 + checksum: _c_ewf.uint32 + entries: __cs__.Array[_c_ewf.uint32] + @overload + def __init__(self, num_entries: _c_ewf.uint32 | None = ..., _: _c_ewf.uint32 | None = ..., base_offset: _c_ewf.uint64 | None = ..., checksum: _c_ewf.uint32 | None = ..., entries: __cs__.Array[_c_ewf.uint32] | None = ...): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + +# Technically `c_ewf` is an instance of `_c_ewf`, but then we can't use it in type hints +c_ewf: TypeAlias = _c_ewf diff --git a/dissect/evidence/ewf.py b/dissect/evidence/ewf/ewf.py similarity index 85% rename from dissect/evidence/ewf.py rename to dissect/evidence/ewf/ewf.py index a60fed2..fbb82cd 100644 --- a/dissect/evidence/ewf.py +++ b/dissect/evidence/ewf/ewf.py @@ -8,99 +8,14 @@ from pathlib import Path from typing import BinaryIO -from dissect.cstruct import cstruct from dissect.util.stream import AlignedStream +from dissect.evidence.ewf import c_ewf from dissect.evidence.exceptions import EWFError log = logging.getLogger(__name__) log.setLevel(os.getenv("DISSECT_LOG_EWF", "CRITICAL")) -ewf_def = """ -enum MediaType : uint8 { - Removable = 0x00, - Fixed = 0x01, - Optical = 0x03, - Logical = 0x0e, - RAM = 0x10 -}; - -enum MediaFlags : uint8 { - Image = 0x01, - Physical = 0x02, - Fastbloc = 0x04, - Tablaeu = 0x08 -}; - -enum CompressionLevel : uint8 { - None = 0x00, - Good = 0x01, - Best = 0x02 -}; - -typedef struct { - char signature[8]; - uint8 fields_start; - uint16 segment_number; - uint16 fields_end; -} EWFHeader; - -typedef struct { - char type[16]; - uint64 next; - uint64 size; - uint8 pad[40]; - uint32 checksum; -} EWFSectionDescriptor; - -typedef struct { - uint32 reserved_1; - uint32 chunk_count; - uint32 sector_count; - uint32 sector_size; - uint32 total_sector_count; - uint8 reserved[20]; - uint8 pad[45]; - char signature[5]; - uint32 
checksum; -} EWFVolumeSectionSpec; - -typedef struct { - MediaType media_type; - uint8 reserved_1[3]; - uint32 chunk_count; - uint32 sector_count; - uint32 sector_size; - uint64 total_sector_count; - uint32 num_cylinders; - uint32 num_heads; - uint32 num_sectors; - uint8 media_flags; - uint8 unknown_1[3]; - uint32 palm_start_sector; - uint32 unknown_2; - uint32 smart_start_sector; - CompressionLevel compression_level; - uint8 unknown_3[3]; - uint32 error_granularity; - uint32 unknown_4; - uint8 uuid[16]; - uint8 pad[963]; - char signature[5]; - uint32 checksum; -} EWFVolumeSection; - -typedef struct { - uint32 num_entries; - uint32 _; - uint64 base_offset; - uint32 _; - uint32 checksum; - uint32 entries[num_entries]; -} EWFTableSection; -""" - -c_ewf = cstruct().load(ewf_def) MAX_OPEN_SEGMENTS = 128 diff --git a/dissect/evidence/exceptions.py b/dissect/evidence/exceptions.py index dd8bc59..004bd22 100644 --- a/dissect/evidence/exceptions.py +++ b/dissect/evidence/exceptions.py @@ -3,6 +3,22 @@ class Error(Exception): It is used to recognize errors specific to this module""" +class FileNotFoundError(Error, FileNotFoundError): + pass + + +class IsADirectoryError(Error, IsADirectoryError): + pass + + +class NotADirectoryError(Error, NotADirectoryError): + pass + + +class NotASymlinkError(Error): + pass + + class EWFError(Error): """Related to EWF (Expert Witness disk image Format)""" diff --git a/tests/_data/ad1/compressed.ad1 b/tests/_data/ad1/compressed.ad1 new file mode 100644 index 0000000..96c93c4 --- /dev/null +++ b/tests/_data/ad1/compressed.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d88b6186b732dd7be752df52ed863bd9d2c273b1c8b2b3520e9032bfa1018a7c +size 2197 diff --git a/tests/_data/ad1/long.ad1 b/tests/_data/ad1/long.ad1 new file mode 100644 index 0000000..2f7f46f --- /dev/null +++ b/tests/_data/ad1/long.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1245a140cfd79870781080d74aeec2f90c9b4530b2ac12e9a3b77c6015262b0f +size 2554 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad1 b/tests/_data/ad1/pcbje/text-and-pictures.ad1 new file mode 100644 index 0000000..434078e --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b48affafe6826f226bb4b3e0c97add2bc8766a6740ad992001515767d955ff8d +size 2097152 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad1.txt b/tests/_data/ad1/pcbje/text-and-pictures.ad1.txt new file mode 100644 index 0000000..2b8ed4a --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad1.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24301f28955b835630b6ba7c026741b6bba307a1f6377ae567c9d4e230d26a93 +size 1051 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad2 b/tests/_data/ad1/pcbje/text-and-pictures.ad2 new file mode 100644 index 0000000..c381c10 --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bb53246dec28cf699f68656233138dc7842d789ca2aed7c712b281f19cbb062 +size 2097152 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad3 b/tests/_data/ad1/pcbje/text-and-pictures.ad3 new file mode 100644 index 0000000..0a0754b --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262db84b9d479b6e7ff1e68aafb89739ea55105f03225ef1d69298c72472d05b +size 2097152 diff --git a/tests/_data/ad1/pcbje/text-and-pictures.ad4 b/tests/_data/ad1/pcbje/text-and-pictures.ad4 new file mode 100644 index 0000000..359f06b --- /dev/null +++ b/tests/_data/ad1/pcbje/text-and-pictures.ad4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a50791bbb4a8bc374f386d6dbad702a153fa217df0df3b91b73bbf0a960ab8dd +size 1429862 diff --git a/tests/_data/ad1/test.ad1 b/tests/_data/ad1/test.ad1 new file mode 100644 index 
0000000..66b0b05 --- /dev/null +++ b/tests/_data/ad1/test.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c7b2a1b296a75590fd3f31d2d595cdad6c2442c2f394251c57506e9c488481a +size 2264 diff --git a/tests/_data/ewf/ewf.E01 b/tests/_data/ewf/ewf.E01 new file mode 100644 index 0000000..964a591 --- /dev/null +++ b/tests/_data/ewf/ewf.E01 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9b150a1f40024c7b0c3cf3c09cf809a8636419cab5f55ab4d4f78c918c1e082 +size 7630 diff --git a/tests/_utils.py b/tests/_utils.py new file mode 100644 index 0000000..7ace382 --- /dev/null +++ b/tests/_utils.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from pathlib import Path + + +def absolute_path(filename: str) -> Path: + return Path(__file__).parent.joinpath(filename).resolve() diff --git a/tests/conftest.py b/tests/conftest.py index c8a7ea4..318d0d1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ import pytest from dissect.evidence.asdf import AsdfWriter +from tests._utils import absolute_path if TYPE_CHECKING: from collections.abc import Iterator @@ -19,22 +20,35 @@ def open_data(name: str) -> Iterator[BinaryIO]: @pytest.fixture def ad1_data() -> Iterator[BinaryIO]: - yield from open_data("data/ad1_test.ad1") + yield from open_data("_data/ad1/test.ad1") @pytest.fixture def ad1_data_long() -> Iterator[BinaryIO]: - yield from open_data("data/ad1_long.ad1") + yield from open_data("_data/ad1/long.ad1") @pytest.fixture def ad1_data_compressed() -> Iterator[BinaryIO]: - yield from open_data("data/ad1_test_compressed.ad1") + yield from open_data("_data/ad1/compressed.ad1") + + +@pytest.fixture +def ad1_data_segmented() -> list[BinaryIO]: + return [ + absolute_path("_data/ad1/pcbje/text-and-pictures.ad1").open("rb"), + absolute_path("_data/ad1/pcbje/text-and-pictures.ad2").open("rb"), + absolute_path("_data/ad1/pcbje/text-and-pictures.ad3").open("rb"), + 
absolute_path("_data/ad1/pcbje/text-and-pictures.ad4").open("rb"), + ] + + + @pytest.fixture def ewf_data() -> Iterator[BinaryIO]: - yield from open_data("data/ewf.E01") + yield from open_data("_data/ewf/ewf.E01") @pytest.fixture diff --git a/tests/data/ad1_long.ad1 b/tests/data/ad1_long.ad1 deleted file mode 100644 index db49960bea3af3612c78aa2214cf2a67b3d65e17..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2554 zcmeHHU2GIp6rL>w%IeZa0SyrxBFVNSZsvDqmJQgYyKbsmYHS;-)WY4~JG0%HZeeGt zB}R0kL|cq8F`~ggei|MqL>?5Qi3VdLMu<-aNf1L&6Tlea4-k!7&)Lq(ZBl*k(Rh)PbNz5rkt8RTUclAOE=uq|&+0YsMpwxuH2s5kbIv$}#%XNw# zB}vyRM#5-!9neYFqmC5`CtWZ(u9b8hnBW>vi+E1b4RD7PgPC;U7J(a-Iuj@sy=_Ofd~v z(Hk)n7)X)`2xegs*JI5bu#g8@Ibb;exYI9>um?DMT@x3n%mH(FU?&IE^T2)%m)+2Y5g5+|x@CG<{jPy;23n^yYzGa@#60vo zDN(Q5MA-5O4l!01ede%Iwe@91)%4kVYEN)T?NI2c7U9$sp7Gc3!g1MP365cYk%@z) z!te+XGZF?PWzCx-CoTptkPawS9!>>DEG7y9n%sgf599A>D9d_GiffuAb#b1v(eS$G zW1Q!#G}NV-BFk}8*%f$VYBOl~CI`%*;c*V&X?Tva&!FKi95921Hw8a$JbMNWS8)JO z!~Vd|)6mSDreWr#gse&`)zkq)HPwN<6*u&RqN%geP-AhG5Yr$D(^4&yQUjbV3RJcT zv5=06p=gYyy1Vbs_Oz$*c^X0R!G88mj~_W!%)YlYUiq-%+<*@`{#1jWdw50mrQJ_2 zylts^4*w>`fs^01)ondMg8+$vo?K!OYwX4OwYYr~z*wqyBzN}2d zvDp1hCobyw$;?3(TeZ3W#ka7|Y+K*5`HS*x?|=WtEf02VZN*j(tLL5bi!XH?UAyAt z3(F@*u$6C}y3iWEV`c7z-ZPipm^j;42mEHQP4@q|e{cHW*~OcB=Ka1T*V)!pt6lZI XAm6one%+UEA5S0Z9ec768b1Cfqjx@6g{OTZ|`g&@3&_U7SaMiXzxd;;-LH=+kv|9M#Mrwb4a0oZktf5E9{$ev~ zxh-roR<7x*Jy1Nlg9Vq-O~3#^dX`1uzID{rQIkZZi?WP5ks(SliXy|*3~7X8!a;A& zJjO9sEX7pJs4htw(%xrGXibC~oWivRJn|(6tm1*I9I%N3kXd|mh=f_0VO`V2?X;N# zmhphW0ku4^hXa;VAozJ3e{EL(LP*Fmm`HCZIsF_5O|uBp4oq!fQZ zP7Tf3LA_$ovPvd#3341&v~*W>ER8s#OEVQqCbq8V$h9h&f6s40*I+O|QCayjQ;xKh za@I3`kWZB}AsqOZa->KD{5%|KaN)tip8u5du$%K>D)eJ7j|*;p-W$M$95@=v=X|=) z0w$(QHp6cID$-Cv{Js2WF2Gz!#I87aSKk~vDAXAYiiYM{5L{qLk?No;blhCGM$|N9 zO;QlYTw1pQ^C6D8lx}5Fi<+V;$x4wi!By$@c@C)3?Qss^b$f=3SLyaF2UO|yM-Hgc zZ9V&D0nY38ZWf1f2D){TPLNAfMY2uPwuqycI#zT--*k)>2UDs)c-=adEgPyKnKrg9 
zMMj#W>5f4xB3gtfHdYN+vPGiDIy)aocBSI5K_fM2<*(kk@l&(?$@eyBr4LJmmC(uY zL-lI|cvm6#;aJNNazcpBwFs99#*iR40HU!v;pV;@}HZ4FA#}^!hT;Q9QB2Q z4CBT(%l^ zu4xi~ir9JCd~A!bnq3C+#(Q(hLdXO>uw?Ul2QQyHd*;q){XHMYyyv6mZ?9+FOwElR zr8INLG}W_oM>i2?KCS=?0Foob3zsd#x&SN3BAMlR)QA+8u)XEs&Fk zb5>z1D0r-#YS|Vt43X7jTQoGuGFTgD3L0hQ_- zH=l7Lou9qc>>Kgi;xFiXdouCinw2-)dU4+Ssh1sITzKN*<5zo3&lQI4x}$9uc@gvn z3YSI$#0e&=NZ?Kq_w{+FcvXd$IN=v6a(3QjBNxG@ut3e(c*nDnS7k+x7Z8aAlf%2U z6p+I^odRgyZKwP>ynBHHa(FjN0Xe+;hyrNd{XzUN&Mfb2BwNU~M1j*)Ro5&-P-Rn) zE&QdLreK&jG%Ymm3{B@1QQ=hG)HQ)e5+})qVrdquS(c!iqGEG8YstRG#`{7|;Q$mU z#|o>w(*1S+q2n7v?=F^{51dUUu#^5Li^Ry*6`>ago}Rm8G5;<6eaHvT{or?Rh`sm8 zPU-vUzBnxGT3N+q@l>jIc)K_GYgHlyt2uM9b2K%+p>D8xG`YTf@(3(p%;BD!IB_VF zY-o>0ZSAdl4#S+SZ7o%W z^L78RpZ{F=Ky*tj%yu;|g2n=1QY-{q&EQrMX4cXbH_l5giz5nWKdbmF@(wx}U;p#g5H_%W*{r~^~ diff --git a/tests/data/ewf.E01 b/tests/data/ewf.E01 deleted file mode 100644 index ba270b1331b3852cd6972d59179be6c62d672f8d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7630 zcmZ<_bK~UY`p>|~$iTpmk(!v2T4V%dL4XNJ5D3(vG=xW({K>TPcm-qWglxVh1A&(3 z>-TnZcCB}=U=N z-k#FE>!82pwGDD}4)(2b(77nO>rvUVZBCiXneI>9&bwW1zwRHeeMgpih4Rjde_yWk z+GMtN%=_Ky*&p5|=-x?R=-s(2}XJ?{-xK9<<;|E2cD^~Yb=&o+PVyQ_IG z``zuUW#4{g(s^wA5IvZpnIJ(zYA`7>XIvQe!GuUHlFShHK7PwmzEU%jN70#d4(w_7t`WqwNoy~4t&hb z+Vk>6{<;U6*4e+RpHEvQ6Wi6~e`-;T`O-5NF715y%=rud{zs}-=j6l1?_O?w@vfWu z`iE-~pBY!*(1SbQ_43vF9*a4&gm{>SKG#Y>g4vMG) zl($kLdr5_vLsQ#Z9np-CQ_oNojdDgqU^E1VWC;9V_o*sQO)kkVDu%oNk~oAJC;%5C zis8q?D0*-0zW1YeGz>=5z-Ss6O#`E8U^ESkrh(BkFq#HN)4*sN7)=ACX<#%B4DU3+ zwf{l!qmsm=9Hene20IDJ_(2X)!w3jPrB(GMO!J5wa^Ku_O`UItB(ISqP8R0a-Jq$^9U^K-d9f#OQ!5C=^D)Xb6n>5C9Fx zW+WD8AO}Pfw45nG79@hbg=xxaFQw4%hap8b!(PWf_IqnR7hRh^$B~06`FY62H3Q5( MbhU)|ZcLif0qLtz!vFvP diff --git a/tests/test_ad1.py b/tests/test_ad1.py index 97ed03b..b82cc48 100644 --- a/tests/test_ad1.py +++ b/tests/test_ad1.py @@ -1,30 +1,50 @@ from __future__ import annotations import hashlib +from datetime import datetime, timezone from typing import 
BinaryIO from dissect.evidence import ad1 +from dissect.evidence.ad1.ad1 import EntryType, MetaType def test_ad1(ad1_data: BinaryIO) -> None: - a = ad1.AD1(ad1_data) + """Test if we can parse a basic non-segmented AD1 file with no file hierarchy.""" - assert a.header.magic == b"ADSEGMENTEDFILE\x00" - assert a.root.name == b"E:\\AD1_test" - assert len(a.root.children) == 2 - assert a.root.children[0].name == b"doc1.txt" - assert a.root.children[0].open().read() == b"Inhoud document 1" + fs = ad1.AD1(ad1_data) + assert fs.segments[0].header.magic == b"ADSEGMENTEDFILE\x00" + + assert fs.root.is_dir() + assert list(fs.root.listdir()) == ["E:"] + + file = fs.get("E:/AD1_test/doc1.txt") + assert file.is_file() + assert file.size == 17 + assert file.atime == datetime(2017, 3, 31, 18, 2, 31, 189682, tzinfo=timezone.utc) + assert file.open().read() == b"Inhoud document 1" def test_ad1_long(ad1_data_long: BinaryIO) -> None: - a = ad1.AD1(ad1_data_long) + """Test if we can parse a basic non-segmented AD1 file with long file names.""" + + fs = ad1.AD1(ad1_data_long) + + assert fs.segments[0].header.magic == b"ADSEGMENTEDFILE\x00" + assert fs.root.is_dir() + + assert [file.name for file in fs.root.children] == ["E:"] - assert a.header.magic == b"ADSEGMENTEDFILE\x00" - assert a.root.name == b"E:\\testdatamap 2 met spaties en een heel stuk langer" - assert len(a.root.children) == 2 + assert [file.name for file in fs.get("E:").children] == [ + "testdatamap 2 met spaties en een heel stuk langer", + ] - entry = a.root.children[0] - assert entry.name == b"een lange filenaam 1 met spaties.txt" + assert [file.name for file in fs.get("E:/testdatamap 2 met spaties en een heel stuk langer").iterdir()] == [ + "een lange filenaam 1 met spaties.txt", + "Een nog langere bestandsnaam met nog meer tekens en 12345.txt", + ] + + entry = fs.get("E:/testdatamap 2 met spaties en een heel stuk langer").children[0] + assert entry.name == "een lange filenaam 1 met spaties.txt" assert 
entry.open().read() == ( b"masdhdslkfjasdfjlksadjflkjsda;lfj\r\nasdflk\r\na;lsdkf\r\n" b";lasdklf;lkasd\r\n;lk\r\nfask;ldkf\r\n;lka\r\nsd;lkf\r\n" @@ -37,6 +57,68 @@ def test_ad1_long(ad1_data_long: BinaryIO) -> None: def test_ad1_compressed(ad1_data_compressed: BinaryIO) -> None: - a = ad1.AD1(ad1_data_compressed) + """Test if we can parse a non-segmented AD1 file with standard zlib compression.""" + + fs = ad1.AD1(ad1_data_compressed) + + assert fs.segments[0].header.magic == b"ADSEGMENTEDFILE\x00" + + assert fs.get("/").listdir() == ["E:"] + assert fs.get("E:/AD1_test").listdir() == ["doc1.txt", "doc2.txt"] + assert fs.get("E:/AD1_test/doc1.txt").open().read() == b"Inhoud document 1" + + +def test_ad1_segmented(ad1_data_segmented: list[BinaryIO]) -> None: + """Test if we can parse segmented AD1 files. + + References: + - https://github.com/pcbje/pyad1/tree/master/test_data + """ + + fs = ad1.AD1(ad1_data_segmented) + + assert len(fs.segments) == 4 + assert len(fs.stream._runs) == 4 + assert fs.segments[0].number == 1 + assert fs.segments[0].count == 4 + assert fs.segments[0].size == 0x200000 + + assert fs.logical_image.version == 4 + assert fs.logical_image.name == b"C:\\Users\\pcbje\\Desktop\\Data" + + dir = fs.get("C:/Users/pcbje/Desktop/Data/Pictures") + assert dir.is_dir() + assert not dir.is_symlink() + assert not dir.is_file() + assert dir.name == "Pictures" + assert dir.type == EntryType.Directory + assert dir.size == 0 + assert dir.btime == datetime(2018, 5, 2, 7, 34, 11, 284926, tzinfo=timezone.utc) + assert dir.mtime == datetime(2018, 5, 2, 7, 42, 39, 841574, tzinfo=timezone.utc) + assert dir.ctime == datetime(2018, 5, 2, 7, 42, 39, 841574, tzinfo=timezone.utc) + assert dir.atime == datetime(2018, 5, 2, 7, 46, 59, 164650, tzinfo=timezone.utc) + assert dir.listdir() == [ + "0-0-581-Hydrangeas.jpg", + "1-0-858-Chrysanthemum.jpg", + "2-0-826-Desert.jpg", + "4-0-757-Jellyfish.jpg", + "5-0-762-Koala.jpg", + "6-0-548-Lighthouse.jpg", + 
"7-0-759-Penguins.jpg", + ] + + picture = fs.get("C:/Users/pcbje/Desktop/Data/Pictures/5-0-762-Koala.jpg") + assert picture.is_file() + assert not picture.is_dir() + assert not picture.is_symlink() + assert picture.btime == datetime(2018, 1, 28, 7, 18, 0, tzinfo=timezone.utc) + assert picture.mtime == datetime(2018, 5, 2, 7, 42, 34, 287014, tzinfo=timezone.utc) + assert picture.ctime == datetime(2018, 5, 2, 7, 42, 34, 287014, tzinfo=timezone.utc) + assert picture.atime == datetime(2018, 5, 2, 7, 42, 35, 611785, tzinfo=timezone.utc) - assert a.root.children[0].open().read() == b"Inhoud document 1" + buf = picture.open().read() + assert picture.name == "5-0-762-Koala.jpg" + assert picture.size == 780831 + assert len(buf) == 780831 + assert picture.get_meta(MetaType.SHA1).data == b"9c3dcb1f9185a314ea25d51aed3b5881b32f420c" + assert hashlib.sha1(buf).hexdigest() == "9c3dcb1f9185a314ea25d51aed3b5881b32f420c" diff --git a/tests/test_ewf.py b/tests/test_ewf.py index 08a1da6..1fc428a 100644 --- a/tests/test_ewf.py +++ b/tests/test_ewf.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, BinaryIO from unittest.mock import MagicMock, patch -from dissect.evidence import ewf +from dissect.evidence.ewf import ewf if TYPE_CHECKING: import pytest @@ -16,7 +16,7 @@ def test_ewf(ewf_data: BinaryIO) -> None: assert e.open().read(4097) == (b"\xde\xad\xbe\xef" * 1024) + b"\n" -@patch("dissect.evidence.ewf.Segment") +@patch("dissect.evidence.ewf.ewf.Segment") def test_ewf_open_segment(MockSegment: MagicMock, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr(ewf, "MAX_OPEN_SEGMENTS", 2) From 037c000ac9caea8c6038a75e3699561c7e0305fb Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 1 Sep 2025 11:55:22 +0200 Subject: [PATCH 3/9] add adcrypt implementation --- dissect/evidence/adcrypt/__init__.py | 9 ++ dissect/evidence/adcrypt/adcrypt.py | 143 ++++++++++++++++++ dissect/evidence/adcrypt/c_adcrypt.py | 40 +++++ 
dissect/evidence/adcrypt/c_adcrypt.pyi | 59 ++++++++ dissect/evidence/tools/adcrypt.py | 66 ++++++++ dissect/evidence/tools/utils.py | 29 ++++ pyproject.toml | 4 + .../ad1/encrypted-certificate/encrypted.ad1 | 3 + .../encrypted-certificate/encrypted.ad1.csv | 3 + .../encrypted-certificate/encrypted.ad1.txt | 3 + .../ad1/encrypted-certificate/encrypted.ad2 | 3 + .../ad1/encrypted-certificate/encrypted.ad3 | 3 + .../ad1/encrypted-certificate/encrypted.ad4 | 3 + .../ad1/encrypted-certificate/encrypted.ad5 | 3 + .../ad1/encrypted-certificate/encrypted.ad6 | 3 + .../ad1/encrypted-certificate/encrypted.ad7 | 3 + tests/_data/ad1/encrypted-certificate/key | 3 + tests/_data/ad1/encrypted-certificate/key.pub | 3 + .../ad1/encrypted-passphrase/encrypted.ad1 | 3 + .../encrypted-passphrase/encrypted.ad1.csv | 3 + .../encrypted-passphrase/encrypted.ad1.txt | 3 + .../ad1/encrypted-passphrase/encrypted.ad10 | 3 + .../ad1/encrypted-passphrase/encrypted.ad11 | 3 + .../ad1/encrypted-passphrase/encrypted.ad12 | 3 + .../ad1/encrypted-passphrase/encrypted.ad13 | 3 + .../ad1/encrypted-passphrase/encrypted.ad2 | 3 + .../ad1/encrypted-passphrase/encrypted.ad3 | 3 + .../ad1/encrypted-passphrase/encrypted.ad4 | 3 + .../ad1/encrypted-passphrase/encrypted.ad5 | 3 + .../ad1/encrypted-passphrase/encrypted.ad6 | 3 + .../ad1/encrypted-passphrase/encrypted.ad7 | 3 + .../ad1/encrypted-passphrase/encrypted.ad8 | 3 + .../ad1/encrypted-passphrase/encrypted.ad9 | 3 + tests/conftest.py | 29 ++++ tests/test_adcrypt.py | 34 +++++ 35 files changed, 491 insertions(+) create mode 100644 dissect/evidence/adcrypt/__init__.py create mode 100644 dissect/evidence/adcrypt/adcrypt.py create mode 100644 dissect/evidence/adcrypt/c_adcrypt.py create mode 100644 dissect/evidence/adcrypt/c_adcrypt.pyi create mode 100644 dissect/evidence/tools/adcrypt.py create mode 100644 dissect/evidence/tools/utils.py create mode 100644 tests/_data/ad1/encrypted-certificate/encrypted.ad1 create mode 100644 
tests/_data/ad1/encrypted-certificate/encrypted.ad1.csv create mode 100644 tests/_data/ad1/encrypted-certificate/encrypted.ad1.txt create mode 100644 tests/_data/ad1/encrypted-certificate/encrypted.ad2 create mode 100644 tests/_data/ad1/encrypted-certificate/encrypted.ad3 create mode 100644 tests/_data/ad1/encrypted-certificate/encrypted.ad4 create mode 100644 tests/_data/ad1/encrypted-certificate/encrypted.ad5 create mode 100644 tests/_data/ad1/encrypted-certificate/encrypted.ad6 create mode 100644 tests/_data/ad1/encrypted-certificate/encrypted.ad7 create mode 100644 tests/_data/ad1/encrypted-certificate/key create mode 100644 tests/_data/ad1/encrypted-certificate/key.pub create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad1 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad1.csv create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad1.txt create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad10 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad11 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad12 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad13 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad2 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad3 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad4 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad5 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad6 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad7 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad8 create mode 100644 tests/_data/ad1/encrypted-passphrase/encrypted.ad9 create mode 100644 tests/test_adcrypt.py diff --git a/dissect/evidence/adcrypt/__init__.py b/dissect/evidence/adcrypt/__init__.py new file mode 100644 index 0000000..7284de1 --- /dev/null +++ b/dissect/evidence/adcrypt/__init__.py @@ -0,0 +1,9 @@ +from 
__future__ import annotations + +from dissect.evidence.adcrypt.adcrypt import ADCrypt +from dissect.evidence.exceptions import Error + +__all__ = [ + "ADCrypt", + "Error", +] diff --git a/dissect/evidence/adcrypt/adcrypt.py b/dissect/evidence/adcrypt/adcrypt.py new file mode 100644 index 0000000..7fc1e2d --- /dev/null +++ b/dissect/evidence/adcrypt/adcrypt.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +from pathlib import Path +from typing import BinaryIO + +from dissect.evidence.adcrypt.c_adcrypt import c_adcrypt + +try: + from Crypto import Hash + from Crypto.Cipher import AES, PKCS1_v1_5 + from Crypto.Hash import HMAC + from Crypto.Protocol.KDF import PBKDF2 + from Crypto.PublicKey import RSA + from Crypto.Util import Counter + + HAS_CRYPTO = True +except ImportError: + HAS_CRYPTO = False + + +class ADCrypt: + """Access Data ADCRYPT encrypted container implementation. + + References: + - Reversing adencrypt.dll + - https://github.com/libyal/libewf/blob/main/documentation/Expert%20Witness%20Compression%20Format%20(EWF).asciidoc#7-ad-encryption + - https://github.com/log2timeline/plaso/issues/2726#issuecomment-517444736 + """ + + def __init__(self, fhs: BinaryIO | list[BinaryIO]): + self.fhs = fhs if isinstance(fhs, list) else [fhs] + self.segments: list[ADCryptSegment] = [] + + try: + self.header: c_adcrypt.Header = c_adcrypt.Header(self.fhs[0]) + except EOFError: + raise ValueError("File handle is not an ADCRYPT container: Unable to read ADCRYPT header") + + if self.header.magic != c_adcrypt.ADCRYPT_MAGIC.encode(): + raise ValueError(f"File handle is not an ADCRYPT container: Unknown magic {self.header.magic!r}") + + if self.header.version != 1: + raise ValueError(f"Unsupported ADCRYPT container version {self.header.version!r}") + + for i, fh in enumerate(self.fhs): + self.segments.append(ADCryptSegment(fh, i)) + # TODO: We should probably create a mapping stream. 
+ + def decrypt(self, *, passphrase: str | bytes | None = None, private_key: Path | BinaryIO | None = None) -> None: + """Attempt to decrypt all ADCRYPT segment files. + + Raises: + ImportError if dependencies are missing. + ValueError if decryption failed. + """ + + if not HAS_CRYPTO: + raise ImportError("Missing required dependency 'pycryptodome' for ADCRYPT decryption.") + + if all(segment.decrypted for segment in self.segments): + return + + if not private_key and isinstance(passphrase, str): + passphrase = passphrase.encode() + + # If a private key was used, the passphrase is empty. + passphrase_hash = b"" + + if passphrase and not private_key: + hash = Hash.new(self.header.hash_algo.name) + hash.update(passphrase) + passphrase_hash = hash.digest() + + # If no private key was used, the "encrypted" salt is the plaintext salt as-is. + salt = self.header.enc_salt + + # Decrypt the salt if a private key was provided. + if private_key: + rsa_key = RSA.import_key( + private_key.read_bytes() if isinstance(private_key, Path) else private_key, passphrase + ) + pkcs_cipher = PKCS1_v1_5.new(rsa_key) + if not (salt := pkcs_cipher.decrypt(self.header.enc_salt, sentinel=None, expected_pt_len=16)): + raise ValueError("Failed to decrypt salt using provided private key") + + key_len = self.header.key_len + count = self.header.pbkdf2_count + pkey = PBKDF2(passphrase_hash, salt, key_len, count) + + # Verify the HMAC of EKEY using PKEY + hash algo, comparing with header HMAC + hmac = HMAC.new(pkey, digestmod=Hash.new(self.header.hash_algo.name)) + hmac.update(self.header.enc_key) + try: + hmac.verify(self.header.hmac_enc_key) + except ValueError as e: + raise ValueError("Unable to decrypt: HMAC verification of passphrase failed") from e + + # Decrypt EKEY using PKEY + # TODO: Set counter bit length according to EncAlgo + ctr = Counter.new(128, initial_value=0, little_endian=True) + cipher = AES.new(pkey, AES.MODE_CTR, counter=ctr) + fkey = cipher.decrypt(self.header.enc_key) + 
self.key = fkey + + for segment in self.segments: + segment.decrypt(self.key) + + +class ADCryptSegment: + def __init__(self, fh: BinaryIO, index: int): + self.index = index + self.fh = fh + self.decrypted = False + + def __repr__(self) -> str: + return f"" + + def decrypt(self, fkey: bytes) -> None: + """Prepare this segment for decrypted reading.""" + + if self.decrypted: + return + + # TODO: Set counter bit length according to EncAlgo + ctr = Counter.new(128, initial_value=self.index << 64, little_endian=True) + cipher = AES.new(fkey, AES.MODE_CTR, counter=ctr) + + # Offset for ADCRYPT header in first segment. + # TODO: We should use the header size as offset, it could be different than 512. + if self.index == 0: + self.fh.seek(512) + + self.key = fkey + self._cipher = cipher + self.decrypted = True + + # TODO: Check for plaintext headers, e.g. b"ADSEGMENTEDFILE", b"ADLOGICALIMAGE", b"EVF\x09\x0d\x0a\xff\x00" + # and b"LVF\x09\x0d\x0a\xff\x00". + + def read(self, blocks: int | None = None) -> bytes: + # TODO: Since AES CTR mode is used, we can seek to an offset of the ciphertext and calculate the counter value + # based on the offset (random block read). + return self._cipher.decrypt(self.fh.read(blocks * 16 if blocks else None)) diff --git a/dissect/evidence/adcrypt/c_adcrypt.py b/dissect/evidence/adcrypt/c_adcrypt.py new file mode 100644 index 0000000..650be9e --- /dev/null +++ b/dissect/evidence/adcrypt/c_adcrypt.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +adcrypt_def = """ +#define ADCRYPT_MAGIC ADCRYPT\00 + +enum EncAlgo : uint32 { + AES128 = 0x1, + AES192 = 0x2, + AES256 = 0x3, +}; + +enum HashAlgo : uint32 { + SHA256 = 0x1, + SHA512 = 0x2, +}; + +struct Header { + char magic[8]; // b"ADCRYPT\x00" + uint32 version; // 0x01 + uint32 header_size; // 0x200 aka offset enc data + int16 num_passwords; // always -0x1 ? + int16 num_raw_keys; // always -0x1 ? + int16 num_certificates; // always -0x1 ? 
+ char reserved[2]; // 00 00 + EncAlgo enc_algo; + HashAlgo hash_algo; + uint32 pbkdf2_count; + uint32 salt_len; + uint32 key_len; + uint32 hmac_len; + char enc_salt[salt_len]; + char enc_key[key_len]; + char hmac_enc_key[hmac_len]; + // padding until 0x200 +}; +""" + +c_adcrypt = cstruct().load(adcrypt_def) diff --git a/dissect/evidence/adcrypt/c_adcrypt.pyi b/dissect/evidence/adcrypt/c_adcrypt.pyi new file mode 100644 index 0000000..1bda902 --- /dev/null +++ b/dissect/evidence/adcrypt/c_adcrypt.pyi @@ -0,0 +1,59 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, Literal, overload + +import dissect.cstruct as __cs__ +from typing_extensions import TypeAlias + +class _c_adcrypt(__cs__.cstruct): + ADCRYPT_MAGIC: Literal["ADCRYPT\x00"] = ... + class EncAlgo(__cs__.Enum): + AES128 = ... + AES192 = ... + AES256 = ... + + class HashAlgo(__cs__.Enum): + SHA256 = ... + SHA512 = ... + + class Header(__cs__.Structure): + magic: __cs__.CharArray + version: _c_adcrypt.uint32 + header_size: _c_adcrypt.uint32 + num_passwords: _c_adcrypt.int16 + num_raw_keys: _c_adcrypt.int16 + num_certificates: _c_adcrypt.int16 + reserved: __cs__.CharArray + enc_algo: _c_adcrypt.EncAlgo + hash_algo: _c_adcrypt.HashAlgo + pbkdf2_count: _c_adcrypt.uint32 + salt_len: _c_adcrypt.uint32 + key_len: _c_adcrypt.uint32 + hmac_len: _c_adcrypt.uint32 + enc_salt: __cs__.CharArray + enc_key: __cs__.CharArray + hmac_enc_key: __cs__.CharArray + @overload + def __init__( + self, + magic: __cs__.CharArray | None = ..., + version: _c_adcrypt.uint32 | None = ..., + header_size: _c_adcrypt.uint32 | None = ..., + num_passwords: _c_adcrypt.int16 | None = ..., + num_raw_keys: _c_adcrypt.int16 | None = ..., + num_certificates: _c_adcrypt.int16 | None = ..., + reserved: __cs__.CharArray | None = ..., + enc_algo: _c_adcrypt.EncAlgo | None = ..., + hash_algo: _c_adcrypt.HashAlgo | None = ..., + pbkdf2_count: _c_adcrypt.uint32 | None = ..., + salt_len: _c_adcrypt.uint32 | None = ..., + key_len: 
_c_adcrypt.uint32 | None = ..., + hmac_len: _c_adcrypt.uint32 | None = ..., + enc_salt: __cs__.CharArray | None = ..., + enc_key: __cs__.CharArray | None = ..., + hmac_enc_key: __cs__.CharArray | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + +# Technically `c_adcrypt` is an instance of `_c_adcrypt`, but then we can't use it in type hints +c_adcrypt: TypeAlias = _c_adcrypt diff --git a/dissect/evidence/tools/adcrypt.py b/dissect/evidence/tools/adcrypt.py new file mode 100644 index 0000000..892f24a --- /dev/null +++ b/dissect/evidence/tools/adcrypt.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import argparse +import logging +from pathlib import Path + +from dissect.evidence.ad1.ad1 import find_files as find_ad1_files +from dissect.evidence.adcrypt.adcrypt import ADCrypt +from dissect.evidence.ewf.ewf import find_files as find_ewf_files +from dissect.evidence.tools.utils import catch_sigpipe + +logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]\t%(message)s") +log = logging.getLogger(__name__) + + +@catch_sigpipe +def main() -> None: + help_formatter = argparse.ArgumentDefaultsHelpFormatter + parser = argparse.ArgumentParser( + prog="adecrypt", + description="Decrypt E01 or AD1 ADCRYPT encrypted segment files.", + fromfile_prefix_chars="@", + formatter_class=help_formatter, + ) + + parser.add_argument("input", type=Path, help="path to encrypted file") + parser.add_argument("-p", "--passphrase", type=str, help="user passphrase or certificate passphrase") + parser.add_argument("-c", "--certificate", type=Path, help="user certificate") + parser.add_argument("-o", "--output", type=Path, required=True, help="path to output file") + + args = parser.parse_args() + + in_path = args.input.resolve() + out_path = args.output.resolve() + + if not in_path.exists(): + parser.exit(f"Input file doesn't exist: {in_path}") + + if not out_path.is_dir(): + parser.exit(f"Output dir does 
not exist: {out_path}") + + if in_path.parent == out_path: + parser.exit("Output dir cannot be same as parent of input file") + + if not args.passphrase and not args.certificate: + parser.exit("No passphrase or certificate provided") + + segment_paths = find_ad1_files(in_path) if in_path.suffix.lower() == ".ad1" else find_ewf_files(in_path) + + adcrypt = ADCrypt([path.open("rb") for path in segment_paths]) + + try: + adcrypt.decrypt(passphrase=args.passphrase, private_key=args.certificate) + except (ValueError, TypeError) as e: + log.exception(e, exc_info=False) # noqa: TRY401 + parser.exit(1) + + log.info("Calculated decryption keys for %s segment files (%r)", len(segment_paths), segment_paths[0].name) + + for i, segment in enumerate(adcrypt.segments): + with out_path.joinpath(segment_paths[i].name).open("wb") as fh: + size = segment_paths[i].lstat().st_size // 1024 // 1024 + log.info("Decrypting segment file %r (%s MB) ..", segment_paths[i].name, size) + fh.write(segment.read()) + + log.info("Finished decrypting file(s), result saved to %s", out_path) diff --git a/dissect/evidence/tools/utils.py b/dissect/evidence/tools/utils.py new file mode 100644 index 0000000..de18479 --- /dev/null +++ b/dissect/evidence/tools/utils.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +import errno +import os +import sys +from functools import wraps +from typing import Callable + + +def catch_sigpipe(func: Callable) -> Callable: + """Catches ``KeyboardInterrupt`` and ``BrokenPipeError`` (``OSError 22`` on Windows).""" + + @wraps(func) + def wrapper(*args, **kwargs) -> int: + try: + return func(*args, **kwargs) + except KeyboardInterrupt: + print("Aborted!", file=sys.stderr) + return 1 + except OSError as e: + # Only catch BrokenPipeError or OSError 22 + if e.errno in (errno.EPIPE, errno.EINVAL): + devnull = os.open(os.devnull, os.O_WRONLY) + os.dup2(devnull, sys.stdout.fileno()) + return 1 + # Raise other exceptions + raise + + return wrapper diff --git a/pyproject.toml 
b/pyproject.toml index ecea7f1..eeb4ef5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ classifiers = [ dependencies = [ "dissect.cstruct>=4,<5", "dissect.util>=3,<4", + "pycryptodome>=3.23.0,<4", ] dynamic = ["version"] @@ -39,6 +40,7 @@ repository = "https://github.com/fox-it/dissect.evidence" dev = [ "dissect.cstruct>=4.0.dev,<5.0.dev", "dissect.util>=3.0.dev,<4.0.dev", + "pycryptodome>=3.23.0.dev,<4.0.dev", ] [project.scripts] @@ -47,6 +49,8 @@ asdf-meta = "dissect.evidence.tools.asdf.meta:main" asdf-repair = "dissect.evidence.tools.asdf.repair:main" asdf-verify = "dissect.evidence.tools.asdf.verify:main" +adecrypt = "dissect.evidence.tools.adcrypt:main" + [tool.ruff] line-length = 120 required-version = ">=0.9.0" diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad1 b/tests/_data/ad1/encrypted-certificate/encrypted.ad1 new file mode 100644 index 0000000..e1db39c --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8b641de1a8490bd8dac6dab4b5252358d39ffd07ad71091581efe65e134091 +size 10486272 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad1.csv b/tests/_data/ad1/encrypted-certificate/encrypted.ad1.csv new file mode 100644 index 0000000..9e6a4f5 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad1.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c103df1e0fd8809e542902bec8bd47c139a953a7b1052f985c3bc393321ffb +size 5346 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad1.txt b/tests/_data/ad1/encrypted-certificate/encrypted.ad1.txt new file mode 100644 index 0000000..b9a2011 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad1.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905b94bfb16a1a2abbcc84b6d096ed7d6543024bf420012c810b3c8e9e4df384 +size 1333 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad2 
b/tests/_data/ad1/encrypted-certificate/encrypted.ad2 new file mode 100644 index 0000000..0e29206 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b55aa38b1309fa0919d886be0785d8732611a81b706e5e56278dda2c2c733e9a +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad3 b/tests/_data/ad1/encrypted-certificate/encrypted.ad3 new file mode 100644 index 0000000..b72aae7 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07cb83a405d770a55ae1822200d4a97c17825555e98efe46c9aef3a81d4b2471 +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad4 b/tests/_data/ad1/encrypted-certificate/encrypted.ad4 new file mode 100644 index 0000000..dfc462c --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00e980bc5874e0d04931bec2966f7972d0685b99dddda81b91a3e1924e10958 +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad5 b/tests/_data/ad1/encrypted-certificate/encrypted.ad5 new file mode 100644 index 0000000..5f6d8ad --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4094dc5068809feffa4a76726f559c0dc2ab83be0caf999b4dd01e55fec891a +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad6 b/tests/_data/ad1/encrypted-certificate/encrypted.ad6 new file mode 100644 index 0000000..eea05c2 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad6 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6ca8b068dc011bdce2b5d95e6773e1370c717cee6378102fe41a7ee9f81d27 +size 10485760 diff --git a/tests/_data/ad1/encrypted-certificate/encrypted.ad7 b/tests/_data/ad1/encrypted-certificate/encrypted.ad7 new file 
mode 100644 index 0000000..e20baa7 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/encrypted.ad7 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ab919a239e060a95b0b7ae07b211d4141c3574c6da19054e4e135dd5492397 +size 4879306 diff --git a/tests/_data/ad1/encrypted-certificate/key b/tests/_data/ad1/encrypted-certificate/key new file mode 100644 index 0000000..55ec2fb --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/key @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25ae23283b944d75442d3e68251965360f332d90be0ff4a6e705f14172fb3e5 +size 1679 diff --git a/tests/_data/ad1/encrypted-certificate/key.pub b/tests/_data/ad1/encrypted-certificate/key.pub new file mode 100644 index 0000000..25c9382 --- /dev/null +++ b/tests/_data/ad1/encrypted-certificate/key.pub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2726fe4fe1b5f94c427d320bcc5800c8ada22b5c68172477decbe04af79d6e9d +size 451 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad1 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1 new file mode 100644 index 0000000..a880bd8 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8126f55a545935a465a3b632bbced287b2843fae2a5f398c48d8a98e1bdbd26a +size 5243392 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.csv b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.csv new file mode 100644 index 0000000..fc1c322 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b35e282a4e3779daf0064ea2cc8151ce5595b4785553d9cae41c80cb88b39bc7 +size 4442 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.txt b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.txt new file mode 100644 index 0000000..5092ebd --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad1.txt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f365b39abd294b05f970a257bd5ce139422db8413b1aa160f4d396125c596756 +size 1591 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad10 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad10 new file mode 100644 index 0000000..2b6b155 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad10 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd03b931de0bc28660aa2ce48837483ec11d84ee40bacbaabc8128e1d10e6227 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad11 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad11 new file mode 100644 index 0000000..8ddd568 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad11 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5f556af11969a548d7885582424d10aa9c4419f59f5a82430c535df196fd78 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad12 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad12 new file mode 100644 index 0000000..d333422 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad12 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb8d89a1fc55d10afafeb4e547d950b24a21e299108bc3495c5efc66b81d516 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad13 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad13 new file mode 100644 index 0000000..8f9c02d --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad13 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79d0afc7c4c1be7d538caea99fcc076e8f70f6dd7f1fa7db8594d508ced8156 +size 4879997 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad2 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad2 new file mode 100644 index 0000000..e8a7ca4 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f11fc38388348b8634a37d18076630b3b76b19ff06a6598cffe8ea8c6945ed53 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad3 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad3 new file mode 100644 index 0000000..8664b80 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2dc33da7512f67d3328c9fa324560e191bbb8e1d9e2d33b8ffcb7972715771 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad4 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad4 new file mode 100644 index 0000000..43437b3 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c407b3c6efb7b52f47a3c481c3386b2b9581cdd8bbde3bd239212199c68cd221 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad5 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad5 new file mode 100644 index 0000000..1cc85cf --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5486f180f12f40d608615acb61bcca4b707ecfb63786059eec6d43d37bcf8f +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad6 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad6 new file mode 100644 index 0000000..08bb7f9 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad6 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98843957581e49c0a11d946d01001eb34e4a733b2abfa6ed15f4af5f0e4ab5e0 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad7 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad7 new file mode 100644 index 0000000..d977c17 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad7 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37671e847c1f69a284e338c27c7003907f3b25720c22b77bb744c0d32b3c8520 +size 
5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad8 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad8 new file mode 100644 index 0000000..2253c79 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad8 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25e381827e9ce27902951453d78e0e9b315b539efa6da9f9411f76c4e5d1530 +size 5242880 diff --git a/tests/_data/ad1/encrypted-passphrase/encrypted.ad9 b/tests/_data/ad1/encrypted-passphrase/encrypted.ad9 new file mode 100644 index 0000000..1227e24 --- /dev/null +++ b/tests/_data/ad1/encrypted-passphrase/encrypted.ad9 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f15ca2877bb31870c68cf4997565a33150d0ceaed2aee731aaff72b24f74681 +size 5242880 diff --git a/tests/conftest.py b/tests/conftest.py index 318d0d1..67fda08 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -43,7 +43,36 @@ def ad1_data_segmented() -> list[BinaryIO]: ] +@pytest.fixture +def ad1_data_encrypted_passphrase() -> list[BinaryIO]: + return [ + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad1").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad2").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad3").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad4").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad5").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad6").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad7").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad8").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad9").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad10").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad11").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad12").open("rb"), + 
absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad13").open("rb"), + ] + +@pytest.fixture +def ad1_data_encrypted_certificate() -> list[BinaryIO]: + return [ + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad1").open("rb"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad2").open("rb"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad3").open("rb"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad4").open("rb"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad5").open("rb"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad6").open("rb"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad7").open("rb"), + ] @pytest.fixture diff --git a/tests/test_adcrypt.py b/tests/test_adcrypt.py new file mode 100644 index 0000000..ca32763 --- /dev/null +++ b/tests/test_adcrypt.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from typing import BinaryIO + +from dissect.evidence.adcrypt.adcrypt import ADCrypt +from tests._utils import absolute_path + + +def test_adcrypt_ad1_passphrase(ad1_data_encrypted_passphrase: list[BinaryIO]) -> None: + """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" + + adcrypt = ADCrypt(ad1_data_encrypted_passphrase) + adcrypt.decrypt(passphrase="password") + + assert adcrypt.key.hex() == "9030a43f29689a045e815cf4f0ad82b68850063b414f2797f0897e188f98d7b4" + assert all(segment.decrypted for segment in adcrypt.segments) + + plain = adcrypt.segments[0].read(512) + assert plain.startswith(b"ADSEGMENTEDFILE") + assert b"ADLOGICALIMAGE" in plain + + +def test_adcrypt_ad1_certificate(ad1_data_encrypted_certificate: list[BinaryIO]) -> None: + """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" + + adcrypt = ADCrypt(ad1_data_encrypted_certificate) + adcrypt.decrypt(private_key=absolute_path("_data/ad1/encrypted-certificate/key")) + + assert adcrypt.key.hex() == 
"6cc0a9f94f944381cc51be474e5da6178059324bb457a87e0035b80f80ff9d4b" + assert all(segment.decrypted for segment in adcrypt.segments) + + plain = adcrypt.segments[0].read(512) + assert plain.startswith(b"ADSEGMENTEDFILE") + assert b"ADLOGICALIMAGE" in plain From a26647d6a76480ce3285a3c93635d1a45288de4f Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 1 Sep 2025 11:56:06 +0200 Subject: [PATCH 4/9] fix linter and add tests for ad1.find_files --- dissect/evidence/ad1/ad1.py | 7 +++-- dissect/evidence/ad1/c_ad1.pyi | 56 +++++++++++++++++++++++++++++++--- dissect/evidence/asdf/asdf.pyi | 40 +++++++++++++++++++++--- dissect/evidence/ewf/c_ewf.pyi | 2 +- pyproject.toml | 1 + tests/test_ad1.py | 46 +++++++++++++++++++++++++++- 6 files changed, 139 insertions(+), 13 deletions(-) diff --git a/dissect/evidence/ad1/ad1.py b/dissect/evidence/ad1/ad1.py index 099b826..b404dc7 100644 --- a/dissect/evidence/ad1/ad1.py +++ b/dissect/evidence/ad1/ad1.py @@ -20,12 +20,15 @@ MetaType = c_ad1.MetaType FileClassType = c_ad1.FileClassType + def atoi(text: str) -> int | str: return int(text) if text.isdigit() else text -def natural_keys(text: str | Path) -> list[int|str]: + +def natural_keys(text: str | Path) -> list[int | str]: return [atoi(c) for c in re.split(r"(\d+)", str(text))] + def find_files(path: Path) -> set[Path]: files = set() for file in path.parent.iterdir(): @@ -145,7 +148,7 @@ def __init__(self, fh: BinaryIO): self.chunk_size = self.header.chunk_size def __repr__(self) -> str: - return f"" + return f"" # noqa: E501 class FileEntry: diff --git a/dissect/evidence/ad1/c_ad1.pyi b/dissect/evidence/ad1/c_ad1.pyi index 5f60abe..864dc95 100644 --- a/dissect/evidence/ad1/c_ad1.pyi +++ b/dissect/evidence/ad1/c_ad1.pyi @@ -95,7 +95,17 @@ class _c_ad1(__cs__.cstruct): logical_image_offset: _c_ad1.uint32 padding: __cs__.CharArray @overload - def __init__(self, magic: __cs__.CharArray | None = ..., unk1: _c_ad1.uint32 | None = ..., unk2: 
_c_ad1.uint32 | None = ..., segment_number: _c_ad1.uint32 | None = ..., segment_count: _c_ad1.uint32 | None = ..., segment_size: _c_ad1.uint64 | None = ..., logical_image_offset: _c_ad1.uint32 | None = ..., padding: __cs__.CharArray | None = ...): ... + def __init__( + self, + magic: __cs__.CharArray | None = ..., + unk1: _c_ad1.uint32 | None = ..., + unk2: _c_ad1.uint32 | None = ..., + segment_number: _c_ad1.uint32 | None = ..., + segment_count: _c_ad1.uint32 | None = ..., + segment_size: _c_ad1.uint64 | None = ..., + logical_image_offset: _c_ad1.uint32 | None = ..., + padding: __cs__.CharArray | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... @@ -115,7 +125,23 @@ class _c_ad1(__cs__.cstruct): unk3: _c_ad1.uint64 name: __cs__.CharArray @overload - def __init__(self, magic: __cs__.CharArray | None = ..., version: _c_ad1.uint32 | None = ..., unk1: _c_ad1.uint32 | None = ..., chunk_size: _c_ad1.uint32 | None = ..., metadata_offset: _c_ad1.uint64 | None = ..., first_file_offset: _c_ad1.uint64 | None = ..., name_len: _c_ad1.uint32 | None = ..., unk_magic: __cs__.CharArray | None = ..., name_offset: _c_ad1.uint64 | None = ..., attr_guid_offset: _c_ad1.uint64 | None = ..., unk2: _c_ad1.uint64 | None = ..., locs_guid_offset: _c_ad1.uint64 | None = ..., unk3: _c_ad1.uint64 | None = ..., name: __cs__.CharArray | None = ...): ... 
+ def __init__( + self, + magic: __cs__.CharArray | None = ..., + version: _c_ad1.uint32 | None = ..., + unk1: _c_ad1.uint32 | None = ..., + chunk_size: _c_ad1.uint32 | None = ..., + metadata_offset: _c_ad1.uint64 | None = ..., + first_file_offset: _c_ad1.uint64 | None = ..., + name_len: _c_ad1.uint32 | None = ..., + unk_magic: __cs__.CharArray | None = ..., + name_offset: _c_ad1.uint64 | None = ..., + attr_guid_offset: _c_ad1.uint64 | None = ..., + unk2: _c_ad1.uint64 | None = ..., + locs_guid_offset: _c_ad1.uint64 | None = ..., + unk3: _c_ad1.uint64 | None = ..., + name: __cs__.CharArray | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... @@ -130,7 +156,18 @@ class _c_ad1(__cs__.cstruct): name: __cs__.CharArray parent_index: _c_ad1.uint64 @overload - def __init__(self, next: _c_ad1.uint64 | None = ..., child: _c_ad1.uint64 | None = ..., meta: _c_ad1.uint64 | None = ..., zlib_meta: _c_ad1.uint64 | None = ..., size: _c_ad1.uint64 | None = ..., type: _c_ad1.EntryType | None = ..., name_len: _c_ad1.uint32 | None = ..., name: __cs__.CharArray | None = ..., parent_index: _c_ad1.uint64 | None = ...): ... + def __init__( + self, + next: _c_ad1.uint64 | None = ..., + child: _c_ad1.uint64 | None = ..., + meta: _c_ad1.uint64 | None = ..., + zlib_meta: _c_ad1.uint64 | None = ..., + size: _c_ad1.uint64 | None = ..., + type: _c_ad1.EntryType | None = ..., + name_len: _c_ad1.uint32 | None = ..., + name: __cs__.CharArray | None = ..., + parent_index: _c_ad1.uint64 | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... @@ -138,7 +175,9 @@ class _c_ad1(__cs__.cstruct): num_chunks: _c_ad1.uint64 chunks: __cs__.Array[_c_ad1.uint64] @overload - def __init__(self, num_chunks: _c_ad1.uint64 | None = ..., chunks: __cs__.Array[_c_ad1.uint64] | None = ...): ... + def __init__( + self, num_chunks: _c_ad1.uint64 | None = ..., chunks: __cs__.Array[_c_ad1.uint64] | None = ... + ): ... 
@overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... @@ -149,7 +188,14 @@ class _c_ad1(__cs__.cstruct): len: _c_ad1.uint32 data: __cs__.CharArray @overload - def __init__(self, next: _c_ad1.uint64 | None = ..., category: _c_ad1.uint32 | None = ..., type: _c_ad1.MetaType | None = ..., len: _c_ad1.uint32 | None = ..., data: __cs__.CharArray | None = ...): ... + def __init__( + self, + next: _c_ad1.uint64 | None = ..., + category: _c_ad1.uint32 | None = ..., + type: _c_ad1.MetaType | None = ..., + len: _c_ad1.uint32 | None = ..., + data: __cs__.CharArray | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... diff --git a/dissect/evidence/asdf/asdf.pyi b/dissect/evidence/asdf/asdf.pyi index 420d56b..5f028d9 100644 --- a/dissect/evidence/asdf/asdf.pyi +++ b/dissect/evidence/asdf/asdf.pyi @@ -21,7 +21,16 @@ class _c_asdf(__cs__.cstruct): reserved2: __cs__.CharArray guid: __cs__.CharArray @overload - def __init__(self, magic: __cs__.CharArray | None = ..., flags: _c_asdf.FILE_FLAG | None = ..., version: _c_asdf.uint8 | None = ..., reserved1: __cs__.CharArray | None = ..., timestamp: _c_asdf.uint64 | None = ..., reserved2: __cs__.CharArray | None = ..., guid: __cs__.CharArray | None = ...): ... + def __init__( + self, + magic: __cs__.CharArray | None = ..., + flags: _c_asdf.FILE_FLAG | None = ..., + version: _c_asdf.uint8 | None = ..., + reserved1: __cs__.CharArray | None = ..., + timestamp: _c_asdf.uint64 | None = ..., + reserved2: __cs__.CharArray | None = ..., + guid: __cs__.CharArray | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
@@ -33,7 +42,15 @@ class _c_asdf(__cs__.cstruct): offset: _c_asdf.uint64 size: _c_asdf.uint64 @overload - def __init__(self, magic: __cs__.CharArray | None = ..., flags: _c_asdf.BLOCK_FLAG | None = ..., idx: _c_asdf.uint8 | None = ..., reserved: __cs__.CharArray | None = ..., offset: _c_asdf.uint64 | None = ..., size: _c_asdf.uint64 | None = ...): ... + def __init__( + self, + magic: __cs__.CharArray | None = ..., + flags: _c_asdf.BLOCK_FLAG | None = ..., + idx: _c_asdf.uint8 | None = ..., + reserved: __cs__.CharArray | None = ..., + offset: _c_asdf.uint64 | None = ..., + size: _c_asdf.uint64 | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... @@ -46,7 +63,16 @@ class _c_asdf(__cs__.cstruct): file_offset: _c_asdf.uint64 file_size: _c_asdf.uint64 @overload - def __init__(self, flags: _c_asdf.BLOCK_FLAG | None = ..., idx: _c_asdf.uint8 | None = ..., reserved: __cs__.CharArray | None = ..., offset: _c_asdf.uint64 | None = ..., size: _c_asdf.uint64 | None = ..., file_offset: _c_asdf.uint64 | None = ..., file_size: _c_asdf.uint64 | None = ...): ... + def __init__( + self, + flags: _c_asdf.BLOCK_FLAG | None = ..., + idx: _c_asdf.uint8 | None = ..., + reserved: __cs__.CharArray | None = ..., + offset: _c_asdf.uint64 | None = ..., + size: _c_asdf.uint64 | None = ..., + file_offset: _c_asdf.uint64 | None = ..., + file_size: _c_asdf.uint64 | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... @@ -56,7 +82,13 @@ class _c_asdf(__cs__.cstruct): table_offset: _c_asdf.uint64 sha256: __cs__.CharArray @overload - def __init__(self, magic: __cs__.CharArray | None = ..., reserved: __cs__.CharArray | None = ..., table_offset: _c_asdf.uint64 | None = ..., sha256: __cs__.CharArray | None = ...): ... 
+ def __init__( + self, + magic: __cs__.CharArray | None = ..., + reserved: __cs__.CharArray | None = ..., + table_offset: _c_asdf.uint64 | None = ..., + sha256: __cs__.CharArray | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... diff --git a/dissect/evidence/ewf/c_ewf.pyi b/dissect/evidence/ewf/c_ewf.pyi index bf507e7..46b1abb 100644 --- a/dissect/evidence/ewf/c_ewf.pyi +++ b/dissect/evidence/ewf/c_ewf.pyi @@ -19,7 +19,7 @@ class _c_ewf(__cs__.cstruct): Tablaeu = ... class CompressionLevel(__cs__.Enum): - None = ... + # None = ... Good = ... Best = ... diff --git a/pyproject.toml b/pyproject.toml index eeb4ef5..f30bbff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,6 +97,7 @@ ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM1 [tool.ruff.lint.per-file-ignores] "tests/_docs/**" = ["INP001"] +"*.pyi" = ["E", "F", "PYI"] [tool.ruff.lint.isort] known-first-party = ["dissect.evidence"] diff --git a/tests/test_ad1.py b/tests/test_ad1.py index b82cc48..3b3b12c 100644 --- a/tests/test_ad1.py +++ b/tests/test_ad1.py @@ -4,8 +4,11 @@ from datetime import datetime, timezone from typing import BinaryIO +import pytest + from dissect.evidence import ad1 -from dissect.evidence.ad1.ad1 import EntryType, MetaType +from dissect.evidence.ad1.ad1 import EntryType, MetaType, find_files +from tests._utils import absolute_path def test_ad1(ad1_data: BinaryIO) -> None: @@ -68,6 +71,47 @@ def test_ad1_compressed(ad1_data_compressed: BinaryIO) -> None: assert fs.get("E:/AD1_test/doc1.txt").open().read() == b"Inhoud document 1" +@pytest.mark.parametrize( + ("path", "expected_files"), + [ + pytest.param( + "_data/ad1/pcbje/text-and-pictures.ad1", + [ + "text-and-pictures.ad1", + "text-and-pictures.ad2", + "text-and-pictures.ad3", + "text-and-pictures.ad4", + ], + id="segmented-simple", + ), + pytest.param( + "_data/ad1/encrypted-passphrase/encrypted.ad1", + [ + "encrypted.ad1", + "encrypted.ad2", + 
"encrypted.ad3", + "encrypted.ad4", + "encrypted.ad5", + "encrypted.ad6", + "encrypted.ad7", + "encrypted.ad8", + "encrypted.ad9", + "encrypted.ad10", + "encrypted.ad11", + "encrypted.ad12", + "encrypted.ad13", + ], + id="segmented-natural-sorting", + ), + ], +) +def test_ad1_find_files(path: str, expected_files: list[str]) -> None: + """Test if we correctly find and order segmented AD1 files and do not find .txt or .csv artifact files.""" + + files = find_files(absolute_path(path)) + assert [file.name for file in files] == expected_files + + def test_ad1_segmented(ad1_data_segmented: list[BinaryIO]) -> None: """Test if we can parse segmented AD1 files. From fed891c918957dde48063d6b92bb09f0b71b7e3e Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Tue, 28 Oct 2025 15:28:36 +0100 Subject: [PATCH 5/9] implement review feedback --- dissect/evidence/ad1/ad1.py | 37 +++++++++-- dissect/evidence/asdf/asdf.py | 50 +------------- dissect/evidence/asdf/c_asdf.py | 52 +++++++++++++++ .../evidence/asdf/{asdf.pyi => c_asdf.pyi} | 3 +- dissect/evidence/ewf/c_ewf.pyi | 65 +++++++++++++++++-- dissect/evidence/tools/adcrypt.py | 4 +- dissect/evidence/tools/{utils.py => util.py} | 0 pyproject.toml | 5 +- tests/_docs/conf.py | 1 - 9 files changed, 148 insertions(+), 69 deletions(-) create mode 100644 dissect/evidence/asdf/c_asdf.py rename dissect/evidence/asdf/{asdf.pyi => c_asdf.pyi} (97%) rename dissect/evidence/tools/{utils.py => util.py} (100%) diff --git a/dissect/evidence/ad1/ad1.py b/dissect/evidence/ad1/ad1.py index b404dc7..f79c4b7 100644 --- a/dissect/evidence/ad1/ad1.py +++ b/dissect/evidence/ad1/ad1.py @@ -40,7 +40,7 @@ def find_files(path: Path) -> set[Path]: class AD1: """AccessData Logical Image (AD1v4) implementation. - Supports ``zlib`` compressed images. Does not support encrypted (``b"ADCRYPT"``) images. + Supports ``zlib`` compressed images. Does not directly support encrypted (``b"ADCRYPT"``) images. 
Should be initialized using a list of segment files, e.g.:: @@ -70,7 +70,6 @@ def __init__(self, fh: BinaryIO | list[BinaryIO]): self.segments.append(segment) # Add the segment file handle to the mapping stream, minus the segment header. - # TODO: Does this work as expected? self.stream.add(self.stream.size or 0, segment.header.segment_size - 512, fh, 512) # The first .ad1 file contains an image header @@ -78,7 +77,8 @@ def __init__(self, fh: BinaryIO | list[BinaryIO]): self.logical_image = AD1LogicalImage(RelativeStream(self.fhs[0], offset)) # NOTE: Unnecesary RelativeStream? self.root = FileEntry(self, -1, is_root=True, root_name="/") - # Add entries for all parts in logical_image.name + # Add entries for all parts in logical_image.name. This name commonly contains the full path each entry in the + # container is relative to. root_name = self.logical_image.header.name.decode() root_path = ( PureWindowsPath(root_name) if "/" not in root_name and "\\" in root_name else PurePosixPath(root_name) @@ -103,6 +103,15 @@ def __init__(self, fh: BinaryIO | list[BinaryIO]): offset = child.entry.next def entry(self, path: str) -> FileEntry: + """Return a :class:`FileEntry` based on the given absolute `path`. + + Raises: + FileNotFoundError if the given `path` is not found in the `AD1` container. + + Returns: + :class:`FileEntry` when the given `path` is found. 
+ """ + components = path.lstrip("/").split("/") current = self.root @@ -120,13 +129,19 @@ def entry(self, path: str) -> FileEntry: raise FileNotFoundError(f"Path not found: {path}") def get(self, path: str) -> FileEntry: + """Shortcut method to ``AD1.entry()`` for the given ``path``.""" + return self.entry(path) def open(self, path: str) -> FileObject: + """Shortcut method to ``FileEntry.open()`` for the given ``path``.""" + return self.entry(path).open() class AD1SegmentFile: + """Represents an AD1 segmented file.""" + def __init__(self, fh: BinaryIO): self.fh = fh self.header = c_ad1.SegmentedFileHeader(fh) @@ -139,6 +154,8 @@ def __repr__(self) -> str: class AD1LogicalImage: + """Represents an AD1 logical image.""" + def __init__(self, fh: BinaryIO): self.fh = fh self.header = c_ad1.LogicalImageHeader(fh) @@ -152,6 +169,8 @@ def __repr__(self) -> str: class FileEntry: + """Represents a file entry in an AD1 logical image.""" + def __init__(self, ad1: AD1, offset: int, is_root: bool = False, root_name: str | None = None): self.ad1 = ad1 self.offset = offset @@ -275,6 +294,8 @@ def convert_ts(input: bytes) -> datetime: class FileMeta: + """Represents a single AD1 logical file metadata item found inside :class:`FileEntry`.""" + def __init__(self, stream: MappingStream, offset: int): self.stream = stream self.offset = offset @@ -292,10 +313,15 @@ def __repr__(self) -> str: # TODO: Can we just use ZlibStream from dissect.util.stream? 
class FileObject(AlignedStream): + """Custom stream format implementation for AD1 :class:`FileEntry` file contents.""" + def __init__(self, entry: FileEntry): self.entry = entry super().__init__(self.entry.size, self.entry.ad1.logical_image.chunk_size) + self.entry.ad1.stream.seek(self.entry.entry.zlib_meta) + self.chunks = c_ad1.FileEntryChunks(self.entry.ad1.stream).chunks + def _read(self, offset: int, length: int) -> bytes: r = [] fh = self.entry.ad1.stream @@ -304,10 +330,7 @@ def _read(self, offset: int, length: int) -> bytes: chunk = offset // chunk_size chunk_count = (length + chunk_size - 1) // chunk_size - self.entry.ad1.stream.seek(self.entry.entry.zlib_meta) - chunks = c_ad1.FileEntryChunks(self.entry.ad1.stream).chunks - - chunk_offsets = chunks[chunk : chunk + chunk_count + 1] + chunk_offsets = self.chunks[chunk : chunk + chunk_count + 1] if len(chunk_offsets) != chunk_count + 1: chunk_offsets.append(self.entry.entry.meta) diff --git a/dissect/evidence/asdf/asdf.py b/dissect/evidence/asdf/asdf.py index f160db8..b59cf41 100644 --- a/dissect/evidence/asdf/asdf.py +++ b/dissect/evidence/asdf/asdf.py @@ -11,10 +11,10 @@ from collections import defaultdict from typing import TYPE_CHECKING, BinaryIO -from dissect.cstruct import cstruct from dissect.util import ts from dissect.util.stream import AlignedStream, RangeStream +from dissect.evidence.asdf.c_asdf import c_asdf from dissect.evidence.asdf.streams import CompressedStream, Crc32Stream, HashedStream from dissect.evidence.exceptions import ( InvalidBlock, @@ -41,54 +41,6 @@ FOOTER_MAGIC = b"FT\xa5\xdf" SPARSE_BYTES = b"\xa5\xdf" -asdf_def = """ -flag FILE_FLAG : uint32 { - SHA256 = 0x01, -}; - -flag BLOCK_FLAG : uint8 { - CRC32 = 0x01, - COMPRESS = 0x02, -}; - -struct header { - char magic[4]; // File magic, must be "ASDF" - FILE_FLAG flags; // File flags - uint8 version; // File version - char reserved1[7]; // Reserved - uint64 timestamp; // Creation timestamp of the file - char reserved2[8]; // 
Reserved - char guid[16]; // GUID, should be unique per writer -}; - -struct block { - char magic[4]; // Block magic, must be "BL\\xa5\\xdf" - BLOCK_FLAG flags; // Block flags - uint8 idx; // Stream index, some reserved values have special meaning - char reserved[2]; // Reserved - uint64 offset; // Absolute offset of block in stream - uint64 size; // Size of block in stream -}; - -struct table_entry { - BLOCK_FLAG flags; // Block flags - uint8 idx; // Stream index, some reserved values have special meaning - char reserved[2]; // Reserved - uint64 offset; // Absolute offset of block in stream - uint64 size; // Size of block in stream - uint64 file_offset; // Absolute offset of block in file - uint64 file_size; // Size of block in file -}; - -struct footer { - char magic[4]; // Footer magic, must be "FT\\xa5\\xdf" - char reserved[4]; // Reserved - uint64 table_offset; // Offset in file to start of block table - char sha256[32]; // SHA256 of this file up until this hash -}; -""" -c_asdf = cstruct().load(asdf_def) - class AsdfWriter(io.RawIOBase): """ASDF file writer. 
diff --git a/dissect/evidence/asdf/c_asdf.py b/dissect/evidence/asdf/c_asdf.py new file mode 100644 index 0000000..8da56ce --- /dev/null +++ b/dissect/evidence/asdf/c_asdf.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +asdf_def = """ +flag FILE_FLAG : uint32 { + SHA256 = 0x01, +}; + +flag BLOCK_FLAG : uint8 { + CRC32 = 0x01, + COMPRESS = 0x02, +}; + +struct header { + char magic[4]; // File magic, must be "ASDF" + FILE_FLAG flags; // File flags + uint8 version; // File version + char reserved1[7]; // Reserved + uint64 timestamp; // Creation timestamp of the file + char reserved2[8]; // Reserved + char guid[16]; // GUID, should be unique per writer +}; + +struct block { + char magic[4]; // Block magic, must be "BL\\xa5\\xdf" + BLOCK_FLAG flags; // Block flags + uint8 idx; // Stream index, some reserved values have special meaning + char reserved[2]; // Reserved + uint64 offset; // Absolute offset of block in stream + uint64 size; // Size of block in stream +}; + +struct table_entry { + BLOCK_FLAG flags; // Block flags + uint8 idx; // Stream index, some reserved values have special meaning + char reserved[2]; // Reserved + uint64 offset; // Absolute offset of block in stream + uint64 size; // Size of block in stream + uint64 file_offset; // Absolute offset of block in file + uint64 file_size; // Size of block in file +}; + +struct footer { + char magic[4]; // Footer magic, must be "FT\\xa5\\xdf" + char reserved[4]; // Reserved + uint64 table_offset; // Offset in file to start of block table + char sha256[32]; // SHA256 of this file up until this hash +}; +""" + +c_asdf = cstruct().load(asdf_def) diff --git a/dissect/evidence/asdf/asdf.pyi b/dissect/evidence/asdf/c_asdf.pyi similarity index 97% rename from dissect/evidence/asdf/asdf.pyi rename to dissect/evidence/asdf/c_asdf.pyi index 5f028d9..58c6f12 100644 --- a/dissect/evidence/asdf/asdf.pyi +++ b/dissect/evidence/asdf/c_asdf.pyi @@ -1,8 +1,7 @@ # Generated by 
cstruct-stubgen -from typing import BinaryIO, Literal, overload +from typing import BinaryIO, TypeAlias, overload import dissect.cstruct as __cs__ -from typing_extensions import TypeAlias class _c_asdf(__cs__.cstruct): class FILE_FLAG(__cs__.Flag): diff --git a/dissect/evidence/ewf/c_ewf.pyi b/dissect/evidence/ewf/c_ewf.pyi index 46b1abb..9198c02 100644 --- a/dissect/evidence/ewf/c_ewf.pyi +++ b/dissect/evidence/ewf/c_ewf.pyi @@ -29,7 +29,13 @@ class _c_ewf(__cs__.cstruct): segment_number: _c_ewf.uint16 fields_end: _c_ewf.uint16 @overload - def __init__(self, signature: __cs__.CharArray | None = ..., fields_start: _c_ewf.uint8 | None = ..., segment_number: _c_ewf.uint16 | None = ..., fields_end: _c_ewf.uint16 | None = ...): ... + def __init__( + self, + signature: __cs__.CharArray | None = ..., + fields_start: _c_ewf.uint8 | None = ..., + segment_number: _c_ewf.uint16 | None = ..., + fields_end: _c_ewf.uint16 | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... @@ -40,7 +46,14 @@ class _c_ewf(__cs__.cstruct): pad: __cs__.Array[_c_ewf.uint8] checksum: _c_ewf.uint32 @overload - def __init__(self, type: __cs__.CharArray | None = ..., next: _c_ewf.uint64 | None = ..., size: _c_ewf.uint64 | None = ..., pad: __cs__.Array[_c_ewf.uint8] | None = ..., checksum: _c_ewf.uint32 | None = ...): ... + def __init__( + self, + type: __cs__.CharArray | None = ..., + next: _c_ewf.uint64 | None = ..., + size: _c_ewf.uint64 | None = ..., + pad: __cs__.Array[_c_ewf.uint8] | None = ..., + checksum: _c_ewf.uint32 | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
@@ -55,7 +68,18 @@ class _c_ewf(__cs__.cstruct): signature: __cs__.CharArray checksum: _c_ewf.uint32 @overload - def __init__(self, reserved_1: _c_ewf.uint32 | None = ..., chunk_count: _c_ewf.uint32 | None = ..., sector_count: _c_ewf.uint32 | None = ..., sector_size: _c_ewf.uint32 | None = ..., total_sector_count: _c_ewf.uint32 | None = ..., reserved: __cs__.Array[_c_ewf.uint8] | None = ..., pad: __cs__.Array[_c_ewf.uint8] | None = ..., signature: __cs__.CharArray | None = ..., checksum: _c_ewf.uint32 | None = ...): ... + def __init__( + self, + reserved_1: _c_ewf.uint32 | None = ..., + chunk_count: _c_ewf.uint32 | None = ..., + sector_count: _c_ewf.uint32 | None = ..., + sector_size: _c_ewf.uint32 | None = ..., + total_sector_count: _c_ewf.uint32 | None = ..., + reserved: __cs__.Array[_c_ewf.uint8] | None = ..., + pad: __cs__.Array[_c_ewf.uint8] | None = ..., + signature: __cs__.CharArray | None = ..., + checksum: _c_ewf.uint32 | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
@@ -83,7 +107,31 @@ class _c_ewf(__cs__.cstruct): signature: __cs__.CharArray checksum: _c_ewf.uint32 @overload - def __init__(self, media_type: _c_ewf.MediaType | None = ..., reserved_1: __cs__.Array[_c_ewf.uint8] | None = ..., chunk_count: _c_ewf.uint32 | None = ..., sector_count: _c_ewf.uint32 | None = ..., sector_size: _c_ewf.uint32 | None = ..., total_sector_count: _c_ewf.uint64 | None = ..., num_cylinders: _c_ewf.uint32 | None = ..., num_heads: _c_ewf.uint32 | None = ..., num_sectors: _c_ewf.uint32 | None = ..., media_flags: _c_ewf.uint8 | None = ..., unknown_1: __cs__.Array[_c_ewf.uint8] | None = ..., palm_start_sector: _c_ewf.uint32 | None = ..., unknown_2: _c_ewf.uint32 | None = ..., smart_start_sector: _c_ewf.uint32 | None = ..., compression_level: _c_ewf.CompressionLevel | None = ..., unknown_3: __cs__.Array[_c_ewf.uint8] | None = ..., error_granularity: _c_ewf.uint32 | None = ..., unknown_4: _c_ewf.uint32 | None = ..., uuid: __cs__.Array[_c_ewf.uint8] | None = ..., pad: __cs__.Array[_c_ewf.uint8] | None = ..., signature: __cs__.CharArray | None = ..., checksum: _c_ewf.uint32 | None = ...): ... 
+ def __init__( + self, + media_type: _c_ewf.MediaType | None = ..., + reserved_1: __cs__.Array[_c_ewf.uint8] | None = ..., + chunk_count: _c_ewf.uint32 | None = ..., + sector_count: _c_ewf.uint32 | None = ..., + sector_size: _c_ewf.uint32 | None = ..., + total_sector_count: _c_ewf.uint64 | None = ..., + num_cylinders: _c_ewf.uint32 | None = ..., + num_heads: _c_ewf.uint32 | None = ..., + num_sectors: _c_ewf.uint32 | None = ..., + media_flags: _c_ewf.uint8 | None = ..., + unknown_1: __cs__.Array[_c_ewf.uint8] | None = ..., + palm_start_sector: _c_ewf.uint32 | None = ..., + unknown_2: _c_ewf.uint32 | None = ..., + smart_start_sector: _c_ewf.uint32 | None = ..., + compression_level: _c_ewf.CompressionLevel | None = ..., + unknown_3: __cs__.Array[_c_ewf.uint8] | None = ..., + error_granularity: _c_ewf.uint32 | None = ..., + unknown_4: _c_ewf.uint32 | None = ..., + uuid: __cs__.Array[_c_ewf.uint8] | None = ..., + pad: __cs__.Array[_c_ewf.uint8] | None = ..., + signature: __cs__.CharArray | None = ..., + checksum: _c_ewf.uint32 | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... @@ -94,7 +142,14 @@ class _c_ewf(__cs__.cstruct): checksum: _c_ewf.uint32 entries: __cs__.Array[_c_ewf.uint32] @overload - def __init__(self, num_entries: _c_ewf.uint32 | None = ..., _: _c_ewf.uint32 | None = ..., base_offset: _c_ewf.uint64 | None = ..., checksum: _c_ewf.uint32 | None = ..., entries: __cs__.Array[_c_ewf.uint32] | None = ...): ... + def __init__( + self, + num_entries: _c_ewf.uint32 | None = ..., + _: _c_ewf.uint32 | None = ..., + base_offset: _c_ewf.uint64 | None = ..., + checksum: _c_ewf.uint32 | None = ..., + entries: __cs__.Array[_c_ewf.uint32] | None = ..., + ): ... @overload def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
diff --git a/dissect/evidence/tools/adcrypt.py b/dissect/evidence/tools/adcrypt.py index 892f24a..dbb1618 100644 --- a/dissect/evidence/tools/adcrypt.py +++ b/dissect/evidence/tools/adcrypt.py @@ -7,7 +7,7 @@ from dissect.evidence.ad1.ad1 import find_files as find_ad1_files from dissect.evidence.adcrypt.adcrypt import ADCrypt from dissect.evidence.ewf.ewf import find_files as find_ewf_files -from dissect.evidence.tools.utils import catch_sigpipe +from dissect.evidence.tools.util import catch_sigpipe logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]\t%(message)s") log = logging.getLogger(__name__) @@ -17,7 +17,7 @@ def main() -> None: help_formatter = argparse.ArgumentDefaultsHelpFormatter parser = argparse.ArgumentParser( - prog="adecrypt", + prog="adcrypt", description="Decrypt E01 or AD1 ADCRYPT encrypted segment files.", fromfile_prefix_chars="@", formatter_class=help_formatter, diff --git a/dissect/evidence/tools/utils.py b/dissect/evidence/tools/util.py similarity index 100% rename from dissect/evidence/tools/utils.py rename to dissect/evidence/tools/util.py diff --git a/pyproject.toml b/pyproject.toml index 208c9c9..1ff570a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers = [ dependencies = [ "dissect.cstruct>=4,<5", "dissect.util>=3,<4", - "pycryptodome>=3.23.0,<4", + "pycryptodome", ] dynamic = ["version"] @@ -40,7 +40,6 @@ repository = "https://github.com/fox-it/dissect.evidence" dev = [ "dissect.cstruct>=4.0.dev,<5.0.dev", "dissect.util>=3.0.dev,<4.0.dev", - "pycryptodome>=3.23.0.dev,<4.0.dev", ] [dependency-groups] @@ -69,7 +68,7 @@ asdf-meta = "dissect.evidence.tools.asdf.meta:main" asdf-repair = "dissect.evidence.tools.asdf.repair:main" asdf-verify = "dissect.evidence.tools.asdf.verify:main" -adecrypt = "dissect.evidence.tools.adcrypt:main" +adcrypt = "dissect.evidence.tools.adcrypt:main" [tool.ruff] line-length = 120 diff --git a/tests/_docs/conf.py b/tests/_docs/conf.py index faf8d58..49e2f26 
100644 --- a/tests/_docs/conf.py +++ b/tests/_docs/conf.py @@ -38,5 +38,4 @@ suppress_warnings = [ # https://github.com/readthedocs/sphinx-autoapi/issues/285 "autoapi.python_import_resolution", - "ref.python", ] From f5a6159c80dc06580a261c47c1a32ea6b15207f5 Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Fri, 5 Dec 2025 19:59:40 +0100 Subject: [PATCH 6/9] Changes --- dissect/evidence/ad1/__init__.py | 10 +- dissect/evidence/ad1/ad1.py | 502 +++++++++++------- dissect/evidence/ad1/c_ad1.py | 200 +++---- dissect/evidence/ad1/c_ad1.pyi | 4 +- dissect/evidence/ad1/stream.py | 78 +++ dissect/evidence/adcrypt/__init__.py | 2 +- dissect/evidence/adcrypt/adcrypt.py | 202 ++++--- dissect/evidence/adcrypt/c_adcrypt.pyi | 3 +- dissect/evidence/asdf/asdf.py | 4 +- .../evidence/asdf/{streams.py => stream.py} | 0 dissect/evidence/ewf/c_ewf.pyi | 3 +- dissect/evidence/ewf/ewf.py | 12 +- .../evidence/{exceptions.py => exception.py} | 3 +- dissect/evidence/tools/adcrypt.py | 39 +- dissect/evidence/tools/asdf/repair.py | 2 +- dissect/evidence/tools/util.py | 5 +- pyproject.toml | 5 +- tests/test_ad1.py | 99 +++- tests/test_adcrypt.py | 34 -- tests/test_asdf.py | 4 +- tests/test_ewf.py | 4 +- tests/test_exception.py | 21 + 22 files changed, 758 insertions(+), 478 deletions(-) create mode 100644 dissect/evidence/ad1/stream.py rename dissect/evidence/asdf/{streams.py => stream.py} (100%) rename dissect/evidence/{exceptions.py => exception.py} (84%) delete mode 100644 tests/test_adcrypt.py create mode 100644 tests/test_exception.py diff --git a/dissect/evidence/ad1/__init__.py b/dissect/evidence/ad1/__init__.py index d4dd654..eafba6a 100644 --- a/dissect/evidence/ad1/__init__.py +++ b/dissect/evidence/ad1/__init__.py @@ -1,7 +1,7 @@ from __future__ import annotations -from dissect.evidence.ad1.ad1 import AD1, AD1LogicalImage, AD1SegmentFile, FileEntry, FileMeta, FileObject, MetaType -from dissect.evidence.exceptions import ( +from 
dissect.evidence.ad1.ad1 import AD1, FileEntry, FileMeta, FileStream, LogicalImage, MetaType, SegmentFile +from dissect.evidence.exception import ( Error, FileNotFoundError, NotADirectoryError, @@ -10,14 +10,14 @@ __all__ = [ "AD1", - "AD1LogicalImage", - "AD1SegmentFile", "Error", "FileEntry", "FileMeta", "FileNotFoundError", - "FileObject", + "FileStream", + "LogicalImage", "MetaType", "NotADirectoryError", "NotASymlinkError", + "SegmentFile", ] diff --git a/dissect/evidence/ad1/ad1.py b/dissect/evidence/ad1/ad1.py index f79c4b7..1040828 100644 --- a/dissect/evidence/ad1/ad1.py +++ b/dissect/evidence/ad1/ad1.py @@ -1,53 +1,51 @@ from __future__ import annotations import re -import zlib from datetime import datetime, timezone from functools import cached_property -from pathlib import Path, PurePosixPath, PureWindowsPath +from pathlib import Path, PurePath, PurePosixPath, PureWindowsPath from typing import TYPE_CHECKING, BinaryIO -from dissect.util.stream import AlignedStream, MappingStream, RelativeStream - from dissect.evidence.ad1.c_ad1 import c_ad1 -from dissect.evidence.exceptions import FileNotFoundError, NotADirectoryError, NotASymlinkError +from dissect.evidence.ad1.stream import AD1Stream, FileStream +from dissect.evidence.adcrypt.adcrypt import ADCrypt, is_adcrypt +from dissect.evidence.exception import FileNotFoundError, NotADirectoryError, NotASymlinkError if TYPE_CHECKING: from collections.abc import Iterator - EntryType = c_ad1.EntryType MetaType = c_ad1.MetaType FileClassType = c_ad1.FileClassType +MAX_OPEN_SEGMENTS = 128 -def atoi(text: str) -> int | str: - return int(text) if text.isdigit() else text - - -def natural_keys(text: str | Path) -> list[int | str]: - return [atoi(c) for c in re.split(r"(\d+)", str(text))] - -def find_files(path: Path) -> set[Path]: +def find_files(path: Path) -> list[Path]: files = set() for file in path.parent.iterdir(): if file.stem == path.stem and re.match(r"^\.ad[0-9]+$", file.suffix.lower()): files.add(file) - 
return sorted(files, key=natural_keys) + return sorted(files, key=lambda file: int(file.suffix[3:])) class AD1: """AccessData Logical Image (AD1v4) implementation. - Supports ``zlib`` compressed images. Does not directly support encrypted (``b"ADCRYPT"``) images. + Supports ``zlib`` compressed images and ADCRYPT encrypted images. - Should be initialized using a list of segment files, e.g.:: + Should be initialized using a list of segment paths or file-like objects, e.g.:: + fs = AD1([Path("file.ad1"), Path("file.ad2")]) fs = AD1([Path("file.ad1").open("rb"), Path("file.ad2").open("rb")]) + If the AD1 container is ADCRYPT encrypted, it can be unlocked using either a passphrase or private key:: + + fs.unlock(passphrase="my secret passphrase") + fs.unlock(private_key=Path("path/to/private/key.pem")) + Resources: - - Reversing FTK Imager + - Reverse engineering FTK Imager - https://github.com/pcbje/pyad1/blob/master/documentation/AccessData%20Format%20(AD1).asciidoc - https://github.com/al3ks1s/AD1-tools - https://web.archive.org/web/20231013073319/https://tmairi.github.io/posts/dissecting-the-ad1-file-format/ @@ -55,184 +53,331 @@ class AD1: """ def __init__(self, fh: BinaryIO | list[BinaryIO]): - self.fhs: list[BinaryIO] = fh if isinstance(fh, list) else [fh] - self.segments: list[AD1SegmentFile] = [] - self.stream = MappingStream() - self.logical_image: AD1LogicalImage = None - self.root: FileEntry = None - - if len(self.fhs) < 1 or not all(hasattr(fh, "read") for fh in self.fhs): - raise ValueError(f"Invalid given file handles: {fh!r}") - - for fh in self.fhs: - # Each file contains a segment header - segment = AD1SegmentFile(fh) - self.segments.append(segment) - - # Add the segment file handle to the mapping stream, minus the segment header. 
- self.stream.add(self.stream.size or 0, segment.header.segment_size - 512, fh, 512) - - # The first .ad1 file contains an image header - offset = self.segments[0].header.logical_image_offset - self.logical_image = AD1LogicalImage(RelativeStream(self.fhs[0], offset)) # NOTE: Unnecesary RelativeStream? - self.root = FileEntry(self, -1, is_root=True, root_name="/") - - # Add entries for all parts in logical_image.name. This name commonly contains the full path each entry in the - # container is relative to. - root_name = self.logical_image.header.name.decode() - root_path = ( - PureWindowsPath(root_name) if "/" not in root_name and "\\" in root_name else PurePosixPath(root_name) - ) - parts = list(root_path.parts) - parent = self.root - - while parts: - part = parts.pop(0) - if root_path.drive and part == f"{root_path.drive}\\": - part = root_path.drive - entry = FileEntry(self, -1, is_root=True, root_name=part) - parent.children = [entry] - parent = entry - - # Add the first children to the last root part - offset = self.logical_image.header.first_file_offset - entry.children = [] - while offset != 0: - child = FileEntry(self, offset) - entry.children.append(child) - offset = child.entry.next + fhs = [fh] if not isinstance(fh, list) else fh + self.fh = fhs + self.root = VirtualEntry(self, "/") + + self._segments: dict[int, SegmentFile] = {} + self._segment_lru = [] + self._segment_offsets = [] + + self.size = 0 + self.stream: AD1Stream | None = None + self.logical_image: LogicalImage | None = None + + if not self.fh: + raise ValueError("No segment files provided for AD1 container") + + self.adcrypt = None + + first_segment = self.segment(0) + if is_adcrypt(first_segment.fh): + self.adcrypt = ADCrypt(first_segment.fh) + else: + self._open_ad1() + + def is_adcrypt(self) -> bool: + """Return whether the AD1 container is ADCRYPT encrypted.""" + return self.adcrypt is not None + + def is_locked(self) -> bool: + """Return whether the ADCRYPT container is locked.""" + 
return self.is_adcrypt() and self.adcrypt.is_locked() + + def segment(self, idx: int) -> SegmentFile: + """Open a segment by index. + + Implements a simple LRU cache to limit the number of open segments. + + Args: + idx: Index or URI of the segment to open. + """ + # Poor mans LRU + if idx in self._segments: + self._segment_lru.remove(idx) + self._segment_lru.append(idx) + return self._segments[idx] + + if len(self._segment_lru) >= MAX_OPEN_SEGMENTS: + oldest_idx = self._segment_lru.pop(0) + oldest_segment = self._segments.pop(oldest_idx) + + # Don't close it if we received it as a file-like object + if not hasattr(self.fh[oldest_idx], "read"): + oldest_segment.fh.close() + + del oldest_segment + + fh = self.fh[idx] + if not hasattr(fh, "read"): + fh = fh.open("rb") if isinstance(fh, Path) else Path(fh).open("rb") # noqa: SIM115 + + if self.is_adcrypt() and not self.is_locked(): + fh = self.adcrypt.wrap(fh, idx) + + segment = SegmentFile(fh) + + self._segments[idx] = segment + self._segment_lru.append(idx) + + return segment - def entry(self, path: str) -> FileEntry: - """Return a :class:`FileEntry` based on the given absolute `path`. + def unlock(self, *, passphrase: str | bytes | None = None, private_key: Path | bytes | None = None) -> None: + """Unlock the ADCRYPT container with a given passphrase or private key. + + Args: + passphrase: The passphrase to unlock the container. + private_key: The private key to unlock the container. Raises: - FileNotFoundError if the given `path` is not found in the `AD1` container. + RuntimeError: If required dependencies are missing. + ValueError: If unlocking failed. 
+ """ + self.adcrypt.unlock(passphrase=passphrase, private_key=private_key) + + # Reset LRU + self._segments = {} + self._segment_lru = [] + + # Open the AD1 + self._open_ad1() + + def _open_ad1(self) -> None: + self._segment_offsets = [] + + offset = 0 + for i in range(len(self.fh)): + segment = self.segment(i) + if segment.header.magic != c_ad1.ADSEGMENTEDFILE_MAGIC.encode(): + raise ValueError(f"Invalid AD1 segment file magic in segment {i}") + + offset += segment.size + self._segment_offsets.append(offset) + + self.size = offset + self.stream = AD1Stream(self) + + # The first .ad1 file contains a logical image header + first_segment = self.segment(0) + first_segment.fh.seek(first_segment.header.logical_image_offset) + self.logical_image = LogicalImage(first_segment.fh) + + # We need to create some fake entries for all parts leading up to `logical_image.name` + # This name commonly contains the full path each entry in the container is relative to + _hallicinate_root_entries(self) + + def entry(self, path: str, entry: FileEntry | None = None) -> FileEntry: + """Return a :class:`FileEntry` based on the given absolute ``path``. + + Args: + path: Absolute path within the AD1 container. + entry: The starting entry for relative paths. Defaults to the root entry. + + Raises: + ValueError: If the ADCRYPT container is locked. + FileNotFoundError if the given ``path`` is not found in the container. Returns: - :class:`FileEntry` when the given `path` is found. + :class:`FileEntry` when the given ``path`` is found. 
""" + if self.is_locked(): + raise ValueError("AD1 container is locked by ADCRYPT") - components = path.lstrip("/").split("/") - current = self.root - - if components[0] == "": - return current + entry = entry or self.root - for c in components: - for entry in current.iterdir(): - if entry.name == c and entry.entry.type != EntryType.Deleted: - current = entry + for part in path.split("/"): + if not part: + continue - if current.name == components[-1]: - return current + for child in entry.iterdir(): + if child.name == part and child.type != EntryType.Deleted: + entry = child + break + else: + raise FileNotFoundError(f"File not found: {path}") - raise FileNotFoundError(f"Path not found: {path}") + return entry def get(self, path: str) -> FileEntry: - """Shortcut method to ``AD1.entry()`` for the given ``path``.""" - + """Shortcut for ``AD1.entry(path)``.""" return self.entry(path) - def open(self, path: str) -> FileObject: - """Shortcut method to ``FileEntry.open()`` for the given ``path``.""" - + def open(self, path: str) -> FileStream: + """Shortcut for ``AD1.entry(path).open()``.""" return self.entry(path).open() -class AD1SegmentFile: +def _hallicinate_root_entries(ad1: AD1) -> None: + # We need to create some fake entries for all parts leading up to `logical_image.name` + # This name commonly contains the full path each entry in the container is relative to + # Not always though, so do some poor mans heuristics + root_name = ad1.logical_image.name + if root_name == "Custom Content Image([Multi])": + ad1.root.entry.child = ad1.logical_image.header.first_file_offset + if len(ad1.root.children) != 1: + raise ValueError("Unexpected number of root children for Custom Content Image([Multi])") + + root_name = ad1.root.children[0].name.split(":", 1)[-1] + first_file_entry = ad1.root.children[0].entry + is_multi_image = True + else: + is_multi_image = False + + root_path = PureWindowsPath(root_name) if "/" not in root_name and "\\" in root_name else 
PurePosixPath(root_name) + parts = list(root_path.parts) + parent = _create_root_entries(ad1, ad1.root, root_path, parts) + + if is_multi_image: + parent.name = parent.name + parent.offset = ad1.logical_image.header.first_file_offset + parent.entry = first_file_entry + else: + # Add the first file offset as the first child offset of the last root part + parent.entry.child = ad1.logical_image.header.first_file_offset + + +def _create_root_entries(ad1: AD1, parent: FileEntry, path: PurePath, parts: list[str]) -> FileEntry: + while parts: + part = parts.pop(0) + if path.drive and part == f"{path.drive}\\": + part = path.drive + + entry = VirtualEntry(ad1, part) + parent.children = [entry] + parent = entry + + return parent + + +class SegmentFile: """Represents an AD1 segmented file.""" def __init__(self, fh: BinaryIO): self.fh = fh - self.header = c_ad1.SegmentedFileHeader(fh) + self.fh.seek(0) + self.header = c_ad1.SegmentedFileHeader(self.fh) self.number = self.header.segment_number self.count = self.header.segment_count - self.size = self.header.segment_size + self.size = self.header.segment_size - 512 # Subtract header size def __repr__(self) -> str: - return f"" + return f"" -class AD1LogicalImage: +class LogicalImage: """Represents an AD1 logical image.""" def __init__(self, fh: BinaryIO): self.fh = fh self.header = c_ad1.LogicalImageHeader(fh) - self.name = self.header.name + self.name = self.header.name.decode() self.version = self.header.version self.offset = self.header.first_file_offset self.chunk_size = self.header.chunk_size def __repr__(self) -> str: - return f"" # noqa: E501 + return ( + f"" + ) class FileEntry: """Represents a file entry in an AD1 logical image.""" - def __init__(self, ad1: AD1, offset: int, is_root: bool = False, root_name: str | None = None): + def __init__(self, ad1: AD1, offset: int): self.ad1 = ad1 self.offset = offset - self.is_root = is_root - - self.entry = None - self.type = None - self.meta = [] - - if is_root: - self.entry = 
c_ad1.FileEntry(name=root_name.encode(), type=EntryType.Directory, size=0) - - else: - fh = ad1.stream - fh.seek(offset) - self.entry = c_ad1.FileEntry(fh) - self.size = self.entry.size - self.type = self.entry.type - - offset = self.entry.meta - while offset != 0: - meta = FileMeta(ad1.stream, offset) - offset = meta.entry.next - self.meta.append(meta) def __repr__(self) -> str: - if self.is_symlink(): - file_type = "AD1Symlink" - elif self.is_file(): - file_type = "AD1File" - elif self.is_dir(): - file_type = "AD1Directory" - else: - file_type = "AD1UnknownType" - return f"<{file_type} name={self.name!r} size={self.size!r}>" + return f"<{self.__class__.__name__} type={self.type.name} name={self.name!r} size={self.size}>" + + @cached_property + def entry(self) -> c_ad1.FileEntry: + self.ad1.stream.seek(self.offset) + return c_ad1.FileEntry(self.ad1.stream) @cached_property def name(self) -> str: return self.entry.name.decode() + @cached_property + def type(self) -> EntryType: + return self.entry.type + + @cached_property + def meta(self) -> dict[MetaType, FileMeta]: + result = {} + + offset = self.entry.meta + while offset != 0: + meta = FileMeta(self.ad1, offset) + offset = meta.next + result[meta.type] = meta + + return result + @cached_property def children(self) -> list[FileEntry]: - children = [] + result = [] + offset = self.entry.child while offset != 0: child = FileEntry(self.ad1, offset) - children.append(child) + result.append(child) offset = child.entry.next - return children + return result + + @cached_property + def size(self) -> int: + if meta := self.meta.get(MetaType.FileSize): + return int(meta.data) + return 0 + + @cached_property + def atime(self) -> datetime: + if meta := self.meta.get(MetaType.DateAccessed): + return convert_ts(meta.data) + return datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) - def open(self) -> FileObject: - return FileObject(self) + @cached_property + def ctime(self) -> datetime: + if meta := 
self.meta.get(MetaType.MFTFileDateChanged, self.meta.get(MetaType.DateModified)): + return convert_ts(meta.data) + return datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + + @cached_property + def mtime(self) -> datetime: + if meta := self.meta.get(MetaType.DateModified): + return convert_ts(meta.data) + return datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + + @cached_property + def btime(self) -> datetime: + if meta := self.meta.get(MetaType.DateCreated): + return convert_ts(meta.data) + return datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + + @cached_property + def md5(self) -> str | None: + if meta := self.meta.get(MetaType.MD5): + return meta.data.decode() + return None + + @cached_property + def sha1(self) -> str | None: + if meta := self.meta.get(MetaType.SHA1): + return meta.data.decode() + return None def is_file(self) -> bool: - return self.entry.type in (EntryType.File, EntryType.Unknown_File) + return self.type in (EntryType.File, EntryType.Unknown_File) def is_dir(self) -> bool: - return self.entry.type == EntryType.Directory + return self.type == EntryType.Directory def is_symlink(self) -> bool: - if meta := self.get_meta(MetaType.FileClass): + if meta := self.meta.get(MetaType.FileClass): return int.from_bytes(meta.data, "little") == FileClassType.ReparsePoint return False @@ -255,93 +400,44 @@ def readlink(self) -> str: reparse_point = c_ad1.ReparsePoint(self.open()) return reparse_point.link.strip("\00").split("\00")[-1] - def get_meta(self, attr: int | c_ad1.MetaType) -> c_ad1.FileMeta | None: - return next((m for m in self.meta if m.type == attr), None) + def open(self) -> FileStream: + """Open the file entry for reading.""" + if self.is_dir(): + raise IsADirectoryError(self.name) + return FileStream(self) - @cached_property - def size(self) -> int: - meta = self.get_meta(c_ad1.MetaType.FileSize) - return meta.data if meta else 0 - @cached_property - def atime(self) -> datetime: - meta = self.get_meta(c_ad1.MetaType.DateAccessed) - 
return convert_ts(meta.data) if meta else datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) +class VirtualEntry(FileEntry): + """Represents the root entry in an AD1 logical image.""" - @cached_property - def ctime(self) -> datetime: - meta = self.get_meta(c_ad1.MetaType.DateModified) - # We could use MetaType.MFTFileDateChanged here depending on the fs - return convert_ts(meta.data) if meta else datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) - - @cached_property - def mtime(self) -> datetime: - meta = self.get_meta(c_ad1.MetaType.DateModified) - return convert_ts(meta.data) if meta else datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) - - @cached_property - def btime(self) -> datetime: - meta = self.get_meta(c_ad1.MetaType.DateCreated) - return convert_ts(meta.data) if meta else datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc) - - -def convert_ts(input: bytes) -> datetime: - """Convert an AD1 timestamp to datetime object. Assuming this is UTC.""" - - # DateCreated does not (always) have ``.%f`` precision. - fmt = "%Y%m%dT%H%M%S.%f" if b"." in input else "%Y%m%dT%H%M%S" - return datetime.strptime(input.decode(), fmt).replace(tzinfo=timezone.utc) + def __init__(self, ad1: AD1, name: str): + super().__init__(ad1, -1) + self.entry = c_ad1.FileEntry(name=name.encode(), type=EntryType.Directory, size=0) class FileMeta: """Represents a single AD1 logical file metadata item found inside :class:`FileEntry`.""" - def __init__(self, stream: MappingStream, offset: int): - self.stream = stream + def __init__(self, ad1: AD1, offset: int): + self.ad1 = ad1 self.offset = offset - fh = stream - fh.seek(offset) - self.entry = c_ad1.FileMeta(fh) + self.ad1.stream.seek(offset) + self.entry = c_ad1.FileMeta(self.ad1.stream) + self.category = self.entry.category self.type = self.entry.type self.data = self.entry.data - def __repr__(self) -> str: - return f"" - - -# TODO: Can we just use ZlibStream from dissect.util.stream? 
-class FileObject(AlignedStream): - """Custom stream format implementation for AD1 :class:`FileEntry` file contents.""" + self.next = self.entry.next - def __init__(self, entry: FileEntry): - self.entry = entry - super().__init__(self.entry.size, self.entry.ad1.logical_image.chunk_size) - - self.entry.ad1.stream.seek(self.entry.entry.zlib_meta) - self.chunks = c_ad1.FileEntryChunks(self.entry.ad1.stream).chunks - - def _read(self, offset: int, length: int) -> bytes: - r = [] - fh = self.entry.ad1.stream - chunk_size = self.entry.ad1.logical_image.chunk_size - - chunk = offset // chunk_size - chunk_count = (length + chunk_size - 1) // chunk_size - - chunk_offsets = self.chunks[chunk : chunk + chunk_count + 1] - if len(chunk_offsets) != chunk_count + 1: - chunk_offsets.append(self.entry.entry.meta) + def __repr__(self) -> str: + return f"" - fh.seek(chunk_offsets[0]) - buf = fh.read(chunk_offsets[-1] - chunk_offsets[0]) - prev_offset = chunk_offsets[0] - for offset in chunk_offsets[1:]: - chunk_size = offset - prev_offset - r.append(zlib.decompress(buf[:chunk_size])) - buf = buf[chunk_size:] - prev_offset = offset +def convert_ts(value: bytes) -> datetime: + """Convert an AD1 timestamp to datetime object. Assuming this is UTC.""" - return b"".join(r) + # DateCreated does not (always) have ``.%f`` precision. + fmt = "%Y%m%dT%H%M%S.%f" if b"." 
in value else "%Y%m%dT%H%M%S" + return datetime.strptime(value.decode(), fmt).replace(tzinfo=timezone.utc) diff --git a/dissect/evidence/ad1/c_ad1.py b/dissect/evidence/ad1/c_ad1.py index 186905c..35181bf 100644 --- a/dissect/evidence/ad1/c_ad1.py +++ b/dissect/evidence/ad1/c_ad1.py @@ -4,62 +4,62 @@ ad1_def = """ enum EntryType : uint32 { - File = 0x0, - Unknown_File = 0x1, - Deleted = 0x2, - Directory = 0x5, + File = 0x0, + Unknown_File = 0x1, + Deleted = 0x2, + Directory = 0x5, }; enum FileClassType : uint32 { - File = 0x1, // b"1" - Directory = 0x3, // b"3" - ReparsePoint = 0x3131, // b"11" + File = 0x1, // b"1" + Directory = 0x3, // b"3" + ReparsePoint = 0x3131, // b"11" }; enum MetaType : uint32 { // Generic attributes - ItemContentHashes = 0x1, - FileClass = 0x2, - FileSize = 0x3, - PhysicalSize = 0x4, - Timestamps = 0x5, - StartCluster = 0x6, - DateAccessed = 0x7, - DateCreated = 0x8, - DateModified = 0x9, + ItemContentHashes = 0x1, + FileClass = 0x2, + FileSize = 0x3, + PhysicalSize = 0x4, + Timestamps = 0x5, + StartCluster = 0x6, + DateAccessed = 0x7, + DateCreated = 0x8, + DateModified = 0x9, // .. 0xa, 0xb, 0xc .. - Encrypted = 0xd, - Compressed = 0xe, + Encrypted = 0xd, + Compressed = 0xe, // .. 0xf .. - ActualFile = 0x1e, - StartSector = 0x1f, - ADSCount = 0x24, // Alternate Data Stream Count + ActualFile = 0x1e, + StartSector = 0x1f, + ADSCount = 0x24, // Alternate Data Stream Count // DOS attributes - ShortFilename = 0x1001, - Hidden = 0x1002, - System = 0x1003, - ReadOnly = 0x1004, - Archive = 0x1005, + ShortFilename = 0x1001, + Hidden = 0x1002, + System = 0x1003, + ReadOnly = 0x1004, + Archive = 0x1005, // NTFS attributes - MFTRecordNumber = 0xa001, - MFTDateChanged = 0xa002, // Specifies the MFT record change timestamp of the file. 
- MFTIsResident = 0xa003, - MFTIsOffline = 0xa004, - MFTIsSparse = 0xa005, - MFTIsTemporary = 0xa006, - MFTOwnerSid = 0xa007, - MFTOwnerName = 0xa008, - MFTGroupSid = 0xa009, - MFTGroupName = 0xa00a, - - MFTFileDateCreated = 0xa01c, // According to the filename attribute in the MFT. - MFTFileDateModified = 0xa01d, - MFTFileDateAccessed = 0xa01e, - MFTFileDateChanged = 0xa01f, - MFTFileSize = 0xa020, - MFTFilePhysicalSize = 0xa021, + MFTRecordNumber = 0xa001, + MFTDateChanged = 0xa002, // Specifies the MFT record change timestamp of the file. + MFTIsResident = 0xa003, + MFTIsOffline = 0xa004, + MFTIsSparse = 0xa005, + MFTIsTemporary = 0xa006, + MFTOwnerSid = 0xa007, + MFTOwnerName = 0xa008, + MFTGroupSid = 0xa009, + MFTGroupName = 0xa00a, + + MFTFileDateCreated = 0xa01c, // According to the filename attribute in the MFT. + MFTFileDateModified = 0xa01d, + MFTFileDateAccessed = 0xa01e, + MFTFileDateChanged = 0xa01f, + MFTFileSize = 0xa020, + MFTFilePhysicalSize = 0xa021, // 8.3 MFT Filename // 0xa022, @@ -69,96 +69,98 @@ // 0xa026, // 0xa027, - IndxFilename = 0xa028, // According to the filename attribute in the $I30 INDX. - IndxFileSize = 0xa029, - IndxPhysicalSize = 0xa02a, - IndxDateCreated = 0xa02b, - IndxDateModified = 0xa02c, - IndxDateAccessed = 0xa02d, - IndxDateChanged = 0xa02e, + IndxFilename = 0xa028, // According to the filename attribute in the $I30 INDX. + IndxFileSize = 0xa029, + IndxPhysicalSize = 0xa02a, + IndxDateCreated = 0xa02b, + IndxDateModified = 0xa02c, + IndxDateAccessed = 0xa02d, + IndxDateChanged = 0xa02e, // 8.3 INDX // 0xa02f, 0xa030, 0xa031, 0xa032, 0xa033, 0xa034, 0xa035 // NTFS Access Control Entry (0) - AceType = 0x1000001, - AceInheritable = 0x1000004, - AceSID = 0x1000005, // The Security ID of the user or group this ACE applies to. - AceName = 0x1000006, // The name of the user or roup this ACE applies to. - AceAccessMask = 0x1000007, // Raw bitmask specifying the actions this ACE controls. 
- AceExecuteFile = 0x1000008, - AceReadData = 0x1000009, - AceWriteData = 0x100000a, - AceAppendData = 0x100000b, - AceTraverseFolder = 0x100000c, - AceListFolder = 0x100000d, - AceCreateFiles = 0x100000e, - AceCreateFolders = 0x100000f, - AceDeleteChildren = 0x1000010, - AceDeleteSelf = 0x1000011, - AceReadPermissions = 0x1000012, - AceChangePermissions = 0x1000013, - AceTakeOwnership = 0x1000014, + AceType = 0x1000001, + AceInheritable = 0x1000004, + AceSID = 0x1000005, // The Security ID of the user or group this ACE applies to. + AceName = 0x1000006, // The name of the user or roup this ACE applies to. + AceAccessMask = 0x1000007, // Raw bitmask specifying the actions this ACE controls. + AceExecuteFile = 0x1000008, + AceReadData = 0x1000009, + AceWriteData = 0x100000a, + AceAppendData = 0x100000b, + AceTraverseFolder = 0x100000c, + AceListFolder = 0x100000d, + AceCreateFiles = 0x100000e, + AceCreateFolders = 0x100000f, + AceDeleteChildren = 0x1000010, + AceDeleteSelf = 0x1000011, + AceReadPermissions = 0x1000012, + AceChangePermissions = 0x1000013, + AceTakeOwnership = 0x1000014, // .. 0x10010XX - 0x10060XX .. 
// Verification hashes - MD5 = 0x5001, - SHA1 = 0x5002, + MD5 = 0x5001, + SHA1 = 0x5002, // TODO: Clean up - ClusterSize = 0x9001, - ClusterCount = 0x9002, - FreeClusterCount = 0x9003, - VolumeSerialNumber = 0x9006, - PosixPermissions = 0x2001, + ClusterSize = 0x9001, + ClusterCount = 0x9002, + FreeClusterCount = 0x9003, + VolumeSerialNumber = 0x9006, + PosixPermissions = 0x2001, }; +#define ADSEGMENTEDFILE_MAGIC ADSEGMENTEDFILE\00 + typedef struct { - char magic[16]; // b"ADSEGMENTEDFILE" + padding - uint32 unk1; // 0x01 - uint32 unk2; // 0x02 - uint32 segment_number; // segment number starts at 0x01 - uint32 segment_count; // number of segments - uint64 segment_size; // off by 512 bytes + char magic[16]; // b"ADSEGMENTEDFILE" + padding + uint32 unk1; // 0x01 + uint32 unk2; // 0x02 + uint32 segment_number; // segment number starts at 0x01 + uint32 segment_count; // number of segments + uint64 segment_size; // off by 512 bytes uint32 logical_image_offset; - char padding[468]; // 0x00 + char padding[468]; // 0x00 } SegmentedFileHeader; typedef struct { - char magic[16]; // b"ADLOGICALIMAGE" + padding - uint32 version; // 0x03 or 0x04 - uint32 unk1; // 0x01 - uint32 chunk_size; // zlib chunk size (uint64?) + char magic[16]; // b"ADLOGICALIMAGE" + padding + uint32 version; // 0x03 or 0x04 + uint32 unk1; // 0x01 + uint32 chunk_size; // zlib chunk size (uint64?) 
uint64 metadata_offset; uint64 first_file_offset; uint32 name_len; // ADv4 (offset 48 contains name[name_len] in ADv3) - char unk_magic[4]; // b"AD" + (2 * 0x00) - uint64 name_offset; // 0x5c + char unk_magic[4]; // b"AD" + (2 * 0x00) + uint64 name_offset; // 0x5c uint64 attr_guid_offset; - uint64 unk2; // 0x00 + uint64 unk2; // 0x00 uint64 locs_guid_offset; - uint64 unk3; // 0x00 + uint64 unk3; // 0x00 // END ADv4 char name[name_len]; } LogicalImageHeader; typedef struct { - uint64 next; // Next FileEntry in same hierarchy level - uint64 child; // Next FileEntry within this dir, 0x00 if file - uint64 meta; // Offset of first FileMeta entry - uint64 zlib_meta; // Offset of zlib chunk metadata - uint64 size; // Decompressed file size, 0x00 if no data - EntryType type; // 0x00 = file, 0x05 = directory + uint64 next; // Next FileEntry in same hierarchy level + uint64 child; // Next FileEntry within this dir, 0x00 if file + uint64 meta; // Offset of first FileMeta entry + uint64 zlib_meta; // Offset of zlib chunk metadata + uint64 size; // Decompressed file size, 0x00 if no data + EntryType type; // 0x00 = file, 0x05 = directory uint32 name_len; char name[name_len]; - uint64 parent_index; // Parent folder index, 0x00 if at root + uint64 parent_index; // Parent folder index, 0x00 if at root } FileEntry; typedef struct { - uint64 num_chunks; // only if FileEntry.size != 0x00 + uint64 num_chunks; // only if FileEntry.size != 0x00 uint64 chunks[num_chunks]; } FileEntryChunks; @@ -171,7 +173,7 @@ } FileMeta; typedef struct { - char unk1[352]; // version 4 only + char unk1[352]; // version 4 only } Footer; typedef struct { diff --git a/dissect/evidence/ad1/c_ad1.pyi b/dissect/evidence/ad1/c_ad1.pyi index 864dc95..a50418b 100644 --- a/dissect/evidence/ad1/c_ad1.pyi +++ b/dissect/evidence/ad1/c_ad1.pyi @@ -1,10 +1,10 @@ # Generated by cstruct-stubgen -from typing import BinaryIO, Literal, overload +from typing import BinaryIO, Literal, TypeAlias, overload import 
dissect.cstruct as __cs__ -from typing_extensions import TypeAlias class _c_ad1(__cs__.cstruct): + ADSEGMENTEDFILE_MAGIC: Literal["ADSEGMENTEDFILE\x00"] = ... class EntryType(__cs__.Enum): File = ... Unknown_File = ... diff --git a/dissect/evidence/ad1/stream.py b/dissect/evidence/ad1/stream.py new file mode 100644 index 0000000..ae45cf5 --- /dev/null +++ b/dissect/evidence/ad1/stream.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import zlib +from bisect import bisect_right +from typing import TYPE_CHECKING + +from dissect.util.stream import AlignedStream + +from dissect.evidence.ad1.c_ad1 import c_ad1 + +if TYPE_CHECKING: + from dissect.evidence.ad1.ad1 import AD1, FileEntry + + +class AD1Stream(AlignedStream): + """Provide a stitched stream over all AD1 segments.""" + + def __init__(self, ad1: AD1): + self.ad1 = ad1 + super().__init__(self.ad1.size) + + def _read(self, offset: int, length: int) -> bytes: + result = [] + + idx = bisect_right(self.ad1._segment_offsets, offset) + while length > 0: + if idx > len(self.ad1._segment_offsets) - 1: + break + + segment = self.ad1.segment(idx) + segment_offset = 0 if idx == 0 else self.ad1._segment_offsets[idx - 1] + offset_in_segment = offset - segment_offset + read_size = min(length, segment.size - offset_in_segment) + + segment.fh.seek(512 + offset_in_segment) # Skip segment header + result.append(segment.fh.read(read_size)) + + offset += read_size + length -= read_size + idx += 1 + + return b"".join(result) + + +class FileStream(AlignedStream): + """Custom stream implementation for AD1 :class:`FileEntry` file contents.""" + + def __init__(self, entry: FileEntry): + self.entry = entry + self.stream = self.entry.ad1.stream + self.chunk_size = self.entry.ad1.logical_image.chunk_size + + self.stream.seek(self.entry.entry.zlib_meta) + self.chunks = [*c_ad1.FileEntryChunks(self.stream).chunks, self.entry.entry.meta] + + super().__init__(self.entry.size, self.chunk_size) + + def _read(self, offset: int, length: 
int) -> bytes: + result = [] + + chunk, offset_in_chunk = divmod(offset, self.chunk_size) + chunk_count = (length + self.chunk_size - 1) // self.chunk_size + chunk_offsets = self.chunks[chunk : chunk + chunk_count + 1] + + for i, chunk_offset in enumerate(chunk_offsets[:-1]): + compressed_chunk_size = chunk_offsets[i + 1] - chunk_offset + + self.stream.seek(chunk_offset) + buf = zlib.decompress(self.stream.read(compressed_chunk_size)) + + read_size = min(length, self.chunk_size - offset_in_chunk) + result.append(buf[offset_in_chunk : offset_in_chunk + read_size]) + + offset += read_size + length -= read_size + offset_in_chunk = 0 + + return b"".join(result) diff --git a/dissect/evidence/adcrypt/__init__.py b/dissect/evidence/adcrypt/__init__.py index 7284de1..e459327 100644 --- a/dissect/evidence/adcrypt/__init__.py +++ b/dissect/evidence/adcrypt/__init__.py @@ -1,7 +1,7 @@ from __future__ import annotations from dissect.evidence.adcrypt.adcrypt import ADCrypt -from dissect.evidence.exceptions import Error +from dissect.evidence.exception import Error __all__ = [ "ADCrypt", diff --git a/dissect/evidence/adcrypt/adcrypt.py b/dissect/evidence/adcrypt/adcrypt.py index 7fc1e2d..3c9fccb 100644 --- a/dissect/evidence/adcrypt/adcrypt.py +++ b/dissect/evidence/adcrypt/adcrypt.py @@ -1,14 +1,17 @@ from __future__ import annotations +import hashlib +import hmac +import io from pathlib import Path from typing import BinaryIO +from dissect.util.stream import AlignedStream + from dissect.evidence.adcrypt.c_adcrypt import c_adcrypt try: - from Crypto import Hash from Crypto.Cipher import AES, PKCS1_v1_5 - from Crypto.Hash import HMAC from Crypto.Protocol.KDF import PBKDF2 from Crypto.PublicKey import RSA from Crypto.Util import Counter @@ -17,22 +20,38 @@ except ImportError: HAS_CRYPTO = False +MAX_OPEN_SEGMENTS = 128 + + +def is_adcrypt(fh: BinaryIO) -> bool: + """Check if the file handle is an ADCRYPT container. + + Args: + fh: The file handle to check. 
+ """ + fh.seek(0) + return fh.read(8) == c_adcrypt.ADCRYPT_MAGIC.encode() + class ADCrypt: """Access Data ADCRYPT encrypted container implementation. + Not particularly useful on its own, but used by other evidence types such as AD1. + Pass the first segment file handle to this class, then use :meth:`unlock` to unlock the container, + and :meth:`wrap` to wrap other segment file handles into decrypting streams. + References: - - Reversing adencrypt.dll + - Reverse engineering ``adencrypt.dll`` - https://github.com/libyal/libewf/blob/main/documentation/Expert%20Witness%20Compression%20Format%20(EWF).asciidoc#7-ad-encryption - https://github.com/log2timeline/plaso/issues/2726#issuecomment-517444736 """ - def __init__(self, fhs: BinaryIO | list[BinaryIO]): - self.fhs = fhs if isinstance(fhs, list) else [fhs] - self.segments: list[ADCryptSegment] = [] + def __init__(self, fh: BinaryIO) -> None: + self.fh = fh + self.fh.seek(0) try: - self.header: c_adcrypt.Header = c_adcrypt.Header(self.fhs[0]) + self.header: c_adcrypt.Header = c_adcrypt.Header(self.fh) except EOFError: raise ValueError("File handle is not an ADCRYPT container: Unable to read ADCRYPT header") @@ -42,102 +61,119 @@ def __init__(self, fhs: BinaryIO | list[BinaryIO]): if self.header.version != 1: raise ValueError(f"Unsupported ADCRYPT container version {self.header.version!r}") - for i, fh in enumerate(self.fhs): - self.segments.append(ADCryptSegment(fh, i)) - # TODO: We should probably create a mapping stream. + self.key: bytes | None = None + + def is_locked(self) -> bool: + """Return whether the ADCRYPT container is locked.""" + return self.key is None + + def unlock(self, *, passphrase: str | bytes | None = None, private_key: Path | bytes | None = None) -> None: + """Unlock the ADCRYPT container with a given passphrase or private key. - def decrypt(self, *, passphrase: str | bytes | None = None, private_key: Path | BinaryIO | None = None) -> None: - """Attempt to decrypt all ADCRYPT segment files. 
+ Args: + passphrase: The passphrase to unlock the container. + private_key: The private key to unlock the container. Raises: - ImportError if dependencies are missing. - ValueError if decryption failed. + RuntimeError: If required dependencies are missing. + ValueError: If unlocking failed. """ - if not HAS_CRYPTO: - raise ImportError("Missing required dependency 'pycryptodome' for ADCRYPT decryption.") - - if all(segment.decrypted for segment in self.segments): - return - - if not private_key and isinstance(passphrase, str): - passphrase = passphrase.encode() + raise RuntimeError("Missing required dependency 'pycryptodome' for ADCRYPT decryption.") - # If a private key was used, the passphrase is empty. - passphrase_hash = b"" - - if passphrase and not private_key: - hash = Hash.new(self.header.hash_algo.name) - hash.update(passphrase) - passphrase_hash = hash.digest() - - # If no private key was used, the "encrypted" salt is the plaintext salt as-is. - salt = self.header.enc_salt - - # Decrypt the salt if a private key was provided. 
- if private_key: - rsa_key = RSA.import_key( - private_key.read_bytes() if isinstance(private_key, Path) else private_key, passphrase - ) - pkcs_cipher = PKCS1_v1_5.new(rsa_key) - if not (salt := pkcs_cipher.decrypt(self.header.enc_salt, sentinel=None, expected_pt_len=16)): - raise ValueError("Failed to decrypt salt using provided private key") - - key_len = self.header.key_len - count = self.header.pbkdf2_count - pkey = PBKDF2(passphrase_hash, salt, key_len, count) + pkey = adcrypt_kdf( + passphrase, + private_key, + self.header.enc_salt, + self.header.key_len, + self.header.pbkdf2_count, + self.header.hash_algo.name.lower(), + ) # Verify the HMAC of EKEY using PKEY + hash algo, comparing with header HMAC - hmac = HMAC.new(pkey, digestmod=Hash.new(self.header.hash_algo.name)) - hmac.update(self.header.enc_key) - try: - hmac.verify(self.header.hmac_enc_key) - except ValueError as e: - raise ValueError("Unable to decrypt: HMAC verification of passphrase failed") from e + if hmac.digest(pkey, self.header.enc_key, self.header.hash_algo.name.lower()) != self.header.hmac_enc_key: + raise ValueError("Unable to unlock: HMAC verification of passphrase failed") # Decrypt EKEY using PKEY - # TODO: Set counter bit length according to EncAlgo ctr = Counter.new(128, initial_value=0, little_endian=True) cipher = AES.new(pkey, AES.MODE_CTR, counter=ctr) - fkey = cipher.decrypt(self.header.enc_key) - self.key = fkey + self.key = cipher.decrypt(self.header.enc_key) - for segment in self.segments: - segment.decrypt(self.key) + def wrap(self, fh: BinaryIO, index: int) -> ADCryptStream: + """Wrap a file handle into an :class:`ADCryptStream` for decryption. + Args: + fh: The file handle to wrap. + index: The segment index. -class ADCryptSegment: - def __init__(self, fh: BinaryIO, index: int): - self.index = index - self.fh = fh - self.decrypted = False + Raises: + ValueError: If the container is not unlocked. 
+ """ + if self.is_locked(): + raise ValueError("ADCRYPT container is not unlocked") - def __repr__(self) -> str: - return f"" + return ADCryptStream(fh, self.key, index) - def decrypt(self, fkey: bytes) -> None: - """Prepare this segment for decrypted reading.""" - if self.decrypted: - return +class ADCryptStream(AlignedStream): + def __init__(self, fh: BinaryIO, key: bytes, index: int): + self.fh = fh + self.key = key + self.index = index - # TODO: Set counter bit length according to EncAlgo - ctr = Counter.new(128, initial_value=self.index << 64, little_endian=True) - cipher = AES.new(fkey, AES.MODE_CTR, counter=ctr) + self.fh.seek(0, io.SEEK_END) + size = self.fh.tell() - (512 if index == 0 else 0) # Skip ADCRYPT header + super().__init__(size) + + def _read(self, offset: int, length: int) -> bytes: + self.fh.seek(offset + (512 if self.index == 0 else 0)) # Skip ADCRYPT header + buf = self.fh.read(length) + + ctr = Counter.new( + 128, + initial_value=self.index << 64 | (offset // (128 // 8)), + little_endian=True, + ) + cipher = AES.new(self.key, AES.MODE_CTR, counter=ctr) + return cipher.decrypt(buf) + + +def adcrypt_kdf( + passphrase: str | bytes | None, + private_key: Path | bytes | None, + salt: bytes, + key_len: int, + count: int, + algorithm: str, +) -> bytes: + """Derive the ADCRYPT decryption key. + + Args: + passphrase: The passphrase to unlock the container. + private_key: The private key to unlock the container. + salt: The salt used for key derivation. + key_len: The length of the derived key. + count: The number of iterations for PBKDF2. + algorithm: The hash algorithm to use. + + Returns: + The derived key as bytes. + """ + if isinstance(passphrase, str): + passphrase = passphrase.encode() - # Offset for ADCRYPT header in first segment. - # TODO: We should use the header size as offset, it could be different than 512. - if self.index == 0: - self.fh.seek(512) + # If a private key was used, the passphrase is empty. 
+ passphrase_hash = b"" + if passphrase is not None and private_key is None: + passphrase_hash = hashlib.new(algorithm, passphrase).digest() - self.key = fkey - self._cipher = cipher - self.decrypted = True + # If no private key was used, the "encrypted" salt is the plaintext salt as-is. + derived_salt = salt - # TODO: Check for plaintext headers, e.g. b"ADSEGMENTEDFILE", b"ADLOGICALIMAGE", b"EVF\x09\x0d\x0a\xff\x00" - # and b"LVF\x09\x0d\x0a\xff\x00". + # Decrypt the salt if a private key was provided. + if private_key is not None: + rsa_key = RSA.import_key(private_key.read_bytes() if isinstance(private_key, Path) else private_key, passphrase) + if not (derived_salt := PKCS1_v1_5.new(rsa_key).decrypt(salt, sentinel=None, expected_pt_len=16)): + raise ValueError("Failed to decrypt salt using provided private key") - def read(self, blocks: int | None = None) -> bytes: - # TODO: Since AES CTR mode is used, we can seek to an offset of the ciphertext and calculate the counter value - # based on the offset (random block read). - return self._cipher.decrypt(self.fh.read(blocks * 16 if blocks else None)) + return PBKDF2(passphrase_hash, derived_salt, key_len, count) diff --git a/dissect/evidence/adcrypt/c_adcrypt.pyi b/dissect/evidence/adcrypt/c_adcrypt.pyi index 1bda902..447fa79 100644 --- a/dissect/evidence/adcrypt/c_adcrypt.pyi +++ b/dissect/evidence/adcrypt/c_adcrypt.pyi @@ -1,8 +1,7 @@ # Generated by cstruct-stubgen -from typing import BinaryIO, Literal, overload +from typing import BinaryIO, Literal, TypeAlias, overload import dissect.cstruct as __cs__ -from typing_extensions import TypeAlias class _c_adcrypt(__cs__.cstruct): ADCRYPT_MAGIC: Literal["ADCRYPT\x00"] = ... 
diff --git a/dissect/evidence/asdf/asdf.py b/dissect/evidence/asdf/asdf.py index b59cf41..7622316 100644 --- a/dissect/evidence/asdf/asdf.py +++ b/dissect/evidence/asdf/asdf.py @@ -15,8 +15,8 @@ from dissect.util.stream import AlignedStream, RangeStream from dissect.evidence.asdf.c_asdf import c_asdf -from dissect.evidence.asdf.streams import CompressedStream, Crc32Stream, HashedStream -from dissect.evidence.exceptions import ( +from dissect.evidence.asdf.stream import CompressedStream, Crc32Stream, HashedStream +from dissect.evidence.exception import ( InvalidBlock, InvalidSnapshot, UnsupportedVersion, diff --git a/dissect/evidence/asdf/streams.py b/dissect/evidence/asdf/stream.py similarity index 100% rename from dissect/evidence/asdf/streams.py rename to dissect/evidence/asdf/stream.py diff --git a/dissect/evidence/ewf/c_ewf.pyi b/dissect/evidence/ewf/c_ewf.pyi index 9198c02..f100653 100644 --- a/dissect/evidence/ewf/c_ewf.pyi +++ b/dissect/evidence/ewf/c_ewf.pyi @@ -1,8 +1,7 @@ # Generated by cstruct-stubgen -from typing import BinaryIO, Literal, overload +from typing import BinaryIO, TypeAlias, overload import dissect.cstruct as __cs__ -from typing_extensions import TypeAlias class _c_ewf(__cs__.cstruct): class MediaType(__cs__.Enum): diff --git a/dissect/evidence/ewf/ewf.py b/dissect/evidence/ewf/ewf.py index fbb82cd..e02ae41 100644 --- a/dissect/evidence/ewf/ewf.py +++ b/dissect/evidence/ewf/ewf.py @@ -11,7 +11,7 @@ from dissect.util.stream import AlignedStream from dissect.evidence.ewf import c_ewf -from dissect.evidence.exceptions import EWFError +from dissect.evidence.exception import EWFError log = logging.getLogger(__name__) log.setLevel(os.getenv("DISSECT_LOG_EWF", "CRITICAL")) @@ -39,7 +39,7 @@ def find_files(path: str | Path) -> list[Path]: class EWF: """Expert Witness Disk Image Format.""" - def __init__(self, fh: BinaryIO | list[BinaryIO]): + def __init__(self, fh: BinaryIO | list[BinaryIO] | Path | list[Path]): fhs = [fh] if not isinstance(fh, 
list) else fh self.fh = fhs @@ -53,7 +53,7 @@ def __init__(self, fh: BinaryIO | list[BinaryIO]): for i in range(len(fhs)): try: - segment = self.open_segment(i) + segment = self.segment(i) except Exception: log.exception("Failed to parse as EWF file: %s", fh) continue @@ -77,12 +77,12 @@ def __init__(self, fh: BinaryIO | list[BinaryIO]): self.chunk_size = self.volume.sector_count * self.volume.sector_size max_size = self.volume.chunk_count * self.volume.sector_count * self.volume.sector_size - last_table = self.open_segment(len(self.fh) - 1).tables[-1] + last_table = self.segment(len(self.fh) - 1).tables[-1] last_chunk_size = len(last_table.read_chunk(last_table.num_entries - 1)) self.size = max_size - (self.chunk_size - last_chunk_size) - def open_segment(self, idx: int) -> Segment: + def segment(self, idx: int) -> Segment: # Poor mans LRU if idx in self._segments: self._segment_lru.remove(idx) @@ -141,7 +141,7 @@ def _read(self, offset: int, length: int) -> bytes: if segment_idx > len(self.ewf._segment_offsets): raise EWFError(f"Missing EWF file for segment index: {segment_idx}") - segment = self.ewf.open_segment(segment_idx) + segment = self.ewf.segment(segment_idx) segment_remaining_sectors = segment.sector_count - (sector_offset - segment.sector_offset) segment_sectors = min(segment_remaining_sectors, sector_count) diff --git a/dissect/evidence/exceptions.py b/dissect/evidence/exception.py similarity index 84% rename from dissect/evidence/exceptions.py rename to dissect/evidence/exception.py index 004bd22..50130b0 100644 --- a/dissect/evidence/exceptions.py +++ b/dissect/evidence/exception.py @@ -1,6 +1,5 @@ class Error(Exception): - """Base class for exceptions for this module. 
- It is used to recognize errors specific to this module""" + pass class FileNotFoundError(Error, FileNotFoundError): diff --git a/dissect/evidence/tools/adcrypt.py b/dissect/evidence/tools/adcrypt.py index dbb1618..189df3e 100644 --- a/dissect/evidence/tools/adcrypt.py +++ b/dissect/evidence/tools/adcrypt.py @@ -2,10 +2,11 @@ import argparse import logging +import shutil from pathlib import Path from dissect.evidence.ad1.ad1 import find_files as find_ad1_files -from dissect.evidence.adcrypt.adcrypt import ADCrypt +from dissect.evidence.adcrypt.adcrypt import ADCrypt, is_adcrypt from dissect.evidence.ewf.ewf import find_files as find_ewf_files from dissect.evidence.tools.util import catch_sigpipe @@ -26,41 +27,45 @@ def main() -> None: parser.add_argument("input", type=Path, help="path to encrypted file") parser.add_argument("-p", "--passphrase", type=str, help="user passphrase or certificate passphrase") parser.add_argument("-c", "--certificate", type=Path, help="user certificate") - parser.add_argument("-o", "--output", type=Path, required=True, help="path to output file") + parser.add_argument("-o", "--output", type=Path, required=True, help="path to output directory") args = parser.parse_args() - in_path = args.input.resolve() - out_path = args.output.resolve() + in_path: Path = args.input.resolve() + out_path: Path = args.output.resolve() if not in_path.exists(): - parser.exit(f"Input file doesn't exist: {in_path}") + parser.exit(f"Input file does not exist: {in_path}") if not out_path.is_dir(): - parser.exit(f"Output dir does not exist: {out_path}") + parser.exit(f"Output directory does not exist: {out_path}") if in_path.parent == out_path: - parser.exit("Output dir cannot be same as parent of input file") + parser.exit("Output directory cannot be the same as the input file directory") if not args.passphrase and not args.certificate: parser.exit("No passphrase or certificate provided") - segment_paths = find_ad1_files(in_path) if in_path.suffix.lower() == 
".ad1" else find_ewf_files(in_path) + segments = find_ad1_files(in_path) if in_path.suffix.lower() == ".ad1" else find_ewf_files(in_path) + if not segments: + parser.exit(f"No AD1 or E01 segment files found at: {in_path}") - adcrypt = ADCrypt([path.open("rb") for path in segment_paths]) + with segments[0].open("rb") as fh: + if not is_adcrypt(fh): + parser.exit(f"File is not an ADCRYPT container: {segments[0]}") + + adcrypt = ADCrypt(fh) try: - adcrypt.decrypt(passphrase=args.passphrase, private_key=args.certificate) + adcrypt.unlock(passphrase=args.passphrase, private_key=args.certificate) except (ValueError, TypeError) as e: log.exception(e, exc_info=False) # noqa: TRY401 parser.exit(1) - log.info("Calculated decryption keys for %s segment files (%r)", len(segment_paths), segment_paths[0].name) - - for i, segment in enumerate(adcrypt.segments): - with out_path.joinpath(segment_paths[i].name).open("wb") as fh: - size = segment_paths[i].lstat().st_size // 1024 // 1024 - log.info("Decrypting segment file %r (%s MB) ..", segment_paths[i].name, size) - fh.write(segment.read()) + for i, segment in enumerate(segments): + with segment.open("rb") as fh_in, out_path.joinpath(segment.name).open("wb") as fh_out: + fh_crypt = adcrypt.wrap(fh_in, index=i) + log.info("Decrypting segment file %r (%s MB) ..", segment.name, fh_crypt.size // 1024 // 1024) + shutil.copyfileobj(fh_crypt, fh_out) log.info("Finished decrypting file(s), result saved to %s", out_path) diff --git a/dissect/evidence/tools/asdf/repair.py b/dissect/evidence/tools/asdf/repair.py index 096e774..e534ec7 100644 --- a/dissect/evidence/tools/asdf/repair.py +++ b/dissect/evidence/tools/asdf/repair.py @@ -8,7 +8,7 @@ from pathlib import Path from dissect.evidence.asdf import asdf -from dissect.evidence.asdf.streams import HashedStream +from dissect.evidence.asdf.stream import HashedStream def main() -> int: diff --git a/dissect/evidence/tools/util.py b/dissect/evidence/tools/util.py index de18479..d62e1aa 100644 --- 
a/dissect/evidence/tools/util.py +++ b/dissect/evidence/tools/util.py @@ -4,7 +4,10 @@ import os import sys from functools import wraps -from typing import Callable +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Callable def catch_sigpipe(func: Callable) -> Callable: diff --git a/pyproject.toml b/pyproject.toml index 1ff570a..9a09d58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,6 @@ classifiers = [ dependencies = [ "dissect.cstruct>=4,<5", "dissect.util>=3,<4", - "pycryptodome", ] dynamic = ["version"] @@ -37,6 +36,9 @@ documentation = "https://docs.dissect.tools/en/latest/projects/dissect.evidence" repository = "https://github.com/fox-it/dissect.evidence" [project.optional-dependencies] +full = [ + "pycryptodome", +] dev = [ "dissect.cstruct>=4.0.dev,<5.0.dev", "dissect.util>=3.0.dev,<4.0.dev", @@ -45,6 +47,7 @@ dev = [ [dependency-groups] test = [ "pytest", + "pycryptodome", ] lint = [ "ruff==0.13.1", diff --git a/tests/test_ad1.py b/tests/test_ad1.py index 3b3b12c..42bb138 100644 --- a/tests/test_ad1.py +++ b/tests/test_ad1.py @@ -7,7 +7,7 @@ import pytest from dissect.evidence import ad1 -from dissect.evidence.ad1.ad1 import EntryType, MetaType, find_files +from dissect.evidence.ad1.ad1 import EntryType, find_files from tests._utils import absolute_path @@ -15,10 +15,10 @@ def test_ad1(ad1_data: BinaryIO) -> None: """Test if we can parse a basic non-segmented AD1 file with no file hierarchy.""" fs = ad1.AD1(ad1_data) - assert fs.segments[0].header.magic == b"ADSEGMENTEDFILE\x00" + assert fs.segment(0).header.magic == b"ADSEGMENTEDFILE\x00" assert fs.root.is_dir() - assert list(fs.root.listdir()) == ["E:"] + assert fs.root.listdir() == ["E:"] file = fs.get("E:/AD1_test/doc1.txt") assert file.is_file() @@ -32,7 +32,7 @@ def test_ad1_long(ad1_data_long: BinaryIO) -> None: fs = ad1.AD1(ad1_data_long) - assert fs.segments[0].header.magic == b"ADSEGMENTEDFILE\x00" + assert fs.segment(0).header.magic == 
b"ADSEGMENTEDFILE\x00" assert fs.root.is_dir() assert [file.name for file in fs.root.children] == ["E:"] @@ -56,7 +56,7 @@ def test_ad1_long(ad1_data_long: BinaryIO) -> None: b"'g'asldjg';askg\r\nkqe\r\n-[" ) md5sum = hashlib.md5(entry.open().read()) - assert md5sum.hexdigest().encode() == next(meta for meta in entry.meta if meta.type == ad1.MetaType.MD5).data + assert md5sum.hexdigest() == entry.md5 def test_ad1_compressed(ad1_data_compressed: BinaryIO) -> None: @@ -64,7 +64,7 @@ def test_ad1_compressed(ad1_data_compressed: BinaryIO) -> None: fs = ad1.AD1(ad1_data_compressed) - assert fs.segments[0].header.magic == b"ADSEGMENTEDFILE\x00" + assert fs.segment(0).header.magic == b"ADSEGMENTEDFILE\x00" assert fs.get("/").listdir() == ["E:"] assert fs.get("E:/AD1_test").listdir() == ["doc1.txt", "doc2.txt"] @@ -121,14 +121,13 @@ def test_ad1_segmented(ad1_data_segmented: list[BinaryIO]) -> None: fs = ad1.AD1(ad1_data_segmented) - assert len(fs.segments) == 4 - assert len(fs.stream._runs) == 4 - assert fs.segments[0].number == 1 - assert fs.segments[0].count == 4 - assert fs.segments[0].size == 0x200000 + assert len(fs.fh) == 4 + assert fs.segment(0).number == 1 + assert fs.segment(0).count == 4 + assert fs.segment(0).size == 0x200000 - 512 assert fs.logical_image.version == 4 - assert fs.logical_image.name == b"C:\\Users\\pcbje\\Desktop\\Data" + assert fs.logical_image.name == "C:\\Users\\pcbje\\Desktop\\Data" dir = fs.get("C:/Users/pcbje/Desktop/Data/Pictures") assert dir.is_dir() @@ -164,5 +163,79 @@ def test_ad1_segmented(ad1_data_segmented: list[BinaryIO]) -> None: assert picture.name == "5-0-762-Koala.jpg" assert picture.size == 780831 assert len(buf) == 780831 - assert picture.get_meta(MetaType.SHA1).data == b"9c3dcb1f9185a314ea25d51aed3b5881b32f420c" + assert picture.sha1 == "9c3dcb1f9185a314ea25d51aed3b5881b32f420c" assert hashlib.sha1(buf).hexdigest() == "9c3dcb1f9185a314ea25d51aed3b5881b32f420c" + + +def test_adcrypt_passphrase(ad1_data_encrypted_passphrase: 
list[BinaryIO]) -> None: + """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" + fs = ad1.AD1(ad1_data_encrypted_passphrase) + + assert fs.is_adcrypt() + assert fs.is_locked() + + with pytest.raises(ValueError, match="AD1 container is locked by ADCRYPT"): + fs.get("/") + + with pytest.raises(ValueError, match="Unable to unlock: HMAC verification of passphrase failed"): + fs.unlock(passphrase="asdf") + + fs.unlock(passphrase="password") + + assert fs.adcrypt.key.hex() == "9030a43f29689a045e815cf4f0ad82b68850063b414f2797f0897e188f98d7b4" + + assert fs.get("C:/Users/User/Downloads").listdir() == [ + "7z2501-x64.exe", + "desktop.ini", + "Exterro_FTK_Imager_(x64)-4.7.3.81.exe", + "hans-veth-8y--BAFlC9c-unsplash.jpg", + "marc-olivier-jodoin-tauPAnOIGvE-unsplash.jpg", + "marek-szturc-8Ou3EZmTMWA-unsplash.jpg", + "milo-weiler-1AIYdIb3O5M-unsplash.jpg", + ] + + for file in fs.get("C:/Users/User/Downloads").iterdir(): + buf = file.open().read() + assert len(buf) == file.size + assert hashlib.sha1(buf).hexdigest() == file.sha1 + + +def test_adcrypt_certificate(ad1_data_encrypted_certificate: list[BinaryIO]) -> None: + """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" + fs = ad1.AD1(ad1_data_encrypted_certificate) + + assert fs.is_adcrypt() + assert fs.is_locked() + + with pytest.raises(ValueError, match="AD1 container is locked by ADCRYPT"): + fs.get("/") + + with pytest.raises(ValueError, match="Unable to unlock: HMAC verification of passphrase failed"): + fs.unlock(passphrase="asdf") + + fs.unlock(private_key=absolute_path("_data/ad1/encrypted-certificate/key")) + + assert fs.adcrypt.key.hex() == "6cc0a9f94f944381cc51be474e5da6178059324bb457a87e0035b80f80ff9d4b" + + assert fs.get("C:/Users/User/Downloads").listdir() == [ + "desktop.ini", + "hans-veth-8y--BAFlC9c-unsplash.jpg", + "key.pem", + "marc-olivier-jodoin-tauPAnOIGvE-unsplash.jpg", + "marek-szturc-8Ou3EZmTMWA-unsplash.jpg", + 
"milo-weiler-1AIYdIb3O5M-unsplash.jpg", + "programs", + ] + + for file in fs.get("C:/Users/User/Downloads").iterdir(): + if file.is_dir(): + continue + + buf = file.open().read() + assert len(buf) == file.size + assert hashlib.sha1(buf).hexdigest() == file.sha1 + + assert fs.get("C:/Users/User/Downloads/programs").listdir() == [ + "7z2501-x64.exe", + "Exterro_FTK_Imager_(x64)-4.7.3.81.exe", + ] diff --git a/tests/test_adcrypt.py b/tests/test_adcrypt.py deleted file mode 100644 index ca32763..0000000 --- a/tests/test_adcrypt.py +++ /dev/null @@ -1,34 +0,0 @@ -from __future__ import annotations - -from typing import BinaryIO - -from dissect.evidence.adcrypt.adcrypt import ADCrypt -from tests._utils import absolute_path - - -def test_adcrypt_ad1_passphrase(ad1_data_encrypted_passphrase: list[BinaryIO]) -> None: - """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" - - adcrypt = ADCrypt(ad1_data_encrypted_passphrase) - adcrypt.decrypt(passphrase="password") - - assert adcrypt.key.hex() == "9030a43f29689a045e815cf4f0ad82b68850063b414f2797f0897e188f98d7b4" - assert all(segment.decrypted for segment in adcrypt.segments) - - plain = adcrypt.segments[0].read(512) - assert plain.startswith(b"ADSEGMENTEDFILE") - assert b"ADLOGICALIMAGE" in plain - - -def test_adcrypt_ad1_certificate(ad1_data_encrypted_certificate: list[BinaryIO]) -> None: - """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" - - adcrypt = ADCrypt(ad1_data_encrypted_certificate) - adcrypt.decrypt(private_key=absolute_path("_data/ad1/encrypted-certificate/key")) - - assert adcrypt.key.hex() == "6cc0a9f94f944381cc51be474e5da6178059324bb457a87e0035b80f80ff9d4b" - assert all(segment.decrypted for segment in adcrypt.segments) - - plain = adcrypt.segments[0].read(512) - assert plain.startswith(b"ADSEGMENTEDFILE") - assert b"ADLOGICALIMAGE" in plain diff --git a/tests/test_asdf.py b/tests/test_asdf.py index a303e13..d1b6e33 100644 
--- a/tests/test_asdf.py +++ b/tests/test_asdf.py @@ -6,8 +6,8 @@ import pytest from dissect.evidence.asdf.asdf import AsdfSnapshot, AsdfWriter -from dissect.evidence.asdf.streams import CompressedStream, Crc32Stream, HashedStream -from dissect.evidence.exceptions import InvalidSnapshot +from dissect.evidence.asdf.stream import CompressedStream, Crc32Stream, HashedStream +from dissect.evidence.exception import InvalidSnapshot def test_asdf(asdf_writer: AsdfWriter) -> None: diff --git a/tests/test_ewf.py b/tests/test_ewf.py index 1fc428a..c8f3daa 100644 --- a/tests/test_ewf.py +++ b/tests/test_ewf.py @@ -30,12 +30,12 @@ def test_ewf_open_segment(MockSegment: MagicMock, monkeypatch: pytest.MonkeyPatc assert e._segment_offsets == [2, 4, 6] assert e._segment_lru == [2, 3] - tmp = e.open_segment(0) + tmp = e.segment(0) assert tmp.offset == 0 assert tmp.sector_offset == 0 assert e._segment_lru == [3, 0] - tmp = e.open_segment(1) + tmp = e.segment(1) assert tmp.offset == 1024 assert tmp.sector_offset == 2 assert e._segment_lru == [0, 1] diff --git a/tests/test_exception.py b/tests/test_exception.py new file mode 100644 index 0000000..bcf5fdf --- /dev/null +++ b/tests/test_exception.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import pytest + +from dissect.evidence import exception + + +@pytest.mark.parametrize( + ("exc", "std"), + [ + (exception.FileNotFoundError, FileNotFoundError), + (exception.IsADirectoryError, IsADirectoryError), + (exception.NotADirectoryError, NotADirectoryError), + ], +) +def test_filesystem_error_subclass(exc: exception.Error, std: Exception) -> None: + assert issubclass(exc, std) + assert isinstance(exc(), std) + + with pytest.raises(std): + raise exc() From f2142bb0b79244f3c2eddcd1db62c6b92eb8a4f4 Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 8 Dec 2025 16:09:16 +0100 Subject: [PATCH 7/9] Final tweaks --- dissect/evidence/ad1/__init__.py | 4 +- dissect/evidence/ad1/ad1.py | 3 ++ 
dissect/evidence/adcrypt/__init__.py | 5 ++- dissect/evidence/adcrypt/adcrypt.py | 29 +------------- dissect/evidence/adcrypt/stream.py | 40 +++++++++++++++++++ tests/conftest.py | 60 ++++++++++++++-------------- tests/test_ad1.py | 47 +++++++++++++++------- 7 files changed, 115 insertions(+), 73 deletions(-) create mode 100644 dissect/evidence/adcrypt/stream.py diff --git a/dissect/evidence/ad1/__init__.py b/dissect/evidence/ad1/__init__.py index eafba6a..be96321 100644 --- a/dissect/evidence/ad1/__init__.py +++ b/dissect/evidence/ad1/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations -from dissect.evidence.ad1.ad1 import AD1, FileEntry, FileMeta, FileStream, LogicalImage, MetaType, SegmentFile +from dissect.evidence.ad1.ad1 import AD1, FileEntry, FileMeta, LogicalImage, MetaType, SegmentFile +from dissect.evidence.ad1.stream import AD1Stream, FileStream from dissect.evidence.exception import ( Error, FileNotFoundError, @@ -10,6 +11,7 @@ __all__ = [ "AD1", + "AD1Stream", "Error", "FileEntry", "FileMeta", diff --git a/dissect/evidence/ad1/ad1.py b/dissect/evidence/ad1/ad1.py index 1040828..628ef79 100644 --- a/dissect/evidence/ad1/ad1.py +++ b/dissect/evidence/ad1/ad1.py @@ -151,6 +151,9 @@ def _open_ad1(self) -> None: if segment.header.magic != c_ad1.ADSEGMENTEDFILE_MAGIC.encode(): raise ValueError(f"Invalid AD1 segment file magic in segment {i}") + if segment.number != i + 1: + raise ValueError(f"Invalid AD1 segment number in segment {i}, got {segment.number}, expected {i + 1}") + offset += segment.size self._segment_offsets.append(offset) diff --git a/dissect/evidence/adcrypt/__init__.py b/dissect/evidence/adcrypt/__init__.py index e459327..b03f415 100644 --- a/dissect/evidence/adcrypt/__init__.py +++ b/dissect/evidence/adcrypt/__init__.py @@ -1,9 +1,12 @@ from __future__ import annotations -from dissect.evidence.adcrypt.adcrypt import ADCrypt +from dissect.evidence.adcrypt.adcrypt import ADCrypt, is_adcrypt +from dissect.evidence.adcrypt.stream 
import ADCryptStream from dissect.evidence.exception import Error __all__ = [ "ADCrypt", + "ADCryptStream", "Error", + "is_adcrypt", ] diff --git a/dissect/evidence/adcrypt/adcrypt.py b/dissect/evidence/adcrypt/adcrypt.py index 3c9fccb..73523dd 100644 --- a/dissect/evidence/adcrypt/adcrypt.py +++ b/dissect/evidence/adcrypt/adcrypt.py @@ -2,13 +2,11 @@ import hashlib import hmac -import io from pathlib import Path from typing import BinaryIO -from dissect.util.stream import AlignedStream - from dissect.evidence.adcrypt.c_adcrypt import c_adcrypt +from dissect.evidence.adcrypt.stream import ADCryptStream try: from Crypto.Cipher import AES, PKCS1_v1_5 @@ -79,7 +77,7 @@ def unlock(self, *, passphrase: str | bytes | None = None, private_key: Path | b ValueError: If unlocking failed. """ if not HAS_CRYPTO: - raise RuntimeError("Missing required dependency 'pycryptodome' for ADCRYPT decryption.") + raise RuntimeError("Missing required dependency 'pycryptodome' for ADCRYPT decryption") pkey = adcrypt_kdf( passphrase, @@ -115,29 +113,6 @@ def wrap(self, fh: BinaryIO, index: int) -> ADCryptStream: return ADCryptStream(fh, self.key, index) -class ADCryptStream(AlignedStream): - def __init__(self, fh: BinaryIO, key: bytes, index: int): - self.fh = fh - self.key = key - self.index = index - - self.fh.seek(0, io.SEEK_END) - size = self.fh.tell() - (512 if index == 0 else 0) # Skip ADCRYPT header - super().__init__(size) - - def _read(self, offset: int, length: int) -> bytes: - self.fh.seek(offset + (512 if self.index == 0 else 0)) # Skip ADCRYPT header - buf = self.fh.read(length) - - ctr = Counter.new( - 128, - initial_value=self.index << 64 | (offset // (128 // 8)), - little_endian=True, - ) - cipher = AES.new(self.key, AES.MODE_CTR, counter=ctr) - return cipher.decrypt(buf) - - def adcrypt_kdf( passphrase: str | bytes | None, private_key: Path | bytes | None, diff --git a/dissect/evidence/adcrypt/stream.py b/dissect/evidence/adcrypt/stream.py new file mode 100644 index 
0000000..137bad4 --- /dev/null +++ b/dissect/evidence/adcrypt/stream.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import io +from typing import BinaryIO + +from dissect.util.stream import AlignedStream + +try: + from Crypto.Cipher import AES + from Crypto.Util import Counter + + HAS_CRYPTO = True +except ImportError: + HAS_CRYPTO = False + + +class ADCryptStream(AlignedStream): + def __init__(self, fh: BinaryIO, key: bytes, index: int): + if not HAS_CRYPTO: + raise RuntimeError("Missing required dependency 'pycryptodome' for ADCRYPT decryption") + + self.fh = fh + self.key = key + self.index = index + + self.fh.seek(0, io.SEEK_END) + size = self.fh.tell() - (512 if index == 0 else 0) # Skip ADCRYPT header + super().__init__(size) + + def _read(self, offset: int, length: int) -> bytes: + self.fh.seek(offset + (512 if self.index == 0 else 0)) # Skip ADCRYPT header + buf = self.fh.read(length) + + ctr = Counter.new( + 128, + initial_value=self.index << 64 | (offset // (128 // 8)), + little_endian=True, + ) + cipher = AES.new(self.key, AES.MODE_CTR, counter=ctr) + return cipher.decrypt(buf) diff --git a/tests/conftest.py b/tests/conftest.py index 67fda08..3cfa66f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,59 +19,59 @@ def open_data(name: str) -> Iterator[BinaryIO]: @pytest.fixture -def ad1_data() -> Iterator[BinaryIO]: +def ad1_basic() -> Iterator[BinaryIO]: yield from open_data("_data/ad1/test.ad1") @pytest.fixture -def ad1_data_long() -> Iterator[BinaryIO]: +def ad1_long() -> Iterator[BinaryIO]: yield from open_data("_data/ad1/long.ad1") @pytest.fixture -def ad1_data_compressed() -> Iterator[BinaryIO]: +def ad1_compressed() -> Iterator[BinaryIO]: yield from open_data("_data/ad1/compressed.ad1") @pytest.fixture -def ad1_data_segmented() -> list[BinaryIO]: +def ad1_segmented() -> list[Path]: return [ - absolute_path("_data/ad1/pcbje/text-and-pictures.ad1").open("rb"), - absolute_path("_data/ad1/pcbje/text-and-pictures.ad2").open("rb"), 
- absolute_path("_data/ad1/pcbje/text-and-pictures.ad3").open("rb"), - absolute_path("_data/ad1/pcbje/text-and-pictures.ad4").open("rb"), + absolute_path("_data/ad1/pcbje/text-and-pictures.ad1"), + absolute_path("_data/ad1/pcbje/text-and-pictures.ad2"), + absolute_path("_data/ad1/pcbje/text-and-pictures.ad3"), + absolute_path("_data/ad1/pcbje/text-and-pictures.ad4"), ] @pytest.fixture -def ad1_data_encrypted_passphrase() -> list[BinaryIO]: +def ad1_encrypted_passphrase() -> list[Path]: return [ - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad1").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad2").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad3").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad4").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad5").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad6").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad7").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad8").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad9").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad10").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad11").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad12").open("rb"), - absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad13").open("rb"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad1"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad2"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad3"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad4"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad5"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad6"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad7"), + 
absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad8"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad9"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad10"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad11"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad12"), + absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad13"), ] @pytest.fixture -def ad1_data_encrypted_certificate() -> list[BinaryIO]: +def ad1_encrypted_certificate() -> list[Path]: return [ - absolute_path("_data/ad1/encrypted-certificate/encrypted.ad1").open("rb"), - absolute_path("_data/ad1/encrypted-certificate/encrypted.ad2").open("rb"), - absolute_path("_data/ad1/encrypted-certificate/encrypted.ad3").open("rb"), - absolute_path("_data/ad1/encrypted-certificate/encrypted.ad4").open("rb"), - absolute_path("_data/ad1/encrypted-certificate/encrypted.ad5").open("rb"), - absolute_path("_data/ad1/encrypted-certificate/encrypted.ad6").open("rb"), - absolute_path("_data/ad1/encrypted-certificate/encrypted.ad7").open("rb"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad1"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad2"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad3"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad4"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad5"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad6"), + absolute_path("_data/ad1/encrypted-certificate/encrypted.ad7"), ] diff --git a/tests/test_ad1.py b/tests/test_ad1.py index 42bb138..0393fc0 100644 --- a/tests/test_ad1.py +++ b/tests/test_ad1.py @@ -2,19 +2,22 @@ import hashlib from datetime import datetime, timezone -from typing import BinaryIO +from typing import TYPE_CHECKING, BinaryIO import pytest -from dissect.evidence import ad1 +from dissect.evidence.ad1 import ad1 from dissect.evidence.ad1.ad1 import EntryType, find_files from tests._utils import absolute_path +if 
TYPE_CHECKING: + from pathlib import Path -def test_ad1(ad1_data: BinaryIO) -> None: + +def test_ad1(ad1_basic: BinaryIO) -> None: """Test if we can parse a basic non-segmented AD1 file with no file hierarchy.""" - fs = ad1.AD1(ad1_data) + fs = ad1.AD1(ad1_basic) assert fs.segment(0).header.magic == b"ADSEGMENTEDFILE\x00" assert fs.root.is_dir() @@ -27,10 +30,10 @@ def test_ad1(ad1_data: BinaryIO) -> None: assert file.open().read() == b"Inhoud document 1" -def test_ad1_long(ad1_data_long: BinaryIO) -> None: +def test_ad1_long(ad1_long: BinaryIO) -> None: """Test if we can parse a basic non-segmented AD1 file with long file names.""" - fs = ad1.AD1(ad1_data_long) + fs = ad1.AD1(ad1_long) assert fs.segment(0).header.magic == b"ADSEGMENTEDFILE\x00" assert fs.root.is_dir() @@ -59,10 +62,10 @@ def test_ad1_long(ad1_data_long: BinaryIO) -> None: assert md5sum.hexdigest() == entry.md5 -def test_ad1_compressed(ad1_data_compressed: BinaryIO) -> None: +def test_ad1_compressed(ad1_compressed: BinaryIO) -> None: """Test if we can parse a non-segmented AD1 file with standard zlib compression.""" - fs = ad1.AD1(ad1_data_compressed) + fs = ad1.AD1(ad1_compressed) assert fs.segment(0).header.magic == b"ADSEGMENTEDFILE\x00" @@ -112,14 +115,14 @@ def test_ad1_find_files(path: str, expected_files: list[str]) -> None: assert [file.name for file in files] == expected_files -def test_ad1_segmented(ad1_data_segmented: list[BinaryIO]) -> None: +def test_ad1_segmented(ad1_segmented: list[Path]) -> None: """Test if we can parse segmented AD1 files. 
References: - https://github.com/pcbje/pyad1/tree/master/test_data """ - fs = ad1.AD1(ad1_data_segmented) + fs = ad1.AD1(ad1_segmented) assert len(fs.fh) == 4 assert fs.segment(0).number == 1 @@ -167,9 +170,9 @@ def test_ad1_segmented(ad1_data_segmented: list[BinaryIO]) -> None: assert hashlib.sha1(buf).hexdigest() == "9c3dcb1f9185a314ea25d51aed3b5881b32f420c" -def test_adcrypt_passphrase(ad1_data_encrypted_passphrase: list[BinaryIO]) -> None: +def test_adcrypt_passphrase(ad1_encrypted_passphrase: list[Path]) -> None: """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" - fs = ad1.AD1(ad1_data_encrypted_passphrase) + fs = ad1.AD1(ad1_encrypted_passphrase) assert fs.is_adcrypt() assert fs.is_locked() @@ -200,9 +203,9 @@ def test_adcrypt_passphrase(ad1_data_encrypted_passphrase: list[BinaryIO]) -> No assert hashlib.sha1(buf).hexdigest() == file.sha1 -def test_adcrypt_certificate(ad1_data_encrypted_certificate: list[BinaryIO]) -> None: +def test_adcrypt_certificate(ad1_encrypted_certificate: list[Path]) -> None: """Test if we can decrypt ADCRYPT AD1 images, in this example a segmented AD1 logical image.""" - fs = ad1.AD1(ad1_data_encrypted_certificate) + fs = ad1.AD1(ad1_encrypted_certificate) assert fs.is_adcrypt() assert fs.is_locked() @@ -239,3 +242,19 @@ def test_adcrypt_certificate(ad1_data_encrypted_certificate: list[BinaryIO]) -> "7z2501-x64.exe", "Exterro_FTK_Imager_(x64)-4.7.3.81.exe", ] + + +def test_ad1_segment_lru(ad1_segmented: list[Path], monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(ad1, "MAX_OPEN_SEGMENTS", 2) + + fs = ad1.AD1(ad1_segmented) + assert fs._segment_lru == [3, 0] + + fs.segment(2) + assert fs._segment_lru == [0, 2] + + fs.segment(1) + assert fs._segment_lru == [2, 1] + + picture = fs.get("C:/Users/pcbje/Desktop/Data/Pictures/5-0-762-Koala.jpg") + assert hashlib.sha1(picture.open().read()).hexdigest() == "9c3dcb1f9185a314ea25d51aed3b5881b32f420c" From 
162c507e43070fa7f8d6ba4d0247119ead9ce352 Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 8 Dec 2025 16:26:22 +0100 Subject: [PATCH 8/9] Remove unused constant --- dissect/evidence/adcrypt/adcrypt.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dissect/evidence/adcrypt/adcrypt.py b/dissect/evidence/adcrypt/adcrypt.py index 73523dd..8fc5f7d 100644 --- a/dissect/evidence/adcrypt/adcrypt.py +++ b/dissect/evidence/adcrypt/adcrypt.py @@ -18,8 +18,6 @@ except ImportError: HAS_CRYPTO = False -MAX_OPEN_SEGMENTS = 128 - def is_adcrypt(fh: BinaryIO) -> bool: """Check if the file handle is an ADCRYPT container. From f5db15a367863865703c7192fbdae089a240697c Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 8 Dec 2025 16:47:14 +0100 Subject: [PATCH 9/9] Add tools test --- tests/tools/__init__.py | 0 tests/tools/test_adcrypt.py | 74 +++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 tests/tools/__init__.py create mode 100644 tests/tools/test_adcrypt.py diff --git a/tests/tools/__init__.py b/tests/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/tools/test_adcrypt.py b/tests/tools/test_adcrypt.py new file mode 100644 index 0000000..9d51a5d --- /dev/null +++ b/tests/tools/test_adcrypt.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import hashlib +import logging +from typing import TYPE_CHECKING + +from dissect.evidence.ad1.ad1 import find_files +from dissect.evidence.tools import adcrypt +from tests._utils import absolute_path + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +def test_adcrypt_passphrase(tmp_path: Path, caplog: pytest.LogCaptureFixture, monkeypatch: pytest.MonkeyPatch) -> None: + """Test if we can decrypt ADCRYPT AD1 images using the adcrypt tool.""" + + with caplog.at_level(logging.DEBUG, adcrypt.log.name), monkeypatch.context() as m: + m.setattr( + "sys.argv", + [ 
+ "adcrypt", + str(absolute_path("_data/ad1/encrypted-passphrase/encrypted.ad1")), + "-p", + "password", + "-o", + str(tmp_path), + ], + ) + + adcrypt.main() + + for i in range(1, 14): + assert f"Decrypting segment file 'encrypted.ad{i}'" in caplog.text + + assert tmp_path.joinpath("encrypted.ad1").exists() + + ctx = hashlib.sha1() + for path in find_files(tmp_path.joinpath("encrypted.ad1")): + ctx.update(path.read_bytes()) + + assert ctx.hexdigest() == "3b7449fd09e5803006ce1b3aba5bb4c48c083f12" + + +def test_adcrypt_certificate(tmp_path: Path, caplog: pytest.LogCaptureFixture, monkeypatch: pytest.MonkeyPatch) -> None: + """Test if we can decrypt ADCRYPT AD1 images using the adcrypt tool.""" + + with caplog.at_level(logging.DEBUG, adcrypt.log.name), monkeypatch.context() as m: + m.setattr( + "sys.argv", + [ + "adcrypt", + str(absolute_path("_data/ad1/encrypted-certificate/encrypted.ad1")), + "-c", + str(absolute_path("_data/ad1/encrypted-certificate/key")), + "-o", + str(tmp_path), + ], + ) + + adcrypt.main() + + for i in range(1, 8): + assert f"Decrypting segment file 'encrypted.ad{i}'" in caplog.text + + assert tmp_path.joinpath("encrypted.ad1").exists() + + ctx = hashlib.sha1() + for path in find_files(tmp_path.joinpath("encrypted.ad1")): + ctx.update(path.read_bytes()) + + assert ctx.hexdigest() == "23cdf7c35327d5b24c81ff48b483ae805c27df6a"