From a003405b629dc9cee18e674faacb078acd6984f7 Mon Sep 17 00:00:00 2001 From: Marten Ringwelski Date: Tue, 7 Nov 2023 09:51:10 +0100 Subject: [PATCH] chore: Drop dependency on fact_helper_file The only benefit it provides over the python package magic is that it includes some additional magic. Now that this file is in its own repo we can simply use the MAGIC environment variable. As the fact_extractor is only ever run in docker it suffices to set this in the dockerfile. --- Dockerfile | 4 ++++ fact_extractor/helperFunctions/statistics.py | 4 ++-- fact_extractor/install/common.py | 1 + fact_extractor/install/unpacker.py | 1 - .../plugins/unpacking/generic_carver/code/generic_carver.py | 4 ++-- .../plugins/unpacking/generic_fs/code/generic_fs.py | 5 ++--- fact_extractor/unpacker/unpackBase.py | 4 ++-- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 172fb861..d472a81c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,5 +13,9 @@ WORKDIR /opt/app/fact_extractor RUN . /venv/bin/activate && install/pre_install.sh RUN . 
/venv/bin/activate && /venv/bin/python3.11 install.py +# Install additional magic database +RUN wget -O /firmware.mgc \ + https://github.com/maringuu/firmware-magic-database/releases/download/v0.1.0/firmware.mgc +ENV MAGIC=/usr/share/file/magic.mgc:/firmware.mgc ENTRYPOINT ["./docker_extraction.py"] diff --git a/fact_extractor/helperFunctions/statistics.py b/fact_extractor/helperFunctions/statistics.py index 1333e70d..61443c40 100644 --- a/fact_extractor/helperFunctions/statistics.py +++ b/fact_extractor/helperFunctions/statistics.py @@ -1,3 +1,4 @@ +import magic from configparser import ConfigParser from contextlib import suppress from pathlib import Path @@ -7,7 +8,6 @@ from common_helper_unpacking_classifier import ( avg_entropy, get_binary_size_without_padding, is_compressed ) -from fact_helper_file import get_file_type_from_path from helperFunctions.config import read_list_from_config @@ -28,7 +28,7 @@ def get_unpack_status(file_path: str, binary: bytes, extracted_files: List[Path] meta_data['entropy'] = avg_entropy(binary) if not extracted_files and meta_data.get('number_of_excluded_files', 0) == 0: - if get_file_type_from_path(file_path)['mime'] in read_list_from_config(config, 'ExpertSettings', 'compressed_file_types')\ + if magic.from_file(file_path, mime=True) in read_list_from_config(config, 'ExpertSettings', 'compressed_file_types')\ or not is_compressed(binary, compress_entropy_threshold=config.getfloat('ExpertSettings', 'unpack_threshold'), classifier=avg_entropy): meta_data['summary'] = ['unpacked'] else: diff --git a/fact_extractor/install/common.py b/fact_extractor/install/common.py index b687e277..f1daad4d 100644 --- a/fact_extractor/install/common.py +++ b/fact_extractor/install/common.py @@ -35,6 +35,7 @@ 'pytest', 'pytest-cov', 'testresources', + 'python-magic', ], }, } diff --git a/fact_extractor/install/unpacker.py b/fact_extractor/install/unpacker.py index eca6abd6..ce8b0207 100644 --- a/fact_extractor/install/unpacker.py +++ 
b/fact_extractor/install/unpacker.py @@ -158,7 +158,6 @@ 'pluginbase', 'git+https://github.com/armbues/python-entropy', # To be checked. Original dependency was deleted. 'git+https://github.com/fkie-cad/common_helper_unpacking_classifier.git', - 'git+https://github.com/fkie-cad/fact_helper_file.git', 'git+https://github.com/wummel/patool.git', 'archmage', # jefferson + deps diff --git a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py index a11b7eee..9f21867d 100644 --- a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py +++ b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py @@ -3,13 +3,13 @@ ''' from __future__ import annotations +import magic import logging import re import shutil from pathlib import Path from common_helper_process import execute_shell_command -from fact_helper_file import get_file_type_from_path NAME = 'generic_carver' MIME_PATTERNS = ['generic/carver'] @@ -45,7 +45,7 @@ def remove_false_positive_archives(self) -> str: for file_path in self.unpack_directory.glob('**/*'): if not file_path.is_file(): continue - file_type = get_file_type_from_path(file_path)['mime'] + file_type = magic.from_file(file_path, mime=True) if file_type == 'application/x-tar' or self._is_possible_tar(file_type, file_path): self._remove_invalid_archives(file_path, 'tar -tvf {}', 'does not look like a tar archive') diff --git a/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py b/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py index f35fd9f4..fdc213b0 100644 --- a/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py +++ b/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py @@ -2,13 +2,12 @@ This plugin mounts filesystem images and extracts their content ''' import re +import magic from shlex import split from subprocess import run, PIPE, STDOUT from tempfile import TemporaryDirectory from 
time import sleep -from fact_helper_file import get_file_type_from_path - NAME = 'genericFS' MIME_PATTERNS = [ 'filesystem/btrfs', 'filesystem/dosmbr', 'filesystem/f2fs', 'filesystem/jfs', 'filesystem/minix', @@ -28,7 +27,7 @@ def unpack_function(file_path, tmp_dir): - mime_type = get_file_type_from_path(file_path)['mime'] + mime_type = magic.from_file(file_path, mime=True) if mime_type == 'filesystem/dosmbr': output = _mount_from_boot_record(file_path, tmp_dir) else: diff --git a/fact_extractor/unpacker/unpackBase.py b/fact_extractor/unpacker/unpackBase.py index fb0d629f..ea5c011e 100644 --- a/fact_extractor/unpacker/unpackBase.py +++ b/fact_extractor/unpacker/unpackBase.py @@ -4,9 +4,9 @@ from time import time import fnmatch from typing import Callable, Dict, List, Tuple +import magic from common_helper_files import get_files_in_dir -from fact_helper_file import get_file_type_from_path from helperFunctions.config import read_list_from_config from helperFunctions.plugin import import_plugins @@ -50,7 +50,7 @@ def get_unpacker(self, mime_type: str): return self.unpacker_plugins['generic/carver'] def extract_files_from_file(self, file_path: str, tmp_dir) -> Tuple[List, Dict]: - current_unpacker = self.get_unpacker(get_file_type_from_path(file_path)['mime']) + current_unpacker = self.get_unpacker(magic.from_file(file_path, mime=True)) return self._extract_files_from_file_using_specific_unpacker(file_path, tmp_dir, current_unpacker) def unpacking_fallback(self, file_path, tmp_dir, old_meta, fallback_plugin_mime) -> Tuple[List, Dict]: