diff --git a/fact_extractor/__init__.py b/fact_extractor/__init__.py new file mode 100644 index 00000000..6747f654 --- /dev/null +++ b/fact_extractor/__init__.py @@ -0,0 +1,6 @@ +import os +import pathlib as pl + +firmware_magic_path = pl.Path(__file__).parent.parent / "bin" / "firmware.mgc" + +os.environ["MAGIC"] = f'/usr/lib/file/magic.mgc:{firmware_magic_path}' diff --git a/fact_extractor/helperFunctions/statistics.py b/fact_extractor/helperFunctions/statistics.py index 1333e70d..61443c40 100644 --- a/fact_extractor/helperFunctions/statistics.py +++ b/fact_extractor/helperFunctions/statistics.py @@ -1,3 +1,4 @@ +import magic from configparser import ConfigParser from contextlib import suppress from pathlib import Path @@ -7,7 +8,6 @@ from common_helper_unpacking_classifier import ( avg_entropy, get_binary_size_without_padding, is_compressed ) -from fact_helper_file import get_file_type_from_path from helperFunctions.config import read_list_from_config @@ -28,7 +28,7 @@ def get_unpack_status(file_path: str, binary: bytes, extracted_files: List[Path] meta_data['entropy'] = avg_entropy(binary) if not extracted_files and meta_data.get('number_of_excluded_files', 0) == 0: - if get_file_type_from_path(file_path)['mime'] in read_list_from_config(config, 'ExpertSettings', 'compressed_file_types')\ + if magic.from_file(file_path, mime=True) in read_list_from_config(config, 'ExpertSettings', 'compressed_file_types')\ or not is_compressed(binary, compress_entropy_threshold=config.getfloat('ExpertSettings', 'unpack_threshold'), classifier=avg_entropy): meta_data['summary'] = ['unpacked'] else: diff --git a/fact_extractor/install/common.py b/fact_extractor/install/common.py index 7bd026ee..bd710870 100644 --- a/fact_extractor/install/common.py +++ b/fact_extractor/install/common.py @@ -1,4 +1,5 @@ import logging +import subprocess as sp import os from contextlib import suppress from pathlib import Path @@ -49,6 +50,16 @@ def main(distribution): with suppress(FileExistsError): os.mkdir('../bin') + sp.run( + [ + "wget", + "--output-document", + "../bin/firmware.mgc", + "https://github.com/fkie-cad/firmware-magic-database/releases/download/v0.2.0/firmware.mgc", + ], + check=True, + ) + config = load_config('main.cfg') data_folder = config.get('unpack', 'data_folder') os.makedirs(str(Path(data_folder, 'files')), exist_ok=True) diff --git a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py index a11b7eee..9f21867d 100644 --- a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py +++ b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py @@ -3,13 +3,13 @@ ''' from __future__ import annotations +import magic import logging import re import shutil from pathlib import Path from common_helper_process import execute_shell_command -from fact_helper_file import get_file_type_from_path NAME = 'generic_carver' MIME_PATTERNS = ['generic/carver'] @@ -45,7 +45,7 @@ def remove_false_positive_archives(self) -> str: for file_path in self.unpack_directory.glob('**/*'): if not file_path.is_file(): continue - file_type = get_file_type_from_path(file_path)['mime'] + file_type = magic.from_file(file_path, mime=True) if file_type == 'application/x-tar' or self._is_possible_tar(file_type, file_path): self._remove_invalid_archives(file_path, 'tar -tvf {}', 'does not look like a tar archive') diff --git a/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py b/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py index f35fd9f4..fdc213b0 100644 --- a/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py +++ b/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py @@ -2,13 +2,12 @@ This plugin mounts filesystem images and extracts their content ''' import re +import magic from shlex import split from subprocess import run, PIPE, STDOUT from tempfile import TemporaryDirectory from time import sleep -from fact_helper_file import get_file_type_from_path - NAME = 'genericFS' MIME_PATTERNS = [ 'filesystem/btrfs', 'filesystem/dosmbr', 'filesystem/f2fs', 'filesystem/jfs', 'filesystem/minix', @@ -28,7 +27,7 @@ def unpack_function(file_path, tmp_dir): - mime_type = get_file_type_from_path(file_path)['mime'] + mime_type = magic.from_file(file_path, mime=True) if mime_type == 'filesystem/dosmbr': output = _mount_from_boot_record(file_path, tmp_dir) else: diff --git a/fact_extractor/unpacker/unpackBase.py b/fact_extractor/unpacker/unpackBase.py index fb0d629f..ea5c011e 100644 --- a/fact_extractor/unpacker/unpackBase.py +++ b/fact_extractor/unpacker/unpackBase.py @@ -4,9 +4,9 @@ from time import time import fnmatch from typing import Callable, Dict, List, Tuple +import magic from common_helper_files import get_files_in_dir -from fact_helper_file import get_file_type_from_path from helperFunctions.config import read_list_from_config from helperFunctions.plugin import import_plugins @@ -50,7 +50,7 @@ def get_unpacker(self, mime_type: str): return self.unpacker_plugins['generic/carver'] def extract_files_from_file(self, file_path: str, tmp_dir) -> Tuple[List, Dict]: - current_unpacker = self.get_unpacker(get_file_type_from_path(file_path)['mime']) + current_unpacker = self.get_unpacker(magic.from_file(file_path, mime=True)) return self._extract_files_from_file_using_specific_unpacker(file_path, tmp_dir, current_unpacker) def unpacking_fallback(self, file_path, tmp_dir, old_meta, fallback_plugin_mime) -> Tuple[List, Dict]: diff --git a/requirements-unpackers.txt b/requirements-unpackers.txt index 85a3b75d..8956be14 100644 --- a/requirements-unpackers.txt +++ b/requirements-unpackers.txt @@ -1,7 +1,7 @@ # FixMe: deprecated pluginbase~=1.0.1 git+https://github.com/fkie-cad/common_helper_unpacking_classifier.git -git+https://github.com/fkie-cad/fact_helper_file.git +python-magic patool~=2.2.0 # jffs2: jefferson + deps git+https://github.com/sviehb/jefferson.git@v0.4.1