Skip to content

Commit e17567f

Browse files
authored
Mach-O File Format Parsing (#1562)
* Adds first pass at the Mach-O parser * Ports Plaso Mach-O parsing code * Base 20240912 * Adds directory walk * Cleaned up logging * Outputs parsing result JSON objects * Writes Mach-O parse result JSON output file * Adds segments and sections * Pretty prints JSON * Remove RawDisk for the time being as it is not supported (yet) * First pass at JSON formatted report * Adds symbols * Adds architecture info and parsing time * Adds magic and flags * Adds Signature CodeDirectory items to report * Completes signature report * Adds tgz with three FatBinaries, cleans up debug logging * Adds TLSH * Switches Segment size to hex * Puts safety net around signature parsing * Fixes SymHash calculation * Adds ssdeep * Adds ppdeep and py-tlsh to server and api-server * Adds imports * Adds section details * Clean up evidence type and remove iocs section from report * Places output reports in location corresponding to input evidence * Adds MachoExtraction Evidence type * Update poetry.lock * Introduces Mach-O test * Adds Mach-O test case * Switches from ppdeep to pyssdeep * Adds cd_hash calculation * Removing test data that is no longer used * Cleans up log statements * Add comment to explain lambda for json.dumps() * Fix output formatting * Formatting cleanups and os.path.relpath for simpler rel path calc * Adds assertions to unit test * Reduces scope of imported libraries to worker * Removes dev docker compose file * Updates poetry to the latest status
1 parent 54b2a0b commit e17567f

File tree

12 files changed

+797
-8
lines changed

12 files changed

+797
-8
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
/_sources/
1414
/.tox
1515
charts/
16+
conf/
17+
evidence/
1618

1719
# And don't care about the 'egg'.
1820
/turbinia.egg-info

docker/server/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,3 @@ ENV TURBINIA_DEBUG_PORT ${TURBINIA_DEBUG_PORT:-20000}
5656
CMD ["/home/turbinia/start.sh"]
5757
# Expose Prometheus endpoint.
5858
EXPOSE 9200/tcp
59-

poetry.lock

Lines changed: 88 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ turbiniactl = "turbinia.turbiniactl:main"
1515
[tool.poetry.dependencies]
1616
python = "^3.10"
1717
acstore = { version = "20240128" }
18+
asn1crypto = { version = ">= 0.24.0", optional = true }
1819
backoff = { version = ">=2.2.1" }
1920
celery = { version = "^5.2.2" }
2021
dfDewey = { version = "^20231016", optional = true }
@@ -25,14 +26,17 @@ filelock = { version = "*" }
2526
google-api-core = { version = "<3.0.0", optional = true }
2627
google-generativeai = { version = ">=0.8.2" }
2728
libcloudforensics = "^20240325"
29+
lief = { version = ">= 0.15.1", optional = true }
2830
opensearch-py = {version = "2.4.2"}
2931
pandas = { version = "^2.1.0" }
3032
plaso = { version = "20240308", optional = true }
33+
pyssdeep = { version = "1.0.0", optional = true }
3134
prometheus_client = { version = "^0.17.1" }
3235
protobuf = { version = ">=3.19.0", optional = true }
3336
pydantic = { version = "^1.10.5,<2"}
3437
pyglove = { version = ">=0.4.4" }
3538
pyhindsight = { version = "^20230327.0", optional = true }
39+
py-tlsh = { version = ">= 4.7.2", optional = true }
3640
ratelimit = { version = ">=2.2.1" }
3741
redis = { version = "^4.4.4" }
3842
urllib3 = [
@@ -58,10 +62,14 @@ yapf = "*"
5862

5963
[tool.poetry.extras]
6064
worker = [
65+
"asn1crypto",
6166
"dfimagetools",
6267
"dfDewey",
68+
"lief",
6369
"plaso",
70+
"pyssdeep",
6471
"pyhindsight",
72+
"py-tlsh",
6573
]
6674

6775
[tool.yapfignore]

test_data/macho-3.tgz

57.7 KB
Binary file not shown.

turbinia/config/turbinia_config_tmpl.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,11 @@
219219
'programs': ['hashcat', 'john'],
220220
'docker_image': None,
221221
'timeout': 1200
222+
}, {
223+
'job': 'MachoAnalysisJob',
224+
'programs': ['grep'],
225+
'docker_image': None,
226+
'timeout': 3600
222227
}, {
223228
'job': 'YaraAnalysisJob',
224229
'programs': ['/opt/fraken/fraken'],

turbinia/evidence.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,6 +1201,11 @@ class BinaryExtraction(CompressedDirectory):
12011201
pass
12021202

12031203

1204+
class MachoExtraction(CompressedDirectory):
1205+
"""Mach-O details extracted from evidence."""
1206+
pass
1207+
1208+
12041209
class DockerContainer(Evidence):
12051210
"""Evidence object for a DockerContainer filesystem.
12061211

turbinia/jobs/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from turbinia.jobs import jupyter
3030
from turbinia.jobs import linux_acct
3131
from turbinia.jobs import llm_artifacts_analyzer
32+
from turbinia.jobs import macho
3233
from turbinia.jobs import yara
3334
from turbinia.jobs import partitions
3435
from turbinia.jobs import photorec

turbinia/jobs/macho.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Job to execute macho analysis task."""
2+
3+
from turbinia.evidence import MachoExtraction
4+
from turbinia.evidence import Directory
5+
from turbinia.evidence import RawDisk
6+
from turbinia.evidence import ReportText
7+
from turbinia.jobs import interface
8+
from turbinia.jobs import manager
9+
from turbinia.workers.analysis import macho
10+
11+
12+
class MachoAnalysisJob(interface.TurbiniaJob):
13+
"""Mach-O analysis job."""
14+
15+
evidence_input = [MachoExtraction]
16+
evidence_output = [ReportText]
17+
18+
NAME = 'MachoAnalysisJob'
19+
20+
def create_tasks(self, evidence):
21+
"""Create task.
22+
Args:
23+
evidence: List of evidence objects to process
24+
Returns:
25+
A list of tasks to schedule.
26+
"""
27+
tasks = [macho.MachoAnalysisTask() for _ in evidence]
28+
return tasks
29+
30+
31+
manager.JobsManager.RegisterJob(MachoAnalysisJob)

turbinia/task_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class TaskLoader():
5454
'LinuxAccountAnalysisTask',
5555
'LinuxSSHAnalysisTask',
5656
'LLMAnalyzerTask',
57+
'MachoAnalysisTask',
5758
'YaraAnalysisTask',
5859
'PartitionEnumerationTask',
5960
'PhotorecTask',
@@ -106,6 +107,7 @@ def get_task(self, task_name):
106107
from turbinia.workers.analysis.jupyter import JupyterAnalysisTask
107108
from turbinia.workers.analysis.linux_acct import LinuxAccountAnalysisTask
108109
from turbinia.workers.analysis.llm_analyzer import LLMAnalyzerTask
110+
from turbinia.workers.analysis.macho import MachoAnalysisTask
109111
from turbinia.workers.analysis.postgresql_acct import PostgresAccountAnalysisTask
110112
from turbinia.workers.analysis.redis import RedisAnalysisTask
111113
from turbinia.workers.analysis.ssh_analyzer import LinuxSSHAnalysisTask

0 commit comments

Comments
 (0)