diff --git a/frontend/public/icons/ai.svg b/frontend/public/icons/ai.svg
new file mode 100644
index 000000000..248a191f2
--- /dev/null
+++ b/frontend/public/icons/ai.svg
@@ -0,0 +1 @@
+
diff --git a/frontend/src/resource/ResourceTreeToolbar.svelte b/frontend/src/resource/ResourceTreeToolbar.svelte
index 933e39ce3..9f344b2ac 100644
--- a/frontend/src/resource/ResourceTreeToolbar.svelte
+++ b/frontend/src/resource/ResourceTreeToolbar.svelte
@@ -1,13 +1,14 @@
+{#await promptPromise}
+{:then _}
+  {#if analyzer == "LlmAnalyzer"}
+  {/if}
+{:catch e}
+  {e}
+{/await}
+{#await resultPromise}
+{:then result}
+  {#if result}{result}{/if}
+{:catch e}
+  Try re-running the analyzer.
+  {e}
+{/await}
diff --git a/ofrak_core/CHANGELOG.md b/ofrak_core/CHANGELOG.md
index 808ce22e1..297f46765 100644
--- a/ofrak_core/CHANGELOG.md
+++ b/ofrak_core/CHANGELOG.md
@@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Add UEFI binary unpacker. ([#399](https://github.com/redballoonsecurity/ofrak/pull/399))
 - Add recursive identify functionality in the GUI. ([#435](https://github.com/redballoonsecurity/ofrak/pull/435))
 - Add generic DecompilationAnalysis classes. ([#453](https://github.com/redballoonsecurity/ofrak/pull/453))
+- Add OFRAK AI analyzers that use LLMs to generate natural language representations of resources. ([#508](https://github.com/redballoonsecurity/ofrak/pull/508))
 
 ### Fixed
 - Improved flushing of filesystem entries (including symbolic links and other types) to disk. ([#373](https://github.com/redballoonsecurity/ofrak/pull/373))
diff --git a/ofrak_core/Dockerstub b/ofrak_core/Dockerstub
index b81261935..4a4c3ce5b 100644
--- a/ofrak_core/Dockerstub
+++ b/ofrak_core/Dockerstub
@@ -5,6 +5,7 @@ RUN apt-get -y update && \
     build-essential \
     cmake \
     cpio \
+    curl \
     git \
     genisoimage \
     liblz4-dev \
@@ -75,3 +76,6 @@ RUN cd /tmp && \
     make install && \
     cd /tmp && \
     rm -r UEFITool-A68
+
+# Install Ollama
+RUN curl -L "https://ollama.com/download/ollama-linux-""$TARGETARCH"".tgz" | tar -C /usr/ -xzv
diff --git a/ofrak_core/Makefile b/ofrak_core/Makefile
index c82f6be7e..32e79b35e 100644
--- a/ofrak_core/Makefile
+++ b/ofrak_core/Makefile
@@ -28,5 +28,6 @@ ofrak/gui/public:
 	npm install && \
 	npm run build && \
 	cd ../ofrak_core && \
+	rm -rf ofrak/gui/public ; \
 	cp -r ../frontend/dist ofrak/gui/public ; \
 	fi
diff --git a/ofrak_core/ofrak/core/llm.py b/ofrak_core/ofrak/core/llm.py
new file mode 100644
index 000000000..bda67679c
--- /dev/null
+++ b/ofrak_core/ofrak/core/llm.py
@@ -0,0 +1,266 @@
+import dataclasses
+import json
+import os
+from dataclasses import dataclass
+from enum import Enum
+from typing import Optional, List, Dict, Any, Union
+
+import aiohttp
+
+from ofrak import Analyzer, Resource, ResourceAttributes, ResourceFilter
+from ofrak.core import ComplexBlock, Program
+from ofrak.core.decompilation import DecompilationAnalysis
+from ofrak.core.entropy import DataSummary
+from ofrak.model.component_model import ComponentConfig
+from ofrak.model.viewable_tag_model import AttributesType
+from ofrak_type import Range
+
+
+@dataclass(**ResourceAttributes.DATACLASS_PARAMS)
+class LlmAttributes(ResourceAttributes):
+    description: str
+
+
+@dataclass
+class LlmAnalyzerConfig(ComponentConfig):
+    api_url: str
+    model: str
+    api_key: Optional[str] = None
+    prompt: Optional[str] = None
+    system_prompt: str = "You are a reverse engineer. You return concise technical descriptions of binaries, and what they do."
+    examples: Optional[List[str]] = None
+
+
+class LlmAnalyzer(Analyzer[LlmAnalyzerConfig, LlmAttributes]):
+    """
+    This analyzer uses a Large Language Model (LLM) to describe the resource being analyzed in natural language.
+
+    The analyzer works with local models run using Ollama or remote models such as OpenAI's ChatGPT. To run a local
+    Ollama instance, use the following commands:
+
+    ```
+    curl -fsSL https://ollama.com/install.sh | sh
+    ollama pull llama3.2
+    ollama serve
+    # Use http://localhost:11434/api/chat as the API URL in the analyzer config
+    ```
+
+    It is advisable to tune the results by editing the system prompt and/or adding examples.
+    The `prompt` field of the config should only be overridden by other analyzers that want to use the LLM in a more
+    specific way (e.g., the `LlmFunctionAnalyzer` is specifically for analyzing decompilation output).
+    """
+
+    targets = ()
+    outputs = (LlmAttributes,)
+
+    async def analyze(self, resource: Resource, config: LlmAnalyzerConfig = None) -> LlmAttributes:
+        if config is None:
+            config = LlmAnalyzerConfig("http://localhost:11434/api/chat", "llama3.2")
+
+        headers = (
+            {"Authorization": f"Bearer {config.api_key}"} if config.api_key is not None else dict()
+        )
+
+        if config.prompt is None:
+            prompt = f"""Tell me what the following binary is and what it does.
+
+# Metadata
+{await dump_attributes(resource)}
+
+# First 100 bytes
+{await hex_dump(resource)}..."""
+        else:
+            prompt = config.prompt
+
+        body = {
+            "model": config.model,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": config.system_prompt,
+                },
+                *(
+                    [
+                        {"role": "user" if i % 2 == 0 else "assistant", "content": example}
+                        for i, example in enumerate(config.examples)
+                    ]
+                    if config.examples is not None
+                    else []
+                ),
+                {
+                    "role": "user",
+                    "content": prompt,
+                },
+            ],
+            "stream": False,
+        }
+
+        # TODO: class-wide client session instance for connection pooling
+        async with aiohttp.ClientSession() as session:
+            async with session.post(config.api_url, json=body, headers=headers) as response:
+                response.raise_for_status()
+                data = await response.json()
+                if "message" in data:
+                    message = data["message"]
+                elif "choices" in data and data["choices"]:
+                    message = data["choices"][0]["message"]
+                return LlmAttributes(message["content"])
+
+
+class LlmFunctionAnalyzer(Analyzer[LlmAnalyzerConfig, LlmAttributes]):
+    # Targets ComplexBlock, but we don't want it to run automatically
+    targets = ()
+    outputs = (LlmAttributes,)
+
+    async def analyze(self, resource: Resource, config: LlmAnalyzerConfig = None) -> LlmAttributes:
+        if not resource.has_tag(ComplexBlock):
+            raise RuntimeError("This analyzer can only be run on complex blocks")
+        await resource.unpack_recursively()
+        decompilation = await resource.view_as(DecompilationAnalysis)
+
+        if config is None:
+            config = LlmAnalyzerConfig("http://localhost:11434/api/chat", "llama3.2")
+        config.system_prompt = (
+            "You are a computer program for reverse engineering. You return "
+            "concise technical summaries of disassembled and decompiled "
+            "functions, and what they do without additional commentary. You "
+            "always respond with only one or two sentences."
+        )
+        config.examples = None
+        config.prompt = f"""# Decompilation
+{decompilation.decompilation}
+
+# Metadata
+{await dump_attributes(resource)}
+
+Describe what this function does.
+""" + await resource.run(LlmAnalyzer, config) + return resource.get_attributes(LlmAttributes) + + +class LlmProgramAnalyzer(Analyzer[LlmAnalyzerConfig, LlmAttributes]): + # Targets Program, but we don't want it to run automatically + targets = () + outputs = (LlmAttributes,) + + async def analyze(self, resource: Resource, config: LlmAnalyzerConfig = None) -> LlmAttributes: + if not resource.has_tag(Program): + raise RuntimeError("This analyzer can only be run on programs") + + await resource.unpack_recursively() + program = await resource.view_as(Program) + # Rough heuristic that the largest code region is probably the text section + text_section = max(await program.get_code_regions(), key=lambda cr: cr.size) + functions = list( + await text_section.resource.get_descendants_as_view( + ComplexBlock, r_filter=ResourceFilter.with_tags(ComplexBlock) + ) + ) + # TODO: Should this be concurrent? + # await asyncio.gather(*(function.resource.run(LlmAnalyzer, config) for function in functions)) + for function in functions: + await function.resource.run(LlmFunctionAnalyzer, config) + descriptions = [ + f"- {function.name}: {function.resource.get_attributes(LlmAttributes).description.splitlines()[0]}" + for function in functions + ] + + if config is None: + config = LlmAnalyzerConfig("http://localhost:11434/api/chat", "llama3.2") + config.system_prompt = ( + "You are a computer program for reverse engineering. You return " + "concise technical summaries of disassembled and decompiled " + "programs, and what they do without additional commentary. You " + "always respond with only one or two sentences." + ) + config.prompt = f"""# Functions +{chr(10).join(descriptions)} + +# Metadata +{await dump_attributes(resource)} + +Describe what the entire program does based on its functions and metadata. 
+""" + await resource.run(LlmAnalyzer, config) + return resource.get_attributes(LlmAttributes) + + +def indent(s: str, spaces: int = 2) -> str: + return "\n".join(" " * spaces + line for line in s.splitlines()) + + +def make_serializable(o): + if o is None: + return o + elif isinstance(o, (int, float)): + return o + elif isinstance(o, str): + return o + elif isinstance(o, (list, set)): + return [make_serializable(x) for x in o] + elif isinstance(o, dict): + return {make_serializable(k): make_serializable(v) for k, v in o.items()} + elif isinstance(o, type): + return o.__name__ + elif isinstance(o, Enum): + return str(o) + elif isinstance(o, bytes): + return o.hex() + elif dataclasses.is_dataclass(o): + return make_serializable(dataclasses.asdict(o)) + elif isinstance(o, os.stat_result): + return { + name: getattr(o, name) + for name in [ + "st_mode", + "st_ino", + "st_dev", + "st_nlink", + "st_uid", + "st_gid", + "st_size", + "st_atime", + "st_mtime", + "st_ctime", + ] + if hasattr(o, name) + } + else: + return repr(o) + + +def serialize(o: Optional[Union[int, float, str, List[Any], Dict[Any, Any]]]) -> str: + if o is None: + return "null" + elif isinstance(o, (int, float)): + return str(o) + elif isinstance(o, str): + return json.dumps(o) + elif isinstance(o, list): + return "\n".join("- " + serialize(x) for x in o) + elif isinstance(o, dict): + result = [] + for k, v in o.items(): + s = serialize(v) + if "\n" in s: + result.append(f"{k}:\n{indent(s)}") + else: + result.append(f"{k}: {s}") + return "\n".join(result) + + +async def dump_attributes(resource: Resource) -> str: + model = resource.get_model() + # The data summary is un-informative and verbose + if DataSummary in model.attributes: + del model.attributes[DataSummary] + # We pretty-print the decompilation analysis + if AttributesType[DecompilationAnalysis] in model.attributes: + del model.attributes[AttributesType[DecompilationAnalysis]] + return serialize(make_serializable(model.attributes)) + + +async def hex_dump(resource: Resource) -> str: + data = await resource.get_data(Range(0, 100)) + return " ".join(f"{b:0>2x}" for b in data) diff --git a/ofrak_core/test_ofrak/components/test_llm_components.py b/ofrak_core/test_ofrak/components/test_llm_components.py new file mode 100644 index 000000000..1ce609235 --- /dev/null +++ b/ofrak_core/test_ofrak/components/test_llm_components.py @@ -0,0 +1,101 @@ +import asyncio +import os + +import pytest + +import ofrak_ghidra +import test_ofrak +from ofrak import OFRAKContext +from ofrak.core import Elf +from ofrak.core.llm import ( + LlmAnalyzer, + LlmAnalyzerConfig, + LlmProgramAnalyzer, + LlmAttributes, + LlmFunctionAnalyzer, +) + + +@pytest.fixture() +@pytest.mark.asyncio +async def ollama(): + async def run_task(): + proc = None + try: + proc = await asyncio.subprocess.create_subprocess_exec("ollama", "serve") + await proc.communicate() + except asyncio.CancelledError: + if proc is not None: + proc.kill() + + task = asyncio.create_task(run_task()) + await asyncio.sleep(5) + yield + task.cancel() + + +@pytest.fixture() +@pytest.mark.asyncio +async def model(ollama) -> str: + # Smallest chat model currently available on ollama + model_name = "qwen2.5:0.5b" + proc = await asyncio.subprocess.create_subprocess_exec("ollama", "pull", model_name) + await proc.communicate() + yield model_name + + +@pytest.fixture(autouse=True) +def ghidra_components(ofrak_injector): + ofrak_injector.discover(ofrak_ghidra) + + +async def test_llm_component(ofrak_context: OFRAKContext, model: str): + root_path = 
os.path.join(test_ofrak.components.ASSETS_DIR, "elf", "busybox_elf_exec_noscop") + root = await ofrak_context.create_root_resource_from_file(root_path) + await root.unpack() + await root.auto_run(all_analyzers=True) + await root.run( + LlmAnalyzer, + LlmAnalyzerConfig( + "http://localhost:11434/api/chat", + model, + ), + ) + attributes = root.get_attributes(LlmAttributes) + assert attributes.description, f"LlmAnalyzer did not generate valid description attributes." + + +async def test_llm_function_component(ofrak_context: OFRAKContext, model: str): + root_path = os.path.join(test_ofrak.components.ASSETS_DIR, "elf", "hello_elf_dyn") + root = await ofrak_context.create_root_resource_from_file(root_path) + await root.unpack_recursively() + elf = await root.view_as(Elf) + main = await elf.get_function_complex_block("main") + await main.resource.run( + LlmFunctionAnalyzer, + LlmAnalyzerConfig( + "http://localhost:11434/api/chat", + model, + ), + ) + attributes = main.resource.get_attributes(LlmAttributes) + assert ( + attributes.description + ), f"LlmFunctoinAnalyzer did not generate valid description attributes." + + +async def test_llm_program_component(ofrak_context: OFRAKContext, model: str): + root_path = os.path.join(test_ofrak.components.ASSETS_DIR, "elf", "hello_elf_dyn") + root = await ofrak_context.create_root_resource_from_file(root_path) + await root.unpack_recursively() + await root.run( + LlmProgramAnalyzer, + LlmAnalyzerConfig( + "http://localhost:11434/api/chat", + model, + ), + ) + attributes = root.get_attributes(LlmAttributes) + assert ( + attributes.description + ), f"LlmProgramAnalyzer did not generate valid description attributes."
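
As a usage illustration (not part of the patch above): the sketch below shows one way to drive the new `LlmAnalyzer` from an OFRAK script, mirroring the `LlmAnalyzer` docstring and the tests. It assumes a local Ollama instance serving `llama3.2` at `http://localhost:11434/api/chat`; the input path `./example_program` is a hypothetical placeholder.

```python
# Minimal sketch: describe a binary with the new LlmAnalyzer against a local Ollama server.
# Assumptions: Ollama is serving llama3.2 at http://localhost:11434/api/chat,
# and "./example_program" is a placeholder path to a binary on disk.
from ofrak import OFRAK, OFRAKContext
from ofrak.core.llm import LlmAnalyzer, LlmAnalyzerConfig, LlmAttributes


async def main(ofrak_context: OFRAKContext):
    root = await ofrak_context.create_root_resource_from_file("./example_program")
    # Unpack first so the resource has metadata for the prompt built by dump_attributes()
    await root.unpack()
    await root.run(
        LlmAnalyzer,
        LlmAnalyzerConfig("http://localhost:11434/api/chat", "llama3.2"),
    )
    print(root.get_attributes(LlmAttributes).description)


if __name__ == "__main__":
    OFRAK().run(main)
```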