diff --git a/clientlib/decompiler_imports.dl b/clientlib/decompiler_imports.dl
index 022411da..38e9e880 100644
--- a/clientlib/decompiler_imports.dl
+++ b/clientlib/decompiler_imports.dl
@@ -399,6 +399,9 @@ ValidGlobalTerminalBlock(block) :-
 
 FallbackFunction(func) :-
   PublicFunctionSelector(func, "0x00000000").
 
+// Can be "default" or "scalable"
+.decl DecompilerConfig(config: symbol)
+.input DecompilerConfig
 
 // Dynamic Information
diff --git a/clientlib/dominators.dl b/clientlib/dominators.dl
index 28489bac..7dac8b8e 100644
--- a/clientlib/dominators.dl
+++ b/clientlib/dominators.dl
@@ -331,9 +331,10 @@ FunctionReachableFromPublic(callee, selector, @list_append(callerStack, callerBl
 FunctionReachableFromPublic_Metadata(function, selectorNorm, nil, nil, nil):-
   PublicFunctionId(function, selectorNorm, _).
 
-FunctionReachableFromPublic_Metadata(callee, selector, @list_append(callerStack, callerBlock), @list_append(originalCalls, original), @list_append(functionsCalled, callee)):-
+FunctionReachableFromPublic_Metadata(callee, selector, @list_append(callerStack, callerBlock), @list_concat(originalCalls, originalList), @list_append(@list_concat(functionsCalled, inlinedFuns), callee)):-
   FunctionReachableFromPublic_Metadata(caller, selector, callerStack, originalCalls, functionsCalled),
   InFunction(callerBlock, caller),
   CallGraphEdge(callerBlock, callee),
   Block_Tail(callerBlock, callStmt),
-  Statement_OriginalStatement(callStmt, original).
+  Statement_OriginalStatementList(callStmt, originalList),
+  Statement_InlineInfo(callStmt, inlinedFuns).
diff --git a/gigahorse.py b/gigahorse.py
index 84ab66ed..13245899 100755
--- a/gigahorse.py
+++ b/gigahorse.py
@@ -17,10 +17,12 @@
 
 # Local project imports
 from src.common import GIGAHORSE_DIR, DEFAULT_SOUFFLE_BIN, log
-from src.runners import get_souffle_executable_path, compile_datalog, AbstractFactGenerator, DecompilerFactGenerator, CustomFactGenerator, AnalysisExecutor, TimeoutException
+from src.runners import get_souffle_executable_path, compile_datalog, AbstractFactGenerator, DecompilerFactGenerator, CustomFactGenerator, MixedFactGenerator, AnalysisExecutor, TimeoutException
 
 ## Constants
 
+TAC_GEN_CONFIG_FILE = 'tac_gen_config.json'
+
 DEFAULT_RESULTS_FILE = 'results.json'
 """File to write results to by default."""
 
@@ -100,7 +102,7 @@
                     const=DEFAULT_CACHE_DIR,
                     metavar="DIR",
                     help="the location to were temporary files are placed.")
-    
+
 
 parser.add_argument("-j",
                     "--jobs",
@@ -220,7 +222,7 @@ def get_souffle_macros() -> str:
     return souffle_macros
 
 
-def analyze_contract(index: int, contract_filename: str, result_queue, fact_generator: AbstractFactGenerator, souffle_clients: List[str], other_clients: List[str]) -> None: 
+def analyze_contract(index: int, contract_filename: str, result_queue, fact_generator: AbstractFactGenerator, souffle_clients: List[str], other_clients: List[str]) -> None:
     """
     Perform static analysis on a contract, storing the result in the queue.
     This is a worker function to be passed to a subprocess.
@@ -355,7 +357,7 @@ def flush_queue(run_sig: Any, result_queue: SimpleQueue, result_list: Any) -> No
 
 def write_results(res_list: Any, results_file: str) -> None:
     """ Filters the results in res_list, logging the appropriate messages
-    and writting them to the results_file json file 
+    and writing them to the results_file json file
     """
     total = len(res_list)
    vulnerability_counts: DefaultDict[str, int] = defaultdict(int)
@@ -384,13 +386,13 @@ def write_results(res_list: Any, results_file: str) -> None:
         for res, sums in analytics_sums_sorted:
             log("  {}: {}".format(res, sums))
         log('\n')
-    
+
     vulnerability_counts_sorted = sorted(list(vulnerability_counts.items()), key = lambda a: a[0])
     if vulnerability_counts_sorted:
         log('-'*80)
         log('Summary (flagged contracts)')
         log('-'*80)
-    
+
         for res, count in vulnerability_counts_sorted:
             log("  {}: {:.2f}%".format(res, 100 * count / total))
 
@@ -401,7 +403,7 @@ def write_results(res_list: Any, results_file: str) -> None:
         for k, v in meta_counts.items():
             log(f"  {k}: {v} of {total} contracts")
         log('\n')
-    
+
     log("\nWriting results to {}".format(results_file))
     with open(results_file, 'w') as f:
         f.write(json.dumps(list(res_list), indent=1))
@@ -493,7 +495,7 @@ def batch_analysis(fact_generator: AbstractFactGenerator, souffle_clients: List[
         sys.exit(1)
 
 
-def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:
+def run_gigahorse(args, fact_generator: AbstractFactGenerator) -> None:
     """
     Run gigahorse, passing the cmd line args and fact generator type as arguments
     """
@@ -502,8 +504,7 @@ def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:
     analysis_executor = AnalysisExecutor(args.timeout_secs, args.interpreted, args.minimum_client_time,
                                          args.debug, args.souffle_bin, args.cache_dir, get_souffle_macros())
 
-    fact_generator = fact_gen_class(args, analysis_executor)
-
+    fact_generator.analysis_executor = analysis_executor
 
     clients_split = [a.strip() for a in args.client.split(',')]
     souffle_clients = [a for a in clients_split if a.endswith('.dl')]
@@ -527,7 +528,7 @@ def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:
 
     if args.restart:
         log("Removing working directory {}".format(args.working_dir))
-        shutil.rmtree(args.working_dir, ignore_errors = True)    
+        shutil.rmtree(args.working_dir, ignore_errors = True)
 
     if not args.interpreted:
         for p in running_processes:
@@ -544,13 +545,6 @@ def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:
 
     contracts = []
 
-    # Filter according to the given pattern.
-    re_string = fact_generator.pattern
-    if not re_string.endswith("$"):
-        re_string = re_string + "$"
-    pattern = re.compile(re_string)
-
-
     for filepath in args.filepath:
         if os.path.isdir(filepath):
             if args.interpreted:
@@ -558,8 +552,8 @@ def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:
             unfiltered = [join(filepath, f) for f in os.listdir(filepath)]
         else:
             unfiltered = [filepath]
-        
-        contracts += [u for u in unfiltered if pattern.match(u) is not None]
+
+        contracts += [u for u in unfiltered if fact_generator.match_pattern(u)]
 
     contracts = contracts[args.skip:]
 
@@ -592,18 +586,25 @@
                         default=False,
                         help="Disables the scalable fallback configuration (using a hybrid-precise context configuration) that kicks off"
                         " if decompilation with the default (transactional) config takes up more than half of the total timeout.")
-    parser.add_argument("--custom_fact_generator",
-                        nargs="*",
-                        default=None,
-                        help="Adds custom scripts for non-default fact generation. Takes a list of paths for the custom fact generation scripts. "
-                        " Fact generation scripts can also be Datalog files. The default is the decompilation fact generation from bytecode files.")
-    parser.add_argument("--custom_file_pattern",
-                        nargs="?",
-                        default=".*.hex",
-                        help="Adds a custom file filtering RegEx. The default is .hex (bytecode) files.")
 
     args = parser.parse_args()
-    if args.custom_fact_generator == None:
-        run_gigahorse(args, DecompilerFactGenerator)
-    else:
-        run_gigahorse(args, CustomFactGenerator)
\ No newline at end of file
+
+    tac_gen_config_json = os.path.join(os.path.dirname(os.path.abspath(__file__)), TAC_GEN_CONFIG_FILE)
+    with open(tac_gen_config_json, 'r') as config:
+        tac_gen_config = json.loads(config.read())
+    if len(tac_gen_config["handlers"]) == 0: # if no handlers defined, default to classic decompilation
+        run_gigahorse(args, DecompilerFactGenerator(args, ".*.hex"))
+    elif len(tac_gen_config["handlers"]) == 1: # if one handler defined, can be either classic decompilation, or custom script
+        tac_gen = tac_gen_config["handlers"][0]
+        if tac_gen["tacGenScripts"]["defaultDecomp"] == "true":
+            run_gigahorse(args, DecompilerFactGenerator(args, tac_gen["fileRegex"]))
+        else:
+            run_gigahorse(args, CustomFactGenerator(tac_gen["fileRegex"], tac_gen["tacGenScripts"]["customScripts"]))
+    elif len(tac_gen_config["handlers"]) > 1: # if multiple handlers have been defined, they will be selected based on the file regex
+        fact_generator = MixedFactGenerator(args)
+        for tac_gen in tac_gen_config["handlers"]:
+            pattern = tac_gen["fileRegex"]
+            scripts = tac_gen["tacGenScripts"]["customScripts"]
+            is_default = tac_gen["tacGenScripts"]["defaultDecomp"] == "true"
+            fact_generator.add_fact_generator(pattern, scripts, is_default, args)
+        run_gigahorse(args, fact_generator)
diff --git a/logic/decompiler_output.dl b/logic/decompiler_output.dl
index a724a656..9c8c72e3 100644
--- a/logic/decompiler_output.dl
+++ b/logic/decompiler_output.dl
@@ -6,6 +6,15 @@
 
 .output ByteCodeHex(IO="file", filename="bytecode.hex")
 
+.decl DecompilerConfig(config: symbol) btree_delete
+.output DecompilerConfig
+
+DecompilerConfig("default").
+
+DecompilerConfig(default) <= DecompilerConfig(other):-
+  default = "default",
+  other != default.
+
 
 .decl GlobalEntryBlock(block: IRBlock)
 .output GlobalEntryBlock
diff --git a/logic/fallback_scalable.dl b/logic/fallback_scalable.dl
index 9977b9d5..a0da2c84 100644
--- a/logic/fallback_scalable.dl
+++ b/logic/fallback_scalable.dl
@@ -3,4 +3,6 @@
 
 #define MAX_STACK_HEIGHT 30
 
-#include "main.dl"
\ No newline at end of file
+#include "main.dl"
+
+DecompilerConfig("scalable").
diff --git a/souffle-addon b/souffle-addon
index ed4f2c02..33fd2051 160000
--- a/souffle-addon
+++ b/souffle-addon
@@ -1 +1 @@
-Subproject commit ed4f2c02d6e1e80a0d8f5cf056e4920fe1181fc0
+Subproject commit 33fd2051585f0be4955fecce6f4f7e82e65872d8
diff --git a/src/runners.py b/src/runners.py
index d405529d..e61091d9 100644
--- a/src/runners.py
+++ b/src/runners.py
@@ -7,8 +7,9 @@
 import time
 import shutil
 import json
+import re
 
-from typing import Tuple, List, Any, Optional
+from typing import Tuple, List, Any, Optional, Dict
 
 from abc import ABC, abstractmethod
 
@@ -18,8 +19,8 @@
 
 devnull = subprocess.DEVNULL
 
-DEFAULT_MEMORY_LIMIT = 45 * 1_000_000_000
-"""Hard capped memory limit for analyses processes (30 GB)"""
+DEFAULT_MEMORY_LIMIT = 50 * 1_000_000_000
+"""Hard capped memory limit for analysis processes (50 GB)"""
 
 souffle_env = os.environ.copy()
 
@@ -95,7 +96,7 @@ def run_souffle_client(self, souffle_client: str, in_dir: str, out_dir: str, sta
             errors.append(os.path.basename(souffle_client))
             log(souffle_err)
         return errors, timeouts
-    
+
     def run_script_client(self, script_client: str, in_dir: str, out_dir: str, start_time: float):
         errors = []
         timeouts = []
@@ -103,7 +104,7 @@ def run_script_client(self, script_client: str, in_dir: str, out_dir: str, start
         client_split[0] = join(os.getcwd(), client_split[0])
         client_name = client_split[0].split('/')[-1]
         err_filename = join(out_dir, client_name+'.err')
-        
+
         runtime = run_process(
             client_split,
             self.calc_timeout(start_time),
@@ -200,12 +201,20 @@ def imprecise_decomp_out(out_dir: str) -> bool:
 
 
 class AbstractFactGenerator(ABC):
-    analysis_executor: AnalysisExecutor
-    pattern: str
+    _analysis_executor: AnalysisExecutor
+    pattern: re.Pattern
 
     def __init__(self, args, analysis_executor: AnalysisExecutor):
         pass
 
+    @property
+    def analysis_executor(self) -> AnalysisExecutor:
+        return self._analysis_executor
+
+    @analysis_executor.setter
+    def analysis_executor(self, analysis_executor: AnalysisExecutor):
+        self._analysis_executor = analysis_executor
+
     @abstractmethod
     def generate_facts(self, contract_filename: str, work_dir: str, out_dir: str) -> Tuple[float, float, str]:
         pass
@@ -218,16 +227,74 @@ def get_datalog_files(self) -> List[str]:
         pass
 
     def decomp_out_produced(self, out_dir: str) -> bool:
         pass
 
+    @abstractmethod
+    def match_pattern(self, contract_filename: str) -> bool:
+        pass
+
+
+class MixedFactGenerator(AbstractFactGenerator):
+    fact_generators: Dict[re.Pattern, AbstractFactGenerator]
+    out_dir_to_gen: Dict[str, AbstractFactGenerator]
+    contract_filename_to_gen: Dict[str, AbstractFactGenerator]
+
+    def __init__(self, args):
+        self.fact_generators = {}
+        self.out_dir_to_gen = {}
+        self.contract_filename_to_gen = {}
+
+    @property
+    def analysis_executor(self) -> AnalysisExecutor:
+        return self._analysis_executor
+
+    @analysis_executor.setter
+    def analysis_executor(self, analysis_executor: AnalysisExecutor):
+        self._analysis_executor = analysis_executor
+        for fact_gen in self.fact_generators.values():
+            fact_gen.analysis_executor = analysis_executor
+
+    def generate_facts(self, contract_filename: str, work_dir: str, out_dir: str) -> Tuple[float, float, str]:
+        generator = self.contract_filename_to_gen[contract_filename]
+        del self.contract_filename_to_gen[contract_filename]
+        self.out_dir_to_gen[out_dir] = generator
+        return generator.generate_facts(contract_filename, work_dir, out_dir)
+
+    def get_datalog_files(self) -> List[str]:
+        datalog_files = []
+        for fact_gen in self.fact_generators.values():
+            datalog_files += fact_gen.get_datalog_files()
+        return datalog_files
+
+    def decomp_out_produced(self, out_dir: str) -> bool:
+        result = self.out_dir_to_gen[out_dir].decomp_out_produced(out_dir)
+        del self.out_dir_to_gen[out_dir]
+        return result
+
+    def match_pattern(self, contract_filename: str) -> bool:
+        for gen in self.fact_generators.values():
+            if gen.match_pattern(contract_filename):
+                self.contract_filename_to_gen[contract_filename] = gen
+                return True
+        return False
+
+    def add_fact_generator(self, pattern: str, scripts: List[str], is_default: bool, args):
+        if not pattern.endswith("$"):
+            pattern = pattern + "$"
+        if is_default:
+            self.fact_generators[re.compile(pattern)] = DecompilerFactGenerator(args, pattern)
+        else:
+            self.fact_generators[re.compile(pattern)] = CustomFactGenerator(pattern, scripts)
+
 
 class DecompilerFactGenerator(AbstractFactGenerator):
     decompiler_dl = join(GIGAHORSE_DIR, 'logic/main.dl')
     fallback_scalable_decompiler_dl = join(GIGAHORSE_DIR, 'logic/fallback_scalable.dl')
 
-    def __init__(self, args, analysis_executor: AnalysisExecutor):
+    def __init__(self, args, pattern: str):
         self.context_depth = args.context_depth
         self.disable_scalable_fallback = args.disable_scalable_fallback
-        self.analysis_executor = analysis_executor
-        self.pattern = args.custom_file_pattern
+        if not pattern.endswith("$"):
+            pattern = pattern + "$"
+        self.pattern = re.compile(pattern)
 
         pre_clients_split = [a.strip() for a in args.pre_client.split(',')]
         self.souffle_pre_clients = [a for a in pre_clients_split if a.endswith('.dl')]
@@ -239,7 +306,7 @@ def __init__(self, args, analysis_executor: AnalysisExecutor):
     def generate_facts(self, contract_filename: str, work_dir: str, out_dir: str) -> Tuple[float, float, str]:
         with open(contract_filename) as file:
             bytecode = file.read().strip()
-        
+
         if os.path.exists(metad:= f"{contract_filename[:-4]}_metadata.json"):
             metadata = json.load(open(metad))
         else:
@@ -267,7 +334,7 @@ def generate_facts(self, contract_filename: str, work_dir: str, out_dir: str) ->
             decompiler_config = self.run_decomp(contract_filename, work_dir, out_dir, disassemble_start)
 
         return decomp_start - disassemble_start, time.time() - decomp_start, decompiler_config
-    
+
     def get_datalog_files(self) -> List[str]:
         datalog_files = self.souffle_pre_clients + [DecompilerFactGenerator.decompiler_dl]
         if not self.disable_scalable_fallback:
@@ -294,20 +361,21 @@ def run_decomp(self, contract_filename: str, in_dir: str, out_dir: str, start_ti
                 raise TimeoutException()
 
         return config
-    
+
+    def match_pattern(self, contract_filename: str) -> bool:
+        return self.pattern.match(contract_filename) is not None
+
     def decomp_out_produced(self, out_dir: str) -> bool:
         """Hacky.
         Needed to ensure process was not killed due to exceeding the memory limit."""
         return os.path.exists(join(out_dir, 'Analytics_JumpToMany.csv')) and os.path.exists(join(out_dir, 'TAC_Def.csv'))
 
-
-class CustomFactGenerator(AbstractFactGenerator):
-    analysis_executor: AnalysisExecutor
-    pattern: str
 
-    def __init__(self, args, analysis_executor: AnalysisExecutor):
-        self.analysis_executor = analysis_executor
-        self.pattern = args.custom_file_pattern
-        self.fact_generator_scripts = args.custom_fact_generator
+class CustomFactGenerator(AbstractFactGenerator):
+    def __init__(self, pattern: str, custom_fact_gen_scripts: List[str]):
+        if not pattern.endswith("$"):
+            pattern = pattern + "$"
+        self.pattern = re.compile(pattern)
+        self.fact_generator_scripts = custom_fact_gen_scripts
 
     def generate_facts(self, contract_filename: str, work_dir: str, out_dir: str) -> Tuple[float, float, str]:
         errors = []
@@ -328,5 +396,8 @@ def generate_facts(self, contract_filename: str, work_dir: str, out_dir: str) ->
     def get_datalog_files(self) -> List[str]:
         return [a for a in self.fact_generator_scripts if a.endswith('.dl')]
 
+    def match_pattern(self, contract_filename: str) -> bool:
+        return self.pattern.match(contract_filename) is not None
+
     def decomp_out_produced(self, out_dir: str) -> bool:
-        return os.path.exists(join(out_dir, 'TAC_Def.csv'))
\ No newline at end of file
+        return os.path.exists(join(out_dir, 'TAC_Def.csv'))
diff --git a/tac_gen_config.json b/tac_gen_config.json
new file mode 100644
index 00000000..11888b84
--- /dev/null
+++ b/tac_gen_config.json
@@ -0,0 +1,11 @@
+{
+    "handlers":[
+        {
+            "fileRegex": ".*.hex",
+            "tacGenScripts": {
+                "defaultDecomp": "true",
+                "customScripts": []
+            }
+        }
+    ]
+}
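
Note: the tac_gen_config.json added above registers only the default decompilation handler. A sketch of a two-handler configuration that would exercise the MixedFactGenerator path (the ".*.sol" regex and the my_fact_gen.py script are hypothetical; the field names follow the schema introduced above):

    {
        "handlers":[
            {
                "fileRegex": ".*.hex",
                "tacGenScripts": {
                    "defaultDecomp": "true",
                    "customScripts": []
                }
            },
            {
                "fileRegex": ".*.sol",
                "tacGenScripts": {
                    "defaultDecomp": "false",
                    "customScripts": ["my_fact_gen.py"]
                }
            }
        ]
    }

With more than one handler, gigahorse.py builds a MixedFactGenerator and routes each input file to the first handler whose fileRegex matches; add_fact_generator() appends a trailing "$" before compiling, so matching is anchored at the end of the path (re.match already anchors the start).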
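The routing itself is plain dictionary bookkeeping: match_pattern() records which generator claimed a file, generate_facts() consumes that entry and re-keys it by output directory for the later decomp_out_produced() check. A rough illustration under assumed inputs (the Namespace carries only the attributes DecompilerFactGenerator reads in this patch; all file names are hypothetical):

    from argparse import Namespace
    from src.runners import MixedFactGenerator

    # Assumed minimal namespace: just the attributes DecompilerFactGenerator
    # touches in this patch (context_depth, disable_scalable_fallback, pre_client).
    args = Namespace(context_depth=None, disable_scalable_fallback=False, pre_client="")

    gen = MixedFactGenerator(args)
    gen.add_fact_generator(".*.hex", [], True, args)                   # default decompilation
    gen.add_fact_generator(".*.sol", ["my_fact_gen.py"], False, args)  # hypothetical custom script

    print(gen.match_pattern("contracts/a.hex"))  # True: claimed by the DecompilerFactGenerator
    print(gen.match_pattern("contracts/b.sol"))  # True: claimed by the CustomFactGenerator
    print(gen.match_pattern("README.md"))        # False: no handler, file is skipped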
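On the Datalog side, logic/decompiler_output.dl seeds DecompilerConfig("default"), and the subsumption rule (enabled by the btree_delete qualifier) deletes the "default" fact whenever any other config fact is present, so exactly one configuration is reported per contract; logic/fallback_scalable.dl asserts DecompilerConfig("scalable"), which therefore wins whenever the fallback runs. Clients read the value back through the .input declaration added to clientlib/decompiler_imports.dl. A minimal sketch of a client rule keyed on the active config (ScalableConfigUsed is a hypothetical relation, not part of this patch):

    .decl ScalableConfigUsed(func: symbol)
    .output ScalableConfigUsed

    ScalableConfigUsed(func) :-
      DecompilerConfig("scalable"),
      FallbackFunction(func).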