
Commit

Merge branch 'master' of github.com:nevillegrech/gigahorse-toolchain into unsoundness_global_experiment
sifislag committed Jan 18, 2024
2 parents 6abddb6 + 2c3994b commit c5cdba6
Showing 8 changed files with 157 additions and 59 deletions.
3 changes: 3 additions & 0 deletions clientlib/decompiler_imports.dl
@@ -399,6 +399,9 @@ ValidGlobalTerminalBlock(block) :-

FallbackFunction(func) :- PublicFunctionSelector(func, "0x00000000").

+// Can be "default" or "scalable"
+.decl DecompilerConfig(config: symbol)
+.input DecompilerConfig


// Dynamic Information
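The three added lines declare DecompilerConfig as an input relation of the client library, matching the .output DecompilerConfig added in logic/decompiler_output.dl further down; this lets client analyses learn which decompilation configuration ("default" or "scalable") produced the IR they consume. A minimal sketch of what feeds this relation, assuming Souffle's default convention that .input DecompilerConfig reads tab-separated facts from a file named DecompilerConfig.facts:

    # Hypothetical: produce the single unary fact DecompilerConfig("default")
    # in the tab-separated format Souffle's .input directive expects.
    with open("DecompilerConfig.facts", "w") as f:
        f.write("default\n")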
5 changes: 3 additions & 2 deletions clientlib/dominators.dl
@@ -331,9 +331,10 @@ FunctionReachableFromPublic(callee, selector, @list_append(callerStack, callerBlock)
FunctionReachableFromPublic_Metadata(function, selectorNorm, nil, nil, nil):-
PublicFunctionId(function, selectorNorm, _).

-FunctionReachableFromPublic_Metadata(callee, selector, @list_append(callerStack, callerBlock), @list_append(originalCalls, original), @list_append(functionsCalled, callee)):-
+FunctionReachableFromPublic_Metadata(callee, selector, @list_append(callerStack, callerBlock), @list_concat(originalCalls, originalList), @list_append(@list_concat(functionsCalled, inlinedFuns), callee)):-
FunctionReachableFromPublic_Metadata(caller, selector, callerStack, originalCalls, functionsCalled),
InFunction(callerBlock, caller),
CallGraphEdge(callerBlock, callee),
Block_Tail(callerBlock, callStmt),
-  Statement_OriginalStatement(callStmt, original).
+  Statement_OriginalStatementList(callStmt, originalList),
+  Statement_InlineInfo(callStmt, inlinedFuns).
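The updated rule accounts for function inlining: a single TAC call statement can now stand for a list of original statements (Statement_OriginalStatementList) and carry a list of inlined functions (Statement_InlineInfo), so the recursive step concatenates lists instead of appending single elements. A sketch of the assumed semantics of the list functors from the souffle-addon submodule (illustrative Python, not the actual C++ implementation in lists.cpp):

    # Assumed semantics of the @list_append / @list_concat functors:
    def list_append(xs, x):
        # @list_append adds a single element at the end of a list
        return xs + [x]

    def list_concat(xs, ys):
        # @list_concat joins two lists
        return xs + ys

    # Old rule: one original statement per call site
    #   originalCalls' = list_append(originalCalls, original)
    # New rule: possibly several original statements per call site after inlining
    #   originalCalls' = list_concat(originalCalls, originalList)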
67 changes: 34 additions & 33 deletions gigahorse.py
@@ -17,10 +17,12 @@

# Local project imports
from src.common import GIGAHORSE_DIR, DEFAULT_SOUFFLE_BIN, log
-from src.runners import get_souffle_executable_path, compile_datalog, AbstractFactGenerator, DecompilerFactGenerator, CustomFactGenerator, AnalysisExecutor, TimeoutException
+from src.runners import get_souffle_executable_path, compile_datalog, AbstractFactGenerator, DecompilerFactGenerator, CustomFactGenerator, MixedFactGenerator, AnalysisExecutor, TimeoutException

## Constants

+TAC_GEN_CONFIG_FILE = 'tac_gen_config.json'

DEFAULT_RESULTS_FILE = 'results.json'
"""File to write results to by default."""

@@ -100,7 +102,7 @@
const=DEFAULT_CACHE_DIR,
metavar="DIR",
help="the location to were temporary files are placed.")


parser.add_argument("-j",
"--jobs",
@@ -220,7 +222,7 @@ def get_souffle_macros() -> str:

return souffle_macros

def analyze_contract(index: int, contract_filename: str, result_queue, fact_generator: AbstractFactGenerator, souffle_clients: List[str], other_clients: List[str]) -> None:
"""
Perform static analysis on a contract, storing the result in the queue.
This is a worker function to be passed to a subprocess.
@@ -355,7 +357,7 @@ def flush_queue(run_sig: Any, result_queue: SimpleQueue, result_list: Any) -> None:
def write_results(res_list: Any, results_file: str) -> None:
"""
Filters the results in res_list, logging the appropriate messages
and writting them to the results_file json file
"""
total = len(res_list)
vulnerability_counts: DefaultDict[str, int] = defaultdict(int)
@@ -384,13 +386,13 @@ def write_results(res_list: Any, results_file: str) -> None:
for res, sums in analytics_sums_sorted:
log(" {}: {}".format(res, sums))
log('\n')

vulnerability_counts_sorted = sorted(list(vulnerability_counts.items()), key = lambda a: a[0])
if vulnerability_counts_sorted:
log('-'*80)
log('Summary (flagged contracts)')
log('-'*80)

for res, count in vulnerability_counts_sorted:
log(" {}: {:.2f}%".format(res, 100 * count / total))

@@ -401,7 +403,7 @@ def write_results(res_list: Any, results_file: str) -> None:
for k, v in meta_counts.items():
log(f" {k}: {v} of {total} contracts")
log('\n')

log("\nWriting results to {}".format(results_file))
with open(results_file, 'w') as f:
f.write(json.dumps(list(res_list), indent=1))
@@ -493,7 +495,7 @@ def batch_analysis(fact_generator: AbstractFactGenerator, souffle_clients: List[
sys.exit(1)


-def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:
+def run_gigahorse(args, fact_generator: AbstractFactGenerator) -> None:
"""
Run gigahorse, passing the cmd line args and fact generator type as arguments
"""
@@ -502,8 +504,7 @@ def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:

analysis_executor = AnalysisExecutor(args.timeout_secs, args.interpreted, args.minimum_client_time, args.debug, args.souffle_bin, args.cache_dir, get_souffle_macros())

-    fact_generator = fact_gen_class(args, analysis_executor)
-
+    fact_generator.analysis_executor = analysis_executor

clients_split = [a.strip() for a in args.client.split(',')]
souffle_clients = [a for a in clients_split if a.endswith('.dl')]
@@ -527,7 +528,7 @@ def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:

if args.restart:
log("Removing working directory {}".format(args.working_dir))
shutil.rmtree(args.working_dir, ignore_errors = True)

if not args.interpreted:
for p in running_processes:
@@ -544,22 +545,15 @@ def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:

contracts = []

-    # Filter according to the given pattern.
-    re_string = fact_generator.pattern
-    if not re_string.endswith("$"):
-        re_string = re_string + "$"
-    pattern = re.compile(re_string)


for filepath in args.filepath:
if os.path.isdir(filepath):
if args.interpreted:
log("[WARNING]: Running batch analysis in interpreted mode.")
unfiltered = [join(filepath, f) for f in os.listdir(filepath)]
else:
unfiltered = [filepath]
-        contracts += [u for u in unfiltered if pattern.match(u) is not None]
+        contracts += [u for u in unfiltered if fact_generator.match_pattern(u)]

contracts = contracts[args.skip:]
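The inline regex filtering has moved behind the fact generator interface, so each generator decides which files it handles (and MixedFactGenerator can dispatch on several patterns). A hypothetical sketch of match_pattern, mirroring the inline logic it replaces; the real implementation lives in src/runners.py:

    import re

    class AbstractFactGeneratorSketch:
        """Hypothetical stand-in for AbstractFactGenerator (src/runners.py)."""
        pattern = ".*.hex"  # file-matching regex; subclasses/config override this

        def match_pattern(self, filename: str) -> bool:
            re_string = self.pattern
            # Anchor the pattern so the extension must be terminal,
            # exactly as the removed inline code did.
            if not re_string.endswith("$"):
                re_string = re_string + "$"
            return re.match(re_string, filename) is not None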

@@ -592,18 +586,25 @@ def run_gigahorse(args, fact_gen_class: Type[AbstractFactGenerator]) -> None:
default=False,
help="Disables the scalable fallback configuration (using a hybrid-precise context configuration) that kicks off"
" if decompilation with the default (transactional) config takes up more than half of the total timeout.")
parser.add_argument("--custom_fact_generator",
nargs="*",
default=None,
help="Adds custom scripts for non-default fact generation. Takes a list of paths for the custom fact generation scripts. "
" Fact generation scripts can also be Datalog files. The default is the decompilation fact generation from bytecode files.")
parser.add_argument("--custom_file_pattern",
nargs="?",
default=".*.hex",
help="Adds a custom file filtering RegEx. The default is .hex (bytecode) files.")

args = parser.parse_args()
-    if args.custom_fact_generator == None:
-        run_gigahorse(args, DecompilerFactGenerator)
-    else:
-        run_gigahorse(args, CustomFactGenerator)

+    tac_gen_config_json = os.path.join(os.path.dirname(os.path.abspath(__file__)),TAC_GEN_CONFIG_FILE)
+    with open(tac_gen_config_json, 'r') as config:
+        tac_gen_config = json.loads(config.read())
+    if len(tac_gen_config["handlers"]) == 0: #if no handlers defined, default to classic decompilation
+        run_gigahorse(args, DecompilerFactGenerator(args, ".*.hex"))
+    elif len(tac_gen_config["handlers"]) == 1: # if one handler defined, can be either classic decompilation, or custom script
+        tac_gen = tac_gen_config["handlers"][0]
+        if tac_gen["tacGenScripts"]["defaultDecomp"] == "true":
+            run_gigahorse(args, DecompilerFactGenerator(args, tac_gen["fileRegex"]))
+        else:
+            run_gigahorse(args, CustomFactGenerator(tac_gen["fileRegex"], tac_gen["tacGenScripts"]["customScripts"]))
+    elif len(tac_gen_config["handlers"]) > 1: # if multiple handlers have been defined, they will be selected based on the file regex
+        fact_generator = MixedFactGenerator(args)
+        for tac_gen in tac_gen_config["handlers"]:
+            pattern = tac_gen["fileRegex"]
+            scripts = tac_gen["tacGenScripts"]["customScripts"]
+            is_default = tac_gen["tacGenScripts"]["defaultDecomp"] == "true"
+            fact_generator.add_fact_generator(pattern, scripts, is_default, args)
+        run_gigahorse(args, fact_generator)
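The two removed command-line flags are superseded by a tac_gen_config.json file. For reference, a plausible config consistent with the keys this dispatch code reads; the second handler's regex and script name are hypothetical, and note that defaultDecomp is compared against the string "true", not a JSON boolean:

    # Hypothetical contents of tac_gen_config.json; only the key structure is
    # taken from the dispatch code above, the concrete values are made up.
    example_config = {
        "handlers": [
            {   # classic decompilation from EVM bytecode
                "fileRegex": ".*.hex",
                "tacGenScripts": {"defaultDecomp": "true", "customScripts": []},
            },
            {   # hypothetical custom fact generator for another input format
                "fileRegex": ".*.json",
                "tacGenScripts": {"defaultDecomp": "false",
                                  "customScripts": ["my_fact_gen.py"]},
            },
        ]
    }
    # With two or more handlers, the code above builds a MixedFactGenerator
    # and picks a handler per input file by matching fileRegex.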
9 changes: 9 additions & 0 deletions logic/decompiler_output.dl
@@ -6,6 +6,15 @@

.output ByteCodeHex(IO="file", filename="bytecode.hex")

+.decl DecompilerConfig(config: symbol) btree_delete
+.output DecompilerConfig
+
+DecompilerConfig("default").
+
+DecompilerConfig(default) <= DecompilerConfig(other):-
+  default = "default",
+  other != default.

.decl GlobalEntryBlock(block: IRBlock)
.output GlobalEntryBlock

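The btree_delete qualifier enables Souffle's subsumption semantics: the rule above deletes the seeded DecompilerConfig("default") fact whenever any other configuration fact exists, so exactly one configuration is reported per run — "scalable" when logic/fallback_scalable.dl (below) asserts it, "default" otherwise. An equivalent computation, as a sketch in Python:

    # Emulates the subsumption rule: "default" survives only when it is
    # the sole configuration fact.
    def effective_config(facts: set) -> set:
        if facts - {"default"}:       # some non-default config was asserted...
            facts.discard("default")  # ...so DecompilerConfig("default") is subsumed
        return facts

    assert effective_config({"default"}) == {"default"}
    assert effective_config({"default", "scalable"}) == {"scalable"}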
4 changes: 3 additions & 1 deletion logic/fallback_scalable.dl
@@ -3,4 +3,6 @@

#define MAX_STACK_HEIGHT 30

#include "main.dl"
+
+DecompilerConfig("scalable").
2 changes: 1 addition & 1 deletion souffle-addon
Submodule souffle-addon updated 2 files
+1 −1 lists.cpp
+9 −1 lists_test.dl
