Skip to content

Commit

Permalink
Artificially merge vps-audit benchmarks for presentation
Browse files (browse the repository at this point in the history)
TODO: actually merge them
LLazarek committed Jan 14, 2025

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent fd639b3 commit 5e3cc4e
Showing 3 changed files with 19 additions and 14 deletions.
9 changes: 9 additions & 0 deletions infrastructure/all_scripts.py
Original file line number Diff line number Diff line change
@@ -6,11 +6,19 @@

from project_root import get_project_root

# TODO: delete this map once the vps-audit benchmarks are actually merged.
# Maps a benchmark name that should be folded away (key) to the benchmark
# name that absorbs its scripts (value).
benchmark_rename_map: dict[str, str] = {'vps-audit-negate': 'vps-audit'}

def get_all_scripts(
scripts_file: Path = get_project_root() / 'infrastructure/data/script-globs.json'
) -> list[Path]:
scripts = scripts_file.read_text()
benchmark_data: dict[str, dict[str, any]] = json.loads(scripts)
# TODO: deleteme after merging vps-audits
for old, new in benchmark_rename_map.items():
benchmark_data[new]["scripts"] = benchmark_data[new]["scripts"] + benchmark_data.pop(old)["scripts"]
return {
benchmark_name: [
script
@@ -23,3 +31,4 @@ def get_all_scripts(
# Script entry point: guarded so it only runs when this module is executed directly.
# NOTE(review): get_all_scripts() is called twice below (once to list the benchmark
# names, once to print its whole return value); the second print looks like a
# debugging aid — confirm it is still wanted and whether the result could be reused.
if __name__ == "__main__":
# Print every benchmark name returned by get_all_scripts(), one per line.
for bench in get_all_scripts().keys():
print(bench)
print(get_all_scripts())
22 changes: 9 additions & 13 deletions infrastructure/colossal_table.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
import viz.dynamic as dyn
import sys

from all_scripts import get_all_scripts
from all_scripts import get_all_scripts, benchmark_rename_map
from project_root import get_project_root

root = get_project_root()
@@ -111,19 +111,10 @@ def citation(benchmark):
# aurpkg is just 1
# bio is just 1
'web-index/scripts/ngrams.sh',
# vps-audit is just 1
'vps-audit/scripts/vps-audit.sh',
'makeself/makeself/test/lsmtest/lsmtest.sh'
]

def benchmark_name(benchmark):
    """Return the name under which *benchmark* is printed.

    Benchmarks present in the override table are replaced by their
    shortened name; every other benchmark name is returned unchanged.
    """
    display_name_overrides = {
        'vps-audit-negate': 'vps-audit-n',
    }
    # A single dict.get lookup replaces the membership test plus indexing;
    # the input itself is the fallback when no override exists.
    return display_name_overrides.get(benchmark, benchmark)

def script_name(script):
script_name_map = {
"encrypt_files.sh": "encrypt.sh",
@@ -154,7 +145,8 @@ def count_constructs(series):
def read_loc_data():
loc_data = pd.read_csv(loc_data_path, header=None)
loc_data.columns = ['script', 'loc']
loc_data['benchmark'] = loc_data['script'].apply(lambda x: x.split('/')[0])
map_df = stx.get_map_df()
loc_data = loc_data.merge(map_df, on='script')
loc_data_bench = loc_data.groupby('benchmark').agg({
'loc': 'sum',
'script': 'count'
@@ -279,7 +271,7 @@ def main():
for _, row in big_bench.iterrows():
numscripts_shown = 0
numscripts = row['number_of_scripts']
print(f"\\bs{{{benchmark_name(row['benchmark'])}}} & {short_category(row['benchmark'])} & {row['number_of_scripts']} & {row['loc']} & {make_input_description(row)} & {row['constructs']} & {row['unique_cmds']} & {format_number(row['time_in_shell'])} & {format_number(row['time_in_commands'])} & {prettify_bytes_number(row['max_unique_set_size'])} & {prettify_bytes_number(row['io_chars'])} & {row['sys_calls']} & {row['file_descriptors']} & {citation(row['benchmark'])} \\\\")
print(f"\\bs{{{row['benchmark']}}} & {short_category(row['benchmark'])} & {row['number_of_scripts']} & {row['loc']} & {make_input_description(row)} & {row['constructs']} & {row['unique_cmds']} & {format_number(row['time_in_shell'])} & {format_number(row['time_in_commands'])} & {prettify_bytes_number(row['max_unique_set_size'])} & {prettify_bytes_number(row['io_chars'])} & {row['sys_calls']} & {row['file_descriptors']} & {citation(row['benchmark'])} \\\\")
# now print the details of all scripts in the benchmark
for _, row_script in big_script.iterrows():
if row_script['benchmark'] == row['benchmark'] and any([fnmatch.fnmatch(row_script['script'], pattern) for pattern in scripts_to_include]):
@@ -309,6 +301,10 @@ def round_whole(numstr):
\\end{tabular}
""")

print('time', file=sys.stderr)
print(agg_order, file=sys.stderr)
print([format_value(v) for v in big_bench['time'].agg(agg_order).values], file=sys.stderr)


# Invoke main() only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
main()
2 changes: 1 addition & 1 deletion infrastructure/viz/syntax.py
Original file line number Diff line number Diff line change
@@ -141,7 +141,7 @@ def node_heatmap(df, outdir=None):

plt.figure(figsize=(5.5, 6))
sns.heatmap(heatmap_data,
cmap='Greys',
cmap='Reds',
annot=annot_data,
fmt='',
cbar_kws={'label': 'Occurrences (* denotes more than 5)',

0 comments on commit 5e3cc4e

Please sign in to comment.