-
Notifications
You must be signed in to change notification settings - Fork 394
/
Copy pathstack-usage-reporter.py
173 lines (150 loc) · 6.02 KB
/
stack-usage-reporter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/python3
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
# The purpose of this script is to analyze disassembly generated by objdump or
# dumpbin to print (or to compare) the stack usage of functions/methods.
# This is a quickly written script, so it is quite possible it may not handle
# all code properly.
#
# The script expects the user to create a text assembly dump to be passed to
# the script.
#
# objdump Example
# objdump --demangle --disassemble objfile.o > objfile.s
#
# dumpbin Example
# dumpbin /disasm objfile.obj > objfile.s
#
# If the script is passed a single file, then all stack size information that
# is found it printed. If two files are passed, then the script compares the
# stack usage of the two files (useful for A/B comparisons).
# Currently more than two input files are not supported. (But adding support shouldn't
# be very difficult.)
#
# Note: The script only handles x64 disassembly. Supporting x86 is likely
# trivial, but ARM support could be difficult.
# Thus far the script has been tested with MSVC on Win64 and clang on OSX.
import argparse
import re
blank_re = re.compile('\s*')
class LineReader:
def __init__(self, lines):
self.lines = list(reversed(lines))
def get_line(self):
return self.lines.pop(-1)
def peek_line(self):
return self.lines[-1]
def consume_blank_lines(self):
while blank_re.fullmatch(self.peek_line()):
self.get_line()
def is_empty(self):
return len(self.lines) == 0
def parse_objdump_assembly(in_file):
results = {}
text_section_re = re.compile('Disassembly of section __TEXT,__text:\s*')
symbol_re = re.compile('[^<]*<(.*)>:\s*')
stack_alloc = re.compile('.*subq\s*\$(\d*), %rsp\s*')
lr = LineReader(in_file.readlines())
def find_stack_alloc_size():
while True:
if lr.is_empty():
return None
if blank_re.fullmatch(lr.peek_line()):
return None
line = lr.get_line()
mo = stack_alloc.fullmatch(line)
if mo:
lr.consume_blank_lines()
return int(mo.group(1))
# Find beginning of disassembly
while not text_section_re.fullmatch(lr.get_line()):
pass
# Scan for symbols
while not lr.is_empty():
lr.consume_blank_lines()
if lr.is_empty():
break
line = lr.get_line()
mo = symbol_re.fullmatch(line)
# Found a symbol
if mo:
symbol = mo.group(1)
stack_size = find_stack_alloc_size()
if stack_size != None:
results[symbol] = stack_size
return results
def parse_dumpbin_assembly(in_file):
results = {}
file_type_re = re.compile('File Type: COFF OBJECT\s*')
symbol_re = re.compile('[^(]*\((.*)\):\s*')
summary_re = re.compile('\s*Summary\s*')
stack_alloc = re.compile('.*sub\s*rsp,([A-Z0-9]*)h\s*')
lr = LineReader(in_file.readlines())
def find_stack_alloc_size():
while True:
if lr.is_empty():
return None
if blank_re.fullmatch(lr.peek_line()):
return None
line = lr.get_line()
mo = stack_alloc.fullmatch(line)
if mo:
lr.consume_blank_lines()
return int(mo.group(1), 16) # return value in decimal
# Find beginning of disassembly
while not file_type_re.fullmatch(lr.get_line()):
pass
# Scan for symbols
while not lr.is_empty():
lr.consume_blank_lines()
if lr.is_empty():
break
line = lr.get_line()
if summary_re.fullmatch(line):
break
mo = symbol_re.fullmatch(line)
# Found a symbol
if mo:
symbol = mo.group(1)
stack_size = find_stack_alloc_size()
if stack_size != None:
results[symbol] = stack_size
return results
def main():
parser = argparse.ArgumentParser(description='Tool used for reporting or comparing the stack usage of functions/methods')
parser.add_argument('--format', choices=['dumpbin', 'objdump'], required=True, help='Specifies the program used to generate the input files')
parser.add_argument('--input', action='append', required=True, help='Input assembly file. This option may be specified multiple times.')
parser.add_argument('--md-output', action='store_true', help='Show table output in markdown format')
parser.add_argument('--only-diffs', action='store_true', help='Only show stack info when it differs between the input files')
args = parser.parse_args()
parsers = {'dumpbin': parse_dumpbin_assembly, 'objdump' : parse_objdump_assembly}
parse_func = parsers[args.format]
input_results = []
for input_name in args.input:
with open(input_name) as in_file:
results = parse_func(in_file)
input_results.append(results)
if len(input_results) == 1:
# Print out the results sorted by size
size_sorted = sorted([(size, symbol) for symbol, size in results.items()], reverse=True)
print(input_name)
for size, symbol in size_sorted:
print(f'{size:10}\t{symbol}')
print()
elif len(input_results) == 2:
common_symbols = set(input_results[0].keys()).intersection(set(input_results[1].keys()))
print(f'Found {len(common_symbols)} common symbols')
stack_sizes = sorted([(input_results[0][sym], input_results[1][sym], sym) for sym in common_symbols], reverse=True)
if args.md_output:
print('Before | After | Symbol')
print('-- | -- | --')
for size0, size1, symbol in stack_sizes:
if args.only_diffs and size0 == size1:
continue
if args.md_output:
print(f'{size0} | {size1} | {symbol}')
else:
print(f'{size0:10}\t{size1:10}\t{symbol}')
else:
print("TODO support more than 2 inputs")
if __name__ == '__main__':
main()