-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrace_consistency.py
1084 lines (934 loc) · 51.3 KB
/
trace_consistency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import re
import os
import sys
import copy
import config
import lcs
import profile
import pointed_objs
import utils
import wasm_instrument
import pin_instrument
# Cache of get_name_and_addr() results, keyed by DW_AT_name + DW_AT_location.
glob_array_dict = dict()
# When True, the checkers print details about what they are doing and find.
debug_mode = False
# When False, trace_check() prints the path of the file being checked.
silent_mode= True
def clear_glob_array_dict():
    """Empty the module-level ``glob_array_dict`` cache in place."""
    # .clear() mutates the existing dict, so no rebinding is involved.
    glob_array_dict.clear()
def get_name_and_addr(glob_obj: dict):
    """Expand one DWARF global-variable record into named element addresses.

    The original note on this function says it should be used for wasm only,
    and that handling the different array/structure/union types and compiler
    optimizations can get complex.

    Args:
        glob_obj: DWARF attributes of one global. ``DW_AT_name``,
            ``DW_AT_location`` (which must contain a ``DW_OP_addr``) and
            ``DW_AT_type`` are read.

    Returns:
        ``(obj_list, (min_addr, max_addr, step_size))`` where ``obj_list`` is
        a list of ``(element_name, address)`` pairs and ``step_size`` is the
        element size in bytes.

        * scalar: one pair; ``step_size`` is ``None`` for types that mention
          none of char/short/int/long (treated as struct/union);
        * integer array: one pair per element, named ``name[i][j]...``;
        * array split with ``DW_OP_piece``: only the elements that still live
          in memory, stopping early at the first element whose address is
          registered under a different name in ``lcs.PtrItem``;
        * unsupported arrays: ``[]`` with zeroed bounds.

        Scalar, plain-array and pieced-array results are cached in
        ``glob_array_dict``; the "ignored" early returns are not.

    Raises:
        AssertionError: the type mentions an integer keyword but matches none
            of the implemented spellings.
    """
    global glob_array_dict
    obj = glob_obj
    obj_name = obj['DW_AT_name'].strip('()').strip('"')
    location = obj["DW_AT_location"]
    obj_addr = int(re.search(r"DW_OP_addr (\w+)", location).group(1), 16)
    obj_type = obj["DW_AT_type"]
    obj_key = obj["DW_AT_name"] + location
    if obj_key in glob_array_dict:
        return glob_array_dict[obj_key]

    if '[' not in obj_type:  # single addr
        # step_size of a single var may never be used, but just in case
        if "int64" in obj_type or "long int" in obj_type or "long long" in obj_type:
            step_size = 8
        elif '*' in obj_type:  # pointer type
            step_size = config.pointer_size
        elif "int32" in obj_type or '"int"' in obj_type or '"unsigned int"' in obj_type or '"long"' in obj_type:
            step_size = 4
        elif "int16" in obj_type or '"short"' in obj_type:
            step_size = 2
        elif "int8" in obj_type or '"char"' in obj_type:
            step_size = 1
        elif 'char' not in obj_type and 'short' not in obj_type and 'int' not in obj_type and 'long' not in obj_type:
            step_size = None  # ignore complex structure/union
        else:
            assert False, "glob obj type: {} not implemented".format(obj_type)
        result = ([(obj_name, obj_addr)], (obj_addr, obj_addr, step_size))
        glob_array_dict[obj_key] = result
        return result

    # ---- array: return one address per element ----
    plain_type = obj_type.replace('const ', '').replace('volatile ', '')
    mat = re.search(r'\(0x[\da-fA-F]+\s"([\w\s]+)((\[\d+])+)"\)', plain_type)
    if mat is None:
        if '*' in plain_type:
            return [], (0, 0, config.pointer_size)  # pointer array, ignore
        assert False, "glob obj type: {} not implemented".format(obj_type)

    elem_type = mat.group(1)
    array_dim = [int(dim) for dim in re.findall(r'\[(\d+)]', mat.group(2))]
    obj_num = 1
    for dim in array_dim:
        obj_num *= dim

    if "int64" in elem_type:
        step_size = 8
    elif "int32" in elem_type or elem_type in ('int', 'unsigned int', 'long', 'unsigned long',
                                               'long unsigned int', 'long int'):
        step_size = 4
    elif "int16" in elem_type or elem_type == 'short':
        step_size = 2
    elif "int8" in elem_type or 'char' in elem_type:
        step_size = 1
    elif 'char' not in elem_type and 'short' not in elem_type and 'int' not in elem_type and 'long' not in elem_type:
        return [], (0, 0, 0)  # ignore complex structure/union
    else:
        assert False, "glob obj type: {} not implemented".format(elem_type)

    # TODO: handle DW_OP_piece, the memory layout is optimized
    if 'DW_OP_piece' in location:
        if obj_type.count("[") > 1:
            # ignore complex multiple dimension array with optimized memory layout
            return [], (0, 0, step_size)

        # Walk the location expression following the DW_OP_piece definition in
        # the DWARF manual (https://dwarfstd.org/doc/DWARF4.pdf); the debug
        # info generated by Emscripten could be incorrect.
        # A piece preceded by DW_OP_addr is in memory; a piece without one was
        # optimized away and is recorded with address 0.
        all_elems = []
        prev_op_addr = 0
        for dwarf_item in location.strip('()').split(', '):
            if piece := re.match(r"DW_OP_piece 0x(\w+)", dwarf_item):
                for _ in range(int(piece.group(1), 16) // step_size):
                    all_elems.append((obj_name + '[{}]'.format(len(all_elems)), prev_op_addr))
                    if prev_op_addr != 0:
                        prev_op_addr += step_size
                prev_op_addr = 0
            elif addr_op := re.match(r"DW_OP_addr 0x(\w+)", dwarf_item):
                prev_op_addr = int(addr_op.group(1), 16)

        # Keep only the elements that are really stored in memory.
        obj_list = []
        min_addr = obj_addr + obj_num * step_size  # obj_addr -> the first element in mem
        max_addr = obj_addr
        for elem in all_elems:
            elem_name, addr = elem
            if addr == 0:
                continue
            # early stop condition: current element overlaps with other objects in wasm or elf
            if addr in lcs.PtrItem.wasm_objs_dict and lcs.PtrItem.wasm_objs_dict[addr][0] != elem_name:
                break
            if addr in lcs.PtrItem.clang_objs_dict and lcs.PtrItem.clang_objs_dict[addr][0] != elem_name:
                break
            obj_list.append(elem)
            max_addr = max(addr, max_addr)
            min_addr = min(addr, min_addr)
        result = (obj_list, (min_addr, max_addr, step_size))
        glob_array_dict[obj_key] = result
        return result

    # The layout is not optimized: elements are contiguous in row-major order.
    # strides[i] = number of elements spanned by one step along dimension i.
    strides = [1] * len(array_dim)
    for i in range(len(array_dim) - 2, -1, -1):
        strides[i] = strides[i + 1] * array_dim[i + 1]

    obj_list = []
    for count in range(obj_num):
        rest = count
        name = obj_name
        for stride in strides:
            index, rest = divmod(rest, stride)
            name += '[{}]'.format(index)
        obj_list.append((name, obj_addr + count * step_size))
    result = (obj_list, (obj_addr, obj_addr + (obj_num - 1) * step_size, step_size))
    glob_array_dict[obj_key] = result
    return result
def generalize_wasm_trace(trace_path: str, wasm_globs: list, wasm_func_objs: list, wasm_param_dict: dict):
    """Parse a raw wasm trace file into per-global and per-function traces.

    Recognised lines:

    * ``ID:...``                 auxiliary info attached to the next event;
    * ``$name``                  function call, followed by ``P:<hex>`` lines
                                 (one per actual argument);
    * ``$name R:<hex>``          function return;
    * ``W: <hex addr>: <size>``  global write, followed by ``V: <hex value>``.

    Args:
        trace_path: path of the trace file.
        wasm_globs: sequence whose items carry a DWARF global dict at index 1.
        wasm_func_objs: unused here; kept for interface compatibility.
        wasm_param_dict: unused here (the number of arguments is taken from
            the trace itself); kept for interface compatibility.

    Returns:
        ``(glob_trace_dict, func_trace_dict)``. ``glob_trace_dict`` maps an
        element name to a list of ``(value, aux_info)``; ``func_trace_dict``
        maps a function name to a list of ``('P', args, aux_info)`` and
        ``('R', [ret], aux_info)`` items.

    Raises:
        AssertionError: a ``W:`` line is not followed by ``V:``, or a stray
            ``P:``/``V:`` line is met.
    """
    func_trace_dict = dict()
    glob_trace_dict = dict()
    clear_glob_array_dict()

    def func_trace_add(key, value):
        func_trace_dict.setdefault(key, []).append(value)

    def glob_trace_add(key, value):
        glob_trace_dict.setdefault(key, []).append(value)

    aux_info = ""
    with open(trace_path, 'r') as f:
        lines = f.readlines()

    idx = 0
    while idx < len(lines):
        l = lines[idx]
        if l.startswith('ID:'):
            aux_info = l
        elif l.startswith('$') and 'R:' not in l:  # func call
            func_name = l.strip().strip('$')
            arg_list = []
            # Consume the arguments actually present in the trace: the number
            # of real parameters may exceed what the DWARF info declares.
            while (idx + 1) < len(lines) and lines[idx + 1].startswith('P:'):
                idx += 1
                l = lines[idx]
                arg_list.append(int(l[l.find(':') + 1:].strip(), 16))
            func_trace_add(func_name, ('P', arg_list, aux_info))
            aux_info = ""
        elif l.startswith('$') and 'R:' in l:  # func return
            func_name = l[:l.find('R:')].strip('$ ')
            ret_value = int(l[l.find(':') + 1:].strip(), 16)
            func_trace_add(func_name, ('R', [ret_value], aux_info))
            aux_info = ""
        elif l.startswith('W: '):  # globals write
            fields = l.split(':')
            write_addr = int(fields[1].strip(), 16)
            write_size = int(fields[2].strip())
            idx += 1
            l = lines[idx]
            assert l.startswith('V: ')
            write_value = int(l[l.find(':') + 1:].strip(), 16)
            write_value &= (1 << (write_size * 8)) - 1  # keep write_size bytes

            # Find the corresponding global name.
            glob_name = ''
            if write_addr in lcs.PtrItem.wasm_objs_dict:
                glob_name, clang_addr = lcs.PtrItem.wasm_objs_dict[write_addr]
            # obj_list/step_size are taken only from the global whose address
            # range contains the write; they stay empty/None otherwise so a
            # stale value from an unrelated global is never used.
            obj_list = []
            step_size = None
            for entry in wasm_globs:
                cand_list, (min_addr, max_addr, cand_step) = get_name_and_addr(entry[1])
                if not (min_addr <= write_addr <= max_addr):
                    continue
                for name, addr in cand_list:
                    if glob_name == name:
                        break
                    if write_addr == addr:
                        glob_name = name if len(glob_name) == 0 else glob_name
                        break
                if len(glob_name) > 0:
                    obj_list, step_size = cand_list, cand_step
                    break

            if not step_size:  # unknown global or complex struct: ignore
                pass
            elif len(glob_name) != 0 and step_size <= write_size:
                # handle optimized writes in wasm binary: one wide store
                # covering several consecutive elements is split per element
                elem_mask = (1 << (step_size * 8)) - 1
                pieces = []
                while write_size > 0:
                    for name, addr in obj_list:
                        if write_addr == addr:
                            glob_name = name if len(glob_name) == 0 else glob_name
                            break
                    # A piece that falls outside the matched global has no
                    # name; it is dropped because the checkers assert that
                    # every traced name exists in wasm_globs.
                    if glob_name:
                        pieces.append((glob_name, write_value & elem_mask))
                    glob_name = ''
                    write_addr += step_size
                    write_value = write_value >> step_size * 8
                    write_size -= step_size
                for piece_name, piece_value in pieces:
                    glob_trace_add(piece_name, (piece_value, aux_info))
            elif len(glob_name) != 0:
                # step_size > write_size: partial write to a global; mark it
                # so the checkers can tolerate the truncated value
                aux_info = "OPT\n" + aux_info
                glob_trace_add(glob_name, (write_value, aux_info))
            aux_info = ""
        elif l.startswith('P: ') or l.startswith('V: '):
            assert False, 'error during parsing raw wasm trace.'
        else:
            pass
        idx += 1
    return glob_trace_dict, func_trace_dict
def get_func_obj(func_addr: int, func_objs: list):
    """Return the DWARF function dict whose ``DW_AT_low_pc`` equals *func_addr*.

    Each item of *func_objs* carries the DWARF dict at index 1. Entries
    without ``DW_AT_low_pc`` are skipped. ``None`` is returned when nothing
    matches.
    """
    dwarf_dicts = (entry[1] for entry in func_objs)
    return next(
        (
            info
            for info in dwarf_dicts
            if "DW_AT_low_pc" in info
            and int(info["DW_AT_low_pc"].strip('()'), 16) == func_addr
        ),
        None,
    )
def generalize_pin_trace(trace_path: str, clang_globs: list, clang_func_objs: list, clang_param_dict: dict):
    """Parse a raw pin (native) trace file into per-global and per-function traces.

    Recognised lines:

    * ``0x...``                  auxiliary info attached to the next event;
    * ``><hex addr>``            function call, followed by one ``P:<hex>``
                                 line per parameter declared in
                                 *clang_param_dict*;
    * ``><hex addr> R:<hex>``    function return;
    * ``W: <hex addr>: <size>``  global write, followed by ``V: <hex value>``;
                                 for 16-byte writes the high 64 bits are read
                                 from the ``V:`` line three lines further on;
    * ``#eof``                   stops parsing.

    Args:
        trace_path: path of the trace file.
        clang_globs: sequence whose items carry a DWARF global dict at index 1.
        clang_func_objs: sequence passed to ``get_func_obj`` to resolve call
            addresses.
        clang_param_dict: maps a function's raw ``DW_AT_name`` to its
            parameter list.

    Returns:
        ``(glob_trace_dict, func_trace_dict)`` with the same shapes as the
        wasm parser: element name -> ``[(value, aux_info), ...]`` and function
        name -> ``[('P', args, aux_info) | ('R', [ret], aux_info), ...]``.

    Raises:
        AssertionError: an expected ``P:``/``V:`` line is missing, or a stray
            one is met.
    """
    func_trace_dict = dict()
    glob_trace_dict = dict()
    clear_glob_array_dict()

    def func_trace_add(key, value):
        func_trace_dict.setdefault(key, []).append(value)

    def glob_trace_add(key, value):
        glob_trace_dict.setdefault(key, []).append(value)

    def name_at(addr):
        # Name registered for addr in the native object map, '' when unknown.
        if addr in lcs.PtrItem.clang_objs_dict:
            name, wasm_addr = lcs.PtrItem.clang_objs_dict[addr]
            return name
        return ''

    def array_name(glob_name):
        # an array element -> array name
        return glob_name if '[' not in glob_name else glob_name[:glob_name.find('[')]

    aux_info = ""
    with open(trace_path, 'r') as f:
        lines = f.readlines()

    idx = 0
    while idx < len(lines):
        l = lines[idx]
        if l.startswith('0x'):
            aux_info = l
        elif l.startswith('>') and 'R:' not in l:  # func call
            func_addr = int(l.strip().strip('>'), 16)
            func_obj = get_func_obj(func_addr, clang_func_objs)
            func_name = func_obj["DW_AT_name"].strip('()').strip('"')
            func_key = func_obj["DW_AT_name"]
            arg_list = []
            for param in clang_param_dict.get(func_key, []):
                idx += 1
                l = lines[idx]
                assert l.startswith('P:')
                arg_list.append(int(l[l.find(':') + 1:].strip(), 16))
            func_trace_add(func_name, ('P', arg_list, aux_info))
            aux_info = ""
        elif l.startswith('>') and 'R:' in l:  # func return
            func_addr = int(l.split(' ')[0].strip().strip('>'), 16)
            func_obj = get_func_obj(func_addr, clang_func_objs)
            func_name = func_obj["DW_AT_name"].strip('()').strip('"')
            ret_value = int(l[l.find(':') + 1:].strip(), 16)
            func_trace_add(func_name, ('R', [ret_value], aux_info))
            aux_info = ""
        elif l.startswith('W: '):  # globals write
            fields = l.split(':')
            write_addr = int(fields[1].strip(), 16)
            write_size = int(fields[2].strip())
            idx += 1
            l = lines[idx]
            assert l.startswith('V: ')
            write_value = int(l[l.find(':') + 1:].strip(), 16)
            # Reset per write record so a size left over from an unrelated
            # earlier write is never reused (and never unbound).
            step_size = None
            if write_size == 16:
                # 16 bytes values, exist in clang/gcc O3 binaries (xmm word):
                # writes to consecutive elements in an array
                idx += 3
                l = lines[idx]
                assert l.startswith('V: ')
                write_value_con = int(l[l.find(':') + 1:].strip(), 16)
                write_value = (write_value_con << 64) + write_value
                while write_size > 0:
                    glob_name = name_at(write_addr)  # find corresponding global name
                    glob_key = array_name(glob_name)
                    for entry in clang_globs:
                        obj = entry[1]
                        if obj["DW_AT_name"].strip('"()') == glob_key:
                            step_size = get_name_and_addr(obj)[1][2]  # only to get step_size
                            break
                    # When this element is unknown, the size found for an
                    # earlier element of the same write is carried over.
                    if not step_size:
                        break  # unknown global or complex structure
                    # found the corresponding global variable and the step_size,
                    # now split the 16 bytes into consecutive writes
                    elem_mask = (1 << (step_size * 8)) - 1
                    glob_trace_add(glob_name, (write_value & elem_mask, aux_info))
                    write_value = write_value >> (8 * step_size)
                    write_size -= step_size
                    write_addr += step_size
                aux_info = ""
            else:
                glob_name = name_at(write_addr)  # find corresponding global name
                glob_key = array_name(glob_name)
                for entry in clang_globs:
                    obj = entry[1]
                    if obj["DW_AT_name"].strip('"()') == glob_key:
                        step_size = get_name_and_addr(obj)[1][2]  # only to get step_size
                        break
                if len(glob_name) != 0 and step_size:
                    if step_size <= write_size:
                        elem_mask = (1 << (step_size * 8)) - 1
                        while write_size > 0:
                            # it's possible len(glob_name)==0 here, some complex
                            # cases are not handled
                            glob_name = name_at(write_addr)
                            glob_trace_add(glob_name, (write_value & elem_mask, aux_info))
                            write_value = write_value >> (8 * step_size)
                            write_size -= step_size
                            write_addr += step_size
                        aux_info = ""
                    else:
                        # step_size > write_size, due to compiler optimization:
                        # currently we cannot get the whole value of this obj,
                        # so add an extra auxiliary info
                        aux_info = "OPT\n" + aux_info
                        glob_trace_add(glob_name, (write_value, aux_info))
                        aux_info = ""
        elif l.startswith('P: ') or l.startswith('V: '):
            assert False, 'error during parsing raw pin trace.'
        elif l.startswith('#eof'):
            break
        else:
            pass
        idx += 1
    return glob_trace_dict, func_trace_dict
def trace_check_glob_correct(wasm_glob_trace_dict: dict, clang_glob_trace_dict: dict, wasm_globs: list, case2_check=False):
    """Report globals whose final written value differs between wasm and native.

    Args:
        wasm_glob_trace_dict: element name -> ``[(value, aux_info), ...]``
            from the wasm trace.
        clang_glob_trace_dict: same shape, from the native trace.
        wasm_globs: sequence whose items carry a DWARF global dict at index 1.
        case2_check: also report globals written natively but never in wasm,
            provided they exist both in *wasm_globs* and in
            ``lcs.PtrItem.wasm_objs_dict``.

    Returns:
        A list of strings: ``"name:last_wasm_value"`` for Case 1 and the bare
        name for Case 2.

    Raises:
        AssertionError: a traced wasm global has no entry in *wasm_globs*.
    """
    if debug_mode:
        print('\nChecking correctness (global writes) ...')
    inconsistent_list = []

    # Case 1: inconsistent last write
    for glob_name, glob_records in wasm_glob_trace_dict.items():
        glob_trace = [v[0] for v in glob_records]
        # find corresponding glob_obj (an array element -> array name)
        dwarf_name = '("{}")'.format(glob_name.partition('[')[0])
        obj = next((entry[1] for entry in wasm_globs if entry[1]["DW_AT_name"] == dwarf_name), None)
        assert obj is not None, "global {} not found in wasm globs".format(dwarf_name)

        if glob_name not in clang_glob_trace_dict:
            # this function only checks correctness inconsistency, so just skip
            continue
        clang_records = clang_glob_trace_dict[glob_name]
        clang_trace = [v[0] for v in clang_records]  # remove auxiliary information

        if '*' in obj["DW_AT_type"]:
            # for pointers: compare through PtrItem instead of raw addresses
            glob_trace = [lcs.PtrItem(ptr_name=glob_name, ptr_value=v) for v in glob_trace]
            clang_trace = [lcs.PtrItem(ptr_name=glob_name, ptr_value=v) for v in clang_trace]

        wasm_last = glob_trace[-1]
        clang_last = clang_trace[-1]
        if wasm_last == clang_last:
            continue
        # If the type is int and clang used an optimized (partial) store, this
        # is not an inconsistency. (clang_last == 1 with "OPT\n") is a kind of
        # special optimization (codegen pattern?) used by clang.
        # TODO: re-consider the compiler optimization that may only update
        #       part of the var (e.g., OPT mark)
        tolerated = (
            isinstance(wasm_last, int)
            and clang_records[-1][1].startswith("OPT\n")
            and ((wasm_last & clang_last) == clang_last or clang_last == 1)
        )
        if not tolerated:
            inconsistent_list.append("{}:{}".format(glob_name, wasm_last))
            if debug_mode:
                print('>Glob trace inconsistency founded.')
                print('\tglob_name: {}, wasm_last_write: {}, clang_last_write: {}'.format(glob_name, wasm_last, clang_last))

    # Case 2: missing global writes
    if case2_check:
        for glob_name in clang_glob_trace_dict:
            if glob_name in wasm_glob_trace_dict:
                continue
            dwarf_name = '("{}")'.format(glob_name.partition('[')[0])
            # exists in wasm globs?
            in_globs = any(entry[1]["DW_AT_name"] == dwarf_name for entry in wasm_globs)
            # exists in wasm objs?
            in_objs = any(item[0] == glob_name for item in lcs.PtrItem.wasm_objs_dict.values())
            if in_globs and in_objs:
                inconsistent_list.append(glob_name)
                if debug_mode:
                    print('>Missing glob trace founded.')
                    print('\tglob_name: {}'.format(glob_name))
            # TODO: what if the glob does not exist in wasm globs? Ignored for now.
    return inconsistent_list
def trace_check_glob_perf(wasm_glob_trace_dict: dict, clang_glob_trace_dict: dict, wasm_globs: list):
    """List globals that wasm writes more often than the native binary does.

    Entries are the bare name for globals never written natively (names
    containing ``crc32`` are skipped), otherwise
    ``"name:first_unmatched_value:distance"`` (``ptr`` replaces the value for
    pointer globals). The module counter ``redundant_glob_writes`` is
    increased along the way.
    """
    global redundant_glob_writes
    if debug_mode:
        print('\nChecking performance (global writes) ...')
    inconsistent_list = []
    for glob_name, wasm_records in wasm_glob_trace_dict.items():
        glob_trace = [record[0] for record in wasm_records]

        # array element -> array name, then locate its DWARF entry
        bracket_pos = glob_name.find('[')
        glob_key = glob_name[:bracket_pos] if '[' in glob_name else glob_name
        wanted_name = '("{}")'.format(glob_key)
        for entry in wasm_globs:
            obj = entry[1]
            if obj["DW_AT_name"] == wanted_name:
                break
        assert obj["DW_AT_name"] == wanted_name

        if glob_name not in clang_glob_trace_dict:
            if 'crc32' not in glob_name:
                inconsistent_list.append(glob_name)
                redundant_glob_writes += len(glob_trace)
                if debug_mode:
                    print('>Redundant glob trace founded.')
                    print('\tglob_name: {}'.format(glob_name))
            continue

        clang_records = clang_glob_trace_dict[glob_name]
        # Optimized native writes only touch part of the global (e.g. 1 byte
        # of an int32); they are not skipped, but are compared by count only.
        opt_write_flag = any('OPT\n' in record[1] for record in clang_records)
        clang_trace = [record[0] for record in clang_records]  # drop auxiliary information

        is_pointer = '*' in obj["DW_AT_type"]
        if is_pointer:
            # for pointers: using PtrItem
            glob_trace = [lcs.PtrItem(ptr_name=glob_name, ptr_value=value) for value in glob_trace]
            clang_trace = [lcs.PtrItem(ptr_name=glob_name, ptr_value=value) for value in clang_trace]

        lcs_trace, lcs_trace2 = lcs.lcs(clang_trace, glob_trace)
        wasm_len = len(glob_trace)
        if opt_write_flag:
            suspicious = len(clang_trace) < wasm_len
        else:
            suspicious = wasm_len != len(lcs_trace)
        if not suspicious:
            continue

        if opt_write_flag:
            perf_distance = wasm_len - len(clang_trace)  # TODO: this could be more accurate
        else:
            perf_distance = wasm_len - len(lcs_trace)

        # First index of glob_trace missing from lcs_trace; its value is
        # reported because it helps reducing the test case.
        for idx in range(wasm_len):
            if idx not in lcs_trace:
                break
        reported_value = 'ptr' if is_pointer else glob_trace[idx]
        inconsistent_list.append("{}:{}:{}".format(glob_name, reported_value, perf_distance))
        redundant_glob_writes += wasm_len - len(lcs_trace)

        if debug_mode:
            if glob_trace[-1] == clang_trace[-1]:
                print('>Glob trace performance inconsistency founded.')
            else:
                print('>Glob trace correctness inconsistency founded.')
            print('\tglob_name: {},'.format(glob_name), end=' ')
            for i in range(wasm_len):
                if i not in lcs_trace:
                    print('write_index: {}, write_value: {},'.format(i, glob_trace[i]), end=' ')
            print()
    return inconsistent_list
def trace_check_func_correct(wasm_func_trace_dict: dict, clang_func_trace_dict: dict, wasm_func_objs: list, wasm_param_dict: dict):
    """Report wasm call/return events that have no counterpart in the native trace.

    For every function except ``main`` (its return value is not captured by
    the pin tracer) the wasm events are matched, in order, against the native
    events. Only the first unmatched event of each function is reported.
    Functions absent from the native trace are assumed to be inlined there
    and are skipped.

    Args:
        wasm_func_trace_dict: function name -> ``[(type, values, aux), ...]``
            from the wasm trace.
        clang_func_trace_dict: same shape, from the native trace.
        wasm_func_objs: iterable of ``(addr, dwarf_dict)`` pairs; used to
            decide whether the return value must be compared as a pointer.
        wasm_param_dict: maps ``'("name")'`` to the list of parameter DWARF
            dicts; used to flag pointer/array parameters.

    Returns:
        A list of ``"func_name:values"`` strings.
    """
    # TODO: Clang O0, Wasm O3, mainly focus on the correctness
    if debug_mode:
        print('\nChecking correctness (function calls) ...')
    inconsistent_list = []
    for func_name, func_trace in wasm_func_trace_dict.items():
        if func_name == 'main':
            continue  # ignore main function, as the return value is not captured by pin tool (tracer)

        # What is the parameter type of this function?
        func_key = '("{}")'.format(func_name)
        pointer_flags = []
        for param in wasm_param_dict.get(func_key, []):
            param_type = param["DW_AT_type"]
            is_array = '[' in param_type
            # TODO: What array argument looks like?
            if is_array and debug_mode:
                print("debug: array argument")
            pointer_flags.append('*' in param_type or is_array)

        # And what is the return type of this function?
        ptr_ret_flag = False
        for addr, obj in wasm_func_objs:
            if func_key == obj["DW_AT_name"] and "DW_AT_type" in obj:
                ret_type = obj["DW_AT_type"]
                if '*' in ret_type or '[' in ret_type:
                    ptr_ret_flag = True
                elif 'int' not in ret_type and 'short' not in ret_type and 'char' not in ret_type and 'long' not in ret_type:
                    # neither pointer nor plain integer: handled like a pointer
                    ptr_ret_flag = True

        if func_name not in clang_func_trace_dict:
            continue  # the function is inlined in optimized clang binary
        clang_trace = clang_func_trace_dict[func_name]

        # Emscripten has (advanced) optimization strategies that only inline
        # some out of all function calls, so len(clang_trace) may differ from
        # len(func_trace). The assumption is: function calls that exist in the
        # wasm trace should also exist in the clang trace.
        # item[2] (auxiliary information) is not part of the comparison.
        func_item_trace = [
            lcs.FuncItem(func_name=func_name, item_type=item[0], item_values=item[1],
                         pointer_flags=pointer_flags, ptr_ret_flag=ptr_ret_flag)
            for item in func_trace
        ]
        clang_item_trace = [
            lcs.FuncItem(func_name=func_name, item_type=item[0], item_values=item[1],
                         pointer_flags=pointer_flags, ptr_ret_flag=ptr_ret_flag)
            for item in clang_trace
        ]

        clang_idx = 0  # native events before this index are already consumed
        for i, wasm_item in enumerate(func_item_trace):
            for j in range(clang_idx, len(clang_item_trace)):
                if wasm_item == clang_item_trace[j]:
                    clang_idx = j + 1
                    break
            else:
                inconsistent_list.append("{}:{}".format(func_name, wasm_item.values_str()))
                if debug_mode:
                    print('>Func trace inconsistency founded.')
                    print('\tfunc_name: {}, wasm_item_index: {}, item_type: {}, item_values: {}'.format(
                        func_name, i, wasm_item.type, wasm_item.values))
                break  # de-duplicate
    return inconsistent_list
def trace_check_func_perf(wasm_func_trace_dict: dict, clang_func_trace_dict: dict, wasm_func_objs: list, wasm_param_dict: dict):
    """Report functions for which wasm executes calls the native binary avoids.

    For every function except ``main`` the wasm events are compared with the
    native events through ``lcs.lcs``. Functions that never appear in the
    native trace are reported as ``"name:values_of_first_event"``; otherwise
    a mismatch is reported as ``"name:values_of_first_unmatched_event:distance"``.
    Each unmatched wasm call (``'P'`` event) increases the module counter
    ``redundant_func_calls``.

    Args:
        wasm_func_trace_dict: function name -> ``[(type, values, aux), ...]``
            from the wasm trace.
        clang_func_trace_dict: same shape, from the native trace.
        wasm_func_objs: iterable of ``(addr, dwarf_dict)`` pairs; used to
            decide whether the return value must be compared as a pointer.
        wasm_param_dict: maps ``'("name")'`` to the list of parameter DWARF
            dicts; used to flag pointer/array parameters.

    Returns:
        The list of report strings described above.
    """
    # TODO: for the performance check, we compare Clang O3 with Wasm O3?
    #       i.e. does Wasm compiler have comparable optimization quality?
    global redundant_func_calls
    if debug_mode:
        print('\nChecking performance (function calls) ...')
    inconsistent_list = []
    for func_name, func_trace in wasm_func_trace_dict.items():
        if func_name == 'main':
            continue  # ignore main function, as the return value is not captured by pin tool (tracer)

        # What is the parameter type of this function?
        func_key = '("{}")'.format(func_name)
        pointer_flags = []
        for param in wasm_param_dict.get(func_key, []):
            param_type = param["DW_AT_type"]
            is_array = '[' in param_type
            # TODO: What array argument looks like?
            if is_array and debug_mode:
                print("debug: array argument")
            pointer_flags.append('*' in param_type or is_array)

        # And what is the return type of this function?
        ptr_ret_flag = False
        for addr, obj in wasm_func_objs:
            if func_key == obj["DW_AT_name"] and "DW_AT_type" in obj:
                ret_type = obj["DW_AT_type"]
                if '*' in ret_type or '[' in ret_type:
                    ptr_ret_flag = True
                elif 'int' not in ret_type and 'short' not in ret_type and 'char' not in ret_type and 'long' not in ret_type:
                    # neither pointer nor plain integer: handled like a pointer
                    ptr_ret_flag = True

        # item[2] (auxiliary information) is not part of the comparison.
        func_item_trace = [
            lcs.FuncItem(func_name=func_name, item_type=item[0], item_values=item[1],
                         pointer_flags=pointer_flags, ptr_ret_flag=ptr_ret_flag)
            for item in func_trace
        ]

        if func_name not in clang_func_trace_dict:
            # TODO: is missing inline opportunity a problem? Seems not?
            # https://dl.acm.org/doi/10.1145/3503222.3507744
            if debug_mode:
                print('>Func trace inconsistency founded.')
                print('{} could be optimized or inlined.'.format(func_name))
            inconsistent_list.append("{}:{}".format(func_name, func_item_trace[0].values_str()))
            redundant_func_calls += sum(1 for it in func_trace if it[0] == 'P')
            continue

        clang_trace = clang_func_trace_dict[func_name]
        # TODO: to check missed opt opportunities we must accept that
        #       len(clang_trace) != len(func_trace) is possible, i.e. Clang/gcc
        #       may have some more advanced optimizations.
        clang_item_trace = [
            lcs.FuncItem(func_name=func_name, item_type=item[0], item_values=item[1],
                         pointer_flags=pointer_flags, ptr_ret_flag=ptr_ret_flag)
            for item in clang_trace
        ]

        lcs_item_trace, lcs_item_trace2 = lcs.lcs(clang_item_trace, func_item_trace)
        if len(lcs_item_trace) == len(func_item_trace):
            continue

        perf_distance = len(func_item_trace) - len(lcs_item_trace)
        # Indices of wasm events that are missing from lcs_item_trace,
        # computed once and reused below.
        unmatched = [i for i in range(len(func_item_trace)) if i not in lcs_item_trace]
        # The first inconsistent element is reported: its value is used for
        # better reducing. If every index is present, the last one is used.
        first_idx = unmatched[0] if unmatched else len(func_item_trace) - 1
        inconsistent_list.append("{}:{}:{}".format(func_name, func_item_trace[first_idx].values_str(), perf_distance))
        redundant_func_calls += sum(1 for i in unmatched if func_trace[i][0] == 'P')
        if debug_mode:
            print('>Func trace inconsistency founded.')
            print('\tfunc_name: {},'.format(func_name), end=' ')
            for i in unmatched:
                print('item_index: {}, item_type: {}'.format(i, func_trace[i][0]), end=' ')
            print()
    return inconsistent_list
# Module-wide statistics counters; they only ever grow within this file.
# Incremented by trace_check_glob_perf().
redundant_glob_writes = 0
# Incremented by trace_check_func_perf() for unmatched wasm 'P' (call) events.
redundant_func_calls = 0
# The four totals below are accumulated by overall_statistic().
overall_wasm_glob_writes = 0
overall_wasm_func_calls = 0
overall_clang_glob_writes = 0
overall_clang_func_calls = 0
def overall_statistic(wasm_glob_trace_dict: dict, clang_glob_trace_dict: dict, wasm_func_trace_dict: dict, clang_func_trace_dict: dict):
    """Add the sizes of the given traces to the module-level ``overall_*`` counters.

    Global-write totals grow by the number of recorded writes; function-call
    totals grow by the number of ``'P'`` (call) events, so returns are not
    counted.
    """
    global overall_wasm_glob_writes, overall_wasm_func_calls, overall_clang_glob_writes, overall_clang_func_calls

    def count_calls(trace_dict):
        # a call is an event whose first field is 'P'
        return sum(1 for events in trace_dict.values() for event in events if event[0] == 'P')

    overall_wasm_glob_writes += sum(len(writes) for writes in wasm_glob_trace_dict.values())
    overall_clang_glob_writes += sum(len(writes) for writes in clang_glob_trace_dict.values())
    overall_wasm_func_calls += count_calls(wasm_func_trace_dict)
    overall_clang_func_calls += count_calls(clang_func_trace_dict)
def trace_check(c_src_path: str, clang_opt_level='-O0', emcc_opt_level='-O2', need_compile=True, need_info=False, input_str=""):
    """Run the wasm-vs-clang trace consistency checks for one C source file.

    Pipeline, in order: derive artifact paths next to the source, optionally
    remove old artifacts, collect global/function profile data, optionally
    compile with both toolchains, build the pointed-object mapping,
    instrument and run both binaries, generalize the raw traces, run the
    correctness and performance checks, print the findings, then update and
    print the module-level statistics counters.

    Args:
        c_src_path: path to the C file; made absolute and asserted to end
            with ``.c``.
        clang_opt_level: optimization flag forwarded to the clang-side helpers.
        emcc_opt_level: optimization flag forwarded to the emscripten-side
            helpers.
        need_compile: when true, the existing ``.out``/``.wasm`` files are
            removed and both toolchains are invoked again; the flag is also
            forwarded to the profile helpers.
        need_info: when true, a fifth element
            ``((wasm_globs, clang_globs), (wasm_func_objs, clang_func_objs))``
            is appended to the normal return value.
        input_str: forwarded to the pin instrumentation and the wasm runner.

    Returns:
        ``(glob_correct, func_correct, glob_perf, func_perf)`` lists of
        findings, plus the extra element described under ``need_info``.
        The two early exits ignore ``need_info`` and always return four
        elements: four empty lists when no wasm globals were collected, and
        ``(["timeout"], [], [], [])`` when the wasm run reports a truthy
        status.
    """
    # clean
    c_src_path = os.path.abspath(c_src_path)
    assert c_src_path.endswith('.c')
    src_name = os.path.basename(c_src_path)  # prefix of every report line
    stem = c_src_path[:c_src_path.rfind('.')]
    elf_path = stem + '.out'
    clang_dwarf_txt_path = elf_path + '.dwarf'
    wasm_path = stem + '.wasm'
    js_path = c_src_path[:-2] + '.js'
    wasm_dwarf_txt_path = wasm_path + '.dwarf'
    if need_compile:
        # remove artifacts of a previous build before regenerating them
        for stale_path in (elf_path, wasm_path):
            status, output = utils.cmd("rm {}".format(os.path.abspath(stale_path)))

    if not silent_mode:
        print("\nTrace Consistency Checking for {}...".format(c_src_path))

    # profile, get dwarf information of global variables and function arguments
    wasm_globs, clang_globs = profile.collect_glob_vars(c_src_path, clang_opt_level, emcc_opt_level, need_compile)
    wasm_func_info, clang_func_info = profile.collect_funcs(c_src_path, clang_opt_level, emcc_opt_level, need_compile)
    wasm_func_objs, wasm_param_dict, wasm_func_names_list = wasm_func_info
    clang_func_objs, clang_param_dict, clang_func_names_list = clang_func_info
    wasm_globs_all = profile.get_wasm_globs(c_src_path, emcc_opt_level, need_compile)

    if len(wasm_globs) == 0:
        if debug_mode:
            print("No globs, skip this case")
        return [], [], [], []

    # compile
    if need_compile:
        wasm_path, js_path, wasm_dwarf_txt_path = profile.emscripten_dwarf(c_src_path, opt_level=emcc_opt_level)
        elf_path, dwarf_path = profile.clang_dwarf(c_src_path, opt_level=clang_opt_level)

    # Before checking
    # wat_path = wasm_path[:-5] + '.wat'
    # if not os.path.exists(wat_path):
    wat_path = utils.wasm2wat(wasm_path)
    pointed_mapping = pointed_objs.get_pointed_objs_mapping(
        c_src_path, elf_path, wat_path, clang_opt_level, emcc_opt_level, need_compile)
    mapping_dict, wasm_objs_dict, clang_objs_dict = pointed_mapping
    # both item classes are handed the same three dictionaries
    lcs.FuncItem.set_dict(mapping_dict, wasm_objs_dict, clang_objs_dict)
    lcs.PtrItem.set_dict(mapping_dict, wasm_objs_dict, clang_objs_dict)

    # get trace
    wasm_instrument.instrument(
        wasm_path, wasm_globs_all, wasm_func_objs, wasm_param_dict, wasm_path, opt_level=emcc_opt_level)
    clang_raw_trace_path = pin_instrument.instrument(
        c_src_path, clang_globs, clang_func_objs, clang_param_dict, elf_path, input_str=input_str)
    # wasm_raw_trace_path, js_status = wasm_instrument.run_wasm_timeout(js_path, input_str=input_str)
    wasm_raw_trace_path, js_status = wasm_instrument.run_wasm(js_path, input_str=input_str)
    if js_status:  # non-exit loop in optimized wasm code, special handler
        timeout_result = ["timeout"]
        print('{} glob (incorrect):'.format(src_name), timeout_result)
        return timeout_result, [], [], []

    # trace generalization
    wasm_glob_trace_dict, wasm_func_trace_dict = generalize_wasm_trace(
        wasm_raw_trace_path, wasm_globs, wasm_func_objs, wasm_param_dict)
    clang_glob_trace_dict, clang_func_trace_dict = generalize_pin_trace(
        clang_raw_trace_path, clang_globs, clang_func_objs, clang_param_dict)

    # TODO: update instrumentation, and provided more information to locate bugs
    # trace consistency check (correctness)
    glob_correct_inconsistent_list = (
        trace_check_glob_correct(wasm_glob_trace_dict, clang_glob_trace_dict, wasm_globs, case2_check=False)
        if len(wasm_globs) > 0
        else []
    )
    func_correct_inconsistent_list = trace_check_func_correct(
        wasm_func_trace_dict, clang_func_trace_dict, wasm_func_objs, wasm_param_dict)
    # stay quiet only when silent mode is on AND there is nothing to report
    if not (silent_mode
            and len(glob_correct_inconsistent_list) == 0
            and len(func_correct_inconsistent_list) == 0):
        print('{} glob (incorrect):'.format(src_name), glob_correct_inconsistent_list)
        print('{} func (incorrect):'.format(src_name), func_correct_inconsistent_list)

    # trace consistency check (performance)
    glob_perf_inconsistent_list = (
        trace_check_glob_perf(wasm_glob_trace_dict, clang_glob_trace_dict, wasm_globs)
        if len(wasm_globs) > 0
        else []
    )
    func_perf_inconsistent_list = trace_check_func_perf(
        wasm_func_trace_dict, clang_func_trace_dict, wasm_func_objs, wasm_param_dict)
    if not (silent_mode
            and len(glob_perf_inconsistent_list) == 0
            and len(func_perf_inconsistent_list) == 0):
        print('{} glob (performance):'.format(src_name), glob_perf_inconsistent_list)
        print('{} func (performance):'.format(src_name), func_perf_inconsistent_list)

    # update the module-level counters, then dump them (read after the update)
    overall_statistic(wasm_glob_trace_dict, clang_glob_trace_dict, wasm_func_trace_dict, clang_func_trace_dict)
    for template, counter in (
        ("redundant wasm glob writes: {}", redundant_glob_writes),
        ("redundant wasm func calls: {}", redundant_func_calls),
        ("overall wasm glob writes: {}", overall_wasm_glob_writes),
        ("overall wasm func calls: {}", overall_wasm_func_calls),
        ("overall clang glob writes: {}", overall_clang_glob_writes),
        ("overall clang func calls: {}", overall_clang_func_calls),
    ):
        print(template.format(counter))

    findings = (
        glob_correct_inconsistent_list,
        func_correct_inconsistent_list,
        glob_perf_inconsistent_list,
        func_perf_inconsistent_list,
    )
    if need_info:
        return findings + (((wasm_globs, clang_globs), (wasm_func_objs, clang_func_objs)),)
    return findings
def trace_check_with_wasabi(c_src_path: str, clang_opt_level='-O0', emcc_opt_level='-O2', need_compile=True, need_info=False, input_str=""):
    """Run the wasm-vs-clang trace consistency checks for one C source file.

    The body goes through the same steps as ``trace_check`` (path setup,
    optional cleanup, profiling, optional compilation, pointed-object
    mapping, instrumentation, trace generalization, correctness and
    performance checks with printing) but does not touch or print the
    module-level statistics counters.

    NOTE(review): despite the name, the instrumentation calls in this body
    are the same ``wasm_instrument`` / ``pin_instrument`` calls used by
    ``trace_check``; whether Wasabi is involved is not visible from here.

    Args:
        c_src_path: path to the C file; made absolute and asserted to end
            with ``.c``.
        clang_opt_level: optimization flag forwarded to the clang-side helpers.
        emcc_opt_level: optimization flag forwarded to the emscripten-side
            helpers.
        need_compile: when true, the existing ``.out``/``.wasm`` files are
            removed and both toolchains are invoked again; the flag is also
            forwarded to the profile helpers.
        need_info: when true, a fifth element
            ``((wasm_globs, clang_globs), (wasm_func_objs, clang_func_objs))``
            is appended to the normal return value.
        input_str: forwarded to the pin instrumentation and the wasm runner.

    Returns:
        ``(glob_correct, func_correct, glob_perf, func_perf)`` lists of
        findings, plus the extra element described under ``need_info``.
        The two early exits ignore ``need_info`` and always return four
        elements: four empty lists when no wasm globals were collected, and
        ``(["timeout"], [], [], [])`` when the wasm run reports a truthy
        status.
    """
    # clean
    c_src_path = os.path.abspath(c_src_path)
    assert c_src_path.endswith('.c')
    report_prefix = os.path.basename(c_src_path)  # shown in every report line
    path_stem = c_src_path[:c_src_path.rfind('.')]
    elf_path = path_stem + '.out'
    clang_dwarf_txt_path = elf_path + '.dwarf'
    wasm_path = path_stem + '.wasm'
    js_path = c_src_path[:-2] + '.js'
    wasm_dwarf_txt_path = wasm_path + '.dwarf'
    if need_compile:
        # remove artifacts of a previous build before regenerating them
        for old_artifact in (elf_path, wasm_path):
            status, output = utils.cmd("rm {}".format(os.path.abspath(old_artifact)))

    if not silent_mode:
        print("\nTrace Consistency Checking for {}...".format(c_src_path))

    # profile, get dwarf information of global variables and function arguments
    wasm_globs, clang_globs = profile.collect_glob_vars(c_src_path, clang_opt_level, emcc_opt_level, need_compile)
    wasm_side, clang_side = profile.collect_funcs(c_src_path, clang_opt_level, emcc_opt_level, need_compile)
    wasm_func_objs, wasm_param_dict, wasm_func_names_list = wasm_side
    clang_func_objs, clang_param_dict, clang_func_names_list = clang_side
    wasm_globs_all = profile.get_wasm_globs(c_src_path, emcc_opt_level, need_compile)

    if len(wasm_globs) == 0:
        if debug_mode:
            print("No globs, skip this case")
        return [], [], [], []

    # compile
    if need_compile:
        wasm_path, js_path, wasm_dwarf_txt_path = profile.emscripten_dwarf(c_src_path, opt_level=emcc_opt_level)
        elf_path, dwarf_path = profile.clang_dwarf(c_src_path, opt_level=clang_opt_level)

    # Before checking
    # wat_path = wasm_path[:-5] + '.wat'
    # if not os.path.exists(wat_path):
    wat_path = utils.wasm2wat(wasm_path)
    objs_mapping = pointed_objs.get_pointed_objs_mapping(
        c_src_path, elf_path, wat_path, clang_opt_level, emcc_opt_level, need_compile)
    mapping_dict, wasm_objs_dict, clang_objs_dict = objs_mapping
    # both item classes are handed the same three dictionaries
    lcs.FuncItem.set_dict(mapping_dict, wasm_objs_dict, clang_objs_dict)
    lcs.PtrItem.set_dict(mapping_dict, wasm_objs_dict, clang_objs_dict)

    # get trace
    wasm_instrument.instrument(
        wasm_path, wasm_globs_all, wasm_func_objs, wasm_param_dict, wasm_path, opt_level=emcc_opt_level)
    clang_raw_trace_path = pin_instrument.instrument(
        c_src_path, clang_globs, clang_func_objs, clang_param_dict, elf_path, input_str=input_str)
    # wasm_raw_trace_path, js_status = wasm_instrument.run_wasm_timeout(js_path, input_str=input_str)
    wasm_raw_trace_path, js_status = wasm_instrument.run_wasm(js_path, input_str=input_str)
    if js_status:  # non-exit loop in optimized wasm code, special handler
        timeout_result = ["timeout"]
        print('{} glob (incorrect):'.format(report_prefix), timeout_result)
        return timeout_result, [], [], []

    # trace generalization
    wasm_glob_trace_dict, wasm_func_trace_dict = generalize_wasm_trace(
        wasm_raw_trace_path, wasm_globs, wasm_func_objs, wasm_param_dict)
    clang_glob_trace_dict, clang_func_trace_dict = generalize_pin_trace(
        clang_raw_trace_path, clang_globs, clang_func_objs, clang_param_dict)

    # TODO: update instrumentation, and provided more information to locate bugs
    # trace consistency check (correctness)
    glob_correct_inconsistent_list = (
        trace_check_glob_correct(wasm_glob_trace_dict, clang_glob_trace_dict, wasm_globs, case2_check=False)
        if len(wasm_globs) > 0
        else []
    )
    func_correct_inconsistent_list = trace_check_func_correct(
        wasm_func_trace_dict, clang_func_trace_dict, wasm_func_objs, wasm_param_dict)
    # stay quiet only when silent mode is on AND there is nothing to report
    if not (silent_mode
            and len(glob_correct_inconsistent_list) == 0
            and len(func_correct_inconsistent_list) == 0):
        print('{} glob (incorrect):'.format(report_prefix), glob_correct_inconsistent_list)
        print('{} func (incorrect):'.format(report_prefix), func_correct_inconsistent_list)

    # trace consistency check (performance)
    glob_perf_inconsistent_list = (
        trace_check_glob_perf(wasm_glob_trace_dict, clang_glob_trace_dict, wasm_globs)
        if len(wasm_globs) > 0
        else []
    )
    func_perf_inconsistent_list = trace_check_func_perf(
        wasm_func_trace_dict, clang_func_trace_dict, wasm_func_objs, wasm_param_dict)
    if not (silent_mode
            and len(glob_perf_inconsistent_list) == 0
            and len(func_perf_inconsistent_list) == 0):
        print('{} glob (performance):'.format(report_prefix), glob_perf_inconsistent_list)
        print('{} func (performance):'.format(report_prefix), func_perf_inconsistent_list)

    findings = (
        glob_correct_inconsistent_list,
        func_correct_inconsistent_list,
        glob_perf_inconsistent_list,
        func_perf_inconsistent_list,
    )
    if need_info:
        return findings + (((wasm_globs, clang_globs), (wasm_func_objs, clang_func_objs)),)
    return findings
def main():
global debug_mode
# test
# c_src_path = './missopt_cases/bug_cases/test6_re_re.c'
c_src_path = './tmp.c'
c_src_path = './find_wasm_opt/0-1000/test0-785_re.c'
c_src_path = './find_wasm_opt/0-1000/test15-935_re.c'
c_src_path = '/home/tester/Downloads/adpcm/adpcm.c'
# c_src_path = '/home/tester/Downloads/mips/mips.c'
# c_src_path = '/home/tester/Downloads/gsm/gsm.c'
# c_src_path = '/home/tester/Downloads/jpeg/main.c'
# c_src_path = '/home/tester/Downloads/motion/mpeg2.c'
# c_src_path = '/home/tester/Documents/BenchmarkingWebAssembly/modified_benchmarks/CHStone_v1.11_150204/adpcm/adpcm.c'
# c_src_path = '/home/tester/Documents/BenchmarkingWebAssembly/modified_benchmarks/CHStone_v1.11_150204/mips/mips.c'
# c_src_path = '/home/tester/Documents/BenchmarkingWebAssembly/modified_benchmarks/CHStone_v1.11_150204/gsm/gsm.c'
# c_src_path = '/home/tester/Documents/BenchmarkingWebAssembly/modified_benchmarks/CHStone_v1.11_150204/jpeg/main.c'
# c_src_path = '/home/tester/Documents/BenchmarkingWebAssembly/modified_benchmarks/CHStone_v1.11_150204/motion/mpeg2.c'
elf_path, dwarf_path = profile.clang_dwarf(c_src_path, opt_level='-O3')
wasm_path, js_path, wasm_dwarf_txt_path = profile.emscripten_dwarf(c_src_path, opt_level='-O0')
# output1, status1 = utils.run_single_prog(elf_path)
# output2, status2 = utils.run_single_prog("node {}".format(js_path))