-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
519 lines (435 loc) · 20.3 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
import re
import clang.cindex
import graphviz
import fnmatch
import os
import json
def find_descendants(parent_dict: dict):
child_dict = {}
for derived, bases in parent_dict.items():
if derived not in child_dict:
child_dict[derived] = []
for base in bases:
if base not in child_dict:
child_dict[base] = []
child_dict[base].append(derived)
return child_dict
##############################################################################################################
# Tree report
##############################################################################################################
def tree_report(parent_dict: dict, query: list, args):
matched_nodes = search_query_from_dict(parent_dict, {}, query)
if query:
verbal(args, f"query: {query}, found {len(matched_nodes)} nodes", matched_nodes)
if query and not matched_nodes:
print("no symbol found for ", query)
print("consider using wildcard")
return
if args.base:
print_ancestors(parent_dict, matched_nodes)
if args.derived:
child_dict = find_descendants(parent_dict)
print_descendants(child_dict, matched_nodes)
def print_ancestors(parent_dict: dict, classes: list):
if not classes:
classes = [p for p in parent_dict.keys() if all(p not in parent for parent in parent_dict.values())]
print("#######################################################")
print("printing ancestors")
print('============================')
tree_print(parent_dict, classes, "", "ancestors")
print("#######################################################")
print()
def print_descendants(child_dict: dict, classes: list):
if not classes:
classes = [c for c in child_dict.keys() if all(c not in children for children in child_dict.values())]
print("#######################################################")
print("printing descendants")
print('============================')
tree_print(child_dict, classes, "", "descendants")
print("#######################################################")
print()
def tree_print(connection: dict, nodes: list, indent: str = "", msg: str = ""):
if isinstance(nodes, str):
nodes = [nodes]
for node in nodes:
is_top = (indent == "")
next_level_nodes = connection.get(node, [])
if is_top:
if not next_level_nodes:
print(node, f": no {msg} found")
return
else:
print(node)
for i, next_node in enumerate(next_level_nodes):
is_last = (i == len(next_level_nodes) - 1)
prefix = "└── " if is_last else "├── "
print(indent + prefix + next_node)
tree_print(connection, next_node, indent + (" " if is_last else "│ ") )
if is_top:
print('----------------------------')
##############################################################################################################
# Graph report
##############################################################################################################
# secondary_edge is used to represent a secondary relation such as template link
def graph_report(parent_dict: dict, query: list, out_file: str, args, secondary_edge: dict):
dot = graphviz.Digraph(strict=True)
dot.node_attr["shape"] = "box"
dot.node_attr["style"] = "rounded"
inserted = {}
matched_nodes = search_query_from_dict(parent_dict, secondary_edge, query)
if query:
verbal(args, f"query: {query}, found {len(matched_nodes)} nodes", matched_nodes)
if query and not matched_nodes:
print("no symbol found for ", query)
print("consider using wildcard")
return
for node in matched_nodes:
insert_node_to_dot(dot, node, inserted, style="filled, rounded", fillcolor="turquoise")
if not matched_nodes:
# print all nodes and edges
generate_graph(parent_dict, None, parent_dict.keys(), dot, inserted, args, secondary_edge)
else:
child_dict = find_descendants(parent_dict)
generate_graph(parent_dict, child_dict, matched_nodes, dot, inserted, args, secondary_edge)
dot.render(out_file, format='pdf')
print("use https://www.devtoolsdaily.com/graphviz to view graph")
print(f"graph file is at ./{out_file}")
# generate a graph view of the class hierarchy using Graphviz
def generate_graph(parent_dict: dict, child_dict: dict, nodes: list, dot: graphviz.Digraph, inserted: dict, args, secondary_edge: dict = {}):
if isinstance(nodes, str):
nodes = [nodes]
for curr_node in nodes:
# travese towards base
if parent_dict and args.base:
for other_node in parent_dict.get(curr_node, []):
edge_key = f"{other_node}->{curr_node}"
if edge_key in inserted:
continue
insert_to_dot(dot, other_node, curr_node, edge_key, inserted, dir="back")
if args.connected:
generate_graph(parent_dict, child_dict, other_node, dot, inserted, args, secondary_edge)
else:
# don't need the child of the parent
generate_graph(parent_dict, None, other_node, dot, inserted, args, secondary_edge)
# travese towards derived
if child_dict and args.derived:
for other_node in child_dict.get(curr_node, []):
edge_key = f"{curr_node}->{other_node}"
if edge_key in inserted:
continue
insert_to_dot(dot, curr_node, other_node, edge_key, inserted, dir="back")
if args.connected:
generate_graph(parent_dict, child_dict, other_node, dot, inserted, args, secondary_edge)
else:
# don't need the parent of the child
generate_graph(None, child_dict, other_node, dot, inserted, args, secondary_edge)
# handle secondary_edge
if curr_node in secondary_edge:
for other_node in secondary_edge[curr_node]:
edge_key = f"{other_node['name']}->{curr_node}"
if edge_key in inserted:
continue
edge_label = other_node["label"] if "label" in other_node else ""
insert_to_dot(dot, other_node["name"], curr_node, edge_key, inserted, minlen="2", \
color="navy", arrowhead="odot", style="dashed", splines="false", \
headlabel=edge_label)
if args.connected:
generate_graph(parent_dict, child_dict, other_node["name"], dot, inserted, args, secondary_edge)
else:
# don't need the child of the parent
generate_graph(parent_dict, None, other_node["name"], dot, inserted, args, secondary_edge)
def insert_to_dot(dot: graphviz.Digraph, src: str, dest: str, edge_key: str, inserted: dict, **attrs):
src_name = insert_node_to_dot(dot, src, inserted)
dest_name = insert_node_to_dot(dot, dest, inserted)
inserted[edge_key] = True
dot.edge(src_name, dest_name, **attrs)
# insert to dot if not exist
# and return the escaped name of the node
def insert_node_to_dot(dot: graphviz.Digraph, node: str, inserted: dict, **attrs) -> str:
node_name = node.replace("::", "~")
if node not in inserted:
dot.node(node_name, trim_namespace(node), tooltip=node, **attrs)
inserted[node] = True
return node_name
##############################################################################################################
# clang symbol handling
##############################################################################################################
# return the fully quilified name with the complete namespace
def get_full_type_name(node: clang.cindex.Cursor):
type_name = node.type.get_declaration().type.get_canonical().spelling or node.spelling
return replace_std_string(node, type_name)
# std::string usually expands to hard-to-read underlying type
# replace_std_string() will convert it back to std::string
std_string_canonical = ""
def replace_std_string(node: clang.cindex.CursorKind, type_name: str):
global std_string_canonical
STD_STRING = "std::string"
if std_string_canonical:
return type_name.replace(std_string_canonical, STD_STRING)
if node.kind.is_declaration() or node.kind == clang.cindex.CursorKind.CXX_BASE_SPECIFIER:
for child in node.get_children():
if child.kind == clang.cindex.CursorKind.TYPE_REF and STD_STRING in child.spelling:
std_string_canonical = child.type.get_canonical().spelling
return type_name.replace(std_string_canonical, STD_STRING)
return type_name
# print AST for debug
def print_ast(node, level=0):
if (level == 1):
print("==========================")
if (level > 0):
print(' ' * (level - 1) + to_string(node))
# template_args = ""
# if (node.type.get_num_template_arguments() > 0):
# template_args = node.type.get_template_argument_type(0).spelling
# print(' ' * (level - 1) + f'{node.spelling}, K:{node.kind}, T:{node.type.spelling}, #A: {node.type.get_num_template_arguments()}, A:{template_args}')
for child in node.get_children():
print_ast(child, level + 1)
def to_string(node) -> str:
loc_str = get_symbol_loc(node)
return f'S: {node.spelling}, K: {node.kind}, PS: {node.semantic_parent.spelling if node.semantic_parent else ""},' + \
f'L: {loc_str}, T: {node.type.spelling}, TK: {node.type.kind.spelling}, #T: {node.get_num_template_arguments()}'
def get_symbol_loc(node) -> str:
loc = f"{node.location.file.name}:{node.location.line}" if node.location.file else ""
return loc
def get_symbol_decl_loc_from_def(node) -> str:
if (node.kind == clang.cindex.CursorKind.FUNCTION_TEMPLATE):
# use the definition location instead
return get_symbol_loc(node)
return get_symbol_loc(node.canonical)
# this is used to parse the declaration AST to gather the original template arguments
# only use for non-member functions
# e.g., template <typename T, int N> void func(int a) => returns <T, int>
def get_template_list_from_declaration(node: clang.cindex.Cursor):
if node.kind not in [clang.cindex.CursorKind.CLASS_TEMPLATE, clang.cindex.CursorKind.FUNCTION_TEMPLATE]:
raise ValueError(f"wrong node: {node.spelling}")
template_types = []
for child in node.get_children():
if child.kind == clang.cindex.CursorKind.TEMPLATE_TYPE_PARAMETER:
template_types.append(child.spelling)
elif child.kind == clang.cindex.CursorKind.TEMPLATE_NON_TYPE_PARAMETER:
template_types.append(child.type.spelling)
return "<" + (", ".join(template_types)) + ">"
# this is used to gather the actual template arguments that are used for instantiation
# e.g., func<int, 1>(); => returns <int, 1>
def get_template_list_from_instantiation(node):
if node.type.kind != clang.cindex.TypeKind.FUNCTIONPROTO:
raise ValueError(f"wrong node: {node.spelling}")
template_args = []
for idx in range(0, node.get_num_template_arguments()):
node_kind = None
try:
# clang will throw on variadic templates
node_kind = node.get_template_argument_kind(idx)
except:
continue
if node_kind == clang.cindex.TemplateArgumentKind.TYPE:
type_name = node.get_template_argument_type(idx).spelling
if is_lambda_type(type_name):
template_args.append("(lambda)")
else:
template_args.append(node.get_template_argument_type(idx).spelling)
elif node_kind == clang.cindex.TemplateArgumentKind.INTEGRAL:
template_args.append( str(node.get_template_argument_value(idx)) )
elif node_kind in [clang.cindex.TemplateArgumentKind.NULL, clang.cindex.TemplateArgumentKind.NULLPTR]:
continue
else:
# ref: https://github.com/llvm-mirror/clang/blob/aa231e4be75ac4759c236b755c57876f76e3cf05/bindings/python/clang/cindex.py#L1383
# the last kind is DECLARATION meaning the template arg is an pointer or reference to some object
continue
if template_args:
return "<" + (", ".join(template_args)) + ">"
elif node.get_num_template_arguments() > 0:
# template args contains only NULL, NULLPTR, or DECLARATION
return "<>"
else:
return ""
def get_param_list(node: clang.cindex.Cursor, type_only=False):
params = []
if node.kind == clang.cindex.CursorKind.FUNCTION_TEMPLATE:
for child in node.get_children():
if child.kind != clang.cindex.CursorKind.PARM_DECL:
continue
param_name = child.spelling
param_type = ""
for grandchild in child.get_children():
if grandchild.kind != clang.cindex.CursorKind.TYPE_REF:
continue
param_type = replace_lambda_name(grandchild.spelling)
break
if type_only:
params.append(param_type.strip())
else:
params.append(f"{param_type} {param_name}".strip())
elif node.kind == clang.cindex.CursorKind.VAR_DECL:
# function pointer
for param in node.type.get_canonical().get_pointee().argument_types():
param_type = replace_lambda_name(param.spelling)
params.append( param_type.strip() )
else:
for param in node.get_arguments():
param_type = replace_lambda_name(param.type.spelling)
if type_only:
params.append(param_type.strip())
else:
params.append( f"{param_type} {param.spelling}".strip() )
param_str = ", ".join(params)
return param_str
def replace_lambda_name(typename: str):
if is_lambda_type(typename):
return trim_lambda_name(typename)
return typename
def is_lambda_type(typename: str):
return typename.startswith("(lambda at")
def trim_lambda_name(typename: str):
# e.g., "(lambda at /file/foo.cpp:10:2) &&"
suffix = typename.split(')')[-1]
location = typename.split('at', 1)[1].strip()
file, line, _ = location.split(':')
file = os.path.basename(file)
param_type = f"(lambda@{file}:{line}){suffix}"
return param_type
##############################################################################################################
# clang interaction
##############################################################################################################
# return a list of the source files if compile_commands.json exists
def parse_compile_options(dir: str, args) -> list:
# when a file doesn't have entry in compile_commands.json or the json doesn't exist
# will use compile_default_options
global compile_default_options
compile_default_options = {}
global compile_db
compile_db = {}
for compile_db_path in [args.compile_db, os.path.join(dir, "compile_commands.json"), os.path.join(dir, ".cpp_util/compile_commands.json")]:
if compile_db_path and os.path.isfile(compile_db_path):
parse_compile_commands_json(compile_db_path)
return compile_db.keys() # return a list of the source files
guess_incl_path(dir)
return []
def parse_compile_commands_json(file: str):
with open(file) as fd:
json_db = json.load(fd)
for obj in json_db:
compile_options = trim_compile_options(obj["arguments"])
abs_path = os.path.join(obj["directory"], obj["file"])
compile_db[abs_path] = {"opts": compile_options, "dir": obj["directory"]}
# only keep -I, -D, and -std options
def trim_compile_options(options: list) -> list:
trimmed = []
for opt in options:
if opt.startswith("-I") or opt.startswith("-D") or opt.startswith("-std"):
trimmed.append(opt)
compile_default_options[opt] = True
return trimmed
def guess_incl_path(dir: str):
for dirpath, _, files in os.walk(dir):
for file in files:
if file.endswith('.h') or file.endswith('.hpp'):
compile_default_options[f"-I{dirpath}"] = True
break
def is_project_defined_symbol(node, project_dir: str, ws_root: str = ""):
# Ensure both paths are absolute
if node.location.file:
file_path = os.path.abspath(node.location.file.name)
return file_path.startswith(project_dir) or (ws_root and file_path.startswith(ws_root))
return False
##############################################################################################################
# miscellaneous
##############################################################################################################
def path_name_match(path: str, patterns: list) -> bool:
if not patterns:
return False
for pattern in patterns:
if is_glob(pattern):
if fnmatch.fnmatch(path, pattern):
return True
else:
if pattern in path:
return True
return False
def is_glob(pattern: str) -> bool:
glob_chars = ['*', '?', '[', ']', '{', '}', '!']
return any(char in pattern for char in glob_chars)
def find_last_double_colon_before_template(s):
# Find the index of the first occurrence of '<'
template_index = s.find('<')
if template_index == -1:
substring = s
else:
# Slice the string up to the first occurrence of '<'
substring = s[:template_index]
# Find the last occurrence of '::' in the sliced portion
return substring.rfind('::')
def break_long_name(name):
if len(name) > 50:
index = find_last_double_colon_before_template(name)
if index != -1:
index += 2
return name[:index] + "\\n" + name[index:]
return name
# remove the namespaces
def trim_namespace(symbol_name, preserve_std = True):
result = ''
temp_part = ''
for char in symbol_name:
if char == ' ' and (temp_part and temp_part[-1] == ' '):
# consecutive empty space, ignore
continue
temp_part += char
if char == ':':
if (temp_part.startswith("lambda@")):
# continue for lambda types
continue
if preserve_std and (temp_part == 'std:' or temp_part == 'std::'):
continue
temp_part = ''
elif char in ['<', '>', '(', ')', ' ']:
result += temp_part
temp_part = ''
elif char == ',':
result += temp_part + ' '
temp_part = ''
else:
# do nothing
pass
result += temp_part
return result
def verbal(opton, *args):
if (opton.verbal):
print(*args)
def search_query_from_dict(parent_dict: dict, secondary_edge: dict, query: list):
node_list = []
for value in parent_dict.values():
node_list.extend(value)
node_list.extend(parent_dict.keys())
# searching in secondary_edge can be noisy
# e.g., printing std::vector<BaseClass> doesn't provide much insight
# at the time being, we exclude the types starts with std:: to avoid the ones from stl
if secondary_edge:
for t_list in secondary_edge.values():
node_list.extend([t_arg["name"] for t_arg in t_list if not t_arg["name"].startswith("std::")])
node_list.extend([key for key in secondary_edge.keys() if not key.startswith("std::")])
return search_query(node_list, query)
def search_query(node_list: list, query: list):
result = []
for query_name in query:
for node in node_list:
if match_query(node, query_name):
if node not in result:
result.append(node)
return result
def match_query(declaration: str, query: str) -> bool:
# break down declaration like a::b<c::d, e>::f to [a::b, c::d, e, ::f]
pattern = r'[<>,() ]'
names = [n.strip() for n in re.split(pattern, declaration)]
if '*' not in query:
return exact_match_query(names, query)
else:
return fuzzy_match_query(names, query)
def exact_match_query(names: list, query_name: str) -> bool:
for name in names:
if name == query_name or name.endswith(f"::{query_name}"):
return True
return False
def fuzzy_match_query(names: list, query: str) -> bool:
query = "^(.*::)?" + query.replace("*", ".*") + "$"
return any([re.match(query, name) for name in names])