-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_method.py
166 lines (124 loc) · 6.32 KB
/
extract_method.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import javalang
import conditional_nodes_tree as cnt
from visualize import visualize_matrix as vm
# Simple class holding information about variables and method calls
class CodeElement:
    """A variable or method name paired with the line it was found on.

    Two instances are equal (and hash identically) when both ``name`` and
    ``line`` match, so they can be de-duplicated in sets or used as dict keys.
    """

    def __init__(self, name, line):
        self.name = name  # identifier or dotted expression text
        self.line = line  # line associated with the name

    def __repr__(self):
        return f"{type(self).__name__}(name={self.name!r}, line={self.line!r})"

    def __hash__(self):
        return hash((self.name, self.line))

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of raising
        # AttributeError on their missing ``name``/``line`` attributes.
        if not isinstance(other, CodeElement):
            return NotImplemented
        return self.name == other.name and self.line == other.line
def read_java_code_from_file(file_path, encoding='utf-8'):
    """Return the full text of the file at ``file_path``.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the contents are not valid in ``encoding``.
    """
    with open(file_path, 'r', encoding=encoding) as source_file:
        return source_file.read()
def count_lines_in_string(s):
    """Return the number of lines in ``s``, or 0 when ``s`` is empty/falsy.

    The count is one more than the number of newline characters, so text
    that ends with a newline includes the empty remainder as a line.
    """
    if not s:
        return 0
    return len(s.split('\n'))
def process_expression(expression, line):
    """Yield a CodeElement for a dotted expression and for each of its parts.

    The full text is yielded first; then, at every dot, the segment before
    the dot and the remaining suffix are yielded. For ``a.b.c`` the names
    produced are, in order: ``a.b.c``, ``a``, ``b.c``, ``b``, ``c``.
    Each name is emitted exactly once per position (the last segment is not
    repeated).

    Args:
        expression: Dotted expression text, e.g. ``obj.field.method``.
        line: Line value attached to every yielded element.

    Yields:
        CodeElement instances, all carrying ``line``.
    """
    remainder = expression
    while True:
        # The current text as a whole (full expression, then each suffix)
        yield CodeElement(remainder, line)
        head, dot, tail = remainder.partition('.')
        if not dot:
            # No dot left: the text just yielded was the final segment
            return
        # The segment in front of the first dot
        yield CodeElement(head, line)
        remainder = tail
# Handles cases like 'rec[0][j].getName().replace' where array selectors are involved
def build_member_chain(selectors, start_chain):
    """Extend ``start_chain`` with the member names found in ``selectors``.

    Array selectors are ignored; every member reference or method invocation
    contributes ``.<member>`` to the chain. Any other selector kind is left
    out as well.

    Args:
        selectors: Iterable of javalang selector nodes.
        start_chain: Text the chain starts from.

    Returns:
        ``start_chain`` followed by the dotted member names, in order.
    """
    def names_member(candidate):
        # Index selectors such as [0] carry no member name
        if isinstance(candidate, javalang.tree.ArraySelector):
            return False
        return isinstance(
            candidate,
            (javalang.tree.MemberReference, javalang.tree.MethodInvocation),
        )

    chain = start_chain
    for selector in filter(names_member, selectors):
        chain += '.' + selector.member
    return chain
# Enhanced function to capture both parts and the whole of member references
def find_nodes(node, node_type):
    """Walk ``node`` and its descendants, yielding CodeElements for used names.

    For a node that has a position with a line:
      * member references and method invocations yield their expression
        (selector chain, or ``qualifier.member``) expanded through
        ``process_expression``, or just the member name when there is
        neither a selector nor a qualifier;
      * local variable declarations yield one element per declarator;
      * variable declarators yield their own name.
    Nodes without a position yield nothing themselves, but their children
    are still visited.

    Args:
        node: Tree node exposing ``position`` and ``children``.
        node_type: Passed along to recursive calls; not otherwise used here.

    Yields:
        CodeElement instances in traversal order.
    """
    position = node.position
    line = position.line if position else None

    if line is not None:
        if isinstance(node, (javalang.tree.MemberReference, javalang.tree.MethodInvocation)):
            if node.selectors:
                # Selectors take precedence; the chain starts at the member
                chain = build_member_chain(node.selectors, node.member)
                yield from process_expression(chain, line)
            elif node.qualifier:
                yield from process_expression(f"{node.qualifier}.{node.member}", line)
            else:
                yield CodeElement(node.member, line)

        # Declared local variables are reported on the declaration's line
        if isinstance(node, javalang.tree.LocalVariableDeclaration):
            yield from (CodeElement(declarator.name, line) for declarator in node.declarators)

        # Declarators of other variable declarations (when they carry a line)
        if isinstance(node, javalang.tree.VariableDeclarator):
            yield CodeElement(node.name, line)

    # Descend into children; a child may be a node or a list/set of nodes
    for child in node.children:
        candidates = child if isinstance(child, (list, set)) else (child,)
        for candidate in candidates:
            if isinstance(candidate, javalang.ast.Node):
                yield from find_nodes(candidate, node_type)
# Main function to parse Java method code and output the usage table
def main(java_file_path):
    """Parse a Java source file and report which names are used on each line.

    The file is read and tokenized, parsed as a compilation unit, and every
    name found by ``find_nodes`` is grouped by line into a usage table. The
    table is then updated with conditional context via the
    ``conditional_nodes_tree`` helpers, printed as a two-column text table,
    and handed to ``visualize_matrix`` together with the source line count.

    Nothing is parsed or printed when the source contains no ``{`` separator.
    A ``JavaSyntaxError`` is reported on stdout instead of being propagated.

    Args:
        java_file_path: Path of the Java source file to analyse.
    """
    java_code = read_java_code_from_file(java_file_path)
    number_of_lines = count_lines_in_string(java_code)

    # Tokenize the Java source code
    tokens = list(javalang.tokenizer.tokenize(java_code))

    # An opening brace is taken as the sign that there is a body to analyse
    has_open_brace = any(
        isinstance(token, javalang.tokenizer.Separator) and token.value == '{'
        for token in tokens
    )
    if not has_open_brace:
        return

    parser = javalang.parser.Parser(tokens)
    try:
        # The complete token stream is parsed as a compilation unit
        syntax_tree = parser.parse_compilation_unit()

        # Map each line to the set of names used on it
        usage_table = {}
        for element in find_nodes(syntax_tree, javalang.tree.MemberReference):
            usage_table.setdefault(element.line, set()).add(element.name)

        # Build the conditional tree and merge its context into the table
        root_node = cnt.ConditionalNode(-1, 'root', -1)
        cnt.find_conditional_nodes(syntax_tree, root_node, 1)
        cnt.update_usage_table_with_conditional_context(root_node, usage_table)

        # Print the result as a simple table
        print(f"{'Line':<5}{'Variables/Methods Accessed/Called':<35}")
        print('-' * 40)
        for line, items in sorted(usage_table.items()):
            # Set iteration order varies between runs; sort the names so the
            # printed table is reproducible.
            print(f"{line:<5}{', '.join(sorted(items)):<35}")

        # Visualize the result as a matrix
        vm(usage_table, number_of_lines)
    except javalang.parser.JavaSyntaxError as e:
        print("Failed to parse the method body:", e.description, e.args, e.at)
    else:
        print("Parsing completed successfully.")
if __name__ == '__main__':
    import sys

    # Analyse the file given as the first command-line argument; without one,
    # fall back to the default sample path.
    java_file_path = sys.argv[1] if len(sys.argv) > 1 else './java_samples/sample2.java'
    main(java_file_path)