"""Data-dependency graph (DDG) extraction for Python source code.

This is ``server/python/python_ddg.py`` as it stands after the two-commit
series "ddg for python" / "ddg" (the second commit drops the unused ``re``
import, the commented-out driver and the ``__main__`` block).

Companion wiring carried by the same series:

* ``server/app.py``: ``functions['python']`` becomes ``('ast', 'cfg', 'ddg')``.
* ``server/python/handler.py``: imports ``python_ddg`` and returns
  ``python_ddg.make(code)`` when ``model == 'ddg'``.
"""
import ast
import numbers
from typing import Dict, List, Set, Tuple
from uuid import uuid4 as uuid


def _assignment_names(node: ast.Assign) -> Tuple[List[str], Set[str]]:
    """Split an ``ast.Assign`` into the names it binds and the names it reads.

    Targets are walked rather than read via ``.id`` because a target may be an
    ``ast.Tuple``/``ast.List`` (unpacking), ``ast.Starred``, ``ast.Attribute``
    or ``ast.Subscript``; only ``ast.Name`` nodes in ``Store`` context count as
    bound variables.  Every ``ast.Name`` found in the right-hand side counts as
    a dependency, so ``x = x + 1`` keeps its self-edge.
    """
    defined = [
        sub.id
        for target in node.targets
        for sub in ast.walk(target)
        if isinstance(sub, ast.Name) and isinstance(sub.ctx, ast.Store)
    ]
    used = {sub.id for sub in ast.walk(node.value) if isinstance(sub, ast.Name)}
    return defined, used


def get_deps(code: str) -> Tuple[Dict[str, int], Dict[str, Set[str]]]:
    """Build the data-dependency map of the module-level assignments in *code*.

    Only plain ``ast.Assign`` statements directly in the module body are
    analysed; other statement kinds are skipped.

    Returns:
        ``(declarations, ddg)`` where ``declarations`` maps each assigned name
        to the 0-based index of the top-level statement that first assigned it
        (a statement sequence number, not a source line number), and ``ddg``
        maps each assigned name to the set of names read by any of its
        assignments (dependencies accumulate across reassignments).

    Raises:
        SyntaxError: if *code* is not valid Python (from ``ast.parse``).
    """
    declaration_line_num_map: Dict[str, int] = {}
    ddg: Dict[str, Set[str]] = {}

    for seq_no, node in enumerate(ast.parse(code).body):
        if not isinstance(node, ast.Assign):
            continue
        defined, used = _assignment_names(node)
        for name in defined:
            if name not in declaration_line_num_map:
                declaration_line_num_map[name] = seq_no
                ddg[name] = set()
            ddg[name].update(used)

    return declaration_line_num_map, ddg


class MethodLevelDDGs:
    """Per-function data-dependency maps for a piece of Python source."""

    def __init__(self, code: str):
        """Parse *code*; raises ``SyntaxError`` if it is not valid Python."""
        self.parsed_ast = ast.parse(code)

    def get_methods(self) -> List[ast.FunctionDef]:
        """Return the ``ast.FunctionDef`` nodes found in the parsed source.

        The visitor does not descend into a function once it has been
        recorded, so functions nested inside another ``def`` are not returned
        separately; methods inside classes are.
        """
        fn_nodes: List[ast.FunctionDef] = []

        class FnVisitor(ast.NodeVisitor):
            def visit_FunctionDef(self, node):
                fn_nodes.append(node)

        FnVisitor().visit(self.parsed_ast)
        return fn_nodes

    def recursive_ddg(self, fn_root_node: ast.AST) -> Dict[str, Set[str]]:
        """Return ``{name: names_read}`` for every assignment under the node.

        Unlike :func:`get_deps`, a later assignment to the same name replaces
        the earlier entry instead of extending it.
        """
        ddg: Dict[str, Set[str]] = {}

        class DDGVisitor(ast.NodeVisitor):
            def visit_Assign(self, node):
                defined, used = _assignment_names(node)
                for name in defined:
                    # Fresh copy per target so chained targets do not share
                    # one mutable set.
                    ddg[name] = set(used)

        DDGVisitor().visit(fn_root_node)
        return ddg


def fn_ddgs(code: str) -> Dict[str, Dict[str, Set[str]]]:
    """Map each function name in *code* to its data-dependency map.

    Functions sharing a name overwrite one another; the last one visited wins.
    """
    method_level_ddgs = MethodLevelDDGs(code)
    methods = method_level_ddgs.get_methods()
    return {method.name: method_level_ddgs.recursive_ddg(method) for method in methods}


class GraphRenderer:
    """
    this class is capable of rendering data structures consisting of
    dicts and lists as a graph using graphviz
    """

    graphattrs = {
        'labelloc': 't',
        'fontcolor': 'black',
        'bgcolor': 'white',
        'margin': '0',
    }

    nodeattrs = {
        'color': 'black',
        'fontcolor': 'black',
        'style': 'filled',
        'fillcolor': 'white',
    }

    edgeattrs = {
        'color': 'black',
        'fontcolor': 'black',
    }

    # Only populated while render() is running.
    _graph = None
    _rendered_nodes = None

    @staticmethod
    def _escape_dot_label(text):
        """Backslash-escape ``\\``, ``|``, ``<`` and ``>`` in a label string."""
        return text.replace("\\", "\\\\").replace("|", "\\|").replace("<", "\\<").replace(">", "\\>")

    @staticmethod
    def _leaf_label(node):
        """Return the label text for a non-dict, non-list value.

        Non-empty sets are written with their items sorted by ``repr`` so the
        label does not depend on set iteration order; everything else is
        ``str(node)``.
        """
        if isinstance(node, set) and node:
            return "{" + ", ".join(sorted(repr(item) for item in node)) + "}"
        return str(node)

    def _render_node(self, node):
        """Add *node* (and, recursively, its children) and return its node id."""
        if isinstance(node, (str, numbers.Number)) or node is None:
            # Scalars get a fresh id every time so equal values stay separate nodes.
            node_id = uuid()
        else:
            node_id = id(node)
        node_id = str(node_id)

        if node_id not in self._rendered_nodes:
            self._rendered_nodes.add(node_id)
            if isinstance(node, dict):
                self._render_dict(node, node_id)
            elif isinstance(node, list):
                self._render_list(node, node_id)
            else:
                self._graph.node(node_id, label=self._escape_dot_label(self._leaf_label(node)))

        return node_id

    def _render_dict(self, node, node_id):
        """Render a dict: one edge per key, labelled with the key."""
        self._graph.node(node_id, label=node.get("node_type", "[dict]"))
        for key, value in node.items():
            if key == "node_type":
                # Already used as the label of the dict node itself.
                continue
            child_node_id = self._render_node(value)
            self._graph.edge(node_id, child_node_id, label=self._escape_dot_label(key))

    def _render_list(self, node, node_id):
        """Render a list: one edge per element, labelled with its index."""
        self._graph.node(node_id, label="[list]")
        for idx, value in enumerate(node):
            child_node_id = self._render_node(value)
            self._graph.edge(node_id, child_node_id, label=self._escape_dot_label(str(idx)))

    def render(self, data, *, label=None):
        """Render *data* into a new ``graphviz.Digraph`` and return it.

        Args:
            data: nested dicts/lists (other values become leaf nodes).
            label: optional graph title.

        Raises:
            ImportError: if the ``graphviz`` package is not installed.
        """
        # Imported here so the AST analysis helpers stay importable without
        # the graphviz package.
        import graphviz as gv

        # create the graph
        graphattrs = self.graphattrs.copy()
        if label is not None:
            graphattrs['label'] = self._escape_dot_label(label)
        graph = gv.Digraph(graph_attr=graphattrs, node_attr=self.nodeattrs, edge_attr=self.edgeattrs)

        # recursively draw all the nodes and edges
        self._graph = graph
        self._rendered_nodes = set()
        try:
            self._render_node(data)
        finally:
            # Always drop the per-render state, even if rendering failed.
            self._graph = None
            self._rendered_nodes = None

        return graph


def make(code: str):
    """Return the module-level DDG of *code* rendered through Graphviz.

    The result is whatever ``graph.pipe()`` returns with ``format = 'dot'``
    (per the graphviz package docs: the engine's output as bytes, which
    requires the Graphviz executables to be installed).
    """
    _decls, graph = get_deps(code)
    renderer = GraphRenderer()
    graph = renderer.render(graph, label=None)
    graph.format = 'dot'
    return graph.pipe()