diff --git a/src/malls/ts/utils.py b/src/malls/ts/utils.py index d941287..a1f5aea 100644 --- a/src/malls/ts/utils.py +++ b/src/malls/ts/utils.py @@ -265,16 +265,19 @@ def tree_sitter_to_lsp_position(text: str, pos: Point, new_text: str = None) -> lines = text.splitlines(keepends=True) - line_text = lines[ts_line] + if len(lines) > ts_line: + line_text = lines[ts_line] - # Decode the line text from UTF-8 to a string - line_string = line_text.decode("utf-8") + # Decode the line text from UTF-8 to a string + line_string = line_text.decode("utf-8") - # Get the slice of the string up to the byte offset - string_slice = line_string.encode("utf-8")[:ts_byte_offset].decode("utf-8") + # Get the slice of the string up to the byte offset + string_slice = line_string.encode("utf-8")[:ts_byte_offset].decode("utf-8") - # The length of this slice in UTF-16 code units is the LSP character position - lsp_char = len(string_slice.encode("utf-16-le")) // 2 + # The length of this slice in UTF-16 code units is the LSP character position + lsp_char = len(string_slice.encode("utf-16-le")) // 2 + else: + lsp_char = 0 return Position(line=ts_line, character=lsp_char) @@ -935,10 +938,12 @@ def find_asset_from_expr(node: Node, symbol: str, document_uri: str, storage: di # of associations while assets: el = assets.pop(0) + el_name = el # if we have a tuple, then we have to find the asset directly, not from the association if type(el) is tuple: + el_name = el[0] node, file = bfs_search( - document_uri, FIND_ASSET_DECLARATION, "asset_declaration", el[0], storage + document_uri, FIND_ASSET_DECLARATION, "asset_declaration", el_name, storage ) else: # retrieve name of asset @@ -946,7 +951,7 @@ def find_asset_from_expr(node: Node, symbol: str, document_uri: str, storage: di if not node: break asset_name = node.children_by_field_name("id")[0].text - if el == symbol: + if el_name == symbol: if type(el) is tuple: result = node else: @@ -992,8 +997,11 @@ def find_symbol_reaching( assets.pop(-1) # remove last element (which is the attack step) # get the asset where the attack step is defined (last element) if assets: # go down the chain + asset_name = assets[-1] + if type(assets[-1]) is tuple: + asset_name = asset_name[0] asset, result_file = find_asset_from_expr( - node, assets[-1], document_uri, storage, assets + node, asset_name, document_uri, storage, assets ) else: asset = node.parent.parent.parent # go to asset @@ -1137,3 +1145,193 @@ def query_for_error_nodes(tree: Tree, text: str, doc_uri: str, notification_stor notification_storage[doc_uri] = [diagnostic] return + + +def find_meta_comment_category_declaration(node: Node) -> list: + """ + In a category declaration, we will try to find if the node has + any meta information and, if so, return it. + """ + meta_info = [] + for children in node.children_by_field_name("meta"): + meta_info.append(children.child_by_field_name("info").text.strip(b'"')) + + return meta_info + + +def find_meta_comment_asset_declaration(node: Node) -> list: + """ + In an asset declaration, we will try to find if the node has + any meta information and, if so, return it. + """ + meta_info = [] + for children in node.children_by_field_name("meta"): + meta_info.append(children.child_by_field_name("info").text.strip(b'"')) + + return meta_info + + +def find_meta_comment_attack_step(node: Node) -> list: + """ + In an attack step, we will try to find if the node has + any meta information and, if so, return it. + """ + meta_info = [] + for children in node.children_by_field_name("meta"): + meta_info.append(children.child_by_field_name("info").text.strip(b'"')) + + return meta_info + + +def find_meta_comment_asset_variable( + node: Node, symbol: str, document_uri: str, storage: dict +) -> list: + """ + In an asset variable, we will follow the expression + chain and get the asset where the symbol is defined. + Once we have it, we just have to obtain the meta + comments it contains + """ + asset, _ = find_asset_from_expr( + node.child_by_field_name("value"), symbol, document_uri, storage, [] + ) + + if not asset: + return [] + + meta_info = [] + for children in asset.children_by_field_name("meta"): + meta_info.append(children.child_by_field_name("info").text.strip(b'"')) + + return meta_info + + +def find_meta_comment_asset_variable_subsitution( + node: Node, symbol: str, document_uri: str, storage: dict +) -> list: + """ + In an asset variable substition, we will have to first find + where the variable is defined. Afterwards, follow the expression + chain and get the asset referenced by the variable. Once we have + it, we just have to obtain the meta comments it contains. + """ + + # find where the variable is defined + variable_node, _ = find_symbol_definition_variable_substitution( + node, symbol, document_uri, storage + ) + + if variable_node is None: + # in case the variable is not defined anywhere + return [] + + # divide the expression + assets = [] + visit_expr(variable_node.children[-1].children[0].walk(), assets, document_uri, storage) + + # obtain the last expression component (so we find the asset referenced by the variable) + asset_symbol = assets[-1] + + # find the asset the variable refers to + asset, _ = find_asset_from_expr( + variable_node.child_by_field_name("value"), asset_symbol, document_uri, storage, assets + ) + + if not asset: + # in case the asset is not found + return [] + + # otherwise get the meta corresponding to that asset + meta_info = [] + for children in asset.children_by_field_name("meta"): + meta_info.append(children.child_by_field_name("info").text.strip(b'"')) + + return meta_info + + +def find_meta_comment_asset_expr( + node: Node, symbol: str, document_uri: str, storage: dict, pos: tuple +) -> list: + """ + In an asset expr, we can simply find where the asset mentioned by the symbol is defined + (via the expression chain) and find the needed meta comments. + """ + + # find asset from expression + asset, _ = find_symbol_reaching(node, symbol, pos, document_uri, storage) + + if not asset: + # in case the asset is not found + return [] + + # otherwise get the meta corresponding to that asset + meta_info = [] + for children in asset.children_by_field_name("meta"): + meta_info.append(children.child_by_field_name("info").text.strip(b'"')) + + return meta_info + + +def find_meta_comment_association( + node: Node, symbol: str, document_uri: str, storage: dict +) -> list: + """ + In an association, we can call the auxiliary `find_symbol_definition_association` + which will find the asset referenced by the symbol or the current node otherwise, + from which we can find the corresponding meta. + """ + + # find the node where the meta is defined (either the current node or an asset node) + result_node, _ = find_symbol_definition_association(node, symbol, document_uri, storage) + + if not result_node: + # in case the asset is not found + return [] + + # otherwise get the meta corresponding to that asset + meta_info = [] + for children in result_node.children_by_field_name("meta"): + meta_info.append(children.child_by_field_name("info").text.strip(b'"')) + + return meta_info + + +def find_meta_comment_function( + node: Node, symbol: str, document_uri: str = None, storage: dict = None +) -> list: + """ + Given a node and a symbol, this function will find the point + where that symbol is defined. + + Since the node can be of any type, we need to go up the parent + tree until we find a parent from which we can extract relevant + information. + """ + + original_position = (node.start_point, node.end_point) + + while True: + match node.type: + case "category_declaration": + return find_meta_comment_category_declaration(node) + case "asset_declaration": + return find_meta_comment_asset_declaration(node) + case "attack_step": + return find_meta_comment_attack_step(node) + case "asset_variable": + return find_meta_comment_asset_variable(node, symbol, document_uri, storage) + case "asset_variable_substitution": + return find_meta_comment_asset_variable_subsitution( + node, symbol, document_uri, storage + ) + case "asset_expr": + return find_meta_comment_asset_expr( + node, symbol, document_uri, storage, original_position + ) + case "association": + return find_meta_comment_association(node, symbol, document_uri, storage) + case _: + node = node.parent # go to parent if no info proved relevant + # terminate if there are no more parents + if node is None: + return [] diff --git a/tests/fixtures/mal/find_meta_comment_function.mal b/tests/fixtures/mal/find_meta_comment_function.mal new file mode 100644 index 0000000..d065970 --- /dev/null +++ b/tests/fixtures/mal/find_meta_comment_function.mal @@ -0,0 +1,59 @@ +#id: "org.mal-lang.testAnalyzer" +#version:"0.0.0" + +category Example +developer info: "dev cat" +modeler info: "mod cat" +{ + + abstract asset Asset1 + developer info: "dev asset" + modeler info: "mod asset" + { + let var = c.b + | compromise + developer info: "dev attack_step" + modeler info: "mod attack_step" + -> var().destroy + + | attack + -> c.b.h.attack4, + c.b.h[Asset5].attack5 + } + + abstract asset Asset3 + developer info: "dev asset3" + modeler info: "mod asset3" + { + + } + + asset Asset2 extends Asset3 + { + | destroy + } + + asset Asset4 + developer info: "dev asset4" + modeler info: "mod asset4" + { + & attack4 + } + + asset Asset5 extends Asset4 + developer info: "dev asset5" + modeler info: "mod asset5" + { + & attack5 + developer info: "dev attack_step_5" + modeler info: "mod attack_step_5" + } + +} +associations +{ + Asset1 [a] * <-- L --> * [c] Asset2 developer info: "some info" + Asset2 [d] 1 <-- M --> 1 [e] Asset2 + Asset3 [b] 1 <-- N --> 1 [f] Asset2 + Asset3 [g] 1 <-- O --> 1 [h] Asset4 +} diff --git a/tests/unit/test_find_meta_comment_function.py b/tests/unit/test_find_meta_comment_function.py new file mode 100644 index 0000000..839b69b --- /dev/null +++ b/tests/unit/test_find_meta_comment_function.py @@ -0,0 +1,64 @@ +from pathlib import Path + +import pytest +import tree_sitter_mal as ts_mal +from tree_sitter import Language, Parser + +from malls.lsp.classes import Document +from malls.lsp.utils import recursive_parsing +from malls.ts.utils import INCLUDED_FILES_QUERY, find_meta_comment_function, run_query + +MAL_LANGUAGE = Language(ts_mal.language()) +PARSER = Parser(MAL_LANGUAGE) +FILE_PATH = str(Path(__file__).parent.parent.resolve()) + "/fixtures/mal/" + +parameters = [ + ((3, 12), [b"dev cat", b"mod cat"]), + ((8, 22), [b"dev asset", b"mod asset"]), + ((13, 13), [b"dev attack_step", b"mod attack_step"]), + ((12, 18), [b"dev asset3", b"mod asset3"]), + ((16, 12), [b"dev asset3", b"mod asset3"]), + ((19, 15), [b"dev asset4", b"mod asset4"]), + ((20, 20), [b"dev asset5", b"mod asset5"]), + ((20, 28), [b"dev attack_step_5", b"mod attack_step_5"]), + ((54, 12), [b"some info"]), + ((54, 21), [b"some info"]), + ((57, 37), [b"dev asset4", b"mod asset4"]), +] + + +@pytest.mark.parametrize( + "point,comments", + parameters, +) +def test_find_meta_comment_function(mal_find_meta_comment_function, point, comments): + # build the storage (mimicks the file parsing in the server) + storage = {} + + doc_uri = FILE_PATH + "find_meta_comment_function.mal" + source_encoded = mal_find_meta_comment_function.read() + tree = PARSER.parse(source_encoded) + + storage[doc_uri] = Document(tree, source_encoded, doc_uri) + + # obtain the included files + root_node = tree.root_node + + captures = run_query(root_node, INCLUDED_FILES_QUERY) + if "file_name" in captures: + recursive_parsing(FILE_PATH, captures["file_name"], storage, doc_uri, []) + + ################################### + + # get the node + cursor = tree.walk() + while cursor.goto_first_child_for_point(point) is not None: + continue + + # confirm it's an identifier + assert cursor.node.type == "identifier" + + # we use sets to ensure order does not matter + returned_comments = find_meta_comment_function(cursor.node, cursor.node.text, doc_uri, storage) + + assert set(returned_comments) == set(comments)