From a6bb30e803bfc8de36d4428a30d647dbaa4a638d Mon Sep 17 00:00:00 2001 From: HAHWUL Date: Tue, 23 Jan 2024 22:45:36 +0900 Subject: [PATCH 1/8] =?UTF-8?q?=F0=9F=91=94=20Add=20identification=20logic?= =?UTF-8?q?=20of=20parameters=20in=20Spring=20(targeting=20specific=20code?= =?UTF-8?q?=20styles=20/=20#232)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java_spring/src/HttpServletRequest.java | 16 +++++++ .../java_spring/src/RequestParam.java | 12 +++++ .../testers/java_spring_spec.cr | 8 +++- src/analyzer/analyzers/analyzer_spring.cr | 47 ++++++++++++++++--- 4 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 spec/functional_test/fixtures/java_spring/src/HttpServletRequest.java create mode 100644 spec/functional_test/fixtures/java_spring/src/RequestParam.java diff --git a/spec/functional_test/fixtures/java_spring/src/HttpServletRequest.java b/spec/functional_test/fixtures/java_spring/src/HttpServletRequest.java new file mode 100644 index 00000000..e66e3f42 --- /dev/null +++ b/spec/functional_test/fixtures/java_spring/src/HttpServletRequest.java @@ -0,0 +1,16 @@ +import javax.servlet.http.HttpServletRequest; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RestController; + +@RestController +public class MyController { + + @GetMapping("/greet") + public String greet(HttpServletRequest request) { + String name = request.getParameter("name"); + if (name == null || name.isEmpty()) { + name = "World"; + } + return "Hello, " + name + "!"; + } +} diff --git a/spec/functional_test/fixtures/java_spring/src/RequestParam.java b/spec/functional_test/fixtures/java_spring/src/RequestParam.java new file mode 100644 index 00000000..de3f1de6 --- /dev/null +++ b/spec/functional_test/fixtures/java_spring/src/RequestParam.java @@ -0,0 +1,12 @@ +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +@RestController +public class MyController { + + @GetMapping("/greet2") + public String greet2(@RequestParam("myname") String a, @RequestParam("b") int b) String name) { + return "Hello, " + a + b"!"; + } +} diff --git a/spec/functional_test/testers/java_spring_spec.cr b/spec/functional_test/testers/java_spring_spec.cr index 0ee49789..d128c6b0 100644 --- a/spec/functional_test/testers/java_spring_spec.cr +++ b/spec/functional_test/testers/java_spring_spec.cr @@ -19,9 +19,15 @@ extected_endpoints = [ Endpoint.new("/items", "POST"), Endpoint.new("/items/update/{id}", "PUT"), Endpoint.new("/items/delete/{id}", "DELETE"), + Endpoint.new("/greet", "GET", [ + Param.new("name", "", "query"), + ]), + Endpoint.new("/greet2", "GET", [ + Param.new("myname", "", "query"), + ]), ] FunctionalTester.new("fixtures/java_spring/", { :techs => 1, - :endpoints => 15, + :endpoints => 17, }, extected_endpoints).test_all diff --git a/src/analyzer/analyzers/analyzer_spring.cr b/src/analyzer/analyzers/analyzer_spring.cr index 4e53ad09..e57cf41d 100644 --- a/src/analyzer/analyzers/analyzer_spring.cr +++ b/src/analyzer/analyzers/analyzer_spring.cr @@ -14,6 +14,7 @@ class AnalyzerSpring < Analyzer url = "" if File.exists?(path) && (path.ends_with?(".java") || path.ends_with?(".kt")) content = File.read(path, encoding: "utf-8", invalid: :skip) + last_endpoint = Endpoint.new("", "") # Spring MVC has_class_been_imported = false @@ -41,7 +42,9 @@ class AnalyzerSpring < Analyzer if line.includes? "RequestMethod" define_requestmapping_handlers(["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS", "TRACE"]) else - @result << Endpoint.new("#{url}#{mapping_path}", "GET", details) + endpoint = Endpoint.new("#{url}#{mapping_path}", "GET", details) + last_endpoint = endpoint + @result << last_endpoint end end end @@ -50,31 +53,49 @@ class AnalyzerSpring < Analyzer if line.includes? "PostMapping" mapping_paths = mapping_to_path(line) mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "POST", details) + endpoint = Endpoint.new("#{url}#{mapping_path}", "POST", details) + last_endpoint = endpoint + @result << last_endpoint end end if line.includes? "PutMapping" mapping_paths = mapping_to_path(line) mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "PUT", details) + endpoint = Endpoint.new("#{url}#{mapping_path}", "PUT", details) + last_endpoint = endpoint + @result << last_endpoint end end if line.includes? "DeleteMapping" mapping_paths = mapping_to_path(line) mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "DELETE", details) + endpoint = Endpoint.new("#{url}#{mapping_path}", "DELETE", details) + last_endpoint = endpoint + @result << last_endpoint end end if line.includes? "PatchMapping" mapping_paths = mapping_to_path(line) mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "PATCH", details) + endpoint = Endpoint.new("#{url}#{mapping_path}", "PATCH", details) + last_endpoint = endpoint + @result << last_endpoint end end if line.includes? "GetMapping" mapping_paths = mapping_to_path(line) mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "GET", details) + endpoint = Endpoint.new("#{url}#{mapping_path}", "GET", details) + last_endpoint = endpoint + @result << last_endpoint + end + end + + # Param Analysis + param = line_to_param(line) + if param.name != "" + if last_endpoint.method != "" + last_endpoint.push_param(param) end end end @@ -100,6 +121,20 @@ class AnalyzerSpring < Analyzer @result end + def line_to_param(line : String) : Param + if line.includes? "getParameter(" + param = line.split("getParameter(")[1].split(")")[0].gsub("\"", "").gsub("'", "") + return Param.new(param, "", "query") + end + + if line.includes? "@RequestParam(" + param = line.split("@RequestParam(")[1].split(")")[0].gsub("\"", "").gsub("'", "") + return Param.new(param, "", "query") + end + + Param.new("", "", "") + end + def mapping_to_path(line : String) unless line.includes? "(" # no path From ae2cdded2da2bea8619236b1ebde8a7ccd8af3dd Mon Sep 17 00:00:00 2001 From: ksg Date: Sun, 18 Feb 2024 17:29:28 +0900 Subject: [PATCH 2/8] feat: Implement Java parser and lexer (#232, #233) - Added `src/miniparsers/java.cr` file - Added `JavaParser` class with `parse`, `parse_import_statements`, `parse_classes_body`, and `print_tokens` methods - Implemented tokenization logic in `MiniLexer` class - Added `line` property to `Token` class and updated `to_s` method to handle newline characters correctly - Added `src/minilexers/java.cr` file with keyword, literal, separator, and operator definitions for Java - Implemented `JavaLexer` class for tokenizing Java code, handling different types of tokens, and skipping whitespace and comments --- .../java_spring/src/ItemController.java | 2 + src/minilexers/java.cr | 336 ++++++++++++++++++ src/miniparsers/java.cr | 97 +++++ src/models/minilexer/minilexer.cr | 25 +- src/models/minilexer/token.cr | 11 +- 5 files changed, 466 insertions(+), 5 deletions(-) create mode 100644 src/minilexers/java.cr create mode 100644 src/miniparsers/java.cr diff --git a/spec/functional_test/fixtures/java_spring/src/ItemController.java b/spec/functional_test/fixtures/java_spring/src/ItemController.java index 88ebfb54..92ec1ab2 100644 --- a/spec/functional_test/fixtures/java_spring/src/ItemController.java +++ b/spec/functional_test/fixtures/java_spring/src/ItemController.java @@ -1,4 +1,6 @@ import org.springframework.web.bind.annotation.*; +import a.b.c.bind.annotation.*; +import org.springframework.c.d.e.*; @RestController @RequestMapping("/items") diff --git a/src/minilexers/java.cr b/src/minilexers/java.cr new file mode 100644 index 00000000..5535549d --- /dev/null +++ b/src/minilexers/java.cr @@ -0,0 +1,336 @@ +require "../models/minilexer/*" + +# Keywords +ABSTRACT = "abstract" +ASSERT = "assert" +BOOLEAN = "boolean" +BREAK = "break" +BYTE = "byte" +CASE = "case" +CATCH = "catch" +CHAR = "char" +CLASS = "class" +CONST = "const" +CONTINUE = "continue" +DEFAULT = "default" +DO = "do" +DOUBLE = "double" +ELSE = "else" +ENUM = "enum" +EXTENDS = "extends" +FINAL = "final" +FINALLY = "finally" +FLOAT = "float" +FOR = "for" +IF = "if" +GOTO = "goto" +IMPLEMENTS = "implements" +IMPORT = "import" +INSTANCEOF = "instanceof" +INT = "int" +INTERFACE = "interface" +LONG = "long" +NATIVE = "native" +NEW = "new" +PACKAGE = "package" +PRIVATE = "private" +PROTECTED = "protected" +PUBLIC = "public" +RETURN = "return" +SHORT = "short" +STATIC = "static" +STRICTFP = "strictfp" +SUPER = "super" +SWITCH = "switch" +SYNCHRONIZED = "synchronized" +THIS = "this" +THROW = "throw" +THROWS = "throws" +TRANSIENT = "transient" +TRY = "try" +VOID = "void" +VOLATILE = "volatile" +WHILE = "while" + +# Module related keywords +MODULE = "module" +OPEN = "open" +REQUIRES = "requires" +EXPORTS = "exports" +OPENS = "opens" +TO = "to" +USES = "uses" +PROVIDES = "provides" +WITH = "with" +TRANSITIVE = "transitive" + +# Local Variable Type Inference +VAR = "var" # reserved type name + +# Switch Expressions +YIELD = "yield" # reserved type name from Java 14 + +# Records +RECORD = "record" + +# Sealed Classes +SEALED = "sealed" +PERMITS = "permits" +NON_SEALED = "non-sealed" + +# Literals +DECIMAL_LITERAL = /0|[1-9]([_\d]*\d)?[lL]?/ +HEX_LITERAL = /0[xX][0-9a-fA-F]([0-9a-fA-F_]*[0-9a-fA-F])?[lL]?/ +OCT_LITERAL = /0[0-7]([0-7_]*[0-7])?[lL]?/ +BINARY_LITERAL = /0[bB][01]([01_]*[01])?[lL]?/ +FLOAT_LITERAL = /((\d+\.\d*|\.\d+)([eE][+-]?\d+)?|[+-]?\d+[eE][+-]?\d+)[fFdD]?/ +HEX_FLOAT_LITERAL = /0[xX]([0-9a-fA-F]+(\.[0-9a-fA-F]*)?|\.[0-9a-fA-F]+)[pP][+-]?\d+[fFdD]?/ +BOOL_LITERAL = /true|false/ +CHAR_LITERAL = /'([^'\\\r\n]|\\['"\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^'"\r\n])*'/ +STRING_LITERAL = /"([^"\\\r\n]|\\["\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^"\r\n])*"/ +TEXT_BLOCK = /"""\s*(.|\\["\\bfnrt])*?\s*"""/ +NULL_LITERAL = "null" + +# Separators +LPAREN = "(" +RPAREN = ")" +LBRACE = "{" +RBRACE = "}" +LBRACK = "[" +RBRACK = "]" +SEMI = ";" +COMMA = "," +DOT = "." + +# Operators +ASSIGN = "=" +GT = ">" +LT = "<" +BANG = "!" +TILDE = "~" +QUESTION = "?" +COLON = ":" +EQUAL = "==" +LE = "<=" +GE = ">=" +NOTEQUAL = "!=" +AND = "&&" +OR = "||" +INC = "++" +DEC = "--" +ADD = "+" +SUB = "-" +MUL = "*" +DIV = "/" +BITAND = "&" +BITOR = "|" +CARET = "^" +MOD = "%" + +ADD_ASSIGN = "+=" +SUB_ASSIGN = "-=" +MUL_ASSIGN = "*=" +DIV_ASSIGN = "/=" +AND_ASSIGN = "&=" +OR_ASSIGN = "|=" +XOR_ASSIGN = "^=" +MOD_ASSIGN = "%=" +LSHIFT_ASSIGN = "<<=" +RSHIFT_ASSIGN = ">>=" +URSHIFT_ASSIGN = ">>>=" + +# Java 8 tokens +ARROW = "->" +COLONCOLON = "::" + +# Additional symbols not defined in the lexical specification +AT = "@" +ELLIPSIS = "..." + +# Whitespace and comments +WS = /[ \t\r\n\x0C]+/ +COMMENT = /\/\*.*?\*\//m +LINE_COMMENT = /\/\/[^\r\n]*/ + +# Identifiers +IDENTIFIER = /[a-zA-Z$_][a-zA-Z\d$_]*/ + +# Fragment rules +ExponentPart = /[eE][+-]?\d+/ +EscapeSequence = /\\(?:u005c)?[btnfr"'\\]|\\u(?:[0-3]?[0-7])?[0-7]|\\u[0-9a-fA-F]{4}/ +HexDigits = /[0-9a-fA-F]([_0-9a-fA-F]*[0-9a-fA-F])?/ +HexDigit = /[0-9a-fA-F]/ +Digits = /\d([_\d]*\d)?/ +LetterOrDigit = /[a-zA-Z\d$_]/ +Letter = /[a-zA-Z$_]|[^[:ascii:]]/ + +class JavaLexer < MiniLexer + def initialize + super + end + + def tokenize(@input : String) : Array(Token) + super + end + + def tokenize_logic(@input : String) : Array(Token) + while @position < @input.size + skip_whitespace_and_comments + + case @input[@position] + when '0'..'9' + match_number + when 'a'..'z', 'A'..'Z', '$', '_' + match_identifier_or_keyword + when "'" + match_char_literal + when '"' + match_string_literal_or_text_block + else + match_symbol_or_operator + end + end + + @tokens + end + + def skip_whitespace_and_comments + if match = @input.match(/^([ \t\r\n\x0C]+)/, @position) + @position += match[0].size + elsif match = @input.match(/^\/\*.*?\*\//m, @position) + @position += match[0].size + elsif match = @input.match(/^\/\/[^\r\n]*/, @position) + @position += match[0].size + end + end + + + def match_number + if (match = @input.match(/(0[xX][0-9a-fA-F](_?[0-9a-fA-F])*[lL]?|\d(_?\d)*(\.\d(_?\d)*)?([eE][+-]?\d(_?\d)*)?[fFdD]?)/, @position)) + literal = match[0] + self << case literal + when /^0[xX]/ + Tuple.new(:HEX_LITERAL, literal) + when /^0/ + Tuple.new(:OCT_LITERAL, literal) + else # /^[\d.]/ + Tuple.new(:DECIMAL_LITERAL, literal) + end + + @position += literal.size + end + end + + def match_identifier_or_keyword + if match = @input.match(/[a-zA-Z$_][a-zA-Z\d$_]*/, @position) + type = case match[0] + when ABSTRACT then :ABSTRACT + when ASSERT then :ASSERT + when BOOLEAN then :BOOLEAN + when BREAK then :BREAK + when BYTE then :BYTE + when CASE then :CASE + when CATCH then :CATCH + when CHAR then :CHAR + when CLASS then :CLASS + when CONST then :CONST + when CONTINUE then :CONTINUE + when DEFAULT then :DEFAULT + when DO then :DO + when DOUBLE then :DOUBLE + when ELSE then :ELSE + when ENUM then :ENUM + when EXTENDS then :EXTENDS + when FINAL then :FINAL + when FINALLY then :FINALLY + when FLOAT then :FLOAT + when FOR then :FOR + when IF then :IF + when GOTO then :GOTO + when IMPLEMENTS then :IMPLEMENTS + when IMPORT then :IMPORT + when INSTANCEOF then :INSTANCEOF + when INT then :INT + when INTERFACE then :INTERFACE + when LONG then :LONG + when NATIVE then :NATIVE + when NEW then :NEW + when PACKAGE then :PACKAGE + when PRIVATE then :PRIVATE + when PROTECTED then :PROTECTED + when PUBLIC then :PUBLIC + when RETURN then :RETURN + when SHORT then :SHORT + when STATIC then :STATIC + when STRICTFP then :STRICTFP + when SUPER then :SUPER + when SWITCH then :SWITCH + when SYNCHRONIZED then :SYNCHRONIZED + when THIS then :THIS + when THROW then :THROW + when THROWS then :THROWS + when TRANSIENT then :TRANSIENT + when TRY then :TRY + when VOID then :VOID + when VOLATILE then :VOLATILE + when WHILE then :WHILE + when MODULE then :MODULE + when OPEN then :OPEN + when REQUIRES then :REQUIRES + when EXPORTS then :EXPORTS + when OPENS then :OPENS + when TO then :TO + when USES then :USES + when PROVIDES then :PROVIDES + when WITH then :WITH + when TRANSITIVE then :TRANSITIVE + when VAR then :VAR + when YIELD then :YIELD + when RECORD then :RECORD + when SEALED then :SEALED + when PERMITS then :PERMITS + when NON_SEALED then :NON_SEALED + else :IDENTIFIER + end + + self << Tuple.new(type, match[0]) + @position += match[0].size + end + end + + + def match_char_literal + if match = @input.match(/'([^'\\\r\n]|\\['"\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^'"\r\n])*'/, @position) + self << Tuple.new(:CHAR_LITERAL, match[0]) + @position += match[0].size + end + end + + def match_string_literal_or_text_block + if match = @input.match(/"""[ \t]*[\r\n](.|\\["\\bfnrt])*?[\r\n][ \t]*"""/, @position) + self << Tuple.new(:TEXT_BLOCK, match[0]) + @position += match[0].size + elsif match = @input.match(/"[^"\\\r\n]*(\\["\\bfnrt][^"\\\r\n]*)*"/, @position) + self << Tuple.new(:STRING_LITERAL, match[0]) + @position += match[0].size + end + end + + def match_symbol_or_operator + case @input[@position] + when '(' then self << Tuple.new(:LPAREN, "(") + when ')' then self << Tuple.new(:RPAREN, ")") + when ' ' then self << Tuple.new(:WHITESPACE, " ") + when '.' then self << Tuple.new(:DOT, ".") + when '@' then self << Tuple.new(:AT, "@") + when '{' then self << Tuple.new(:LBRACE, "{") + when '}' then self << Tuple.new(:RBRACE, "}") + when '\n' then + self << Tuple.new(:NEWLINE, "\n") + @line += 1 + else + self << Tuple.new(:IDENTIFIER, @input[@position].to_s) + end + @position += 1 + end +end \ No newline at end of file diff --git a/src/miniparsers/java.cr b/src/miniparsers/java.cr new file mode 100644 index 00000000..5899f9c2 --- /dev/null +++ b/src/miniparsers/java.cr @@ -0,0 +1,97 @@ +require "../minilexers/java" +require "../models/minilexer/token" + +class JavaParser + def initialize + @import_statements = Array(String).new + @classes_body_tokens = Array(Array(Token)).new + end + + def parse(tokens : Array(Token)) + parse_import_statements(tokens) + parse_classes_body(tokens) + end + + def parse_import_statements(tokens : Array(Token)) + import_statements = [] of String + import_tokens = tokens.select { |token| token.type == :IMPORT } + import_tokens.each do |import_token| + next_token_index = import_token.index + 2 + next_token = tokens[next_token_index] + + if next_token && next_token.type == :IDENTIFIER + import_statement = next_token.value + next_token_index += 1 + + while next_token_index < tokens.size && tokens[next_token_index].type == :DOT + next_token_index += 1 + identifier_token = tokens[next_token_index] + break if !identifier_token || identifier_token.type != :IDENTIFIER + + import_statement += ".#{identifier_token.value}" + next_token_index += 1 + end + + @import_statements << import_statement + end + end + end + + def parse_classes_body(tokens : Array(Token)) + start_token_parse = false + class_body = Array(Token).new + + lbrace = 0 + rbrace = 0 + tokens.each do |token| + if !start_token_parse && token.type == :AT + start_token_parse = true + class_body = Array(Token).new + lbrace = 0 + rbrace = 0 + end + + if start_token_parse + if token.type == :LBRACE + lbrace += 1 + elsif token.type == :RBRACE + rbrace += 1 + end + + class_body << token + if lbrace > 0 && lbrace == rbrace + print_tokens class_body + @classes_body_tokens << class_body + start_token_parse = false + end + end + end + end + + def parse_annotation_definitions(tokens : Array(Token)) + method_tokens = tokens.select { |token| token.type == :METHOD } + method_tokens.each do |method_token| + method_lines = method_token.value.split("\n") + method_header = method_lines[0].gsub("{", "").strip + method_body = method_lines[1..-2].join("\n").strip + puts "Method: #{method_header}" + puts "Body: #{method_body}" + end + end + + def print_tokens(tokens : Array(Token)) + puts "token size: #{tokens.size}" + tokens.each do |token| + print(token.value) + end + puts("\n=====================================") + end +end + +# file_path = "/Users/ksg/workspace/noir/spec/functional_test/fixtures/java_spring/src/ItemController.java" +# input = File.read(file_path) +# lexer = JavaLexer.new +# tokens = lexer.tokenize(input) +# lexer.trace +# parser = JavaParser.new +# parser.parse(tokens) \ No newline at end of file diff --git a/src/models/minilexer/minilexer.cr b/src/models/minilexer/minilexer.cr index 85e4028c..33023779 100644 --- a/src/models/minilexer/minilexer.cr +++ b/src/models/minilexer/minilexer.cr @@ -5,13 +5,20 @@ class MiniLexer def initialize @mode = :normal @tokens = [] of Token + @position = 0 + @input = "" + @line = 1 end def mode=(mode) @mode = mode end - def tokenize(input : String) : Array(Token) + def <<(t : Tuple(Symbol, String)) + @tokens << Token.new(t[0], t[1], @tokens.size, @position, @line) + end + + def tokenize(@input : String) : Array(Token) results = tokenize_logic(input) if @mode == :persistent @@ -21,7 +28,7 @@ class MiniLexer results end - def tokenize_logic(input : String) : Array(Token) + def tokenize_logic(@input : String) : Array(Token) results = [] of Token results end @@ -29,4 +36,18 @@ class MiniLexer def find(token_type : Symbol) : Array(Token) @tokens.select { |token| token.type == token_type } end + + def trace() + line_number = 1 + source_line = "" + lines = @input.split "\n" + puts "line size: #{lines.size}, token number: #{tokens.size}" + @tokens.each do |token| + if token.line == line_number + puts "\nLine #{line_number}: " + lines[line_number-1] + line_number += 1 + end + puts token.to_s + end + end end diff --git a/src/models/minilexer/token.cr b/src/models/minilexer/token.cr index 4bc55d1d..d80a32d9 100644 --- a/src/models/minilexer/token.cr +++ b/src/models/minilexer/token.cr @@ -1,9 +1,11 @@ class Token property type : Symbol property value : String - property position : Int64 + property index : Int32 + property position : Int32 + property line : Int32 - def initialize(@type, @value, @position) + def initialize(@type, @value, @index, @position, @line) end def is?(type) @@ -11,6 +13,9 @@ class Token end def to_s - "#{@type} #{@value}" + if @value == "\n" + @value = "\\n" + end + "#{@type} '#{@value}'" end end From 51fca9ed5ac5012c447b81ea6f2ddfcda83ba592 Mon Sep 17 00:00:00 2001 From: ksg Date: Sun, 25 Feb 2024 16:33:11 +0900 Subject: [PATCH 3/8] feat: Refactor code for improved parsing and printing - Refactor `@classes_body_tokens` variable to `@classes_tokens` in `java.cr` - Add `@class_annotation` variable for storing class annotations in `java.cr` - Modify `parse_classes` method in `java.cr` to also parse class annotations - Add `parse_formal_parameters` method for parsing formal parameters in `java.cr` - Add `parse_annotations` method for parsing annotations in `java.cr` - Modify `parse_methods` method in `java.cr` to correctly parse method declarations and bodies - Add imports for `java` lexer and parser in `analyzer_spring.cr` - Change variable name `has_class_been_imported` to `has_spring_web_bind_class_been_import` in `analyzer_spring.cr` - Refactor code in `analyzer_spring.cr` to use the `JavaLexer` and `JavaParser` classes - Modify `tokenize` method in `minilexer.cr` to pass line number obtained from `line()` method to `Token` constructor - Modify `<<` method in `minilexer.cr` to use the new `line()` method to calculate line number for each token --- .../java_spring/src/ItemController.java | 2 +- src/analyzer/analyzers/analyzer_spring.cr | 289 ++++++++---------- src/minilexers/java.cr | 69 ++++- src/miniparsers/java.cr | 262 ++++++++++++++-- src/models/minilexer/minilexer.cr | 23 +- src/models/minilexer/token.cr | 2 + src/output_builder/common.cr | 2 +- 7 files changed, 442 insertions(+), 207 deletions(-) diff --git a/spec/functional_test/fixtures/java_spring/src/ItemController.java b/spec/functional_test/fixtures/java_spring/src/ItemController.java index 92ec1ab2..136be38b 100644 --- a/spec/functional_test/fixtures/java_spring/src/ItemController.java +++ b/spec/functional_test/fixtures/java_spring/src/ItemController.java @@ -7,7 +7,7 @@ public class ItemController { @GetMapping("/{id}") - public Item getItem(@PathVariable Long id) { + public Item getItem(@PathVariable Long id) throws ItemNotFoundException { } @PostMapping diff --git a/src/analyzer/analyzers/analyzer_spring.cr b/src/analyzer/analyzers/analyzer_spring.cr index 4e53ad09..0c60368c 100644 --- a/src/analyzer/analyzers/analyzer_spring.cr +++ b/src/analyzer/analyzers/analyzer_spring.cr @@ -1,4 +1,6 @@ require "../../models/analyzer" +require "../../minilexers/java" +require "../../miniparsers/java" class AnalyzerSpring < Analyzer REGEX_CLASS_DEFINITION = /^(((public|private|protected|default)\s+)|^)class\s+/ @@ -15,84 +17,113 @@ class AnalyzerSpring < Analyzer if File.exists?(path) && (path.ends_with?(".java") || path.ends_with?(".kt")) content = File.read(path, encoding: "utf-8", invalid: :skip) - # Spring MVC - has_class_been_imported = false - content.each_line.with_index do |line, index| - details = Details.new(PathInfo.new(path, index + 1)) - if has_class_been_imported == false && REGEX_CLASS_DEFINITION.match(line) - has_class_been_imported = true + lexer = JavaLexer.new + tokens = lexer.tokenize(content) + parser = JavaParser.new + parser.parse(tokens) + has_spring_web_bind_class_been_import = false + parser.@import_statements.each do |import_statement| + if import_statement.includes? "org.springframework.web.bind.annotation." + has_spring_web_bind_class_been_import = true end - - if line.includes? "RequestMapping" - mapping_paths = mapping_to_path(line) - if has_class_been_imported == false && mapping_paths.size > 0 - class_mapping_url = mapping_paths[0] - - if class_mapping_url.ends_with?("/*") - class_mapping_url = class_mapping_url[0..-3] - end - if class_mapping_url.ends_with?("/") - class_mapping_url = class_mapping_url[0..-2] - end - - url = "#{class_mapping_url}" - else - mapping_paths.each do |mapping_path| - if line.includes? "RequestMethod" - define_requestmapping_handlers(["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS", "TRACE"]) + end + if has_spring_web_bind_class_been_import + # Spring Web + parser.@classes_tokens.each do |class_tokens| + class_annotations = parser.parse_annotations(tokens, class_tokens[0].index) + class_annotations.each do |class_annotation| + if class_annotation[1].value == "RequestMapping" + class_path_token = parser.parse_formal_parameters(tokens, class_annotation[1].index)[0][-1] + if class_path_token.type == :STRING_LITERAL + url = class_path_token.value[1..-2] + if url.ends_with? "*" + url = url[0..-2] + end + end + end + end + + parser.parse_methods(class_tokens).each do |method_tokens| + method_annotations = parser.parse_annotations(tokens, method_tokens[0].index) + method_annotations.each do |method_annotation_tokens| + if method_annotation_tokens[1].value == "RequestMapping" + annotation_parameters = parser.parse_formal_parameters(tokens, method_annotation_tokens[1].index) + + url_path = "" + line = method_annotation_tokens[1].line + if annotation_parameters.size != 0 + url_path = annotation_parameters[0][-1].value[1..-2] + if url.ends_with?("/") && url_path.starts_with?("/") + url_path = url_path[1..-1] + end + line = annotation_parameters[0][-1].line + end + parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index) + details = Details.new(PathInfo.new(path, line)) + + method_flag = false + if annotation_parameters.size > 1 + annotation_parameters.each do |annotation_parameter_tokens| + if annotation_parameter_tokens[0].value == "method" + method = annotation_parameter_tokens[-1].value + method_flag = true + @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) + break + end + end + end + + unless method_flag + ["GET", "POST", "PUT", "DELETE", "PATCH"].each do |method| + @result << Endpoint.new("#{url}#{url_path}", method, details) + end + end else - @result << Endpoint.new("#{url}#{mapping_path}", "GET", details) + ["GetMapping", "PostMapping", "PutMapping", "DeleteMapping", "PatchMapping"].each do |method_mapping| + if method_annotation_tokens[1].value == method_mapping + url_path = "" + line = method_annotation_tokens[1].line + method = method_mapping[0..-8].upcase + parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index) + + if tokens[method_annotation_tokens[1].index + 1].type == :LPAREN + annotation_parameters = parser.parse_formal_parameters(tokens, method_annotation_tokens[1].index) + method_path_token = annotation_parameters[0][-1] + if method_path_token.type == :STRING_LITERAL + url_path = method_path_token.value[1..-2] + if url.ends_with?("/") && url_path.starts_with?("/") + url_path = url_path[1..-1] + end + else + # error case + next + end + end + + details = Details.new(PathInfo.new(path, line)) + @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) + end + end end end end end - - if line.includes? "PostMapping" - mapping_paths = mapping_to_path(line) - mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "POST", details) - end - end - if line.includes? "PutMapping" - mapping_paths = mapping_to_path(line) - mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "PUT", details) - end - end - if line.includes? "DeleteMapping" - mapping_paths = mapping_to_path(line) - mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "DELETE", details) - end - end - if line.includes? "PatchMapping" - mapping_paths = mapping_to_path(line) - mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "PATCH", details) - end - end - if line.includes? "GetMapping" - mapping_paths = mapping_to_path(line) - mapping_paths.each do |mapping_path| - @result << Endpoint.new("#{url}#{mapping_path}", "GET", details) + else + # Reactive Router + content.scan(REGEX_ROUTER_CODE_BLOCK) do |route_code| + method_code = route_code[0] + method_code.scan(REGEX_ROUTE_CODE_LINE) do |match| + next if match.size != 4 + method = match[2] + endpoint = match[3].gsub(/\n/, "") + details = Details.new(PathInfo.new(path)) + @result << Endpoint.new("#{url}#{endpoint}", method, details) end end end - - # Reactive Router - content.scan(REGEX_ROUTER_CODE_BLOCK) do |route_code| - method_code = route_code[0] - method_code.scan(REGEX_ROUTE_CODE_LINE) do |match| - next if match.size != 4 - method = match[2] - endpoint = match[3].gsub(/\n/, "") - details = Details.new(PathInfo.new(path)) - @result << Endpoint.new("#{url}#{endpoint}", method, details) - end - end end end - rescue e + rescue e logger.debug e end Fiber.yield @@ -100,110 +131,40 @@ class AnalyzerSpring < Analyzer @result end - def mapping_to_path(line : String) - unless line.includes? "(" - # no path - return [""] - end - - paths = Array(String).new - splited_line = line.strip.split("(") - if splited_line.size > 1 && splited_line[1].includes? ")" - params = splited_line[1].split(")")[0] - params = params.gsub(/\s/, "") # remove space - if params.size > 0 - path = nil - # value parameter - if params.includes? "value=" - value = params.split("value=")[1] - if value.size > 0 - if value[0] == '"' - path = value.split("\"")[1] - elsif value[0] == '{' && value.includes? "}" - path = value[1..].split("}")[0] - end - end - end - - # first parameter - if path.nil? - if params[0] == '"' - path = params.split("\"")[1] - elsif params[0] == '{' && params.includes? "}" - path = params[1..].split("}")[0] + def get_endpoint_parameters(parser : JavaParser, tokens : Array(Token), method_token_index : Int32) : Array(Param) + endpoint_parameters = Array(Param).new + parser.parse_formal_parameters(tokens, method_token_index).each do |formal_parameter_tokens| + next if formal_parameter_tokens.size == 0 + + parameter_type = nil + if formal_parameter_tokens[-1].type == :IDENTIFIER + if formal_parameter_tokens[0].type == :AT + if formal_parameter_tokens[1].value == "PathVariable" + next + elsif formal_parameter_tokens[1].value == "RequestBody" + parameter_type = "form" + elsif formal_parameter_tokens[1].value == "RequestParam" + parameter_type = "query" + else + next # unknown parameter type end end - - # extract path - if path.nil? - # can't find path - paths << "" - else - if path.size > 0 && path[0] == '"' && path.includes? "," - # multiple path - path.split(",").each do |each_path| - if each_path.size > 0 - if each_path[0] == '"' - paths << each_path[1..-2] - else - paths << "" - end - end + + if !parameter_type.nil? + parameter_name = formal_parameter_tokens[-1].value # case of "@RequestParam String a" + if formal_parameter_tokens[-1].type != IDENTIFIER + if formal_parameter_tokens[2].type == :LPAREN && formal_parameter_tokens[3].type == :STRING_LITERAL + parameter_name_token = formal_parameter_tokens[3] # case of "@RequestParam("a") String a" + parameter_name = parameter_name_token.value[1..-2] end - else - # single path - if path.size > 0 && path[0] == '"' - path = path.split("\"")[1] - end - - paths << path end - end - else - # no path - paths << "" - end - end - # append slash - (0..paths.size - 1).each do |i| - path = paths[i] - if path.size > 0 && !path.starts_with? "/" - path = "/" + path - end - - paths[i] = path - end - - paths - end - - def is_bracket(content : String) - content.gsub(/\s/, "")[0].to_s == "{" - end - - def comma_in_bracket(content : String) - result = content.gsub(/\{(.*?)\}/) do |match| - match.gsub(",", "_BRACKET_COMMA_") + endpoint_parameters << Param.new(parameter_name, "", parameter_type) + end + end end - result.gsub("{", "").gsub("}", "") - end - - def extract_param(content : String) - # TODO - # case1 -> @RequestParam("a") - # case2 -> String a = param.get("a"); - # case3 -> String a = request.getParameter("a"); - # case4 -> (PATH) @PathVariable("a") - end - - macro define_requestmapping_handlers(methods) - {% for method, index in methods %} - if line.includes? "RequestMethod.{{method.id}}" - @result << Endpoint.new("#{url}#{mapping_path}", "{{method.id}}") - end - {% end %} + endpoint_parameters end end diff --git a/src/minilexers/java.cr b/src/minilexers/java.cr index 5535549d..2880bf7d 100644 --- a/src/minilexers/java.cr +++ b/src/minilexers/java.cr @@ -166,7 +166,7 @@ Letter = /[a-zA-Z$_]|[^[:ascii:]]/ class JavaLexer < MiniLexer def initialize - super + super end def tokenize(@input : String) : Array(Token) @@ -175,36 +175,61 @@ class JavaLexer < MiniLexer def tokenize_logic(@input : String) : Array(Token) while @position < @input.size - skip_whitespace_and_comments - + before_skip_position = -1 + while before_skip_position < @position + before_skip_position = @position + skip_whitespace_and_comments + end + case @input[@position] when '0'..'9' + t = 1 match_number when 'a'..'z', 'A'..'Z', '$', '_' + t = 2 match_identifier_or_keyword - when "'" + when '\'' + t = 3 match_char_literal when '"' + t = 4 match_string_literal_or_text_block else + t = 5 match_symbol_or_operator end end - + @tokens end def skip_whitespace_and_comments - if match = @input.match(/^([ \t\r\n\x0C]+)/, @position) - @position += match[0].size - elsif match = @input.match(/^\/\*.*?\*\//m, @position) - @position += match[0].size - elsif match = @input.match(/^\/\/[^\r\n]*/, @position) - @position += match[0].size + c = @input[@position] + if c == '\r' || c == '\t' + @position += 1 + elsif @position != @input.size - 1 + if c == '/' && @input[@position + 1] == '*' + @position += 2 + while @position < @input.size + if @input[@position] == '*' && @input[@position + 1] == '/' + @position += 2 + break + end + @position += 1 + end + elsif c == '/' && @input[@position + 1] == '/' + @position += 2 + while @position < @input.size + if @input[@position] == '\n' + @position += 1 + break + end + @position += 1 + end + end end end - def match_number if (match = @input.match(/(0[xX][0-9a-fA-F](_?[0-9a-fA-F])*[lL]?|\d(_?\d)*(\.\d(_?\d)*)?([eE][+-]?\d(_?\d)*)?[fFdD]?)/, @position)) literal = match[0] @@ -216,7 +241,7 @@ class JavaLexer < MiniLexer else # /^[\d.]/ Tuple.new(:DECIMAL_LITERAL, literal) end - + @position += literal.size end end @@ -295,14 +320,21 @@ class JavaLexer < MiniLexer self << Tuple.new(type, match[0]) @position += match[0].size + else + self << Tuple.new(:IDENTIFIER, @input[@position].to_s) + @position += 1 end end def match_char_literal - if match = @input.match(/'([^'\\\r\n]|\\['"\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^'"\r\n])*'/, @position) + if match = @input.match(/'([^'\\\r\n]|\\['"\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^'\r\n])*'/, @position) self << Tuple.new(:CHAR_LITERAL, match[0]) @position += match[0].size + else + # impossible to reach here + self << Tuple.new(:IDENTIFIER, @input[@position].to_s) + @position += 1 end end @@ -312,7 +344,11 @@ class JavaLexer < MiniLexer @position += match[0].size elsif match = @input.match(/"[^"\\\r\n]*(\\["\\bfnrt][^"\\\r\n]*)*"/, @position) self << Tuple.new(:STRING_LITERAL, match[0]) - @position += match[0].size + @position += match[0].size + else + # impossible to reach here + self << Tuple.new(:IDENTIFIER, @input[@position].to_s) + @position += 1 end end @@ -322,12 +358,13 @@ class JavaLexer < MiniLexer when ')' then self << Tuple.new(:RPAREN, ")") when ' ' then self << Tuple.new(:WHITESPACE, " ") when '.' then self << Tuple.new(:DOT, ".") + when ',' then self << Tuple.new(:COMMA, ",") when '@' then self << Tuple.new(:AT, "@") when '{' then self << Tuple.new(:LBRACE, "{") when '}' then self << Tuple.new(:RBRACE, "}") + when '\t' then self << Tuple.new(:TAB, "\t") when '\n' then self << Tuple.new(:NEWLINE, "\n") - @line += 1 else self << Tuple.new(:IDENTIFIER, @input[@position].to_s) end diff --git a/src/miniparsers/java.cr b/src/miniparsers/java.cr index 5899f9c2..843c5c9a 100644 --- a/src/miniparsers/java.cr +++ b/src/miniparsers/java.cr @@ -4,12 +4,13 @@ require "../models/minilexer/token" class JavaParser def initialize @import_statements = Array(String).new - @classes_body_tokens = Array(Array(Token)).new + @classes_tokens = Array(Array(Token)).new + @class_annotation = Array(Token).new end def parse(tokens : Array(Token)) parse_import_statements(tokens) - parse_classes_body(tokens) + parse_classes(tokens) end def parse_import_statements(tokens : Array(Token)) @@ -37,18 +38,95 @@ class JavaParser end end - def parse_classes_body(tokens : Array(Token)) + def parse_formal_parameters(tokens : Array(Token), cursor : Int32) + lparen_count = 0 + rparan_count = 0 + parameters = Array(Array(Token)).new + parameter_token = Array(Token).new + while cursor < tokens.size + token = tokens[cursor] + if token.type == :LPAREN + lparen_count += 1 + elsif token.type == :COMMA + parameters << parameter_token + parameter_token = Array(Token).new + elsif lparen_count > 0 + if token.type == :RPAREN + rparan_count += 1 + if lparen_count == rparan_count + parameters << parameter_token + break + end + else + unless token.type == :WHITESPACE || token.type == :TAB || token.type == :NEWLINE + parameter_token << token + end + end + end + + cursor += 1 + end + + parameters + end + + def parse_annotations(tokens : Array(Token), declare_token_index : Int32) + skip_line = 0 + annotation_tokens = Array(Array(Token)).new + + cursor = declare_token_index - 1 + annotation_token_last_index = -1 + last_newline_index = -1 + while cursor > 0 + token = tokens[cursor] + + if tokens[cursor].type == :NEWLINE + skip_line += 1 + if skip_line == 1 + last_newline_index = cursor + end + end + + if skip_line == 2 + # :NEWLINE(cursor) @RequestMapping + # :NEWLINE public class Controller(type param) + annotation_token_index = cursor + 1 + starts_with_at = while annotation_token_index < last_newline_index + if tokens[annotation_token_index].type == :AT + break true + elsif tokens[annotation_token_index].type == :WHITESPACE || tokens[annotation_token_index].type == :TAB || tokens[annotation_token_index].type == :WHITESPACE + annotation_token_index += 1 + next + else + break false + end + end + + if starts_with_at + annotation_tokens << tokens[annotation_token_index..last_newline_index-1] + skip_line = 1 + last_newline_index = cursor + else + break + end + end + + cursor -= 1 + end + + return annotation_tokens + end + + def parse_classes(tokens : Array(Token)) start_token_parse = false class_body = Array(Token).new - lbrace = 0 - rbrace = 0 + lbrace = rbrace = 0 tokens.each do |token| - if !start_token_parse && token.type == :AT + if !start_token_parse && token.type == :CLASS && tokens[token.index+1].type == :WHITESPACE start_token_parse = true class_body = Array(Token).new - lbrace = 0 - rbrace = 0 + lbrace = rbrace = 0 end if start_token_parse @@ -59,32 +137,170 @@ class JavaParser end class_body << token - if lbrace > 0 && lbrace == rbrace - print_tokens class_body - @classes_body_tokens << class_body + if lbrace > 0 && lbrace == rbrace + @classes_tokens << class_body start_token_parse = false end end end end - def parse_annotation_definitions(tokens : Array(Token)) - method_tokens = tokens.select { |token| token.type == :METHOD } - method_tokens.each do |method_token| - method_lines = method_token.value.split("\n") - method_header = method_lines[0].gsub("{", "").strip - method_body = method_lines[1..-2].join("\n").strip - puts "Method: #{method_header}" - puts "Body: #{method_body}" + def parse_methods(class_body_tokens : Array(Token)) + # 1. Skip first line (class declaration) + # 2. Search ":RPAREN :LBRACE" or ":RPAREN throws :IDENTIFIER :LBRACE" pattern (method body entry point) + # 3. Get method declaration from ":NEWLINE" to ":RPAREN" (method declaration) + # 4. Get method body from ":LBRACE" to ":RBRACE" (method body) + # 5. Repeat 2-4 until end of class body + methods = Array(Array(Token)).new + method_tokens = Array(Token).new + + lbrace_count = rbrace_count = 0 + lparen_count = rparen_count = 0 + + enter_class_body = false + enter_method_body = false + class_body_tokens.each_index do |index| + token = class_body_tokens[index] + if token.type == :NEWLINE && !enter_class_body + # 1. Skip first line (class declaration) + enter_class_body = true + elsif enter_class_body && !enter_method_body + lbrace_count = rbrace_count = 0 + lparen_count = rparen_count = 0 + if token.type == :LBRACE + # 2. Search ":RPAREN :LBRACE" or ":RPAREN throws :IDENTIFIER :LBRACE" pattern (method body entry point) + lbrace_count = 1 + rbrace_count = 0 + lparen_count = rparen_count = 0 + + previous_token_index = index - 1 + has_exception = false + while 0 < previous_token_index + previous_token = class_body_tokens[previous_token_index] + if previous_token.type == :RPAREN + rparen_count = 1 + enter_method_body = true + # 3. Get method declaration from ":NEWLINE" to ":RPAREN" (method declaration) + method_declaration_index = previous_token_index - 1 + while 0 < method_declaration_index + method_declaration_token = class_body_tokens[method_declaration_index] + if method_declaration_token.type == :RPAREN + rparen_count += 1 + elsif method_declaration_token.type == :LPAREN + lparen_count += 1 + elsif rparen_count == lparen_count && method_declaration_token.type == :NEWLINE + method_tokens = class_body_tokens[method_declaration_index+1..index] + break + end + method_declaration_index -= 1 + end + + break + elsif previous_token.type == :WHITESPACE || previous_token.type == :TAB || previous_token.type == :NEWLINE + previous_token_index -= 1 + next + elsif has_exception + break unless previous_token.type == :THROWS && previous_token.value == "throws" + elsif previous_token.type == :IDENTIFIER + has_exception = true + else + break + end + + previous_token_index -= 1 + end + end + elsif enter_method_body + # 4. Get method body from ":LBRACE" to ":RBRACE" (method body) + method_tokens << token + if token.type == :RBRACE + rbrace_count += 1 + if lbrace_count == rbrace_count + methods << method_tokens + method_tokens = Array(Token).new + enter_method_body = false + end + elsif token.type == :LBRACE + lbrace_count += 1 + end + end + end + + methods + end + + def parse_methods22(class_body_tokens : Array(Token)) + methods = Array(Array(Token)).new + method_tokens = Array(Token).new + + lbrace_count = rbrace_count = 0 + lparen_count = rparen_count = 0 + + method_sequence = 0 + enter_class_body = false + class_body_tokens.each_index do |index| + token = class_body_tokens[index] + if enter_class_body + if method_sequence != 2 + if token.type == :RPAREN && method_sequence == 0 + method_sequence = 1 + elsif token.type == :LBRACE && method_sequence == 1 + method_sequence = 2 + lbrace_count = 1 + rbrace_count = lparen_count = rparen_count = 0 + + previous_index = index - 1 + while 0 < previous_index + previous_token = class_body_tokens[previous_index] + if previous_token.type == :LPAREN + lparen_count += 1 + elsif previous_token.type == :RPAREN + rparen_count += 1 + end + if lparen_count == rparen_count && previous_token.type == :NEWLINE + break + end + previous_index -= 1 + end + + method_tokens = class_body_tokens[previous_index+1..index] + elsif token.type == :WHITESPACE || token.type == :TAB || token.type == :NEWLINE + next + else + method_tokens.clear + method_sequence = 0 + end + else + if token.type == :LBRACE + lbrace_count += 1 + elsif token.type == :RBRACE + rbrace_count += 1 + end + + method_tokens << token + if lbrace_count == rbrace_count + methods << method_tokens + method_sequence = 0 + method_tokens = Array(Token).new + end + end + elsif token.type == :NEWLINE + enter_class_body = true + end end + + methods end - def print_tokens(tokens : Array(Token)) - puts "token size: #{tokens.size}" + def print_tokens(tokens : Array(Token), id = "default") + puts("================ #{id} ===================") tokens.each do |token| print(token.value) + if id == "error" + print("(#{token.type})") + end end - puts("\n=====================================") + puts("\n===========================================") end end @@ -94,4 +310,4 @@ end # tokens = lexer.tokenize(input) # lexer.trace # parser = JavaParser.new -# parser.parse(tokens) \ No newline at end of file +# parser.parse(tokens) diff --git a/src/models/minilexer/minilexer.cr b/src/models/minilexer/minilexer.cr index 33023779..603e9acd 100644 --- a/src/models/minilexer/minilexer.cr +++ b/src/models/minilexer/minilexer.cr @@ -7,15 +7,34 @@ class MiniLexer @tokens = [] of Token @position = 0 @input = "" - @line = 1 + @pos_line_array = Array(Tuple(Int32, Int32)).new end def mode=(mode) @mode = mode end + def line() : Int + pos_index = 0 + line_index = 1 + i = @pos_line_array.size - 1 + while 0 < i + pos = @pos_line_array[i][pos_index] + line = @pos_line_array[i][line_index] + if pos < @position + return line + @input[pos+1..@position].count("\n") + end + i -= 1 + end + + line = @input[0..@position].count("\n") + 1 + @pos_line_array << Tuple.new(@position, line) + + line + end + def <<(t : Tuple(Symbol, String)) - @tokens << Token.new(t[0], t[1], @tokens.size, @position, @line) + @tokens << Token.new(t[0], t[1], @tokens.size, @position, line()) end def tokenize(@input : String) : Array(Token) diff --git a/src/models/minilexer/token.cr b/src/models/minilexer/token.cr index d80a32d9..3cf790bf 100644 --- a/src/models/minilexer/token.cr +++ b/src/models/minilexer/token.cr @@ -15,6 +15,8 @@ class Token def to_s if @value == "\n" @value = "\\n" + elsif @value == "\t" + @value = "\\t" end "#{@type} '#{@value}'" end diff --git a/src/output_builder/common.cr b/src/output_builder/common.cr index 37c08c76..a0ee7475 100644 --- a/src/output_builder/common.cr +++ b/src/output_builder/common.cr @@ -38,7 +38,7 @@ class OutputBuilderCommon < OutputBuilder if code_path.line.nil? r_buffer += "\n ○ file: #{code_path.path}" else - r_buffer += "\n ○ file: #{code_path.path}##{code_path.line}" + r_buffer += "\n ○ file: #{code_path.path} (line #{code_path.line})" end end end From 359fad8c54974008edbef2ccc4fd349fec8cbcb4 Mon Sep 17 00:00:00 2001 From: ksg Date: Sun, 25 Feb 2024 23:33:33 +0900 Subject: [PATCH 4/8] feat: Refactor JavaParser class and Spring MVC analyzer - Modify `JavaParser` class for improved handling of `{}` characters in `parse_formal_parameters` method - Update `parse_classes` method to correctly parse class bodies - Implement `parse_methods` method to handle parsing of methods within the class body - Refactor `analyzers/analyzer_spring.cr` by removing unnecessary `begin` and `rescue` blocks in `analyze` method - Rename `has_spring_web_bind_class_been_import` to `has_spring_web_bind_package_been_import` - Add parsing of base URL, methods, annotations, and parameters in the Spring MVC class - Improve handling of methods without `@RequestMapping` annotation - Remove unused code and variables related to the `Reactive Router` - Add `get_mapping_path` method for handling parsing of path parameters in method annotations - Modify logic for retrieving endpoint parameters in the Spring MVC class --- src/analyzer/analyzers/analyzer_spring.cr | 238 +++++++++++++--------- src/miniparsers/java.cr | 88 ++------ 2 files changed, 161 insertions(+), 165 deletions(-) diff --git a/src/analyzer/analyzers/analyzer_spring.cr b/src/analyzer/analyzers/analyzer_spring.cr index 0c60368c..99c3fa8e 100644 --- a/src/analyzer/analyzers/analyzer_spring.cr +++ b/src/analyzer/analyzers/analyzer_spring.cr @@ -9,98 +9,102 @@ class AnalyzerSpring < Analyzer def analyze # Source Analysis - begin - Dir.glob("#{@base_path}/**/*") do |path| - next if File.directory?(path) - - url = "" - if File.exists?(path) && (path.ends_with?(".java") || path.ends_with?(".kt")) - content = File.read(path, encoding: "utf-8", invalid: :skip) - - lexer = JavaLexer.new - tokens = lexer.tokenize(content) - parser = JavaParser.new - parser.parse(tokens) - has_spring_web_bind_class_been_import = false - parser.@import_statements.each do |import_statement| - if import_statement.includes? "org.springframework.web.bind.annotation." - has_spring_web_bind_class_been_import = true - end + Dir.glob("#{@base_path}/**/*") do |path| + next if File.directory?(path) + + url = "" + if File.exists?(path) && (path.ends_with?(".java") || path.ends_with?(".kt")) + content = File.read(path, encoding: "utf-8", invalid: :skip) + + lexer = JavaLexer.new + tokens = lexer.tokenize(content) + parser = JavaParser.new + parser.parse(tokens) + has_spring_web_bind_package_been_import = false + parser.@import_statements.each do |import_statement| + if import_statement.includes? "org.springframework.web.bind.annotation." + has_spring_web_bind_package_been_import = true end - if has_spring_web_bind_class_been_import - # Spring Web - parser.@classes_tokens.each do |class_tokens| - class_annotations = parser.parse_annotations(tokens, class_tokens[0].index) - class_annotations.each do |class_annotation| - if class_annotation[1].value == "RequestMapping" - class_path_token = parser.parse_formal_parameters(tokens, class_annotation[1].index)[0][-1] - if class_path_token.type == :STRING_LITERAL - url = class_path_token.value[1..-2] - if url.ends_with? "*" - url = url[0..-2] - end + end + + # Spring MVC Router (Controller) + if has_spring_web_bind_package_been_import + parser.@classes_tokens.each do |class_tokens| + # Parse the base url of the class + class_annotations = parser.parse_annotations(tokens, class_tokens[0].index) + class_annotations.each do |class_annotation| + if class_annotation[1].value == "RequestMapping" + class_path_token = parser.parse_formal_parameters(tokens, class_annotation[1].index)[0][-1] + if class_path_token.type == :STRING_LITERAL + url = class_path_token.value[1..-2] + if url.ends_with? "*" + url = url[0..-2] end - end + end end - - parser.parse_methods(class_tokens).each do |method_tokens| - method_annotations = parser.parse_annotations(tokens, method_tokens[0].index) - method_annotations.each do |method_annotation_tokens| - if method_annotation_tokens[1].value == "RequestMapping" - annotation_parameters = parser.parse_formal_parameters(tokens, method_annotation_tokens[1].index) - - url_path = "" - line = method_annotation_tokens[1].line - if annotation_parameters.size != 0 - url_path = annotation_parameters[0][-1].value[1..-2] - if url.ends_with?("/") && url_path.starts_with?("/") - url_path = url_path[1..-1] - end - line = annotation_parameters[0][-1].line - end - parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index) - details = Details.new(PathInfo.new(path, line)) - - method_flag = false - if annotation_parameters.size > 1 - annotation_parameters.each do |annotation_parameter_tokens| - if annotation_parameter_tokens[0].value == "method" - method = annotation_parameter_tokens[-1].value - method_flag = true - @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) - break + end + + # Parse the methods of the class + parser.parse_methods(class_tokens).each do |method_tokens| + # Parse the method annotations + method_annotations = parser.parse_annotations(tokens, method_tokens[0].index) + method_annotations.each do |method_annotation_tokens| + url_paths = Array(String).new + annotation_name_token = method_annotation_tokens[1] + # If the method is annotated with @RequestMapping + if annotation_name_token.value == "RequestMapping" + if tokens[annotation_name_token.index + 1].type == :LPAREN + url_paths = get_mapping_path(parser, tokens, method_annotation_tokens[1].index) + else + url_paths = [""] + end + + line = annotation_name_token.line + parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index) + details = Details.new(PathInfo.new(path, line)) + + # Parse the method parameter (method = "GET" or "POST" or "PUT" or "DELETE" or "PATCH") + method_flag = false + annotation_parameters = parser.parse_formal_parameters(tokens, annotation_name_token.index) + annotation_parameters.each do |annotation_parameter_tokens| + if annotation_parameter_tokens.size > 2 + if annotation_parameter_tokens[0].value == "method" + method = annotation_parameter_tokens[-1].value + method_flag = true + url_paths.each do |url_path| + @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) end + break end end + end - unless method_flag - ["GET", "POST", "PUT", "DELETE", "PATCH"].each do |method| - @result << Endpoint.new("#{url}#{url_path}", method, details) + # If the method is not annotated with @RequestMapping, then 5 methods are allowed + unless method_flag + ["GET", "POST", "PUT", "DELETE", "PATCH"].each do |method| + url_paths.each do |url_path| + @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) end end - else - ["GetMapping", "PostMapping", "PutMapping", "DeleteMapping", "PatchMapping"].each do |method_mapping| - if method_annotation_tokens[1].value == method_mapping - url_path = "" - line = method_annotation_tokens[1].line - method = method_mapping[0..-8].upcase - parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index) - - if tokens[method_annotation_tokens[1].index + 1].type == :LPAREN - annotation_parameters = parser.parse_formal_parameters(tokens, method_annotation_tokens[1].index) - method_path_token = annotation_parameters[0][-1] - if method_path_token.type == :STRING_LITERAL - url_path = method_path_token.value[1..-2] - if url.ends_with?("/") && url_path.starts_with?("/") - url_path = url_path[1..-1] - end - else - # error case - next - end - end + end + else + # If the method is annotated with @GetMapping, @PostMapping, @PutMapping, @DeleteMapping, @PatchMapping + ["GetMapping", "PostMapping", "PutMapping", "DeleteMapping", "PatchMapping"].each do |method_mapping| + if annotation_name_token.value == method_mapping + line = annotation_name_token.line + method = method_mapping[0..-8].upcase + parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index) + + # Parse the path paremeter + if tokens[annotation_name_token.index + 1].type == :LPAREN + url_paths = get_mapping_path(parser, tokens, annotation_name_token.index) + else + # If the path parameter is not specified, then the path is "" + url_paths = [""] + end - details = Details.new(PathInfo.new(path, line)) + details = Details.new(PathInfo.new(path, line)) + url_paths.each do |url_path| @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) end end @@ -108,29 +112,71 @@ class AnalyzerSpring < Analyzer end end end - else - # Reactive Router - content.scan(REGEX_ROUTER_CODE_BLOCK) do |route_code| - method_code = route_code[0] - method_code.scan(REGEX_ROUTE_CODE_LINE) do |match| - next if match.size != 4 - method = match[2] - endpoint = match[3].gsub(/\n/, "") - details = Details.new(PathInfo.new(path)) - @result << Endpoint.new("#{url}#{endpoint}", method, details) - end + end + else + # Reactive Router + content.scan(REGEX_ROUTER_CODE_BLOCK) do |route_code| + method_code = route_code[0] + method_code.scan(REGEX_ROUTE_CODE_LINE) do |match| + next if match.size != 4 + method = match[2] + endpoint = match[3].gsub(/\n/, "") + details = Details.new(PathInfo.new(path)) + @result << Endpoint.new("#{url}#{endpoint}", method, details) end end end end - rescue e - logger.debug e end Fiber.yield @result end + def get_mapping_path(parser : JavaParser, tokens : Array(Token), mapping_token_index : Int32) + # 1. Search for the value of the @xxxxxMapping annotation + # 2. If the value is a string literal, return it + # 3. If the value is an array, return each element + # 4. Other case return empty array + url_paths = Array(String).new + mapping_parameters = parser.parse_formal_parameters(tokens, mapping_token_index) + if mapping_parameters[0].size != 0 + path_argument_index = 0 + mapping_parameters.each_with_index do |mapping_parameter, index| + if mapping_parameter[0].type == :IDENTIFIER && mapping_parameter[0].value == "value" + path_argument_index = index + end + end + + path_parameter_tokens = mapping_parameters[path_argument_index] + if path_parameter_tokens[-1].type == :STRING_LITERAL + # @GetMapping("/abc") or @GetMapping(value = "/abc") + url_paths << path_parameter_tokens[-1].value[1..-2] + elsif path_parameter_tokens[-1].type == :RBRACE + # @GetMapping({"/abc", "/def"}) or @GetMapping(value = {"/abc", "/def"}) + i = path_parameter_tokens.size - 2 + while i > 0 + parameter_token = path_parameter_tokens[i] + if parameter_token.type == :LBRACE + break + elsif parameter_token.type == :COMMA + i -= 1 + next + elsif parameter_token.type == :STRING_LITERAL + url_paths << parameter_token.value[1..-2] + else + puts parameter_token.to_s + break + end + + i -= 1 + end + end + end + + url_paths + end + def get_endpoint_parameters(parser : JavaParser, tokens : Array(Token), method_token_index : Int32) : Array(Param) endpoint_parameters = Array(Param).new parser.parse_formal_parameters(tokens, method_token_index).each do |formal_parameter_tokens| @@ -160,7 +206,7 @@ class AnalyzerSpring < Analyzer end endpoint_parameters << Param.new(parameter_name, "", parameter_type) - end + end end end diff --git a/src/miniparsers/java.cr b/src/miniparsers/java.cr index 843c5c9a..f2f12861 100644 --- a/src/miniparsers/java.cr +++ b/src/miniparsers/java.cr @@ -40,22 +40,35 @@ class JavaParser def parse_formal_parameters(tokens : Array(Token), cursor : Int32) lparen_count = 0 - rparan_count = 0 + rparen_count = 0 + lbrace_count = 0 + rbrace_count = 0 parameters = Array(Array(Token)).new parameter_token = Array(Token).new while cursor < tokens.size token = tokens[cursor] if token.type == :LPAREN lparen_count += 1 - elsif token.type == :COMMA + if lparen_count > 1 + parameter_token << token + end + elsif token.type == :LBRACE + lbrace_count += 1 + parameter_token << token + elsif token.type == :RBRACE + rbrace_count += 1 + parameter_token << token + elsif lbrace_count == rbrace_count && lparen_count - 1 == rparen_count && token.type == :COMMA parameters << parameter_token parameter_token = Array(Token).new elsif lparen_count > 0 if token.type == :RPAREN - rparan_count += 1 - if lparen_count == rparan_count + rparen_count += 1 + if lparen_count == rparen_count parameters << parameter_token break + else + parameter_token << token end else unless token.type == :WHITESPACE || token.type == :TAB || token.type == :NEWLINE @@ -228,79 +241,16 @@ class JavaParser methods end - - def parse_methods22(class_body_tokens : Array(Token)) - methods = Array(Array(Token)).new - method_tokens = Array(Token).new - - lbrace_count = rbrace_count = 0 - lparen_count = rparen_count = 0 - - method_sequence = 0 - enter_class_body = false - class_body_tokens.each_index do |index| - token = class_body_tokens[index] - if enter_class_body - if method_sequence != 2 - if token.type == :RPAREN && method_sequence == 0 - method_sequence = 1 - elsif token.type == :LBRACE && method_sequence == 1 - method_sequence = 2 - lbrace_count = 1 - rbrace_count = lparen_count = rparen_count = 0 - - previous_index = index - 1 - while 0 < previous_index - previous_token = class_body_tokens[previous_index] - if previous_token.type == :LPAREN - lparen_count += 1 - elsif previous_token.type == :RPAREN - rparen_count += 1 - end - if lparen_count == rparen_count && previous_token.type == :NEWLINE - break - end - previous_index -= 1 - end - - method_tokens = class_body_tokens[previous_index+1..index] - elsif token.type == :WHITESPACE || token.type == :TAB || token.type == :NEWLINE - next - else - method_tokens.clear - method_sequence = 0 - end - else - if token.type == :LBRACE - lbrace_count += 1 - elsif token.type == :RBRACE - rbrace_count += 1 - end - - method_tokens << token - if lbrace_count == rbrace_count - methods << method_tokens - method_sequence = 0 - method_tokens = Array(Token).new - end - end - elsif token.type == :NEWLINE - enter_class_body = true - end - end - - methods - end def print_tokens(tokens : Array(Token), id = "default") - puts("================ #{id} ===================") + puts("\n================ #{id} ===================") tokens.each do |token| print(token.value) if id == "error" print("(#{token.type})") end end - puts("\n===========================================") + puts end end From dba97f66840be172d0a0adc898ed86441c54a864 Mon Sep 17 00:00:00 2001 From: ksg Date: Sat, 2 Mar 2024 16:29:50 +0900 Subject: [PATCH 5/8] feat: Refactor and improve Java and Spring code handling - Made modifications to the `java.cr` file to improve logic in tokenization and matching of literals, identifiers, and operators - Added new methods and updated existing methods in the `java.cr` file to support class parsing and model creation - Updated the `AnalyzerSpring` class to handle parsing of Spring MVC and Reactive Router code, including endpoints, annotations, and parameters - Added a `.gitignore` file to the `kotlin_spring` fixtures and included `.gradle` in it - Added a `.gitignore` file to the `java_spring` fixtures and added `.gradle` to the ignore list --- .../fixtures/java_spring/.gitignore | 1 + .../fixtures/kotlin_spring/.gitignore | 1 + src/analyzer/analyzers/analyzer_spring.cr | 150 +++++++++++------- src/minilexers/java.cr | 14 +- src/miniparsers/java.cr | 29 +++- 5 files changed, 133 insertions(+), 62 deletions(-) create mode 100644 spec/functional_test/fixtures/java_spring/.gitignore create mode 100644 spec/functional_test/fixtures/kotlin_spring/.gitignore diff --git a/spec/functional_test/fixtures/java_spring/.gitignore b/spec/functional_test/fixtures/java_spring/.gitignore new file mode 100644 index 00000000..08a55c09 --- /dev/null +++ b/spec/functional_test/fixtures/java_spring/.gitignore @@ -0,0 +1 @@ +.gradle diff --git a/spec/functional_test/fixtures/kotlin_spring/.gitignore b/spec/functional_test/fixtures/kotlin_spring/.gitignore new file mode 100644 index 00000000..08a55c09 --- /dev/null +++ b/spec/functional_test/fixtures/kotlin_spring/.gitignore @@ -0,0 +1 @@ +.gradle diff --git a/src/analyzer/analyzers/analyzer_spring.cr b/src/analyzer/analyzers/analyzer_spring.cr index 99c3fa8e..ec6364b6 100644 --- a/src/analyzer/analyzers/analyzer_spring.cr +++ b/src/analyzer/analyzers/analyzer_spring.cr @@ -13,7 +13,7 @@ class AnalyzerSpring < Analyzer next if File.directory?(path) url = "" - if File.exists?(path) && (path.ends_with?(".java") || path.ends_with?(".kt")) + if File.exists?(path) && (path.ends_with?(".java")) content = File.read(path, encoding: "utf-8", invalid: :skip) lexer = JavaLexer.new @@ -29,8 +29,9 @@ class AnalyzerSpring < Analyzer # Spring MVC Router (Controller) if has_spring_web_bind_package_been_import - parser.@classes_tokens.each do |class_tokens| + parser.@classes.each do |class_model| # Parse the base url of the class + class_tokens = class_model.@tokens class_annotations = parser.parse_annotations(tokens, class_tokens[0].index) class_annotations.each do |class_annotation| if class_annotation[1].value == "RequestMapping" @@ -45,67 +46,79 @@ class AnalyzerSpring < Analyzer end # Parse the methods of the class - parser.parse_methods(class_tokens).each do |method_tokens| + class_model.@methods.each do |method_tokens| # Parse the method annotations method_annotations = parser.parse_annotations(tokens, method_tokens[0].index) - method_annotations.each do |method_annotation_tokens| + method_annotations.each do |method_annotation_tokens| # multiline annotations url_paths = Array(String).new annotation_name_token = method_annotation_tokens[1] - # If the method is annotated with @RequestMapping - if annotation_name_token.value == "RequestMapping" - if tokens[annotation_name_token.index + 1].type == :LPAREN - url_paths = get_mapping_path(parser, tokens, method_annotation_tokens[1].index) - else - url_paths = [""] - end - - line = annotation_name_token.line - parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index) - details = Details.new(PathInfo.new(path, line)) - # Parse the method parameter (method = "GET" or "POST" or "PUT" or "DELETE" or "PATCH") - method_flag = false + # Spring MVC Decorator + if annotation_name_token.value.ends_with? "Mapping" + method = nil + parameter_type = nil + default_value = nil annotation_parameters = parser.parse_formal_parameters(tokens, annotation_name_token.index) annotation_parameters.each do |annotation_parameter_tokens| if annotation_parameter_tokens.size > 2 - if annotation_parameter_tokens[0].value == "method" - method = annotation_parameter_tokens[-1].value - method_flag = true - url_paths.each do |url_path| - @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) + annotation_parameter_key = annotation_parameter_tokens[0].value + annotation_parameter_value = annotation_parameter_tokens[-1].value + if annotation_parameter_key == "method" + method = annotation_parameter_value + elsif annotation_parameter_key == "consumes" + if annotation_parameter_value.ends_with? "APPLICATION_FORM_URLENCODED_VALUE" + parameter_type = "form" + elsif annotation_parameter_value.ends_with?("APPLICATION_JSON_VALUE") + parameter_type = "json" end - break end end end - # If the method is not annotated with @RequestMapping, then 5 methods are allowed - unless method_flag - ["GET", "POST", "PUT", "DELETE", "PATCH"].each do |method| + if annotation_name_token.value == "RequestMapping" + if tokens[annotation_name_token.index + 1].type == :LPAREN + url_paths = get_mapping_path(parser, tokens, method_annotation_tokens[1].index) + else + url_paths = [""] + end + + line = annotation_name_token.line + parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index, parameter_type) + details = Details.new(PathInfo.new(path, line)) + + if method.nil? + # If the method is not annotated with @RequestMapping, then 5 methods are allowed + ["GET", "POST", "PUT", "DELETE", "PATCH"].each do |method| + url_paths.each do |url_path| + @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) + end + end + else url_paths.each do |url_path| - @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) + @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) end end - end - else - # If the method is annotated with @GetMapping, @PostMapping, @PutMapping, @DeleteMapping, @PatchMapping - ["GetMapping", "PostMapping", "PutMapping", "DeleteMapping", "PatchMapping"].each do |method_mapping| - if annotation_name_token.value == method_mapping - line = annotation_name_token.line - method = method_mapping[0..-8].upcase - parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index) - - # Parse the path paremeter - if tokens[annotation_name_token.index + 1].type == :LPAREN - url_paths = get_mapping_path(parser, tokens, annotation_name_token.index) - else - # If the path parameter is not specified, then the path is "" - url_paths = [""] - end + else + ["GetMapping", "PostMapping", "PutMapping", "DeleteMapping", "PatchMapping"].each do |method_mapping| + if annotation_name_token.value == method_mapping + line = annotation_name_token.line + method = method_mapping[0..-8].upcase + parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index, parameter_type) + + # Parse the path paremeter + if tokens[annotation_name_token.index + 1].type == :LPAREN + url_paths = get_mapping_path(parser, tokens, annotation_name_token.index) + else + # If the path parameter is not specified, then the path is "" + url_paths = [""] + end - details = Details.new(PathInfo.new(path, line)) - url_paths.each do |url_path| - @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) + details = Details.new(PathInfo.new(path, line)) + url_paths.each do |url_path| + @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) + end + + break end end end @@ -177,18 +190,18 @@ class AnalyzerSpring < Analyzer url_paths end - def get_endpoint_parameters(parser : JavaParser, tokens : Array(Token), method_token_index : Int32) : Array(Param) - endpoint_parameters = Array(Param).new + def get_endpoint_parameters(parser : JavaParser, tokens : Array(Token), method_token_index : Int32, parameter_type : String|Nil) : Array(Param) + endpoint_parameters = Array(Param).new parser.parse_formal_parameters(tokens, method_token_index).each do |formal_parameter_tokens| - next if formal_parameter_tokens.size == 0 - - parameter_type = nil + next if formal_parameter_tokens.size == 0 if formal_parameter_tokens[-1].type == :IDENTIFIER if formal_parameter_tokens[0].type == :AT if formal_parameter_tokens[1].value == "PathVariable" next elsif formal_parameter_tokens[1].value == "RequestBody" - parameter_type = "form" + if parameter_type.nil? + parameter_type = "json" + end elsif formal_parameter_tokens[1].value == "RequestParam" parameter_type = "query" else @@ -197,17 +210,38 @@ class AnalyzerSpring < Analyzer end if !parameter_type.nil? - parameter_name = formal_parameter_tokens[-1].value # case of "@RequestParam String a" + default_value = "" + # @RequestParam(@RequestParam long time) -> time + parameter_name = formal_parameter_tokens[-1].value if formal_parameter_tokens[-1].type != IDENTIFIER - if formal_parameter_tokens[2].type == :LPAREN && formal_parameter_tokens[3].type == :STRING_LITERAL - parameter_name_token = formal_parameter_tokens[3] # case of "@RequestParam("a") String a" - parameter_name = parameter_name_token.value[1..-2] + if formal_parameter_tokens[2].type == :LPAREN + request_parameters = parser.parse_formal_parameters(tokens, formal_parameter_tokens[2].index) + request_parameters.each do |request_parameter_tokens| + parser.print_tokens request_parameter_tokens + if request_parameter_tokens.size > 2 + request_param_name = request_parameter_tokens[0].value + request_param_value = request_parameter_tokens[-1].value + + if request_param_name == "value" + # @RequestParam(value = "name") + parameter_name = request_param_value[1..-2] + elsif request_param_name == "defaultValue" + # @RequestParam(defaultValue = "defaultValue") + default_value = request_param_value[1..-2] + end + end + end + if formal_parameter_tokens[3].type == :STRING_LITERAL + # @RequestParam("name") -> name + parameter_name_token = formal_parameter_tokens[3] + parameter_name = parameter_name_token.value[1..-2] + end end end - endpoint_parameters << Param.new(parameter_name, "", parameter_type) + endpoint_parameters << Param.new(parameter_name, default_value, parameter_type) end - end + end end endpoint_parameters diff --git a/src/minilexers/java.cr b/src/minilexers/java.cr index 2880bf7d..71beffa9 100644 --- a/src/minilexers/java.cr +++ b/src/minilexers/java.cr @@ -180,6 +180,7 @@ class JavaLexer < MiniLexer before_skip_position = @position skip_whitespace_and_comments end + break if @position == @input.size case @input[@position] when '0'..'9' @@ -235,14 +236,21 @@ class JavaLexer < MiniLexer literal = match[0] self << case literal when /^0[xX]/ + @position += literal.size Tuple.new(:HEX_LITERAL, literal) when /^0/ + @position += literal.size Tuple.new(:OCT_LITERAL, literal) - else # /^[\d.]/ + when /^[\d.]/ + @position += literal.size Tuple.new(:DECIMAL_LITERAL, literal) + else + @position += 1 + Tuple.new(:IDENTIFIER, @input[@position].to_s) end - - @position += literal.size + else + self << Tuple.new(:IDENTIFIER, @input[@position].to_s) + @position += 1 end end diff --git a/src/miniparsers/java.cr b/src/miniparsers/java.cr index f2f12861..d95f96c4 100644 --- a/src/miniparsers/java.cr +++ b/src/miniparsers/java.cr @@ -5,12 +5,15 @@ class JavaParser def initialize @import_statements = Array(String).new @classes_tokens = Array(Array(Token)).new - @class_annotation = Array(Token).new + @classes = Array(ClassModel).new end def parse(tokens : Array(Token)) parse_import_statements(tokens) parse_classes(tokens) + @classes_tokens.each do |class_tokens| + @classes << ClassModel.new(self, class_tokens) + end end def parse_import_statements(tokens : Array(Token)) @@ -158,6 +161,18 @@ class JavaParser end end + def get_class_name(tokens : Array(Token)) + tokens.each_with_index do |token, index| + if token.index != 0 + if tokens[index - 1].type == :CLASS + return token.value + end + end + end + + return "" + end + def parse_methods(class_body_tokens : Array(Token)) # 1. Skip first line (class declaration) # 2. Search ":RPAREN :LBRACE" or ":RPAREN throws :IDENTIFIER :LBRACE" pattern (method body entry point) @@ -254,6 +269,18 @@ class JavaParser end end +class ClassModel + @parser : JavaParser + @name : String + @methods : Array(Array(Token)) + @tokens : Array(Token) + + def initialize(@parser : JavaParser, @tokens : Array(Token)) + @name = @parser.get_class_name(@tokens) + @methods = @parser.parse_methods(@tokens) + end +end + # file_path = "/Users/ksg/workspace/noir/spec/functional_test/fixtures/java_spring/src/ItemController.java" # input = File.read(file_path) # lexer = JavaLexer.new From 44e26c1ddcb0d0073963e188941824fb58b918b6 Mon Sep 17 00:00:00 2001 From: ksg Date: Tue, 5 Mar 2024 03:06:54 +0900 Subject: [PATCH 6/8] feat: Refactor java analyzer - Refactored and implemented various methods and classes in `src/miniparsers/java.cr` - Added a new file `detect_kotlin_spring_spe_spec.cr` and test case in `spec/unit_test/detector` for detecting Java Spring - Updated `src/models/minilexer/minilexer.cr` to include a new line and fix a typo in the method signature - Made modifications and additions to `src/models/noir.cr` including the addition of new methods - Updated `spec/functional_test/fixtures/java_spring/src/ItemController.java` to include a package statement and class definition - Created a new file `src/analyzer/analyzers/analyzer_kotlin_spring.cr` with various methods and functions - Deleted `src/analyzer/analyzers/analyzer_spring.cr` - Made changes to `spec/functional_test/testers/java_spring_spec.cr` related to endpoint definitions - Refactored and reordered code in `src/minilexers/java.cr` - Made changes to `src/models/endpoint.cr` including the addition of comparison methods - Modified `src/analyzer/analyzer.cr` to use updated analyzer functions and remove duplicate checks - Renamed `spec/unit_test/analyzer/analyzer_spring_spec.cr` to `analyzer_kotlin_spring_spec.cr` and updated its contents - Refactored conditions in `src/detector/detectors/java_spring.cr` for better readability - Modified `src/detector/detectors/kotlin_spring.cr` to add additional detection conditions - Added a new file `src/analyzer/analyzers/analyzer_java_spring.cr` with various methods and functions. --- .../java_spring/src/ItemController.java | 22 + .../testers/java_spring_spec.cr | 4 +- ...spec.cr => analyzer_kotlin_spring_spec.cr} | 6 +- .../detector/detect_kotlin_spring_spe_spec.cr | 10 + src/analyzer/analyzer.cr | 8 +- .../analyzers/analyzer_java_spring.cr | 337 ++++++++++++++ .../analyzers/analyzer_kotlin_spring.cr | 213 +++++++++ src/analyzer/analyzers/analyzer_spring.cr | 254 ----------- src/detector/detectors/java_spring.cr | 12 +- src/detector/detectors/kotlin_spring.cr | 8 +- src/minilexers/java.cr | 428 +++++++++--------- src/miniparsers/java.cr | 416 ++++++++++++++--- src/models/endpoint.cr | 10 + src/models/minilexer/minilexer.cr | 18 +- src/models/minilexer/token.cr | 2 +- src/models/noir.cr | 5 +- 16 files changed, 1181 insertions(+), 572 deletions(-) rename spec/unit_test/analyzer/{analyzer_spring_spec.cr => analyzer_kotlin_spring_spec.cr} (95%) create mode 100644 spec/unit_test/detector/detect_kotlin_spring_spe_spec.cr create mode 100644 src/analyzer/analyzers/analyzer_java_spring.cr create mode 100644 src/analyzer/analyzers/analyzer_kotlin_spring.cr delete mode 100644 src/analyzer/analyzers/analyzer_spring.cr diff --git a/spec/functional_test/fixtures/java_spring/src/ItemController.java b/spec/functional_test/fixtures/java_spring/src/ItemController.java index 136be38b..0ad6063f 100644 --- a/spec/functional_test/fixtures/java_spring/src/ItemController.java +++ b/spec/functional_test/fixtures/java_spring/src/ItemController.java @@ -1,3 +1,4 @@ +package com.test; import org.springframework.web.bind.annotation.*; import a.b.c.bind.annotation.*; import org.springframework.c.d.e.*; @@ -25,4 +26,25 @@ public void deleteItem(@PathVariable Long id) { @GetMapping("/json/{id}", produces = [MediaType.APPLICATION_JSON_VALUE]) public void getItemJson(){ } +} + +class Item { + int id; + String name; + + public void setId(int _id) { + id = _id; + } + + public int getId() { + return id; + } + + public void setName(String _name) { + name = _name; + } + + public String getName() { + return name; + } } \ No newline at end of file diff --git a/spec/functional_test/testers/java_spring_spec.cr b/spec/functional_test/testers/java_spring_spec.cr index 0ee49789..4e7c3918 100644 --- a/spec/functional_test/testers/java_spring_spec.cr +++ b/spec/functional_test/testers/java_spring_spec.cr @@ -16,8 +16,8 @@ extected_endpoints = [ # ItemController.java Endpoint.new("/items/{id}", "GET"), Endpoint.new("/items/json/{id}", "GET"), - Endpoint.new("/items", "POST"), - Endpoint.new("/items/update/{id}", "PUT"), + Endpoint.new("/items", "POST", [Param.new("id", "", "form"), Param.new("name", "", "form")]), + Endpoint.new("/items/update/{id}", "PUT", [Param.new("id", "", "json"), Param.new("name", "", "json")]), Endpoint.new("/items/delete/{id}", "DELETE"), ] diff --git a/spec/unit_test/analyzer/analyzer_spring_spec.cr b/spec/unit_test/analyzer/analyzer_kotlin_spring_spec.cr similarity index 95% rename from spec/unit_test/analyzer/analyzer_spring_spec.cr rename to spec/unit_test/analyzer/analyzer_kotlin_spring_spec.cr index ac21d265..a84241b1 100644 --- a/spec/unit_test/analyzer/analyzer_spring_spec.cr +++ b/spec/unit_test/analyzer/analyzer_kotlin_spring_spec.cr @@ -1,9 +1,9 @@ -require "../../../src/analyzer/analyzers/analyzer_spring.cr" +require "../../../src/analyzer/analyzers/analyzer_kotlin_spring.cr" require "../../../src/options" describe "mapping_to_path" do options = default_options() - instance = AnalyzerSpring.new(options) + instance = AnalyzerKotlinSpring.new(options) it "mapping_to_path - GET" do instance.mapping_to_path("@GetMapping(\"/abcd\")").should eq(["/abcd"]) @@ -72,7 +72,7 @@ end describe "utils func" do options = default_options() - instance = AnalyzerSpring.new(options) + instance = AnalyzerKotlinSpring.new(options) it "is_bracket - true" do instance.is_bracket("{abcd=1234}").should eq(true) diff --git a/spec/unit_test/detector/detect_kotlin_spring_spe_spec.cr b/spec/unit_test/detector/detect_kotlin_spring_spe_spec.cr new file mode 100644 index 00000000..76746536 --- /dev/null +++ b/spec/unit_test/detector/detect_kotlin_spring_spe_spec.cr @@ -0,0 +1,10 @@ +require "../../../src/detector/detectors/*" + +describe "Detect Java Spring" do + options = default_options() + instance = DetectorKotlinSpring.new options + + it "build.gradle.kts" do + instance.detect("build.gradle.kts", "'org.springframework.boot' version '2.6.2'").should eq(true) + end +end diff --git a/src/analyzer/analyzer.cr b/src/analyzer/analyzer.cr index 155531fb..09a82483 100644 --- a/src/analyzer/analyzer.cr +++ b/src/analyzer/analyzer.cr @@ -15,9 +15,9 @@ def initialize_analyzers(logger : NoirLogger) analyzers["go_gin"] = ->analyzer_go_gin(Hash(Symbol, String)) analyzers["java_armeria"] = ->analyzer_armeria(Hash(Symbol, String)) analyzers["java_jsp"] = ->analyzer_jsp(Hash(Symbol, String)) - analyzers["java_spring"] = ->analyzer_spring(Hash(Symbol, String)) + analyzers["java_spring"] = ->analyzer_java_spring(Hash(Symbol, String)) analyzers["js_express"] = ->analyzer_express(Hash(Symbol, String)) - analyzers["kotlin_spring"] = ->analyzer_spring(Hash(Symbol, String)) + analyzers["kotlin_spring"] = ->analyzer_kotlin_spring(Hash(Symbol, String)) analyzers["oas2"] = ->analyzer_oas2(Hash(Symbol, String)) analyzers["oas3"] = ->analyzer_oas3(Hash(Symbol, String)) analyzers["php_pure"] = ->analyzer_php_pure(Hash(Symbol, String)) @@ -51,10 +51,6 @@ def analysis_endpoints(options : Hash(Symbol, String), techs, logger : NoirLogge logger.system "Analysis Started" logger.info_sub "Code Analyzer: #{techs.size} in use" - if (techs.includes? "java_spring") && (techs.includes? "kotlin_spring") - techs.delete("kotlin_spring") - end - techs.each do |tech| if analyzer.has_key?(tech) if NoirTechs.similar_to_tech(options[:exclude_techs]).includes?(tech) diff --git a/src/analyzer/analyzers/analyzer_java_spring.cr b/src/analyzer/analyzers/analyzer_java_spring.cr new file mode 100644 index 00000000..3a83b75b --- /dev/null +++ b/src/analyzer/analyzers/analyzer_java_spring.cr @@ -0,0 +1,337 @@ +require "../../models/analyzer" +require "../../minilexers/java" +require "../../miniparsers/java" + +class AnalyzerJavaSpring < Analyzer + REGEX_ROUTER_CODE_BLOCK = /route\(\)?.*?\);/m + REGEX_ROUTE_CODE_LINE = /((?:andRoute|route)\s*\(|\.)\s*(GET|POST|DELETE|PUT)\(\s*"([^"]*)/ + + def analyze + parser_map = Hash(String, JavaParser).new + package_map = Hash(String, Hash(String, ClassModel)).new + Dir.glob("#{@base_path}/**/*") do |path| + next if File.directory?(path) + url = "" + if File.exists?(path) && path.ends_with?(".java") + content = File.read(path, encoding: "utf-8", invalid: :skip) + + # Spring MVC Router (Controller) + spring_web_bind_package = "org.springframework.web.bind.annotation." + has_spring_web_bind_package_been_import = content.includes?(spring_web_bind_package) + if has_spring_web_bind_package_been_import + if parser_map.has_key?(path) + parser = parser_map[path] + tokens = parser.tokens + else + parser = get_parser(Path.new(path), content) + tokens = parser.tokens + parser_map[path] = parser + end + + package_name = parser.get_package_name(tokens) + next if package_name == "" + root_source_directory : Path = parser.get_root_source_directory(path, package_name) + package_directory = Path.new(path).dirname + + # Import packages + import_map = Hash(String, ClassModel).new + parser.import_statements.each do |import_statement| + import_path = import_statement.gsub(".", "/") + if import_path.ends_with?("/*") + import_directory = root_source_directory.join(import_path[..-3]) + if Dir.exists?(import_directory) + Dir.glob("#{import_directory}/*.java") do |_path| + next if path == _path + if !parser_map.has_key?(_path) + _parser = get_parser(Path.new(_path)) + else + _parser = parser_map[_path] + end + + _parser.classes.each do |package_class| + import_map.put_if_absent(package_class.name, package_class) + end + end + end + else + source_path = root_source_directory.join(import_path + ".java") + next if source_path.dirname == package_directory || !File.exists?(source_path) + if !parser_map.has_key?(source_path.to_s) + _content = File.read(source_path.to_s, encoding: "utf-8", invalid: :skip) + _parser = get_parser(source_path, _content) + parser_map[source_path.to_s] = _parser + _parser.classes.each do |package_class| + import_map.put_if_absent(package_class.name, package_class) + end + else + _parser = parser_map[source_path.to_s] + _parser.classes.each do |package_class| + import_map.put_if_absent(package_class.name, package_class) + end + end + end + end + + # Packages in same directory + package_class_map = package_map[package_directory]? + if package_class_map.nil? + package_class_map = Hash(String, ClassModel).new + Dir.glob("#{package_directory}/*.java") do |_path| + next if path == _path + if !parser_map.has_key?(_path) + _parser = get_parser(Path.new(_path)) + else + _parser = parser_map[_path] + end + + _parser.classes.each do |package_class| + package_class_map.put_if_absent(package_class.name, package_class) + end + + parser.classes.each do |package_class| + package_class_map.put_if_absent(package_class.name, package_class) + end + + package_map[package_directory] = package_class_map + end + end + + class_map = package_class_map.merge(import_map) + parser.classes.each do |class_model| + class_annotation = class_model.annotations["RequestMapping"]? + if !class_annotation.nil? + next if class_annotation.params.size == 0 + class_path_token = class_annotation.params[0][-1] + if class_path_token.type == :STRING_LITERAL + url = class_path_token.value[1..-2] + if url.ends_with? "*" + url = url[0..-2] + end + end + end + + class_model.methods.values.each do |method| + method.annotations.values.each do |method_annotation| # multiline annotations + url_paths = Array(String).new + + # Spring MVC Decorator + request_method = nil + if method_annotation.name.ends_with? "Mapping" + parameter_format = nil + annotation_parameters = method_annotation.params + annotation_parameters.each do |annotation_parameter_tokens| + if annotation_parameter_tokens.size > 2 + annotation_parameter_key = annotation_parameter_tokens[0].value + annotation_parameter_value = annotation_parameter_tokens[-1].value + if annotation_parameter_key == "method" + request_method = annotation_parameter_value + elsif annotation_parameter_key == "consumes" + if annotation_parameter_value.ends_with? "APPLICATION_FORM_URLENCODED_VALUE" + parameter_format = "form" + elsif annotation_parameter_value.ends_with? "APPLICATION_JSON_VALUE" + parameter_format = "json" + end + end + end + end + + if method_annotation.name == "RequestMapping" + url_paths = [""] + if method_annotation.params.size > 0 + url_paths = get_mapping_path(parser, tokens, method_annotation.params) + end + + line = method_annotation.tokens[0].line + details = Details.new(PathInfo.new(path, line)) + + if request_method.nil? + # If the method is not annotated with @RequestMapping, then 5 methods are allowed + ["GET", "POST", "PUT", "DELETE", "PATCH"].each do |_request_method| + parameters = get_endpoint_parameters(parser, _request_method, method.params, parameter_format, class_map) + url_paths.each do |url_path| + @result << Endpoint.new("#{url}#{url_path}", _request_method, parameters, details) + end + end + else + url_paths.each do |url_path| + parameters = get_endpoint_parameters(parser, request_method, method.params, parameter_format, class_map) + @result << Endpoint.new("#{url}#{url_path}", request_method, parameters, details) + end + end + break + else + mapping_annotations = ["GetMapping", "PostMapping", "PutMapping", "DeleteMapping", "PatchMapping"] + mapping_index = mapping_annotations.index(method_annotation.name) + if !mapping_index.nil? + line = method_annotation.tokens[0].line + request_method = mapping_annotations[mapping_index][0..-8].upcase + if parameter_format.nil? && request_method == "POST" + parameter_format = "form" + end + parameters = get_endpoint_parameters(parser, request_method, method.params, parameter_format, class_map) + + url_paths = [""] + if method_annotation.params.size > 0 + url_paths = get_mapping_path(parser, tokens, method_annotation.params) + end + + details = Details.new(PathInfo.new(path, line)) + url_paths.each do |url_path| + @result << Endpoint.new("#{url}#{url_path}", request_method, parameters, details) + end + break + end + end + end + end + end + end + else + # Reactive Router + content.scan(REGEX_ROUTER_CODE_BLOCK) do |route_code| + method_code = route_code[0] + method_code.scan(REGEX_ROUTE_CODE_LINE) do |match| + next if match.size != 4 + method = match[2] + endpoint = match[3].gsub(/\n/, "") + details = Details.new(PathInfo.new(path)) + @result << Endpoint.new("#{url}#{endpoint}", method, details) + end + end + end + end + end + Fiber.yield + + @result + end + + def get_mapping_path(parser : JavaParser, tokens : Array(Token), method_params : Array(Array(Token))) + # 1. Search for the value of the @xxxxxMapping annotation + # 2. If the value is a string literal, return it + # 3. If the value is an array, return each element + # 4. Other case return empty array + url_paths = Array(String).new + if method_params[0].size != 0 + path_argument_index = 0 + method_params.each_with_index do |mapping_parameter, index| + if mapping_parameter[0].type == :IDENTIFIER && mapping_parameter[0].value == "value" + path_argument_index = index + end + end + + path_parameter_tokens = method_params[path_argument_index] + if path_parameter_tokens[-1].type == :STRING_LITERAL + # @GetMapping("/abc") or @GetMapping(value = "/abc") + url_paths << path_parameter_tokens[-1].value[1..-2] + elsif path_parameter_tokens[-1].type == :RBRACE + # @GetMapping({"/abc", "/def"}) or @GetMapping(value = {"/abc", "/def"}) + i = path_parameter_tokens.size - 2 + while i > 0 + parameter_token = path_parameter_tokens[i] + if parameter_token.type == :LBRACE + break + elsif parameter_token.type == :COMMA + i -= 1 + next + elsif parameter_token.type == :STRING_LITERAL + url_paths << parameter_token.value[1..-2] + else + break + end + + i -= 1 + end + end + end + + url_paths + end + + def get_endpoint_parameters(parser : JavaParser, request_method : String, method_params : Array(Array(Token)), parameter_format : String | Nil, package_class_map : Hash(String, ClassModel)) : Array(Param) + endpoint_parameters = Array(Param).new + + method_params.each do |method_param_tokens| + next if method_param_tokens.size == 0 + if method_param_tokens[-1].type == :IDENTIFIER + if method_param_tokens[0].type == :AT + if method_param_tokens[1].value == "PathVariable" + next + elsif method_param_tokens[1].value == "RequestBody" + if parameter_format.nil? + parameter_format = "json" + end + elsif method_param_tokens[1].value == "RequestParam" + parameter_format = "query" + end + end + + if parameter_format.nil? + parameter_format = "query" + end + + default_value = nil + # @RequestParam(@RequestParam long time) -> time + parameter_name = method_param_tokens[-1].value + if method_param_tokens[-1].type != IDENTIFIER && method_param_tokens.size > 2 + if method_param_tokens[2].type == :LPAREN + request_parameters = parser.parse_formal_parameters(method_param_tokens, 2) + request_parameters.each do |request_parameter_tokens| + if request_parameter_tokens.size > 2 + request_param_name = request_parameter_tokens[0].value + request_param_value = request_parameter_tokens[-1].value + + if request_param_name == "value" + # abc(@RequestParam(value = "name") int xx) -> name + parameter_name = request_param_value[1..-2] + elsif request_param_name == "defaultValue" + # abc(@RequestParam(defaultValue = "defaultValue") String xx) -> defaultValue + default_value = request_param_value[1..-2] + end + end + end + if method_param_tokens[3].type == :STRING_LITERAL + # abc(@RequestParam("name") int xx) -> name + parameter_name_token = method_param_tokens[3] + parameter_name = parameter_name_token.value[1..-2] + end + end + end + + parameter_type = method_param_tokens[-2].value + if ["long", "int", "integer", "char", "boolean", "string", "multipartfile"].index(parameter_type.downcase) + param_default_value = default_value.nil? ? "" : default_value + endpoint_parameters << Param.new(parameter_name, param_default_value, parameter_format) + else + # custom class" + if package_class_map.has_key?(parameter_type) + package_class = package_class_map[parameter_type] + package_class.fields.values.each do |field| + if field.access_modifier == "public" || field.has_setter? + param_default_value = default_value.nil? ? field.init_value : default_value + endpoint_parameters << Param.new(field.name, param_default_value, parameter_format) + end + end + end + end + end + end + + endpoint_parameters + end +end + +def get_parser(path : Path, content : String = "") + if content == "" + content = File.read(path, encoding: "utf-8", invalid: :skip) + end + lexer = JavaLexer.new + tokens = lexer.tokenize(content) + parser = JavaParser.new(path.to_s, tokens) + parser +end + +def analyzer_java_spring(options : Hash(Symbol, String)) + instance = AnalyzerJavaSpring.new(options) + instance.analyze +end diff --git a/src/analyzer/analyzers/analyzer_kotlin_spring.cr b/src/analyzer/analyzers/analyzer_kotlin_spring.cr new file mode 100644 index 00000000..ec50338a --- /dev/null +++ b/src/analyzer/analyzers/analyzer_kotlin_spring.cr @@ -0,0 +1,213 @@ +require "../../models/analyzer" + +class AnalyzerKotlinSpring < Analyzer + REGEX_CLASS_DEFINITION = /^(((public|private|protected|default)\s+)|^)class\s+/ + REGEX_ROUTER_CODE_BLOCK = /route\(\)?.*?\);/m + REGEX_ROUTE_CODE_LINE = /((?:andRoute|route)\s*\(|\.)\s*(GET|POST|DELETE|PUT)\(\s*"([^"]*)/ + + def analyze + # Source Analysis + begin + Dir.glob("#{@base_path}/**/*") do |path| + next if File.directory?(path) + + url = "" + if File.exists?(path) && path.ends_with?(".kt") + content = File.read(path, encoding: "utf-8", invalid: :skip) + + # Spring MVC + has_class_been_imported = false + content.each_line.with_index do |line, index| + details = Details.new(PathInfo.new(path, index + 1)) + if has_class_been_imported == false && REGEX_CLASS_DEFINITION.match(line) + has_class_been_imported = true + end + + if line.includes? "RequestMapping" + mapping_paths = mapping_to_path(line) + if has_class_been_imported == false && mapping_paths.size > 0 + class_mapping_url = mapping_paths[0] + + if class_mapping_url.ends_with?("/*") + class_mapping_url = class_mapping_url[0..-3] + end + if class_mapping_url.ends_with?("/") + class_mapping_url = class_mapping_url[0..-2] + end + + url = "#{class_mapping_url}" + else + mapping_paths.each do |mapping_path| + if line.includes? "RequestMethod" + define_requestmapping_handlers(["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS", "TRACE"]) + else + @result << Endpoint.new("#{url}#{mapping_path}", "GET", details) + end + end + end + end + + if line.includes? "PostMapping" + mapping_paths = mapping_to_path(line) + mapping_paths.each do |mapping_path| + @result << Endpoint.new("#{url}#{mapping_path}", "POST", details) + end + end + if line.includes? "PutMapping" + mapping_paths = mapping_to_path(line) + mapping_paths.each do |mapping_path| + @result << Endpoint.new("#{url}#{mapping_path}", "PUT", details) + end + end + if line.includes? "DeleteMapping" + mapping_paths = mapping_to_path(line) + mapping_paths.each do |mapping_path| + @result << Endpoint.new("#{url}#{mapping_path}", "DELETE", details) + end + end + if line.includes? "PatchMapping" + mapping_paths = mapping_to_path(line) + mapping_paths.each do |mapping_path| + @result << Endpoint.new("#{url}#{mapping_path}", "PATCH", details) + end + end + if line.includes? "GetMapping" + mapping_paths = mapping_to_path(line) + mapping_paths.each do |mapping_path| + @result << Endpoint.new("#{url}#{mapping_path}", "GET", details) + end + end + end + + # Reactive Router + content.scan(REGEX_ROUTER_CODE_BLOCK) do |route_code| + method_code = route_code[0] + method_code.scan(REGEX_ROUTE_CODE_LINE) do |match| + next if match.size != 4 + method = match[2] + endpoint = match[3].gsub(/\n/, "") + details = Details.new(PathInfo.new(path)) + @result << Endpoint.new("#{url}#{endpoint}", method, details) + end + end + end + end + rescue e + logger.debug e + end + Fiber.yield + + @result + end + + def mapping_to_path(line : String) + unless line.includes? "(" + # no path + return [""] + end + + paths = Array(String).new + splited_line = line.strip.split("(") + if splited_line.size > 1 && splited_line[1].includes? ")" + params = splited_line[1].split(")")[0] + params = params.gsub(/\s/, "") # remove space + if params.size > 0 + path = nil + # value parameter + if params.includes? "value=" + value = params.split("value=")[1] + if value.size > 0 + if value[0] == '"' + path = value.split("\"")[1] + elsif value[0] == '{' && value.includes? "}" + path = value[1..].split("}")[0] + end + end + end + + # first parameter + if path.nil? + if params[0] == '"' + path = params.split("\"")[1] + elsif params[0] == '{' && params.includes? "}" + path = params[1..].split("}")[0] + end + end + + # extract path + if path.nil? + # can't find path + paths << "" + else + if path.size > 0 && path[0] == '"' && path.includes? "," + # multiple path + path.split(",").each do |each_path| + if each_path.size > 0 + if each_path[0] == '"' + paths << each_path[1..-2] + else + paths << "" + end + end + end + else + # single path + if path.size > 0 && path[0] == '"' + path = path.split("\"")[1] + end + + paths << path + end + end + else + # no path + paths << "" + end + end + + # append slash + (0..paths.size - 1).each do |i| + path = paths[i] + if path.size > 0 && !path.starts_with? "/" + path = "/" + path + end + + paths[i] = path + end + + paths + end + + def is_bracket(content : String) + content.gsub(/\s/, "")[0].to_s == "{" + end + + def comma_in_bracket(content : String) + result = content.gsub(/\{(.*?)\}/) do |match| + match.gsub(",", "_BRACKET_COMMA_") + end + + result.gsub("{", "").gsub("}", "") + end + + def extract_param(content : String) + # TODO + # case1 -> @RequestParam("a") + # case2 -> String a = param.get("a"); + # case3 -> String a = request.getParameter("a"); + # case4 -> (PATH) @PathVariable("a") + end + + macro define_requestmapping_handlers(methods) + {% for method, index in methods %} + if line.includes? "RequestMethod.{{method.id}}" + @result << Endpoint.new("#{url}#{mapping_path}", "{{method.id}}") + end + {% end %} + end +end + +def analyzer_kotlin_spring(options : Hash(Symbol, String)) + instance = AnalyzerKotlinSpring.new(options) + instance.analyze +end diff --git a/src/analyzer/analyzers/analyzer_spring.cr b/src/analyzer/analyzers/analyzer_spring.cr deleted file mode 100644 index ec6364b6..00000000 --- a/src/analyzer/analyzers/analyzer_spring.cr +++ /dev/null @@ -1,254 +0,0 @@ -require "../../models/analyzer" -require "../../minilexers/java" -require "../../miniparsers/java" - -class AnalyzerSpring < Analyzer - REGEX_CLASS_DEFINITION = /^(((public|private|protected|default)\s+)|^)class\s+/ - REGEX_ROUTER_CODE_BLOCK = /route\(\)?.*?\);/m - REGEX_ROUTE_CODE_LINE = /((?:andRoute|route)\s*\(|\.)\s*(GET|POST|DELETE|PUT)\(\s*"([^"]*)/ - - def analyze - # Source Analysis - Dir.glob("#{@base_path}/**/*") do |path| - next if File.directory?(path) - - url = "" - if File.exists?(path) && (path.ends_with?(".java")) - content = File.read(path, encoding: "utf-8", invalid: :skip) - - lexer = JavaLexer.new - tokens = lexer.tokenize(content) - parser = JavaParser.new - parser.parse(tokens) - has_spring_web_bind_package_been_import = false - parser.@import_statements.each do |import_statement| - if import_statement.includes? "org.springframework.web.bind.annotation." - has_spring_web_bind_package_been_import = true - end - end - - # Spring MVC Router (Controller) - if has_spring_web_bind_package_been_import - parser.@classes.each do |class_model| - # Parse the base url of the class - class_tokens = class_model.@tokens - class_annotations = parser.parse_annotations(tokens, class_tokens[0].index) - class_annotations.each do |class_annotation| - if class_annotation[1].value == "RequestMapping" - class_path_token = parser.parse_formal_parameters(tokens, class_annotation[1].index)[0][-1] - if class_path_token.type == :STRING_LITERAL - url = class_path_token.value[1..-2] - if url.ends_with? "*" - url = url[0..-2] - end - end - end - end - - # Parse the methods of the class - class_model.@methods.each do |method_tokens| - # Parse the method annotations - method_annotations = parser.parse_annotations(tokens, method_tokens[0].index) - method_annotations.each do |method_annotation_tokens| # multiline annotations - url_paths = Array(String).new - annotation_name_token = method_annotation_tokens[1] - - # Spring MVC Decorator - if annotation_name_token.value.ends_with? "Mapping" - method = nil - parameter_type = nil - default_value = nil - annotation_parameters = parser.parse_formal_parameters(tokens, annotation_name_token.index) - annotation_parameters.each do |annotation_parameter_tokens| - if annotation_parameter_tokens.size > 2 - annotation_parameter_key = annotation_parameter_tokens[0].value - annotation_parameter_value = annotation_parameter_tokens[-1].value - if annotation_parameter_key == "method" - method = annotation_parameter_value - elsif annotation_parameter_key == "consumes" - if annotation_parameter_value.ends_with? "APPLICATION_FORM_URLENCODED_VALUE" - parameter_type = "form" - elsif annotation_parameter_value.ends_with?("APPLICATION_JSON_VALUE") - parameter_type = "json" - end - end - end - end - - if annotation_name_token.value == "RequestMapping" - if tokens[annotation_name_token.index + 1].type == :LPAREN - url_paths = get_mapping_path(parser, tokens, method_annotation_tokens[1].index) - else - url_paths = [""] - end - - line = annotation_name_token.line - parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index, parameter_type) - details = Details.new(PathInfo.new(path, line)) - - if method.nil? - # If the method is not annotated with @RequestMapping, then 5 methods are allowed - ["GET", "POST", "PUT", "DELETE", "PATCH"].each do |method| - url_paths.each do |url_path| - @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) - end - end - else - url_paths.each do |url_path| - @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) - end - end - else - ["GetMapping", "PostMapping", "PutMapping", "DeleteMapping", "PatchMapping"].each do |method_mapping| - if annotation_name_token.value == method_mapping - line = annotation_name_token.line - method = method_mapping[0..-8].upcase - parameters = get_endpoint_parameters(parser, tokens, method_tokens[0].index, parameter_type) - - # Parse the path paremeter - if tokens[annotation_name_token.index + 1].type == :LPAREN - url_paths = get_mapping_path(parser, tokens, annotation_name_token.index) - else - # If the path parameter is not specified, then the path is "" - url_paths = [""] - end - - details = Details.new(PathInfo.new(path, line)) - url_paths.each do |url_path| - @result << Endpoint.new("#{url}#{url_path}", method, parameters, details) - end - - break - end - end - end - end - end - end - end - else - # Reactive Router - content.scan(REGEX_ROUTER_CODE_BLOCK) do |route_code| - method_code = route_code[0] - method_code.scan(REGEX_ROUTE_CODE_LINE) do |match| - next if match.size != 4 - method = match[2] - endpoint = match[3].gsub(/\n/, "") - details = Details.new(PathInfo.new(path)) - @result << Endpoint.new("#{url}#{endpoint}", method, details) - end - end - end - end - end - Fiber.yield - - @result - end - - def get_mapping_path(parser : JavaParser, tokens : Array(Token), mapping_token_index : Int32) - # 1. Search for the value of the @xxxxxMapping annotation - # 2. If the value is a string literal, return it - # 3. If the value is an array, return each element - # 4. Other case return empty array - url_paths = Array(String).new - mapping_parameters = parser.parse_formal_parameters(tokens, mapping_token_index) - if mapping_parameters[0].size != 0 - path_argument_index = 0 - mapping_parameters.each_with_index do |mapping_parameter, index| - if mapping_parameter[0].type == :IDENTIFIER && mapping_parameter[0].value == "value" - path_argument_index = index - end - end - - path_parameter_tokens = mapping_parameters[path_argument_index] - if path_parameter_tokens[-1].type == :STRING_LITERAL - # @GetMapping("/abc") or @GetMapping(value = "/abc") - url_paths << path_parameter_tokens[-1].value[1..-2] - elsif path_parameter_tokens[-1].type == :RBRACE - # @GetMapping({"/abc", "/def"}) or @GetMapping(value = {"/abc", "/def"}) - i = path_parameter_tokens.size - 2 - while i > 0 - parameter_token = path_parameter_tokens[i] - if parameter_token.type == :LBRACE - break - elsif parameter_token.type == :COMMA - i -= 1 - next - elsif parameter_token.type == :STRING_LITERAL - url_paths << parameter_token.value[1..-2] - else - puts parameter_token.to_s - break - end - - i -= 1 - end - end - end - - url_paths - end - - def get_endpoint_parameters(parser : JavaParser, tokens : Array(Token), method_token_index : Int32, parameter_type : String|Nil) : Array(Param) - endpoint_parameters = Array(Param).new - parser.parse_formal_parameters(tokens, method_token_index).each do |formal_parameter_tokens| - next if formal_parameter_tokens.size == 0 - if formal_parameter_tokens[-1].type == :IDENTIFIER - if formal_parameter_tokens[0].type == :AT - if formal_parameter_tokens[1].value == "PathVariable" - next - elsif formal_parameter_tokens[1].value == "RequestBody" - if parameter_type.nil? - parameter_type = "json" - end - elsif formal_parameter_tokens[1].value == "RequestParam" - parameter_type = "query" - else - next # unknown parameter type - end - end - - if !parameter_type.nil? - default_value = "" - # @RequestParam(@RequestParam long time) -> time - parameter_name = formal_parameter_tokens[-1].value - if formal_parameter_tokens[-1].type != IDENTIFIER - if formal_parameter_tokens[2].type == :LPAREN - request_parameters = parser.parse_formal_parameters(tokens, formal_parameter_tokens[2].index) - request_parameters.each do |request_parameter_tokens| - parser.print_tokens request_parameter_tokens - if request_parameter_tokens.size > 2 - request_param_name = request_parameter_tokens[0].value - request_param_value = request_parameter_tokens[-1].value - - if request_param_name == "value" - # @RequestParam(value = "name") - parameter_name = request_param_value[1..-2] - elsif request_param_name == "defaultValue" - # @RequestParam(defaultValue = "defaultValue") - default_value = request_param_value[1..-2] - end - end - end - if formal_parameter_tokens[3].type == :STRING_LITERAL - # @RequestParam("name") -> name - parameter_name_token = formal_parameter_tokens[3] - parameter_name = parameter_name_token.value[1..-2] - end - end - end - - endpoint_parameters << Param.new(parameter_name, default_value, parameter_type) - end - end - end - - endpoint_parameters - end -end - -def analyzer_spring(options : Hash(Symbol, String)) - instance = AnalyzerSpring.new(options) - instance.analyze -end diff --git a/src/detector/detectors/java_spring.cr b/src/detector/detectors/java_spring.cr index 766e635b..49113004 100644 --- a/src/detector/detectors/java_spring.cr +++ b/src/detector/detectors/java_spring.cr @@ -2,13 +2,13 @@ require "../../models/detector" class DetectorJavaSpring < Detector def detect(filename : String, file_contents : String) : Bool - if ( - (filename.includes? "pom.xml") || (filename.ends_with? "build.gradle") - ) && (file_contents.includes? "org.springframework") - true - else - false + if (filename.ends_with? "build.gradle") && (file_contents.includes? "org.springframework") + return true + elsif (filename.ends_with? "pom.xml") && (file_contents.includes? "org.springframework") + return true end + + false end def set_name diff --git a/src/detector/detectors/kotlin_spring.cr b/src/detector/detectors/kotlin_spring.cr index 3ea4cffc..52f32abe 100644 --- a/src/detector/detectors/kotlin_spring.cr +++ b/src/detector/detectors/kotlin_spring.cr @@ -3,10 +3,12 @@ require "../../models/detector" class DetectorKotlinSpring < Detector def detect(filename : String, file_contents : String) : Bool if (filename.ends_with? "build.gradle.kts") && (file_contents.includes? "org.springframework") - true - else - false + return true + elsif (filename.ends_with? "pom.xml") && (file_contents.includes? "org.springframework") && (file_contents.includes? "org.jetbrains.kotlin") + return true end + + false end def set_name diff --git a/src/minilexers/java.cr b/src/minilexers/java.cr index 71beffa9..e9926f17 100644 --- a/src/minilexers/java.cr +++ b/src/minilexers/java.cr @@ -1,67 +1,67 @@ require "../models/minilexer/*" # Keywords -ABSTRACT = "abstract" -ASSERT = "assert" -BOOLEAN = "boolean" -BREAK = "break" -BYTE = "byte" -CASE = "case" -CATCH = "catch" -CHAR = "char" -CLASS = "class" -CONST = "const" -CONTINUE = "continue" -DEFAULT = "default" -DO = "do" -DOUBLE = "double" -ELSE = "else" -ENUM = "enum" -EXTENDS = "extends" -FINAL = "final" -FINALLY = "finally" -FLOAT = "float" -FOR = "for" -IF = "if" -GOTO = "goto" -IMPLEMENTS = "implements" -IMPORT = "import" -INSTANCEOF = "instanceof" -INT = "int" -INTERFACE = "interface" -LONG = "long" -NATIVE = "native" -NEW = "new" -PACKAGE = "package" -PRIVATE = "private" -PROTECTED = "protected" -PUBLIC = "public" -RETURN = "return" -SHORT = "short" -STATIC = "static" -STRICTFP = "strictfp" -SUPER = "super" -SWITCH = "switch" +ABSTRACT = "abstract" +ASSERT = "assert" +BOOLEAN = "boolean" +BREAK = "break" +BYTE = "byte" +CASE = "case" +CATCH = "catch" +CHAR = "char" +CLASS = "class" +CONST = "const" +CONTINUE = "continue" +DEFAULT = "default" +DO = "do" +DOUBLE = "double" +ELSE = "else" +ENUM = "enum" +EXTENDS = "extends" +FINAL = "final" +FINALLY = "finally" +FLOAT = "float" +FOR = "for" +IF = "if" +GOTO = "goto" +IMPLEMENTS = "implements" +IMPORT = "import" +INSTANCEOF = "instanceof" +INT = "int" +INTERFACE = "interface" +LONG = "long" +NATIVE = "native" +NEW = "new" +PACKAGE = "package" +PRIVATE = "private" +PROTECTED = "protected" +PUBLIC = "public" +RETURN = "return" +SHORT = "short" +STATIC = "static" +STRICTFP = "strictfp" +SUPER = "super" +SWITCH = "switch" SYNCHRONIZED = "synchronized" -THIS = "this" -THROW = "throw" -THROWS = "throws" -TRANSIENT = "transient" -TRY = "try" -VOID = "void" -VOLATILE = "volatile" -WHILE = "while" +THIS = "this" +THROW = "throw" +THROWS = "throws" +TRANSIENT = "transient" +TRY = "try" +VOID = "void" +VOLATILE = "volatile" +WHILE = "while" # Module related keywords -MODULE = "module" -OPEN = "open" -REQUIRES = "requires" -EXPORTS = "exports" -OPENS = "opens" -TO = "to" -USES = "uses" -PROVIDES = "provides" -WITH = "with" +MODULE = "module" +OPEN = "open" +REQUIRES = "requires" +EXPORTS = "exports" +OPENS = "opens" +TO = "to" +USES = "uses" +PROVIDES = "provides" +WITH = "with" TRANSITIVE = "transitive" # Local Variable Type Inference @@ -74,22 +74,22 @@ YIELD = "yield" # reserved type name from Java 14 RECORD = "record" # Sealed Classes -SEALED = "sealed" -PERMITS = "permits" +SEALED = "sealed" +PERMITS = "permits" NON_SEALED = "non-sealed" # Literals -DECIMAL_LITERAL = /0|[1-9]([_\d]*\d)?[lL]?/ -HEX_LITERAL = /0[xX][0-9a-fA-F]([0-9a-fA-F_]*[0-9a-fA-F])?[lL]?/ -OCT_LITERAL = /0[0-7]([0-7_]*[0-7])?[lL]?/ -BINARY_LITERAL = /0[bB][01]([01_]*[01])?[lL]?/ -FLOAT_LITERAL = /((\d+\.\d*|\.\d+)([eE][+-]?\d+)?|[+-]?\d+[eE][+-]?\d+)[fFdD]?/ +DECIMAL_LITERAL = /0|[1-9]([_\d]*\d)?[lL]?/ +HEX_LITERAL = /0[xX][0-9a-fA-F]([0-9a-fA-F_]*[0-9a-fA-F])?[lL]?/ +OCT_LITERAL = /0[0-7]([0-7_]*[0-7])?[lL]?/ +BINARY_LITERAL = /0[bB][01]([01_]*[01])?[lL]?/ +FLOAT_LITERAL = /((\d+\.\d*|\.\d+)([eE][+-]?\d+)?|[+-]?\d+[eE][+-]?\d+)[fFdD]?/ HEX_FLOAT_LITERAL = /0[xX]([0-9a-fA-F]+(\.[0-9a-fA-F]*)?|\.[0-9a-fA-F]+)[pP][+-]?\d+[fFdD]?/ -BOOL_LITERAL = /true|false/ -CHAR_LITERAL = /'([^'\\\r\n]|\\['"\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^'"\r\n])*'/ -STRING_LITERAL = /"([^"\\\r\n]|\\["\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^"\r\n])*"/ -TEXT_BLOCK = /"""\s*(.|\\["\\bfnrt])*?\s*"""/ -NULL_LITERAL = "null" +BOOL_LITERAL = /true|false/ +CHAR_LITERAL = /'([^'\\\r\n]|\\['"\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^'"\r\n])*'/ +STRING_LITERAL = /"([^"\\\r\n]|\\["\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^"\r\n])*"/ +TEXT_BLOCK = /"""\s*(.|\\["\\bfnrt])*?\s*"""/ +NULL_LITERAL = "null" # Separators LPAREN = "(" @@ -98,75 +98,75 @@ LBRACE = "{" RBRACE = "}" LBRACK = "[" RBRACK = "]" -SEMI = ";" -COMMA = "," -DOT = "." +SEMI = ";" +COMMA = "," +DOT = "." # Operators -ASSIGN = "=" -GT = ">" -LT = "<" -BANG = "!" -TILDE = "~" +ASSIGN = "=" +GT = ">" +LT = "<" +BANG = "!" +TILDE = "~" QUESTION = "?" -COLON = ":" -EQUAL = "==" -LE = "<=" -GE = ">=" +COLON = ":" +EQUAL = "==" +LE = "<=" +GE = ">=" NOTEQUAL = "!=" -AND = "&&" -OR = "||" -INC = "++" -DEC = "--" -ADD = "+" -SUB = "-" -MUL = "*" -DIV = "/" -BITAND = "&" -BITOR = "|" -CARET = "^" -MOD = "%" +AND = "&&" +OR = "||" +INC = "++" +DEC = "--" +ADD = "+" +SUB = "-" +MUL = "*" +DIV = "/" +BITAND = "&" +BITOR = "|" +CARET = "^" +MOD = "%" -ADD_ASSIGN = "+=" -SUB_ASSIGN = "-=" -MUL_ASSIGN = "*=" -DIV_ASSIGN = "/=" -AND_ASSIGN = "&=" -OR_ASSIGN = "|=" -XOR_ASSIGN = "^=" -MOD_ASSIGN = "%=" -LSHIFT_ASSIGN = "<<=" -RSHIFT_ASSIGN = ">>=" +ADD_ASSIGN = "+=" +SUB_ASSIGN = "-=" +MUL_ASSIGN = "*=" +DIV_ASSIGN = "/=" +AND_ASSIGN = "&=" +OR_ASSIGN = "|=" +XOR_ASSIGN = "^=" +MOD_ASSIGN = "%=" +LSHIFT_ASSIGN = "<<=" +RSHIFT_ASSIGN = ">>=" URSHIFT_ASSIGN = ">>>=" # Java 8 tokens -ARROW = "->" +ARROW = "->" COLONCOLON = "::" # Additional symbols not defined in the lexical specification -AT = "@" +AT = "@" ELLIPSIS = "..." # Whitespace and comments -WS = /[ \t\r\n\x0C]+/ -COMMENT = /\/\*.*?\*\//m +WS = /[ \t\r\n\x0C]+/ +COMMENT = /\/\*.*?\*\//m LINE_COMMENT = /\/\/[^\r\n]*/ # Identifiers IDENTIFIER = /[a-zA-Z$_][a-zA-Z\d$_]*/ # Fragment rules -ExponentPart = /[eE][+-]?\d+/ +ExponentPart = /[eE][+-]?\d+/ EscapeSequence = /\\(?:u005c)?[btnfr"'\\]|\\u(?:[0-3]?[0-7])?[0-7]|\\u[0-9a-fA-F]{4}/ -HexDigits = /[0-9a-fA-F]([_0-9a-fA-F]*[0-9a-fA-F])?/ -HexDigit = /[0-9a-fA-F]/ -Digits = /\d([_\d]*\d)?/ -LetterOrDigit = /[a-zA-Z\d$_]/ -Letter = /[a-zA-Z$_]|[^[:ascii:]]/ +HexDigits = /[0-9a-fA-F]([_0-9a-fA-F]*[0-9a-fA-F])?/ +HexDigit = /[0-9a-fA-F]/ +Digits = /\d([_\d]*\d)?/ +LetterOrDigit = /[a-zA-Z\d$_]/ +Letter = /[a-zA-Z$_]|[^[:ascii:]]/ class JavaLexer < MiniLexer def initialize - super + super end def tokenize(@input : String) : Array(Token) @@ -177,30 +177,25 @@ class JavaLexer < MiniLexer while @position < @input.size before_skip_position = -1 while before_skip_position < @position - before_skip_position = @position skip_whitespace_and_comments + before_skip_position = @position end break if @position == @input.size case @input[@position] when '0'..'9' - t = 1 match_number when 'a'..'z', 'A'..'Z', '$', '_' - t = 2 match_identifier_or_keyword when '\'' - t = 3 match_char_literal when '"' - t = 4 match_string_literal_or_text_block else - t = 5 match_symbol_or_operator end end - + @tokens end @@ -232,21 +227,21 @@ class JavaLexer < MiniLexer end def match_number - if (match = @input.match(/(0[xX][0-9a-fA-F](_?[0-9a-fA-F])*[lL]?|\d(_?\d)*(\.\d(_?\d)*)?([eE][+-]?\d(_?\d)*)?[fFdD]?)/, @position)) + if match = @input.match(/0[xX][0-9a-fA-F](_?[0-9a-fA-F])*[lL]?|\d(_?\d)*(\.\d(_?\d)*)?([eE][+-]?\d(_?\d)*)?[fFdD]?/, @position) literal = match[0] self << case literal - when /^0[xX]/ - @position += literal.size - Tuple.new(:HEX_LITERAL, literal) - when /^0/ - @position += literal.size - Tuple.new(:OCT_LITERAL, literal) - when /^[\d.]/ - @position += literal.size - Tuple.new(:DECIMAL_LITERAL, literal) - else - @position += 1 - Tuple.new(:IDENTIFIER, @input[@position].to_s) + when /^0[xX]/ + @position += literal.size + Tuple.new(:HEX_LITERAL, literal) + when /^0/ + @position += literal.size + Tuple.new(:OCT_LITERAL, literal) + when /^[\d.]/ + @position += literal.size + Tuple.new(:DECIMAL_LITERAL, literal) + else + @position += 1 + Tuple.new(:IDENTIFIER, @input[@position].to_s) end else self << Tuple.new(:IDENTIFIER, @input[@position].to_s) @@ -257,74 +252,74 @@ class JavaLexer < MiniLexer def match_identifier_or_keyword if match = @input.match(/[a-zA-Z$_][a-zA-Z\d$_]*/, @position) type = case match[0] - when ABSTRACT then :ABSTRACT - when ASSERT then :ASSERT - when BOOLEAN then :BOOLEAN - when BREAK then :BREAK - when BYTE then :BYTE - when CASE then :CASE - when CATCH then :CATCH - when CHAR then :CHAR - when CLASS then :CLASS - when CONST then :CONST - when CONTINUE then :CONTINUE - when DEFAULT then :DEFAULT - when DO then :DO - when DOUBLE then :DOUBLE - when ELSE then :ELSE - when ENUM then :ENUM - when EXTENDS then :EXTENDS - when FINAL then :FINAL - when FINALLY then :FINALLY - when FLOAT then :FLOAT - when FOR then :FOR - when IF then :IF - when GOTO then :GOTO - when IMPLEMENTS then :IMPLEMENTS - when IMPORT then :IMPORT - when INSTANCEOF then :INSTANCEOF - when INT then :INT - when INTERFACE then :INTERFACE - when LONG then :LONG - when NATIVE then :NATIVE - when NEW then :NEW - when PACKAGE then :PACKAGE - when PRIVATE then :PRIVATE - when PROTECTED then :PROTECTED - when PUBLIC then :PUBLIC - when RETURN then :RETURN - when SHORT then :SHORT - when STATIC then :STATIC - when STRICTFP then :STRICTFP - when SUPER then :SUPER - when SWITCH then :SWITCH - when SYNCHRONIZED then :SYNCHRONIZED - when THIS then :THIS - when THROW then :THROW - when THROWS then :THROWS - when TRANSIENT then :TRANSIENT - when TRY then :TRY - when VOID then :VOID - when VOLATILE then :VOLATILE - when WHILE then :WHILE - when MODULE then :MODULE - when OPEN then :OPEN - when REQUIRES then :REQUIRES - when EXPORTS then :EXPORTS - when OPENS then :OPENS - when TO then :TO - when USES then :USES - when PROVIDES then :PROVIDES - when WITH then :WITH - when TRANSITIVE then :TRANSITIVE - when VAR then :VAR - when YIELD then :YIELD - when RECORD then :RECORD - when SEALED then :SEALED - when PERMITS then :PERMITS - when NON_SEALED then :NON_SEALED - else :IDENTIFIER - end + when ABSTRACT then :ABSTRACT + when ASSERT then :ASSERT + when BOOLEAN then :BOOLEAN + when BREAK then :BREAK + when BYTE then :BYTE + when CASE then :CASE + when CATCH then :CATCH + when CHAR then :CHAR + when CLASS then :CLASS + when CONST then :CONST + when CONTINUE then :CONTINUE + when DEFAULT then :DEFAULT + when DO then :DO + when DOUBLE then :DOUBLE + when ELSE then :ELSE + when ENUM then :ENUM + when EXTENDS then :EXTENDS + when FINAL then :FINAL + when FINALLY then :FINALLY + when FLOAT then :FLOAT + when FOR then :FOR + when IF then :IF + when GOTO then :GOTO + when IMPLEMENTS then :IMPLEMENTS + when IMPORT then :IMPORT + when INSTANCEOF then :INSTANCEOF + when INT then :INT + when INTERFACE then :INTERFACE + when LONG then :LONG + when NATIVE then :NATIVE + when NEW then :NEW + when PACKAGE then :PACKAGE + when PRIVATE then :PRIVATE + when PROTECTED then :PROTECTED + when PUBLIC then :PUBLIC + when RETURN then :RETURN + when SHORT then :SHORT + when STATIC then :STATIC + when STRICTFP then :STRICTFP + when SUPER then :SUPER + when SWITCH then :SWITCH + when SYNCHRONIZED then :SYNCHRONIZED + when THIS then :THIS + when THROW then :THROW + when THROWS then :THROWS + when TRANSIENT then :TRANSIENT + when TRY then :TRY + when VOID then :VOID + when VOLATILE then :VOLATILE + when WHILE then :WHILE + when MODULE then :MODULE + when OPEN then :OPEN + when REQUIRES then :REQUIRES + when EXPORTS then :EXPORTS + when OPENS then :OPENS + when TO then :TO + when USES then :USES + when PROVIDES then :PROVIDES + when WITH then :WITH + when TRANSITIVE then :TRANSITIVE + when VAR then :VAR + when YIELD then :YIELD + when RECORD then :RECORD + when SEALED then :SEALED + when PERMITS then :PERMITS + when NON_SEALED then :NON_SEALED + else :IDENTIFIER + end self << Tuple.new(type, match[0]) @position += match[0].size @@ -333,7 +328,6 @@ class JavaLexer < MiniLexer @position += 1 end end - def match_char_literal if match = @input.match(/'([^'\\\r\n]|\\['"\\bfnrt]|\\u[0-9a-fA-F]{4}|\\[^'\r\n])*'/, @position) @@ -345,14 +339,14 @@ class JavaLexer < MiniLexer @position += 1 end end - + def match_string_literal_or_text_block if match = @input.match(/"""[ \t]*[\r\n](.|\\["\\bfnrt])*?[\r\n][ \t]*"""/, @position) self << Tuple.new(:TEXT_BLOCK, match[0]) @position += match[0].size elsif match = @input.match(/"[^"\\\r\n]*(\\["\\bfnrt][^"\\\r\n]*)*"/, @position) self << Tuple.new(:STRING_LITERAL, match[0]) - @position += match[0].size + @position += match[0].size else # impossible to reach here self << Tuple.new(:IDENTIFIER, @input[@position].to_s) @@ -362,20 +356,22 @@ class JavaLexer < MiniLexer def match_symbol_or_operator case @input[@position] - when '(' then self << Tuple.new(:LPAREN, "(") - when ')' then self << Tuple.new(:RPAREN, ")") - when ' ' then self << Tuple.new(:WHITESPACE, " ") - when '.' then self << Tuple.new(:DOT, ".") - when ',' then self << Tuple.new(:COMMA, ",") - when '@' then self << Tuple.new(:AT, "@") - when '{' then self << Tuple.new(:LBRACE, "{") - when '}' then self << Tuple.new(:RBRACE, "}") + when '(' then self << Tuple.new(:LPAREN, "(") + when ')' then self << Tuple.new(:RPAREN, ")") + when ' ' then self << Tuple.new(:WHITESPACE, " ") + when '.' then self << Tuple.new(:DOT, ".") + when ',' then self << Tuple.new(:COMMA, ",") + when '@' then self << Tuple.new(:AT, "@") + when '{' then self << Tuple.new(:LBRACE, "{") + when '}' then self << Tuple.new(:RBRACE, "}") when '\t' then self << Tuple.new(:TAB, "\t") - when '\n' then + when ';' then self << Tuple.new(:SEMI, ";") + when '=' then self << Tuple.new(:ASSIGN, "=") + when '\n' self << Tuple.new(:NEWLINE, "\n") else self << Tuple.new(:IDENTIFIER, @input[@position].to_s) end @position += 1 end -end \ No newline at end of file +end diff --git a/src/miniparsers/java.cr b/src/miniparsers/java.cr index d95f96c4..e0001c05 100644 --- a/src/miniparsers/java.cr +++ b/src/miniparsers/java.cr @@ -2,22 +2,69 @@ require "../minilexers/java" require "../models/minilexer/token" class JavaParser - def initialize + property classes_tokens : Array(Array(Token)) + property classes : Array(ClassModel) + property tokens : Array(Token) + property import_statements : Array(String) + property path : String + + def initialize(@path : String, @tokens : Array(Token)) @import_statements = Array(String).new @classes_tokens = Array(Array(Token)).new @classes = Array(ClassModel).new + + parse() end - def parse(tokens : Array(Token)) - parse_import_statements(tokens) - parse_classes(tokens) + def parse + parse_import_statements(@tokens) + parse_classes(@tokens) @classes_tokens.each do |class_tokens| - @classes << ClassModel.new(self, class_tokens) + name = get_class_name(class_tokens) + methods = parse_methods(class_tokens) + annotations = parse_annotations(@tokens, class_tokens[0].index) + fields = parse_fields(class_tokens, methods, annotations) + @classes << ClassModel.new(annotations, name, fields, methods, class_tokens) end end + def get_root_source_directory(path : String, package_name : String) + i = 0 + path = Path.new(path).parent + while i < package_name.split(".").size + path = path.parent + i += 1 + end + + path + end + + def get_package_name(tokens : Array(Token)) + package_start = false + tokens.each_with_index do |token, index| + if token.type == :PACKAGE + package_start = true + i = index + 1 + package_name = "" + while i < tokens.size + if tokens[i].type != :SEMI + if tokens[i].type == :IDENTIFIER || tokens[i].type == :DOT + package_name += tokens[i].value + end + else + return package_name + end + + i += 1 + end + break + end + end + + "" + end + def parse_import_statements(tokens : Array(Token)) - import_statements = [] of String import_tokens = tokens.select { |token| token.type == :IMPORT } import_tokens.each do |import_token| next_token_index = import_token.index + 2 @@ -26,7 +73,7 @@ class JavaParser if next_token && next_token.type == :IDENTIFIER import_statement = next_token.value next_token_index += 1 - + while next_token_index < tokens.size && tokens[next_token_index].type == :DOT next_token_index += 1 identifier_token = tokens[next_token_index] @@ -41,13 +88,30 @@ class JavaParser end end - def parse_formal_parameters(tokens : Array(Token), cursor : Int32) + def parse_formal_parameters(tokens : Array(Token), param_start_index : Int32) lparen_count = 0 rparen_count = 0 lbrace_count = 0 - rbrace_count = 0 + rbrace_count = 0 parameters = Array(Array(Token)).new parameter_token = Array(Token).new + return parameters if tokens.size <= param_start_index + + while param_start_index < tokens.size + if tokens[param_start_index].type == :WHITESPACE + param_start_index += 1 + elsif tokens[param_start_index].type == :TAB + param_start_index += 1 + elsif tokens[param_start_index].type == :NEWLINE + param_start_index += 1 + elsif tokens[param_start_index].type == :LPAREN + break + else + return parameters + end + end + + cursor = param_start_index while cursor < tokens.size token = tokens[cursor] if token.type == :LPAREN @@ -86,23 +150,20 @@ class JavaParser parameters end - def parse_annotations(tokens : Array(Token), declare_token_index : Int32) - skip_line = 0 - annotation_tokens = Array(Array(Token)).new + def parse_annotations(tokens : Array(Token), declare_token_index : Int32) + skip_line = 0 + annotation_tokens = Hash(String, AnnotationModel).new cursor = declare_token_index - 1 - annotation_token_last_index = -1 last_newline_index = -1 while cursor > 0 - token = tokens[cursor] - if tokens[cursor].type == :NEWLINE skip_line += 1 if skip_line == 1 last_newline_index = cursor end end - + if skip_line == 2 # :NEWLINE(cursor) @RequestMapping # :NEWLINE public class Controller(type param) @@ -119,41 +180,43 @@ class JavaParser end if starts_with_at - annotation_tokens << tokens[annotation_token_index..last_newline_index-1] + annotation_name = tokens[annotation_token_index + 1].value + annotation_params = parse_formal_parameters(tokens, annotation_token_index + 2) + annotation_tokens[annotation_name] = AnnotationModel.new(annotation_name, annotation_params, tokens[annotation_token_index - 1..last_newline_index - 1]) skip_line = 1 last_newline_index = cursor else break end end - + cursor -= 1 end - return annotation_tokens + annotation_tokens end - + def parse_classes(tokens : Array(Token)) - start_token_parse = false + start_token_parse = false class_body = Array(Token).new lbrace = rbrace = 0 tokens.each do |token| - if !start_token_parse && token.type == :CLASS && tokens[token.index+1].type == :WHITESPACE + if !start_token_parse && token.type == :CLASS && tokens[token.index + 1].type == :WHITESPACE start_token_parse = true - class_body = Array(Token).new - lbrace = rbrace = 0 + class_body = Array(Token).new + lbrace = rbrace = 0 end - + if start_token_parse if token.type == :LBRACE lbrace += 1 elsif token.type == :RBRACE rbrace += 1 end - + class_body << token - if lbrace > 0 && lbrace == rbrace + if lbrace > 0 && lbrace == rbrace @classes_tokens << class_body start_token_parse = false end @@ -162,73 +225,212 @@ class JavaParser end def get_class_name(tokens : Array(Token)) - tokens.each_with_index do |token, index| + has_token = false + tokens.each do |token| if token.index != 0 - if tokens[index - 1].type == :CLASS + if token.type == :CLASS + has_token = true + elsif has_token && token.type == :IDENTIFIER return token.value end end end - return "" + "" + end + + def parse_fields(class_tokens : Array(Token), methods : Hash(String, MethodModel), annotations : Hash(String, AnnotationModel)) + fields = Hash(String, FieldModel).new + class_body_start = false + + lbrace = 0 + rbrace = 0 + semi_indexs = Array(Int32).new + class_tokens.each_with_index do |token, index| + if token.type == :LBRACE + lbrace += 1 + elsif token.type == :RBRACE + rbrace += 1 + end + + if lbrace == rbrace + 1 + class_body_start = true + elsif class_body_start && lbrace == rbrace + break + end + + if class_body_start && token.type == :SEMI && class_tokens[index + 1].type == :NEWLINE + semi_indexs << index + end + end + + semi_indexs.each do |semi_index| + is_method_token = false + methods.values.each do |method| + method_start = method.@tokens[0].index + method_end = method.@tokens[-1].index + is_method_token = method_start <= semi_index && semi_index <= method_end + end + + if !is_method_token + assign_index = nil + field_name = "" + field_index = semi_index + while 0 < field_index + field_index -= 1 + token = class_tokens[field_index] + if token.type == :ASSIGN + assign_index = field_index + elsif token.type == :NEWLINE + # [access_modifier] [static] [final] type name [= initial value] ; + + if assign_index.nil? + field_name = class_tokens[semi_index - 1].value + else + field_name = class_tokens[assign_index - 1].value + end + + line_tokens = Array(Token).new + class_tokens[field_index + 1..semi_index - 1].each do |line_token| + next if line_token.type == :WHITESPACE || line_token.type == :TAB + line_tokens << line_token + end + + step = 0 + next if line_tokens.size == step + + is_static = false + is_final = false + modifier = "default" + if [:PRIVATE, :PUBLIC, :PROTECTED, :DEFAULT].index(line_tokens[step].type) + modifier = line_tokens[0].value + step += 1 + next if line_tokens.size == step + end + + if line_tokens[step].type == :STATIC + is_static = true + step += 1 + next if line_tokens.size == step + end + + if line_tokens[step].type == :FINAL + is_final = true + step += 1 + next if line_tokens.size == step + end + + # Only support common variable types + if ["int", "integer", "long", "string", "char", "boolean"].index(line_tokens[step].value.downcase) + field_type = line_tokens[step].value + field_name = line_tokens[step + 1].value + init_value = "" + if step + 3 < line_tokens.size && line_tokens[step + 2].type == :ASSIGN + line_tokens[step + 3..semi_index - 1].each do |init_token| + init_value += init_token.value # [TODO] currently support literal value only + end + end + + field = FieldModel.new(modifier, is_static, is_final, field_type, field_name, init_value) + + # getter, setter method + has_getter = false + has_setter = false + pascal_field_name = field_name[0].upcase + field_name[1..] + if methods.has_key?("get" + pascal_field_name) + has_getter = true + end + if methods.has_key?("set" + pascal_field_name) + has_setter = true + end + + # lombok annotaitons + if annotations.has_key?("Data") + has_getter = true + has_setter = true + else + has_getter = has_getter || annotations.has_key?("Getter") + has_setter = has_setter || annotations.has_key?("Setter") + end + + field.has_getter = has_getter + field.has_setter = has_setter + fields.put_if_absent(field.name, field) + end + + break + end + end + end + end + + fields end - def parse_methods(class_body_tokens : Array(Token)) + def parse_methods(class_tokens : Array(Token)) # 1. Skip first line (class declaration) # 2. Search ":RPAREN :LBRACE" or ":RPAREN throws :IDENTIFIER :LBRACE" pattern (method body entry point) # 3. Get method declaration from ":NEWLINE" to ":RPAREN" (method declaration) # 4. Get method body from ":LBRACE" to ":RBRACE" (method body) # 5. Repeat 2-4 until end of class body - methods = Array(Array(Token)).new + methods = Hash(String, MethodModel).new method_tokens = Array(Token).new - lbrace_count = rbrace_count = 0 + lbrace_count = rbrace_count = 0 lparen_count = rparen_count = 0 + method_name = nil enter_class_body = false enter_method_body = false - class_body_tokens.each_index do |index| - token = class_body_tokens[index] + method_name_index = -1 + class_tokens.each_index do |index| + token = class_tokens[index] if token.type == :NEWLINE && !enter_class_body # 1. Skip first line (class declaration) enter_class_body = true - elsif enter_class_body && !enter_method_body - lbrace_count = rbrace_count = 0 + elsif enter_class_body && !enter_method_body + lbrace_count = rbrace_count = 0 lparen_count = rparen_count = 0 if token.type == :LBRACE # 2. Search ":RPAREN :LBRACE" or ":RPAREN throws :IDENTIFIER :LBRACE" pattern (method body entry point) - lbrace_count = 1 + lbrace_count = 1 rbrace_count = 0 - lparen_count = rparen_count = 0 - + lparen_count = rparen_count = 0 + previous_token_index = index - 1 has_exception = false while 0 < previous_token_index - previous_token = class_body_tokens[previous_token_index] + previous_token = class_tokens[previous_token_index] if previous_token.type == :RPAREN rparen_count = 1 enter_method_body = true + # 3. Get method declaration from ":NEWLINE" to ":RPAREN" (method declaration) - method_declaration_index = previous_token_index - 1 - while 0 < method_declaration_index - method_declaration_token = class_body_tokens[method_declaration_index] + i = previous_token_index - 1 + while 0 < i + method_declaration_token = class_tokens[i] if method_declaration_token.type == :RPAREN - rparen_count += 1 + rparen_count += 1 elsif method_declaration_token.type == :LPAREN lparen_count += 1 - elsif rparen_count == lparen_count && method_declaration_token.type == :NEWLINE - method_tokens = class_body_tokens[method_declaration_index+1..index] - break - end - method_declaration_index -= 1 + elsif rparen_count == lparen_count + if method_name == nil && method_declaration_token.type == :IDENTIFIER + method_name = method_declaration_token.value + method_name_index = i + elsif method_declaration_token.type == :NEWLINE + method_tokens = class_tokens[i + 1..index] + break + end + end + i -= 1 end break - elsif previous_token.type == :WHITESPACE || previous_token.type == :TAB || previous_token.type == :NEWLINE + elsif previous_token.type == :WHITESPACE || previous_token.type == :TAB || previous_token.type == :NEWLINE previous_token_index -= 1 next elsif has_exception - break unless previous_token.type == :THROWS && previous_token.value == "throws" + break unless previous_token.type == :THROWS && previous_token.value == "throws" elsif previous_token.type == :IDENTIFIER has_exception = true else @@ -244,9 +446,15 @@ class JavaParser if token.type == :RBRACE rbrace_count += 1 if lbrace_count == rbrace_count - methods << method_tokens + annotations = parse_annotations(class_tokens, method_name_index) + if !method_name.nil? + methods[method_name] = MethodModel.new(method_name, parse_formal_parameters(class_tokens, method_name_index + 1), annotations, method_tokens) + end + + # reset method_tokens = Array(Token).new enter_method_body = false + method_name = nil end elsif token.type == :LBRACE lbrace_count += 1 @@ -256,8 +464,8 @@ class JavaParser methods end - - def print_tokens(tokens : Array(Token), id = "default") + + def print_tokens(tokens : Array(Token), id = "default", trace = false) puts("\n================ #{id} ===================") tokens.each do |token| print(token.value) @@ -265,26 +473,92 @@ class JavaParser print("(#{token.type})") end end - puts + + if trace + puts "" + tokens.each do |token| + print("#{token.value}(#{token.type})") + end + end + end +end + +class AnnotationModel + property name : String + property params : Array(Array(Token)) + property tokens : Array(Token) + + def initialize(@name : String, @params : Array(Array(Token)), @tokens : Array(Token)) end end class ClassModel - @parser : JavaParser - @name : String - @methods : Array(Array(Token)) - @tokens : Array(Token) - - def initialize(@parser : JavaParser, @tokens : Array(Token)) - @name = @parser.get_class_name(@tokens) - @methods = @parser.parse_methods(@tokens) + property name : String + property methods : Hash(String, MethodModel) + property fields : Hash(String, FieldModel) + property annotations : Hash(String, AnnotationModel) + property tokens : Array(Token) + + def initialize(@annotations, @name, @fields, @methods, @tokens : Array(Token)) + end +end + +class MethodModel + property name : String + property params : Array(Array(Token)) + property annotations : Hash(String, AnnotationModel) + property tokens : Array(Token) + + def initialize(@name, @params, @annotations, @tokens) end end -# file_path = "/Users/ksg/workspace/noir/spec/functional_test/fixtures/java_spring/src/ItemController.java" -# input = File.read(file_path) -# lexer = JavaLexer.new -# tokens = lexer.tokenize(input) -# lexer.trace -# parser = JavaParser.new -# parser.parse(tokens) +class FieldModel + property access_modifier : String + property? is_static : Bool + property? is_final : Bool + property type : String + property name : String + property init_value : String + property? has_getter : Bool + property? has_setter : Bool + + def initialize(@access_modifier, @is_static, @is_final, @type, @name, @init_value) + # [access_modifier] [static] [final] type name [= initial value] ; + @has_getter = false + @has_setter = false + end + + def has_getter=(value : Bool) + @has_getter = value + end + + def has_setter=(value : Bool) + @has_setter = value + end + + def to_s + l = @access_modifier + " " + if @is_static + l += "static " + end + + if @is_final + l += "final " + end + + l += "#{@type} #{@name}" + if @init_value != "" + l += " = \"#{@init_value}\"" + end + + if @has_getter + l += " (has_getter)" + end + if @has_setter + l += " (has_setter)" + end + + l + end +end diff --git a/src/models/endpoint.cr b/src/models/endpoint.cr index f104b2e1..a3838281 100644 --- a/src/models/endpoint.cr +++ b/src/models/endpoint.cr @@ -79,6 +79,12 @@ struct Details def add_path(code_path : PathInfo) @code_paths << code_path end + + def ==(other : Details) : Bool + return false if @code_paths.size != other.code_paths.size + return false unless @code_paths.all? { |path| other.code_paths.any? { |other_path| path == other_path } } + true + end end struct PathInfo @@ -92,6 +98,10 @@ struct PathInfo def initialize(@path : String, @line : Int32 | Nil) end + + def ==(other : PathInfo) : Bool + @path == other.path && @line == other.line + end end struct EndpointReference diff --git a/src/models/minilexer/minilexer.cr b/src/models/minilexer/minilexer.cr index 603e9acd..e8c1b02b 100644 --- a/src/models/minilexer/minilexer.cr +++ b/src/models/minilexer/minilexer.cr @@ -14,7 +14,7 @@ class MiniLexer @mode = mode end - def line() : Int + def line : Int pos_index = 0 line_index = 1 i = @pos_line_array.size - 1 @@ -22,18 +22,18 @@ class MiniLexer pos = @pos_line_array[i][pos_index] line = @pos_line_array[i][line_index] if pos < @position - return line + @input[pos+1..@position].count("\n") + return line + @input[pos + 1..@position].count("\n") end i -= 1 end - line = @input[0..@position].count("\n") + 1 - @pos_line_array << Tuple.new(@position, line) + line = @input[0..@position].count("\n") + 1 + @pos_line_array << Tuple.new(@position, line) - line + line end - def <<(t : Tuple(Symbol, String)) + def <<(t : Tuple(Symbol, String)) @tokens << Token.new(t[0], t[1], @tokens.size, @position, line()) end @@ -56,14 +56,14 @@ class MiniLexer @tokens.select { |token| token.type == token_type } end - def trace() + def trace line_number = 1 source_line = "" lines = @input.split "\n" puts "line size: #{lines.size}, token number: #{tokens.size}" - @tokens.each do |token| + @tokens.each do |token| if token.line == line_number - puts "\nLine #{line_number}: " + lines[line_number-1] + puts "\nLine #{line_number}: " + lines[line_number - 1] line_number += 1 end puts token.to_s diff --git a/src/models/minilexer/token.cr b/src/models/minilexer/token.cr index 3cf790bf..f3d6a79c 100644 --- a/src/models/minilexer/token.cr +++ b/src/models/minilexer/token.cr @@ -3,7 +3,7 @@ class Token property value : String property index : Int32 property position : Int32 - property line : Int32 + property line : Int32 def initialize(@type, @value, @index, @position, @line) end diff --git a/src/models/noir.cr b/src/models/noir.cr index 16453d22..fb0137b4 100644 --- a/src/models/noir.cr +++ b/src/models/noir.cr @@ -118,7 +118,10 @@ class NoirRunner if dup.method == tiny_tmp.method && dup.url == tiny_tmp.url is_new = false tiny_tmp.params.each do |param| - dup.params << param + existing_param = dup.params.find { |p| p.name == param.name } + unless existing_param + dup.params << param + end end end end From 81e9165f6131fdf0c18a87838b0b5146575af6e4 Mon Sep 17 00:00:00 2001 From: ksg Date: Tue, 5 Mar 2024 03:10:34 +0900 Subject: [PATCH 7/8] Remove whitespace trails --- src/analyzer/analyzers/analyzer_java_spring.cr | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/analyzer/analyzers/analyzer_java_spring.cr b/src/analyzer/analyzers/analyzer_java_spring.cr index 3a83b75b..2f9813c6 100644 --- a/src/analyzer/analyzers/analyzer_java_spring.cr +++ b/src/analyzer/analyzers/analyzer_java_spring.cr @@ -14,11 +14,11 @@ class AnalyzerJavaSpring < Analyzer url = "" if File.exists?(path) && path.ends_with?(".java") content = File.read(path, encoding: "utf-8", invalid: :skip) - + # Spring MVC Router (Controller) spring_web_bind_package = "org.springframework.web.bind.annotation." has_spring_web_bind_package_been_import = content.includes?(spring_web_bind_package) - if has_spring_web_bind_package_been_import + if has_spring_web_bind_package_been_import if parser_map.has_key?(path) parser = parser_map[path] tokens = parser.tokens @@ -93,13 +93,13 @@ class AnalyzerJavaSpring < Analyzer end package_map[package_directory] = package_class_map - end + end end - class_map = package_class_map.merge(import_map) + class_map = package_class_map.merge(import_map) parser.classes.each do |class_model| - class_annotation = class_model.annotations["RequestMapping"]? - if !class_annotation.nil? + class_annotation = class_model.annotations["RequestMapping"]? + if !class_annotation.nil? next if class_annotation.params.size == 0 class_path_token = class_annotation.params[0][-1] if class_path_token.type == :STRING_LITERAL From 49c0bad712ab71487f46b5347e3968ad0cd1cba6 Mon Sep 17 00:00:00 2001 From: ksg Date: Tue, 5 Mar 2024 21:58:13 +0900 Subject: [PATCH 8/8] feat: Improve parsing of Java Spring MVC code blocks and endpoints - Added support for RequestHeader annotation in endpoint parameters - Added support for HttpServletRequest argument in endpoint parameters --- .../java_spring/src/HttpServletRequest.java | 8 ++- .../java_spring/src/RequestParam.java | 3 +- .../testers/java_spring_spec.cr | 3 + .../analyzers/analyzer_java_spring.cr | 61 +++++++++++++++---- src/miniparsers/java.cr | 12 ++-- 5 files changed, 70 insertions(+), 17 deletions(-) diff --git a/spec/functional_test/fixtures/java_spring/src/HttpServletRequest.java b/spec/functional_test/fixtures/java_spring/src/HttpServletRequest.java index e66e3f42..bf25b963 100644 --- a/spec/functional_test/fixtures/java_spring/src/HttpServletRequest.java +++ b/spec/functional_test/fixtures/java_spring/src/HttpServletRequest.java @@ -1,3 +1,4 @@ +package com.test; import javax.servlet.http.HttpServletRequest; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RestController; @@ -11,6 +12,11 @@ public String greet(HttpServletRequest request) { if (name == null || name.isEmpty()) { name = "World"; } - return "Hello, " + name + "!"; + + String header = request.getHeader("header"); + if (header == null || header.isEmpty()) { + header = "!"; + } + return "Hello, " + name + header; } } diff --git a/spec/functional_test/fixtures/java_spring/src/RequestParam.java b/spec/functional_test/fixtures/java_spring/src/RequestParam.java index de3f1de6..83dfc19e 100644 --- a/spec/functional_test/fixtures/java_spring/src/RequestParam.java +++ b/spec/functional_test/fixtures/java_spring/src/RequestParam.java @@ -1,3 +1,4 @@ +package com.test; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; @@ -6,7 +7,7 @@ public class MyController { @GetMapping("/greet2") - public String greet2(@RequestParam("myname") String a, @RequestParam("b") int b) String name) { + public String greet2(@RequestParam("myname") String a, @RequestParam("b") int b, String name) { return "Hello, " + a + b"!"; } } diff --git a/spec/functional_test/testers/java_spring_spec.cr b/spec/functional_test/testers/java_spring_spec.cr index 35482c20..04cfa8ed 100644 --- a/spec/functional_test/testers/java_spring_spec.cr +++ b/spec/functional_test/testers/java_spring_spec.cr @@ -21,9 +21,12 @@ extected_endpoints = [ Endpoint.new("/items/delete/{id}", "DELETE"), Endpoint.new("/greet", "GET", [ Param.new("name", "", "query"), + Param.new("header", "", "header"), ]), Endpoint.new("/greet2", "GET", [ Param.new("myname", "", "query"), + Param.new("b", "", "query"), + Param.new("name", "", "query"), ]), ] diff --git a/src/analyzer/analyzers/analyzer_java_spring.cr b/src/analyzer/analyzers/analyzer_java_spring.cr index 2f9813c6..c1e2d6df 100644 --- a/src/analyzer/analyzers/analyzer_java_spring.cr +++ b/src/analyzer/analyzers/analyzer_java_spring.cr @@ -49,7 +49,7 @@ class AnalyzerJavaSpring < Analyzer end _parser.classes.each do |package_class| - import_map.put_if_absent(package_class.name, package_class) + import_map[package_class.name] = package_class end end end @@ -61,12 +61,12 @@ class AnalyzerJavaSpring < Analyzer _parser = get_parser(source_path, _content) parser_map[source_path.to_s] = _parser _parser.classes.each do |package_class| - import_map.put_if_absent(package_class.name, package_class) + import_map[package_class.name] = package_class end else _parser = parser_map[source_path.to_s] _parser.classes.each do |package_class| - import_map.put_if_absent(package_class.name, package_class) + import_map[package_class.name] = package_class end end end @@ -85,11 +85,11 @@ class AnalyzerJavaSpring < Analyzer end _parser.classes.each do |package_class| - package_class_map.put_if_absent(package_class.name, package_class) + package_class_map[package_class.name] = package_class end parser.classes.each do |package_class| - package_class_map.put_if_absent(package_class.name, package_class) + package_class_map[package_class.name] = package_class end package_map[package_directory] = package_class_map @@ -147,14 +147,14 @@ class AnalyzerJavaSpring < Analyzer if request_method.nil? # If the method is not annotated with @RequestMapping, then 5 methods are allowed ["GET", "POST", "PUT", "DELETE", "PATCH"].each do |_request_method| - parameters = get_endpoint_parameters(parser, _request_method, method.params, parameter_format, class_map) + parameters = get_endpoint_parameters(parser, _request_method, method, parameter_format, class_map) url_paths.each do |url_path| @result << Endpoint.new("#{url}#{url_path}", _request_method, parameters, details) end end else url_paths.each do |url_path| - parameters = get_endpoint_parameters(parser, request_method, method.params, parameter_format, class_map) + parameters = get_endpoint_parameters(parser, request_method, method, parameter_format, class_map) @result << Endpoint.new("#{url}#{url_path}", request_method, parameters, details) end end @@ -168,7 +168,7 @@ class AnalyzerJavaSpring < Analyzer if parameter_format.nil? && request_method == "POST" parameter_format = "form" end - parameters = get_endpoint_parameters(parser, request_method, method.params, parameter_format, class_map) + parameters = get_endpoint_parameters(parser, request_method, method, parameter_format, class_map) url_paths = [""] if method_annotation.params.size > 0 @@ -248,10 +248,9 @@ class AnalyzerJavaSpring < Analyzer url_paths end - def get_endpoint_parameters(parser : JavaParser, request_method : String, method_params : Array(Array(Token)), parameter_format : String | Nil, package_class_map : Hash(String, ClassModel)) : Array(Param) + def get_endpoint_parameters(parser : JavaParser, request_method : String, method : MethodModel, parameter_format : String | Nil, package_class_map : Hash(String, ClassModel)) : Array(Param) endpoint_parameters = Array(Param).new - - method_params.each do |method_param_tokens| + method.params.each do |method_param_tokens| next if method_param_tokens.size == 0 if method_param_tokens[-1].type == :IDENTIFIER if method_param_tokens[0].type == :AT @@ -263,6 +262,8 @@ class AnalyzerJavaSpring < Analyzer end elsif method_param_tokens[1].value == "RequestParam" parameter_format = "query" + elsif method_param_tokens[1].value == "RequestHeader" + parameter_format = "header" end end @@ -298,10 +299,48 @@ class AnalyzerJavaSpring < Analyzer end end + argument_name = method_param_tokens[-1].value parameter_type = method_param_tokens[-2].value if ["long", "int", "integer", "char", "boolean", "string", "multipartfile"].index(parameter_type.downcase) param_default_value = default_value.nil? ? "" : default_value endpoint_parameters << Param.new(parameter_name, param_default_value, parameter_format) + elsif parameter_type == "HttpServletRequest" + i = 0 + while i < method.body.size - 6 + if [:TAB, :WHITESPACE, :NEWLINE].index(method.body[i].type) + i += 1 + next + end + + next if method.body[i].type == :WHITESPACE + next if method.body[i].type == :NEWLINE + + if method.body[i].type == :IDENTIFIER && method.body[i].value == argument_name + if method.body[i + 1].type == :DOT + if method.body[i + 2].type == :IDENTIFIER && method.body[i + 3].type == :LPAREN + servlet_request_method_name = method.body[i + 2].value + if method.body[i + 4].type == :STRING_LITERAL + parameter_name = method.body[i + 4].value[1..-2] + if servlet_request_method_name == "getParameter" + unless endpoint_parameters.any? { |param| param.name == parameter_name } + endpoint_parameters << Param.new(parameter_name, "", parameter_format) + end + i += 6 + next + elsif servlet_request_method_name == "getHeader" + unless endpoint_parameters.any? { |param| param.name == parameter_name } + endpoint_parameters << Param.new(parameter_name, "", "header") + end + i += 6 + next + end + end + end + end + end + + i += 1 + end else # custom class" if package_class_map.has_key?(parameter_type) diff --git a/src/miniparsers/java.cr b/src/miniparsers/java.cr index e0001c05..6f1fbe27 100644 --- a/src/miniparsers/java.cr +++ b/src/miniparsers/java.cr @@ -355,7 +355,7 @@ class JavaParser field.has_getter = has_getter field.has_setter = has_setter - fields.put_if_absent(field.name, field) + fields[field.name] = field end break @@ -383,6 +383,7 @@ class JavaParser enter_class_body = false enter_method_body = false method_name_index = -1 + method_body_index = -1 class_tokens.each_index do |index| token = class_tokens[index] if token.type == :NEWLINE && !enter_class_body @@ -396,7 +397,7 @@ class JavaParser lbrace_count = 1 rbrace_count = 0 lparen_count = rparen_count = 0 - + method_body_index = index previous_token_index = index - 1 has_exception = false while 0 < previous_token_index @@ -448,7 +449,9 @@ class JavaParser if lbrace_count == rbrace_count annotations = parse_annotations(class_tokens, method_name_index) if !method_name.nil? - methods[method_name] = MethodModel.new(method_name, parse_formal_parameters(class_tokens, method_name_index + 1), annotations, method_tokens) + method_params = parse_formal_parameters(class_tokens, method_name_index + 1) + method_body = class_tokens[method_body_index + 1..index - 1] + methods[method_name] = MethodModel.new(method_name, method_params, annotations, method_tokens, method_body) end # reset @@ -508,8 +511,9 @@ class MethodModel property params : Array(Array(Token)) property annotations : Hash(String, AnnotationModel) property tokens : Array(Token) + property body : Array(Token) - def initialize(@name, @params, @annotations, @tokens) + def initialize(@name, @params, @annotations, @tokens, @body) end end