diff --git a/src/Carbunql.LexicalAnalyzer/Lexer.Comment.cs b/src/Carbunql.LexicalAnalyzer/Lexer.Comment.cs index b3edb19e..c542236a 100644 --- a/src/Carbunql.LexicalAnalyzer/Lexer.Comment.cs +++ b/src/Carbunql.LexicalAnalyzer/Lexer.Comment.cs @@ -38,28 +38,28 @@ private static bool TryParseCommentStartLex(ReadOnlyMemory memory, ref int return false; } - /// - /// Parses and removes comments until a non-comment Lex is reached, starting from the specified non-comment state. - /// - /// The string to be parsed. - /// The previous Lex indicating a non-comment state, or null if no previous state exists. - /// An enumeration of Lexes after comments have been removed. - public static IEnumerable ParseUntilNonComment(ReadOnlyMemory memory, Lex? previous = null) - { - // Invalid if the previous Lex is in a comment state - if (previous?.Type == LexType.LineCommentStart - || previous?.Type == LexType.BlockCommentStart - || previous?.Type == LexType.HitCommentStart - || previous?.Type == LexType.Comment) - { - throw new InvalidOperationException("Previous Lex must be in a non-comment state."); - } - - // Start position is 0 if previous is null - int position = previous?.EndPosition ?? 0; - - return ParseUntilNonComment(memory, position); - } + ///// + ///// Parses and removes comments until a non-comment Lex is reached, starting from the specified non-comment state. + ///// + ///// The string to be parsed. + ///// The previous Lex indicating a non-comment state, or null if no previous state exists. + ///// An enumeration of Lexes after comments have been removed. + //public static IEnumerable ParseUntilNonComment(ReadOnlyMemory memory, Lex? previous = null) + //{ + // // Invalid if the previous Lex is in a comment state + // if (previous?.Type == LexType.LineCommentStart + // || previous?.Type == LexType.BlockCommentStart + // || previous?.Type == LexType.HitCommentStart + // || previous?.Type == LexType.Comment) + // { + // throw new InvalidOperationException("Previous Lex must be in a non-comment state."); + // } + + // // Start position is 0 if previous is null + // int position = previous?.EndPosition ?? 0; + + // return ParseUntilNonComment(memory, position); + //} public static IEnumerable ParseUntilNonComment(ReadOnlyMemory memory, int position) { @@ -105,8 +105,7 @@ private static bool TryParseBlockCommentStart(ReadOnlyMemory memory, ref i { lex = default; - // Must be at least 4 characters (minimum comment /**/) - if (memory.HasFewerThanChars(position, 4)) + if (memory.HasFewerThanChars(position, 2)) { return false; } @@ -116,7 +115,7 @@ private static bool TryParseBlockCommentStart(ReadOnlyMemory memory, ref i if (memory.Span[position] == '/' && memory.Span[position + 1] == '*') { // Check for /*+ - if (memory.Span[position + 2] == '+') + if (memory.HasChar(position + 2) && memory.Span[position + 2] == '+') { position += 3; lex = new Lex(memory, LexType.HitCommentStart, start, position - start); @@ -152,7 +151,7 @@ private static Lex ParseLineComment(ReadOnlyMemory memory, ref int positio var start = position; // exclude line comment end symbol - while (position < memory.Length) + while (!memory.IsAtEnd(position)) { char current = memory.Span[position]; diff --git a/src/Carbunql.LexicalAnalyzer/Lexer.Paren.cs b/src/Carbunql.LexicalAnalyzer/Lexer.Paren.cs new file mode 100644 index 00000000..37df3a10 --- /dev/null +++ b/src/Carbunql.LexicalAnalyzer/Lexer.Paren.cs @@ -0,0 +1,18 @@ +using System.Diagnostics.CodeAnalysis; + +namespace Carbunql.LexicalAnalyzer; + +public static partial class Lexer +{ + [MemberNotNullWhen(true)] + private static bool TryParseLeftParen(ReadOnlyMemory memory, ref int position, out Lex lex) + { + return TryParseSingleCharLex(memory, ref position, '(', LexType.LeftParen, out lex); + } + + private static Lex ParseRightParen(ReadOnlyMemory memory, ref int position) + { + if (TryParseSingleCharLex(memory, ref position, ')', LexType.RightParen, out var lex)) return lex; + throw new FormatException($"Expected a closing parenthesis ')' at position {position} in the input string."); + } +} diff --git a/src/Carbunql.LexicalAnalyzer/Lexer.Value.cs b/src/Carbunql.LexicalAnalyzer/Lexer.Value.cs index 3b50f944..6c6d09ef 100644 --- a/src/Carbunql.LexicalAnalyzer/Lexer.Value.cs +++ b/src/Carbunql.LexicalAnalyzer/Lexer.Value.cs @@ -21,70 +21,70 @@ private static bool TryParseWildCard(ReadOnlyMemory memory, ref int positi return TryParseSingleCharLex(memory, ref position, '*', LexType.WildCard, out lex); } - [MemberNotNullWhen(true)] - public static bool TryParseLetter(ReadOnlyMemory memory, ref int position, out Lex lex) - { - lex = default; + //[MemberNotNullWhen(true)] + //public static bool TryParseLetter(ReadOnlyMemory memory, ref int position, out Lex lex) + //{ + // lex = default; - if (memory.Length < position + 1) - { - return false; - } + // if (memory.Length < position + 1) + // { + // return false; + // } - int start = position; + // int start = position; - while (position < memory.Length) - { - char current = memory.Span[position]; + // while (position < memory.Length) + // { + // char current = memory.Span[position]; - if (char.IsLetter(current) || current == '_') - { - position++; - continue; - } + // if (char.IsLetter(current) || current == '_') + // { + // position++; + // continue; + // } - break; - } + // break; + // } - // If no letter were found - if (start == position) - { - return false; - } + // // If no letter were found + // if (start == position) + // { + // return false; + // } - //check next lex - int tempPos = position; - SkipWhiteSpaces(memory, ref tempPos); + // //check next lex + // int tempPos = position; + // SkipWhiteSpaces(memory, ref tempPos); - LexType lexType; - if (tempPos < memory.Length) - { - char nextChar = memory.Span[tempPos]; - if (nextChar == '.') - { - lexType = LexType.SchemaOrTableOrColumn; - } - else if (nextChar == ',' || char.IsWhiteSpace(nextChar)) - { - lexType = LexType.Column; - } - else if (nextChar == '(') - { - lexType = LexType.Function; - } - else - { - lexType = LexType.Value; - } - } - else - { - lexType = LexType.Value; - } + // LexType lexType; + // if (tempPos < memory.Length) + // { + // char nextChar = memory.Span[tempPos]; + // if (nextChar == '.') + // { + // lexType = LexType.SchemaOrTableOrColumn; + // } + // else if (nextChar == ',' || char.IsWhiteSpace(nextChar)) + // { + // lexType = LexType.Column; + // } + // else if (nextChar == '(') + // { + // lexType = LexType.Function; + // } + // else + // { + // lexType = LexType.Value; + // } + // } + // else + // { + // lexType = LexType.Value; + // } - lex = new Lex(memory, lexType, start, position - start); - return true; - } + // lex = new Lex(memory, lexType, start, position - start); + // return true; + //} //[MemberNotNullWhen(true)] //public static bool TryParseLetterValueLex(ReadOnlyMemory memory, ref int position, out Lex lex) diff --git a/src/Carbunql.LexicalAnalyzer/Lexer.cs b/src/Carbunql.LexicalAnalyzer/Lexer.cs index 4c40ac81..43d2eca2 100644 --- a/src/Carbunql.LexicalAnalyzer/Lexer.cs +++ b/src/Carbunql.LexicalAnalyzer/Lexer.cs @@ -14,25 +14,25 @@ public static IEnumerable ReadExpressionLexes(ReadOnlyMemory memory, yield break; } - //first charator "*" is all column wild card. Lex lex; - if (TryParseWildCard(memory, ref position, out lex)) - { - yield return lex; - yield break; - } - while (!memory.IsAtEnd(position)) { + // wildcard + if (TryParseWildCard(memory, ref position, out lex)) + { + yield return lex; + break; + } + // value if (TryParseSingleQuotedText(memory, ref position, out lex) || TryParseNumericValue(memory, ref position, out lex) || TryParseSpecialValue(memory, ref position, out lex)) { yield return lex; - SkipWhiteSpacesAndComment(memory, ref position); - // operator + // operator check + SkipWhiteSpacesAndComment(memory, ref position); if (TryParseOperator(memory, ref position, out lex)) { yield return lex; @@ -64,9 +64,9 @@ public static IEnumerable ReadExpressionLexes(ReadOnlyMemory memory, throw new FormatException(); } yield return lex; - SkipWhiteSpacesAndComment(memory, ref position); - // operator + // operator check + SkipWhiteSpacesAndComment(memory, ref position); if (TryParseOperator(memory, ref position, out lex)) { yield return lex; @@ -80,64 +80,86 @@ public static IEnumerable ReadExpressionLexes(ReadOnlyMemory memory, if (TryParseLeftParen(memory, ref position, out lex)) { - // 左カッコが出現する可能性がある - // SELECT句が来る可能性がある - // あとは同じ + yield return lex; + SkipWhiteSpacesAndComment(memory, ref position); + + // 次のトークンがselectの場合、インラインクエリのため特殊 + + // それ以外は再帰処理 + foreach (var innerLex in ReadExpressionLexes(memory, position)) + { + yield return innerLex; + position = innerLex.EndPosition; + } + + lex = ParseRightParen(memory, ref position); + yield return lex; + + // operator check + SkipWhiteSpacesAndComment(memory, ref position); + if (TryParseOperator(memory, ref position, out lex)) + { + yield return lex; + SkipWhiteSpacesAndComment(memory, ref position); + continue; + } + + // alias, expression separator, or 'from' keyword + break; } } } + //public static Lex TokenizeAsQueryStart(ReadOnlyMemory memory) + //{ + // int position = 0; - public static Lex TokenizeAsQueryStart(ReadOnlyMemory memory) - { - int position = 0; + // SkipWhiteSpaces(memory, ref position); - SkipWhiteSpaces(memory, ref position); + // // Discard all comments before the query starts + // position = ParseUntilNonComment(memory, previous: null).LastOrDefault().EndPosition; + // SkipWhiteSpaces(memory, ref position); - // Discard all comments before the query starts - position = ParseUntilNonComment(memory, previous: null).LastOrDefault().EndPosition; - SkipWhiteSpaces(memory, ref position); + // if (memory.Length < position + 1) + // { + // throw new FormatException("The SQL string is empty or in an invalid format."); + // } - if (memory.Length < position + 1) - { - throw new FormatException("The SQL string is empty or in an invalid format."); - } + // // The first character must be a comment start or a reserved word; otherwise, it's an error. + // Lex lex; + // if (TryParseWithOrRecursiveLex(memory, ref position, out lex)) return lex; + // if (TryParseSelectLex(memory, ref position, out lex)) return lex; + // if (TryParseInsertLex(memory, ref position, out lex)) return lex; + // if (TryParseDeleteLex(memory, ref position, out lex)) return lex; + // if (TryParseUpdateLex(memory, ref position, out lex)) return lex; + // if (TryParseMergeLex(memory, ref position, out lex)) return lex; + // if (TryParseCreateLex(memory, ref position, out lex)) return lex; + // if (TryParseAlterLex(memory, ref position, out lex)) return lex; - // The first character must be a comment start or a reserved word; otherwise, it's an error. - Lex lex; - if (TryParseWithOrRecursiveLex(memory, ref position, out lex)) return lex; - if (TryParseSelectLex(memory, ref position, out lex)) return lex; - if (TryParseInsertLex(memory, ref position, out lex)) return lex; - if (TryParseDeleteLex(memory, ref position, out lex)) return lex; - if (TryParseUpdateLex(memory, ref position, out lex)) return lex; - if (TryParseMergeLex(memory, ref position, out lex)) return lex; - if (TryParseCreateLex(memory, ref position, out lex)) return lex; - if (TryParseAlterLex(memory, ref position, out lex)) return lex; - - throw new FormatException("An invalid token was encountered. Please check if the SQL statement is correct."); - } + // throw new FormatException("An invalid token was encountered. Please check if the SQL statement is correct."); + //} - public static Lex TokenizeIdentifier(ReadOnlyMemory memory, int position) - { - SkipWhiteSpaces(memory, ref position); + //public static Lex TokenizeIdentifier(ReadOnlyMemory memory, int position) + //{ + // SkipWhiteSpaces(memory, ref position); - if (memory.Length < position + 1) - { - throw new FormatException("The SQL string is empty or in an invalid format."); - } + // if (memory.Length < position + 1) + // { + // throw new FormatException("The SQL string is empty or in an invalid format."); + // } - // Assume some identifier can be retrieved - // Separators like commas or dots are not expected - Lex lex; - if (TryParseCommentStartLex(memory, ref position, out lex)) return lex; + // // Assume some identifier can be retrieved + // // Separators like commas or dots are not expected + // Lex lex; + // if (TryParseCommentStartLex(memory, ref position, out lex)) return lex; - if (TryParsePrefixNegationLex(memory, ref position, out lex)) return lex; - if (TryParseLeftParen(memory, ref position, out lex)) return lex; + // if (TryParsePrefixNegationLex(memory, ref position, out lex)) return lex; + // if (TryParseLeftParen(memory, ref position, out lex)) return lex; - if (TryParseValueLex(memory, ref position, out lex)) return lex; + // if (TryParseValueLex(memory, ref position, out lex)) return lex; - throw new FormatException("An invalid token was encountered. Please check if the SQL statement is correct."); - } + // throw new FormatException("An invalid token was encountered. Please check if the SQL statement is correct."); + //} @@ -217,11 +239,6 @@ private static bool TryParseIdentifierSeparator(ReadOnlyMemory memory, ref return TryParseSingleCharLex(memory, ref position, '.', LexType.IdentifierSeparator, out lex); } - [MemberNotNullWhen(true)] - private static bool TryParseLeftParen(ReadOnlyMemory memory, ref int position, out Lex lex) - { - return TryParseSingleCharLex(memory, ref position, '(', LexType.LeftParen, out lex); - } diff --git a/test/Carbunql.LexicalAnalyzer.Test/ExpressionParseTest.cs b/test/Carbunql.LexicalAnalyzer.Test/ExpressionParseTest.cs index 785bdc12..cb336845 100644 --- a/test/Carbunql.LexicalAnalyzer.Test/ExpressionParseTest.cs +++ b/test/Carbunql.LexicalAnalyzer.Test/ExpressionParseTest.cs @@ -217,4 +217,50 @@ public void LogicalOperators(string text, string[] expectedValues) Assert.Equal(expectedValues[i], lexes[i].Value); } } + + [Theory] + [InlineData("a + b", new[] { "a", "+", "b" })] + [InlineData("a - b", new[] { "a", "-", "b" })] + [InlineData("a * b", new[] { "a", "*", "b" })] + [InlineData("a / b", new[] { "a", "/", "b" })] + [InlineData("(a + b) * c", new[] { "(", "a", "+", "b", ")", "*", "c" })] + [InlineData("x * (y - z)", new[] { "x", "*", "(", "y", "-", "z", ")" })] + [InlineData("((a + b) * c) / d", new[] { "(", "(", "a", "+", "b", ")", "*", "c", ")", "/", "d" })] + public void ArithmeticOperators(string text, string[] expectedValues) + { + output.WriteLine($"Text: {text}"); + + var lexes = Lexer.ReadExpressionLexes(text.AsMemory(), 0).ToList(); + + foreach (var (lex, index) in lexes.Select((lex, index) => (lex, index))) + { + output.WriteLine($"[{index}] {lex.Value}"); + } + + Assert.Equal(expectedValues.Length, lexes.Count); + + for (int i = 0; i < expectedValues.Length; i++) + { + Assert.Equal(expectedValues[i], lexes[i].Value); + } + } + + [Theory] + [InlineData("a.value", new[] { "a", ".", "value" })] + [InlineData("table.column", new[] { "table", ".", "column" })] + [InlineData("schema.table.column", new[] { "schema", ".", "table", ".", "column" })] + [InlineData("(a.value + b.value) * c.value", new[] { "(", "a", ".", "value", "+", "b", ".", "value", ")", "*", "c", ".", "value" })] + [InlineData("x.value * (y.value - z.value)", new[] { "x", ".", "value", "*", "(", "y", ".", "value", "-", "z", ".", "value", ")" })] + [InlineData("((a.value + b.value) * c.value) / d.value", new[] { "(", "(", "a", ".", "value", "+", "b", ".", "value", ")", "*", "c", ".", "value", ")", "/", "d", ".", "value" })] + public void ColumnAccess(string text, string[] expectedValues) + { + var lexes = Lexer.ReadExpressionLexes(text.AsMemory(), 0).ToList(); + + Assert.Equal(expectedValues.Length, lexes.Count); + + for (int i = 0; i < expectedValues.Length; i++) + { + Assert.Equal(expectedValues[i], lexes[i].Value); + } + } } \ No newline at end of file