Skip to content

Commit

Permalink
Parse strings as namespace, column, or alias names
Browse files Browse the repository at this point in the history
Added functionality to parse strings as namespace names, column names, or alias names. Additionally, escape characters like double quotes are now supported. This enhances the flexibility of dynamic SQL query parsing and manipulation.
  • Loading branch information
mk3008 committed Nov 16, 2024
1 parent d33d8fc commit 96b1caf
Show file tree
Hide file tree
Showing 22 changed files with 546 additions and 166 deletions.
43 changes: 43 additions & 0 deletions src/Carbunql.LexicalAnalyzer/CharExtenstions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
namespace Carbunql.LexicalAnalyzer;

/// <summary>
/// Character classification helpers used by the lexical analyzer.
/// All lookups are backed by fixed, private tables declared in this class.
/// </summary>
internal static class CharExtenstions
{
    /// <summary>
    /// Defines a set of characters considered as symbols that terminate an identifier.
    /// </summary>
    private static readonly HashSet<char> Symbols = new()
    {
        // Arithmetic operators
        '+', '-', '*', '/', '%',
        // Brackets and braces
        '(', ')', '[', ']', '{', '}',
        // Special symbols
        '~', '@', '#', '$', '^', '&',
        // Other symbols
        '!', '?', ':', ';', ',', '.', '<', '>', '=', '|', '\\',
        // Quotation marks
        '`', '"', '\'',
    };

    /// <summary>
    /// The characters this class treats as white space: space, tab,
    /// carriage return and line feed.
    /// </summary>
    private static readonly HashSet<char> WhiteSpaces = new()
    {
        ' ',
        '\t',
        '\r',
        '\n',
    };

    /// <summary>
    /// Maps an opening escape character to the character that closes it.
    /// </summary>
    private static readonly Dictionary<char, char> ValueEscapePairs = new()
    {
        ['"'] = '"', // double quote
        ['['] = ']', // square brackets
        ['`'] = '`', // backquote
    };

    /// <summary>
    /// Looks up the closing escape character that pairs with <paramref name="c"/>.
    /// </summary>
    /// <param name="c">The candidate opening escape character.</param>
    /// <param name="closeChar">
    /// The matching closing character when the lookup succeeds; otherwise the
    /// default <see cref="char"/> value.
    /// </param>
    /// <returns><c>true</c> when <paramref name="c"/> is a known opening escape character.</returns>
    public static bool TryGetDbmsValueEscapeChar(this char c, out char closeChar)
        => ValueEscapePairs.TryGetValue(c, out closeChar);

    /// <summary>
    /// Tells whether <paramref name="c"/> is one of the white-space characters
    /// recognised by this class (space, tab, CR, LF).
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> when the character is in the white-space table.</returns>
    public static bool IsWhiteSpace(this char c)
        => WhiteSpaces.Contains(c);

    /// <summary>
    /// Tells whether <paramref name="c"/> is one of the symbol characters
    /// that terminate an identifier.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> when the character is in the symbol table.</returns>
    public static bool IsSymbols(this char c)
        => Symbols.Contains(c);
}
6 changes: 4 additions & 2 deletions src/Carbunql.LexicalAnalyzer/LexType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public enum LexType : byte

QueryTerminator,

As,
Alias,

Operator,

Expand All @@ -25,6 +25,8 @@ public enum LexType : byte
AlterTrigger,
AlterSequence,

Type,

//create command
CreateSchema,
CreateTable,
Expand Down Expand Up @@ -61,7 +63,7 @@ public enum LexType : byte
SchemaOrTableOrColumn,
Column,
Function,
Letter,
//Letter,

LineCommentStart,
BlockCommentStart,
Expand Down
102 changes: 77 additions & 25 deletions src/Carbunql.LexicalAnalyzer/Lexer.Expression.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Diagnostics.CodeAnalysis;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;

namespace Carbunql.LexicalAnalyzer;

Expand Down Expand Up @@ -77,33 +78,64 @@ public static IEnumerable<Lex> ReadExpressionLexes(ReadOnlyMemory<char> memory,
break;
}

if (TryParseCharactorValues(memory, position, out var lexes))
// type
if (TryParseDbType(memory, position, out lex, out position))
{
foreach (var item in lexes)
yield return lex;
break;
}

// charactor value
if (TryParseCharactorValue(memory, position, out lex, out position))
{
yield return lex;

if (lex.Type == LexType.Function)
{
position = item.EndPosition;
yield return item;
// open paren
memory.SkipWhiteSpacesAndComment(ref position);
yield return ParseLeftParen(memory, ref position);

if (item.Type == LexType.Function)
// read arguments
foreach (var argument in ReadExpressionLexes(memory, position))
{
// open paren
yield return ParseLeftParen(memory, ref position);
yield return argument;
position = argument.EndPosition;
}

// read arguments
foreach (var argument in ReadExpressionLexes(memory, position))
// close paren
memory.SkipWhiteSpacesAndComment(ref position);
yield return ParseRightParen(memory, ref position);
}
else if (lex.Type == LexType.Namespace)
{
do
{
memory.SkipWhiteSpacesAndComment(ref position);
if (memory.Span[position] != '.')
{
yield return argument;
position = argument.EndPosition;
throw new FormatException();
}
position++;
memory.SkipWhiteSpacesAndComment(ref position);

// close paren
yield return ParseRightParen(memory, ref position);
}
if (memory.TryParseWildCard(ref position, out lex))
{
yield return lex;
break;
}
if (!TryParseCharactorValue(memory, position, out lex, out position))
{
throw new FormatException();
}
if (!(lex.Type == LexType.Namespace || lex.Type == LexType.Column))
{
throw new FormatException();
}
yield return lex;
} while (lex.Type != LexType.Column);
}




// operator check
memory.SkipWhiteSpacesAndComment(ref position);
if (TryParseOperator(memory, ref position, out lex))
Expand All @@ -118,6 +150,7 @@ public static IEnumerable<Lex> ReadExpressionLexes(ReadOnlyMemory<char> memory,
}


throw new FormatException();
}
}

Expand All @@ -132,18 +165,37 @@ public static bool TryParseExpressionName(ReadOnlyMemory<char> memory, ref int p
}

memory.SkipWhiteSpacesAndComment(ref position);

var start = position;
memory.EqualsWordIgnoreCase(position, "as", out position);
memory.SkipWhiteSpacesAndComment(ref position);
if (start != position)
{
// "as" がある場合、必ずエイリアス名が取得できる

if (TryParseNamespaceOrColumn(memory, ref position, out lex))
memory.SkipWhiteSpacesAndComment(ref position);
start = position;
if (!TryGetCharacterEndPosition(memory, position, out position))
{
throw new FormatException();
}
lex = new Lex(memory, LexType.Column, start, position - start);
return true;
}
else
{
if (lex.Type == LexType.Column)
// "as" が省略されている場合、エイリアス名は無い可能性がある
if (!TryGetCharacterEndPosition(memory, position, out var p))
{
return true;
return false;
}
throw new FormatException();
var name = memory.Slice(start, p - start).ToString();
if (name.ToLowerInvariant() == "from")
{
return false;
}
position = p;
lex = new Lex(memory, LexType.Column, start, position - start, name);
return true;
}

return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ internal static bool TryParseOperator(ReadOnlyMemory<char> memory, ref int posit
return false;
}

private static readonly HashSet<char> Operators = new HashSet<char> { '*', '+', '-', '/', '=', '<', '>', '!', '^' };
private static readonly HashSet<char> Operators = new HashSet<char> { '*', '+', '-', '/', '=', '<', '>', '!', '^', ':' };

private static bool TryParseCharOperator(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
{
Expand Down
35 changes: 13 additions & 22 deletions src/Carbunql.LexicalAnalyzer/Lexer.Operator.StringOperator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@

public static partial class Lexer
{

/// <summary>
/// Word-form operators that TryParseStringOperator tries, one by one,
/// as case-insensitive keywords producing an Operator lex.
/// </summary>
/// <remarks>
/// Declared <c>readonly</c> because the reference is never reassigned in the
/// visible code; this matches the other static lookup sets in the lexer.
/// </remarks>
private static readonly HashSet<string> StringOperators = new HashSet<string>()
{
"like",
"not",
"and",
"or",
"as", // type cast "as"
};

private static bool TryParseStringOperator(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
{
Expand All @@ -15,28 +22,12 @@ private static bool TryParseStringOperator(ReadOnlyMemory<char> memory, ref int
return false;
}

// Check for "like" operator
if (memory.TryParseKeywordIgnoreCase(ref position, "like", LexType.Operator, out lex))
{
return true;
}

// Check for "not" operator
if (memory.TryParseKeywordIgnoreCase(ref position, "not", LexType.Operator, out lex))
{
return true;
}

// Check for "and" operator
if (memory.TryParseKeywordIgnoreCase(ref position, "and", LexType.Operator, out lex))
{
return true;
}

// Check for "or" operator
if (memory.TryParseKeywordIgnoreCase(ref position, "or", LexType.Operator, out lex))
foreach (var item in StringOperators)
{
return true;
if (memory.TryParseKeywordIgnoreCase(ref position, item, LexType.Operator, out lex))
{
return true;
}
}

// Check for "is" operator
Expand Down
Loading

0 comments on commit 96b1caf

Please sign in to comment.