-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implements query-first lex analysis only
- Loading branch information
Showing
20 changed files
with
2,358 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
namespace Carbunql.LexicalAnalyzer; | ||
|
||
/// <summary>
/// A single lexical token: a typed window (<see cref="Position"/>, <see cref="Length"/>)
/// over the source text held in <see cref="Memory"/>.
/// </summary>
public readonly struct Lex
{
    /// <summary>
    /// Creates a token whose <see cref="EndPosition"/> is <paramref name="position"/> + <paramref name="length"/>.
    /// </summary>
    /// <param name="memory">The full source text the token refers to.</param>
    /// <param name="type">The token classification.</param>
    /// <param name="position">Index of the token's first character within <paramref name="memory"/>.</param>
    /// <param name="length">Number of characters in the token.</param>
    public Lex(ReadOnlyMemory<char> memory, LexType type, int position, int length)
        : this(memory, type, position, length, position + length)
    {
    }

    /// <summary>
    /// Creates a token with an explicitly supplied <see cref="EndPosition"/>, which may differ
    /// from <paramref name="position"/> + <paramref name="length"/>.
    /// </summary>
    /// <param name="memory">The full source text the token refers to.</param>
    /// <param name="type">The token classification.</param>
    /// <param name="position">Index of the token's first character within <paramref name="memory"/>.</param>
    /// <param name="length">Number of characters in the token.</param>
    /// <param name="endPosition">The value stored in <see cref="EndPosition"/>.</param>
    public Lex(ReadOnlyMemory<char> memory, LexType type, int position, int length, int endPosition)
    {
        Memory = memory;
        Type = type;
        Position = position;
        Length = length;
        EndPosition = endPosition;
    }

    /// <summary>The full source text this token was cut from.</summary>
    public ReadOnlyMemory<char> Memory { get; }

    /// <summary>The token classification.</summary>
    public LexType Type { get; }

    /// <summary>Index of the token's first character within <see cref="Memory"/>.</summary>
    public int Position { get; }

    /// <summary>Number of characters in the token.</summary>
    public int Length { get; }

    /// <summary>End position recorded for the token (see the constructors).</summary>
    public int EndPosition { get; }

    /// <summary>
    /// Materializes the token text as a new string.
    /// </summary>
    /// <remarks>
    /// The previous version carried a <c>cachedValue</c> field that was always null: a readonly
    /// struct cannot assign it after construction, so the "cached" branch was unreachable.
    /// The field has been removed; every call slices <see cref="Memory"/> afresh, exactly as before.
    /// </remarks>
    /// <returns>The characters of <see cref="Memory"/> in the range [Position, Position + Length).</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <see cref="Position"/> and <see cref="Length"/> do not describe a range inside <see cref="Memory"/>.
    /// </exception>
    public string GetValue()
    {
        // Written as a subtraction so that Position + Length cannot overflow int.
        if (Position < 0 || Length < 0 || Position > Memory.Length - Length)
        {
            throw new ArgumentOutOfRangeException(nameof(Length), "Position and Length exceed memory bounds.");
        }

        return Memory.Slice(Position, Length).ToString();
    }
}
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
namespace Carbunql.LexicalAnalyzer; | ||
|
||
/// <summary>
/// Classification of a <c>Lex</c> token.
/// </summary>
/// <remarks>
/// Only <see cref="Unknown"/> has an explicit value. Earlier explicit numbering made unrelated
/// members share a value (for example <c>QueryTerminator</c>, <c>LineCommentStart</c> and
/// <c>CommentAfterWhiteSpaces</c> were all 1), so equality checks on the type matched the wrong
/// tokens. Members are now numbered implicitly in declaration order and are all distinct.
/// </remarks>
public enum LexType : byte
{
    /// <summary>
    /// Unknown or unrecognized token type.
    /// </summary>
    Unknown = 0,

    CommentAfterWhiteSpaces,

    /// <summary>
    /// Terminator for SQL queries, represented by a semicolon (;).
    /// </summary>
    QueryTerminator,

    //alter command
    AlterSchema,
    AlterTable,
    AlterView,
    AlterIndex,
    AlterFunction,
    AlterProcedure,
    AlterTrigger,
    AlterSequence,

    //create command
    CreateSchema,
    CreateTable,
    CreateTemporaryTable,
    CreateView,
    CreateIndex,
    CreateUniqueIndex,
    CreateFunction,
    CreateTrigger,
    CreateProcedure,
    CreateSequence,

    //insert, update, delete, merge command
    Insert,
    Update,
    Delete,
    Merge,

    Select,
    SelectDistinct,
    SelectDistinctOn,

    With,
    WithRecursive,

    PrefixNegation, // not, ~

    WildCard,
    Value,
    SchemaOrTableOrColumn,
    Column,
    Function,
    Letter,

    /// <summary>
    /// Start of a line comment, represented by two hyphens (--).
    /// </summary>
    LineCommentStart,

    /// <summary>
    /// Start of a block comment, represented by a slash and an asterisk (/*).
    /// </summary>
    BlockCommentStart,

    /// <summary>
    /// Start of a block comment that opens with /*+.
    /// NOTE(review): the name looks like a typo for "HintCommentStart"; kept as-is for callers.
    /// </summary>
    HitCommentStart,

    /// <summary>
    /// End of a block comment, represented by an asterisk and a slash (*/).
    /// </summary>
    BlockCommentEnd,

    /// <summary>
    /// The text of a comment.
    /// </summary>
    Comment,

    /// <summary>
    /// Separator for values, represented by a comma (,).
    /// </summary>
    ValueSeparator,

    /// <summary>
    /// Separator for identifiers, represented by a period (.).
    /// Used to separate schema, table, and column names (e.g., table.column).
    /// </summary>
    IdentifierSeparator,

    /// <summary>
    /// Left parenthesis, represented by an opening parenthesis (() used in expressions.
    /// </summary>
    LeftParen,

    /// <summary>
    /// Right parenthesis, represented by a closing parenthesis ()) used in expressions.
    /// </summary>
    RightParen,

    /// <summary>
    /// Left square bracket, represented by an opening bracket ([), often used for identifiers.
    /// </summary>
    LeftSquareBracket,

    /// <summary>
    /// Right square bracket, represented by a closing bracket (]), often used for identifiers.
    /// </summary>
    RightSquareBracket
}
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
using System.Diagnostics.CodeAnalysis; | ||
|
||
namespace Carbunql.LexicalAnalyzer; | ||
|
||
public static partial class Lexer
{
    /// <summary>
    /// Object keywords accepted after "alter", paired with the Lex type each one produces.
    /// Checked in this order.
    /// </summary>
    private static readonly (string Keyword, LexType Type)[] AlterTargets =
    {
        ("table", LexType.AlterTable),
        ("view", LexType.AlterView),
        ("index", LexType.AlterIndex),
        ("schema", LexType.AlterSchema),
        ("function", LexType.AlterFunction),
        ("procedure", LexType.AlterProcedure),
        ("trigger", LexType.AlterTrigger),
        ("sequence", LexType.AlterSequence),
    };

    /// <summary>
    /// Tries to read an "alter &lt;object&gt;" command at <paramref name="position"/>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">
    /// Index at which to look for "alter". Unchanged when the method returns false. When "alter" is
    /// found it is moved to the first character of the object keyword (as before, it is NOT moved
    /// past that keyword; use the returned Lex's end position to continue).
    /// </param>
    /// <param name="lex">
    /// On success, a Lex spanning from the "a" of "alter" to the end of the object keyword;
    /// otherwise <c>default</c>.
    /// </param>
    /// <returns>True if an alter command was recognized; false if "alter" is not at the position.</returns>
    /// <exception cref="NotSupportedException">
    /// "alter" was found but is not followed by one of the supported object keywords.
    /// </exception>
    private static bool TryParseAlterLex(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
    {
        lex = default;

        // Check for "alter" keyword
        if (!memory.EqualsWordIgnoreCase(position, "alter"))
        {
            return false;
        }

        var start = position;

        // Step over the keyword itself, then over however much whitespace separates it from the
        // object keyword. The previous fixed "+ 6" assumed exactly one separator character, so
        // "alter  table" failed and "alter(table" was accepted.
        position += "alter".Length;
        var span = memory.Span;
        while (position < span.Length && char.IsWhiteSpace(span[position]))
        {
            position++;
        }

        foreach (var (keyword, type) in AlterTargets)
        {
            if (memory.EqualsWordIgnoreCase(position, keyword))
            {
                // Length is derived from the keyword instead of a hand-counted constant.
                lex = new Lex(memory, type, start, position + keyword.Length - start);
                return true;
            }
        }

        throw new NotSupportedException($"Unsupported lex type encountered at position {position}. " +
            $"Expected types: 'table', 'view', 'index', 'schema', 'function', " +
            $" 'procedure', 'trigger', or 'sequence'.");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
using System.Diagnostics.CodeAnalysis; | ||
|
||
namespace Carbunql.LexicalAnalyzer; | ||
|
||
public static partial class Lexer
{
    /// <summary>
    /// Tries to read a comment start token ("--", "/*" or "/*+") at <paramref name="position"/>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">Index to inspect. It is not modified.</param>
    /// <param name="lex">The start token on success; otherwise <c>default</c>.</param>
    /// <returns>True if a comment start token is at the position.</returns>
    private static bool TryParseCommentStartLex(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
    {
        return TryParseLineCommentStartLex(memory, ref position, out lex)
            || TryParseBlockCommentStartLex(memory, ref position, out lex);
    }

    /// <summary>
    /// Parses consecutive comments that begin right after <paramref name="previous"/>, stopping at
    /// the first position that does not start a comment.
    /// </summary>
    /// <remarks>
    /// For a line comment the sequence is: start token, comment text. For a block comment it is:
    /// start token, comment text, end token. Whitespace after each comment is skipped before
    /// looking for the next one.
    /// </remarks>
    /// <param name="memory">The string to be parsed.</param>
    /// <param name="previous">The previous Lex indicating a non-comment state, or null to start at index 0.</param>
    /// <returns>The comment-related Lexes found, in source order.</returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="previous"/> is itself a comment start or comment Lex (raised on enumeration).
    /// </exception>
    /// <exception cref="FormatException">A block comment is not closed (raised on enumeration).</exception>
    private static IEnumerable<Lex> ParseUntilNonComment(ReadOnlyMemory<char> memory, Lex? previous = null)
    {
        // Invalid if the previous Lex is in a comment state
        var previousType = previous?.Type;
        if (previousType == LexType.LineCommentStart
            || previousType == LexType.BlockCommentStart
            || previousType == LexType.HitCommentStart
            || previousType == LexType.Comment)
        {
            throw new InvalidOperationException("Previous Lex must be in a non-comment state.");
        }

        // Start position is 0 if previous is null
        int position = previous?.EndPosition ?? 0;

        while (true)
        {
            Lex start;
            if (TryParseLineCommentStartLex(memory, ref position, out start))
            {
                yield return start;

                // The TryParse helpers do not move the position, so step over the start token
                // here; otherwise the comment text would overlap it.
                position = start.EndPosition;
                yield return ParseLineComment(memory, ref position);
            }
            else if (TryParseBlockCommentStartLex(memory, ref position, out start))
            {
                yield return start;
                position = start.EndPosition;

                // ParseBlockComment stops ON the closing "*/", which is then emitted as its own Lex.
                yield return ParseBlockComment(memory, ref position);
                var end = ParseBlockCommentEndLex(memory, ref position);
                yield return end;
                position = end.EndPosition;
            }
            else
            {
                yield break;
            }

            SkipWhiteSpaces(memory, ref position);
        }
    }

    /// <summary>
    /// Tries to read the line comment start token "--" at <paramref name="position"/>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">Index to inspect. It is not modified.</param>
    /// <param name="lex">A two-character start token on success; otherwise <c>default</c>.</param>
    /// <returns>True if "--" is at the position.</returns>
    private static bool TryParseLineCommentStartLex(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
    {
        lex = default;

        // Need two characters for "--".
        if (memory.Length < position + 2)
        {
            return false;
        }

        var span = memory.Span;
        if (span[position] != '-' || span[position + 1] != '-')
        {
            return false;
        }

        lex = new Lex(memory, LexType.LineCommentStart, position, 2);
        return true;
    }

    /// <summary>
    /// Tries to read a block comment start token at <paramref name="position"/>: "/*+" yields
    /// <c>HitCommentStart</c> (three characters), a plain "/*" yields <c>BlockCommentStart</c>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">Index to inspect. It is not modified.</param>
    /// <param name="lex">The start token on success; otherwise <c>default</c>.</param>
    /// <returns>True if "/*" is at the position.</returns>
    private static bool TryParseBlockCommentStartLex(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
    {
        lex = default;

        // Need two characters for "/*". (Was "position + 1", which read past the end on a trailing '/'.)
        if (memory.Length < position + 2)
        {
            return false;
        }

        var span = memory.Span;
        if (span[position] != '/' || span[position + 1] != '*')
        {
            // Not a block comment start
            return false;
        }

        // Check for /*+ . (The length test was inverted, so this branch could never be taken.)
        if (span.Length > position + 2 && span[position + 2] == '+')
        {
            lex = new Lex(memory, LexType.HitCommentStart, position, 3);
        }
        else
        {
            lex = new Lex(memory, LexType.BlockCommentStart, position, 2);
        }
        return true;
    }

    /// <summary>
    /// Reads line comment text from <paramref name="position"/> up to, but not including, the next
    /// '\r' or '\n', or to the end of the input.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">
    /// Index of the first character of the comment text. On return it is at the line break that
    /// ended the comment, or at the end of the input.
    /// </param>
    /// <returns>A <c>Comment</c> Lex covering the text; it is empty when the comment has no text.</returns>
    /// <exception cref="FormatException"><paramref name="position"/> lies outside the input.</exception>
    private static Lex ParseLineComment(ReadOnlyMemory<char> memory, ref int position)
    {
        // position == memory.Length is accepted: "--" at the very end of the input is an empty comment.
        if (position < 0 || memory.Length < position)
        {
            throw new FormatException("The SQL string is empty or in an invalid format.");
        }

        var start = position;
        var span = memory.Span;

        // exclude line comment end symbol
        // The current character is tested (not the one after it), so the last character before
        // the line break is kept and nothing is read beyond the end of the input.
        while (position < span.Length && span[position] != '\r' && span[position] != '\n')
        {
            position++;
        }

        return new Lex(memory, LexType.Comment, start, position - start);
    }

    /// <summary>
    /// Reads block comment text from <paramref name="position"/> up to, but not including, the
    /// closing "*/".
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">
    /// Index of the first character after the comment start token. On return it is at the '*' of
    /// the closing "*/", which is left for <c>ParseBlockCommentEndLex</c>.
    /// </param>
    /// <returns>A <c>Comment</c> Lex covering the comment text.</returns>
    /// <exception cref="FormatException">No closing "*/" exists at or after the position.</exception>
    private static Lex ParseBlockComment(ReadOnlyMemory<char> memory, ref int position)
    {
        // At least the two characters of "*/" must remain.
        if (memory.Length < position + 2)
        {
            throw new FormatException("The SQL string is empty or in an invalid format.");
        }

        var start = position;
        var span = memory.Span;

        // Search for the block comment end
        while (position < span.Length - 1)
        {
            if (span[position] == '*' && span[position + 1] == '/')
            {
                // Found the end of the block comment; do not consume it here.
                return new Lex(memory, LexType.Comment, start, position - start);
            }
            position++;
        }

        throw new FormatException("Block comment not closed properly.");
    }

    /// <summary>
    /// Reads the block comment end token "*/" at <paramref name="position"/>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">Index of the '*' of "*/". It is not modified.</param>
    /// <returns>A two-character <c>BlockCommentEnd</c> Lex.</returns>
    /// <exception cref="FormatException">"*/" is not at the position.</exception>
    private static Lex ParseBlockCommentEndLex(ReadOnlyMemory<char> memory, ref int position)
    {
        // Compared character by character: "*/" is punctuation, not a word.
        var span = memory.Span;
        if (position < 0
            || span.Length < position + 2
            || span[position] != '*'
            || span[position + 1] != '/')
        {
            throw new FormatException("Block comment end symbols '*/' not found.");
        }

        return new Lex(memory, LexType.BlockCommentEnd, position, 2);
    }
}
Oops, something went wrong.