Skip to content

Commit

Permalink
Implements query-first lex analysis only
Browse files Browse the repository at this point in the history
  • Loading branch information
mk3008 committed Nov 3, 2024
1 parent 2d01e6c commit cc09a0e
Show file tree
Hide file tree
Showing 20 changed files with 2,358 additions and 1 deletion.
9 changes: 9 additions & 0 deletions src/Carbunql.LexicalAnalyzer/Carbunql.LexicalAnalyzer.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<!--
  Build settings for the Carbunql.LexicalAnalyzer project.
  Targets .NET 8 and turns on implicit global usings and nullable reference type analysis.
-->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

</Project>
46 changes: 46 additions & 0 deletions src/Carbunql.LexicalAnalyzer/Lex.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
namespace Carbunql.LexicalAnalyzer;

/// <summary>
/// A single lexical token: a typed window (<see cref="Position"/>, <see cref="Length"/>)
/// over the source text held in <see cref="Memory"/>.
/// </summary>
public readonly struct Lex
{
    /// <summary>
    /// Creates a token whose <see cref="EndPosition"/> is <paramref name="position"/> + <paramref name="length"/>.
    /// </summary>
    /// <param name="memory">The source text the token refers to.</param>
    /// <param name="type">The token classification.</param>
    /// <param name="position">Zero-based offset of the token within <paramref name="memory"/>.</param>
    /// <param name="length">Number of characters in the token.</param>
    public Lex(ReadOnlyMemory<char> memory, LexType type, int position, int length)
        : this(memory, type, position, length, position + length)
    {
    }

    /// <summary>
    /// Creates a token with an explicitly supplied <see cref="EndPosition"/>,
    /// which may differ from <paramref name="position"/> + <paramref name="length"/>.
    /// </summary>
    /// <param name="memory">The source text the token refers to.</param>
    /// <param name="type">The token classification.</param>
    /// <param name="position">Zero-based offset of the token within <paramref name="memory"/>.</param>
    /// <param name="length">Number of characters in the token.</param>
    /// <param name="endPosition">The value stored in <see cref="EndPosition"/>.</param>
    public Lex(ReadOnlyMemory<char> memory, LexType type, int position, int length, int endPosition)
    {
        Memory = memory;
        Type = type;
        Position = position;
        Length = length;
        EndPosition = endPosition;
    }

    /// <summary>The source text this token points into.</summary>
    public ReadOnlyMemory<char> Memory { get; }

    /// <summary>The token classification.</summary>
    public LexType Type { get; }

    /// <summary>Zero-based offset of the token's first character in <see cref="Memory"/>.</summary>
    public int Position { get; }

    /// <summary>Number of characters covered by the token.</summary>
    public int Length { get; }

    /// <summary>
    /// Position recorded as the end of this token; either <see cref="Position"/> + <see cref="Length"/>
    /// or the value passed explicitly to the five-argument constructor.
    /// </summary>
    public int EndPosition { get; }

    /// <summary>
    /// Materializes the token text as a new string.
    /// </summary>
    /// <returns>The <see cref="Length"/> characters of <see cref="Memory"/> starting at <see cref="Position"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <see cref="Position"/> + <see cref="Length"/> exceeds the length of <see cref="Memory"/>.
    /// </exception>
    public string GetValue()
    {
        // A readonly struct cannot lazily cache the string, so the value is built on every call.
        if (Position + Length > Memory.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(Length), "Position and Length exceed memory bounds.");
        }

        return Memory.Slice(Position, Length).ToString();
    }
}


109 changes: 109 additions & 0 deletions src/Carbunql.LexicalAnalyzer/LexType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
namespace Carbunql.LexicalAnalyzer;

/// <summary>
/// Classification of a <c>Lex</c> token.
/// </summary>
/// <remarks>
/// Only <see cref="Unknown"/> carries an explicit value; every other member takes the next
/// sequential value so that no two members share the same underlying number.
/// </remarks>
public enum LexType : byte
{
    /// <summary>
    /// Unknown or unrecognized token type.
    /// </summary>
    Unknown = 0,

    CommentAfterWhiteSpaces,

    /// <summary>
    /// Terminator for SQL queries, represented by a semicolon (;).
    /// </summary>
    QueryTerminator,

    // alter command
    AlterSchema,
    AlterTable,
    AlterView,
    AlterIndex,
    AlterFunction,
    AlterProcedure,
    AlterTrigger,
    AlterSequence,

    // create command
    CreateSchema,
    CreateTable,
    CreateTemporaryTable,
    CreateView,
    CreateIndex,
    CreateUniqueIndex,
    CreateFunction,
    CreateTrigger,
    CreateProcedure,
    CreateSequence,

    // insert, update, delete, merge command
    Insert,
    Update,
    Delete,
    Merge,

    Select,
    SelectDistinct,
    SelectDistinctOn,

    With,
    WithRecursive,

    PrefixNegation, // not, ~

    WildCard,
    Value,
    SchemaOrTableOrColumn,
    Column,
    Function,
    Letter,

    // comment tokens
    LineCommentStart,
    BlockCommentStart,
    HitCommentStart,
    BlockCommentEnd,
    Comment,

    /// <summary>
    /// Separator for values, represented by a comma (,).
    /// </summary>
    ValueSeparator,

    /// <summary>
    /// Separator for identifiers, represented by a period (.).
    /// Used to separate schema, table, and column names (e.g., table.column).
    /// </summary>
    IdentifierSeparator,

    /// <summary>
    /// Left parenthesis, represented by an opening parenthesis (() used in expressions.
    /// </summary>
    LeftParen,

    /// <summary>
    /// Right parenthesis, represented by a closing parenthesis ()) used in expressions.
    /// </summary>
    RightParen,

    /// <summary>
    /// Left square bracket, represented by an opening bracket ([), often used for identifiers.
    /// </summary>
    LeftSquareBracket,

    /// <summary>
    /// Right square bracket, represented by a closing bracket (]), often used for identifiers.
    /// </summary>
    RightSquareBracket
}


68 changes: 68 additions & 0 deletions src/Carbunql.LexicalAnalyzer/Lexer.Alter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
using System.Diagnostics.CodeAnalysis;

namespace Carbunql.LexicalAnalyzer;

public static partial class Lexer
{
    /// <summary>
    /// Object keywords accepted after <c>alter</c>, paired with the token type they produce.
    /// Checked in declaration order.
    /// </summary>
    private static readonly (string Keyword, LexType Type)[] AlterTargets =
    {
        ("table", LexType.AlterTable),
        ("view", LexType.AlterView),
        ("index", LexType.AlterIndex),
        ("schema", LexType.AlterSchema),
        ("function", LexType.AlterFunction),
        ("procedure", LexType.AlterProcedure),
        ("trigger", LexType.AlterTrigger),
        ("sequence", LexType.AlterSequence),
    };

    /// <summary>
    /// Tries to read an <c>alter &lt;object&gt;</c> command token starting at <paramref name="position"/>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">
    /// Offset to start reading from. Left untouched when <c>alter</c> is not found; otherwise moved to
    /// the first non-whitespace character that follows the <c>alter</c> keyword.
    /// </param>
    /// <param name="lex">
    /// On success, a token spanning from the start of <c>alter</c> through the end of the object keyword;
    /// otherwise <c>default</c>.
    /// </param>
    /// <returns><c>true</c> when an alter command was recognized; <c>false</c> when the text does not start with <c>alter</c>.</returns>
    /// <exception cref="NotSupportedException"><c>alter</c> is followed by something other than a supported object keyword.</exception>
    private static bool TryParseAlterLex(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
    {
        lex = default;

        if (!memory.EqualsWordIgnoreCase(position, "alter"))
        {
            return false;
        }

        var start = position;
        position += "alter".Length;

        // Any run of whitespace (spaces, tabs, line breaks) may separate the two keywords,
        // not just a single character.
        var span = memory.Span;
        while (position < span.Length && char.IsWhiteSpace(span[position]))
        {
            position++;
        }

        foreach (var (keyword, type) in AlterTargets)
        {
            if (memory.EqualsWordIgnoreCase(position, keyword))
            {
                lex = new Lex(memory, type, start, position + keyword.Length - start);
                return true;
            }
        }

        throw new NotSupportedException($"Unsupported lex type encountered at position {position}. " +
            $"Expected types: 'table', 'view', 'index', 'schema', 'function', " +
            $" 'procedure', 'trigger', or 'sequence'.");
    }
}
163 changes: 163 additions & 0 deletions src/Carbunql.LexicalAnalyzer/Lexer.Comment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
using System.Diagnostics.CodeAnalysis;

namespace Carbunql.LexicalAnalyzer;

public static partial class Lexer
{
    /// <summary>
    /// Tries to read a comment opener (<c>--</c>, <c>/*</c> or <c>/*+</c>) at <paramref name="position"/>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">Offset to inspect; it is not modified.</param>
    /// <param name="lex">The opener token on success; otherwise <c>default</c>.</param>
    /// <returns><c>true</c> when a comment opener is present at <paramref name="position"/>.</returns>
    private static bool TryParseCommentStartLex(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
    {
        return TryParseLineCommentStartLex(memory, ref position, out lex)
            || TryParseBlockCommentStartLex(memory, ref position, out lex);
    }

    /// <summary>
    /// Enumerates the comment tokens that directly follow a non-comment token, stopping at the first
    /// position that does not start a comment.
    /// </summary>
    /// <param name="memory">The string to be parsed.</param>
    /// <param name="previous">
    /// The preceding non-comment token; scanning starts at its <c>EndPosition</c>, or at 0 when null.
    /// </param>
    /// <returns>
    /// For each line comment: the opener and the comment body. For each block comment: the opener,
    /// the comment body and the closer.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="previous"/> is itself a comment token. Thrown when the method is called,
    /// not when the result is enumerated.
    /// </exception>
    private static IEnumerable<Lex> ParseUntilNonComment(ReadOnlyMemory<char> memory, Lex? previous = null)
    {
        if (previous?.Type is LexType.LineCommentStart
            or LexType.BlockCommentStart
            or LexType.HitCommentStart
            or LexType.Comment)
        {
            throw new InvalidOperationException("Previous Lex must be in a non-comment state.");
        }

        // The iterator lives in a local function so the check above is not deferred to first enumeration.
        return Enumerate(memory, previous?.EndPosition ?? 0);

        static IEnumerable<Lex> Enumerate(ReadOnlyMemory<char> memory, int position)
        {
            while (true)
            {
                if (TryParseLineCommentStartLex(memory, ref position, out var lineStart))
                {
                    yield return lineStart;

                    // The opener parsers leave position untouched, so step past the symbol here;
                    // otherwise the comment body would start on top of the opener.
                    position = lineStart.EndPosition;
                    yield return ParseLineComment(memory, ref position);
                }
                else if (TryParseBlockCommentStartLex(memory, ref position, out var blockStart))
                {
                    yield return blockStart;
                    position = blockStart.EndPosition;
                    yield return ParseBlockComment(memory, ref position);

                    var blockEnd = ParseBlockCommentEndLex(memory, ref position);
                    position = blockEnd.EndPosition;
                    yield return blockEnd;
                }
                else
                {
                    yield break;
                }

                // SkipWhiteSpaces is defined in another part of this partial class.
                SkipWhiteSpaces(memory, ref position);
            }
        }
    }

    /// <summary>
    /// Tries to read a line comment opener (<c>--</c>) at <paramref name="position"/>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">Offset to inspect; it is not modified.</param>
    /// <param name="lex">A two-character <c>LineCommentStart</c> token on success; otherwise <c>default</c>.</param>
    /// <returns><c>true</c> when <c>--</c> is present at <paramref name="position"/>.</returns>
    private static bool TryParseLineCommentStartLex(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
    {
        lex = default;

        var span = memory.Span;
        if (span.Length < position + 2)
        {
            return false;
        }

        if (span[position] != '-' || span[position + 1] != '-')
        {
            return false;
        }

        lex = new Lex(memory, LexType.LineCommentStart, position, 2);
        return true;
    }

    /// <summary>
    /// Tries to read a block comment opener at <paramref name="position"/>: <c>/*</c>, or <c>/*+</c>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">Offset to inspect; it is not modified.</param>
    /// <param name="lex">
    /// A three-character <c>HitCommentStart</c> token for <c>/*+</c>, a two-character
    /// <c>BlockCommentStart</c> token for <c>/*</c>; otherwise <c>default</c>.
    /// </param>
    /// <returns><c>true</c> when a block comment opener is present at <paramref name="position"/>.</returns>
    private static bool TryParseBlockCommentStartLex(ReadOnlyMemory<char> memory, ref int position, out Lex lex)
    {
        lex = default;

        var span = memory.Span;

        // Two characters are read below, so two must be available.
        if (span.Length < position + 2)
        {
            return false;
        }

        if (span[position] != '/' || span[position + 1] != '*')
        {
            return false;
        }

        // A third character, when present and equal to '+', turns the opener into "/*+".
        var hasPlus = position + 2 < span.Length && span[position + 2] == '+';

        lex = hasPlus
            ? new Lex(memory, LexType.HitCommentStart, position, 3)
            : new Lex(memory, LexType.BlockCommentStart, position, 2);
        return true;
    }

    /// <summary>
    /// Reads the text of a line comment from <paramref name="position"/> up to, but not including,
    /// the next <c>\r</c> or <c>\n</c>, or to the end of the text.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">
    /// Offset of the first comment character; on return it points at the line break or at the end of the text.
    /// </param>
    /// <returns>A <c>Comment</c> token covering the scanned characters (possibly empty).</returns>
    /// <exception cref="FormatException"><paramref name="position"/> lies beyond the end of the text.</exception>
    private static Lex ParseLineComment(ReadOnlyMemory<char> memory, ref int position)
    {
        if (position > memory.Length)
        {
            throw new FormatException("The SQL string is empty or in an invalid format.");
        }

        var start = position;
        var span = memory.Span;

        // Stop on the line break itself so it is excluded from the comment.
        while (position < span.Length && span[position] != '\r' && span[position] != '\n')
        {
            position++;
        }

        return new Lex(memory, LexType.Comment, start, position - start);
    }

    /// <summary>
    /// Reads the text of a block comment from <paramref name="position"/> up to, but not including,
    /// the closing <c>*/</c>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">
    /// Offset of the first comment character; on return it points at the <c>*</c> of the closing <c>*/</c>.
    /// </param>
    /// <returns>A <c>Comment</c> token covering the text before the closing symbol.</returns>
    /// <exception cref="FormatException">
    /// Fewer than two characters remain, or no closing <c>*/</c> exists.
    /// </exception>
    private static Lex ParseBlockComment(ReadOnlyMemory<char> memory, ref int position)
    {
        if (memory.Length < position + 2)
        {
            throw new FormatException("The SQL string is empty or in an invalid format.");
        }

        var start = position;
        var span = memory.Span;

        while (position < span.Length - 1)
        {
            if (span[position] == '*' && span[position + 1] == '/')
            {
                // Leave the terminator unread so ParseBlockCommentEndLex can emit it as its own token.
                return new Lex(memory, LexType.Comment, start, position - start);
            }

            position++;
        }

        throw new FormatException("Block comment not closed properly.");
    }

    /// <summary>
    /// Reads the block comment closer (<c>*/</c>) at <paramref name="position"/>.
    /// </summary>
    /// <param name="memory">The SQL text being analyzed.</param>
    /// <param name="position">Offset to inspect; it is not modified.</param>
    /// <returns>A two-character <c>BlockCommentEnd</c> token.</returns>
    /// <exception cref="FormatException"><c>*/</c> is not present at <paramref name="position"/>.</exception>
    private static Lex ParseBlockCommentEndLex(ReadOnlyMemory<char> memory, ref int position)
    {
        var span = memory.Span;

        // Compare the two symbol characters directly; this is a symbol, not a word.
        var found = position >= 0
            && position + 2 <= span.Length
            && span[position] == '*'
            && span[position + 1] == '/';

        if (!found)
        {
            throw new FormatException("Block comment end symbols '*/' not found.");
        }

        return new Lex(memory, LexType.BlockCommentEnd, position, 2);
    }
}
Loading

0 comments on commit cc09a0e

Please sign in to comment.