diff --git a/Test/Lexer.lean b/Test/Lexer.lean index 7865406..e43dd16 100644 --- a/Test/Lexer.lean +++ b/Test/Lexer.lean @@ -28,6 +28,12 @@ def lexResult (s : String) : String := #guard_msgs in #eval lexResult ":" +/-- + info: "Value: `,`, Kind: comma" +-/ +#guard_msgs in +#eval lexResult "," + /-- info: "Value: `:`, Kind: colon" -/ @@ -339,3 +345,63 @@ def lexResult (s : String) : String := -/ #guard_msgs in #eval lexResult "^ foo" + +/-- + info: "Value: `0`, Kind: intLit" +-/ +#guard_msgs in +#eval lexResult "0x" + +/-- + info: "Value: `0`, Kind: intLit" +-/ +#guard_msgs in +#eval lexResult "0xi" + +/-- + info: "Value: `0x0123456789abcdefABCDEF`, Kind: intLit" +-/ +#guard_msgs in +#eval lexResult "0x0123456789abcdefABCDEF" + +/-- + info: "Value: `42`, Kind: intLit" +-/ +#guard_msgs in +#eval lexResult "42" + +/-- + info: "Value: `42.`, Kind: floatLit" +-/ +#guard_msgs in +#eval lexResult "42." + +/-- + info: "Value: `42.0`, Kind: floatLit" +-/ +#guard_msgs in +#eval lexResult "42.0" + +/-- + info: "Value: `3.1415926`, Kind: floatLit" +-/ +#guard_msgs in +#eval lexResult "3.1415926" + +/-- + info: "Value: `34.e0`, Kind: floatLit" +-/ +#guard_msgs in +#eval lexResult "34.e0" + +/-- + info: "Value: `34.e-23`, Kind: floatLit" +-/ +#guard_msgs in +#eval lexResult "34.e-23" + +/-- + info: "Value: `34.e12`, Kind: floatLit" +-/ +#guard_msgs in +#eval lexResult "34.e12" diff --git a/Veir/ForLean.lean b/Veir/ForLean.lean index 5a2a8b7..5e98b3b 100644 --- a/Veir/ForLean.lean +++ b/Veir/ForLean.lean @@ -16,6 +16,10 @@ def UInt8.isDigit (c : UInt8) : Bool := def UInt8.isHexDigit (c : UInt8) : Bool := c.isDigit || (c >= 'a'.toUInt8 && c <= 'f'.toUInt8) || (c >= 'A'.toUInt8 && c <= 'F'.toUInt8) +@[inline] +def ByteArray.getD (ba : ByteArray) (i : Nat) (default : UInt8) : UInt8 := + if h : i < ba.size then ba[i] else default + set_option warn.sorry false in @[grind .] theorem Array.back_popWhile {as : Array α} {p : α → Bool} (h : 0 < (as.popWhile p).size) : diff --git a/Veir/Parser/Lexer.lean b/Veir/Parser/Lexer.lean index 4f2e99b..afab109 100644 --- a/Veir/Parser/Lexer.lean +++ b/Veir/Parser/Lexer.lean @@ -107,7 +107,7 @@ inductive TokenKind | fileMetadataBegin /-- The `#-}` token. -/ | fileMetadataEnd -deriving Inhabited, Repr +deriving Inhabited, Repr, DecidableEq instance TokenKind.toString : ToString TokenKind where toString @@ -315,6 +315,80 @@ def lexPrefixedIdentifier (state : LexerState) (tokStart : Nat) else .error errorString +/-- + Lex a number literal. + + integer-literal ::= digit+ | `0x` hex_digit+ + float-literal ::= [0-9]+[.][0-9]*([eE][-+]?[0-9]+)? +-/ +def lexNumber (state : LexerState) (firstChar : UInt8) (tokStart : Nat) : Token × LexerState := Id.run do + let mut pos := state.pos + let input := state.input + + if h: state.isEof then + return (state.mkToken .intLit tokStart, state) + else + let c1 := input[pos]'(by grind [LexerState.isEof]) + -- Hexadecimal literal + if firstChar == '0'.toUInt8 && c1 == 'x'.toUInt8 then + -- In MLIR, `0xi` is parsed as `0` followed by `xi`. + if !(input.getD (pos + 1) 0).isHexDigit then + return (state.mkToken .intLit tokStart, state) + pos := pos + 2 + while h: pos < input.size do + let c := input[pos] + if c.isHexDigit then + pos := pos + 1 + else + break + let newState := { state with pos := pos } + return (newState.mkToken .intLit tokStart, newState) + + -- Handle the normal decimal case, with the starting digits + while h: pos < input.size do + let c := input[pos] + if UInt8.isDigit c then + pos := pos + 1 + else + break + + -- Check for fractional part + if input.getD pos 0 != '.'.toUInt8 then + let newState := { state with pos := pos } + return (newState.mkToken .intLit tokStart, newState) + pos := pos + 1 + + -- Parse the fractional digits + while h: pos < input.size do + let c := input[pos] + if UInt8.isDigit c then + pos := pos + 1 + else + break + + -- Check for exponent part + if input.getD pos 0 != 'e'.toUInt8 && input.getD pos 0 != 'E'.toUInt8 then + let newState := { state with pos := pos } + return (newState.mkToken .floatLit tokStart, newState) + pos := pos + 1 + + if (input.getD pos 0).isDigit || + ((input.getD pos 0 == '+'.toUInt8 || input.getD pos 0 == '-'.toUInt8) && + (input.getD (pos + 1) 0).isDigit) then + pos := pos + 1 + while h: pos < input.size do + let c := input[pos] + if UInt8.isDigit c then + pos := pos + 1 + else + break + let newState := { state with pos := pos } + return (newState.mkToken .floatLit tokStart, newState) + else + let newState := { state with pos := pos } + return (newState.mkToken .floatLit tokStart, newState) + + /-- Lex the next token from the input. -/ @@ -336,6 +410,9 @@ partial def lex (state : LexerState) : Except String (Token × LexerState) := else if c == ':'.toUInt8 then let newState := { state with pos := state.pos + 1 } return (newState.mkToken .colon tokStart, newState) + else if c == ','.toUInt8 then + let newState := { state with pos := state.pos + 1 } + return (newState.mkToken .comma tokStart, newState) else if c == '('.toUInt8 then let newState := { state with pos := state.pos + 1 } return (newState.mkToken .lParen tokStart, newState) @@ -447,6 +524,9 @@ partial def lex (state : LexerState) : Except String (Token × LexerState) := else if c == '!'.toUInt8 then let newState := { state with pos := state.pos + 1 } lexPrefixedIdentifier newState tokStart .exclamationIdent + else if UInt8.isDigit c then + let newState := { state with pos := state.pos + 1 } + return lexNumber newState c tokStart else .error s!"Unexpected character '{Char.ofUInt8 c}' at position {state.pos}"