Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions Test/Lexer.lean
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ def lexResult (s : String) : String :=
#guard_msgs in
#eval lexResult ":"

/--
info: "Value: `,`, Kind: comma"
-/
#guard_msgs in
#eval lexResult ","

/--
info: "Value: `:`, Kind: colon"
-/
Expand Down Expand Up @@ -339,3 +345,63 @@ def lexResult (s : String) : String :=
-/
#guard_msgs in
#eval lexResult "^ foo"

/--
info: "Value: `0`, Kind: intLit"
-/
#guard_msgs in
#eval lexResult "0x"

/--
info: "Value: `0`, Kind: intLit"
-/
#guard_msgs in
#eval lexResult "0xi"

/--
info: "Value: `0x0123456789abcdefABCDEF`, Kind: intLit"
-/
#guard_msgs in
#eval lexResult "0x0123456789abcdefABCDEF"

/--
info: "Value: `42`, Kind: intLit"
-/
#guard_msgs in
#eval lexResult "42"

/--
info: "Value: `42.`, Kind: floatLit"
-/
#guard_msgs in
#eval lexResult "42."

/--
info: "Value: `42.0`, Kind: floatLit"
-/
#guard_msgs in
#eval lexResult "42.0"

/--
info: "Value: `3.1415926`, Kind: floatLit"
-/
#guard_msgs in
#eval lexResult "3.1415926"

/--
info: "Value: `34.e0`, Kind: floatLit"
-/
#guard_msgs in
#eval lexResult "34.e0"

/--
info: "Value: `34.e-23`, Kind: floatLit"
-/
#guard_msgs in
#eval lexResult "34.e-23"

/--
info: "Value: `34.e12`, Kind: floatLit"
-/
#guard_msgs in
#eval lexResult "34.e12"
4 changes: 4 additions & 0 deletions Veir/ForLean.lean
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ def UInt8.isDigit (c : UInt8) : Bool :=
def UInt8.isHexDigit (c : UInt8) : Bool :=
c.isDigit || (c >= 'a'.toUInt8 && c <= 'f'.toUInt8) || (c >= 'A'.toUInt8 && c <= 'F'.toUInt8)

@[inline]
def ByteArray.getD (ba : ByteArray) (i : Nat) (default : UInt8) : UInt8 :=
if h : i < ba.size then ba[i] else default

set_option warn.sorry false in
@[grind .]
theorem Array.back_popWhile {as : Array α} {p : α → Bool} (h : 0 < (as.popWhile p).size) :
Expand Down
82 changes: 81 additions & 1 deletion Veir/Parser/Lexer.lean
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ inductive TokenKind
| fileMetadataBegin
/-- The `#-}` token. -/
| fileMetadataEnd
deriving Inhabited, Repr
deriving Inhabited, Repr, DecidableEq

instance TokenKind.toString : ToString TokenKind where
toString
Expand Down Expand Up @@ -315,6 +315,80 @@ def lexPrefixedIdentifier (state : LexerState) (tokStart : Nat)
else
.error errorString

/--
Lex a number literal.

integer-literal ::= digit+ | `0x` hex_digit+
float-literal ::= [0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
-/
def lexNumber (state : LexerState) (firstChar : UInt8) (tokStart : Nat) : Token × LexerState := Id.run do
let mut pos := state.pos
let input := state.input

if h: state.isEof then
return (state.mkToken .intLit tokStart, state)
else
let c1 := input[pos]'(by grind [LexerState.isEof])
-- Hexadecimal literal
if firstChar == '0'.toUInt8 && c1 == 'x'.toUInt8 then
-- In MLIR, `0xi` is parsed as `0` followed by `xi`.
if !(input.getD (pos + 1) 0).isHexDigit then
return (state.mkToken .intLit tokStart, state)
pos := pos + 2
while h: pos < input.size do
let c := input[pos]
if c.isHexDigit then
pos := pos + 1
else
break
let newState := { state with pos := pos }
return (newState.mkToken .intLit tokStart, newState)

-- Handle the normal decimal case, with the starting digits
while h: pos < input.size do
let c := input[pos]
if UInt8.isDigit c then
pos := pos + 1
else
break

-- Check for fractional part
if input.getD pos 0 != '.'.toUInt8 then
let newState := { state with pos := pos }
return (newState.mkToken .intLit tokStart, newState)
pos := pos + 1

-- Parse the fractional digits
while h: pos < input.size do
let c := input[pos]
if UInt8.isDigit c then
pos := pos + 1
else
break

-- Check for exponent part
if input.getD pos 0 != 'e'.toUInt8 && input.getD pos 0 != 'E'.toUInt8 then
let newState := { state with pos := pos }
return (newState.mkToken .floatLit tokStart, newState)
pos := pos + 1

if (input.getD pos 0).isDigit ||
((input.getD pos 0 == '+'.toUInt8 || input.getD pos 0 == '-'.toUInt8) &&
(input.getD (pos + 1) 0).isDigit) then
pos := pos + 1
while h: pos < input.size do
let c := input[pos]
if UInt8.isDigit c then
pos := pos + 1
else
break
let newState := { state with pos := pos }
return (newState.mkToken .floatLit tokStart, newState)
else
let newState := { state with pos := pos }
return (newState.mkToken .floatLit tokStart, newState)


/--
Lex the next token from the input.
-/
Expand All @@ -336,6 +410,9 @@ partial def lex (state : LexerState) : Except String (Token × LexerState) :=
else if c == ':'.toUInt8 then
let newState := { state with pos := state.pos + 1 }
return (newState.mkToken .colon tokStart, newState)
else if c == ','.toUInt8 then
let newState := { state with pos := state.pos + 1 }
return (newState.mkToken .comma tokStart, newState)
else if c == '('.toUInt8 then
let newState := { state with pos := state.pos + 1 }
return (newState.mkToken .lParen tokStart, newState)
Expand Down Expand Up @@ -447,6 +524,9 @@ partial def lex (state : LexerState) : Except String (Token × LexerState) :=
else if c == '!'.toUInt8 then
let newState := { state with pos := state.pos + 1 }
lexPrefixedIdentifier newState tokStart .exclamationIdent
else if UInt8.isDigit c then
let newState := { state with pos := state.pos + 1 }
return lexNumber newState c tokStart
else
.error s!"Unexpected character '{Char.ofUInt8 c}' at position {state.pos}"

Expand Down