Skip to content

Commit

Permalink
feat: generalized Parsec (#4774)
Browse files Browse the repository at this point in the history
For experimentation by @the-sofi-uwu.

I also have an efficient number parser in LeanSAT that I am planning to
upstream after we have sufficiently bikeshed this change.
  • Loading branch information
hargoniX authored Aug 6, 2024
1 parent 35e1554 commit b7db828
Show file tree
Hide file tree
Showing 9 changed files with 559 additions and 282 deletions.
115 changes: 115 additions & 0 deletions src/Init/Data/ByteArray/Basic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,121 @@ def foldlM {β : Type v} {m : Type v → Type w} [Monad m] (f : β → UInt8 →
def foldl {β : Type v} (f : β → UInt8 → β) (init : β) (as : ByteArray) (start := 0) (stop := as.size) : β :=
Id.run <| as.foldlM f init start stop

/-- Iterator over the bytes (`UInt8`) of a `ByteArray`.
Typically created by `arr.iter`, where `arr` is a `ByteArray`.
An iterator is *valid* if the position `i` is *valid* for the array `arr`, meaning `0 ≤ i ≤ arr.size`
Most operations on iterators return arbitrary values if the iterator is not valid. The functions in
the `ByteArray.Iterator` API should rule out the creation of invalid iterators, with two exceptions:
- `Iterator.next iter` is invalid if `iter` is already at the end of the array (`iter.atEnd` is
`true`)
- `Iterator.forward iter n`/`Iterator.nextn iter n` is invalid if `n` is strictly greater than the
number of remaining bytes.
-/
structure Iterator where
/-- The array the iterator is for. -/
array : ByteArray
/-- The current position.
This position is not necessarily valid for the array, for instance if one keeps calling
`Iterator.next` when `Iterator.atEnd` is true. If the position is not valid, then the
current byte is `(default : UInt8)`. -/
idx : Nat
deriving Inhabited

/-- Creates an iterator at the beginning of an array. -/
def mkIterator (arr : ByteArray) : Iterator :=
⟨arr, 0

@[inherit_doc mkIterator]
abbrev iter := mkIterator

/-- The size of an array iterator is the number of bytes remaining. -/
instance : SizeOf Iterator where
sizeOf i := i.array.size - i.idx

theorem Iterator.sizeOf_eq (i : Iterator) : sizeOf i = i.array.size - i.idx :=
rfl

namespace Iterator

/-- Number of bytes remaining in the iterator. -/
def remainingBytes : Iterator → Nat
| ⟨arr, i⟩ => arr.size - i

@[inherit_doc Iterator.idx]
def pos := Iterator.idx

/-- The byte at the current position.
On an invalid position, returns `(default : UInt8)`. -/
@[inline]
def curr : Iterator → UInt8
| ⟨arr, i⟩ =>
if h:i < arr.size then
arr[i]'h
else
default

/-- Moves the iterator's position forward by one byte, unconditionally.
It is only valid to call this function if the iterator is not at the end of the array, *i.e.*
`Iterator.atEnd` is `false`; otherwise, the resulting iterator will be invalid. -/
@[inline]
def next : Iterator → Iterator
| ⟨arr, i⟩ => ⟨arr, i + 1

/-- Decreases the iterator's position.
If the position is zero, this function is the identity. -/
@[inline]
def prev : Iterator → Iterator
| ⟨arr, i⟩ => ⟨arr, i - 1

/-- True if the iterator is past the array's last byte. -/
@[inline]
def atEnd : Iterator → Bool
| ⟨arr, i⟩ => i ≥ arr.size

/-- True if the iterator is not past the array's last byte. -/
@[inline]
def hasNext : Iterator → Bool
| ⟨arr, i⟩ => i < arr.size

/-- True if the position is not zero. -/
@[inline]
def hasPrev : Iterator → Bool
| ⟨_, i⟩ => i > 0

/-- Moves the iterator's position to the end of the array.
Note that `i.toEnd.atEnd` is always `true`. -/
@[inline]
def toEnd : Iterator → Iterator
| ⟨arr, _⟩ => ⟨arr, arr.size⟩

/-- Moves the iterator's position several bytes forward.
The resulting iterator is only valid if the number of bytes to skip is less than or equal to
the number of bytes left in the iterator. -/
@[inline]
def forward : Iterator → Nat → Iterator
| ⟨arr, i⟩, f => ⟨arr, i + f⟩

@[inherit_doc forward, inline]
def nextn : Iterator → Nat → Iterator := forward

/-- Moves the iterator's position several bytes back.
If asked to go back more bytes than available, stops at the beginning of the array. -/
@[inline]
def prevn : Iterator → Nat → Iterator
| ⟨arr, i⟩, f => ⟨arr, i - f⟩

end Iterator
end ByteArray

def List.toByteArray (bs : List UInt8) : ByteArray :=
Expand Down
39 changes: 20 additions & 19 deletions src/Lean/Data/Json/Parser.lean
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ import Lean.Data.RBMap
namespace Lean.Json.Parser

open Lean.Parsec
open Lean.Parsec.String

@[inline]
def hexChar : Parsec Nat := do
let c ← anyChar
def hexChar : Parser Nat := do
let c ← any
if '0' ≤ c ∧ c ≤ '9' then
pure $ c.val.toNat - '0'.val.toNat
else if 'a' ≤ c ∧ c ≤ 'f' then
Expand All @@ -25,8 +26,8 @@ def hexChar : Parsec Nat := do
else
fail "invalid hex character"

def escapedChar : Parsec Char := do
let c ← anyChar
def escapedChar : Parser Char := do
let c ← any
match c with
| '\\' => return '\\'
| '"' => return '"'
Expand All @@ -41,13 +42,13 @@ def escapedChar : Parsec Char := do
return Char.ofNat $ 4096*u1 + 256*u2 + 16*u3 + u4
| _ => fail "illegal \\u escape"

partial def strCore (acc : String) : Parsec String := do
partial def strCore (acc : String) : Parser String := do
let c ← peek!
if c = '"' then -- "
skip
return acc
else
let c ← anyChar
let c ← any
if c = '\\' then
strCore (acc.push (← escapedChar))
-- as to whether c.val > 0xffff should be split up and encoded with multiple \u,
Expand All @@ -58,9 +59,9 @@ partial def strCore (acc : String) : Parsec String := do
else
fail "unexpected character in string"

def str : Parsec String := strCore ""
def str : Parser String := strCore ""

partial def natCore (acc digits : Nat) : Parsec (Nat × Nat) := do
partial def natCore (acc digits : Nat) : Parser (Nat × Nat) := do
let some c ← peek? | return (acc, digits)
if '0' ≤ c ∧ c ≤ '9' then
skip
Expand All @@ -70,30 +71,30 @@ partial def natCore (acc digits : Nat) : Parsec (Nat × Nat) := do
return (acc, digits)

@[inline]
def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Parsec Unit := do
def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Parser Unit := do
let c ← peek!
if p c then
return ()
else
fail <| "expected " ++ desc

@[inline]
def natNonZero : Parsec Nat := do
def natNonZero : Parser Nat := do
lookahead (fun c => '1' ≤ c ∧ c ≤ '9') "1-9"
let (n, _) ← natCore 0 0
return n

@[inline]
def natNumDigits : Parsec (Nat × Nat) := do
def natNumDigits : Parser (Nat × Nat) := do
lookahead (fun c => '0' ≤ c ∧ c ≤ '9') "digit"
natCore 0 0

@[inline]
def natMaybeZero : Parsec Nat := do
def natMaybeZero : Parser Nat := do
let (n, _) ← natNumDigits
return n

def num : Parsec JsonNumber := do
def num : Parser JsonNumber := do
let c ← peek!
let sign ← if c = '-' then
skip
Expand Down Expand Up @@ -132,10 +133,10 @@ def num : Parsec JsonNumber := do
else
return res

partial def arrayCore (anyCore : Parsec Json) (acc : Array Json) : Parsec (Array Json) := do
partial def arrayCore (anyCore : Parser Json) (acc : Array Json) : Parser (Array Json) := do
let hd ← anyCore
let acc' := acc.push hd
let c ← anyChar
let c ← any
if c = ']' then
ws
return acc'
Expand All @@ -145,12 +146,12 @@ partial def arrayCore (anyCore : Parsec Json) (acc : Array Json) : Parsec (Array
else
fail "unexpected character in array"

partial def objectCore (anyCore : Parsec Json) : Parsec (RBNode String (fun _ => Json)) := do
partial def objectCore (anyCore : Parser Json) : Parser (RBNode String (fun _ => Json)) := do
lookahead (fun c => c = '"') "\""; skip; -- "
let k ← strCore ""; ws
lookahead (fun c => c = ':') ":"; skip; ws
let v ← anyCore
let c ← anyChar
let c ← any
if c = '}' then
ws
return RBNode.singleton k v
Expand All @@ -161,7 +162,7 @@ partial def objectCore (anyCore : Parsec Json) : Parsec (RBNode String (fun _ =>
else
fail "unexpected character in object"

partial def anyCore : Parsec Json := do
partial def anyCore : Parser Json := do
let c ← peek!
if c = '[' then
skip; ws
Expand Down Expand Up @@ -203,7 +204,7 @@ partial def anyCore : Parsec Json := do
fail "unexpected input"


def any : Parsec Json := do
def any : Parser Json := do
ws
let res ← anyCore
eof
Expand Down
Loading

0 comments on commit b7db828

Please sign in to comment.