Skip to content

Commit

Permalink
Merge pull request #10 from opencompl/adjust_to_parser_changes
Browse files Browse the repository at this point in the history
update toolchain to v4.12.0-rc2
  • Loading branch information
dupuisf authored Sep 3, 2024
2 parents c138ab5 + 53e632a commit 0a294fe
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 88 deletions.
4 changes: 3 additions & 1 deletion BibtexQuery/Format.lean
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import BibtexQuery.Name
import BibtexQuery.Entry
import Lean.Data.Xml.Basic
import Lean.Data.HashMap
import Std.Internal.Parsec
import Std.Internal.Parsec.String

/-!
Expand All @@ -18,7 +20,7 @@ tag generating and sorting.
-/

open Lean Xml Parsec Unicode BibtexQuery.TexDiacritics BibtexQuery.Name
open Lean Xml Std.Internal.Parsec Unicode BibtexQuery.TexDiacritics BibtexQuery.Name

namespace BibtexQuery

Expand Down
16 changes: 8 additions & 8 deletions BibtexQuery/Main.lean
Original file line number Diff line number Diff line change
Expand Up @@ -100,23 +100,23 @@ def main : List String → IO Unit
IO.println s!"Reading {fname} to find doubled keys"
let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter
match parsed with
| Parsec.ParseResult.success _pos res =>
| .success _pos res =>
let lst := listDoublons res
IO.println lst
| Parsec.ParseResult.error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}"
| .error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}"
| ["l", fname] => do
let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter
match parsed with
| Parsec.ParseResult.success _pos res => printEntries res
| Parsec.ParseResult.error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}"
| .success _pos res => printEntries res
| .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}"
| "q" :: (fname :: queries) => do
let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter
match parsed with
| Parsec.ParseResult.success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString
| Parsec.ParseResult.error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}"
| .success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString
| .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}"
| "c" :: (fname :: queries) => do
let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter
match parsed with
| Parsec.ParseResult.success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString
| Parsec.ParseResult.error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}"
| .success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString
| .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}"
| _ => do IO.eprintln "Invalid command-line arguments"; printHelp
21 changes: 12 additions & 9 deletions BibtexQuery/Name.lean
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Author: Jz Pan

import BibtexQuery.TexDiacritics
import UnicodeBasic
import Std.Internal.Parsec
import Std.Internal.Parsec.String


/-!
Expand All @@ -21,12 +24,12 @@ The math equations are still not allowed.
-/

open Lean Parsec Unicode BibtexQuery.TexDiacritics
open Lean Std.Internal.Parsec Std.Internal.Parsec.String Unicode BibtexQuery.TexDiacritics

namespace BibtexQuery.Name

partial def getNameBracedAux : Parsec String := do
let doOne : Parsec (Option String) := fun it =>
partial def getNameBracedAux : Parser String := do
let doOne : Parser (Option String) := fun it =>
if it.hasNext then
match it.curr with
| '{' => (.some <$> bracedContent getNameBracedAux) it
Expand All @@ -36,27 +39,27 @@ partial def getNameBracedAux : Parsec String := do
.success it .none
return String.join (← manyOptions doOne).toList

def normalCharWithoutSpaceOrComma : Parsec Char := satisfy fun c =>
def normalCharWithoutSpaceOrComma : Parser Char := satisfy fun c =>
match c with
| '\\' | '$' | '{' | '}' | ' ' | '\t' | '\r' | '\n' | ',' => false
| _ => true

def normalCharsWithoutSpaceOrComma : Parsec String := do
def normalCharsWithoutSpaceOrComma : Parser String := do
let s ← many1Chars normalCharWithoutSpaceOrComma
pure <| replaceChars s

/-- Input a bibtex name string without TeX commands
and math equations, split the string by " " and ",". -/
def getNameAux : Parsec (Array String) := do
let doOne' : Parsec (Option String) := fun it =>
def getNameAux : Parser (Array String) := do
let doOne' : Parser (Option String) := fun it =>
if it.hasNext then
match it.curr with
| '{' => (.some <$> bracedContent getNameBracedAux) it
| '}' | ' ' | '\t' | '\r' | '\n' | ',' => .success it .none
| _ => (.some <$> normalCharsWithoutSpaceOrComma) it
else
.success it .none
let doOne : Parsec (Option String) := fun it =>
let doOne : Parser (Option String) := fun it =>
if it.hasNext then
match it.curr with
| '}' => .success it .none
Expand Down Expand Up @@ -106,7 +109,7 @@ def getName (arr : Array String) : String × String :=
/-- Input a bibtex name string without TeX commands
and math equations, return an array of `(Firstname, Lastname)`.
The braces in the name are preserevd. -/
def getNames : Parsec (Array (String × String)) := do
def getNames : Parser (Array (String × String)) := do
let arr ← getNameAux
let arr2 : Array (Array String) := arr.foldl (fun acc s =>
if s = "and" then
Expand Down
68 changes: 36 additions & 32 deletions BibtexQuery/ParsecExtra.lean
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ Released under Apache 2.0 license as described in the file LICENSE.
Author: Frédéric Dupuis
-/

import Lean.Data.Parsec
import Std.Internal.Parsec
import Std.Internal.Parsec.Basic
import Std.Internal.Parsec.String



/-!
# Extra Parsec material
Expand All @@ -14,98 +18,98 @@ of which is modelled after its Haskell counterpart.
-/


open Lean Lean.Parsec
open Lean Std.Internal.Parsec.String Std.Internal.Parsec

namespace BibtexQuery.ParsecExtra

def _root_.String.parse? [Inhabited α] (s : String) (p : Lean.Parsec α) : Option α :=
def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α :=
match p s.iter with
| Lean.Parsec.ParseResult.success _ x => some x
| Lean.Parsec.ParseResult.error _ _ => none
| .success _ x => some x
| .error _ _ => none

def _root_.String.parseDebug [Inhabited α] (s : String) (p : Lean.Parsec α) : Option (α × String.Pos) :=
def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos) :=
match p s.iter with
| Lean.Parsec.ParseResult.success pos x => some ⟨x, pos.i⟩
| Lean.Parsec.ParseResult.error _ _ => none
| .success pos x => some ⟨x, pos.i⟩
| .error _ _ => none

@[inline]
def noneOf (bad : String) : Parsec Char := Parsec.satisfy (fun z => ¬bad.contains z)
def noneOf (bad : String) : Parser Char := satisfy (fun z => ¬bad.contains z)

@[inline]
def noneOfStr (bad : String) : Parsec String := manyChars (noneOf bad)
def noneOfStr (bad : String) : Parser String := manyChars (noneOf bad)

@[inline]
def eol : Parsec String :=
Parsec.pstring "\n\r" <|> Parsec.pstring "\r\n" <|> Parsec.pstring "\n"
def eol : ByteArray.Parser String :=
ByteArray.pstring "\n\r" <|> ByteArray.pstring "\r\n" <|> ByteArray.pstring "\n"

@[inline]
def maybeSkip (p : Parsec α) : Parsec Unit := (attempt (p *> pure ())) <|> pure ()
def maybeSkip (p : Parser α) : Parser Unit := (attempt (p *> pure ())) <|> pure ()

@[inline]
partial def manyCore' (p : Parsec α) (acc : List α) : Parsec (List α) :=
partial def manyCore' (p : Parser α) (acc : List α) : Parser (List α) :=
(do manyCore' p (acc ++ [← p])) <|> pure acc

@[inline]
def many' (p : Parsec α) : Parsec (List α) := manyCore' p []
def many' (p : Parser α) : Parser (List α) := manyCore' p []

@[inline]
partial def manyStrCore (p : Parsec String) (acc : String) : Parsec String :=
partial def manyStrCore (p : Parser String) (acc : String) : Parser String :=
(do manyStrCore p (acc ++ (← p))) <|> pure acc

@[inline]
def manyStr (p : Parsec String) : Parsec String := manyStrCore p ""
def manyStr (p : Parser String) : Parser String := manyStrCore p ""

@[inline]
partial def sepByCore (pcont : Parsec α) (psep : Parsec β) (acc : List α) :
Parsec (List α) :=
partial def sepByCore (pcont : Parser α) (psep : Parser β) (acc : List α) :
Parser (List α) :=
attempt (do let _ ← psep; sepByCore pcont psep (acc ++ [← pcont])) <|> pure acc

@[inline]
def sepBy (pcont : Parsec α) (psep : Parsec β) : Parsec (List α) :=
def sepBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
(do sepByCore pcont psep [← pcont]) <|> pure []

@[inline]
def sepOrEndBy (pcont : Parsec α) (psep : Parsec β) : Parsec (List α) :=
def sepOrEndBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
(do let output ← sepByCore pcont psep [← pcont]; maybeSkip psep; return output) <|> pure []

@[inline]
partial def endByCore (pcont : Parsec α) (psep : Parsec β) (acc : List α) :
Parsec (List α) :=
partial def endByCore (pcont : Parser α) (psep : Parser β) (acc : List α) :
Parser (List α) :=
attempt (do let x ← pcont; let _ ← psep; endByCore pcont psep (acc ++ [x])) <|> pure acc

@[inline]
def endBy (pcont : Parsec α) (psep : Parsec β) : Parsec (List α) :=
def endBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
(do endByCore pcont psep []) <|> pure []

@[inline]
def alphaNum : Parsec Char := attempt do
let c ← anyChar
def alphaNum : Parser Char := attempt do
let c ← any
if ('A' ≤ c ∧ c ≤ 'Z') ∨ ('a' ≤ c ∧ c ≤ 'z') ∨ ('0' ≤ c ∧ c ≤ '9') then
return c
else fail s!"ASCII alphanumeric character expected"

@[inline]
def asciiLetterToLower : Parsec Char := return (← asciiLetter).toLower
def asciiLetterToLower : Parser Char := return (← asciiLetter).toLower

@[inline]
def alphaNumToLower : Parsec Char := return (← alphaNum).toLower
def alphaNumToLower : Parser Char := return (← alphaNum).toLower

@[inline]
def asciiWordToLower : Parsec String := manyChars asciiLetterToLower
def asciiWordToLower : Parser String := manyChars asciiLetterToLower

@[inline]
def between (op : Parsec α) (cl : Parsec α) (mid : Parsec β) : Parsec β := attempt do
def between (op : Parser α) (cl : Parser α) (mid : Parser β) : Parser β := attempt do
let _ ← op
let s ← mid
let _ ← cl
return s

@[inline]
def natNum : Parsec Nat := attempt do
def natNum : Parser Nat := attempt do
let some n := (← manyChars digit).toNat? | fail "Not a natural number"
return n

def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parsec String := fun it =>
def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parser String := fun it =>
let out :=
it.foldUntil "" fun acc c =>
let prev : Option Char := if acc == "" then none else acc.back
Expand Down
30 changes: 16 additions & 14 deletions BibtexQuery/Parser.lean
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Author: Frédéric Dupuis

import BibtexQuery.ParsecExtra
import BibtexQuery.Entry
import Std.Internal.Parsec
import Std.Internal.Parsec.String

/-!
# Bibtex Parser
Expand All @@ -15,21 +17,21 @@ Bibtex format is supported; features such as predefined strings and concatenatio
supported.
-/

open Lean Parsec BibtexQuery.ParsecExtra
open Lean Std.Internal.Parsec Std.Internal.Parsec.String BibtexQuery.ParsecExtra

namespace BibtexQuery.Parser

/-- The name of the bibtex entry (i.e. what goes in the cite command). -/
def name : Parsec String := attempt do
let firstChar ← Parsec.asciiLetter
def name : Parser String := attempt do
let firstChar ← asciiLetter
let remainder ← manyChars <| (alphaNum <|> pchar ':' <|> pchar '-' <|> pchar '_')
return firstChar.toString ++ remainder

/-- "article", "book", etc -/
def category : Parsec String := attempt do skipChar '@'; asciiWordToLower
def category : Parser String := attempt do skipChar '@'; asciiWordToLower

partial def bracedContentTail (acc : String) : Parsec String := attempt do
let c ← anyChar
partial def bracedContentTail (acc : String) : Parser String := attempt do
let c ← any
if c = '{' then
let s ← bracedContentTail ""
bracedContentTail (acc ++ "{" ++ s)
Expand All @@ -38,18 +40,18 @@ partial def bracedContentTail (acc : String) : Parsec String := attempt do
else
bracedContentTail (acc ++ c.toString)

def bracedContent : Parsec String := attempt do
def bracedContent : Parser String := attempt do
skipChar '{'
let s ← bracedContentTail ""
return s.dropRight 1

def quotedContent : Parsec String := attempt do
def quotedContent : Parser String := attempt do
skipChar '"'
let s ← manyCharsUntilWithPrev fun | (some '\\'), '"' => false | _, '"' => true | _, _ => false
skipChar '"'
return (s.replace "\n" "").replace "\r" ""

def month : Parsec String := attempt do
def month : Parser String := attempt do
let s ← asciiWordToLower
match s with
| "jan" => return s
Expand All @@ -67,7 +69,7 @@ def month : Parsec String := attempt do
| _ => fail "Not a valid month"

/-- The content field of a tag. -/
def tagContent : Parsec String := attempt do
def tagContent : Parser String := attempt do
let c ← peek!
if c.isDigit then manyChars digit else
if c.isAlpha then month else
Expand All @@ -77,17 +79,17 @@ def tagContent : Parsec String := attempt do
| _ => fail "Tag content expected"

/-- i.e. journal = {Journal of Musical Deontology} -/
def tag : Parsec Tag := attempt do
def tag : Parser Tag := attempt do
let tagName ← manyChars (alphaNumToLower <|> pchar '_' <|> pchar '-')
ws; skipChar '='; ws
let tagContent ← tagContent
return { name := tagName, content := tagContent }

def outsideEntry : Parsec Unit := attempt do
def outsideEntry : Parser Unit := attempt do
let _ ← manyChars <| noneOf "@"

/-- A Bibtex entry. TODO deal with "preamble" etc. -/
def entry : Parsec Entry := attempt do
def entry : Parser Entry := attempt do
outsideEntry
let typeOfEntry ← category
ws; skipChar '{'; ws
Expand All @@ -97,7 +99,7 @@ def entry : Parsec Entry := attempt do
ws; skipChar '}'; ws
return Entry.normalType typeOfEntry nom t

def bibtexFile : Parsec (List Entry) := many' entry
def bibtexFile : Parser (List Entry) := many' entry

--#eval "auTHOr23:z ".parseDebug name
--#eval "auTHOr23:z".parseDebug name
Expand Down
Loading

0 comments on commit 0a294fe

Please sign in to comment.