diff --git a/BibtexQuery/Format.lean b/BibtexQuery/Format.lean index 8f73328..39ae2fa 100644 --- a/BibtexQuery/Format.lean +++ b/BibtexQuery/Format.lean @@ -8,6 +8,8 @@ import BibtexQuery.Name import BibtexQuery.Entry import Lean.Data.Xml.Basic import Lean.Data.HashMap +import Std.Internal.Parsec +import Std.Internal.Parsec.String /-! @@ -18,7 +20,7 @@ tag generating and sorting. -/ -open Lean Xml Parsec Unicode BibtexQuery.TexDiacritics BibtexQuery.Name +open Lean Xml Std.Internal.Parsec Unicode BibtexQuery.TexDiacritics BibtexQuery.Name namespace BibtexQuery diff --git a/BibtexQuery/Main.lean b/BibtexQuery/Main.lean index b5f0566..0a44031 100644 --- a/BibtexQuery/Main.lean +++ b/BibtexQuery/Main.lean @@ -100,23 +100,23 @@ def main : List String → IO Unit IO.println s!"Reading {fname} to find doubled keys" let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter match parsed with - | Parsec.ParseResult.success _pos res => + | .success _pos res => let lst := listDoublons res IO.println lst - | Parsec.ParseResult.error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}" + | .error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}" | ["l", fname] => do let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter match parsed with - | Parsec.ParseResult.success _pos res => printEntries res - | Parsec.ParseResult.error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" + | .success _pos res => printEntries res + | .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" | "q" :: (fname :: queries) => do let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter match parsed with - | Parsec.ParseResult.success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString - | Parsec.ParseResult.error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" + | .success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString + | .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" | "c" :: (fname :: queries) => do let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter match parsed with - | Parsec.ParseResult.success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString - | Parsec.ParseResult.error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" + | .success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString + | .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" | _ => do IO.eprintln "Invalid command-line arguments"; printHelp diff --git a/BibtexQuery/Name.lean b/BibtexQuery/Name.lean index 379a855..301de8c 100644 --- a/BibtexQuery/Name.lean +++ b/BibtexQuery/Name.lean @@ -6,6 +6,9 @@ Author: Jz Pan import BibtexQuery.TexDiacritics import UnicodeBasic +import Std.Internal.Parsec +import Std.Internal.Parsec.String + /-! @@ -21,12 +24,12 @@ The math equations are still not allowed. -/ -open Lean Parsec Unicode BibtexQuery.TexDiacritics +open Lean Std.Internal.Parsec Std.Internal.Parsec.String Unicode BibtexQuery.TexDiacritics namespace BibtexQuery.Name -partial def getNameBracedAux : Parsec String := do - let doOne : Parsec (Option String) := fun it => +partial def getNameBracedAux : Parser String := do + let doOne : Parser (Option String) := fun it => if it.hasNext then match it.curr with | '{' => (.some <$> bracedContent getNameBracedAux) it @@ -36,19 +39,19 @@ partial def getNameBracedAux : Parsec String := do .success it .none return String.join (← manyOptions doOne).toList -def normalCharWithoutSpaceOrComma : Parsec Char := satisfy fun c => +def normalCharWithoutSpaceOrComma : Parser Char := satisfy fun c => match c with | '\\' | '$' | '{' | '}' | ' ' | '\t' | '\r' | '\n' | ',' => false | _ => true -def normalCharsWithoutSpaceOrComma : Parsec String := do +def normalCharsWithoutSpaceOrComma : Parser String := do let s ← many1Chars normalCharWithoutSpaceOrComma pure <| replaceChars s /-- Input a bibtex name string without TeX commands and math equations, split the string by " " and ",". -/ -def getNameAux : Parsec (Array String) := do - let doOne' : Parsec (Option String) := fun it => +def getNameAux : Parser (Array String) := do + let doOne' : Parser (Option String) := fun it => if it.hasNext then match it.curr with | '{' => (.some <$> bracedContent getNameBracedAux) it @@ -56,7 +59,7 @@ def getNameAux : Parsec (Array String) := do | _ => (.some <$> normalCharsWithoutSpaceOrComma) it else .success it .none - let doOne : Parsec (Option String) := fun it => + let doOne : Parser (Option String) := fun it => if it.hasNext then match it.curr with | '}' => .success it .none @@ -106,7 +109,7 @@ def getName (arr : Array String) : String × String := /-- Input a bibtex name string without TeX commands and math equations, return an array of `(Firstname, Lastname)`. The braces in the name are preserevd. -/ -def getNames : Parsec (Array (String × String)) := do +def getNames : Parser (Array (String × String)) := do let arr ← getNameAux let arr2 : Array (Array String) := arr.foldl (fun acc s => if s = "and" then diff --git a/BibtexQuery/ParsecExtra.lean b/BibtexQuery/ParsecExtra.lean index 85b077d..7702090 100644 --- a/BibtexQuery/ParsecExtra.lean +++ b/BibtexQuery/ParsecExtra.lean @@ -4,7 +4,11 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Frédéric Dupuis -/ -import Lean.Data.Parsec +import Std.Internal.Parsec +import Std.Internal.Parsec.Basic +import Std.Internal.Parsec.String + + /-! # Extra Parsec material @@ -14,98 +18,98 @@ of which is modelled after its Haskell counterpart. -/ -open Lean Lean.Parsec +open Lean Std.Internal.Parsec.String Std.Internal.Parsec namespace BibtexQuery.ParsecExtra -def _root_.String.parse? [Inhabited α] (s : String) (p : Lean.Parsec α) : Option α := +def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α := match p s.iter with - | Lean.Parsec.ParseResult.success _ x => some x - | Lean.Parsec.ParseResult.error _ _ => none + | .success _ x => some x + | .error _ _ => none -def _root_.String.parseDebug [Inhabited α] (s : String) (p : Lean.Parsec α) : Option (α × String.Pos) := +def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos) := match p s.iter with - | Lean.Parsec.ParseResult.success pos x => some ⟨x, pos.i⟩ - | Lean.Parsec.ParseResult.error _ _ => none + | .success pos x => some ⟨x, pos.i⟩ + | .error _ _ => none @[inline] -def noneOf (bad : String) : Parsec Char := Parsec.satisfy (fun z => ¬bad.contains z) +def noneOf (bad : String) : Parser Char := satisfy (fun z => ¬bad.contains z) @[inline] -def noneOfStr (bad : String) : Parsec String := manyChars (noneOf bad) +def noneOfStr (bad : String) : Parser String := manyChars (noneOf bad) @[inline] -def eol : Parsec String := - Parsec.pstring "\n\r" <|> Parsec.pstring "\r\n" <|> Parsec.pstring "\n" +def eol : ByteArray.Parser String := + ByteArray.pstring "\n\r" <|> ByteArray.pstring "\r\n" <|> ByteArray.pstring "\n" @[inline] -def maybeSkip (p : Parsec α) : Parsec Unit := (attempt (p *> pure ())) <|> pure () +def maybeSkip (p : Parser α) : Parser Unit := (attempt (p *> pure ())) <|> pure () @[inline] -partial def manyCore' (p : Parsec α) (acc : List α) : Parsec (List α) := +partial def manyCore' (p : Parser α) (acc : List α) : Parser (List α) := (do manyCore' p (acc ++ [← p])) <|> pure acc @[inline] -def many' (p : Parsec α) : Parsec (List α) := manyCore' p [] +def many' (p : Parser α) : Parser (List α) := manyCore' p [] @[inline] -partial def manyStrCore (p : Parsec String) (acc : String) : Parsec String := +partial def manyStrCore (p : Parser String) (acc : String) : Parser String := (do manyStrCore p (acc ++ (← p))) <|> pure acc @[inline] -def manyStr (p : Parsec String) : Parsec String := manyStrCore p "" +def manyStr (p : Parser String) : Parser String := manyStrCore p "" @[inline] -partial def sepByCore (pcont : Parsec α) (psep : Parsec β) (acc : List α) : - Parsec (List α) := +partial def sepByCore (pcont : Parser α) (psep : Parser β) (acc : List α) : + Parser (List α) := attempt (do let _ ← psep; sepByCore pcont psep (acc ++ [← pcont])) <|> pure acc @[inline] -def sepBy (pcont : Parsec α) (psep : Parsec β) : Parsec (List α) := +def sepBy (pcont : Parser α) (psep : Parser β) : Parser (List α) := (do sepByCore pcont psep [← pcont]) <|> pure [] @[inline] -def sepOrEndBy (pcont : Parsec α) (psep : Parsec β) : Parsec (List α) := +def sepOrEndBy (pcont : Parser α) (psep : Parser β) : Parser (List α) := (do let output ← sepByCore pcont psep [← pcont]; maybeSkip psep; return output) <|> pure [] @[inline] -partial def endByCore (pcont : Parsec α) (psep : Parsec β) (acc : List α) : - Parsec (List α) := +partial def endByCore (pcont : Parser α) (psep : Parser β) (acc : List α) : + Parser (List α) := attempt (do let x ← pcont; let _ ← psep; endByCore pcont psep (acc ++ [x])) <|> pure acc @[inline] -def endBy (pcont : Parsec α) (psep : Parsec β) : Parsec (List α) := +def endBy (pcont : Parser α) (psep : Parser β) : Parser (List α) := (do endByCore pcont psep []) <|> pure [] @[inline] -def alphaNum : Parsec Char := attempt do - let c ← anyChar +def alphaNum : Parser Char := attempt do + let c ← any if ('A' ≤ c ∧ c ≤ 'Z') ∨ ('a' ≤ c ∧ c ≤ 'z') ∨ ('0' ≤ c ∧ c ≤ '9') then return c else fail s!"ASCII alphanumeric character expected" @[inline] -def asciiLetterToLower : Parsec Char := return (← asciiLetter).toLower +def asciiLetterToLower : Parser Char := return (← asciiLetter).toLower @[inline] -def alphaNumToLower : Parsec Char := return (← alphaNum).toLower +def alphaNumToLower : Parser Char := return (← alphaNum).toLower @[inline] -def asciiWordToLower : Parsec String := manyChars asciiLetterToLower +def asciiWordToLower : Parser String := manyChars asciiLetterToLower @[inline] -def between (op : Parsec α) (cl : Parsec α) (mid : Parsec β) : Parsec β := attempt do +def between (op : Parser α) (cl : Parser α) (mid : Parser β) : Parser β := attempt do let _ ← op let s ← mid let _ ← cl return s @[inline] -def natNum : Parsec Nat := attempt do +def natNum : Parser Nat := attempt do let some n := (← manyChars digit).toNat? | fail "Not a natural number" return n -def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parsec String := fun it => +def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parser String := fun it => let out := it.foldUntil "" fun acc c => let prev : Option Char := if acc == "" then none else acc.back diff --git a/BibtexQuery/Parser.lean b/BibtexQuery/Parser.lean index a20b06b..0105601 100644 --- a/BibtexQuery/Parser.lean +++ b/BibtexQuery/Parser.lean @@ -6,6 +6,8 @@ Author: Frédéric Dupuis import BibtexQuery.ParsecExtra import BibtexQuery.Entry +import Std.Internal.Parsec +import Std.Internal.Parsec.String /-! # Bibtex Parser @@ -15,21 +17,21 @@ Bibtex format is supported; features such as predefined strings and concatenatio supported. -/ -open Lean Parsec BibtexQuery.ParsecExtra +open Lean Std.Internal.Parsec Std.Internal.Parsec.String BibtexQuery.ParsecExtra namespace BibtexQuery.Parser /-- The name of the bibtex entry (i.e. what goes in the cite command). -/ -def name : Parsec String := attempt do - let firstChar ← Parsec.asciiLetter +def name : Parser String := attempt do + let firstChar ← asciiLetter let remainder ← manyChars <| (alphaNum <|> pchar ':' <|> pchar '-' <|> pchar '_') return firstChar.toString ++ remainder /-- "article", "book", etc -/ -def category : Parsec String := attempt do skipChar '@'; asciiWordToLower +def category : Parser String := attempt do skipChar '@'; asciiWordToLower -partial def bracedContentTail (acc : String) : Parsec String := attempt do - let c ← anyChar +partial def bracedContentTail (acc : String) : Parser String := attempt do + let c ← any if c = '{' then let s ← bracedContentTail "" bracedContentTail (acc ++ "{" ++ s) @@ -38,18 +40,18 @@ partial def bracedContentTail (acc : String) : Parsec String := attempt do else bracedContentTail (acc ++ c.toString) -def bracedContent : Parsec String := attempt do +def bracedContent : Parser String := attempt do skipChar '{' let s ← bracedContentTail "" return s.dropRight 1 -def quotedContent : Parsec String := attempt do +def quotedContent : Parser String := attempt do skipChar '"' let s ← manyCharsUntilWithPrev fun | (some '\\'), '"' => false | _, '"' => true | _, _ => false skipChar '"' return (s.replace "\n" "").replace "\r" "" -def month : Parsec String := attempt do +def month : Parser String := attempt do let s ← asciiWordToLower match s with | "jan" => return s @@ -67,7 +69,7 @@ def month : Parsec String := attempt do | _ => fail "Not a valid month" /-- The content field of a tag. -/ -def tagContent : Parsec String := attempt do +def tagContent : Parser String := attempt do let c ← peek! if c.isDigit then manyChars digit else if c.isAlpha then month else @@ -77,17 +79,17 @@ def tagContent : Parsec String := attempt do | _ => fail "Tag content expected" /-- i.e. journal = {Journal of Musical Deontology} -/ -def tag : Parsec Tag := attempt do +def tag : Parser Tag := attempt do let tagName ← manyChars (alphaNumToLower <|> pchar '_' <|> pchar '-') ws; skipChar '='; ws let tagContent ← tagContent return { name := tagName, content := tagContent } -def outsideEntry : Parsec Unit := attempt do +def outsideEntry : Parser Unit := attempt do let _ ← manyChars <| noneOf "@" /-- A Bibtex entry. TODO deal with "preamble" etc. -/ -def entry : Parsec Entry := attempt do +def entry : Parser Entry := attempt do outsideEntry let typeOfEntry ← category ws; skipChar '{'; ws @@ -97,7 +99,7 @@ def entry : Parsec Entry := attempt do ws; skipChar '}'; ws return Entry.normalType typeOfEntry nom t -def bibtexFile : Parsec (List Entry) := many' entry +def bibtexFile : Parser (List Entry) := many' entry --#eval "auTHOr23:z ".parseDebug name --#eval "auTHOr23:z".parseDebug name diff --git a/BibtexQuery/TexDiacritics.lean b/BibtexQuery/TexDiacritics.lean index 4a628b4..697a7fc 100644 --- a/BibtexQuery/TexDiacritics.lean +++ b/BibtexQuery/TexDiacritics.lean @@ -4,7 +4,8 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Jz Pan -/ -import Lean.Data.Parsec +import Std.Internal.Parsec +import Std.Internal.Parsec.String /-! @@ -17,16 +18,16 @@ and error on any other TeX commands which are not in math environment. -/ -open Lean Parsec +open Lean Std.Internal.Parsec Std.Internal.Parsec.String namespace BibtexQuery.TexDiacritics /-- Match a sequence of space characters and return it. -/ -def ws' : Parsec String := manyChars <| satisfy fun c => +def ws' : Parser String := manyChars <| satisfy fun c => c == ' ' || c == '\n' || c == '\r' || c == '\t' /-- Match a normal character which is not the beginning of TeX command. -/ -def normalChar : Parsec Char := satisfy fun c => +def normalChar : Parser Char := satisfy fun c => c != '\\' && c != '$' && c != '{' && c != '}' /-- Replace certain sequences (e.g. "--") by their UTF-8 representations. -/ @@ -41,15 +42,15 @@ def replaceChars (s : String) : String := arr.foldl (fun acc (o, r) => acc.replace o r) s /-- Match at least one normal characters which is not the beginning of TeX command. -/ -def normalChars : Parsec String := do +def normalChars : Parser String := do let s ← many1Chars normalChar pure <| replaceChars s /-- Match a TeX command starting with `\`, potentially with trailing whitespaces. -/ -def texCommand : Parsec String := pchar '\\' *> attempt do +def texCommand : Parser String := pchar '\\' *> attempt do let s ← manyChars asciiLetter if s.isEmpty then - return "\\" ++ toString (← anyChar) ++ (← ws') + return "\\" ++ toString (← any) ++ (← ws') else match ← peek? with | .some c => @@ -63,22 +64,22 @@ def texCommand : Parsec String := pchar '\\' *> attempt do return "\\" ++ s /-- Similar to `texCommand` but it excludes some commands. -/ -def texCommand' (exclude : Array String) : Parsec String := attempt do +def texCommand' (exclude : Array String) : Parser String := attempt do let s ← texCommand match exclude.find? (· == s.trim) with | .some _ => fail s!"'{s.trim}' is not allowed" | .none => return s /-- Match a sequence starting with `{` and ending with `}`. -/ -def bracedContent (p : Parsec String) : Parsec String := +def bracedContent (p : Parser String) : Parser String := pchar '{' *> (("{" ++ · ++ "}") <$> p) <* pchar '}' /-- Similar to `bracedContent` but it does not output braces. -/ -def bracedContent' (p : Parsec String) : Parsec String := +def bracedContent' (p : Parser String) : Parser String := pchar '{' *> p <* pchar '}' -partial def manyOptions {α} (p : Parsec (Option α)) (acc : Array α := #[]) : - Parsec (Array α) := fun it => +partial def manyOptions {α} (p : Parser (Option α)) (acc : Array α := #[]) : + Parser (Array α) := fun it => match p it with | .success it ret => match ret with @@ -86,8 +87,8 @@ partial def manyOptions {α} (p : Parsec (Option α)) (acc : Array α := #[]) : | .none => .success it acc | .error it err => .error it err -partial def mathContentAux : Parsec String := do - let doOne : Parsec (Option String) := fun it => +partial def mathContentAux : Parser String := do + let doOne : Parser (Option String) := fun it => if it.hasNext then match it.curr with | '{' => (.some <$> bracedContent mathContentAux) it @@ -102,8 +103,8 @@ partial def mathContentAux : Parsec String := do return String.join (← manyOptions doOne).toList /-- Match a math content. Returns `Option.none` if it does not start with `\(`, `\[` or `$`. -/ -def mathContent : Parsec (Option String) := fun it => - let aux (beginning ending dollar : String) : Parsec String := +def mathContent : Parser (Option String) := fun it => + let aux (beginning ending dollar : String) : Parser String := pstring beginning *> ((dollar ++ · ++ dollar) <$> mathContentAux) <* pstring ending let substr := it.extract (it.forward 2) if substr = "\\[" then @@ -120,7 +121,7 @@ def mathContent : Parsec (Option String) := fun it => /-- Match a TeX command for diacritics, return the corresponding UTF-8 string. Sometimes it needs to read the character after the command, in this case the `p` is used to read braced content. -/ -def texDiacriticsCommand (p : Parsec String) : Parsec String := do +def texDiacriticsCommand (p : Parser String) : Parser String := do let cmd ← String.trim <$> texCommand match cmd with | "\\oe" => pure "œ" | "\\OE" => pure "Œ" @@ -147,7 +148,7 @@ def texDiacriticsCommand (p : Parsec String) : Parsec String := do if ch.isEmpty then fail s!"unsupported command: '{cmd}'" else - let doOne : Parsec String := fun it => + let doOne : Parser String := fun it => if it.hasNext then match it.curr with | '{' => bracedContent p it @@ -168,8 +169,8 @@ def texDiacriticsCommand (p : Parsec String) : Parsec String := do /-- Convert all TeX commands for diacritics into UTF-8 characters, and error on any other TeX commands which are not in math environment. -/ -partial def texDiacritics : Parsec String := do - let doOne : Parsec (Option String) := fun it => +partial def texDiacritics : Parser String := do + let doOne : Parser (Option String) := fun it => if it.hasNext then match mathContent it with | .success it ret => @@ -188,8 +189,8 @@ partial def texDiacritics : Parsec String := do /-- Remove all braces except for those in math environment, and error on any TeX commands which are not in math environment. -/ -partial def removeBraces : Parsec String := do - let doOne : Parsec (Option String) := fun it => +partial def removeBraces : Parser String := do + let doOne : Parser (Option String) := fun it => if it.hasNext then match mathContent it with | .success it ret => diff --git a/lake-manifest.json b/lake-manifest.json index ea7bca9..bc54d1b 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -5,7 +5,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "5770a09ab92f05f82598dc04afed67c977dddd77", + "rev": "7afce91e4fcee25c1ed06dca8d71b82bed396776", "name": "UnicodeBasic", "manifestFile": "lake-manifest.json", "inputRev": "main", diff --git a/lean-toolchain b/lean-toolchain index e7a4f40..98556ba 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.11.0-rc2 +leanprover/lean4:v4.12.0-rc1