Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add KMP frontend for lists #1065

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Batteries/Data/List.lean
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import Batteries.Data.List.Count
import Batteries.Data.List.EraseIdx
import Batteries.Data.List.Init.Lemmas
import Batteries.Data.List.Lemmas
import Batteries.Data.List.Matcher
import Batteries.Data.List.Monadic
import Batteries.Data.List.OfFn
import Batteries.Data.List.Pairwise
Expand Down
85 changes: 85 additions & 0 deletions Batteries/Data/List/Matcher.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/-
Copyright (c) 2024 François G. Dorais. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: François G. Dorais
-/
import Batteries.Data.Array.Match
import Batteries.Data.String.Basic

namespace List

/-- Knuth-Morris-Pratt matcher type

This type is used to keep data for running the Knuth-Morris-Pratt (KMP) list matching algorithm.
KMP is a linear time algorithm to locate all contiguous sublists of a list that match a given
pattern. Generating the algorithm data is also linear in the length of the pattern but the data can
be re-used to match the same pattern over multiple lists.

The KMP data for a pattern can be generated using `Matcher.ofList`. Then `Matcher.find?` and
`Matcher.findAll` can be used to run the algorithm on an input list.
```
def m := Matcher.ofList [0,1,1,0]

#eval Option.isSome <| m.find? [2,1,1,0,1,1,2] -- false
#eval Option.isSome <| m.find? [0,0,1,1,0,0] -- true

#eval Array.size <| m.findAll [0,1,1,0,1,1,0] -- 2
#eval Array.size <| m.findAll [0,1,1,0,1,1,0,1,1,0] -- 3
```
-/
structure Matcher (α : Type _) extends Array.Matcher α where
/-- The pattern for the matcher -/
pattern : List α

/-- Make KMP matcher from list pattern. -/
@[inline] def Matcher.ofList [BEq α] (pattern : List α) : Matcher α where
toMatcher := Array.Matcher.ofStream pattern
pattern := pattern

/-- List stream that keeps count of items read. -/
local instance (α) : Stream (List α × Nat) α where
next?
| ([], _) => none
| (x::xs, n) => (x, xs, n+1)

/--
Find all start and end positions of all infix sublists of `l` matching `m.pattern`.
The sublists may be overlapping.
-/
partial def Matcher.findAll [BEq α] (m : Matcher α) (l : List α) : Array (Nat × Nat) :=
loop (l, 0) m.toMatcher #[]
where
/-- Accumulator loop for `List.Matcher.findAll` -/
loop (l : List α × Nat) (am : Array.Matcher α) (occs : Array (Nat × Nat)) : Array (Nat × Nat) :=
match am.next? l with
| none => occs
| some (l, am) => loop l am (occs.push (l.snd - m.table.size, l.snd))

/--
Find the start and end positions of the first infix sublist of `l` matching `m.pattern`,
or `none` if there is no such sublist.
-/
def Matcher.find? [BEq α] (m : Matcher α) (l : List α) : Option (Nat × Nat) :=
match m.next? (l, 0) with
| none => none
| some (l, _) => some (l.snd - m.table.size, l.snd)

/--
Returns all the start and end positions of all infix sublists of of `l` that match `pattern`.
The sublists may be overlapping.
-/
@[inline] def findAllInfix [BEq α] (l pattern : List α) : Array (Nat × Nat) :=
(Matcher.ofList pattern).findAll l

/--
Returns the start and end positions of the first infix sublist of `l` that matches `pattern`,
or `none` if there is no such sublist.
-/
@[inline] def findInfix? [BEq α] (l pattern : List α) : Option (Nat × Nat) :=
(Matcher.ofList pattern).find? l

/--
Returns true iff `pattern` occurs as an infix sublist of `l`.
-/
@[inline] def containsInfix [BEq α] (l pattern : List α) : Bool :=
findInfix? l pattern |>.isSome
17 changes: 16 additions & 1 deletion BatteriesTest/kmp_matcher.lean
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import Batteries.Data.String.Matcher
import Batteries.Data.List.Matcher

/-! Tests for Knuth-Morris-Pratt matching algorithm -/
/-! # Tests for the Knuth-Morris-Pratt (KMP) matching algorithm -/

/-! ### String API -/

/-- Matcher for pattern "abba" -/
def m := String.Matcher.ofString "abba"
Expand All @@ -24,3 +27,15 @@ def m := String.Matcher.ofString "abba"
#guard Array.size ("xyxyyxxyx".findAllSubstr "xyx") = 2

#guard Array.size ("xyxyxyyxxyxyx".findAllSubstr "xyx") = 4

/-! ### List API -/

def lm := List.Matcher.ofList [0,1,1,0]

#guard lm.find? [2,1,1,0,1,1,2] == none

#guard lm.find? [0,0,1,1,0,0] == some (1, 5)

#guard (lm.findAll [0,1,1,0,1,1,0]).size == 2

#guard (lm.findAll [0,1,1,0,1,1,0,1,1,0]).size == 3
Loading