@@ -3,7 +3,6 @@ Copyright (c) 2022 Jannis Limperg. All rights reserved.
3
3
Released under Apache 2.0 license as described in the file LICENSE.
4
4
Authors: Jannis Limperg, James Gallicchio, F. G. Dorais
5
5
-/
6
- import Batteries.Data.Array.Match
7
6
8
7
/-- Coercion from `String` to `Substring`, implemented by `String.toSubstring`. -/
instance : Coe String Substring := ⟨String.toSubstring⟩
9
8
@@ -12,63 +11,6 @@ namespace String
12
11
/-- If `a < b` for positions `a b : Pos`, then `b ≠ 0`. The proof matches the hypothesis `b = 0` with `rfl`, so `hlt` becomes a proof of `a < 0`, which `Nat.not_lt_zero` refutes. -/
protected theorem Pos.ne_zero_of_lt : {a b : Pos} → a < b → b ≠ 0
13
12
| _, _, hlt, rfl => Nat.not_lt_zero _ hlt
14
13
15
- /-- Knuth-Morris-Pratt matcher type.
16
-
17
- This type holds the data for running the Knuth-Morris-Pratt (KMP) string matching algorithm; it extends `Array.Matcher Char` with the pattern being searched for.
18
- KMP is a linear-time algorithm to locate all substrings of a string that match a given pattern.
19
- Generating the algorithm data is also linear in the length of the pattern, but the data can be
20
- reused to match the same pattern over different strings.
21
-
22
- The KMP data for a pattern can be generated using `Matcher.ofString` or `Matcher.ofSubstring`. Then `Matcher.find?`
23
- and `Matcher.findAll` can be used to run the algorithm on an input string.
24
- ```
25
- def m := Matcher.ofString "abba"
26
-
27
- #eval Option.isSome <| m.find? "AbbabbA" -- false
28
- #eval Option.isSome <| m.find? "aabbaa" -- true
29
-
30
- #eval Array.size <| m.findAll "abbabba" -- 2
31
- #eval Array.size <| m.findAll "abbabbabba" -- 3
32
- ```
33
- -/
34
- structure Matcher extends Array.Matcher Char where
35
- /-- The pattern substring the matcher was built from; its byte size is exposed as `Matcher.patternSize`. -/
36
- pattern : Substring
37
-
38
- /-- Make a KMP matcher from a pattern substring: the underlying `Array.Matcher Char` is built with `Array.Matcher.ofStream pattern`, and `pattern` itself is stored in the `pattern` field. -/
39
- @[inline] def Matcher.ofSubstring (pattern : Substring) : Matcher where
40
- toMatcher := Array.Matcher.ofStream pattern
41
- pattern := pattern
42
-
43
- /-- Make a KMP matcher from a pattern string by delegating to `Matcher.ofSubstring`; the `String` argument is passed where a `Substring` is expected. -/
44
- @[inline] def Matcher.ofString (pattern : String) : Matcher :=
45
- Matcher.ofSubstring pattern
46
-
47
- /-- The byte size of the matcher's pattern, i.e. `m.pattern.bsize`. -/
48
- abbrev Matcher.patternSize (m : Matcher) : Nat := m.pattern.bsize
49
-
50
- /-- Find all substrings of `s` matching `m.pattern`. Each match is recorded as the span of `m.patternSize` bytes ending at the start position of the remainder returned by `next?`. -/
51
- partial def Matcher.findAll (m : Matcher) (s : Substring) : Array Substring :=
52
- loop s m.toMatcher #[]
53
- where
54
- /-- Accumulator loop for `String.Matcher.findAll`: `s` is the input handed to `am.next?`, `am` is the current matcher state, and `occs` holds the matches collected so far, which is returned once `next?` yields `none`. -/
55
- loop (s : Substring) (am : Array.Matcher Char) (occs : Array Substring) : Array Substring :=
56
- match am.next? s with
57
- | none => occs
58
- | some (s, am) =>
59
- loop s am <| occs.push { s with
60
- startPos := ⟨s.startPos.byteIdx - m.patternSize⟩
61
- stopPos := s.startPos }
62
-
63
- /-- Find the first substring of `s` matching `m.pattern`, or `none` if no such substring exists. On success the match is the span of `m.patternSize` bytes ending at the start position of the remainder returned by `next?`; the updated matcher state is discarded. -/
64
- def Matcher.find? (m : Matcher) (s : Substring) : Option Substring :=
65
- match m.next? s with
66
- | none => none
67
- | some (s, _) =>
68
- some { s with
69
- startPos := ⟨s.startPos.byteIdx - m.patternSize⟩
70
- stopPos := s.startPos }
71
-
72
14
end String
73
15
74
16
namespace Substring
@@ -133,41 +75,10 @@ def dropSuffix? (s : Substring) (suff : Substring) : Option Substring :=
133
75
else
134
76
none
135
77
136
- /--
137
- Returns all the substrings of `s` that match `pattern`, by building a `String.Matcher` from `pattern` and running its `findAll` on `s`.
138
- -/
139
- @[inline] def findAllSubstr (s pattern : Substring) : Array Substring :=
140
- (String.Matcher.ofSubstring pattern).findAll s
141
-
142
- /--
143
- Returns the first substring of `s` that matches `pattern`,
144
- or `none` if there is no such substring. Builds a `String.Matcher` from `pattern` and runs its `find?` on `s`.
145
- -/
146
- @[inline] def findSubstr? (s pattern : Substring) : Option Substring :=
147
- (String.Matcher.ofSubstring pattern).find? s
148
-
149
- /--
150
- Returns true iff `pattern` occurs as a substring of `s`, i.e. iff `s.findSubstr? pattern` is `some`.
151
- -/
152
- @[inline] def containsSubstr (s pattern : Substring) : Bool :=
153
- s.findSubstr? pattern |>.isSome
154
-
155
78
end Substring
156
79
157
80
namespace String
158
81
159
- @[inherit_doc Substring.findAllSubstr]
160
- abbrev findAllSubstr (s : String) (pattern : Substring) : Array Substring :=
161
- (String.Matcher.ofSubstring pattern).findAll s -- `s : String` is passed where `findAll` expects a `Substring`
162
-
163
- @[inherit_doc Substring.findSubstr?]
164
- abbrev findSubstr? (s : String) (pattern : Substring) : Option Substring :=
165
- s.toSubstring.findSubstr? pattern -- convert `s` to a `Substring` and delegate to `Substring.findSubstr?`
166
-
167
- @[inherit_doc Substring.containsSubstr]
168
- abbrev containsSubstr (s : String) (pattern : Substring) : Bool :=
169
- s.toSubstring.containsSubstr pattern -- convert `s` to a `Substring` and delegate to `Substring.containsSubstr`
170
-
171
82
/--
172
83
If `pre` is a prefix of `s`, i.e. `s = pre ++ t`, returns the remainder `t`.
173
84
-/
0 commit comments