Skip to content

Commit 55c2137

Browse files
committed
feat: Implement groups into NFAEvaluator (proper greedy matches)
1 parent 0946e7b commit 55c2137

File tree

6 files changed

+327
-81
lines changed

6 files changed

+327
-81
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import regex_automata
1313

1414
pattern = regex_automata.compile(r"(foo)*bar|baz") # regex_automata.Pattern
1515

16-
pattern.fullmatch("foofoobar") # regex_automata.Match(span=(0, 9), match='foofoobar')
16+
pattern.fullmatch("foofoobar") # regex_automata.Match
1717
pattern.fullmatch("foo") # None
1818

1919
pattern.ast # regex_automata.parser.ast.AstNode
@@ -44,14 +44,14 @@ Finite automaton accepting `(foo)*bar|baz`:
4444

4545
- Library
4646
- `match()`, `fullmatch()`, `search()` and `finditer()` methods
47-
- `Match` object containing span and matched text (but no groups)
47+
- `Match` object containing span, matched text and groups
4848
- flags `DOTALL`, `IGNORECASE` and `MULTILINE`
4949

5050
- Syntax
5151
- character sets: `.`, `[...]` (special sequences such as `\w` are supported, but not inside square brackets)
5252
- repetition: `*`, `?`, `+`, `{n,k}`
5353
- boundary assertions: `^`, `$`, `\b`, `\B`, `\A`, `\Z`
54-
- basic groups: `(...)` that behave like `(?:...)` ie. non-capturing
54+
- basic groups: `(...)` (named and non-capturing groups are not supported)
5555

5656
## Implementation overview
5757

src/regex_automata/regex/match.py

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,53 @@
11
from dataclasses import dataclass
2-
from typing import Self
2+
from typing import TYPE_CHECKING
33

4+
if TYPE_CHECKING:
5+
from .pattern import Pattern
46

5-
@dataclass
7+
8+
# Match: record describing one successful regex match. It keeps the Pattern
# that produced it, the pos/endpos values, the matched text, and a table of
# per-group spans from which group texts are derived on demand.
# repr=False: the dataclass does not generate __repr__; the class supplies its
# own at the bottom.
# NOTE(review): this listing is a rendered diff. Gutter lines ending in "-"
# mark lines removed by the commit, "+" marks added lines, and a bare number is
# an unchanged context line.
@dataclass(repr=False)
69
class Match:
7-
span: tuple[int, int]
10+
# Pattern object this match belongs to (annotation is quoted because Pattern
# is imported only under TYPE_CHECKING).
re: "Pattern"
11+
# presumably the start bound the caller passed to the matching method, or
# None -- TODO confirm against Pattern
pos: int | None
12+
# presumably the end bound the caller passed to the matching method, or
# None -- TODO confirm against Pattern
endpos: int | None
813
# Text of the whole match; _group() slices it relative to start().
match: str
14+
# Group index -> (start, end) span. Index 0 is what span()/start()/end() use
# by default, i.e. the whole match. Spans share an origin with group 0's start,
# since _group() rebases them by subtracting start() before slicing self.match.
groupspandict: dict[int, tuple[int, int]]
15+
16+
# Return group text(s): no argument -> group 0; one index -> that group's text
# (or None); several indices -> a tuple of texts in argument order.
def group(self, *indices: int) -> (str | None) | tuple[str | None, ...]:
17+
match len(indices):
18+
case 0:
19+
return self._group(0)
20+
case 1:
21+
return self._group(indices[0])
22+
case _:
23+
return tuple(map(self._group, indices))
24+
25+
# Text of group i, or None when span(i) reports a start of -1, which is what
# span() returns for an index that has no entry in groupspandict.
def _group(self, i: int) -> str | None:
26+
start, end = self.span(i)
27+
if start == -1:
28+
return None
29+
else:
30+
# self.match begins at the whole match's start, so shift the group's span
# by that offset before slicing.
match_start = self.start()
31+
return self.match[start - match_start : end - match_start]
32+
33+
# Map every index present in groupspandict to that group's text.
# NOTE(review): keys are ints and every key of groupspandict is included;
# stdlib re.Match.groupdict() is keyed by group name instead -- confirm this
# difference is intended.
def groupdict(self) -> dict[int, str | None]:
34+
return {i: self._group(i) for i in self.groupspandict}
35+
36+
# (start, end) span of group i (default 0); (-1, -1) when i has no entry.
def span(self, i: int = 0) -> tuple[int, int]:
37+
return self.groupspandict.get(i, (-1, -1))
38+
39+
# First element of span(i); -1 when group i has no entry.
def start(self, i: int = 0) -> int:
40+
return self.span(i)[0]
41+
42+
# Second element of span(i); -1 when group i has no entry.
def end(self, i: int = 0) -> int:
43+
return self.span(i)[1]
44+
45+
# m[i] is shorthand for the single-group lookup done by _group(i).
def __getitem__(self, i: int) -> str | None:
46+
return self._group(i)
947

10-
@classmethod
11-
def from_span_and_text(cls, start: int, end: int, text: str) -> Self:
12-
return cls((start, end), text[start:end])
48+
# NOTE(review): this returns self.re.pattern, i.e. an attribute of the Pattern.
# stdlib re.Match.string is the string that was passed to match()/search();
# this class stores no such string -- confirm the intended meaning.
@property
49+
def string(self) -> str:
50+
return self.re.pattern
1351

14-
def group(self) -> str:
15-
return self.match
52+
# Compact representation showing the whole-match span and the matched text.
def __repr__(self) -> str:
53+
return f"<Match span={self.span()!r}, match={self.match!r}>"

0 commit comments

Comments
 (0)