From 59d5ddd7077d3c8add016aede081804c9bc3ec55 Mon Sep 17 00:00:00 2001 From: Adarsh Gourab Mahalik <71959210+itsmeadarsh2008@users.noreply.github.com> Date: Mon, 17 Jun 2024 07:40:22 +0000 Subject: [PATCH] Added Examples + group, groups, start, end and span functions --- examples/helloworld.py | 29 +++++++ flpc.pyi | 174 +++++++++++++++++++++++------------------ pyproject.toml | 2 +- src/lib.rs | 54 +++++++++++-- 4 files changed, 173 insertions(+), 86 deletions(-) create mode 100644 examples/helloworld.py diff --git a/examples/helloworld.py b/examples/helloworld.py new file mode 100644 index 0000000..34edcbe --- /dev/null +++ b/examples/helloworld.py @@ -0,0 +1,29 @@ +import flpc + +# Compile a pattern +pattern = flpc.compile(r'(\w+) (\w+)', flags=0) + +# Search for the pattern in text +match = flpc.search(pattern, "Hello World") +if match: + print("Group 0:", match.group(0)) # Hello World + print("Group 1:", match.group(1)) # Hello + print("Group 2:", match.group(2)) # World + print("Groups:", match.groups()) # ['Hello', 'World'] + print("Start of group 0:", match.start(0)) # 0 + print("End of group 0:", match.end(0)) # 11 + print("Span of group 0:", match.span(0)) # (0, 11) + +# Find all matches +matches = flpc.findall(pattern, "Hello World Hello Python") +print("All matches:", matches) # ['Hello World', 'Hello Python'] + +# Find all matches with iterator +matches_iter = flpc.finditer(pattern, "Hello World Hello Python") +for match in matches_iter: + print("Match:", match.group(0)) # Hello World, Hello Python + print("Groups:", match.groups()) # ['Hello', 'World'], ['Hello', 'Python'] + +# Replace matches +replaced_text = flpc.sub(pattern, r'\2 \1', "Hello World") +print("Replaced text:", replaced_text) # World Hello \ No newline at end of file diff --git a/flpc.pyi b/flpc.pyi index 0afe6c4..f5f995b 100644 --- a/flpc.pyi +++ b/flpc.pyi @@ -1,78 +1,98 @@ -from typing import List, Optional, Tuple - -class Pattern: - """A compiled regular expression pattern.""" - 
regex: Regex - def __init__(self, regex: Regex): ... - -class Match: - """A match object returned by a regex search.""" - mat: regex.Match - def __init__(self, mat: regex.Match): ... - -class Scanner: - """A scanner object (not implemented).""" - ... - -class RegexFlag: - """A struct representing regex flags.""" - bits: int - def __init__(self, bits: int): ... - -class Constants: - """A struct for regex constants (not implemented).""" - ... - -class Sre: - """A struct for regex engine (not implemented).""" - ... - -def compile(pattern: str, flags: Optional[int] = ...) -> Pattern: - """Compile a regular expression pattern into a regex object.""" - ... - -def search(pattern: Pattern, text: str) -> Optional[Match]: - """Scan through a string, looking for any location where the regex pattern matches.""" - ... - -def fmatch(pattern: Pattern, text: str) -> Optional[Match]: - """Try to apply the pattern at the start of the string, returning a match object if successful.""" - ... - -def fullmatch(pattern: Pattern, text: str) -> Optional[Match]: - """Try to apply the pattern to all of the string, returning a match object if the whole string matches.""" - ... - -def split(pattern: Pattern, text: str) -> List[str]: - """Split the source string by the occurrences of the pattern.""" - ... - -def findall(pattern: Pattern, text: str) -> List[str]: - """Find all substrings where the regex pattern matches and return them as a list.""" - ... - -def finditer(pattern: Pattern, text: str) -> List[Match]: - """Return an iterator yielding match objects over all non-overlapping matches for the pattern in the string.""" - ... - -def sub(pattern: Pattern, repl: str, text: str) -> str: - """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the string by the replacement repl.""" - ... 
- -def subn(pattern: Pattern, repl: str, text: str) -> Tuple[str, int]: - """Perform the same operation as sub(), but return a tuple (new_string, number_of_subs_made).""" - ... - -def escape(text: str) -> str: - """Escape all non-alphanumeric characters in a string.""" - ... - -def purge() -> None: - """Purge the regex cache (not implemented).""" - ... - -__version__: str -__doc__: str -__name__: str -__package__: str +from typing import List, Optional, Tuple + +class Pattern: + """A compiled regular expression pattern.""" + regex: Regex + def __init__(self, regex: Regex): ... + +class Match: + """A match object returned by a regex search.""" + mat: regex.Match + def __init__(self, mat: regex.Match): ... + + def group(self, idx: int) -> Optional[str]: + """Return the string matched by group idx, or None if that group does not exist or did not match.""" + ... + + def groups(self) -> List[Optional[str]]: + """Return a list with the string matched by each capture group except group 0; groups that did not match are None.""" + ... + + def start(self, idx: int) -> Optional[int]: + """Return the starting position of group idx, or None if that group does not exist or did not match.""" + ... + + def end(self, idx: int) -> Optional[int]: + """Return the ending position of group idx, or None if that group does not exist or did not match.""" + ... + + def span(self, idx: int) -> Optional[Tuple[int, int]]: + """Return a tuple containing the (start, end) positions of group idx, or None if that group does not exist or did not match.""" + ... + +class Scanner: + """A scanner object (not implemented).""" + ... + +class RegexFlag: + """A struct representing regex flags.""" + bits: int + def __init__(self, bits: int): ... + +class Constants: + """A struct for regex constants (not implemented).""" + ... + +class Sre: + """A struct for regex engine (not implemented).""" + ... + +def compile(pattern: str, flags: Optional[int] = ...) -> Pattern: + """Compile a regular expression pattern into a regex object.""" + ... + +def search(pattern: Pattern, text: str) -> Optional[Match]: + """Scan through a string, looking for any location where the regex pattern matches.""" + ... 
+ +def fmatch(pattern: Pattern, text: str) -> Optional[Match]: + """Try to apply the pattern at the start of the string, returning a match object if successful.""" + ... + +def fullmatch(pattern: Pattern, text: str) -> Optional[Match]: + """Try to apply the pattern to all of the string, returning a match object if the whole string matches.""" + ... + +def split(pattern: Pattern, text: str) -> List[str]: + """Split the source string by the occurrences of the pattern.""" + ... + +def findall(pattern: Pattern, text: str) -> List[str]: + """Find all substrings where the regex pattern matches and return them as a list.""" + ... + +def finditer(pattern: Pattern, text: str) -> List[Match]: + """Return a list of match objects, one for each non-overlapping match of the pattern in the string.""" + ... + +def sub(pattern: Pattern, repl: str, text: str) -> str: + """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the string by the replacement repl.""" + ... + +def subn(pattern: Pattern, repl: str, text: str) -> Tuple[str, int]: + """Perform the same operation as sub(), but return a tuple (new_string, number_of_subs_made).""" + ... + +def escape(text: str) -> str: + """Escape all non-alphanumeric characters in a string.""" + ... + +def purge() -> None: + """Purge the regex cache (not implemented).""" + ... + +__version__: str +__doc__: str +__name__: str +__package__: str __all__: List[str] \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b10033b..179c783 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "flpc" -version = "0.1.3" +version = "0.1.4" description = "A Rust-based regex crate wrapper for Python3 to get faster performance. 
👾" maintainers = [{ name = "Adarsh Gourab Mahalik", email = "gourabmahalikadarsh@gmail.com" }] readme = "README.md" diff --git a/src/lib.rs b/src/lib.rs index 906a51e..e8be509 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::wrap_pyfunction; -use regex::{Regex, RegexBuilder}; +use regex::{Captures, Regex, RegexBuilder}; #[pyclass] struct Pattern { @@ -12,6 +12,7 @@ struct Pattern { struct Match { #[allow(dead_code)] mat: regex::Match<'static>, + captures: Captures<'static>, } #[pyclass] @@ -31,6 +32,33 @@ struct Constants; #[pyclass] struct Sre; +#[pymethods] +impl Match { + fn group(&self, idx: usize) -> Option { + self.captures.get(idx).map(|m| m.as_str().to_string()) + } + + fn groups(&self) -> Vec> { + self.captures + .iter() + .skip(1) + .map(|m| m.map(|mat| mat.as_str().to_string())) + .collect() + } + + fn start(&self, idx: usize) -> Option { + self.captures.get(idx).map(|m| m.start()) + } + + fn end(&self, idx: usize) -> Option { + self.captures.get(idx).map(|m| m.end()) + } + + fn span(&self, idx: usize) -> Option<(usize, usize)> { + self.captures.get(idx).map(|m| (m.start(), m.end())) + } +} + #[pyfunction] fn compile(pattern: &str, flags: Option) -> PyResult { let mut builder = RegexBuilder::new(pattern); @@ -54,9 +82,11 @@ fn compile(pattern: &str, flags: Option) -> PyResult { #[pyfunction] fn search(pattern: &Pattern, text: &str) -> PyResult> { - if let Some(mat) = pattern.regex.find(text) { + if let Some(captures) = pattern.regex.captures(text) { + let mat = captures.get(0).unwrap(); Ok(Some(Match { mat: unsafe { std::mem::transmute(mat) }, + captures: unsafe { std::mem::transmute(captures) }, })) } else { Ok(None) @@ -65,10 +95,12 @@ fn search(pattern: &Pattern, text: &str) -> PyResult> { #[pyfunction(name = "fmatch")] fn fmatch(pattern: &Pattern, text: &str) -> PyResult> { - if let Some(mat) = pattern.regex.find(text) { + if let Some(captures) = pattern.regex.captures(text) 
{ + let mat = captures.get(0).unwrap(); if mat.start() == 0 { Ok(Some(Match { mat: unsafe { std::mem::transmute(mat) }, + captures: unsafe { std::mem::transmute(captures) }, })) } else { Ok(None) @@ -80,10 +112,12 @@ fn fmatch(pattern: &Pattern, text: &str) -> PyResult> { #[pyfunction] fn fullmatch(pattern: &Pattern, text: &str) -> PyResult> { - if let Some(mat) = pattern.regex.find(text) { + if let Some(captures) = pattern.regex.captures(text) { + let mat = captures.get(0).unwrap(); if mat.as_str() == text { Ok(Some(Match { mat: unsafe { std::mem::transmute(mat) }, + captures: unsafe { std::mem::transmute(captures) }, })) } else { Ok(None) @@ -111,9 +145,13 @@ fn findall(pattern: &Pattern, text: &str) -> PyResult> { fn finditer(pattern: &Pattern, text: &str) -> PyResult> { Ok(pattern .regex - .find_iter(text) - .map(|mat| Match { - mat: unsafe { std::mem::transmute(mat) }, + .captures_iter(text) + .map(|captures| { + let mat = captures.get(0).unwrap(); + Match { + mat: unsafe { std::mem::transmute(mat) }, + captures: unsafe { std::mem::transmute(captures) }, + } }) .collect()) } @@ -149,7 +187,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add("__version__", "0.1.0")?; + m.add("__version__", "0.1.4")?; m.add( "__doc__", "A Rust-based regex crate wrapper for Python3 to get faster performance. 👾",