Skip to content

Commit

Permalink
Added Examples + group, groups, start, end and span functions
Browse files Browse the repository at this point in the history
  • Loading branch information
itsmeadarsh2008 committed Jun 17, 2024
1 parent 3610116 commit 59d5ddd
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 86 deletions.
29 changes: 29 additions & 0 deletions examples/helloworld.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import flpc

# Compile the pattern once; the same Pattern object is reused by every call below.
pattern = flpc.compile(r'(\w+) (\w+)', flags=0)

# Search for the pattern in text; the result is None when nothing matches.
match = flpc.search(pattern, "Hello World")
if match:
    print("Group 0:", match.group(0))  # Hello World
    print("Group 1:", match.group(1))  # Hello
    print("Group 2:", match.group(2))  # World
    print("Groups:", match.groups())  # ['Hello', 'World']
    print("Start of group 0:", match.start(0))  # 0
    print("End of group 0:", match.end(0))  # 11
    print("Span of group 0:", match.span(0))  # (0, 11)

# Find all matches
matches = flpc.findall(pattern, "Hello World Hello Python")
print("All matches:", matches)  # ['Hello World', 'Hello Python']

# Walk over every match together with its capture groups.
# A separate loop variable keeps the `match` result above intact.
matches_iter = flpc.finditer(pattern, "Hello World Hello Python")
for found in matches_iter:
    print("Match:", found.group(0))  # Hello World, Hello Python
    print("Groups:", found.groups())  # ['Hello', 'World'], ['Hello', 'Python']

# Replace matches
# NOTE(review): the Rust regex crate expands `$1` / `${1}` in replacement strings, not
# Python-style `\1`. Unless flpc.sub translates backreferences (its implementation is not
# visible here), the expected output below needs confirming.
replaced_text = flpc.sub(pattern, r'\2 \1', "Hello World")
print("Replaced text:", replaced_text)  # World Hello
174 changes: 97 additions & 77 deletions flpc.pyi
Original file line number Diff line number Diff line change
@@ -1,78 +1,98 @@
from typing import List, Optional, Tuple

class Pattern:
    """A compiled regular expression pattern.

    Instances are returned by ``compile()`` and passed as the first argument
    to the module-level matching functions.
    """
    # NOTE(review): ``Regex`` is neither imported nor defined in this stub; it appears to
    # name the Rust-side ``regex::Regex`` and will be unresolved for type checkers.
    regex: Regex
    def __init__(self, regex: Regex): ...

class Match:
    """A match object returned by a regex search."""
    # NOTE(review): ``regex.Match`` names the Rust-side type; no ``regex`` module is
    # imported in this stub, so the name will be unresolved for type checkers.
    mat: regex.Match
    def __init__(self, mat: regex.Match): ...

class Scanner:
    """A scanner object (not implemented)."""
    ...

class RegexFlag:
    """A struct representing regex flags."""
    # Integer value held by the flag object.
    bits: int
    def __init__(self, bits: int): ...

class Constants:
    """A struct for regex constants (not implemented)."""
    ...

class Sre:
    """A struct for regex engine (not implemented)."""
    ...

def compile(pattern: str, flags: Optional[int] = ...) -> Pattern:
    """Compile a regular expression pattern into a reusable ``Pattern`` object.

    ``flags`` is an optional integer flag value.
    """
    ...

def search(pattern: Pattern, text: str) -> Optional[Match]:
    """Scan ``text`` for a location where the pattern matches.

    Returns a ``Match`` for that location, or ``None`` when nothing matches.
    """
    ...

def fmatch(pattern: Pattern, text: str) -> Optional[Match]:
    """Try to apply the pattern at the start of ``text``.

    Returns a ``Match`` if the pattern matches there, otherwise ``None``.
    """
    ...

def fullmatch(pattern: Pattern, text: str) -> Optional[Match]:
    """Try to apply the pattern to all of ``text``.

    Returns a ``Match`` only if the whole string matches, otherwise ``None``.
    """
    ...

def split(pattern: Pattern, text: str) -> List[str]:
    """Split ``text`` by the occurrences of the pattern and return the pieces as a list."""
    ...

def findall(pattern: Pattern, text: str) -> List[str]:
    """Find all substrings of ``text`` where the pattern matches and return them as a list."""
    ...

def finditer(pattern: Pattern, text: str) -> List[Match]:
    """Return a list of match objects, one per non-overlapping match of the pattern in ``text``.

    Despite the name, the result is a fully built list rather than a lazy iterator.
    """
    ...

def sub(pattern: Pattern, repl: str, text: str) -> str:
    """Return the string obtained by replacing the leftmost non-overlapping
    occurrences of the pattern in ``text`` with the replacement ``repl``.
    """
    ...

def subn(pattern: Pattern, repl: str, text: str) -> Tuple[str, int]:
    """Perform the same operation as ``sub()``, but return a tuple
    ``(new_string, number_of_subs_made)``.
    """
    ...

def escape(text: str) -> str:
    """Escape all non-alphanumeric characters in a string."""
    ...

def purge() -> None:
    """Purge the regex cache (not implemented)."""
    ...

# Declared types of the module-level metadata attributes.
__version__: str
__doc__: str
__name__: str
__package__: str
from typing import List, Optional, Tuple

class Pattern:
    """A compiled regular expression pattern.

    Instances are returned by ``compile()`` and passed as the first argument
    to the module-level matching functions.
    """
    # NOTE(review): ``Regex`` is neither imported nor defined in this stub; it appears to
    # name the Rust-side ``regex::Regex`` and will be unresolved for type checkers.
    regex: Regex
    def __init__(self, regex: Regex): ...

class Match:
    """A match object returned by a regex search.

    Group index 0 refers to the whole match; higher indices refer to the
    pattern's capture groups.
    """
    # NOTE(review): ``regex.Match`` names the Rust-side type; no ``regex`` module is
    # imported in this stub, so the name will be unresolved for type checkers.
    mat: regex.Match
    def __init__(self, mat: regex.Match): ...

    def group(self, idx: int) -> Optional[str]:
        """Return the string matched by group ``idx``.

        Returns ``None`` when the group captured nothing (it did not take part
        in the match or does not exist).
        """
        ...

    def groups(self) -> List[Optional[str]]:
        """Return a list with the text of every capture group, excluding group 0.

        Groups that did not take part in the match are ``None``.
        """
        ...

    def start(self, idx: int) -> Optional[int]:
        """Return the starting position of group ``idx``, or ``None`` if it captured nothing.

        The position is the offset reported by the Rust regex engine (a byte
        offset into the UTF-8 text), so it can differ from a ``str`` index when
        the text contains non-ASCII characters.
        """
        ...

    def end(self, idx: int) -> Optional[int]:
        """Return the ending position of group ``idx``, or ``None`` if it captured nothing.

        The position is the offset reported by the Rust regex engine (a byte
        offset into the UTF-8 text), so it can differ from a ``str`` index when
        the text contains non-ASCII characters.
        """
        ...

    def span(self, idx: int) -> Optional[Tuple[int, int]]:
        """Return the ``(start, end)`` positions of group ``idx``, or ``None`` if it captured nothing.

        Both values are the same offsets that ``start(idx)`` and ``end(idx)`` return.
        """
        ...

class Scanner:
    """A scanner object (not implemented)."""
    ...

class RegexFlag:
    """A struct representing regex flags."""
    # Integer value held by the flag object.
    bits: int
    def __init__(self, bits: int): ...

class Constants:
    """A struct for regex constants (not implemented)."""
    ...

class Sre:
    """A struct for regex engine (not implemented)."""
    ...

def compile(pattern: str, flags: Optional[int] = ...) -> Pattern:
    """Compile a regular expression pattern into a reusable ``Pattern`` object.

    ``flags`` is an optional integer flag value.
    """
    ...

def search(pattern: Pattern, text: str) -> Optional[Match]:
    """Scan ``text`` for a location where the pattern matches.

    Returns a ``Match`` for that location, or ``None`` when nothing matches.
    """
    ...

def fmatch(pattern: Pattern, text: str) -> Optional[Match]:
    """Try to apply the pattern at the start of ``text``.

    Returns a ``Match`` if the pattern matches there, otherwise ``None``.
    """
    ...

def fullmatch(pattern: Pattern, text: str) -> Optional[Match]:
    """Try to apply the pattern to all of ``text``.

    Returns a ``Match`` only if the whole string matches, otherwise ``None``.
    """
    ...

def split(pattern: Pattern, text: str) -> List[str]:
    """Split ``text`` by the occurrences of the pattern and return the pieces as a list."""
    ...

def findall(pattern: Pattern, text: str) -> List[str]:
    """Find all substrings of ``text`` where the pattern matches and return them as a list."""
    ...

def finditer(pattern: Pattern, text: str) -> List[Match]:
    """Return a list of match objects, one per non-overlapping match of the pattern in ``text``.

    Despite the name, the result is a fully built list rather than a lazy iterator.
    """
    ...

def sub(pattern: Pattern, repl: str, text: str) -> str:
    """Return the string obtained by replacing the leftmost non-overlapping
    occurrences of the pattern in ``text`` with the replacement ``repl``.
    """
    ...

def subn(pattern: Pattern, repl: str, text: str) -> Tuple[str, int]:
    """Perform the same operation as ``sub()``, but return a tuple
    ``(new_string, number_of_subs_made)``.
    """
    ...

def escape(text: str) -> str:
    """Escape all non-alphanumeric characters in a string."""
    ...

def purge() -> None:
    """Purge the regex cache (not implemented)."""
    ...

# Declared types of the module-level metadata attributes.
__version__: str
__doc__: str
__name__: str
__package__: str
__all__: List[str]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "flpc"
version = "0.1.3"
version = "0.1.4"
description = "A Rust-based regex crate wrapper for Python3 to get faster performance. 👾"
maintainers = [{ name = "Adarsh Gourab Mahalik", email = "gourabmahalikadarsh@gmail.com" }]
readme = "README.md"
Expand Down
54 changes: 46 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use regex::{Regex, RegexBuilder};
use regex::{Captures, Regex, RegexBuilder};

#[pyclass]
struct Pattern {
Expand All @@ -12,6 +12,7 @@ struct Pattern {
// Match object handed back to Python: the overall match plus all of its capture groups.
// NOTE(review): both fields claim a `'static` borrow, but the constructors visible in this
// file build them with `std::mem::transmute` from values that borrow the `text` argument.
// Nothing here keeps that text alive, so this looks unsound if the object outlives it — confirm.
struct Match {
// Overall match (capture group 0); currently not read, hence the lint allowance.
#[allow(dead_code)]
mat: regex::Match<'static>,
// All capture groups of the match; the accessor methods read from this field.
captures: Captures<'static>,
}

#[pyclass]
Expand All @@ -31,6 +32,33 @@ struct Constants;
// Placeholder class exposed to Python; it carries no data.
#[pyclass]
struct Sre;

#[pymethods]
impl Match {
    // Text captured by group `idx`; `None` when the lookup yields no capture.
    fn group(&self, idx: usize) -> Option<String> {
        let found = self.captures.get(idx)?;
        Some(found.as_str().to_string())
    }

    // Text of every capture group after group 0, in order.
    // A group without a capture is reported as `None`.
    fn groups(&self) -> Vec<Option<String>> {
        let mut texts = Vec::new();
        for slot in self.captures.iter().skip(1) {
            texts.push(slot.map(|found| found.as_str().to_string()));
        }
        texts
    }

    // Start offset of group `idx` as reported by the regex crate; `None` without a capture.
    fn start(&self, idx: usize) -> Option<usize> {
        let found = self.captures.get(idx)?;
        Some(found.start())
    }

    // End offset of group `idx` as reported by the regex crate; `None` without a capture.
    fn end(&self, idx: usize) -> Option<usize> {
        let found = self.captures.get(idx)?;
        Some(found.end())
    }

    // `(start, end)` offsets of group `idx`; `None` without a capture.
    fn span(&self, idx: usize) -> Option<(usize, usize)> {
        let found = self.captures.get(idx)?;
        let bounds = (found.start(), found.end());
        Some(bounds)
    }
}

#[pyfunction]
fn compile(pattern: &str, flags: Option<u32>) -> PyResult<Pattern> {
let mut builder = RegexBuilder::new(pattern);
Expand All @@ -54,9 +82,11 @@ fn compile(pattern: &str, flags: Option<u32>) -> PyResult<Pattern> {

#[pyfunction]
fn search(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {
if let Some(mat) = pattern.regex.find(text) {
if let Some(captures) = pattern.regex.captures(text) {
let mat = captures.get(0).unwrap();
Ok(Some(Match {
mat: unsafe { std::mem::transmute(mat) },
captures: unsafe { std::mem::transmute(captures) },
}))
} else {
Ok(None)
Expand All @@ -65,10 +95,12 @@ fn search(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {

#[pyfunction(name = "fmatch")]
fn fmatch(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {
if let Some(mat) = pattern.regex.find(text) {
if let Some(captures) = pattern.regex.captures(text) {
let mat = captures.get(0).unwrap();
if mat.start() == 0 {
Ok(Some(Match {
mat: unsafe { std::mem::transmute(mat) },
captures: unsafe { std::mem::transmute(captures) },
}))
} else {
Ok(None)
Expand All @@ -80,10 +112,12 @@ fn fmatch(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {

#[pyfunction]
fn fullmatch(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {
if let Some(mat) = pattern.regex.find(text) {
if let Some(captures) = pattern.regex.captures(text) {
let mat = captures.get(0).unwrap();
if mat.as_str() == text {
Ok(Some(Match {
mat: unsafe { std::mem::transmute(mat) },
captures: unsafe { std::mem::transmute(captures) },
}))
} else {
Ok(None)
Expand Down Expand Up @@ -111,9 +145,13 @@ fn findall(pattern: &Pattern, text: &str) -> PyResult<Vec<String>> {
fn finditer(pattern: &Pattern, text: &str) -> PyResult<Vec<Match>> {
Ok(pattern
.regex
.find_iter(text)
.map(|mat| Match {
mat: unsafe { std::mem::transmute(mat) },
.captures_iter(text)
.map(|captures| {
let mat = captures.get(0).unwrap();
Match {
mat: unsafe { std::mem::transmute(mat) },
captures: unsafe { std::mem::transmute(captures) },
}
})
.collect())
}
Expand Down Expand Up @@ -149,7 +187,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<RegexFlag>()?;
m.add_class::<Constants>()?;
m.add_class::<Sre>()?;
m.add("__version__", "0.1.0")?;
m.add("__version__", "0.1.4")?;
m.add(
"__doc__",
"A Rust-based regex crate wrapper for Python3 to get faster performance. 👾",
Expand Down

0 comments on commit 59d5ddd

Please sign in to comment.