Skip to content

Commit

Permalink
boosted performance to hundreds with lazy static caching :shipit:
Browse files Browse the repository at this point in the history
  • Loading branch information
itsmeadarsh2008 committed Jul 3, 2024
1 parent 59d5ddd commit 6aefebd
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 45 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ crate-type = ["cdylib"]
[dependencies]
pyo3 = "0.21.1"
regex = "1.10.5"
lazy_static = "1.4.0"
65 changes: 54 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,61 @@
<div align="center">
<img src="https://svgshare.com/i/17F2.svg" height="300">
<img src="https://svgshare.com/i/17F2.svg" height="300" alt="flpc logo">

# flpc
<img alt="Star" src="https://img.shields.io/badge/Please%20Give%20A%20Star%20%E2%AD%90-30323D">
<img alt="PyPI - Implementation" src="https://img.shields.io/pypi/implementation/flpc?style=flat-square">
<img alt="GitHub Issues or Pull Requests" src="https://img.shields.io/github/issues/itsmeadarsh2008/flpc?style=flat-square">
<img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dd/flpc?style=flat-square">
# flpc: Lightning-Fast Python Regex

![Star](https://img.shields.io/badge/Please%20Give%20A%20Star%20%E2%AD%90-30323D?style=flat-square)
![PyPI - Implementation](https://img.shields.io/pypi/implementation/flpc?style=flat-square)
![GitHub Issues](https://img.shields.io/github/issues/itsmeadarsh2008/flpc?style=flat-square)
![PyPI - Downloads](https://img.shields.io/pypi/dd/flpc?style=flat-square)
![GitHub License](https://img.shields.io/github/license/itsmeadarsh2008/flpc?style=flat-square)
![GitHub last commit](https://img.shields.io/github/last-commit/itsmeadarsh2008/flpc?display_timestamp=committer&style=flat-square)

A Rust-based **[regex crate](https://crates.io/crates/regex) wrapper** for Python3 to get faster performance. 👾

## DEAD-SIMPLE
Just import `flpc` as `re` and use it as you would the native `re` module; the function names are the same. The only difference is that the `match` function of the native `re` library is named `fmatch` here.

### MIT Licensed

🚀 Supercharge your Python regex with Rust-powered performance!
</div>

## 🌟 Why flpc?

flpc is a powerful Python library that wraps the blazing-fast [Rust regex crate](https://crates.io/crates/regex), bringing enhanced speed to your regular expression operations. It's designed to be a drop-in replacement for Python's native `re` module, with some minor syntax differences.

## 🚀 Quick Start

1. Install flpc:
```
pip install flpc
```

2. Use it in your code as shown in the API

## 🔧 API

flpc mirrors the `re` module's API, with a few small exceptions:

- Use `fmatch()` instead of `match()` (to avoid a conflict with Python's `match` soft keyword)
- When using `group()` on a match object, always provide an index (e.g., `group(0)` for the entire match)

Common functions include:

- `compile()`
- `search()`
- `findall()`
- `finditer()`
- `split()`
- `sub()`
- `subn()`

## 💡 Pro Tips

- Pre-compile your patterns for faster execution
- Use raw strings (`r''`) for cleaner regex patterns
- Always check if a match is found before accessing groups
- Remember to use `group(0)` to get the entire match

## 🤝 Contributing

We welcome contributions! Whether it's bug reports, feature requests, or code contributions, please feel free to reach out. Check our [contribution guidelines](CONTRIBUTING.md) to get started.

## 📄 License

flpc is open-source software licensed under the MIT license.
File renamed without changes.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "flpc"
version = "0.1.4"
description = "A Rust-based regex crate wrapper for Python3 to get faster performance. 👾"
version = "0.2.0"
description = "A Lightning Fast ⚡ Rust-based regex crate wrapper for Python3 to get faster performance. 👾"
maintainers = [{ name = "Adarsh Gourab Mahalik", email = "gourabmahalikadarsh@gmail.com" }]
readme = "README.md"
license = "MIT"
Expand Down
74 changes: 42 additions & 32 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use regex::{Captures, Regex, RegexBuilder};
use std::collections::HashMap;
use std::sync::Mutex;
use lazy_static::lazy_static;

#[pyclass]
struct Pattern {
Expand Down Expand Up @@ -32,6 +35,11 @@ struct Constants;
#[pyclass]
struct Sre;

// Global cache for compiled regex patterns.
//
// Keyed by (pattern source text, flag bits) and storing the compiled `Regex`.
// The map sits behind a `Mutex`, so every reader and writer must take the lock.
// `lazy_static!` defers construction of the empty map until first access.
// NOTE(review): the only removal visible in this chunk is `purge()` clearing the
// whole map; no size limit or eviction is visible here — confirm whether
// unbounded growth is acceptable for callers that compile many distinct patterns.
lazy_static! {
static ref REGEX_CACHE: Mutex<HashMap<(String, u32), Regex>> = Mutex::new(HashMap::new());
}

#[pymethods]
impl Match {
fn group(&self, idx: usize) -> Option<String> {
Expand Down Expand Up @@ -61,70 +69,72 @@ impl Match {

/// Compiles `pattern` into a `Pattern`, consulting the global `REGEX_CACHE` first.
///
/// `flags` defaults to 0 when omitted. The bits tested below are:
/// `0b0001` case-insensitive, `0b0010` multi-line, `0b0100` dot matches newline.
///
/// Returns a `PyValueError` carrying the regex builder's error text when the
/// pattern fails to build. Panics if the cache mutex is poisoned
/// (`lock().unwrap()`).
///
/// NOTE(review): this span appears to be a rendered diff with the removed and
/// added lines interleaved — the `if let Some(f) = flags` branch looks like the
/// pre-change code and the `flags & …` checks after it look like its
/// replacement. Confirm against the real file before editing.
#[pyfunction]
fn compile(pattern: &str, flags: Option<u32>) -> PyResult<Pattern> {
let flags = flags.unwrap_or(0);
// The guard is held until the function returns, so a cache miss builds the
// regex while the lock is still taken.
let mut cache = REGEX_CACHE.lock().unwrap();

// Cache hit: return a clone of the stored regex. The lookup key is an owned
// `String`, so the pattern text is copied for the lookup.
if let Some(regex) = cache.get(&(pattern.to_string(), flags)) {
return Ok(Pattern { regex: regex.clone() });
}

let mut builder = RegexBuilder::new(pattern);
if let Some(f) = flags {
if f & 0b0001 != 0 {
builder.case_insensitive(true);
}
if f & 0b0010 != 0 {
builder.multi_line(true);
}
if f & 0b0100 != 0 {
builder.dot_matches_new_line(true);
}
// Add other flags as needed
if flags & 0b0001 != 0 {
builder.case_insensitive(true);
}
if flags & 0b0010 != 0 {
builder.multi_line(true);
}
if flags & 0b0100 != 0 {
builder.dot_matches_new_line(true);
}
// Add other flags as needed

// Build errors are converted to a Python ValueError with the same message.
let regex = builder
.build()
.map_err(|e| PyValueError::new_err(e.to_string()))?;

// Store a clone under the same (pattern, flags) key and return the original.
cache.insert((pattern.to_string(), flags), regex.clone());
Ok(Pattern { regex })
}

/// Runs `pattern` against `text` and returns a `Match` for the result of
/// `Regex::captures`, or `None` when there is no match.
///
/// NOTE(review): this span appears to be a rendered diff — the `if let … else`
/// form and the `.map(...).unwrap_or(Ok(None))` form are the old and new
/// versions of the same logic, interleaved.
#[pyfunction]
fn search(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {
if let Some(captures) = pattern.regex.captures(text) {
pattern.regex.captures(text).map(|captures| {
// Capture group 0 is the overall match.
let mat = captures.get(0).unwrap();
Ok(Some(Match {
// NOTE(review): `transmute` converts the borrowed regex `Match`/`Captures`
// into the field types of the pyclass `Match`, whose definition is outside
// this chunk — presumably erasing the borrow on `text`. Verify that the
// underlying string is guaranteed to outlive the returned object.
mat: unsafe { std::mem::transmute(mat) },
captures: unsafe { std::mem::transmute(captures) },
}))
} else {
Ok(None)
}
}).unwrap_or(Ok(None))
}

/// Returns a `Match` only when the match found by `Regex::captures` starts at
/// byte offset 0 of `text`; otherwise returns `None`.
///
/// Exposed to Python under the name `fmatch`.
///
/// NOTE(review): this span appears to be a rendered diff — the `if let … else`
/// form and the `.and_then(...).unwrap_or(Ok(None))` form are the old and new
/// versions of the same logic, interleaved.
#[pyfunction(name = "fmatch")]
fn fmatch(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {
if let Some(captures) = pattern.regex.captures(text) {
pattern.regex.captures(text).and_then(|captures| {
// Capture group 0 is the overall match.
let mat = captures.get(0).unwrap();
// Accept only a match anchored at the very start of the input.
if mat.start() == 0 {
Ok(Some(Match {
Some(Ok(Some(Match {
// NOTE(review): `transmute` presumably erases the borrow on `text`; the
// target field types live outside this chunk — verify soundness.
mat: unsafe { std::mem::transmute(mat) },
captures: unsafe { std::mem::transmute(captures) },
}))
})))
} else {
Ok(None)
None
}
} else {
Ok(None)
}
}).unwrap_or(Ok(None))
}

/// Returns a `Match` only when the match found by `Regex::captures` is
/// string-equal to the whole of `text`; otherwise returns `None`.
///
/// NOTE(review): only the single match reported by `captures` is compared, and
/// no alternative match is tried. With an alternation such as `a|ab` on "ab"
/// the reported match would presumably be "a", giving `None` where Python's
/// `re.fullmatch` succeeds — confirm, and consider anchoring the pattern.
///
/// NOTE(review): this span appears to be a rendered diff — the `if let … else`
/// form and the `.and_then(...).unwrap_or(Ok(None))` form are the old and new
/// versions of the same logic, interleaved.
#[pyfunction]
fn fullmatch(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {
if let Some(captures) = pattern.regex.captures(text) {
pattern.regex.captures(text).and_then(|captures| {
// Capture group 0 is the overall match.
let mat = captures.get(0).unwrap();
// Accept only when the matched slice equals the entire input.
if mat.as_str() == text {
Ok(Some(Match {
Some(Ok(Some(Match {
// NOTE(review): `transmute` presumably erases the borrow on `text`; the
// target field types live outside this chunk — verify soundness.
mat: unsafe { std::mem::transmute(mat) },
captures: unsafe { std::mem::transmute(captures) },
}))
})))
} else {
Ok(None)
None
}
} else {
Ok(None)
}
}).unwrap_or(Ok(None))
}

#[pyfunction]
Expand Down Expand Up @@ -175,7 +185,7 @@ fn escape(text: &str) -> PyResult<String> {

/// Removes every entry from the global compiled-regex cache.
///
/// Always returns `Ok(())`. Panics if the cache mutex is poisoned.
#[pyfunction]
fn purge() -> PyResult<()> {
    // Take the lock, drop all cached regexes, and release on scope exit.
    let mut cached_patterns = REGEX_CACHE.lock().unwrap();
    cached_patterns.clear();
    Ok(())
}

Expand All @@ -190,7 +200,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add("__version__", "0.1.4")?;
m.add(
"__doc__",
"A Rust-based regex crate wrapper for Python3 to get faster performance. 👾",
"",
)?;
m.add("__name__", "flpc")?;
m.add("__package__", "flpc")?;
Expand Down Expand Up @@ -224,4 +234,4 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(purge, m)?)?;

Ok(())
}
}

0 comments on commit 6aefebd

Please sign in to comment.