Skip to content

Commit

Permalink
Fixed: Unicode Segmentation Beep Bop Beep! ⚡
Browse files Browse the repository at this point in the history
  • Loading branch information
itsmeadarsh2008 committed Jul 5, 2024
1 parent e91744d commit 3861152
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 144 deletions.
157 changes: 20 additions & 137 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "flpc"
version = "0.1.0"
version = "0.2.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -9,6 +9,7 @@ name = "flpc"
crate-type = ["cdylib"]

[dependencies]
pyo3 = "0.21.1"
pyo3 = "0.22.0"
regex = "1.10.5"
lazy_static = "1.4.0"
lazy_static = "1.5.0"
unicode-segmentation = "1.11.0"
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
</div>

## 🌟 Why flpc?
[`CURRENTLY IN EXPERIMENTAL STAGE`]

flpc is a powerful Python library that wraps the blazing-fast [Rust regex crate](https://crates.io/crates/regex), bringing enhanced speed to your regular expression operations. It's designed to be a drop-in replacement for Python's native `re` module, with some minor syntax differences.

Expand Down
4 changes: 4 additions & 0 deletions examples/unicodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import flpc as re

# Make flpc's `fmatch` reachable under the name `match` for this example.
re.match = re.fmatch

# Compile a catch-all pattern, match it against text ending in a non-ASCII
# character, and print the span reported for group 0.
pattern = re.compile('.*')
result = re.match(pattern, 'hello \N{EARTH GLOBE AMERICAS}')
print(result.span(0))
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "flpc"
version = "0.2.1"
version = "0.2.2"
description = "A Lightning Fast ⚡ Rust-based regex crate wrapper for Python3 to get faster performance. 👾"
maintainers = [{ name = "Adarsh Gourab Mahalik", email = "gourabmahalikadarsh@gmail.com" }]
readme = "README.md"
Expand Down
17 changes: 14 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use regex::{Captures, Regex, RegexBuilder};
use std::collections::HashMap;
use std::sync::Mutex;
use lazy_static::lazy_static;
use unicode_segmentation::UnicodeSegmentation;

#[pyclass]
struct Pattern {
Expand Down Expand Up @@ -55,19 +56,29 @@ impl Match {
}

/// Reports where capture group `idx` begins, or `None` when
/// `self.captures.get(idx)` yields nothing for that group.
///
/// Two bodies appear below because this is a diff view: the earlier one-line
/// body returned `m.start()` unchanged, while the replacement returns the
/// number of grapheme clusters (`graphemes(true)`) that precede that offset.
///
/// NOTE(review): `m.start()` is presumably a byte offset into the whole
/// searched text, whereas `captures.get(0).unwrap().as_str()` is only the text
/// of the overall match. Slicing the latter with the former looks correct only
/// when the overall match begins at offset 0; otherwise the slice may panic or
/// count the wrong prefix — confirm against callers.
/// NOTE(review): Python's `re` reports code-point indices, which can differ
/// from grapheme-cluster counts — confirm which unit is intended.
fn start(&self, idx: usize) -> Option<usize> {
// Earlier body: hand back the offset reported by the regex match as-is.
self.captures.get(idx).map(|m| m.start())
// Replacement body: count grapheme clusters in the text before `m.start()`.
self.captures.get(idx).map(|m| {
// Group 0 is unwrapped unconditionally; presumably it is always present
// once a match exists — confirm.
self.captures.get(0).unwrap().as_str()[..m.start()].graphemes(true).count()
})
}

/// Reports where capture group `idx` finishes, or `None` when
/// `self.captures.get(idx)` yields nothing for that group.
///
/// Two bodies appear below because this is a diff view: the earlier one-line
/// body returned `m.end()` unchanged, while the replacement returns the number
/// of grapheme clusters (`graphemes(true)`) that precede that offset.
///
/// NOTE(review): `m.end()` is presumably a byte offset into the whole searched
/// text, whereas `captures.get(0).unwrap().as_str()` is only the text of the
/// overall match. Slicing the latter with the former looks correct only when
/// the overall match begins at offset 0; otherwise the slice may panic or
/// count the wrong prefix — confirm against callers.
/// NOTE(review): Python's `re` reports code-point indices, which can differ
/// from grapheme-cluster counts — confirm which unit is intended.
fn end(&self, idx: usize) -> Option<usize> {
// Earlier body: hand back the offset reported by the regex match as-is.
self.captures.get(idx).map(|m| m.end())
// Replacement body: count grapheme clusters in the text before `m.end()`.
self.captures.get(idx).map(|m| {
// Group 0 is unwrapped unconditionally; presumably it is always present
// once a match exists — confirm.
self.captures.get(0).unwrap().as_str()[..m.end()].graphemes(true).count()
})
}

/// Reports the `(start, end)` pair for capture group `idx`, or `None` when
/// `self.captures.get(idx)` yields nothing for that group.
///
/// Two bodies appear below because this is a diff view: the earlier one-line
/// body returned `(m.start(), m.end())` unchanged, while the replacement
/// converts each offset into the number of grapheme clusters
/// (`graphemes(true)`) that precede it.
///
/// NOTE(review): `m.start()` / `m.end()` are presumably byte offsets into the
/// whole searched text, whereas `full_match` is only the text of the overall
/// match. Slicing `full_match` with them looks correct only when the overall
/// match begins at offset 0; otherwise the slice may panic or count the wrong
/// prefix — confirm against callers.
/// NOTE(review): Python's `re` reports code-point indices, which can differ
/// from grapheme-cluster counts — confirm which unit is intended.
fn span(&self, idx: usize) -> Option<(usize, usize)> {
// Earlier body: pair up the offsets reported by the regex match as-is.
self.captures.get(idx).map(|m| (m.start(), m.end()))
// Replacement body: translate both offsets into grapheme-cluster counts.
self.captures.get(idx).map(|m| {
// Text of group 0 (the overall match); unwrapped unconditionally,
// presumably because it is always present once a match exists — confirm.
let full_match = self.captures.get(0).unwrap().as_str();
let start = full_match[..m.start()].graphemes(true).count();
let end = full_match[..m.end()].graphemes(true).count();
(start, end)
})
}
}

#[pyfunction]
#[pyo3(signature = (pattern, flags=None))]
fn compile(pattern: &str, flags: Option<u32>) -> PyResult<Pattern> {
let flags = flags.unwrap_or(0);
let mut cache = REGEX_CACHE.lock().unwrap();
Expand Down

0 comments on commit 3861152

Please sign in to comment.