From 57eada051d939b7467c242fa8ad23400a73dd81b Mon Sep 17 00:00:00 2001
From: Adarsh Gourab Mahalik
<71959210+itsmeadarsh2008@users.noreply.github.com>
Date: Sat, 15 Jun 2024 06:45:25 +0000
Subject: [PATCH] find_match to fmatch + Additional
---
.gitignore | 72 +++++++++++++++++++++++++++++++++++++++++++
README.md | 13 +++++---
benchmark/bench.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++
flpc.pyi | 2 +-
pyproject.toml | 2 +-
src/lib.rs | 8 ++---
6 files changed, 163 insertions(+), 10 deletions(-)
create mode 100644 .gitignore
create mode 100644 benchmark/bench.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0c4687d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,72 @@
+/target
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+.pytest_cache/
+*.py[cod]
+seed/
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+.venv/
+env/
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+include/
+man/
+venv/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+pip-selfcheck.json
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# Rope
+.ropeproject
+
+# Django stuff:
+*.log
+*.pot
+
+.DS_Store
+
+# Sphinx documentation
+docs/_build/
+
+# PyCharm
+.idea/
+
+# VSCode
+.vscode/
+
+# Pyenv
+.python-version
diff --git a/README.md b/README.md
index 653748e..c14ae9e 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,16 @@
+
+
+
# flpc
![PyPI - Implementation](https://img.shields.io/pypi/implementation/flpc)
![GitHub Issues or Pull Requests](https://img.shields.io/github/issues/itsmeadarsh2008/flpc)
-![PyPI - Downloads](https://img.shields.io/pypi/dm/flpc)
+![PyPI - Downloads](https://img.shields.io/pypi/dm/flpc)
A Rust-based regex port for Python3 to get faster performance. 👾
-# DEAD-SIMPLE
-Just import `flpc` as `re` and use it as you like and the namings are same as the native `re` module. Only thing is that `match` function name of the `re` native library is replaced with `find_match`
+## DEAD-SIMPLE
+Just import `flpc` as `re` and use it as you would the native `re` module; the function names are the same. The only difference is that the native `re` module's `match` function is named `fmatch` here.
+
+### MIT Licensed
-# MIT Licensed
\ No newline at end of file
+
diff --git a/benchmark/bench.py b/benchmark/bench.py
new file mode 100644
index 0000000..e162c26
--- /dev/null
+++ b/benchmark/bench.py
@@ -0,0 +1,76 @@
+import re # noqa: D100
+import time
+from random import choices
+from string import ascii_letters, digits
+from flpc import (
+ compile,
+ escape,
+ fmatch,
+ findall,
+ finditer,
+ fullmatch,
+ search,
+ split,
+ sub,
+ subn,
+)
+
+
+def benchmark(func, *args, iterations=1000): # noqa: D103
+ start = time.time()
+ for _ in range(iterations):
+ func(*args)
+ end = time.time()
+ return (end - start) * 1000 # Convert to milliseconds
+
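+# Benchmark inputs (note: TEXT is random letters/digits with no whitespace, so PATTERN, which requires `\s+`, never matches it)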
+PATTERN = r'(\w+)\s+(\d+)'
+TEXT = ''.join(choices(ascii_letters + digits, k=1000))
+ITERATIONS = 100
+
+if __name__ == "__main__":
+ flpc_results = [
+ ("Compile", benchmark(compile, PATTERN, iterations=ITERATIONS), True),
+ ("Search", benchmark(search, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
+ ("Find Match", benchmark(fmatch, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
+ ("Full Match", benchmark(fullmatch, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
+ ("Split", benchmark(split, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
+ ("Find All", benchmark(findall, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
+ ("Find Iter", benchmark(finditer, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
+ ("Sub", benchmark(sub, compile(PATTERN), 'foo', TEXT, iterations=ITERATIONS), True),
+ ("Subn", benchmark(subn, compile(PATTERN), 'foo', TEXT, iterations=ITERATIONS), True),
+ ("Escape", benchmark(escape, TEXT, iterations=ITERATIONS), False)
+ ]
+
+ re_results = [
+ ("Compile", benchmark(re.compile, PATTERN, iterations=ITERATIONS), True),
+ ("Search", benchmark(lambda pattern: pattern.search(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
+ ("Find Match", benchmark(lambda pattern: pattern.match(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
+ ("Full Match", benchmark(lambda pattern: pattern.fullmatch(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
+ ("Split", benchmark(lambda pattern: pattern.split(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
+ ("Find All", benchmark(lambda pattern: pattern.findall(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
+ ("Find Iter", benchmark(lambda pattern: list(pattern.finditer(TEXT)), re.compile(PATTERN), iterations=ITERATIONS), True),
+ ("Sub", benchmark(lambda pattern: pattern.sub('foo', TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
+ ("Subn", benchmark(lambda pattern: pattern.subn('foo', TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
+ ("Escape", benchmark(re.escape, TEXT, iterations=ITERATIONS), False)
+ ]
+
+ max_len = max(len(op) for op, _, _ in flpc_results + re_results)
+ print(f"{'Operation':{max_len}} | {'flpc (ms)':<10} | {'re (ms)':<10} | {'Used Regex':<10} | {'Faster':<10}")
+ print("-" * (max_len + 45))
+
+ flpc_times = []
+ re_times = []
+
+ for (op, flpc_time, used_regex_flpc), (_, re_time, used_regex_re) in zip(flpc_results, re_results):
+ used_regex = "Yes" if used_regex_flpc and used_regex_re else "No"
+ faster = "flpc" if flpc_time < re_time else "re"
+ print(f"{op:{max_len}} | {flpc_time:<10.5f} | {re_time:<10.5f} | {used_regex:<10} | {faster:<10}")
+ flpc_times.append(flpc_time)
+ re_times.append(re_time)
+
+ mean_flpc = sum(flpc_times) / len(flpc_times)
+ mean_re = sum(re_times) / len(re_times)
+ performance_ratio = mean_re / mean_flpc
+
+ print(f"\nThe flpc is {performance_ratio:.2f}x faster than re module on average")
\ No newline at end of file
diff --git a/flpc.pyi b/flpc.pyi
index aeae7fb..0afe6c4 100644
--- a/flpc.pyi
+++ b/flpc.pyi
@@ -35,7 +35,7 @@ def search(pattern: Pattern, text: str) -> Optional[Match]:
"""Scan through a string, looking for any location where the regex pattern matches."""
...
-def find_match(pattern: Pattern, text: str) -> Optional[Match]:
+def fmatch(pattern: Pattern, text: str) -> Optional[Match]:
"""Try to apply the pattern at the start of the string, returning a match object if successful."""
...
diff --git a/pyproject.toml b/pyproject.toml
index fde11a7..8912a4d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "flpc"
-version = "0.1.1"
+version = "0.1.2"
description = "A Rust-based regex port for Python3 to faster performance. 👾"
maintainers = [{ name = "Adarsh Gourab Mahalik", email = "gourabmahalikadarsh@gmail.com" }]
readme = "README.md"
diff --git a/src/lib.rs b/src/lib.rs
index 551e8bb..b2f6078 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -63,8 +63,8 @@ fn search(pattern: &Pattern, text: &str) -> PyResult> {
}
}
-#[pyfunction(name = "find_match")]
-fn find_match(pattern: &Pattern, text: &str) -> PyResult > {
+#[pyfunction(name = "fmatch")]
+fn fmatch(pattern: &Pattern, text: &str) -> PyResult > {
if let Some(mat) = pattern.regex.find(text) {
if mat.start() == 0 {
Ok(Some(Match {
@@ -161,7 +161,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> {
vec![
"compile",
"search",
- "find_match",
+ "fmatch",
"fullmatch",
"split",
"findall",
@@ -175,7 +175,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compile, m)?)?;
m.add_function(wrap_pyfunction!(search, m)?)?;
- m.add_function(wrap_pyfunction!(find_match, m)?)?;
+ m.add_function(wrap_pyfunction!(fmatch, m)?)?;
m.add_function(wrap_pyfunction!(fullmatch, m)?)?;
m.add_function(wrap_pyfunction!(split, m)?)?;
m.add_function(wrap_pyfunction!(findall, m)?)?;