From 57eada051d939b7467c242fa8ad23400a73dd81b Mon Sep 17 00:00:00 2001 From: Adarsh Gourab Mahalik <71959210+itsmeadarsh2008@users.noreply.github.com> Date: Sat, 15 Jun 2024 06:45:25 +0000 Subject: [PATCH] find_match to fmatch + Additional --- .gitignore | 72 +++++++++++++++++++++++++++++++++++++++++++ README.md | 13 +++++--- benchmark/bench.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++ flpc.pyi | 2 +- pyproject.toml | 2 +- src/lib.rs | 8 ++--- 6 files changed, 163 insertions(+), 10 deletions(-) create mode 100644 .gitignore create mode 100644 benchmark/bench.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c4687d --- /dev/null +++ b/.gitignore @@ -0,0 +1,72 @@ +/target + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] +seed/ +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version diff --git a/README.md b/README.md index 653748e..c14ae9e 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,16 @@ +
+ + # flpc ![PyPI - Implementation](https://img.shields.io/pypi/implementation/flpc) ![GitHub Issues or Pull Requests](https://img.shields.io/github/issues/itsmeadarsh2008/flpc) -![PyPI - Downloads](https://img.shields.io/pypi/dm/flpc)
+![PyPI - Downloads](https://img.shields.io/pypi/dm/flpc) A Rust-based regex port for Python3 to get faster performance. 👾 -# DEAD-SIMPLE -Just import `flpc` as `re` and use it as you like and the namings are same as the native `re` module. Only thing is that `match` function name of the `re` native library is replaced with `find_match` +## DEAD-SIMPLE +Just import `flpc` as `re` and use it as you like; the names are the same as in the native `re` module. The only difference is that the `match` function of the native `re` library is named `fmatch` here. + +### MIT Licensed -# MIT Licensed \ No newline at end of file +
diff --git a/benchmark/bench.py b/benchmark/bench.py new file mode 100644 index 0000000..e162c26 --- /dev/null +++ b/benchmark/bench.py @@ -0,0 +1,76 @@ +import re # noqa: D100 +import time +from random import choices +from string import ascii_letters, digits +from flpc import ( + compile, + escape, + fmatch, + findall, + finditer, + fullmatch, + search, + split, + sub, + subn, +) + + +def benchmark(func, *args, iterations=1000): # noqa: D103 + start = time.time() + for _ in range(iterations): + func(*args) + end = time.time() + return (end - start) * 1000 # Convert to milliseconds + +# Test cases +PATTERN = r'(\w+)\s+(\d+)' +TEXT = ''.join(choices(ascii_letters + digits, k=1000)) +ITERATIONS = 100 + +if __name__ == "__main__": + flpc_results = [ + ("Compile", benchmark(compile, PATTERN, iterations=ITERATIONS), True), + ("Search", benchmark(search, compile(PATTERN), TEXT, iterations=ITERATIONS), True), + ("Find Match", benchmark(fmatch, compile(PATTERN), TEXT, iterations=ITERATIONS), True), + ("Full Match", benchmark(fullmatch, compile(PATTERN), TEXT, iterations=ITERATIONS), True), + ("Split", benchmark(split, compile(PATTERN), TEXT, iterations=ITERATIONS), True), + ("Find All", benchmark(findall, compile(PATTERN), TEXT, iterations=ITERATIONS), True), + ("Find Iter", benchmark(finditer, compile(PATTERN), TEXT, iterations=ITERATIONS), True), + ("Sub", benchmark(sub, compile(PATTERN), 'foo', TEXT, iterations=ITERATIONS), True), + ("Subn", benchmark(subn, compile(PATTERN), 'foo', TEXT, iterations=ITERATIONS), True), + ("Escape", benchmark(escape, TEXT, iterations=ITERATIONS), False) + ] + + re_results = [ + ("Compile", benchmark(re.compile, PATTERN, iterations=ITERATIONS), True), + ("Search", benchmark(lambda pattern: pattern.search(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True), + ("Find Match", benchmark(lambda pattern: pattern.match(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True), + ("Full Match", benchmark(lambda pattern: 
pattern.fullmatch(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True), + ("Split", benchmark(lambda pattern: pattern.split(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True), + ("Find All", benchmark(lambda pattern: pattern.findall(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True), + ("Find Iter", benchmark(lambda pattern: list(pattern.finditer(TEXT)), re.compile(PATTERN), iterations=ITERATIONS), True), + ("Sub", benchmark(lambda pattern: pattern.sub('foo', TEXT), re.compile(PATTERN), iterations=ITERATIONS), True), + ("Subn", benchmark(lambda pattern: pattern.subn('foo', TEXT), re.compile(PATTERN), iterations=ITERATIONS), True), + ("Escape", benchmark(re.escape, TEXT, iterations=ITERATIONS), False) + ] + + max_len = max(len(op) for op, _, _ in flpc_results + re_results) + print(f"{'Operation':{max_len}} | {'flpc (ms)':<10} | {'re (ms)':<10} | {'Used Regex':<10} | {'Faster':<10}") + print("-" * (max_len + 45)) + + flpc_times = [] + re_times = [] + + for (op, flpc_time, used_regex_flpc), (_, re_time, used_regex_re) in zip(flpc_results, re_results): + used_regex = "Yes" if used_regex_flpc and used_regex_re else "No" + faster = "flpc" if flpc_time < re_time else "re" + print(f"{op:{max_len}} | {flpc_time:<10.5f} | {re_time:<10.5f} | {used_regex:<10} | {faster:<10}") + flpc_times.append(flpc_time) + re_times.append(re_time) + + mean_flpc = sum(flpc_times) / len(flpc_times) + mean_re = sum(re_times) / len(re_times) + performance_ratio = mean_re / mean_flpc + + print(f"\nThe flpc is {performance_ratio:.2f}x faster than re module on average") \ No newline at end of file diff --git a/flpc.pyi b/flpc.pyi index aeae7fb..0afe6c4 100644 --- a/flpc.pyi +++ b/flpc.pyi @@ -35,7 +35,7 @@ def search(pattern: Pattern, text: str) -> Optional[Match]: """Scan through a string, looking for any location where the regex pattern matches.""" ... 
-def find_match(pattern: Pattern, text: str) -> Optional[Match]: +def fmatch(pattern: Pattern, text: str) -> Optional[Match]: """Try to apply the pattern at the start of the string, returning a match object if successful.""" ... diff --git a/pyproject.toml b/pyproject.toml index fde11a7..8912a4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "flpc" -version = "0.1.1" +version = "0.1.2" description = "A Rust-based regex port for Python3 to faster performance. 👾" maintainers = [{ name = "Adarsh Gourab Mahalik", email = "gourabmahalikadarsh@gmail.com" }] readme = "README.md" diff --git a/src/lib.rs b/src/lib.rs index 551e8bb..b2f6078 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,8 +63,8 @@ fn search(pattern: &Pattern, text: &str) -> PyResult> { } } -#[pyfunction(name = "find_match")] -fn find_match(pattern: &Pattern, text: &str) -> PyResult> { +#[pyfunction(name = "fmatch")] +fn fmatch(pattern: &Pattern, text: &str) -> PyResult> { if let Some(mat) = pattern.regex.find(text) { if mat.start() == 0 { Ok(Some(Match { @@ -161,7 +161,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> { vec![ "compile", "search", - "find_match", + "fmatch", "fullmatch", "split", "findall", @@ -175,7 +175,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(compile, m)?)?; m.add_function(wrap_pyfunction!(search, m)?)?; - m.add_function(wrap_pyfunction!(find_match, m)?)?; + m.add_function(wrap_pyfunction!(fmatch, m)?)?; m.add_function(wrap_pyfunction!(fullmatch, m)?)?; m.add_function(wrap_pyfunction!(split, m)?)?; m.add_function(wrap_pyfunction!(findall, m)?)?;