Skip to content

Commit

Permalink
find_match to fmatch + Additional
Browse files Browse the repository at this point in the history
  • Loading branch information
itsmeadarsh2008 committed Jun 15, 2024
1 parent 3f1b27c commit 57eada0
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 10 deletions.
72 changes: 72 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/target

# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]
seed/
# C extensions
*.so

# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

# Rope
.ropeproject

# Django stuff:
*.log
*.pot

.DS_Store

# Sphinx documentation
docs/_build/

# PyCharm
.idea/

# VSCode
.vscode/

# Pyenv
.python-version
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
<div align="center">
<img src="https://svgshare.com/i/17F2.svg" height="300">

# flpc
![PyPI - Implementation](https://img.shields.io/pypi/implementation/flpc)
![GitHub Issues or Pull Requests](https://img.shields.io/github/issues/itsmeadarsh2008/flpc)
![PyPI - Downloads](https://img.shields.io/pypi/dm/flpc) <br>
![PyPI - Downloads](https://img.shields.io/pypi/dm/flpc)

A Rust-based regex port for Python3 to get faster performance. 👾

# DEAD-SIMPLE
Just import `flpc` as `re` and use it as you like and the namings are same as the native `re` module. Only thing is that `match` function name of the `re` native library is replaced with `find_match`
## DEAD-SIMPLE
Just import `flpc` as `re` and use it as you would the native `re` module; the function names are the same. The only difference is that the `match` function of the native `re` library is named `fmatch` here.

### MIT Licensed

# MIT Licensed
</div>
76 changes: 76 additions & 0 deletions benchmark/bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import re # noqa: D100
import time
from random import choices
from string import ascii_letters, digits
from flpc import (
compile,
escape,
fmatch,
findall,
finditer,
fullmatch,
search,
split,
sub,
subn,
)


def benchmark(func, *args, iterations=1000):
    """Time ``iterations`` back-to-back calls of ``func(*args)``.

    Args:
        func: Callable to time.
        *args: Positional arguments passed to ``func`` on every call.
        iterations: Number of times ``func`` is called.

    Returns:
        Total elapsed time for all calls, in milliseconds.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available; time.time() can jump when the system clock is adjusted and
    # is too coarse on some platforms for short benchmark runs.
    start = time.perf_counter()
    for _ in range(iterations):
        func(*args)
    elapsed_seconds = time.perf_counter() - start
    return elapsed_seconds * 1000  # Convert to milliseconds

# Test cases
PATTERN = r'(\w+)\s+(\d+)'
# PATTERN requires whitespace between a word and a number, so the sample text
# is built from "<letters> <digits>" pairs. A random string of only letters
# and digits contains no whitespace, can never match, and would make every
# operation measure nothing but the no-match path.
# 91 pairs of (5 letters + space + 4 digits) joined by single spaces is
# exactly 1000 characters.
TEXT = ' '.join(
    ''.join(choices(ascii_letters, k=5)) + ' ' + ''.join(choices(digits, k=4))
    for _ in range(91)
)
ITERATIONS = 100

if __name__ == "__main__":
    # Each entry is (operation name, total elapsed ms, whether a regex is used).
    # compile(PATTERN) is evaluated as a call argument, i.e. before timing
    # starts, so pattern compilation is only measured by the "Compile" entry.
    flpc_results = [
        ("Compile", benchmark(compile, PATTERN, iterations=ITERATIONS), True),
        ("Search", benchmark(search, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
        ("Find Match", benchmark(fmatch, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
        ("Full Match", benchmark(fullmatch, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
        ("Split", benchmark(split, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
        ("Find All", benchmark(findall, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
        ("Find Iter", benchmark(finditer, compile(PATTERN), TEXT, iterations=ITERATIONS), True),
        ("Sub", benchmark(sub, compile(PATTERN), 'foo', TEXT, iterations=ITERATIONS), True),
        ("Subn", benchmark(subn, compile(PATTERN), 'foo', TEXT, iterations=ITERATIONS), True),
        ("Escape", benchmark(escape, TEXT, iterations=ITERATIONS), False)
    ]

    # The same operations through the standard library, listed in the same
    # order so the two lists can be zipped row by row below.
    # NOTE(review): re's finditer is wrapped in list() to consume the lazy
    # iterator, while flpc's finditer is called directly. Confirm flpc builds
    # its result eagerly, otherwise the "Find Iter" row is not like-for-like.
    re_results = [
        ("Compile", benchmark(re.compile, PATTERN, iterations=ITERATIONS), True),
        ("Search", benchmark(lambda pattern: pattern.search(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
        ("Find Match", benchmark(lambda pattern: pattern.match(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
        ("Full Match", benchmark(lambda pattern: pattern.fullmatch(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
        ("Split", benchmark(lambda pattern: pattern.split(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
        ("Find All", benchmark(lambda pattern: pattern.findall(TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
        ("Find Iter", benchmark(lambda pattern: list(pattern.finditer(TEXT)), re.compile(PATTERN), iterations=ITERATIONS), True),
        ("Sub", benchmark(lambda pattern: pattern.sub('foo', TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
        ("Subn", benchmark(lambda pattern: pattern.subn('foo', TEXT), re.compile(PATTERN), iterations=ITERATIONS), True),
        ("Escape", benchmark(re.escape, TEXT, iterations=ITERATIONS), False)
    ]

    max_len = max(len(op) for op, _, _ in flpc_results + re_results)
    header = f"{'Operation':{max_len}} | {'flpc (ms)':<10} | {'re (ms)':<10} | {'Used Regex':<10} | {'Faster':<10}"
    print(header)
    # Size the rule from the header itself so it always spans the whole table.
    print("-" * len(header))

    flpc_times = []
    re_times = []

    for (op, flpc_time, used_regex_flpc), (_, re_time, used_regex_re) in zip(flpc_results, re_results):
        used_regex = "Yes" if used_regex_flpc and used_regex_re else "No"
        faster = "flpc" if flpc_time < re_time else "re"
        print(f"{op:{max_len}} | {flpc_time:<10.5f} | {re_time:<10.5f} | {used_regex:<10} | {faster:<10}")
        flpc_times.append(flpc_time)
        re_times.append(re_time)

    mean_flpc = sum(flpc_times) / len(flpc_times)
    mean_re = sum(re_times) / len(re_times)

    # Report the direction honestly: a ratio below 1 means flpc was slower,
    # and a zero mean (timer resolution too coarse) cannot be turned into a ratio.
    if mean_flpc <= 0 or mean_re <= 0:
        print("\nTimings were too small to compare; increase ITERATIONS")
    elif mean_re >= mean_flpc:
        performance_ratio = mean_re / mean_flpc
        print(f"\nThe flpc is {performance_ratio:.2f}x faster than re module on average")
    else:
        performance_ratio = mean_flpc / mean_re
        print(f"\nThe flpc is {performance_ratio:.2f}x slower than re module on average")
2 changes: 1 addition & 1 deletion flpc.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def search(pattern: Pattern, text: str) -> Optional[Match]:
"""Scan through a string, looking for any location where the regex pattern matches."""
...

def find_match(pattern: Pattern, text: str) -> Optional[Match]:
def fmatch(pattern: Pattern, text: str) -> Optional[Match]:
"""Try to apply the pattern at the start of the string, returning a match object if successful.

This is the counterpart of the standard library's ``re.match``; it is exposed under the name ``fmatch`` in this module.
"""
...

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "flpc"
version = "0.1.1"
version = "0.1.2"
description = "A Rust-based regex port for Python3 to get faster performance. 👾"
maintainers = [{ name = "Adarsh Gourab Mahalik", email = "gourabmahalikadarsh@gmail.com" }]
readme = "README.md"
Expand Down
8 changes: 4 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ fn search(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {
}
}

#[pyfunction(name = "find_match")]
fn find_match(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {
#[pyfunction(name = "fmatch")]
fn fmatch(pattern: &Pattern, text: &str) -> PyResult<Option<Match>> {
if let Some(mat) = pattern.regex.find(text) {
if mat.start() == 0 {
Ok(Some(Match {
Expand Down Expand Up @@ -161,7 +161,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> {
vec![
"compile",
"search",
"find_match",
"fmatch",
"fullmatch",
"split",
"findall",
Expand All @@ -175,7 +175,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> {

m.add_function(wrap_pyfunction!(compile, m)?)?;
m.add_function(wrap_pyfunction!(search, m)?)?;
m.add_function(wrap_pyfunction!(find_match, m)?)?;
m.add_function(wrap_pyfunction!(fmatch, m)?)?;
m.add_function(wrap_pyfunction!(fullmatch, m)?)?;
m.add_function(wrap_pyfunction!(split, m)?)?;
m.add_function(wrap_pyfunction!(findall, m)?)?;
Expand Down

0 comments on commit 57eada0

Please sign in to comment.