Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create Regex benchmarks #11504

Merged
merged 8 commits into from
Nov 12, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ replacer_cache_lookup : Text -> Replacer | Nothing
replacer_cache_lookup replacement_string = Replacer_Cache.INSTANCE.get replacement_string

## PRIVATE
group_reference_regex = "\$(([0-9]+)|(\$)|(&)|(<([^>]+)>))"
group_reference_regex = Regex.compile "\$(([0-9]+)|(\$)|(&)|(<([^>]+)>))"

## PRIVATE

Expand All @@ -95,7 +95,7 @@ build_replacement_vector_cached replacement_string pattern =
strings and group reference numbers.
build_replacement_vector : Text -> Regex -> Vector Replacement ! No_Such_Group
build_replacement_vector replacement_string pattern =
replacement_pattern = Regex.compile group_reference_regex
replacement_pattern = group_reference_regex
it = replacement_pattern.iterator replacement_string

Vector.build builder->
Expand Down
2 changes: 2 additions & 0 deletions test/Benchmarks/src/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import project.Text.Build
import project.Text.Compare
import project.Text.Contains
import project.Text.Pretty
import project.Text.Regex_Bench
import project.Text.Reverse
import project.Time.Work_Days
import project.Time.Format
Expand Down Expand Up @@ -79,6 +80,7 @@ all_benchmarks =
builder.append Compare.collect_benches
builder.append Contains.collect_benches
builder.append Pretty.collect_benches
builder.append Regex_Bench.collect_benches
builder.append Reverse.collect_benches

# Time
Expand Down
105 changes: 105 additions & 0 deletions test/Benchmarks/src/Text/Regex_Bench.enso
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from Standard.Base import all
from Standard.Base.Runtime import assert

import Standard.Base.Data.Text.Regex.Internal.Replacer.Replacer

from Standard.Test import Bench, Faker


## Inputs shared by the regex benchmarks in this file.

   All constructor fields are declared lazy (`~`):
   - `two_nums`, `just_two_nums`, `four_nums`: generated input texts.
   - `a_regex`: compiled `(\d)ab(\d)`, passed to `Replacer.new`.
   - `regexes`: generated regex source texts.
   - `replacers`: generated replacement templates.
   - `one_num_regex`: compiled `(\d+)`; not read by any benchmark visible
     in this file.
   - `two_nums_regex`: compiled `(\d+)[a-z]+(\d+)`.
type Data
    Value
        ~two_nums
        ~just_two_nums
        ~four_nums
        ~a_regex
        ~regexes
        ~replacers
        ~one_num_regex
        ~two_nums_regex

    ## Builds the benchmark inputs from a fresh `Faker`.

       `(\d)ab(\d)` is compiled eagerly here; every other argument is handed
       to `Data.Value` through `<|`, in constructor field order.

       NOTE(review): the `[a-z]+` class in the last regex assumes that
       `faker.alpha` only produces lower-case letters - confirm against
       `Faker`, otherwise some generated inputs will not match.
    create =
        gen = Faker.new
        digit_pair_regex = Regex.compile "(\d)ab(\d)"
        Data.Value
            <| build_two_nums gen
            <| build_just_two_nums gen
            <| build_four_nums gen
            <| digit_pair_regex
            <| build_regexes gen
            <| build_replacers gen
            <| Regex.compile "(\d+)"
            <| Regex.compile "(\d+)[a-z]+(\d+)"

# Number of input texts generated by `build_two_nums` and `build_just_two_nums`.
list_count = 1000
# Number of items generated by `build_regexes` and `build_replacers`.
regex_count = 1000

# Generates `list_count` texts of the form
# <2 letters><integer><8 letters><integer><2 letters>,
# e.g. "ab12cdefghij34kl". Letters come from `faker.alpha`, integers from
# `faker.integer 0 100`.
build_two_nums faker = 0.up_to list_count . map _->
    head_letters = faker.alpha 2
    first_num = (faker.integer 0 100).to_text
    middle_letters = faker.alpha 8
    second_num = (faker.integer 0 100).to_text
    tail_letters = faker.alpha 2
    head_letters + first_num + middle_letters + second_num + tail_letters

# Generates `list_count` texts of the form <integer><8 letters><integer>,
# e.g. "12cdefghij34", with no letters before or after the numbers.
build_just_two_nums faker = 0.up_to list_count . map _->
    leading_num = (faker.integer 0 100).to_text
    letters = faker.alpha 8
    trailing_num = (faker.integer 0 100).to_text
    leading_num + letters + trailing_num

# Generates texts containing four numbers by concatenating, element by element,
# two independently generated `build_two_nums` lists,
# e.g. "ab12cdefghij34klmn56opqrstuv78wx".
build_four_nums faker =
    first_halves = build_two_nums faker
    second_halves = build_two_nums faker
    first_halves.zip second_halves (left-> right-> left + right)


# Generates `regex_count` regex source texts: two single-digit groups
# separated by two generated letters.
build_regexes faker = 0.up_to regex_count . map _->
    separator = faker.alpha 2
    "(\d)" + separator + "(\d)"

# Generates `regex_count` replacement templates that reference `$0`, `$1` and
# `$2`, with two generated letters between consecutive references.
build_replacers faker = 0.up_to regex_count . map _->
    first_gap = faker.alpha 2
    second_gap = faker.alpha 2
    "$0" + first_gap + "$1" + second_gap + "$2"

# Benchmark options for the "Regex" group: the warmup and measure phases are
# both configured with `Bench.phase_conf 2 3`.
options =
    warmup_phase = Bench.phase_conf 2 3
    measure_phase = Bench.phase_conf 2 3
    Bench.options . set_warmup warmup_phase . set_measure measure_phase


# Registers the "Regex" benchmark group.
#
# Each spec applies one operation to every element of a generated list from
# `Data`, then uses `assert` to sanity-check the first result.
collect_benches = Bench.build benches->
    inputs = Data.create

    benches.group "Regex" options regex_group->
        # Two capture groups plus the whole match give three groups.
        regex_group.specify "match" <|
            match_results = inputs.two_nums . map inputs.two_nums_regex.match
            assert ((match_results.at 0 . groups . length) == 3)

        # Inputs with four numbers should yield two matches of three groups each.
        regex_group.specify "match_all" <|
            all_match_results = inputs.four_nums . map inputs.two_nums_regex.match_all
            assert ((all_match_results.at 0 . map .groups . map .length) == [3, 3])

        regex_group.specify "matches" <|
            match_flags = inputs.just_two_nums . map inputs.two_nums_regex.matches
            assert ((match_flags.at 0) == True)

        regex_group.specify "find" <|
            found = inputs.two_nums . map inputs.two_nums_regex.find
            assert (found.at 0 . is_a Text)

        regex_group.specify "find_all" <|
            found_lists = inputs.four_nums . map inputs.two_nums_regex.find_all
            assert ((found_lists.at 0 . at 0) . is_a Text)

        # Splitting on the match should leave the two 2-letter ends.
        regex_group.specify "split" <|
            pieces = inputs.two_nums . map inputs.two_nums_regex.split
            assert ((pieces.at 0 . map .length) == [2, 2])

        regex_group.specify "tokenize" <|
            tokens = inputs.two_nums . map inputs.two_nums_regex.tokenize
            assert (tokens.at 0 . at 0 . is_a Text)

        # After replacement, "NUM" should directly follow the 2-letter prefix.
        regex_group.specify "replace" <|
            replaced = inputs.two_nums . map (text-> inputs.two_nums_regex.replace text "NUM")
            assert ((replaced . at 0 . take 5 . drop 2) == "NUM")

        regex_group.specify "regex_compile" <|
            compiled = inputs.regexes.map Regex.compile
            assert (compiled . at 0 . is_a Regex)

        regex_group.specify "replacer_compile" <|
            built_replacers = inputs.replacers.map (template-> Replacer.new template inputs.a_regex)
            assert (built_replacers . at 0 . is_a Replacer)


# Entry point: runs the benchmarks returned by `collect_benches`.
main = collect_benches.run_main
Loading