Skip to content

Commit

Permalink
Create Regex benchmarks (#11504)
Browse files Browse the repository at this point in the history
  • Loading branch information
GregoryTravis authored Nov 12, 2024
1 parent 67f075b commit 978a009
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ replacer_cache_lookup : Text -> Replacer | Nothing
replacer_cache_lookup replacement_string = Replacer_Cache.INSTANCE.get replacement_string

## PRIVATE
group_reference_regex = "\$(([0-9]+)|(\$)|(&)|(<([^>]+)>))"
group_reference_regex = Regex.compile "\$(([0-9]+)|(\$)|(&)|(<([^>]+)>))"

## PRIVATE

Expand All @@ -95,7 +95,7 @@ build_replacement_vector_cached replacement_string pattern =
strings and group reference numbers.
build_replacement_vector : Text -> Regex -> Vector Replacement ! No_Such_Group
build_replacement_vector replacement_string pattern =
replacement_pattern = Regex.compile group_reference_regex
replacement_pattern = group_reference_regex
it = replacement_pattern.iterator replacement_string

Vector.build builder->
Expand Down
2 changes: 2 additions & 0 deletions test/Benchmarks/src/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import project.Text.Build
import project.Text.Compare
import project.Text.Contains
import project.Text.Pretty
import project.Text.Regex_Bench
import project.Text.Reverse
import project.Time.Work_Days
import project.Time.Format
Expand Down Expand Up @@ -79,6 +80,7 @@ all_benchmarks =
builder.append Compare.collect_benches
builder.append Contains.collect_benches
builder.append Pretty.collect_benches
builder.append Regex_Bench.collect_benches
builder.append Reverse.collect_benches

# Time
Expand Down
105 changes: 105 additions & 0 deletions test/Benchmarks/src/Text/Regex_Bench.enso
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from Standard.Base import all
from Standard.Base.Runtime import assert

import Standard.Base.Data.Text.Regex.Internal.Replacer.Replacer

from Standard.Test import Bench, Faker


# Fixture bundle shared by the regex benchmarks. Every field is declared with
# `~` (suspended), so a fixture is not computed at construction time.
type Data
    Value
        ~two_nums
        ~just_two_nums
        ~four_nums
        ~a_regex
        ~regexes
        ~replacers
        ~one_num_regex
        ~two_nums_regex

    # Assembles the bundle: three generated text lists, one fixed compiled
    # regex, a list of regex sources, a list of replacement templates and two
    # compiled patterns. All generators draw from the same `Faker` instance.
    create =
        gen = Faker.new
        fixed_regex = Regex.compile "(\d)ab(\d)"
        Data.Value (build_two_nums gen) (build_just_two_nums gen) (build_four_nums gen) fixed_regex (build_regexes gen) (build_replacers gen) (Regex.compile "(\d+)") (Regex.compile "(\d+)[a-z]+(\d+)")

# Number of generated strings in each text fixture list
# (used by `build_two_nums` and `build_just_two_nums`).
list_count = 1000
# Number of generated regex sources and replacement templates
# (used by `build_regexes` and `build_replacers`).
regex_count = 1000

# Produces `list_count` strings shaped like "ab12cdefghij634kl":
# 2 letters, a number, 8 letters, a number, 2 letters.
build_two_nums faker = 0.up_to list_count . map _->
    head = faker.alpha 2
    first_num = (faker.integer 0 100).to_text
    middle = faker.alpha 8
    second_num = (faker.integer 0 100).to_text
    tail = faker.alpha 2
    head + first_num + middle + second_num + tail

# Produces `list_count` strings shaped like "12cdefghij634":
# a number, 8 letters, a number, with no surrounding letters.
build_just_two_nums faker = 0.up_to list_count . map _->
    first_num = (faker.integer 0 100).to_text
    middle = faker.alpha 8
    second_num = (faker.integer 0 100).to_text
    first_num + middle + second_num

# Produces strings containing four numbers by concatenating, element by
# element, two independently generated `build_two_nums` lists.
build_four_nums faker =
    left_halves = build_two_nums faker
    right_halves = build_two_nums faker
    left_halves.zip right_halves (left-> right-> left + right)


# Produces `regex_count` regex source texts of the form `(\d)XY(\d)`, where
# `XY` is two generated letters. The texts are left uncompiled so that the
# "regex_compile" benchmark can compile them.
build_regexes faker = 0.up_to regex_count . map _->
    letters = faker.alpha 2
    "(\d)" + letters + "(\d)"

# Produces `regex_count` replacement templates of the form `$0XY$1ZW$2`,
# where `XY` and `ZW` are generated letter pairs between group references.
build_replacers faker = 0.up_to regex_count . map _->
    first_letters = faker.alpha 2
    second_letters = faker.alpha 2
    "$0" + first_letters + "$1" + second_letters + "$2"

# Benchmark options for this group: the warmup and measurement phases use the
# same `Bench.phase_conf 2 3` configuration.
# NOTE(review): presumably the arguments are iterations and seconds - confirm
# against `Bench.phase_conf`.
options =
    warmup_conf = Bench.phase_conf 2 3
    measure_conf = Bench.phase_conf 2 3
    Bench.options . set_warmup warmup_conf . set_measure measure_conf


# Registers the "Regex" benchmark group. Each spec maps one regex operation
# over a fixture list from `Data` and then asserts on the first result as a
# sanity check of the output's shape.
collect_benches = Bench.build bench_builder->
    fixtures = Data.create

    bench_builder.group "Regex" options group->
        # Matching: one match per input, exposing the whole match plus two groups.
        group.specify "match" <|
            matched = fixtures.two_nums . map fixtures.two_nums_regex.match
            assert ((matched.at 0 . groups . length) == 3)

        # The four-number inputs contain two occurrences of the pattern.
        group.specify "match_all" <|
            all_matched = fixtures.four_nums . map fixtures.two_nums_regex.match_all
            assert ((all_matched.at 0 . map .groups . map .length) == [3, 3])

        # Whole-input check, hence the inputs without surrounding letters.
        group.specify "matches" <|
            flags = fixtures.just_two_nums . map fixtures.two_nums_regex.matches
            assert ((flags.at 0) == True)

        group.specify "find" <|
            found = fixtures.two_nums . map fixtures.two_nums_regex.find
            assert (found.at 0 . is_a Text)

        group.specify "find_all" <|
            all_found = fixtures.four_nums . map fixtures.two_nums_regex.find_all
            assert ((all_found.at 0 . at 0) . is_a Text)

        # Splitting on the pattern leaves the two-letter prefix and suffix.
        group.specify "split" <|
            pieces = fixtures.two_nums . map fixtures.two_nums_regex.split
            assert ((pieces.at 0 . map .length) == [2, 2])

        group.specify "tokenize" <|
            tokens = fixtures.two_nums . map fixtures.two_nums_regex.tokenize
            assert (tokens.at 0 . at 0 . is_a Text)

        # After replacement the text is two letters, "NUM", two letters.
        group.specify "replace" <|
            replaced = fixtures.two_nums . map (text-> fixtures.two_nums_regex.replace text "NUM")
            assert ((replaced . at 0 . take 5 . drop 2) == "NUM")

        # Compilation cost of many distinct regex sources.
        group.specify "regex_compile" <|
            compiled = fixtures.regexes.map Regex.compile
            assert (compiled . at 0 . is_a Regex)

        # Construction cost of many distinct replacement templates.
        group.specify "replacer_compile" <|
            built = fixtures.replacers.map (template-> Replacer.new template fixtures.a_regex)
            assert (built . at 0 . is_a Replacer)


# Entry point when this file is run directly: runs the benchmarks registered
# by `collect_benches`.
main =
    benches = collect_benches
    benches.run_main

0 comments on commit 978a009

Please sign in to comment.