diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Internal/Replacer.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Internal/Replacer.enso index fb1df1408863..132f7126b824 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Internal/Replacer.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Internal/Replacer.enso @@ -71,7 +71,7 @@ replacer_cache_lookup : Text -> Replacer | Nothing replacer_cache_lookup replacement_string = Replacer_Cache.INSTANCE.get replacement_string ## PRIVATE -group_reference_regex = "\$(([0-9]+)|(\$)|(&)|(<([^>]+)>))" +group_reference_regex = Regex.compile "\$(([0-9]+)|(\$)|(&)|(<([^>]+)>))" ## PRIVATE @@ -95,7 +95,7 @@ build_replacement_vector_cached replacement_string pattern = strings and group reference numbers. build_replacement_vector : Text -> Regex -> Vector Replacement ! No_Such_Group build_replacement_vector replacement_string pattern = - replacement_pattern = Regex.compile group_reference_regex + replacement_pattern = group_reference_regex it = replacement_pattern.iterator replacement_string Vector.build builder-> diff --git a/test/Benchmarks/src/Main.enso b/test/Benchmarks/src/Main.enso index 15898283d42c..5e844e877dd6 100644 --- a/test/Benchmarks/src/Main.enso +++ b/test/Benchmarks/src/Main.enso @@ -31,6 +31,7 @@ import project.Text.Build import project.Text.Compare import project.Text.Contains import project.Text.Pretty +import project.Text.Regex_Bench import project.Text.Reverse import project.Time.Work_Days import project.Time.Format @@ -79,6 +80,7 @@ all_benchmarks = builder.append Compare.collect_benches builder.append Contains.collect_benches builder.append Pretty.collect_benches + builder.append Regex_Bench.collect_benches builder.append Reverse.collect_benches # Time diff --git a/test/Benchmarks/src/Text/Regex_Bench.enso b/test/Benchmarks/src/Text/Regex_Bench.enso new file mode 100644 index 000000000000..29d55731b57c --- /dev/null +++ 
b/test/Benchmarks/src/Text/Regex_Bench.enso
@@ -0,0 +1,105 @@
+from Standard.Base import all
+from Standard.Base.Runtime import assert
+
+import Standard.Base.Data.Text.Regex.Internal.Replacer.Replacer
+
+from Standard.Test import Bench, Faker
+
+# Inputs shared by the benchmarks; fields are declared with ~ (presumably lazy - confirm). NOTE(review): one_num_regex is not referenced by any benchmark below.
+type Data
+    Value
+        ~two_nums
+        ~just_two_nums
+        ~four_nums
+        ~a_regex
+        ~regexes
+        ~replacers
+        ~one_num_regex
+        ~two_nums_regex
+
+    create =
+        faker = Faker.new
+        a_regex = Regex.compile "(\d)ab(\d)"
+        Data.Value
+            <| build_two_nums faker
+            <| build_just_two_nums faker
+            <| build_four_nums faker
+            <| a_regex
+            <| build_regexes faker
+            <| build_replacers faker
+            <| Regex.compile "(\d+)"
+            <| Regex.compile "(\d+)[a-z]+(\d+)"
+
+list_count = 1000
+regex_count = 1000
+
+# Builds list_count strings shaped like "ab12cdefg634hi": faker.alpha 2, an integer, faker.alpha 8, an integer, faker.alpha 2.
+build_two_nums faker = 0.up_to list_count . map _->
+    (faker.alpha 2) + (faker.integer 0 100).to_text + (faker.alpha 8) + (faker.integer 0 100).to_text + (faker.alpha 2)
+
+# Builds list_count strings shaped like "12cdefg634": an integer, faker.alpha 8, an integer (no surrounding text).
+build_just_two_nums faker = 0.up_to list_count . map _->
+    (faker.integer 0 100).to_text + (faker.alpha 8) + (faker.integer 0 100).to_text
+
+# Builds strings shaped like "ab12cdefg634hiab12cdefg634hi" by concatenating pairs zipped from two separate build_two_nums lists.
+build_four_nums faker =
+    strings0 = build_two_nums faker
+    strings1 = build_two_nums faker
+    strings0.zip strings1 (_ + _)
+
+# build_regexes / build_replacers: regex_count pattern texts "(\d)" + alpha 2 + "(\d)" and replacement texts "$0" + alpha 2 + "$1" + alpha 2 + "$2".
+build_regexes faker = 0.up_to regex_count . map _->
+    "(\d)" + (faker.alpha 2) + "(\d)"
+
+build_replacers faker = 0.up_to regex_count . map _->
+    "$0" + (faker.alpha 2) + "$1" + (faker.alpha 2) + "$2"
+
+options = Bench.options . set_warmup (Bench.phase_conf 2 3) . set_measure (Bench.phase_conf 2 3)
+
+# Registers the "Regex" benchmark group: each spec maps one operation over prebuilt data and asserts on the first result only.
+collect_benches = Bench.build builder->
+    data = Data.create
+
+    builder.group "Regex" options group_builder->
+        group_builder.specify "match" <|
+            result = data.two_nums . map data.two_nums_regex.match
+            assert ((result.at 0 . groups . length) == 3)
+
+        group_builder.specify "match_all" <|
+            result = data.four_nums . map data.two_nums_regex.match_all
+            assert ((result.at 0 . map .groups . map .length) == [3, 3])
+
+        group_builder.specify "matches" <|
+            result = data.just_two_nums . map data.two_nums_regex.matches
+            assert ((result.at 0) == True)
+
+        group_builder.specify "find" <|
+            result = data.two_nums . map data.two_nums_regex.find
+            assert (result.at 0 . is_a Text)
+
+        group_builder.specify "find_all" <|
+            result = data.four_nums . map data.two_nums_regex.find_all
+            assert ((result.at 0 . at 0) . is_a Text)
+
+        group_builder.specify "split" <|
+            result = data.two_nums . map data.two_nums_regex.split
+            assert ((result.at 0 . map .length) == [2, 2])
+
+        group_builder.specify "tokenize" <|
+            result = data.two_nums . map data.two_nums_regex.tokenize
+            assert (result.at 0 . at 0 . is_a Text)
+
+        group_builder.specify "replace" <|
+            result = data.two_nums . map (x-> data.two_nums_regex.replace x "NUM")
+            assert ((result . at 0 . take 5 . drop 2) == "NUM")
+
+        group_builder.specify "regex_compile" <|
+            result = data.regexes.map Regex.compile
+            assert (result . at 0 . is_a Regex)
+
+        group_builder.specify "replacer_compile" <|
+            result = data.replacers.map (r-> Replacer.new r data.a_regex)
+            assert (result . at 0 . is_a Replacer)
+
+# Entry point when this file is run directly: hands the collected benchmarks to run_main.
+main = collect_benches . run_main