From 6ec2a71d83f270fc19df5ecb9a1081ea961c7360 Mon Sep 17 00:00:00 2001 From: Jan Uhlig Date: Fri, 25 Aug 2023 15:55:46 +0200 Subject: [PATCH] Binary replacement with a function With this change, `binary:replace/3,4` also accepts a function for the Replacement argument, for cases when more complex processing is required to generate a replacement. The given function will be called with the match as argument, and the returned binary will be inserted into the result. Co-authored-by: Maria Scott --- lib/stdlib/doc/src/binary.xml | 71 ++++++++++++++++--------- lib/stdlib/src/binary.erl | 32 +++++------ lib/stdlib/test/binary_module_SUITE.erl | 23 ++++++++ lib/stdlib/test/binref.erl | 8 ++- 4 files changed, 86 insertions(+), 48 deletions(-) diff --git a/lib/stdlib/doc/src/binary.xml b/lib/stdlib/doc/src/binary.xml index 5ed7babc7b82..b3badc9424e8 100644 --- a/lib/stdlib/doc/src/binary.xml +++ b/lib/stdlib/doc/src/binary.xml @@ -574,41 +574,60 @@ store(Binary, GBSet) -> Replace bytes in a binary according to a pattern. An integer() =< byte_size(Replacement) - + if Replacement is given as a binary. -

Constructs a new binary by replacing the parts in - Subject matching Pattern with - the content of Replacement.

- -

If the matching subpart of Subject giving raise - to the replacement is to be inserted in the result, option - {insert_replaced, InsPos} inserts the matching part - into Replacement at the specified position (or - positions) before inserting Replacement into - Subject.

+

Constructs a new binary by replacing the parts in + Subject matching Pattern with + Replacement if given as a literal binary() + or with the result of applying Replacement to a matching + subpart if given as a fun.

+ +

If Replacement is given as a binary() and the + matching subpart of Subject giving rise + to the replacement is to be inserted in the result, option + {insert_replaced, InsPos} inserts the matching part + into Replacement at the specified position (or + positions) before inserting Replacement into + Subject. If Replacement is given + as a fun instead, this option is ignored.

-

Example:

+

If any position specified in InsPos is greater than the size + of the replacement binary, a badarg exception is raised.

- -1> binary:replace(<<"abcde">>,<<"b">>,<<"[]">>, [{insert_replaced,1}]). +

Options global and {scope, part()} work as for + split/3. + The return type is always a binary().

+ +

For a description of Pattern, see + compile_pattern/1.

+

Examples:

+ +
+1> binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"X">>, []).
+<<"aXcde">>
+
+2> binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"X">>, [global]).
+<<"aXcXe">>
+
+3> binary:replace(<<"abcde">>, <<"b">>, <<"[]">>, [{insert_replaced, 1}]).
 <<"a[b]cde">>
-2> binary:replace(<<"abcde">>,[<<"b">>,<<"d">>],<<"[]">>,[global,{insert_replaced,1}]).
+
+4> binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"[]">>, [global, {insert_replaced, 1}]).
 <<"a[b]c[d]e">>
-3> binary:replace(<<"abcde">>,[<<"b">>,<<"d">>],<<"[]">>,[global,{insert_replaced,[1,1]}]).
+
+5> binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"[]">>, [global, {insert_replaced, [1, 1]}]).
 <<"a[bb]c[dd]e">>
-4> binary:replace(<<"abcde">>,[<<"b">>,<<"d">>],<<"[-]">>,[global,{insert_replaced,[1,2]}]).
-<<"a[b-b]c[d-d]e">>
 
-        

If any position specified in InsPos > size - of the replacement binary, a badarg exception is raised.

+6> binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"[-]">>, [global, {insert_replaced, [1, 2]}]). +<<"a[b-b]c[d-d]e">> -

Options global and {scope, part()} work as for - split/3. - The return type is always a binary().

+7> binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], fun(M) -> <<$[, M/binary, $]>> end, []). +<<"a[b]cde">> + +8> binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], fun(M) -> <<$[, M/binary, $]>> end, [global]). +<<"a[b]c[d]e">> +
-

For a description of Pattern, see - compile_pattern/1. -

diff --git a/lib/stdlib/src/binary.erl b/lib/stdlib/src/binary.erl index 52e6cbda0a54..8b3be311ee73 100644 --- a/lib/stdlib/src/binary.erl +++ b/lib/stdlib/src/binary.erl @@ -276,7 +276,7 @@ split(_, _, _) -> Subject :: binary(), Pattern :: PatternBinary | [PatternBinary,...] | cp(), PatternBinary :: nonempty_binary(), - Replacement :: binary(), + Replacement :: binary() | fun((binary()) -> binary()), Result :: binary(). replace(H,N,R) -> @@ -291,7 +291,7 @@ replace(H,N,R) -> Subject :: binary(), Pattern :: PatternBinary | [PatternBinary,...] | cp(), PatternBinary :: nonempty_binary(), - Replacement :: binary(), + Replacement :: binary() | fun((binary()) -> binary()), Options :: [Option], Option :: global | {scope, part()} | {insert_replaced, InsPos}, InsPos :: OnePos | [ OnePos ], @@ -300,7 +300,7 @@ replace(H,N,R) -> replace(Haystack,Needles,Replacement,Options) -> try - true = is_binary(Replacement), % Make badarg instead of function clause + true = is_binary(Replacement) orelse is_function(Replacement, 1), % Make badarg instead of function clause {Part,Global,Insert} = get_opts_replace(Options,{no,false,[]}), Moptlist = case Part of no -> @@ -317,13 +317,17 @@ replace(Haystack,Needles,Replacement,Options) -> Match -> [Match] end end, - ReplList = case Insert of + ReplList = case is_function(Replacement, 1) orelse Insert of + true -> + Replacement; [] -> - Replacement; + fun(_) -> Replacement end; Y when is_integer(Y) -> - splitat(Replacement,0,[Y]); + <> = Replacement, + fun(M) -> [ReplFront, M, ReplRear] end; Li when is_list(Li) -> - splitat(Replacement,0,lists:sort(Li)) + Splits = splitat(Replacement,0,lists:sort(Li)), + fun(M) -> lists:join(M, Splits) end end, erlang:iolist_to_binary(do_replace(Haystack,MList,ReplList,0)) catch @@ -337,19 +341,7 @@ replace(Haystack,Needles,Replacement,Options) -> do_replace(H,[],_,N) -> [binary:part(H,{N,byte_size(H)-N})]; do_replace(H,[{A,B}|T],Replacement,N) -> - [binary:part(H,{N,A-N}), - if - is_list(Replacement) -> - 
do_insert(Replacement, binary:part(H,{A,B})); - true -> - Replacement - end - | do_replace(H,T,Replacement,A+B)]. - -do_insert([X],_) -> - [X]; -do_insert([H|T],R) -> - [H,R|do_insert(T,R)]. + [binary:part(H,{N,A-N}), Replacement(binary:part(H, {A, B})) | do_replace(H,T,Replacement,A+B)]. splitat(H,N,[]) -> [binary:part(H,{N,byte_size(H)-N})]; diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl index 954efae9b709..8127b93819d9 100644 --- a/lib/stdlib/test/binary_module_SUITE.erl +++ b/lib/stdlib/test/binary_module_SUITE.erl @@ -495,28 +495,51 @@ do_interesting(Module) -> [] = binary:split(<<>>, <<",">>, [global,trim]), [] = binary:split(<<>>, <<",">>, [global,trim_all]), + ReplaceFn = fun(Match) -> << <<(B + 1)>> || <> <= Match >> end, badarg = ?MASK_ERROR( Module:replace(<<1,2,3,4,5,6,7,8>>, [<<4,5>>,<<7>>,<<8>>],<<99>>, [global,trim,{scope,{0,5}}])), + badarg = ?MASK_ERROR( + Module:replace(<<1,2,3,4,5,6,7,8>>, + [<<4,5>>,<<7>>,<<8>>],ReplaceFn, + [global,trim,{scope,{0,5}}])), <<1,2,3,99,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>, [<<4,5>>,<<7>>,<<8>>],<<99>>,[]), + <<1,2,3,5,6,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>, + [<<4,5>>,<<7>>,<<8>>],ReplaceFn,[]), <<1,2,3,99,6,99,99>> = Module:replace(<<1,2,3,4,5,6,7,8>>, [<<4,5>>,<<7>>,<<8>>],<<99>>, [global]), + <<1,2,3,5,6,6,8,9>> = Module:replace(<<1,2,3,4,5,6,7,8>>, + [<<4,5>>,<<7>>,<<8>>],ReplaceFn, + [global]), <<1,2,3,99,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>, [<<4,5>>,<<7>>,<<8>>],<<99>>, [global,{scope,{0,5}}]), + <<1,2,3,5,6,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>, + [<<4,5>>,<<7>>,<<8>>],ReplaceFn, + [global,{scope,{0,5}}]), <<1,2,3,99,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>, [<<4,5>>,<<7>>,<<8>>],<<99>>, [global,{scope,{0,5}}]), + <<1,2,3,5,6,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>, + [<<4,5>>,<<7>>,<<8>>],ReplaceFn, + [global,{scope,{0,5}}]), <<1,2,3,99,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>, [<<4,5>>,<<7>>,<<8>>],<<99>>, 
[global,{scope,{0,5}}]), + <<1,2,3,5,6,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>, + [<<4,5>>,<<7>>,<<8>>],ReplaceFn, + [global,{scope,{0,5}}]), badarg = ?MASK_ERROR(Module:replace(<<1,2,3,4,5,6,7,8>>, [<<4,5>>,<<7>>,<<8>>],<<99>>, [global,{scope,{0,5}}, {insert,1}])), + badarg = ?MASK_ERROR(Module:replace(<<1,2,3,4,5,6,7,8>>, + [<<4,5>>,<<7>>,<<8>>],ReplaceFn, + [global,{scope,{0,5}}, + {insert,1}])), <<1,2,3,99,4,5,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>, [<<4,5>>,<<7>>,<<8>>],<<99>>, [global,{scope,{0,5}}, diff --git a/lib/stdlib/test/binref.erl b/lib/stdlib/test/binref.erl index deb1ede4dff6..c92a716dba9e 100644 --- a/lib/stdlib/test/binref.erl +++ b/lib/stdlib/test/binref.erl @@ -228,7 +228,7 @@ replace(Haystack,Needles0,Replacement,Options) -> true -> exit(badtype) end, - true = is_binary(Replacement), % Make badarg instead of function clause + true = is_binary(Replacement) orelse is_function(Replacement, 1), % Make badarg instead of function clause {Part,Global,Insert} = get_opts_replace(Options,{nomatch,false,[]}), {Start,End,NewStack} = case Part of @@ -254,7 +254,9 @@ replace(Haystack,Needles0,Replacement,Options) -> [X] end end, - ReplList = case Insert of + ReplList = case is_binary(Replacement) andalso Insert of + false -> + Replacement; [] -> Replacement; Y when is_integer(Y) -> @@ -274,6 +276,8 @@ do_replace(H,[],_,N) -> do_replace(H,[{A,B}|T],Replacement,N) -> [part(H,{N,A-N}), if + is_function(Replacement) -> + Replacement(part(H, {A, B})); is_list(Replacement) -> do_insert(Replacement, part(H,{A,B})); true ->