From 2e3fcfc00db57b4202955046baa68fc1b2ce760d Mon Sep 17 00:00:00 2001 From: Garth Tuohy Date: Thu, 22 Jun 2023 12:09:51 -0400 Subject: [PATCH 1/2] Fix of_string_maybe_invalid --- src/lTerm_text_impl.ml | 47 ++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/src/lTerm_text_impl.ml b/src/lTerm_text_impl.ml index 5730abc..bf79196 100644 --- a/src/lTerm_text_impl.ml +++ b/src/lTerm_text_impl.ml @@ -44,36 +44,25 @@ module Make (LiteralIntf: LiteralIntf.Type) = struct Uchar.of_int (Char.code 'a' + x - 10) let of_string_maybe_invalid str= - let len= Zed_string.length str in - let arr= Array.make len dummy in - let rec loop ofs idx= - if idx = len then - arr + let txt zc = (zc, LTerm_style.none) in + let rec loop ofs acc = + if ofs = 0 then acc else begin - let ofs, idx= - try - let chr, ofs= Zed_string.extract_next str ofs in - Array.unsafe_set arr idx (chr, LTerm_style.none); - (ofs, idx + 1) - with - Zed_utf8.Invalid _-> - let code= Uchar.to_int (Zed_char.core (Zed_string.extract str ofs)) in - Array.unsafe_set arr (idx + 0) - (Zed_char.unsafe_of_char '\\', LTerm_style.none); - Array.unsafe_set arr (idx + 1) - (Zed_char.unsafe_of_char 'y', LTerm_style.none); - Array.unsafe_set arr (idx + 2) - (Zed_char.unsafe_of_uChar (uchar_of_hex (code lsr 4)) - , LTerm_style.none); - Array.unsafe_set arr (idx + 3) - (Zed_char.unsafe_of_uChar (uchar_of_hex (code land 15)) - , LTerm_style.none); - ofs + 1, idx + 4 - in - loop ofs idx - end - in - loop 0 0 + try + let (zc, ofs') = Zed_string.extract_prev str ofs in + loop ofs' (txt zc :: acc) + with Invalid_argument _ | Zed_utf8.Invalid _ -> + (* extract_prev calls Zed_utf8.unsafe_extract_prev which throws + Zed_utf8.Invalid, or can call Uchar.of_int which throws Invalid_argument *) + let invalid = Zed_string.sub_ofs ~ofs:(ofs-1) ~len:1 str in + let code = Char.code (Zed_string.to_utf8 invalid).[0] in + loop (ofs - 1) (txt (Zed_char.unsafe_of_char '\\') :: + txt (Zed_char.unsafe_of_char 'y') :: + txt (Zed_char.unsafe_of_uChar (uchar_of_hex (code lsr 4))) :: + txt (Zed_char.unsafe_of_uChar (uchar_of_hex (code land 15))) :: + acc) + end in + Array.of_list @@ loop (Zed_string.bytes str) [] let of_utf8_maybe_invalid str= let str= Zed_string.unsafe_of_utf8 str in From 7771712125e39abb9561e22e4bcc714ca1827cf5 Mon Sep 17 00:00:00 2001 From: Garth Tuohy Date: Fri, 23 Jun 2023 00:22:51 -0400 Subject: [PATCH 2/2] Handle combining characters, tiny cleanup --- src/lTerm_text_impl.ml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/lTerm_text_impl.ml b/src/lTerm_text_impl.ml index bf79196..58dc456 100644 --- a/src/lTerm_text_impl.ml +++ b/src/lTerm_text_impl.ml @@ -46,14 +46,15 @@ module Make (LiteralIntf: LiteralIntf.Type) = struct let of_string_maybe_invalid str= let txt zc = (zc, LTerm_style.none) in let rec loop ofs acc = - if ofs = 0 then acc + if ofs = 0 then Array.of_list acc else begin try let (zc, ofs') = Zed_string.extract_prev str ofs in loop ofs' (txt zc :: acc) - with Invalid_argument _ | Zed_utf8.Invalid _ -> - (* extract_prev calls Zed_utf8.unsafe_extract_prev which throws - Zed_utf8.Invalid, or can call Uchar.of_int which throws Invalid_argument *) + with + | Invalid_argument _ (* from Uchar.of_int *) + | Zed_utf8.Invalid _ (* invalid UTF8 sequence *) + | Zed_string.Invalid _ -> (* individual combining character *) let invalid = Zed_string.sub_ofs ~ofs:(ofs-1) ~len:1 str in let code = Char.code (Zed_string.to_utf8 invalid).[0] in loop (ofs - 1) (txt (Zed_char.unsafe_of_char '\\') :: @@ -61,8 +62,9 @@ module Make (LiteralIntf: LiteralIntf.Type) = struct txt (Zed_char.unsafe_of_uChar (uchar_of_hex (code lsr 4))) :: txt (Zed_char.unsafe_of_uChar (uchar_of_hex (code land 15))) :: acc) - end in - Array.of_list @@ loop (Zed_string.bytes str) [] + end + in + loop (Zed_string.bytes str) [] let of_utf8_maybe_invalid str= let str= Zed_string.unsafe_of_utf8 str in