From 96664cd62cb9708b54114e24d31c5d95c3b6d01a Mon Sep 17 00:00:00 2001 From: David Sancho Moreno Date: Sun, 3 Mar 2024 21:25:53 +0100 Subject: [PATCH] Add tests for all flags --- lib/regexp.ml | 20 +++++++------- test/suite.ml | 72 +++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/lib/regexp.ml b/lib/regexp.ml index a6eb6b3..82d632c 100644 --- a/lib/regexp.ml +++ b/lib/regexp.ml @@ -63,6 +63,8 @@ let compile re flags = print_endline error; raise (Invalid_argument "Compilation failed") +let lastIndex regexp = regexp.lastIndex + (* exec is not a binding to lre_exec but an implementation of `js_regexp_exec` *) let exec regexp input = let capture_count = Bindings.C.Functions.lre_get_capture_count regexp.bc in @@ -88,15 +90,9 @@ let exec regexp input = (* if ((re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) == 0) { last_index = 0; } *) - match global regexp.flags with - | true -> - print_endline "global and flags are set"; - regexp.lastIndex - | false -> - print_endline - (Printf.sprintf "flags %d, global: %d" regexp.flags lre_flag_global); - print_endline "not global and flags are not set"; - 0 + match global regexp.flags || sticky regexp.flags with + | true -> regexp.lastIndex + | false -> 0 in (* Printf.printf "lastIndex %d\n" regexp.lastIndex; *) @@ -138,9 +134,11 @@ let exec regexp input = *) *) i := !i + 2 done; - (* mutable lastIndex : int *) { captures = substrings } | 0 -> - (* Printf.sprintf "nothing found" |> print_endline; *) + (* No match: as in `js_regexp_exec`, reset lastIndex to 0 for global or sticky regexps so the next exec starts over *) + (match global regexp.flags || sticky regexp.flags with + | true -> regexp.lastIndex <- 0 + | false -> ()); { captures = [||] } | _ (* -1 *) -> raise (Invalid_argument "Error") diff --git a/test/suite.ml b/test/suite.ml index 7a90c48..f5d1ed7 100644 --- a/test/suite.ml +++ b/test/suite.ml @@ -115,6 +115,8 @@ let test title fn = Alcotest.test_case title `Quick fn let assert_result left right = Alcotest.(check (array string)) 
"match" right left +let assert_int left right = Alcotest.(check int) "match" right left + let () = Alcotest.run "RegExp" [ @@ -124,7 +126,35 @@ let () = let regex = RegExp.compile "[0-9]+" "" in let result = RegExp.exec regex "abc123xyz" in assert_result result.captures [| "123" |]); - test "running exec with global" (fun () -> + test "exec" (fun () -> + let regex = RegExp.compile "[0-9]+" "" in + let result = RegExp.exec regex "abc00123xyz456_0" in + assert_result result.captures [| "00123" |]; + let result = RegExp.exec regex "abc00123xyz456_0" in + assert_result result.captures [| "00123" |]); + test "basic text" (fun () -> + let regex = RegExp.compile "a" "" in + let result = RegExp.exec regex "bbb" in + assert_result result.captures [||]; + let result = RegExp.exec regex "bbba" in + assert_result result.captures [| "a" |]); + test "with i (ignorecase)" (fun () -> + let regex = RegExp.compile "a" "i" in + let result = RegExp.exec regex "123bA" in + assert_result result.captures [| "A" |]; + let result = RegExp.exec regex "123ba" in + assert_result result.captures [| "a" |]); + test "with m (multiline)" (fun () -> + let regex = RegExp.compile "^d" "m" in + let result = RegExp.exec regex "123bA" in + assert_result result.captures [||]; + let result = RegExp.exec regex "123bA\n123" in + assert_result result.captures [||]; + let result = RegExp.exec regex "david" in + assert_result result.captures [| "d" |]; + let result = RegExp.exec regex "123bA\ndavid" in + assert_result result.captures [| "d" |]); + test "with g (global)" (fun () -> let regex = RegExp.compile "[0-9]+" "g" in let input = "abc00123xyz456_0" in let result = RegExp.exec regex input in @@ -133,19 +163,33 @@ let () = assert_result result.captures [| "456" |]; let result = RegExp.exec regex input in assert_result result.captures [| "0" |]); - test "without global" (fun () -> - let regex = RegExp.compile "[0-9]+" "" in - let result = RegExp.exec regex "abc00123xyz456_0" in - assert_result 
result.captures [| "00123" |]; - let result = RegExp.exec regex "abc00123xyz456_0" in - assert_result result.captures [| "00123" |]); - (* test "i" (fun () -> ()) *) - (* test "m" (fun () -> ()) *) - (* test "s" (fun () -> ()) *) - (* test "u" (fun () -> ()) *) - (* test "y" (fun () -> ()) *) - - (* test "groups" *) + test "with y (sticky)" (fun () -> + let regex = RegExp.compile "foo" "y" in + assert_int (RegExp.lastIndex regex) 0; + let input = "foofoofoo" in + let result = RegExp.exec regex input in + assert_int (RegExp.lastIndex regex) 3; + assert_result result.captures [| "foo" |]; + let result = RegExp.exec regex input in + assert_int (RegExp.lastIndex regex) 6; + assert_result result.captures [| "foo" |]; + let result = RegExp.exec regex input in + assert_int (RegExp.lastIndex regex) 9; + assert_result result.captures [| "foo" |]; + let result = RegExp.exec regex input in + assert_int (RegExp.lastIndex regex) 0; + assert_result result.captures [||]); + test "groups" (fun () -> + let regex = RegExp.compile "(xyz)" "" in + let input = "xyz yz xyzx xzy" in + let result = RegExp.exec regex input in + assert_result result.captures [| "xyz"; "xyz" |]); + (* https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/word-boundary.js *) + test "lookbehind with word boundary" (fun () -> + let regex = RegExp.compile "(?<=\\b)[d-f]{3}" "" in + let input = "def" in + let result = RegExp.exec regex input in + assert_result result.captures [| "def" |]); (* test "named groups?" *) ] ); ]