diff --git a/src/core/regex/include/sourcemeta/core/regex.h b/src/core/regex/include/sourcemeta/core/regex.h index 2e3bb772a..74e554b87 100644 --- a/src/core/regex/include/sourcemeta/core/regex.h +++ b/src/core/regex/include/sourcemeta/core/regex.h @@ -27,7 +27,7 @@ #include // std::variant /// @defgroup regex Regex -/// @brief A regex ECMA implementation for JSON Schema +/// @brief An opinionated regex ECMA 262 implementation for JSON Schema /// /// This functionality is included as follows: /// @@ -95,6 +95,10 @@ auto to_regex(const T &pattern) -> std::optional> { if (pattern == ".*" || pattern == "^.*$" || pattern == "^(.*)$" || pattern == "(.*)" || pattern == "[\\s\\S]*" || pattern == "^[\\s\\S]*$") { return RegexTypeNoop{}; + + // Note that the JSON Schema specification does not impose the use of any + // regular expression flag. Given popular adoption, we assume `.` matches + // new line characters (as in the `DOTALL` option) } else if (pattern == ".+" || pattern == "^.+$" || pattern == "^(.+)$" || pattern == ".") { return RegexTypeNonEmpty{}; @@ -125,6 +129,9 @@ auto to_regex(const T &pattern) -> std::optional> { // treated as non-marking sub-expressions (?:expr) boost::regex::nosubs | + // Make the `.` character class match new lines + boost::regex::mod_s | + // Instructs the regular expression engine to make matching faster, // with the potential cost of making construction slower boost::regex::optimize}; diff --git a/test/regex/regex_matches_test.cc b/test/regex/regex_matches_test.cc index 77b930d5c..2807d6a0f 100644 --- a/test/regex/regex_matches_test.cc +++ b/test/regex/regex_matches_test.cc @@ -93,6 +93,25 @@ TEST(Regex_matches, match_true_14) { "@namespace/mypackage")); } +TEST(Regex_matches, match_true_15) { + const auto regex{sourcemeta::core::to_regex(".")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\n")); +} + +TEST(Regex_matches, match_true_16) { + const auto regex{sourcemeta::core::to_regex(".")}; 
+ EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\r")); +} + +TEST(Regex_matches, match_true_17) { + const auto regex{sourcemeta::core::to_regex("^.+$")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE( + sourcemeta::core::matches(regex.value(), "foo\nbar\r")); +} + TEST(Regex_matches, match_false_1) { const auto regex{sourcemeta::core::to_regex("^foo")}; EXPECT_TRUE(regex.has_value());