Skip to content

Clarify the use of DOTALL in JSON Schema regular expressions #1586

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/core/regex/include/sourcemeta/core/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include <variant> // std::variant

/// @defgroup regex Regex
/// @brief A regex ECMA implementation for JSON Schema
/// @brief An opinionated regex ECMA 262 implementation for JSON Schema
///
/// This functionality is included as follows:
///
Expand Down Expand Up @@ -95,6 +95,10 @@ auto to_regex(const T &pattern) -> std::optional<Regex<T>> {
if (pattern == ".*" || pattern == "^.*$" || pattern == "^(.*)$" ||
pattern == "(.*)" || pattern == "[\\s\\S]*" || pattern == "^[\\s\\S]*$") {
return RegexTypeNoop{};

// Note that the JSON Schema specification does not impose the use of any
// regular expression flag. Given popular adoption, we assume `.` matches
// new line characters (as in the `DOTALL` option)
} else if (pattern == ".+" || pattern == "^.+$" || pattern == "^(.+)$" ||
pattern == ".") {
return RegexTypeNonEmpty{};
Expand Down Expand Up @@ -125,6 +129,9 @@ auto to_regex(const T &pattern) -> std::optional<Regex<T>> {
// treated as non-marking sub-expressions (?:expr)
boost::regex::nosubs |

// Make the `.` character class match new lines
boost::regex::mod_s |

// Instructs the regular expression engine to make matching faster,
// with the potential cost of making construction slower
boost::regex::optimize};
Expand Down
19 changes: 19 additions & 0 deletions test/regex/regex_matches_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,25 @@ TEST(Regex_matches, match_true_14) {
"@namespace/mypackage"));
}

// The `.` pattern must match a lone line feed, i.e. `.` is expected to
// match new line characters as with a `DOTALL` option.
TEST(Regex_matches, match_true_15) {
  const auto regex{sourcemeta::core::to_regex<std::string>(".")};
  // Fatal assertion: if the pattern did not compile, stop here instead of
  // calling `.value()` on an empty optional below
  ASSERT_TRUE(regex.has_value());
  EXPECT_TRUE(sourcemeta::core::matches<std::string>(regex.value(), "\n"));
}

// The `.` pattern must match a lone carriage return as well as a line feed.
TEST(Regex_matches, match_true_16) {
  const auto regex{sourcemeta::core::to_regex<std::string>(".")};
  // Fatal assertion: if the pattern did not compile, stop here instead of
  // calling `.value()` on an empty optional below
  ASSERT_TRUE(regex.has_value());
  EXPECT_TRUE(sourcemeta::core::matches<std::string>(regex.value(), "\r"));
}

// The anchored `^.+$` pattern must match input that contains both a line
// feed in the middle and a trailing carriage return.
TEST(Regex_matches, match_true_17) {
  const auto regex{sourcemeta::core::to_regex<std::string>("^.+$")};
  // Fatal assertion: if the pattern did not compile, stop here instead of
  // calling `.value()` on an empty optional below
  ASSERT_TRUE(regex.has_value());
  EXPECT_TRUE(
      sourcemeta::core::matches<std::string>(regex.value(), "foo\nbar\r"));
}

TEST(Regex_matches, match_false_1) {
const auto regex{sourcemeta::core::to_regex<std::string>("^foo")};
EXPECT_TRUE(regex.has_value());
Expand Down