diff --git a/docs/schema.md b/docs/schema.md index 567802f7..ec27cb04 100644 --- a/docs/schema.md +++ b/docs/schema.md @@ -29,8 +29,8 @@ For documentation, the schema allows for user comments by ignoring any text prec : ``` -* `variable-name` may contain any alphanumeric characters, but may not be the reserved names - `delimiters` or `timestamp`. +* `variable-name` may contain any printable character except space and colon, but may not be the + reserved names `delimiters` or `timestamp`. * `variable-pattern` is a regular expression using the supported [syntax](#regular-expression-syntax). diff --git a/src/log_surgeon/SchemaParser.cpp b/src/log_surgeon/SchemaParser.cpp index 0b7816d4..41baf8ea 100644 --- a/src/log_surgeon/SchemaParser.cpp +++ b/src/log_surgeon/SchemaParser.cpp @@ -472,14 +472,12 @@ auto SchemaParser::add_lexical_rules() -> void { // RegexASTGroupByte default constructs to an m_negate group, so we add the only two characters // which can't be in a comment, the newline and carriage return characters as they signify the // end of the comment. - unique_ptr comment_characters = make_unique(); + auto comment_characters{make_unique()}; comment_characters->add_literal('\r'); comment_characters->add_literal('\n'); add_token_group("CommentCharacters", std::move(comment_characters)); - add_token_group("IdentifierCharacters", make_unique('a', 'z')); - add_token_group("IdentifierCharacters", make_unique('A', 'Z')); - add_token_group("IdentifierCharacters", make_unique('0', '9')); - add_token("IdentifierCharacters", '_'); + add_token_group("IdentifierCharacters", make_unique('!', ':' - 1)); + add_token_group("IdentifierCharacters", make_unique(':' + 1, '~')); } auto SchemaParser::add_productions() -> void { diff --git a/tests/test-schema.cpp b/tests/test-schema.cpp index 8a384a51..1af4f830 100644 --- a/tests/test-schema.cpp +++ b/tests/test-schema.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include @@ -16,8 +18,10 @@ using log_surgeon::Schema; using log_surgeon::SchemaVarAST; +using std::pair; using std::string; using std::string_view; +using std::vector; using RegexASTCatByte = log_surgeon::finite_automata::RegexASTCat; @@ -155,29 +159,64 @@ TEST_CASE("add_invalid_var_priorities", "[Schema]") { /** * @ingroup unit_tests_schema - * @brief Create a schema, adding a variable and capture group name with an underscore. + * @brief Create a schema, adding a variable and capture group name with various character + * combinations that should succeed. */ -TEST_CASE("add_underscore_name", "[Schema]") { +TEST_CASE("add_various_correct_variable_names", "[Schema]") { Schema schema; - string const var_name = "var_name"; - string const cap_name = "cap_name"; - string const var_schema = var_name + string(":a(?<") + cap_name + string(">_)b"); - schema.add_variable(string_view(var_schema), -1); - auto const schema_ast = schema.release_schema_ast_ptr(); - REQUIRE(schema_ast->m_schema_vars.size() == 1); - REQUIRE(schema.release_schema_ast_ptr()->m_schema_vars.empty()); - - auto& schema_var_ast_ptr = schema_ast->m_schema_vars.at(0); - REQUIRE(nullptr != schema_var_ast_ptr); - auto& schema_var_ast = dynamic_cast(*schema_var_ast_ptr); - REQUIRE(var_name == schema_var_ast.m_name); + vector> name_pairs { + {"varName123","capName123"}, + {"var_name","cap_name"}, + {"var@name","cap@name"}, + {"var.name","cap.name"}, + {"var-name","cap-name"}, + {"var~name","cap~name"}, + {"var\"name\"","cap\"name\""}, + {"var'name'","cap'name'"} + }; + + for (auto const& [var_name, cap_name] : name_pairs) { + string const var_schema{var_name + string(":a(?<") + cap_name + string(">_)b")}; + schema.add_variable(string_view(var_schema), -1); + + auto const schema_ast{schema.release_schema_ast_ptr()}; + REQUIRE(schema_ast->m_schema_vars.size() == 1); + REQUIRE(schema.release_schema_ast_ptr()->m_schema_vars.empty()); + + auto& schema_var_ast_ptr{schema_ast->m_schema_vars.at(0)}; + REQUIRE(nullptr != schema_var_ast_ptr); + auto& schema_var_ast{dynamic_cast(*schema_var_ast_ptr)}; + REQUIRE(var_name == schema_var_ast.m_name); + + REQUIRE_NOTHROW([&]() -> void { + (void)dynamic_cast(*schema_var_ast.m_regex_ptr); + }()); + + auto const captures{schema_var_ast.m_regex_ptr->get_subtree_positive_captures()}; + REQUIRE(captures.size() == 1); + REQUIRE(cap_name == captures.at(0)->get_name()); + } +} - REQUIRE_NOTHROW([&]() -> void { - (void)dynamic_cast(*schema_var_ast.m_regex_ptr); - }()); +/** + * @ingroup unit_tests_schema + * @brief Create a schema, adding a variable and capture group name with various character + * combinations that should throw errors. + */ +TEST_CASE("add_various_incorrect_variable_names", "[Schema]") { + Schema schema; - auto const captures{schema_var_ast.m_regex_ptr->get_subtree_positive_captures()}; - REQUIRE(captures.size() == 1); - REQUIRE(cap_name == captures.at(0)->get_name()); + vector> name_pairs { + {"var name","validName"}, + {"validName","cap:name"}, + {"validName","cap name"}, + }; + + for (auto const& [var_name, cap_name] : name_pairs) { + CAPTURE(var_name); + CAPTURE(cap_name); + string const var_schema{var_name + string(":a(?<") + cap_name + string(">_)b")}; + REQUIRE_THROWS(schema.add_variable(string_view(var_schema), -1)); + } }