diff --git a/src/parser/RdfParser.cpp b/src/parser/RdfParser.cpp index 9cf4bffe93..0c29d7da97 100644 --- a/src/parser/RdfParser.cpp +++ b/src/parser/RdfParser.cpp @@ -739,11 +739,11 @@ bool TurtleParser::iriref() { if (!view.starts_with('<')) { return false; } - auto endPos = view.find_first_of("<>\"\n", 1); + auto endPos = view.find_first_of(">\n", 1); if (endPos == string::npos || view[endPos] != '>') { raise( - "Unterminated IRI reference (found '<' but no '>' before " - "one of the following characters: <, \", newline)"); + "Unterminated IRI reference (found '<' but no matching `>` " + "on the same line)"); } // In relaxed mode, that is all we check. Otherwise, we check if the IRI is // standard-compliant. If not, we output a warning and try to parse it in a diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h index a8dc50d0ac..5f77227ff7 100644 --- a/src/parser/Tokenizer.h +++ b/src/parser/Tokenizer.h @@ -128,7 +128,7 @@ struct TurtleToken { "<([^\\x00-\\x20<>\"{}|^`\\\\]|"s + UcharString + ")*>"; const RE2 Iriref; const string IrirefStringRelaxed = - "<([^\\x00-\\x19<>\"\\\\]|"s + UcharString + ")*>"; + "<([^\\x00-\\x19<>\\\\]|"s + UcharString + ")*>"; const RE2 IrirefRelaxed; const string PercentString = "%" + cls(HexString) + "{2}"; diff --git a/test/RdfParserTest.cpp b/test/RdfParserTest.cpp index 6eba30f45d..23d59c5562 100644 --- a/test/RdfParserTest.cpp +++ b/test/RdfParserTest.cpp @@ -753,7 +753,7 @@ TEST(RdfParserTest, iriref) { std::string iriref_1 = ""; std::string iriref_2 = ""; std::string iriref_3 = ""; - std::string iriref_4 = ""; + std::string iriref_4 = "). + // The fourth IRI ref throws an exception when parsed (because `<` is + // encountered before the closing `>`). parser.setInputStream(iriref_4); ASSERT_THROW(parser.iriref(), TurtleParser::ParseException); // The fifth IRI ref is not recognized as an IRI ref. diff --git a/test/TokenTest.cpp b/test/TokenTest.cpp index 20602384e4..104a327900 100644 --- a/test/TokenTest.cpp +++ b/test/TokenTest.cpp @@ -189,8 +189,8 @@ TEST(TokenizerTest, Entities) { string iriref4 = ""; string noIriref1 = "< >"; string noIriref2 = "<{}|^`>"; - string noIriref4 = "<\">"; string noIriref3 = "<\n>"; + string noIriref4 = "