Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the relaxed parsing even more relaxed #1672

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/parser/RdfParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -739,11 +739,11 @@ bool TurtleParser<T>::iriref() {
if (!view.starts_with('<')) {
return false;
}
auto endPos = view.find_first_of("<>\"\n", 1);
auto endPos = view.find_first_of(">\n", 1);
if (endPos == string::npos || view[endPos] != '>') {
raise(
"Unterminated IRI reference (found '<' but no '>' before "
"one of the following characters: <, \", newline)");
"Unterminated IRI reference (found '<' but no matching `>` "
"on the same line)");
}
// In relaxed mode, that is all we check. Otherwise, we check if the IRI is
// standard-compliant. If not, we output a warning and try to parse it in a
Expand Down
2 changes: 1 addition & 1 deletion src/parser/Tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ struct TurtleToken {
"<([^\\x00-\\x20<>\"{}|^`\\\\]|"s + UcharString + ")*>";
const RE2 Iriref;
const string IrirefStringRelaxed =
"<([^\\x00-\\x19<>\"\\\\]|"s + UcharString + ")*>";
"<([^\\x00-\\x19<>\\\\]|"s + UcharString + ")*>";
const RE2 IrirefRelaxed;

const string PercentString = "%" + cls(HexString) + "{2}";
Expand Down
6 changes: 3 additions & 3 deletions test/RdfParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ TEST(RdfParserTest, iriref) {
std::string iriref_1 = "<fine>";
std::string iriref_2 = "<okay ish>";
std::string iriref_3 = "<not\x19okay_for_RE2>";
std::string iriref_4 = "<throws\"exception>";
std::string iriref_4 = "<throws exception";
std::string iriref_5 = "no iriref at all";
// The first IRI ref is fine for both parsers.
parser.setInputStream(iriref_1);
Expand Down Expand Up @@ -779,8 +779,8 @@ TEST(RdfParserTest, iriref) {
} else {
ASSERT_FALSE(parser.iriref());
}
// The fourth IRI ref throws an exception when parsed (because " is
// encountered before the closing >).
// The fourth IRI ref throws an exception when parsed (because the input ends
// without a closing `>`).
parser.setInputStream(iriref_4);
ASSERT_THROW(parser.iriref(), TurtleParser<Tokenizer>::ParseException);
// The fifth IRI ref is not recognized as an IRI ref.
Expand Down
2 changes: 1 addition & 1 deletion test/TokenTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ TEST(TokenizerTest, Entities) {
string iriref4 = "<escaped\\uAA34\\U000ABC34end>";
string noIriref1 = "< >";
string noIriref2 = "<{}|^`>";
string noIriref4 = "<\">";
string noIriref3 = "<\n>";
string noIriref4 = "<abc";

// Strict Iriref parsing.
ASSERT_TRUE(RE2::FullMatch(iriref1, t.Iriref, nullptr));
Expand Down
Loading