Skip to content

Commit

Permalink
re #440: add possible workaround
Browse files Browse the repository at this point in the history
  • Loading branch information
biojppm committed Jun 27, 2024
1 parent fde1751 commit 2e28afc
Showing 1 changed file with 30 additions and 9 deletions.
39 changes: 30 additions & 9 deletions src/c4/yml/parse_engine.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
}

inline bool _is_doc_begin_token(csubstr s)
inline bool _is_doc_begin_token(csubstr s) noexcept
{
RYML_ASSERT(s.begins_with('-'));
RYML_ASSERT(!s.ends_with("\n"));
Expand All @@ -77,7 +77,7 @@ inline bool _is_doc_begin_token(csubstr s)
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
}

inline bool _is_doc_end_token(csubstr s)
inline bool _is_doc_end_token(csubstr s) noexcept
{
RYML_ASSERT(s.begins_with('.'));
RYML_ASSERT(!s.ends_with("\n"));
Expand All @@ -86,14 +86,33 @@ inline bool _is_doc_end_token(csubstr s)
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
}

// Reports whether s begins with a document marker: "---" or "...",
// followed either by the end of s or by a space (the tab alternative
// is wrapped in _RYML_WITH_TAB_TOKENS). Returns false when s is
// shorter than three characters or starts with any other character.
inline bool _is_doc_token(csubstr s)
inline bool _is_doc_token(csubstr s) noexcept
{
//
// NOTE: this function was failing under some scenarios when
// compiled with gcc -O2 (but not with -O3, -O1 or -O0). This is
// likely related to optimizer assumptions on the input string,
// and possibly caused by UB around assignment to that string (the
// call site was in _scan_block()). For more details see:
//
// https://github.com/biojppm/rapidyaml/issues/440
//
// The current version does not suffer from this problem, but it
// may appear again.
//
// at least three characters are needed before s.str[1] and s.str[2]
// can be read
if(s.len >= 3)
{
if(s.str[0] == '-')
return _is_doc_begin_token(s);
else if(s.str[0] == '.')
return _is_doc_end_token(s);
// the marker checks are written out inline here, in place of the
// helper calls kept below as commented-out code
switch(s.str[0])
{
case '-':
//return _is_doc_begin_token(s); // this was failing with gcc -O2
// s.str[3] is only read when s.len != 3, which means s.len > 3
// because of the enclosing length guard
return (s.str[1] == '-' && s.str[2] == '-')
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
case '.':
//return _is_doc_end_token(s); // this was failing with gcc -O2
// same shape as the '-' case, matching "..." instead of "---"
return (s.str[1] == '.' && s.str[2] == '.')
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
}
}
// too short, or the first character is neither '-' nor '.'
return false;
}
Expand Down Expand Up @@ -2026,7 +2045,7 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
digits = t.left_of(t.first_not_of("0123456789"));
if( ! digits.empty())
{
if(digits.len > 1)
if(C4_UNLIKELY(digits.len > 1))
_c4err("parse error: invalid indentation");
_c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
Expand Down Expand Up @@ -2067,8 +2086,9 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
// evaluate termination conditions
if(indentation != npos)
{
_c4dbgpf("blck: indentation={}", indentation);
// stop when the line is deindented and not empty
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty()))
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
{
if(raw_block.len)
{
Expand All @@ -2082,6 +2102,7 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
}
else if(indentation == 0)
{
_c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
if(_is_doc_token(lc.rem))
{
_c4dbgp("blck: stop. indentation=0 and doc ended");
Expand Down

0 comments on commit 2e28afc

Please sign in to comment.