Skip to content

Commit

Permalink
re #440: add possible workaround
Browse files Browse the repository at this point in the history
  • Loading branch information
biojppm committed Jun 28, 2024
1 parent c3b8a00 commit 8c273eb
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 9 deletions.
5 changes: 3 additions & 2 deletions changelog/current.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
## Fixes

- (WIP) Fix [#440](https://github.com/biojppm/rapidyaml/issues/440) Some tests failing with gcc -O2 (due to undefined behavior)
- Fix all warnings from `scan-build`
- Fix [#440](https://github.com/biojppm/rapidyaml/issues/440): some tests failing with gcc -O2 (hypothetically due to undefined behavior)
- The fix was accomplished by refactoring some internal parser functions; see the comments in [#440](https://github.com/biojppm/rapidyaml/issues/440) for further details.
- Also, fix all warnings from `scan-build`.
- Use malloc.h instead of alloca.h on MinGW ([PR#447](https://github.com/biojppm/rapidyaml/pull/447))
- Fix [#442](https://github.com/biojppm/rapidyaml/issues/442) ([PR#443](https://github.com/biojppm/rapidyaml/pull/443)):
- Ensure leading `+` is accepted when deserializing numbers.
Expand Down
35 changes: 28 additions & 7 deletions src/c4/yml/parse_engine.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,33 @@ inline bool _is_doc_end_token(csubstr s)
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
}

inline bool _is_doc_token(csubstr s)
inline bool _is_doc_token(csubstr s) noexcept
{
//
// NOTE: this function was failing under some scenarios when
// compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
// related to optimizer assumptions on the input string and
// possibly caused from UB around assignment to that string (the
// call site was in _scan_block()). For more details see:
//
// https://github.com/biojppm/rapidyaml/issues/440
//
// The current version does not suffer this problem, but it may
// appear again.
//
if(s.len >= 3)
{
if(s.str[0] == '-')
return _is_doc_begin_token(s);
else if(s.str[0] == '.')
return _is_doc_end_token(s);
switch(s.str[0])
{
case '-':
//return _is_doc_begin_token(s); // this was failing with gcc -O2
return (s.str[1] == '-' && s.str[2] == '-')
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
case '.':
//return _is_doc_end_token(s); // this was failing with gcc -O2
return (s.str[1] == '.' && s.str[2] == '.')
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
}
}
return false;
}
Expand Down Expand Up @@ -2026,7 +2045,7 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
digits = t.left_of(t.first_not_of("0123456789"));
if( ! digits.empty())
{
if(digits.len > 1)
if(C4_UNLIKELY(digits.len > 1))
_c4err("parse error: invalid indentation");
_c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
Expand Down Expand Up @@ -2067,8 +2086,9 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
// evaluate termination conditions
if(indentation != npos)
{
_c4dbgpf("blck: indentation={}", indentation);
// stop when the line is deindented and not empty
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty()))
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
{
if(raw_block.len)
{
Expand All @@ -2082,6 +2102,7 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
}
else if(indentation == 0)
{
_c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
if(_is_doc_token(lc.rem))
{
_c4dbgp("blck: stop. indentation=0 and doc ended");
Expand Down

0 comments on commit 8c273eb

Please sign in to comment.