Skip to content

Commit

Permalink
Merge pull request #448 from biojppm/fix/440
Browse files Browse the repository at this point in the history
Fix test failures with gcc -O2
  • Loading branch information
biojppm authored Jun 28, 2024
2 parents e837309 + 8c273eb commit 619ad08
Show file tree
Hide file tree
Showing 11 changed files with 130 additions and 65 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,39 @@ jobs:
- {name: shared32-run, run: source .github/setenv.sh && c4_run_test shared32}
- {name: shared32-pack, run: source .github/setenv.sh && c4_package shared32}

#----------------------------------------------------------------------------
gcc_O2: # see https://github.com/biojppm/rapidyaml/issues/440
name: gcc_O2/${{matrix.cxx}}/c++${{matrix.std}}
continue-on-error: true
if: always() # https://stackoverflow.com/questions/62045967/github-actions-is-there-a-way-to-continue-on-error-while-still-getting-correct
runs-on: ubuntu-latest
container: ghcr.io/biojppm/c4core/ubuntu22.04:latest # use the docker image
strategy:
fail-fast: false
matrix:
include:
- {std: 11, gcc: 12 , bt: Release}
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.gcc}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
steps:
- {name: checkout, uses: actions/checkout@v3, with: {submodules: recursive}}
- run: git config --system --add safe.directory '*' # needed for running in the docker image. see https://github.com/actions/checkout/issues/1169
- run: c4core-install g++-${{matrix.gcc}}
- {name: show info, run: source .github/setenv.sh && c4_show_info}
- name: configure
run: |
cmake -S . -B build_o2 \
-DCMAKE_CXX_COMPILER=g++-${{matrix.gcc}} \
-DCMAKE_C_COMPILER=gcc-${{matrix.gcc}} \
-DCMAKE_CXX_FLAGS_RELEASE:STRING="-O2 -g -DNDEBUG" \
-DRYML_BUILD_TESTS:BOOL=ON \
-DRYML_DBG:BOOL=OFF
- name: build
run: |
cmake --build build_o2 --target ryml-test-build -j --verbose
- name: run
run: |
cmake --build build_o2 --target ryml-test-run
#----------------------------------------------------------------------------
gcc_tabtokens:
name: gcc_canary/${{matrix.cxx}}/c++${{matrix.std}}/${{matrix.bt}}
Expand Down
10 changes: 10 additions & 0 deletions changelog/current.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
## Fixes

- Fix [#440](https://github.com/biojppm/rapidyaml/issues/440): some tests failing with gcc -O2 (hypothetically due to undefined behavior)
- The fix was accomplished by refactoring some internal parser functions; see the comments in [#440](https://github.com/biojppm/rapidyaml/issues/440) for further details.
- Also, fix all warnings from `scan-build`.
- Use malloc.h instead of alloca.h on MinGW ([PR#447](https://github.com/biojppm/rapidyaml/pull/447))
- Fix [#442](https://github.com/biojppm/rapidyaml/issues/442) ([PR#443](https://github.com/biojppm/rapidyaml/pull/443)):
- Ensure leading `+` is accepted when deserializing numbers.
- Ensure numbers are not quoted by fixing the heuristics in `scalar_style_query_plain()` and `scalar_style_choose()`.
- Add quickstart sample for overflow detection (only of integral types).
- Parse engine: cleanup unused macros


## Thanks

- @toge
- @musicinmybrain
2 changes: 1 addition & 1 deletion ext/c4core
17 changes: 16 additions & 1 deletion src/c4/yml/event_handler_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,13 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

void start_parse(const char* filename, detail::pfn_relocate_arena relocate_arena, void *relocate_arena_data)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr);
this->_stack_start_parse(filename, relocate_arena, relocate_arena_data);
}

void finish_parse()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr);
if(m_num_directives && !m_tree->is_stream(m_tree->root_id()))
_RYML_CB_ERR_(m_stack.m_callbacks, "directives cannot be used without a document", {});
this->_stack_finish_parse();
Expand Down Expand Up @@ -307,6 +309,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

void add_sibling()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_parent);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_children(m_parent->node_id));
NodeData const* prev = m_tree->m_buf; // watchout against relocation of the tree nodes
Expand Down Expand Up @@ -445,6 +448,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void set_key_anchor(csubstr anchor)
{
_c4dbgpf("node[{}]: set key anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
if(C4_UNLIKELY(_has_any_(KEYREF)))
_RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos);
_RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&'));
Expand All @@ -454,6 +458,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void set_val_anchor(csubstr anchor)
{
_c4dbgpf("node[{}]: set val anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
if(C4_UNLIKELY(_has_any_(VALREF)))
_RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos);
_RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&'));
Expand All @@ -464,6 +469,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void set_key_ref(csubstr ref)
{
_c4dbgpf("node[{}]: set key ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
if(C4_UNLIKELY(_has_any_(KEYANCH)))
_RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos);
_RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*'));
Expand All @@ -474,6 +480,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void set_val_ref(csubstr ref)
{
_c4dbgpf("node[{}]: set val ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
if(C4_UNLIKELY(_has_any_(VALANCH)))
_RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos);
_RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*'));
Expand Down Expand Up @@ -541,6 +548,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

substr alloc_arena(size_t len)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
csubstr prev = m_tree->arena();
substr out = m_tree->alloc_arena(len);
substr curr = m_tree->arena();
Expand All @@ -551,6 +559,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

substr alloc_arena(size_t len, substr *relocated)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
csubstr prev = m_tree->arena();
if(!prev.is_super(*relocated))
return alloc_arena(len);
Expand All @@ -568,12 +577,13 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
/** @cond dev */
void _reset_parser_state(state* st, id_type parse_root, id_type node)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_set_state_(st, node);
const NodeType type = m_tree->type(node);
#ifdef RYML_DBG
char flagbuf[80];
#endif
_c4dbgpf("resetting state: initial flags={}", detail::_parser_flags_to_str(flagbuf, st->flags));
#endif
if(type == NOTYPE)
{
_c4dbgpf("node[{}] is notype", node);
Expand Down Expand Up @@ -612,7 +622,9 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
_c4dbgpf("node[{}] is doc", node);
st->flags |= RDOC;
}
#ifdef RYML_DBG
_c4dbgpf("resetting state: final flags={}", detail::_parser_flags_to_str(flagbuf, st->flags));
#endif
}

/** push a new parent, add a child to the new parent, and set the
Expand Down Expand Up @@ -697,6 +709,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void _remove_speculative()
{
_c4dbgp("remove speculative node");
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0);
const id_type last_added = m_tree->size() - 1;
if(m_tree->has_parent(last_added))
Expand All @@ -706,6 +719,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

void _remove_speculative_with_parent()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0);
const id_type last_added = m_tree->size() - 1;
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_parent(last_added));
Expand All @@ -718,6 +732,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

C4_ALWAYS_INLINE void _save_loc()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->_p(m_curr->node_id)->m_val.scalar.len == 0);
m_tree->_p(m_curr->node_id)->m_val.scalar.str = m_curr->line_contents.rem.str;
}
Expand Down
2 changes: 1 addition & 1 deletion src/c4/yml/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ inline void _reset_tree_handler(Parser *parser, Tree *t, id_type node_id)
RYML_ASSERT(parser);
RYML_ASSERT(t);
if(!parser->m_evt_handler)
_RYML_CB_ERR(parser->m_evt_handler->m_stack.m_callbacks, "event handler is not set");
_RYML_CB_ERR(t->m_callbacks, "event handler is not set");
parser->m_evt_handler->reset(t, node_id);
RYML_ASSERT(parser->m_evt_handler->m_tree == t);
}
Expand Down
37 changes: 29 additions & 8 deletions src/c4/yml/parse_engine.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,33 @@ inline bool _is_doc_end_token(csubstr s)
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
}

inline bool _is_doc_token(csubstr s)
inline bool _is_doc_token(csubstr s) noexcept
{
//
// NOTE: this function was failing under some scenarios when
// compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
// related to optimizer assumptions on the input string and
// possibly caused from UB around assignment to that string (the
// call site was in _scan_block()). For more details see:
//
// https://github.com/biojppm/rapidyaml/issues/440
//
// The current version does not suffer this problem, but it may
// appear again.
//
if(s.len >= 3)
{
if(s.str[0] == '-')
return _is_doc_begin_token(s);
else if(s.str[0] == '.')
return _is_doc_end_token(s);
switch(s.str[0])
{
case '-':
//return _is_doc_begin_token(s); // this was failing with gcc -O2
return (s.str[1] == '-' && s.str[2] == '-')
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
case '.':
//return _is_doc_end_token(s); // this was failing with gcc -O2
return (s.str[1] == '.' && s.str[2] == '.')
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
}
}
return false;
}
Expand Down Expand Up @@ -2026,7 +2045,7 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
digits = t.left_of(t.first_not_of("0123456789"));
if( ! digits.empty())
{
if(digits.len > 1)
if(C4_UNLIKELY(digits.len > 1))
_c4err("parse error: invalid indentation");
_c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
Expand Down Expand Up @@ -2067,8 +2086,9 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
// evaluate termination conditions
if(indentation != npos)
{
_c4dbgpf("blck: indentation={}", indentation);
// stop when the line is deindented and not empty
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty()))
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
{
if(raw_block.len)
{
Expand All @@ -2082,6 +2102,7 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
}
else if(indentation == 0)
{
_c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
if(_is_doc_token(lc.rem))
{
_c4dbgp("blck: stop. indentation=0 and doc ended");
Expand Down Expand Up @@ -3222,7 +3243,7 @@ void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C
// In the end, moving this block to a separate function
// was the only way to bury the problem. But it may
// resurface again, as The Undead, rising to from the
// grave to haunt us with his terrible
// grave to haunt us with his terrible presence.
//
// We may have to revisit this. With a stake, and lots of
// garlic.
Expand Down
21 changes: 9 additions & 12 deletions src/c4/yml/parser_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,30 +58,27 @@ struct LineContents
void reset_with_next_line(substr buf, size_t offset)
{
RYML_ASSERT(offset <= buf.len);
char const* C4_RESTRICT b = &buf[offset];
char const* C4_RESTRICT e = b;
size_t e = offset;
// get the current line stripped of newline chars
while(e < buf.end() && (*e != '\n' && *e != '\r'))
while(e < buf.len && (buf.str[e] != '\n' && buf.str[e] != '\r'))
++e;
RYML_ASSERT(e >= b);
const substr stripped_ = buf.sub(offset, static_cast<size_t>(e - b));
RYML_ASSERT(e >= offset);
const substr stripped_ = buf.range(offset, e);
// advance pos to include the first line ending
if(e != buf.end() && *e == '\r')
if(e < buf.len && buf.str[e] == '\r')
++e;
if(e != buf.end() && *e == '\n')
if(e < buf.len && buf.str[e] == '\n')
++e;
RYML_ASSERT(e >= b);
const substr full_ = buf.sub(offset, static_cast<size_t>(e - b));
const substr full_ = buf.range(offset, e);
reset(full_, stripped_);
}

void reset(substr full_, substr stripped_)
{
rem = stripped_;
indentation = stripped_.first_not_of(' '); // find the first column where the character is not a space
full = full_;
stripped = stripped_;
rem = stripped_;
// find the first column where the character is not a space
indentation = stripped.first_not_of(' ');
}

C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT
Expand Down
14 changes: 4 additions & 10 deletions src/c4/yml/reference_resolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,16 +208,10 @@ void ReferenceResolver::resolve(Tree *t_)
_c4dbgpf("ref {} has parent: {}", i, refdata.parent_ref);
_RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_seq(refdata.parent_ref));
const id_type p = m_tree->parent(refdata.parent_ref);
id_type after;
if(prev_parent_ref != refdata.parent_ref)
{
after = refdata.parent_ref;//prev_sibling(rd.parent_ref_sibling);
prev_parent_ref_after = after;
}
else
{
after = prev_parent_ref_after;
}
const id_type after = (prev_parent_ref != refdata.parent_ref) ?
refdata.parent_ref//prev_sibling(rd.parent_ref_sibling)
:
prev_parent_ref_after;
prev_parent_ref = refdata.parent_ref;
prev_parent_ref_after = m_tree->duplicate_children_no_rep(refdata.target, p, after);
m_tree->remove(refdata.node);
Expand Down
10 changes: 4 additions & 6 deletions test/test_noderef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1118,14 +1118,12 @@ TEST(NodeRef, vsConstNodeRef)
// mseq = seq; // deliberate compilation error
seq = mseq; // ok
{
NodeData *nd = mseq.get();
// nd = seq.get(); // deliberate compile error
C4_UNUSED(nd);
(void)mseq.get();
//(void)seq.get(); // deliberate compile error
}
{
NodeData const* nd = seq.get();
nd = seq.get(); // ok
C4_UNUSED(nd);
(void)seq.get();
(void)seq.get(); // ok
}
test_invariants(t);
}
Expand Down
12 changes: 6 additions & 6 deletions test/test_serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,11 +565,11 @@ void test_deserialize_trailing_434(csubstr yaml, csubstr val, csubstr first, dou
ConstNodeRef cnode = tree["val"];
NodeRef node = tree["val"];
{
int value;
int value = {};
EXPECT_FALSE(read(cnode, &value));
}
{
int value;
int value = {};
EXPECT_FALSE(read(node, &value));
}
ExpectError::do_check(&tree, [&]{
Expand All @@ -582,22 +582,22 @@ void test_deserialize_trailing_434(csubstr yaml, csubstr val, csubstr first, dou
});
float fval = (float)dval;
{
float value;
float value = {};
EXPECT_TRUE(read(cnode, &value));
EXPECT_EQ(memcmp(&value, &fval, sizeof(fval)), 0);
}
{
float value;
float value = {};
EXPECT_TRUE(read(node, &value));
EXPECT_EQ(memcmp(&value, &fval, sizeof(fval)), 0);
}
{
double value;
double value = {};
EXPECT_TRUE(read(cnode, &value));
EXPECT_EQ(memcmp(&value, &dval, sizeof(dval)), 0);
}
{
double value;
double value = {};
EXPECT_TRUE(read(node, &value));
EXPECT_EQ(memcmp(&value, &dval, sizeof(dval)), 0);
}
Expand Down
Loading

0 comments on commit 619ad08

Please sign in to comment.