diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 2a16edb2..e3660d92 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -71,6 +71,39 @@ jobs: - {name: shared32-run, run: source .github/setenv.sh && c4_run_test shared32} - {name: shared32-pack, run: source .github/setenv.sh && c4_package shared32} + #---------------------------------------------------------------------------- + gcc_O2: # see https://github.com/biojppm/rapidyaml/issues/440 + name: gcc_O2/${{matrix.cxx}}/c++${{matrix.std}} + continue-on-error: true + if: always() # https://stackoverflow.com/questions/62045967/github-actions-is-there-a-way-to-continue-on-error-while-still-getting-correct + runs-on: ubuntu-latest + container: ghcr.io/biojppm/c4core/ubuntu22.04:latest # use the docker image + strategy: + fail-fast: false + matrix: + include: + - {std: 11, gcc: 12 , bt: Release} + env: {STD: "${{matrix.std}}", CXX_: "${{matrix.gcc}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"} + steps: + - {name: checkout, uses: actions/checkout@v3, with: {submodules: recursive}} + - run: git config --system --add safe.directory '*' # needed for running in the docker image. see https://github.com/actions/checkout/issues/1169 + - run: c4core-install g++-${{matrix.gcc}} + - {name: show info, run: source .github/setenv.sh && c4_show_info} + - name: configure + run: | + cmake -S . -B build_o2 \ + -DCMAKE_CXX_COMPILER=g++-${{matrix.gcc}} \ + -DCMAKE_C_COMPILER=gcc-${{matrix.gcc}} \ + -DCMAKE_CXX_FLAGS_RELEASE:STRING="-O2 -g -DNDEBUG" \ + -DRYML_BUILD_TESTS:BOOL=ON \ + -DRYML_DBG:BOOL=OFF + - name: build + run: | + cmake --build build_o2 --target ryml-test-build -j --verbose + - name: run + run: | + cmake --build build_o2 --target ryml-test-run + #---------------------------------------------------------------------------- gcc_tabtokens: name: gcc_canary/${{matrix.cxx}}/c++${{matrix.std}}/${{matrix.bt}} diff --git a/changelog/current.md b/changelog/current.md index 2cde82e4..9f25ef5e 100644 --- a/changelog/current.md +++ b/changelog/current.md @@ -1,7 +1,17 @@ ## Fixes +- Fix [#440](https://github.com/biojppm/rapidyaml/issues/440): some tests failing with gcc -O2 (hypothetically due to undefined behavior) + - The fix was accomplished by refactoring some internal parser functions; see the comments in [#440](https://github.com/biojppm/rapidyaml/issues/440) for further details. + - Also, fix all warnings from `scan-build`. +- Use malloc.h instead of alloca.h on MinGW ([PR#447](https://github.com/biojppm/rapidyaml/pull/447)) - Fix [#442](https://github.com/biojppm/rapidyaml/issues/442) ([PR#443](https://github.com/biojppm/rapidyaml/pull/443)): - Ensure leading `+` is accepted when deserializing numbers. - Ensure numbers are not quoted by fixing the heuristics in `scalar_style_query_plain()` and `scalar_style_choose()`. - Add quickstart sample for overflow detection (only of integral types). - Parse engine: cleanup unused macros + + +## Thanks + +- @toge +- @musicinmybrain diff --git a/ext/c4core b/ext/c4core index f9e970ac..a95b20e8 160000 --- a/ext/c4core +++ b/ext/c4core @@ -1 +1 @@ -Subproject commit f9e970ac152034544d13dc3d9d109e17504274b1 +Subproject commit a95b20e8c02f4d8eb4425a85792d0f6f81007441 diff --git a/src/c4/yml/event_handler_tree.hpp b/src/c4/yml/event_handler_tree.hpp index 6c08ecad..25369df8 100644 --- a/src/c4/yml/event_handler_tree.hpp +++ b/src/c4/yml/event_handler_tree.hpp @@ -104,11 +104,13 @@ struct EventHandlerTree : public EventHandlerStack_stack_start_parse(filename, relocate_arena, relocate_arena_data); } void finish_parse() { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr); if(m_num_directives && !m_tree->is_stream(m_tree->root_id())) _RYML_CB_ERR_(m_stack.m_callbacks, "directives cannot be used without a document", {}); this->_stack_finish_parse(); @@ -307,6 +309,7 @@ struct EventHandlerTree : public EventHandlerStackhas_children(m_parent->node_id)); NodeData const* prev = m_tree->m_buf; // watchout against relocation of the tree nodes @@ -445,6 +448,7 @@ struct EventHandlerTree : public EventHandlerStacknode_id, anchor.len, anchor); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); if(C4_UNLIKELY(_has_any_(KEYREF))) _RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos); _RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&')); @@ -454,6 +458,7 @@ struct EventHandlerTree : public EventHandlerStacknode_id, anchor.len, anchor); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); if(C4_UNLIKELY(_has_any_(VALREF))) _RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos); _RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&')); @@ -464,6 +469,7 @@ struct EventHandlerTree : public EventHandlerStacknode_id, ref.len, ref); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); if(C4_UNLIKELY(_has_any_(KEYANCH))) _RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos); _RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*')); @@ -474,6 +480,7 @@ struct EventHandlerTree : public EventHandlerStacknode_id, ref.len, ref); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); if(C4_UNLIKELY(_has_any_(VALANCH))) _RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos); _RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*')); @@ -541,6 +548,7 @@ struct EventHandlerTree : public EventHandlerStackarena(); substr out = m_tree->alloc_arena(len); substr curr = m_tree->arena(); @@ -551,6 +559,7 @@ struct EventHandlerTree : public EventHandlerStackarena(); if(!prev.is_super(*relocated)) return alloc_arena(len); @@ -568,12 +577,13 @@ struct EventHandlerTree : public EventHandlerStacktype(node); #ifdef RYML_DBG char flagbuf[80]; - #endif _c4dbgpf("resetting state: initial flags={}", detail::_parser_flags_to_str(flagbuf, st->flags)); + #endif if(type == NOTYPE) { _c4dbgpf("node[{}] is notype", node); @@ -612,7 +622,9 @@ struct EventHandlerTree : public EventHandlerStackflags |= RDOC; } + #ifdef RYML_DBG _c4dbgpf("resetting state: final flags={}", detail::_parser_flags_to_str(flagbuf, st->flags)); + #endif } /** push a new parent, add a child to the new parent, and set the @@ -697,6 +709,7 @@ struct EventHandlerTree : public EventHandlerStackcallbacks(), m_tree->size() > 0); const id_type last_added = m_tree->size() - 1; if(m_tree->has_parent(last_added)) @@ -706,6 +719,7 @@ struct EventHandlerTree : public EventHandlerStackcallbacks(), m_tree->size() > 0); const id_type last_added = m_tree->size() - 1; _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_parent(last_added)); @@ -718,6 +732,7 @@ struct EventHandlerTree : public EventHandlerStackcallbacks(), m_tree->_p(m_curr->node_id)->m_val.scalar.len == 0); m_tree->_p(m_curr->node_id)->m_val.scalar.str = m_curr->line_contents.rem.str; } diff --git a/src/c4/yml/parse.cpp b/src/c4/yml/parse.cpp index 144a6c4a..6651fa9f 100644 --- a/src/c4/yml/parse.cpp +++ b/src/c4/yml/parse.cpp @@ -28,7 +28,7 @@ inline void _reset_tree_handler(Parser *parser, Tree *t, id_type node_id) RYML_ASSERT(parser); RYML_ASSERT(t); if(!parser->m_evt_handler) - _RYML_CB_ERR(parser->m_evt_handler->m_stack.m_callbacks, "event handler is not set"); + _RYML_CB_ERR(t->m_callbacks, "event handler is not set"); parser->m_evt_handler->reset(t, node_id); RYML_ASSERT(parser->m_evt_handler->m_tree == t); } diff --git a/src/c4/yml/parse_engine.def.hpp b/src/c4/yml/parse_engine.def.hpp index c9929711..f400e10a 100644 --- a/src/c4/yml/parse_engine.def.hpp +++ b/src/c4/yml/parse_engine.def.hpp @@ -86,14 +86,33 @@ inline bool _is_doc_end_token(csubstr s) && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); } -inline bool _is_doc_token(csubstr s) +inline bool _is_doc_token(csubstr s) noexcept { + // + // NOTE: this function was failing under some scenarios when + // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely + // related to optimizer assumptions on the input string and + // possibly caused from UB around assignment to that string (the + // call site was in _scan_block()). For more details see: + // + // https://github.com/biojppm/rapidyaml/issues/440 + // + // The current version does not suffer this problem, but it may + // appear again. + // if(s.len >= 3) { - if(s.str[0] == '-') - return _is_doc_begin_token(s); - else if(s.str[0] == '.') - return _is_doc_end_token(s); + switch(s.str[0]) + { + case '-': + //return _is_doc_begin_token(s); // this was failing with gcc -O2 + return (s.str[1] == '-' && s.str[2] == '-') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); + case '.': + //return _is_doc_end_token(s); // this was failing with gcc -O2 + return (s.str[1] == '.' && s.str[2] == '.') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); + } } return false; } @@ -2026,7 +2045,7 @@ void ParseEngine::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t digits = t.left_of(t.first_not_of("0123456789")); if( ! digits.empty()) { - if(digits.len > 1) + if(C4_UNLIKELY(digits.len > 1)) _c4err("parse error: invalid indentation"); _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits); if(C4_UNLIKELY( ! c4::atou(digits, &indentation))) @@ -2067,8 +2086,9 @@ void ParseEngine::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t // evaluate termination conditions if(indentation != npos) { + _c4dbgpf("blck: indentation={}", indentation); // stop when the line is deindented and not empty - if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty())) + if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty())) { if(raw_block.len) { @@ -2082,6 +2102,7 @@ void ParseEngine::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t } else if(indentation == 0) { + _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem); if(_is_doc_token(lc.rem)) { _c4dbgp("blck: stop. indentation=0 and doc ended"); @@ -3222,7 +3243,7 @@ void ParseEngine::_filter_block_folded_newlines(FilterProcessor &C // In the end, moving this block to a separate function // was the only way to bury the problem. But it may // resurface again, as The Undead, rising to from the - // grave to haunt us with his terrible + // grave to haunt us with his terrible presence. // // We may have to revisit this. With a stake, and lots of // garlic. diff --git a/src/c4/yml/parser_state.hpp b/src/c4/yml/parser_state.hpp index b1bcfdd6..1977cbf5 100644 --- a/src/c4/yml/parser_state.hpp +++ b/src/c4/yml/parser_state.hpp @@ -58,30 +58,27 @@ struct LineContents void reset_with_next_line(substr buf, size_t offset) { RYML_ASSERT(offset <= buf.len); - char const* C4_RESTRICT b = &buf[offset]; - char const* C4_RESTRICT e = b; + size_t e = offset; // get the current line stripped of newline chars - while(e < buf.end() && (*e != '\n' && *e != '\r')) + while(e < buf.len && (buf.str[e] != '\n' && buf.str[e] != '\r')) ++e; - RYML_ASSERT(e >= b); - const substr stripped_ = buf.sub(offset, static_cast(e - b)); + RYML_ASSERT(e >= offset); + const substr stripped_ = buf.range(offset, e); // advance pos to include the first line ending - if(e != buf.end() && *e == '\r') + if(e < buf.len && buf.str[e] == '\r') ++e; - if(e != buf.end() && *e == '\n') + if(e < buf.len && buf.str[e] == '\n') ++e; - RYML_ASSERT(e >= b); - const substr full_ = buf.sub(offset, static_cast(e - b)); + const substr full_ = buf.range(offset, e); reset(full_, stripped_); } void reset(substr full_, substr stripped_) { + rem = stripped_; + indentation = stripped_.first_not_of(' '); // find the first column where the character is not a space full = full_; stripped = stripped_; - rem = stripped_; - // find the first column where the character is not a space - indentation = stripped.first_not_of(' '); } C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT diff --git a/src/c4/yml/reference_resolver.cpp b/src/c4/yml/reference_resolver.cpp index a4ddfba8..392699a0 100644 --- a/src/c4/yml/reference_resolver.cpp +++ b/src/c4/yml/reference_resolver.cpp @@ -208,16 +208,10 @@ void ReferenceResolver::resolve(Tree *t_) _c4dbgpf("ref {} has parent: {}", i, refdata.parent_ref); _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_seq(refdata.parent_ref)); const id_type p = m_tree->parent(refdata.parent_ref); - id_type after; - if(prev_parent_ref != refdata.parent_ref) - { - after = refdata.parent_ref;//prev_sibling(rd.parent_ref_sibling); - prev_parent_ref_after = after; - } - else - { - after = prev_parent_ref_after; - } + const id_type after = (prev_parent_ref != refdata.parent_ref) ? + refdata.parent_ref//prev_sibling(rd.parent_ref_sibling) + : + prev_parent_ref_after; prev_parent_ref = refdata.parent_ref; prev_parent_ref_after = m_tree->duplicate_children_no_rep(refdata.target, p, after); m_tree->remove(refdata.node); diff --git a/test/test_noderef.cpp b/test/test_noderef.cpp index 5ecffb10..da919b3e 100644 --- a/test/test_noderef.cpp +++ b/test/test_noderef.cpp @@ -1118,14 +1118,12 @@ TEST(NodeRef, vsConstNodeRef) // mseq = seq; // deliberate compilation error seq = mseq; // ok { - NodeData *nd = mseq.get(); - // nd = seq.get(); // deliberate compile error - C4_UNUSED(nd); + (void)mseq.get(); + //(void)seq.get(); // deliberate compile error } { - NodeData const* nd = seq.get(); - nd = seq.get(); // ok - C4_UNUSED(nd); + (void)seq.get(); + (void)seq.get(); // ok } test_invariants(t); } diff --git a/test/test_serialize.cpp b/test/test_serialize.cpp index 8492e06f..fe2dfdcd 100644 --- a/test/test_serialize.cpp +++ b/test/test_serialize.cpp @@ -565,11 +565,11 @@ void test_deserialize_trailing_434(csubstr yaml, csubstr val, csubstr first, dou ConstNodeRef cnode = tree["val"]; NodeRef node = tree["val"]; { - int value; + int value = {}; EXPECT_FALSE(read(cnode, &value)); } { - int value; + int value = {}; EXPECT_FALSE(read(node, &value)); } ExpectError::do_check(&tree, [&]{ @@ -582,22 +582,22 @@ void test_deserialize_trailing_434(csubstr yaml, csubstr val, csubstr first, dou }); float fval = (float)dval; { - float value; + float value = {}; EXPECT_TRUE(read(cnode, &value)); EXPECT_EQ(memcmp(&value, &fval, sizeof(fval)), 0); } { - float value; + float value = {}; EXPECT_TRUE(read(node, &value)); EXPECT_EQ(memcmp(&value, &fval, sizeof(fval)), 0); } { - double value; + double value = {}; EXPECT_TRUE(read(cnode, &value)); EXPECT_EQ(memcmp(&value, &dval, sizeof(dval)), 0); } { - double value; + double value = {}; EXPECT_TRUE(read(node, &value)); EXPECT_EQ(memcmp(&value, &dval, sizeof(dval)), 0); } diff --git a/test/test_tree.cpp b/test/test_tree.cpp index 5e75cf3c..7ebb23d5 100644 --- a/test/test_tree.cpp +++ b/test/test_tree.cpp @@ -719,9 +719,8 @@ TEST(Tree, move_ctor) test_compare(save, src); { Tree dst(std::move(src)); - EXPECT_EQ(src.empty(), true); - EXPECT_EQ(src.size(), 0u); - EXPECT_EQ(src.arena().empty(), true); + EXPECT_EQ(src.m_size, 0u); + EXPECT_EQ(src.m_arena_pos, 0u); EXPECT_EQ(dst.size(), save.size()); EXPECT_EQ(dst.arena(), save.arena()); test_invariants(src); @@ -789,10 +788,9 @@ TEST(Tree, move_assign_same_callbacks) EXPECT_NE(dst.size(), 0u); EXPECT_NE(dst.arena().empty(), true); dst = std::move(src); - EXPECT_EQ(src.empty(), true); - EXPECT_EQ(src.size(), 0u); - EXPECT_EQ(src.arena().empty(), true); - EXPECT_EQ(src.callbacks(), cbt.callbacks()); + EXPECT_EQ(src.m_size, 0u); + EXPECT_EQ(src.m_arena_pos, 0u); + EXPECT_EQ(src.m_callbacks, cbt.callbacks()); EXPECT_EQ(dst.size(), save.size()); EXPECT_EQ(dst.arena(), save.arena()); EXPECT_EQ(dst.callbacks(), save.callbacks()); @@ -820,10 +818,9 @@ TEST(Tree, move_assign_diff_callbacks) EXPECT_NE(dst.arena().empty(), true); EXPECT_EQ(dst.callbacks(), cbdst.callbacks()); dst = std::move(src); - EXPECT_EQ(src.empty(), true); - EXPECT_EQ(src.size(), 0u); - EXPECT_EQ(src.arena().empty(), true); - EXPECT_EQ(src.callbacks(), cbsrc.callbacks()); + EXPECT_EQ(src.m_size, 0u); + EXPECT_EQ(src.m_arena_pos, 0u); + EXPECT_EQ(src.m_callbacks, cbsrc.callbacks()); EXPECT_EQ(dst.size(), save.size()); EXPECT_EQ(dst.arena(), save.arena()); EXPECT_NE(dst.callbacks(), cbdst.callbacks()); @@ -3714,16 +3711,16 @@ TEST(Tree, lookup_path_or_modify) EXPECT_EQ(t["newmap2"]["newseq2"][2]["newmap2"]["newseq2"][2].val(), nullptr); EXPECT_EQ(t["newmap2"]["newseq2"][2]["newmap2"]["newseq2"][3].is_map(), true); EXPECT_EQ(t["newmap2"]["newseq2"][2]["newmap2"]["newseq2"][3]["first2"].val(), "y"); - sz2 = t.lookup_path_or_modify("z", "newmap2.newseq2[2].newmap2.newseq2[3].second2"); + (void)t.lookup_path_or_modify("z", "newmap2.newseq2[2].newmap2.newseq2[3].second2"); EXPECT_EQ (t["newmap2"]["newseq2"][2]["newmap2"]["newseq2"][3]["second2"].val(), "z"); - sz = t.lookup_path_or_modify("foo", "newmap.newseq1[1]"); + (void)t.lookup_path_or_modify("foo", "newmap.newseq1[1]"); EXPECT_EQ(t["newmap"].is_map(), true); EXPECT_EQ(t["newmap"]["newseq1"].is_seq(), true); EXPECT_EQ(t["newmap"]["newseq1"].num_children(), 2u); EXPECT_EQ(t["newmap"]["newseq1"][0].val(), nullptr); EXPECT_EQ(t["newmap"]["newseq1"][1].val(), "foo"); - sz = t.lookup_path_or_modify("bar", "newmap.newseq1[2][1]"); + (void)t.lookup_path_or_modify("bar", "newmap.newseq1[2][1]"); EXPECT_EQ(t["newmap"]["newseq1"].is_seq(), true); EXPECT_EQ(t["newmap"]["newseq1"].num_children(), 3u); EXPECT_EQ(t["newmap"]["newseq1"][0].val(), nullptr); @@ -3732,12 +3729,12 @@ TEST(Tree, lookup_path_or_modify) EXPECT_EQ(t["newmap"]["newseq1"][2].num_children(), 2u); EXPECT_EQ(t["newmap"]["newseq1"][2][0].val(), nullptr); EXPECT_EQ(t["newmap"]["newseq1"][2][1].val(), "bar"); - sz = t.lookup_path_or_modify("Foo?" , "newmap.newseq1[0]"); - sz = t.lookup_path_or_modify("Bar?" , "newmap.newseq1[2][0]"); - sz = t.lookup_path_or_modify("happy" , "newmap.newseq1[2][2][3]"); - sz = t.lookup_path_or_modify("trigger", "newmap.newseq1[2][2][2]"); - sz = t.lookup_path_or_modify("Arnold" , "newmap.newseq1[2][2][0]"); - sz = t.lookup_path_or_modify("is" , "newmap.newseq1[2][2][1]"); + (void)t.lookup_path_or_modify("Foo?" , "newmap.newseq1[0]"); + (void)t.lookup_path_or_modify("Bar?" , "newmap.newseq1[2][0]"); + (void)t.lookup_path_or_modify("happy" , "newmap.newseq1[2][2][3]"); + (void)t.lookup_path_or_modify("trigger", "newmap.newseq1[2][2][2]"); + (void)t.lookup_path_or_modify("Arnold" , "newmap.newseq1[2][2][0]"); + (void)t.lookup_path_or_modify("is" , "newmap.newseq1[2][2][1]"); EXPECT_EQ(t["newmap"]["newseq1"].is_seq(), true); EXPECT_EQ(t["newmap"]["newseq1"].num_children(), 3u); EXPECT_EQ(t["newmap"]["newseq1"][0].val(), "Foo?");