Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix test failures with gcc -O2 #448

Merged
merged 4 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,39 @@ jobs:
- {name: shared32-run, run: source .github/setenv.sh && c4_run_test shared32}
- {name: shared32-pack, run: source .github/setenv.sh && c4_package shared32}

#----------------------------------------------------------------------------
gcc_O2: # see https://github.com/biojppm/rapidyaml/issues/440
name: gcc_O2/${{matrix.cxx}}/c++${{matrix.std}}
continue-on-error: true
if: always() # https://stackoverflow.com/questions/62045967/github-actions-is-there-a-way-to-continue-on-error-while-still-getting-correct
runs-on: ubuntu-latest
container: ghcr.io/biojppm/c4core/ubuntu22.04:latest # use the docker image
strategy:
fail-fast: false
matrix:
include:
- {std: 11, gcc: 12 , bt: Release}
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.gcc}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
steps:
- {name: checkout, uses: actions/checkout@v3, with: {submodules: recursive}}
- run: git config --system --add safe.directory '*' # needed for running in the docker image. see https://github.com/actions/checkout/issues/1169
- run: c4core-install g++-${{matrix.gcc}}
- {name: show info, run: source .github/setenv.sh && c4_show_info}
- name: configure
run: |
cmake -S . -B build_o2 \
-DCMAKE_CXX_COMPILER=g++-${{matrix.gcc}} \
-DCMAKE_C_COMPILER=gcc-${{matrix.gcc}} \
-DCMAKE_CXX_FLAGS_RELEASE:STRING="-O2 -g -DNDEBUG" \
-DRYML_BUILD_TESTS:BOOL=ON \
-DRYML_DBG:BOOL=OFF
- name: build
run: |
cmake --build build_o2 --target ryml-test-build -j --verbose
- name: run
run: |
cmake --build build_o2 --target ryml-test-run

#----------------------------------------------------------------------------
gcc_tabtokens:
name: gcc_canary/${{matrix.cxx}}/c++${{matrix.std}}/${{matrix.bt}}
Expand Down
10 changes: 10 additions & 0 deletions changelog/current.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
## Fixes

- Fix [#440](https://github.com/biojppm/rapidyaml/issues/440): some tests failing with gcc -O2 (hypothetically due to undefined behavior)
- The fix was accomplished by refactoring some internal parser functions; see the comments in [#440](https://github.com/biojppm/rapidyaml/issues/440) for further details.
- Also, fix all warnings from `scan-build`.
- Use malloc.h instead of alloca.h on MinGW ([PR#447](https://github.com/biojppm/rapidyaml/pull/447))
- Fix [#442](https://github.com/biojppm/rapidyaml/issues/442) ([PR#443](https://github.com/biojppm/rapidyaml/pull/443)):
- Ensure leading `+` is accepted when deserializing numbers.
- Ensure numbers are not quoted by fixing the heuristics in `scalar_style_query_plain()` and `scalar_style_choose()`.
- Add quickstart sample for overflow detection (only of integral types).
- Parse engine: cleanup unused macros


## Thanks

- @toge
- @musicinmybrain
17 changes: 16 additions & 1 deletion src/c4/yml/event_handler_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,13 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

void start_parse(const char* filename, detail::pfn_relocate_arena relocate_arena, void *relocate_arena_data)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr);
this->_stack_start_parse(filename, relocate_arena, relocate_arena_data);
}

void finish_parse()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr);
if(m_num_directives && !m_tree->is_stream(m_tree->root_id()))
_RYML_CB_ERR_(m_stack.m_callbacks, "directives cannot be used without a document", {});
this->_stack_finish_parse();
Expand Down Expand Up @@ -307,6 +309,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

void add_sibling()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_parent);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_children(m_parent->node_id));
NodeData const* prev = m_tree->m_buf; // watchout against relocation of the tree nodes
Expand Down Expand Up @@ -445,6 +448,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void set_key_anchor(csubstr anchor)
{
_c4dbgpf("node[{}]: set key anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
if(C4_UNLIKELY(_has_any_(KEYREF)))
_RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos);
_RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&'));
Expand All @@ -454,6 +458,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void set_val_anchor(csubstr anchor)
{
_c4dbgpf("node[{}]: set val anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
if(C4_UNLIKELY(_has_any_(VALREF)))
_RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos);
_RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&'));
Expand All @@ -464,6 +469,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void set_key_ref(csubstr ref)
{
_c4dbgpf("node[{}]: set key ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
if(C4_UNLIKELY(_has_any_(KEYANCH)))
_RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos);
_RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*'));
Expand All @@ -474,6 +480,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void set_val_ref(csubstr ref)
{
_c4dbgpf("node[{}]: set val ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
if(C4_UNLIKELY(_has_any_(VALANCH)))
_RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos);
_RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*'));
Expand Down Expand Up @@ -541,6 +548,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

substr alloc_arena(size_t len)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
csubstr prev = m_tree->arena();
substr out = m_tree->alloc_arena(len);
substr curr = m_tree->arena();
Expand All @@ -551,6 +559,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

substr alloc_arena(size_t len, substr *relocated)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
csubstr prev = m_tree->arena();
if(!prev.is_super(*relocated))
return alloc_arena(len);
Expand All @@ -568,12 +577,13 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
/** @cond dev */
void _reset_parser_state(state* st, id_type parse_root, id_type node)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_set_state_(st, node);
const NodeType type = m_tree->type(node);
#ifdef RYML_DBG
char flagbuf[80];
#endif
_c4dbgpf("resetting state: initial flags={}", detail::_parser_flags_to_str(flagbuf, st->flags));
#endif
if(type == NOTYPE)
{
_c4dbgpf("node[{}] is notype", node);
Expand Down Expand Up @@ -612,7 +622,9 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
_c4dbgpf("node[{}] is doc", node);
st->flags |= RDOC;
}
#ifdef RYML_DBG
_c4dbgpf("resetting state: final flags={}", detail::_parser_flags_to_str(flagbuf, st->flags));
#endif
}

/** push a new parent, add a child to the new parent, and set the
Expand Down Expand Up @@ -697,6 +709,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void _remove_speculative()
{
_c4dbgp("remove speculative node");
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0);
const id_type last_added = m_tree->size() - 1;
if(m_tree->has_parent(last_added))
Expand All @@ -706,6 +719,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

void _remove_speculative_with_parent()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0);
const id_type last_added = m_tree->size() - 1;
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_parent(last_added));
Expand All @@ -718,6 +732,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

C4_ALWAYS_INLINE void _save_loc()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->_p(m_curr->node_id)->m_val.scalar.len == 0);
m_tree->_p(m_curr->node_id)->m_val.scalar.str = m_curr->line_contents.rem.str;
}
Expand Down
2 changes: 1 addition & 1 deletion src/c4/yml/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
RYML_ASSERT(parser);
RYML_ASSERT(t);
if(!parser->m_evt_handler)
_RYML_CB_ERR(parser->m_evt_handler->m_stack.m_callbacks, "event handler is not set");
_RYML_CB_ERR(t->m_callbacks, "event handler is not set");

Check warning on line 31 in src/c4/yml/parse.cpp

View check run for this annotation

Codecov / codecov/patch

src/c4/yml/parse.cpp#L31

Added line #L31 was not covered by tests
parser->m_evt_handler->reset(t, node_id);
RYML_ASSERT(parser->m_evt_handler->m_tree == t);
}
Expand Down
37 changes: 29 additions & 8 deletions src/c4/yml/parse_engine.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,33 @@ inline bool _is_doc_end_token(csubstr s)
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
}

inline bool _is_doc_token(csubstr s)
inline bool _is_doc_token(csubstr s) noexcept
{
//
// NOTE: this function was failing under some scenarios when
// compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
// related to optimizer assumptions on the input string and
// possibly caused from UB around assignment to that string (the
// call site was in _scan_block()). For more details see:
//
// https://github.com/biojppm/rapidyaml/issues/440
//
// The current version does not suffer this problem, but it may
// appear again.
//
if(s.len >= 3)
{
if(s.str[0] == '-')
return _is_doc_begin_token(s);
else if(s.str[0] == '.')
return _is_doc_end_token(s);
switch(s.str[0])
{
case '-':
//return _is_doc_begin_token(s); // this was failing with gcc -O2
return (s.str[1] == '-' && s.str[2] == '-')
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
case '.':
//return _is_doc_end_token(s); // this was failing with gcc -O2
return (s.str[1] == '.' && s.str[2] == '.')
&& (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
}
}
return false;
}
Expand Down Expand Up @@ -2026,7 +2045,7 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
digits = t.left_of(t.first_not_of("0123456789"));
if( ! digits.empty())
{
if(digits.len > 1)
if(C4_UNLIKELY(digits.len > 1))
_c4err("parse error: invalid indentation");
_c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
Expand Down Expand Up @@ -2067,8 +2086,9 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
// evaluate termination conditions
if(indentation != npos)
{
_c4dbgpf("blck: indentation={}", indentation);
// stop when the line is deindented and not empty
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty()))
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
{
if(raw_block.len)
{
Expand All @@ -2082,6 +2102,7 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
}
else if(indentation == 0)
{
_c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
if(_is_doc_token(lc.rem))
{
_c4dbgp("blck: stop. indentation=0 and doc ended");
Expand Down Expand Up @@ -3222,7 +3243,7 @@ void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C
// In the end, moving this block to a separate function
// was the only way to bury the problem. But it may
// resurface again, as The Undead, rising to from the
// grave to haunt us with his terrible
// grave to haunt us with his terrible presence.
//
// We may have to revisit this. With a stake, and lots of
// garlic.
Expand Down
21 changes: 9 additions & 12 deletions src/c4/yml/parser_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,30 +58,27 @@ struct LineContents
void reset_with_next_line(substr buf, size_t offset)
{
RYML_ASSERT(offset <= buf.len);
char const* C4_RESTRICT b = &buf[offset];
char const* C4_RESTRICT e = b;
size_t e = offset;
// get the current line stripped of newline chars
while(e < buf.end() && (*e != '\n' && *e != '\r'))
while(e < buf.len && (buf.str[e] != '\n' && buf.str[e] != '\r'))
++e;
RYML_ASSERT(e >= b);
const substr stripped_ = buf.sub(offset, static_cast<size_t>(e - b));
RYML_ASSERT(e >= offset);
const substr stripped_ = buf.range(offset, e);
// advance pos to include the first line ending
if(e != buf.end() && *e == '\r')
if(e < buf.len && buf.str[e] == '\r')
++e;
if(e != buf.end() && *e == '\n')
if(e < buf.len && buf.str[e] == '\n')
++e;
RYML_ASSERT(e >= b);
const substr full_ = buf.sub(offset, static_cast<size_t>(e - b));
const substr full_ = buf.range(offset, e);
reset(full_, stripped_);
}

void reset(substr full_, substr stripped_)
{
rem = stripped_;
indentation = stripped_.first_not_of(' '); // find the first column where the character is not a space
full = full_;
stripped = stripped_;
rem = stripped_;
// find the first column where the character is not a space
indentation = stripped.first_not_of(' ');
}

C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT
Expand Down
14 changes: 4 additions & 10 deletions src/c4/yml/reference_resolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,16 +208,10 @@ void ReferenceResolver::resolve(Tree *t_)
_c4dbgpf("ref {} has parent: {}", i, refdata.parent_ref);
_RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_seq(refdata.parent_ref));
const id_type p = m_tree->parent(refdata.parent_ref);
id_type after;
if(prev_parent_ref != refdata.parent_ref)
{
after = refdata.parent_ref;//prev_sibling(rd.parent_ref_sibling);
prev_parent_ref_after = after;
}
else
{
after = prev_parent_ref_after;
}
const id_type after = (prev_parent_ref != refdata.parent_ref) ?
refdata.parent_ref//prev_sibling(rd.parent_ref_sibling)
:
prev_parent_ref_after;
prev_parent_ref = refdata.parent_ref;
prev_parent_ref_after = m_tree->duplicate_children_no_rep(refdata.target, p, after);
m_tree->remove(refdata.node);
Expand Down
10 changes: 4 additions & 6 deletions test/test_noderef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1118,14 +1118,12 @@ TEST(NodeRef, vsConstNodeRef)
// mseq = seq; // deliberate compilation error
seq = mseq; // ok
{
NodeData *nd = mseq.get();
// nd = seq.get(); // deliberate compile error
C4_UNUSED(nd);
(void)mseq.get();
//(void)seq.get(); // deliberate compile error
}
{
NodeData const* nd = seq.get();
nd = seq.get(); // ok
C4_UNUSED(nd);
(void)seq.get();
(void)seq.get(); // ok
}
test_invariants(t);
}
Expand Down
12 changes: 6 additions & 6 deletions test/test_serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,11 +565,11 @@ void test_deserialize_trailing_434(csubstr yaml, csubstr val, csubstr first, dou
ConstNodeRef cnode = tree["val"];
NodeRef node = tree["val"];
{
int value;
int value = {};
EXPECT_FALSE(read(cnode, &value));
}
{
int value;
int value = {};
EXPECT_FALSE(read(node, &value));
}
ExpectError::do_check(&tree, [&]{
Expand All @@ -582,22 +582,22 @@ void test_deserialize_trailing_434(csubstr yaml, csubstr val, csubstr first, dou
});
float fval = (float)dval;
{
float value;
float value = {};
EXPECT_TRUE(read(cnode, &value));
EXPECT_EQ(memcmp(&value, &fval, sizeof(fval)), 0);
}
{
float value;
float value = {};
EXPECT_TRUE(read(node, &value));
EXPECT_EQ(memcmp(&value, &fval, sizeof(fval)), 0);
}
{
double value;
double value = {};
EXPECT_TRUE(read(cnode, &value));
EXPECT_EQ(memcmp(&value, &dval, sizeof(dval)), 0);
}
{
double value;
double value = {};
EXPECT_TRUE(read(node, &value));
EXPECT_EQ(memcmp(&value, &dval, sizeof(dval)), 0);
}
Expand Down
Loading
Loading