From e0c80ad871147330f990fa953e95dcdf7b0aa5e3 Mon Sep 17 00:00:00 2001 From: Matthieu Eyraud Date: Wed, 16 Aug 2023 16:27:03 +0200 Subject: [PATCH] C++ instrumentation: fix for ranges support The instrumentation scheme for C++ for ranges resulted in incorrect code when initializer lists were involved, e.g.: ``` for (auto i : {1, 2) { } ``` being instrumented as ``` for (auto i : (witness(), {1, 2}) { } ``` which is invalid C++ code. To fix this, we now instrument such expressions as followed: ``` witness(); for (auto i : {1, 2}) { } ``` We can't execute the loop statement without executing the witness with a goto as it is forbidden to bypass initialization statements in C++. When there is an initialization statement, e.g.: ``` for (int j = 0; auto i : {1, 2}){} ``` we actually introduce an outer scope and instrument as followed: ``` { witness_j(); int j = 0; witness_i(); for (auto i : {1, 2}){} } ``` (cherry picked from commit c29e1be8bf3ab223654eda810b4a50d97f9b2ddb) --- .../src/test_for_range.cpp | 18 +++ .../ForRange/WithInitializerRange/test.py | 13 ++ tools/gnatcov/clang-extensions.adb | 23 +++ tools/gnatcov/clang-extensions.ads | 6 + tools/gnatcov/clang-wrapper.cc | 15 +- tools/gnatcov/instrument-c.adb | 132 +++++++++++++++++- tools/gnatcov/instrument-c.ads | 12 ++ 7 files changed, 210 insertions(+), 9 deletions(-) create mode 100644 testsuite/tests/C++/stmt/ForRange/WithInitializerRange/src/test_for_range.cpp create mode 100644 testsuite/tests/C++/stmt/ForRange/WithInitializerRange/test.py diff --git a/testsuite/tests/C++/stmt/ForRange/WithInitializerRange/src/test_for_range.cpp b/testsuite/tests/C++/stmt/ForRange/WithInitializerRange/src/test_for_range.cpp new file mode 100644 index 000000000..736210b0c --- /dev/null +++ b/testsuite/tests/C++/stmt/ForRange/WithInitializerRange/src/test_for_range.cpp @@ -0,0 +1,18 @@ +#include + +int +main (void) +{ + int sum = 0; // # init + for (auto i : { 1, 2, 3 }) // # for-range + { + sum += i; // # for-body + } + return 0; +} + +//# test_for_range.cpp +// +// /init/ l+ ## 0 +// /for-range/ l+ ## 0 +// /for-body/ l+ ## 0 diff --git a/testsuite/tests/C++/stmt/ForRange/WithInitializerRange/test.py b/testsuite/tests/C++/stmt/ForRange/WithInitializerRange/test.py new file mode 100644 index 000000000..c34f08074 --- /dev/null +++ b/testsuite/tests/C++/stmt/ForRange/WithInitializerRange/test.py @@ -0,0 +1,13 @@ +""" +Test that gnatcov correctly instruments a for range with an initializer as the +range expression and an initialization statement, e.g. +``` +for (int i = 0; auto j : {1, 2}){} +``` +""" + +from SCOV.tc import TestCase +from SUITE.context import thistest + +TestCase().run() +thistest.result() diff --git a/tools/gnatcov/clang-extensions.adb b/tools/gnatcov/clang-extensions.adb index 2dae8f683..f29eeef17 100644 --- a/tools/gnatcov/clang-extensions.adb +++ b/tools/gnatcov/clang-extensions.adb @@ -164,6 +164,29 @@ package body Clang.Extensions is CX_Rewriter_Insert_Text_Before_Token_C (Rew, Loc, Insert & ASCII.NUL); end CX_Rewriter_Insert_Text_Before_Token; + ------------------------------------ + -- CX_Rewriter_Get_Rewritten_Text -- + ------------------------------------ + + function CX_Rewriter_Get_Rewritten_Text + (Rew : Rewriter_T; + R : Source_Range_T) return String + is + function CX_Rewriter_Get_Rewritten_Text + (Rew : Rewriter_T; + R : Source_Range_T) return String_T + with + Import, Convention => C, + External_Name => "clang_CXRewriter_getRewrittenText"; + + Rewritten_Text_C : constant String_T := + CX_Rewriter_Get_Rewritten_Text (Rew, R); + Rewritten_Text : constant String := Get_C_String (Rewritten_Text_C); + begin + Dispose_String (Rewritten_Text_C); + return Rewritten_Text; + end CX_Rewriter_Get_Rewritten_Text; + ----------------------- -- Spelling_Location -- ----------------------- diff --git a/tools/gnatcov/clang-extensions.ads b/tools/gnatcov/clang-extensions.ads index 2689b9fdd..b516cd84e 100644 --- a/tools/gnatcov/clang-extensions.ads +++ b/tools/gnatcov/clang-extensions.ads @@ -128,6 +128,12 @@ package Clang.Extensions is -- Insert the text Insert before the token at the given location, and after -- any previously inserted string (at the same location). + function CX_Rewriter_Get_Rewritten_Text + (Rew : Rewriter_T; + R : Source_Range_T) return String + with Inline; + -- Return the rewritten text for the given source range. + function Get_Cursor_TU (C : Cursor_T) return Translation_Unit_T with Import, Convention => C, diff --git a/tools/gnatcov/clang-wrapper.cc b/tools/gnatcov/clang-wrapper.cc index 4554dcc9c..945646bfa 100644 --- a/tools/gnatcov/clang-wrapper.cc +++ b/tools/gnatcov/clang-wrapper.cc @@ -705,10 +705,19 @@ clang_CXRewriter_insertTextBeforeToken (CXRewriter Rew, CXSourceLocation Loc, R.InsertTextAfter (Prv, Insert); } -/* Wrappers around source location analysis functions. */ +extern "C" CXString +clang_CXRewriter_getRewrittenText (CXRewriter Rew, CXSourceRange Rng) +{ + assert (Rew); + Rewriter &R = *reinterpret_cast (Rew); + SourceRange SR = translateCXSourceRange (Rng); + return createDup (R.getRewrittenText (SR).c_str ()); +} -extern "C" unsigned -clang_isMacroLocation (CXSourceLocation Loc) + /* Wrappers around source location analysis functions. */ + + extern "C" unsigned + clang_isMacroLocation (CXSourceLocation Loc) { const SourceLocation SLoc = translateSourceLocation (Loc); return SLoc.isMacroID () ? 1 : 0; diff --git a/tools/gnatcov/instrument-c.adb b/tools/gnatcov/instrument-c.adb index 525533b27..b848dcfef 100644 --- a/tools/gnatcov/instrument-c.adb +++ b/tools/gnatcov/instrument-c.adb @@ -40,7 +40,6 @@ with Coverage; use Coverage; with Coverage_Options; with Hex_Images; use Hex_Images; with Inputs; use Inputs; -with Instrument.C_Utils; use Instrument.C_Utils; with Outputs; use Outputs; with Paths; use Paths; with SCOs; @@ -221,6 +220,11 @@ package body Instrument.C is Loc : Source_Location_T; Text : String); + overriding procedure Register_CXX_For_Range + (Pass : Instrument_Pass_Kind; + UIC : in out C_Unit_Inst_Context'Class; + N : Cursor_T); + Record_PP_Info_Pass : aliased Record_PP_Info_Pass_Kind; Instrument_Pass : aliased Instrument_Pass_Kind; @@ -281,6 +285,54 @@ package body Instrument.C is function Insert_MCDC_State (UIC : in out C_Unit_Inst_Context'Class; Name : String) return String; + procedure Fix_CXX_For_Ranges (UIC : in out C_Unit_Inst_Context); + -- This procedure fixes C++ for ranges that were purposefully instrumented + -- wrongly. To instrument a C++ for range, we actually need to turn + -- + -- for (int i = 0; auto j : {1, 2}) {} + -- + -- into + -- + -- { + -- witness_i(); int i = 0; + -- witness_j(); + -- for (auto j : {1, 2}) {} + -- } + -- + -- We introduce an outer scope to leave the initializer declaration + -- lifetime unchanged, and to be able to easily instrument both the + -- initializer declaration and the range expression. + -- + -- Note that this is valid because you can't goto inside the loop + -- (and thus skip the execution of witness_j) as it is + -- forbidden to bypass declarations with initialization in C++, + -- which "auto j : {1, 2}" is. + -- + -- Though we can't do that all at once, as this changes the shape as the + -- AST. As we rely on the AST node location to instrument statements and + -- decisions, we would be instrumenting the decision at the wrong place, + -- as we would instrument the initialization statement by moving it. + -- + -- Thus, we do the normal instrumentation process, which will yield an + -- unvalid C++ sequence, that we can easily fix in this procedure, by + -- moving around the rewritten code. + -- + -- The for ranges at the entry of this procedure will have been + -- instrumented as followed (the added code is identified by <>): + -- + -- for ( int i = 0; auto j : {1, 2}) {}<}> + -- + -- Two things to note here: the witness_j is executed before "int i = 0;" + -- (which is wrong), and there is an unmatched closing brace. + -- + -- To fix this, we actually need to move both the added code, _and_ the + -- initializer statement before the , and insert an opening + -- brace before: + -- + -- <{> int i = 0; for (auto j : {1, 2}) {}<}> + -- + -- and now we have valid, and correctly instrumented code. + ---------------------------- -- Source level rewriting -- ---------------------------- @@ -1030,6 +1082,18 @@ package body Instrument.C is end if; end Insert_Text_After; + ---------------------------- + -- Register_CXX_For_Range -- + ---------------------------- + + overriding procedure Register_CXX_For_Range + (Pass : Instrument_Pass_Kind; + UIC : in out C_Unit_Inst_Context'Class; + N : Cursor_T) is + begin + UIC.Instrumented_CXX_For_Ranges.Append (N); + end Register_CXX_For_Range; + -------------------------- -- Instrument_Statement -- -------------------------- @@ -1306,6 +1370,43 @@ package body Instrument.C is return Name; end Insert_MCDC_State; + ------------------------ + -- Fix_CXX_For_Ranges -- + ------------------------ + + procedure Fix_CXX_For_Ranges (UIC : in out C_Unit_Inst_Context) is + begin + for N of UIC.Instrumented_CXX_For_Ranges loop + declare + Loc : constant Source_Location_T := Start_Sloc (N); + For_Init_Stmt : constant Cursor_T := Get_For_Init (N); + For_Init_Rng : constant Source_Range_T := + Get_Cursor_Extent (For_Init_Stmt); + begin + -- Get the rewritten text for the initializing statement and + -- move it before any rewritten text before the for statement. + + CX_Rewriter_Insert_Text_Before + (UIC.Rewriter, + Loc, + CX_Rewriter_Get_Rewritten_Text (UIC.Rewriter, For_Init_Rng)); + + -- Open the outer scope. It was closed during the instrumentation + -- process, so we do not need to take care of that. + + CX_Rewriter_Insert_Text_Before (UIC.Rewriter, Loc, "{"); + CX_Rewriter_Remove_Text (UIC.Rewriter, For_Init_Rng); + + -- for (; auto i : {1, 2}) is invalid C++ code so insert a dummy + -- initializing expression to avoid the null statement here, as + -- it is easier than trying to move around the comma. + + CX_Rewriter_Insert_Text_Before + (UIC.Rewriter, Start_Sloc (For_Init_Stmt), "true"); + end; + end loop; + end Fix_CXX_For_Ranges; + type SC_Entry is record N : Cursor_T; -- Original statement node, used to compute macro expansion information @@ -2134,14 +2235,28 @@ package body Instrument.C is -- the range declaration initialization expression. Like all -- statements, they can contain nested decisions. - Extend_Statement_Sequence - (For_Init_Stmt, ' ', Insertion_N => N); - Process_Decisions_Defer (For_Init_Stmt, 'X'); + if not Is_Null (For_Init_Stmt) then + + -- See Fix_CXX_For_Ranges for an explanation of the + -- below code. + + Extend_Statement_Sequence + (For_Init_Stmt, ' ', Insertion_N => For_Init_Stmt); + Process_Decisions_Defer (For_Init_Stmt, 'X'); + + -- Preemptively end the introduced outer scope as it is + -- easier done when traversing the AST. + + Append (Trailing_Braces, '}'); + UIC.Pass.Register_CXX_For_Range (UIC, N); + end if; + + -- Instrument the range as mentioned above Extend_Statement_Sequence (For_Range_Decl, ' ', - Insertion_N => For_Range_Decl, - Instr_Scheme => Instr_Expr); + Insertion_N => N, + Instr_Scheme => Instr_Stmt); Process_Decisions_Defer (For_Range_Decl, 'X'); Set_Statement_Entry; @@ -3315,6 +3430,11 @@ package body Instrument.C is end loop; end; + -- Once everything has been instrumented, fixup the for ranges. See the + -- documentation of Fix_CXX_For_Ranges. + + Fix_CXX_For_Ranges (UIC); + -- We import the extern declaration of symbols instead of including the -- header where they are defined. -- diff --git a/tools/gnatcov/instrument-c.ads b/tools/gnatcov/instrument-c.ads index 58253690f..2e6dbfcc7 100644 --- a/tools/gnatcov/instrument-c.ads +++ b/tools/gnatcov/instrument-c.ads @@ -32,6 +32,7 @@ with Clang.Rewrite; use Clang.Rewrite; with Diagnostics; use Diagnostics; with Files_Table; use Files_Table; +with Instrument.C_Utils; use Instrument.C_Utils; with Instrument.Common; use Instrument.Common; with Slocs; use Slocs; @@ -381,6 +382,11 @@ package Instrument.C is Disable_Instrumentation : Boolean := False; -- Set to True to deactivate instrumentation and prevent any code -- rewriting. + + Instrumented_CXX_For_Ranges : Cursor_Vectors.Vector; + -- List of instrumented for ranges. For an explanation of why we need + -- to store these, see the documentation of the Fix_CXX_For_Ranges + -- subprogram. end record; type C_Source_Rewriter is tagged limited private; @@ -472,6 +478,12 @@ private Msg : String; Kind : Report_Kind := Diagnostics.Warning) is null; + procedure Register_CXX_For_Range + (Pass : Pass_Kind; + UIC : in out C_Unit_Inst_Context'Class; + N : Cursor_T) is null; + -- See the documentation of Fix_CXX_For_Ranges + type C_Source_Rewriter is limited new Ada.Finalization.Limited_Controlled with record CIdx : Index_T;