diff --git a/diffkemp/cli.py b/diffkemp/cli.py index 50c188734..977507d2a 100644 --- a/diffkemp/cli.py +++ b/diffkemp/cli.py @@ -152,7 +152,8 @@ def make_argument_parser(): "type-casts", "control-flow-only", "inverse-conditions", - "reordered-bin-ops"] + "reordered-bin-ops", + "group-vars"] # Semantic patterns options. compare_ap.add_argument("--enable-pattern", diff --git a/diffkemp/config.py b/diffkemp/config.py index c26f2751b..cfa2862d7 100644 --- a/diffkemp/config.py +++ b/diffkemp/config.py @@ -23,6 +23,7 @@ def __init__( control_flow_only=False, inverse_conditions=True, reordered_bin_ops=True, + group_vars=True, ): """ Create a configuration of built-in patterns. @@ -41,6 +42,7 @@ def __init__( :param control_flow_only: Consider control-flow changes only. :param inverse_conditions: Inverted branch conditions. :param reordered_bin_ops: Match reordered binary operations. + :param group_vars: Grouping of local variables. """ self.settings = { "struct-alignment": struct_alignment, @@ -54,6 +56,7 @@ def __init__( "control-flow-only": control_flow_only, "inverse-conditions": inverse_conditions, "reordered-bin-ops": reordered_bin_ops, + "group-vars": group_vars, } self.resolve_dependencies() @@ -101,6 +104,7 @@ def as_ffi_struct(self): ffi_struct.ControlFlowOnly = self.settings["control-flow-only"] ffi_struct.InverseConditions = self.settings["inverse-conditions"] ffi_struct.ReorderedBinOps = self.settings["reordered-bin-ops"] + ffi_struct.GroupVars = self.settings["group-vars"] return ffi_struct diff --git a/diffkemp/simpll/Config.h b/diffkemp/simpll/Config.h index 34ccbb72e..2376aabfb 100644 --- a/diffkemp/simpll/Config.h +++ b/diffkemp/simpll/Config.h @@ -38,6 +38,7 @@ struct BuiltinPatterns { bool ControlFlowOnly = false; bool InverseConditions = true; bool ReorderedBinOps = true; + bool GroupVars = true; }; /// Tool configuration parsed from CLI options. diff --git a/diffkemp/simpll/DifferentialFunctionComparator.cpp b/diffkemp/simpll/DifferentialFunctionComparator.cpp index 78feef8a6..a8bad23b8 100644 --- a/diffkemp/simpll/DifferentialFunctionComparator.cpp +++ b/diffkemp/simpll/DifferentialFunctionComparator.cpp @@ -516,6 +516,15 @@ bool DifferentialFunctionComparator::maySkipInstruction( skippedInstructions.insert(Inst); return true; } + if (config.Patterns.GroupVars && Inst->isSafeToRemove() + && allUsersAreExtraMemInsts(Inst)) { + // If this is a safe instruction (not a store, call, or a terminator), + // it can be ignored if its users are extra memory instructions, i.e., + // stores to or loads from ignored local variables. This is useful for + // skipping GEPs that return pointers to such variables. + ignoredInstructions.insert(Inst); + return true; + } if (config.Patterns.ReorderedBinOps && isReorderableBinaryOp(Inst) && maySkipReorderableBinaryOp(Inst)) { ignoredInstructions.insert(Inst); @@ -532,6 +541,9 @@ bool DifferentialFunctionComparator::maySkipInstruction( replacedInstructions.insert({Inst, Inst->getOperand(0)}); return true; } + if (auto Store = dyn_cast(Inst)) { + return maySkipStore(Store); + } if (auto Load = dyn_cast(Inst)) { return maySkipLoad(Load); } @@ -599,32 +611,34 @@ bool DifferentialFunctionComparator::maySkipCast(const User *Cast) const { return false; } -/// Check whether the given instruction is a repetitive variant of -/// a previous load with no store instructions in between. -/// Load replacements are stored inside the replaced instructions map. +/// Check whether the given load may be skipped, i.e., it is either +/// a repetitive variant of a previous load, or a load from an ignored +/// local variable. Load replacements are stored inside the replaced +/// instructions map. bool DifferentialFunctionComparator::maySkipLoad(const LoadInst *Load) const { auto BB = Load->getParent(); if (!BB) { return false; } - // Check if all backward paths in the CFG graph lead to a single load from - // the same pointer as the one we want to skip. There must be no stores - // or function calls on any of the paths. + // Check if all backward CFG paths starting in the load lead to either: + // - a single load from the same pointer, + // - a single ignored store to the same pointer. + // There must be no conflicting stores or calls on any of the paths. bool first = true; - const LoadInst *PreviousLoad = nullptr; + const Value *Replacement = nullptr; std::deque blockQueue = {BB}; std::unordered_set visitedBlocks = {BB}; while (!blockQueue.empty()) { BB = blockQueue.front(); blockQueue.pop_front(); - // Look for the load instruction in the current block. If it's not - // here, we will have to check that all predecessors have it. + // Look for a suitable load or store in the current block. If it's not + // here, we will have to check all predecessors. bool checkPredecessors = true; for (auto it = BB->rbegin(); it != BB->rend(); ++it) { - // Skip all instructions before the compared load - // when in the first block. + // Skip all instructions before the compared load when in the first + // block (note that we are iterating in reverse). if (first) { if (&*it == Load) { first = false; @@ -632,22 +646,32 @@ bool DifferentialFunctionComparator::maySkipLoad(const LoadInst *Load) const { continue; } - if (auto Candidate = dyn_cast(&*it)) { - // Found a candidate load; check whether the pointer matches. + const Value *ReplacementCandidate = nullptr; + if (auto PreviousLoad = dyn_cast(&*it)) { if (Load->getPointerOperand() - == Candidate->getPointerOperand()) { - if (PreviousLoad) { - // If another suitable load has been found previously - // in a different CFG path, fail. We need a single one. - return false; - } - // Otherwise remember this load. - PreviousLoad = Candidate; - checkPredecessors = false; - break; + == PreviousLoad->getPointerOperand()) { + // Found an identical load, save the candidate. + ReplacementCandidate = PreviousLoad; + } + } else if (auto Store = dyn_cast(&*it)) { + if (ignoredInstructions.count(Store) + && Store->getPointerOperand() + == Load->getPointerOperand()) { + // Found a previously ignored store, save the candidate. + ReplacementCandidate = Store->getValueOperand(); + } + } + if (ReplacementCandidate) { + if (Replacement) { + // If another suitable replacement has been found previously + // in a different CFG path, fail. We need a single one. + return false; } - } else if (isa(&*it) - || (isa(&*it) && !isDebugInfo(*it))) { + // Otherwise remember this replacement. + Replacement = ReplacementCandidate; + checkPredecessors = false; + break; + } else if (mayStoreTo(&*it, Load->getPointerOperand())) { // Fail if a possibly conflicting store or call is found. return false; } @@ -656,7 +680,9 @@ bool DifferentialFunctionComparator::maySkipLoad(const LoadInst *Load) const { if (checkPredecessors) { auto BBPredecessors = predecessors(BB); if (BBPredecessors.begin() == BBPredecessors.end()) { - // Fail if no more predecessors are available. + // Fail if no more predecessors are available. This means that + // we've reached the top of the function, and we haven't found + // a suitable replacement, so we cannot skip the load. return false; } // Queue up all unvisited predecessors. @@ -668,10 +694,41 @@ bool DifferentialFunctionComparator::maySkipLoad(const LoadInst *Load) const { } } } - // If the load is repeated without stores in between, skip it because - // the load is redundant and its removal will cause no semantic difference. - if (PreviousLoad) { - replacedInstructions.insert({Load, PreviousLoad}); + // Successfully found a replacement, + if (Replacement) { + replacedInstructions.insert({Load, Replacement}); + return true; + } + return false; +} + +/// Check whether the given store can be skipped because it stores to +/// an ignored local variable and the stored value will be compared later. +bool DifferentialFunctionComparator::maySkipStore( + const StoreInst *Store) const { + if (!config.Patterns.GroupVars) { + return false; + } + // We can skip a store if it stores to a skipped/ignored local variable. + // This is allowed because any load from the same variable will + // also fail, and will have to be skipped and replaced with the + // stored value. As a result, the corresponding alloca cannot be + // directly compared as an operand from this point onward, because + // its contents may differ from now. Hence, we move it from + // skippedInstructions to ignoredInstructions. + auto Alloca = getAllocaOp(Store); + if (!Alloca) { + return false; + } + auto SkippedIt = skippedInstructions.find(Alloca); + if (SkippedIt != skippedInstructions.end()) { + skippedInstructions.erase(SkippedIt); + ignoredInstructions.insert(Alloca); + ignoredInstructions.insert(Store); + return true; + } + if (ignoredInstructions.count(Alloca)) { + ignoredInstructions.insert(Store); return true; } return false; @@ -2054,3 +2111,28 @@ bool DifferentialFunctionComparator::checkInverseCondUsers( } return true; } + +/// Check whether the pointer operand of the given instruction (e.g., load) +/// points into local memory allocated by an ignored alloca. +template +bool DifferentialFunctionComparator::hasIgnoredAllocaOp( + const InstType *Inst) const { + if (auto Alloca = getAllocaOp(Inst)) + return ignoredInstructions.count(Alloca) + || skippedInstructions.count(Alloca); + return false; +} + +/// Check whether all uses of the provided value are extra memory instructions, +/// i.e. loads from or stores to ignored memory. +bool DifferentialFunctionComparator::allUsersAreExtraMemInsts( + const Instruction *Inst) const { + return std::all_of( + Inst->user_begin(), Inst->user_end(), [&](const User *user) { + if (auto store = dyn_cast(user)) + return hasIgnoredAllocaOp(store); + if (auto load = dyn_cast(user)) + return hasIgnoredAllocaOp(load); + return false; + }); +} diff --git a/diffkemp/simpll/DifferentialFunctionComparator.h b/diffkemp/simpll/DifferentialFunctionComparator.h index 56db0e261..187e68457 100644 --- a/diffkemp/simpll/DifferentialFunctionComparator.h +++ b/diffkemp/simpll/DifferentialFunctionComparator.h @@ -157,8 +157,8 @@ class DifferentialFunctionComparator : public FunctionComparator { /// be synchronized later if a matching instruction is found. mutable std::unordered_set skippedInstructions; - /// Set of irreversibly ignored instructions. Matching these could - /// result in false negatives. + /// Set of irreversibly ignored instructions. These should never be directly + /// compared to other instructions. mutable std::unordered_set ignoredInstructions; /// Contains pairs of values mapped by synchronisation maps. Enables @@ -239,11 +239,16 @@ class DifferentialFunctionComparator : public FunctionComparator { /// replacements inside the replaced instructions map. bool maySkipCast(const User *Cast) const; - /// Check whether the given instruction is a repetitive variant of - /// a previous load with no store instructions in between. - /// Load replacements are stored inside the replaced instructions map. + /// Check whether the given load may be skipped, i.e., it is either + /// a repetitive variant of a previous load, or a load from an ignored + /// local variable. Load replacements are stored inside the replaced + /// instructions map. bool maySkipLoad(const LoadInst *Load) const; + /// Check whether the given store can be skipped because it stores to + /// an ignored local variable and the stored value will be compared later. + bool maySkipStore(const StoreInst *Store) const; + /// Check whether the given reorderable binary operator can be skipped. /// It can only be skipped if all its users are binary operations /// of the same kind. @@ -300,6 +305,15 @@ class DifferentialFunctionComparator : public FunctionComparator { /// Return true if the users are acceptable for inverse conditions pattern, /// false otherwise. bool checkInverseCondUsers(const Instruction *inst) const; + + /// Check whether all uses of the provided instruction are extra memory + /// instructions, i.e. loads from or stores to ignored memory. + bool allUsersAreExtraMemInsts(const Instruction *Inst) const; + + /// Check whether the pointer operand of the given instruction (e.g., load) + /// is a previously ignored alloca or a GEP pointing to one. + template + bool hasIgnoredAllocaOp(const InstType *Inst) const; }; #endif // DIFFKEMP_SIMPLL_DIFFERENTIALFUNCTIONCOMPARATOR_H diff --git a/diffkemp/simpll/SimpLL.cpp b/diffkemp/simpll/SimpLL.cpp index fbccc6cb8..f087dfa3d 100644 --- a/diffkemp/simpll/SimpLL.cpp +++ b/diffkemp/simpll/SimpLL.cpp @@ -115,6 +115,9 @@ cl::opt ReorderedBinOpsOpt( "reordered-bin-ops", cl::desc("Enable reordered binary operations pattern."), cl::cat(BuiltinPatternsCategory)); +cl::opt GroupVarsOpt("group-vars", + cl::desc("Enable variable grouping pattern."), + cl::cat(BuiltinPatternsCategory)); /// Add suffix to the file name. /// \param File Original file name. @@ -162,7 +165,8 @@ int main(int argc, const char **argv) { .TypeCasts = TypeCastsOpt, .ControlFlowOnly = ControlFlowOnlyOpt, .InverseConditions = InverseConditionsOpt, - .ReorderedBinOps = ReorderedBinOpsOpt}; + .ReorderedBinOps = ReorderedBinOpsOpt, + .GroupVars = GroupVarsOpt}; // Parse --fun option auto FunName = parseFunOption(); diff --git a/diffkemp/simpll/Utils.cpp b/diffkemp/simpll/Utils.cpp index 17596b1b1..843b32dd8 100644 --- a/diffkemp/simpll/Utils.cpp +++ b/diffkemp/simpll/Utils.cpp @@ -14,6 +14,7 @@ #include "Utils.h" #include "Config.h" #include "CustomPatternSet.h" +#include "DebugInfo.h" #include #include #include @@ -874,3 +875,67 @@ StructType *getTypeByName(const Module &Mod, StringRef Name) { return Mod.getTypeByName(Name); #endif } + +/// Given an instruction and a pointer value, try to determine whether the +/// instruction may store to the memory pointed to by the pointer. This can +/// happen only if the instruction is a store or a function call. +bool mayStoreTo(const Instruction *Inst, const Value *Ptr) { + if (auto Store = dyn_cast(Inst)) { + // If the instruction is a store, we check whether its pointer operand + // may alias the given pointer. + return mayAlias(Store->getPointerOperand(), Ptr); + } + if (auto Call = dyn_cast(Inst)) { + // If the instruction is a call, we preventively return true + // unless it is a debug call. + return !isDebugInfo(*Call); + } + return false; +} + +/// Given two pointer values, try do determine whether they may alias. The +/// function currently supports only simple aliasing of local memory. +bool mayAlias(const Value *PtrL, const Value *PtrR) { + auto AllocaL = getAllocaFromPtr(PtrL); + auto AllocaR = getAllocaFromPtr(PtrR); + if (!AllocaL || !AllocaR) + // If any of the two pointers does not directly point to local memory, + // do not continue; more advanced alias analysis would be necessary. + return true; + if (AllocaL != AllocaR) + // If the underlying allocas are different, the pointers cannot alias. + return false; + auto GEPL = dyn_cast(PtrL); + auto GEPR = dyn_cast(PtrR); + if (!GEPL || !GEPR) + // At this point, we know that the pointers point to the same alloca. + // If any of the pointers is the alloca itself, the pointers alias. + return true; + auto GEPLIdxIt = GEPL->idx_begin(), GEPLIdxE = GEPL->idx_end(); + auto GEPRIdxIt = GEPR->idx_begin(), GEPRIdxE = GEPR->idx_end(); + // Go through the indices of both GEPs in parallel. If they diverge at some + // point, the pointers do not alias. + while (GEPLIdxIt != GEPLIdxE && GEPRIdxIt != GEPRIdxE) { + auto ConstantIdxL = dyn_cast(GEPLIdxIt->get()); + auto ConstantIdxR = dyn_cast(GEPRIdxIt->get()); + if (ConstantIdxL && ConstantIdxR + && ConstantIdxL->getSExtValue() != ConstantIdxR->getSExtValue()) + return false; + GEPLIdxIt++; + GEPRIdxIt++; + } + return true; +} + +/// Given a pointer value, return the instruction which allocated the memory +/// where the pointer points. Return a null pointer if an alloca is not found, +/// e.g. because the pointer is a function parameter. +const AllocaInst *getAllocaFromPtr(const Value *Ptr) { + if (auto Alloca = dyn_cast(Ptr)) { + return Alloca; + } + if (auto GEP = dyn_cast(Ptr)) { + return getAllocaFromPtr(GEP->getPointerOperand()); + } + return nullptr; +} diff --git a/diffkemp/simpll/Utils.h b/diffkemp/simpll/Utils.h index a2e298486..0123f0116 100644 --- a/diffkemp/simpll/Utils.h +++ b/diffkemp/simpll/Utils.h @@ -203,4 +203,26 @@ StructType *getTypeByName(const Module &Mod, StringRef Name); /// Retrieve information about a structured type being pointed to by a value TypeInfo getPointeeStructTypeInfo(const Value *Val, const DataLayout *Layout); +/// Given an instruction and a pointer value, try to determine whether the +/// instruction may store to the memory pointed to by the pointer. This is +/// currently implemented only as a set of heuristics - if they do not suffice, +/// we return true. +bool mayStoreTo(const Instruction *Inst, const Value *Ptr); + +/// Given two pointer values, try do determine whether they may alias. The +/// function currently supports only simple aliasing of local memory. +bool mayAlias(const Value *PtrL, const Value *PtrR); + +/// Given a pointer value, return the instruction which allocated the memory +/// where the pointer points. Return a null pointer if the alloca is not found. +const AllocaInst *getAllocaFromPtr(const Value *Ptr); + +/// Return the alloca instruction that was used to allocate the variable into +/// which the pointer operand of the given instruction (e.g., load) points. +/// If such an alloca cannot be found, return nullptr. +template +const AllocaInst *getAllocaOp(const InstType *Inst) { + return getAllocaFromPtr(Inst->getPointerOperand()); +} + #endif // DIFFKEMP_SIMPLL_UTILS_H diff --git a/diffkemp/simpll/library/FFI.cpp b/diffkemp/simpll/library/FFI.cpp index dc154b142..5e75d3026 100644 --- a/diffkemp/simpll/library/FFI.cpp +++ b/diffkemp/simpll/library/FFI.cpp @@ -73,6 +73,7 @@ BuiltinPatterns BuiltinPatternsFromC(builtin_patterns PatternsC) { .ControlFlowOnly = (bool)PatternsC.ControlFlowOnly, .InverseConditions = (bool)PatternsC.InverseConditions, .ReorderedBinOps = (bool)PatternsC.ReorderedBinOps, + .GroupVars = (bool)PatternsC.GroupVars, }; } diff --git a/diffkemp/simpll/library/FFI.h b/diffkemp/simpll/library/FFI.h index 98caa3114..1a8adaa8d 100644 --- a/diffkemp/simpll/library/FFI.h +++ b/diffkemp/simpll/library/FFI.h @@ -35,6 +35,7 @@ struct builtin_patterns { int ControlFlowOnly; int InverseConditions; int ReorderedBinOps; + int GroupVars; }; struct config { diff --git a/tests/unit_tests/simpll/DifferentialFunctionComparatorTest.cpp b/tests/unit_tests/simpll/DifferentialFunctionComparatorTest.cpp index eb23d633c..54bac8be8 100644 --- a/tests/unit_tests/simpll/DifferentialFunctionComparatorTest.cpp +++ b/tests/unit_tests/simpll/DifferentialFunctionComparatorTest.cpp @@ -2347,3 +2347,69 @@ TEST_F(DifferentialFunctionComparatorTest, SkipRepetitiveLoad) { ASSERT_EQ(DiffComp->compare(), 0); } + +TEST_F(DifferentialFunctionComparatorTest, ReorganizedLocalVariables) { + // Left function: + // %1 = add i8 1, 2 + // %2 = add i8 %1, %1 + // ret i8 %2 + // + // Right function: + // %1 = alloca %struct ; %struct.struct = type { i8, i8 } + // %2 = add i8 1, 2 + // %3 = getelementptr inbounds ptr, ptr %1, i32 0 + // %4 = getelementptr inbounds ptr, ptr %1, i32 1 + // store i8 %2, ptr %3 + // store i8 %2, ptr %4 + // %5 = load i8, ptr %3 + // %6 = load i8, ptr %4 + // %7 = add i8 %5, %6 + // ret i8 %7 + + BasicBlock *BBL = BasicBlock::Create(CtxL, "", FL); + BasicBlock *BBR = BasicBlock::Create(CtxR, "", FR); + + auto STyR = + StructType::create({Type::getInt8Ty(CtxR), Type::getInt8Ty(CtxR)}); + STyR->setName("struct"); + auto AllocaR = new AllocaInst(STyR, 0, "", BBR); + + Constant *ConstL1 = ConstantInt::get(Type::getInt8Ty(CtxL), 1); + Constant *ConstL2 = ConstantInt::get(Type::getInt8Ty(CtxL), 2); + Constant *ConstR1 = ConstantInt::get(Type::getInt8Ty(CtxR), 1); + Constant *ConstR2 = ConstantInt::get(Type::getInt8Ty(CtxR), 2); + + auto AddL = BinaryOperator::Create( + BinaryOperator::Add, ConstL1, ConstL2, "", BBL); + auto AddR = BinaryOperator::Create( + BinaryOperator::Add, ConstR1, ConstR2, "", BBR); + + auto GEPR1 = GetElementPtrInst::Create( + STyR, + AllocaR, + {ConstantInt::get(Type::getInt32Ty(CtxR), 0)}, + "", + BBR); + auto GEPR2 = GetElementPtrInst::Create( + STyR, + AllocaR, + {ConstantInt::get(Type::getInt32Ty(CtxR), 1)}, + "", + BBR); + + new StoreInst(AddR, GEPR1, BBR); + new StoreInst(AddR, GEPR2, BBR); + + auto LoadR1 = new LoadInst(Type::getInt8Ty(CtxR), GEPR1, "", BBR); + auto LoadR2 = new LoadInst(Type::getInt8Ty(CtxR), GEPR2, "", BBR); + + auto ResL = + BinaryOperator::Create(BinaryOperator::Add, AddL, AddL, "", BBL); + auto ResR = BinaryOperator::Create( + BinaryOperator::Add, LoadR1, LoadR2, "", BBR); + + ReturnInst::Create(CtxL, ResL, BBL); + ReturnInst::Create(CtxR, ResR, BBR); + + ASSERT_EQ(DiffComp->compare(), 0); +}