Skip to content

Commit

Permalink
Add a new built-in pattern: group-vars
Browse files Browse the repository at this point in the history
The pattern can detect simple instances of
local variable refactoring, such as grouping
a set of variables into a structure.

It works by skipping stores to and loads from
ignored allocas and matching the stored values
later when necessary.
  • Loading branch information
zacikpa authored and viktormalik committed Dec 13, 2023
1 parent fdd25dc commit 395902b
Show file tree
Hide file tree
Showing 11 changed files with 298 additions and 37 deletions.
3 changes: 2 additions & 1 deletion diffkemp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ def make_argument_parser():
"type-casts",
"control-flow-only",
"inverse-conditions",
"reordered-bin-ops"]
"reordered-bin-ops",
"group-vars"]

# Semantic patterns options.
compare_ap.add_argument("--enable-pattern",
Expand Down
4 changes: 4 additions & 0 deletions diffkemp/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(
control_flow_only=False,
inverse_conditions=True,
reordered_bin_ops=True,
group_vars=True,
):
"""
Create a configuration of built-in patterns.
Expand All @@ -41,6 +42,7 @@ def __init__(
:param control_flow_only: Consider control-flow changes only.
:param inverse_conditions: Inverted branch conditions.
:param reordered_bin_ops: Match reordered binary operations.
:param group_vars: Grouping of local variables.
"""
self.settings = {
"struct-alignment": struct_alignment,
Expand All @@ -54,6 +56,7 @@ def __init__(
"control-flow-only": control_flow_only,
"inverse-conditions": inverse_conditions,
"reordered-bin-ops": reordered_bin_ops,
"group-vars": group_vars,
}
self.resolve_dependencies()

Expand Down Expand Up @@ -101,6 +104,7 @@ def as_ffi_struct(self):
ffi_struct.ControlFlowOnly = self.settings["control-flow-only"]
ffi_struct.InverseConditions = self.settings["inverse-conditions"]
ffi_struct.ReorderedBinOps = self.settings["reordered-bin-ops"]
ffi_struct.GroupVars = self.settings["group-vars"]
return ffi_struct


Expand Down
1 change: 1 addition & 0 deletions diffkemp/simpll/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct BuiltinPatterns {
bool ControlFlowOnly = false;
bool InverseConditions = true;
bool ReorderedBinOps = true;
bool GroupVars = true;
};

/// Tool configuration parsed from CLI options.
Expand Down
142 changes: 112 additions & 30 deletions diffkemp/simpll/DifferentialFunctionComparator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,15 @@ bool DifferentialFunctionComparator::maySkipInstruction(
skippedInstructions.insert(Inst);
return true;
}
if (config.Patterns.GroupVars && Inst->isSafeToRemove()
&& allUsersAreExtraMemInsts(Inst)) {
// If this is a safe instruction (not a store, call, or a terminator),
// it can be ignored if its users are extra memory instructions, i.e.,
// stores to or loads from ignored local variables. This is useful for
// skipping GEPs that return pointers to such variables.
ignoredInstructions.insert(Inst);
return true;
}
if (config.Patterns.ReorderedBinOps && isReorderableBinaryOp(Inst)
&& maySkipReorderableBinaryOp(Inst)) {
ignoredInstructions.insert(Inst);
Expand All @@ -532,6 +541,9 @@ bool DifferentialFunctionComparator::maySkipInstruction(
replacedInstructions.insert({Inst, Inst->getOperand(0)});
return true;
}
if (auto Store = dyn_cast<StoreInst>(Inst)) {
return maySkipStore(Store);
}
if (auto Load = dyn_cast<LoadInst>(Inst)) {
return maySkipLoad(Load);
}
Expand Down Expand Up @@ -599,55 +611,67 @@ bool DifferentialFunctionComparator::maySkipCast(const User *Cast) const {
return false;
}

/// Check whether the given instruction is a repetitive variant of
/// a previous load with no store instructions in between.
/// Load replacements are stored inside the replaced instructions map.
/// Check whether the given load may be skipped, i.e., it is either
/// a repetitive variant of a previous load, or a load from an ignored
/// local variable. Load replacements are stored inside the replaced
/// instructions map.
bool DifferentialFunctionComparator::maySkipLoad(const LoadInst *Load) const {
auto BB = Load->getParent();
if (!BB) {
return false;
}

// Check if all backward paths in the CFG graph lead to a single load from
// the same pointer as the one we want to skip. There must be no stores
// or function calls on any of the paths.
// Check if all backward CFG paths starting in the load lead to either:
// - a single load from the same pointer,
// - a single ignored store to the same pointer.
// There must be no conflicting stores or calls on any of the paths.
bool first = true;
const LoadInst *PreviousLoad = nullptr;
const Value *Replacement = nullptr;
std::deque<const BasicBlock *> blockQueue = {BB};
std::unordered_set<const BasicBlock *> visitedBlocks = {BB};
while (!blockQueue.empty()) {
BB = blockQueue.front();
blockQueue.pop_front();

// Look for the load instruction in the current block. If it's not
// here, we will have to check that all predecessors have it.
// Look for a suitable load or store in the current block. If it's not
// here, we will have to check all predecessors.
bool checkPredecessors = true;
for (auto it = BB->rbegin(); it != BB->rend(); ++it) {
// Skip all instructions before the compared load
// when in the first block.
// Skip all instructions before the compared load when in the first
// block (note that we are iterating in reverse).
if (first) {
if (&*it == Load) {
first = false;
}
continue;
}

if (auto Candidate = dyn_cast<LoadInst>(&*it)) {
// Found a candidate load; check whether the pointer matches.
const Value *ReplacementCandidate = nullptr;
if (auto PreviousLoad = dyn_cast<LoadInst>(&*it)) {
if (Load->getPointerOperand()
== Candidate->getPointerOperand()) {
if (PreviousLoad) {
// If another suitable load has been found previously
// in a different CFG path, fail. We need a single one.
return false;
}
// Otherwise remember this load.
PreviousLoad = Candidate;
checkPredecessors = false;
break;
== PreviousLoad->getPointerOperand()) {
// Found an identical load, save the candidate.
ReplacementCandidate = PreviousLoad;
}
} else if (auto Store = dyn_cast<StoreInst>(&*it)) {
if (ignoredInstructions.count(Store)
&& Store->getPointerOperand()
== Load->getPointerOperand()) {
// Found a previously ignored store, save the candidate.
ReplacementCandidate = Store->getValueOperand();
}
}
if (ReplacementCandidate) {
if (Replacement) {
// If another suitable replacement has been found previously
// in a different CFG path, fail. We need a single one.
return false;
}
} else if (isa<StoreInst>(&*it)
|| (isa<CallInst>(&*it) && !isDebugInfo(*it))) {
// Otherwise remember this replacement.
Replacement = ReplacementCandidate;
checkPredecessors = false;
break;
} else if (mayStoreTo(&*it, Load->getPointerOperand())) {
// Fail if a possibly conflicting store or call is found.
return false;
}
Expand All @@ -656,7 +680,9 @@ bool DifferentialFunctionComparator::maySkipLoad(const LoadInst *Load) const {
if (checkPredecessors) {
auto BBPredecessors = predecessors(BB);
if (BBPredecessors.begin() == BBPredecessors.end()) {
// Fail if no more predecessors are available.
// Fail if no more predecessors are available. This means that
// we've reached the top of the function, and we haven't found
// a suitable replacement, so we cannot skip the load.
return false;
}
// Queue up all unvisited predecessors.
Expand All @@ -668,10 +694,41 @@ bool DifferentialFunctionComparator::maySkipLoad(const LoadInst *Load) const {
}
}
}
// If the load is repeated without stores in between, skip it because
// the load is redundant and its removal will cause no semantic difference.
if (PreviousLoad) {
replacedInstructions.insert({Load, PreviousLoad});
// Successfully found a replacement; record it and skip the load.
if (Replacement) {
replacedInstructions.insert({Load, Replacement});
return true;
}
return false;
}

/// Check whether the given store can be skipped because it stores to
/// an ignored local variable and the stored value will be compared later.
bool DifferentialFunctionComparator::maySkipStore(
const StoreInst *Store) const {
if (!config.Patterns.GroupVars) {
return false;
}
// We can skip a store if it stores to a skipped/ignored local variable.
// This is allowed because any load from the same variable will
// also fail, and will have to be skipped and replaced with the
// stored value. As a result, the corresponding alloca cannot be
// directly compared as an operand from this point onward, because
// its contents may differ from now. Hence, we move it from
// skippedInstructions to ignoredInstructions.
auto Alloca = getAllocaOp(Store);
if (!Alloca) {
return false;
}
auto SkippedIt = skippedInstructions.find(Alloca);
if (SkippedIt != skippedInstructions.end()) {
skippedInstructions.erase(SkippedIt);
ignoredInstructions.insert(Alloca);
ignoredInstructions.insert(Store);
return true;
}
if (ignoredInstructions.count(Alloca)) {
ignoredInstructions.insert(Store);
return true;
}
return false;
Expand Down Expand Up @@ -2054,3 +2111,28 @@ bool DifferentialFunctionComparator::checkInverseCondUsers(
}
return true;
}

/// Check whether the pointer operand of the given instruction (e.g., load)
/// points into local memory allocated by an ignored alloca.
template <typename InstType>
bool DifferentialFunctionComparator::hasIgnoredAllocaOp(
        const InstType *Inst) const {
    auto Alloca = getAllocaOp(Inst);
    if (!Alloca)
        return false;
    // Allocas in either set must not be compared directly anymore, so any
    // memory access through them counts as accessing ignored memory.
    return ignoredInstructions.count(Alloca) != 0
           || skippedInstructions.count(Alloca) != 0;
}

/// Check whether all uses of the provided value are extra memory instructions,
/// i.e. loads from or stores to ignored memory.
bool DifferentialFunctionComparator::allUsersAreExtraMemInsts(
const Instruction *Inst) const {
return std::all_of(
Inst->user_begin(), Inst->user_end(), [&](const User *user) {
if (auto store = dyn_cast<StoreInst>(user))
return hasIgnoredAllocaOp(store);
if (auto load = dyn_cast<LoadInst>(user))
return hasIgnoredAllocaOp(load);
return false;
});
}
24 changes: 19 additions & 5 deletions diffkemp/simpll/DifferentialFunctionComparator.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ class DifferentialFunctionComparator : public FunctionComparator {
/// be synchronized later if a matching instruction is found.
mutable std::unordered_set<const Value *> skippedInstructions;

/// Set of irreversibly ignored instructions. Matching these could
/// result in false negatives.
/// Set of irreversibly ignored instructions. These should never be directly
/// compared to other instructions.
mutable std::unordered_set<const Value *> ignoredInstructions;

/// Contains pairs of values mapped by synchronisation maps. Enables
Expand Down Expand Up @@ -239,11 +239,16 @@ class DifferentialFunctionComparator : public FunctionComparator {
/// replacements inside the replaced instructions map.
bool maySkipCast(const User *Cast) const;

/// Check whether the given instruction is a repetitive variant of
/// a previous load with no store instructions in between.
/// Load replacements are stored inside the replaced instructions map.
/// Check whether the given load may be skipped, i.e., it is either
/// a repetitive variant of a previous load, or a load from an ignored
/// local variable. Load replacements are stored inside the replaced
/// instructions map.
bool maySkipLoad(const LoadInst *Load) const;

/// Check whether the given store can be skipped because it stores to
/// an ignored local variable and the stored value will be compared later.
bool maySkipStore(const StoreInst *Store) const;

/// Check whether the given reorderable binary operator can be skipped.
/// It can only be skipped if all its users are binary operations
/// of the same kind.
Expand Down Expand Up @@ -300,6 +305,15 @@ class DifferentialFunctionComparator : public FunctionComparator {
/// Return true if the users are acceptable for inverse conditions pattern,
/// false otherwise.
bool checkInverseCondUsers(const Instruction *inst) const;

/// Check whether all uses of the provided instruction are extra memory
/// instructions, i.e. loads from or stores to ignored memory.
bool allUsersAreExtraMemInsts(const Instruction *Inst) const;

/// Check whether the pointer operand of the given instruction (e.g., load)
/// is a previously ignored alloca or a GEP pointing to one.
template <typename InstType>
bool hasIgnoredAllocaOp(const InstType *Inst) const;
};

#endif // DIFFKEMP_SIMPLL_DIFFERENTIALFUNCTIONCOMPARATOR_H
6 changes: 5 additions & 1 deletion diffkemp/simpll/SimpLL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ cl::opt<bool> ReorderedBinOpsOpt(
"reordered-bin-ops",
cl::desc("Enable reordered binary operations pattern."),
cl::cat(BuiltinPatternsCategory));
cl::opt<bool> GroupVarsOpt("group-vars",
cl::desc("Enable variable grouping pattern."),
cl::cat(BuiltinPatternsCategory));

/// Add suffix to the file name.
/// \param File Original file name.
Expand Down Expand Up @@ -162,7 +165,8 @@ int main(int argc, const char **argv) {
.TypeCasts = TypeCastsOpt,
.ControlFlowOnly = ControlFlowOnlyOpt,
.InverseConditions = InverseConditionsOpt,
.ReorderedBinOps = ReorderedBinOpsOpt};
.ReorderedBinOps = ReorderedBinOpsOpt,
.GroupVars = GroupVarsOpt};

// Parse --fun option
auto FunName = parseFunOption();
Expand Down
65 changes: 65 additions & 0 deletions diffkemp/simpll/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "Utils.h"
#include "Config.h"
#include "CustomPatternSet.h"
#include "DebugInfo.h"
#include <algorithm>
#include <iostream>
#include <llvm/BinaryFormat/Dwarf.h>
Expand Down Expand Up @@ -874,3 +875,67 @@ StructType *getTypeByName(const Module &Mod, StringRef Name) {
return Mod.getTypeByName(Name);
#endif
}

/// Given an instruction and a pointer value, try to determine whether the
/// instruction may store to the memory pointed to by the pointer. Only store
/// instructions and function calls are considered capable of writing here.
bool mayStoreTo(const Instruction *Inst, const Value *Ptr) {
    // A call may write anywhere, so conservatively report a possible store
    // unless it only carries debug information.
    if (auto Call = dyn_cast<CallInst>(Inst))
        return !isDebugInfo(*Call);
    // A store conflicts only if its destination may alias the given pointer.
    if (auto Store = dyn_cast<StoreInst>(Inst))
        return mayAlias(Store->getPointerOperand(), Ptr);
    // Any other instruction kind is treated as non-writing.
    return false;
}

/// Given two pointer values, try to determine whether they may alias. The
/// function currently supports only simple aliasing of local memory
/// (allocas, possibly accessed through GEPs). Returns true (may alias)
/// whenever it cannot prove otherwise.
bool mayAlias(const Value *PtrL, const Value *PtrR) {
auto AllocaL = getAllocaFromPtr(PtrL);
auto AllocaR = getAllocaFromPtr(PtrR);
if (!AllocaL || !AllocaR)
// If any of the two pointers does not directly point to local memory,
// do not continue; more advanced alias analysis would be necessary.
// Conservatively report a possible alias.
return true;
if (AllocaL != AllocaR)
// If the underlying allocas are different, the pointers cannot alias
// (distinct local allocations occupy disjoint memory).
return false;
auto GEPL = dyn_cast<GetElementPtrInst>(PtrL);
auto GEPR = dyn_cast<GetElementPtrInst>(PtrR);
if (!GEPL || !GEPR)
// At this point, we know that the pointers point to the same alloca.
// If any of the pointers is the alloca itself, the pointers alias.
return true;
auto GEPLIdxIt = GEPL->idx_begin(), GEPLIdxE = GEPL->idx_end();
auto GEPRIdxIt = GEPR->idx_begin(), GEPRIdxE = GEPR->idx_end();
// Go through the indices of both GEPs in parallel. If two constant indices
// differ at the same position, the GEPs address disjoint parts of the
// alloca and the pointers do not alias. A non-constant index (or a length
// mismatch) is treated conservatively, i.e., as potentially equal.
// NOTE(review): GEP-of-GEP chains are compared by their top-level indices
// only — confirm callers only pass GEPs applied directly to the alloca,
// otherwise indices with different bases may be compared position-wise.
while (GEPLIdxIt != GEPLIdxE && GEPRIdxIt != GEPRIdxE) {
auto ConstantIdxL = dyn_cast<ConstantInt>(GEPLIdxIt->get());
auto ConstantIdxR = dyn_cast<ConstantInt>(GEPRIdxIt->get());
if (ConstantIdxL && ConstantIdxR
&& ConstantIdxL->getSExtValue() != ConstantIdxR->getSExtValue())
return false;
GEPLIdxIt++;
GEPRIdxIt++;
}
return true;
}

/// Given a pointer value, return the instruction which allocated the memory
/// where the pointer points. Return a null pointer if an alloca is not found,
/// e.g. because the pointer is a function parameter.
const AllocaInst *getAllocaFromPtr(const Value *Ptr) {
    // Iteratively strip a possible chain of GEPs, then check whether the
    // remaining base value is a local allocation.
    const Value *Base = Ptr;
    while (auto GEP = dyn_cast<GetElementPtrInst>(Base)) {
        Base = GEP->getPointerOperand();
    }
    return dyn_cast<AllocaInst>(Base);
}
Loading

0 comments on commit 395902b

Please sign in to comment.