Skip to content

Commit 1485125

Browse files
committed
added the ability for the analysis to split basic blocks on call instructions
This has the effect that any block containing a call has exactly one call, and that call is the last instruction of the block. This may seem useless at first, but it allows for simpler implementations of other analyses. For example, to determine whether a function is a no-return function, with this change we can simply look at all of its terminating blocks: if ALL of them end with a call to a no-return function, then it is also a no-return function. (There are of course other cases that will need to be handled, but you get the gist.)
1 parent fd5ac05 commit 1485125

File tree

7 files changed

+120
-2
lines changed

7 files changed

+120
-2
lines changed

include/BasicBlock.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ class EDB_EXPORT BasicBlock {
8181
[[nodiscard]] edb::address_t firstAddress() const;
8282
[[nodiscard]] edb::address_t lastAddress() const;
8383

84+
public:
85+
std::pair<BasicBlock, BasicBlock> splitBlock(const instruction_pointer &inst);
86+
8487
private:
8588
std::vector<instruction_pointer> instructions_;
8689
std::vector<std::pair<edb::address_t, edb::address_t>> references_;

include/Function.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ class EDB_EXPORT Function {
5353
[[nodiscard]] Type type() const;
5454
void setType(Type t);
5555

56+
void erase(const_iterator it);
57+
5658
public:
5759
[[nodiscard]] const_reference back() const;
5860
[[nodiscard]] const_reference front() const;

plugins/Analyzer/Analyzer.cpp

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,21 @@ void set_function_types(IAnalyzer::FunctionMap *results) {
117117
Q_ASSERT(results);
118118

119119
// give bonus if we have a symbol for the address
120-
std::for_each(results->begin(), results->end(), [](Function &function) {
120+
for (auto it = results->begin(); it != results->end(); ++it) {
121+
122+
Function &function = it.value();
123+
124+
if (function.empty()) {
125+
qDebug() << "HERE:" << it.key().toString();
126+
}
127+
128+
Q_ASSERT(!function.empty());
121129
if (is_thunk(function.entryAddress())) {
122130
function.setType(Function::Thunk);
123131
} else {
124132
function.setType(Function::Standard);
125133
}
126-
});
134+
}
127135
}
128136

129137
/**
@@ -431,6 +439,56 @@ void Analyzer::identHeader(Analyzer::RegionData *data) {
431439
Q_UNUSED(data)
432440
}
433441

442+
/**
 * @brief split_function
 *
 * Looks for the first basic block of @p func that contains a call instruction
 * which is not the block's final instruction, and replaces that block with the
 * two blocks produced by BasicBlock::splitBlock at that call.
 *
 * At most one block is split per invocation.
 *
 * @param func the function whose blocks are examined and, possibly, modified
 * @return true if a block was split, false if no block needed splitting
 */
bool split_function(Function &func) {

    for (auto blockIt = func.begin(); blockIt != func.end(); ++blockIt) {
        BasicBlock &block = blockIt->second;

        // with zero or one instruction there is nothing that could follow a call
        if (block.size() <= 1) {
            continue;
        }

        for (const std::shared_ptr<edb::Instruction> &insn : block) {

            // only a call that is NOT already the block's last instruction
            // requires a split
            if (!is_call(*insn) || insn == block.back()) {
                continue;
            }

            // build both halves first: `block` and `insn` refer into the entry
            // that is erased on the next line
            auto halves = block.splitBlock(insn);
            func.erase(blockIt);

            Q_ASSERT(!halves.first.empty());
            Q_ASSERT(!halves.second.empty());

            func.insert(halves.first);
            func.insert(halves.second);

            // blockIt has just been erased, so stop iterating and report the change
            return true;
        }
    }

    return false;
}
474+
475+
/**
 * @brief Analyzer::splitBlocks
 *
 * Runs split_function on every function in @p data until it reports that
 * nothing more was split.
 *
 * @param data the region analysis state whose functions are processed; must not be null
 */
void Analyzer::splitBlocks(RegionData *data) {
    Q_ASSERT(data);

    for (auto it = data->functions.begin(); it != data->functions.end(); ++it) {
        Function &func = it.value();

        // split_function splits at most one block per call and returns true
        // when it did, so keep calling it until it returns false
        while (split_function(func)) {
        }
    }
}
487+
488+
/**
 * @brief Analyzer::computeNonReturning
 *
 * Currently a stub: the body only marks its parameter as unused and performs
 * no analysis.
 *
 * @param data the region analysis state; not read or modified by this body
 */
void Analyzer::computeNonReturning(Analyzer::RegionData *data) {
489+
Q_UNUSED(data);
490+
}
491+
434492
/**
435493
* @brief Analyzer::collectFunctions
436494
* @param data
@@ -693,6 +751,8 @@ void Analyzer::analyze(const std::shared_ptr<IRegion> &region) {
693751
{"attempting to add marked functions to the list...", [this, &region_data]() { bonusMarkedFunctions(&region_data); }},
694752
{"attempting to collect functions with fuzzy analysis...", [this, &region_data]() { collectFuzzyFunctions(&region_data); }},
695753
{"collecting basic blocks...", [this, &region_data]() { collectFunctions(&region_data); }},
754+
{"splitting basic blocks...", [this, &region_data]() { splitBlocks(&region_data); }},
755+
{"computing non-returning functions...", [this, &region_data]() { computeNonReturning(&region_data); }},
696756
};
697757

698758
const int total_steps = sizeof(analysis_steps) / sizeof(analysis_steps[0]);

plugins/Analyzer/Analyzer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ class Analyzer final : public QObject, public IAnalyzer, public IPlugin {
7777
void bonusMarkedFunctions(RegionData *data);
7878
void bonusSymbols(RegionData *data);
7979
void collectFunctions(RegionData *data);
80+
void computeNonReturning(RegionData *data);
81+
void splitBlocks(RegionData *data);
8082
void collectFuzzyFunctions(RegionData *data);
8183
void doAnalysis(const std::shared_ptr<IRegion> &region);
8284
void identHeader(Analyzer::RegionData *data);

plugins/FunctionFinder/DialogResults.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,17 @@ DialogResults::DialogResults(QWidget *parent, Qt::WindowFlags f)
135135
}
136136
}
137137
} else if (is_terminator(inst)) {
138+
} else {
139+
// if the bb's last address is another block's first address
140+
// connect them because they run into each other
141+
142+
auto to = nodes.find(bb.lastAddress());
143+
if (to != nodes.end()) {
144+
auto from = nodes.find(bb.firstAddress());
145+
if (to != nodes.end() && from != nodes.end()) {
146+
new GraphEdge(from.value(), to.value(), Qt::blue);
147+
}
148+
}
138149
}
139150
}
140151
}

src/BasicBlock.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,3 +237,36 @@ void BasicBlock::addReference(edb::address_t refsite, edb::address_t target) {
237237
std::vector<std::pair<edb::address_t, edb::address_t>> BasicBlock::references() const {
238238
return references_;
239239
}
240+
241+
/**
242+
* @brief BasicBlock::references
243+
* @return
244+
*/
245+
std::pair<BasicBlock, BasicBlock> BasicBlock::splitBlock(const instruction_pointer &inst) {
246+
BasicBlock block1;
247+
BasicBlock block2;
248+
249+
auto it = begin();
250+
for (; it != end(); ++it) {
251+
252+
block1.push_back(*it);
253+
if (*it == inst) {
254+
++it;
255+
break;
256+
}
257+
}
258+
259+
for (; it != end(); ++it) {
260+
block2.push_back(*it);
261+
}
262+
263+
for (auto it = references_.begin(); it != references_.end(); ++it) {
264+
if (it->first >= block1.firstAddress() && it->first < block1.lastAddress()) {
265+
block1.addReference(it->first, it->second);
266+
} else {
267+
block2.addReference(it->first, it->second);
268+
}
269+
}
270+
271+
return std::make_pair(block1, block2);
272+
}

src/Function.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ void Function::insert(BasicBlock &&bb) {
5050
* @return
5151
*/
5252
edb::address_t Function::entryAddress() const {
53+
Q_ASSERT(!empty());
5354
return front().firstAddress();
5455
}
5556

@@ -58,6 +59,7 @@ edb::address_t Function::entryAddress() const {
5859
* @return
5960
*/
6061
edb::address_t Function::endAddress() const {
62+
Q_ASSERT(!empty());
6163
return back().lastAddress() - 1;
6264
}
6365

@@ -66,6 +68,7 @@ edb::address_t Function::endAddress() const {
6668
* @return
6769
*/
6870
edb::address_t Function::lastInstruction() const {
71+
Q_ASSERT(!empty());
6972
return back().back()->rva();
7073
}
7174

@@ -214,3 +217,7 @@ Function::Type Function::type() const {
214217
void Function::setType(Type t) {
215218
type_ = t;
216219
}
220+
221+
/**
 * @brief Function::erase
 *
 * Removes the entry designated by @p it from blocks_.
 *
 * @param it iterator to the entry to remove; presumably must be a valid,
 *           dereferenceable iterator into this function's blocks_ - TODO confirm
 *           against the container's erase() requirements
 */
void Function::erase(const_iterator it) {
222+
blocks_.erase(it);
223+
}

0 commit comments

Comments
 (0)