Skip to content

Commit e86e58b

Browse files
authored
Implementation of C++ metrics parallelization (#734)
1 parent e9f01db commit e86e58b

File tree

5 files changed

+317
-149
lines changed

5 files changed

+317
-149
lines changed

plugins/cpp/model/include/model/cppfunction.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,6 @@ struct CppFunctionParamCountWithId
5656

5757
#pragma db column("count(" + Parameters::id + ")")
5858
std::size_t count;
59-
60-
#pragma db column(File::path)
61-
std::string filePath;
6259
};
6360

6461
#pragma db view \
@@ -80,9 +77,6 @@ struct CppFunctionMcCabe
8077

8178
#pragma db column(CppFunction::mccabe)
8279
unsigned int mccabe;
83-
84-
#pragma db column(File::path)
85-
std::string filePath;
8680
};
8781

8882
#pragma db view \
@@ -99,9 +93,6 @@ struct CppFunctionBumpyRoad
9993

10094
#pragma db column(CppFunction::statementCount)
10195
unsigned int statementCount;
102-
103-
#pragma db column(File::path)
104-
std::string filePath;
10596
};
10697

10798
}

plugins/cpp_metrics/model/include/model/cppcohesionmetrics.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,6 @@ struct CohesionCppRecordView
2323

2424
#pragma db column(CppEntity::astNodeId)
2525
CppAstNodeId astNodeId;
26-
27-
#pragma db column(File::path)
28-
std::string filePath;
2926
};
3027

3128
#pragma db view \

plugins/cpp_metrics/parser/include/cppmetricsparser/cppmetricsparser.h

Lines changed: 147 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,43 @@
1313
#include <model/cpprecord.h>
1414
#include <model/cpprecord-odb.hxx>
1515

16+
#include <util/dbutil.h>
1617
#include <util/parserutil.h>
1718
#include <util/threadpool.h>
19+
#include <util/odbtransaction.h>
1820

1921
namespace cc
2022
{
2123
namespace parser
2224
{
23-
25+
26+
/// @brief A read-only range over one packet of tasks.
/// The object stores only a pair of iterators into a std::vector<TTask>
/// together with an element count; it does not copy the tasks themselves.
/// @tparam TTask The type of a single task.
template<typename TTask>
class MetricsTasks
{
public:
  /// Constant iterator type of the vector the tasks are stored in.
  using TTaskIter = typename std::vector<TTask>::const_iterator;

  /// @param begin_ Iterator to the first task of the packet.
  /// @param end_ Iterator one past the last task of the packet.
  /// @param size_ The number of tasks in the packet. The value is stored
  /// as given; it is not recomputed from the iterators.
  MetricsTasks(
    const TTaskIter& begin_,
    const TTaskIter& end_,
    std::size_t size_)
    : _first(begin_), _last(end_), _count(size_)
  {
  }

  /// @return Iterator to the first task (usable in range-based for loops).
  const TTaskIter& begin() const { return _first; }

  /// @return Iterator one past the last task.
  const TTaskIter& end() const { return _last; }

  /// @return The task count that was passed to the constructor.
  std::size_t size() const { return _count; }

private:
  TTaskIter _first;
  TTaskIter _last;
  std::size_t _count;
};
51+
52+
2453
class CppMetricsParser : public AbstractParser
2554
{
2655
public:
@@ -41,10 +70,126 @@ class CppMetricsParser : public AbstractParser
4170
// and member functions for every type.
4271
void lackOfCohesion();
4372

73+
74+
/// @brief Constructs an ODB query that you can use to filter only
75+
/// the database records of the given parameter type whose path
76+
/// is rooted under any of this parser's input paths.
77+
/// @tparam TQueryParam The type of database records to query.
78+
/// This type must represent an ODB view that has access to
79+
/// (i.e. is also joined with) the File table.
80+
/// @return A query containing the disjunction of filters.
81+
template<typename TQueryParam>
82+
odb::query<TQueryParam> getFilterPathsQuery() const
83+
{
84+
return cc::util::getFilterPathsQuery<TQueryParam>(
85+
_inputPaths.begin(), _inputPaths.end());
86+
}
87+
88+
/// @brief Calculates a metric by querying all objects of the
89+
/// specified parameter type and passing them one-by-one to the
90+
/// specified worker function on parallel threads.
91+
/// This call blocks the caller thread until all workers are finished.
92+
/// @tparam TQueryParam The type of parameters to query.
93+
/// @param name_ The name of the metric (for progress logging).
94+
/// @param partitions_ The number of jobs to partition the query into.
95+
/// @param query_ A filter query for retrieving only
96+
/// the eligible parameters for which a worker should be spawned.
97+
/// @param worker_ The logic of the worker thread.
98+
template<typename TQueryParam>
99+
void parallelCalcMetric(
100+
const char* name_,
101+
std::size_t partitions_,
102+
const odb::query<TQueryParam>& query_,
103+
const std::function<void(const MetricsTasks<TQueryParam>&)>& worker_)
104+
{
105+
typedef MetricsTasks<TQueryParam> TMetricsTasks;
106+
typedef typename TMetricsTasks::TTaskIter TTaskIter;
107+
typedef std::pair<std::size_t, TMetricsTasks> TJobParam;
108+
109+
// Define the thread pool and job wrapper function.
110+
LOG(info) << name_ << " : Collecting jobs from database...";
111+
std::unique_ptr<util::JobQueueThreadPool<TJobParam>> pool =
112+
util::make_thread_pool<TJobParam>(_threadCount,
113+
[&](const TJobParam& job)
114+
{
115+
LOG(info) << '(' << job.first << '/' << partitions_
116+
<< ") " << name_;
117+
worker_(job.second);
118+
});
119+
120+
// Cache the results of the query that will be dispatched to workers.
121+
std::vector<TQueryParam> tasks;
122+
util::OdbTransaction {_ctx.db} ([&, this]
123+
{
124+
// Storing the result directly and then calling odb::result<>::cache()
125+
// on it does not work: odb::result<>::size() will always throw
126+
// odb::result_not_cached. As of writing, this is a limitation of SQLite.
127+
// So we fall back to the old-fashioned way: std::vector<> in memory.
128+
for (const TQueryParam& param : _ctx.db->query<TQueryParam>(query_))
129+
tasks.emplace_back(param);
130+
});
131+
132+
// Ensure that all workers receive at least one task.
133+
std::size_t taskCount = tasks.size();
134+
if (partitions_ > taskCount)
135+
partitions_ = taskCount;
136+
137+
// Dispatch jobs to workers in discrete packets.
138+
LOG(info) << name_ << " : Dispatching jobs on "
139+
<< _threadCount << " thread(s)...";
140+
std::size_t prev = 0;
141+
TTaskIter it_prev = tasks.cbegin();
142+
143+
std::size_t i = 0;
144+
while (i < partitions_)
145+
{
146+
std::size_t next = taskCount * ++i / partitions_;
147+
std::size_t size = next - prev;
148+
TTaskIter it_next = it_prev;
149+
std::advance(it_next, size);
150+
151+
pool->enqueue(TJobParam(i, TMetricsTasks(it_prev, it_next, size)));
152+
153+
prev = next;
154+
it_prev = it_next;
155+
}
156+
157+
// Await the termination of all workers.
158+
pool->wait();
159+
LOG(info) << name_ << " : Calculation finished.";
160+
}
161+
162+
/// @brief Calculates a metric by querying all objects of the
163+
/// specified parameter type and passing them one-by-one to the
164+
/// specified worker function on parallel threads.
165+
/// This call blocks the caller thread until all workers are finished.
166+
/// @tparam TQueryParam The type of parameters to query.
167+
/// @param name_ The name of the metric (for progress logging).
168+
/// @param partitions_ The number of jobs to partition the query into.
169+
/// @param worker_ The logic of the worker thread.
170+
template<typename TQueryParam>
171+
void parallelCalcMetric(
172+
const char* name_,
173+
std::size_t partitions_,
174+
const std::function<void(const MetricsTasks<TQueryParam>&)>& worker_)
175+
{
176+
parallelCalcMetric<TQueryParam>(
177+
name_,
178+
partitions_,
179+
odb::query<TQueryParam>(),
180+
worker_);
181+
}
182+
183+
184+
int _threadCount;
44185
std::vector<std::string> _inputPaths;
45186
std::unordered_set<model::FileId> _fileIdCache;
46187
std::unordered_map<model::CppAstNodeId, model::FileId> _astNodeIdCache;
47-
std::unique_ptr<util::JobQueueThreadPool<std::string>> _pool;
188+
189+
// Per-metric partition multipliers, one constant for each metric.
// NOTE(review): presumably each value is multiplied by _threadCount to
// obtain the partitions_ argument of parallelCalcMetric, so that every
// thread gets several jobs; the call sites are not visible in this
// header, so confirm against the implementation file.
static const int functionParamsPartitionMultiplier = 5;
190+
static const int functionMcCabePartitionMultiplier = 5;
191+
static const int functionBumpyRoadPartitionMultiplier = 5;
192+
// The cohesion metric uses a larger multiplier than the function-level
// metrics above (25 versus 5).
static const int lackOfCohesionPartitionMultiplier = 25;
48193
};
49194

50195
} // parser

0 commit comments

Comments
 (0)