Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of C++ metrics parallelization #734

Merged
merged 9 commits into from
Jun 17, 2024
9 changes: 0 additions & 9 deletions plugins/cpp/model/include/model/cppfunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,6 @@ struct CppFunctionParamCountWithId

#pragma db column("count(" + Parameters::id + ")")
std::size_t count;

#pragma db column(File::path)
std::string filePath;
};

#pragma db view \
Expand All @@ -80,9 +77,6 @@ struct CppFunctionMcCabe

#pragma db column(CppFunction::mccabe)
unsigned int mccabe;

#pragma db column(File::path)
std::string filePath;
};

#pragma db view \
Expand All @@ -99,9 +93,6 @@ struct CppFunctionBumpyRoad

#pragma db column(CppFunction::statementCount)
unsigned int statementCount;

#pragma db column(File::path)
std::string filePath;
};

}
Expand Down
3 changes: 0 additions & 3 deletions plugins/cpp_metrics/model/include/model/cppcohesionmetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ struct CohesionCppRecordView

#pragma db column(CppEntity::astNodeId)
CppAstNodeId astNodeId;

#pragma db column(File::path)
std::string filePath;
};

#pragma db view \
Expand Down
149 changes: 147 additions & 2 deletions plugins/cpp_metrics/parser/include/cppmetricsparser/cppmetricsparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,43 @@
#include <model/cpprecord.h>
#include <model/cpprecord-odb.hxx>

#include <util/dbutil.h>
#include <util/parserutil.h>
#include <util/threadpool.h>
#include <util/odbtransaction.h>

namespace cc
{
namespace parser
{


/// @brief A lightweight, read-only description of one batch of tasks.
/// The batch is expressed as an iterator range over a std::vector<TTask>
/// together with an explicitly supplied element count. The elements are
/// not copied or owned: only the two iterators are stored, so the vector
/// they point into must stay alive and unmodified while the batch is used.
/// @tparam TTask The element type of the underlying task vector.
template<typename TTask>
class MetricsTasks
{
public:
  /// Iterator type used to delimit the batch.
  using TTaskIter = typename std::vector<TTask>::const_iterator;

  /// @brief Creates a batch covering the range [begin_, end_).
  /// @param begin_ Iterator to the first task of the batch.
  /// @param end_ Iterator one past the last task of the batch.
  /// @param size_ The number of tasks in the batch. The value is stored
  /// as given; it is not recomputed from the iterators.
  MetricsTasks(
    const TTaskIter& begin_,
    const TTaskIter& end_,
    std::size_t size_
  ) :
    _first(begin_),
    _last(end_),
    _count(size_)
  {
  }

  /// @return Iterator to the first task of the batch.
  const TTaskIter& begin() const
  {
    return _first;
  }

  /// @return Iterator one past the last task of the batch.
  const TTaskIter& end() const
  {
    return _last;
  }

  /// @return The task count that was passed to the constructor.
  std::size_t size() const
  {
    return _count;
  }

private:
  TTaskIter _first;
  TTaskIter _last;
  std::size_t _count;
};


class CppMetricsParser : public AbstractParser
{
public:
Expand All @@ -41,10 +70,126 @@ class CppMetricsParser : public AbstractParser
// and member functions for every type.
void lackOfCohesion();


/// @brief Builds an ODB query for the given view type from this parser's
/// input paths by forwarding the whole _inputPaths range to
/// cc::util::getFilterPathsQuery().
/// The resulting query is meant to keep only those records whose file
/// path is rooted under one of the input paths.
/// @tparam TQueryParam The ODB view type to build the query for.
/// The view must be joined with the File table so that the path
/// can be filtered on.
/// @return The query produced by cc::util::getFilterPathsQuery()
/// (a disjunction of the individual path filters).
template<typename TQueryParam>
odb::query<TQueryParam> getFilterPathsQuery() const
{
  // In a const member function cbegin()/cend() yield exactly the same
  // iterators as begin()/end() on the (const) path list.
  return cc::util::getFilterPathsQuery<TQueryParam>(
    _inputPaths.cbegin(),
    _inputPaths.cend());
}

/// @brief Calculates a metric in parallel.
/// All records of the given view type that match the query are loaded
/// into memory, split into consecutive batches, and each batch is handed
/// to the worker function as one job of a thread pool that is created
/// with _threadCount threads.
/// This call blocks the caller thread until all workers are finished.
/// @tparam TQueryParam The type of the records (ODB view) to query.
/// @param name_ The name of the metric (used only for progress logging).
/// @param partitions_ The requested number of jobs (batches). It is
/// lowered to the number of queried records if it exceeds that, so no
/// batch is empty. If it ends up being zero, no job is dispatched at all.
/// @param query_ A filter query selecting the records to process.
/// @param worker_ The logic executed for each batch. It receives the
/// batch as a MetricsTasks range and is invoked from the pool's job
/// callback.
template<typename TQueryParam>
void parallelCalcMetric(
  const char* name_,
  std::size_t partitions_,
  const odb::query<TQueryParam>& query_,
  const std::function<void(const MetricsTasks<TQueryParam>&)>& worker_)
{
  using TBatch = MetricsTasks<TQueryParam>;
  using TBatchIter = typename TBatch::TTaskIter;
  // A job is the 1-based index of the batch paired with the batch itself.
  using TJob = std::pair<std::size_t, TBatch>;

  // Set up the thread pool; every job logs its progress, then runs the
  // worker on its batch.
  LOG(info) << name_ << " : Collecting jobs from database...";
  std::unique_ptr<util::JobQueueThreadPool<TJob>> pool =
    util::make_thread_pool<TJob>(_threadCount,
      [&](const TJob& job_)
      {
        LOG(info) << '(' << job_.first << '/' << partitions_
          << ") " << name_;
        worker_(job_.second);
      });

  // Load every matching record into memory before dispatching.
  std::vector<TQueryParam> records;
  util::OdbTransaction {_ctx.db} ([&, this]
  {
    // Keeping the odb::result<> and calling cache() on it is not an
    // option: odb::result<>::size() would always throw
    // odb::result_not_cached (a limitation of SQLite as of writing).
    // Hence the plain in-memory std::vector<> copy.
    for (const TQueryParam& record : _ctx.db->query<TQueryParam>(query_))
    {
      records.push_back(record);
    }
  });

  // Never create more batches than there are records,
  // so that every batch contains at least one record.
  const std::size_t recordCount = records.size();
  if (recordCount < partitions_)
  {
    partitions_ = recordCount;
  }

  LOG(info) << name_ << " : Dispatching jobs on "
    << _threadCount << " thread(s)...";

  // Batch k (1-based) covers the records with indices in
  // [recordCount * (k - 1) / partitions_, recordCount * k / partitions_).
  std::size_t batchStart = 0;
  TBatchIter batchBegin = records.cbegin();
  for (std::size_t batchNo = 1; batchNo <= partitions_; ++batchNo)
  {
    const std::size_t batchStop = recordCount * batchNo / partitions_;
    const std::size_t batchSize = batchStop - batchStart;
    const TBatchIter batchEnd = std::next(batchBegin, batchSize);

    pool->enqueue(TJob(batchNo, TBatch(batchBegin, batchEnd, batchSize)));

    batchStart = batchStop;
    batchBegin = batchEnd;
  }

  // Block until every dispatched job has been processed.
  pool->wait();
  LOG(info) << name_ << " : Calculation finished.";
}

/// @brief Convenience overload of parallelCalcMetric() that takes no
/// filter query. It forwards to the query-taking overload with a
/// default-constructed odb::query<TQueryParam>, i.e. without adding any
/// filter of its own.
/// This call blocks the caller thread until all workers are finished.
/// @tparam TQueryParam The type of the records (ODB view) to query.
/// @param name_ The name of the metric (for progress logging).
/// @param partitions_ The number of jobs to partition the query into.
/// @param worker_ The logic executed by the worker threads.
template<typename TQueryParam>
void parallelCalcMetric(
  const char* name_,
  std::size_t partitions_,
  const std::function<void(const MetricsTasks<TQueryParam>&)>& worker_)
{
  // A default-constructed query carries no filter condition.
  const auto noFilter = odb::query<TQueryParam>();
  parallelCalcMetric<TQueryParam>(name_, partitions_, noFilter, worker_);
}


// Number of threads handed to the thread pool created in
// parallelCalcMetric().
int _threadCount;
// Input paths that getFilterPathsQuery() turns into a path filter query.
std::vector<std::string> _inputPaths;
std::unordered_set<model::FileId> _fileIdCache;
std::unordered_map<model::CppAstNodeId, model::FileId> _astNodeIdCache;
std::unique_ptr<util::JobQueueThreadPool<std::string>> _pool;

// Per-metric partition multipliers.
// NOTE(review): presumably each is multiplied by the thread count to obtain
// the partitions_ argument of parallelCalcMetric() - confirm at the call
// sites in the implementation file.
static const int functionParamsPartitionMultiplier = 5;
static const int functionMcCabePartitionMultiplier = 5;
static const int functionBumpyRoadPartitionMultiplier = 5;
static const int lackOfCohesionPartitionMultiplier = 25;
};

} // parser
Expand Down
Loading
Loading