Skip to content

Commit

Permalink
Parallelize nesterovBase
Browse files Browse the repository at this point in the history
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
  • Loading branch information
kbieganski committed Jan 24, 2024
1 parent 589dee1 commit ec35de2
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 41 deletions.
8 changes: 4 additions & 4 deletions src/gpl/include/gpl/Replace.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,11 @@ class Replace
utl::Logger* logger);
void reset();

void doIncrementalPlace();
void doIncrementalPlace(int threads);
void doInitialPlace();
void runMBFF(int max_sz, float alpha, float beta, int threads);

int doNesterovPlace(int start_iter = 0);
int doNesterovPlace(int threads, int start_iter = 0);

// Initial Place param settings
void setInitialPlaceMaxIter(int iter);
Expand All @@ -107,7 +107,7 @@ class Replace
void setMinPhiCoef(float minPhiCoef);
void setMaxPhiCoef(float maxPhiCoef);

float getUniformTargetDensity();
float getUniformTargetDensity(int threads);

// HPWL: half-perimeter wire length.
void setReferenceHpwl(float refHpwl);
Expand Down Expand Up @@ -144,7 +144,7 @@ class Replace
odb::dbInst* inst = nullptr);

private:
bool initNesterovPlace();
bool initNesterovPlace(int threads);

odb::dbDatabase* db_ = nullptr;
sta::dbSta* sta_ = nullptr;
Expand Down
105 changes: 83 additions & 22 deletions src/gpl/src/nesterovBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@

#include "nesterovBase.h"

#include <omp.h>

#include <algorithm>
#include <cmath>
#include <iostream>
Expand Down Expand Up @@ -595,6 +597,7 @@ static unsigned int roundDownToPowerOfTwo(unsigned int x)

void BinGrid::initBins()
{
assert(omp_get_thread_num() == 0);
int64_t totalBinArea
= static_cast<int64_t>(ux_ - lx_) * static_cast<int64_t>(uy_ - ly_);

Expand Down Expand Up @@ -650,16 +653,17 @@ void BinGrid::initBins()
log_->info(GPL, 29, "BinSize: {} {}", binSizeX_, binSizeY_);

// initialize bins_ vector
bins_.reserve(binCntX_ * (size_t) binCntY_);
bins_.resize(binCntX_ * (size_t) binCntY_);
#pragma omp parallel for num_threads(num_threads_)
for (int idxY = 0; idxY < binCntY_; ++idxY) {
for (int idxX = 0; idxX < binCntX_; ++idxX) {
const int x = lx_ + idxX * binSizeX_;
const int y = ly_ + idxY * binSizeY_;
const int sizeX = std::min(ux_ - x, binSizeX_);
const int sizeY = std::min(uy_ - y, binSizeY_);

bins_.emplace_back(
idxX, idxY, x, y, x + sizeX, y + sizeY, targetDensity_);
bins_[idxY * binCntX_ + idxX]
= Bin(idxX, idxY, x, y, x + sizeX, y + sizeY, targetDensity_);
}
}

Expand All @@ -671,11 +675,14 @@ void BinGrid::initBins()

void BinGrid::updateBinsNonPlaceArea()
{
assert(omp_get_thread_num() == 0);
#pragma omp parallel for num_threads(num_threads_)
for (auto& bin : bins_) {
bin.setNonPlaceArea(0);
bin.setNonPlaceAreaUnscaled(0);
}

#pragma omp parallel for num_threads(num_threads_)
for (auto& inst : pb_->nonPlaceInsts()) {
std::pair<int, int> pairX = getMinMaxIdxX(inst);
std::pair<int, int> pairY = getMinMaxIdxY(inst);
Expand Down Expand Up @@ -703,12 +710,16 @@ void BinGrid::updateBinsNonPlaceArea()
// Core Part
void BinGrid::updateBinsGCellDensityArea(const std::vector<GCell*>& cells)
{
// clear the Bin-area info
for (Bin& bin : bins_) {
bin.setInstPlacedArea(0);
bin.setInstPlacedAreaUnscaled(0);
bin.setFillerArea(0);
}
assert(omp_get_thread_num() == 0);
struct BinAreas
{
float instPlacedArea = 0;
float instPlacedAreaUnscaled = 0;
float fillerArea = 0;
};
static std::vector<BinAreas> bin_areas;
bin_areas.clear();
bin_areas.resize(bins_.size() * num_threads_);

for (auto& cell : cells) {
std::pair<int, int> pairX = getDensityMinMaxIdxX(cell);
Expand All @@ -724,12 +735,13 @@ void BinGrid::updateBinsGCellDensityArea(const std::vector<GCell*>& cells)
for (int i = pairX.first; i < pairX.second; i++) {
for (int j = pairY.first; j < pairY.second; j++) {
Bin& bin = bins_[j * binCntX_ + i];

const float scaledAvea = getOverlapDensityArea(bin, cell)
BinAreas& areas = bin_areas[num_threads_ * (j * binCntX_ + i)
+ omp_get_thread_num()];
const float scaledArea = getOverlapDensityArea(bin, cell)
* cell->densityScale()
* bin.targetDensity();
bin.addInstPlacedArea(scaledAvea);
bin.addInstPlacedAreaUnscaled(scaledAvea);
areas.instPlacedArea += scaledArea;
areas.instPlacedAreaUnscaled += scaledArea;
}
}
}
Expand All @@ -738,19 +750,23 @@ void BinGrid::updateBinsGCellDensityArea(const std::vector<GCell*>& cells)
for (int i = pairX.first; i < pairX.second; i++) {
for (int j = pairY.first; j < pairY.second; j++) {
Bin& bin = bins_[j * binCntX_ + i];
BinAreas& areas = bin_areas[num_threads_ * (j * binCntX_ + i)
+ omp_get_thread_num()];
const float scaledArea
= getOverlapDensityArea(bin, cell) * cell->densityScale();
bin.addInstPlacedArea(scaledArea);
bin.addInstPlacedAreaUnscaled(scaledArea);
areas.instPlacedArea += scaledArea;
areas.instPlacedAreaUnscaled += scaledArea;
}
}
}
} else if (cell->isFiller()) {
for (int i = pairX.first; i < pairX.second; i++) {
for (int j = pairY.first; j < pairY.second; j++) {
Bin& bin = bins_[j * binCntX_ + i];
bin.addFillerArea(getOverlapDensityArea(bin, cell)
* cell->densityScale());
BinAreas& areas = bin_areas[num_threads_ * (j * binCntX_ + i)
+ omp_get_thread_num()];
areas.fillerArea
+= getOverlapDensityArea(bin, cell) * cell->densityScale();
}
}
}
Expand All @@ -760,7 +776,19 @@ void BinGrid::updateBinsGCellDensityArea(const std::vector<GCell*>& cells)
overflowAreaUnscaled_ = 0;
// update density and overflowArea
// for nesterov use and FFT library
for (Bin& bin : bins_) {
#pragma omp parallel for num_threads(num_threads_) \
reduction(+ : overflowArea_, overflowAreaUnscaled_)
for (size_t i = 0; i < bins_.size(); i++) {
Bin& bin = bins_[i];
bin.setInstPlacedArea(0);
bin.setInstPlacedAreaUnscaled(0);
bin.setFillerArea(0);
for (size_t j = 0; j < num_threads_; j++) {
BinAreas& areas = bin_areas[num_threads_ * i + j];
bin.addInstPlacedArea(areas.instPlacedArea);
bin.addInstPlacedAreaUnscaled(areas.instPlacedAreaUnscaled);
bin.addFillerArea(areas.fillerArea);
}
int64_t binArea = bin.binArea();
const float scaledBinArea
= static_cast<float>(binArea * bin.targetDensity());
Expand Down Expand Up @@ -843,8 +871,11 @@ void NesterovPlaceVars::reset()

NesterovBaseCommon::NesterovBaseCommon(NesterovBaseVars nbVars,
std::shared_ptr<PlacerBaseCommon> pbc,
utl::Logger* log)
utl::Logger* log,
size_t num_threads)
: num_threads_{num_threads}
{
assert(omp_get_thread_num() == 0);
nbVars_ = nbVars;
pbc_ = std::move(pbc);
log_ = log;
Expand Down Expand Up @@ -899,6 +930,7 @@ NesterovBaseCommon::NesterovBaseCommon(NesterovBaseVars nbVars,
}

// gCellStor_'s pins_ fill
#pragma omp parallel for num_threads(num_threads_)
for (auto& gCell : gCellStor_) {
if (gCell.isFiller()) {
continue;
Expand All @@ -910,12 +942,14 @@ NesterovBaseCommon::NesterovBaseCommon(NesterovBaseVars nbVars,
}

// gPinStor_'s GNet and GCell fill
#pragma omp parallel for num_threads(num_threads_)
for (auto& gPin : gPinStor_) {
gPin.setGCell(pbToNb(gPin.pin()->instance()));
gPin.setGNet(pbToNb(gPin.pin()->net()));
}

// gNetStor_'s GPin fill
#pragma omp parallel for num_threads(num_threads_)
for (auto& gNet : gNetStor_) {
for (auto& pin : gNet.net()->pins()) {
gNet.addGPin(pbToNb(pin));
Expand Down Expand Up @@ -972,15 +1006,16 @@ GNet* NesterovBaseCommon::dbToNb(odb::dbNet* net) const
// in ePlace paper.
void NesterovBaseCommon::updateWireLengthForceWA(float wlCoeffX, float wlCoeffY)
{
assert(omp_get_thread_num() == 0);
// clear all WA variables.
for (auto& gNet : gNets_) {
gNet->clearWaVars();
}
#pragma omp parallel for num_threads(num_threads_)
for (auto& gPin : gPins_) {
gPin->clearWaVars();
}

#pragma omp parallel for num_threads(num_threads_)
for (auto& gNet : gNets_) {
gNet->clearWaVars();
gNet->updateBox();

for (auto& gPin : gNet->gPins()) {
Expand Down Expand Up @@ -1182,6 +1217,8 @@ FloatPoint NesterovBaseCommon::getWireLengthPreconditioner(

void NesterovBaseCommon::updateDbGCells()
{
assert(omp_get_thread_num() == 0);
#pragma omp parallel for num_threads(num_threads_)
for (auto& gCell : gCells()) {
if (gCell->isInstance()) {
odb::dbInst* inst = gCell->instance()->dbInst();
Expand All @@ -1198,7 +1235,9 @@ void NesterovBaseCommon::updateDbGCells()

int64_t NesterovBaseCommon::getHpwl()
{
assert(omp_get_thread_num() == 0);
int64_t hpwl = 0;
#pragma omp parallel for num_threads(num_threads_) reduction(+ : hpwl)
for (auto& gNet : gNets_) {
gNet->updateBox();
hpwl += gNet->hpwl();
Expand Down Expand Up @@ -1436,8 +1475,10 @@ void NesterovBase::updateGCellDensityCenterLocation(

void NesterovBase::setTargetDensity(float density)
{
assert(omp_get_thread_num() == 0);
targetDensity_ = density;
bg_.setTargetDensity(density);
#pragma omp parallel for num_threads(nbc_->getNumThreads())
for (auto& bin : bins()) {
bin.setTargetDensity(density);
}
Expand Down Expand Up @@ -1540,6 +1581,8 @@ float NesterovBase::targetDensity() const
// update densitySize and densityScale in each gCell
void NesterovBase::updateDensitySize()
{
assert(omp_get_thread_num() == 0);
#pragma omp parallel for num_threads(nbc_->getNumThreads())
for (auto& gCell : gCells_) {
float scaleX = 0, scaleY = 0;
float densitySizeX = 0, densitySizeY = 0;
Expand Down Expand Up @@ -1568,9 +1611,12 @@ void NesterovBase::updateDensitySize()

void NesterovBase::updateAreas()
{
assert(omp_get_thread_num() == 0);
// bloating can change the following :
// stdInstsArea and macroInstsArea
stdInstsArea_ = macroInstsArea_ = 0;
#pragma omp parallel for num_threads(nbc_->getNumThreads()) \
reduction(+ : stdInstsArea_, macroInstsArea_)
for (auto* gCell : gCells_) {
if (gCell->isMacroInstance()) {
macroInstsArea_ += static_cast<int64_t>(gCell->dx())
Expand Down Expand Up @@ -1720,7 +1766,9 @@ FloatPoint NesterovBase::getDensityGradient(const GCell* gCell) const
// Density force cals
void NesterovBase::updateDensityForceBin()
{
assert(omp_get_thread_num() == 0);
// copy density to utilize FFT
#pragma omp parallel for num_threads(nbc_->getNumThreads())
for (Bin& bin : bg_.bins()) {
fft_->updateDensity(bin.x(), bin.y(), bin.density());
}
Expand All @@ -1731,6 +1779,8 @@ void NesterovBase::updateDensityForceBin()
// update electroPhi and electroForce
// update sumPhi_ for nesterov loop
sumPhi_ = 0;
#pragma omp parallel for num_threads(nbc_->getNumThreads()) \
reduction(+ : sumPhi_)
for (Bin& bin : bg_.bins()) {
auto eForcePair = fft_->getElectroForce(bin.x(), bin.y());
bin.setElectroForce(eForcePair.first, eForcePair.second);
Expand All @@ -1746,6 +1796,7 @@ void NesterovBase::updateDensityForceBin()

void NesterovBase::initDensity1()
{
assert(omp_get_thread_num() == 0);
const int gCellSize = gCells_.size();
curSLPCoordi_.resize(gCellSize, FloatPoint());
curSLPWireLengthGrads_.resize(gCellSize, FloatPoint());
Expand All @@ -1767,6 +1818,7 @@ void NesterovBase::initDensity1()

initCoordi_.resize(gCellSize, FloatPoint());

#pragma omp parallel for num_threads(nbc_->getNumThreads())
for (auto& gCell : gCells_) {
updateDensityCoordiLayoutInside(gCell);
int idx = &gCell - &gCells_[0];
Expand Down Expand Up @@ -1860,6 +1912,7 @@ void NesterovBase::updateGradients(std::vector<FloatPoint>& sumGrads,
float wlCoeffX,
float wlCoeffY)
{
assert(omp_get_thread_num() == 0);
if (isConverged_) {
return;
}
Expand All @@ -1872,6 +1925,8 @@ void NesterovBase::updateGradients(std::vector<FloatPoint>& sumGrads,
debugPrint(
log_, GPL, "updateGrad", 1, "DensityPenalty: {:g}", densityPenalty_);

#pragma omp parallel for num_threads(nbc_->getNumThreads()) \
reduction(+ : wireLengthGradSum_, densityGradSum_, gradSum)
for (size_t i = 0; i < gCells_.size(); i++) {
GCell* gCell = gCells_.at(i);
wireLengthGrads[i]
Expand Down Expand Up @@ -1954,6 +2009,8 @@ void NesterovBase::updateNextGradient(float wlCoeffX, float wlCoeffY)

void NesterovBase::updateInitialPrevSLPCoordi()
{
assert(omp_get_thread_num() == 0);
#pragma omp parallel for num_threads(nbc_->getNumThreads())
for (size_t i = 0; i < gCells_.size(); i++) {
GCell* curGCell = gCells_[i];

Expand Down Expand Up @@ -2011,6 +2068,7 @@ float NesterovBase::getPhiCoef(float scaledDiffHpwl) const

void NesterovBase::updateNextIter(const int iter)
{
assert(omp_get_thread_num() == 0);
if (isConverged_) {
return;
}
Expand All @@ -2022,6 +2080,7 @@ void NesterovBase::updateNextIter(const int iter)
std::swap(prevSLPSumGrads_, curSLPSumGrads_);

// Prevent locked instances from moving
#pragma omp parallel for num_threads(nbc_->getNumThreads())
for (size_t k = 0; k < gCells_.size(); ++k) {
if (gCells_[k]->isInstance() && gCells_[k]->instance()->isLocked()) {
nextSLPCoordi_[k] = curSLPCoordi_[k];
Expand Down Expand Up @@ -2205,6 +2264,7 @@ void NesterovBase::snapshot()

bool NesterovBase::checkConvergence()
{
assert(omp_get_thread_num() == 0);
if (isConverged_) {
return true;
}
Expand All @@ -2219,6 +2279,7 @@ bool NesterovBase::checkConvergence()
sumOverflowUnscaled_);
}

#pragma omp parallel for num_threads(nbc_->getNumThreads())
for (auto& gCell : gCells_) {
if (!gCell->isInstance()) {
continue;
Expand Down
Loading

0 comments on commit ec35de2

Please sign in to comment.