Skip to content

Commit

Permalink
Merge pull request #13 from jrbyrnes/ExtractAlloc
Browse files Browse the repository at this point in the history
Extract alloc
  • Loading branch information
jrbyrnes authored Jul 15, 2024
2 parents fd0556f + acc88ad commit 7bd2798
Show file tree
Hide file tree
Showing 31 changed files with 763 additions and 347 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,6 @@ if(OPTSCHED_INCLUDE_TESTS)
-O3
)
endif()

# Disable the suggest-override and inconsistent-missing-override warnings.
add_compile_options(
  -Wno-suggest-override
  -Wno-inconsistent-missing-override
)
4 changes: 2 additions & 2 deletions include/opt-sched/Scheduler/aco.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ class ACOScheduler : public ConstrainedScheduler {
InstCount upperBound, SchedPriorities priorities, bool vrfySched,
bool IsPostBB, int SolverID);
virtual ~ACOScheduler();
FUNC_RESULT FindSchedule(InstSchedule *schedule, SchedRegion *region);
inline void UpdtRdyLst_(InstCount cycleNum, int slotNum);
FUNC_RESULT FindSchedule(InstSchedule *schedule, SchedRegion *region) override;
inline void UpdtRdyLst_(InstCount cycleNum, int slotNum) override;
// Set the initial schedule for ACO
// Default is NULL if none are set.
void setInitialSched(InstSchedule *Sched);
Expand Down
86 changes: 60 additions & 26 deletions include/opt-sched/Scheduler/bb_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ Last Update: Jan. 2022
#include <mutex>
#include <atomic>
#include <stack>
#include <iostream>
#include <fstream>

namespace llvm {
namespace opt_sched {
Expand All @@ -50,7 +52,6 @@ class BitVector;
class InstPool4 {
private:
std::queue<std::shared_ptr<HalfNode>> pool;
int maxSize_;
int SortMethod_;
int Depth_;
public:
Expand All @@ -71,7 +72,6 @@ class InstPool4 {
class InstPool {
private:
std::queue<std::pair<EnumTreeNode *, unsigned long *>> pool;
int maxSize_;
int SortMethod_;
int Depth_;
public:
Expand Down Expand Up @@ -155,7 +155,6 @@ class BBThread {
int ExitInstCnt_;
int NumberOfInsts_;


// A bit vector indexed by register number indicating whether that
// register is live
WeightedBitVector *LiveRegs_;
Expand Down Expand Up @@ -208,6 +207,8 @@ class BBThread {
bool enblStallEnum, int SCW, SPILL_COST_FUNCTION spillCostFunc,
SchedulerType HeurSchedType);
virtual ~BBThread();
std::mutex *GlobalPoolLock_;
std::ofstream ThreadStream_;

// Stats on the number of nodes examined
// Number of calls to stepfrwrd
Expand All @@ -223,6 +224,19 @@ class BBThread {
// Global Pool Nodes explored
uint64_t GlobalPoolNodes = 0;

int *RegCrntUseCnts;
int *RegNums;
int16_t *RegTypes;

// Bundle of the three per-register values that are stored per Register.
// NOTE(review): the fields appear to mirror the RegCrntUseCnts / RegNums /
// RegTypes arrays declared next to this struct -- confirm against the
// implementation. RegTypes holds int16_t while Type is int; verify that the
// width difference is intentional.
//
// Every member defaults to 0 so that a default-constructed RegFields never
// carries indeterminate values. 0 is also what value-initialization
// (RegFields()) already produced, so existing zero-initialized uses are
// unchanged.
struct RegFields {
  int CrntUseCnt = 0; // presumably the register's current use count
  int Num = 0;        // presumably the register number
  int Type = 0;       // presumably the register type
};

DenseMap<llvm::opt_sched::Register *, RegFields> RegToFields;

void resetRegFields();
// Allocate register structures needed to track cost
void setupForSchdulng();
// Initialize cost and register information (e.g register pressure)
Expand Down Expand Up @@ -253,6 +267,7 @@ class BBThread {
bool chkCostFsblty(InstCount trgtLngth, EnumTreeNode *&treeNode, bool isGlobalPoolNode = false);
// Not Implemented
bool chkInstLgltyBBThread(SchedInstruction *inst);
// Returns a copy of the fields recorded for `reg` in RegToFields, or a
// default-constructed RegFields when `reg` has no entry.
// DenseMap::lookup is used instead of operator[] so that querying an unknown
// register does not insert a new entry into the map as a side effect.
inline RegFields getRegFields(Register *reg) { return RegToFields.lookup(reg); }
// Returns the spill cost from last partial schedule cost calculation
inline InstCount getCrntSpillCost() {return CrntSpillCost_;}
// Returns the peak spill cost from last partial schedule cost calculation
Expand Down Expand Up @@ -373,17 +388,17 @@ class BBThread {

class BBInterfacer : public SchedRegion, public BBThread {
private:
void CmputAbslutUprBound_();
void CmputAbslutUprBound_() override;

InstCount cmputCostLwrBound();
InstCount cmputCostLwrBound() override;

protected:
InstCount *BestCost_;
InstCount *CostLwrBound_;

int NumSolvers_;

void CmputSchedUprBound_();
void CmputSchedUprBound_() override;

// override SchedRegion virtual
void InitForSchdulng() override {return initForSchdulng();}
Expand All @@ -408,7 +423,7 @@ class BBInterfacer : public SchedRegion, public BBThread {
void setBestCost(InstCount BestCost) override { *BestCost_ = BestCost; }

InstCount UpdtOptmlSched(InstSchedule *crntSched,
LengthCostEnumerator *enumrtr);
LengthCostEnumerator *enumrtr) override;


public:
Expand All @@ -417,23 +432,25 @@ class BBInterfacer : public SchedRegion, public BBThread {
SchedPriorities hurstcPrirts, SchedPriorities enumPrirts,
bool vrfySched, Pruning PruningStrategy, bool SchedForRPOnly,
bool enblStallEnum, int SCW, SPILL_COST_FUNCTION spillCostFunc,
SchedulerType HeurSchedType);
SchedulerType HeurSchedType, SmallVector<MemAlloc<EnumTreeNode> *, 16> &EnumNodeAllocs,
SmallVector<MemAlloc<CostHistEnumTreeNode> *, 16> &HistNodeAllocs,
SmallVector<MemAlloc<BinHashTblEntry<HistEnumTreeNode>> *, 16> &HashTablAllocs);


inline void SchdulInst(SchedInstruction *inst, InstCount cycleNum, InstCount slotNum,
bool trackCnflcts)
bool trackCnflcts) override
{
schdulInst(inst, cycleNum, slotNum, trackCnflcts);
}

inline void UnschdulInst(SchedInstruction *inst, InstCount cycleNum,
InstCount slotNum, EnumTreeNode *trgtNode)
InstCount slotNum, EnumTreeNode *trgtNode) override
{
unschdulInst(inst, cycleNum, slotNum, trgtNode);
}

inline InstCount CmputNormCost_(InstSchedule *sched, COST_COMP_MODE compMode,
InstCount &execCost, bool trackCnflcts)
InstCount &execCost, bool trackCnflcts) override
{
return cmputNormCost(sched, compMode, execCost, trackCnflcts);
}
Expand All @@ -442,6 +459,8 @@ class BBInterfacer : public SchedRegion, public BBThread {
RegisterFile *regFiles_,
DataDepGraph *dataDepGraph_);

//RegFields getRegFields(Register *reg) override {return RegToFields[reg];}

bool isSecondPass() override { return isSecondPass_; }

bool isWorker() override {return false;}
Expand Down Expand Up @@ -472,7 +491,7 @@ class BBInterfacer : public SchedRegion, public BBThread {



inline InstCount getHeuristicCost() {return GetHeuristicCost();}
inline InstCount getHeuristicCost() override {return GetHeuristicCost();}

};

Expand All @@ -489,13 +508,17 @@ class BBWithSpill : public BBInterfacer {
bool vrfySched, Pruning PruningStrategy, bool SchedForRPOnly,
bool enblStallEnum, int SCW, SPILL_COST_FUNCTION spillCostFunc,
SchedulerType HeurSchedType, int timeoutToMemblock, bool isTwoPass,
bool IsTimeoutPerInst);
bool IsTimeoutPerInst, SmallVector<MemAlloc<EnumTreeNode> *, 16> &EnumNodeAllocs,
SmallVector<MemAlloc<CostHistEnumTreeNode> *, 16> &HistNodeAllocs,
SmallVector<MemAlloc<BinHashTblEntry<HistEnumTreeNode>> *, 16> &HashTablAllocs);


FUNC_RESULT Enumerate_(Milliseconds startTime, Milliseconds rgnTimeout,
Milliseconds lngthTimeout, int *OptimalSolverID) override;

Enumerator *AllocEnumrtr_(Milliseconds timeout);
Enumerator *AllocEnumrtr_(Milliseconds timeout, SmallVector<MemAlloc<EnumTreeNode> *, 16> &EnumNodeAllocs,
SmallVector<MemAlloc<CostHistEnumTreeNode> *, 16> &HistNodeAllocs,
SmallVector<MemAlloc<BinHashTblEntry<HistEnumTreeNode>> *, 16> &HashTablAllocs) override;

uint64_t getExaminedNodeCount() override {return Enumrtr_->GetNodeCnt(); }

Expand Down Expand Up @@ -567,7 +590,7 @@ class BBWorker : public BBThread {

// References to the locks on shared data
std::mutex **HistTableLock_;
std::mutex *GlobalPoolLock_;
// std::mutex *GlobalPoolLock_;
std::mutex *BestSchedLock_;
std::mutex *NodeCountLock_;
std::mutex *ImprvmntCntLock_;
Expand All @@ -589,15 +612,15 @@ class BBWorker : public BBThread {
void handlEnumrtrRslt_(FUNC_RESULT rslt, InstCount trgtLngth);

// overrides
inline InstCount getBestCost() {return *MasterCost_;}
inline void setBestCost(InstCount BestCost) {
inline InstCount getBestCost() override {return *MasterCost_;}
inline void setBestCost(InstCount BestCost) override {
BestCost_ = BestCost;
}

inline InstCount getCrntScheduleCost() {return MasterSched_->GetCost();}
inline InstCount getCrntScheduleCost() override {return MasterSched_->GetCost();}


InstCount UpdtOptmlSched(InstSchedule *crntSched, LengthCostEnumerator *enumrtr);
InstCount UpdtOptmlSched(InstSchedule *crntSched, LengthCostEnumerator *enumrtr) override;

void writeBestSchedToMaster(InstSchedule *BestSchedule, InstCount BestCost, InstCount BestSpill);

Expand Down Expand Up @@ -625,7 +648,9 @@ class BBWorker : public BBThread {
int timeoutToMemblock, int64_t **subspaceLwrBounds);

~BBWorker();
/*

// std::mutex *GlobalPoolLock_;
/*
BBWorker (const BBWorker&) = delete;
BBWorker& operator= (const BBWorker&) = delete;
*/
Expand All @@ -634,7 +659,8 @@ class BBWorker : public BBThread {

void setHeurInfo(InstCount SchedUprBound, InstCount HeuristicCost, InstCount SchedLwrBound);

void allocEnumrtr_(Milliseconds timeout);
void allocEnumrtr_(Milliseconds timeout, MemAlloc<EnumTreeNode> *EnumNodeAlloc,
MemAlloc<CostHistEnumTreeNode> *HistNodeAlloc, MemAlloc<BinHashTblEntry<HistEnumTreeNode>> *HashTablAlloc);
void initEnumrtr_(bool scheduleRoot = true);
void setLCEElements_(InstCount costLwrBound);
void setLowerBounds_(InstCount costLwrBound);
Expand Down Expand Up @@ -692,7 +718,7 @@ class BBWorker : public BBThread {

bool isWorker() override {return true;}

inline InstCount getHeuristicCost() {return HeuristicCost_;}
inline InstCount getHeuristicCost() override {return HeuristicCost_;}

inline void setCostLowerBound(InstCount StaticLowerBound) {
StaticLowerBound_ = StaticLowerBound;
Expand All @@ -703,6 +729,8 @@ class BBWorker : public BBThread {

inline void setRegionSchedule(InstSchedule *RegionSched) {RegionSched_ = RegionSched;}

//RegFields getRegFields(Register *reg) override {return RegToFields[reg];}

void histTableLock(UDT_HASHVAL key) override;
void histTableUnlock(UDT_HASHVAL key) override;

Expand Down Expand Up @@ -755,6 +783,7 @@ class BBMaster : public BBInterfacer {
int InactiveThreads_;
int NumThreadsToLaunch_;

SmallVector<std::ofstream, 16> ThreadStreams_;
std::mutex **HistTableLock;
std::mutex GlobalPoolLock;
std::mutex BestSchedLock;
Expand All @@ -774,7 +803,6 @@ class BBMaster : public BBInterfacer {
int LocalPoolSize_;
float ExploitationPercent_;
SPILL_COST_FUNCTION GlobalPoolSCF_;
int GlobalPoolSort_;

bool WorkSteal_;
bool WorkStealOn_;
Expand All @@ -801,7 +829,9 @@ class BBMaster : public BBInterfacer {
bool initGlobalPool();
bool init();
void setWorkerHeurInfo();
Enumerator *allocEnumHierarchy_(Milliseconds timeout, bool *fsbl);
Enumerator *allocEnumHierarchy_(Milliseconds timeout, bool *fsbl, SmallVector<MemAlloc<EnumTreeNode> *, 16> &EnumNodeAllocs,
SmallVector<MemAlloc<CostHistEnumTreeNode> *, 16> &HistNodeAllocs,
SmallVector<MemAlloc<BinHashTblEntry<HistEnumTreeNode>> *, 16> &HashTablAllocs);

inline BinHashTable<HistEnumTreeNode> *getEnumHistTable() {
return Enumrtr_->getHistTable();
Expand All @@ -817,14 +847,18 @@ class BBMaster : public BBInterfacer {
int MinSplittingDepth,
int MaxSplittingDepth, int NumSolvers, int LocalPoolSize, float ExploitationPercent,
SPILL_COST_FUNCTION GlobalPoolSCF, int GlobalPoolSort, bool WorkSteal, bool IsTimeoutPerInst,
int timeoutToMemblock, bool isTwoPass);
int timeoutToMemblock, bool isTwoPass, SmallVector<MemAlloc<EnumTreeNode> *, 16> &EnumNodeAllocs,
SmallVector<MemAlloc<CostHistEnumTreeNode> *, 16> &HistNodeAllocs,
SmallVector<MemAlloc<BinHashTblEntry<HistEnumTreeNode>> *, 16> &HashTablAllocs);

~BBMaster();

BBMaster (const BBMaster&) = delete;
BBMaster& operator= (const BBMaster&) = delete;

Enumerator *AllocEnumrtr_(Milliseconds timeout);
Enumerator *AllocEnumrtr_(Milliseconds timeout, SmallVector<MemAlloc<EnumTreeNode> *, 16> &EnumNodeAllocs,
SmallVector<MemAlloc<CostHistEnumTreeNode> *, 16> &HistNodeAllocs,
SmallVector<MemAlloc<BinHashTblEntry<HistEnumTreeNode>> *, 16> &HashTablAllocs) override;


FUNC_RESULT Enumerate_(Milliseconds startTime, Milliseconds rgnTimeout,
Expand Down
22 changes: 12 additions & 10 deletions include/opt-sched/Scheduler/data_dep.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ class DataDepGraph : public llvm::opt_sched::OptSchedDDGWrapperBase,

virtual ~DataDepGraph();


MachineFunction *MF_ = nullptr;

void resetThreadWriteFields(int SolverID = -1, bool full = true);

// Reads the data dependence graph from a text file.
Expand All @@ -203,10 +206,10 @@ class DataDepGraph : public llvm::opt_sched::OptSchedDDGWrapperBase,
float GetWeight() const;

// Given an instruction number, return a pointer to the instruction object.
SchedInstruction *GetInstByIndx(InstCount instIndx);
SchedInstruction *GetInstByIndx(InstCount instIndx) override;

SchedInstruction *GetInstByTplgclOrdr(InstCount ordr);
SchedInstruction *GetInstByRvrsTplgclOrdr(InstCount ordr);
SchedInstruction *GetInstByTplgclOrdr(InstCount ordr) override;
SchedInstruction *GetInstByRvrsTplgclOrdr(InstCount ordr) override;

// Setup the Dep. Graph for scheduling by doing a topological sort
// followed by critical path computation
Expand All @@ -224,8 +227,8 @@ class DataDepGraph : public llvm::opt_sched::OptSchedDDGWrapperBase,
void GetCrntLwrBounds(DIRECTION dir, InstCount crntlwrBounds[], int SolverID);
void SetCrntLwrBounds(DIRECTION dir, InstCount crntlwrBounds[], int SolverID);

SchedInstruction *GetRootInst();
SchedInstruction *GetLeafInst();
SchedInstruction *GetRootInst() override;
SchedInstruction *GetLeafInst() override;

UDT_GLABEL GetMaxLtncySum();
UDT_GLABEL GetMaxLtncy();
Expand Down Expand Up @@ -267,16 +270,16 @@ class DataDepGraph : public llvm::opt_sched::OptSchedDDGWrapperBase,
void CountDeps(InstCount &totDepCnt, InstCount &crossDepCnt, int SolverID);

int GetBscBlkCnt();
bool IsInGraph(SchedInstruction *inst);
InstCount GetInstIndx(SchedInstruction *inst);
bool IsInGraph(SchedInstruction *inst) override;
InstCount GetInstIndx(SchedInstruction *inst) override;
InstCount GetRltvCrtclPath(SchedInstruction *ref, SchedInstruction *inst,
DIRECTION dir);
DIRECTION dir) override;
void SetCrntFrwrdLwrBound(SchedInstruction *inst, int SolverID);
void SetSttcLwrBounds();
void SetDynmcLwrBounds();
void CreateEdge(SchedInstruction *frmNode, SchedInstruction *toNode,
int ltncy, DependenceType depType);
InstCount GetDistFrmLeaf(SchedInstruction *inst, int SolverID = INVALID_VALUE);
InstCount GetDistFrmLeaf(SchedInstruction *inst, int SolverID = INVALID_VALUE) override;

void SetPrblmtc();
bool IsPrblmtc();
Expand Down Expand Up @@ -350,7 +353,6 @@ class DataDepGraph : public llvm::opt_sched::OptSchedDDGWrapperBase,
SmallVector<std::unique_ptr<GraphTrans>, 0> graphTrans_;

MachineModel *machMdl_;
MachineFunction *MF_ = nullptr;

bool backTrackEnbl_;

Expand Down
Loading

0 comments on commit 7bd2798

Please sign in to comment.