Skip to content

Commit 01fd4f2

Browse files
committed
[TapirUtils] Perform basic updates to TaskInfo analysis when serializing detaches, to support serializing nested tasks.
1 parent 41a3d6e commit 01fd4f2

File tree

6 files changed

+151
-28
lines changed

6 files changed

+151
-28
lines changed

llvm/include/llvm/Transforms/Utils/TapirUtils.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ bool MoveStaticAllocasInBlock(BasicBlock *Entry, BasicBlock *Block,
103103

104104
/// Inline any taskframe.resume markers associated with the given taskframe. If
105105
/// \p DT or \p TI is provided, each one given is updated to reflect the CFG changes.
106-
void InlineTaskFrameResumes(Value *TaskFrame, DominatorTree *DT = nullptr);
106+
void InlineTaskFrameResumes(Value *TaskFrame, DominatorTree *DT = nullptr,
107+
TaskInfo *TI = nullptr);
107108

108109
/// Clone exception-handling blocks EHBlocksToClone, with predecessors
109110
/// EHBlockPreds in a given task. Updates EHBlockPreds to point at the cloned
@@ -131,7 +132,8 @@ void SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry,
131132
SmallPtrSetImpl<LandingPadInst *> *InlinedLPads,
132133
SmallVectorImpl<Instruction *> *DetachedRethrows,
133134
bool ReplaceWithTaskFrame = false,
134-
DominatorTree *DT = nullptr, LoopInfo *LI = nullptr);
135+
DominatorTree *DT = nullptr, TaskInfo *TI = nullptr,
136+
LoopInfo *LI = nullptr);
135137

136138
/// Analyze a task T for serialization. Gets the reattaches, landing pads, and
137139
/// detached rethrows that need special handling during serialization.
@@ -145,7 +147,7 @@ void AnalyzeTaskForSerialization(
145147
/// Serialize the detach DI that spawns task T. If \p DT or \p TI is provided, then it
146148
/// will be updated to reflect the CFG changes.
147149
void SerializeDetach(DetachInst *DI, Task *T, bool ReplaceWithTaskFrame = false,
148-
DominatorTree *DT = nullptr);
150+
DominatorTree *DT = nullptr, TaskInfo *TI = nullptr);
149151

150152
/// Get the entry basic block to the detached context that contains
151153
/// the specified block.

llvm/include/llvm/Transforms/Utils/TaskSimplify.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class TaskSimplifyPass : public PassInfoMixin<TaskSimplifyPass> {
3131
bool simplifySyncs(Task *T, MaybeParallelTasks &MPTasks);
3232

3333
/// Simplify the specified task T.
34-
bool simplifyTask(Task *T);
34+
bool simplifyTask(Task *T, TaskInfo &TI, DominatorTree &DT);
3535

3636
/// Simplify the taskframes analyzed by TapirTaskInfo TI.
3737
bool simplifyTaskFrames(TaskInfo &TI, DominatorTree &DT);

llvm/lib/Transforms/Tapir/LoopStripMine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1099,7 +1099,7 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount,
10991099
SerializeDetach(ClonedDI, ParentEntry, EHCont, EHContLPadVal,
11001100
ClonedReattaches, &ClonedEHBlocks, &ClonedEHBlockPreds,
11011101
&ClonedInlinedLPads, &ClonedDetachedRethrows,
1102-
NeedToInsertTaskFrame, DT, LI);
1102+
NeedToInsertTaskFrame, DT, nullptr, LI);
11031103
}
11041104

11051105
// Detach the stripmined loop.

llvm/lib/Transforms/Utils/TapirUtils.cpp

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -387,11 +387,16 @@ class LandingPadInliningInfo {
387387

388388
/// Dominator tree to update.
389389
DominatorTree *DT = nullptr;
390+
391+
/// TaskInfo to update.
392+
TaskInfo *TI = nullptr;
393+
390394
public:
391395
LandingPadInliningInfo(DetachInst *DI, BasicBlock *EHContinue,
392396
Value *LPadValInEHContinue,
393-
DominatorTree *DT = nullptr)
394-
: OuterResumeDest(EHContinue), SpawnerLPad(LPadValInEHContinue), DT(DT) {
397+
DominatorTree *DT = nullptr, TaskInfo *TI = nullptr)
398+
: OuterResumeDest(EHContinue), SpawnerLPad(LPadValInEHContinue), DT(DT),
399+
TI(TI) {
395400
// Find the predecessor block of OuterResumeDest.
396401
BasicBlock *DetachBB = DI->getParent();
397402
BasicBlock *DetachUnwind = DI->getUnwindDest();
@@ -414,9 +419,9 @@ class LandingPadInliningInfo {
414419
}
415420

416421
LandingPadInliningInfo(InvokeInst *TaskFrameResume,
417-
DominatorTree *DT = nullptr)
422+
DominatorTree *DT = nullptr, TaskInfo *TI = nullptr)
418423
: OuterResumeDest(TaskFrameResume->getUnwindDest()),
419-
SpawnerLPad(TaskFrameResume->getLandingPadInst()), DT(DT) {
424+
SpawnerLPad(TaskFrameResume->getLandingPadInst()), DT(DT), TI(TI) {
420425
// If there are PHI nodes in the unwind destination block, we need to keep
421426
// track of which values came into them from the detach before removing the
422427
// edge from this block.
@@ -484,6 +489,8 @@ BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
484489
for (DomTreeNode *I : Children)
485490
DT->changeImmediateDominator(I, NewNode);
486491
}
492+
if (TI)
493+
TI->addBlockToSpindle(*InnerResumeDest, TI->getSpindleFor(OuterResumeDest));
487494

488495
// The number of incoming edges we expect to the inner landing pad.
489496
const unsigned PHICapacity = 2;
@@ -571,11 +578,15 @@ void LandingPadInliningInfo::forwardTaskResume(InvokeInst *TR) {
571578
if (NormalDest) {
572579
for (BasicBlock *Succ : successors(NormalDest))
573580
maybeRemovePredecessor(Succ, NormalDest);
581+
if (TI)
582+
TI->removeBlock(*NormalDest);
574583
NormalDest->eraseFromParent();
575584
}
576585
if (UnwindDest) {
577586
for (BasicBlock *Succ : successors(UnwindDest))
578587
maybeRemovePredecessor(Succ, UnwindDest);
588+
if (TI)
589+
TI->removeBlock(*UnwindDest);
579590
UnwindDest->eraseFromParent();
580591
}
581592
}
@@ -584,8 +595,8 @@ static void handleDetachedLandingPads(
584595
DetachInst *DI, BasicBlock *EHContinue, Value *LPadValInEHContinue,
585596
SmallPtrSetImpl<LandingPadInst *> &InlinedLPads,
586597
SmallVectorImpl<Instruction *> &DetachedRethrows,
587-
DominatorTree *DT = nullptr) {
588-
LandingPadInliningInfo DetUnwind(DI, EHContinue, LPadValInEHContinue, DT);
598+
DominatorTree *DT = nullptr, TaskInfo *TI = nullptr) {
599+
LandingPadInliningInfo DetUnwind(DI, EHContinue, LPadValInEHContinue, DT, TI);
589600

590601
// Append the clauses from the outer landing pad instruction into the inlined
591602
// landing pad instructions.
@@ -815,13 +826,14 @@ static void getTaskFrameLandingPads(
815826
// Helper method to handle a given taskframe.resume.
816827
static void handleTaskFrameResume(Value *TaskFrame,
817828
Instruction *TaskFrameResume,
818-
DominatorTree *DT = nullptr) {
829+
DominatorTree *DT = nullptr,
830+
TaskInfo *TI = nullptr) {
819831
// Get landingpads to inline.
820832
SmallPtrSet<LandingPadInst *, 1> InlinedLPads;
821833
getTaskFrameLandingPads(TaskFrame, TaskFrameResume, InlinedLPads);
822834

823835
InvokeInst *TFR = cast<InvokeInst>(TaskFrameResume);
824-
LandingPadInliningInfo TFResumeDest(TFR, DT);
836+
LandingPadInliningInfo TFResumeDest(TFR, DT, TI);
825837

826838
// Append the clauses from the outer landing pad instruction into the inlined
827839
// landing pad instructions.
@@ -839,7 +851,8 @@ static void handleTaskFrameResume(Value *TaskFrame,
839851
TFResumeDest.forwardTaskResume(TFR);
840852
}
841853

842-
void llvm::InlineTaskFrameResumes(Value *TaskFrame, DominatorTree *DT) {
854+
void llvm::InlineTaskFrameResumes(Value *TaskFrame, DominatorTree *DT,
855+
TaskInfo *TI) {
843856
SmallVector<Instruction *, 1> TaskFrameResumes;
844857
// Record all taskframe.resume markers that use TaskFrame.
845858
for (User *U : TaskFrame->users())
@@ -849,20 +862,20 @@ void llvm::InlineTaskFrameResumes(Value *TaskFrame, DominatorTree *DT) {
849862

850863
// Handle all taskframe.resume markers.
851864
for (Instruction *TFR : TaskFrameResumes)
852-
handleTaskFrameResume(TaskFrame, TFR, DT);
865+
handleTaskFrameResume(TaskFrame, TFR, DT, TI);
853866
}
854867

855868
static void startSerializingTaskFrame(Value *TaskFrame,
856869
SmallVectorImpl<Instruction *> &ToErase,
857-
DominatorTree *DT,
870+
DominatorTree *DT, TaskInfo *TI,
858871
bool PreserveTaskFrame) {
859872
for (User *U : TaskFrame->users())
860873
if (Instruction *UI = dyn_cast<Instruction>(U))
861874
if (isTapirIntrinsic(Intrinsic::taskframe_use, UI))
862875
ToErase.push_back(UI);
863876

864877
if (!PreserveTaskFrame)
865-
InlineTaskFrameResumes(TaskFrame, DT);
878+
InlineTaskFrameResumes(TaskFrame, DT, TI);
866879
}
867880

868881
void llvm::SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry,
@@ -873,7 +886,9 @@ void llvm::SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry,
873886
SmallPtrSetImpl<LandingPadInst *> *InlinedLPads,
874887
SmallVectorImpl<Instruction *> *DetachedRethrows,
875888
bool ReplaceWithTaskFrame, DominatorTree *DT,
876-
LoopInfo *LI) {
889+
TaskInfo *TI, LoopInfo *LI) {
890+
LLVM_DEBUG(dbgs() << "Serializing detach " << *DI << "\n");
891+
877892
BasicBlock *Spawner = DI->getParent();
878893
BasicBlock *TaskEntry = DI->getDetached();
879894
BasicBlock *Continue = DI->getContinue();
@@ -885,7 +900,7 @@ void llvm::SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry,
885900
SmallVector<Instruction *, 8> ToErase;
886901
Value *TaskFrame = getTaskFrameUsed(TaskEntry);
887902
if (TaskFrame)
888-
startSerializingTaskFrame(TaskFrame, ToErase, DT, ReplaceWithTaskFrame);
903+
startSerializingTaskFrame(TaskFrame, ToErase, DT, TI, ReplaceWithTaskFrame);
889904

890905
// Clone any EH blocks that need cloning.
891906
if (EHBlocksToClone) {
@@ -952,7 +967,7 @@ void llvm::SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry,
952967
} else {
953968
// Otherwise, "inline" the detached landingpads.
954969
handleDetachedLandingPads(DI, EHContinue, LPadValInEHContinue,
955-
*InlinedLPads, *DetachedRethrows, DT);
970+
*InlinedLPads, *DetachedRethrows, DT, TI);
956971
}
957972
}
958973

@@ -1059,7 +1074,7 @@ void llvm::AnalyzeTaskForSerialization(
10591074
/// Serialize the detach DI that spawns task T. If provided, the dominator tree
10601075
/// DT and the TaskInfo TI will be updated to reflect the serialization.
10611076
void llvm::SerializeDetach(DetachInst *DI, Task *T, bool ReplaceWithTaskFrame,
1062-
DominatorTree *DT) {
1077+
DominatorTree *DT, TaskInfo *TI) {
10631078
assert(DI && "SerializeDetach given nullptr for detach.");
10641079
assert(DI == T->getDetach() && "Task and detach arguments do not match.");
10651080
SmallVector<BasicBlock *, 4> EHBlocksToClone;
@@ -1078,7 +1093,9 @@ void llvm::SerializeDetach(DetachInst *DI, Task *T, bool ReplaceWithTaskFrame,
10781093
}
10791094
SerializeDetach(DI, T->getParentTask()->getEntry(), EHContinue, LPadVal,
10801095
Reattaches, &EHBlocksToClone, &EHBlockPreds, &InlinedLPads,
1081-
&DetachedRethrows, ReplaceWithTaskFrame, DT);
1096+
&DetachedRethrows, ReplaceWithTaskFrame, DT, TI);
1097+
if (TI)
1098+
TI->moveSpindlesToParent(T);
10821099
}
10831100

10841101
static bool isCanonicalTaskFrameEnd(const Instruction *TFEnd) {

llvm/lib/Transforms/Utils/TaskSimplify.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -238,10 +238,11 @@ static bool detachImmediatelySyncs(DetachInst *DI) {
238238
return isa<SyncInst>(I);
239239
}
240240

241-
bool llvm::simplifyTask(Task *T) {
241+
bool llvm::simplifyTask(Task *T, TaskInfo &TI, DominatorTree &DT) {
242242
if (T->isRootTask())
243243
return false;
244244

245+
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
245246
LLVM_DEBUG(dbgs() << "Simplifying task @ " << T->getEntry()->getName()
246247
<< "\n");
247248

@@ -254,7 +255,8 @@ bool llvm::simplifyTask(Task *T) {
254255
// destination from T's detach.
255256
if (DI->hasUnwindDest()) {
256257
if (!taskCanThrow(T)) {
257-
removeUnwindEdge(DI->getParent());
258+
LLVM_DEBUG(dbgs() << "Removing unwind edge of " << *DI << "\n");
259+
removeUnwindEdge(DI->getParent(), &DTU);
258260
// removeUnwindEdge will invalidate the DI pointer. Get the new DI
259261
// pointer.
260262
DI = T->getDetach();
@@ -263,13 +265,17 @@ bool llvm::simplifyTask(Task *T) {
263265
}
264266

265267
if (!taskCanReachContinuation(T)) {
268+
LLVM_DEBUG(dbgs() << "Task cannot reach continuation. Serializing " << *DI
269+
<< "\n");
266270
// This optimization assumes that if a task cannot reach its continuation
267271
// then we shouldn't bother spawning it. The task might perform code that
268272
// can reach the unwind destination, however.
269-
SerializeDetach(DI, T, NestedSync);
273+
SerializeDetach(DI, T, NestedSync, &DT, &TI);
270274
Changed = true;
271275
} else if (!PreserveAllSpawns && detachImmediatelySyncs(DI)) {
272-
SerializeDetach(DI, T, NestedSync);
276+
LLVM_DEBUG(dbgs() << "Detach immediately syncs. Serializing " << *DI
277+
<< "\n");
278+
SerializeDetach(DI, T, NestedSync, &DT, &TI);
273279
Changed = true;
274280
}
275281

@@ -651,7 +657,7 @@ bool TaskSimplify::runOnFunction(Function &F) {
651657

652658
// Simplify each task in the function.
653659
for (Task *T : post_order(TI.getRootTask()))
654-
Changed |= simplifyTask(T);
660+
Changed |= simplifyTask(T, TI, DT);
655661

656662
if (PostCleanupCFG && (Changed | SplitBlocks))
657663
Changed |= simplifyFunctionCFG(F, TTI, nullptr, Options);
@@ -729,7 +735,7 @@ PreservedAnalyses TaskSimplifyPass::run(Function &F,
729735

730736
// Simplify each task in the function.
731737
for (Task *T : post_order(TI.getRootTask()))
732-
Changed |= simplifyTask(T);
738+
Changed |= simplifyTask(T, TI, DT);
733739

734740
if (PostCleanupCFG && (Changed | SplitBlocks))
735741
Changed |= simplifyFunctionCFG(F, TTI, nullptr, Options);
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
; Check that nested detaches can be serialized.
2+
;
3+
; RUN: opt < %s -passes="function<eager-inv>(task-simplify)" -S | FileCheck %s
4+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5+
target triple = "arm64-apple-macosx15.0.0"
6+
7+
; Function Attrs: nounwind willreturn memory(argmem: readwrite)
8+
declare token @llvm.syncregion.start() #0
9+
10+
; Function Attrs: willreturn memory(argmem: readwrite)
11+
declare void @llvm.sync.unwind(token) #1
12+
13+
; Function Attrs: willreturn memory(argmem: readwrite)
14+
declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #1
15+
16+
; Function Attrs: nounwind willreturn memory(argmem: readwrite)
17+
declare token @llvm.taskframe.create() #0
18+
19+
; CHECK: define void @_ZNK5Graph17pbfs_walk_PennantEP7PennantIiERH3BagIiEjPj()
20+
; CHECK-NEXT: entry:
21+
; CHECK-NOT: detach within
22+
; CHECK: unreachable
23+
24+
define void @_ZNK5Graph17pbfs_walk_PennantEP7PennantIiERH3BagIiEjPj() personality ptr null {
25+
entry:
26+
%syncreg = tail call token @llvm.syncregion.start()
27+
%syncreg45 = tail call token @llvm.syncregion.start()
28+
%0 = tail call token @llvm.tapir.runtime.start()
29+
detach within %syncreg45, label %pfor.body.entry.tf, label %pfor.inc unwind label %lpad59.loopexit
30+
31+
pfor.body.entry.tf: ; preds = %entry
32+
%tf.i = tail call token @llvm.taskframe.create()
33+
%syncreg.i = tail call token @llvm.syncregion.start()
34+
detach within %syncreg.i, label %pfor.cond.i.strpm.detachloop.entry, label %pfor.cond.cleanup.i unwind label %lpad4924.loopexit.split-lp
35+
36+
pfor.cond.i.strpm.detachloop.entry: ; preds = %pfor.body.entry.tf
37+
%syncreg.i.strpm.detachloop = tail call token @llvm.syncregion.start()
38+
detach within none, label %pfor.body.entry.i.strpm.outer.1, label %pfor.inc.i.strpm.outer.1 unwind label %lpad4924.loopexit.strpm
39+
40+
pfor.body.entry.i.strpm.outer.1: ; preds = %pfor.cond.i.strpm.detachloop.entry
41+
invoke void @llvm.detached.rethrow.sl_p0i32s(token none, { ptr, i32 } zeroinitializer)
42+
to label %lpad4924.unreachable unwind label %lpad4924.loopexit.strpm
43+
44+
pfor.inc.i.strpm.outer.1: ; preds = %pfor.cond.i.strpm.detachloop.entry
45+
sync within none, label %pfor.cond.i.strpm.detachloop.reattach.split
46+
47+
pfor.cond.i.strpm.detachloop.reattach.split: ; preds = %pfor.inc.i.strpm.outer.1
48+
reattach within %syncreg.i, label %pfor.cond.cleanup.i
49+
50+
pfor.cond.cleanup.i: ; preds = %pfor.cond.i.strpm.detachloop.reattach.split, %pfor.body.entry.tf
51+
sync within %syncreg.i, label %sync.continue.i
52+
53+
sync.continue.i: ; preds = %pfor.cond.cleanup.i
54+
invoke void @llvm.sync.unwind(token none)
55+
to label %pfor.preattach unwind label %lpad4924.loopexit.split-lp
56+
57+
lpad4924.loopexit.strpm: ; preds = %pfor.body.entry.i.strpm.outer.1, %pfor.cond.i.strpm.detachloop.entry
58+
%lpad.strpm = landingpad { ptr, i32 }
59+
cleanup
60+
invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg.i, { ptr, i32 } zeroinitializer)
61+
to label %lpad4924.loopexit.strpm.unreachable unwind label %lpad4924.loopexit.split-lp
62+
63+
lpad4924.loopexit.strpm.unreachable: ; preds = %lpad4924.loopexit.strpm
64+
unreachable
65+
66+
lpad4924.loopexit.split-lp: ; preds = %lpad4924.loopexit.strpm, %sync.continue.i, %pfor.body.entry.tf
67+
%lpad.loopexit.split-lp = landingpad { ptr, i32 }
68+
cleanup
69+
call void @llvm.detached.rethrow.sl_p0i32s(token none, { ptr, i32 } zeroinitializer)
70+
unreachable
71+
72+
lpad4924.unreachable: ; preds = %pfor.body.entry.i.strpm.outer.1
73+
unreachable
74+
75+
pfor.preattach: ; preds = %sync.continue.i
76+
reattach within %syncreg45, label %pfor.inc
77+
78+
pfor.inc: ; preds = %pfor.preattach, %entry
79+
ret void
80+
81+
lpad59.loopexit: ; preds = %entry
82+
%lpad.loopexit28 = landingpad { ptr, i32 }
83+
cleanup
84+
tail call void @llvm.tapir.runtime.end(token %0)
85+
resume { ptr, i32 } zeroinitializer
86+
}
87+
88+
; Function Attrs: nounwind willreturn memory(argmem: readwrite)
89+
declare token @llvm.tapir.runtime.start() #0
90+
91+
; Function Attrs: nounwind willreturn memory(argmem: readwrite)
92+
declare void @llvm.tapir.runtime.end(token) #0
93+
94+
; uselistorder directives
95+
uselistorder ptr null, { 1, 2, 0 }
96+
97+
attributes #0 = { nounwind willreturn memory(argmem: readwrite) }
98+
attributes #1 = { willreturn memory(argmem: readwrite) }

0 commit comments

Comments
 (0)