From 1e8b750c0437944c0bdc299b818e94c970df9c19 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 26 Jan 2024 10:03:49 +0100 Subject: [PATCH] JIT: Factor loop duplication code (#97506) Factor the loop duplication code out of loop cloning and loop unrolling in anticipation of also using it in loop peeling. --- src/coreclr/jit/compiler.h | 4 + src/coreclr/jit/compiler.hpp | 35 +++-- src/coreclr/jit/flowgraph.cpp | 170 ++++++++++++++++++++ src/coreclr/jit/loopcloning.cpp | 191 ++-------------------- src/coreclr/jit/optimizer.cpp | 270 ++++++++++++-------------------- 5 files changed, 314 insertions(+), 356 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9bf467fb7f1e4..ee779d375939d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2188,6 +2188,9 @@ class FlowGraphNaturalLoop bool HasDef(unsigned lclNum); + bool CanDuplicate(INDEBUG(const char** reason)); + void Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale, bool bottomNeedsRedirection); + #ifdef DEBUG static void Dump(FlowGraphNaturalLoop* loop); #endif // DEBUG @@ -6785,6 +6788,7 @@ class Compiler void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info) bool optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR); + void optRedirectPrevUnrollIteration(FlowGraphNaturalLoop* loop, BasicBlock* prevTestBlock, BasicBlock* target); void optReplaceScalarUsesWithConst(BasicBlock* block, unsigned lclNum, ssize_t cnsVal); void optRemoveRedundantZeroInits(); PhaseStatus optIfConversion(); // If conversion diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index c9ab69f198cbc..22a36940820ee 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4893,27 +4893,40 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocks(TFunc func) template <typename TFunc> BasicBlockVisit 
FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func) { - BasicBlock* top = m_header; - BasicBlock* bottom = m_header; + BasicBlock* top = m_header; + unsigned numLoopBlocks = 0; VisitLoopBlocks([&](BasicBlock* block) { if (block->bbNum < top->bbNum) + { top = block; - if (block->bbNum > bottom->bbNum) - bottom = block; + } + + numLoopBlocks++; return BasicBlockVisit::Continue; }); - BasicBlock* block = top; - while (true) + INDEBUG(BasicBlock* prev = nullptr); + BasicBlock* cur = top; + while (numLoopBlocks > 0) { - if (ContainsBlock(block) && (func(block) == BasicBlockVisit::Abort)) - return BasicBlockVisit::Abort; + // If we run out of blocks the blocks aren't sequential. + assert(cur != nullptr); - if (block == bottom) - return BasicBlockVisit::Continue; + if (ContainsBlock(cur)) + { + assert((prev == nullptr) || (prev->bbNum < cur->bbNum)); - block = block->Next(); + if (func(cur) == BasicBlockVisit::Abort) + return BasicBlockVisit::Abort; + + INDEBUG(prev = cur); + numLoopBlocks--; + } + + cur = cur->Next(); } + + return BasicBlockVisit::Continue; } /*****************************************************************************/ diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index d475ff7ec1c2b..9837027af31fd 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5409,6 +5409,176 @@ bool FlowGraphNaturalLoop::HasDef(unsigned lclNum) return !result; } +//------------------------------------------------------------------------ +// CanDuplicate: Check if this loop can be duplicated. +// +// Parameters: +// reason - If this function returns false, the reason why. +// +// Returns: +// True if the loop can be duplicated. +// +// Remarks: +// We currently do not support duplicating loops with EH constructs in them. 
+// +bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) +{ +#ifdef DEBUG + const char* localReason; + if (reason == nullptr) + { + reason = &localReason; + } +#endif + + Compiler* comp = m_dfsTree->GetCompiler(); + BasicBlockVisit result = VisitLoopBlocks([=](BasicBlock* block) { + if (comp->bbIsTryBeg(block)) + { + INDEBUG(*reason = "Loop has a `try` begin"); + return BasicBlockVisit::Abort; + } + + return BasicBlockVisit::Continue; + }); + + return result != BasicBlockVisit::Abort; +} + +//------------------------------------------------------------------------ +// Duplicate: Duplicate the blocks of this loop, inserting them after `insertAfter`. +// +// Parameters: +// insertAfter - [in, out] Block to insert duplicated blocks after; updated to last block inserted. +// map - A map that will have mappings from loop blocks to duplicated blocks added to it. +// weightScale - Factor to scale weight of new blocks by +// bottomNeedsRedirection - Whether or not to insert a redirection block for the bottom block in case of fallthrough +// +// Remarks: +// Due to fallthrough this block may need to insert blocks with no +// corresponding source block in "map". +// +void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, + BlockToBlockMap* map, + weight_t weightScale, + bool bottomNeedsRedirection) +{ + assert(CanDuplicate(nullptr)); + + Compiler* comp = m_dfsTree->GetCompiler(); + + BasicBlock* bottom = GetLexicallyBottomMostBlock(); + + VisitLoopBlocksLexical([=](BasicBlock* blk) { + // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later + // with BasicBlock::CopyTarget(). 
+ BasicBlock* newBlk = comp->fgNewBBafter(BBJ_ALWAYS, *insertAfter, /*extendRegion*/ true); + JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, + (*insertAfter)->bbNum); + + BasicBlock::CloneBlockState(comp, newBlk, blk); + + // We're going to create the preds below, which will set the bbRefs properly, + // so clear out the cloned bbRefs field. + newBlk->bbRefs = 0; + + newBlk->scaleBBWeight(weightScale); + + // If the loop we're cloning contains nested loops, we need to clear the pre-header bit on + // any nested loop pre-header blocks, since they will no longer be loop pre-headers. + // + // TODO-Cleanup: BBF_LOOP_PREHEADER can be removed; we do not attempt + // to keep it up to date anymore when we do FG changes. + // + if (newBlk->HasFlag(BBF_LOOP_PREHEADER)) + { + JITDUMP("Removing BBF_LOOP_PREHEADER flag from nested cloned loop block " FMT_BB "\n", newBlk->bbNum); + newBlk->RemoveFlags(BBF_LOOP_PREHEADER); + } + + *insertAfter = newBlk; + map->Set(blk, newBlk, BlockToBlockMap::Overwrite); + + // If the block falls through to a block outside the loop then we may + // need to insert a new block to redirect. + // Skip this once we get to the bottom block if our cloned version is + // going to fall into the right version anyway. + if (blk->bbFallsThrough() && !ContainsBlock(blk->Next()) && ((blk != bottom) || bottomNeedsRedirection)) + { + if (blk->KindIs(BBJ_COND)) + { + BasicBlock* targetBlk = blk->GetFalseTarget(); + assert(blk->NextIs(targetBlk)); + + // Need to insert a block. 
+ BasicBlock* newRedirBlk = + comp->fgNewBBafter(BBJ_ALWAYS, *insertAfter, /* extendRegion */ true, targetBlk); + newRedirBlk->copyEHRegion(*insertAfter); + newRedirBlk->bbWeight = blk->Next()->bbWeight; + newRedirBlk->CopyFlags(blk->Next(), (BBF_RUN_RARELY | BBF_PROF_WEIGHT)); + newRedirBlk->scaleBBWeight(weightScale); + + JITDUMP(FMT_BB " falls through to " FMT_BB "; inserted redirection block " FMT_BB "\n", blk->bbNum, + blk->Next()->bbNum, newRedirBlk->bbNum); + // This block isn't part of the loop, so below loop won't add + // refs for it. + comp->fgAddRefPred(targetBlk, newRedirBlk); + *insertAfter = newRedirBlk; + } + else + { + assert(!"Cannot handle fallthrough"); + } + } + + return BasicBlockVisit::Continue; + }); + + // Now go through the new blocks, remapping their jump targets within the loop + // and updating the preds lists. + VisitLoopBlocks([=](BasicBlock* blk) { + BasicBlock* newBlk = nullptr; + bool b = map->Lookup(blk, &newBlk); + assert(b && newBlk != nullptr); + + // Jump target should not be set yet + assert(!newBlk->HasInitializedTarget()); + + // First copy the jump destination(s) from "blk". + newBlk->CopyTarget(comp, blk); + + // Now redirect the new block according to "blockMap". + comp->optRedirectBlock(newBlk, map); + + // Add predecessor edges for the new successors, as well as the fall-through paths. 
+ switch (newBlk->GetKind()) + { + case BBJ_ALWAYS: + case BBJ_CALLFINALLY: + case BBJ_CALLFINALLYRET: + comp->fgAddRefPred(newBlk->GetTarget(), newBlk); + break; + + case BBJ_COND: + comp->fgAddRefPred(newBlk->GetFalseTarget(), newBlk); + comp->fgAddRefPred(newBlk->GetTrueTarget(), newBlk); + break; + + case BBJ_SWITCH: + for (BasicBlock* const switchDest : newBlk->SwitchTargets()) + { + comp->fgAddRefPred(switchDest, newBlk); + } + break; + + default: + break; + } + + return BasicBlockVisit::Continue; + }); +} + //------------------------------------------------------------------------ // IterConst: Get the constant with which the iterator is modified // diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 730eb91738f67..8955d53528b75 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1778,36 +1778,20 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c return false; } - // Make sure the loop doesn't have any embedded exception handling. - // Walk the loop blocks from lexically first to lexically last (all blocks in this region must be - // part of the loop), looking for a `try` begin block. Note that a loop must entirely contain any - // EH region, or be itself entirely contained within an EH region. Thus, looking just for a `try` - // begin is sufficient; there is no need to look for other EH constructs, such as a `catch` begin. - // - // TODO: this limitation could be removed if we do the work to insert new EH regions in the exception table, - // for the cloned loop (and its embedded EH regions). - // - BasicBlockVisit result = loop->VisitLoopBlocks([=](BasicBlock* blk) { - if (bbIsTryBeg(blk)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". 
It has a `try` begin.\n", loop->GetIndex()); - return BasicBlockVisit::Abort; - } - - return BasicBlockVisit::Continue; - }); - - if (result == BasicBlockVisit::Abort) + INDEBUG(const char* reason); + if (!loop->CanDuplicate(INDEBUG(&reason))) { + JITDUMP("Loop cloning: rejecting loop " FMT_LP ": %s\n", loop->GetIndex(), reason); return false; } #ifdef DEBUG - // With the EH constraint above verified it is not possible for - // BBJ_RETURN blocks to be part of the loop; a BBJ_RETURN block can - // only be part of the loop if its exceptional flow can reach the - // header, but that would require the handler to also be part of - // the loop, which guarantees that the loop contains two distinct + // Today we will never see any BBJ_RETURN blocks because we cannot + // duplicate loops with EH in them. When we have no try-regions that start + // in the loop it is not possible for BBJ_RETURN blocks to be part of the + // loop; a BBJ_RETURN block can only be part of the loop if its exceptional + // flow can reach the header, but that would require the handler to also be + // part of the loop, which guarantees that the loop contains two distinct // EH regions. loop->VisitLoopBlocks([](BasicBlock* block) { assert(!block->KindIs(BBJ_RETURN)); @@ -1830,7 +1814,7 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c assert(loop->EntryEdges().size() == 1); BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); - // If the head and entry are in different EH regions, reject. + // If the preheader and header are in different EH regions, reject. if (!BasicBlock::sameEHRegion(preheader, loop->GetHeader())) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Preheader and header blocks are in different EH regions.\n", @@ -1838,23 +1822,6 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c return false; } - // Is the first block after the last block of the loop a handler or filter start? 
- // Usually, we create a dummy block after the original loop, to skip over the loop clone - // and go to where the original loop did. That raises problems when we don't actually go to - // that block; this is one of those cases. This could be fixed fairly easily; for example, - // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the - // loop. This is just a corner to cut to get this working faster. - // TODO: Should rework this to avoid the lexically bottom most block here. - BasicBlock* bottom = loop->GetLexicallyBottomMostBlock(); - - BasicBlock* bbAfterLoop = bottom->Next(); - if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Next block after bottom is a handler start.\n", - loop->GetIndex()); - return false; - } - assert(!requireIterable || !lvaVarAddrExposed(iterInfo->IterVar)); if (requireIterable) @@ -1961,12 +1928,8 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex #ifdef DEBUG if (verbose) { - printf("\nCloning loop " FMT_LP " with blocks:\n", loop->GetIndex()); - - loop->VisitLoopBlocksReversePostOrder([](BasicBlock* block) { - printf(" " FMT_BB "\n", block->bbNum); - return BasicBlockVisit::Continue; - }); + printf("\nCloning "); + FlowGraphNaturalLoop::Dump(loop); } #endif @@ -2078,139 +2041,17 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - loop->VisitLoopBlocksLexical([=, &newPred](BasicBlock* blk) { - // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later - // with BasicBlock::CopyTarget(). 
- BasicBlock* newBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); - JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); - - BasicBlock::CloneBlockState(this, newBlk, blk); - - // We're going to create the preds below, which will set the bbRefs properly, - // so clear out the cloned bbRefs field. - newBlk->bbRefs = 0; - - newBlk->scaleBBWeight(slowPathWeightScaleFactor); - blk->scaleBBWeight(fastPathWeightScaleFactor); - - // TODO: scale the pred edges of `blk`? - - // If the loop we're cloning contains nested loops, we need to clear the pre-header bit on - // any nested loop pre-header blocks, since they will no longer be loop pre-headers. - // - // TODO-Cleanup: BBF_LOOP_PREHEADER can be removed; we do not attempt - // to keep it up to date anymore when we do FG changes. - // - if (newBlk->HasFlag(BBF_LOOP_PREHEADER)) - { - JITDUMP("Removing BBF_LOOP_PREHEADER flag from nested cloned loop block " FMT_BB "\n", newBlk->bbNum); - newBlk->RemoveFlags(BBF_LOOP_PREHEADER); - } - - newBlk->RemoveFlags(BBF_OLD_LOOP_HEADER_QUIRK); - - newPred = newBlk; - blockMap->Set(blk, newBlk); - - // If the block falls through to a block outside the loop then we may - // need to insert a new block to redirect. - // Skip this for the bottom block; we duplicate the slow loop such that - // the bottom block will fall through to the bottom's original next. - if ((blk != bottom) && blk->bbFallsThrough() && !loop->ContainsBlock(blk->Next())) - { - if (blk->KindIs(BBJ_COND)) - { - BasicBlock* targetBlk = blk->GetFalseTarget(); - assert(blk->NextIs(targetBlk)); - - // Need to insert a block. 
- BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, targetBlk); - newRedirBlk->copyEHRegion(newPred); - newRedirBlk->bbWeight = blk->Next()->bbWeight; - newRedirBlk->CopyFlags(blk->Next(), (BBF_RUN_RARELY | BBF_PROF_WEIGHT)); - newRedirBlk->scaleBBWeight(slowPathWeightScaleFactor); - - JITDUMP(FMT_BB " falls through to " FMT_BB "; inserted redirection block " FMT_BB "\n", blk->bbNum, - blk->Next()->bbNum, newRedirBlk->bbNum); - // This block isn't part of the loop, so below loop won't add - // refs for it. - fgAddRefPred(targetBlk, newRedirBlk); - newPred = newRedirBlk; - } - else - { - assert(!"Cannot handle fallthrough"); - } - } + loop->Duplicate(&newPred, blockMap, slowPathWeightScaleFactor, /* bottomNeedsRedirection */ false); + // Scale old blocks to the fast path weight. + loop->VisitLoopBlocks([=](BasicBlock* block) { + block->scaleBBWeight(fastPathWeightScaleFactor); return BasicBlockVisit::Continue; }); // Perform the static optimizations on the fast path. optPerformStaticOptimizations(loop, context DEBUGARG(true)); - // Now go through the new blocks, remapping their jump targets within the loop - // and updating the preds lists. - loop->VisitLoopBlocks([=](BasicBlock* blk) { - BasicBlock* newblk = nullptr; - bool b = blockMap->Lookup(blk, &newblk); - assert(b && newblk != nullptr); - - // Jump target should not be set yet - assert(!newblk->HasInitializedTarget()); - - // First copy the jump destination(s) from "blk". - newblk->CopyTarget(this, blk); - - // Now redirect the new block according to "blockMap". - optRedirectBlock(newblk, blockMap); - - // Add predecessor edges for the new successors, as well as the fall-through paths. 
- switch (newblk->GetKind()) - { - case BBJ_ALWAYS: - case BBJ_CALLFINALLY: - case BBJ_CALLFINALLYRET: - fgAddRefPred(newblk->GetTarget(), newblk); - break; - - case BBJ_COND: - fgAddRefPred(newblk->GetFalseTarget(), newblk); - fgAddRefPred(newblk->GetTrueTarget(), newblk); - break; - - case BBJ_SWITCH: - for (BasicBlock* const switchDest : newblk->SwitchTargets()) - { - fgAddRefPred(switchDest, newblk); - } - break; - - default: - break; - } - - return BasicBlockVisit::Continue; - }); - -#ifdef DEBUG - // Display the preds for the new blocks, after all the new blocks have been redirected. - JITDUMP("Preds after loop copy:\n"); - loop->VisitLoopBlocksReversePostOrder([=](BasicBlock* blk) { - BasicBlock* newblk = nullptr; - bool b = blockMap->Lookup(blk, &newblk); - assert(b && newblk != nullptr); - JITDUMP(FMT_BB ":", newblk->bbNum); - for (BasicBlock* const predBlock : newblk->PredBlocks()) - { - JITDUMP(" " FMT_BB, predBlock->bbNum); - } - JITDUMP("\n"); - - return BasicBlockVisit::Continue; - }); -#endif // DEBUG - // Insert the loop choice conditions. We will create the following structure: // // [preheader] diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 492b317ef5ece..26b35a4d3d1a4 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1478,26 +1478,22 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) } // clang-format on - // After this point, assume we've changed the IR. In particular, we call gtSetStmtInfo() which - // can modify the IR. We may still fail to unroll if the EH region conditions don't hold, or if - // the size heuristics don't succeed. + INDEBUG(const char* reason); + if (!loop->CanDuplicate(INDEBUG(&reason))) + { + JITDUMP("Failed to unroll loop " FMT_LP ": %s\n", loop->GetIndex(), reason); + return false; + } + + // After this point, assume we've changed the IR. In particular, we call + // gtSetStmtInfo() which can modify the IR. 
*changedIR = true; // Heuristic: Estimated cost in code size of the unrolled loop. ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration - BasicBlockVisit result = loop->VisitLoopBlocksReversePostOrder([=, &loopCostSz](BasicBlock* block) { - - if (!BasicBlock::sameEHRegion(block, loop->GetHeader())) - { - // Unrolling would require cloning EH regions - // Note that only non-funclet model (x86) could actually have a loop including a handler - // but not it's corresponding `try`, if its `try` was moved due to being marked "rare". - JITDUMP("Failed to unroll loop " FMT_LP ": EH constraint\n", loop->GetIndex()); - return BasicBlockVisit::Abort; - } - + loop->VisitLoopBlocksReversePostOrder([=, &loopCostSz](BasicBlock* block) { for (Statement* const stmt : block->Statements()) { gtSetStmtInfo(stmt); @@ -1507,17 +1503,13 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) return BasicBlockVisit::Continue; }); - if (result == BasicBlockVisit::Abort) - { - return false; - } - #ifdef DEBUG - // With the EH constraint above verified it is not possible for - // BBJ_RETURN blocks to be part of the loop; a BBJ_RETURN block can - // only be part of the loop if its exceptional flow can reach the - // header, but that would require the handler to also be part of - // the loop, which guarantees that the loop contains two distinct + // Today we will never see any BBJ_RETURN blocks because we cannot + // duplicate loops with EH in them. When we have no try-regions that start + // in the loop it is not possible for BBJ_RETURN blocks to be part of the + // loop; a BBJ_RETURN block can only be part of the loop if its exceptional + // flow can reach the header, but that would require the handler to also be + // part of the loop, which guarantees that the loop contains two distinct // EH regions. 
loop->VisitLoopBlocks([](BasicBlock* block) { assert(!block->KindIs(BBJ_RETURN)); @@ -1586,153 +1578,51 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) BasicBlock* exit = loop->ContainsBlock(exiting->GetTrueTarget()) ? exiting->GetFalseTarget() : exiting->GetTrueTarget(); - // If the bottom block falls out of the loop, then insert an - // explicit block to branch around the unrolled iterations we are - // going to create. + // If the original bottom block was falling out of the loop, then insert an + // explicit block to branch around the unrolled iterations we created. if (bottom->KindIs(BBJ_COND)) { // TODO-NoFallThrough: Shouldn't need new BBJ_ALWAYS block once bbFalseTarget can diverge from bbNext - BasicBlock* bottomNext = bottom->Next(); - assert(bottom->FalseTargetIs(bottomNext)); + BasicBlock* bottomFalseTarget = bottom->GetFalseTarget(); JITDUMP("Create branch around unrolled loop\n"); - BasicBlock* bottomRedirBlk = fgNewBBafter(BBJ_ALWAYS, bottom, /*extendRegion*/ true, bottomNext); + BasicBlock* bottomRedirBlk = fgNewBBafter(BBJ_ALWAYS, bottom, /*extendRegion*/ true, bottomFalseTarget); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", bottomRedirBlk->bbNum, bottom->bbNum); bottom->SetFalseTarget(bottomRedirBlk); fgAddRefPred(bottomRedirBlk, bottom); JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", bottom->bbNum, bottomRedirBlk->bbNum); - fgReplacePred(bottomNext, bottom, bottomRedirBlk); - JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", bottom->bbNum, bottomNext->bbNum, - bottomRedirBlk->bbNum, bottomNext->bbNum); + fgReplacePred(bottomFalseTarget, bottom, bottomRedirBlk); + JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", bottom->bbNum, + bottomFalseTarget->bbNum, bottomRedirBlk->bbNum, bottomFalseTarget->bbNum); insertAfter = bottomRedirBlk; } for (int lval = lbeg; iterToUnroll > 0; iterToUnroll--) { - BasicBlock* testBlock = nullptr; - 
loop->VisitLoopBlocksLexical([&](BasicBlock* block) { - - // Don't set a jump target for now. - // BasicBlock::CopyTarget() will fix the jump kind/target in the loop below. - BasicBlock* newBlock = fgNewBBafter(BBJ_ALWAYS, insertAfter, /*extendRegion*/ true); - insertAfter = newBlock; - - blockMap.Set(block, newBlock, BlockToBlockMap::Overwrite); - - // Now clone block state and statements from `from` block to `to` block. - // - BasicBlock::CloneBlockState(this, newBlock, block); - - optReplaceScalarUsesWithConst(newBlock, lvar, lval); - - newBlock->RemoveFlags(BBF_OLD_LOOP_HEADER_QUIRK); - - // Block weight should no longer have the loop multiplier - // - // Note this is not quite right, as we may not have upscaled by this amount - // and we might not have upscaled at all, if we had profile data. - // - newBlock->scaleBBWeight(1.0 / BB_LOOP_WEIGHT_SCALE); - - // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them. - assert(newBlock->KindIs(BBJ_ALWAYS)); - assert(!newBlock->HasInitializedTarget()); - - if (block == iterInfo.TestBlock) - { - // Remove the test; we're doing a full unroll. - - Statement* testCopyStmt = newBlock->lastStmt(); - GenTree* testCopyExpr = testCopyStmt->GetRootNode(); - assert(testCopyExpr->gtOper == GT_JTRUE); - GenTree* sideEffList = nullptr; - gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF); - if (sideEffList == nullptr) - { - fgRemoveStmt(newBlock, testCopyStmt); - } - else - { - testCopyStmt->SetRootNode(sideEffList); - } - - // Save the test block of the previously unrolled - // iteration, so that we can redirect it when we create - // the next iteration (or to the exit for the last - // iteration). - assert(testBlock == nullptr); - testBlock = newBlock; - } - else if (block->bbFallsThrough() && !loop->ContainsBlock(block->Next())) - { - assert(block->KindIs(BBJ_COND) && "Cannot handle fallthrough for non BBJ_COND block"); - // Handle fallthrough. 
- // TODO-Quirk: Skip empty blocks and go directly to their destination. - BasicBlock* targetBlk = block->Next(); - if (targetBlk->KindIs(BBJ_ALWAYS) && targetBlk->isEmpty()) - targetBlk = targetBlk->GetTarget(); - - BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, insertAfter, /* extendRegion */ true, targetBlk); - newRedirBlk->copyEHRegion(insertAfter); - newRedirBlk->bbWeight = block->Next()->bbWeight; - newRedirBlk->CopyFlags(block->Next(), BBF_RUN_RARELY | BBF_PROF_WEIGHT); - newRedirBlk->scaleBBWeight(1.0 / BB_LOOP_WEIGHT_SCALE); - - fgAddRefPred(targetBlk, newRedirBlk); - insertAfter = newRedirBlk; - } - - return BasicBlockVisit::Continue; - }); - - assert(testBlock != nullptr); + // Block weight should no longer have the loop multiplier + // + // Note this is not quite right, as we may not have upscaled by this amount + // and we might not have upscaled at all, if we had profile data. + // + weight_t scaleWeight = 1.0 / BB_LOOP_WEIGHT_SCALE; + loop->Duplicate(&insertAfter, &blockMap, scaleWeight, /* bottomNeedsRedirection */ true); - // Now redirect any branches within the newly-cloned iteration. + // Replace all uses of the loop iterator with the current value. loop->VisitLoopBlocks([=, &blockMap](BasicBlock* block) { - // Do not include the test block; we will redirect it on - // the next iteration or after the loop. - if (block == iterInfo.TestBlock) - { - return BasicBlockVisit::Continue; - } - - // Jump kind/target should not be set yet - BasicBlock* newBlock = blockMap[block]; - assert(!newBlock->HasInitializedTarget()); - - // Now copy the jump kind/target - newBlock->CopyTarget(this, block); - optRedirectBlock(newBlock, &blockMap, RedirectBlockOption::AddToPredLists); - + optReplaceScalarUsesWithConst(blockMap[block], lvar, lval); return BasicBlockVisit::Continue; }); - // Redirect previous iteration (or entry) to this iteration. - if (prevTestBlock != nullptr) - { - // Redirect exit edge from previous iteration to new entry. 
- assert(prevTestBlock->KindIs(BBJ_ALWAYS)); - BasicBlock* newHeader = blockMap[loop->GetHeader()]; - prevTestBlock->SetTarget(newHeader); - fgAddRefPred(newHeader, prevTestBlock); - - JITDUMP("Redirecting previously created exiting " FMT_BB " -> " FMT_BB " (unrolled iteration header)\n", - prevTestBlock->bbNum, newHeader->bbNum); - } - else - { - // Redirect all predecessors to the new one. - for (FlowEdge* enterEdge : loop->EntryEdges()) - { - BasicBlock* entering = enterEdge->getSourceBlock(); - JITDUMP("Redirecting " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", entering->bbNum, - loop->GetHeader()->bbNum, entering->bbNum, blockMap[loop->GetHeader()]->bbNum); - assert(!entering->KindIs(BBJ_COND)); // Ensured by canonicalization - optRedirectBlock(entering, &blockMap, Compiler::RedirectBlockOption::UpdatePredLists); - } - } + // Remove the test we created in the duplicate; we're doing a full unroll. + BasicBlock* testBlock = blockMap[iterInfo.TestBlock]; + + optRedirectPrevUnrollIteration(loop, prevTestBlock, blockMap[loop->GetHeader()]); + // Save the test block of the previously unrolled + // iteration, so that we can redirect it when we create + // the next iteration (or to the exit for the last + // iteration). prevTestBlock = testBlock; // update the new value for the unrolled iterator @@ -1761,26 +1651,7 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) // loop invariant is true on every iteration. That means we have a // guarding check before we enter the loop that will always be // false. 
- if (prevTestBlock != nullptr) - { - assert(prevTestBlock->KindIs(BBJ_ALWAYS)); - prevTestBlock->SetTarget(exit); - fgAddRefPred(exit, prevTestBlock); - JITDUMP("Redirecting final iteration exiting " FMT_BB " to original exit " FMT_BB "\n", prevTestBlock->bbNum, - exit->bbNum); - } - else - { - blockMap.Set(loop->GetHeader(), exit, BlockToBlockMap::Overwrite); - for (FlowEdge* entryEdge : loop->EntryEdges()) - { - BasicBlock* entering = entryEdge->getSourceBlock(); - assert(!entering->KindIs(BBJ_COND)); // Ensured by canonicalization - optRedirectBlock(entering, &blockMap, Compiler::RedirectBlockOption::UpdatePredLists); - JITDUMP("Redirecting original entry " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", - entering->bbNum, loop->GetHeader()->bbNum, entering->bbNum, exit->bbNum); - } - } + optRedirectPrevUnrollIteration(loop, prevTestBlock, exit); // The old loop body is unreachable now, but we will remove those // blocks after we finish unrolling. @@ -1800,6 +1671,65 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) return true; } +//----------------------------------------------------------------------------- +// optRedirectPrevUnrollIteration: +// Redirect the previous unrolled loop iteration (or entry) to a new target. +// +// Parameters: +// loop - The loop that is being unrolled +// prevTestBlock - The test block of the previous iteration, or nullptr if +// this is the first unrolled iteration. +// target - The new target for the previous iteration. +// +// +// Remarks: +// If "prevTestBlock" is nullptr, then the entry edges of the loop are +// redirected to the target. Otherwise "prevTestBlock" has its terminating +// statement removed and is changed to a BBJ_ALWAYS that goes to the target. 
+// +void Compiler::optRedirectPrevUnrollIteration(FlowGraphNaturalLoop* loop, BasicBlock* prevTestBlock, BasicBlock* target) +{ + if (prevTestBlock != nullptr) + { + assert(prevTestBlock->KindIs(BBJ_COND)); + Statement* testCopyStmt = prevTestBlock->lastStmt(); + GenTree* testCopyExpr = testCopyStmt->GetRootNode(); + assert(testCopyExpr->gtOper == GT_JTRUE); + GenTree* sideEffList = nullptr; + gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF); + if (sideEffList == nullptr) + { + fgRemoveStmt(prevTestBlock, testCopyStmt); + } + else + { + testCopyStmt->SetRootNode(sideEffList); + } + + fgRemoveRefPred(prevTestBlock->GetTrueTarget(), prevTestBlock); + fgRemoveRefPred(prevTestBlock->GetFalseTarget(), prevTestBlock); + + // Redirect exit edge from previous iteration to new entry. + prevTestBlock->SetKindAndTarget(BBJ_ALWAYS, target); + fgAddRefPred(target, prevTestBlock); + + JITDUMP("Redirecting previously created exiting " FMT_BB " -> " FMT_BB "\n", prevTestBlock->bbNum, + target->bbNum); + } + else + { + // Redirect all predecessors to the new one. + for (FlowEdge* enterEdge : loop->EntryEdges()) + { + BasicBlock* entering = enterEdge->getSourceBlock(); + JITDUMP("Redirecting " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", entering->bbNum, + loop->GetHeader()->bbNum, entering->bbNum, target->bbNum); + assert(!entering->KindIs(BBJ_COND)); // Ensured by canonicalization + fgReplaceJumpTarget(entering, target, loop->GetHeader()); + } + } +} + //----------------------------------------------------------------------------- // optReplaceScalarUsesWithConst: Replace all GT_LCL_VAR occurrences of a local // with a constant.