Skip to content

Commit

Permalink
squashed commit
Browse files Browse the repository at this point in the history
  • Loading branch information
newling committed Aug 26, 2024
1 parent 652d648 commit 46bb421
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/Iterators.h"
#include "mlir/IR/PatternMatch.h"

#define DEBUG_TYPE "iree-amdaie-tile-and-fuse"


namespace mlir::iree_compiler::AMDAIE {

namespace {
Expand Down Expand Up @@ -157,27 +157,43 @@ void AMDAIETileAndFusePass::runOnOperation() {

SmallVector<OpFoldResult> tileSizes =
getAsIndexOpFoldResult(context, tileSizesVal);

auto options = scf::SCFTilingOptions().setTileSizes(tileSizes);

// When tiling using scf.for we do not need to set any mapping.
if (!useSCFFor) {
options.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp);
// Here we assume there are always two levels of parallel (scf.forall)
// loops, and the first level of tiling is always using scf.forall and
// mapped to blocks. Currently we are not using mapping attributes for
// Conv2d ops, because there could be four parallel tiling dimensions.
// TODO (vivian): create AIE specific mapping attributes.
if (!isa<linalg::ConvolutionOpInterface>(consumerOp.getOperation())) {
if (tilingLevel == 0) {
options.setMapping(
{gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimY),
gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimX)});
} else {
options.setMapping(
{gpu::GPUThreadMappingAttr::get(context, gpu::MappingId::DimY),
gpu::GPUThreadMappingAttr::get(context, gpu::MappingId::DimX)});
}

// There is one induction variables in the scf.forall for each of the
// non-zero tile sizes. Recall that a '0' tile size corresonds to 'do
// not tile'.
uint32_t nbIndVars = std::count_if(tileSizesVal.begin(), tileSizesVal.end(),
[](int64_t t) { return t != 0; });

// See mlir::gpu::MappingId enum: there are currently 13 values.
if (nbIndVars > mlir::gpu::getMaxEnumValForMappingId()) {
LLVM_DEBUG(llvm::dbgs() << "----- skip, too many loops to tile -----\n");
return;
}
auto getMappingAttributeForDimension = [&](uint32_t i) -> Attribute {
auto id = static_cast<gpu::MappingId>(i);
if (tilingLevel == 0) return gpu::GPUBlockMappingAttr::get(context, id);
return gpu::GPUThreadMappingAttr::get(context, id);
};

SmallVector<Attribute> mapping;
mapping.reserve(nbIndVars);
for (uint32_t i = 0; i < nbIndVars; ++i) {
// DimY and DimX are swapped, so that DimX goes to AIE array columns and
// DimX goes to AIE arrat rows (or something like that...)
if (i == 0)
mapping.push_back(getMappingAttributeForDimension(1));
else if (i == 1)
mapping.push_back(getMappingAttributeForDimension(0));
else
mapping.push_back(getMappingAttributeForDimension(i));
}
options.setMapping(mapping);
}

IRRewriter rewriter(context);
Expand Down
18 changes: 18 additions & 0 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,24 @@ void addMLIRAIRLoweringPasses(OpPassManager &passManager, AMDAIEDevice device) {
passManager.addPass(createEraseHALDescriptorTypeFromMemRefPass());
passManager.addPass(memref::createFoldMemRefAliasOpsPass());
passManager.addPass(createAMDAIEBridgeToAIRPass());

// Running canonicalization for all pipelines here results in failures.
// Example
// ```
// 'memref.cast' op is an unsupported operation. This pass currently only
// supports AllocOp and SubViewOp as inputs.
// ```
// It is currently required for the convolution pipeline though, to remove the
// extra (size-1) thread- and group- dimensions.
//
// TODO(newling) there are better solutions like:
// 1) make canonicalization work for scf.forall
// 2) pass to collapse rank-4 scf.foralls to rank-2 scf.foralls.
// 3) resolve above 'unsupproted operation' error.
if (clUseTilePipeline == TilePassPipeline::ConvDecomposePipeline) {
passManager.addPass(createCanonicalizerPass());
}

// TODO (Erwei): Figure out a way to work with AMDAIEPackToDmaPass.
if (clUseTilePipeline == TilePassPipeline::PackPeelPipeline)
passManager.addPass(createAMDAIEDecomposeLinalgExtPackUnPackToAIRPass());
Expand Down

0 comments on commit 46bb421

Please sign in to comment.