From 8e29f6b01284132a3791f87bd79c0dec4a7c80f2 Mon Sep 17 00:00:00 2001 From: James Newling Date: Wed, 21 Aug 2024 11:52:29 -0700 Subject: [PATCH] squash --- .../Transforms/AMDAIELowerToAIE.cpp | 29 ++++++++++--------- .../Transforms/test/lower_to_aie.mlir | 16 +++++----- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp index a198e5cf7..6fccc017d 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp @@ -152,16 +152,14 @@ FailureOr createObjectFifo( 1, std::multiplies<>()); int64_t targetSize = std::accumulate(targetShape.begin(), targetShape.end(), 1, std::multiplies<>()); - // TODO(jornt) for now, memory space 1 is used for objectfifos. Maybe refactor - // `aie.objectfifo` in the future to support different memory spaces. MemRefType memrefType = sourceSize < targetSize ? MemRefType::get({sourceSize}, srcType.getElementType(), MemRefLayoutAttrInterface{}, - rewriter.getI64IntegerAttr(1)) + srcType.getMemorySpace()) : MemRefType::get({targetSize}, dstType.getElementType(), MemRefLayoutAttrInterface{}, - rewriter.getI64IntegerAttr(1)); + dstType.getMemorySpace()); AIE::AIEObjectFifoType dtype = AIE::AIEObjectFifoType::get(memrefType); auto fifo = rewriter.create( rewriter.getUnknownLoc(), symName, srcTile, dstTiles, @@ -204,10 +202,9 @@ LogicalResult accessOpToAIE(IRRewriter &rewriter, } auto type = cast(oldReinterpretOp.getResult().getType()); - // TODO(jornt): for now, memory space 1 is used for objectFifos. Refactor - // `aie.objectfifo` to support different memory spaces to avoid hardcoding. - MemRefType newType = - MemRefType::Builder(type).setMemorySpace(rewriter.getI64IntegerAttr(1)); + + MemRefType newType = MemRefType::Builder(type); + llvm::ArrayRef sizes = newType.getShape(); auto [strides, baseOffset] = getStridesAndOffset(newType); auto reinterpretOp = rewriter.create( @@ -229,6 +226,7 @@ LogicalResult acquireOpToAIE(IRRewriter &rewriter, IRMapping &mapper, SmallVector &toBeErased) { LLVM_DEBUG(llvm::dbgs() << "Convert [AMDAIE::LogicalObjectFifoAcquire]\n"); + OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(acquireOp); auto dmaOp = @@ -244,10 +242,13 @@ LogicalResult acquireOpToAIE(IRRewriter &rewriter, return acquireOp.emitError() << "input isn't mapped to an `aie.objectifo` operation"; } - AIE::AIEObjectFifoType ofTy = - cast(objFifo.getElemType()); - MemRefType elementType = MemRefType::Builder(ofTy.getElementType()) - .setMemorySpace(rewriter.getI64IntegerAttr(1)); + + auto acquireOpType = dyn_cast(acquireOp.getType()); + assert(acquireOpType && + "Expected LogicalObjectFifoAcquire to have type " + "LogicalObjectFifoType"); + MemRefType elementType = acquireOpType.getElementType(); + auto subviewType = AIE::AIEObjectFifoSubviewType::get(elementType); AIE::ObjectFifoPort port = acquireOp.getPort() == LogicalObjectFifoPort::Produce @@ -255,9 +256,11 @@ LogicalResult acquireOpToAIE(IRRewriter &rewriter, : AIE::ObjectFifoPort::Consume; auto objFifoAquireOp = rewriter.create( rewriter.getUnknownLoc(), subviewType, port, objFifo.getName(), 1); + auto subviewOp = rewriter.create( rewriter.getUnknownLoc(), elementType, objFifoAquireOp.getSubview(), rewriter.getIntegerAttr(rewriter.getI32Type(), 0)); + // Map acquire op to new acquire + subview op. mapper.map(acquireOp.getOperation(), subviewOp.getOperation()); mapper.map(acquireOp.getResult(), subviewOp.getOutput()); @@ -1008,7 +1011,7 @@ class AMDAIELowerToAIEPass } AMDAIELowerToAIEPass() = default; - AMDAIELowerToAIEPass(const AMDAIELowerToAIEPass &pass) {}; + AMDAIELowerToAIEPass(const AMDAIELowerToAIEPass &pass){}; void runOnOperation() override; }; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lower_to_aie.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lower_to_aie.mlir index 201ea0377..6766f04a4 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lower_to_aie.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lower_to_aie.mlir @@ -220,8 +220,8 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // ----- // CHECK: aie.device -// CHECK-DAG: func.func private @ukernel_A(memref, index) attributes {llvm.bareptr = true} -// CHECK-DAG: func.func private @ukernel_B(memref, index, memref, index) attributes {llvm.bareptr = true} +// CHECK-DAG: func.func private @ukernel_A(memref, index) attributes {llvm.bareptr = true} +// CHECK-DAG: func.func private @ukernel_B(memref, index, memref, index) attributes {llvm.bareptr = true} // CHECK-DAG: %[[TILE_0_2:.+]] = aie.tile(0, 2) // CHECK: aie.core(%[[TILE_0_2]]) // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index @@ -233,11 +233,11 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // CHECK-SAME: Produce // CHECK: %[[ACCESS0:.+]] = aie.objectfifo.subview.access %[[ACQUIRE0]] // CHECK: %[[REINTERPRET0:.+]] = memref.reinterpret_cast %[[ACCESS0]] -// CHECK: linalg.fill ins(%{{.+}} : i32) outs(%[[REINTERPRET]] : memref<32x32xi32, 1>) +// CHECK: linalg.fill ins(%{{.+}} : i32) outs(%[[REINTERPRET]] : memref<32x32xi32, 2>) // CHECK: %[[BASE_BUFFER:.*]], %{{.+}}, %{{.+}}:2, %{{.+}}:2 = memref.extract_strided_metadata %[[REINTERPRET]] : // CHECK: %[[BASE_BUFFER0:.*]], %{{.+}}, %{{.+}}:2, %{{.+}}:2 = memref.extract_strided_metadata %[[REINTERPRET0]] : -// CHECK: func.call @ukernel_A(%[[BASE_BUFFER]], %[[C0]]) : (memref, index) -> () -// CHECK: func.call @ukernel_B(%[[BASE_BUFFER]], %[[C0]], %[[BASE_BUFFER0]], %[[C0]]) : (memref, index, memref, index) -> () +// CHECK: func.call @ukernel_A(%[[BASE_BUFFER]], %[[C0]]) : (memref, index) -> () +// CHECK: func.call @ukernel_B(%[[BASE_BUFFER]], %[[C0]], %[[BASE_BUFFER0]], %[[C0]]) : (memref, index, memref, index) -> () // CHECK: aie.end // CHECK: } {link_with = "/path/to/ukernel.o"} // CHECK: aiex.runtime_sequence @lower_to_aie_ukernel @@ -738,10 +738,10 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // CHECK: aie.device(npu1_4col) { // CHECK: %[[TILE_0_0:.*]] = aie.tile(0, 0) // CHECK: %[[TILE_0_1:.*]] = aie.tile(0, 1) -// CHECK: aie.objectfifo @[[OBJ0:.*]](%[[TILE_0_0]], {%[[TILE_0_1]]}, 2 : i32) : !aie.objectfifo> -// CHECK: aie.objectfifo @[[OBJ1:.*]](%[[TILE_0_0]], {%[[TILE_0_1]]}, 2 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo @[[OBJ0:.*]](%[[TILE_0_0]], {%[[TILE_0_1]]}, 2 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo @[[OBJ1:.*]](%[[TILE_0_0]], {%[[TILE_0_1]]}, 2 : i32) : !aie.objectfifo> // CHECK: aie.objectfifo @[[OBJ2:.*]](%[[TILE_0_1]] -// CHECK-SAME: %[[TILE_0_0]]}, 2 : i32) : !aie.objectfifo> +// CHECK-SAME: %[[TILE_0_0]]}, 2 : i32) : !aie.objectfifo> // CHECK: aiex.runtime_sequence @bf16_f32_lit_test // CHECK-SAME: (%[[LHS:.*]]: memref<32x32xbf16>, %[[RHS:.*]]: memref<32x32xbf16>, %[[OUT:.*]]: memref<32x32xf32>) { // CHECK: aiex.npu.dma_memcpy_nd