From 3a8534e0f279e41b272a59ab1139fc2f5100632d Mon Sep 17 00:00:00 2001 From: James Newling Date: Mon, 19 Aug 2024 14:08:49 -0700 Subject: [PATCH] simplify, simplify, simplify --- compiler/plugins/target/AMD-AIE/aie/AIEOps.td | 1 - .../AMD-AIE/aie/AMDAIECreatePathFindFlows.cpp | 3 - .../target/AMD-AIE/aie/AMDAIEXToStandard.cpp | 89 -------- .../plugins/target/AMD-AIE/aie/CMakeLists.txt | 1 - compiler/plugins/target/AMD-AIE/aie/Passes.h | 4 +- .../aie/test/aiex_standard_lowering.mlir | 22 -- .../iree-amd-aie/PluginRegistration.cpp | 1 - .../AMD-AIE/iree-amd-aie/Target/AIETarget.cpp | 12 +- .../iree-amd-aie/Target/AMDAIETargetBCF.cpp | 7 +- .../Target/AMDAIETargetCDODirect.cpp | 12 +- .../Target/AMDAIETargetLdScript.cpp | 6 +- .../iree-amd-aie/Target/AMDAIETargets.h | 6 +- .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 206 +++++++++--------- .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.h | 12 +- .../aie_runtime/test/cdo/aie_cdo_gen_test.cxx | 15 +- 15 files changed, 130 insertions(+), 267 deletions(-) delete mode 100644 compiler/plugins/target/AMD-AIE/aie/AMDAIEXToStandard.cpp delete mode 100644 compiler/plugins/target/AMD-AIE/aie/test/aiex_standard_lowering.mlir diff --git a/compiler/plugins/target/AMD-AIE/aie/AIEOps.td b/compiler/plugins/target/AMD-AIE/aie/AIEOps.td index 763d76ff5..9a3607e9c 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AIEOps.td +++ b/compiler/plugins/target/AMD-AIE/aie/AIEOps.td @@ -23,7 +23,6 @@ class AIE_Op traits = []> : Op; def AIE_DeviceOp: AIE_Op<"device", [ - HasParent<"mlir::ModuleOp">, SymbolTable, SingleBlock, NoTerminator, IsolatedFromAbove ]> { let summary = "Define an AIE design targetting a complete device"; diff --git a/compiler/plugins/target/AMD-AIE/aie/AMDAIECreatePathFindFlows.cpp b/compiler/plugins/target/AMD-AIE/aie/AMDAIECreatePathFindFlows.cpp index d531ec619..89e7bfd8b 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AMDAIECreatePathFindFlows.cpp +++ b/compiler/plugins/target/AMD-AIE/aie/AMDAIECreatePathFindFlows.cpp @@ -6,15 +6,12 @@ #include #include -#include #include #include "AIEDialect.h" #include "Passes.h" #include "iree-amd-aie/aie_runtime/iree_aie_router.h" #include "iree-amd-aie/aie_runtime/iree_aie_runtime.h" -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/Support/raw_os_ostream.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" diff --git a/compiler/plugins/target/AMD-AIE/aie/AMDAIEXToStandard.cpp b/compiler/plugins/target/AMD-AIE/aie/AMDAIEXToStandard.cpp deleted file mode 100644 index 4e0de9c09..000000000 --- a/compiler/plugins/target/AMD-AIE/aie/AMDAIEXToStandard.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2024 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "AIEXDialect.h" -#include "Passes.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Transforms/DialectConversion.h" - -using namespace mlir; -using namespace xilinx; -using namespace xilinx::AIE; -using namespace xilinx::AIEX; - -#define DEBUG_TYPE "amdaiex-standard-lowering" - -template -struct AMDAIEXOpRemoval : OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - using OpAdaptor = typename MyAIEXOp::Adaptor; - ModuleOp &module; - - AMDAIEXOpRemoval(MLIRContext *context, ModuleOp &m, - PatternBenefit benefit = 1) - : OpConversionPattern(context, benefit), module(m) {} - - LogicalResult matchAndRewrite( - MyAIEXOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - Operation *Op = op.getOperation(); - rewriter.eraseOp(Op); - return success(); - } -}; - -namespace mlir::iree_compiler::AMDAIE { -struct AMDAIEXToStandardPass : mlir::OperationPass { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(AMDAIEXToStandardPass) - - AMDAIEXToStandardPass() - : mlir::OperationPass(resolveTypeID()) {} - - llvm::StringRef getArgument() const override { - return "amdaiex-standard-lowering"; - } - - llvm::StringRef getName() const override { return "AMDAIEXToStandardPass"; } - - std::unique_ptr clonePass() const override { - return std::make_unique( - *static_cast(this)); - } - - void getDependentDialects(::mlir::DialectRegistry ®istry) const override { - registry.insert(); - registry.insert(); - registry.insert(); - registry.insert(); - } - - void runOnOperation() override { - ModuleOp m = getOperation(); - ConversionTarget target(getContext()); - RewritePatternSet removepatterns(&getContext()); - removepatterns.add>(m.getContext(), m); - removepatterns.add>(m.getContext(), m); - removepatterns.add>(m.getContext(), m); - removepatterns.add>(m.getContext(), m); - removepatterns.add>(m.getContext(), m); - removepatterns.add>(m.getContext(), m); - removepatterns.add>(m.getContext(), m); - - if (failed(applyPartialConversion(m, target, std::move(removepatterns)))) - signalPassFailure(); - } -}; - -std::unique_ptr> createAMDAIEXToStandardPass() { - return std::make_unique(); -} - -void registerAMDAIEXToStandardPass() { - mlir::registerPass([]() -> std::unique_ptr { - return createAMDAIEXToStandardPass(); - }); -} -} // namespace mlir::iree_compiler::AMDAIE diff --git a/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt index db5c1e449..52244c48a 100644 --- a/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt @@ -142,7 +142,6 @@ iree_cc_library( AMDAIELocalizeLocks.cpp AMDAIENormalizeAddressSpaces.cpp AMDAIEObjectFifoStatefulTransform.cpp - AMDAIEXToStandard.cpp DEPS iree-amd-aie::aie_runtime::iree_aie_runtime_static ::AIEDialectIR diff --git a/compiler/plugins/target/AMD-AIE/aie/Passes.h b/compiler/plugins/target/AMD-AIE/aie/Passes.h index 347c32757..bf9e64477 100644 --- a/compiler/plugins/target/AMD-AIE/aie/Passes.h +++ b/compiler/plugins/target/AMD-AIE/aie/Passes.h @@ -34,7 +34,6 @@ createAMDAIEPathfinderPass(); std::unique_ptr> createAMDAIECoreToStandardPass(); std::unique_ptr> createAMDAIEDmaToNpuPass(); -std::unique_ptr> createAMDAIEXToStandardPass(); void registerAMDAIEAssignBufferAddressesBasic(); void registerAMDAIEAssignBufferDescriptorIDs(); @@ -44,9 +43,8 @@ void registerAMDAIELocalizeLocks(); void registerAMDAIENormalizeAddressSpaces(); void registerAMDAIEObjectFifoStatefulTransform(); void registerAMDAIERoutePathfinderFlows(); - void registerAMDAIEDmaToNpu(); -void registerAMDAIEXToStandardPass(); + } // namespace mlir::iree_compiler::AMDAIE #endif // AMDAIE_PASSES_H_ diff --git a/compiler/plugins/target/AMD-AIE/aie/test/aiex_standard_lowering.mlir b/compiler/plugins/target/AMD-AIE/aie/test/aiex_standard_lowering.mlir deleted file mode 100644 index 015aea837..000000000 --- a/compiler/plugins/target/AMD-AIE/aie/test/aiex_standard_lowering.mlir +++ /dev/null @@ -1,22 +0,0 @@ - -// RUN: iree-opt --amdaiex-standard-lowering %s | FileCheck %s - -// CHECK-LABEL: aie.device(npu1_4col) { -// CHECK: memref.global "public" @toMem : memref<16xi32> -// CHECK: func.func @dma_and_wait(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) { -// CHECK: return -// CHECK: } -// CHECK: aie.shim_dma_allocation @toMem(MM2S, 1, 1) -// CHECK: } - -module { - aie.device(npu1_4col) { - memref.global "public" @toMem : memref<16xi32> - func.func @dma_and_wait(%arg0: memref<16xi32>, %arg1: memref<16xi32>) { - aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32> - aiex.npu.dma_wait {symbol = @toMem} - return - } - aie.shim_dma_allocation @toMem (MM2S, 1, 1) - } -} diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/PluginRegistration.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/PluginRegistration.cpp index 2ef29d294..50d72b077 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/PluginRegistration.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/PluginRegistration.cpp @@ -34,7 +34,6 @@ struct AMDAIESession AMDAIE::registerAMDAIEObjectFifoStatefulTransform(); AMDAIE::registerAMDAIERoutePathfinderFlows(); AMDAIE::registerAMDAIEDmaToNpu(); - AMDAIE::registerAMDAIEXToStandardPass(); AMDAIE::registerAIRConversionPasses(); AMDAIE::registerAIRTransformPasses(); aievec::registerConvertAIEVecToLLVMPass(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index 2aee8a2b4..fcba2923b 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -349,18 +349,8 @@ LogicalResult AIETargetBackend::serializeExecutable( ParserConfig pcfg(variantOp->getContext()); llvm::SourceMgr srcMgr; - // TODO(newling) check if we just just pass DeviceOp around directly, - // is is really necessary to wrap it in a module? - ModuleOp moduleWithOneDevice; - { - OpBuilder opBuilder(moduleOp.getContext()); - moduleWithOneDevice = opBuilder.create(moduleOp.getLoc()); - opBuilder.setInsertionPointToStart(moduleWithOneDevice.getBody()); - opBuilder.clone(*deviceOps[i].getOperation()); - } - if (failed(aie2xclbin( - /*ctx=*/variantOp->getContext(), moduleWithOneDevice, + /*ctx=*/variantOp->getContext(), deviceOps[i], /*outputNPU=*/npuInstPath.str().str(), /*outputXCLBin=*/xclbinPath.str().str(), /*printIRBeforeAll=*/options.aie2xclbinPrintIrBeforeAll, diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetBCF.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetBCF.cpp index 48fc13527..33a1567dc 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetBCF.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetBCF.cpp @@ -7,7 +7,6 @@ #include "AMDAIETargets.h" #include "aie/AIEDialect.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/IR/Module.h" using namespace mlir; using namespace xilinx; @@ -17,15 +16,11 @@ std::string utohexstr(uint32_t u) { return "0x" + llvm::utohexstr(u); } namespace mlir::iree_compiler::AMDAIE { -LogicalResult AIETranslateToBCF(ModuleOp module, raw_ostream &output, +LogicalResult AIETranslateToBCF(DeviceOp deviceOp, raw_ostream &output, int tileCol, int tileRow) { DenseMap tiles; DenseMap> buffers; - if (module.getOps().empty()) - module.emitOpError("expected aie.device operation at toplevel"); - DeviceOp deviceOp = *(module.getOps().begin()); - collectTiles(deviceOp, tiles); collectBuffers(deviceOp, buffers); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp index 7678f848a..29216d069 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include -#include // uint +#include #include #include #include @@ -17,12 +17,9 @@ #include "iree-amd-aie/aie_runtime/iree_aie_runtime.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "mlir/IR/Block.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/BuiltinTypeInterfaces.h" #include "mlir/Support/LLVM.h" #include "mlir/Support/LogicalResult.h" @@ -341,14 +338,11 @@ LogicalResult generateCDOBinariesSeparately( return success(); } -LogicalResult AIETranslateToCDODirect(ModuleOp m, llvm::StringRef workDirPath, +LogicalResult AIETranslateToCDODirect(xilinx::AIE::DeviceOp device, + llvm::StringRef workDirPath, bool bigEndian, bool emitUnified, bool cdoDebug, bool aieSim, bool enableCores) { - auto devOps = m.getOps(); - assert(llvm::range_size(devOps) == 1 && - "only exactly 1 device op supported."); - DeviceOp device = *devOps.begin(); AMDAIEDeviceModel deviceModel = getDeviceModel(device.getDevice()); byte_ordering endianness = bigEndian ? byte_ordering::Big_Endian : byte_ordering::Little_Endian; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetLdScript.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetLdScript.cpp index a106f1e53..5cbebf39e 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetLdScript.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetLdScript.cpp @@ -50,14 +50,10 @@ static void writeLDScriptMap(raw_ostream &output, BufferOp buf, int offset) { // .bss : { *(.bss) } > data // } LogicalResult mlir::iree_compiler::AMDAIE::AIETranslateToLdScript( - ModuleOp module, raw_ostream &output, int tileCol, int tileRow) { + DeviceOp deviceOp, raw_ostream &output, int tileCol, int tileRow) { DenseMap tiles; DenseMap> buffers; - if (module.getOps().empty()) { - module.emitOpError("expected AIE.device operation at toplevel"); - } - DeviceOp deviceOp = *(module.getOps().begin()); collectTiles(deviceOp, tiles); ::collectBuffers(deviceOp, buffers); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargets.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargets.h index 5052fadd8..90a16e72a 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargets.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargets.h @@ -17,16 +17,16 @@ namespace mlir::iree_compiler::AMDAIE { std::vector AIETranslateToNPU(mlir::ModuleOp); -mlir::LogicalResult AIETranslateToLdScript(mlir::ModuleOp module, +mlir::LogicalResult AIETranslateToLdScript(xilinx::AIE::DeviceOp, llvm::raw_ostream &output, int tileCol, int tileRow); -mlir::LogicalResult AIETranslateToBCF(mlir::ModuleOp module, +mlir::LogicalResult AIETranslateToBCF(xilinx::AIE::DeviceOp, llvm::raw_ostream &output, int tileCol, int tileRow); mlir::LogicalResult AIETranslateToCDODirect( - mlir::ModuleOp m, llvm::StringRef workDirPath, bool bigEndian = false, + xilinx::AIE::DeviceOp, llvm::StringRef workDirPath, bool bigEndian = false, bool emitUnified = false, bool cdoDebug = false, bool aieSim = false, bool enableCores = true); } // namespace mlir::iree_compiler::AMDAIE diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index e4684fa9a..f9802d99e 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -17,6 +17,8 @@ #include "aievec/Passes.h" #include "iree-amd-aie/Transforms/Passes.h" #include "iree/compiler/Utils/ToolUtils.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" @@ -25,6 +27,7 @@ #include "llvm/Support/Program.h" #include "llvm/Support/ToolOutputFile.h" #include "mlir/IR/AsmState.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" #include "mlir/Pass/PassManager.h" #include "mlir/Support/FileUtilities.h" @@ -348,12 +351,12 @@ static std::optional runTool( << "\n"; return {}; } - auto outputFromFile = maybeOutputFromFile.value(); + const std::string &outputFromFile = maybeOutputFromFile.value(); if (verbose) { - auto totalTime = std::chrono::duration_cast>( - stats.TotalTime) - .count(); + float totalTime = std::chrono::duration_cast>( + stats.TotalTime) + .count(); std::string exitStatusStr = result == 0 ? "Succeeded" : "Failed"; llvm::outs() << "\n" << exitStatusStr << " in totalTime " << totalTime @@ -424,7 +427,7 @@ static LogicalResult assembleFileUsingPeano( args.emplace_back("--target=aie2-none-unknown-elf"); std::vector peanoArgs = makePeanoOptArgs(); args.reserve(args.size() + peanoArgs.size()); - for (const auto &item : peanoArgs) { + for (const std::string &item : peanoArgs) { args.emplace_back("-mllvm"); args.emplace_back(item); } @@ -490,19 +493,13 @@ static_assert(std::is_same_v vitisDir, const std::string &targetArch, bool verbose, - Path peanoDir, const std::optional &ukernel) { - auto deviceOps = moduleOp.getOps(); - if (!llvm::hasSingleElement(deviceOps)) - return moduleOp.emitOpError("expected a single device op"); - - AIE::DeviceOp deviceOp = *deviceOps.begin(); + AIE::DeviceOp deviceOp, const std::string &objFile, Path tempDir, + bool useChess, std::optional vitisDir, const std::string &targetArch, + bool verbose, Path peanoDir, const std::optional &ukernel) { auto tileOps = deviceOp.getOps(); - std::string errorMessage; - for (auto tileOp : tileOps) { + for (AIE::TileOp tileOp : tileOps) { int col = tileOp.getCol(); int row = tileOp.getRow(); auto coreOp = getCoreOp(tileOp); @@ -572,7 +569,7 @@ static LogicalResult generateCoreElfFiles( } if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToBCF( - moduleOp, bcfOutput->os(), col, row))) { + deviceOp, bcfOutput->os(), col, row))) { llvm::errs() << "Failed to generate BCF"; return failure(); } @@ -606,7 +603,7 @@ static LogicalResult generateCoreElfFiles( return failure(); } if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToLdScript( - moduleOp, ldscriptOutput->os(), col, row))) { + deviceOp, ldscriptOutput->os(), col, row))) { llvm::errs() << "failed to generate ld script for core (" << col << "," << row << ")"; return failure(); @@ -638,17 +635,18 @@ static LogicalResult generateCoreElfFiles( return success(); } -static LogicalResult generateCDO(MLIRContext *context, ModuleOp moduleOp, +static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, bool timing, const Path &tempDir) { - ModuleOp copy = moduleOp.clone(); + AIE::DeviceOp copy = deviceOp.clone(); std::string errorMessage; - PassManager passManager(context, ModuleOp::getOperationName()); + PassManager passManager(context, AIE::DeviceOp::getOperationName()); applyConfigToPassManager(passManager, printIRBeforeAll, printIRAfterAll, printIRModuleScope, timing); - passManager.addNestedPass( + passManager.addPass( mlir::iree_compiler::AMDAIE::createAMDAIEPathfinderPass()); + if (failed(passManager.run(copy))) { llvm::errs() << "failed to run passes to prepare for XCLBin generation"; return failure(); @@ -667,56 +665,57 @@ static LogicalResult generateCDO(MLIRContext *context, ModuleOp moduleOp, static json::Object makeKernelJSON(const std::string &name, const std::string &id, const std::string &instance) { - return json::Object{ + using json::Object; + + return Object{ {"name", name}, {"type", "dpu"}, {"extended-data", - json::Object{ - {"subtype", "DPU"}, {"functional", "0"}, {"dpu_kernel_id", id}}}, - {"arguments", json::Array{json::Object{{"name", "opcode"}, - {"address-qualifier", "SCALAR"}, - {"type", "uint64_t"}, - {"offset", "0x00"}}, - json::Object{{"name", "instr"}, - {"memory-connection", "SRAM"}, - {"address-qualifier", "GLOBAL"}, - {"type", "char *"}, - {"offset", "0x08"}}, - json::Object{{"name", "ninstr"}, - {"address-qualifier", "SCALAR"}, - {"type", "uint32_t"}, - {"offset", "0x10"}}, - json::Object{{"name", "bo0"}, - {"memory-connection", "HOST"}, - {"address-qualifier", "GLOBAL"}, - {"type", "void*"}, - {"offset", "0x14"}}, - json::Object{{"name", "bo1"}, - {"memory-connection", "HOST"}, - {"address-qualifier", "GLOBAL"}, - {"type", "void*"}, - {"offset", "0x1c"}}, - json::Object{{"name", "bo2"}, - {"memory-connection", "HOST"}, - {"address-qualifier", "GLOBAL"}, - {"type", "void*"}, - {"offset", "0x24"}}, - json::Object{{"name", "bo3"}, - {"memory-connection", "HOST"}, - {"address-qualifier", "GLOBAL"}, - {"type", "void*"}, - {"offset", "0x2c"}}, - json::Object{{"name", "bo4"}, - {"memory-connection", "HOST"}, - {"address-qualifier", "GLOBAL"}, - {"type", "void*"}, - {"offset", "0x34"}}, - json::Object{{"name", "bo5"}, - {"memory-connection", "HOST"}, - {"address-qualifier", "GLOBAL"}, - {"type", "void*"}, - {"offset", "0x3c"}}}}, - {"instances", json::Array{json::Object{{"name", instance}}}}}; + Object{{"subtype", "DPU"}, {"functional", "0"}, {"dpu_kernel_id", id}}}, + {"arguments", json::Array{Object{{"name", "opcode"}, + {"address-qualifier", "SCALAR"}, + {"type", "uint64_t"}, + {"offset", "0x00"}}, + Object{{"name", "instr"}, + {"memory-connection", "SRAM"}, + {"address-qualifier", "GLOBAL"}, + {"type", "char *"}, + {"offset", "0x08"}}, + Object{{"name", "ninstr"}, + {"address-qualifier", "SCALAR"}, + {"type", "uint32_t"}, + {"offset", "0x10"}}, + Object{{"name", "bo0"}, + {"memory-connection", "HOST"}, + {"address-qualifier", "GLOBAL"}, + {"type", "void*"}, + {"offset", "0x14"}}, + Object{{"name", "bo1"}, + {"memory-connection", "HOST"}, + {"address-qualifier", "GLOBAL"}, + {"type", "void*"}, + {"offset", "0x1c"}}, + Object{{"name", "bo2"}, + {"memory-connection", "HOST"}, + {"address-qualifier", "GLOBAL"}, + {"type", "void*"}, + {"offset", "0x24"}}, + Object{{"name", "bo3"}, + {"memory-connection", "HOST"}, + {"address-qualifier", "GLOBAL"}, + {"type", "void*"}, + {"offset", "0x2c"}}, + Object{{"name", "bo4"}, + {"memory-connection", "HOST"}, + {"address-qualifier", "GLOBAL"}, + {"type", "void*"}, + {"offset", "0x34"}}, + Object{{"name", "bo5"}, + {"memory-connection", "HOST"}, + {"address-qualifier", "GLOBAL"}, + {"type", "void*"}, + {"offset", "0x3c"}}}}, + {"instances", json::Array{Object{{"name", instance}}}}}; } static LogicalResult generateXCLBin( @@ -1010,17 +1009,26 @@ struct RemoveAlignment2FromLLVMLoadPass } // namespace static LogicalResult generateUnifiedObject( - MLIRContext *context, ModuleOp moduleOp, const std::string &outputFile, + MLIRContext *context, AIE::DeviceOp deviceOp, const std::string &outputFile, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, bool timing, bool useChess, bool verbose, Path tempDir, std::optional vitisDir, const std::string &targetArch, Path peanoDir) { - PassManager pm(context, moduleOp.getOperationName()); + // TODO(newling) to avoid nesting the DeviceOp in a ModuleOp, + // we need to make changes to core-to-standard-pass. + ModuleOp moduleWithOneDevice; + { + OpBuilder opBuilder(deviceOp.getContext()); + moduleWithOneDevice = opBuilder.create(deviceOp.getLoc()); + opBuilder.setInsertionPointToStart(moduleWithOneDevice.getBody()); + opBuilder.clone(*deviceOp.getOperation()); + } + PassManager pm(context, moduleWithOneDevice.getOperationName()); applyConfigToPassManager(pm, printIRBeforeAll, printIRAfterAll, printIRModuleScope, timing); pm.addPass(mlir::iree_compiler::AMDAIE::createAMDAIECoreToStandardPass()); - pm.addPass(mlir::iree_compiler::AMDAIE::createAMDAIEXToStandardPass()); + // Convert specific vector dialect ops (like vector.contract) to the AIEVec // dialect mlir::iree_compiler::aievec::buildConvertVectorToAIEVec(pm); @@ -1033,14 +1041,14 @@ static LogicalResult generateUnifiedObject( llvm::outs() << "\n"; } - ModuleOp copy = moduleOp.clone(); - if (failed(pm.run(copy))) - return moduleOp.emitOpError("Failed to lower to LLVM"); + // AIE::DeviceOp copy = deviceOp.clone(); + if (failed(pm.run(moduleWithOneDevice))) + return deviceOp.emitOpError("Failed to lower to LLVM"); llvm::LLVMContext llvmContext; - auto llvmModule = translateModuleToLLVMIR(copy, llvmContext); + auto llvmModule = translateModuleToLLVMIR(moduleWithOneDevice, llvmContext); if (!llvmModule) - return moduleOp.emitOpError("Failed to translate module to LLVMIR"); + return deviceOp.emitOpError("Failed to translate module to LLVMIR"); std::string inputLLStr; { llvm::raw_string_ostream rso(inputLLStr); @@ -1097,19 +1105,13 @@ static LogicalResult generateUnifiedObject( return failure(); } } - copy->erase(); + moduleWithOneDevice->erase(); return success(); } -FailureOr> getNpuInstructions(ModuleOp moduleOp) { - auto ctx = moduleOp.getContext(); - auto deviceOps = moduleOp.getOps(); - auto nDeviceOps = std::distance(deviceOps.begin(), deviceOps.end()); - if (nDeviceOps != 1) - return emitError(UnknownLoc::get(ctx), - "Expected exactly one aie.device in the module"); - auto deviceOp = *deviceOps.begin(); - auto maybeNpuInstructions = deviceOp->getAttr("npu_instructions"); +FailureOr> getNpuInstructions(AIE::DeviceOp deviceOp) { + MLIRContext *ctx = deviceOp.getContext(); + mlir::Attribute maybeNpuInstructions = deviceOp->getAttr("npu_instructions"); if (!maybeNpuInstructions) return emitError(UnknownLoc::get(ctx), "Expected npu_instructions attribute on aie.device"); @@ -1119,7 +1121,8 @@ FailureOr> getNpuInstructions(ModuleOp moduleOp) { return emitError( UnknownLoc::get(ctx), "Failed to cast npu_instructions to DenseUI32ResourceElementsAttr"); - auto maybeArrayRef = npuInstructions.tryGetAsArrayRef(); + std::optional> maybeArrayRef = + npuInstructions.tryGetAsArrayRef(); if (!maybeArrayRef.has_value()) return emitError( UnknownLoc::get(ctx), @@ -1128,7 +1131,7 @@ FailureOr> getNpuInstructions(ModuleOp moduleOp) { } LogicalResult aie2xclbin( - MLIRContext *ctx, ModuleOp moduleOp, const std::string &outputNPU, + MLIRContext *ctx, AIE::DeviceOp deviceOp, const std::string &outputNPU, const std::string &outputXCLBin, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, bool timing, const std::string &tempDir, bool useChess, bool verbose, @@ -1138,18 +1141,19 @@ LogicalResult aie2xclbin( const std::string &amdAIEInstallDir, const std::optional &InputXCLBin, const std::optional &ukernel) { - PassManager pm(ctx, mlir::ModuleOp::getOperationName()); + PassManager pm(ctx, AIE::DeviceOp::getOperationName()); applyConfigToPassManager(pm, printIRBeforeAll, printIRAfterAll, printIRModuleScope, timing); - if (failed(pm.run(moduleOp))) - return moduleOp.emitOpError(": NPU Instruction pipeline failed"); + if (failed(pm.run(deviceOp))) + return deviceOp.emitOpError(": NPU Instruction pipeline failed"); - auto maybeNpuInstructions = getNpuInstructions(moduleOp); + FailureOr> maybeNpuInstructions = + getNpuInstructions(deviceOp); if (failed(maybeNpuInstructions)) { assert(false && "Failed to get NPU instructions"); return failure(); } - auto npuInstructions = maybeNpuInstructions.value(); + ArrayRef npuInstructions = maybeNpuInstructions.value(); std::string errorMessage; auto output = openOutputFile(outputNPU, &errorMessage); @@ -1158,29 +1162,29 @@ LogicalResult aie2xclbin( << errorMessage; return failure(); } - for (auto w : npuInstructions) output->os() << llvm::format("%08X\n", w); + for (uint32_t w : npuInstructions) output->os() << llvm::format("%08X\n", w); output->keep(); Path unifiedObj = Path(tempDir) / "input.o"; - if (failed(generateUnifiedObject(ctx, moduleOp, unifiedObj, printIRBeforeAll, + if (failed(generateUnifiedObject(ctx, deviceOp, unifiedObj, printIRBeforeAll, printIRAfterAll, printIRModuleScope, timing, useChess, verbose, tempDir, vitisDir, targetArch, peanoDir))) - return moduleOp.emitOpError("Failed to generate unified object"); + return deviceOp.emitOpError("Failed to generate unified object"); - if (failed(generateCoreElfFiles(moduleOp, unifiedObj, tempDir, useChess, + if (failed(generateCoreElfFiles(deviceOp, unifiedObj, tempDir, useChess, vitisDir, targetArch, verbose, peanoDir, ukernel))) - return moduleOp.emitOpError("Failed to generate core ELF file(s)"); + return deviceOp.emitOpError("Failed to generate core ELF file(s)"); - if (failed(generateCDO(ctx, moduleOp, printIRBeforeAll, printIRAfterAll, + if (failed(generateCDO(ctx, deviceOp, printIRBeforeAll, printIRAfterAll, printIRModuleScope, timing, tempDir))) - return moduleOp.emitOpError("Failed to generate CDO"); + return deviceOp.emitOpError("Failed to generate CDO"); if (failed(generateXCLBin(outputXCLBin, tempDir, xclBinKernelID, xclBinKernelName, xclBinInstanceName, amdAIEInstallDir, verbose, InputXCLBin))) - return moduleOp.emitOpError("Failed to generate XCLBin"); + return deviceOp.emitOpError("Failed to generate XCLBin"); return success(); } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.h index 705e97d4f..290064170 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.h @@ -7,17 +7,15 @@ #include -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringRef.h" -#include "mlir/IR/BuiltinOps.h" +#include "aie/AIEDialect.h" #include "mlir/IR/MLIRContext.h" #include "mlir/Support/LogicalResult.h" mlir::LogicalResult aie2xclbin( - mlir::MLIRContext *ctx, mlir::ModuleOp moduleOp, - const std::string &outputNPU, const std::string &outputXCLBin, - bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, - bool timing, const std::string &tempDir, bool useChess, bool verbose, + mlir::MLIRContext *ctx, xilinx::AIE::DeviceOp, const std::string &outputNPU, + const std::string &outputXCLBin, bool printIRBeforeAll, + bool printIRAfterAll, bool printIRModuleScope, bool timing, + const std::string &tempDir, bool useChess, bool verbose, const std::optional &vitisDir, const std::string &targetArch, const std::string &peanoDir, const std::string &xclBinKernelID, const std::string &xclBinKernelName, const std::string &xclBinInstanceName, diff --git a/runtime/src/iree-amd-aie/aie_runtime/test/cdo/aie_cdo_gen_test.cxx b/runtime/src/iree-amd-aie/aie_runtime/test/cdo/aie_cdo_gen_test.cxx index fcc0d39d7..7ea4b8269 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/test/cdo/aie_cdo_gen_test.cxx +++ b/runtime/src/iree-amd-aie/aie_runtime/test/cdo/aie_cdo_gen_test.cxx @@ -10,10 +10,7 @@ #include "aie/AIEDialect.h" #include "aie/AIEXDialect.h" #include "iree-amd-aie/Target/AMDAIETargets.h" -#include "iree-amd-aie/Target/XCLBinGen.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Parser/Parser.h" @@ -43,11 +40,19 @@ int main(int argc, char **argv) { mlir::ParserConfig parserConfig(&context); auto moduleOp = llvm::cast( mlir::parseSourceFile(mlirAbsPath, parserConfig).release()); + + auto deviceOps = moduleOp.getOps(); + auto nDeviceOps = std::distance(deviceOps.begin(), deviceOps.end()); + if (nDeviceOps != 1){ + std::cerr << "Error: Expected exactly one xilinx.aie.device op\n"; + return 1; + } + auto deviceOp = *deviceOps.begin(); llvm::DebugFlag = true; const char *debugTypes[3] = {"aie-generate-cdo", "iree-aie-runtime", "iree-aie-cdo-emitter"}; llvm::setCurrentDebugTypes(debugTypes, 3); - auto status = AIETranslateToCDODirect(moduleOp, workDir, false, false, false); + auto status = AIETranslateToCDODirect(deviceOp, workDir, false, false, false); std::vector diagnostics; ScopedDiagnosticHandler handler(moduleOp.getContext(), [&](Diagnostic &d) { llvm::raw_string_ostream(diagnostics.emplace_back()) @@ -59,7 +64,7 @@ int main(int argc, char **argv) { llvm::DebugFlag = false; llvm::setCurrentDebugType("aie-cdo-driver-debug"); - status = AIETranslateToCDODirect(moduleOp, workDir, false, false, true); + status = AIETranslateToCDODirect(deviceOp, workDir, false, false, true); if (failed(status)) for (const auto &diagnostic : diagnostics) std::cerr << diagnostic << "\n"; }