Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not serialize-deserialize module before calling aie2xclbin #685

Merged
merged 1 commit into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,12 @@

#include <algorithm>
#include <cassert>
#include <list>
#include <set>

#include "AIEDialect.h"
#include "Passes.h"
#include "iree-amd-aie/aie_runtime/iree_aie_router.h"
#include "iree-amd-aie/aie_runtime/iree_aie_runtime.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/raw_os_ostream.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"

Expand Down
36 changes: 18 additions & 18 deletions compiler/plugins/target/AMD-AIE/aie/AMDAIEDmaToNpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "iree-amd-aie/aie_runtime/iree_aie_runtime.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Format.h"
#include "mlir/IR/AsmState.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
Expand Down Expand Up @@ -512,6 +511,7 @@ struct AMDAIEDmaToNpuPass : mlir::OperationPass<DeviceOp> {

instructions[2] = count;
instructions[3] = instructions.size() * sizeof(uint32_t);

ArrayRef<uint32_t> instsArrRef(instructions.data(), instructions.size());
device->setAttr(
"npu_instructions",
Expand All @@ -521,24 +521,24 @@ struct AMDAIEDmaToNpuPass : mlir::OperationPass<DeviceOp> {
IntegerType::get(&getContext(), 32, IntegerType::Unsigned)),
"npu_instructions",
HeapAsmResourceBlob::allocateAndCopyInferAlign(instsArrRef)));
// The LX instructions for the entry point function are already generated by
// the pass hence we can safely delete the function as it is of no use to
// us. A reason to do this is that otherwise it is unceseccarily lowered to
// llvm where it can have a chance to crash in case the argument list is not
// lowerable for reasons such as memref's with dynamic offsets.
auto symName = dyn_cast_or_null<StringAttr>(device->getAttr("sym_name"));

SmallVector<RuntimeSequenceOp> seqOps;
device->walk([&](RuntimeSequenceOp seqOp) {
// if the deviceOp has a symbol name attached to it we look for the
// sequence op that partically matches that symbol, if not we collect all
// sequenceOps.
if (!symName ||
symName.str().find(seqOp.getSymName()->str()) != std::string::npos)
seqOps.push_back(seqOp);
});
// If exactly one entry point function is found we can delete it. For any
// other result we do not make any change.
if (seqOps.size() == 1) seqOps[0].erase();
device->walk([&](RuntimeSequenceOp seqOp) { seqOps.push_back(seqOp); });

if (seqOps.size() > 1) {
device->emitOpError("has ")
<< seqOps.size()
<< " aiex.runtime_sequence ops. Expected no more than 1.";
signalPassFailure();
}

if (seqOps.size() == 1) {
auto seqOp = seqOps[0];
StringRef name = seqOp.getSymName().value();
device->setAttr("runtime_sequence_name",
StringAttr::get(&getContext(), name));
seqOp.erase();
}
}
};

Expand Down
89 changes: 0 additions & 89 deletions compiler/plugins/target/AMD-AIE/aie/AMDAIEXToStandard.cpp

This file was deleted.

1 change: 0 additions & 1 deletion compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ iree_cc_library(
AMDAIELocalizeLocks.cpp
AMDAIENormalizeAddressSpaces.cpp
AMDAIEObjectFifoStatefulTransform.cpp
AMDAIEXToStandard.cpp
DEPS
iree-amd-aie::aie_runtime::iree_aie_runtime_static
::AIEDialectIR
Expand Down
4 changes: 1 addition & 3 deletions compiler/plugins/target/AMD-AIE/aie/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ createAMDAIEPathfinderPass();
std::unique_ptr<OperationPass<ModuleOp>> createAMDAIECoreToStandardPass();
std::unique_ptr<OperationPass<xilinx::AIE::DeviceOp>>
createAMDAIEDmaToNpuPass();
std::unique_ptr<OperationPass<ModuleOp>> createAMDAIEXToStandardPass();

void registerAMDAIEAssignBufferAddressesBasic();
void registerAMDAIEAssignBufferDescriptorIDs();
Expand All @@ -44,9 +43,8 @@ void registerAMDAIELocalizeLocks();
void registerAMDAIENormalizeAddressSpaces();
void registerAMDAIEObjectFifoStatefulTransform();
void registerAMDAIERoutePathfinderFlows();

void registerAMDAIEDmaToNpu();
void registerAMDAIEXToStandardPass();

} // namespace mlir::iree_compiler::AMDAIE

#endif // AMDAIE_PASSES_H_
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// CHECK: memref.global "public" @of_fromMem : memref<32xi32>
// CHECK: aie.shim_dma_allocation @of_fromMem(MM2S, 0, 0)
// CHECK: aie.shim_dma_allocation @of_toMem(S2MM, 0, 0)
// CHECK: } {npu_instructions = dense_resource<npu_instructions> : tensor<64xui32>}
// CHECK: } {npu_instructions = dense_resource<npu_instructions> : tensor<64xui32>, runtime_sequence_name = "sequence"}
newling marked this conversation as resolved.
Show resolved Hide resolved

// CHECK: {-#
// CHECK: dialect_resources: {
Expand Down

This file was deleted.

23 changes: 0 additions & 23 deletions compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -77,26 +77,3 @@ module {
} {sym_name = "explicit_sym_name_0"}
}

// -----

// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: memref.global "public" @toMem : memref<16xi32>
// CHECK: func.func @pretend_microkernel
// CHECK: aiex.runtime_sequence @explicit_sym_name
// CHECK: aie.shim_dma_allocation @toMem(MM2S, 1, 1)

module {
aie.device(npu1_4col) {
memref.global "public" @toMem : memref<16xi32>
func.func @pretend_microkernel(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
return
}

aiex.runtime_sequence @explicit_sym_name(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_wait {symbol = @toMem}
}
aie.shim_dma_allocation @toMem (MM2S, 1, 1)
} {sym_name = "wrong_sym_name"}
}

Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ struct AMDAIESession
AMDAIE::registerAMDAIEObjectFifoStatefulTransform();
AMDAIE::registerAMDAIERoutePathfinderFlows();
AMDAIE::registerAMDAIEDmaToNpu();
AMDAIE::registerAMDAIEXToStandardPass();
AMDAIE::registerAIRConversionPasses();
AMDAIE::registerAIRTransformPasses();
aievec::registerConvertAIEVecToLLVMPass();
Expand Down
93 changes: 39 additions & 54 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include "aievec/XLLVMDialect.h"
#include "air/Dialect/AIR/AIRDialect.h"
#include "air/Dialect/AIRRt/AIRRtDialect.h"
#include "iree-amd-aie/IR/AMDAIEAttrs.h"
#include "iree-amd-aie/IR/AMDAIEDialect.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h"
Expand All @@ -27,9 +26,12 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/Passes.h"
#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/EmitC/IR/EmitC.h"
#include "mlir/Dialect/Func/Extensions/AllExtensions.h"
Expand Down Expand Up @@ -75,42 +77,27 @@ static llvm::cl::opt<std::string> clEnableAMDAIEUkernels(
"unprefixed microkernels to enable, e.g. `matmul`."),
llvm::cl::init("none"));

// Utility to find aie.device Op corresponding to the export Op.
// For example, we have
// hal.executable.variant {
// hal.executable.export symbol1
// hal.executable.export symbol2
// module {
// aie.device {
// ...
// aiex.runtime_sequence symbol1
// }
// aie.device {
// ...
// aiex.runtime_sequence symbol2
// }
// }
// }
// Hence we need to find the aiex.runtime_sequence that coresponds to the export
// op symbol and return its parent aie.device Op. This is what we will pass to
// the `aie2xclbin` tool for artifact generation per entry point.
static xilinx::AIE::DeviceOp getDeviceOpFromEntryPoint(ModuleOp moduleOp,
StringRef exportOpName) {
static xilinx::AIE::DeviceOp getDeviceOpWithName(ModuleOp moduleOp,
StringRef targetName) {
xilinx::AIE::DeviceOp deviceOp;

moduleOp.walk([&](xilinx::AIEX::RuntimeSequenceOp sequenceOp) {
if (sequenceOp.getSymName() == exportOpName) {
deviceOp =
dyn_cast_or_null<xilinx::AIE::DeviceOp>(sequenceOp->getParentOp());
return WalkResult::interrupt();
}
return WalkResult::advance();
uint32_t nDeviceOpsVisited = 0;
moduleOp.walk([&](xilinx::AIE::DeviceOp d) {
++nDeviceOpsVisited;
// This attribute should've been set in the dma-to-npu pass.
newling marked this conversation as resolved.
Show resolved Hide resolved
auto maybeName = d->getAttrOfType<StringAttr>("runtime_sequence_name");
if (!maybeName) return WalkResult::advance();
auto name = maybeName.getValue();
if (name != targetName) return WalkResult::advance();
deviceOp = d;
return WalkResult::interrupt();
});
if (!deviceOp) {
moduleOp.emitError()
<< "failed to find aie.device containing func.func with symbol "
<< exportOpName;
}

if (!deviceOp)
moduleOp.emitError() << "visited " << nDeviceOpsVisited
<< " aie.device ops, and failed to find one with name "
<< targetName;

return deviceOp;
}

Expand Down Expand Up @@ -291,7 +278,7 @@ LogicalResult AIETargetBackend::serializeExecutable(
}

StringRef exportOpName = exportOp.getSymName();
deviceOps.push_back(getDeviceOpFromEntryPoint(moduleOp, exportOpName));
deviceOps.push_back(getDeviceOpWithName(moduleOp, exportOpName));

// The xclbin kernel name, appended with instance name suffix (`:MLIRAIEV1`,
// 10 chars) is required by the xclbinutil to have a length smaller or equal
Expand Down Expand Up @@ -334,21 +321,8 @@ LogicalResult AIETargetBackend::serializeExecutable(
uint64_t ordinal = entryPointOrdinals.at(entryPointNames[i]);

entryPointNamesFb[ordinal] = entryPointNames[i];

SmallString<128> inputMlirPath(workDir);
llvm::sys::path::append(inputMlirPath,
entryPointNamesFb[ordinal] + ".aiecc.mlir");

std::string errorMessage;
{
auto inputMlirOut = openOutputFile(inputMlirPath, &errorMessage);
if (!inputMlirOut) {
return moduleOp.emitOpError()
<< "Failed to write MLIR: " << errorMessage;
}
deviceOps[i].print(inputMlirOut->os(), OpPrintingFlags().useLocalScope());
inputMlirOut->keep();
}

// we add the entry point to the working directory for xclbin artifacts if
// there are multiple entry points so that we dont overwrite the xclbinutil
// generated artifacts e.g kernels.json, for different entry points which
Expand All @@ -375,11 +349,22 @@ LogicalResult AIETargetBackend::serializeExecutable(
ParserConfig pcfg(variantOp->getContext());
llvm::SourceMgr srcMgr;

OwningOpRef<ModuleOp> owningModuleOp =
parseSourceFile<ModuleOp>(inputMlirPath, srcMgr, pcfg);
// Move DeviceOp into its own ModuleOp, if there are multiple DeviceOps.
// Required as core-to-standard pass will move all ops in DeviceOps into
// the parent ModuleOp, so if they're not separated, core code between
// DeviceOps gets incorrectly concatenated. There's probably a simpler
// workaround, to be reviewed as we continue to remove layers of crust.
if (deviceOps.size() > 1) {
OpBuilder opBuilder(deviceOps[i].getContext());
auto moduleWithOneDevice =
opBuilder.create<ModuleOp>(deviceOps[i].getLoc());
opBuilder.setInsertionPointToStart(moduleWithOneDevice.getBody());
Operation *repl = opBuilder.clone(*deviceOps[i].getOperation());
deviceOps[i] = cast<xilinx::AIE::DeviceOp>(repl);
}

if (failed(aie2xclbin(
/*ctx=*/variantOp->getContext(), /*moduleOp=*/*owningModuleOp,
/*ctx=*/variantOp->getContext(), deviceOps[i],
/*outputNPU=*/npuInstPath.str().str(),
/*outputXCLBin=*/xclbinPath.str().str(),
/*printIRBeforeAll=*/options.aie2xclbinPrintIrBeforeAll,
Expand Down
Loading
Loading