Skip to content

Commit

Permalink
Enable peano on Strix (#1057)
Browse files Browse the repository at this point in the history
  • Loading branch information
jtuyls authored Jan 24, 2025
1 parent a666937 commit 3407e5a
Show file tree
Hide file tree
Showing 12 changed files with 223 additions and 105 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,8 @@ jobs:
tar -xvf iree-dist-linux.tar
echo "IREE_INSTALL_DIR=$PWD/iree-install" >> $GITHUB_ENV
echo "PYTHONPATH=$PWD/iree-install/python_packages/iree_compiler:$PWD/iree-install/python_packages/iree_runtime" >> $GITHUB_ENV
bash build_tools/download_peano.sh
echo "PEANO_INSTALL_DIR=$PWD/llvm-aie" >> $GITHUB_ENV
- uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
Expand Down Expand Up @@ -305,6 +307,7 @@ jobs:
python build_tools/ci/cpu_comparison/run.py \
test_aie_vs_cpu \
$PWD/iree-install \
--peano_dir=$PWD/llvm-aie \
--vitis_dir=/opt/xilinx/Vitis/2024.2 \
--target_device="npu4" \
--reset_npu_between_runs \
Expand Down
33 changes: 17 additions & 16 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -1681,38 +1681,39 @@ def __init__(self):
self.register(MatmulTransposeA(128, 256, 128, input_type, acc_type))
self.register(MatmulTransposeA(1536, 1536, 2048, input_type, acc_type))

# Matmul test(s):
self.register(
Matmul(
32,
32,
32,
"i32",
"i32",
name_suffix="chess_npu4",
run_on_target=["npu4"],
use_chess=True,
# NPU4 matmul test(s):
# Register the same 32x32x32 i32 matmul on npu4 twice: once compiled with
# chess and once without, so that both compilation paths are covered.
for use_chess in [True, False]:
    self.register(
        Matmul(
            32,
            32,
            32,
            "i32",
            "i32",
            name_suffix="chess_" + str(use_chess),
            run_on_target=["npu4"],
            # Forward the loop variable. Hard-coding False here made both
            # iterations build the identical non-chess configuration under
            # two different names, so the chess path was never exercised.
            use_chess=use_chess,
        )
    )
)

self.register(
Matmul(
1024,
1024,
1024,
"i32",
"i32",
name_suffix="4rows_8cols_chess_npu4",
name_suffix="4rows_8cols_npu4",
run_on_target=["npu4"],
aie_compilation_flags=[
"--iree-amdaie-num-rows=4",
"--iree-amdaie-num-cols=8",
],
use_chess=True,
use_chess=False,
)
)

for target in ["npu1_4col", "npu4"]:
use_chess = target == "npu4"
self.register(
Matmul(
32,
Expand All @@ -1722,7 +1723,7 @@ def __init__(self):
"i32",
name_suffix="infinite_loop_" + target,
run_on_target=[target],
use_chess=use_chess,
use_chess=False,
aie_compilation_flags=[
"--iree-amdaie-enable-infinite-loop-around-core-block=true"
],
Expand Down
2 changes: 1 addition & 1 deletion build_tools/peano_commit.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
19.0.0.2025010801+d0eacebf
19.0.0.2025012101+8c0c9916
159 changes: 79 additions & 80 deletions compiler/plugins/target/AMD-AIE/aie/AMDAIECoreToStandard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,45 +21,71 @@
using namespace mlir;
using namespace xilinx::AIE;

static void lockToStd(UseLockOp useLock, IRRewriter &rewriter) {
if (!isa<DeviceOp>(useLock->getParentOp())) {
std::string funcName = [&]() {
static LogicalResult lockToStd(IRRewriter &rewriter, Operation *parentOp,
const std::string &targetArch) {
OpBuilder::InsertionGuard guard(rewriter);
MLIRContext *ctx = rewriter.getContext();

StringAttr privateSym = StringAttr::get(ctx, "private");
auto buildDecl = [&](const std::string &funcName) {
rewriter.create<func::FuncOp>(
rewriter.getUnknownLoc(), funcName,
FunctionType::get(ctx, {rewriter.getI32Type(), rewriter.getI32Type()},
{}),
privateSym, ArrayAttr{}, ArrayAttr{});
};

std::string acquireFunction = "llvm." + targetArch + ".acquire";
std::string releaseFunction = "llvm." + targetArch + ".release";

buildDecl(acquireFunction);
buildDecl(releaseFunction);

WalkResult res = parentOp->walk([&](UseLockOp useLock) {
if (!isa<DeviceOp>(useLock->getParentOp())) {
std::string funcName;
switch (useLock.getAction()) {
case LockAction::Acquire:
case LockAction::AcquireGreaterEqual:
return "llvm.aie2.acquire";
funcName = acquireFunction;
break;
case LockAction::Release:
return "llvm.aie2.release";
funcName = releaseFunction;
break;
default:
assert(false && "Unknown lock action");
useLock.emitOpError() << "has an unsupported lock action";
return WalkResult::interrupt();
}
}();

// TODO(max): this can be simplified with
// SymbolTable::lookupNearestSymbolFrom if DeviceOp ceases to be a
// SymbolTable
ModuleOp modOp = useLock->getParentOfType<ModuleOp>();
func::FuncOp func = modOp.lookupSymbol<func::FuncOp>(funcName);
// TODO(max): this can be simplified with
// SymbolTable::lookupNearestSymbolFrom if DeviceOp ceases to be a
// SymbolTable
ModuleOp modOp = useLock->getParentOfType<ModuleOp>();
func::FuncOp func = modOp.lookupSymbol<func::FuncOp>(funcName);

int lockValue = useLock.getValue().value_or(1);
int lockValue = useLock.getValue().value_or(1);

// AIE2 acquire greater equal is encoded as a negative value.
if (useLock.getAction() == LockAction::AcquireGreaterEqual)
lockValue = -lockValue;
// AIE2 acquire greater equal is encoded as a negative value.
if (useLock.getAction() == LockAction::AcquireGreaterEqual)
lockValue = -lockValue;

rewriter.setInsertionPoint(useLock);
IntegerAttr lockAttr = rewriter.getI32IntegerAttr(lockValue);
IntegerType type = IntegerType::get(rewriter.getContext(), 32);
Location loc = useLock.getLoc();
rewriter.setInsertionPoint(useLock);
IntegerAttr lockAttr = rewriter.getI32IntegerAttr(lockValue);
IntegerType type = IntegerType::get(rewriter.getContext(), 32);
Location loc = useLock.getLoc();

SmallVector<Value, 2> args{
rewriter.create<arith::IndexCastOp>(loc, type, useLock.getLock()),
rewriter.create<arith::ConstantOp>(loc, type, lockAttr)};
SmallVector<Value, 2> args{
rewriter.create<arith::IndexCastOp>(loc, type, useLock.getLock()),
rewriter.create<arith::ConstantOp>(loc, type, lockAttr)};

rewriter.create<func::CallOp>(loc, func, args);
}
rewriter.create<func::CallOp>(loc, func, args);
}

rewriter.eraseOp(useLock);
rewriter.eraseOp(useLock);
return WalkResult::advance();
});
if (res.wasInterrupted()) return failure();
return success();
}

static void bufferToStd(ModuleOp module, BufferOp buffer,
Expand Down Expand Up @@ -132,41 +158,17 @@ void outlineOps(DeviceOp device) {
}

namespace mlir::iree_compiler::AMDAIE {
struct AMDAIECoreToStandardPass : mlir::OperationPass<ModuleOp> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(AMDAIECoreToStandardPass)

AMDAIECoreToStandardPass() : mlir::OperationPass<ModuleOp>(resolveTypeID()) {}
AMDAIECoreToStandardPass(const AMDAIECoreToStandardPass &other)
: mlir::OperationPass<mlir::ModuleOp>(other) {}

llvm::StringRef getArgument() const override {
return "amdaie-standard-lowering";
}

llvm::StringRef getName() const override {
return "AMDAIECoreToStandardPass";
}

std::unique_ptr<mlir::Pass> clonePass() const override {
return std::make_unique<AMDAIECoreToStandardPass>(
*static_cast<const AMDAIECoreToStandardPass *>(this));
}
struct AMDAIECoreToStandardPass
: public impl::AMDAIECoreToStandardBase<AMDAIECoreToStandardPass> {
AMDAIECoreToStandardPass(const AMDAIECoreToStandardOptions &options)
: AMDAIECoreToStandardBase(options) {}

// Registers the dialects this pass relies on (func, memref and AIE) with the
// given registry, in that order.
void getDependentDialects(mlir::DialectRegistry &registry) const override {
  registry.insert<mlir::func::FuncDialect, mlir::memref::MemRefDialect,
                  xilinx::AIE::AIEDialect>();
}

mlir::Pass::Option<unsigned> tileCol{
*this, "tilecol",
llvm::cl::desc("X coordinate of tile to generate code for"),
llvm::cl::init(-1)};
mlir::Pass::Option<unsigned> tileRow{
*this, "tilerow",
llvm::cl::desc("Y coordinate of tile to generate code for"),
llvm::cl::init(-1)};

// Assert that cores are isolated
static bool coresAreIsolated(ModuleOp m) {
SmallVector<CoreOp> coreOps;
Expand Down Expand Up @@ -198,6 +200,19 @@ struct AMDAIECoreToStandardPass : mlir::OperationPass<ModuleOp> {
m.emitOpError("expected AIE.device operation at toplevel");
return signalPassFailure();
}
DeviceOp deviceOp = *m.getOps<DeviceOp>().begin();
AMDAIEDeviceModel deviceModel =
getDeviceModel(static_cast<AMDAIEDevice>(deviceOp.getDevice()));

std::optional<std::string> targetArch = deviceModel.getTargetArchString();
if (!targetArch.has_value()) {
deviceOp.emitError() << "doesn't have a target arch string";
return signalPassFailure();
}
// Chess uses `aie2` for both aie2 and aie2p, while peano separates between
// `aie2` and `aie2p`.
std::string targetArchStr =
lowerToChess ? "aie2" : StringRef(targetArch.value()).lower();

MLIRContext *ctx = &getContext();
IRRewriter rewriter(ctx);
Expand All @@ -206,43 +221,27 @@ struct AMDAIECoreToStandardPass : mlir::OperationPass<ModuleOp> {
// Ensure that we don't have an incorrect target triple. This may override
// some bogus target triple in the original mlir.
m->setAttr(LLVM::LLVMDialect::getTargetTripleAttrName(),
rewriter.getStringAttr("aie2"));
rewriter.getStringAttr(targetArchStr));

StringAttr privateSym = StringAttr::get(ctx, "private");
auto buildDecl = [&](const std::string &funcName) {
rewriter.create<func::FuncOp>(
rewriter.getUnknownLoc(), funcName,
FunctionType::get(ctx, {rewriter.getI32Type(), rewriter.getI32Type()},
{}),
privateSym, ArrayAttr{}, ArrayAttr{});
};
buildDecl("llvm.aie2.acquire");
buildDecl("llvm.aie2.release");

m.walk([&](UseLockOp useLock) { lockToStd(useLock, rewriter); });
if (failed(lockToStd(rewriter, m, targetArchStr)))
return signalPassFailure();

m.walk([&](BufferOp buffer) { bufferToStd(m, buffer, rewriter); });

if (!coresAreIsolated(m)) return signalPassFailure();

m.walk(
[&](CoreOp coreOp) { coreToStd(coreOp, rewriter, tileCol, tileRow); });
m.walk([&](CoreOp coreOp) { coreToStd(coreOp, rewriter, -1, -1); });

// Move all the func.func ops and memref.globals from device to module.
DeviceOp device = *m.getOps<DeviceOp>().begin();
outlineOps<memref::GlobalOp>(device);
outlineOps<func::FuncOp>(device);
rewriter.eraseOp(device);
outlineOps<memref::GlobalOp>(deviceOp);
outlineOps<func::FuncOp>(deviceOp);
rewriter.eraseOp(deviceOp);
}
};

std::unique_ptr<OperationPass<ModuleOp>> createAMDAIECoreToStandardPass() {
return std::make_unique<AMDAIECoreToStandardPass>();
std::unique_ptr<OperationPass<ModuleOp>> createAMDAIECoreToStandardPass(
AMDAIECoreToStandardOptions options) {
return std::make_unique<AMDAIECoreToStandardPass>(options);
}

void registerAMDAIECoreToStandard() {
mlir::registerPass([]() -> std::unique_ptr<mlir::Pass> {
return createAMDAIECoreToStandardPass();
});
}
} // namespace mlir::iree_compiler::AMDAIE
25 changes: 25 additions & 0 deletions compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,30 @@ iree_cc_library(
# in-tree AIE and AIEX passes
###############################################################################

# Runs tablegen over Passes.td with --gen-pass-decls to produce Passes.h.inc,
# the generated include consumed by the AIE pass headers.
iree_tablegen_library(
NAME
AIEPassesIncGen
TD_FILE
"Passes.td"
OUTS
--gen-pass-decls Passes.h.inc
)

# Header library bundling the hand-written pass headers (PassDetail.h,
# Passes.h) with the generated Passes.h.inc. It depends on AIEPassesIncGen,
# the target that declares Passes.h.inc as its output, and is marked PUBLIC.
iree_cc_library(
NAME
AIEPassHeaders
HDRS
"PassDetail.h"
"Passes.h"
"Passes.h.inc"
DEPS
::AIEPassesIncGen
::AIEDialectIR
::AIEXDialectIR
MLIRPass
PUBLIC
)

iree_cc_library(
NAME
AIEPasses
Expand All @@ -145,6 +169,7 @@ iree_cc_library(
::AIEDialectIR
::AIEXDialectIR
::AIENormalizeAddressSpacesGen
::AIEPassHeaders
)

add_subdirectory(test)
22 changes: 22 additions & 0 deletions compiler/plugins/target/AMD-AIE/aie/PassDetail.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2025 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef AMDAIE_AIE_PASSDETAIL_H_
#define AMDAIE_AIE_PASSDETAIL_H_

#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassOptions.h"

// Includes the tablegen-generated pass code (aie/Passes.h.inc) inside the
// AMDAIE namespace. The macros defined before the include select which
// sections of the generated file are emitted.
namespace mlir::iree_compiler::AMDAIE {

// Selects the generated pass declarations. Presumably this is what provides
// the AMDAIECoreToStandardOptions struct -- confirm against Passes.td.
#define GEN_PASS_DECL
// Selects the generated definition for the AMDAIECoreToStandard pass.
// Presumably this provides the impl::AMDAIECoreToStandardBase class template
// that the pass implementation derives from -- confirm against Passes.td.
// NOTE(review): defining this in a header means every includer of this file
// gets the definition; verify that is intended.
#define GEN_PASS_DEF_AMDAIECORETOSTANDARD

#include "aie/Passes.h.inc"

} // namespace mlir::iree_compiler::AMDAIE

#endif // AMDAIE_AIE_PASSDETAIL_H_
11 changes: 6 additions & 5 deletions compiler/plugins/target/AMD-AIE/aie/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef AMDAIE_PASSES_H_
#define AMDAIE_PASSES_H_
#ifndef AMDAIE_AIE_PASSES_H_
#define AMDAIE_AIE_PASSES_H_

#include "AIEDialect.h"
#include "PassDetail.h"
#include "mlir/Pass/Pass.h"

namespace mlir::iree_compiler::AMDAIE {
Expand All @@ -29,18 +30,18 @@ std::unique_ptr<OperationPass<xilinx::AIE::DeviceOp>>
createAMDAIENormalizeAddressSpacesPass();
std::unique_ptr<OperationPass<xilinx::AIE::DeviceOp>>
createAMDAIEPathfinderPass();
std::unique_ptr<OperationPass<ModuleOp>> createAMDAIECoreToStandardPass();
std::unique_ptr<OperationPass<ModuleOp>> createAMDAIECoreToStandardPass(
AMDAIECoreToStandardOptions options = {});
std::unique_ptr<OperationPass<xilinx::AIE::DeviceOp>>
createAMDAIEDmaToNpuPass();

void registerAMDAIEAssignBufferAddressesBasic();
void registerAMDAIEAssignBufferDescriptorIDs();
void registerAMDAIECoreToStandard();
void registerAMDAIELocalizeLocks();
void registerAMDAIENormalizeAddressSpaces();
void registerAMDAIERoutePathfinderFlows();
void registerAMDAIEDmaToNpu();

} // namespace mlir::iree_compiler::AMDAIE

#endif // AMDAIE_PASSES_H_
#endif // AMDAIE_AIE_PASSES_H_
Loading

0 comments on commit 3407e5a

Please sign in to comment.