Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
lower scf parallel to calyx
Browse files Browse the repository at this point in the history
jiahanxie353 committed Nov 16, 2024
1 parent 2fd0b29 commit 9058f94
Showing 2 changed files with 231 additions and 2 deletions.
114 changes: 112 additions & 2 deletions lib/Conversion/SCFToCalyx/SCFToCalyx.cpp
Original file line number Diff line number Diff line change
@@ -119,10 +119,15 @@ struct CallScheduleable {
func::CallOp callOp;
};

/// Scheduleable wrapper for an `scf.parallel` operation. It is one of the
/// alternatives of the `Scheduleable` variant, which lets a parallel loop be
/// recorded in a block's list of scheduleables next to groups and the other
/// SCF constructs.
struct ParScheduleable {
/// Parallel operation to schedule.
scf::ParallelOp parOp;
};

/// A variant of types representing scheduleable operations.
using Scheduleable =
std::variant<calyx::GroupOp, WhileScheduleable, ForScheduleable,
IfScheduleable, CallScheduleable>;
IfScheduleable, CallScheduleable, ParScheduleable>;

class IfLoweringStateInterface {
public:
@@ -275,6 +280,7 @@ class BuildOpGroups : public calyx::FuncOpPartialLoweringPattern {
.template Case<arith::ConstantOp, ReturnOp, BranchOpInterface,
/// SCF
scf::YieldOp, scf::WhileOp, scf::ForOp, scf::IfOp,
scf::ParallelOp, scf::ReduceOp,
/// memref
memref::AllocOp, memref::AllocaOp, memref::LoadOp,
memref::StoreOp,
@@ -338,6 +344,10 @@ class BuildOpGroups : public calyx::FuncOpPartialLoweringPattern {
LogicalResult buildOp(PatternRewriter &rewriter, scf::WhileOp whileOp) const;
LogicalResult buildOp(PatternRewriter &rewriter, scf::ForOp forOp) const;
LogicalResult buildOp(PatternRewriter &rewriter, scf::IfOp ifOp) const;
LogicalResult buildOp(PatternRewriter &rewriter,
scf::ReduceOp reduceOp) const;
LogicalResult buildOp(PatternRewriter &rewriter,
scf::ParallelOp parallelOp) const;
LogicalResult buildOp(PatternRewriter &rewriter, CallOp callOp) const;

/// buildLibraryOp will build a TCalyxLibOp inside a TGroupOp based on the
@@ -1093,6 +1103,19 @@ LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
return success();
}

/// Handles the `scf.reduce` terminator of a parallel loop. No hardware is
/// generated for it: the lowering succeeds when the reduce carries no
/// operands and fails as soon as it has any.
LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
                                     scf::ReduceOp reduceOp) const {
  const bool carriesReductions = !reduceOp.getOperands().empty();
  return failure(carriesReductions);
}
/// Records the `scf.parallel` operation as a scheduleable of the block that
/// contains it. No groups are built here; the operation is only wrapped in a
/// `ParScheduleable` and handed to the component lowering state.
LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
                                     scf::ParallelOp parOp) const {
  auto *enclosingBlock = parOp.getOperation()->getBlock();
  ParScheduleable scheduleable{parOp};
  getState<ComponentLoweringState>().addBlockScheduleable(enclosingBlock,
                                                          scheduleable);
  return success();
}

LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
CallOp callOp) const {
std::string instanceName = calyx::getInstanceName(callOp);
@@ -1481,6 +1504,75 @@ class BuildIfGroups : public calyx::FuncOpPartialLoweringPattern {
}
};

/// Fully unrolls every `scf.parallel` operation found in a function.
///
/// Each `scf.parallel` is replaced by a region-less clone of itself whose
/// region is then filled with one block per point of the iteration space. In
/// every such block the induction variables are replaced by
/// `arith.constant` index values and the original loop body, without its
/// terminator, is cloned. The blocks are emitted in row-major order (the last
/// dimension varies fastest).
///
/// Unrolling requires all lower bounds, upper bounds and steps to be defined
/// by constant index operations, and the loop must not produce results
/// (reductions). Loops violating these requirements make the pattern fail
/// with a diagnostic and are left unmodified.
class BuildParGroups : public calyx::FuncOpPartialLoweringPattern {
  using FuncOpPartialLoweringPattern::FuncOpPartialLoweringPattern;

  /// Unrolls all `scf.parallel` operations nested in `funcOp`; stops at, and
  /// reports, the first one that cannot be unrolled.
  LogicalResult
  partiallyLowerFuncToComp(FuncOp funcOp,
                           PatternRewriter &rewriter) const override {
    LogicalResult res = success();
    funcOp.walk([&](scf::ParallelOp scfParOp) {
      if (failed(partialEval(rewriter, scfParOp))) {
        res = failure();
        return WalkResult::interrupt();
      }
      return WalkResult::advance();
    });

    return res;
  }

private:
  /// Appends to `values` the constant behind each value in `operands`.
  /// Emits an error on `parOp` (mentioning `what`) and fails when one of the
  /// operands is not defined by a constant index operation.
  static LogicalResult
  collectConstantIndices(scf::ParallelOp parOp, ValueRange operands,
                         StringRef what, SmallVectorImpl<int64_t> &values) {
    for (Value operand : operands) {
      auto constantOp = operand.getDefiningOp<arith::ConstantIndexOp>();
      if (!constantOp)
        return parOp.emitOpError()
               << what << " must be constant index values to be unrolled";
      values.push_back(constantOp.value());
    }
    return success();
  }

  /// Replaces `scfParOp` by its fully unrolled, multi-block counterpart.
  /// All legality checks happen before the IR is touched, so a failure leaves
  /// the original operation in place.
  LogicalResult partialEval(PatternRewriter &rewriter,
                            scf::ParallelOp scfParOp) const {
    assert(scfParOp.getLoopSteps() && "Parallel loop must have steps");

    // The body terminator is not cloned into the unrolled blocks, so a
    // reduction would be dropped silently; reject such loops explicitly.
    if (scfParOp->getNumResults() != 0)
      return scfParOp.emitOpError(
          "with reductions cannot be unrolled for the Calyx lowering");

    // Resolve the iteration space up front instead of relying on asserts,
    // which are compiled out of release builds.
    SmallVector<int64_t, 4> lbVals, ubVals, stepVals;
    if (failed(collectConstantIndices(scfParOp, scfParOp.getLowerBound(),
                                      "lower bounds", lbVals)) ||
        failed(collectConstantIndices(scfParOp, scfParOp.getUpperBound(),
                                      "upper bounds", ubVals)) ||
        failed(collectConstantIndices(scfParOp, scfParOp.getStep(), "steps",
                                      stepVals)))
      return failure();

    // A non-positive step would make the unrolling loop below run forever.
    for (int64_t step : stepVals)
      if (step <= 0)
        return scfParOp.emitOpError("steps must be positive to be unrolled");

    auto *body = scfParOp.getBody();
    auto parOpIVs = scfParOp.getInductionVars();

    rewriter.setInsertionPointAfter(scfParOp);
    scf::ParallelOp newParOp = scfParOp.cloneWithoutRegions();
    auto loc = newParOp.getLoc();
    rewriter.insert(newParOp);

    OpBuilder insideBuilder(newParOp);
    auto &region = newParOp.getRegion();
    IRMapping operandMap;

    const unsigned numDims = lbVals.size();
    SmallVector<int64_t, 4> indices(numDims);

    // Recursively enumerates the iteration space, one dimension per level.
    // Once every dimension has a value, a new block is appended to the region
    // and receives a copy of the loop body specialized for those values.
    std::function<void(unsigned)> unrollDim = [&](unsigned dim) {
      if (dim == numDims) {
        Block *currBlock = &region.emplaceBlock();
        insideBuilder.setInsertionPointToEnd(currBlock);
        for (unsigned i = 0; i < numDims; ++i) {
          Value ivConstant =
              insideBuilder.create<arith::ConstantIndexOp>(loc, indices[i]);
          operandMap.map(parOpIVs[i], ivConstant);
        }
        // Clone everything except the terminator of the original body.
        for (auto it = body->begin(); it != std::prev(body->end()); ++it)
          insideBuilder.clone(*it, operandMap);
        return;
      }
      for (int64_t iv = lbVals[dim]; iv < ubVals[dim]; iv += stepVals[dim]) {
        indices[dim] = iv;
        unrollDim(dim + 1);
      }
    };
    unrollDim(0);

    rewriter.replaceOp(scfParOp, newParOp);
    return success();
  }
};

/// Builds a control schedule by traversing the CFG of the function and
/// associating this with the previously created groups.
/// For simplicity, the generated control flow is expanded for all possible
@@ -1512,7 +1604,8 @@ class BuildControl : public calyx::FuncOpPartialLoweringPattern {
getState<ComponentLoweringState>().getBlockScheduleables(block);
auto loc = block->front().getLoc();

if (compBlockScheduleables.size() > 1) {
if (compBlockScheduleables.size() > 1 &&
!isa<scf::ParallelOp>(block->getParentOp())) {
auto seqOp = rewriter.create<calyx::SeqOp>(loc);
parentCtrlBlock = seqOp.getBodyBlock();
}
@@ -1547,6 +1640,19 @@ class BuildControl : public calyx::FuncOpPartialLoweringPattern {
rewriter.create<calyx::EnableOp>(whileLatchGroup.getLoc(),
whileLatchGroup.getName());

if (res.failed())
return res;
} else if (auto *parSchedPtr = std::get_if<ParScheduleable>(&group)) {
auto parOp = parSchedPtr->parOp;
auto calyxParOp = rewriter.create<calyx::ParOp>(parOp.getLoc());
LogicalResult res = LogicalResult::success();
for (auto &innerBlock : parOp.getRegion().getBlocks()) {
rewriter.setInsertionPointToEnd(calyxParOp.getBodyBlock());
auto seqOp = rewriter.create<calyx::SeqOp>(parOp.getLoc());
rewriter.setInsertionPointToEnd(seqOp.getBodyBlock());
res = scheduleBasicBlock(rewriter, path, seqOp.getBodyBlock(),
&innerBlock);
}
if (res.failed())
return res;
} else if (auto *forSchedPtr = std::get_if<ForScheduleable>(&group);
@@ -2241,6 +2347,9 @@ void SCFToCalyxPass::runOnOperation() {
/// This pass inlines scf.ExecuteRegionOp's by adding control-flow.
addGreedyPattern<InlineExecuteRegionOpPattern>(loweringPatterns);

addOncePattern<BuildParGroups>(loweringPatterns, patternState, funcMap,
*loweringState);

/// This pattern converts all index typed values to an i32 integer.
addOncePattern<calyx::ConvertIndexTypes>(loweringPatterns, patternState,
funcMap, *loweringState);
@@ -2270,6 +2379,7 @@ void SCFToCalyxPass::runOnOperation() {

addOncePattern<BuildIfGroups>(loweringPatterns, patternState, funcMap,
*loweringState);

/// This pattern converts operations within basic blocks to Calyx library
/// operators. Combinational operations are assigned inside a
/// calyx::CombGroupOp, and sequential inside calyx::GroupOps.
119 changes: 119 additions & 0 deletions test/Conversion/SCFToCalyx/convert_simple.mlir
Original file line number Diff line number Diff line change
@@ -257,3 +257,122 @@ module {
return %1 : f32
}
}

// -----

// Test parallel op lowering

// CHECK: calyx.wires {
// CHECK-DAG: calyx.group @bb0_0 {
// CHECK-DAG: calyx.assign %std_slice_7.in = %c0_i32 : i32
// CHECK-DAG: calyx.assign %mem_1.addr0 = %std_slice_7.out : i3
// CHECK-DAG: calyx.assign %mem_1.content_en = %true : i1
// CHECK-DAG: calyx.assign %mem_1.write_en = %false : i1
// CHECK-DAG: calyx.assign %load_0_reg.in = %mem_1.read_data : i32
// CHECK-DAG: calyx.assign %load_0_reg.write_en = %mem_1.done : i1
// CHECK-DAG: calyx.group_done %load_0_reg.done : i1
// CHECK-DAG: }
// CHECK-DAG: calyx.group @bb0_1 {
// CHECK-DAG: calyx.assign %std_slice_6.in = %c0_i32 : i32
// CHECK-DAG: calyx.assign %mem_0.addr0 = %std_slice_6.out : i3
// CHECK-DAG: calyx.assign %mem_0.write_data = %load_0_reg.out : i32
// CHECK-DAG: calyx.assign %mem_0.write_en = %true : i1
// CHECK-DAG: calyx.assign %mem_0.content_en = %true : i1
// CHECK-DAG: calyx.group_done %mem_0.done : i1
// CHECK-DAG: }
// CHECK-DAG: calyx.group @bb1_0 {
// CHECK-DAG: calyx.assign %std_slice_5.in = %c4_i32 : i32
// CHECK-DAG: calyx.assign %mem_1.addr0 = %std_slice_5.out : i3
// CHECK-DAG: calyx.assign %mem_1.content_en = %true : i1
// CHECK-DAG: calyx.assign %mem_1.write_en = %false : i1
// CHECK-DAG: calyx.assign %load_1_reg.in = %mem_1.read_data : i32
// CHECK-DAG: calyx.assign %load_1_reg.write_en = %mem_1.done : i1
// CHECK-DAG: calyx.group_done %load_1_reg.done : i1
// CHECK-DAG: }
// CHECK-DAG: calyx.group @bb1_1 {
// CHECK-DAG: calyx.assign %std_slice_4.in = %c1_i32 : i32
// CHECK-DAG: calyx.assign %mem_0.addr0 = %std_slice_4.out : i3
// CHECK-DAG: calyx.assign %mem_0.write_data = %load_1_reg.out : i32
// CHECK-DAG: calyx.assign %mem_0.write_en = %true : i1
// CHECK-DAG: calyx.assign %mem_0.content_en = %true : i1
// CHECK-DAG: calyx.group_done %mem_0.done : i1
// CHECK-DAG: }
// CHECK-DAG: calyx.group @bb2_0 {
// CHECK-DAG: calyx.assign %std_slice_3.in = %c2_i32 : i32
// CHECK-DAG: calyx.assign %mem_1.addr0 = %std_slice_3.out : i3
// CHECK-DAG: calyx.assign %mem_1.content_en = %true : i1
// CHECK-DAG: calyx.assign %mem_1.write_en = %false : i1
// CHECK-DAG: calyx.assign %load_2_reg.in = %mem_1.read_data : i32
// CHECK-DAG: calyx.assign %load_2_reg.write_en = %mem_1.done : i1
// CHECK-DAG: calyx.group_done %load_2_reg.done : i1
// CHECK-DAG: }
// CHECK-DAG: calyx.group @bb2_1 {
// CHECK-DAG: calyx.assign %std_slice_2.in = %c4_i32 : i32
// CHECK-DAG: calyx.assign %mem_0.addr0 = %std_slice_2.out : i3
// CHECK-DAG: calyx.assign %mem_0.write_data = %load_2_reg.out : i32
// CHECK-DAG: calyx.assign %mem_0.write_en = %true : i1
// CHECK-DAG: calyx.assign %mem_0.content_en = %true : i1
// CHECK-DAG: calyx.group_done %mem_0.done : i1
// CHECK-DAG: }
// CHECK-DAG: calyx.group @bb3_0 {
// CHECK-DAG: calyx.assign %std_slice_1.in = %c6_i32 : i32
// CHECK-DAG: calyx.assign %mem_1.addr0 = %std_slice_1.out : i3
// CHECK-DAG: calyx.assign %mem_1.content_en = %true : i1
// CHECK-DAG: calyx.assign %mem_1.write_en = %false : i1
// CHECK-DAG: calyx.assign %load_3_reg.in = %mem_1.read_data : i32
// CHECK-DAG: calyx.assign %load_3_reg.write_en = %mem_1.done : i1
// CHECK-DAG: calyx.group_done %load_3_reg.done : i1
// CHECK-DAG: }
// CHECK-DAG: calyx.group @bb3_1 {
// CHECK-DAG: calyx.assign %std_slice_0.in = %c5_i32 : i32
// CHECK-DAG: calyx.assign %mem_0.addr0 = %std_slice_0.out : i3
// CHECK-DAG: calyx.assign %mem_0.write_data = %load_3_reg.out : i32
// CHECK-DAG: calyx.assign %mem_0.write_en = %true : i1
// CHECK-DAG: calyx.assign %mem_0.content_en = %true : i1
// CHECK-DAG: calyx.group_done %mem_0.done : i1
// CHECK-DAG: }
// CHECK-DAG: }
// CHECK-DAG: calyx.control {
// CHECK-DAG: calyx.seq {
// CHECK-DAG: calyx.par {
// CHECK-DAG: calyx.seq {
// CHECK-DAG: calyx.enable @bb0_0
// CHECK-DAG: calyx.enable @bb0_1
// CHECK-DAG: }
// CHECK-DAG: calyx.seq {
// CHECK-DAG: calyx.enable @bb1_0
// CHECK-DAG: calyx.enable @bb1_1
// CHECK-DAG: }
// CHECK-DAG: calyx.seq {
// CHECK-DAG: calyx.enable @bb2_0
// CHECK-DAG: calyx.enable @bb2_1
// CHECK-DAG: }
// CHECK-DAG: calyx.seq {
// CHECK-DAG: calyx.enable @bb3_0
// CHECK-DAG: calyx.enable @bb3_1
// CHECK-DAG: }
// CHECK-DAG: }
// CHECK-DAG: }
// CHECK-DAG: }

// Input for the parallel lowering test: a two-dimensional `scf.parallel`
// with constant bounds and steps. %arg2 ranges over {0, 2} (0 to 3, step 2)
// and %arg3 over {0, 1} (0 to 2, step 1), giving four iterations in total.
module {
func.func @main() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c0 = arith.constant 0 : index
%alloc = memref.alloc() : memref<6xi32>
%alloc_1 = memref.alloc() : memref<6xi32>
scf.parallel (%arg2, %arg3) = (%c0, %c0) to (%c3, %c2) step (%c2, %c1) {
// Load address: (%arg3 << 2) + %arg2.
%4 = arith.shli %arg3, %c2 : index
%5 = arith.addi %4, %arg2 : index
%6 = memref.load %alloc_1[%5] : memref<6xi32>
// Store address: (%arg2 << 1) + %arg3.
%7 = arith.shli %arg2, %c1 : index
%8 = arith.addi %7, %arg3 : index
memref.store %6, %alloc[%8] : memref<6xi32>
// Terminator without operands: the loop performs no reduction.
scf.reduce
}
return
}
}

0 comments on commit 9058f94

Please sign in to comment.