Skip to content

Commit

Permalink
[KernelDispatch] Enable unpack outer permutation for (batch)matmul (#1011)
Browse files Browse the repository at this point in the history

This PR enables outer permutation of the level 0 unpack, which results
in a logical objectFifo whose outer dimension aligns with the columns
instead of the rows. This helps with splitting the objectFifo and with
finding a routing solution after splitting, because logical objectFifos
are always split on the outermost non-unit dimension.

For example, when targeting Strix with an 8-column array, this results
in an output L2 objectFifo with shape `8x4x32x32` instead of
`4x8x32x32`. Splitting therefore happens on the dimension of size 8
instead of the one of size 4, which helps with assigning one output
buffer to every column and with finding enough channels for routing the
data from L1 to L2.
  • Loading branch information
jtuyls authored Jan 8, 2025
1 parent ef4252c commit a7f0bef
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,19 @@ static LogicalResult setRootConfigForPackPeelPipeline(
outerPermVec.push_back(2);
}
SmallVector<SmallVector<int64_t>> outerPerm = {outerPermVec, outerPermVec};
if (isObjectFifo) {
// Add outer permutation for unpack. NOTE: This currently fails for some
// tests in the AIR pipeline.
transposePackIndices.push_back(2);
unpackEmpty.push_back(true);
innerPerm.push_back({0, 1});
if (isa<linalg::BatchMatmulOp>(linalgOp)) {
outerPerm.push_back({0, 2, 1});
} else {
outerPerm.push_back({1, 0});
}
}

auto packingConfigLevel0Attr = getPackingConfigPackingLevelAttr(
context, packedSizesL0, transposePackIndices, unpackEmpty, innerPerm,
outerPerm);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// Test generic version of matmul.

// CHECK{LITERAL}: #config = #iree_codegen.lowering_config<tile_sizes = [[128, 128], [0, 0, 1], [1, 1, 0, 0, 0, 0]]>
// CHECK{LITERAL}: #packingConfig = #amdaie.packing_config<packing_config = [{packedSizes = [32, 32, 32], transposePackIndices = [0, 1], unpackEmpty = [false, false], innerPerm = [[0, 1], [1, 0]], outerPerm = [[0, 1], [0, 1]]}, {packedSizes = [0, 0, 0, 4, 4, 8], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [1, 0], [0, 1]], outerPerm = [[0, 1, 3, 2], [0, 1, 3, 2], [0, 1, 3, 2]]}]>
// CHECK{LITERAL}: #amdaie.packing_config<packing_config = [{packedSizes = [32, 32, 32], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [1, 0], [0, 1]], outerPerm = [[0, 1], [0, 1], [1, 0]]}, {packedSizes = [0, 0, 0, 4, 4, 8], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [1, 0], [0, 1]], outerPerm = [[0, 1, 3, 2], [0, 1, 3, 2], [0, 1, 3, 2]]}]>
module {
func.func @matmul_generic_128x128x256_i32() {
%c0_i32 = arith.constant 0 : i32
Expand Down Expand Up @@ -33,7 +33,7 @@ module {
// Test generic version of matmul_transpose_b.

// CHECK{LITERAL}: #config = #iree_codegen.lowering_config<tile_sizes = [[128, 128], [0, 0, 1], [1, 1, 0, 0, 0, 0]]>
// CHECK{LITERAL}: #packingConfig = #amdaie.packing_config<packing_config = [{packedSizes = [32, 32, 32], transposePackIndices = [0, 1], unpackEmpty = [false, false], innerPerm = [[0, 1], [0, 1]], outerPerm = [[0, 1], [0, 1]]}, {packedSizes = [0, 0, 0, 4, 4, 8], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [0, 1], [0, 1]], outerPerm = [[0, 1, 3, 2], [0, 1, 3, 2], [0, 1, 3, 2]]}]>
// CHECK{LITERAL}: #amdaie.packing_config<packing_config = [{packedSizes = [32, 32, 32], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [0, 1], [0, 1]], outerPerm = [[0, 1], [0, 1], [1, 0]]}, {packedSizes = [0, 0, 0, 4, 4, 8], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [0, 1], [0, 1]], outerPerm = [[0, 1, 3, 2], [0, 1, 3, 2], [0, 1, 3, 2]]}]>
module {
func.func @matmul_transpose_b_generic_128x128x256_i32() {
%c0_i32 = arith.constant 0 : i32
Expand Down Expand Up @@ -63,7 +63,7 @@ module {
// Test generic version of matmul_transpose_a.

// CHECK{LITERAL}: #config = #iree_codegen.lowering_config<tile_sizes = [[128, 128], [0, 0, 1], [1, 1, 0, 0, 0, 0]]>
// CHECK{LITERAL}: #packingConfig = #amdaie.packing_config<packing_config = [{packedSizes = [32, 32, 32], transposePackIndices = [0, 1], unpackEmpty = [false, false], innerPerm = [[1, 0], [1, 0]], outerPerm = [[0, 1], [0, 1]]}, {packedSizes = [0, 0, 0, 4, 4, 8], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[1, 0], [1, 0], [0, 1]], outerPerm = [[0, 1, 3, 2], [0, 1, 3, 2], [0, 1, 3, 2]]}]>
// CHECK{LITERAL}: #amdaie.packing_config<packing_config = [{packedSizes = [32, 32, 32], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[1, 0], [1, 0], [0, 1]], outerPerm = [[0, 1], [0, 1], [1, 0]]}, {packedSizes = [0, 0, 0, 4, 4, 8], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[1, 0], [1, 0], [0, 1]], outerPerm = [[0, 1, 3, 2], [0, 1, 3, 2], [0, 1, 3, 2]]}]>
module {
func.func @matmul_transpose_a_generic_128x128x256_i32() {
%c0_i32 = arith.constant 0 : i32
Expand Down
Loading

0 comments on commit a7f0bef

Please sign in to comment.