Move tensor_extract slice up in the block to respect bufferization
pashu123 committed Jan 9, 2025 · 1 parent 09d6cf4 · commit ac25bbb
Showing 6 changed files with 23 additions and 15 deletions.
@@ -260,6 +260,25 @@ void OptimizeTensorInsertExtractSlicesPass::runOnOperation() {
   funcOp.walk([&](scf::ForOp forOp) { moveLoopInvariantCode(forOp); });
   LDBG("after hoisting loop invariant code\n" << funcOp);
+
+  funcOp.walk([&](tensor::ExtractSliceOp extractSliceOp) {
+    // Check that all operands of the tensor.extract_slice op are arguments
+    // of the block that contains it; in that case the op can be moved to
+    // the beginning of that block.
+    // TODO: Use a more general dominance analysis to move the op to the
+    // earliest valid point in the same or another block.
+    for (Value operand : extractSliceOp.getOperands()) {
+      auto blockArg = dyn_cast<BlockArgument>(operand);
+      if (!blockArg || blockArg.getParentBlock() !=
+                           extractSliceOp.getOperation()->getBlock()) {
+        return;
+      }
+    }
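+    // The source is one of the operands, so the loop above guarantees that
+    // the dyn_cast below succeeds.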
+    Value sourceSlice = extractSliceOp.getSource();
+    auto blockArg = dyn_cast<BlockArgument>(sourceSlice);
+    auto &op = blockArg.getParentBlock()->getOperations().front();
+    extractSliceOp->moveBefore(&op);
+  });

   // TODO: walking in some reverse / inside-out order would be more efficient
   // and would capture more cases.
   funcOp.walk(
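For illustration, a minimal sketch of the new rewrite (hypothetical IR; the
names, shapes, and "some.op" below are invented, not taken from this commit's
tests): a tensor.extract_slice whose operands are all arguments of its
enclosing block gets hoisted to the front of that block.

  // Before: the slice sits after other ops in the block.
  ^bb0(%src: tensor<128xf32>, %off: index):
    %v = "some.op"() : () -> vector<4xf32>
    %s = tensor.extract_slice %src[%off] [4] [1]
           : tensor<128xf32> to tensor<4xf32>

  // After: %src and %off are block arguments, so the op is moved before the
  // first operation in the block.
  ^bb0(%src: tensor<128xf32>, %off: index):
    %s = tensor.extract_slice %src[%off] [4] [1]
           : tensor<128xf32> to tensor<4xf32>
    %v = "some.op"() : () -> vector<4xf32>

Hoisting the slice ahead of everything else gives bufferization a simpler
aliasing picture, which appears to be what lets the attention tests below get
by with one fewer memref.alloc (MEMORY-COUNT-4 dropping to MEMORY-COUNT-3).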
@@ -142,13 +142,6 @@ hal.executable private @main {
 // CHECK: scf.forall ({{.*}}) in (17, 81) {
 // CHECK: %[[LOOP:.+]] = scf.for %[[IV:.+]] = %[[C0]] to %[[C721]] step %[[C1]] {{.*}} -> (vector<1x1x1x1x4x1xf32>)
 // CHECK: gpu.barrier
-// CHECK-DAG: %[[LHS_RD:.+]] = vector.transfer_read %[[B0]]{{.*}} vector<1xf16>
-// CHECK-DAG: vector.transfer_write %[[LHS_RD]]
-// Note that to simplify the test we are not showing the mapping of the RHS_RD
-// to its buffer as it goes through an scf.if/else control structure
-// involving allocas.
-// CHECK-DAG: %[[RHS_RD:.+]] = vector.transfer_read {{.*}} vector<1xf16>
-// CHECK-DAG: vector.transfer_write %[[RHS_RD]]
 // CHECK: gpu.barrier
 // CHECK-DAG: %[[LHS_MM0:.+]] = vector.transfer_read {{.*}} vector<4xf16>
 // CHECK-DAG: %[[RHS_MM:.+]] = vector.transfer_read {{.*}} vector<4x1x1xf16>
@@ -1150,10 +1150,6 @@ hal.executable public @main {
 // CHECK: scf.forall ({{.*}}) in (12, 37, 10) {
 // CHECK: %[[LOOP:.+]] = scf.for %[[IV:.+]] = %c0 to %c145 step %c1 {{.*}} -> (vector<1x1x1x4x1xf32>)
 // CHECK: gpu.barrier
-// CHECK-DAG: %[[LHS_RD:.+]] = vector.transfer_read {{.*}} vector<4xf32>
-// CHECK-DAG: vector.transfer_write %[[LHS_RD]]
-// CHECK-DAG: %[[RHS_RD:.+]] = vector.transfer_read {{.*}} vector<1xf32>
-// CHECK-DAG: vector.transfer_write %[[RHS_RD]]
 // CHECK: gpu.barrier
 // CHECK-DAG: vector.transfer_read {{.*}} #gpu.address_space<workgroup>>, vector<1xf32>
 // CHECK-DAG: vector.transfer_read {{.*}} #gpu.address_space<workgroup>>, vector<1xf32>
@@ -1092,7 +1092,7 @@ hal.executable private @attention_multiple_m_transpose {
 // needed because the intrinsic layout matches.
 // TODO: With forall distribution it's allocating memory for S.
 // MEMORY-LABEL: func.func @attention_multiple_m_transpose()
-// MEMORY-COUNT-4: memref.alloc
+// MEMORY-COUNT-3: memref.alloc
 // MEMORY-NOT: memref.alloc

 // -----
@@ -1160,7 +1160,7 @@ hal.executable private @attention_mfma_32x32x8 {
 // Check that we only use alloc for Q, K, and V. No shared memory for S is
 // needed because the intrinsic layout matches.
 // MEMORY-LABEL: func.func @attention_mfma_32x32x8()
-// MEMORY-COUNT-4: memref.alloc
+// MEMORY-COUNT-3: memref.alloc
 // MEMORY-NOT: memref.alloc

 // -----
third_party/llvm-project (2 changes: 1 addition & 1 deletion)
Submodule llvm-project updated 1564 files
