diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
index af9eb75a0288..cb22b598a94b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
@@ -77,6 +77,12 @@ llvm::cl::opt<bool> clGPUUnalignedGEMMVectorDistribution(
         "unaligned GEMMs when supported"),
     llvm::cl::init(false));
 
+llvm::cl::opt<bool> clGPUUseTileAndFuseConvolution(
+    "iree-codegen-llvmgpu-use-tile-and-fuse-convolution",
+    llvm::cl::desc(
+        "enable the tile and fuse pipeline for supported convolutions"),
+    llvm::cl::init(true));
+
 /// Flag to force using WMMA tensorcore operations.
 llvm::cl::opt<bool>
     clGPUUseWMMA("iree-codegen-llvmgpu-use-wmma",
@@ -2196,12 +2202,19 @@ static bool distributeToSquare(const int64_t oh, const int64_t ow,
 
 // Convolution Pipeline Configuration
 //====---------------------------------------------------------------------===//
 
-static LogicalResult setConvolutionConfig(IREE::GPU::TargetAttr target,
-                                          linalg::LinalgOp linalgOp,
-                                          const int64_t bestTilingFactor) {
+static LogicalResult setConvolutionConfig(
+    IREE::GPU::TargetAttr target, mlir::FunctionOpInterface entryPointFn,
+    linalg::LinalgOp linalgOp, const int64_t bestTilingFactor) {
   if (!isa<linalg::Conv2DNchwFchwOp, linalg::Conv2DNhwcHwcfOp>(linalgOp)) {
     return failure();
   }
+  if (clGPUUseTileAndFuseConvolution) {
+    if (succeeded(IREE::GPU::setTileAndFuseLoweringConfig(target, entryPointFn,
+                                                          linalgOp))) {
+      LDBG("Tile and fuse convolution config");
+      return success();
+    }
+  }
   const bool isNCHW = isa<linalg::Conv2DNchwFchwOp>(*linalgOp);
   const bool isNHWC = isa<linalg::Conv2DNhwcHwcfOp>(*linalgOp);
@@ -2284,9 +2297,8 @@ static LogicalResult setConvolutionConfig(IREE::GPU::TargetAttr target,
   SmallVector<int64_t> windowTileSizes(4, 0);
   windowTileSizes[ohIndex] = 1;
   tileSizes.push_back(windowTileSizes);
-  auto funcOp = linalgOp->getParentOfType<mlir::FunctionOpInterface>();
-  return setOpConfigAndEntryPointFnTranslation(funcOp, linalgOp, tileSizes,
-                                               pipeline, workgroupSize);
+  return setOpConfigAndEntryPointFnTranslation(
+      entryPointFn, linalgOp, tileSizes, pipeline, workgroupSize);
 }
 
 //====---------------------------------------------------------------------===//
@@ -2340,7 +2352,7 @@ static LogicalResult setRootConfig(IREE::GPU::TargetAttr target,
     LDBG("Warp Reduction Config");
     return success();
   }
-  if (succeeded(setConvolutionConfig(target, linalgOp, 16))) {
+  if (succeeded(setConvolutionConfig(target, entryPointFn, linalgOp, 16))) {
     LDBG("Convolution Config");
     return success();
   }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir
index d129117741e3..af33828f9135 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 \
+// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --iree-codegen-llvmgpu-use-tile-and-fuse-convolution=false \
 // RUN:   --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target,canonicalize)))))' \
 // RUN:   %s | FileCheck %s
 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
index feb0e2766303..66fc62f2e482 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
@@ -612,12 +612,11 @@ func.func @forward_dispatch_1_conv_2d_nhwc_hwcf_256x112x112x64x7x7x3_f32() {
   return
 }
 
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info
 
 // -----