From a65e8e3ce61976ff8c216675bc3376e84216a27f Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Mon, 7 Oct 2019 15:51:34 +0000 Subject: [PATCH] [ROCm] Adding pass to generate the HSACO binary blob from the GPU kernel function --- CMakeLists.txt | 7 + include/mlir/CMakeLists.txt | 1 + .../mlir/Conversion/GPUToROCM/CMakeLists.txt | 33 ++ .../mlir/Conversion/GPUToROCM/GPUToROCMPass.h | 92 ++++ .../mlir/Conversion/GPUToROCM/ROCMConfig.h.in | 30 ++ lib/Conversion/CMakeLists.txt | 1 + lib/Conversion/GPUToROCM/CMakeLists.txt | 15 + .../GPUToROCM/ConvertKernelFuncToHSACO.cpp | 407 ++++++++++++++++++ test/Conversion/GPUToROCM/lit.local.cfg | 2 + .../lower-amdgpu-kernel-to-hsaco.mlir | 31 ++ test/lit.site.cfg.py.in | 1 + tools/mlir-opt/CMakeLists.txt | 5 + 12 files changed, 625 insertions(+) create mode 100644 include/mlir/Conversion/GPUToROCM/CMakeLists.txt create mode 100644 include/mlir/Conversion/GPUToROCM/GPUToROCMPass.h create mode 100644 include/mlir/Conversion/GPUToROCM/ROCMConfig.h.in create mode 100644 lib/Conversion/GPUToROCM/CMakeLists.txt create mode 100644 lib/Conversion/GPUToROCM/ConvertKernelFuncToHSACO.cpp create mode 100644 test/Conversion/GPUToROCM/lit.local.cfg create mode 100644 test/Conversion/GPUToROCM/lower-amdgpu-kernel-to-hsaco.mlir diff --git a/CMakeLists.txt b/CMakeLists.txt index 5329de4e8935..01acc0d7cd93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,13 @@ endif() set(MLIR_CUDA_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir CUDA runner") +# Build the ROCM conversions if the AMDGPU backend is available +if ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) + set(MLIR_ROCM_CONVERSIONS_ENABLED 1) +else() + set(MLIR_ROCM_CONVERSIONS_ENABLED 0) +endif() + include_directories( "include") include_directories( ${MLIR_INCLUDE_DIR}) diff --git a/include/mlir/CMakeLists.txt b/include/mlir/CMakeLists.txt index 1a5094df90db..2977e49500e5 100644 --- a/include/mlir/CMakeLists.txt +++ b/include/mlir/CMakeLists.txt @@ -2,3 +2,4 @@ 
add_subdirectory(Analysis) add_subdirectory(Dialect) add_subdirectory(EDSC) add_subdirectory(Transforms) +add_subdirectory(Conversion/GPUToROCM) diff --git a/include/mlir/Conversion/GPUToROCM/CMakeLists.txt b/include/mlir/Conversion/GPUToROCM/CMakeLists.txt new file mode 100644 index 000000000000..e0a95217f454 --- /dev/null +++ b/include/mlir/Conversion/GPUToROCM/CMakeLists.txt @@ -0,0 +1,33 @@ +if(MLIR_ROCM_CONVERSIONS_ENABLED) + + # Check whether the ROCm installation dir exists + set(ROCM_INSTALL_DIR "/opt/rocm" CACHE STRING "ROCm installation directory") + if (EXISTS ${ROCM_INSTALL_DIR}) + message("-- ROCm Install Dir - ${ROCM_INSTALL_DIR}") + else() + message(SEND_ERROR "-- NOT FOUND : ROCm Install Dir - ${ROCM_INSTALL_DIR}") + endif() + + # Check whether the ROCm device library dir exists + set(ROCM_DEVICE_LIB_DIR ${ROCM_INSTALL_DIR}/lib) + if (EXISTS ${ROCM_DEVICE_LIB_DIR}) + message("-- ROCm Device Library Dir - ${ROCM_DEVICE_LIB_DIR}") + else () + message(SEND_ERROR "-- NOT FOUND : ROCm Device Library Dir - ${ROCM_DEVICE_LIB_DIR}") + endif() + + # Check whether the ROCm HCC linker exists + set(ROCM_HCC_LINKER ${ROCM_INSTALL_DIR}/hcc/bin/ld.lld) + if (EXISTS ${ROCM_HCC_LINKER}) + message("-- ROCm HCC Linker - ${ROCM_HCC_LINKER}") + else () + message(SEND_ERROR "-- NOT FOUND : ROCm HCC Linker - ${ROCM_HCC_LINKER}") + endif() + + # Generate the ROCm Configuration header file + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/ROCMConfig.h.in" + "${CMAKE_CURRENT_BINARY_DIR}/ROCMConfig.h" + ) + +endif() diff --git a/include/mlir/Conversion/GPUToROCM/GPUToROCMPass.h b/include/mlir/Conversion/GPUToROCM/GPUToROCMPass.h new file mode 100644 index 000000000000..e7698611d858 --- /dev/null +++ b/include/mlir/Conversion/GPUToROCM/GPUToROCMPass.h @@ -0,0 +1,92 @@ +//===- GPUToROCmPass.h - MLIR ROCm runtime support --------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +#ifndef MLIR_CONVERSION_GPUTOROCM_GPUTOROCMPASS_H_ +#define MLIR_CONVERSION_GPUTOROCM_GPUTOROCMPASS_H_ + +#include +#include +#include +#include + +#include "mlir/Conversion/GPUToROCM/ROCMConfig.h" + +namespace mlir { + +namespace rocm { + +/// string constants used by the ROCM backend +static constexpr const char *kHSACOAnnotation = "amdgpu.hsaco"; +static constexpr const char *kHSACOGetterAnnotation = "amdgpu.hsacogetter"; +static constexpr const char *kHSACOGetterSuffix = "_hsaco"; +static constexpr const char *kHSACOStorageSuffix = "_hsaco_cst"; + +/// enum to represent the AMD GPU versions supported by the ROCM backend +enum class AMDGPUVersion { GFX900 }; + +/// enum to represent the HSA Code Object versions supported by the ROCM backend +enum class HSACOVersion { V3 }; + +/// Configurable parameters for generating the HSACO blobs from GPU Kernels +struct HSACOGeneratorConfig { + + /// Constructor - sets the default values for the configurable parameters + HSACOGeneratorConfig(bool isTestMode) + : testMode(isTestMode), amdgpuVersion(AMDGPUVersion::GFX900), + hsacoVersion(HSACOVersion::V3), rocdlDir(ROCM_DEVICE_LIB_DIR), + linkerPath(ROCM_HCC_LINKER) {} + + /// testMode == true will result in skipping the HSACO generation process, and + /// simply returning the string "HSACO" as the HSACO blob + bool testMode; + + /// the
AMDGPU version for which to generate the HSACO + AMDGPUVersion amdgpuVersion; + + /// the code object version for the generated HSACO + HSACOVersion hsacoVersion; + + /// the directory containing the ROCDL bitcode libraries + std::string rocdlDir; + + /// the path to the ld.lld linker to use when generating the HSACO + std::string linkerPath; +}; + +} // namespace rocm + +// unique pointer to the HSA Code Object (which is stored as char vector) +using OwnedHSACO = std::unique_ptr>; + +class ModuleOp; +template +class OpPassBase; + +/// Creates a pass to convert GPU kernel modules into HSA Code Object blobs. +/// +/// This transformation takes each named module that carries the +/// 'gpu.kernel_module' attribute, translates it to an LLVM module, compiles +/// that module with help of the AMDGPU backend to GCN ISA, and then invokes +/// lld to produce a binary blob in HSA Code Object format. Such blob is then +/// attached as a string attribute named 'amdgpu.hsaco' to the kernel module. +/// When the config is in test mode, the blob is the placeholder string +/// "HSACO" and no compilation takes place. +std::unique_ptr> createConvertGPUKernelToHSACOPass( + rocm::HSACOGeneratorConfig hsacoGeneratorConfig); + +} // namespace mlir + +#endif // MLIR_CONVERSION_GPUTOROCM_GPUTOROCMPASS_H_ diff --git a/include/mlir/Conversion/GPUToROCM/ROCMConfig.h.in b/include/mlir/Conversion/GPUToROCM/ROCMConfig.h.in new file mode 100644 index 000000000000..4f326c8186a8 --- /dev/null +++ b/include/mlir/Conversion/GPUToROCM/ROCMConfig.h.in @@ -0,0 +1,30 @@ +//===- ROCMConfig.h - ROCm Configuration Header -----------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +#ifndef MLIR_CONVERSION_GPUTOROCM_ROCMCONFIG_H_ +#define MLIR_CONVERSION_GPUTOROCM_ROCMCONFIG_H_ + +/// The code to generate the HSACO binary blobs (corresponding to GPU kernels) +/// assumes the presence of ROCm libraries/utilities. The location of these +/// tools is configured via CMake. + +/// Path to the ROCm Device Library dir in the ROCM install +#cmakedefine ROCM_DEVICE_LIB_DIR "@ROCM_DEVICE_LIB_DIR@" + +/// Path to the HCC Linker in the ROCM install +#cmakedefine ROCM_HCC_LINKER "@ROCM_HCC_LINKER@" + +#endif // MLIR_CONVERSION_GPUTOROCM_ROCMCONFIG_H_ diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt index c0fd6b83fd95..57e3fb4cf114 100644 --- a/lib/Conversion/CMakeLists.txt +++ b/lib/Conversion/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(GPUToCUDA) add_subdirectory(GPUToNVVM) +add_subdirectory(GPUToROCM) add_subdirectory(GPUToROCDL) add_subdirectory(GPUToSPIRV) add_subdirectory(LoopsToGPU) diff --git a/lib/Conversion/GPUToROCM/CMakeLists.txt b/lib/Conversion/GPUToROCM/CMakeLists.txt new file mode 100644 index 000000000000..93eff82748b8 --- /dev/null +++ b/lib/Conversion/GPUToROCM/CMakeLists.txt @@ -0,0 +1,15 @@ +if(MLIR_ROCM_CONVERSIONS_ENABLED) + llvm_map_components_to_libnames(amdgpu "AMDGPU") + + add_llvm_library(MLIRGPUtoROCMTransforms + ConvertKernelFuncToHSACO.cpp + ) + target_link_libraries(MLIRGPUtoROCMTransforms + MLIRGPU + MLIRLLVMIR + MLIRROCDLIR + MLIRPass + MLIRTargetROCDLIR + ${amdgpu} + ) +endif() diff --git
a/lib/Conversion/GPUToROCM/ConvertKernelFuncToHSACO.cpp b/lib/Conversion/GPUToROCM/ConvertKernelFuncToHSACO.cpp new file mode 100644 index 000000000000..dba2f25abb50 --- /dev/null +++ b/lib/Conversion/GPUToROCM/ConvertKernelFuncToHSACO.cpp @@ -0,0 +1,407 @@ +//===- ConvertKernelFuncToHSACO.cpp - MLIR GPU lowering passes ------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements a pass to convert gpu kernel functions into a +// corresponding binary blob that can be executed on an AMD GPU. Currently +// only translates the function itself but no dependencies.
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/GPUToROCM/GPUToROCMPass.h" + +#include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Module.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Target/ROCDLIR.h" + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO/Internalize.h" + +#include +#include +#include + +using namespace mlir; + +#define DEBUG_TYPE "gpu-to-rocm-conversion" + +static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); + +static llvm::cl::opt + clDumpLLVMIR("rocm-dump-lllvm-ir", + llvm::cl::desc("Dump the LLVM IR when generating HSACO"), + llvm::cl::cat(clOptionsCategory)); +namespace { + +/// A pass converting tagged kernel modules to HSA Code Object blobs. +/// +/// If tagged as a kernel module, the module is translated to ROCDL IR, which +/// is then compiled using the llvm AMDGPU backend to generate the GPU binary +/// code (i.e. the HSACO file). The HSACO binary blob is attached to the module +/// as the string attribute named by rocm::kHSACOAnnotation.
+class GpuKernelToHSACOPass : public ModulePass { +public: + GpuKernelToHSACOPass(rocm::HSACOGeneratorConfig hsacoGeneratorConfig = + rocm::HSACOGeneratorConfig(/*isTestMode=*/true)) + : config(hsacoGeneratorConfig) {} + + // Translates a kernel module to HSACO and attaches the blob as an attribute. + void runOnModule() override { + ModuleOp module = getModule(); + + // Nothing to do if this module does not contain the "gpu.kernel_module" + // attribute, which is used to mark the (nested) modules created to house + // the GPU kernel functions, or if the module has no name + if (!module.getAttrOfType( + gpu::GPUDialect::getKernelModuleAttrName()) || + !module.getName()) + return; + + // This is a module containing a GPU kernel function, we have work to do! + + // Make sure the AMDGPU target is initialized. + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmPrinter(); + + auto llvmModule = translateModuleToROCDLIR(module); + if (!llvmModule) + return signalPassFailure(); + + if (StringAttr hsacoAttr = + translateGpuModuleToHSACOAnnotation(*llvmModule, module)) + module.setAttr(rocm::kHSACOAnnotation, hsacoAttr); + else + signalPassFailure(); + } + +private: + /// Translates llvmModule to HSACO and returns the result as attribute.
+ StringAttr translateGpuModuleToHSACOAnnotation(llvm::Module &llvmModule, + ModuleOp module); + + OwnedHSACO convertModuleToHSACO(llvm::Module &llvmModule, ModuleOp module); + + OwnedHSACO emitModuleToHSACO(llvm::Module &llvmModule, ModuleOp module, + llvm::TargetMachine &targetMachine); + + OwnedHSACO emitModuleToHSACOForTesting(llvm::Module &llvmModule, + ModuleOp module); + + rocm::HSACOGeneratorConfig config; +}; + +} // anonymous namespace + +// get the "-mcpu" option string corresponding to the given AMDGPU version enum +static std::string getMcpuOptionString(rocm::AMDGPUVersion v) { + switch (v) { + case rocm::AMDGPUVersion::GFX900: + return "gfx900"; + } + return ""; +} + +// get filename for file containing the AMDGPU version specific bitcodes +static std::string getBitcodeFilename(rocm::AMDGPUVersion v) { + switch (v) { + case rocm::AMDGPUVersion::GFX900: + return "oclc_isa_version_900.amdgcn.bc"; + } + return ""; +} + +// get the option string corresponding to the given HSACO version enum +static std::string getCodeObjectOptionString(rocm::HSACOVersion v) { + switch (v) { + case rocm::HSACOVersion::V3: + return "-code-object-v3"; + } + return "invalid HSACO version"; +} + +// Gets the ROCm-Device-Libs filenames for a particular AMDGPU version. +static std::vector +getROCDLPaths(const rocm::AMDGPUVersion amdgpuVersion, + llvm::StringRef rocdlDir) { + + // AMDGPU version-neutral bitcodes. + static constexpr StringLiteral rocdlFilenames[] = { + "hc.amdgcn.bc", + "opencl.amdgcn.bc", + "ocml.amdgcn.bc", + "ockl.amdgcn.bc", + "oclc_finite_only_off.amdgcn.bc", + "oclc_daz_opt_off.amdgcn.bc", + "oclc_correctly_rounded_sqrt_on.amdgcn.bc", + "oclc_unsafe_math_off.amdgcn.bc", + "oclc_wavefrontsize64_on.amdgcn.bc"}; + + // Construct full path to ROCDL bitcode libraries. 
+ std::vector result; + for (auto filename : rocdlFilenames) { + llvm::SmallString<128> appendedPath; + llvm::sys::path::append(appendedPath, rocdlDir, filename); + result.push_back(appendedPath.str()); + } + + // Add AMDGPU version-specific bitcodes. + llvm::SmallString<128> appendedPath; + llvm::sys::path::append(appendedPath, rocdlDir, + getBitcodeFilename(amdgpuVersion)); + result.push_back(appendedPath.str()); + + return result; +} + +// Links the given llvm module with the given bitcode modules. +static LogicalResult +linkWithBitcodeModules(llvm::Module &llvmModule, ModuleOp module, + llvm::ArrayRef bitcodeModulePaths) { + llvm::Linker linker(llvmModule); + + for (auto &filename : bitcodeModulePaths) { + if (!llvm::sys::fs::exists(filename)) { + module.emitWarning("ROCDL bitcode module was not found at " + filename); + // TODO(rocm) + // The list currently returned by "getROCDLPaths" routine is a superset + // and some files in that list may not be available on older ROCM + // releases. So commenting out the call to propagate error status. + // Error propagation should be restored once the list returned by + // "getROCDLPaths" is stable/accurate. 
+ // return failure(); + continue; + } + + llvm::SMDiagnostic diagnostic; + std::unique_ptr bitcodeModule( + llvm::parseIRFile(llvm::StringRef(filename.data(), filename.size()), + diagnostic, llvmModule.getContext())); + + if (bitcodeModule == nullptr) { + MLIRContext *mlirContext = module.getContext(); + auto parseErrorLocation = mlir::FileLineColLoc::get( + diagnostic.getFilename().str(), diagnostic.getLineNo(), + diagnostic.getColumnNo(), mlirContext); + mlir::emitError(parseErrorLocation, diagnostic.getMessage().str()); + module.emitError("Error parsing ROCDL bitcode module from " + filename); + return failure(); + } + + if (linker.linkInModule( + std::move(bitcodeModule), llvm::Linker::Flags::LinkOnlyNeeded, + [](llvm::Module &M, const llvm::StringSet<> &GVS) { + internalizeModule(M, [&M, &GVS](const llvm::GlobalValue &GV) { + return !GV.hasName() || (GVS.count(GV.getName()) == 0); + }); + })) { + module.emitError("Error linking ROCDL bitcode module from " + filename); + return failure(); + } + } + + return success(); +} + +// Returns whether the module uses any ROCDL bitcode functions. +// This function may have false positives +static bool couldNeedDeviceBitcode(const llvm::Module &llvmModule) { + for (const llvm::Function &llvmFunction : llvmModule.functions()) { + // This is a conservative approximation + // - not all such functions are in ROCm-Device-Libs. + if (!llvmFunction.isIntrinsic() && llvmFunction.isDeclaration()) + return true; + } + return false; +} + +// Links ROCm-Device-Libs into the given module if the module needs it. +static LogicalResult linkROCDLIfNecessary(llvm::Module &llvmModule, + ModuleOp module, + rocm::AMDGPUVersion amdgpuVersion, + llvm::StringRef rocdlDir) { + + if (!couldNeedDeviceBitcode(llvmModule)) + return success(); + + return linkWithBitcodeModules(llvmModule, module, + getROCDLPaths(amdgpuVersion, rocdlDir)); +} + +// Emits the given module to HSA Code Object. 
targetMachine is an initialized +// TargetMachine for the AMDGPU target. +OwnedHSACO +GpuKernelToHSACOPass::emitModuleToHSACO(llvm::Module &llvmModule, + ModuleOp module, + llvm::TargetMachine &targetMachine) { + llvm::SmallString<128> tempdirName; + if (llvm::sys::fs::createUniqueDirectory("/tmp/amdgpu_mlir", tempdirName)) { + module.emitError("Failed to create tempdir for generating HSACO\n"); + return {}; + } + + std::error_code ec; + if (clDumpLLVMIR) { + // dump the LLVM IR to file...this is just for debugging purposes + llvm::Twine irFilename = + llvm::Twine(llvmModule.getModuleIdentifier()) + ".ll"; + llvm::SmallString<128> irPath; + llvm::sys::path::append(irPath, tempdirName, irFilename); + + llvm::raw_fd_ostream irFileStream(irPath, ec, llvm::sys::fs::F_None); + llvmModule.print(irFileStream, nullptr); + irFileStream.flush(); + } + + // dump the GCN ISA object file (binary data: do not open in text mode) + llvm::Twine isabinFilename = + llvm::Twine(llvmModule.getModuleIdentifier()) + ".o"; + llvm::SmallString<128> isabinPath; + llvm::sys::path::append(isabinPath, tempdirName, isabinFilename); + + llvm::legacy::PassManager codegenPasses; + llvm::SmallVector stream; + llvm::raw_svector_ostream pstream(stream); + llvm::raw_fd_ostream isabinFileStream(isabinPath, ec, llvm::sys::fs::F_None); + llvmModule.setDataLayout(targetMachine.createDataLayout()); + targetMachine.addPassesToEmitFile(codegenPasses, isabinFileStream, nullptr, + llvm::TargetMachine::CGFT_ObjectFile); + codegenPasses.run(llvmModule); + isabinFileStream.flush(); + + // generate the hsaco binary + // TODO(rocm): + // Currently we invoke ld.lld as a separate process to generate the hsaco + // file. Ideally we would like to invoke it (ld.lld) via an API call to do + // the same. That will require building the "lld" project (which apparently + // is at the same level as "llvm") and figuring out how to call it from + // within this "mlir" project.
+ llvm::Twine hsacoFilename = + llvm::Twine(llvmModule.getModuleIdentifier()) + ".hsaco"; + llvm::SmallString<128> hsacoPath; + llvm::sys::path::append(hsacoPath, tempdirName, hsacoFilename); + + llvm::StringRef lldProgram(config.linkerPath); + std::vector lldArgs{ + llvm::StringRef("ld.lld"), llvm::StringRef("-flavor"), + llvm::StringRef("gnu"), llvm::StringRef("-shared"), + llvm::StringRef("isabinPath"), llvm::StringRef("-o"), + llvm::StringRef("hsacoPath"), + }; + lldArgs[4] = llvm::StringRef(isabinPath); + lldArgs[6] = llvm::StringRef(hsacoPath); + + std::string errorMessage; + int lldResult = llvm::sys::ExecuteAndWait( + lldProgram, llvm::ArrayRef(lldArgs), llvm::None, {}, 0, + 0, &errorMessage); + if (lldResult) { + module.emitError("ld.lld execution failed : " + errorMessage); + return {}; + } + // read HSACO + auto hsacoFileOrError = llvm::MemoryBuffer::getFileAsStream(hsacoPath); + if ((ec = hsacoFileOrError.getError())) + return {}; + + std::unique_ptr hsacoFile = + std::move(hsacoFileOrError.get()); + + return std::make_unique>(hsacoFile->getBufferStart(), + hsacoFile->getBufferEnd()); +} + +OwnedHSACO +GpuKernelToHSACOPass::emitModuleToHSACOForTesting(llvm::Module &llvmModule, + ModuleOp module) { + const char data[] = "HSACO"; + return std::make_unique>(data, data + sizeof(data) - 1); +} + +OwnedHSACO GpuKernelToHSACOPass::convertModuleToHSACO(llvm::Module &llvmModule, + ModuleOp module) { + if (config.testMode) + return emitModuleToHSACOForTesting(llvmModule, module); + + // Construct LLVM TargetMachine for AMDGPU target. 
+ std::unique_ptr targetMachine; + { + std::string error; + llvm::Triple triple("amdgcn--amdhsa-amdgiz"); + const llvm::Target *target = + llvm::TargetRegistry::lookupTarget("", triple, error); + if (target == nullptr) { + module.emitError("Cannot initialize target triple"); + return {}; + } + std::string mcpuStr = getMcpuOptionString(config.amdgpuVersion); + std::string codeObjectStr = getCodeObjectOptionString(config.hsacoVersion); + targetMachine.reset(target->createTargetMachine(triple.str(), mcpuStr, + codeObjectStr, {}, {})); + } + + // Set the data layout of the llvm module to match what the target needs. + llvmModule.setDataLayout(targetMachine->createDataLayout()); + + if (failed(linkROCDLIfNecessary(llvmModule, module, config.amdgpuVersion, + config.rocdlDir))) + return {}; + + // Lower LLVM module to HSA code object + return emitModuleToHSACO(llvmModule, module, *targetMachine); +} + +StringAttr GpuKernelToHSACOPass::translateGpuModuleToHSACOAnnotation( + llvm::Module &llvmModule, ModuleOp module) { + + OwnedHSACO hsaco = convertModuleToHSACO(llvmModule, module); + if (!hsaco) + return {}; + + return StringAttr::get({hsaco->data(), hsaco->size()}, module.getContext()); +} + +std::unique_ptr> mlir::createConvertGPUKernelToHSACOPass( + rocm::HSACOGeneratorConfig hsacoGeneratorConfig) { + return std::make_unique(hsacoGeneratorConfig); +} + +static PassRegistration + pass("test-kernel-to-hsaco", + "Convert all kernel functions to HSA code object blobs"); diff --git a/test/Conversion/GPUToROCM/lit.local.cfg b/test/Conversion/GPUToROCM/lit.local.cfg new file mode 100644 index 000000000000..1d5aaaf6c673 --- /dev/null +++ b/test/Conversion/GPUToROCM/lit.local.cfg @@ -0,0 +1,2 @@ +if not config.run_rocm_tests: + config.unsupported = True \ No newline at end of file diff --git a/test/Conversion/GPUToROCM/lower-amdgpu-kernel-to-hsaco.mlir b/test/Conversion/GPUToROCM/lower-amdgpu-kernel-to-hsaco.mlir new file mode 100644 index 000000000000..e43a44daf9c5 --- 
/dev/null +++ b/test/Conversion/GPUToROCM/lower-amdgpu-kernel-to-hsaco.mlir @@ -0,0 +1,31 @@ +// RUN: mlir-opt %s --test-kernel-to-hsaco -split-input-file | FileCheck %s + +// CHECK: attributes {amdgpu.hsaco = "HSACO", gpu.kernel_module} +module @gpu_kernels attributes { gpu.kernel_module } { + // CHECK-LABEL: @kernel_A + llvm.func @kernel_A(%arg0 : !llvm.float, %arg1 : !llvm<"float*">) + // CHECK: attributes {gpu.kernel} + attributes { gpu.kernel } { + llvm.return + } +} + +// ----- + +// CHECK: attributes {amdgpu.hsaco = "HSACO", gpu.kernel_module} +module @gpu_kernels attributes { gpu.kernel_module } { + // CHECK-LABEL: @kernel_A + llvm.func @kernel_A(%arg0 : !llvm.float, %arg1 : !llvm<"float*">) + // CHECK: attributes {gpu.kernel} + attributes { gpu.kernel } { + llvm.return + } + + // CHECK-LABEL: @kernel_B + llvm.func @kernel_B(%arg0 : !llvm.float, %arg1 : !llvm<"float*">) + // CHECK: attributes {gpu.kernel} + attributes { gpu.kernel } { + llvm.return + } +} + diff --git a/test/lit.site.cfg.py.in b/test/lit.site.cfg.py.in index aab566173e6c..b038822b6e64 100644 --- a/test/lit.site.cfg.py.in +++ b/test/lit.site.cfg.py.in @@ -36,6 +36,7 @@ config.build_examples = @LLVM_BUILD_EXAMPLES@ config.run_cuda_tests = @MLIR_CUDA_CONVERSIONS_ENABLED@ config.cuda_wrapper_library_dir = "@MLIR_CUDA_WRAPPER_LIBRARY_DIR@" config.enable_cuda_runner = @MLIR_CUDA_RUNNER_ENABLED@ +config.run_rocm_tests = @MLIR_ROCM_CONVERSIONS_ENABLED@ # Support substitution of the tools_dir with user parameters. This is # used when we can't determine the tool dir at configuration time. diff --git a/tools/mlir-opt/CMakeLists.txt b/tools/mlir-opt/CMakeLists.txt index a279b0f4afcf..1965f8740032 100644 --- a/tools/mlir-opt/CMakeLists.txt +++ b/tools/mlir-opt/CMakeLists.txt @@ -55,6 +55,11 @@ if(MLIR_CUDA_CONVERSIONS_ENABLED) MLIRGPUtoCUDATransforms ) endif() +if(MLIR_ROCM_CONVERSIONS_ENABLED) + list(APPEND LIBS + MLIRGPUtoROCMTransforms + ) +endif() add_llvm_executable(mlir-opt mlir-opt.cpp )