Skip to content

Commit

Permalink
Bump LLVM Version
Browse files Browse the repository at this point in the history
(merge 65fa61d)
  • Loading branch information
ivanradanov committed Oct 6, 2023
1 parent 216f640 commit 9be12f8
Show file tree
Hide file tree
Showing 189 changed files with 3,685 additions and 3,525 deletions.
11 changes: 2 additions & 9 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: MLIR-GPU Test CI
name: llvm_project_build

on:
push:
Expand All @@ -24,7 +24,7 @@ jobs:
- compiler: clang
cxxcompiler: g++

timeout-minutes: 240
timeout-minutes: 360
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -46,13 +46,6 @@ jobs:
- name: add dependencies
run: sudo apt-get install -y ninja-build #cmake binutils-gold binutils binutils-dev ${{ matrix.compiler }} ${{ matrix.linker-pkg }}

#- name: setup cymbl
# run: |
# cd /
# sudo wget --no-verbose https://github.com/cymbl/cymbl.github.io/releases/download/0.0.1/LLVM-11.0.0git-Linux.sh
# printf "y\nn\n" | sudo bash LLVM-11.0.0git-Linux.sh
# printf "{\"refreshToken\":\"%s\"}" "${{ secrets.SuperSecret }}" > ~/.cymblconfig

- name: MLIR build
if: steps.cache-mlir.outputs.cache-hit != 'true'
run: |
Expand Down
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,8 @@ pythonenv*
/clang/utils/analyzer/projects/*/RefScanBuildResults
# automodapi puts generated documentation files here.
/lldb/docs/python_api/


# tmp output from tests
*.exec1
*.out1
43 changes: 25 additions & 18 deletions include/polygeist/BarrierUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,14 @@ allocateTemporaryBuffer(mlir::OpBuilder &rewriter, mlir::Value value,
mlir::ValueRange iterationCounts, bool alloca = true,
mlir::DataLayout *DLI = nullptr) {
using namespace mlir;
SmallVector<int64_t> bufferSize(iterationCounts.size(),
ShapedType::kDynamicSize);
SmallVector<int64_t> bufferSize(iterationCounts.size(), ShapedType::kDynamic);
mlir::Type ty = value.getType();
if (alloca)
if (auto allocaOp = value.getDefiningOp<memref::AllocaOp>()) {
auto mt = allocaOp.getType();
bool hasDynamicSize = false;
for (auto s : mt.getShape()) {
if (s == ShapedType::kDynamicSize) {
if (s == ShapedType::kDynamic) {
hasDynamicSize = true;
break;
}
Expand All @@ -84,10 +83,12 @@ mlir::Value allocateTemporaryBuffer<mlir::LLVM::AllocaOp>(
auto sz = val.getArraySize();
assert(DLI);
for (auto iter : iterationCounts) {
sz =
rewriter.create<arith::MulIOp>(value.getLoc(), sz,
rewriter.create<arith::IndexCastOp>(
value.getLoc(), sz.getType(), iter));
sz = cast<TypedValue<IntegerType>>(
rewriter
.create<arith::MulIOp>(value.getLoc(), sz,
rewriter.create<arith::IndexCastOp>(
value.getLoc(), sz.getType(), iter))
.getResult());
}
return rewriter.create<LLVM::AllocaOp>(value.getLoc(), val.getType(), sz);
}
Expand All @@ -100,18 +101,24 @@ mlir::Value allocateTemporaryBuffer<mlir::LLVM::CallOp>(
auto val = value.getDefiningOp<LLVM::AllocaOp>();
auto sz = val.getArraySize();
assert(DLI);
sz = rewriter.create<arith::MulIOp>(
value.getLoc(), sz,
rewriter.create<arith::ConstantIntOp>(
value.getLoc(),
DLI->getTypeSize(
val.getType().cast<LLVM::LLVMPointerType>().getElementType()),
sz.getType().cast<IntegerType>().getWidth()));
sz = cast<TypedValue<IntegerType>>(
rewriter
.create<arith::MulIOp>(
value.getLoc(), sz,
rewriter.create<arith::ConstantIntOp>(
value.getLoc(),
DLI->getTypeSize(val.getType()
.cast<LLVM::LLVMPointerType>()
.getElementType()),
sz.getType().cast<IntegerType>().getWidth()))
.getResult());
for (auto iter : iterationCounts) {
sz =
rewriter.create<arith::MulIOp>(value.getLoc(), sz,
rewriter.create<arith::IndexCastOp>(
value.getLoc(), sz.getType(), iter));
sz = cast<TypedValue<IntegerType>>(
rewriter
.create<arith::MulIOp>(value.getLoc(), sz,
rewriter.create<arith::IndexCastOp>(
value.getLoc(), sz.getType(), iter))
.getResult());
}
auto m = val->getParentOfType<ModuleOp>();
return callMalloc(rewriter, m, value.getLoc(), sz);
Expand Down
5 changes: 3 additions & 2 deletions include/polygeist/Ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ class BarrierElim final
}

Operation *op = barrier;
if (NotTopLevel && isa<mlir::scf::ParallelOp, mlir::AffineParallelOp>(
barrier->getParentOp()))
if (NotTopLevel &&
isa<mlir::scf::ParallelOp, mlir::affine::AffineParallelOp>(
barrier->getParentOp()))
return failure();

{
Expand Down
31 changes: 30 additions & 1 deletion include/polygeist/Passes/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
#define POLYGEIST_DIALECT_POLYGEIST_PASSES_H

#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "polygeist/Dialect.h"
#include <memory>

enum PolygeistAlternativesMode { PAM_Static, PAM_PGO_Profile, PAM_PGO_Opt };
Expand All @@ -19,7 +23,7 @@ class RewritePatternSet;
class DominanceInfo;
namespace polygeist {
std::unique_ptr<Pass> createParallelLICMPass();
std::unique_ptr<Pass> createMem2RegPass();
std::unique_ptr<Pass> createPolygeistMem2RegPass();
std::unique_ptr<Pass> createLoopRestructurePass();
std::unique_ptr<Pass> createInnerSerializationPass();
std::unique_ptr<Pass> createSerializationPass();
Expand Down Expand Up @@ -50,8 +54,14 @@ createConvertParallelToGPUPass1(std::string arch = "sm_60");
// Factory for the "convert-parallel-to-gpu2" pass; `emitGPUKernelLaunchBounds`
// defaults to true.
std::unique_ptr<Pass>
createConvertParallelToGPUPass2(bool emitGPUKernelLaunchBounds = true);
// Factory for the "merge-gpu-modules" pass.
std::unique_ptr<Pass> createMergeGPUModulesPass();
// Factory for the "convert-to-opaque-ptr" pass (typed LLVM pointers -> opaque).
std::unique_ptr<Pass> createConvertToOpaquePtrPass();
// Factory for the "lower-alternatives" pass.
std::unique_ptr<Pass> createLowerAlternativesPass();
std::unique_ptr<Pass> createCollectKernelStatisticsPass();
// Factory for the "canonicalize-polygeist" pass using its default options.
std::unique_ptr<Pass> createPolygeistCanonicalizePass();
// Overload taking an explicit greedy-rewrite configuration plus lists of
// pattern names to disable / enable.
// NOTE(review): presumably the lists are matched against pattern debug
// names/labels as in MLIR's canonicalizer — confirm in the implementation.
std::unique_ptr<Pass>
createPolygeistCanonicalizePass(const GreedyRewriteConfig &config,
ArrayRef<std::string> disabledPatterns,
ArrayRef<std::string> enabledPatterns);
// Factory for the cubin serialization pass. Takes the target `arch` and
// `features`, the LLVM and ptxas optimization levels, the ptxas and libdevice
// paths, and a flag controlling output of intermediates.
std::unique_ptr<Pass> createGpuSerializeToCubinPass(
StringRef arch, StringRef features, int llvmOptLevel, int ptxasOptLevel,
std::string ptxasPath, std::string libDevicePath, bool outputIntermediate);
Expand Down Expand Up @@ -81,10 +91,26 @@ namespace arith {
class ArithDialect;
} // end namespace arith

// Forward declarations of dialect classes. These are the same names that the
// pass definitions in Passes.td list under `dependentDialects`
// (e.g. "omp::OpenMPDialect", "scf::SCFDialect", "cf::ControlFlowDialect").
// NOTE(review): presumably needed so the generated pass declarations can name
// the dialects without including every dialect header — confirm.
namespace omp {
class OpenMPDialect;
} // end namespace omp

namespace polygeist {
class PolygeistDialect;
} // end namespace polygeist

namespace scf {
class SCFDialect;
} // end namespace scf

namespace cf {
class ControlFlowDialect;
} // end namespace cf

namespace math {
class MathDialect;
} // end namespace math

namespace memref {
class MemRefDialect;
} // end namespace memref
Expand All @@ -93,7 +119,10 @@ namespace func {
class FuncDialect;
}

// Forward declaration of the affine dialect class (referenced as
// "affine::AffineDialect" in Passes.td).
namespace affine {
class AffineDialect;
} // end namespace affine

// Forward declaration of the LLVM dialect class (referenced as
// "LLVM::LLVMDialect" in Passes.td).
namespace LLVM {
class LLVMDialect;
} // end namespace LLVM
Expand Down
104 changes: 89 additions & 15 deletions include/polygeist/Passes/Passes.td
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
#ifndef POlYGEIST_PASSES
#ifndef POLYGEIST_PASSES
#define POLYGEIST_PASSES

include "mlir/Pass/PassBase.td"
include "mlir/Rewrite/PassUtil.td"

// Pass "affine-cfg": rewrites scf.if (and similar constructs) into affine.if.
// Instantiated through mlir::polygeist::replaceAffineCFGPass().
def AffineCFG : Pass<"affine-cfg"> {
let summary = "Replace scf.if and similar with affine.if";
let constructor = "mlir::polygeist::replaceAffineCFGPass()";
}

def Mem2Reg : Pass<"mem2reg"> {
def PolygeistMem2Reg : Pass<"polygeist-mem2reg"> {
let summary = "Promote memory loads and stores to SSA values";
let constructor = "mlir::polygeist::createMem2RegPass()";
let constructor = "mlir::polygeist::createPolygeistMem2RegPass()";
}

def SCFParallelLoopUnroll : Pass<"scf-parallel-loop-unroll"> {
let summary = "Unroll and interleave scf parallel loops";
let dependentDialects =
["::mlir::scf::SCFDialect"];
let dependentDialects = [
"scf::SCFDialect",
"arith::ArithDialect",
];
let constructor = "mlir::polygeist::createSCFParallelLoopUnrollPass()";
let options = [
Option<"unrollFactor", "unrollFactor", "int", /*default=*/"2", "Unroll factor">
Expand All @@ -37,8 +40,10 @@ def LowerAlternatives : Pass<"lower-alternatives", "mlir::ModuleOp"> {

def ConvertCudaRTtoCPU : Pass<"convert-cudart-to-cpu", "mlir::ModuleOp"> {
let summary = "Lower cudart functions to cpu versions";
let dependentDialects =
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect"];
let dependentDialects = [
"memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect",
"cf::ControlFlowDialect",
];
let constructor = "mlir::polygeist::createConvertCudaRTtoCPUPass()";
}

Expand All @@ -64,8 +69,14 @@ def ConvertCudaRTtoHipRT : Pass<"convert-cudart-to-gpu", "mlir::ModuleOp"> {

def ParallelLower : Pass<"parallel-lower", "mlir::ModuleOp"> {
let summary = "Lower gpu launch op to parallel ops";
let dependentDialects =
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect"];
let dependentDialects = [
"scf::SCFDialect",
"polygeist::PolygeistDialect",
"cf::ControlFlowDialect",
"memref::MemRefDialect",
"func::FuncDialect",
"LLVM::LLVMDialect",
];
let constructor = "mlir::polygeist::createParallelLowerPass()";
}

Expand All @@ -87,7 +98,7 @@ def SCFCPUify : Pass<"cpuify"> {
def ConvertParallelToGPU1 : Pass<"convert-parallel-to-gpu1"> {
let summary = "Convert parallel loops to gpu";
let constructor = "mlir::polygeist::createConvertParallelToGPUPass1()";
let dependentDialects = ["func::FuncDialect", "LLVM::LLVMDialect", "memref::MemRefDialect"];
let dependentDialects = ["func::FuncDialect", "LLVM::LLVMDialect", "memref::MemRefDialect", "gpu::GPUDialect"];
let options = [
Option<"arch", "arch", "std::string", /*default=*/"\"sm_60\"", "Target GPU architecture">
];
Expand All @@ -96,7 +107,13 @@ def ConvertParallelToGPU1 : Pass<"convert-parallel-to-gpu1"> {
def ConvertParallelToGPU2 : Pass<"convert-parallel-to-gpu2"> {
let summary = "Convert parallel loops to gpu";
let constructor = "mlir::polygeist::createConvertParallelToGPUPass2()";
let dependentDialects = ["func::FuncDialect", "LLVM::LLVMDialect", "memref::MemRefDialect"];
let dependentDialects = ["func::FuncDialect", "LLVM::LLVMDialect", "memref::MemRefDialect", "gpu::GPUDialect"];
}

// Pass "convert-to-opaque-ptr": converts typed LLVM pointers to opaque ones.
// Only the LLVM dialect is declared as a dependency.
def ConvertToOpaquePtrPass : Pass<"convert-to-opaque-ptr"> {
  let summary = "Convert typed llvm pointers to opaque";
  let dependentDialects = [
    "LLVM::LLVMDialect",
  ];
  let constructor = "mlir::polygeist::createConvertToOpaquePtrPass()";
}

def MergeGPUModulesPass : Pass<"merge-gpu-modules", "mlir::ModuleOp"> {
Expand All @@ -111,6 +128,7 @@ def InnerSerialization : Pass<"inner-serialize"> {
let dependentDialects =
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect"];
}

def Serialization : Pass<"serialize"> {
let summary = "remove scf.barrier";
let constructor = "mlir::polygeist::createSerializationPass()";
Expand All @@ -127,18 +145,28 @@ def SCFBarrierRemovalContinuation : InterfacePass<"barrier-removal-continuation"
def SCFRaiseToAffine : Pass<"raise-scf-to-affine"> {
let summary = "Raise SCF to affine";
let constructor = "mlir::polygeist::createRaiseSCFToAffinePass()";
let dependentDialects = ["AffineDialect"];
let dependentDialects = [
"affine::AffineDialect",
"scf::SCFDialect",
];
}

// Pass "canonicalize-scf-for": extra canonicalization patterns for scf::for.
// Declares the scf and math dialects as dependencies.
def SCFCanonicalizeFor : Pass<"canonicalize-scf-for"> {
  let summary = "Run some additional canonicalization for scf::for";
  let dependentDialects = [
    "scf::SCFDialect",
    "math::MathDialect",
  ];
  let constructor = "mlir::polygeist::createCanonicalizeForPass()";
}

def ForBreakToWhile : Pass<"for-break-to-while"> {
let summary = "Rewrite scf.for(scf.if) to scf.while";
let constructor = "mlir::polygeist::createForBreakToWhilePass()";
let dependentDialects = ["arith::ArithDialect"];
let dependentDialects = [
"arith::ArithDialect",
"cf::ControlFlowDialect",
];
}

def ParallelLICM : Pass<"parallel-licm"> {
Expand All @@ -149,11 +177,48 @@ def ParallelLICM : Pass<"parallel-licm"> {
// Pass "openmp-opt": OpenMP optimizations.
// Declares the memref, OpenMP and LLVM dialects as dependencies.
def OpenMPOptPass : Pass<"openmp-opt"> {
  let summary = "Optimize OpenMP";
  let dependentDialects = [
    "memref::MemRefDialect",
    "omp::OpenMPDialect",
    "LLVM::LLVMDialect",
  ];
  let constructor = "mlir::polygeist::createOpenMPOptPass()";
}

// Pass "canonicalize-polygeist": Polygeist's canonicalization pass.
// NOTE(review): the option list below looks like it mirrors the options of
// MLIR's built-in canonicalizer, followed by the shared
// RewritePassUtils.options — confirm before changing either side.
def PolygeistCanonicalize : Pass<"canonicalize-polygeist"> {
  // A summary was missing; without it the pass has no description in tool
  // help or generated documentation.
  let summary = "Canonicalize operations (Polygeist canonicalizer)";
  let constructor = "mlir::polygeist::createPolygeistCanonicalizePass()";
  let dependentDialects = [
    "func::FuncDialect",
    "LLVM::LLVMDialect",
    "memref::MemRefDialect",
    "gpu::GPUDialect",
    "arith::ArithDialect",
    "cf::ControlFlowDialect",
    "scf::SCFDialect",
    "polygeist::PolygeistDialect",
  ];
  let options = [
    Option<"topDownProcessingEnabled", "top-down", "bool",
           /*default=*/"true",
           "Seed the worklist in general top-down order">,
    Option<"enableRegionSimplification", "region-simplify", "bool",
           /*default=*/"true",
           "Perform control flow optimizations to the region tree">,
    Option<"maxIterations", "max-iterations", "int64_t",
           /*default=*/"10",
           "Max. iterations between applying patterns / simplifying regions">,
    Option<"maxNumRewrites", "max-num-rewrites", "int64_t", /*default=*/"-1",
           "Max. number of pattern rewrites within an iteration">,
    Option<"testConvergence", "test-convergence", "bool", /*default=*/"false",
           "Test only: Fail pass on non-convergence to detect cyclic pattern">
  ] # RewritePassUtils.options;
}

def LoopRestructure : Pass<"loop-restructure"> {
let constructor = "mlir::polygeist::createLoopRestructurePass()";
let dependentDialects = ["::mlir::scf::SCFDialect"];
let dependentDialects = [
"scf::SCFDialect",
"polygeist::PolygeistDialect",
];
}

def RemoveTrivialUse : Pass<"trivialuse"> {
Expand Down Expand Up @@ -188,7 +253,16 @@ def ConvertPolygeistToLLVM : Pass<"convert-polygeist-to-llvm", "mlir::ModuleOp">
LLVM IR types.
}];
let constructor = "mlir::polygeist::createConvertPolygeistToLLVMPass()";
let dependentDialects = ["LLVM::LLVMDialect"];
let dependentDialects = [
"polygeist::PolygeistDialect",
"func::FuncDialect",
"LLVM::LLVMDialect",
"memref::MemRefDialect",
"gpu::GPUDialect",
"arith::ArithDialect",
"cf::ControlFlowDialect",
"scf::SCFDialect",
];
let options = [
Option<"useBarePtrCallConv", "use-bare-ptr-memref-call-conv", "bool",
/*default=*/"false",
Expand Down
Loading

0 comments on commit 9be12f8

Please sign in to comment.