Bump LLVM Version

(merge 65fa61d)
llvm · Oct 6, 2023 · 668cfba
1 parent 423dd17
commit 668cfba
Show file tree

Hide file tree

Showing 196 changed files with 5,116 additions and 3,803 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -1,4 +1,4 @@
-name: MLIR-GPU Test CI
+name: llvm_project_build
 
 on:
   push:
@@ -24,7 +24,7 @@ jobs:
           - compiler: clang
             cxxcompiler: g++
 
-    timeout-minutes: 240 
+    timeout-minutes: 360
     steps:
     - uses: actions/checkout@v3
       with:
@@ -46,13 +46,6 @@ jobs:
     - name: add dependencies
       run: sudo apt-get install -y ninja-build #cmake binutils-gold binutils binutils-dev ${{ matrix.compiler }} ${{ matrix.linker-pkg }}
 
-    #- name: setup cymbl
-    #  run: |
-    #      cd /
-    #      sudo wget --no-verbose https://github.com/cymbl/cymbl.github.io/releases/download/0.0.1/LLVM-11.0.0git-Linux.sh
-    #      printf "y\nn\n" | sudo bash LLVM-11.0.0git-Linux.sh
-    #      printf "{\"refreshToken\":\"%s\"}" "${{ secrets.SuperSecret }}" > ~/.cymblconfig
-
     - name: MLIR build
       if: steps.cache-mlir.outputs.cache-hit != 'true'
       run: |

diff --git a/.gitignore b/.gitignore
@@ -80,3 +80,8 @@ pythonenv*
 /clang/utils/analyzer/projects/*/RefScanBuildResults
 # automodapi puts generated documentation files here.
 /lldb/docs/python_api/
+
+
+# tmp output from tests
+*.exec1
+*.out1
diff --git a/include/polygeist/BarrierUtils.h b/include/polygeist/BarrierUtils.h
@@ -51,15 +51,14 @@ allocateTemporaryBuffer(mlir::OpBuilder &rewriter, mlir::Value value,
                         mlir::ValueRange iterationCounts, bool alloca = true,
                         mlir::DataLayout *DLI = nullptr) {
   using namespace mlir;
-  SmallVector<int64_t> bufferSize(iterationCounts.size(),
-                                  ShapedType::kDynamicSize);
+  SmallVector<int64_t> bufferSize(iterationCounts.size(), ShapedType::kDynamic);
   mlir::Type ty = value.getType();
   if (alloca)
     if (auto allocaOp = value.getDefiningOp<memref::AllocaOp>()) {
       auto mt = allocaOp.getType();
       bool hasDynamicSize = false;
       for (auto s : mt.getShape()) {
-        if (s == ShapedType::kDynamicSize) {
+        if (s == ShapedType::kDynamic) {
           hasDynamicSize = true;
           break;
         }
@@ -84,10 +83,12 @@ mlir::Value allocateTemporaryBuffer<mlir::LLVM::AllocaOp>(
   auto sz = val.getArraySize();
   assert(DLI);
   for (auto iter : iterationCounts) {
-    sz =
-        rewriter.create<arith::MulIOp>(value.getLoc(), sz,
-                                       rewriter.create<arith::IndexCastOp>(
-                                           value.getLoc(), sz.getType(), iter));
+    sz = cast<TypedValue<IntegerType>>(
+        rewriter
+            .create<arith::MulIOp>(value.getLoc(), sz,
+                                   rewriter.create<arith::IndexCastOp>(
+                                       value.getLoc(), sz.getType(), iter))
+            .getResult());
   }
   return rewriter.create<LLVM::AllocaOp>(value.getLoc(), val.getType(), sz);
 }
@@ -100,18 +101,24 @@ mlir::Value allocateTemporaryBuffer<mlir::LLVM::CallOp>(
   auto val = value.getDefiningOp<LLVM::AllocaOp>();
   auto sz = val.getArraySize();
   assert(DLI);
-  sz = rewriter.create<arith::MulIOp>(
-      value.getLoc(), sz,
-      rewriter.create<arith::ConstantIntOp>(
-          value.getLoc(),
-          DLI->getTypeSize(
-              val.getType().cast<LLVM::LLVMPointerType>().getElementType()),
-          sz.getType().cast<IntegerType>().getWidth()));
+  sz = cast<TypedValue<IntegerType>>(
+      rewriter
+          .create<arith::MulIOp>(
+              value.getLoc(), sz,
+              rewriter.create<arith::ConstantIntOp>(
+                  value.getLoc(),
+                  DLI->getTypeSize(val.getType()
+                                       .cast<LLVM::LLVMPointerType>()
+                                       .getElementType()),
+                  sz.getType().cast<IntegerType>().getWidth()))
+          .getResult());
   for (auto iter : iterationCounts) {
-    sz =
-        rewriter.create<arith::MulIOp>(value.getLoc(), sz,
-                                       rewriter.create<arith::IndexCastOp>(
-                                           value.getLoc(), sz.getType(), iter));
+    sz = cast<TypedValue<IntegerType>>(
+        rewriter
+            .create<arith::MulIOp>(value.getLoc(), sz,
+                                   rewriter.create<arith::IndexCastOp>(
+                                       value.getLoc(), sz.getType(), iter))
+            .getResult());
   }
   auto m = val->getParentOfType<ModuleOp>();
   return callMalloc(rewriter, m, value.getLoc(), sz);

diff --git a/include/polygeist/Ops.h b/include/polygeist/Ops.h
@@ -81,8 +81,9 @@ class BarrierElim final
     }
 
     Operation *op = barrier;
-    if (NotTopLevel && isa<mlir::scf::ParallelOp, mlir::AffineParallelOp>(
-                           barrier->getParentOp()))
+    if (NotTopLevel &&
+        isa<mlir::scf::ParallelOp, mlir::affine::AffineParallelOp>(
+            barrier->getParentOp()))
       return failure();
 
     {

diff --git a/include/polygeist/Passes/Passes.h b/include/polygeist/Passes/Passes.h
@@ -2,7 +2,11 @@
 #define POLYGEIST_DIALECT_POLYGEIST_PASSES_H
 
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "polygeist/Dialect.h"
 #include <memory>
 
 enum PolygeistAlternativesMode { PAM_Static, PAM_PGO_Profile, PAM_PGO_Opt };
@@ -19,7 +23,7 @@ class RewritePatternSet;
 class DominanceInfo;
 namespace polygeist {
 std::unique_ptr<Pass> createParallelLICMPass();
-std::unique_ptr<Pass> createMem2RegPass();
+std::unique_ptr<Pass> createPolygeistMem2RegPass();
 std::unique_ptr<Pass> createLoopRestructurePass();
 std::unique_ptr<Pass> createInnerSerializationPass();
 std::unique_ptr<Pass> createSerializationPass();
@@ -37,6 +41,7 @@ std::unique_ptr<Pass> createParallelLowerPass(
 std::unique_ptr<Pass> createConvertCudaRTtoCPUPass();
 std::unique_ptr<Pass> createConvertCudaRTtoGPUPass();
 std::unique_ptr<Pass> createConvertCudaRTtoHipRTPass();
+std::unique_ptr<Pass> createFixGPUFuncPass();
 std::unique_ptr<Pass> createSCFParallelLoopUnrollPass(int unrollFactor = 2);
 std::unique_ptr<Pass>
 createConvertPolygeistToLLVMPass(const LowerToLLVMOptions &options,
@@ -49,6 +54,14 @@ createConvertParallelToGPUPass1(std::string arch = "sm_60");
 std::unique_ptr<Pass>
 createConvertParallelToGPUPass2(bool emitGPUKernelLaunchBounds = true);
 std::unique_ptr<Pass> createMergeGPUModulesPass();
+std::unique_ptr<Pass> createConvertToOpaquePtrPass();
+std::unique_ptr<Pass> createLowerAlternativesPass();
+std::unique_ptr<Pass> createCollectKernelStatisticsPass();
+std::unique_ptr<Pass> createPolygeistCanonicalizePass();
+std::unique_ptr<Pass>
+createPolygeistCanonicalizePass(const GreedyRewriteConfig &config,
+                                ArrayRef<std::string> disabledPatterns,
+                                ArrayRef<std::string> enabledPatterns);
 std::unique_ptr<Pass> createGpuSerializeToCubinPass(
     StringRef arch, StringRef features, int llvmOptLevel, int ptxasOptLevel,
     std::string ptxasPath, std::string libDevicePath, bool outputIntermediate);
@@ -78,10 +91,26 @@ namespace arith {
 class ArithDialect;
 } // end namespace arith
 
+namespace omp {
+class OpenMPDialect;
+} // end namespace omp
+
+namespace polygeist {
+class PolygeistDialect;
+} // end namespace polygeist
+
 namespace scf {
 class SCFDialect;
 } // end namespace scf
 
+namespace cf {
+class ControlFlowDialect;
+} // end namespace cf
+
+namespace math {
+class MathDialect;
+} // end namespace math
+
 namespace memref {
 class MemRefDialect;
 } // end namespace memref
@@ -90,7 +119,10 @@ namespace func {
 class FuncDialect;
 }
 
+namespace affine {
 class AffineDialect;
+}
+
 namespace LLVM {
 class LLVMDialect;
 }