From 35006321178bdf51d401fe035545e04f50995c24 Mon Sep 17 00:00:00 2001
From: boschmitt <7152025+boschmitt@users.noreply.github.com>
Date: Thu, 28 Nov 2024 00:10:06 +0100
Subject: [PATCH 1/2] [nvq++] Removes MLIR's `scf` dialect

We don't use this dialect and its presence is a historical artifact.
This change triggered the removal of two tests:

* `test/Quake/ghz.qke`
* `test/Quake/iqft.qke`

Both tests are a reminder of a past when we had to write quantum kernels
directly in MLIR because of a lack of frontend. Both no longer test
anything useful.

The commit modifies `test/Quake/canonical-2.qke`, which was only testing
the canonicalization of `cc.scope` operations. The new form removes the
clutter, making the test more precise.

`test/Translate/ghz.qke` had to be modified because it uses MLIR's
`affine.for` and its conversion to LLVMDialect requires `scf.for`.

Signed-off-by: boschmitt <7152025+boschmitt@users.noreply.github.com>
---
 include/cudaq/Optimizer/InitAllDialects.h     |   2 -
 lib/Frontend/nvqpp/ConvertExpr.cpp            |   1 -
 lib/Optimizer/CodeGen/CMakeLists.txt          |   1 -
 lib/Optimizer/CodeGen/ConvertCCToLLVM.cpp     |   2 -
 lib/Optimizer/CodeGen/ConvertToQIR.cpp        |   2 -
 .../Transforms/ApplyOpSpecialization.cpp      |   6 -
 test/Quake/canonical-2.qke                    |  65 +++------
 test/Quake/ghz.qke                            |  45 ------
 test/Quake/iqft.qke                           | 138 ------------------
 test/Translate/ghz.qke                        |  77 +++++-----
 tools/cudaq-quake/CMakeLists.txt              |   1 -
 11 files changed, 64 insertions(+), 276 deletions(-)
 delete mode 100644 test/Quake/ghz.qke
 delete mode 100644 test/Quake/iqft.qke
diff --git a/include/cudaq/Optimizer/InitAllDialects.h b/include/cudaq/Optimizer/InitAllDialects.h
index fdb41114d5..c6df70d88f 100644
--- a/include/cudaq/Optimizer/InitAllDialects.h
+++ b/include/cudaq/Optimizer/InitAllDialects.h
@@ -18,7 +18,6 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/SCF/IR/SCF.h"
 
 namespace cudaq {
 
@@ -35,7 +34,6 @@ inline void registerAllDialects(mlir::DialectRegistry &registry) {
     mlir::LLVM::LLVMDialect,
     mlir::math::MathDialect,
     mlir::memref::MemRefDialect,
-    mlir::scf::SCFDialect,
 
     // NVQ++ dialects
     cudaq::cc::CCDialect,
diff --git a/lib/Frontend/nvqpp/ConvertExpr.cpp b/lib/Frontend/nvqpp/ConvertExpr.cpp
index 8b1f2a2638..780fbe6acd 100644
--- a/lib/Frontend/nvqpp/ConvertExpr.cpp
+++ b/lib/Frontend/nvqpp/ConvertExpr.cpp
@@ -13,7 +13,6 @@
 #include "cudaq/Optimizer/Dialect/CC/CCOps.h"
 #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
 #include "llvm/Support/Debug.h"
-#include "mlir/Dialect/SCF/IR/SCF.h"
 
 #define DEBUG_TYPE "lower-ast-expr"
 
diff --git a/lib/Optimizer/CodeGen/CMakeLists.txt b/lib/Optimizer/CodeGen/CMakeLists.txt
index d555ce99a9..56951cd07a 100644
--- a/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -62,7 +62,6 @@ add_cudaq_library(OptCodeGen
     MLIRFuncToLLVM
     MLIRMathToFuncs
     MLIRMathToLLVM
-    MLIRSCFToControlFlow
 
     # Translation
     MLIRTargetLLVMIRExport
diff --git a/lib/Optimizer/CodeGen/ConvertCCToLLVM.cpp b/lib/Optimizer/CodeGen/ConvertCCToLLVM.cpp
index 3faa371156..7ad2618a4f 100644
--- a/lib/Optimizer/CodeGen/ConvertCCToLLVM.cpp
+++ b/lib/Optimizer/CodeGen/ConvertCCToLLVM.cpp
@@ -22,7 +22,6 @@
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
-#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Dialect/Arith/Transforms/Passes.h"
 #include "mlir/Target/LLVMIR/TypeToLLVM.h"
 
@@ -121,7 +120,6 @@ struct CCToLLVM : public cudaq::opt::impl::CCToLLVMBase<CCToLLVM> {
     arith::populateArithToLLVMConversionPatterns(ccTypeConverter, patterns);
     populateMathToLLVMConversionPatterns(ccTypeConverter, patterns);
 
-    populateSCFToControlFlowConversionPatterns(patterns);
     cf::populateControlFlowToLLVMConversionPatterns(ccTypeConverter, patterns);
     populateFuncToLLVMConversionPatterns(ccTypeConverter, patterns);
     cudaq::opt::populateCCToLLVMPatterns(ccTypeConverter, patterns);
diff --git a/lib/Optimizer/CodeGen/ConvertToQIR.cpp b/lib/Optimizer/CodeGen/ConvertToQIR.cpp
index c5b4606e2d..e23691af94 100644
--- a/lib/Optimizer/CodeGen/ConvertToQIR.cpp
+++ b/lib/Optimizer/CodeGen/ConvertToQIR.cpp
@@ -30,7 +30,6 @@
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
-#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Dialect/Arith/Transforms/Passes.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
@@ -172,7 +171,6 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase<ConvertToQIR> {
     arith::populateArithToLLVMConversionPatterns(typeConverter, patterns);
     populateMathToLLVMConversionPatterns(typeConverter, patterns);
 
-    populateSCFToControlFlowConversionPatterns(patterns);
     cf::populateControlFlowToLLVMConversionPatterns(typeConverter, patterns);
     populateFuncToLLVMConversionPatterns(typeConverter, patterns);
     cudaq::opt::populateCCToLLVMPatterns(typeConverter, patterns);
diff --git a/lib/Optimizer/Transforms/ApplyOpSpecialization.cpp b/lib/Optimizer/Transforms/ApplyOpSpecialization.cpp
index c308206cef..b8a66f52c1 100644
--- a/lib/Optimizer/Transforms/ApplyOpSpecialization.cpp
+++ b/lib/Optimizer/Transforms/ApplyOpSpecialization.cpp
@@ -13,7 +13,6 @@
 #include "cudaq/Optimizer/Transforms/Passes.h"
 #include "cudaq/Todo.h"
 #include "llvm/Support/Debug.h"
-#include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Transforms/DialectConversion.h"
@@ -558,11 +557,6 @@ class ApplySpecializationPass
         invert(newIfOp.getElseRegion());
         continue;
       }
-      if (auto forOp = dyn_cast<scf::ForOp>(op)) {
-        LLVM_DEBUG(llvm::dbgs() << "moving for: " << forOp << ".\n");
-        TODO_loc(loc, "cannot make adjoint of kernel with scf.for");
-        // should we convert to cc.loop and use code below?
-      }
       if (auto loopOp = dyn_cast<cudaq::cc::LoopOp>(op)) {
         LLVM_DEBUG(llvm::dbgs() << "moving loop: " << loopOp << ".\n");
         auto newLoopOp = cloneReversedLoop(builder, loopOp);
diff --git a/test/Quake/canonical-2.qke b/test/Quake/canonical-2.qke
index b37012ad7e..8dacf4c7f8 100644
--- a/test/Quake/canonical-2.qke
+++ b/test/Quake/canonical-2.qke
@@ -8,54 +8,29 @@
 
 // RUN: cudaq-opt -canonicalize %s | FileCheck %s
 
-  func.func @__nvqpp__mlirgen__reflect_about_uniform(%arg0: !quake.veq<?>) attributes {"cudaq-kernel"} {
-    %0 = quake.veq_size %arg0 : (!quake.veq<?>) -> i64
-    %c1_i32 = arith.constant 1 : i32
-    %1 = arith.extsi %c1_i32 : i32 to i64
-    %2 = arith.subi %0, %1 : i64
-    %c0_i64 = arith.constant 0 : i64
-    %c1_i64 = arith.constant 1 : i64
-    %3 = arith.subi %2, %c1_i64 : i64
-    %4 = quake.subveq %arg0, %c0_i64, %3 : (!quake.veq<?>, i64, i64) -> !quake.veq<?>
-    %5 = quake.veq_size %arg0 : (!quake.veq<?>) -> i64
-    %c1_i64_0 = arith.constant 1 : i64
-    %6 = arith.subi %5, %c1_i64_0 : i64
-    %7 = quake.extract_ref %arg0[%6] : (!quake.veq<?>,i64) -> !quake.ref
-    %8 = cc.create_lambda {
-      cc.scope {
-        %c0 = arith.constant 0 : index
-        %c1 = arith.constant 1 : index
-        %10 = quake.veq_size %arg0 : (!quake.veq<?>) -> i64
-        %11 = arith.index_cast %10 : i64 to index
-        scf.for %arg1 = %c0 to %11 step %c1 {
-          %12 = quake.extract_ref %arg0[%arg1] : (!quake.veq<?>,index) -> !quake.ref
-          quake.h %12 : (!quake.ref) -> ()
-        }
-      }
-    } : !cc.callable<() -> ()>
-    %9 = cc.create_lambda {
-      cc.scope {
-        quake.z [%4] %7 : (!quake.veq<?>, !quake.ref) -> ()
-      }
-    } : !cc.callable<() -> ()>
-    quake.compute_action %8, %9 : !cc.callable<() -> ()>, !cc.callable<() -> ()>
-    return
-  }
+func.func @canonicalize_scope(%arg0: !quake.ref) attributes {"cudaq-kernel"} {
+  %0 = cc.create_lambda {
+    cc.scope {
+      quake.h %arg0 : (!quake.ref) -> ()
+    }
+  } : !cc.callable<() -> ()>
+  %1 = cc.create_lambda {
+    cc.scope {
+      quake.z %arg0 : (!quake.ref) -> ()
+    }
+  } : !cc.callable<() -> ()>
+  quake.compute_action %0, %1 : !cc.callable<() -> ()>, !cc.callable<() -> ()>
+  return
+}
 
-// CHECK-LABEL:   func.func @__nvqpp__mlirgen__reflect_about_uniform(
-// CHECK:           %[[VAL_12:.*]] = cc.create_lambda {
+// CHECK-LABEL:   func.func @canonicalize_scope(
+// CHECK:           %[[VAL_0:.*]] = cc.create_lambda {
 // CHECK-NOT:       cc.scope
-// CHECK:             %[[VAL_13:.*]] = quake.veq_size %{{.*}} : (!quake.veq<?>) -> i64
-// CHECK:             %[[VAL_14:.*]] = arith.index_cast %[[VAL_13]] : i64 to index
-// CHECK:             scf.for %[[VAL_15:.*]] = %{{.*}} to %[[VAL_14]] step %
-// CHECK:               %[[VAL_16:.*]] = quake.extract_ref
-// CHECK:               quake.h %[[VAL_16]]
-// CHECK:             }
+// CHECK:             quake.h %{{.*}} :
 // CHECK:           } : !cc.callable<() -> ()>
-// CHECK:           %[[VAL_17:.*]] = cc.create_lambda {
+// CHECK:           %[[VAL_1:.*]] = cc.create_lambda {
 // CHECK-NOT:       cc.scope
-// CHECK:             quake.z [%{{.*}}] %{{.*}} :
+// CHECK:             quake.z %{{.*}} :
 // CHECK:           } : !cc.callable<() -> ()>
-// CHECK:           quake.compute_action
-// CHECK:           return
+// CHECK:           quake.compute_action %[[VAL_0]], %[[VAL_1]]
 
diff --git a/test/Quake/ghz.qke b/test/Quake/ghz.qke
deleted file mode 100644
index ad6e662dea..0000000000
--- a/test/Quake/ghz.qke
+++ /dev/null
@@ -1,45 +0,0 @@
-// ========================================================================== //
-// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                 //
-// All rights reserved.                                                       //
-//                                                                            //
-// This source code and the accompanying materials are made available under   //
-// the terms of the Apache License 2.0 which accompanies this distribution.   //
-// ========================================================================== //
-
-// RUN: cudaq-opt %s --canonicalize | FileCheck %s
-module {
-    // CHECK: func.func @ghz(%[[arg0:.*]]: i32) {
-    // CHECK: %[[C1:.*]] = arith.constant 1 : i32
-    // CHECK: %0 = quake.alloca !quake.veq<?>[%[[arg0]] : i32]
-    // CHECK: %1 = quake.extract_ref %0[0] : (!quake.veq<?>) -> !quake.ref
-    // CHECK: quake.h %1 :
-    // CHECK: %2 = arith.subi %arg0, %[[C1]] : i32
-    // CHECK: %3 = arith.index_cast %2 : i32 to index
-    // CHECK: affine.for %arg1 = 0 to %3 {
-    // CHECK:   %4 = arith.index_cast %arg1 : index to i32
-    // CHECK:   %5 = arith.addi %4, %[[C1]] : i32
-    // CHECK:   %6 = quake.extract_ref %0[%arg1] : (!quake.veq<?>, index) -> !quake.ref
-    // CHECK:   %7 = quake.extract_ref %0[%5] : (!quake.veq<?>, i32) -> !quake.ref
-    // CHECK:   quake.x [%6] %7 : (!quake.ref, !quake.ref) -> ()
-    // CHECK: }
-    // CHECK: return
-    // CHECK: }
-    func.func @ghz(%arg0 : i32) {
-        // %size = arith.constant 3 : i32
-        %c0 = arith.constant 0 : i32
-        %one = arith.constant 1 : i32
-        %q = quake.alloca !quake.veq<?>[%arg0 : i32]
-        %q0 = quake.extract_ref %q[%c0] : (!quake.veq<?>, i32) -> !quake.ref
-        quake.h %q0 : (!quake.ref) -> ()
-        %size_m_1 = arith.subi %arg0, %one : i32
-        %upper = arith.index_cast %size_m_1 : i32 to index
-        affine.for %i = 0 to %upper {
-            %i_int = arith.index_cast %i : index to i32
-            %ip1 = arith.addi %i_int, %one : i32
-            %qi = quake.extract_ref %q[%i] : (!quake.veq<?>, index) -> !quake.ref
-            %qi1 = quake.extract_ref %q[%ip1] : (!quake.veq<?>, i32) -> !quake.ref
-            quake.x [%qi] %qi1 : (!quake.ref, !quake.ref) -> ()
-        }
-        return
-    }
-}
diff --git a/test/Quake/iqft.qke b/test/Quake/iqft.qke
deleted file mode 100644
index 60adfbe545..0000000000
--- a/test/Quake/iqft.qke
+++ /dev/null
@@ -1,138 +0,0 @@
-// ========================================================================== //
-// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                 //
-// All rights reserved.                                                       //
-//                                                                            //
-// This source code and the accompanying materials are made available under   //
-// the terms of the Apache License 2.0 which accompanies this distribution.   //
-// ========================================================================== //
-
-// CUDA-Q code
-// struct iqft {
-//   void operator()(cudaq::qreg q) __qpu__ {
-//     int N = q.size();
-//     // Swap qubits
-//     for (int i = 0; i < N / 2; ++i) {
-//       swap(q[i], q[N - i - 1]);
-//     }
-
-//     for (int i = 0; i < N - 1; ++i) {
-//       h(q[i]);
-//       int j = i + 1;
-//       for (int y = i; y >= 0; --y) { // for (int y = -i; y < 1; y++)
-//         const double theta = -M_PI / std::pow(2.0, j - y);
-//         cphase(theta, q[j], q[y]);
-//       }
-//     }
-
-//     h(q[N - 1]);
-//   }
-// };
-
-// RUN: cudaq-opt %s --canonicalize | FileCheck %s
-
-// CHECK: #map = affine_map<(d0) -> (-d0)>
-// CHECK: module {
-// CHECK:   func.func @iqft(%arg0: !quake.veq<?>) {
-// CHECK:     %[[CF0:.*]] = arith.constant 2.000000e+00 : f64
-// CHECK:     %[[CF1:.*]] = arith.constant -3.1415926535897931 : f64
-// CHECK:     %[[CI1:.*]] = arith.constant 1 : index
-// CHECK:     %[[CI0:.*]] = arith.constant 0 : index
-// CHECK:     %[[C1:.*]] = arith.constant 1 : i32
-// CHECK:     %[[C2:.*]] = arith.constant 2 : i32
-// CHECK:     %c-1_i32 = arith.constant -1 : i32
-// CHECK:     %0 = quake.veq_size %arg0 : (!quake.veq<?>) -> i64
-// CHECK:     %1 = arith.trunci %0 : i64 to i32
-// CHECK:     %2 = arith.subi %1, %[[C1]] : i32
-// CHECK:     %3 = arith.index_cast %2 : i32 to index
-// CHECK:     %4 = arith.divsi %1, %[[C2]] : i32
-// CHECK:     %5 = arith.index_cast %4 : i32 to index
-// CHECK:     scf.for %arg1 = %[[CI0]] to %5 step %[[CI1]] {
-// CHECK:       %7 = arith.index_cast %arg1 : index to i32
-// CHECK:       %8 = arith.subi %1, %7 : i32
-// CHECK:       %9 = arith.subi %8, %[[C1]] : i32
-// CHECK:       %10 = quake.extract_ref %arg0[%7] : (!quake.veq<?>, i32) -> !quake.ref
-// CHECK:       %11 = quake.extract_ref %arg0[%9] : (!quake.veq<?>, i32) -> !quake.ref
-// CHECK:       quake.swap %10, %11 : (!quake.ref, !quake.ref) -> ()
-// CHECK:     }
-// CHECK:     affine.for %arg1 = 0 to %3 {
-// CHECK:       %7 = arith.index_cast %arg1 : index to i32
-// CHECK:       %8 = quake.extract_ref %arg0[%7] : (!quake.veq<?>, i32) -> !quake.ref
-// CHECK:       quake.h %8 : (!quake.ref) -> ()
-// CHECK:       %9 = arith.addi %7, %[[C1]] : i32
-// CHECK:       affine.for %arg2 = #map(%arg1) to 1 {
-// CHECK:         %10 = arith.index_cast %arg2 : index to i32
-// CHECK:         %11 = arith.muli %10, %c-1_i32 : i32
-// CHECK:         %12 = arith.subi %9, %11 : i32
-// CHECK:         %13 = arith.sitofp %12 : i32 to f64
-// CHECK:         %14 = math.powf %[[CF0]], %13 : f64
-// CHECK:         %15 = arith.divf %[[CF1]], %14 : f64
-// CHECK:         %16 = quake.extract_ref %arg0[%9] : (!quake.veq<?>, i32) -> !quake.ref
-// CHECK:         %17 = quake.extract_ref %arg0[%11] : (!quake.veq<?>, i32) -> !quake.ref
-// CHECK:         quake.r1 (%15) [%16] %17 : (f64, !quake.ref, !quake.ref) -> ()
-// CHECK:       }
-// CHECK:     }
-// CHECK:     %6 = quake.extract_ref %arg0[%2] : (!quake.veq<?>, i32) -> !quake.ref
-// CHECK:     quake.h %6 : (!quake.ref) -> ()
-// CHECK:     return
-// CHECK:   }
-// CHECK: }
-
-
-#lb = affine_map<(d0) -> (-1*d0)>
-
-module {
-    func.func @iqft(%arg0 : !quake.veq<?>) {
-        %c1 = arith.constant 1 : i32
-        %c0 = arith.constant 0 : i32
-        %c2 = arith.constant 2 : i32
-        %cn1 = arith.constant -1 : i32
-        %nn = quake.veq_size %arg0 : (!quake.veq<?>) -> i64
-        %n = arith.trunci %nn : i64 to i32
-        %nm1 = arith.subi %n, %c1 : i32
-        %nm1idx = arith.index_cast %nm1 : i32 to index
-        %upper = arith.divsi %n, %c2 : i32
-        %upper_cast = arith.index_cast %upper : i32 to index
-        %lower = arith.index_cast %c0 : i32 to index
-        %c1idx = arith.index_cast %c1 : i32 to index
-
-        scf.for %arg2 = %lower to %upper_cast step %c1idx {
-            %7 = arith.index_cast %arg2 : index to i32
-            %9 = arith.subi %n, %7 : i32
-            %10 = arith.subi %9, %c1 : i32
-            %qi = quake.extract_ref %arg0 [%7] : (!quake.veq<?>,i32) -> !quake.ref
-            %qi1 = quake.extract_ref %arg0 [%10] : (!quake.veq<?>,i32) -> !quake.ref
-            quake.swap %qi, %qi1 : (!quake.ref, !quake.ref) -> ()
-        }
-
-        affine.for %arg3 = 0 to %nm1idx {
-            %11 = arith.index_cast %arg3 : index to i32
-            %qi = quake.extract_ref %arg0[%11] : (!quake.veq<?>, i32) -> !quake.ref
-            quake.h %qi : (!quake.ref) -> ()
-            %13 = arith.addi %11, %c1 : i32
-            %12 = memref.alloca() : memref<i32>
-            memref.store %13, %12[] : memref<i32>
-
-            %lb = arith.muli %11, %cn1 : i32
-            %lbidx = arith.index_cast %lb : i32 to index
-            affine.for %arg4 = #lb(%arg3) to %c1idx {
-                %14 = arith.index_cast %arg4 : index to i32
-                %15 = arith.muli %14, %cn1 : i32
-                %cst = arith.constant 3.1415926535897931 : f64
-                %cst_3 = arith.constant -1.000000e+00 : f64
-                %16 = arith.mulf %cst_3, %cst : f64
-                %c2f = arith.sitofp %c2 : i32 to f64
-                %jmy = arith.subi %13, %15 : i32
-                %s2f = arith.sitofp %jmy : i32 to f64
-                %denom = math.powf %c2f, %s2f : f64
-                %24 = arith.divf %16, %denom : f64
-                %qj = quake.extract_ref %arg0[%13] : (!quake.veq<?>, i32) -> !quake.ref
-                %qy = quake.extract_ref %arg0[%15] : (!quake.veq<?>, i32) -> !quake.ref
-                quake.r1 (%24)[%qj] %qy : (f64,!quake.ref,!quake.ref) -> ()
-            }
-        }
-        %qnm1 = quake.extract_ref %arg0[%nm1] : (!quake.veq<?>,i32) -> !quake.ref
-        quake.h %qnm1 : (!quake.ref) -> ()
-        return
-    }
-}
-
diff --git a/test/Translate/ghz.qke b/test/Translate/ghz.qke
index da78b2ec13..498750ae07 100644
--- a/test/Translate/ghz.qke
+++ b/test/Translate/ghz.qke
@@ -9,48 +9,59 @@
 // RUN: cudaq-opt %s --canonicalize --add-dealloc | cudaq-translate --convert-to=qir | FileCheck %s
 module {
 // CHECK:    %[[VAL_0:.*]] = zext i32
-// CHECK:    %[[VAL_1:.*]] to i64
+// CHECK-SAME:    %[[VAL_1:.*]] to i64
 // CHECK:         %[[VAL_2:.*]] = tail call %[[VAL_3:.*]]* @__quantum__rt__qubit_allocate_array(i64 %[[VAL_0]])
 // CHECK:         %[[VAL_4:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_3]]* %[[VAL_2]], i64 0)
 // CHECK:         %[[VAL_5:.*]] = bitcast i8* %[[VAL_4]] to %[[VAL_6:.*]]**
 // CHECK:         %[[VAL_7:.*]] = load %[[VAL_6]]*, %[[VAL_6]]** %[[VAL_5]], align 8
 // CHECK:         tail call void @__quantum__qis__h(%[[VAL_6]]* %[[VAL_7]])
 // CHECK:         %[[VAL_8:.*]] = add i32 %[[VAL_1]], -1
-// CHECK:         %[[VAL_9:.*]] = sext i32 %[[VAL_8]] to i64
-// CHECK:         %[[VAL_10:.*]] = icmp sgt i32 %[[VAL_8]], 0
-// CHECK:         br i1 %[[VAL_10]], label %[[VAL_11:.*]], label %[[VAL_12:[^,]*]]
+// CHECK:         %[[VAL_9:.*]] = icmp eq i32 %[[VAL_8]], 0
+// CHECK:         br i1 %[[VAL_9]], label %[[VAL_10:.*]], label %[[VAL_11:.*]]
+// CHECK:       .lr.ph.preheader:
+// CHECK-SAME:  ; preds = %[[VAL_12:.*]]
+// CHECK:         %[[VAL_13:.*]] = zext i32 %[[VAL_8]] to i64
+// CHECK:         br label %[[VAL_14:.*]]
 // CHECK:       .lr.ph:
-// CHECK-SAME:  ; preds = %[[VAL_13:.*]], %[[VAL_11]]
-// CHECK:         %[[VAL_14:.*]] = phi i64 [ %[[VAL_15:.*]], %[[VAL_11]] ], [ 0, %[[VAL_13]] ]
-// CHECK:         %[[VAL_15]] = add nuw nsw i64 %[[VAL_14]], 1
-// CHECK:         %[[VAL_16:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_3]]* %[[VAL_2]], i64 %[[VAL_14]])
-// CHECK:         %[[VAL_17:.*]] = bitcast i8* %[[VAL_16]] to %[[VAL_6]]**
-// CHECK:         %[[VAL_18:.*]] = load %[[VAL_6]]*, %[[VAL_6]]** %[[VAL_17]], align 8
-// CHECK:         %[[VAL_19:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_3]]* %[[VAL_2]], i64 %[[VAL_15]])
-// CHECK:         %[[VAL_20:.*]] = bitcast i8* %[[VAL_19]] to %[[VAL_6]]**
-// CHECK:         %[[VAL_21:.*]] = load %[[VAL_6]]*, %[[VAL_6]]** %[[VAL_20]], align 8
-// CHECK: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 1, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_18]], %Qubit* %[[VAL_21]])
-// CHECK:         %[[VAL_22:.*]] = icmp eq i64 %[[VAL_15]], %[[VAL_9]]
-// CHECK:         br i1 %[[VAL_22]], label %[[VAL_12]], label %[[VAL_11]]
+// CHECK-SAME:  ; preds = %[[VAL_11]], %[[VAL_14]]
+// CHECK:         %[[VAL_15:.*]] = phi i64 [ 0, %[[VAL_11]] ], [ %[[VAL_16:.*]], %[[VAL_14]] ]
+// CHECK:         %[[VAL_17:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_3]]* %[[VAL_2]], i64 %[[VAL_15]])
+// CHECK:         %[[VAL_18:.*]] = bitcast i8* %[[VAL_17]] to %[[VAL_6]]**
+// CHECK:         %[[VAL_19:.*]] = load %[[VAL_6]]*, %[[VAL_6]]** %[[VAL_18]], align 8
+// CHECK:         %[[VAL_16]] = add nuw nsw i64 %[[VAL_15]], 1
+// CHECK:         %[[VAL_20:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_3]]* %[[VAL_2]], i64 %[[VAL_16]])
+// CHECK:         %[[VAL_21:.*]] = bitcast i8* %[[VAL_20]] to %[[VAL_6]]**
+// CHECK:         %[[VAL_22:.*]] = load %[[VAL_6]]*, %[[VAL_6]]** %[[VAL_21]], align 8
+// CHECK:         tail call void (i64, void (%[[VAL_3]]*, %[[VAL_6]]*)*, ...) @invokeWithControlQubits(i64 1, void (%[[VAL_3]]*, %[[VAL_6]]*)* nonnull @__quantum__qis__x__ctl, %[[VAL_6]]* %[[VAL_19]], %[[VAL_6]]* %[[VAL_22]])
+// CHECK:         %[[VAL_23:.*]] = icmp eq i64 %[[VAL_16]], %[[VAL_13]]
+// CHECK:         br i1 %[[VAL_23]], label %[[VAL_10]], label %[[VAL_14]]
 // CHECK:       ._crit_edge:
-// CHECK-SAME:  ; preds = %[[VAL_11]], %[[VAL_13]]
+// CHECK-SAME:  ; preds = %[[VAL_14]], %[[VAL_12]]
 // CHECK:         tail call void @__quantum__rt__qubit_release_array(%[[VAL_3]]* %[[VAL_2]])
 // CHECK:         ret void
-    func.func @ghz(%arg0 : i32) {
-        %c0 = arith.constant 0 : i32
-        %one = arith.constant 1 : i32
-        %q = quake.alloca !quake.veq<?>[%arg0 : i32]
-        %q0 = quake.extract_ref %q [%c0] : (!quake.veq<?>,i32) -> !quake.ref
-        quake.h %q0 : (!quake.ref) -> ()
-        %size_m_1 = arith.subi %arg0, %one : i32
-        %upper = arith.index_cast %size_m_1 : i32 to index
-        affine.for %i = 0 to %upper {
-            %i_int = arith.index_cast %i : index to i32
-            %ip1 = arith.addi %i_int, %one : i32
-            %qi = quake.extract_ref %q [%i] : (!quake.veq<?>,index) -> !quake.ref
-            %qi1 = quake.extract_ref %q [%ip1] : (!quake.veq<?>,i32) -> !quake.ref
-            quake.x [%qi] %qi1 : (!quake.ref,!quake.ref) -> ()
-        }
-        return
+
+  func.func @ghz(%arg0: i32){
+    %c1_i32 = arith.constant 1 : i32
+    %c0_i32 = arith.constant 0 : i32
+    %0 = quake.alloca !quake.veq<?>[%arg0 : i32]
+    %1 = quake.extract_ref %0[0] : (!quake.veq<?>) -> !quake.ref
+    quake.h %1 : (!quake.ref) -> ()
+    %2 = cc.loop while ((%arg1 = %c0_i32) -> (i32)) {
+      %4 = arith.subi %arg0, %c1_i32 : i32
+      %5 = arith.cmpi ult, %arg1, %4 : i32
+      cc.condition %5(%arg1 : i32)
+    } do {
+    ^bb0(%arg1: i32):
+      %4 = quake.extract_ref %0[%arg1] : (!quake.veq<?>, i32) -> !quake.ref
+      %5 = arith.addi %arg1, %c1_i32 : i32
+      %6 = quake.extract_ref %0[%5] : (!quake.veq<?>, i32) -> !quake.ref
+      quake.x [%4] %6 : (!quake.ref, !quake.ref) -> ()
+      cc.continue %arg1 : i32
+    } step {
+    ^bb0(%arg1: i32):
+      %3 = arith.addi %arg1, %c1_i32 : i32
+      cc.continue %3 : i32
     }
+    return
+  }
 }
diff --git a/tools/cudaq-quake/CMakeLists.txt b/tools/cudaq-quake/CMakeLists.txt
index 094378da76..f5f2960ed6 100644
--- a/tools/cudaq-quake/CMakeLists.txt
+++ b/tools/cudaq-quake/CMakeLists.txt
@@ -27,7 +27,6 @@ target_link_libraries(cudaq-quake
 
   MLIRAffineDialect
   MLIRMemRefDialect
-  MLIRSCFDialect
   
   clangCodeGen
   clangFrontendTool

From 982278e5ef93f3501476b732edceaa614ba951f6 Mon Sep 17 00:00:00 2001
From: boschmitt <7152025+boschmitt@users.noreply.github.com>
Date: Thu, 28 Nov 2024 16:40:21 +0100
Subject: [PATCH 2/2] [nvq++] Remove MLIR's `affine` dialect.

We don't use this dialect. The removal triggered the removal of one
test:

* `test/Quake/ccnot.qke`

The test was not testing anything useful, only kernel inlining, which is
already covered in other tests. Furthermore, the test is misleading
because, contrary to what the kernel name might indicate, it is not
implementing a `ccnot`.

Signed-off-by: boschmitt <7152025+boschmitt@users.noreply.github.com>
---
 include/cudaq/Frontend/nvqpp/ASTBridge.h      |  1 -
 include/cudaq/Optimizer/InitAllDialects.h     |  2 -
 include/cudaq/Optimizer/Transforms/Passes.h   |  1 -
 lib/Optimizer/CodeGen/CMakeLists.txt          |  1 -
 lib/Optimizer/CodeGen/ConvertToQIR.cpp        |  2 -
 lib/Optimizer/Transforms/PassDetails.h        |  1 -
 lib/Optimizer/Transforms/QuakeAddMetadata.cpp |  1 -
 runtime/cudaq/builder/kernel_builder.cpp      |  2 -
 test/Quake/ccnot.qke                          | 52 -------------------
 tools/cudaq-quake/CMakeLists.txt              |  1 -
 10 files changed, 64 deletions(-)
 delete mode 100644 test/Quake/ccnot.qke

diff --git a/include/cudaq/Frontend/nvqpp/ASTBridge.h b/include/cudaq/Frontend/nvqpp/ASTBridge.h
index baf4518fce..3b257dc51e 100644
--- a/include/cudaq/Frontend/nvqpp/ASTBridge.h
+++ b/include/cudaq/Frontend/nvqpp/ASTBridge.h
@@ -20,7 +20,6 @@
 #include "clang/Frontend/FrontendAction.h"
 #include "clang/Rewrite/Core/Rewriter.h"
 #include "llvm/ADT/ScopedHashTable.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
 #include "mlir/IR/Builders.h"
diff --git a/include/cudaq/Optimizer/InitAllDialects.h b/include/cudaq/Optimizer/InitAllDialects.h
index c6df70d88f..0748b5866a 100644
--- a/include/cudaq/Optimizer/InitAllDialects.h
+++ b/include/cudaq/Optimizer/InitAllDialects.h
@@ -10,7 +10,6 @@
 
 #include "cudaq/Optimizer/Dialect/CC/CCDialect.h"
 #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
@@ -26,7 +25,6 @@ inline void registerAllDialects(mlir::DialectRegistry &registry) {
   // clang-format off
   registry.insert<
     // MLIR dialects
-    mlir::AffineDialect,
     mlir::arith::ArithDialect,
     mlir::cf::ControlFlowDialect,
     mlir::complex::ComplexDialect,
diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h
index 77f461bec2..d699e25355 100644
--- a/include/cudaq/Optimizer/Transforms/Passes.h
+++ b/include/cudaq/Optimizer/Transforms/Passes.h
@@ -42,7 +42,6 @@ std::unique_ptr<mlir::Pass>
 createQuakeSynthesizer(std::string_view, const void *,
                        std::size_t startingArgIdx = 0,
                        bool sameAddressSpace = false);
-std::unique_ptr<mlir::Pass> createRaiseToAffinePass();
 std::unique_ptr<mlir::Pass> createUnwindLoweringPass();
 
 std::unique_ptr<mlir::Pass>
diff --git a/lib/Optimizer/CodeGen/CMakeLists.txt b/lib/Optimizer/CodeGen/CMakeLists.txt
index 56951cd07a..5c056e0e11 100644
--- a/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -54,7 +54,6 @@ add_cudaq_library(OptCodeGen
     MLIRTransforms
 
     # Conversions
-    MLIRAffineToStandard
     MLIRArithToLLVM
     MLIRComplexToLibm
     MLIRComplexToLLVM
diff --git a/lib/Optimizer/CodeGen/ConvertToQIR.cpp b/lib/Optimizer/CodeGen/ConvertToQIR.cpp
index e23691af94..738ee66ea1 100644
--- a/lib/Optimizer/CodeGen/ConvertToQIR.cpp
+++ b/lib/Optimizer/CodeGen/ConvertToQIR.cpp
@@ -20,7 +20,6 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/FormatVariadic.h"
-#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
 #include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h"
 #include "mlir/Conversion/ComplexToLibm/ComplexToLibm.h"
@@ -166,7 +165,6 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase<ConvertToQIR> {
     populateComplexToLibmConversionPatterns(patterns, 1);
     populateComplexToLLVMConversionPatterns(typeConverter, patterns);
 
-    populateAffineToStdConversionPatterns(patterns);
     arith::populateCeilFloorDivExpandOpsPatterns(patterns);
     arith::populateArithToLLVMConversionPatterns(typeConverter, patterns);
     populateMathToLLVMConversionPatterns(typeConverter, patterns);
diff --git a/lib/Optimizer/Transforms/PassDetails.h b/lib/Optimizer/Transforms/PassDetails.h
index 80d2331d0b..9367f4a77f 100644
--- a/lib/Optimizer/Transforms/PassDetails.h
+++ b/lib/Optimizer/Transforms/PassDetails.h
@@ -11,7 +11,6 @@
 #include "cudaq/Optimizer/Dialect/CC/CCDialect.h"
 #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h"
 #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
diff --git a/lib/Optimizer/Transforms/QuakeAddMetadata.cpp b/lib/Optimizer/Transforms/QuakeAddMetadata.cpp
index 6f174463f3..2c8dbc284b 100644
--- a/lib/Optimizer/Transforms/QuakeAddMetadata.cpp
+++ b/lib/Optimizer/Transforms/QuakeAddMetadata.cpp
@@ -12,7 +12,6 @@
 #include "cudaq/Optimizer/Transforms/Passes.h"
 #include "cudaq/Todo.h"
 #include "llvm/Support/Debug.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
diff --git a/runtime/cudaq/builder/kernel_builder.cpp b/runtime/cudaq/builder/kernel_builder.cpp
index be415c6620..6870ebef2a 100644
--- a/runtime/cudaq/builder/kernel_builder.cpp
+++ b/runtime/cudaq/builder/kernel_builder.cpp
@@ -17,8 +17,6 @@
 #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h"
 #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
 #include "cudaq/Optimizer/Transforms/Passes.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/ExecutionEngine/ExecutionEngine.h"
diff --git a/test/Quake/ccnot.qke b/test/Quake/ccnot.qke
deleted file mode 100644
index a74536dfd6..0000000000
--- a/test/Quake/ccnot.qke
+++ /dev/null
@@ -1,52 +0,0 @@
-// ========================================================================== //
-// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                 //
-// All rights reserved.                                                       //
-//                                                                            //
-// This source code and the accompanying materials are made available under   //
-// the terms of the Apache License 2.0 which accompanies this distribution.   //
-// ========================================================================== //
-
-// RUN: cudaq-opt %s --inline --canonicalize | FileCheck %s 
-
-module {
-
-    // CHECK-LABEL: func.func @apply_x(
-    // CHECK-SAME: %[[arg0:.*]]: !quake.ref) {
-    // CHECK:   quake.x %[[arg0]] :
-    // CHECK:   return
-    // CHECK: }
-    
-    // CHECK-LABEL: func.func @ccnot() {
-    // CHECK:   %[[a0:.*]] = quake.alloca !quake.veq<3>
-    // CHECK:   affine.for %[[arg0:.*]] = 0 to 3 {
-    // CHECK:     %[[a2:.*]] = quake.extract_ref %[[a0]][%[[arg0]]] : (!quake.veq<3>, index) -> !quake.ref
-    // CHECK:     quake.x %[[a2]] :
-    // CHECK:   }
-    // CHECK:   %[[a1:.*]] = quake.extract_ref %[[a0]][1] : (!quake.veq<3>) -> !quake.ref
-    // CHECK:   quake.x %[[a1]] :
-    // CHECK:   return
-    // CHECK: }
-    
-    func.func @apply_x(%q : !quake.ref) {
-        quake.x %q : (!quake.ref) -> ()
-        return
-    }
-
-    func.func @ccnot() {
-        %c_3 = arith.constant 3 : i32
-        %c_0 = arith.constant 0 : i32
-        %c_1 = arith.constant 1 : i32
-        %c_2 = arith.constant 2 : i32
-        %qubits = quake.alloca !quake.veq<?> [ %c_3 : i32 ]
-        %c_3_idx = arith.index_cast %c_3 : i32 to index
-        affine.for %i = 0 to %c_3_idx {
-            %q0 = quake.extract_ref %qubits [%i] : (!quake.veq<?>, index) -> !quake.ref
-            quake.x %q0 : (!quake.ref) -> ()
-        }
-
-        %q1 = quake.extract_ref %qubits [%c_1] : (!quake.veq<?>, i32) -> !quake.ref
-        func.call @apply_x(%q1) : (!quake.ref) -> ()
-
-        return
-    }
-}
diff --git a/tools/cudaq-quake/CMakeLists.txt b/tools/cudaq-quake/CMakeLists.txt
index f5f2960ed6..277df39c22 100644
--- a/tools/cudaq-quake/CMakeLists.txt
+++ b/tools/cudaq-quake/CMakeLists.txt
@@ -25,7 +25,6 @@ target_link_libraries(cudaq-quake
   MLIRLLVMCommonConversion
   MLIRLLVMToLLVMIRTranslation
 
-  MLIRAffineDialect
   MLIRMemRefDialect
   
   clangCodeGen