From a73d241bc25fa6daf9393fdb5db25107f980c88a Mon Sep 17 00:00:00 2001
From: max <maksim.levental@gmail.com>
Date: Fri, 22 Dec 2023 18:28:53 -0600
Subject: [PATCH 1/3] use star import in ext dir

---
 tests/test_transform.py | 55 +++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/tests/test_transform.py b/tests/test_transform.py
index 54cd3dc..fa28692 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -1,11 +1,24 @@
 from textwrap import dedent
 
 import pytest
+from mlir.dialects import linalg, arith
+from mlir.dialects import pdl
+from mlir.dialects.builtin import module
 from mlir.dialects.gpu import MappingId
+from mlir.dialects.transform import (
+    get_parent_op,
+    apply_patterns_canonicalization,
+    apply_cse,
+    any_op_t,
+    FailurePropagationMode,
+)
+from mlir.dialects.transform.extras import named_sequence, apply_patterns, sequence
+from mlir.dialects.transform.loop import loop_unroll
+from mlir.dialects.transform.structured import structured_match
+from mlir.ir import UnitAttr
 
 from mlir.extras import types as T
 from mlir.extras.ast.canonicalize import canonicalize
-from mlir.dialects import linalg, arith
 from mlir.extras.dialects.ext import linalg
 from mlir.extras.dialects.ext.func import func
 from mlir.extras.dialects.ext.gpu import block_attr, thread_attr
@@ -15,15 +28,10 @@
 )
 from mlir.extras.dialects.ext.tensor import pad
 from mlir.extras.dialects.ext.transform import (
-    sequence,
-    unroll,
-    get_parent,
     match,
     tile,
     tile_to_scf_forall,
-    apply_patterns,
 )
-from mlir.dialects.transform import apply_patterns_canonicalization, apply_cse
 from mlir.extras.runtime.passes import run_pipeline, Pipeline
 
 # noinspection PyUnresolvedReferences
@@ -42,11 +50,13 @@ def loop_unroll_op():
 
     loop_unroll_op.emit()
 
-    @sequence(target_tag="basic")
-    def basic(target):
-        m = match(target, ["arith.addi"])
-        loop = get_parent(m, op_name="scf.for")
-        unroll(loop, 4)
+    @module(attrs={"transform.with_named_sequence": UnitAttr.get()})
+    def mod():
+        @named_sequence("__transform_main", [any_op_t()], [])
+        def basic(target: any_op_t()):
+            m = structured_match(any_op_t(), target, ops=["arith.addi"])
+            loop = get_parent_op(pdl.op_t(), m, op_name="scf.for")
+            loop_unroll(loop, 4)
 
     correct = dedent(
         """\
@@ -60,23 +70,20 @@ def basic(target):
         }
         return
       }
-      transform.sequence  failures(propagate) attributes {transform.target_tag = "basic"} {
-      ^bb0(%arg0: !pdl.operation):
-        %0 = transform.structured.match ops{["arith.addi"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        %1 = get_parent_op %0 {op_name = "scf.for"} : (!transform.any_op) -> !pdl.operation
-        transform.loop.unroll %1 {factor = 4 : i64} : !pdl.operation
+      module attributes {transform.with_named_sequence} {
+        transform.named_sequence @__transform_main(%arg0: !transform.any_op) {
+          %0 = transform.structured.match ops{["arith.addi"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          %1 = transform.get_parent_op %0 {op_name = "scf.for"} : (!transform.any_op) -> !pdl.operation
+          transform.loop.unroll %1 {factor = 4 : i64} : !pdl.operation
+          transform.yield 
+        }
       }
     }
     """
     )
     filecheck(correct, ctx.module)
 
-    module = run_pipeline(
-        ctx.module,
-        Pipeline()
-        .add_pass("test-transform-dialect-interpreter")
-        .add_pass("test-transform-dialect-erase-schedule"),
-    )
+    mod = run_pipeline(ctx.module, Pipeline().add_pass("transform-interpreter"))
 
     correct = dedent(
         """\
@@ -108,7 +115,7 @@ def basic(target):
     }
     """
     )
-    filecheck(correct, module)
+    filecheck(correct, mod)
 
 
 def test_basic_tile(ctx):
@@ -123,7 +130,7 @@ def pad_(i: T.index(), j: T.index()):
 
     pad_tensor_3_4.emit()
 
-    @sequence(target_tag="basic")
+    @sequence([], FailurePropagationMode.Propagate, [])
     def basic(target):
         m = match(target, ["tensor.pad"])
         tiled_linalg_op, loops = tile(m, sizes=[2, 3])

From 92199663d5bcc773c3802c14544a3d983d9cd086 Mon Sep 17 00:00:00 2001
From: max <maksim.levental@gmail.com>
Date: Fri, 22 Dec 2023 19:55:40 -0600
Subject: [PATCH 2/3] fix transform

---
 mlir/extras/runtime/passes.py |  74 +---------
 tests/test_transform.py       | 268 +++++++++++++++++-----------------
 2 files changed, 144 insertions(+), 198 deletions(-)

diff --git a/mlir/extras/runtime/passes.py b/mlir/extras/runtime/passes.py
index 06d765e..1ed76c2 100644
--- a/mlir/extras/runtime/passes.py
+++ b/mlir/extras/runtime/passes.py
@@ -210,79 +210,17 @@ def lower_to_vulkan(self, index_bitwidth=None):
     def transform_dialect_erase_schedule(self):
         return self.add_pass("test-transform-dialect-erase-schedule")
 
-    def transform_dialect_interpreter(
+    def transform_interpreter(
         self,
-        bind_first_extra_to_ops=None,
-        bind_first_extra_to_params=None,
-        bind_first_extra_to_results_of_ops=None,
-        bind_second_extra_to_ops=None,
-        bind_second_extra_to_params=None,
-        bind_second_extra_to_results_of_ops=None,
         debug_payload_root_tag=None,
-        debug_transform_root_tag=None,
-        enable_expensive_checks=None,
-        transform_file_name=None,
-        test_module_generation=None,
+        disable_expensive_checks=None,
+        entry_point=None,
     ):
-        if bind_first_extra_to_ops is not None and isinstance(
-            bind_first_extra_to_ops, (list, tuple)
-        ):
-            bind_first_extra_to_ops = ",".join(map(str, bind_first_extra_to_ops))
-        if bind_first_extra_to_params is not None and isinstance(
-            bind_first_extra_to_params, (list, tuple)
-        ):
-            bind_first_extra_to_params = ",".join(map(str, bind_first_extra_to_params))
-        if bind_first_extra_to_results_of_ops is not None and isinstance(
-            bind_first_extra_to_results_of_ops, (list, tuple)
-        ):
-            bind_first_extra_to_results_of_ops = ",".join(
-                map(str, bind_first_extra_to_results_of_ops)
-            )
-        if bind_second_extra_to_ops is not None and isinstance(
-            bind_second_extra_to_ops, (list, tuple)
-        ):
-            bind_second_extra_to_ops = ",".join(map(str, bind_second_extra_to_ops))
-        if bind_second_extra_to_params is not None and isinstance(
-            bind_second_extra_to_params, (list, tuple)
-        ):
-            bind_second_extra_to_params = ",".join(
-                map(str, bind_second_extra_to_params)
-            )
-        if bind_second_extra_to_results_of_ops is not None and isinstance(
-            bind_second_extra_to_results_of_ops, (list, tuple)
-        ):
-            bind_second_extra_to_results_of_ops = ",".join(
-                map(str, bind_second_extra_to_results_of_ops)
-            )
-        if debug_payload_root_tag is not None and isinstance(
-            debug_payload_root_tag, (list, tuple)
-        ):
-            debug_payload_root_tag = ",".join(map(str, debug_payload_root_tag))
-        if debug_transform_root_tag is not None and isinstance(
-            debug_transform_root_tag, (list, tuple)
-        ):
-            debug_transform_root_tag = ",".join(map(str, debug_transform_root_tag))
-        if transform_file_name is not None and isinstance(
-            transform_file_name, (list, tuple)
-        ):
-            transform_file_name = ",".join(map(str, transform_file_name))
-        if test_module_generation is not None and isinstance(
-            test_module_generation, (list, tuple)
-        ):
-            test_module_generation = ",".join(map(str, test_module_generation))
         return self.add_pass(
-            "test-transform-dialect-interpreter",
-            bind_first_extra_to_ops=bind_first_extra_to_ops,
-            bind_first_extra_to_params=bind_first_extra_to_params,
-            bind_first_extra_to_results_of_ops=bind_first_extra_to_results_of_ops,
-            bind_second_extra_to_ops=bind_second_extra_to_ops,
-            bind_second_extra_to_params=bind_second_extra_to_params,
-            bind_second_extra_to_results_of_ops=bind_second_extra_to_results_of_ops,
+            "transform-interpreter",
             debug_payload_root_tag=debug_payload_root_tag,
-            debug_transform_root_tag=debug_transform_root_tag,
-            enable_expensive_checks=enable_expensive_checks,
-            transform_file_name=transform_file_name,
-            test_module_generation=test_module_generation,
+            disable_expensive_checks=disable_expensive_checks,
+            entry_point=entry_point,
         )
 
     ############################
diff --git a/tests/test_transform.py b/tests/test_transform.py
index fa28692..9276200 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -10,7 +10,6 @@
     apply_patterns_canonicalization,
     apply_cse,
     any_op_t,
-    FailurePropagationMode,
 )
 from mlir.dialects.transform.extras import named_sequence, apply_patterns, sequence
 from mlir.dialects.transform.loop import loop_unroll
@@ -32,6 +31,7 @@
     tile,
     tile_to_scf_forall,
 )
+from mlir.extras.meta import region_op
 from mlir.extras.runtime.passes import run_pipeline, Pipeline
 
 # noinspection PyUnresolvedReferences
@@ -53,7 +53,7 @@ def loop_unroll_op():
     @module(attrs={"transform.with_named_sequence": UnitAttr.get()})
     def mod():
         @named_sequence("__transform_main", [any_op_t()], [])
-        def basic(target: any_op_t()):
+        def basic(target):
             m = structured_match(any_op_t(), target, ops=["arith.addi"])
             loop = get_parent_op(pdl.op_t(), m, op_name="scf.for")
             loop_unroll(loop, 4)
@@ -83,7 +83,7 @@ def basic(target: any_op_t()):
     )
     filecheck(correct, ctx.module)
 
-    mod = run_pipeline(ctx.module, Pipeline().add_pass("transform-interpreter"))
+    mod = run_pipeline(ctx.module, Pipeline().transform_interpreter())
 
     correct = dedent(
         """\
@@ -130,10 +130,12 @@ def pad_(i: T.index(), j: T.index()):
 
     pad_tensor_3_4.emit()
 
-    @sequence([], FailurePropagationMode.Propagate, [])
-    def basic(target):
-        m = match(target, ["tensor.pad"])
-        tiled_linalg_op, loops = tile(m, sizes=[2, 3])
+    @module(attrs={"transform.with_named_sequence": UnitAttr.get()})
+    def mod():
+        @named_sequence("__transform_main", [any_op_t()], [])
+        def basic(target):
+            m = match(target, ["tensor.pad"])
+            tiled_linalg_op, loops = tile(m, sizes=[2, 3])
 
     correct = dedent(
         """\
@@ -145,22 +147,21 @@ def basic(target):
         } : tensor<4x16xf32> to tensor<12x23xf32>
         return %padded : tensor<12x23xf32>
       }
-      transform.sequence  failures(propagate) attributes {transform.target_tag = "basic"} {
-      ^bb0(%arg0: !pdl.operation):
-        %0 = transform.structured.match ops{["tensor.pad"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        %tiled_linalg_op, %loops:2 = transform.structured.tile_using_for %0[2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+      module attributes {transform.with_named_sequence} {
+        transform.named_sequence @__transform_main(%arg0: !transform.any_op) {
+          %0 = transform.structured.match ops{["tensor.pad"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          %tiled_linalg_op, %loops:2 = transform.structured.tile_using_for %0[2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+          transform.yield 
+        }
       }
     }
     """
     )
     filecheck(correct, ctx.module)
 
-    module = run_pipeline(
+    mod = run_pipeline(
         ctx.module,
-        Pipeline()
-        .add_pass("test-transform-dialect-interpreter")
-        .add_pass("test-transform-dialect-erase-schedule")
-        .canonicalize(),
+        Pipeline().transform_interpreter().canonicalize(),
     )
     correct = dedent(
         """\
@@ -228,7 +229,7 @@ def basic(target):
     }
     """
     )
-    filecheck(correct, module)
+    filecheck(correct, mod)
 
 
 def test_linalg_tile(ctx: MLIRContext):
@@ -243,10 +244,12 @@ def matmul(
 
     matmul.emit()
 
-    @sequence(target_tag="basic")
-    def basic(target):
-        m = match(target, ["linalg.matmul"])
-        tiled_linalg_op, loops = tile(m, sizes=[2, 3])
+    @module(attrs={"transform.with_named_sequence": UnitAttr.get()})
+    def mod():
+        @named_sequence("__transform_main", [any_op_t()], [])
+        def basic(target):
+            m = match(target, ["linalg.matmul"])
+            tiled_linalg_op, loops = tile(m, sizes=[2, 3])
 
     correct = dedent(
         """\
@@ -255,22 +258,21 @@ def basic(target):
         %0 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) outs(%arg2 : tensor<4x8xf32>) -> tensor<4x8xf32>
         return %0 : tensor<4x8xf32>
       }
-      transform.sequence  failures(propagate) attributes {transform.target_tag = "basic"} {
-      ^bb0(%arg0: !pdl.operation):
-        %0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        %tiled_linalg_op, %loops:2 = transform.structured.tile_using_for %0[2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+      module attributes {transform.with_named_sequence} {
+        transform.named_sequence @__transform_main(%arg0: !transform.any_op) {
+          %0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          %tiled_linalg_op, %loops:2 = transform.structured.tile_using_for %0[2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+          transform.yield 
+        }
       }
     }
     """
     )
     filecheck(correct, ctx.module)
 
-    module = run_pipeline(
+    mod = run_pipeline(
         ctx.module,
-        Pipeline()
-        .add_pass("test-transform-dialect-interpreter")
-        .add_pass("test-transform-dialect-erase-schedule")
-        .canonicalize(),
+        Pipeline().transform_interpreter().canonicalize(),
     )
 
     correct = dedent(
@@ -300,7 +302,7 @@ def basic(target):
     }
     """
     )
-    filecheck(correct, module)
+    filecheck(correct, mod)
 
 
 def test_simple_matmul_tile_foreach_thread(ctx: MLIRContext):
@@ -315,10 +317,12 @@ def matmul(
 
     matmul.emit()
 
-    @sequence(target_tag="basic")
-    def basic(target):
-        m = match(target, ["linalg.matmul"])
-        tiled_linalg_op, loops = tile_to_scf_forall(m, tile_sizes=[2, 3])
+    @module(attrs={"transform.with_named_sequence": UnitAttr.get()})
+    def mod():
+        @named_sequence("__transform_main", [any_op_t()], [])
+        def basic(target):
+            m = match(target, ["linalg.matmul"])
+            tiled_linalg_op, loops = tile_to_scf_forall(m, tile_sizes=[2, 3])
 
     correct = dedent(
         """\
@@ -327,22 +331,21 @@ def basic(target):
         %0 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) outs(%arg2 : tensor<4x8xf32>) -> tensor<4x8xf32>
         return %0 : tensor<4x8xf32>
       }
-      transform.sequence  failures(propagate) attributes {transform.target_tag = "basic"} {
-      ^bb0(%arg0: !pdl.operation):
-        %0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        %tiled_op, %forall_op = transform.structured.tile_using_forall %0 tile_sizes [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+      module attributes {transform.with_named_sequence} {
+        transform.named_sequence @__transform_main(%arg0: !transform.any_op) {
+          %0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          %tiled_op, %forall_op = transform.structured.tile_using_forall %0 tile_sizes [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+          transform.yield 
+        }
       }
     }
     """
     )
     filecheck(correct, ctx.module)
 
-    module = run_pipeline(
+    mod = run_pipeline(
         ctx.module,
-        Pipeline()
-        .add_pass("test-transform-dialect-interpreter")
-        .add_pass("test-transform-dialect-erase-schedule")
-        .canonicalize(),
+        Pipeline().transform_interpreter().canonicalize(),
     )
 
     correct = dedent(
@@ -374,7 +377,7 @@ def basic(target):
     """
     )
 
-    filecheck(correct, module)
+    filecheck(correct, mod)
 
 
 def test_common_extension_sugar(ctx: MLIRContext):
@@ -387,13 +390,15 @@ def select_cmp_eq_select(arg0: T.i64(), arg1: T.i64()):
 
     select_cmp_eq_select.emit()
 
-    @sequence(target_tag="basic")
-    def basic(target):
-        m = match(target, ["func.func"])
+    @module(attrs={"transform.with_named_sequence": UnitAttr.get()})
+    def mod():
+        @named_sequence("__transform_main", [any_op_t()], [])
+        def basic(target):
+            m = match(target, ["func.func"])
 
-        @apply_patterns(m)
-        def pats():
-            apply_patterns_canonicalization()
+            @apply_patterns(m)
+            def pats():
+                apply_patterns_canonicalization()
 
     correct = dedent(
         """\
@@ -403,24 +408,23 @@ def pats():
         %1 = arith.select %0, %arg0, %arg1 : i64
         return %1 : i64
       }
-      transform.sequence  failures(propagate) attributes {transform.target_tag = "basic"} {
-      ^bb0(%arg0: !pdl.operation):
-        %0 = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        apply_patterns to %0 {
-          transform.apply_patterns.canonicalization
-        } : !transform.any_op
+      module attributes {transform.with_named_sequence} {
+        transform.named_sequence @__transform_main(%arg0: !transform.any_op) {
+          %0 = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          transform.apply_patterns to %0 {
+            transform.apply_patterns.canonicalization
+          } : !transform.any_op
+          transform.yield 
+        }
       }
     }
     """
     )
     filecheck(correct, ctx.module)
 
-    module = run_pipeline(
+    mod = run_pipeline(
         ctx.module,
-        Pipeline()
-        .add_pass("test-transform-dialect-interpreter")
-        .add_pass("test-transform-dialect-erase-schedule")
-        .canonicalize(),
+        Pipeline().transform_interpreter().canonicalize(),
     )
 
     correct = dedent(
@@ -433,7 +437,7 @@ def pats():
     """
     )
 
-    filecheck(correct, module)
+    filecheck(correct, mod)
 
 
 def test_apply_cse(ctx: MLIRContext):
@@ -450,22 +454,24 @@ def matmul(
 
     matmul.emit()
 
-    @sequence(target_tag="basic")
-    def basic(variant_op):
-        matmul = match(variant_op, ["linalg.matmul"])
+    @module(attrs={"transform.with_named_sequence": UnitAttr.get()})
+    def mod():
+        @named_sequence("__transform_main", [any_op_t()], [])
+        def basic(variant_op):
+            matmul = match(variant_op, ["linalg.matmul"])
 
-        forall_op, tiled_generic = tile_to_scf_forall(
-            matmul, tile_sizes=[2], mapping=[block_attr(MappingId.DimX)]
-        )
+            forall_op, tiled_generic = tile_to_scf_forall(
+                matmul, tile_sizes=[2], mapping=[block_attr(MappingId.DimX)]
+            )
 
-        top_func = match(variant_op, ["func.func"])
+            top_func = match(variant_op, ["func.func"])
 
-        @apply_patterns(top_func)
-        def pats():
-            apply_patterns_canonicalization()
+            @apply_patterns(top_func)
+            def pats():
+                apply_patterns_canonicalization()
 
-        top_func = match(variant_op, ["func.func"])
-        apply_cse(top_func)
+            top_func = match(variant_op, ["func.func"])
+            apply_cse(top_func)
 
     ctx.module.operation.verify()
     correct = dedent(
@@ -475,28 +481,27 @@ def pats():
         %0 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%arg0, %arg1 : tensor<3x5xf32>, tensor<5x3xf32>) outs(%arg2 : tensor<3x3xf32>) -> tensor<3x3xf32>
         return %0 : tensor<3x3xf32>
       }
-      transform.sequence  failures(propagate) attributes {transform.target_tag = "basic"} {
-      ^bb0(%arg0: !pdl.operation):
-        %0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        %tiled_op, %forall_op = transform.structured.tile_using_forall %0 tile_sizes [2](mapping = [#gpu.block<x>]) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-        %1 = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        apply_patterns to %1 {
-          transform.apply_patterns.canonicalization
-        } : !transform.any_op
-        %2 = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        apply_cse to %2 : !transform.any_op
+      module attributes {transform.with_named_sequence} {
+        transform.named_sequence @__transform_main(%arg0: !transform.any_op) {
+          %0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          %tiled_op, %forall_op = transform.structured.tile_using_forall %0 tile_sizes [2](mapping = [#gpu.block<x>]) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+          %1 = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          transform.apply_patterns to %1 {
+            transform.apply_patterns.canonicalization
+          } : !transform.any_op
+          %2 = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          transform.apply_cse to %2 : !transform.any_op
+          transform.yield 
+        }
       }
     }
     """
     )
     filecheck(correct, ctx.module)
 
-    module = run_pipeline(
+    mod = run_pipeline(
         ctx.module,
-        Pipeline()
-        .transform_dialect_interpreter()
-        .transform_dialect_erase_schedule()
-        .canonicalize(),
+        Pipeline().transform_interpreter().canonicalize(),
     )
 
     correct = dedent(
@@ -520,7 +525,7 @@ def pats():
     }
     """
     )
-    filecheck(correct, module)
+    filecheck(correct, mod)
 
 
 def test_two_schedules(ctx: MLIRContext):
@@ -539,31 +544,33 @@ def conv_2d_nhwc_hwcf(
 
     conv_2d_nhwc_hwcf.emit()
 
-    @sequence(target_tag="tile_outer")
-    def tile_outer(target):
-        m = match(target, ["linalg.conv_2d_nchw_fchw"])
-        tiled = tile_to_scf_forall(
-            m,
-            tile_sizes=[0, 1, 8, 8],
-            mapping=[
-                block_attr(MappingId.DimX),
-                block_attr(MappingId.DimY),
-                block_attr(MappingId.DimZ),
-            ],
-        )
-
-    @sequence(target_tag="tile_inner")
-    def tile_inner(target):
-        m = match(target, ["linalg.conv_2d_nchw_fchw"])
-        tiled = tile_to_scf_forall(
-            m,
-            tile_sizes=[0, 1, 1, 1],
-            mapping=[
-                thread_attr(MappingId.DimX),
-                thread_attr(MappingId.DimY),
-                thread_attr(MappingId.DimZ),
-            ],
-        )
+    @module(attrs={"transform.with_named_sequence": UnitAttr.get()})
+    def mod():
+        @named_sequence("tile_outer", [any_op_t()], [])
+        def tile_outer(target):
+            m = match(target, ["linalg.conv_2d_nchw_fchw"])
+            tiled = tile_to_scf_forall(
+                m,
+                tile_sizes=[0, 1, 8, 8],
+                mapping=[
+                    block_attr(MappingId.DimX),
+                    block_attr(MappingId.DimY),
+                    block_attr(MappingId.DimZ),
+                ],
+            )
+
+        @named_sequence("tile_inner", [any_op_t()], [])
+        def tile_inner(target):
+            m = match(target, ["linalg.conv_2d_nchw_fchw"])
+            tiled = tile_to_scf_forall(
+                m,
+                tile_sizes=[0, 1, 1, 1],
+                mapping=[
+                    thread_attr(MappingId.DimX),
+                    thread_attr(MappingId.DimY),
+                    thread_attr(MappingId.DimZ),
+                ],
+            )
 
     correct = dedent(
         """\
@@ -572,27 +579,28 @@ def tile_inner(target):
         %0 = linalg.conv_2d_nchw_fchw ins(%arg0, %arg1 : tensor<1x1x66x66xf32>, tensor<3x1x3x3xf32>) outs(%arg2 : tensor<1x3x64x64xf32>) -> tensor<1x3x64x64xf32>
         return %0 : tensor<1x3x64x64xf32>
       }
-      transform.sequence  failures(propagate) attributes {transform.target_tag = "tile_outer"} {
-      ^bb0(%arg0: !pdl.operation):
-        %0 = transform.structured.match ops{["linalg.conv_2d_nchw_fchw"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        %tiled_op, %forall_op = transform.structured.tile_using_forall %0 tile_sizes [0, 1, 8, 8](mapping = [#gpu.block<x>, #gpu.block<y>, #gpu.block<z>]) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-      }
-      transform.sequence  failures(propagate) attributes {transform.target_tag = "tile_inner"} {
-      ^bb0(%arg0: !pdl.operation):
-        %0 = transform.structured.match ops{["linalg.conv_2d_nchw_fchw"]} in %arg0 : (!pdl.operation) -> !transform.any_op
-        %tiled_op, %forall_op = transform.structured.tile_using_forall %0 tile_sizes [0, 1, 1, 1](mapping = [#gpu.thread<x>, #gpu.thread<y>, #gpu.thread<z>]) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+      module attributes {transform.with_named_sequence} {
+        transform.named_sequence @tile_outer(%arg0: !transform.any_op) {
+          %0 = transform.structured.match ops{["linalg.conv_2d_nchw_fchw"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          %tiled_op, %forall_op = transform.structured.tile_using_forall %0 tile_sizes [0, 1, 8, 8](mapping = [#gpu.block<x>, #gpu.block<y>, #gpu.block<z>]) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+          transform.yield 
+        }
+        transform.named_sequence @tile_inner(%arg0: !transform.any_op) {
+          %0 = transform.structured.match ops{["linalg.conv_2d_nchw_fchw"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+          %tiled_op, %forall_op = transform.structured.tile_using_forall %0 tile_sizes [0, 1, 1, 1](mapping = [#gpu.thread<x>, #gpu.thread<y>, #gpu.thread<z>]) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+          transform.yield 
+        }
       }
     }
     """
     )
     filecheck(correct, ctx.module)
 
-    module = run_pipeline(
+    mod = run_pipeline(
         ctx.module,
         Pipeline()
-        .transform_dialect_interpreter(debug_transform_root_tag="tile_outer")
-        .transform_dialect_interpreter(debug_transform_root_tag="tile_inner")
-        .transform_dialect_erase_schedule()
+        .transform_interpreter(entry_point="tile_outer")
+        .transform_interpreter(entry_point="tile_inner")
         .canonicalize(),
     )
 
@@ -630,4 +638,4 @@ def tile_inner(target):
     """
     )
 
-    filecheck(correct, module)
+    filecheck(correct, mod)

From ef96e7b6de0e44b2034ef86f1ecb09ca95de3c10 Mon Sep 17 00:00:00 2001
From: max <maksim.levental@gmail.com>
Date: Fri, 22 Dec 2023 21:08:25 -0600
Subject: [PATCH 3/3] fix paralle/reduce

---
 mlir/extras/dialects/ext/scf.py | 28 +++++++++----
 tests/test_scf.py               | 74 +++++++++++++++++++++++++++------
 tests/test_transform.py         |  1 -
 3 files changed, 81 insertions(+), 22 deletions(-)

diff --git a/mlir/extras/dialects/ext/scf.py b/mlir/extras/dialects/ext/scf.py
index 77f0189..3d6854e 100644
--- a/mlir/extras/dialects/ext/scf.py
+++ b/mlir/extras/dialects/ext/scf.py
@@ -292,7 +292,9 @@ def induction_variables(self):
         return self.body.arguments
 
 
-parange_ = region_op(_parfor(ParallelOp), terminator=yield__)
+parange_ = region_op(
+    _parfor(ParallelOp), terminator=lambda xs: reduce_return(xs[0]) if xs else None
+)
 parange = _parfor_cm(ParallelOp)
 
 
@@ -332,20 +334,30 @@ def while__(cond: Value, *, loc=None, ip=None):
 
 
 class ReduceOp(ReduceOp):
-    def __init__(self, operand, *, loc=None, ip=None):
-        super().__init__(operand, loc=loc, ip=ip)
-        self.regions[0].blocks.append(operand.type, operand.type)
+    def __init__(self, operands, num_reductions, *, loc=None, ip=None):
+        super().__init__(operands, num_reductions, loc=loc, ip=ip)
+        for i in range(num_reductions):
+            self.regions[i].blocks.append(operands[i].type, operands[i].type)
 
 
-def reduce_(operand, *, loc=None, ip=None):
-    if loc is None:
-        loc = get_user_code_loc()
-    return ReduceOp(operand, loc=loc, ip=ip)
+def reduce_(*operands, num_reductions=1):
+    loc = get_user_code_loc()
+    return ReduceOp(operands, num_reductions, loc=loc)
 
 
 reduce = region_op(reduce_, terminator=lambda xs: reduce_return(*xs))
 
 
+@region_adder(terminator=lambda xs: reduce_return(*xs))
+def reduce2(reduce_op):
+    return reduce_op.regions[1]
+
+
+@region_adder(terminator=lambda xs: reduce_return(*xs))
+def reduce3(reduce_op):
+    return reduce_op.regions[2]
+
+
 def yield_(*args):
     if len(args) == 1 and isinstance(args[0], (list, OpResultList)):
         args = list(args[0])
diff --git a/tests/test_scf.py b/tests/test_scf.py
index 8308e9d..86e1c1e 100644
--- a/tests/test_scf.py
+++ b/tests/test_scf.py
@@ -24,6 +24,8 @@
     while__,
     while___,
     placeholder_opaque_t,
+    reduce2,
+    reduce3,
 )
 from mlir.extras.dialects.ext.tensor import empty, Tensor
 from mlir.dialects.memref import alloca_scope_return
@@ -2429,6 +2431,7 @@ def forfoo(i, j, shared_outs):
     filecheck(correct, ctx.module)
 
 
+@pytest.mark.xfail
 def test_parange_no_inits(ctx: MLIRContext):
     ten = empty((10, 10), T.i32())
 
@@ -2494,12 +2497,12 @@ def test_forall_insert_slice_no_region_with_for(ctx: MLIRContext):
     filecheck(correct, ctx.module)
 
 
+@pytest.mark.xfail
 def test_parange_no_inits_with_for(ctx: MLIRContext):
     ten = empty((10, 10), T.i32())
 
     for i, j in parange([1, 1], [2, 2], [3, 3], inits=[]):
         one = constant(1.0)
-        yield_()
 
     ctx.module.operation.verify()
     correct = dedent(
@@ -2535,8 +2538,6 @@ def res(lhs: Tensor, rhs: Tensor):
             assert isinstance(rhs, Tensor)
             return lhs + rhs
 
-        yield_()
-
     ctx.module.operation.verify()
     correct = dedent(
         """\
@@ -2551,12 +2552,11 @@ def res(lhs: Tensor, rhs: Tensor):
       %1 = scf.parallel (%arg0, %arg1) = (%c1, %c1_0) to (%c2, %c2_1) step (%c3, %c3_2) init (%0) -> tensor<10x10xi32> {
         %cst = arith.constant 1.000000e+00 : f32
         %2 = tensor.empty() : tensor<10x10xi32>
-        scf.reduce(%2)  : tensor<10x10xi32> {
+        scf.reduce(%2 : tensor<10x10xi32>) {
         ^bb0(%arg2: tensor<10x10xi32>, %arg3: tensor<10x10xi32>):
           %3 = arith.addi %arg2, %arg3 : tensor<10x10xi32>
           scf.reduce.return %3 : tensor<10x10xi32>
         }
-        scf.yield
       }
     }
     """
@@ -2569,16 +2569,14 @@ def test_parange_inits_with_for_with_two_reduce(ctx: MLIRContext):
 
     for i, j in parange([1, 1], [2, 2], [3, 3], inits=[one, one]):
 
-        @reduce(i)
+        @reduce(i, j, num_reductions=2)
         def res1(lhs: T.index(), rhs: T.index()):
             return lhs + rhs
 
-        @reduce(j)
+        @reduce2(res1)
         def res1(lhs: T.index(), rhs: T.index()):
             return lhs + rhs
 
-        yield_()
-
     ctx.module.operation.verify()
     correct = dedent(
         """\
@@ -2591,17 +2589,67 @@ def res1(lhs: T.index(), rhs: T.index()):
       %c3 = arith.constant 3 : index
       %c3_3 = arith.constant 3 : index
       %0:2 = scf.parallel (%arg0, %arg1) = (%c1_0, %c1_1) to (%c2, %c2_2) step (%c3, %c3_3) init (%c1, %c1) -> (index, index) {
-        scf.reduce(%arg0)  : index {
+        scf.reduce(%arg0, %arg1 : index, index) {
         ^bb0(%arg2: index, %arg3: index):
           %1 = arith.addi %arg2, %arg3 : index
           scf.reduce.return %1 : index
-        }
-        scf.reduce(%arg1)  : index {
+        }, {
         ^bb0(%arg2: index, %arg3: index):
           %1 = arith.addi %arg2, %arg3 : index
           scf.reduce.return %1 : index
         }
-        scf.yield
+      }
+    }
+    """
+    )
+    filecheck(correct, ctx.module)
+
+
+def test_parange_inits_with_for_with_three_reduce(ctx: MLIRContext):
+    one = constant(1, index=True)
+
+    for i, j, k in parange([1, 1, 1], [2, 2, 2], [3, 3, 3], inits=[one, one, one]):
+
+        @reduce(i, j, k, num_reductions=3)
+        def res1(lhs: T.index(), rhs: T.index()):
+            return lhs + rhs
+
+        @reduce2(res1)
+        def res1(lhs: T.index(), rhs: T.index()):
+            return lhs + rhs
+
+        @reduce3(res1)
+        def res2(lhs: T.index(), rhs: T.index()):
+            return lhs + rhs
+
+    ctx.module.operation.verify()
+    correct = dedent(
+        """\
+    module {
+      %c1 = arith.constant 1 : index
+      %c1_0 = arith.constant 1 : index
+      %c1_1 = arith.constant 1 : index
+      %c1_2 = arith.constant 1 : index
+      %c2 = arith.constant 2 : index
+      %c2_3 = arith.constant 2 : index
+      %c2_4 = arith.constant 2 : index
+      %c3 = arith.constant 3 : index
+      %c3_5 = arith.constant 3 : index
+      %c3_6 = arith.constant 3 : index
+      %0:3 = scf.parallel (%arg0, %arg1, %arg2) = (%c1_0, %c1_1, %c1_2) to (%c2, %c2_3, %c2_4) step (%c3, %c3_5, %c3_6) init (%c1, %c1, %c1) -> (index, index, index) {
+        scf.reduce(%arg0, %arg1, %arg2 : index, index, index) {
+        ^bb0(%arg3: index, %arg4: index):
+          %1 = arith.addi %arg3, %arg4 : index
+          scf.reduce.return %1 : index
+        }, {
+        ^bb0(%arg3: index, %arg4: index):
+          %1 = arith.addi %arg3, %arg4 : index
+          scf.reduce.return %1 : index
+        }, {
+        ^bb0(%arg3: index, %arg4: index):
+          %1 = arith.addi %arg3, %arg4 : index
+          scf.reduce.return %1 : index
+        }
       }
     }
     """
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 9276200..e0b9f9e 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -31,7 +31,6 @@
     tile,
     tile_to_scf_forall,
 )
-from mlir.extras.meta import region_op
 from mlir.extras.runtime.passes import run_pipeline, Pipeline
 
 # noinspection PyUnresolvedReferences