nod-ai
diff --git a/‎compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.cpp
Lines changed: 63 additions & 75 deletions b/‎compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.cpp
Lines changed: 63 additions & 75 deletions
diff --git a/‎compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.h
Lines changed: 62 additions & 9 deletions b/‎compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.h
Lines changed: 62 additions & 9 deletions
@@ -17,31 +17,8 @@
 
 namespace mlir::iree_compiler::AMDAIE {
 
-namespace {
+namespace detail {
 
-/// Update the strides and offsets of `X` to match the strides of `Y` if it is
-/// possible to do so without changing the underlying access pattern of `X`. For
-/// example if
-///
-/// X has access pattern (offset: [5] sizes: [1] strides: [6]) and
-/// Y has access pattern (offset: [a] sizes: [b] strides: [3])
-///
-/// Then the access pattern for X can be changed to have access pattern
-/// (offset: [10] sizes: [1] strides: [3]) so that its stride matches Y's.
-///
-/// For this transformation to be possible in dimension `d` is it necessary that
-///
-/// 1) the size in dimension `d` of `X` is 1, and
-/// 2) the updated offset in `d` of `X` (i.e. offset * strideX / strideY)
-///    is an integer.
-///
-/// As another example, if we have:
-///
-/// X with access pattern (offset: [4, 8]  sizes: [1, 1] strides: [3, 8])
-/// Y with access pattern (offset: [a, b]  sizes: [d, e] strides: [6, 2])
-///
-/// then X can be transformed to have access pattern
-///                       (offset: [2, 32] sizes: [1, 1] strides: [6, 2])
 void matchStridesOfUnitDims(MLIRContext *ctx, ArrayRef<OpFoldResult> sizesX,
                             SmallVector<OpFoldResult> &stridesX,
                             SmallVector<OpFoldResult> &offsetsX,
@@ -70,33 +47,6 @@ void matchStridesOfUnitDims(MLIRContext *ctx, ArrayRef<OpFoldResult> sizesX,
   }
 }
 
-/// This function computes the difference between the global offsets of two
-/// access patterns. If it is not constant, i.e. if the difference contains
-/// an MLIR value which is not a constant, then nullopt is returned.
-///
-/// This function is useful when determining if the access pattern A, followed
-/// by the access pattern B, can be merged into a single access pattern.
-///
-/// \return global_offset(X) - global_offset(Y).
-///
-/// Background info: offsets, sizes, and strides define an access pattern into
-/// an array, where the i'th element accessed, for 0 <= i < prod_{d<D} sizes[d],
-/// is at index
-///
-///     sum_{d<D} (l(d,i) + offset[d]) * stride[d]                (1)
-///
-/// where l(d,i) is the component of global index `i` in dimension `d`:
-///
-///     i  = sum_{d<D} l(d,i) * size[d]                           (2)
-///
-/// Equation (1) can be rewritten with a global offset as
-///
-///     global_offset + sum_{d<D} l(d,i) * stride[d]              (3)
-///
-/// where the global offset is
-///
-///     global_offset = sum_{d<D} offset[d] * stride[d].
-///
 std::optional<int64_t> getGlobalOffsetDifference(
     ArrayRef<OpFoldResult> offsetsX, ArrayRef<OpFoldResult> stridesX,
     ArrayRef<OpFoldResult> offsetsY, ArrayRef<OpFoldResult> stridesY) {
@@ -108,9 +58,23 @@ std::optional<int64_t> getGlobalOffsetDifference(
          "expected same number of offsets for X and Y");
 
   int64_t globalOffsetDifference{0};
+
+  // In this function we're computing the constant globalOffsetDifference:
+  //
+  //    sum_{d} offsetsX[d] * stridesX[d]  -
+  //    sum_{d} offsetsY[d] * stridesY[d] .
+  //
+  // If all values in offsetsX, offsetsY, stridesX, stridesY are constant,
+  // this is straightforward. If not, we need all the non-constant terms to
+  // cancel. In the maps below, we store the terms with non-constants, and then
+  // check that they've all cancelled at the end. In `valToConst` we store terms
+  // where one of offset and stride is constant, and the other is not. In
+  // valPairs, we keep track of all the terms where neither the stride nor the
+  // offset is constant.
   DenseMap<Value, int64_t> valToConst;
+  DenseMap<std::pair<Value, Value>, int64_t> valPairs;
 
-  auto increment = [&](Value v, int64_t signedStride) {
+  auto incrementValConst = [&](Value v, int64_t signedStride) {
     auto iter = valToConst.find(v);
     if (iter == valToConst.end()) {
       valToConst[v] = signedStride;
@@ -119,43 +83,65 @@ std::optional<int64_t> getGlobalOffsetDifference(
     }
   };
 
+  auto incrementValVal = [&](Value v0, Value v1, int64_t sign) {
+    std::pair<Value, Value> p0(v0, v1);
+    auto iter0 = valPairs.find(p0);
+    if (iter0 != valPairs.end()) {
+      iter0->second += sign;
+      return;
+    }
+
+    std::pair<Value, Value> p1(v1, v0);
+    auto iter1 = valPairs.find(p1);
+    if (iter1 != valPairs.end()) {
+      iter1->second += sign;
+      return;
+    }
+    valPairs.insert({p0, sign});
+  };
+
+  // Add the term `offset * stride * sign` to the global offset difference,
+  // triaging the different combinations of constant/non-constant.
   auto updateGlobalOffsetDifference = [&](OpFoldResult offset,
                                           OpFoldResult stride, int64_t sign) {
-    std::optional<int64_t> o = getConstantIntValue(offset);
-    std::optional<int64_t> s = getConstantIntValue(stride);
-    if (!o.has_value() && !s.has_value()) {
-      // The case where both the stride and offset are non-constant can be
-      // handled, but it'll add more complexity so I'm ignoring for now.
-      return false;
-    } else if (o.has_value() && s.has_value()) {
-      globalOffsetDifference += sign * o.value() * s.value();
-    } else if (o.has_value()) {
-      increment(cast<Value>(stride), sign * o.value());
-    } else if (s.has_value()) {
-      increment(cast<Value>(offset), sign * s.value());
+    std::optional<int64_t> cOffset = getConstantIntValue(offset);
+    std::optional<int64_t> cStride = getConstantIntValue(stride);
+    Value vOffset = dyn_cast<Value>(offset);
+    Value vStride = dyn_cast<Value>(stride);
+
+    if (!cOffset.has_value() && !cStride.has_value()) {
+      incrementValVal(vOffset, vStride, sign);
+    } else if (cOffset.has_value() && cStride.has_value()) {
+      globalOffsetDifference += sign * cOffset.value() * cStride.value();
+    } else if (cOffset.has_value()) {
+      incrementValConst(cast<Value>(stride), sign * cOffset.value());
+    } else if (cStride.has_value()) {
+      incrementValConst(cast<Value>(offset), sign * cStride.value());
     }
-    return true;
   };
 
   for (uint32_t i = 0; i < offsetsX.size(); ++i) {
     // If offsets and strides are the same, the contribution to the global
     // offset difference is zero, so we can skip this dimension.
     if (offsetsX[i] == offsetsY[i] && stridesX[i] == stridesY[i]) continue;
-
-    if (updateGlobalOffsetDifference(offsetsX[i], stridesX[i], 1) == false)
-      return std::nullopt;
-    if (updateGlobalOffsetDifference(offsetsY[i], stridesY[i], -1) == false)
-      return std::nullopt;
+    updateGlobalOffsetDifference(offsetsX[i], stridesX[i], 1);
+    updateGlobalOffsetDifference(offsetsY[i], stridesY[i], -1);
   }
 
+  // If the non-constant terms did not all cancel, the global offset
+  // difference cannot be shown to be constant, so nullopt is returned.
   for (auto [offset, stride] : valToConst) {
-    // There is a non-constant offset with a stride that is not zero.
-    // This means that the global offset difference is not a constant.
     if (stride != 0) return std::nullopt;
   }
+  for (auto [valPair, valPairCount] : valPairs) {
+    if (valPairCount != 0) return std::nullopt;
+  }
 
   return globalOffsetDifference;
 }
+}  // namespace detail
+
+namespace {
 
 /// Consider 2 access patterns X and Y, where the access pattern for Y has one
 /// more dimension than the access pattern for X. This function inserts a
@@ -210,8 +196,8 @@ bool mergeInFirst(MLIRContext *ctx, SmallVector<OpFoldResult> &offsetsA,
     // dimensions of size 1, which is being ignored for now).
     return false;
   }
-  matchStridesOfUnitDims(ctx, sizesA, stridesA, offsetsA, stridesB);
-  matchStridesOfUnitDims(ctx, sizesB, stridesB, offsetsB, stridesA);
+  detail::matchStridesOfUnitDims(ctx, sizesA, stridesA, offsetsA, stridesB);
+  detail::matchStridesOfUnitDims(ctx, sizesB, stridesB, offsetsB, stridesA);
 
   // Check that strides and sizes are compatible for merging.
   if (stridesA != stridesB) return false;
@@ -221,7 +207,7 @@ bool mergeInFirst(MLIRContext *ctx, SmallVector<OpFoldResult> &offsetsA,
   }
 
   std::optional<int64_t> maybeOffsetDifference =
-      getGlobalOffsetDifference(offsetsB, stridesB, offsetsA, stridesA);
+      detail::getGlobalOffsetDifference(offsetsB, stridesB, offsetsA, stridesA);
 
   // The case where the global offset difference is not constant is difficult to
   // handle, unless we can prove that it is non-negative. Leaving this edge case
@@ -278,6 +264,8 @@ LogicalResult combineAccessPatterns(
   assert(offsetsB.size() == stridesB.size() &&
          "expected same number of source offsets and strides");
 
+  // Ensure that OpFoldResults are Attributes when they can be. Specifically
+  // this will replace arith.constant values with attributes.
   auto simplified =
       [&](ArrayRef<OpFoldResult> input) -> SmallVector<OpFoldResult> {
     SmallVector<OpFoldResult> x(input.begin(), input.end());
 
@@ -69,15 +69,68 @@ struct RetrieveScaleAndBias
   }
 };
 
-/// Check whether two access patterns are equal in value, starting from
-/// specified indices.
-bool areAccessPatternsEqualFromIndices(ArrayRef<OpFoldResult> offsetsA,
-                                       ArrayRef<OpFoldResult> sizesA,
-                                       ArrayRef<OpFoldResult> stridesA,
-                                       ArrayRef<OpFoldResult> offsetsB,
-                                       ArrayRef<OpFoldResult> sizesB,
-                                       ArrayRef<OpFoldResult> stridesB,
-                                       size_t indexA = 0, size_t indexB = 0);
+namespace detail {
+
+/// Update the strides and offsets of `X` to match the strides of `Y` if it is
+/// possible to do so without changing the underlying access pattern of `X`. For
+/// example if
+///
+/// X has access pattern (offset: [5] sizes: [1] strides: [6]) and
+/// Y has access pattern (offset: [a] sizes: [b] strides: [3])
+///
+/// Then the access pattern for X can be changed to have access pattern
+/// (offset: [10] sizes: [1] strides: [3]) so that its stride matches Y's.
+///
+/// For this transformation to be possible in dimension `d` it is necessary that
+///
+/// 1) the size in dimension `d` of `X` is 1, and
+/// 2) the updated offset in `d` of `X` (i.e. offset * strideX / strideY)
+///    is an integer.
+///
+/// As another example, if we have:
+///
+/// X with access pattern (offset: [4, 8]  sizes: [1, 1] strides: [3, 8])
+/// Y with access pattern (offset: [a, b]  sizes: [d, e] strides: [6, 2])
+///
+/// then X can be transformed to have access pattern
+///                       (offset: [2, 32] sizes: [1, 1] strides: [6, 2])
+void matchStridesOfUnitDims(MLIRContext *ctx, ArrayRef<OpFoldResult> sizesX,
+                            SmallVector<OpFoldResult> &stridesX,
+                            SmallVector<OpFoldResult> &offsetsX,
+                            ArrayRef<OpFoldResult> stridesY);
+
+/// This function computes the difference between the global offsets of two
+/// access patterns. If it is not constant, i.e. if the difference contains
+/// an MLIR value which is not a constant, then nullopt is returned.
+///
+/// This function is useful when determining if the access pattern A, followed
+/// by the access pattern B, can be merged into a single access pattern.
+///
+/// \return global_offset(X) - global_offset(Y).
+///
+/// Background info: offsets, sizes, and strides define an access pattern into
+/// an array, where the i'th element accessed, for 0 <= i < prod_{d<D} sizes[d],
+/// is at index
+///
+///     sum_{d<D} (l(d,i) + offset[d]) * stride[d]                (1)
+///
+/// where l(d,i) is the component of global index `i` in dimension `d`:
+///
+///     i  = sum_{d<D} l(d,i) * size[d]                           (2)
+///
+/// Equation (1) can be rewritten with a global offset as
+///
+///     global_offset + sum_{d<D} l(d,i) * stride[d]              (3)
+///
+/// where the global offset is
+///
+///     global_offset = sum_{d<D} offset[d] * stride[d].
+///
+std::optional<int64_t> getGlobalOffsetDifference(
+    ArrayRef<OpFoldResult> offsetsX, ArrayRef<OpFoldResult> stridesX,
+    ArrayRef<OpFoldResult> offsetsY, ArrayRef<OpFoldResult> stridesY);
+
+}  // namespace detail
 
 /// Combine two access patterns into a single one. Assumes that access pattern A
 /// belongs to a strided op which is ordered before the strided op B. Takes a