Skip to content

Commit 2fff532

Browse files
Move some strategy generation utilities from auto_sharding_dot_handler.cc to
auto_sharding_strategy.h with the intention of using the utilities more broadly throughout the codebase. PiperOrigin-RevId: 731094359
1 parent d87634f commit 2fff532

File tree

3 files changed

+55
-40
lines changed

3 files changed

+55
-40
lines changed

xla/hlo/experimental/auto_sharding/auto_sharding_dot_handler.cc

+6-40
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,6 @@ namespace xla {
5858
namespace spmd {
5959
namespace {
6060

61-
using MeshDimSet = StableSet<int>;
62-
using DimMap = StableMap</*tensor dim*/ int, /*mesh dims*/ MeshDimSet>;
63-
6461
// Contains base functionality common to both DotHandler and ConvHandler.
6562
class HandlerBase {
6663
protected:
@@ -145,33 +142,6 @@ class HandlerBase {
145142
std::optional<HloSharding> GetShardingFromUser(const HloSharding& lhs_spec,
146143
const HloSharding& rhs_spec);
147144

148-
// Given a set of tensor dims, and a set of mesh dims, enumerates all mappings
149-
// where a subset of all tensor dims is mapped to a subset of mesh dims, such
150-
// that each tensor dim is mapped to at most mesh dim, and no two tensor dims
151-
// are mapped to the same mesh dim.
152-
void Enumerate(std::function<void(const DimMap&)> split_func, int tensor_rank,
153-
int current_mesh_dim_idx,
154-
const std::vector<int>& unassigned_mesh_dims,
155-
const DimMap& current_dim_map) {
156-
if (current_mesh_dim_idx == unassigned_mesh_dims.size()) {
157-
split_func(current_dim_map);
158-
return;
159-
}
160-
// Current mesh dim is not assigned to any tensor dim
161-
Enumerate(split_func, tensor_rank, current_mesh_dim_idx + 1,
162-
unassigned_mesh_dims, current_dim_map);
163-
164-
for (int i = 0; i < tensor_rank; ++i) {
165-
DimMap updated_dim_map = current_dim_map;
166-
if (!updated_dim_map[i].empty() && !option_.allow_mixed_mesh_shape) {
167-
continue;
168-
}
169-
updated_dim_map[i].insert(unassigned_mesh_dims[current_mesh_dim_idx]);
170-
Enumerate(split_func, tensor_rank, current_mesh_dim_idx + 1,
171-
unassigned_mesh_dims, updated_dim_map);
172-
}
173-
}
174-
175145
bool IsMeshDimSetNonTrivial(const MeshDimSet& mesh_dim_set) {
176146
return absl::c_any_of(mesh_dim_set, [&](int mesh_dim) {
177147
return device_mesh_.dim(mesh_dim) > 1;
@@ -732,9 +702,8 @@ void DotHandler::GenerateDotShardingStrategiesFromOutputSharding(
732702
/*compute_cost=*/0, communication_cost_fn);
733703
};
734704

735-
Enumerate(split_func, reduction_dims.size(),
736-
/*current_mesh_dim_idx=*/0, unused_mesh_dims,
737-
/*current_dim_map=*/{});
705+
Enumerate(split_func, reduction_dims.size(), unused_mesh_dims,
706+
option_.allow_mixed_mesh_shape);
738707
}
739708

740709
void DotHandler::AppendAllGatherWindowedEinsumStrategyForOperand(
@@ -836,8 +805,7 @@ absl::Status DotHandler::RegisterStrategies() {
836805
[&](const DimMap& output_dim_map) {
837806
GenerateDotShardingStrategiesFromOutputSharding(output_dim_map);
838807
},
839-
ins_->shape().rank(), /*current_mesh_dim_idx=*/0, all_mesh_dims,
840-
/*current_dim_map=*/{});
808+
ins_->shape().rank(), all_mesh_dims, option_.allow_mixed_mesh_shape);
841809
SortStrategies();
842810
return absl::OkStatus();
843811
}
@@ -957,8 +925,7 @@ absl::Status ConvHandler::RegisterStrategies() {
957925
[&](const DimMap& output_dim_map) {
958926
GenerateConvolutionShardingStrategiesFromOutputSharding(output_dim_map);
959927
},
960-
2, /*current_mesh_dim_idx=*/0, all_mesh_dims,
961-
/*current_dim_map=*/{});
928+
2, all_mesh_dims, option_.allow_mixed_mesh_shape);
962929

963930
SortStrategies();
964931
return absl::OkStatus();
@@ -997,9 +964,8 @@ void ConvHandler::SplitDepthwise(bool forward) {
997964
};
998965
std::vector<int> all_mesh_dims(device_mesh_.num_dimensions());
999966
std::iota(all_mesh_dims.begin(), all_mesh_dims.end(), 0);
1000-
Enumerate(split_func, ins_->shape().rank(), /*current_mesh_dim_idx=*/0,
1001-
all_mesh_dims,
1002-
/*current_dim_map=*/{});
967+
Enumerate(split_func, ins_->shape().rank(), all_mesh_dims,
968+
option_.allow_mixed_mesh_shape);
1003969
}
1004970

1005971
} // namespace

xla/hlo/experimental/auto_sharding/auto_sharding_strategy.cc

+38
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ limitations under the License.
1919
#include <cmath>
2020
#include <cstddef>
2121
#include <cstdint>
22+
#include <functional>
2223
#include <memory>
2324
#include <optional>
2425
#include <string>
@@ -64,6 +65,43 @@ limitations under the License.
6465
namespace xla {
6566
namespace spmd {
6667

68+
void EnumerateHelper(std::function<void(const DimMap&)> split_func,
69+
int tensor_rank, int current_mesh_dim_idx,
70+
const std::vector<int>& unassigned_mesh_dims,
71+
const DimMap& current_dim_map,
72+
bool allow_mixed_mesh_shape) {
73+
if (current_mesh_dim_idx == unassigned_mesh_dims.size()) {
74+
split_func(current_dim_map);
75+
return;
76+
}
77+
// Current mesh dim is not assigned to any tensor dim
78+
EnumerateHelper(split_func, tensor_rank, current_mesh_dim_idx + 1,
79+
unassigned_mesh_dims, current_dim_map,
80+
allow_mixed_mesh_shape);
81+
82+
for (int i = 0; i < tensor_rank; ++i) {
83+
DimMap updated_dim_map = current_dim_map;
84+
if (!updated_dim_map[i].empty() && !allow_mixed_mesh_shape) {
85+
continue;
86+
}
87+
updated_dim_map[i].insert(unassigned_mesh_dims[current_mesh_dim_idx]);
88+
EnumerateHelper(split_func, tensor_rank, current_mesh_dim_idx + 1,
89+
unassigned_mesh_dims, updated_dim_map,
90+
allow_mixed_mesh_shape);
91+
}
92+
}
93+
94+
// Map tensor dims from [0, tensor_shape.rank() - 1] to (at most one, or more,
95+
// depending on the value of allow_mixed_mesh_shape) mesh dims.
96+
void Enumerate(std::function<void(const DimMap&)> split_func,
97+
int64_t tensor_rank,
98+
const std::vector<int>& unassigned_mesh_dims,
99+
bool allow_mixed_mesh_shape) {
100+
EnumerateHelper(split_func, tensor_rank, /*current_mesh_dim_idx=*/0,
101+
unassigned_mesh_dims,
102+
/*current_dim_map=*/{}, allow_mixed_mesh_shape);
103+
}
104+
67105
bool LeafVectorsAreConsistent(const std::vector<ShardingStrategy>& one,
68106
const std::vector<ShardingStrategy>& two) {
69107
if (one.size() != two.size()) return false;

xla/hlo/experimental/auto_sharding/auto_sharding_strategy.h

+11
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ limitations under the License.
1919
#include <algorithm>
2020
#include <cstddef>
2121
#include <cstdint>
22+
#include <functional>
2223
#include <iterator>
2324
#include <memory>
2425
#include <optional>
@@ -387,6 +388,16 @@ using AssociativeDotPairs =
387388
// The set of all alias pairs
388389
using AliasSet = StableSet<std::pair<NodeIdx, NodeIdx>>;
389390

391+
// Utilities for creating sharding objects
392+
using MeshDimSet = StableSet<int>;
393+
using DimMap = StableMap</*tensor dim*/ int, /*mesh dims*/ MeshDimSet>;
394+
395+
// Map tensor dims from [0, tensor_shape.rank() - 1] to (at most one, or more,
396+
// depending on the value of allow_mixed_mesh_shape) mesh dims.
397+
void Enumerate(std::function<void(const DimMap&)> split_func,
398+
int64_t tensor_rank,
399+
const std::vector<int>& unassigned_mesh_dims,
400+
bool allow_mixed_mesh_shape);
390401
} // namespace spmd
391402
} // namespace xla
392403
#endif // XLA_HLO_EXPERIMENTAL_AUTO_SHARDING_AUTO_SHARDING_STRATEGY_H_

0 commit comments

Comments
 (0)