
Commit fd34371

Merge pull request #257 from bokveizen/fanchen_250306
[Additional features for #255] Added embedding functions for MCP
2 parents d1d238f + 53bfefc commit fd34371

4 files changed: +75 −8 lines changed

rl4co/models/nn/env_embeddings/context.py

+28 −6

@@ -3,7 +3,7 @@
 
 from tensordict import TensorDict
 
-from rl4co.utils.ops import gather_by_index
+from rl4co.utils.ops import gather_by_index, batched_scatter_sum
 
 
 def env_context_embedding(env_name: str, config: dict) -> nn.Module:
@@ -36,6 +36,7 @@ def env_context_embedding(env_name: str, config: dict) -> nn.Module:
         "mtvrp": MTVRPContext,
         "shpp": TSPContext,
         "flp": FLPContext,
+        "mcp": MCPContext,
     }
 
     if env_name not in embedding_registry:
@@ -379,9 +380,8 @@ class FLPContext(EnvContext):
     """
     def __init__(self, embed_dim: int):
         super(FLPContext, self).__init__(embed_dim=embed_dim)
-        self.embed_dim = embed_dim
-        # self.mlp_context = MLP(embed_dim, [embed_dim, embed_dim])
-        self.projection = nn.Linear(embed_dim, embed_dim, bias=True)
+        self.embed_dim = embed_dim
+        self.project_context = nn.Linear(embed_dim, embed_dim, bias=True)
 
     def forward(self, embeddings, td):
         cur_dist = td["distances"].unsqueeze(-2)  # (batch_size, 1, n_points)
@@ -390,5 +390,27 @@ def forward(self, embeddings, td):
 
         # softmax
         loc_best_soft = torch.softmax(dist_improve, dim=-1)  # (batch_size, n_points)
-        embed_best = (embeddings * loc_best_soft[..., None]).sum(-2)
-        return embed_best
+        context_embedding = (embeddings * loc_best_soft[..., None]).sum(-2)
+        return self.project_context(context_embedding)
+
+class MCPContext(EnvContext):
+    """Context embedding for the Maximum Coverage Problem (MCP).
+    """
+    def __init__(self, embed_dim: int):
+        super(MCPContext, self).__init__(embed_dim=embed_dim)
+        self.embed_dim = embed_dim
+        self.project_context = nn.Linear(embed_dim, embed_dim, bias=True)
+
+    def forward(self, embeddings, td):
+        membership_weighted = batched_scatter_sum(
+            td["weights"].unsqueeze(-1), td["membership"].long()
+        )
+        membership_weighted.squeeze_(-1)
+        # membership_weighted: [batch_size, n_sets]
+
+        # softmax; higher weights for better sets
+        membership_weighted = torch.softmax(
+            membership_weighted, dim=-1
+        )  # (batch_size, n_sets)
+        context_embedding = (membership_weighted.unsqueeze(-1) * embeddings).sum(1)
+        return self.project_context(context_embedding)
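
For orientation, here is a minimal standalone sketch of what the new MCPContext.forward computes, written in plain torch with illustrative sizes. It assumes td["weights"] holds per-item weights of shape [batch, n_items], td["membership"] holds 1-based, zero-padded item indices per set of shape [batch, n_sets, K], and embeddings are the per-set encoder outputs of shape [batch, n_sets, embed_dim]; these layouts are assumptions for the sketch, not taken verbatim from this diff.

import torch

from rl4co.utils.ops import batched_scatter_sum  # added in rl4co/utils/ops.py in this PR

batch, n_items, n_sets, K, embed_dim = 2, 10, 5, 3, 128  # illustrative sizes
weights = torch.rand(batch, n_items)                            # per-item weights (assumed layout)
membership = torch.randint(0, n_items + 1, (batch, n_sets, K))  # 1-based item indices, 0 = padding
embeddings = torch.rand(batch, n_sets, embed_dim)               # per-set encoder embeddings

# total item weight covered by each set: [batch, n_sets]
set_weight = batched_scatter_sum(weights.unsqueeze(-1), membership).squeeze(-1)
# soft attention over sets; heavier sets get larger coefficients
set_prob = torch.softmax(set_weight, dim=-1)
# context embedding: weighted pooling of the set embeddings, shape [batch, embed_dim]
context = (set_prob.unsqueeze(-1) * embeddings).sum(1)

MCPContext then passes this pooled vector through project_context, a Linear(embed_dim, embed_dim), before returning it.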

rl4co/models/nn/env_embeddings/init.py

+15 −2

@@ -4,7 +4,7 @@
 from tensordict.tensordict import TensorDict
 
 from rl4co.models.nn.ops import PositionalEncoding
-from rl4co.utils.ops import cartesian_to_polar
+from rl4co.utils.ops import cartesian_to_polar, batched_scatter_sum
 
 
 def env_init_embedding(env_name: str, config: dict) -> nn.Module:
@@ -41,6 +41,7 @@ def env_init_embedding(env_name: str, config: dict) -> nn.Module:
         "mtvrp": MTVRPInitEmbedding,
         "shpp": TSPInitEmbedding,
         "flp": FLPInitEmbedding,
+        "mcp": MCPInitEmbedding,
     }
 
     if env_name not in embedding_registry:
@@ -571,4 +572,16 @@ def __init__(self, embed_dim: int):
 
     def forward(self, td: TensorDict):
         hdim = self.projection(td["locs"])
-        return hdim
+        return hdim
+
+class MCPInitEmbedding(nn.Module):
+    def __init__(self, embed_dim: int):
+        super().__init__()
+        self.projection_items = nn.Linear(1, embed_dim, bias=True)
+
+    def forward(self, td: TensorDict):
+        items_embed = self.projection_items(td["weights"].unsqueeze(-1))
+        # sum pooling
+        membership_emb = batched_scatter_sum(items_embed, td["membership"].long())
+        return membership_emb
+
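
MCPInitEmbedding embeds each item weight with a Linear(1, embed_dim) and then sum-pools the item embeddings into one embedding per set via batched_scatter_sum. A hedged usage sketch, assuming the MCP TensorDict carries "weights" of shape [batch, n_items] and a zero-padded, 1-based "membership" of shape [batch, n_sets, K]; the shapes here are chosen for illustration.

import torch
from tensordict import TensorDict

from rl4co.models.nn.env_embeddings.init import MCPInitEmbedding

batch, n_items, n_sets, K, embed_dim = 2, 10, 5, 3, 128  # illustrative sizes
td = TensorDict(
    {
        "weights": torch.rand(batch, n_items),                            # per-item weights
        "membership": torch.randint(0, n_items + 1, (batch, n_sets, K)),  # 1-based, 0-padded
    },
    batch_size=[batch],
)
init_embed = MCPInitEmbedding(embed_dim)
set_embeddings = init_embed(td)  # [batch, n_sets, embed_dim]: sum of its items' embeddings per set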

rl4co/utils/ops.py

+30

@@ -283,3 +283,33 @@ def select_start_nodes_by_distance(td, env, num_starts, exclude_depot=True):
     )
     selected_nodes = node_index[:, 1:] if exclude_depot else node_index[:, :-1]
     return rearrange(selected_nodes, "b n -> (n b)")
+
+
+def batched_scatter_sum(src, idx):
+    """Performs a batched scatter and sum operation on the source tensor using the provided indices.
+
+    Parameters:
+        src (Tensor): A tensor of shape [batch_size, N, h].
+            Contains the data to be scattered and summed.
+        idx (Tensor): A tensor of shape [batch_size, M, K] with zero-padding.
+            Each non-zero element in idx represents an index (offset by 1)
+            into src. A zero value indicates a padded (invalid) index.
+
+    Returns:
+        Tensor: A tensor of shape [batch_size, M, h] where for each batch and each index j,
+            the output is computed as:
+            Output[batch, j] = sum(src[batch, k - 1] for k in idx[batch, j] if k != 0)
+            The subtraction of 1 is applied because 0 is used as the padding value.
+
+    Details:
+        - A temporary target tensor (tgt) of shape [batch_size, N+1, h] is created,
+          where tgt[:, 1:] is populated with src.
+        - The function reshapes idx to gather the corresponding values and then reshapes
+          the result back to [batch_size, M, K, h] before summing over the scattering dimension.
+    """
+    bs, N, h = src.shape
+    bs, M, K = idx.shape
+    tgt = torch.zeros(bs, N + 1, h, device=src.device)
+    tgt[:, 1:] = src
+    tgt = gather_by_index(tgt, idx.long().reshape(bs, -1), squeeze=False)
+    return tgt.reshape(bs, M, K, h).sum(-2)
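
A tiny worked example of the indexing convention (1-based indices into src, with 0 reserved for padding); the numbers are illustrative, not taken from the PR.

import torch

from rl4co.utils.ops import batched_scatter_sum

src = torch.tensor([[[1.0], [2.0], [3.0]]])  # [batch=1, N=3, h=1]
# two index groups per batch, 1-based and zero-padded:
# group 0 gathers src rows 0 and 1 (indices 1 and 2); group 1 gathers row 2 only (index 3, then padding)
idx = torch.tensor([[[1, 2], [3, 0]]])       # [batch=1, M=2, K=2]
out = batched_scatter_sum(src, idx)          # [1, 2, 1]
# out[0, 0] is 1.0 + 2.0 = 3.0; out[0, 1] is 3.0, since the padding slot contributes zero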

tests/test_policy.py

+2

@@ -20,6 +20,8 @@
         "dpp",
         "mdpp",
         "smtwtp",
+        "flp",
+        "mcp",
     ],
 )
 def test_am_policy(env_name, size=20, batch_size=2):
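
The two new entries extend the existing attention-model policy smoke test to cover "flp" and "mcp". For reference, a rough hedged sketch of exercising the policy on the new "mcp" environment outside the test harness; it assumes the MCP environment added in #255 is registered under "mcp" with usable generator defaults and uses the usual rl4co entry points, and it is not the body of test_am_policy.

import torch

from rl4co.envs import get_env
from rl4co.models.zoo import AttentionModelPolicy

env = get_env("mcp")                               # MCP environment from #255 (assumed defaults)
td = env.reset(batch_size=[2])
policy = AttentionModelPolicy(env_name=env.name, embed_dim=128)
with torch.inference_mode():
    out = policy(td, env, decode_type="greedy")
print(out["reward"].shape)                         # one reward per instance in the batch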
