Updated code to remove MetricsTracker

finbarrtimbers · finbarrtimbers · commit abe438769df7 · 2025-11-06T09:41:36.000-07:00
diff --git a/open_instruct/grpo_fast.py b/open_instruct/grpo_fast.py
@@ -92,7 +92,7 @@
     cleanup_all_llm_judge_clients,
     soft_format_reward_func,
 )
-from open_instruct.metrics import LossStatistics, MetricsTracker
+from open_instruct.metrics import LossStatistics
 from open_instruct.model_utils import (
     Batch,
     ModelConfig,
@@ -529,11 +529,10 @@ def compare_logprobs(
     mask: torch.Tensor,
     masked_mean_axis: int | None,
     masked_mean_denominator: float | None,
-    metrics_tracker: MetricsTracker,
-) -> None:
+) -> dict[str, float]:
     """Compare locally computed log probabilities with reference log probabilities.
 
-    Computes statistics on the difference between two sets of log probabilities and records
+    Computes statistics on the difference between two sets of log probabilities and returns
     debugging metrics including mean/max/std differences and reverse KL divergence.
 
     Args:
@@ -542,7 +541,9 @@ def compare_logprobs(
         mask: Boolean mask indicating valid response tokens (shape: [batch, seq_len])
         masked_mean_axis: Axis for masked mean reduction
         masked_mean_denominator: Denominator for masked mean computation
-        metrics_tracker: MetricsTracker instance for recording debug metrics
+
+    Returns:
+        Dictionary of debug metrics
     """
     with torch.no_grad():
         valid_mask = mask & ~torch.isnan(old_logprobs)
@@ -557,10 +558,12 @@ def compare_logprobs(
             masked_mean_axis,
             masked_mean_denominator,
         )
-        metrics_tracker.add("debug/vllm_vs_local_logprob_diff_mean", mean_diff)
-        metrics_tracker.add("debug/vllm_vs_local_logprob_diff_max", max_diff)
-        metrics_tracker.add("debug/vllm_vs_local_logprob_diff_std", std_diff)
-        metrics_tracker.add("debug/vllm_local_reverse_kl", reverse_kl)
+        return {
+            "debug/vllm_vs_local_logprob_diff_mean": mean_diff.item(),
+            "debug/vllm_vs_local_logprob_diff_max": max_diff.item(),
+            "debug/vllm_vs_local_logprob_diff_std": std_diff.item(),
+            "debug/vllm_local_reverse_kl": reverse_kl.item(),
+        }
 
 
 def maybe_apply_importance_sampling(
@@ -1009,7 +1012,6 @@ def load(self, path: str, map_location=None):
             else:
                 self.ref_policy.load_state_dict(state_dict)
             logger.info(f"{self.rank=}: Loaded reference policy checkpoint from {self.ref_policy_checkpoint_path}")
-        self.local_metrics = MetricsTracker(max_metrics=32, device=self.device)
         return optimization_steps_done
 
     def forward(
@@ -1147,6 +1149,7 @@ def train(
         num_mini_batches: int,
     ):
         args = self.args
+        local_metrics = {}
         to_device_inplace(collated_query_responses, self.device)
         to_device_inplace(collated_tool_masks, self.device)
         to_device_inplace(collated_attention_masks, self.device)
@@ -1268,13 +1271,12 @@ def train(
                     # Replace any remaining NaN values (query tokens in packed sequences are set to NaN by pack_sequences in rl_utils.py)
                     mb_vllm_logprobs = torch.nan_to_num(mb_vllm_logprobs, nan=INVALID_LOGPROB)
 
-                    compare_logprobs(
+                    local_metrics |= compare_logprobs(
                         mb_local_logprobs,
                         mb_vllm_logprobs,
                         mb_response_masks_bool,
                         args.masked_mean_axis,
                         args.masked_mean_denominator,
-                        self.local_metrics,
                     )
 
                     # Cache the old logprobs
@@ -1312,10 +1314,9 @@ def train(
                         args,
                     )
 
-            with torch.no_grad():
-                self.local_metrics.add_dict(loss_statistics.to_dict())
-                self.local_metrics.add("lr", self.scheduler.get_last_lr()[0])
-                return self.local_metrics.get_metrics_list()
+            local_metrics |= loss_statistics.to_dict()
+            local_metrics["lr"] = self.scheduler.get_last_lr()[0]
+            return local_metrics
 
     def save_checkpoint_state(self, checkpoint_state_dir: str, client_state: dict[str, Any]) -> None:
         args = self.args
diff --git a/open_instruct/metrics.py b/open_instruct/metrics.py
@@ -113,92 +113,24 @@ def update_stats(
                 mb_entropy, mb_response_masks_bool, args.masked_mean_axis, args.masked_mean_denominator
             ).float()
 
-    def to_dict(self) -> dict[str, torch.Tensor]:
+    def to_dict(self) -> dict[str, float]:
         """Convert accumulated statistics to a metrics dictionary.
 
         Returns:
             Dictionary mapping metric names to their averaged values across all minibatches
         """
         metrics = {
-            "objective/kl_avg": self.kl_stats[0].mean(),
-            "objective/kl2_avg": self.kl_stats[1].mean(),
-            "objective/kl3_avg": self.kl_stats[2].mean(),
-            "objective/kl4_avg": self.kl_stats[3].mean(),
-            "loss/policy_avg": self.pg_loss_stats.mean(),
-            "loss/kl_avg": self.kl_loss_stats.mean(),
-            "loss/total_avg": self.loss_stats.mean(),
-            "policy/clipfrac_avg": self.pg_clipfrac_stats.mean(),
-            "val/ratio": self.ratio_stats.mean(),
-            "val/ratio_var": self.ratio_stats.var(),
+            "objective/kl_avg": self.kl_stats[0].mean().item(),
+            "objective/kl2_avg": self.kl_stats[1].mean().item(),
+            "objective/kl3_avg": self.kl_stats[2].mean().item(),
+            "objective/kl4_avg": self.kl_stats[3].mean().item(),
+            "loss/policy_avg": self.pg_loss_stats.mean().item(),
+            "loss/kl_avg": self.kl_loss_stats.mean().item(),
+            "loss/total_avg": self.loss_stats.mean().item(),
+            "policy/clipfrac_avg": self.pg_clipfrac_stats.mean().item(),
+            "val/ratio": self.ratio_stats.mean().item(),
+            "val/ratio_var": self.ratio_stats.var().item(),
         }
         if self.entropy_stats is not None:
-            metrics["policy/entropy_avg"] = self.entropy_stats.mean()
+            metrics["policy/entropy_avg"] = self.entropy_stats.mean().item()
         return metrics
-
-
-class MetricsTracker:
-    """Preallocated tensor-based metrics storage for efficient distributed reduction.
-
-    Stores all metrics in a single preallocated tensor to enable efficient all-reduce
-    operations in distributed training. Maintains a mapping from metric names to
-    tensor indices for fast access.
-    """
-
-    def __init__(self, max_metrics: int = 32, device: str = "cuda"):
-        """Initialize metrics tracker.
-
-        Args:
-            max_metrics: Maximum number of unique metrics to track
-            device: Device to allocate metrics tensor on (default: "cuda")
-        """
-        self.metrics = torch.zeros(max_metrics, device=device)
-        self.names2idx = {}
-        self.current_idx = 0
-        self.max_metrics = max_metrics
-
-    def add(self, name: str, value: torch.tensor):
-        """Add or update a metric value.
-
-        If the metric name is new, allocates a new index in the metrics tensor.
-        If the metric already exists, updates its value at the existing index.
-
-        Args:
-            name: Metric name (e.g., "loss/policy_avg")
-            value: Metric value (scalar tensor or convertible to tensor)
-
-        Returns:
-            Self for method chaining
-
-        Raises:
-            ValueError: If max_metrics limit is exceeded
-        """
-        if name not in self.names2idx:
-            if self.current_idx >= self.max_metrics:
-                raise ValueError(f"Exceeded maximum number of metrics ({self.max_metrics})")
-            self.names2idx[name] = self.current_idx
-            self.current_idx += 1
-
-        self.metrics[self.names2idx[name]] = value
-        return self
-
-    def add_dict(self, metrics_dict: dict[str, torch.Tensor]):
-        """Add multiple metrics from a dictionary.
-
-        Args:
-            metrics_dict: Dictionary mapping metric names to values
-
-        Returns:
-            Self for method chaining
-        """
-        for k, v in metrics_dict.items():
-            self.add(k, v)
-        return self
-
-    def get_metrics_list(self) -> dict[str, float]:
-        """Convert tracked metrics to a dictionary of Python floats.
-
-        Returns:
-            Dictionary mapping metric names to their float values
-        """
-        metrics_list = self.metrics.tolist()
-        return {name: metrics_list[idx] for name, idx in self.names2idx.items()}