From 43c6c120d0176f0548a0dddf7a89e4393d017aa7 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 05:49:27 +0000
Subject: [PATCH 1/2] fix: suppress SubprocessREPL overhead output during
 benchmarks

- Add verbose parameter to SubprocessREPL (default: True, for backward compatibility)
- Only print overhead summary on cleanup when verbose=True
- BenchmarkRunner now sets verbose=False in environment_kwargs by default

This prevents the overhead summaries from interleaving with the progress bar
during parallel benchmark execution.
---
 benchmarks/runner.py                | 6 +++++-
 rlm/environments/subprocess_repl.py | 7 +++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/benchmarks/runner.py b/benchmarks/runner.py
index 5b837abb..a7cc3976 100644
--- a/benchmarks/runner.py
+++ b/benchmarks/runner.py
@@ -136,6 +136,10 @@ def __init__(
                 Signature: (completed, total, sample_result, stats) -> None
             **kwargs: Additional backend or environment kwargs.
         """
+        # Default environment_kwargs: suppress SubprocessREPL overhead output
+        env_kwargs = {"verbose": False}
+        env_kwargs.update(kwargs.get("environment_kwargs", {}))
+
         self.config = RunnerConfig(
             backend=backend,
             model=model,
@@ -147,7 +151,7 @@ def __init__(
             progress=progress,
             progress_callback=progress_callback,
             backend_kwargs={"model_name": model, **kwargs.get("backend_kwargs", {})},
-            environment_kwargs=kwargs.get("environment_kwargs", {}),
+            environment_kwargs=env_kwargs,
         )
         self._tqdm_available: bool | None = None
 
diff --git a/rlm/environments/subprocess_repl.py b/rlm/environments/subprocess_repl.py
index 0ceef81f..4e67471a 100644
--- a/rlm/environments/subprocess_repl.py
+++ b/rlm/environments/subprocess_repl.py
@@ -259,6 +259,7 @@ def __init__(
         allowed_packages: list[str] | None = None,
         auto_approve_packages: bool = False,
         package_approval_callback: Callable[[str], bool] | None = None,
+        verbose: bool = True,
         **kwargs,
     ):
         """
@@ -276,6 +277,7 @@ def __init__(
             allowed_packages: Pre-approved packages (no prompt needed).
             auto_approve_packages: If True, install packages without prompting.
             package_approval_callback: Custom function for package approval.
+            verbose: If True, print overhead summary on cleanup (default: True).
         """
         super().__init__(persistent=persistent, depth=depth, **kwargs)
 
@@ -295,6 +297,7 @@ def __init__(
         self.sandbox = sandbox
         self.auto_approve = auto_approve_packages
         self.approval_callback = package_approval_callback or self._default_approval
+        self.verbose = verbose
 
         # Pre-approved packages (stdlib + user-specified)
         self.allowed_packages: set[str] = {
@@ -836,8 +839,8 @@ def cleanup(self):
                 pass
             self._socket_server = None
 
-        # Print overhead summary
-        if hasattr(self, "_overhead_stats"):
+        # Print overhead summary (only if verbose)
+        if hasattr(self, "_overhead_stats") and getattr(self, "verbose", True):
             self.print_overhead_summary()
 
         # Unregister from emergency cleanup

From 9b7972bd7e26013c399839dc6b1cf8c2c6537367 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 05:52:04 +0000
Subject: [PATCH 2/2] refactor: make verbose=False the default for
 SubprocessREPL

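Change the default from verbose=True to verbose=False so overhead summaries
are not printed unless requested. This is a behavior change for existing
callers: anyone who relied on the summary must now pass verbose=True
explicitly.

Note: cleanup() still reads the flag via getattr(self, "verbose", True), so
the fallback for an instance that never set the attribute remains True.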

Also remove the now-unnecessary override in BenchmarkRunner.
---
 benchmarks/runner.py                | 6 +-----
 rlm/environments/subprocess_repl.py | 4 ++--
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/benchmarks/runner.py b/benchmarks/runner.py
index a7cc3976..5b837abb 100644
--- a/benchmarks/runner.py
+++ b/benchmarks/runner.py
@@ -136,10 +136,6 @@ def __init__(
                 Signature: (completed, total, sample_result, stats) -> None
             **kwargs: Additional backend or environment kwargs.
         """
-        # Default environment_kwargs: suppress SubprocessREPL overhead output
-        env_kwargs = {"verbose": False}
-        env_kwargs.update(kwargs.get("environment_kwargs", {}))
-
         self.config = RunnerConfig(
             backend=backend,
             model=model,
@@ -151,7 +147,7 @@ def __init__(
             progress=progress,
             progress_callback=progress_callback,
             backend_kwargs={"model_name": model, **kwargs.get("backend_kwargs", {})},
-            environment_kwargs=env_kwargs,
+            environment_kwargs=kwargs.get("environment_kwargs", {}),
         )
         self._tqdm_available: bool | None = None
 
diff --git a/rlm/environments/subprocess_repl.py b/rlm/environments/subprocess_repl.py
index 4e67471a..a158b031 100644
--- a/rlm/environments/subprocess_repl.py
+++ b/rlm/environments/subprocess_repl.py
@@ -259,7 +259,7 @@ def __init__(
         allowed_packages: list[str] | None = None,
         auto_approve_packages: bool = False,
         package_approval_callback: Callable[[str], bool] | None = None,
-        verbose: bool = True,
+        verbose: bool = False,
         **kwargs,
     ):
         """
@@ -277,7 +277,7 @@ def __init__(
             allowed_packages: Pre-approved packages (no prompt needed).
             auto_approve_packages: If True, install packages without prompting.
             package_approval_callback: Custom function for package approval.
-            verbose: If True, print overhead summary on cleanup (default: True).
+            verbose: If True, print overhead summary on cleanup (default: False).
         """
         super().__init__(persistent=persistent, depth=depth, **kwargs)