From 43c6c120d0176f0548a0dddf7a89e4393d017aa7 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 16 Jan 2026 05:49:27 +0000 Subject: [PATCH 1/2] fix: suppress SubprocessREPL overhead output during benchmarks - Add verbose parameter to SubprocessREPL (default: True for backwards compat) - Only print overhead summary on cleanup when verbose=True - BenchmarkRunner now sets verbose=False in environment_kwargs by default This prevents the overhead summaries from interleaving with the progress bar during parallel benchmark execution. --- benchmarks/runner.py | 6 +++++- rlm/environments/subprocess_repl.py | 7 +++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/benchmarks/runner.py b/benchmarks/runner.py index 5b837abb..a7cc3976 100644 --- a/benchmarks/runner.py +++ b/benchmarks/runner.py @@ -136,6 +136,10 @@ def __init__( Signature: (completed, total, sample_result, stats) -> None **kwargs: Additional backend or environment kwargs. """ + # Default environment_kwargs: suppress SubprocessREPL overhead output + env_kwargs = {"verbose": False} + env_kwargs.update(kwargs.get("environment_kwargs", {})) + self.config = RunnerConfig( backend=backend, model=model, @@ -147,7 +151,7 @@ def __init__( progress=progress, progress_callback=progress_callback, backend_kwargs={"model_name": model, **kwargs.get("backend_kwargs", {})}, - environment_kwargs=kwargs.get("environment_kwargs", {}), + environment_kwargs=env_kwargs, ) self._tqdm_available: bool | None = None diff --git a/rlm/environments/subprocess_repl.py b/rlm/environments/subprocess_repl.py index 0ceef81f..4e67471a 100644 --- a/rlm/environments/subprocess_repl.py +++ b/rlm/environments/subprocess_repl.py @@ -259,6 +259,7 @@ def __init__( allowed_packages: list[str] | None = None, auto_approve_packages: bool = False, package_approval_callback: Callable[[str], bool] | None = None, + verbose: bool = True, **kwargs, ): """ @@ -276,6 +277,7 @@ def __init__( allowed_packages: Pre-approved packages (no prompt needed). auto_approve_packages: If True, install packages without prompting. package_approval_callback: Custom function for package approval. + verbose: If True, print overhead summary on cleanup (default: True). """ super().__init__(persistent=persistent, depth=depth, **kwargs) @@ -295,6 +297,7 @@ def __init__( self.sandbox = sandbox self.auto_approve = auto_approve_packages self.approval_callback = package_approval_callback or self._default_approval + self.verbose = verbose # Pre-approved packages (stdlib + user-specified) self.allowed_packages: set[str] = { @@ -836,8 +839,8 @@ def cleanup(self): pass self._socket_server = None - # Print overhead summary - if hasattr(self, "_overhead_stats"): + # Print overhead summary (only if verbose) + if hasattr(self, "_overhead_stats") and getattr(self, "verbose", True): self.print_overhead_summary() # Unregister from emergency cleanup From 9b7972bd7e26013c399839dc6b1cf8c2c6537367 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 16 Jan 2026 05:52:04 +0000 Subject: [PATCH 2/2] refactor: make verbose=False the default for SubprocessREPL Change default from verbose=True to verbose=False so overhead summaries are not printed by default. Users who want to see them can pass verbose=True explicitly. Also remove the now-unnecessary override in BenchmarkRunner. --- benchmarks/runner.py | 6 +----- rlm/environments/subprocess_repl.py | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/benchmarks/runner.py b/benchmarks/runner.py index a7cc3976..5b837abb 100644 --- a/benchmarks/runner.py +++ b/benchmarks/runner.py @@ -136,10 +136,6 @@ def __init__( Signature: (completed, total, sample_result, stats) -> None **kwargs: Additional backend or environment kwargs. """ - # Default environment_kwargs: suppress SubprocessREPL overhead output - env_kwargs = {"verbose": False} - env_kwargs.update(kwargs.get("environment_kwargs", {})) - self.config = RunnerConfig( backend=backend, model=model, @@ -151,7 +147,7 @@ def __init__( progress=progress, progress_callback=progress_callback, backend_kwargs={"model_name": model, **kwargs.get("backend_kwargs", {})}, - environment_kwargs=env_kwargs, + environment_kwargs=kwargs.get("environment_kwargs", {}), ) self._tqdm_available: bool | None = None diff --git a/rlm/environments/subprocess_repl.py b/rlm/environments/subprocess_repl.py index 4e67471a..a158b031 100644 --- a/rlm/environments/subprocess_repl.py +++ b/rlm/environments/subprocess_repl.py @@ -259,7 +259,7 @@ def __init__( allowed_packages: list[str] | None = None, auto_approve_packages: bool = False, package_approval_callback: Callable[[str], bool] | None = None, - verbose: bool = True, + verbose: bool = False, **kwargs, ): """ @@ -277,7 +277,7 @@ def __init__( allowed_packages: Pre-approved packages (no prompt needed). auto_approve_packages: If True, install packages without prompting. package_approval_callback: Custom function for package approval. - verbose: If True, print overhead summary on cleanup (default: True). + verbose: If True, print overhead summary on cleanup (default: False). """ super().__init__(persistent=persistent, depth=depth, **kwargs)