
Commit 6ea793f

Add max_tasks_per_child config
1 parent 9f8b3c4 commit 6ea793f

File tree

openevolve/config.py
openevolve/process_parallel.py

2 files changed: +44 -21 lines changed

openevolve/config.py
Lines changed: 3 additions & 0 deletions

@@ -367,6 +367,9 @@ class Config:
     convergence_threshold: float = 0.001
     early_stopping_metric: str = "combined_score"
 
+    # Parallel controller settings
+    max_tasks_per_child: Optional[int] = None
+
     @classmethod
     def from_yaml(cls, path: Union[str, Path]) -> "Config":
         """Load configuration from a YAML file"""

openevolve/process_parallel.py
Lines changed: 41 additions & 21 deletions

@@ -8,8 +8,9 @@
 import pickle
 import signal
 import time
-from concurrent.futures import ProcessPoolExecutor, Future, TimeoutError as FutureTimeoutError
-from dataclasses import dataclass, asdict
+from concurrent.futures import Future, ProcessPoolExecutor
+from concurrent.futures import TimeoutError as FutureTimeoutError
+from dataclasses import asdict, dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 
@@ -37,11 +38,11 @@ class SerializableResult:
 def _worker_init(config_dict: dict, evaluation_file: str, parent_env: dict = None) -> None:
     """Initialize worker process with necessary components"""
     import os
-
+
     # Set environment from parent process
     if parent_env:
         os.environ.update(parent_env)
-
+
     global _worker_config
     global _worker_evaluation_file
     global _worker_evaluator
@@ -55,8 +56,8 @@ def _worker_init(config_dict: dict, evaluation_file: str, parent_env: dict = None) -> None:
         DatabaseConfig,
         EvaluatorConfig,
         LLMConfig,
-        PromptConfig,
         LLMModelConfig,
+        PromptConfig,
     )
 
     # Reconstruct model objects
@@ -125,7 +126,7 @@ def _lazy_init_worker_components():
         evaluator_llm,
         evaluator_prompt,
         database=None,  # No shared database in worker
-        suffix=getattr(_worker_config, 'file_suffix', '.py'),
+        suffix=getattr(_worker_config, "file_suffix", ".py"),
     )
 
 
@@ -201,7 +202,7 @@ def _run_iteration_worker(
 
     # Parse response based on evolution mode
    if _worker_config.diff_based_evolution:
-        from openevolve.utils.code_utils import extract_diffs, apply_diff, format_diff_summary
+        from openevolve.utils.code_utils import apply_diff, extract_diffs, format_diff_summary
 
        diff_blocks = extract_diffs(llm_response)
        if not diff_blocks:
@@ -275,7 +276,14 @@ def _run_iteration_worker(
 class ProcessParallelController:
     """Controller for process-based parallel evolution"""
 
-    def __init__(self, config: Config, evaluation_file: str, database: ProgramDatabase, evolution_tracer=None, file_suffix: str = ".py"):
+    def __init__(
+        self,
+        config: Config,
+        evaluation_file: str,
+        database: ProgramDatabase,
+        evolution_tracer=None,
+        file_suffix: str = ".py",
+    ):
         self.config = config
         self.evaluation_file = evaluation_file
         self.database = database
@@ -298,7 +306,7 @@ def _serialize_config(self, config: Config) -> dict:
 
         # The asdict() call itself triggers the deepcopy which tries to serialize novelty_llm. Remove it first.
         config.database.novelty_llm = None
-
+
         return {
             "llm": {
                 "models": [asdict(m) for m in config.llm.models],
@@ -334,16 +342,21 @@ def start(self) -> None:
 
         # Pass current environment to worker processes
         import os
+
         current_env = dict(os.environ)
-
+
         # Create process pool with initializer
         self.executor = ProcessPoolExecutor(
             max_workers=self.num_workers,
             initializer=_worker_init,
             initargs=(config_dict, self.evaluation_file, current_env),
+            max_tasks_per_child=self.config.max_tasks_per_child,
         )
 
-        logger.info(f"Started process pool with {self.num_workers} processes")
+        logger.info(
+            f"Started process pool with {self.num_workers} processes "
+            f"and max {self.config.max_tasks_per_child} tasks per child"
+        )
 
     def stop(self) -> None:
         """Stop the process pool"""
@@ -426,7 +439,9 @@ async def run_evolution(
         completed_iterations = 0
 
         # Island management
-        programs_per_island = self.config.database.programs_per_island or max(1, max_iterations // (self.config.database.num_islands * 10))
+        programs_per_island = self.config.database.programs_per_island or max(
+            1, max_iterations // (self.config.database.num_islands * 10)
+        )
         current_island_counter = 0
 
         # Early stopping tracking
@@ -480,15 +495,19 @@
                 # Store artifacts
                 if result.artifacts:
                     self.database.store_artifacts(child_program.id, result.artifacts)
-
+
                 # Log evolution trace
                 if self.evolution_tracer:
                     # Retrieve parent program for trace logging
-                    parent_program = self.database.get(result.parent_id) if result.parent_id else None
+                    parent_program = (
+                        self.database.get(result.parent_id) if result.parent_id else None
+                    )
                     if parent_program:
                         # Determine island ID
-                        island_id = child_program.metadata.get("island", self.database.current_island)
-
+                        island_id = child_program.metadata.get(
+                            "island", self.database.current_island
+                        )
+
                         self.evolution_tracer.log_trace(
                             iteration=completed_iteration,
                             parent_program=parent_program,
@@ -500,7 +519,7 @@
                             metadata={
                                 "iteration_time": result.iteration_time,
                                 "changes": child_program.metadata.get("changes", ""),
-                            }
+                            },
                         )
 
                         # Log prompts
@@ -590,8 +609,10 @@
 
                 # Check target score
                 if target_score is not None and child_program.metrics:
-                    if ('combined_score' in child_program.metrics and
-                        child_program.metrics['combined_score'] >= target_score):
+                    if (
+                        "combined_score" in child_program.metrics
+                        and child_program.metrics["combined_score"] >= target_score
+                    ):
                         logger.info(
                             f"Target score {target_score} reached at iteration {completed_iteration}"
                         )
@@ -701,8 +722,7 @@ def _submit_iteration(
         # Use thread-safe sampling that doesn't modify shared state
         # This fixes the race condition from GitHub issue #246
         parent, inspirations = self.database.sample_from_island(
-            island_id=target_island,
-            num_inspirations=self.config.prompt.num_top_programs
+            island_id=target_island, num_inspirations=self.config.prompt.num_top_programs
         )
 
         # Create database snapshot
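For reference, the max_tasks_per_child argument that start() now forwards to ProcessPoolExecutor is a standard-library feature added in Python 3.11: a worker process exits after completing that many tasks and is replaced with a fresh process, while None (the config default) keeps workers alive for the lifetime of the pool. Per the Python docs, the feature is incompatible with the "fork" start method, so "spawn" is used by default when a limit is set. A minimal standalone sketch of the recycling behavior, independent of openevolve:

import os
from concurrent.futures import ProcessPoolExecutor

def worker_pid(_):
    # Return the PID of the worker process that handled this task.
    return os.getpid()

if __name__ == "__main__":
    # Requires Python 3.11+. With max_tasks_per_child=2 and a single worker,
    # the pool replaces the worker after every two completed tasks, so the
    # reported PID changes every two entries.
    with ProcessPoolExecutor(max_workers=1, max_tasks_per_child=2) as pool:
        pids = list(pool.map(worker_pid, range(6)))
    print(pids)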
