Skip to content

Commit

Permalink
Refactored sandbox config and added fast boot (All-Hands-AI#2455)
Browse files Browse the repository at this point in the history
* Refactored sandbox config and added fastboot

* added tests

* fixed tests

* fixed tests

* inform users about the breaking change

* remove default config from eval

* check for lowercase env

* add test

* Revert Migration

* migrate old sandbox configs

* resolve merge conflict

* revert migration 2

* Revert "remove default config from eval"

This reverts commit de57c58.

* change type to box_type

* fix var name

* linted

* lint

* lint comments

* fix tests

* fix tests

* fix typo

* fix box_type, remove fast_boot

* add tests for sandbox config

* fix test

* update eval docs

* small removal comments

* adapt toml template

* old fields shouldn't be in the app dataclass

* fix old keys in app config

* clean up exec box

---------

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
  • Loading branch information
SmartManoj and enyst authored Jul 5, 2024
1 parent 82f4860 commit 143f38d
Show file tree
Hide file tree
Showing 22 changed files with 332 additions and 96 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ghcr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ jobs:

- name: Load sandbox image and run integration tests
env:
SANDBOX_TYPE: ${{ matrix.sandbox }}
SANDBOX_BOX_TYPE: ${{ matrix.sandbox }}
run: |
# Load the Docker image and capture the output
output=$(docker load -i /tmp/sandbox_image_amd64.tar)
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/review-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
env:
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SANDBOX_TYPE: ssh
SANDBOX_BOX_TYPE: ssh
run: |
# Append path to launch poetry
export PATH="/github/home/.local/bin:$PATH"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/solve-issue.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
ISSUE_BODY: ${{ github.event.issue.body }}
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SANDBOX_TYPE: ssh
SANDBOX_BOX_TYPE: ssh
run: |
# Append path to launch poetry
export PATH="/github/home/.local/bin:$PATH"
Expand Down
2 changes: 1 addition & 1 deletion agenthub/micro/commit_writer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
CommitWriterAgent can help write git commit message. Example:

```bash
WORKSPACE_MOUNT_PATH="`PWD`" SANDBOX_TYPE="ssh" \
WORKSPACE_MOUNT_PATH="`PWD`" SANDBOX_BOX_TYPE="ssh" \
poetry run python opendevin/core/main.py -t "dummy task" -c CommitWriterAgent -d ./
```

Expand Down
12 changes: 6 additions & 6 deletions agenthub/monologue_agent/utils/prompts.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from opendevin.core.config import config
from opendevin.core.utils import json
from opendevin.events.observation import (
CmdOutputObservation,
)
from opendevin.events.action import (
Action,
)

from opendevin.events.observation import (
CmdOutputObservation,
)
from opendevin.events.serialization.action import action_from_dict

ACTION_PROMPT = """
You're a thoughtful robot. Your main task is this:
%(task)s
Expand Down Expand Up @@ -206,7 +206,7 @@ def get_request_action_prompt(
'background_commands': bg_commands_message,
'hint': hint,
'user': user,
'timeout': config.sandbox_timeout,
'timeout': config.sandbox.timeout,
'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.workspace_mount_path_in_sandbox,
}

Expand Down Expand Up @@ -242,4 +242,4 @@ def parse_summary_response(response: str) -> list[dict]:
- list[dict]: The list of summaries output by the model
"""
parsed = json.loads(response)
return parsed['new_monologue']
return parsed['new_monologue']
30 changes: 17 additions & 13 deletions config.template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ workspace_base = "./workspace"
# Cache directory path
#cache_dir = "/tmp/cache"

# Container image to use for the sandbox
#sandbox_container_image = "ghcr.io/opendevin/sandbox:main"

# Debugging enabled
#debug = false

Expand Down Expand Up @@ -79,15 +76,6 @@ persist_sandbox = false
# SSH port for the sandbox
#ssh_port = 63710

# Sandbox timeout in seconds
#sandbox_timeout = 120

# Sandbox type (ssh, exec, e2b, local)
#sandbox_type = "ssh"

# Sandbox user ID
#sandbox_user_id = 1000

# Use host network
#use_host_network = false

Expand Down Expand Up @@ -174,7 +162,23 @@ model = "gpt-4o"
# Name of the agent
#name = "CodeActAgent"

#################################### Sandbox ###################################
# Configuration for the sandbox
##############################################################################
[sandbox]
# Sandbox timeout in seconds
#timeout = 120

# Sandbox type (ssh, e2b, local)
#box_type = "ssh"

# Sandbox user ID
#user_id = 1000

# Container image to use for the sandbox
#container_image = "ghcr.io/opendevin/sandbox:main"

#################################### Eval ####################################
# Configuration for the evaluation, please refer to the specific evaluation
# plugin for the available options
##############################################################################
##############################################################################
6 changes: 4 additions & 2 deletions evaluation/TUTORIAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ workspace_base = "/path/to/your/workspace"
workspace_mount_path = "/path/to/your/workspace"
# ==========================

sandbox_type = "ssh"
sandbox_timeout = 120
ssh_hostname = "localhost"

# SWEBench eval specific - but you can tweak it to your needs
Expand All @@ -41,6 +39,10 @@ run_as_devin = false
# linting python after editing helps LLM fix indentations
enable_auto_lint = true

[sandbox]
box_type = "ssh"
timeout = 120

[llm]
# IMPORTANT: add your API key here, and set the model to the one you want to evaluate
model = "gpt-4o-2024-05-13"
Expand Down
6 changes: 4 additions & 2 deletions evaluation/agent_bench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@ cache_dir = "/path/to/cache"
workspace_base = "/path/to/workspace"
workspace_mount_path = "/path/to/workspace"

sandbox_type = "ssh"
sandbox_timeout = 120
ssh_hostname = "localhost"

use_host_network = false
# AgentBench specific
run_as_devin = true
enable_auto_lint = true

[sandbox]
box_type = "ssh"
timeout = 120

[eval_gpt35_turbo]
model = "gpt-3.5-turbo"
api_key = "sk-123"
Expand Down
6 changes: 4 additions & 2 deletions evaluation/miniwob/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ Add the following configurations:
[core]
max_iterations = 100
cache_dir = "/tmp/cache"
sandbox_type = "ssh"
ssh_hostname = "localhost"
sandbox_timeout = 120

[sandbox]
box_type = "ssh"
timeout = 120

# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
Expand Down
6 changes: 4 additions & 2 deletions evaluation/swe_bench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ Add the following configurations:
[core]
max_iterations = 100
cache_dir = "/tmp/cache"
sandbox_type = "ssh"
ssh_hostname = "localhost"
sandbox_timeout = 120

[sandbox]
box_type = "ssh"
timeout = 120

# SWEBench eval specific
use_host_network = false
Expand Down
6 changes: 4 additions & 2 deletions evaluation/webarena/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ Add the following configurations:
[core]
max_iterations = 100
cache_dir = "/tmp/cache"
sandbox_type = "ssh"
ssh_hostname = "localhost"
sandbox_timeout = 120

[sandbox]
box_type = "ssh"
timeout = 120

# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
Expand Down
93 changes: 78 additions & 15 deletions opendevin/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,51 @@ def defaults_to_dict(self) -> dict:
return result


@dataclass
class SandboxConfig(metaclass=Singleton):
    """
    Configuration for the sandbox.

    Attributes:
        box_type: The type of sandbox to use. Options are: ssh, e2b, local.
        container_image: The container image to use for the sandbox.
        user_id: The user ID for the sandbox.
        timeout: The timeout for the sandbox, in seconds.
    """

    box_type: str = 'ssh'
    # Tag defaults to the build version when OPEN_DEVIN_BUILD_VERSION is set,
    # otherwise ':main'. Evaluated once, at class-definition time.
    container_image: str = 'ghcr.io/opendevin/sandbox' + (
        f':{os.getenv("OPEN_DEVIN_BUILD_VERSION")}'
        if os.getenv('OPEN_DEVIN_BUILD_VERSION')
        else ':main'
    )
    # os.getuid is unavailable on some platforms (e.g. Windows); fall back to 1000.
    user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
    timeout: int = 120

    def defaults_to_dict(self) -> dict:
        """
        Serialize fields to a dict for the frontend, including type hints,
        defaults, and whether it's optional.
        """
        # Use a non-shadowing local name (the original used `dict`,
        # which shadows the builtin).
        result = {}
        for f in fields(self):
            result[f.name] = get_field_info(f)
        return result

    def __str__(self):
        # Render every field as name=repr(value) for readable logging.
        attr_str = []
        for f in fields(self):
            attr_name = f.name
            attr_value = getattr(self, f.name)

            attr_str.append(f'{attr_name}={repr(attr_value)}')

        return f"SandboxConfig({', '.join(attr_str)})"

    def __repr__(self):
        return self.__str__()


class UndefinedString(str, Enum):
    """String-valued sentinel marking a config value as undefined.

    NOTE(review): presumably used to distinguish "not set" from an empty
    string elsewhere in this module — confirm against callers.
    """

    UNDEFINED = 'UNDEFINED'

Expand All @@ -137,6 +182,7 @@ class AppConfig(metaclass=Singleton):
Attributes:
llm: The LLM configuration.
agent: The agent configuration.
sandbox: The sandbox configuration.
runtime: The runtime environment.
file_store: The file store to use.
file_store_path: The path to the file store.
Expand All @@ -145,17 +191,14 @@ class AppConfig(metaclass=Singleton):
workspace_mount_path_in_sandbox: The path to mount the workspace in the sandbox. Defaults to /workspace.
workspace_mount_rewrite: The path to rewrite the workspace mount path to.
cache_dir: The path to the cache directory. Defaults to /tmp/cache.
sandbox_container_image: The container image to use for the sandbox.
run_as_devin: Whether to run as devin.
max_iterations: The maximum number of iterations.
max_budget_per_task: The maximum budget allowed per task, beyond which the agent will stop.
e2b_api_key: The E2B API key.
sandbox_type: The type of sandbox to use. Options are: ssh, exec, e2b, local.
use_host_network: Whether to use the host network.
ssh_hostname: The SSH hostname.
disable_color: Whether to disable color. For terminals that don't support color.
sandbox_user_id: The user ID for the sandbox.
sandbox_timeout: The timeout for the sandbox.
initialize_plugins: Whether to initialize plugins.
debug: Whether to enable debugging.
enable_auto_lint: Whether to enable auto linting. This is False by default, for regular runs of the app. For evaluation, please set this to True.
enable_cli_session: Whether to enable saving and restoring the session when run from CLI.
Expand All @@ -166,6 +209,7 @@ class AppConfig(metaclass=Singleton):

llm: LLMConfig = field(default_factory=LLMConfig)
agent: AgentConfig = field(default_factory=AgentConfig)
sandbox: SandboxConfig = field(default_factory=SandboxConfig)
runtime: str = 'server'
file_store: str = 'memory'
file_store_path: str = '/tmp/file_store'
Expand All @@ -176,21 +220,13 @@ class AppConfig(metaclass=Singleton):
workspace_mount_path_in_sandbox: str = '/workspace'
workspace_mount_rewrite: str | None = None
cache_dir: str = '/tmp/cache'
sandbox_container_image: str = 'ghcr.io/opendevin/sandbox' + (
f':{os.getenv("OPEN_DEVIN_BUILD_VERSION")}'
if os.getenv('OPEN_DEVIN_BUILD_VERSION')
else ':main'
)
run_as_devin: bool = True
max_iterations: int = 100
max_budget_per_task: float | None = None
e2b_api_key: str = ''
sandbox_type: str = 'ssh' # Can be 'ssh', 'exec', or 'e2b'
use_host_network: bool = False
ssh_hostname: str = 'localhost'
disable_color: bool = False
sandbox_user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
sandbox_timeout: int = 120
initialize_plugins: bool = True
persist_sandbox: bool = False
ssh_port: int = 63710
Expand Down Expand Up @@ -287,7 +323,7 @@ def get_field_info(f):

def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, str]):
"""Reads the env-style vars and sets config attributes based on env vars or a config.toml dict.
Compatibility with vars like LLM_BASE_URL, AGENT_MEMORY_ENABLED and others.
Compatibility with vars like LLM_BASE_URL, AGENT_MEMORY_ENABLED, SANDBOX_TIMEOUT and others.
Args:
cfg: The AppConfig object to set attributes on.
Expand Down Expand Up @@ -335,6 +371,9 @@ def set_attr_from_env(sub_config: Any, prefix=''):
f'Error setting env var {env_var_name}={value}: check that the value is of the right type'
)

if 'SANDBOX_TYPE' in env_or_toml_dict:
logger.error('SANDBOX_TYPE is deprecated. Please use SANDBOX_BOX_TYPE instead.')
env_or_toml_dict['SANDBOX_BOX_TYPE'] = env_or_toml_dict.pop('SANDBOX_TYPE')
# Start processing from the root of the config object
set_attr_from_env(cfg)

Expand Down Expand Up @@ -380,8 +419,32 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
if 'agent' in toml_config:
agent_config = AgentConfig(**toml_config['agent'])

# set sandbox config from the toml file
sandbox_config = config.sandbox

# migrate old sandbox configs from [core] section to sandbox config
keys_to_migrate = [key for key in core_config if key.startswith('sandbox_')]
for key in keys_to_migrate:
new_key = key.replace('sandbox_', '')
if new_key == 'type':
new_key = 'box_type'
if new_key in sandbox_config.__annotations__:
# read the key in sandbox and remove it from core
setattr(sandbox_config, new_key, core_config.pop(key))
else:
logger.warning(f'Unknown sandbox config: {key}')

# the new style values override the old style values
if 'sandbox' in toml_config:
sandbox_config = SandboxConfig(**toml_config['sandbox'])

# update the config object with the new values
AppConfig(llm=llm_config, agent=agent_config, **core_config)
AppConfig(
llm=llm_config,
agent=agent_config,
sandbox=sandbox_config,
**core_config,
)
except (TypeError, KeyError) as e:
logger.warning(
f'Cannot parse config from toml, toml values have not been applied.\nError: {e}',
Expand All @@ -400,7 +463,7 @@ def finalize_config(cfg: AppConfig):
cfg.workspace_base = os.path.abspath(cfg.workspace_base)

# In local there is no sandbox, the workspace will have the same pwd as the host
if cfg.sandbox_type == 'local':
if cfg.sandbox.box_type == 'local':
cfg.workspace_mount_path_in_sandbox = cfg.workspace_mount_path

if cfg.workspace_mount_rewrite: # and not config.workspace_mount_path:
Expand Down
2 changes: 1 addition & 1 deletion opendevin/core/schema/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class ConfigType(str, Enum):
MAX_ITERATIONS = 'MAX_ITERATIONS'
AGENT = 'AGENT'
E2B_API_KEY = 'E2B_API_KEY'
SANDBOX_TYPE = 'SANDBOX_TYPE'
SANDBOX_BOX_TYPE = 'SANDBOX_BOX_TYPE'
SANDBOX_USER_ID = 'SANDBOX_USER_ID'
SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT'
USE_HOST_NETWORK = 'USE_HOST_NETWORK'
Expand Down
2 changes: 1 addition & 1 deletion opendevin/runtime/docker/local_box.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@


class LocalBox(Sandbox):
def __init__(self, timeout: int = config.sandbox_timeout):
def __init__(self, timeout: int = config.sandbox.timeout):
os.makedirs(config.workspace_base, exist_ok=True)
self.timeout = timeout
self.background_commands: dict[int, Process] = {}
Expand Down
Loading

0 comments on commit 143f38d

Please sign in to comment.