Skip to content

Commit

Permalink
Refactored sandbox config and added fast boot (All-Hands-AI#2455)
Browse files Browse the repository at this point in the history
* Refactored sandbox config and added fastboot

* added tests

* fixed tests

* fixed tests

* inform users about the breaking change

* remove default config from eval

* check for lowercase env

* add test

* Revert Migration

* migrate old sandbox configs

* resolve merge conflict

* revert migration 2

* Revert "remove default config from eval"

This reverts commit de57c58.

* change type to box_type

* fix var name

* linted

* lint

* lint comments

* fix tests

* fix tests

* fix typo

* fix box_type, remove fast_boot

* add tests for sandbox config

* fix test

* update eval docs

* small removal comments

* adapt toml template

* old fields shouldn't be in the app dataclass

* fix old keys in app config

* clean up exec box

---------

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
  • Loading branch information
SmartManoj and enyst authored Jul 5, 2024
1 parent 82f4860 commit 143f38d
Show file tree
Hide file tree
Showing 22 changed files with 332 additions and 96 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ghcr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ jobs:

- name: Load sandbox image and run integration tests
env:
SANDBOX_TYPE: ${{ matrix.sandbox }}
SANDBOX_BOX_TYPE: ${{ matrix.sandbox }}
run: |
# Load the Docker image and capture the output
output=$(docker load -i /tmp/sandbox_image_amd64.tar)
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/review-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
env:
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SANDBOX_TYPE: ssh
SANDBOX_BOX_TYPE: ssh
run: |
# Append path to launch poetry
export PATH="/github/home/.local/bin:$PATH"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/solve-issue.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
ISSUE_BODY: ${{ github.event.issue.body }}
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SANDBOX_TYPE: ssh
SANDBOX_BOX_TYPE: ssh
run: |
# Append path to launch poetry
export PATH="/github/home/.local/bin:$PATH"
Expand Down
2 changes: 1 addition & 1 deletion agenthub/micro/commit_writer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
CommitWriterAgent can help write git commit message. Example:

```bash
WORKSPACE_MOUNT_PATH="`PWD`" SANDBOX_TYPE="ssh" \
WORKSPACE_MOUNT_PATH="`PWD`" SANDBOX_BOX_TYPE="ssh" \
poetry run python opendevin/core/main.py -t "dummy task" -c CommitWriterAgent -d ./
```

Expand Down
12 changes: 6 additions & 6 deletions agenthub/monologue_agent/utils/prompts.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from opendevin.core.config import config
from opendevin.core.utils import json
from opendevin.events.observation import (
CmdOutputObservation,
)
from opendevin.events.action import (
Action,
)

from opendevin.events.observation import (
CmdOutputObservation,
)
from opendevin.events.serialization.action import action_from_dict

ACTION_PROMPT = """
You're a thoughtful robot. Your main task is this:
%(task)s
Expand Down Expand Up @@ -206,7 +206,7 @@ def get_request_action_prompt(
'background_commands': bg_commands_message,
'hint': hint,
'user': user,
'timeout': config.sandbox_timeout,
'timeout': config.sandbox.timeout,
'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.workspace_mount_path_in_sandbox,
}

Expand Down Expand Up @@ -242,4 +242,4 @@ def parse_summary_response(response: str) -> list[dict]:
- list[dict]: The list of summaries output by the model
"""
parsed = json.loads(response)
return parsed['new_monologue']
return parsed['new_monologue']
30 changes: 17 additions & 13 deletions config.template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ workspace_base = "./workspace"
# Cache directory path
#cache_dir = "/tmp/cache"

# Container image to use for the sandbox
#sandbox_container_image = "ghcr.io/opendevin/sandbox:main"

# Debugging enabled
#debug = false

Expand Down Expand Up @@ -79,15 +76,6 @@ persist_sandbox = false
# SSH port for the sandbox
#ssh_port = 63710

# Sandbox timeout in seconds
#sandbox_timeout = 120

# Sandbox type (ssh, exec, e2b, local)
#sandbox_type = "ssh"

# Sandbox user ID
#sandbox_user_id = 1000

# Use host network
#use_host_network = false

Expand Down Expand Up @@ -174,7 +162,23 @@ model = "gpt-4o"
# Name of the agent
#name = "CodeActAgent"

#################################### Sandbox ###################################
# Configuration for the sandbox
##############################################################################
[sandbox]
# Sandbox timeout in seconds
#timeout = 120

# Sandbox type (ssh, e2b, local)
#box_type = "ssh"

# Sandbox user ID
#user_id = 1000

# Container image to use for the sandbox
#container_image = "ghcr.io/opendevin/sandbox:main"

#################################### Eval ####################################
# Configuration for the evaluation, please refer to the specific evaluation
# plugin for the available options
##############################################################################
##############################################################################
6 changes: 4 additions & 2 deletions evaluation/TUTORIAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ workspace_base = "/path/to/your/workspace"
workspace_mount_path = "/path/to/your/workspace"
# ==========================

sandbox_type = "ssh"
sandbox_timeout = 120
ssh_hostname = "localhost"

# SWEBench eval specific - but you can tweak it to your needs
Expand All @@ -41,6 +39,10 @@ run_as_devin = false
# linting python after editing helps LLM fix indentations
enable_auto_lint = true

[sandbox]
box_type = "ssh"
timeout = 120

[llm]
# IMPORTANT: add your API key here, and set the model to the one you want to evaluate
model = "gpt-4o-2024-05-13"
Expand Down
6 changes: 4 additions & 2 deletions evaluation/agent_bench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@ cache_dir = "/path/to/cache"
workspace_base = "/path/to/workspace"
workspace_mount_path = "/path/to/workspace"

sandbox_type = "ssh"
sandbox_timeout = 120
ssh_hostname = "localhost"

use_host_network = false
# AgentBench specific
run_as_devin = true
enable_auto_lint = true

[sandbox]
box_type = "ssh"
timeout = 120

[eval_gpt35_turbo]
model = "gpt-3.5-turbo"
api_key = "sk-123"
Expand Down
6 changes: 4 additions & 2 deletions evaluation/miniwob/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ Add the following configurations:
[core]
max_iterations = 100
cache_dir = "/tmp/cache"
sandbox_type = "ssh"
ssh_hostname = "localhost"
sandbox_timeout = 120

[sandbox]
box_type = "ssh"
timeout = 120

# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
Expand Down
6 changes: 4 additions & 2 deletions evaluation/swe_bench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ Add the following configurations:
[core]
max_iterations = 100
cache_dir = "/tmp/cache"
sandbox_type = "ssh"
ssh_hostname = "localhost"
sandbox_timeout = 120

[sandbox]
box_type = "ssh"
timeout = 120

# SWEBench eval specific
use_host_network = false
Expand Down
6 changes: 4 additions & 2 deletions evaluation/webarena/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ Add the following configurations:
[core]
max_iterations = 100
cache_dir = "/tmp/cache"
sandbox_type = "ssh"
ssh_hostname = "localhost"
sandbox_timeout = 120

[sandbox]
box_type = "ssh"
timeout = 120

# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
Expand Down
93 changes: 78 additions & 15 deletions opendevin/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,51 @@ def defaults_to_dict(self) -> dict:
return result


@dataclass
class SandboxConfig(metaclass=Singleton):
    """
    Configuration for the sandbox.

    Attributes:
        box_type: The type of sandbox to use. Options are: ssh, e2b, local.
        container_image: The container image to use for the sandbox.
        user_id: The user ID for the sandbox.
        timeout: The timeout for the sandbox, in seconds.
    """

    box_type: str = 'ssh'
    # Tag defaults to the build version when OPEN_DEVIN_BUILD_VERSION is set,
    # otherwise ':main'. Evaluated once, at class-definition time.
    container_image: str = 'ghcr.io/opendevin/sandbox' + (
        f':{os.getenv("OPEN_DEVIN_BUILD_VERSION")}'
        if os.getenv('OPEN_DEVIN_BUILD_VERSION')
        else ':main'
    )
    # os.getuid is unavailable on some platforms (e.g. Windows); fall back to 1000.
    user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
    timeout: int = 120

    def defaults_to_dict(self) -> dict:
        """
        Serialize fields to a dict for the frontend, including type hints,
        defaults, and whether it's optional.
        """
        # Use a non-shadowing local name (the original used `dict`,
        # which shadows the builtin).
        result = {}
        for f in fields(self):
            result[f.name] = get_field_info(f)
        return result

    def __str__(self):
        # Render every field as name=repr(value) for readable logging.
        attr_str = []
        for f in fields(self):
            attr_name = f.name
            attr_value = getattr(self, f.name)

            attr_str.append(f'{attr_name}={repr(attr_value)}')

        return f"SandboxConfig({', '.join(attr_str)})"

    def __repr__(self):
        return self.__str__()


class UndefinedString(str, Enum):
    """String-valued sentinel marking a config value as undefined.

    NOTE(review): presumably used to distinguish "not set" from an empty
    string elsewhere in this module — confirm against callers.
    """

    UNDEFINED = 'UNDEFINED'

Expand All @@ -137,6 +182,7 @@ class AppConfig(metaclass=Singleton):
Attributes:
llm: The LLM configuration.
agent: The agent configuration.
sandbox: The sandbox configuration.
runtime: The runtime environment.
file_store: The file store to use.
file_store_path: The path to the file store.
Expand All @@ -145,17 +191,14 @@ class AppConfig(metaclass=Singleton):
workspace_mount_path_in_sandbox: The path to mount the workspace in the sandbox. Defaults to /workspace.
workspace_mount_rewrite: The path to rewrite the workspace mount path to.
cache_dir: The path to the cache directory. Defaults to /tmp/cache.
sandbox_container_image: The container image to use for the sandbox.
run_as_devin: Whether to run as devin.
max_iterations: The maximum number of iterations.
max_budget_per_task: The maximum budget allowed per task, beyond which the agent will stop.
e2b_api_key: The E2B API key.
sandbox_type: The type of sandbox to use. Options are: ssh, exec, e2b, local.
use_host_network: Whether to use the host network.
ssh_hostname: The SSH hostname.
disable_color: Whether to disable color. For terminals that don't support color.
sandbox_user_id: The user ID for the sandbox.
sandbox_timeout: The timeout for the sandbox.
initialize_plugins: Whether to initialize plugins.
debug: Whether to enable debugging.
enable_auto_lint: Whether to enable auto linting. This is False by default, for regular runs of the app. For evaluation, please set this to True.
enable_cli_session: Whether to enable saving and restoring the session when run from CLI.
Expand All @@ -166,6 +209,7 @@ class AppConfig(metaclass=Singleton):

llm: LLMConfig = field(default_factory=LLMConfig)
agent: AgentConfig = field(default_factory=AgentConfig)
sandbox: SandboxConfig = field(default_factory=SandboxConfig)
runtime: str = 'server'
file_store: str = 'memory'
file_store_path: str = '/tmp/file_store'
Expand All @@ -176,21 +220,13 @@ class AppConfig(metaclass=Singleton):
workspace_mount_path_in_sandbox: str = '/workspace'
workspace_mount_rewrite: str | None = None
cache_dir: str = '/tmp/cache'
sandbox_container_image: str = 'ghcr.io/opendevin/sandbox' + (
f':{os.getenv("OPEN_DEVIN_BUILD_VERSION")}'
if os.getenv('OPEN_DEVIN_BUILD_VERSION')
else ':main'
)
run_as_devin: bool = True
max_iterations: int = 100
max_budget_per_task: float | None = None
e2b_api_key: str = ''
sandbox_type: str = 'ssh' # Can be 'ssh', 'exec', or 'e2b'
use_host_network: bool = False
ssh_hostname: str = 'localhost'
disable_color: bool = False
sandbox_user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
sandbox_timeout: int = 120
initialize_plugins: bool = True
persist_sandbox: bool = False
ssh_port: int = 63710
Expand Down Expand Up @@ -287,7 +323,7 @@ def get_field_info(f):

def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, str]):
"""Reads the env-style vars and sets config attributes based on env vars or a config.toml dict.
Compatibility with vars like LLM_BASE_URL, AGENT_MEMORY_ENABLED and others.
Compatibility with vars like LLM_BASE_URL, AGENT_MEMORY_ENABLED, SANDBOX_TIMEOUT and others.
Args:
cfg: The AppConfig object to set attributes on.
Expand Down Expand Up @@ -335,6 +371,9 @@ def set_attr_from_env(sub_config: Any, prefix=''):
f'Error setting env var {env_var_name}={value}: check that the value is of the right type'
)

if 'SANDBOX_TYPE' in env_or_toml_dict:
logger.error('SANDBOX_TYPE is deprecated. Please use SANDBOX_BOX_TYPE instead.')
env_or_toml_dict['SANDBOX_BOX_TYPE'] = env_or_toml_dict.pop('SANDBOX_TYPE')
# Start processing from the root of the config object
set_attr_from_env(cfg)

Expand Down Expand Up @@ -380,8 +419,32 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
if 'agent' in toml_config:
agent_config = AgentConfig(**toml_config['agent'])

# set sandbox config from the toml file
sandbox_config = config.sandbox

# migrate old sandbox configs from [core] section to sandbox config
keys_to_migrate = [key for key in core_config if key.startswith('sandbox_')]
for key in keys_to_migrate:
new_key = key.replace('sandbox_', '')
if new_key == 'type':
new_key = 'box_type'
if new_key in sandbox_config.__annotations__:
# read the key in sandbox and remove it from core
setattr(sandbox_config, new_key, core_config.pop(key))
else:
logger.warning(f'Unknown sandbox config: {key}')

# the new style values override the old style values
if 'sandbox' in toml_config:
sandbox_config = SandboxConfig(**toml_config['sandbox'])

# update the config object with the new values
AppConfig(llm=llm_config, agent=agent_config, **core_config)
AppConfig(
llm=llm_config,
agent=agent_config,
sandbox=sandbox_config,
**core_config,
)
except (TypeError, KeyError) as e:
logger.warning(
f'Cannot parse config from toml, toml values have not been applied.\nError: {e}',
Expand All @@ -400,7 +463,7 @@ def finalize_config(cfg: AppConfig):
cfg.workspace_base = os.path.abspath(cfg.workspace_base)

# In local there is no sandbox, the workspace will have the same pwd as the host
if cfg.sandbox_type == 'local':
if cfg.sandbox.box_type == 'local':
cfg.workspace_mount_path_in_sandbox = cfg.workspace_mount_path

if cfg.workspace_mount_rewrite: # and not config.workspace_mount_path:
Expand Down
2 changes: 1 addition & 1 deletion opendevin/core/schema/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class ConfigType(str, Enum):
MAX_ITERATIONS = 'MAX_ITERATIONS'
AGENT = 'AGENT'
E2B_API_KEY = 'E2B_API_KEY'
SANDBOX_TYPE = 'SANDBOX_TYPE'
SANDBOX_BOX_TYPE = 'SANDBOX_BOX_TYPE'
SANDBOX_USER_ID = 'SANDBOX_USER_ID'
SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT'
USE_HOST_NETWORK = 'USE_HOST_NETWORK'
Expand Down
2 changes: 1 addition & 1 deletion opendevin/runtime/docker/local_box.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@


class LocalBox(Sandbox):
def __init__(self, timeout: int = config.sandbox_timeout):
def __init__(self, timeout: int = config.sandbox.timeout):
os.makedirs(config.workspace_base, exist_ok=True)
self.timeout = timeout
self.background_commands: dict[int, Process] = {}
Expand Down
Loading

0 comments on commit 143f38d

Please sign in to comment.