diff --git a/.github/workflows/ghcr.yml b/.github/workflows/ghcr.yml index 71e5c4f41954..852438e27485 100644 --- a/.github/workflows/ghcr.yml +++ b/.github/workflows/ghcr.yml @@ -156,7 +156,7 @@ jobs: - name: Load sandbox image and run integration tests env: - SANDBOX_TYPE: ${{ matrix.sandbox }} + SANDBOX_BOX_TYPE: ${{ matrix.sandbox }} run: | # Load the Docker image and capture the output output=$(docker load -i /tmp/sandbox_image_amd64.tar) diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml index dca0394f0875..1a9aeccc391c 100644 --- a/.github/workflows/review-pr.yml +++ b/.github/workflows/review-pr.yml @@ -55,7 +55,7 @@ jobs: env: LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - SANDBOX_TYPE: ssh + SANDBOX_BOX_TYPE: ssh run: | # Append path to launch poetry export PATH="/github/home/.local/bin:$PATH" diff --git a/.github/workflows/solve-issue.yml b/.github/workflows/solve-issue.yml index 9f1985a36db3..df965b95542e 100644 --- a/.github/workflows/solve-issue.yml +++ b/.github/workflows/solve-issue.yml @@ -50,7 +50,7 @@ jobs: ISSUE_BODY: ${{ github.event.issue.body }} LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - SANDBOX_TYPE: ssh + SANDBOX_BOX_TYPE: ssh run: | # Append path to launch poetry export PATH="/github/home/.local/bin:$PATH" diff --git a/agenthub/micro/commit_writer/README.md b/agenthub/micro/commit_writer/README.md index ab6c5bbe7057..927bc67da286 100644 --- a/agenthub/micro/commit_writer/README.md +++ b/agenthub/micro/commit_writer/README.md @@ -3,7 +3,7 @@ CommitWriterAgent can help write git commit message. Example: ```bash -WORKSPACE_MOUNT_PATH="`PWD`" SANDBOX_TYPE="ssh" \ +WORKSPACE_MOUNT_PATH="`PWD`" SANDBOX_BOX_TYPE="ssh" \ poetry run python opendevin/core/main.py -t "dummy task" -c CommitWriterAgent -d ./ ``` diff --git a/agenthub/monologue_agent/utils/prompts.py b/agenthub/monologue_agent/utils/prompts.py index b5cb1a3f0038..49de7c821116 100644 --- a/agenthub/monologue_agent/utils/prompts.py +++ b/agenthub/monologue_agent/utils/prompts.py @@ -1,13 +1,13 @@ from opendevin.core.config import config from opendevin.core.utils import json -from opendevin.events.observation import ( - CmdOutputObservation, -) from opendevin.events.action import ( Action, ) - +from opendevin.events.observation import ( + CmdOutputObservation, +) from opendevin.events.serialization.action import action_from_dict + ACTION_PROMPT = """ You're a thoughtful robot. Your main task is this: %(task)s @@ -206,7 +206,7 @@ def get_request_action_prompt( 'background_commands': bg_commands_message, 'hint': hint, 'user': user, - 'timeout': config.sandbox_timeout, + 'timeout': config.sandbox.timeout, 'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.workspace_mount_path_in_sandbox, } @@ -242,4 +242,4 @@ def parse_summary_response(response: str) -> list[dict]: - list[dict]: The list of summaries output by the model """ parsed = json.loads(response) - return parsed['new_monologue'] \ No newline at end of file + return parsed['new_monologue'] diff --git a/config.template.toml b/config.template.toml index 9c0c6882ba90..fb2d3aabd8cc 100644 --- a/config.template.toml +++ b/config.template.toml @@ -19,9 +19,6 @@ workspace_base = "./workspace" # Cache directory path #cache_dir = "/tmp/cache" -# Container image to use for the sandbox -#sandbox_container_image = "ghcr.io/opendevin/sandbox:main" - # Debugging enabled #debug = false @@ -79,15 +76,6 @@ persist_sandbox = false # SSH port for the sandbox #ssh_port = 63710 -# Sandbox timeout in seconds -#sandbox_timeout = 120 - -# Sandbox type (ssh, exec, e2b, local) -#sandbox_type = "ssh" - -# Sandbox user ID -#sandbox_user_id = 1000 - # Use host network #use_host_network = false @@ -174,7 +162,23 @@ model = "gpt-4o" # Name of the agent #name = "CodeActAgent" +#################################### Sandbox ################################### +# Configuration for the sandbox +############################################################################## +[sandbox] +# Sandbox timeout in seconds +#timeout = 120 + +# Sandbox type (ssh, e2b, local) +#box_type = "ssh" + +# Sandbox user ID +#user_id = 1000 + +# Container image to use for the sandbox +#container_image = "ghcr.io/opendevin/sandbox:main" + #################################### Eval #################################### # Configuration for the evaluation, please refer to the specific evaluation # plugin for the available options -############################################################################## \ No newline at end of file +############################################################################## diff --git a/evaluation/TUTORIAL.md b/evaluation/TUTORIAL.md index 404fcdd7a119..b29ae7a43b9a 100644 --- a/evaluation/TUTORIAL.md +++ b/evaluation/TUTORIAL.md @@ -31,8 +31,6 @@ workspace_base = "/path/to/your/workspace" workspace_mount_path = "/path/to/your/workspace" # ========================== -sandbox_type = "ssh" -sandbox_timeout = 120 ssh_hostname = "localhost" # SWEBench eval specific - but you can tweak it to your needs @@ -41,6 +39,10 @@ run_as_devin = false # linting python after editing helps LLM fix indentations enable_auto_lint = true +[sandbox] +box_type = "ssh" +timeout = 120 + [llm] # IMPORTANT: add your API key here, and set the model to the one you want to evaluate model = "gpt-4o-2024-05-13" diff --git a/evaluation/agent_bench/README.md b/evaluation/agent_bench/README.md index 9c937588080c..1d69cf6a19ce 100644 --- a/evaluation/agent_bench/README.md +++ b/evaluation/agent_bench/README.md @@ -18,8 +18,6 @@ cache_dir = "/path/to/cache" workspace_base = "/path/to/workspace" workspace_mount_path = "/path/to/workspace" -sandbox_type = "ssh" -sandbox_timeout = 120 ssh_hostname = "localhost" use_host_network = false @@ -27,6 +25,10 @@ use_host_network = false run_as_devin = true enable_auto_lint = true +[sandbox] +box_type = "ssh" +timeout = 120 + [eval_gpt35_turbo] model = "gpt-3.5-turbo" api_key = "sk-123" diff --git a/evaluation/miniwob/README.md b/evaluation/miniwob/README.md index 2e48de72067d..58c284051697 100644 --- a/evaluation/miniwob/README.md +++ b/evaluation/miniwob/README.md @@ -16,9 +16,11 @@ Add the following configurations: [core] max_iterations = 100 cache_dir = "/tmp/cache" -sandbox_type = "ssh" ssh_hostname = "localhost" -sandbox_timeout = 120 + +[sandbox] +box_type = "ssh" +timeout = 120 # TODO: Change these to the model you want to evaluate [eval_gpt4_1106_preview] diff --git a/evaluation/swe_bench/README.md b/evaluation/swe_bench/README.md index 59f7f9e911d6..78f87a39ccc9 100644 --- a/evaluation/swe_bench/README.md +++ b/evaluation/swe_bench/README.md @@ -44,9 +44,11 @@ Add the following configurations: [core] max_iterations = 100 cache_dir = "/tmp/cache" -sandbox_type = "ssh" ssh_hostname = "localhost" -sandbox_timeout = 120 + +[sandbox] +box_type = "ssh" +timeout = 120 # SWEBench eval specific use_host_network = false diff --git a/evaluation/webarena/README.md b/evaluation/webarena/README.md index 6b6c29d31732..cb720a2ffc9c 100644 --- a/evaluation/webarena/README.md +++ b/evaluation/webarena/README.md @@ -16,9 +16,11 @@ Add the following configurations: [core] max_iterations = 100 cache_dir = "/tmp/cache" -sandbox_type = "ssh" ssh_hostname = "localhost" -sandbox_timeout = 120 + +[sandbox] +box_type = "ssh" +timeout = 120 # TODO: Change these to the model you want to evaluate [eval_gpt4_1106_preview] diff --git a/opendevin/core/config.py b/opendevin/core/config.py index aad3709c34dd..9c8bd5fb01ca 100644 --- a/opendevin/core/config.py +++ b/opendevin/core/config.py @@ -125,6 +125,51 @@ def defaults_to_dict(self) -> dict: return result +@dataclass +class SandboxConfig(metaclass=Singleton): + """ + Configuration for the sandbox. + + Attributes: + box_type: The type of sandbox to use. Options are: ssh, e2b, local. + container_image: The container image to use for the sandbox. + user_id: The user ID for the sandbox. + timeout: The timeout for the sandbox. + + """ + + box_type: str = 'ssh' + container_image: str = 'ghcr.io/opendevin/sandbox' + ( + f':{os.getenv("OPEN_DEVIN_BUILD_VERSION")}' + if os.getenv('OPEN_DEVIN_BUILD_VERSION') + else ':main' + ) + user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000 + timeout: int = 120 + + def defaults_to_dict(self) -> dict: + """ + Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional. + """ + dict = {} + for f in fields(self): + dict[f.name] = get_field_info(f) + return dict + + def __str__(self): + attr_str = [] + for f in fields(self): + attr_name = f.name + attr_value = getattr(self, f.name) + + attr_str.append(f'{attr_name}={repr(attr_value)}') + + return f"SandboxConfig({', '.join(attr_str)})" + + def __repr__(self): + return self.__str__() + + class UndefinedString(str, Enum): UNDEFINED = 'UNDEFINED' @@ -137,6 +182,7 @@ class AppConfig(metaclass=Singleton): Attributes: llm: The LLM configuration. agent: The agent configuration. + sandbox: The sandbox configuration. runtime: The runtime environment. file_store: The file store to use. file_store_path: The path to the file store. @@ -145,17 +191,14 @@ class AppConfig(metaclass=Singleton): workspace_mount_path_in_sandbox: The path to mount the workspace in the sandbox. Defaults to /workspace. workspace_mount_rewrite: The path to rewrite the workspace mount path to. cache_dir: The path to the cache directory. Defaults to /tmp/cache. - sandbox_container_image: The container image to use for the sandbox. run_as_devin: Whether to run as devin. max_iterations: The maximum number of iterations. max_budget_per_task: The maximum budget allowed per task, beyond which the agent will stop. e2b_api_key: The E2B API key. - sandbox_type: The type of sandbox to use. Options are: ssh, exec, e2b, local. use_host_network: Whether to use the host network. ssh_hostname: The SSH hostname. disable_color: Whether to disable color. For terminals that don't support color. - sandbox_user_id: The user ID for the sandbox. - sandbox_timeout: The timeout for the sandbox. + initialize_plugins: Whether to initialize plugins. debug: Whether to enable debugging. enable_auto_lint: Whether to enable auto linting. This is False by default, for regular runs of the app. For evaluation, please set this to True. enable_cli_session: Whether to enable saving and restoring the session when run from CLI. @@ -166,6 +209,7 @@ class AppConfig(metaclass=Singleton): llm: LLMConfig = field(default_factory=LLMConfig) agent: AgentConfig = field(default_factory=AgentConfig) + sandbox: SandboxConfig = field(default_factory=SandboxConfig) runtime: str = 'server' file_store: str = 'memory' file_store_path: str = '/tmp/file_store' @@ -176,21 +220,13 @@ class AppConfig(metaclass=Singleton): workspace_mount_path_in_sandbox: str = '/workspace' workspace_mount_rewrite: str | None = None cache_dir: str = '/tmp/cache' - sandbox_container_image: str = 'ghcr.io/opendevin/sandbox' + ( - f':{os.getenv("OPEN_DEVIN_BUILD_VERSION")}' - if os.getenv('OPEN_DEVIN_BUILD_VERSION') - else ':main' - ) run_as_devin: bool = True max_iterations: int = 100 max_budget_per_task: float | None = None e2b_api_key: str = '' - sandbox_type: str = 'ssh' # Can be 'ssh', 'exec', or 'e2b' use_host_network: bool = False ssh_hostname: str = 'localhost' disable_color: bool = False - sandbox_user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000 - sandbox_timeout: int = 120 initialize_plugins: bool = True persist_sandbox: bool = False ssh_port: int = 63710 @@ -287,7 +323,7 @@ def get_field_info(f): def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, str]): """Reads the env-style vars and sets config attributes based on env vars or a config.toml dict. - Compatibility with vars like LLM_BASE_URL, AGENT_MEMORY_ENABLED and others. + Compatibility with vars like LLM_BASE_URL, AGENT_MEMORY_ENABLED, SANDBOX_TIMEOUT and others. Args: cfg: The AppConfig object to set attributes on. @@ -335,6 +371,9 @@ def set_attr_from_env(sub_config: Any, prefix=''): f'Error setting env var {env_var_name}={value}: check that the value is of the right type' ) + if 'SANDBOX_TYPE' in env_or_toml_dict: + logger.error('SANDBOX_TYPE is deprecated. Please use SANDBOX_BOX_TYPE instead.') + env_or_toml_dict['SANDBOX_BOX_TYPE'] = env_or_toml_dict.pop('SANDBOX_TYPE') # Start processing from the root of the config object set_attr_from_env(cfg) @@ -380,8 +419,32 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): if 'agent' in toml_config: agent_config = AgentConfig(**toml_config['agent']) + # set sandbox config from the toml file + sandbox_config = config.sandbox + + # migrate old sandbox configs from [core] section to sandbox config + keys_to_migrate = [key for key in core_config if key.startswith('sandbox_')] + for key in keys_to_migrate: + new_key = key.replace('sandbox_', '') + if new_key == 'type': + new_key = 'box_type' + if new_key in sandbox_config.__annotations__: + # read the key in sandbox and remove it from core + setattr(sandbox_config, new_key, core_config.pop(key)) + else: + logger.warning(f'Unknown sandbox config: {key}') + + # the new style values override the old style values + if 'sandbox' in toml_config: + sandbox_config = SandboxConfig(**toml_config['sandbox']) + # update the config object with the new values - AppConfig(llm=llm_config, agent=agent_config, **core_config) + AppConfig( + llm=llm_config, + agent=agent_config, + sandbox=sandbox_config, + **core_config, + ) except (TypeError, KeyError) as e: logger.warning( f'Cannot parse config from toml, toml values have not been applied.\nError: {e}', @@ -400,7 +463,7 @@ def finalize_config(cfg: AppConfig): cfg.workspace_base = os.path.abspath(cfg.workspace_base) # In local there is no sandbox, the workspace will have the same pwd as the host - if cfg.sandbox_type == 'local': + if cfg.sandbox.box_type == 'local': cfg.workspace_mount_path_in_sandbox = cfg.workspace_mount_path if cfg.workspace_mount_rewrite: # and not config.workspace_mount_path: diff --git a/opendevin/core/schema/config.py b/opendevin/core/schema/config.py index 4d210f49f704..b6bb71bdccba 100644 --- a/opendevin/core/schema/config.py +++ b/opendevin/core/schema/config.py @@ -34,7 +34,7 @@ class ConfigType(str, Enum): MAX_ITERATIONS = 'MAX_ITERATIONS' AGENT = 'AGENT' E2B_API_KEY = 'E2B_API_KEY' - SANDBOX_TYPE = 'SANDBOX_TYPE' + SANDBOX_BOX_TYPE = 'SANDBOX_BOX_TYPE' SANDBOX_USER_ID = 'SANDBOX_USER_ID' SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT' USE_HOST_NETWORK = 'USE_HOST_NETWORK' diff --git a/opendevin/runtime/docker/local_box.py b/opendevin/runtime/docker/local_box.py index 81c4f138e4ca..fe32ea7310ad 100644 --- a/opendevin/runtime/docker/local_box.py +++ b/opendevin/runtime/docker/local_box.py @@ -26,7 +26,7 @@ class LocalBox(Sandbox): - def __init__(self, timeout: int = config.sandbox_timeout): + def __init__(self, timeout: int = config.sandbox.timeout): os.makedirs(config.workspace_base, exist_ok=True) self.timeout = timeout self.background_commands: dict[int, Process] = {} diff --git a/opendevin/runtime/docker/ssh_box.py b/opendevin/runtime/docker/ssh_box.py index 902ba1cc43ee..e8bcbec6a77a 100644 --- a/opendevin/runtime/docker/ssh_box.py +++ b/opendevin/runtime/docker/ssh_box.py @@ -209,7 +209,7 @@ class DockerSSHBox(Sandbox): def __init__( self, container_image: str | None = None, - timeout: int = config.sandbox_timeout, + timeout: int = config.sandbox.timeout, sid: str | None = None, ): logger.info( @@ -235,7 +235,7 @@ def __init__( self.instance_id = (sid or '') + str(uuid.uuid4()) self.timeout = timeout - self.container_image = container_image or config.sandbox_container_image + self.container_image = container_image or config.sandbox.container_image self.container_image = get_od_sandbox_image( self.container_image, self.docker_client ) @@ -661,11 +661,7 @@ def get_working_directory(self): @property def user_id(self): - return config.sandbox_user_id - - @property - def sandbox_user_id(self): - return config.sandbox_user_id + return config.sandbox.user_id @property def run_as_devin(self): diff --git a/opendevin/runtime/e2b/sandbox.py b/opendevin/runtime/e2b/sandbox.py index a4225a264313..5bee8cd0bb0f 100644 --- a/opendevin/runtime/e2b/sandbox.py +++ b/opendevin/runtime/e2b/sandbox.py @@ -24,7 +24,7 @@ class E2BBox(Sandbox): def __init__( self, template: str = 'open-devin', - timeout: int = config.sandbox_timeout, + timeout: int = config.sandbox.timeout, ): self.sandbox = E2BSandbox( api_key=config.e2b_api_key, diff --git a/opendevin/runtime/runtime.py b/opendevin/runtime/runtime.py index 87ca12334523..ca725be517c8 100644 --- a/opendevin/runtime/runtime.py +++ b/opendevin/runtime/runtime.py @@ -37,15 +37,15 @@ from opendevin.storage import FileStore, InMemoryFileStore -def create_sandbox(sid: str = 'default', sandbox_type: str = 'ssh') -> Sandbox: - if sandbox_type == 'local': +def create_sandbox(sid: str = 'default', box_type: str = 'ssh') -> Sandbox: + if box_type == 'local': return LocalBox() - elif sandbox_type == 'ssh': + elif box_type == 'ssh': return DockerSSHBox(sid=sid) - elif sandbox_type == 'e2b': + elif box_type == 'e2b': return E2BBox() else: - raise ValueError(f'Invalid sandbox type: {sandbox_type}') + raise ValueError(f'Invalid sandbox type: {box_type}') class Runtime: @@ -67,7 +67,7 @@ def __init__( ): self.sid = sid if sandbox is None: - self.sandbox = create_sandbox(sid, config.sandbox_type) + self.sandbox = create_sandbox(sid, config.sandbox.box_type) self._is_external_sandbox = False else: self.sandbox = sandbox diff --git a/tests/integration/regenerate.sh b/tests/integration/regenerate.sh index 434ccd2f5345..90ded6f4402e 100755 --- a/tests/integration/regenerate.sh +++ b/tests/integration/regenerate.sh @@ -25,7 +25,7 @@ echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX" mkdir -p $WORKSPACE_BASE # use environmental variable if exists, otherwise use "ssh" -SANDBOX_TYPE="${SANDBOX_TYPE:-ssh}" +SANDBOX_BOX_TYPE="${SANDBOX_TYPE:-ssh}" # TODO: we should also test PERSIST_SANDBOX = true, once it's fixed PERSIST_SANDBOX=false MAX_ITERATIONS=10 @@ -64,7 +64,7 @@ run_test() { pytest_cmd+=" --cov=agenthub --cov=opendevin --cov-report=xml --cov-append" fi - SANDBOX_TYPE=$SANDBOX_TYPE \ + SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \ PERSIST_SANDBOX=$PERSIST_SANDBOX \ WORKSPACE_BASE=$WORKSPACE_BASE \ WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \ @@ -133,7 +133,7 @@ trap cleanup EXIT regenerate_without_llm() { # set -x to print the command being executed set -x - SANDBOX_TYPE=$SANDBOX_TYPE \ + SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \ PERSIST_SANDBOX=$PERSIST_SANDBOX \ WORKSPACE_BASE=$WORKSPACE_BASE \ WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \ @@ -162,7 +162,7 @@ regenerate_with_llm() { set -x echo -e "/exit\n" | \ DEBUG=true \ - SANDBOX_TYPE=$SANDBOX_TYPE \ + SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \ PERSIST_SANDBOX=$PERSIST_SANDBOX \ WORKSPACE_BASE=$WORKSPACE_BASE \ WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \ diff --git a/tests/integration/test_agent.py b/tests/integration/test_agent.py index 05441977cf81..b2ef740cc4fa 100644 --- a/tests/integration/test_agent.py +++ b/tests/integration/test_agent.py @@ -32,7 +32,7 @@ ) @pytest.mark.skipif( (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent') - and os.getenv('SANDBOX_TYPE', '').lower() != 'ssh', + and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh', reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful', ) @pytest.mark.skipif( @@ -71,7 +71,7 @@ def test_write_simple_script(): ) @pytest.mark.skipif( (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent') - and os.getenv('SANDBOX_TYPE', '').lower() != 'ssh', + and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh', reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful', ) @pytest.mark.skipif( @@ -79,7 +79,7 @@ def test_write_simple_script(): reason='We only keep basic tests for MonologueAgent and PlannerAgent', ) @pytest.mark.skipif( - os.getenv('SANDBOX_TYPE') == 'local', + os.getenv('SANDBOX_BOX_TYPE') == 'local', reason='local sandbox shows environment-dependent absolute path for pwd command', ) def test_edits(): @@ -120,7 +120,7 @@ def test_edits(): reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default', ) @pytest.mark.skipif( - os.getenv('SANDBOX_TYPE') != 'ssh', + os.getenv('SANDBOX_BOX_TYPE') != 'ssh', reason='Currently, only ssh sandbox supports stateful tasks', ) def test_ipython(): @@ -154,7 +154,7 @@ def test_ipython(): reason='Currently, only ManagerAgent supports task rejection', ) @pytest.mark.skipif( - os.getenv('SANDBOX_TYPE') == 'local', + os.getenv('SANDBOX_BOX_TYPE') == 'local', reason='FIXME: local sandbox does not capture stderr', ) def test_simple_task_rejection(): @@ -177,7 +177,7 @@ def test_simple_task_rejection(): reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default', ) @pytest.mark.skipif( - os.getenv('SANDBOX_TYPE') != 'ssh', + os.getenv('SANDBOX_BOX_TYPE') != 'ssh', reason='Currently, only ssh sandbox supports stateful tasks', ) def test_ipython_module(): @@ -213,7 +213,7 @@ def test_ipython_module(): ) @pytest.mark.skipif( (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent') - and os.getenv('SANDBOX_TYPE', '').lower() != 'ssh', + and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh', reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful', ) def test_browse_internet(http_server): diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 9ab65cf2da8b..411907e3ab90 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -51,6 +51,8 @@ def test_compat_env_to_config(monkeypatch, setup_env): monkeypatch.setenv('AGENT_MEMORY_MAX_THREADS', '4') monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True') monkeypatch.setenv('AGENT', 'CodeActAgent') + monkeypatch.setenv('SANDBOX_TYPE', 'local') + monkeypatch.setenv('SANDBOX_TIMEOUT', '10') config = AppConfig() load_from_env(config, os.environ) @@ -62,6 +64,10 @@ def test_compat_env_to_config(monkeypatch, setup_env): assert isinstance(config.agent, AgentConfig) assert isinstance(config.agent.memory_max_threads, int) assert config.agent.memory_max_threads == 4 + assert config.agent.memory_enabled is True + assert config.agent.name == 'CodeActAgent' + assert config.sandbox.box_type == 'local' + assert config.sandbox.timeout == 10 def test_load_from_old_style_env(monkeypatch, default_config): @@ -70,6 +76,7 @@ def test_load_from_old_style_env(monkeypatch, default_config): monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True') monkeypatch.setenv('AGENT_NAME', 'PlannerAgent') monkeypatch.setenv('WORKSPACE_BASE', '/opt/files/workspace') + monkeypatch.setenv('SANDBOX_CONTAINER_IMAGE', 'custom_image') load_from_env(default_config, os.environ) @@ -83,12 +90,14 @@ def test_load_from_old_style_env(monkeypatch, default_config): assert ( default_config.workspace_mount_path_in_sandbox is not UndefinedString.UNDEFINED ) + assert default_config.sandbox.container_image == 'custom_image' def test_load_from_new_style_toml(default_config, temp_toml_file): # Test loading configuration from a new-style TOML file with open(temp_toml_file, 'w', encoding='utf-8') as toml_file: - toml_file.write(""" + toml_file.write( + """ [llm] model = "test-model" api_key = "toml-api-key" @@ -97,9 +106,14 @@ def test_load_from_new_style_toml(default_config, temp_toml_file): name = "TestAgent" memory_enabled = true +[sandbox] +timeout = 1 + [core] workspace_base = "/opt/files2/workspace" -""") +sandbox_type = "local" +""" + ) load_from_toml(default_config, temp_toml_file) @@ -108,6 +122,11 @@ def test_load_from_new_style_toml(default_config, temp_toml_file): assert default_config.agent.name == 'TestAgent' assert default_config.agent.memory_enabled is True assert default_config.workspace_base == '/opt/files2/workspace' + assert default_config.sandbox.box_type == 'local' + assert default_config.sandbox.timeout == 1 + + # default config doesn't have a field sandbox_type + assert not hasattr(default_config, 'sandbox_type') # before finalize_config, workspace_mount_path is UndefinedString.UNDEFINED if it was not set assert default_config.workspace_mount_path is UndefinedString.UNDEFINED @@ -123,8 +142,56 @@ def test_load_from_new_style_toml(default_config, temp_toml_file): assert default_config.workspace_mount_path == '/opt/files2/workspace' -def test_env_overrides_toml(monkeypatch, default_config, temp_toml_file): - # Test that environment variables override TOML values using monkeypatch +def test_compat_load_sandbox_from_toml(default_config, temp_toml_file): + # test loading configuration from a new-style TOML file + # uses a toml file with sandbox_vars instead of a sandbox section + with open(temp_toml_file, 'w', encoding='utf-8') as toml_file: + toml_file.write( + """ +[llm] +model = "test-model" + +[agent] +name = "TestAgent" +memory_enabled = true + +[core] +workspace_base = "/opt/files2/workspace" +sandbox_type = "local" +sandbox_timeout = 500 +sandbox_container_image = "node:14" +sandbox_user_id = 1001 +""" + ) + + load_from_toml(default_config, temp_toml_file) + + assert default_config.llm.model == 'test-model' + assert default_config.agent.name == 'TestAgent' + assert default_config.agent.memory_enabled is True + assert default_config.workspace_base == '/opt/files2/workspace' + assert default_config.sandbox.box_type == 'local' + assert default_config.sandbox.timeout == 500 + assert default_config.sandbox.container_image == 'node:14' + assert default_config.sandbox.user_id == 1001 + assert default_config.workspace_mount_path_in_sandbox == '/workspace' + + finalize_config(default_config) + + # app config doesn't have fields sandbox_* + assert not hasattr(default_config, 'sandbox_type') + assert not hasattr(default_config, 'sandbox_timeout') + assert not hasattr(default_config, 'sandbox_container_image') + assert not hasattr(default_config, 'sandbox_user_id') + + # after finalize_config, workspace_mount_path is set to the absolute path of workspace_base + # if it was undefined + assert default_config.workspace_mount_path == '/opt/files2/workspace' + + +def test_env_overrides_compat_toml(monkeypatch, default_config, temp_toml_file): + # test that environment variables override TOML values using monkeypatch + # uses a toml file with sandbox_vars instead of a sandbox section with open(temp_toml_file, 'w', encoding='utf-8') as toml_file: toml_file.write(""" [llm] @@ -135,11 +202,15 @@ def test_env_overrides_toml(monkeypatch, default_config, temp_toml_file): workspace_base = "/opt/files3/workspace" sandbox_type = "local" disable_color = true +sandbox_timeout = 500 +sandbox_user_id = 1001 """) monkeypatch.setenv('LLM_API_KEY', 'env-api-key') monkeypatch.setenv('WORKSPACE_BASE', 'UNDEFINED') - monkeypatch.setenv('SANDBOX_TYPE', 'ssh') + monkeypatch.setenv('SANDBOX_TYPE', 'e2b') + monkeypatch.setenv('SANDBOX_TIMEOUT', '1000') + monkeypatch.setenv('SANDBOX_USER_ID', '1002') load_from_toml(default_config, temp_toml_file) @@ -160,20 +231,106 @@ def test_env_overrides_toml(monkeypatch, default_config, temp_toml_file): assert default_config.workspace_mount_path is UndefinedString.UNDEFINED assert default_config.workspace_mount_path == 'UNDEFINED' - assert default_config.sandbox_type == 'ssh' + assert default_config.sandbox.box_type == 'e2b' assert default_config.disable_color is True + assert default_config.sandbox.timeout == 1000 + assert default_config.sandbox.user_id == 1002 finalize_config(default_config) # after finalize_config, workspace_mount_path is set to absolute path of workspace_base if it was undefined assert default_config.workspace_mount_path == os.getcwd() + '/UNDEFINED' +def test_env_overrides_sandbox_toml(monkeypatch, default_config, temp_toml_file): + # test that environment variables override TOML values using monkeypatch + # uses a toml file with a sandbox section + with open(temp_toml_file, 'w', encoding='utf-8') as toml_file: + toml_file.write(""" +[llm] +model = "test-model" +api_key = "toml-api-key" + +[core] +workspace_base = "/opt/files3/workspace" + +[sandbox] +box_type = "e2b" +timeout = 500 +user_id = 1001 +""") + + monkeypatch.setenv('LLM_API_KEY', 'env-api-key') + monkeypatch.setenv('WORKSPACE_BASE', 'UNDEFINED') + monkeypatch.setenv('SANDBOX_TYPE', 'local') + monkeypatch.setenv('SANDBOX_TIMEOUT', '1000') + monkeypatch.setenv('SANDBOX_USER_ID', '1002') + + load_from_toml(default_config, temp_toml_file) + + # before finalize_config, workspace_mount_path is UndefinedString.UNDEFINED if it was not set + assert default_config.workspace_mount_path is UndefinedString.UNDEFINED + + # before load_from_env, values are set to the values from the toml file + assert default_config.llm.api_key == 'toml-api-key' + assert default_config.sandbox.box_type == 'e2b' + assert default_config.sandbox.timeout == 500 + assert default_config.sandbox.user_id == 1001 + + load_from_env(default_config, os.environ) + + # values from env override values from toml + assert os.environ.get('LLM_MODEL') is None + assert default_config.llm.model == 'test-model' + assert default_config.llm.api_key == 'env-api-key' + + assert default_config.sandbox.box_type == 'local' + assert default_config.sandbox.timeout == 1000 + assert default_config.sandbox.user_id == 1002 + + finalize_config(default_config) + # after finalize_config, workspace_mount_path is set to absolute path of workspace_base if it was undefined + assert default_config.workspace_mount_path == os.getcwd() + '/UNDEFINED' + + +def test_sandbox_config_from_toml(default_config, temp_toml_file): + # Test loading configuration from a new-style TOML file + with open(temp_toml_file, 'w', encoding='utf-8') as toml_file: + toml_file.write( + """ +[core] +workspace_base = "/opt/files/workspace" + +[llm] +model = "test-model" + +[sandbox] +box_type = "local" +timeout = 1 +container_image = "custom_image" +user_id = 1001 +""" + ) + + load_from_toml(default_config, temp_toml_file) + load_from_env(default_config, os.environ) + finalize_config(default_config) + + assert default_config.llm.model == 'test-model' + assert default_config.sandbox.box_type == 'local' + assert default_config.sandbox.timeout == 1 + assert default_config.sandbox.container_image == 'custom_image' + assert default_config.sandbox.user_id == 1001 + + def test_defaults_dict_after_updates(default_config): # Test that `defaults_dict` retains initial values after updates. initial_defaults = default_config.defaults_dict assert ( initial_defaults['workspace_mount_path']['default'] is UndefinedString.UNDEFINED ) + assert initial_defaults['llm']['api_key']['default'] is None + assert initial_defaults['agent']['name']['default'] == 'CodeActAgent' + updated_config = AppConfig() updated_config.llm.api_key = 'updated-api-key' updated_config.agent.name = 'MonologueAgent' @@ -185,6 +342,12 @@ def test_defaults_dict_after_updates(default_config): defaults_after_updates['workspace_mount_path']['default'] is UndefinedString.UNDEFINED ) + assert defaults_after_updates['sandbox']['box_type']['default'] == 'ssh' + assert defaults_after_updates['sandbox']['timeout']['default'] == 120 + assert ( + defaults_after_updates['sandbox']['container_image']['default'] + == 'ghcr.io/opendevin/sandbox:main' + ) assert defaults_after_updates == initial_defaults @@ -210,7 +373,7 @@ def test_invalid_toml_format(monkeypatch, temp_toml_file, default_config): def test_finalize_config(default_config): # Test finalize config assert default_config.workspace_mount_path is UndefinedString.UNDEFINED - default_config.sandbox_type = 'local' + default_config.sandbox.box_type = 'local' finalize_config(default_config) assert ( @@ -233,7 +396,7 @@ def test_workspace_mount_path_default(default_config): def test_workspace_mount_path_in_sandbox_local(default_config): assert default_config.workspace_mount_path_in_sandbox == '/workspace' - default_config.sandbox_type = 'local' + default_config.sandbox.box_type = 'local' finalize_config(default_config) assert ( default_config.workspace_mount_path_in_sandbox diff --git a/tests/unit/test_ipython.py b/tests/unit/test_ipython.py index 24d9b3041a5a..227cc1de8887 100644 --- a/tests/unit/test_ipython.py +++ b/tests/unit/test_ipython.py @@ -79,7 +79,7 @@ def test_sandbox_jupyter_plugin_backticks(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new='true'), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ): box = DockerSSHBox() box.init_plugins([JupyterRequirement]) diff --git a/tests/unit/test_sandbox.py b/tests/unit/test_sandbox.py index 47c4808d8ad4..cd38242a6bb6 100644 --- a/tests/unit/test_sandbox.py +++ b/tests/unit/test_sandbox.py @@ -92,7 +92,7 @@ def test_ssh_box_run_as_devin(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new='true'), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ): for box in [ DockerSSHBox() @@ -134,7 +134,7 @@ def test_ssh_box_multi_line_cmd_run_as_devin(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new='true'), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ): box = DockerSSHBox() exit_code, output = box.execute('pwd && ls -l') @@ -152,7 +152,7 @@ def test_ssh_box_stateful_cmd_run_as_devin(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new='true'), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ): box = DockerSSHBox() exit_code, output = box.execute('mkdir test') @@ -178,7 +178,7 @@ def test_ssh_box_failed_cmd_run_as_devin(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new='true'), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ): box = DockerSSHBox() exit_code, output = box.execute('non_existing_command') @@ -193,7 +193,7 @@ def test_single_multiline_command(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new='true'), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ): box = DockerSSHBox() exit_code, output = box.execute('echo \\\n -e "foo"') @@ -209,7 +209,7 @@ def test_multiline_echo(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new='true'), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ): box = DockerSSHBox() exit_code, output = box.execute('echo -e "hello\nworld"') @@ -226,7 +226,7 @@ def test_sandbox_whitespace(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new='true'), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ): box = DockerSSHBox() exit_code, output = box.execute('echo -e "\\n\\n\\n"') @@ -242,7 +242,7 @@ def test_sandbox_jupyter_plugin(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new='true'), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ): box = DockerSSHBox() box.init_plugins([JupyterRequirement]) @@ -335,8 +335,8 @@ def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir): # get a temporary directory with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir - ), patch.object(config, 'run_as_devin', new=True), patch.object( - config, 'sandbox_type', new='ssh' + ), patch.object(config, 'run_as_devin', new='true'), patch.object( + config.sandbox, 'box_type', new='ssh' ), patch.object(config, 'enable_auto_lint', new=True): assert config.enable_auto_lint box = DockerSSHBox() @@ -353,9 +353,9 @@ def test_agnostic_sandbox_jupyter_agentskills_fileop_pwd(temp_dir): with patch.object(config, 'workspace_base', new=temp_dir), patch.object( config, 'workspace_mount_path', new=temp_dir ), patch.object(config, 'run_as_devin', new=True), patch.object( - config, 'sandbox_type', new='ssh' + config.sandbox, 'box_type', new='ssh' ), patch.object( - config, 'sandbox_container_image', new=base_sandbox_image + config.sandbox, 'container_image', new=base_sandbox_image ), patch.object(config, 'enable_auto_lint', new=False): assert not config.enable_auto_lint box = DockerSSHBox()