From bd68249fba5cb89c18037b0782fd0156b3a2de01 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Wed, 31 Jul 2024 04:30:59 +0800 Subject: [PATCH] [Arch] Test `EventStreamRuntime` to ensure its feature parity with `ServerRuntime` (#3157) * Remove global config from memory * Remove runtime global config * Remove from storage * Remove global config * Fix event stream tests * Fix sandbox issue * Change config * Removed transferred tests * Add swe env box * Fixes on testing * Fixed some tests * Merge with stashed changes * Fix typing * Fix ipython test * Revive function * Make temp_dir fixture * Remove test to avoid circular import * fix eventstream filestore for test_runtime * fix parse arg issue that cause integration test to fail * support swebench pull from custom namespace * add back simple tests for runtime * move multi-line bash tests to test_runtime; support multi-line bash for esruntime; * add testcase to handle PS2 prompt * use bashlex for bash parsing to handle multi-line commands; add testcases for multi-line commands * revert ghcr runtime change * Apply stash * fix run as other user; make test async; * fix test runtime for run as od * add run-as-devin to all the runtime tests * handle the case when username is root * move all run-as-devin tests from sandbox; only tests a few cases on different user to save time; * move over multi-line echo related tests to test_runtime * fix user-specific jupyter by fixing the pypoetry virtualenv folder * make plugin's init async; chdir at initialization of jupyter plugin; move ipy simple testcase to test runtime; * support agentskills import in move tests for jupyter pwd tests; overload `add_env_vars` for EventStreamRuntime to update env var also in Jupyter; make agentskills read env var lazily, in case env var is updated; * fix ServerRuntime agentskills issue * move agnostic image test to test_runtime * merge runtime tests in CI * fix enable auto lint as env var * update warning message * update warning message * test for 
different container images * change parsing output as debug * add exception handling for update_pwd_decorator * fix unit test indentation * add plugins as default input to Runtime class; remove init_sandbox_plugins; implement add_env_var (include jupyter) in the base class; * fix server runtime auto lint * Revert "add exception handling for update_pwd_decorator" This reverts commit 2b668b1506e02145cb8f87e321aad62febca3d50. * tries to print debugging info for agentskills * explicitly setting uid (try fix permission issue) * Revert "tries to print debugging info for agentskills" This reverts commit 8be4c86756f0e3fc62957b327ba2ac4999c419de. * set sandbox user id during testing to hopefully fix the permission issue * add browser tools for server runtime * try to debug for old pwd * update debug cmd * only test agnostic runtime when TEST_RUNTIME is Server * fix temp dir mkdir * load TEST_RUNTIME at the beginning * remove ipython tests * only log to file when DEBUG * default logging to project root * temporarily remove log to file * fix LLM logger dir * fix logger * make set pwd an optional aux action * fix prev pwd * fix infinite recursion * simplify * do not import the whole od library to avoid logger folder by jupyter * fix browsing * increase timeout * attempt to fix agentskills yet again * clean up in testcases, since CI maybe run as non-root * add _cause attribute for event.id * remove parent * add a bunch of debugging statements again for CI :( * fix temp_dir fixture * change all temp dir to follow pytest's tmp_path_factory * remove extra bracket * clean up error printing a bit * jupyter chdir to self.config.workspace_mount_path_in_sandbox on initialization * jupyter chdir to self.config.workspace_mount_path_in_sandbox on initialization * add typing for tmp dir fixture * clear the directory before running the test to avoid weird CI temp dir * remove agnostic test case for server runtime * Revert "remove agnostic test case for server runtime" This reverts commit 
30e2181c3fc1410e69596c2dcd06be01f1d016b3. * disable agnostic tests in CI * fix test --------- Co-authored-by: Graham Neubig --- .github/workflows/ghcr-runtime.yml | 265 --------- .github/workflows/ghcr.yml | 238 +++++++- opendevin/core/logger.py | 18 +- opendevin/core/main.py | 8 +- opendevin/runtime/client/client.py | 137 ++++- opendevin/runtime/client/runtime.py | 24 +- opendevin/runtime/e2b/runtime.py | 4 +- .../plugins/agent_skills/agentskills.py | 93 ++- opendevin/runtime/plugins/jupyter/__init__.py | 38 +- .../runtime/plugins/jupyter/execute_server.py | 4 +- opendevin/runtime/plugins/mixin.py | 5 +- opendevin/runtime/plugins/requirement.py | 2 +- opendevin/runtime/runtime.py | 23 +- opendevin/runtime/server/runtime.py | 21 +- opendevin/runtime/utils/bash.py | 7 +- .../utils/runtime_templates/Dockerfile.j2 | 7 +- opendevin/server/session/agent.py | 10 +- tests/unit/test_agent_skill.py | 478 +++++++-------- tests/unit/test_bash_parsing.py | 2 +- tests/unit/test_event_stream.py | 10 +- tests/unit/test_ipython.py | 77 --- tests/unit/test_is_stuck.py | 21 +- tests/unit/test_micro_agents.py | 22 +- tests/unit/test_runtime.py | 559 ++++++++++++++++-- tests/unit/test_runtime_build.py | 7 +- tests/unit/test_sandbox.py | 317 ---------- 26 files changed, 1307 insertions(+), 1090 deletions(-) delete mode 100644 .github/workflows/ghcr-runtime.yml delete mode 100644 tests/unit/test_ipython.py delete mode 100644 tests/unit/test_sandbox.py diff --git a/.github/workflows/ghcr-runtime.yml b/.github/workflows/ghcr-runtime.yml deleted file mode 100644 index cecb1807c04d..000000000000 --- a/.github/workflows/ghcr-runtime.yml +++ /dev/null @@ -1,265 +0,0 @@ -name: Build Publish and Test Runtime Image - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - -on: - push: - branches: - - main - tags: - - '*' - pull_request: - workflow_dispatch: - inputs: - reason: - description: 'Reason for manual trigger' - 
required: true - default: '' - -jobs: - ghcr_build_runtime: - runs-on: ubuntu-latest - - outputs: - tags: ${{ steps.capture-tags.outputs.tags }} - - permissions: - contents: read - packages: write - - strategy: - matrix: - image: ["od_runtime"] - base_image: ["ubuntu:22.04"] - platform: ["amd64", "arm64"] - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: true - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: false - swap-storage: true - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - id: buildx - uses: docker/setup-buildx-action@v3 - - - name: Install poetry via pipx - run: pipx install poetry - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - cache: "poetry" - - - name: Install Python dependencies using Poetry - run: make install-python-dependencies - - - name: Create source distribution and Dockerfile - run: poetry run python3 opendevin/runtime/utils/runtime_build.py --base_image ${{ matrix.base_image }} --build_folder containers/runtime - - - name: Build and export image - id: build - run: ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} ${{ matrix.platform }} - - - name: Capture tags - id: capture-tags - run: | - tags=$(cat tags.txt) - echo "tags=$tags" - echo "tags=$tags" >> $GITHUB_OUTPUT - - - name: Upload Docker image as artifact - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} - path: /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar - - test-for-runtime: - name: Test for Runtime - runs-on: ubuntu-latest - needs: ghcr_build_runtime - env: - 
PERSIST_SANDBOX: "false" - steps: - - uses: actions/checkout@v4 - - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # when set to "true" but frees about 6 GB - tool-cache: true - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: true - - - name: Install poetry via pipx - run: pipx install poetry - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - cache: "poetry" - - - name: Install Python dependencies using Poetry - run: make install-python-dependencies - - - name: Download Runtime Docker image - uses: actions/download-artifact@v4 - with: - name: od_runtime-docker-image-amd64 - path: /tmp/ - - - name: Load Runtime image and run runtime tests - run: | - # Load the Docker image and capture the output - output=$(docker load -i /tmp/od_runtime_image_amd64.tar) - - # Extract the first image name from the output - image_name=$(echo "$output" | grep -oP 'Loaded image: \K.*' | head -n 1) - - # Print the full name of the image - echo "Loaded Docker image: $image_name" - - SANDBOX_CONTAINER_IMAGE=$image_name TEST_IN_CI=true poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml -s ./tests/unit/test_runtime.py - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - - ghcr_push: - runs-on: ubuntu-latest - # don't push if runtime tests fail - needs: [ghcr_build_runtime, test-for-runtime] - if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') - - env: - tags: ${{ needs.ghcr_build_runtime.outputs.tags }} - - permissions: - contents: read - packages: write - - strategy: - matrix: - image: ["od_runtime"] - platform: ["amd64", "arm64"] - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Free 
Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - tool-cache: true - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: false - swap-storage: true - - - name: Login to GHCR - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Download Docker images - uses: actions/download-artifact@v4 - with: - name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} - path: /tmp/${{ matrix.platform }} - - - name: List downloaded files - run: | - ls -la /tmp/${{ matrix.platform }} - file /tmp/${{ matrix.platform }}/* - - - name: Load images and push to registry - run: | - mv /tmp/${{ matrix.platform }}/${{ matrix.image }}_image_${{ matrix.platform }}.tar ./${{ matrix.image }}_image_${{ matrix.platform }}.tar - if ! loaded_image=$(docker load -i ${{ matrix.image }}_image_${{ matrix.platform }}.tar | grep "Loaded image:" | head -n 1 | awk '{print $3}'); then - echo "Failed to load Docker image" - exit 1 - fi - echo "loaded image = $loaded_image" - tags=$(echo ${tags} | tr ' ' '\n') - image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') - echo "image name = $image_name" - for tag in $tags; do - echo "tag = $tag" - if [ -n "$image_name" ]; then - docker tag $loaded_image $image_name:${tag}_${{ matrix.platform }} - docker push $image_name:${tag}_${{ matrix.platform }} - else - echo "Skipping tag and push due to empty image_name" - fi - done - - create_manifest: - runs-on: ubuntu-latest - needs: [ghcr_build_runtime, ghcr_push] - if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') - - env: - tags: ${{ needs.ghcr_build_runtime.outputs.tags }} - - strategy: - matrix: - image: ["od_runtime"] - - permissions: - contents: read - packages: write - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Login to GHCR - uses: 
docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Create and push multi-platform manifest - run: | - image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') - echo "image name = $image_name" - tags=$(echo ${tags} | tr ' ' '\n') - for tag in $tags; do - echo 'tag = $tag' - docker buildx imagetools create --tag $image_name:$tag \ - $image_name:${tag}_amd64 \ - $image_name:${tag}_arm64 - done diff --git a/.github/workflows/ghcr.yml b/.github/workflows/ghcr.yml index 43427b641b11..c5379e0c6b34 100644 --- a/.github/workflows/ghcr.yml +++ b/.github/workflows/ghcr.yml @@ -1,4 +1,4 @@ -name: Build Publish and Test Docker Image +name: Build Publish and Test Runtime Image concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -77,15 +77,109 @@ jobs: name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} path: /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar - test-for-sandbox: - name: Test for Sandbox + ghcr_build_runtime: runs-on: ubuntu-latest - needs: ghcr_build + + outputs: + tags: ${{ steps.capture-tags.outputs.tags }} + + permissions: + contents: read + packages: write + + strategy: + matrix: + image: ["od_runtime"] + base_image: ["ubuntu:22.04"] + platform: ["amd64", "arm64"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: true + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: false + swap-storage: true + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 + + - name: 
Install poetry via pipx + run: pipx install poetry + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "poetry" + + - name: Install Python dependencies using Poetry + run: make install-python-dependencies + + - name: Create source distribution and Dockerfile + run: poetry run python3 opendevin/runtime/utils/runtime_build.py --base_image ${{ matrix.base_image }} --build_folder containers/runtime + + - name: Build and export image + id: build + run: ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} ${{ matrix.platform }} + + - name: Capture tags + id: capture-tags + run: | + tags=$(cat tags.txt) + echo "tags=$tags" + echo "tags=$tags" >> $GITHUB_OUTPUT + + - name: Upload Docker image as artifact + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} + path: /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar + + test_runtime: + name: Test Runtime + runs-on: ubuntu-latest + needs: [ghcr_build_runtime, ghcr_build] env: PERSIST_SANDBOX: "false" + + strategy: + matrix: + runtime_type: ["eventstream", "server"] + steps: - uses: actions/checkout@v4 + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # when set to "true" but frees about 6 GB + tool-cache: true + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + - name: Install poetry via pipx run: pipx install poetry @@ -98,16 +192,28 @@ jobs: - name: Install Python dependencies using Poetry run: make install-python-dependencies - - name: Download sandbox Docker image + - name: Download Runtime Docker image + if: matrix.runtime_type == 'eventstream' + uses: actions/download-artifact@v4 + with: + name: od_runtime-docker-image-amd64 + path: /tmp/ + + - name: Download 
Sandbox Docker image + if: matrix.runtime_type == 'server' uses: actions/download-artifact@v4 with: name: sandbox-docker-image-amd64 path: /tmp/ - - name: Load sandbox image and run sandbox tests + - name: Load Runtime image and run runtime tests run: | # Load the Docker image and capture the output - output=$(docker load -i /tmp/sandbox_image_amd64.tar) + if [ "${{ matrix.runtime_type }}" == "eventstream" ]; then + output=$(docker load -i /tmp/od_runtime_image_amd64.tar) + else + output=$(docker load -i /tmp/sandbox_image_amd64.tar) + fi # Extract the first image name from the output image_name=$(echo "$output" | grep -oP 'Loaded image: \K.*' | head -n 1) @@ -115,14 +221,14 @@ jobs: # Print the full name of the image echo "Loaded Docker image: $image_name" - SANDBOX_CONTAINER_IMAGE=$image_name TEST_IN_CI=true poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml -s ./tests/unit/test_sandbox.py + TEST_RUNTIME=${{ matrix.runtime_type }} SANDBOX_USER_ID=$(id -u) SANDBOX_CONTAINER_IMAGE=$image_name TEST_IN_CI=true poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml -s ./tests/unit/test_runtime.py - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - integration-tests-on-linux: + integration_tests_on_linux: name: Integration Tests on Linux runs-on: ubuntu-latest needs: ghcr_build @@ -174,10 +280,11 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + ghcr_push: runs-on: ubuntu-latest # don't push if integration tests or sandbox tests fail - needs: [ghcr_build, integration-tests-on-linux, test-for-sandbox] + needs: [ghcr_build, test_runtime, integration_tests_on_linux] if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') env: @@ -223,6 +330,78 @@ jobs: docker push $image_name:${tag}_${{ matrix.platform }} done + ghcr_push_runtime: + runs-on: ubuntu-latest + # don't push if runtime tests fail + needs: [ghcr_build_runtime, test_runtime, 
integration_tests_on_linux] + if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') + + env: + tags: ${{ needs.ghcr_build_runtime.outputs.tags }} + + permissions: + contents: read + packages: write + + strategy: + matrix: + image: ["od_runtime"] + platform: ["amd64", "arm64"] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: false + swap-storage: true + + - name: Login to GHCR + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Download Docker images + uses: actions/download-artifact@v4 + with: + name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} + path: /tmp/${{ matrix.platform }} + + - name: List downloaded files + run: | + ls -la /tmp/${{ matrix.platform }} + file /tmp/${{ matrix.platform }}/* + + - name: Load images and push to registry + run: | + mv /tmp/${{ matrix.platform }}/${{ matrix.image }}_image_${{ matrix.platform }}.tar ./${{ matrix.image }}_image_${{ matrix.platform }}.tar + if ! 
loaded_image=$(docker load -i ${{ matrix.image }}_image_${{ matrix.platform }}.tar | grep "Loaded image:" | head -n 1 | awk '{print $3}'); then + echo "Failed to load Docker image" + exit 1 + fi + echo "loaded image = $loaded_image" + tags=$(echo ${tags} | tr ' ' '\n') + image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') + echo "image name = $image_name" + for tag in $tags; do + echo "tag = $tag" + if [ -n "$image_name" ]; then + docker tag $loaded_image $image_name:${tag}_${{ matrix.platform }} + docker push $image_name:${tag}_${{ matrix.platform }} + else + echo "Skipping tag and push due to empty image_name" + fi + done + create_manifest: runs-on: ubuntu-latest needs: [ghcr_build, ghcr_push] @@ -261,3 +440,42 @@ jobs: $image_name:${tag}_amd64 \ $image_name:${tag}_arm64 done + + create_manifest_runtime: + runs-on: ubuntu-latest + needs: [ghcr_build_runtime, ghcr_push_runtime] + if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') + + env: + tags: ${{ needs.ghcr_build_runtime.outputs.tags }} + + strategy: + matrix: + image: ["od_runtime"] + + permissions: + contents: read + packages: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Login to GHCR + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create and push multi-platform manifest + run: | + image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') + echo "image name = $image_name" + tags=$(echo ${tags} | tr ' ' '\n') + for tag in $tags; do + echo 'tag = $tag' + docker buildx imagetools create --tag $image_name:$tag \ + $image_name:${tag}_amd64 \ + $image_name:${tag}_arm64 + done diff --git a/opendevin/core/logger.py b/opendevin/core/logger.py index e5a74206f016..a39530befc91 100644 --- a/opendevin/core/logger.py +++ b/opendevin/core/logger.py @@ 
-123,9 +123,8 @@ def get_console_handler(): return console_handler -def get_file_handler(log_dir=None): +def get_file_handler(log_dir): """Returns a file handler for logging.""" - log_dir = os.path.join(os.getcwd(), 'logs') if log_dir is None else log_dir os.makedirs(log_dir, exist_ok=True) timestamp = datetime.now().strftime('%Y-%m-%d') file_name = f'opendevin_{timestamp}.log' @@ -159,16 +158,21 @@ def log_uncaught_exceptions(ex_cls, ex, tb): opendevin_logger = logging.getLogger('opendevin') opendevin_logger.setLevel(logging.INFO) +LOG_DIR = os.path.join( + # parent dir of opendevin/core (i.e., root of the repo) + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + 'logs', +) if DEBUG: opendevin_logger.setLevel(logging.DEBUG) -opendevin_logger.addHandler(get_file_handler()) + # default log to project root + opendevin_logger.info('DEBUG logging is enabled. Logging to %s', LOG_DIR) +opendevin_logger.addHandler(get_file_handler(LOG_DIR)) opendevin_logger.addHandler(get_console_handler()) opendevin_logger.addFilter(SensitiveDataFilter(opendevin_logger.name)) opendevin_logger.propagate = False opendevin_logger.debug('Logging initialized') -opendevin_logger.debug( - 'Logging to %s', os.path.join(os.getcwd(), 'logs', 'opendevin.log') -) + # Exclude LiteLLM from logging output logging.getLogger('LiteLLM').disabled = True @@ -194,7 +198,7 @@ def __init__(self, filename, mode='a', encoding='utf-8', delay=False): self.session = datetime.now().strftime('%y-%m-%d_%H-%M') else: self.session = 'default' - self.log_directory = os.path.join(os.getcwd(), 'logs', 'llm', self.session) + self.log_directory = os.path.join(LOG_DIR, 'llm', self.session) os.makedirs(self.log_directory, exist_ok=True) if not DEBUG: # Clear the log directory if not in debug mode diff --git a/opendevin/core/main.py b/opendevin/core/main.py index f51e0d51783d..9676948662c5 100644 --- a/opendevin/core/main.py +++ b/opendevin/core/main.py @@ -87,9 +87,13 @@ async def 
run_agent_controller( # runtime and tools runtime_cls = get_runtime_cls(config.runtime) - runtime = runtime_cls(config=config, event_stream=event_stream, sandbox=sandbox) + runtime = runtime_cls( + config=config, + event_stream=event_stream, + sandbox=sandbox, + plugins=controller.agent.sandbox_plugins, + ) await runtime.ainit() - runtime.init_sandbox_plugins(controller.agent.sandbox_plugins) runtime.init_runtime_tools( controller.agent.runtime_tools, is_async=False, diff --git a/opendevin/runtime/client/client.py b/opendevin/runtime/client/client.py index aa3c12d9fb1f..63c3526406e0 100644 --- a/opendevin/runtime/client/client.py +++ b/opendevin/runtime/client/client.py @@ -13,6 +13,8 @@ import asyncio import os import re +import subprocess +from contextlib import asynccontextmanager from pathlib import Path import pexpect @@ -35,6 +37,7 @@ ErrorObservation, FileReadObservation, FileWriteObservation, + IPythonRunCellObservation, Observation, ) from opendevin.events.serialization import event_from_dict, event_to_dict @@ -48,8 +51,6 @@ from opendevin.runtime.server.files import insert_lines, read_lines from opendevin.runtime.utils import split_bash_commands -app = FastAPI() - class ActionRequest(BaseModel): action: dict @@ -60,19 +61,81 @@ class RuntimeClient: It is responsible for executing actions received from OpenDevin backend and producing observations. 
""" - def __init__(self, plugins_to_load: list[Plugin], work_dir: str) -> None: - self._init_bash_shell(work_dir) + def __init__( + self, plugins_to_load: list[Plugin], work_dir: str, username: str, user_id: int + ) -> None: + self.plugins_to_load = plugins_to_load + self.username = username + self.user_id = user_id + self.pwd = work_dir # current PWD + self._init_user(self.username, self.user_id) + self._init_bash_shell(self.pwd, self.username) self.lock = asyncio.Lock() self.plugins: dict[str, Plugin] = {} self.browser = BrowserEnv() - for plugin in plugins_to_load: - plugin.initialize() + async def ainit(self): + for plugin in self.plugins_to_load: + await plugin.initialize(self.username) self.plugins[plugin.name] = plugin logger.info(f'Initializing plugin: {plugin.name}') - def _init_bash_shell(self, work_dir: str) -> None: - self.shell = pexpect.spawn('/bin/bash', encoding='utf-8', echo=False) + if isinstance(plugin, JupyterPlugin): + await self.run_ipython( + IPythonRunCellAction(code=f'import os; os.chdir("{self.pwd}")') + ) + + # This is a temporary workaround + # TODO: refactor AgentSkills to be part of JupyterPlugin + # AFTER ServerRuntime is deprecated + if 'agent_skills' in self.plugins and 'jupyter' in self.plugins: + obs = await self.run_ipython( + IPythonRunCellAction( + code=( + 'import sys\n' + 'sys.path.insert(0, "/opendevin/code/opendevin/runtime/plugins/agent_skills")\n' + 'from agentskills import *' + ) + ) + ) + logger.info(f'AgentSkills initialized: {obs}') + + def _init_user(self, username: str, user_id: int) -> None: + """Create user if not exists.""" + # Skip root since it is already created + if username == 'root': + return + + # Add sudoer + sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers" + output = subprocess.run(sudoer_line, shell=True, capture_output=True) + if output.returncode != 0: + raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}') + logger.debug(f'Added sudoer successfully. 
Output: [{output.stdout.decode()}]') + + # Add user + output = subprocess.run( + ( + f'useradd -rm -d /home/{username} -s /bin/bash ' + f'-g root -G sudo -g root -G sudo -u {user_id} {username}' + ), + shell=True, + capture_output=True, + ) + if output.returncode != 0: + raise RuntimeError( + f'Failed to create user {username}: {output.stderr.decode()}' + ) + logger.debug( + f'Added user {username} successfully. Output: [{output.stdout.decode()}]' + ) + + def _init_bash_shell(self, work_dir: str, username: str) -> None: + self.shell = pexpect.spawn( + f'su - {username}', + encoding='utf-8', + echo=False, + ) self.__bash_PS1 = r'[PEXPECT_BEGIN] \u@\h:\w [PEXPECT_END]' # This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed @@ -85,8 +148,11 @@ def _init_bash_shell(self, work_dir: str) -> None: self.shell.sendline(f'cd {work_dir}') self.shell.expect(self.__bash_expect_regex) + logger.debug( + f'Bash initialized. Working directory: {work_dir}. Output: {self.shell.before}' + ) - def _get_bash_prompt(self): + def _get_bash_prompt_and_update_pwd(self): ps1 = self.shell.after # begin at the last occurence of '[PEXPECT_BEGIN]'. @@ -103,6 +169,8 @@ def _get_bash_prompt(self): matched is not None ), f'Failed to parse bash prompt: {ps1}. This should not happen.' 
username, hostname, working_dir = matched.groups() + self._prev_pwd = self.pwd + self.pwd = working_dir # re-assemble the prompt prompt = f'{username}@{hostname}:{working_dir} ' @@ -112,20 +180,25 @@ def _get_bash_prompt(self): prompt += '$' return prompt + ' ' - def _execute_bash(self, command: str, keep_prompt: bool = True) -> tuple[str, int]: + def _execute_bash( + self, + command: str, + keep_prompt: bool = True, + timeout: int = 300, + ) -> tuple[str, int]: logger.debug(f'Executing command: {command}') self.shell.sendline(command) - self.shell.expect(self.__bash_expect_regex) + self.shell.expect(self.__bash_expect_regex, timeout=timeout) output = self.shell.before if keep_prompt: - output += '\r\n' + self._get_bash_prompt() + output += '\r\n' + self._get_bash_prompt_and_update_pwd() logger.debug(f'Command output: {output}') # Get exit code self.shell.sendline('echo $?') logger.debug(f'Executing command for exit code: {command}') - self.shell.expect(self.__bash_expect_regex) + self.shell.expect(self.__bash_expect_regex, timeout=timeout) _exit_code_output = self.shell.before logger.debug(f'Exit code Output: {_exit_code_output}') exit_code = int(_exit_code_output.strip().split()[0]) @@ -134,7 +207,6 @@ def _execute_bash(self, command: str, keep_prompt: bool = True) -> tuple[str, in async def run_action(self, action) -> Observation: action_type = action.action observation = await getattr(self, action_type)(action) - observation._parent = action.id return observation async def run(self, action: CmdRunAction) -> CmdOutputObservation: @@ -164,7 +236,18 @@ async def run(self, action: CmdRunAction) -> CmdOutputObservation: async def run_ipython(self, action: IPythonRunCellAction) -> Observation: if 'jupyter' in self.plugins: _jupyter_plugin: JupyterPlugin = self.plugins['jupyter'] # type: ignore - return await _jupyter_plugin.run(action) + + # This is used to make AgentSkills in Jupyter aware of the + # current working directory in Bash + if not hasattr(self, 
'_prev_pwd') or self.pwd != self._prev_pwd: + reset_jupyter_pwd_code = ( + f'import os; os.environ["JUPYTER_PWD"] = "{self.pwd}"\n\n' + ) + _aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code) + _ = await _jupyter_plugin.run(_aux_action) + + obs: IPythonRunCellObservation = await _jupyter_plugin.run(action) + return obs else: raise RuntimeError( 'JupyterRequirement not found. Unable to run IPython action.' @@ -272,6 +355,10 @@ def close(self): parser.add_argument('port', type=int, help='Port to listen on') parser.add_argument('--working-dir', type=str, help='Working directory') parser.add_argument('--plugins', type=str, help='Plugins to initialize', nargs='+') + parser.add_argument( + '--username', type=str, help='User to run as', default='opendevin' + ) + parser.add_argument('--user-id', type=int, help='User ID to run as', default=1000) # example: python client.py 8000 --working-dir /workspace --plugins JupyterRequirement args = parser.parse_args() @@ -282,16 +369,34 @@ def close(self): raise ValueError(f'Plugin {plugin} not found') plugins_to_load.append(ALL_PLUGINS[plugin]()) # type: ignore - client = RuntimeClient(plugins_to_load, work_dir=args.working_dir) + client: RuntimeClient | None = None + + @asynccontextmanager + async def lifespan(app: FastAPI): + global client + client = RuntimeClient( + plugins_to_load, + work_dir=args.working_dir, + username=args.username, + user_id=args.user_id, + ) + await client.ainit() + yield + # Clean up & release the resources + client.close() + + app = FastAPI(lifespan=lifespan) @app.middleware('http') async def one_request_at_a_time(request: Request, call_next): + assert client is not None async with client.lock: response = await call_next(request) return response @app.post('/execute_action') async def execute_action(action_request: ActionRequest): + assert client is not None try: action = event_from_dict(action_request.action) if not isinstance(action, Action): diff --git a/opendevin/runtime/client/runtime.py 
b/opendevin/runtime/client/runtime.py index 5c39c3a59189..9116b9f90dfa 100644 --- a/opendevin/runtime/client/runtime.py +++ b/opendevin/runtime/client/runtime.py @@ -44,10 +44,12 @@ def __init__( config: AppConfig, event_stream: EventStream, sid: str = 'default', - container_image: str | None = None, plugins: list[PluginRequirement] | None = None, + container_image: str | None = None, ): - super().__init__(config, event_stream, sid) # will initialize the event stream + super().__init__( + config, event_stream, sid, plugins + ) # will initialize the event stream self._port = find_available_tcp_port() self.api_url = f'http://localhost:{self._port}' self.session: Optional[aiohttp.ClientSession] = None @@ -139,7 +141,9 @@ async def _init_container( 'PYTHONUNBUFFERED=1 poetry run ' f'python -u -m opendevin.runtime.client.client {self._port} ' f'--working-dir {sandbox_workspace_dir} ' - f'--plugins {plugin_names}' + f'--plugins {plugin_names} ' + f'--username {"opendevin" if self.config.run_as_devin else "root"} ' + f'--user-id {self.config.sandbox.user_id}' ), network_mode=network_mode, ports=port_mapping, @@ -206,7 +210,7 @@ async def on_event(self, event: Event) -> None: if isinstance(event, Action): logger.info(event, extra={'msg_type': 'ACTION'}) observation = await self.run_action(event) - # observation._cause = event.id # type: ignore[attr-defined] + observation._cause = event.id # type: ignore[attr-defined] logger.info(observation, extra={'msg_type': 'OBSERVATION'}) source = event.source if event.source else EventSource.AGENT await self.event_stream.add_event(observation, source) @@ -248,7 +252,6 @@ async def run_action(self, action: Action, timeout: int = 600) -> Observation: except Exception as e: logger.error(f'Error during command execution: {e}') obs = ErrorObservation(f'Command execution failed: {str(e)}') - obs._parent = action.id # type: ignore[attr-defined] return obs async def run(self, action: CmdRunAction) -> Observation: @@ -277,14 +280,3 @@ def 
get_working_directory(self): raise NotImplementedError( 'This method is not implemented in the runtime client.' ) - - ############################################################################ - # Initialization work inside sandbox image - ############################################################################ - - # init_runtime_tools direcctly do as what Runtime do - - # Do in the od_runtime_client - # Overwrite the init_sandbox_plugins - def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None: - pass diff --git a/opendevin/runtime/e2b/runtime.py b/opendevin/runtime/e2b/runtime.py index 36d162b3d8d8..733e9f757ed1 100644 --- a/opendevin/runtime/e2b/runtime.py +++ b/opendevin/runtime/e2b/runtime.py @@ -11,6 +11,7 @@ ) from opendevin.events.stream import EventStream from opendevin.runtime import Sandbox +from opendevin.runtime.plugins import PluginRequirement from opendevin.runtime.server.files import insert_lines, read_lines from opendevin.runtime.server.runtime import ServerRuntime @@ -24,9 +25,10 @@ def __init__( config: AppConfig, event_stream: EventStream, sid: str = 'default', + plugins: list[PluginRequirement] | None = None, sandbox: Sandbox | None = None, ): - super().__init__(config, event_stream, sid, sandbox) + super().__init__(config, event_stream, sid, plugins, sandbox) if not isinstance(self.sandbox, E2BSandbox): raise ValueError('E2BRuntime requires an E2BSandbox') self.file_store = E2BFileStore(self.sandbox.filesystem) diff --git a/opendevin/runtime/plugins/agent_skills/agentskills.py b/opendevin/runtime/plugins/agent_skills/agentskills.py index d6fdea9084eb..b7244b744ab4 100644 --- a/opendevin/runtime/plugins/agent_skills/agentskills.py +++ b/opendevin/runtime/plugins/agent_skills/agentskills.py @@ -41,30 +41,76 @@ WINDOW = 100 -ENABLE_AUTO_LINT = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true' - # This is also used in unit tests! MSG_FILE_UPDATED = '[File updated (edited at line {line_number}). 
Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]' + +# ================================================================================================== # OPENAI -OPENAI_API_KEY = os.getenv( - 'OPENAI_API_KEY', os.getenv('SANDBOX_ENV_OPENAI_API_KEY', '') -) -OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1') -OPENAI_MODEL = os.getenv('OPENAI_MODEL', 'gpt-4o-2024-05-13') -MAX_TOKEN = os.getenv('MAX_TOKEN', 500) +# TODO: Move this to EventStream Actions when EventStreamRuntime is fully implemented +# NOTE: we need to get env vars inside functions because they will be set in IPython +# AFTER the agentskills is imported (the case for EventStreamRuntime) +# ================================================================================================== +def _get_openai_api_key(): + return os.getenv('OPENAI_API_KEY', os.getenv('SANDBOX_ENV_OPENAI_API_KEY', '')) + + +def _get_openai_base_url(): + return os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1') + + +def _get_openai_model(): + return os.getenv('OPENAI_MODEL', 'gpt-4o-2024-05-13') -OPENAI_PROXY = f'{OPENAI_BASE_URL}/chat/completions' -client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL) +def _get_max_token(): + return os.getenv('MAX_TOKEN', 500) + + +def _get_openai_client(): + client = OpenAI(api_key=_get_openai_api_key(), base_url=_get_openai_base_url()) + return client + + +# ================================================================================================== # Define the decorator using the functionality of UpdatePwd def update_pwd_decorator(func): @functools.wraps(func) def wrapper(*args, **kwargs): - old_pwd = os.getcwd() jupyter_pwd = os.environ.get('JUPYTER_PWD', None) + try: + old_pwd = os.getcwd() + except FileNotFoundError: + import json + import subprocess + + print( + f'DEBUGGING Environment variables: {json.dumps(dict(os.environ), indent=2)}' + ) + 
print(f'DEBUGGING User ID: {os.getuid()}, Group ID: {os.getgid()}') + + out = subprocess.run(['pwd'], capture_output=True) + old_pwd = out.stdout.decode('utf-8').strip() + os.chdir(old_pwd) + print(f'DEBUGGING Change to working directory: {old_pwd}') + + import tempfile + + try: + tempfile.TemporaryFile(dir=old_pwd) + print(f'DEBUGGING Directory {old_pwd} is writable') + except Exception as e: + print(f'DEBUGGING Directory {old_pwd} is not writable: {str(e)}') + + # ls -alh + out = subprocess.run(['ls', '-alh', old_pwd], capture_output=True) + print( + f'DEBUGGING OLD working directory contents: {out.stdout.decode("utf-8")}' + ) + print(f'DEBUGGING Target JUPYTER pwd: {jupyter_pwd}') + if jupyter_pwd: os.chdir(jupyter_pwd) try: @@ -506,7 +552,10 @@ def _edit_file_impl( shutil.move(temp_file_path, src_abs_path) # Handle linting - if ENABLE_AUTO_LINT: + # NOTE: we need to get env var inside this function + # because the env var will be set AFTER the agentskills is imported + enable_auto_lint = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true' + if enable_auto_lint: # BACKUP the original file original_file_backup_path = os.path.join( os.path.dirname(file_name), @@ -954,7 +1003,9 @@ def parse_audio(file_path: str, model: str = 'whisper-1') -> None: try: # TODO: record the COST of the API call with open(file_path, 'rb') as audio_file: - transcript = client.audio.translations.create(model=model, file=audio_file) + transcript = _get_openai_client().audio.translations.create( + model=model, file=audio_file + ) print(transcript.text) except Exception as e: @@ -975,10 +1026,10 @@ def parse_image( # TODO: record the COST of the API call try: base64_image = _base64_img(file_path) - response = client.chat.completions.create( - model=OPENAI_MODEL, + response = _get_openai_client().chat.completions.create( + model=_get_openai_model(), messages=_prepare_image_messages(task, base64_image), - max_tokens=MAX_TOKEN, + max_tokens=_get_max_token(), ) content = 
response.choices[0].message.content print(content) @@ -1021,10 +1072,10 @@ def parse_video( print(f'Process the {file_path}, current No. {idx * frame_interval} frame...') # TODO: record the COST of the API call try: - response = client.chat.completions.create( - model=OPENAI_MODEL, + response = _get_openai_client().chat.completions.create( + model=_get_openai_model(), messages=_prepare_image_messages(task, base64_frame), - max_tokens=MAX_TOKEN, + max_tokens=_get_max_token(), ) content = response.choices[0].message.content @@ -1077,7 +1128,9 @@ def parse_pptx(file_path: str) -> None: 'parse_pptx', ] -if OPENAI_API_KEY and OPENAI_BASE_URL: +# This is called from OpenDevin's side +# If SANDBOX_ENV_OPENAI_API_KEY is set, we will be able to use these tools in the sandbox environment +if _get_openai_api_key() and _get_openai_base_url(): __all__ += ['parse_audio', 'parse_video', 'parse_image'] DOCUMENTATION = '' diff --git a/opendevin/runtime/plugins/jupyter/__init__.py b/opendevin/runtime/plugins/jupyter/__init__.py index 1178a95fd0da..891909fd001f 100644 --- a/opendevin/runtime/plugins/jupyter/__init__.py +++ b/opendevin/runtime/plugins/jupyter/__init__.py @@ -3,8 +3,9 @@ import time from dataclasses import dataclass +from opendevin.core.logger import opendevin_logger as logger from opendevin.events.action import Action, IPythonRunCellAction -from opendevin.events.observation import IPythonRunCellObservation, Observation +from opendevin.events.observation import IPythonRunCellObservation from opendevin.runtime.plugins.requirement import Plugin, PluginRequirement from opendevin.runtime.utils import find_available_tcp_port @@ -29,33 +30,38 @@ class JupyterRequirement(PluginRequirement): class JupyterPlugin(Plugin): name: str = 'jupyter' - def initialize(self, kernel_id: str = 'opendevin-default'): + async def initialize(self, username: str, kernel_id: str = 'opendevin-default'): self.kernel_gateway_port = find_available_tcp_port() self.kernel_id = kernel_id 
self.gateway_process = subprocess.Popen( - [ - '/opendevin/miniforge3/bin/mamba', - 'run', - '-n', - 'base', - 'poetry', - 'run', - 'jupyter', - 'kernelgateway', - '--KernelGatewayApp.ip=0.0.0.0', - f'--KernelGatewayApp.port={self.kernel_gateway_port}', - ], + ( + f"su - {username} -s /bin/bash << 'EOF'\n" + 'cd /opendevin/code\n' + 'export POETRY_VIRTUALENVS_PATH=/opendevin/poetry;\n' + '/opendevin/miniforge3/bin/mamba run -n base ' + 'poetry run jupyter kernelgateway ' + '--KernelGatewayApp.ip=0.0.0.0 ' + f'--KernelGatewayApp.port={self.kernel_gateway_port}\n' + 'EOF' + ), stderr=subprocess.STDOUT, + shell=True, ) # read stdout until the kernel gateway is ready + output = '' while True and self.gateway_process.stdout is not None: line = self.gateway_process.stdout.readline().decode('utf-8') + output += line if 'at' in line: break time.sleep(1) - print('Waiting for jupyter kernel gateway to start...') + logger.debug('Waiting for jupyter kernel gateway to start...') - async def run(self, action: Action) -> Observation: + logger.info( + f'Jupyter kernel gateway started at port {self.kernel_gateway_port}. 
Output: {output}' + ) + + async def run(self, action: Action) -> IPythonRunCellObservation: if not isinstance(action, IPythonRunCellAction): raise ValueError( f'Jupyter plugin only supports IPythonRunCellAction, but got {action}' diff --git a/opendevin/runtime/plugins/jupyter/execute_server.py b/opendevin/runtime/plugins/jupyter/execute_server.py index 1388875c9b5a..f43bc2e73ae5 100755 --- a/opendevin/runtime/plugins/jupyter/execute_server.py +++ b/opendevin/runtime/plugins/jupyter/execute_server.py @@ -73,8 +73,8 @@ async def initialize(self): if os.path.exists('/opendevin/plugins/agent_skills/agentskills.py'): self.tools_to_run.append('from agentskills import *') for tool in self.tools_to_run: - # logging.info(f'Tool initialized:\n{tool}') - await self.execute(tool) + res = await self.execute(tool) + logging.info(f'Tool [{tool}] initialized:\n{res}') self.initialized = True async def _send_heartbeat(self): diff --git a/opendevin/runtime/plugins/mixin.py b/opendevin/runtime/plugins/mixin.py index 14088eb493fa..6e3de379a4f0 100644 --- a/opendevin/runtime/plugins/mixin.py +++ b/opendevin/runtime/plugins/mixin.py @@ -82,13 +82,14 @@ def init_plugins(self: SandboxProtocol, requirements: list[PluginRequirement]): raise RuntimeError( f'Failed to initialize plugin {requirement.name} with exit code {_exit_code} and output: {total_output.strip()}' ) - logger.info(f'Plugin {requirement.name} initialized successfully') + logger.debug(f'Output: {total_output.strip()}') else: if exit_code != 0: raise RuntimeError( f'Failed to initialize plugin {requirement.name} with exit code {exit_code} and output: {output}' ) - logger.info(f'Plugin {requirement.name} initialized successfully.') + logger.debug(f'Output: {output}') + logger.info(f'Plugin {requirement.name} initialized successfully') else: logger.info('Skipping plugin initialization in the sandbox') diff --git a/opendevin/runtime/plugins/requirement.py b/opendevin/runtime/plugins/requirement.py index 3971dad8c184..14399061e61a 
100644 --- a/opendevin/runtime/plugins/requirement.py +++ b/opendevin/runtime/plugins/requirement.py @@ -14,7 +14,7 @@ class Plugin: name: str @abstractmethod - def initialize(self): + async def initialize(self, username: str): """Initialize the plugin.""" pass diff --git a/opendevin/runtime/runtime.py b/opendevin/runtime/runtime.py index b38de62f3795..70873d0397b4 100644 --- a/opendevin/runtime/runtime.py +++ b/opendevin/runtime/runtime.py @@ -28,7 +28,7 @@ RejectObservation, ) from opendevin.events.serialization.action import ACTION_TYPE_TO_CLASS -from opendevin.runtime.plugins import PluginRequirement +from opendevin.runtime.plugins import JupyterRequirement, PluginRequirement from opendevin.runtime.tools import RuntimeTool from opendevin.storage import FileStore @@ -60,10 +60,13 @@ def __init__( config: AppConfig, event_stream: EventStream, sid: str = 'default', + plugins: list[PluginRequirement] | None = None, ): self.sid = sid self.event_stream = event_stream self.event_stream.subscribe(EventStreamSubscriber.RUNTIME, self.on_event) + self.plugins = plugins if plugins is not None else [] + self.config = copy.deepcopy(config) self.DEFAULT_ENV_VARS = _default_env_vars(config.sandbox) atexit.register(self.close_sync) @@ -101,10 +104,6 @@ def close_sync(self) -> None: # Methods we plan to deprecate when we move to new EventStreamRuntime # ==================================================================== - def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None: - # TODO: deprecate this method when we move to the new EventStreamRuntime - raise NotImplementedError('This method is not implemented in the base class.') - def init_runtime_tools( self, runtime_tools: list[RuntimeTool], @@ -117,6 +116,17 @@ def init_runtime_tools( # ==================================================================== async def add_env_vars(self, env_vars: dict[str, str]) -> None: + # Add env vars to the IPython shell (if Jupyter is used) + if any(isinstance(plugin, 
JupyterRequirement) for plugin in self.plugins): + code = 'import os\n' + for key, value in env_vars.items(): + # Note: json.dumps gives us nice escaping for free + code += f'os.environ["{key}"] = {json.dumps(value)}\n' + code += '\n' + obs = await self.run_ipython(IPythonRunCellAction(code)) + logger.info(f'Added env vars to IPython: code={code}, obs={obs}') + + # Add env vars to the Bash shell cmd = '' for key, value in env_vars.items(): # Note: json.dumps gives us nice escaping for free @@ -125,7 +135,7 @@ async def add_env_vars(self, env_vars: dict[str, str]) -> None: return cmd = cmd.strip() logger.debug(f'Adding env var: {cmd}') - obs: Observation = await self.run(CmdRunAction(cmd)) + obs = await self.run(CmdRunAction(cmd)) if not isinstance(obs, CmdOutputObservation) or obs.exit_code != 0: raise RuntimeError( f'Failed to add env vars [{env_vars}] to environment: {obs.content}' @@ -164,7 +174,6 @@ async def run_action(self, action: Action) -> Observation: 'Action has been rejected by the user! Waiting for further user input.' 
) observation = await getattr(self, action_type)(action) - observation._parent = action.id # type: ignore[attr-defined] return observation # ==================================================================== diff --git a/opendevin/runtime/server/runtime.py b/opendevin/runtime/server/runtime.py index 392c0bafd5da..9cfa6ad446c6 100644 --- a/opendevin/runtime/server/runtime.py +++ b/opendevin/runtime/server/runtime.py @@ -25,7 +25,7 @@ Sandbox, ) from opendevin.runtime.browser.browser_env import BrowserEnv -from opendevin.runtime.plugins import PluginRequirement +from opendevin.runtime.plugins import JupyterRequirement, PluginRequirement from opendevin.runtime.runtime import Runtime from opendevin.runtime.tools import RuntimeTool from opendevin.storage.local import LocalFileStore @@ -40,9 +40,10 @@ def __init__( config: AppConfig, event_stream: EventStream, sid: str = 'default', + plugins: list[PluginRequirement] | None = None, sandbox: Sandbox | None = None, ): - super().__init__(config, event_stream, sid) + super().__init__(config, event_stream, sid, plugins) self.file_store = LocalFileStore(config.workspace_base) if sandbox is None: self.sandbox = self.create_sandbox(sid, config.sandbox.box_type) @@ -79,19 +80,29 @@ def create_sandbox(self, sid: str = 'default', box_type: str = 'ssh') -> Sandbox raise ValueError(f'Invalid sandbox type: {box_type}') async def ainit(self, env_vars: dict[str, str] | None = None): + # init sandbox plugins + self.sandbox.init_plugins(self.plugins) + # MUST call super().ainit() to initialize both default env vars # AND the ones in env vars! await super().ainit(env_vars) + if any(isinstance(plugin, JupyterRequirement) for plugin in self.plugins): + obs = await self.run_ipython( + IPythonRunCellAction( + code=f'import os; os.chdir("{self.config.workspace_mount_path_in_sandbox}")' + ) + ) + logger.info( + f'Switch to working directory {self.config.workspace_mount_path_in_sandbox} in IPython. 
Output: {obs.content}' + ) + async def close(self): if hasattr(self, '_is_external_sandbox') and not self._is_external_sandbox: self.sandbox.close() if hasattr(self, 'browser') and self.browser is not None: self.browser.close() - def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None: - self.sandbox.init_plugins(plugins) - def init_runtime_tools( self, runtime_tools: list[RuntimeTool], diff --git a/opendevin/runtime/utils/bash.py b/opendevin/runtime/utils/bash.py index a65342d5ffa3..6de80c38840a 100644 --- a/opendevin/runtime/utils/bash.py +++ b/opendevin/runtime/utils/bash.py @@ -7,8 +7,11 @@ def split_bash_commands(commands): try: parsed = bashlex.parse(commands) except bashlex.errors.ParsingError as e: - logger.error( - f'Failed to parse bash commands\n[input]: {commands}\n[error]: {e}' + logger.debug( + f'Failed to parse bash commands\n' + f'[input]: {commands}\n' + f'[warning]: {e}\n' + f'The original command will be returned as is.' ) # If parsing fails, return the original commands return [commands] diff --git a/opendevin/runtime/utils/runtime_templates/Dockerfile.j2 b/opendevin/runtime/utils/runtime_templates/Dockerfile.j2 index 2502518b88e8..fc15bda732c1 100644 --- a/opendevin/runtime/utils/runtime_templates/Dockerfile.j2 +++ b/opendevin/runtime/utils/runtime_templates/Dockerfile.j2 @@ -5,6 +5,7 @@ FROM {{ base_image }} # START: Build Runtime Image from Scratch # ================================================================ FROM {{ base_image }} + {% if 'ubuntu' in base_image and (base_image.endswith(':latest') or base_image.endswith(':24.04')) %} {% set LIBGL_MESA = 'libgl1' %} {% else %} @@ -20,8 +21,10 @@ RUN apt-get update && \ # Create necessary directories RUN mkdir -p /opendevin && \ mkdir -p /opendevin/logs && \ - chmod 777 /opendevin/logs && \ - echo "" > /opendevin/bash.bashrc + mkdir -p /opendevin/poetry && \ + chmod 777 -R /opendevin + +ENV POETRY_VIRTUALENVS_PATH=/opendevin/poetry RUN if [ ! 
-d /opendevin/miniforge3 ]; then \ wget --progress=bar:force -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \ diff --git a/opendevin/server/session/agent.py b/opendevin/server/session/agent.py index 616a99501bff..91e20b986b01 100644 --- a/opendevin/server/session/agent.py +++ b/opendevin/server/session/agent.py @@ -51,7 +51,7 @@ async def start( raise Exception( 'Session already started. You need to close this session and start a new one.' ) - await self._create_runtime(runtime_name, config) + await self._create_runtime(runtime_name, config, agent) await self._create_controller( agent, confirmation_mode, @@ -71,7 +71,7 @@ async def close(self): await self.runtime.close() self._closed = True - async def _create_runtime(self, runtime_name: str, config: AppConfig): + async def _create_runtime(self, runtime_name: str, config: AppConfig, agent: Agent): """Creates a runtime instance.""" if self.runtime is not None: raise Exception('Runtime already created') @@ -79,7 +79,10 @@ async def _create_runtime(self, runtime_name: str, config: AppConfig): logger.info(f'Using runtime: {runtime_name}') runtime_cls = get_runtime_cls(runtime_name) self.runtime = runtime_cls( - config=config, event_stream=self.event_stream, sid=self.sid + config=config, + event_stream=self.event_stream, + sid=self.sid, + plugins=agent.sandbox_plugins, ) await self.runtime.ainit() @@ -107,7 +110,6 @@ async def _create_controller( 'CodeActAgent requires DockerSSHBox as sandbox! Using other sandbox that are not stateful' ' LocalBox will not work properly.' 
) - self.runtime.init_sandbox_plugins(agent.sandbox_plugins) self.runtime.init_runtime_tools(agent.runtime_tools) self.controller = AgentController( diff --git a/tests/unit/test_agent_skill.py b/tests/unit/test_agent_skill.py index 46d205e6f420..5067b4be3d46 100644 --- a/tests/unit/test_agent_skill.py +++ b/tests/unit/test_agent_skill.py @@ -2,6 +2,7 @@ import io import os import sys +from unittest.mock import patch import docx import pytest @@ -488,13 +489,9 @@ def test_open_file_large_line_number_consecutive_diff_window(tmp_path): assert result == expected -def test_edit_file_by_replace_window(tmp_path, monkeypatch): - # Set environment variable via monkeypatch does NOT work! - monkeypatch.setattr( - 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True - ) - - content = """def any_int(a, b, c): +def test_edit_file_by_replace_window(tmp_path): + with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): + content = """def any_int(a, b, c): return isinstance(a, int) and isinstance(b, int) and isinstance(c, int) def test_any_int(): @@ -528,83 +525,83 @@ def check(any_int): check(any_int)""" - temp_file_path = tmp_path / 'error-test.py' - temp_file_path.write_text(content) + temp_file_path = tmp_path / 'error-test.py' + temp_file_path.write_text(content) - open_file(str(temp_file_path)) + open_file(str(temp_file_path)) - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - edit_file_by_replace( - str(temp_file_path), - to_replace=' assert any_int(1.0, 2, 3) == False', - new_content=' assert any_int(1.0, 2, 3) == False', + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + edit_file_by_replace( + str(temp_file_path), + to_replace=' assert any_int(1.0, 2, 3) == False', + new_content=' assert any_int(1.0, 2, 3) == False', + ) + result = buf.getvalue() + expected = ( + '[Your proposed edit has introduced new syntax error(s). 
Please understand the errors and retry your edit command.]\n' + 'ERRORS:\n' + + str(temp_file_path) + + ':9:9: ' + + 'E999 IndentationError: unexpected indent\n' + '[This is how your edit would have looked if applied]\n' + '-------------------------------------------------\n' + '(this is the beginning of the file)\n' + '1|def any_int(a, b, c):\n' + '2| return isinstance(a, int) and isinstance(b, int) and isinstance(c, int)\n' + '3|\n' + '4|def test_any_int():\n' + '5| assert any_int(1, 2, 3) == True\n' + '6| assert any_int(1.5, 2, 3) == False\n' + '7| assert any_int(1, 2.5, 3) == False\n' + '8| assert any_int(1, 2, 3.5) == False\n' + '9| assert any_int(1.0, 2, 3) == False\n' + '10| assert any_int(1, 2.0, 3) == False\n' + '11| assert any_int(1, 2, 3.0) == False\n' + '12| assert any_int(0, 0, 0) == True\n' + '13| assert any_int(-1, -2, -3) == True\n' + '14| assert any_int(1, -2, 3) == True\n' + '15| assert any_int(1.5, -2, 3) == False\n' + '16| assert any_int(1, -2.5, 3) == False\n' + '17|\n' + '18|def check(any_int):\n' + '19| # Check some simple cases\n' + '20| assert any_int(2, 3, 1)==True, "This prints if this assert fails 1 (good for debugging!)"\n' + '21| assert any_int(2.5, 2, 3)==False, "This prints if this assert fails 2 (good for debugging!)"\n' + '(12 more lines below)\n' + '-------------------------------------------------\n' + '\n' + '[This is the original code before your edit]\n' + '-------------------------------------------------\n' + '(this is the beginning of the file)\n' + '1|def any_int(a, b, c):\n' + '2| return isinstance(a, int) and isinstance(b, int) and isinstance(c, int)\n' + '3|\n' + '4|def test_any_int():\n' + '5| assert any_int(1, 2, 3) == True\n' + '6| assert any_int(1.5, 2, 3) == False\n' + '7| assert any_int(1, 2.5, 3) == False\n' + '8| assert any_int(1, 2, 3.5) == False\n' + '9| assert any_int(1.0, 2, 3) == False\n' + '10| assert any_int(1, 2.0, 3) == False\n' + '11| assert any_int(1, 2, 3.0) == False\n' + '12| assert any_int(0, 0, 0) 
== True\n' + '13| assert any_int(-1, -2, -3) == True\n' + '14| assert any_int(1, -2, 3) == True\n' + '15| assert any_int(1.5, -2, 3) == False\n' + '16| assert any_int(1, -2.5, 3) == False\n' + '17|\n' + '18|def check(any_int):\n' + '19| # Check some simple cases\n' + '20| assert any_int(2, 3, 1)==True, "This prints if this assert fails 1 (good for debugging!)"\n' + '21| assert any_int(2.5, 2, 3)==False, "This prints if this assert fails 2 (good for debugging!)"\n' + '(12 more lines below)\n' + '-------------------------------------------------\n' + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' + 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' + 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n' ) - result = buf.getvalue() - expected = ( - '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - 'ERRORS:\n' - + str(temp_file_path) - + ':9:9: ' - + 'E999 IndentationError: unexpected indent\n' - '[This is how your edit would have looked if applied]\n' - '-------------------------------------------------\n' - '(this is the beginning of the file)\n' - '1|def any_int(a, b, c):\n' - '2| return isinstance(a, int) and isinstance(b, int) and isinstance(c, int)\n' - '3|\n' - '4|def test_any_int():\n' - '5| assert any_int(1, 2, 3) == True\n' - '6| assert any_int(1.5, 2, 3) == False\n' - '7| assert any_int(1, 2.5, 3) == False\n' - '8| assert any_int(1, 2, 3.5) == False\n' - '9| assert any_int(1.0, 2, 3) == False\n' - '10| assert any_int(1, 2.0, 3) == False\n' - '11| assert any_int(1, 2, 3.0) == False\n' - '12| assert any_int(0, 0, 0) == True\n' - '13| assert any_int(-1, -2, -3) == True\n' - '14| assert any_int(1, -2, 3) == True\n' - '15| assert any_int(1.5, -2, 3) == False\n' - '16| assert any_int(1, -2.5, 3) == False\n' - '17|\n' - '18|def check(any_int):\n' - '19| # Check some 
simple cases\n' - '20| assert any_int(2, 3, 1)==True, "This prints if this assert fails 1 (good for debugging!)"\n' - '21| assert any_int(2.5, 2, 3)==False, "This prints if this assert fails 2 (good for debugging!)"\n' - '(12 more lines below)\n' - '-------------------------------------------------\n' - '\n' - '[This is the original code before your edit]\n' - '-------------------------------------------------\n' - '(this is the beginning of the file)\n' - '1|def any_int(a, b, c):\n' - '2| return isinstance(a, int) and isinstance(b, int) and isinstance(c, int)\n' - '3|\n' - '4|def test_any_int():\n' - '5| assert any_int(1, 2, 3) == True\n' - '6| assert any_int(1.5, 2, 3) == False\n' - '7| assert any_int(1, 2.5, 3) == False\n' - '8| assert any_int(1, 2, 3.5) == False\n' - '9| assert any_int(1.0, 2, 3) == False\n' - '10| assert any_int(1, 2.0, 3) == False\n' - '11| assert any_int(1, 2, 3.0) == False\n' - '12| assert any_int(0, 0, 0) == True\n' - '13| assert any_int(-1, -2, -3) == True\n' - '14| assert any_int(1, -2, 3) == True\n' - '15| assert any_int(1.5, -2, 3) == False\n' - '16| assert any_int(1, -2.5, 3) == False\n' - '17|\n' - '18|def check(any_int):\n' - '19| # Check some simple cases\n' - '20| assert any_int(2, 3, 1)==True, "This prints if this assert fails 1 (good for debugging!)"\n' - '21| assert any_int(2.5, 2, 3)==False, "This prints if this assert fails 2 (good for debugging!)"\n' - '(12 more lines below)\n' - '-------------------------------------------------\n' - 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. 
Running it again will lead to the same error.\n' - ) - assert result == expected + assert result == expected # ================================ @@ -1253,153 +1250,140 @@ def test_find_file_not_exist_file_specific_path(tmp_path): assert result.split('\n') == expected.split('\n') -def test_edit_lint_file_pass(tmp_path, monkeypatch): - # Enable linting - monkeypatch.setattr( - 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True - ) - - file_path = _generate_test_file_with_lines(tmp_path, 1) - - # Test linting functionality - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - open_file(str(file_path)) - insert_content_at_line(str(file_path), 1, "print('hello')\n") - result = buf.getvalue() - assert result is not None - expected = ( - f'[File: {file_path} (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - f'[File: {file_path} (1 lines total after edit)]\n' - '(this is the beginning of the file)\n' - "1|print('hello')\n" - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - -def test_lint_file_fail_undefined_name(tmp_path, monkeypatch, capsys): +def test_edit_lint_file_pass(tmp_path): # Enable linting - monkeypatch.setattr( - 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True - ) + with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): + file_path = _generate_test_file_with_lines(tmp_path, 1) - current_line = 1 + # Test linting functionality + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(file_path)) + insert_content_at_line(str(file_path), 1, "print('hello')\n") + result = buf.getvalue() + assert result is not None + expected = ( + f'[File: {file_path} (1 lines total)]\n' + '(this is the beginning of the file)\n' + '1|\n' + '(this is the end of the file)\n' + f'[File: {file_path} (1 lines total after edit)]\n' + 
'(this is the beginning of the file)\n' + "1|print('hello')\n" + '(this is the end of the file)\n' + + MSG_FILE_UPDATED.format(line_number=1) + + '\n' + ) + assert result.split('\n') == expected.split('\n') - file_path = _generate_test_file_with_lines(tmp_path, 1) - open_file(str(file_path), current_line) - insert_content_at_line(str(file_path), 1, 'undefined_name()\n') +def test_lint_file_fail_undefined_name(tmp_path, capsys): + with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): + current_line = 1 - result = capsys.readouterr().out - assert result is not None + file_path = _generate_test_file_with_lines(tmp_path, 1) - expected = ( - f'[File: {file_path} (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - 'ERRORS:\n' - f"{file_path}:1:1: F821 undefined name 'undefined_name'\n" - '[This is how your edit would have looked if applied]\n' - '-------------------------------------------------\n' - '(this is the beginning of the file)\n' - '1|undefined_name()\n' - '(this is the end of the file)\n' - '-------------------------------------------------\n\n' - '[This is the original code before your edit]\n' - '-------------------------------------------------\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '-------------------------------------------------\n' - 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. 
Running it again will lead to the same error.\n' - ) - assert result.split('\n') == expected.split('\n') + open_file(str(file_path), current_line) + insert_content_at_line(str(file_path), 1, 'undefined_name()\n') + result = capsys.readouterr().out + assert result is not None -def test_lint_file_fail_undefined_name_long(tmp_path, monkeypatch, capsys): - # Enable linting - monkeypatch.setattr( - 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True - ) + expected = ( + f'[File: {file_path} (1 lines total)]\n' + '(this is the beginning of the file)\n' + '1|\n' + '(this is the end of the file)\n' + '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' + 'ERRORS:\n' + f"{file_path}:1:1: F821 undefined name 'undefined_name'\n" + '[This is how your edit would have looked if applied]\n' + '-------------------------------------------------\n' + '(this is the beginning of the file)\n' + '1|undefined_name()\n' + '(this is the end of the file)\n' + '-------------------------------------------------\n\n' + '[This is the original code before your edit]\n' + '-------------------------------------------------\n' + '(this is the beginning of the file)\n' + '1|\n' + '(this is the end of the file)\n' + '-------------------------------------------------\n' + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' + 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' + 'DO NOT re-run the same failed edit command. 
Running it again will lead to the same error.\n' + ) + assert result.split('\n') == expected.split('\n') - num_lines = 1000 - error_line = 500 - file_path = _generate_test_file_with_lines(tmp_path, num_lines) +def test_lint_file_fail_undefined_name_long(tmp_path, capsys): + with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): + num_lines = 1000 + error_line = 500 - error_message = f"{file_path}:{error_line}:1: F821 undefined name 'undefined_name'" + file_path = _generate_test_file_with_lines(tmp_path, num_lines) - open_file(str(file_path)) - insert_content_at_line(str(file_path), error_line, 'undefined_name()\n') + error_message = ( + f"{file_path}:{error_line}:1: F821 undefined name 'undefined_name'" + ) - result = capsys.readouterr().out - assert result is not None + open_file(str(file_path)) + insert_content_at_line(str(file_path), error_line, 'undefined_name()\n') - open_lines = '\n'.join([f'{i}|' for i in range(1, WINDOW + 1)]) - expected = ( - f'[File: {file_path} ({num_lines} lines total)]\n' - '(this is the beginning of the file)\n' - f'{open_lines}\n' - f'({num_lines - WINDOW} more lines below)\n' - '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - f'ERRORS:\n{error_message}\n' - '[This is how your edit would have looked if applied]\n' - '-------------------------------------------------\n' - '(489 more lines above)\n' - + _numbered_test_lines(error_line - 10, error_line - 1) - + '500|undefined_name()\n' - + _numbered_test_lines(error_line + 1, error_line + 10) - + '(491 more lines below)\n' - + '-------------------------------------------------\n\n' - '[This is the original code before your edit]\n' - '-------------------------------------------------\n' - '(489 more lines above)\n' - + _numbered_test_lines(error_line - 10, error_line + 10) - + '(490 more lines below)\n' - + '-------------------------------------------------\n' - 'Your changes have NOT been applied. 
Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n' - ) - assert result.split('\n') == expected.split('\n') + result = capsys.readouterr().out + assert result is not None + open_lines = '\n'.join([f'{i}|' for i in range(1, WINDOW + 1)]) + expected = ( + f'[File: {file_path} ({num_lines} lines total)]\n' + '(this is the beginning of the file)\n' + f'{open_lines}\n' + f'({num_lines - WINDOW} more lines below)\n' + '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' + f'ERRORS:\n{error_message}\n' + '[This is how your edit would have looked if applied]\n' + '-------------------------------------------------\n' + '(489 more lines above)\n' + + _numbered_test_lines(error_line - 10, error_line - 1) + + '500|undefined_name()\n' + + _numbered_test_lines(error_line + 1, error_line + 10) + + '(491 more lines below)\n' + + '-------------------------------------------------\n\n' + '[This is the original code before your edit]\n' + '-------------------------------------------------\n' + '(489 more lines above)\n' + + _numbered_test_lines(error_line - 10, error_line + 10) + + '(490 more lines below)\n' + + '-------------------------------------------------\n' + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' + 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' + 'DO NOT re-run the same failed edit command. 
Running it again will lead to the same error.\n' + ) + assert result.split('\n') == expected.split('\n') -def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys): - # Disable linting - monkeypatch.setattr( - 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', False - ) - file_path = _generate_test_file_with_lines(tmp_path, 1) +def test_lint_file_disabled_undefined_name(tmp_path, capsys): + with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'False'}): + file_path = _generate_test_file_with_lines(tmp_path, 1) - open_file(str(file_path)) - insert_content_at_line(str(file_path), 1, 'undefined_name()\n') + open_file(str(file_path)) + insert_content_at_line(str(file_path), 1, 'undefined_name()\n') - result = capsys.readouterr().out - assert result is not None - expected = ( - f'[File: {file_path} (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - f'[File: {file_path} (1 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|undefined_name()\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') + result = capsys.readouterr().out + assert result is not None + expected = ( + f'[File: {file_path} (1 lines total)]\n' + '(this is the beginning of the file)\n' + '1|\n' + '(this is the end of the file)\n' + f'[File: {file_path} (1 lines total after edit)]\n' + '(this is the beginning of the file)\n' + '1|undefined_name()\n' + '(this is the end of the file)\n' + + MSG_FILE_UPDATED.format(line_number=1) + + '\n' + ) + assert result.split('\n') == expected.split('\n') def test_parse_docx(tmp_path): @@ -1521,44 +1505,40 @@ def test_parse_pptx(tmp_path): assert output == expected_output, f'Expected output does not match. 
Got: {output}' -def test_lint_file_fail_non_python(tmp_path, monkeypatch, capsys): - monkeypatch.setattr( - 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True - ) - - current_line = 1 - - file_path = _generate_ruby_test_file_with_lines(tmp_path, 1) +def test_lint_file_fail_non_python(tmp_path, capsys): + with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): + current_line = 1 + file_path = _generate_ruby_test_file_with_lines(tmp_path, 1) - open_file(str(file_path), current_line) - insert_content_at_line( - str(file_path), 1, "def print_hello_world()\n puts 'Hello World'" - ) - result = capsys.readouterr().out - assert result is not None - expected = ( - f'[File: {file_path} (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - 'ERRORS:\n' - f'{file_path}:1\n' - '[This is how your edit would have looked if applied]\n' - '-------------------------------------------------\n' - '(this is the beginning of the file)\n' - '1|def print_hello_world()\n' - "2| puts 'Hello World'\n" - '(this is the end of the file)\n' - '-------------------------------------------------\n\n' - '[This is the original code before your edit]\n' - '-------------------------------------------------\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '-------------------------------------------------\n' - 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. 
Running it again will lead to the same error.\n' - ) - assert result.split('\n') == expected.split('\n') + open_file(str(file_path), current_line) + insert_content_at_line( + str(file_path), 1, "def print_hello_world()\n puts 'Hello World'" + ) + result = capsys.readouterr().out + assert result is not None + expected = ( + f'[File: {file_path} (1 lines total)]\n' + '(this is the beginning of the file)\n' + '1|\n' + '(this is the end of the file)\n' + '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' + 'ERRORS:\n' + f'{file_path}:1\n' + '[This is how your edit would have looked if applied]\n' + '-------------------------------------------------\n' + '(this is the beginning of the file)\n' + '1|def print_hello_world()\n' + "2| puts 'Hello World'\n" + '(this is the end of the file)\n' + '-------------------------------------------------\n\n' + '[This is the original code before your edit]\n' + '-------------------------------------------------\n' + '(this is the beginning of the file)\n' + '1|\n' + '(this is the end of the file)\n' + '-------------------------------------------------\n' + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' + 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' + 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n' + ) + assert result.split('\n') == expected.split('\n') diff --git a/tests/unit/test_bash_parsing.py b/tests/unit/test_bash_parsing.py index 37c40f3dd889..014a638eb466 100644 --- a/tests/unit/test_bash_parsing.py +++ b/tests/unit/test_bash_parsing.py @@ -114,7 +114,7 @@ def test_jupyter_heredoc(): print('Hello, `World`! 
') EOL - [error]: here-document at line 0 delimited by end-of-file (wanted "'EOL'") (position 75) + [warning]: here-document at line 0 delimited by end-of-file (wanted "'EOL'") (position 75) TODO: remove this tests after the deprecation of ServerRuntime """ diff --git a/tests/unit/test_event_stream.py b/tests/unit/test_event_stream.py index a07c9c791cfa..df4aec0fe401 100644 --- a/tests/unit/test_event_stream.py +++ b/tests/unit/test_event_stream.py @@ -1,8 +1,7 @@ import json -import pathlib -import tempfile import pytest +from pytest import TempPathFactory from opendevin.events import EventSource, EventStream from opendevin.events.action import ( @@ -13,11 +12,8 @@ @pytest.fixture -def temp_dir(monkeypatch): - # get a temporary directory - with tempfile.TemporaryDirectory() as temp_dir: - pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True) - yield temp_dir +def temp_dir(tmp_path_factory: TempPathFactory) -> str: + return str(tmp_path_factory.mktemp('test_event_stream')) def collect_events(stream): diff --git a/tests/unit/test_ipython.py b/tests/unit/test_ipython.py deleted file mode 100644 index 66a03cc03a1f..000000000000 --- a/tests/unit/test_ipython.py +++ /dev/null @@ -1,77 +0,0 @@ -import pathlib -import tempfile -from unittest.mock import MagicMock, call, patch - -import pytest - -from opendevin.core.config import AppConfig, SandboxConfig -from opendevin.events.action import IPythonRunCellAction -from opendevin.events.observation import IPythonRunCellObservation -from opendevin.runtime.server.runtime import ServerRuntime - - -@pytest.fixture -def temp_dir(monkeypatch): - # get a temporary directory - with tempfile.TemporaryDirectory() as temp_dir: - pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True) - yield temp_dir - - -@pytest.mark.asyncio -async def test_run_python_backticks(): - # Create a mock event_stream - mock_event_stream = MagicMock() - - test_code = "print('Hello, `World`!\n')" - - # Mock the asynchronous sandbox execute method - 
mock_sandbox_execute = MagicMock() - mock_sandbox_execute.side_effect = [ - (0, ''), # Initial call during DockerSSHBox initialization - (0, ''), # Initial call during DockerSSHBox initialization - (0, ''), # Initial call during DockerSSHBox initialization - (0, ''), # Write command - (0, test_code), # Execute command - ] - - # Set up the patches for the runtime and sandbox - with patch( - 'opendevin.runtime.docker.ssh_box.DockerSSHBox.execute', - new=mock_sandbox_execute, - ): - # Initialize the runtime with the mock event_stream - runtime = ServerRuntime( - config=AppConfig( - persist_sandbox=False, sandbox=SandboxConfig(box_type='ssh') - ), - event_stream=mock_event_stream, - ) - - # Define the test action with a simple IPython command - action = IPythonRunCellAction(code=test_code) - - # Call the run_ipython method with the test action - result = await runtime.run_action(action) - - # Assert that the result is an instance of IPythonRunCellObservation - assert isinstance(result, IPythonRunCellObservation) - - # Assert that the execute method was called with the correct commands - expected_write_command = ( - "cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{test_code}\n' 'EOL' - ) - expected_execute_command = 'cat /tmp/opendevin_jupyter_temp.py | execute_cli' - mock_sandbox_execute.assert_has_calls( - [ - call('mkdir -p /tmp'), - call('git config --global user.name "OpenDevin"'), - call('git config --global user.email "opendevin@all-hands.dev"'), - call(expected_write_command), - call(expected_execute_command), - ] - ) - - assert ( - test_code == result.content - ), f'The output should contain the expected print output, got: {result.content}' diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index b4154e709881..b9da7e497855 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -1,8 +1,8 @@ import logging -import tempfile from unittest.mock import Mock, patch import pytest +from pytest import TempPathFactory from 
opendevin.controller.agent_controller import AgentController from opendevin.controller.state.state import State @@ -29,14 +29,17 @@ def collect_events(stream): @pytest.fixture -def event_stream(): - with tempfile.TemporaryDirectory() as temp_dir: - file_store = get_file_store('local', temp_dir) - event_stream = EventStream('asdf', file_store) - yield event_stream - - # clear after each test - event_stream.clear() +def temp_dir(tmp_path_factory: TempPathFactory) -> str: + return str(tmp_path_factory.mktemp('test_is_stuck')) + + +@pytest.fixture +def event_stream(temp_dir): + file_store = get_file_store('local', temp_dir) + event_stream = EventStream('asdf', file_store) + yield event_stream + # clear after each test + event_stream.clear() class TestStuckDetector: diff --git a/tests/unit/test_micro_agents.py b/tests/unit/test_micro_agents.py index 3c474aca140e..a89a0c579c77 100644 --- a/tests/unit/test_micro_agents.py +++ b/tests/unit/test_micro_agents.py @@ -1,10 +1,10 @@ import json import os -import tempfile from unittest.mock import MagicMock import pytest import yaml +from pytest import TempPathFactory from agenthub.micro.registry import all_microagents from opendevin.controller.agent import Agent @@ -17,14 +17,18 @@ @pytest.fixture -def event_stream(): - with tempfile.TemporaryDirectory() as temp_dir: - file_store = get_file_store('local', temp_dir) - event_stream = EventStream('asdf', file_store) - yield event_stream - - # clear after each test - event_stream.clear() +def temp_dir(tmp_path_factory: TempPathFactory) -> str: + return str(tmp_path_factory.mktemp('test_micro_agents')) + + +@pytest.fixture +def event_stream(temp_dir): + file_store = get_file_store('local', temp_dir) + event_stream = EventStream('asdf', file_store) + yield event_stream + + # clear after each test + event_stream.clear() def test_all_agents_are_loaded(): diff --git a/tests/unit/test_runtime.py b/tests/unit/test_runtime.py index 4810481ec3ae..70376de5ce06 100644 --- 
a/tests/unit/test_runtime.py +++ b/tests/unit/test_runtime.py @@ -2,12 +2,11 @@ import asyncio import os -import pathlib -import tempfile import time from unittest.mock import patch import pytest +from pytest import TempPathFactory from opendevin.core.config import AppConfig, SandboxConfig, load_from_env from opendevin.core.logger import opendevin_logger as logger @@ -41,62 +40,106 @@ def print_method_name(request): @pytest.fixture -def temp_dir(monkeypatch): - # get a temporary directory - with tempfile.TemporaryDirectory() as temp_dir: - pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True) - yield temp_dir +def temp_dir(tmp_path_factory: TempPathFactory) -> str: + return str(tmp_path_factory.mktemp('test_runtime')) + + +TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'both') +PY3_FOR_TESTING = '/opendevin/miniforge3/bin/mamba run -n base python3' # This assures that all tests run together for each runtime, not alternating between them, # which caused them to fail previously. -@pytest.fixture(scope='module', params=[EventStreamRuntime, ServerRuntime]) +@pytest.fixture(scope='module') def box_class(request): + time.sleep(1) + runtime = TEST_RUNTIME + if runtime.lower() == 'eventstream': + return EventStreamRuntime + elif runtime.lower() == 'server': + return ServerRuntime + else: + return pytest.param([EventStreamRuntime, ServerRuntime]) + + +# TODO: We will change this to `run_as_user` when `ServerRuntime` is deprecated. +# since `EventStreamRuntime` supports running as an arbitrary user. 
+@pytest.fixture(scope='module', params=[True, False]) +def run_as_devin(request): + time.sleep(1) + return request.param + + +@pytest.fixture(scope='module', params=[True, False]) +def enable_auto_lint(request): time.sleep(1) return request.param -async def _load_runtime(temp_dir, box_class): +@pytest.fixture(scope='module', params=['ubuntu:22.04', 'debian:11']) +def container_image(request): + time.sleep(1) + return request.param + + +async def _load_runtime( + temp_dir, + box_class, + run_as_devin: bool = True, + enable_auto_lint: bool = False, + container_image: str | None = None, +): sid = 'test' cli_session = 'main_test' - plugins = [JupyterRequirement(), AgentSkillsRequirement()] + # AgentSkills need to be initialized **before** Jupyter + # otherwise Jupyter will not access the proper dependencies installed by AgentSkills + plugins = [AgentSkillsRequirement(), JupyterRequirement()] config = AppConfig( workspace_base=temp_dir, workspace_mount_path=temp_dir, - sandbox=SandboxConfig( - use_host_network=True, - ), + sandbox=SandboxConfig(use_host_network=True), ) load_from_env(config, os.environ) + config.run_as_devin = run_as_devin + config.sandbox.enable_auto_lint = enable_auto_lint file_store = get_file_store(config.file_store, config.file_store_path) event_stream = EventStream(cli_session, file_store) - container_image = config.sandbox.container_image - # NOTE: we will use the default container image specified in the config.sandbox - # if it is an official od_runtime image. - if 'od_runtime' not in container_image: - container_image = 'ubuntu:22.04' - logger.warning( - f'`{config.sandbox.container_image}` is not an od_runtime image. Will use `{container_image}` as the container image for testing.' - ) + if container_image is not None: + config.sandbox.container_image = container_image + if box_class == EventStreamRuntime: + # NOTE: we will use the default container image specified in the config.sandbox + # if it is an official od_runtime image. 
+ cur_container_image = config.sandbox.container_image + if 'od_runtime' not in cur_container_image: + cur_container_image = 'ubuntu:22.04' + logger.warning( + f'`{config.sandbox.container_image}` is not an od_runtime image. Will use `{cur_container_image}` as the container image for testing.' + ) + runtime = EventStreamRuntime( config=config, event_stream=event_stream, sid=sid, + plugins=plugins, # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime # Instead, we will pre-build a suite of container images with OD-runtime-cli installed. - container_image=container_image, - plugins=plugins, + container_image=cur_container_image, ) await runtime.ainit() elif box_class == ServerRuntime: - runtime = ServerRuntime(config=config, event_stream=event_stream, sid=sid) + runtime = ServerRuntime( + config=config, event_stream=event_stream, sid=sid, plugins=plugins + ) await runtime.ainit() - runtime.init_sandbox_plugins(plugins) + from opendevin.runtime.tools import ( + RuntimeTool, # deprecate this after ServerRuntime is deprecated + ) + runtime.init_runtime_tools( - [], + [RuntimeTool.BROWSER], is_async=False, runtime_tools_config={}, ) @@ -107,9 +150,9 @@ async def _load_runtime(temp_dir, box_class): @pytest.mark.asyncio -async def test_env_vars_os_environ(temp_dir, box_class): +async def test_env_vars_os_environ(temp_dir, box_class, run_as_devin): with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}): - runtime = await _load_runtime(temp_dir, box_class) + runtime = await _load_runtime(temp_dir, box_class, run_as_devin) obs: CmdOutputObservation = await runtime.run_action( CmdRunAction(command='env') @@ -206,8 +249,8 @@ async def test_env_vars_runtime_add_env_vars_overwrite(temp_dir, box_class): @pytest.mark.asyncio -async def test_bash_command_pexcept(temp_dir, box_class): - runtime = await _load_runtime(temp_dir, box_class) +async def test_bash_command_pexcept(temp_dir, box_class, run_as_devin): + runtime = await 
_load_runtime(temp_dir, box_class, run_as_devin) # We set env var PS1="\u@\h:\w $" # and construct the PEXCEPT prompt base on it. @@ -237,8 +280,8 @@ async def test_bash_command_pexcept(temp_dir, box_class): @pytest.mark.asyncio -async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class): - runtime = await _load_runtime(temp_dir, box_class) +async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_devin): + runtime = await _load_runtime(temp_dir, box_class, run_as_devin) # Test run command action_cmd = CmdRunAction(command='ls -l') @@ -297,16 +340,25 @@ async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class): else: assert obs.path == '/workspace/hello.sh' + # clean up + action = CmdRunAction(command='rm -rf hello.sh') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + await runtime.close() await asyncio.sleep(1) @pytest.mark.asyncio -async def test_simple_browse(temp_dir, box_class): - runtime = await _load_runtime(temp_dir, box_class) +async def test_simple_browse(temp_dir, box_class, run_as_devin): + runtime = await _load_runtime(temp_dir, box_class, run_as_devin) # Test browse - action_cmd = CmdRunAction(command='python -m http.server 8000 > server.log 2>&1 &') + action_cmd = CmdRunAction( + command=f'{PY3_FOR_TESTING} -m http.server 8000 > server.log 2>&1 &' + ) logger.info(action_cmd, extra={'msg_type': 'ACTION'}) obs = await runtime.run_action(action_cmd) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) @@ -315,6 +367,12 @@ async def test_simple_browse(temp_dir, box_class): assert obs.exit_code == 0 assert '[1]' in obs.content + action_cmd = CmdRunAction(command='sleep 5 && cat server.log') + logger.info(action_cmd, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action_cmd) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + action_browse = 
BrowseURLAction(url='http://localhost:8000') logger.info(action_browse, extra={'msg_type': 'ACTION'}) obs = await runtime.run_action(action_browse) @@ -331,11 +389,65 @@ async def test_simple_browse(temp_dir, box_class): assert 'Directory listing for /' in obs.content assert 'server.log' in obs.content + # clean up + action = CmdRunAction(command='rm -rf server.log') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + + await runtime.close() + await asyncio.sleep(1) + + +@pytest.mark.asyncio +async def test_single_multiline_command(temp_dir, box_class): + runtime = await _load_runtime(temp_dir, box_class) + + action = CmdRunAction(command='echo \\\n -e "foo"') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0, 'The exit code should be 0.' + assert 'foo' in obs.content + + await runtime.close() + await asyncio.sleep(1) + + +@pytest.mark.asyncio +async def test_multiline_echo(temp_dir, box_class): + runtime = await _load_runtime(temp_dir, box_class) + + action = CmdRunAction(command='echo -e "hello\nworld"') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0, 'The exit code should be 0.' 
+ assert 'hello\r\nworld' in obs.content + await runtime.close() + await asyncio.sleep(1) @pytest.mark.asyncio -async def test_multiline_commands(temp_dir, box_class): +async def test_runtime_whitespace(temp_dir, box_class): + runtime = await _load_runtime(temp_dir, box_class) + + action = CmdRunAction(command='echo -e "\\n\\n\\n"') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + + assert obs.exit_code == 0, 'The exit code should be 0.' + assert '\r\n\r\n\r\n' in obs.content + + await runtime.close() + await asyncio.sleep(1) + + +@pytest.mark.asyncio +async def test_multiple_multiline_commands(temp_dir, box_class, run_as_devin): cmds = [ 'ls -l', 'echo -e "hello\nworld"', @@ -365,7 +477,7 @@ async def test_multiline_commands(temp_dir, box_class): ] joined_cmds = '\n'.join(cmds) - runtime = await _load_runtime(temp_dir, box_class) + runtime = await _load_runtime(temp_dir, box_class, run_as_devin) action = CmdRunAction(command=joined_cmds) logger.info(action, extra={'msg_type': 'ACTION'}) @@ -388,9 +500,9 @@ async def test_multiline_commands(temp_dir, box_class): @pytest.mark.asyncio -async def test_no_ps2_in_output(temp_dir, box_class): +async def test_no_ps2_in_output(temp_dir, box_class, run_as_devin): """Test that the PS2 sign is not added to the output of a multiline command.""" - runtime = await _load_runtime(temp_dir, box_class) + runtime = await _load_runtime(temp_dir, box_class, run_as_devin) action = CmdRunAction(command='echo -e "hello\nworld"') logger.info(action, extra={'msg_type': 'ACTION'}) @@ -406,6 +518,9 @@ async def test_no_ps2_in_output(temp_dir, box_class): assert 'hello\r\nworld' in obs.content assert '>' not in obs.content + await runtime.close() + await asyncio.sleep(1) + @pytest.mark.asyncio async def test_multiline_command_loop(temp_dir, box_class): @@ -449,3 +564,369 @@ async def test_multiline_command_loop(temp_dir, box_class): await 
runtime.close() await asyncio.sleep(1) + + +@pytest.mark.asyncio +async def test_cmd_run(temp_dir, box_class, run_as_devin): + runtime = await _load_runtime(temp_dir, box_class, run_as_devin) + + action = CmdRunAction(command='ls -l') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0 + assert 'total 0' in obs.content + + action = CmdRunAction(command='mkdir test') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0 + + action = CmdRunAction(command='ls -l') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0 + if run_as_devin: + assert 'opendevin' in obs.content + else: + assert 'root' in obs.content + assert 'test' in obs.content + + action = CmdRunAction(command='touch test/foo.txt') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0 + + action = CmdRunAction(command='ls -l test') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0 + assert 'foo.txt' in obs.content + + # clean up: this is needed, since CI will not be + # run as root, and this test may leave a file + # owned by root + action = CmdRunAction(command='rm -rf test') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + 
logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0 + + await runtime.close() + await asyncio.sleep(1) + + +@pytest.mark.asyncio +async def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_devin): + runtime = await _load_runtime(temp_dir, box_class, run_as_devin) + + action = CmdRunAction(command='cd ~ && pwd') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0 + if run_as_devin: + assert '/home/opendevin' in obs.content + else: + assert '/root' in obs.content + + await runtime.close() + await asyncio.sleep(1) + + +@pytest.mark.asyncio +async def test_multi_cmd_run_in_single_line(temp_dir, box_class): + runtime = await _load_runtime(temp_dir, box_class) + + action = CmdRunAction(command='pwd && ls -l') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0 + assert '/workspace' in obs.content + assert 'total 0' in obs.content + + await runtime.close() + await asyncio.sleep(1) + + +@pytest.mark.asyncio +async def test_stateful_cmd(temp_dir, box_class): + runtime = await _load_runtime(temp_dir, box_class) + + action = CmdRunAction(command='mkdir test') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0, 'The exit code should be 0.' 
+ + action = CmdRunAction(command='cd test') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0, 'The exit code should be 0.' + + action = CmdRunAction(command='pwd') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0, 'The exit code should be 0.' + assert '/workspace/test' in obs.content + + await runtime.close() + await asyncio.sleep(1) + + +@pytest.mark.asyncio +async def test_failed_cmd(temp_dir, box_class): + runtime = await _load_runtime(temp_dir, box_class) + + action = CmdRunAction(command='non_existing_command') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.' 
+
+    await runtime.close()
+    await asyncio.sleep(1)
+
+
+@pytest.mark.asyncio
+async def test_ipython_multi_user(temp_dir, box_class, run_as_devin):
+    """Check which user IPython cells run as and who owns the files they create.
+
+    With ``run_as_devin`` set, the cell's ``USER`` and the owner of a file
+    written from IPython are expected to be ``opendevin``; otherwise ``root``.
+    The IPython working directory is expected to be ``/workspace``.
+    """
+    runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
+
+    # Release the runtime even when an assertion below fails.
+    try:
+        # Test run ipython
+        # get username
+        test_code = "import os; print(os.environ['USER'])"
+        action_ipython = IPythonRunCellAction(code=test_code)
+        logger.info(action_ipython, extra={'msg_type': 'ACTION'})
+        obs = await runtime.run_action(action_ipython)
+        assert isinstance(obs, IPythonRunCellObservation)
+
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        if run_as_devin:
+            assert 'opendevin' in obs.content
+        else:
+            assert 'root' in obs.content
+
+        # print pwd
+        test_code = 'import os; print(os.getcwd())'
+        action_ipython = IPythonRunCellAction(code=test_code)
+        logger.info(action_ipython, extra={'msg_type': 'ACTION'})
+        obs = await runtime.run_action(action_ipython)
+        assert isinstance(obs, IPythonRunCellObservation)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.content.strip() == '/workspace'
+
+        # write a file
+        test_code = "with open('test.txt', 'w') as f: f.write('Hello, world!')"
+        action_ipython = IPythonRunCellAction(code=test_code)
+        logger.info(action_ipython, extra={'msg_type': 'ACTION'})
+        obs = await runtime.run_action(action_ipython)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert isinstance(obs, IPythonRunCellObservation)
+        assert obs.content.strip() == '[Code executed successfully with no output]'
+
+        # check file owner via bash
+        action = CmdRunAction(command='ls -alh test.txt')
+        logger.info(action, extra={'msg_type': 'ACTION'})
+        obs = await runtime.run_action(action)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.exit_code == 0
+        if run_as_devin:
+            # -rw-r--r-- 1 opendevin root 13 Jul 28 03:53 test.txt
+            assert 'opendevin' in obs.content.split('\r\n')[0]
+            assert 'root' in obs.content.split('\r\n')[0]
+        else:
+            # -rw-r--r-- 1 root root 13 Jul 28 03:53 test.txt
+            assert 'root' in obs.content.split('\r\n')[0]
+
+        # clean up: remove the file written above (it is named test.txt)
+        action = CmdRunAction(command='rm -rf test.txt')
+        logger.info(action, extra={'msg_type': 'ACTION'})
+        obs = await runtime.run_action(action)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.exit_code == 0
+    finally:
+        await runtime.close()
+        await asyncio.sleep(1)
+
+
+@pytest.mark.asyncio
+async def test_ipython_simple(temp_dir, box_class):
+    """Run a one-line IPython cell and check its printed output."""
+    runtime = await _load_runtime(temp_dir, box_class)
+
+    # Release the runtime even when an assertion below fails.
+    try:
+        # Test run ipython
+        test_code = 'print(1)'
+        action_ipython = IPythonRunCellAction(code=test_code)
+        logger.info(action_ipython, extra={'msg_type': 'ACTION'})
+        obs = await runtime.run_action(action_ipython)
+        assert isinstance(obs, IPythonRunCellObservation)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.content.strip() == '1'
+    finally:
+        await runtime.close()
+        await asyncio.sleep(1)
+
+
+async def _test_ipython_agentskills_fileop_pwd_impl(
+    runtime: ServerRuntime | EventStreamRuntime, enable_auto_lint: bool
+):
+    """Shared body for the "bash cd is visible to IPython file ops" tests.
+
+    Creates ``hello.py`` through ``create_file`` before and after a bash
+    ``cd test`` and asserts that the reported path follows the bash working
+    directory. Empties ``/workspace`` at the start and at the end.
+
+    Args:
+        runtime: A loaded runtime; the caller is responsible for closing it.
+        enable_auto_lint: When true, also assert the lint-error message
+            produced by a badly indented edit.
+    """
+    # remove everything in /workspace
+    action = CmdRunAction(command='rm -rf /workspace/*')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert obs.exit_code == 0
+
+    action = CmdRunAction(command='mkdir test')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+
+    action = IPythonRunCellAction(code="create_file('hello.py')")
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, IPythonRunCellObservation)
+    assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
+        '[File: /workspace/hello.py (1 lines total)]\n'
+        '(this is the beginning of the file)\n'
+        '1|\n'
+        '(this is the end of the file)\n'
+        '[File hello.py created.]\n'
+    ).strip().split('\n')
+
+    action = CmdRunAction(command='cd test')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+
+    # This should create a file in the current working directory
+    # i.e., /workspace/test/hello.py instead of /workspace/hello.py
+    action = IPythonRunCellAction(code="create_file('hello.py')")
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, IPythonRunCellObservation)
+    assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
+        '[File: /workspace/test/hello.py (1 lines total)]\n'
+        '(this is the beginning of the file)\n'
+        '1|\n'
+        '(this is the end of the file)\n'
+        '[File hello.py created.]\n'
+    ).strip().split('\n')
+
+    if enable_auto_lint:
+        # edit file, but make a mistake in indentation
+        # Two leading spaces before print: the expected lint error below is
+        # reported at column 3 (presumably the first non-blank column).
+        action = IPythonRunCellAction(
+            code="insert_content_at_line('hello.py', 1, '  print(\"hello world\")')"
+        )
+        logger.info(action, extra={'msg_type': 'ACTION'})
+        obs = await runtime.run_action(action)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert isinstance(obs, IPythonRunCellObservation)
+        assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
+            """
+[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
+ERRORS:
+/workspace/test/hello.py:1:3: E999 IndentationError: unexpected indent
+[This is how your edit would have looked if applied]
+-------------------------------------------------
+(this is the beginning of the file)
+1|  print("hello world")
+(this is the end of the file)
+-------------------------------------------------
+
+[This is the original code before your edit]
+-------------------------------------------------
+(this is the beginning of the file)
+1|
+(this is the end of the file)
+-------------------------------------------------
+Your changes have NOT been applied. Please fix your edit command and try again.
+You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
+DO NOT re-run the same failed edit command. Running it again will lead to the same error.
+"""
+        ).strip().split('\n')
+
+    # edit file with correct indentation
+    # NOTE(review): assumed to run regardless of enable_auto_lint; confirm
+    # the intended nesting.
+    action = IPythonRunCellAction(
+        code="insert_content_at_line('hello.py', 1, 'print(\"hello world\")')"
+    )
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, IPythonRunCellObservation)
+    assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
+        """
+[File: /workspace/test/hello.py (1 lines total after edit)]
+(this is the beginning of the file)
+1|print("hello world")
+(this is the end of the file)
+[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+"""
+    ).strip().split('\n')
+
+    action = CmdRunAction(command='rm -rf /workspace/*')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert obs.exit_code == 0
+
+
+@pytest.mark.asyncio
+async def test_ipython_agentskills_fileop_pwd(temp_dir, box_class, enable_auto_lint):
+    """Make sure that cd in bash also updates the current working directory in ipython."""
+
+    runtime = await _load_runtime(
+        temp_dir, box_class, enable_auto_lint=enable_auto_lint
+    )
+    # Release the runtime even when the shared checks fail.
+    try:
+        await _test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
+    finally:
+        await runtime.close()
+        await asyncio.sleep(1)
+
+
+@pytest.mark.skipif(
+    TEST_RUNTIME.lower() == 'eventstream',
+    reason='Skip this if we want to test EventStreamRuntime',
+)
+@pytest.mark.skipif(
+    os.environ.get('TEST_IN_CI', 'false').lower() == 'true',
+    # FIXME: There's some weird issue with the CI environment.
+    reason='Skip this if in CI.',
+)
+@pytest.mark.asyncio
+async def test_ipython_agentskills_fileop_pwd_agnostic_sandbox(
+    temp_dir, enable_auto_lint, container_image
+):
+    """Make sure that cd in bash also updates the current working directory in ipython.
+
+    Same checks as test_ipython_agentskills_fileop_pwd, but run on a
+    ServerRuntime loaded with the ``container_image`` fixture value.
+    """
+
+    runtime = await _load_runtime(
+        temp_dir,
+        # NOTE: we only test for ServerRuntime, since EventStreamRuntime is image agnostic by design.
+        ServerRuntime,
+        enable_auto_lint=enable_auto_lint,
+        container_image=container_image,
+    )
+    # Release the runtime even when the shared checks fail.
+    try:
+        await _test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
+    finally:
+        await runtime.close()
+        await asyncio.sleep(1)
diff --git a/tests/unit/test_runtime_build.py b/tests/unit/test_runtime_build.py
index d18cffb43ac7..49c6ea0ca744 100644
--- a/tests/unit/test_runtime_build.py
+++ b/tests/unit/test_runtime_build.py
@@ -1,11 +1,11 @@
 import os
 import tarfile
-import tempfile
 from importlib.metadata import version
 from unittest.mock import MagicMock, patch
 
 import pytest
 import toml
+from pytest import TempPathFactory
 
 from opendevin.runtime.utils.runtime_build import (
     _generate_dockerfile,
@@ -20,9 +20,8 @@
 
 
 @pytest.fixture
-def temp_dir():
-    with tempfile.TemporaryDirectory() as temp_dir:
-        yield temp_dir
+def temp_dir(tmp_path_factory: TempPathFactory) -> str:
+    return str(tmp_path_factory.mktemp('test_runtime_build'))
 
 
 def test_put_source_code_to_dir(temp_dir):
diff --git a/tests/unit/test_sandbox.py b/tests/unit/test_sandbox.py
deleted file mode 100644
index c6e57bb2e3dd..000000000000
--- a/tests/unit/test_sandbox.py
+++ /dev/null
@@ -1,317 +0,0 @@
-import os
-import pathlib
-import tempfile
-
-import pytest
-
-from opendevin.core.config import AppConfig, SandboxConfig
-from opendevin.runtime.docker.ssh_box import DockerSSHBox
-from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
-
-
-def create_docker_box_from_app_config(
-    path: str, config: AppConfig | None = None
-) -> DockerSSHBox:
-    if config is None:
-        config = AppConfig(
-            sandbox=SandboxConfig(
-                box_type='ssh',
-            ),
-            persist_sandbox=False,
-        )
-    return DockerSSHBox(
-        config=config.sandbox,
-        persist_sandbox=config.persist_sandbox,
-        workspace_mount_path=path,
-        sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
-        cache_dir=config.cache_dir,
-        run_as_devin=True,
-        ssh_hostname=config.ssh_hostname,
-        ssh_password=config.ssh_password,
-        ssh_port=config.ssh_port,
-    )
-
- -@pytest.fixture(autouse=True) -def print_method_name(request): - print('\n########################################################################') - print(f'Running test: {request.node.name}') - print('########################################################################') - - -@pytest.fixture -def temp_dir(monkeypatch): - # get a temporary directory - with tempfile.TemporaryDirectory() as temp_dir: - pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True) - yield temp_dir - - -def test_ssh_box_run_as_devin(temp_dir): - # get a temporary directory - for box in [ - create_docker_box_from_app_config(temp_dir), - ]: # FIXME: permission error on mkdir test for exec box - exit_code, output = box.execute('ls -l') - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output.strip() == 'total 0' - - assert box.workspace_mount_path == temp_dir - exit_code, output = box.execute('ls -l') - assert exit_code == 0, 'The exit code should be 0.' - assert output.strip() == 'total 0' - - exit_code, output = box.execute('mkdir test') - assert exit_code == 0, 'The exit code should be 0.' - assert output.strip() == '' - - exit_code, output = box.execute('ls -l') - assert exit_code == 0, 'The exit code should be 0.' - assert 'opendevin' in output, "The output should contain username 'opendevin'" - assert 'test' in output, 'The output should contain the test directory' - - exit_code, output = box.execute('touch test/foo.txt') - assert exit_code == 0, 'The exit code should be 0.' - assert output.strip() == '' - - exit_code, output = box.execute('ls -l test') - assert exit_code == 0, 'The exit code should be 0.' 
- assert 'foo.txt' in output, 'The output should contain the foo.txt file' - box.close() - - -def test_ssh_box_multi_line_cmd_run_as_devin(temp_dir): - box = create_docker_box_from_app_config(temp_dir) - exit_code, output = box.execute('pwd && ls -l') - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - expected_lines = ['/workspace', 'total 0'] - line_sep = '\r\n' if isinstance(box, DockerSSHBox) else '\n' - assert output == line_sep.join(expected_lines), ( - 'The output should be the same as the input for ' + box.__class__.__name__ - ) - box.close() - - -def test_ssh_box_stateful_cmd_run_as_devin(temp_dir): - box = create_docker_box_from_app_config(temp_dir) - exit_code, output = box.execute('mkdir test') - assert exit_code == 0, 'The exit code should be 0.' - assert output.strip() == '' - - exit_code, output = box.execute('cd test') - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output.strip() == '', ( - 'The output should be empty for ' + box.__class__.__name__ - ) - - exit_code, output = box.execute('pwd') - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output.strip() == '/workspace/test', ( - 'The output should be /workspace for ' + box.__class__.__name__ - ) - box.close() - - -def test_ssh_box_failed_cmd_run_as_devin(temp_dir): - box = create_docker_box_from_app_config(temp_dir) - exit_code, output = box.execute('non_existing_command') - assert exit_code != 0, ( - 'The exit code should not be 0 for a failed command for ' - + box.__class__.__name__ - ) - box.close() - - -def test_single_multiline_command(temp_dir): - box = create_docker_box_from_app_config(temp_dir) - exit_code, output = box.execute('echo \\\n -e "foo"') - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - # FIXME: why is there a `>` in the output? Probably PS2? 
- assert output == '> foo', ( - 'The output should be the same as the input for ' + box.__class__.__name__ - ) - box.close() - - -def test_multiline_echo(temp_dir): - box = create_docker_box_from_app_config(temp_dir) - exit_code, output = box.execute('echo -e "hello\nworld"') - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - # FIXME: why is there a `>` in the output? - assert output == '> hello\r\nworld', ( - 'The output should be the same as the input for ' + box.__class__.__name__ - ) - box.close() - - -def test_sandbox_whitespace(temp_dir): - box = create_docker_box_from_app_config(temp_dir) - exit_code, output = box.execute('echo -e "\\n\\n\\n"') - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output == '\r\n\r\n\r\n', ( - 'The output should be the same as the input for ' + box.__class__.__name__ - ) - box.close() - - -def test_sandbox_jupyter_plugin(temp_dir): - box = create_docker_box_from_app_config(temp_dir) - box.init_plugins([JupyterRequirement]) - exit_code, output = box.execute('echo "print(1)" | execute_cli') - print(output) - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output == '1\r\n', ( - 'The output should be the same as the input for ' + box.__class__.__name__ - ) - box.close() - - -def _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config: AppConfig): - box.init_plugins([AgentSkillsRequirement, JupyterRequirement]) - exit_code, output = box.execute('mkdir test') - print(output) - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - - exit_code, output = box.execute('echo "create_file(\'hello.py\')" | execute_cli') - print(output) - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output.strip().split('\r\n') == ( - '[File: /workspace/hello.py (1 lines total)]\r\n' - '(this is the beginning of the file)\r\n' - '1|\r\n' - '(this is the end of the 
file)\r\n' - '[File hello.py created.]\r\n' - ).strip().split('\r\n') - - exit_code, output = box.execute('cd test') - print(output) - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - - exit_code, output = box.execute('echo "create_file(\'hello.py\')" | execute_cli') - print(output) - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output.strip().split('\r\n') == ( - '[File: /workspace/test/hello.py (1 lines total)]\r\n' - '(this is the beginning of the file)\r\n' - '1|\r\n' - '(this is the end of the file)\r\n' - '[File hello.py created.]\r\n' - ).strip().split('\r\n') - - if config.sandbox.enable_auto_lint: - # edit file, but make a mistake in indentation - exit_code, output = box.execute( - 'echo "insert_content_at_line(\'hello.py\', 1, \' print(\\"hello world\\")\')" | execute_cli' - ) - print(output) - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output.strip().split('\r\n') == ( - """ -[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.] -ERRORS: -/workspace/test/hello.py:1:3: E999 IndentationError: unexpected indent -[This is how your edit would have looked if applied] -------------------------------------------------- -(this is the beginning of the file) -1| print("hello world") -(this is the end of the file) -------------------------------------------------- - -[This is the original code before your edit] -------------------------------------------------- -(this is the beginning of the file) -1| -(this is the end of the file) -------------------------------------------------- -Your changes have NOT been applied. Please fix your edit command and try again. -You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code. -DO NOT re-run the same failed edit command. Running it again will lead to the same error. 
-""" - ).strip().split('\n') - - # edit file with correct indentation - exit_code, output = box.execute( - 'echo "insert_content_at_line(\'hello.py\', 1, \'print(\\"hello world\\")\')" | execute_cli' - ) - print(output) - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output.strip().split('\r\n') == ( - """ -[File: /workspace/test/hello.py (1 lines total after edit)] -(this is the beginning of the file) -1|print("hello world") -(this is the end of the file) -[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] -""" - ).strip().split('\n') - - exit_code, output = box.execute('rm -rf /workspace/*') - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - box.close() - - -def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir): - # get a temporary directory - config = AppConfig( - sandbox=SandboxConfig( - box_type='ssh', - enable_auto_lint=False, - ), - persist_sandbox=False, - ) - assert not config.sandbox.enable_auto_lint - box = create_docker_box_from_app_config(temp_dir, config) - _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config) - - -@pytest.mark.skipif( - os.getenv('TEST_IN_CI') != 'true', - reason='The unittest need to download image, so only run on CI', -) -def test_agnostic_sandbox_jupyter_agentskills_fileop_pwd(temp_dir): - for base_sandbox_image in ['ubuntu:22.04', 'debian:11']: - config = AppConfig( - sandbox=SandboxConfig( - box_type='ssh', - container_image=base_sandbox_image, - enable_auto_lint=False, - ), - persist_sandbox=False, - ) - assert not config.sandbox.enable_auto_lint - box = create_docker_box_from_app_config(temp_dir, config) - _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config) - - -def test_sandbox_jupyter_plugin_backticks(temp_dir): - config = AppConfig( - sandbox=SandboxConfig( - box_type='ssh', - ), - 
persist_sandbox=False, - ) - box = DockerSSHBox( - config=config.sandbox, - persist_sandbox=config.persist_sandbox, - workspace_mount_path=temp_dir, - sandbox_workspace_dir=config.workspace_mount_path_in_sandbox, - cache_dir=config.cache_dir, - run_as_devin=True, - ssh_hostname=config.ssh_hostname, - ssh_password=config.ssh_password, - ssh_port=config.ssh_port, - ) - box.init_plugins([JupyterRequirement]) - test_code = "print('Hello, `World`!')" - expected_write_command = ( - "cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{test_code}\n' 'EOL' - ) - expected_execute_command = 'cat /tmp/opendevin_jupyter_temp.py | execute_cli' - exit_code, output = box.execute(expected_write_command) - exit_code, output = box.execute(expected_execute_command) - print(output) - assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__ - assert output.strip() == 'Hello, `World`!', ( - 'The output should be the same as the input for ' + box.__class__.__name__ - ) - box.close()