INT: prevent error on repeat integration tests after failed test(s) (A…

…ll-Hands-AI#2935) * Integration tests: prevent File not found error * forgot to remove debug calls in regenerate.sh
invariantlabs-ai · Jul 18, 2024 · 5a57130 · 5a57130
1 parent 728131f
commit 5a57130
Show file tree

Hide file tree

Showing 4 changed files with 133 additions and 48 deletions.
diff --git a/opendevin/runtime/browser/browser_env.py b/opendevin/runtime/browser/browser_env.py
@@ -47,6 +47,16 @@ def __init__(
         self.process = multiprocessing.Process(
             target=self.browser_process,
         )
+
+        try:
+            self.original_cwd = os.getcwd()
+        except FileNotFoundError:
+            logger.warning(
+                'Current working directory does not exist. Using /tmp as fallback.'
+            )
+            self.original_cwd = '/tmp'
+            os.chdir('/tmp')
+
         if is_async:
             threading.Thread(target=self.init_browser).start()
         else:
@@ -66,7 +76,23 @@ def get_html_text_converter(self):
 
     def init_browser(self):
         logger.info('Starting browser env...')
-        self.process.start()
+
+        # Ensure we're in a valid directory before starting the process
+        try:
+            os.chdir(self.original_cwd)
+            logger.debug(f'Changed back to original directory: {self.original_cwd}')
+        except Exception as e:
+            logger.error(f'Failed to change to original directory: {e}')
+            # If we can't change to the original directory, try to use a known valid directory
+            os.chdir('/tmp')
+            logger.debug('Changed to /tmp directory as fallback')
+
+        try:
+            self.process.start()
+        except Exception as e:
+            logger.error(f'Failed to start browser process: {e}')
+            raise
+
         if not self.check_alive():
             self.close()
             raise BrowserInitException('Failed to start browser environment.')

diff --git a/opendevin/runtime/server/runtime.py b/opendevin/runtime/server/runtime.py
@@ -79,9 +79,9 @@ def __init__(
         self.browser: BrowserEnv | None = None
 
     async def close(self):
-        if not self._is_external_sandbox:
+        if hasattr(self, '_is_external_sandbox') and not self._is_external_sandbox:
             self.sandbox.close()
-        if self.browser is not None:
+        if hasattr(self, 'browser') and self.browser is not None:
             self.browser.close()
 
     def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None:
@@ -109,7 +109,7 @@ async def run(self, action: CmdRunAction) -> Observation:
         return self._run_command(action.command)
 
     async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
-        obs = self._run_command(
+        self._run_command(
             ("cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{action.code}\n' 'EOL'),
         )
 
@@ -150,7 +150,7 @@ async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
 
                     # re-init the kernel after restart
                     if action.kernel_init_code:
-                        obs = self._run_command(
+                        self._run_command(
                             (
                                 f"cat > /tmp/opendevin_jupyter_init.py <<'EOL'\n"
                                 f'{action.kernel_init_code}\n'

diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
@@ -1,6 +1,7 @@
 import io
 import os
 import re
+import shutil
 import subprocess
 import tempfile
 from functools import partial
@@ -12,8 +13,13 @@
 
 from opendevin.llm.llm import message_separator
 
-script_dir = os.path.dirname(os.path.realpath(__file__))
-workspace_path = os.getenv('WORKSPACE_BASE')
+script_dir = os.environ.get('SCRIPT_DIR')
+project_root = os.environ.get('PROJECT_ROOT')
+workspace_path = os.environ.get('WORKSPACE_BASE')
+
+assert script_dir is not None, 'SCRIPT_DIR environment variable is not set'
+assert project_root is not None, 'PROJECT_ROOT environment variable is not set'
+assert workspace_path is not None, 'WORKSPACE_BASE environment variable is not set'
 
 
 class SecretExit(Exception):
@@ -204,16 +210,41 @@ def http_server():
 def set_up():
     global cur_id
     cur_id = 0
-    assert workspace_path is not None
+    assert workspace_path is not None, 'workspace_path is not set'
+
+    # Remove and recreate the workspace_path
     if os.path.exists(workspace_path):
-        for file in os.listdir(workspace_path):
-            os.remove(os.path.join(workspace_path, file))
+        shutil.rmtree(workspace_path)
+    os.makedirs(workspace_path)
 
 
 @pytest.fixture(autouse=True)
 def resource_setup():
-    set_up()
-    if not os.path.exists(workspace_path):
-        os.makedirs(workspace_path)
-    # Yield to test execution
-    yield
+    try:
+        original_cwd = os.getcwd()
+    except FileNotFoundError:
+        print(
+            '[DEBUG] Original working directory does not exist. Using /tmp as fallback.'
+        )
+        original_cwd = '/tmp'
+        os.chdir('/tmp')
+
+    try:
+        set_up()
+        yield
+    finally:
+        try:
+            print(f'[DEBUG] Final working directory: {os.getcwd()}')
+        except FileNotFoundError:
+            print('[DEBUG] Final working directory does not exist')
+
+        if os.path.exists(workspace_path):
+            shutil.rmtree(workspace_path)
+        os.makedirs(workspace_path, exist_ok=True)
+
+        # Try to change back to the original directory
+        try:
+            os.chdir(original_cwd)
+            print(f'[DEBUG] Changed back to original directory: {original_cwd}')
+        except Exception:
+            os.chdir('/tmp')
diff --git a/tests/integration/regenerate.sh b/tests/integration/regenerate.sh
@@ -11,6 +11,17 @@ unset SANDBOX_ENV_OPENAI_API_KEY
 unset OPENAI_BASE_URL
 unset OPENAI_MODEL
 
+# Get the absolute path of the script directory
+get_script_dir() {
+    local source="${BASH_SOURCE[0]}"
+    while [ -h "$source" ]; do
+        local dir="$( cd -P "$( dirname "$source" )" && pwd )"
+        source="$(readlink "$source")"
+        [[ $source != /* ]] && source="$dir/$source"
+    done
+    echo "$( cd -P "$( dirname "$source" )" && pwd )"
+}
+
 TMP_FILE="${TMP_FILE:-tmp.log}"
 
 if [ -z $WORKSPACE_MOUNT_PATH ]; then
@@ -20,14 +31,23 @@ if [ -z $WORKSPACE_BASE ]; then
   WORKSPACE_BASE=$(pwd)
 fi
 
-WORKSPACE_MOUNT_PATH+="/_test_workspace"
-WORKSPACE_BASE+="/_test_workspace"
+export SCRIPT_DIR=$(get_script_dir)
+export PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
+
+WORKSPACE_MOUNT_PATH=$(realpath "${WORKSPACE_MOUNT_PATH}/_test_workspace")
+WORKSPACE_BASE=$(realpath "${WORKSPACE_BASE}/_test_workspace")
 WORKSPACE_MOUNT_PATH_IN_SANDBOX="/workspace"
 
+echo "Current working directory: $(pwd)"
+echo "SCRIPT_DIR: $SCRIPT_DIR"
+echo "PROJECT_ROOT: $PROJECT_ROOT"
 echo "WORKSPACE_BASE: $WORKSPACE_BASE"
 echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH"
 echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX"
 
+# Ensure we're in the correct directory
+cd "$PROJECT_ROOT" || exit 1
+
 mkdir -p $WORKSPACE_BASE
 
 # use environmental variable if exists, otherwise use "ssh"
@@ -71,14 +91,18 @@ num_of_agents=${#agents[@]}
 
 # run integration test against a specific agent & test
 run_test() {
-  local pytest_cmd="poetry run pytest -s ./tests/integration/test_agent.py::$test_name"
+  # Ensure we're in the correct directory
+  cd "$PROJECT_ROOT" || exit 1
 
+  local pytest_cmd="poetry run pytest --cache-clear -s $SCRIPT_DIR/test_agent.py::$test_name"
   # Check if TEST_IN_CI is defined
   if [ -n "$TEST_IN_CI" ]; then
     pytest_cmd+=" --cov=agenthub --cov=opendevin --cov-report=xml --cov-append"
   fi
 
-  SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
+  env SCRIPT_DIR="$SCRIPT_DIR" \
+    PROJECT_ROOT="$PROJECT_ROOT" \
+    SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
     PERSIST_SANDBOX=$PERSIST_SANDBOX \
     WORKSPACE_BASE=$WORKSPACE_BASE \
     WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
@@ -120,7 +144,7 @@ run_test() {
 
 # browsing capability needs a local http server
 launch_http_server() {
-  poetry run python tests/integration/start_http_server.py &
+  poetry run python $SCRIPT_DIR/start_http_server.py &
   HTTP_SERVER_PID=$!
   echo "Test http server launched, PID = $HTTP_SERVER_PID"
   sleep 10
@@ -147,15 +171,17 @@ trap cleanup EXIT
 regenerate_without_llm() {
   # set -x to print the command being executed
   set -x
-  SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
-    PERSIST_SANDBOX=$PERSIST_SANDBOX \
-    WORKSPACE_BASE=$WORKSPACE_BASE \
-    WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
-    WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
-    MAX_ITERATIONS=$MAX_ITERATIONS \
-    FORCE_APPLY_PROMPTS=true \
-    DEFAULT_AGENT=$agent \
-    poetry run pytest -s ./tests/integration/test_agent.py::$test_name
+  env SCRIPT_DIR="$SCRIPT_DIR" \
+      PROJECT_ROOT="$PROJECT_ROOT" \
+      SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
+      PERSIST_SANDBOX=$PERSIST_SANDBOX \
+      WORKSPACE_BASE=$WORKSPACE_BASE \
+      WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
+      WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
+      MAX_ITERATIONS=$MAX_ITERATIONS \
+      FORCE_APPLY_PROMPTS=true \
+      DEFAULT_AGENT=$agent \
+      poetry run pytest -s $SCRIPT_DIR/test_agent.py::$test_name
   set +x
 }
 
@@ -165,37 +191,39 @@ regenerate_with_llm() {
   fi
 
   rm -rf $WORKSPACE_BASE/*
-  if [ -d "tests/integration/workspace/$test_name" ]; then
-    cp -r tests/integration/workspace/$test_name/* $WORKSPACE_BASE
+  if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
+    cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
   fi
 
   rm -rf logs
-  rm -rf tests/integration/mock/$agent/$test_name/*
+  rm -rf "$SCRIPT_DIR/mock/$agent/$test_name/*"
   # set -x to print the command being executed
   set -x
   echo -e "/exit\n" | \
-    DEBUG=true \
-    SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
-    PERSIST_SANDBOX=$PERSIST_SANDBOX \
-    WORKSPACE_BASE=$WORKSPACE_BASE \
-    WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
-    WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
-    poetry run python ./opendevin/core/main.py \
-    -i $MAX_ITERATIONS \
-    -t "$task Do not ask me for confirmation at any point." \
-    -c $agent
+    env SCRIPT_DIR="$SCRIPT_DIR" \
+      PROJECT_ROOT="$PROJECT_ROOT" \
+      DEBUG=true \
+      SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
+      PERSIST_SANDBOX=$PERSIST_SANDBOX \
+      WORKSPACE_BASE=$WORKSPACE_BASE \
+      WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
+      AGENT=$agent \
+      WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
+      poetry run python "$PROJECT_ROOT/opendevin/core/main.py" \
+      -i $MAX_ITERATIONS \
+      -t "$task Do not ask me for confirmation at any point." \
+      -c $agent
   set +x
 
-  mkdir -p tests/integration/mock/$agent/$test_name/
-  mv logs/llm/**/* tests/integration/mock/$agent/$test_name/
+  mkdir -p "$SCRIPT_DIR/mock/$agent/$test_name/"
+  mv logs/llm/**/* "$SCRIPT_DIR/mock/$agent/$test_name/"
 
 }
 
 ##############################################################
 ##                      MAIN PROGRAM                        ##
 ##############################################################
 
-
 if [ "$num_of_tests" -ne "${#test_names[@]}" ]; then
   echo "Every task must correspond to one test case"
   exit 1
@@ -222,8 +250,8 @@ for ((i = 0; i < num_of_tests; i++)); do
 
     echo -e "\n\n\n\n========STEP 1: Running $test_name for $agent========\n\n\n\n"
     rm -rf $WORKSPACE_BASE/*
-    if [ -d "tests/integration/workspace/$test_name" ]; then
-      cp -r "tests/integration/workspace/$test_name"/* $WORKSPACE_BASE
+    if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
+      cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
     fi
 
     if [ "$TEST_ONLY" = true ]; then
@@ -245,7 +273,7 @@ for ((i = 0; i < num_of_tests; i++)); do
 
       if [ "$FORCE_USE_LLM" = true ]; then
         echo -e "\n\n\n\n========FORCE_USE_LLM, skipping step 2 & 3========\n\n\n\n"
-      elif [ ! -d "tests/integration/mock/$agent/$test_name" ]; then
+      elif [ ! -d "$SCRIPT_DIR/mock/$agent/$test_name" ]; then
         echo -e "\n\n\n\n========No existing mock responses for $agent/$test_name, skipping step 2 & 3========\n\n\n\n"
       else
         echo -e "\n\n\n\n========STEP 2: $test_name failed, regenerating prompts for $agent WITHOUT money cost========\n\n\n\n"