Skip to content

Commit

Permalink
INT: prevent error on repeat integration tests after failed test(s) (A…
Browse files Browse the repository at this point in the history
…ll-Hands-AI#2935)

* Integration tests: prevent File not found error

* forgot to remove debug calls in regenerate.sh
  • Loading branch information
tobitege authored Jul 18, 2024
1 parent 728131f commit 5a57130
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 48 deletions.
28 changes: 27 additions & 1 deletion opendevin/runtime/browser/browser_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@ def __init__(
self.process = multiprocessing.Process(
target=self.browser_process,
)

try:
self.original_cwd = os.getcwd()
except FileNotFoundError:
logger.warning(
'Current working directory does not exist. Using /tmp as fallback.'
)
self.original_cwd = '/tmp'
os.chdir('/tmp')

if is_async:
threading.Thread(target=self.init_browser).start()
else:
Expand All @@ -66,7 +76,23 @@ def get_html_text_converter(self):

def init_browser(self):
logger.info('Starting browser env...')
self.process.start()

# Ensure we're in a valid directory before starting the process
try:
os.chdir(self.original_cwd)
logger.debug(f'Changed back to original directory: {self.original_cwd}')
except Exception as e:
logger.error(f'Failed to change to original directory: {e}')
# If we can't change to the original directory, try to use a known valid directory
os.chdir('/tmp')
logger.debug('Changed to /tmp directory as fallback')

try:
self.process.start()
except Exception as e:
logger.error(f'Failed to start browser process: {e}')
raise

if not self.check_alive():
self.close()
raise BrowserInitException('Failed to start browser environment.')
Expand Down
8 changes: 4 additions & 4 deletions opendevin/runtime/server/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ def __init__(
self.browser: BrowserEnv | None = None

async def close(self):
if not self._is_external_sandbox:
if hasattr(self, '_is_external_sandbox') and not self._is_external_sandbox:
self.sandbox.close()
if self.browser is not None:
if hasattr(self, 'browser') and self.browser is not None:
self.browser.close()

def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None:
Expand Down Expand Up @@ -109,7 +109,7 @@ async def run(self, action: CmdRunAction) -> Observation:
return self._run_command(action.command)

async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
obs = self._run_command(
self._run_command(
("cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{action.code}\n' 'EOL'),
)

Expand Down Expand Up @@ -150,7 +150,7 @@ async def run_ipython(self, action: IPythonRunCellAction) -> Observation:

# re-init the kernel after restart
if action.kernel_init_code:
obs = self._run_command(
self._run_command(
(
f"cat > /tmp/opendevin_jupyter_init.py <<'EOL'\n"
f'{action.kernel_init_code}\n'
Expand Down
51 changes: 41 additions & 10 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import io
import os
import re
import shutil
import subprocess
import tempfile
from functools import partial
Expand All @@ -12,8 +13,13 @@

from opendevin.llm.llm import message_separator

script_dir = os.path.dirname(os.path.realpath(__file__))
workspace_path = os.getenv('WORKSPACE_BASE')
script_dir = os.environ.get('SCRIPT_DIR')
project_root = os.environ.get('PROJECT_ROOT')
workspace_path = os.environ.get('WORKSPACE_BASE')

assert script_dir is not None, 'SCRIPT_DIR environment variable is not set'
assert project_root is not None, 'PROJECT_ROOT environment variable is not set'
assert workspace_path is not None, 'WORKSPACE_BASE environment variable is not set'


class SecretExit(Exception):
Expand Down Expand Up @@ -204,16 +210,41 @@ def http_server():
def set_up():
global cur_id
cur_id = 0
assert workspace_path is not None
assert workspace_path is not None, 'workspace_path is not set'

# Remove and recreate the workspace_path
if os.path.exists(workspace_path):
for file in os.listdir(workspace_path):
os.remove(os.path.join(workspace_path, file))
shutil.rmtree(workspace_path)
os.makedirs(workspace_path)


@pytest.fixture(autouse=True)
def resource_setup():
set_up()
if not os.path.exists(workspace_path):
os.makedirs(workspace_path)
# Yield to test execution
yield
try:
original_cwd = os.getcwd()
except FileNotFoundError:
print(
'[DEBUG] Original working directory does not exist. Using /tmp as fallback.'
)
original_cwd = '/tmp'
os.chdir('/tmp')

try:
set_up()
yield
finally:
try:
print(f'[DEBUG] Final working directory: {os.getcwd()}')
except FileNotFoundError:
print('[DEBUG] Final working directory does not exist')

if os.path.exists(workspace_path):
shutil.rmtree(workspace_path)
os.makedirs(workspace_path, exist_ok=True)

# Try to change back to the original directory
try:
os.chdir(original_cwd)
print(f'[DEBUG] Changed back to original directory: {original_cwd}')
except Exception:
os.chdir('/tmp')
94 changes: 61 additions & 33 deletions tests/integration/regenerate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@ unset SANDBOX_ENV_OPENAI_API_KEY
unset OPENAI_BASE_URL
unset OPENAI_MODEL

# Get the absolute path of the script directory
get_script_dir() {
local source="${BASH_SOURCE[0]}"
while [ -h "$source" ]; do
local dir="$( cd -P "$( dirname "$source" )" && pwd )"
source="$(readlink "$source")"
[[ $source != /* ]] && source="$dir/$source"
done
echo "$( cd -P "$( dirname "$source" )" && pwd )"
}

TMP_FILE="${TMP_FILE:-tmp.log}"

if [ -z $WORKSPACE_MOUNT_PATH ]; then
Expand All @@ -20,14 +31,23 @@ if [ -z $WORKSPACE_BASE ]; then
WORKSPACE_BASE=$(pwd)
fi

WORKSPACE_MOUNT_PATH+="/_test_workspace"
WORKSPACE_BASE+="/_test_workspace"
export SCRIPT_DIR=$(get_script_dir)
export PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")

WORKSPACE_MOUNT_PATH=$(realpath "${WORKSPACE_MOUNT_PATH}/_test_workspace")
WORKSPACE_BASE=$(realpath "${WORKSPACE_BASE}/_test_workspace")
WORKSPACE_MOUNT_PATH_IN_SANDBOX="/workspace"

echo "Current working directory: $(pwd)"
echo "SCRIPT_DIR: $SCRIPT_DIR"
echo "PROJECT_ROOT: $PROJECT_ROOT"
echo "WORKSPACE_BASE: $WORKSPACE_BASE"
echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH"
echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX"

# Ensure we're in the correct directory
cd "$PROJECT_ROOT" || exit 1

mkdir -p $WORKSPACE_BASE

# use environmental variable if exists, otherwise use "ssh"
Expand Down Expand Up @@ -71,14 +91,18 @@ num_of_agents=${#agents[@]}

# run integration test against a specific agent & test
run_test() {
local pytest_cmd="poetry run pytest -s ./tests/integration/test_agent.py::$test_name"
# Ensure we're in the correct directory
cd "$PROJECT_ROOT" || exit 1

local pytest_cmd="poetry run pytest --cache-clear -s $SCRIPT_DIR/test_agent.py::$test_name"
# Check if TEST_IN_CI is defined
if [ -n "$TEST_IN_CI" ]; then
pytest_cmd+=" --cov=agenthub --cov=opendevin --cov-report=xml --cov-append"
fi

SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
env SCRIPT_DIR="$SCRIPT_DIR" \
PROJECT_ROOT="$PROJECT_ROOT" \
SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
Expand Down Expand Up @@ -120,7 +144,7 @@ run_test() {

# browsing capability needs a local http server
launch_http_server() {
poetry run python tests/integration/start_http_server.py &
poetry run python $SCRIPT_DIR/start_http_server.py &
HTTP_SERVER_PID=$!
echo "Test http server launched, PID = $HTTP_SERVER_PID"
sleep 10
Expand All @@ -147,15 +171,17 @@ trap cleanup EXIT
regenerate_without_llm() {
# set -x to print the command being executed
set -x
SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
MAX_ITERATIONS=$MAX_ITERATIONS \
FORCE_APPLY_PROMPTS=true \
DEFAULT_AGENT=$agent \
poetry run pytest -s ./tests/integration/test_agent.py::$test_name
env SCRIPT_DIR="$SCRIPT_DIR" \
PROJECT_ROOT="$PROJECT_ROOT" \
SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
MAX_ITERATIONS=$MAX_ITERATIONS \
FORCE_APPLY_PROMPTS=true \
DEFAULT_AGENT=$agent \
poetry run pytest -s $SCRIPT_DIR/test_agent.py::$test_name
set +x
}

Expand All @@ -165,37 +191,39 @@ regenerate_with_llm() {
fi

rm -rf $WORKSPACE_BASE/*
if [ -d "tests/integration/workspace/$test_name" ]; then
cp -r tests/integration/workspace/$test_name/* $WORKSPACE_BASE
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
fi

rm -rf logs
rm -rf tests/integration/mock/$agent/$test_name/*
rm -rf "$SCRIPT_DIR/mock/$agent/$test_name/*"
# set -x to print the command being executed
set -x
echo -e "/exit\n" | \
DEBUG=true \
SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
poetry run python ./opendevin/core/main.py \
-i $MAX_ITERATIONS \
-t "$task Do not ask me for confirmation at any point." \
-c $agent
env SCRIPT_DIR="$SCRIPT_DIR" \
PROJECT_ROOT="$PROJECT_ROOT" \
DEBUG=true \
SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
AGENT=$agent \
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
poetry run python "$PROJECT_ROOT/opendevin/core/main.py" \
-i $MAX_ITERATIONS \
-t "$task Do not ask me for confirmation at any point." \
-c $agent
set +x

mkdir -p tests/integration/mock/$agent/$test_name/
mv logs/llm/**/* tests/integration/mock/$agent/$test_name/
mkdir -p "$SCRIPT_DIR/mock/$agent/$test_name/"
mv logs/llm/**/* "$SCRIPT_DIR/mock/$agent/$test_name/"

}

##############################################################
## MAIN PROGRAM ##
##############################################################


if [ "$num_of_tests" -ne "${#test_names[@]}" ]; then
echo "Every task must correspond to one test case"
exit 1
Expand All @@ -222,8 +250,8 @@ for ((i = 0; i < num_of_tests; i++)); do

echo -e "\n\n\n\n========STEP 1: Running $test_name for $agent========\n\n\n\n"
rm -rf $WORKSPACE_BASE/*
if [ -d "tests/integration/workspace/$test_name" ]; then
cp -r "tests/integration/workspace/$test_name"/* $WORKSPACE_BASE
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
fi

if [ "$TEST_ONLY" = true ]; then
Expand All @@ -245,7 +273,7 @@ for ((i = 0; i < num_of_tests; i++)); do

if [ "$FORCE_USE_LLM" = true ]; then
echo -e "\n\n\n\n========FORCE_USE_LLM, skipping step 2 & 3========\n\n\n\n"
elif [ ! -d "tests/integration/mock/$agent/$test_name" ]; then
elif [ ! -d "$SCRIPT_DIR/mock/$agent/$test_name" ]; then
echo -e "\n\n\n\n========No existing mock responses for $agent/$test_name, skipping step 2 & 3========\n\n\n\n"
else
echo -e "\n\n\n\n========STEP 2: $test_name failed, regenerating prompts for $agent WITHOUT money cost========\n\n\n\n"
Expand Down

0 comments on commit 5a57130

Please sign in to comment.