Implement integration tests

* Implement integration tests for models `Claude`, `Gemini` and `QwenCoder`
* Implement integration tests for agents using `Claude`, `Gemini` and `QwenCoder`
* Add invoke tasks to run individual tests
* Add integration tests to the GitHub CI pipeline
gradion-ai · Jan 17, 2025 · cf4bd1f · cf4bd1f
1 parent 19535a1
commit cf4bd1f
Show file tree

Hide file tree

Showing 14 changed files with 584 additions and 6 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -10,6 +10,13 @@ jobs:
   test:
     runs-on: ubuntu-latest
 
+    env:
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+      QWEN_CODER_API_KEY: ${{ secrets.TEST_QWEN_CODER_API_KEY }}
+      QWEN_CODER_BASE_URL: ${{ secrets.TEST_QWEN_CODER_BASE_URL }}
+      QWEN_CODER_MODEL_NAME: ${{ secrets.TEST_QWEN_CODER_MODEL_NAME }}
+
     steps:
       - uses: actions/checkout@v4
 
@@ -39,7 +46,13 @@ jobs:
           poetry install
           pip list
 
-      - name: Run tests
+      - name: Run unit tests
+        shell: bash -l {0}
+        run: |
+          poetry run pytest tests/unit
+
+      - name: Run integration tests
         shell: bash -l {0}
         run: |
-          poetry run pytest -s tests
+          docker pull ghcr.io/gradion-ai/ipybox:basic
+          poetry run pytest tests/integration --no-flaky-report
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
@@ -34,14 +34,41 @@ Install pre-commit hooks:
 invoke precommit-install
 ```
 
+Create a `.env` file with [Anthropic](https://console.anthropic.com/settings/keys) and [Gemini](https://aistudio.google.com/app/apikey) API keys, plus the QwenCoder settings used by the integration tests:
+
+```env title=".env"
+# Required for Claude 3.5 Sonnet
+ANTHROPIC_API_KEY=...
+
+# Required for generative Google Search via Gemini 2
+GOOGLE_API_KEY=...
+
+# Required to run integration tests for QwenCoder via HuggingFace API
+QWEN_CODER_MODEL_NAME=Qwen/Qwen2.5-Coder-32B-Instruct
+QWEN_CODER_BASE_URL=https://api-inference.huggingface.co/v1/
+QWEN_CODER_API_KEY=...
+```
+
 Enforce coding conventions (done automatically by pre-commit hooks):
 
 ```bash
 invoke cc
 ```
 
-Run tests:
+Run unit tests:
+
+```bash
+invoke ut
+```
+
+Run integration tests:
+
+```bash
+invoke it
+```
+
+Run all tests:
 
 ```bash
-pytest -s tests
+invoke test
 ```
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -52,6 +52,7 @@ pre-commit = "^4.0"
 invoke = "^2.2"
 pytest = "^8.3"
 pytest-asyncio = "^0.24.0"
+flaky = "^3.8.1"
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"

diff --git a/tasks.py b/tasks.py
@@ -1,3 +1,5 @@
+from sys import platform
+
 from invoke import task
 
 
@@ -24,3 +26,36 @@ def serve_docs(c):
 @task
 def deploy_docs(c):
     c.run("mkdocs gh-deploy --force")
+
+
+@task
+def test(c, cov=False, cov_report=None):
+    """Run every test found under ``tests``.
+
+    Args:
+        c: Invoke context used to execute the pytest command.
+        cov: When true, coverage options are added to the pytest call.
+        cov_report: Optional comma-separated coverage report types.
+    """
+    _run_pytest(c, test_dir="tests", cov=cov, cov_report=cov_report)
+
+
+@task(aliases=["ut"])
+def unit_test(c, cov=False, cov_report=None):
+    """Run only the tests found under ``tests/unit`` (alias: ``ut``).
+
+    Args:
+        c: Invoke context used to execute the pytest command.
+        cov: When true, coverage options are added to the pytest call.
+        cov_report: Optional comma-separated coverage report types.
+    """
+    _run_pytest(c, test_dir="tests/unit", cov=cov, cov_report=cov_report)
+
+
+@task(aliases=["it"])
+def integration_test(c, cov=False, cov_report=None):
+    """Run only the tests found under ``tests/integration`` (alias: ``it``).
+
+    Args:
+        c: Invoke context used to execute the pytest command.
+        cov: When true, coverage options are added to the pytest call.
+        cov_report: Optional comma-separated coverage report types.
+    """
+    _run_pytest(c, test_dir="tests/integration", cov=cov, cov_report=cov_report)
+
+
+def _run_pytest(c, test_dir, cov=False, cov_report=None):
+    """Assemble and run the pytest command for ``test_dir``.
+
+    Args:
+        c: Invoke context whose ``run`` method executes the command.
+        test_dir: Directory (relative path) handed to pytest.
+        cov: Whether to append coverage options.
+        cov_report: Optional comma-separated coverage report types.
+    """
+    cov_options = _pytest_cov_options(cov, cov_report)
+    command = f"pytest {test_dir} {cov_options} --no-flaky-report"
+    c.run(command, pty=_use_pty())
+
+
+def _use_pty():
+    """Return True unless ``sys.platform`` is ``"win32"``."""
+    running_on_windows = platform == "win32"
+    return not running_on_windows
+
+
+def _pytest_cov_options(use_cov: bool, cov_reports: str | None):
+    """Build the coverage options appended to the pytest command line.
+
+    Args:
+        use_cov: Whether coverage is requested at all.
+        cov_reports: Optional comma-separated list of additional report types
+            (for example ``"html,xml"``). The ``term`` report is always included.
+
+    Returns:
+        An empty string when ``use_cov`` is false, otherwise ``--cov`` followed by
+        one ``--cov-report <type>`` option per report type.
+    """
+    if not use_cov:
+        return ""
+
+    # Tolerate whitespace around entries and stray commas (e.g. "html, xml,"):
+    # an empty entry would otherwise produce a "--cov-report" option without a value.
+    extra_report_types = [r.strip() for r in (cov_reports or "").split(",") if r.strip()]
+    cov_report_params = [f"--cov-report {r}" for r in ["term", *extra_report_types]]
+    return f"--cov {' '.join(cov_report_params)}"
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1,3 @@
+from pathlib import Path
+
+# Absolute, resolved path of the directory containing this ``__init__.py``.
+TEST_ROOT_PATH = Path(__file__).parent.resolve()
diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py
diff --git a/tests/helpers/flaky.py b/tests/helpers/flaky.py
@@ -0,0 +1,16 @@
+import time
+
+from google import genai
+
+
+def rerun_on_google_genai_resource_exhausted(wait_time_s: float):
+    """Create a rerun filter that retries tests failing on Google GenAI quota errors.
+
+    The returned callable takes ``(err, name, test, plugin)``, which looks like the
+    ``rerun_filter`` signature of the ``flaky`` plugin (confirm against its docs).
+
+    Args:
+        wait_time_s: Seconds to sleep before a rerun is requested.
+
+    Returns:
+        A filter returning True only when the failure is a ``genai.errors.ClientError``
+        (or subclass) whose message contains ``RESOURCE_EXHAUSTED``.
+    """
+
+    def _filter(err, name, test, plugin):
+        err_class, err_value, _ = err
+        # issubclass (rather than an exact class match) also covers subclasses of ClientError.
+        is_client_error = isinstance(err_class, type) and issubclass(err_class, genai.errors.ClientError)
+        if not (is_client_error and "RESOURCE_EXHAUSTED" in str(err_value)):
+            return False
+
+        # Back off only when a rerun will actually happen; other client errors
+        # fail immediately instead of sleeping for nothing.
+        time.sleep(wait_time_s)
+        return True
+
+    return _filter
diff --git a/tests/helpers/skills/user_repository/__init__.py b/tests/helpers/skills/user_repository/__init__.py
diff --git a/tests/helpers/skills/user_repository/api.py b/tests/helpers/skills/user_repository/api.py
@@ -0,0 +1,38 @@
+from abc import ABC, abstractmethod
+
+
+class UserRepository(ABC):
+    """Abstract interface for looking up a user's name and email by user id."""
+
+    @abstractmethod
+    def find_user_name(self, user_id: str) -> str:
+        """Finds the name of a user in the user repository.
+
+        Args:
+            user_id (str): The id of the user to find.
+
+        Returns:
+            str: The name of the user.
+        """
+        pass
+
+    @abstractmethod
+    def find_user_email(self, user_id: str, invalidate_cache: bool = False) -> str:
+        """Finds the email of a user in the user repository.
+
+        Args:
+            user_id (str): The id of the user to find.
+            invalidate_cache (bool): Whether to invalidate all the caches before lookup.
+                                     Should typically be left as False unless explicitly needed.
+
+        Returns:
+            str: The email of the user.
+        """
+        pass
+
+
+def create_user_repository() -> UserRepository:
+    """
+    Creates a new instance of the UserRepository tool.
+
+    Returns:
+        UserRepository: A freshly constructed ``UserRepositoryImpl``.
+    """
+    # Imported inside the function, as the implementation module itself imports this one.
+    from . import impl
+
+    return impl.UserRepositoryImpl()
diff --git a/tests/helpers/skills/user_repository/impl.py b/tests/helpers/skills/user_repository/impl.py
@@ -0,0 +1,20 @@
+from .api import UserRepository
+
+USER_ID = "user-123"
+
+
+class UserRepositoryImpl(UserRepository):
+    """Fixed-data ``UserRepository`` that knows exactly one user (``USER_ID``)."""
+
+    def find_user_name(self, user_id: str) -> str:
+        """Return the fixed user name for ``USER_ID``; raise ``ValueError`` otherwise."""
+        normalized_id = user_id.lower().strip()
+        if normalized_id != USER_ID:
+            raise ValueError(f"User {user_id} not found")
+
+        return "user_a37c1f54"
+
+    def find_user_email(self, user_id: str, invalidate_cache: bool = False) -> str:
+        """Return the fixed email for ``USER_ID``.
+
+        Raises:
+            ValueError: If ``invalidate_cache`` is false (checked first), or if the
+                normalized ``user_id`` does not equal ``USER_ID``.
+        """
+        if invalidate_cache is False or not invalidate_cache:
+            raise ValueError("You must invalidate the cache to get the email address")
+
+        normalized_id = user_id.lower().strip()
+        if normalized_id != USER_ID:
+            raise ValueError(f"User {user_id} not found")
+
+        return "user.a37c1f54@mytestdomain.com"
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
@@ -0,0 +1,36 @@
+import os
+from unittest.mock import AsyncMock
+
+import pytest
+from dotenv import load_dotenv
+
+from freeact.logger import Logger
+from freeact.model.claude.model import Claude
+
+
+@pytest.fixture(autouse=True)
+def load_env():
+    """Call ``load_dotenv()`` automatically (``autouse``) for every test covered by this conftest."""
+    load_dotenv()
+
+
+@pytest.fixture
+def logger():
+    """Provide an ``AsyncMock`` constrained to the ``Logger`` spec."""
+    mock_logger = AsyncMock(spec=Logger)
+    return mock_logger
+
+
+@pytest.fixture
+def claude(logger):
+    """Provide a ``Claude`` model wired to the mocked logger, with prompt caching disabled."""
+    model_options = {
+        "model_name": "claude-3-5-haiku-20241022",
+        "prompt_caching": False,
+    }
+    return Claude(logger=logger, **model_options)
+
+
+@pytest.fixture
+def qwen_coder_config():
+    """Provide QwenCoder settings read from ``QWEN_CODER_*`` environment variables.
+
+    Each value is ``None`` when the corresponding variable is not set.
+    """
+    setting_names = ("model_name", "api_key", "base_url")
+    return {name: os.getenv(f"QWEN_CODER_{name.upper()}") for name in setting_names}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from pathlib import Path

		TEST_ROOT_PATH = Path(__file__).parent.resolve()