diff --git a/scripts/run_functional_tests.py b/scripts/run_functional_tests.py
new file mode 100644
index 0000000..7d9e909
--- /dev/null
+++ b/scripts/run_functional_tests.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+"""Run functional tests against a remote API endpoint.
+
+This script bypasses the tests/conftest.py which overrides API_KEY.
+
+Usage:
+    python scripts/run_functional_tests.py \
+        --api-base "https://example.com" \
+        --api-key "your-api-key"
+"""
+
+import argparse
+import asyncio
+import sys
+import time
+import uuid
+from dataclasses import dataclass
+from typing import Dict, List, Tuple
+
+import httpx
+
+# Language snippets keyed by the /exec "lang" value: (code, expected_substring_in_stdout)
+LANGUAGE_SNIPPETS: Dict[str, Tuple[str, str]] = {
+    "py": ("print('py: sum(1..10)=', sum(range(1,11)))", "55"),
+    "js": ("console.log('js: sum(1..10)=' + (1+2+3+4+5+6+7+8+9+10));", "55"),
+    "ts": ("console.log('ts: sum(1..10)=' + (1+2+3+4+5+6+7+8+9+10));", "55"),
+    "go": (
+        'package main\n\nimport (\n\t"fmt"\n)\n\nfunc main() {\n\ts := 0\n\t'
+        'for i := 1; i <= 10; i++ {\n\t\ts += i\n\t}\n\t'
+        'fmt.Printf("go: sum(1..10)=%d\\n", s)\n}',
+        "55",
+    ),
+    "java": (
+        "public class Code { public static void main(String[] args){ "
+        'int s=0; for(int i=1;i<=10;i++) s+=i; System.out.println("java: sum(1..10)="+s); } }',
+        "55",
+    ),
+    "c": (
+        '#include <stdio.h>\nint main(){int s=0; for(int i=1;i<=10;i++) s+=i; '
+        'printf("c: sum(1..10)=%d\\n", s); return 0;}',
+        "55",
+    ),
+    "cpp": (
+        "#include <iostream>\nint main(){int s=0; for(int i=1;i<=10;i++) s+=i; "
+        'std::cout << "cpp: sum(1..10)=" << s << std::endl; return 0;}',
+        "55",
+    ),
+    "php": (
+        '<?php $s=0; for($i=1;$i<=10;$i++){ $s+=$i; } echo "php: sum(1..10)=$s\\n";',
+        "55",
+    ),
+    "rs": (
+        "fn main(){ let mut s = 0; for i in 1..=10 { s += i; } "
+        'println!("rs: sum(1..10)={}", s); }',
+        "55",
+    ),
+    "r": ("cat('r: sum(1..10)=', sum(1:10), '\\n')", "55"),
+    "f90": (
+        "program sum\n  integer :: s, i\n  s = 0\n  do i = 1, 10\n     s = s + i\n  end do\n"
+        '  print *, "f90: sum(1..10)=", s\nend program sum\n',
+        "55",
+    ),
+    "d": (
+        'import std.stdio;\nvoid main(){ int s=0; foreach(i; 1..11) s+=i; writeln("d: sum(1..10)=", s); }',
+        "55",
+    ),
+}
+
+
+@dataclass
+class TestResult:
+    name: str  # label printed for this check in the summary
+    passed: bool  # True when the check succeeded
+    message: str  # status or error detail; the summary prints its first 50 characters
+    duration_ms: float  # elapsed wall-clock time in milliseconds
+
+
+class FunctionalTester:
+    def __init__(self, api_base: str, api_key: str, timeout: int = 60):
+        self.api_base = api_base.rstrip("/")  # stored without trailing slashes
+        self.api_key = api_key  # sent as the x-api-key header
+        self.timeout = timeout  # handed to httpx.AsyncClient(timeout=...) in run_all
+        self.results: List[TestResult] = []  # one TestResult appended per check
+
+    def headers(self) -> Dict[str, str]:
+        return {"x-api-key": self.api_key, "Content-Type": "application/json"}  # API key + JSON type
+
+    async def run_all(self) -> bool:
+        """Run every check sequentially, print the summary, return True if all passed."""
+        # verify=False disables TLS certificate verification for the target endpoint.
+        async with httpx.AsyncClient(
+            base_url=self.api_base, timeout=self.timeout, verify=False
+        ) as client:
+            # Health tests
+            await self.test_health(client)
+            await self.test_health_detailed(client)
+
+            # Language execution tests
+            for lang, (code, expected) in LANGUAGE_SNIPPETS.items():
+                await self.test_language_execution(client, lang, code, expected)
+
+            # State persistence, then the file upload/download round trip
+            await self.test_state_persistence(client)
+            await self.test_file_upload_download(client)
+
+        return self.print_summary()
+
+    async def test_health(self, client: httpx.AsyncClient):
+        """GET /health; passes on HTTP 200 with a "status" key in the JSON body."""
+        began = time.perf_counter()
+        try:
+            resp = await client.get("/health")
+            ok = resp.status_code == 200 and "status" in resp.json()
+            detail = f"Status: {resp.status_code}" if ok else f"Failed: {resp.text[:100]}"
+        except Exception as exc:
+            # Transport and JSON-decoding errors are recorded as failures.
+            ok, detail = False, str(exc)
+        elapsed_ms = (time.perf_counter() - began) * 1000
+        self.results.append(TestResult("health_check", ok, detail, elapsed_ms))
+
+    async def test_health_detailed(self, client: httpx.AsyncClient):
+        """GET /health/detailed with auth headers; HTTP 200 and 503 both pass."""
+        began = time.perf_counter()
+        try:
+            resp = await client.get("/health/detailed", headers=self.headers())
+            ok = resp.status_code in (200, 503)
+            detail = f"Status: {resp.status_code}" if ok else f"Failed: {resp.text[:100]}"
+        except Exception as exc:
+            # Any request error is recorded as a failure rather than raised.
+            ok, detail = False, str(exc)
+        elapsed_ms = (time.perf_counter() - began) * 1000
+        self.results.append(TestResult("health_detailed", ok, detail, elapsed_ms))
+
+    async def test_language_execution(
+        self, client: httpx.AsyncClient, lang: str, code: str, expected: str
+    ):
+        """POST one snippet to /exec and check that stdout contains ``expected``.
+
+        A fresh random entity_id is used per call. The outcome is recorded as
+        ``exec_<lang>``; any exception is recorded as a failure.
+        """
+        began = time.perf_counter()
+        try:
+            payload = {
+                "code": code,
+                "lang": lang,
+                "entity_id": f"test-{uuid.uuid4().hex[:8]}",
+            }
+            resp = await client.post("/exec", headers=self.headers(), json=payload)
+            if resp.status_code != 200:
+                ok, detail = False, f"Status {resp.status_code}: {resp.text[:100]}"
+            else:
+                out = resp.json().get("stdout", "")
+                ok = expected in out
+                if ok:
+                    detail = f"OK - output contains '{expected}'"
+                else:
+                    detail = f"Expected '{expected}' in stdout, got: {out[:100]}"
+        except Exception as exc:
+            ok, detail = False, str(exc)
+        elapsed_ms = (time.perf_counter() - began) * 1000
+        self.results.append(TestResult(f"exec_{lang}", ok, detail, elapsed_ms))
+
+    async def test_state_persistence(self, client: httpx.AsyncClient):
+        """Check that a Python variable set in one /exec call is visible in the next.
+
+        Both calls share one random entity_id. Step 1 assigns ``test_var = 42``;
+        step 2 prints ``test_var + 1`` and passes when "43" appears in stdout.
+        Every outcome, including exceptions, is recorded under the name
+        ``state_persistence``.
+        """
+        began = time.perf_counter()
+        entity_id = f"state-test-{uuid.uuid4().hex[:8]}"
+
+        try:
+            # Step 1: Create variable
+            first = await client.post(
+                "/exec",
+                headers=self.headers(),
+                json={"code": "test_var = 42", "lang": "py", "entity_id": entity_id},
+            )
+            if first.status_code != 200:
+                ok, detail = False, f"Step 1 failed: {first.text[:100]}"
+            else:
+                # has_state is informational; it only appears in the success message.
+                has_state = first.json().get("has_state", False)
+
+                # Step 2: Use variable
+                second = await client.post(
+                    "/exec",
+                    headers=self.headers(),
+                    json={"code": "print(test_var + 1)", "lang": "py", "entity_id": entity_id},
+                )
+                if second.status_code != 200:
+                    ok, detail = False, f"Step 2 failed: {second.text[:100]}"
+                else:
+                    out = second.json().get("stdout", "")
+                    ok = "43" in out
+                    if ok:
+                        detail = f"OK - state persisted (has_state={has_state})"
+                    else:
+                        detail = f"Expected '43' in stdout, got: {out[:100]}, stderr: {second.json().get('stderr', '')[:100]}"
+        except Exception as exc:
+            ok, detail = False, str(exc)
+
+        elapsed_ms = (time.perf_counter() - began) * 1000
+        self.results.append(
+            TestResult("state_persistence", ok, detail, elapsed_ms)
+        )
+
+    async def test_file_upload_download(self, client: httpx.AsyncClient):
+        """Upload a small text file to /upload, then fetch it back via /download.
+
+        Failures of the upload stage (non-200 status, empty file list) are
+        recorded as ``file_upload``; the download comparison and any raised
+        exception are recorded as ``file_upload_download``.
+        """
+        began = time.perf_counter()
+        entity_id = f"file-test-{uuid.uuid4().hex[:8]}"
+        name = "file_upload_download"
+
+        try:
+            # Upload: only the API key header is passed for the multipart request.
+            upload = await client.post(
+                "/upload",
+                headers={"x-api-key": self.api_key},
+                files={"files": ("test.txt", b"hello world", "text/plain")},
+                data={"entity_id": entity_id},
+            )
+            if upload.status_code != 200:
+                name, ok = "file_upload", False
+                detail = f"Upload failed: {upload.text[:100]}"
+            else:
+                body = upload.json()
+                session_id = body.get("session_id")
+                uploaded = body.get("files", [])
+                if not uploaded:
+                    name, ok, detail = "file_upload", False, "No files in response"
+                else:
+                    file_id = uploaded[0].get("fileId")
+
+                    # Download and compare against the uploaded bytes.
+                    fetched = await client.get(
+                        f"/download/{session_id}/{file_id}",
+                        headers=self.headers(),
+                    )
+                    ok = fetched.status_code == 200 and fetched.content == b"hello world"
+                    if ok:
+                        detail = "OK - upload and download successful"
+                    else:
+                        detail = f"Download failed: status={fetched.status_code}"
+        except Exception as exc:
+            # Exceptions from either stage are reported under the combined name.
+            name, ok, detail = "file_upload_download", False, str(exc)
+
+        elapsed_ms = (time.perf_counter() - began) * 1000
+        self.results.append(
+            TestResult(name, ok, detail, elapsed_ms)
+        )
+
+    def print_summary(self) -> bool:
+        """Print a per-check report and totals; return True when no check failed."""
+        total = len(self.results)
+        passed = sum(1 for r in self.results if r.passed)
+        failed = total - passed
+        # Guard the percentage so an empty result list cannot divide by zero.
+        rate = passed / total * 100 if total else 0.0
+        print("\n" + "=" * 70)
+        print("FUNCTIONAL TEST RESULTS")
+        print("=" * 70)
+        print(f"Endpoint: {self.api_base}")
+        print("=" * 70)
+        for r in self.results:
+            status = "PASS" if r.passed else "FAIL"
+            print(f"[{status}] {r.name:30} ({r.duration_ms:7.1f}ms) - {r.message[:50]}")
+        print("=" * 70)
+        print(f"TOTAL: {passed}/{total} passed, {failed} failed")
+        print(f"Success rate: {rate:.1f}%")
+        print("=" * 70)
+        return failed == 0
+
+
+def main():  # CLI entry point: parse arguments, run every check, set the exit code
+    parser = argparse.ArgumentParser(description="Run functional tests")
+    parser.add_argument("--api-base", required=True, help="API base URL")
+    parser.add_argument("--api-key", required=True, help="API key")
+    parser.add_argument("--timeout", type=int, default=60, help="Request timeout")
+    args = parser.parse_args()
+    tester = FunctionalTester(args.api_base, args.api_key, args.timeout)
+    asyncio.run(tester.run_all())
+    sys.exit(0 if all(r.passed for r in tester.results) else 1)  # non-zero when a check failed
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/services/container/pool.py b/src/services/container/pool.py
index d51d7ba..79d4bfd 100644
--- a/src/services/container/pool.py
+++ b/src/services/container/pool.py
@@ -251,15 +251,21 @@ def get_stats(self, language: str = None) -> Dict[str, PoolStats]:
     async def _create_fresh_container(
         self, session_id: str, language: str
     ) -> Container:
-        """Create a new container."""
+        """Create a new container when pool is exhausted."""
         image = self._container_manager.get_image_for_language(language)
 
         # Ensure image is available
         await self._container_manager.pull_image_if_needed(image)
 
+        # Enable REPL mode for Python if configured (same as pooled containers)
+        use_repl_mode = language == "py" and settings.repl_enabled
+
         # Create and start container
         container = self._container_manager.create_container(
-            image=image, session_id=session_id, language=language
+            image=image,
+            session_id=session_id,
+            language=language,
+            repl_mode=use_repl_mode,
         )
 
         started = await self._container_manager.start_container(container)
@@ -270,11 +276,22 @@ async def _create_fresh_container(
                 pass
             raise RuntimeError(f"Failed to start container for {language}")
 
+        # For REPL containers, wait for REPL to be ready before returning
+        if use_repl_mode:
+            repl_ready = await self._wait_for_repl_ready(container)
+            if not repl_ready:
+                logger.warning(
+                    "REPL not ready in fresh container",
+                    container_id=container.id[:12],
+                    language=language,
+                )
+
         logger.info(
             "Created fresh container",
             session_id=session_id[:12] if session_id else "none",
             container_id=container.id[:12],
             language=language,
+            repl_mode=use_repl_mode,
         )
 
         return container
diff --git a/src/services/execution/runner.py b/src/services/execution/runner.py
index e69b519..9170b47 100644
--- a/src/services/execution/runner.py
+++ b/src/services/execution/runner.py
@@ -398,14 +398,29 @@ async def _create_fresh_container(
         image = self.container_manager.get_image_for_language(language)
         await self.container_manager.pull_image_if_needed(image)
 
+        # Enable REPL mode for Python if configured (matches pool behavior)
+        use_repl_mode = language == "py" and settings.repl_enabled
+
         container = self.container_manager.create_container(
             image=image,
             session_id=session_id,
             working_dir="/mnt/data",
             language=language,
+            repl_mode=use_repl_mode,
         )
         await self.container_manager.start_container(container)
 
+        # For REPL containers, wait for REPL to be ready before returning
+        if use_repl_mode:
+            repl_executor = REPLExecutor(self.container_manager.client)
+            ready = await repl_executor.wait_for_ready(container, timeout=10.0)
+            if not ready:
+                logger.warning(
+                    "REPL not ready in fresh container, may affect performance",
+                    session_id=session_id[:12],
+                    container_id=container.id[:12],
+                )
+
         self.session_containers[session_id] = container
         logger.info(
             "Fresh container created",
diff --git a/tests/functional/__init__.py b/tests/functional/__init__.py
new file mode 100644
index 0000000..a20c405
--- /dev/null
+++ b/tests/functional/__init__.py
@@ -0,0 +1 @@
+"""Functional tests for live API endpoint testing."""
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
new file mode 100644
index 0000000..ee6a033
--- /dev/null
+++ b/tests/functional/conftest.py
@@ -0,0 +1,134 @@
+"""Functional test fixtures for live API testing.
+
+These tests run against a real API endpoint (local or remote).
+Configure via environment variables:
+    API_BASE: Base URL (default: http://localhost:8000)
+    API_KEY: API key for authentication
+    API_TIMEOUT: Request timeout in seconds (default: 60)
+
+Example:
+    API_BASE="https://code-exec.eastus.cloudapp.azure.com" \
+    API_KEY="sk-your-api-key" \
+    pytest tests/functional/ -v
+"""
+
+import os
+import uuid
+from typing import AsyncGenerator, Dict, Tuple
+
+import httpx
+import pytest
+import pytest_asyncio
+
+# Configuration from environment (read once, when this module is imported)
+API_BASE = os.environ.get("API_BASE", "http://localhost:8000")
+API_KEY = os.environ.get("API_KEY", "test-api-key-for-development-only")
+API_TIMEOUT = int(os.environ.get("API_TIMEOUT", "60"))  # seconds; ValueError if not an integer
+
+
+# Language snippets: (code, expected_substring_in_stdout)
+# All compute sum(1..10) = 55 for consistency
+LANGUAGE_SNIPPETS: Dict[str, Tuple[str, str]] = {
+    "py": ("print('py: sum(1..10)=', sum(range(1,11)))", "55"),
+    "js": ("console.log('js: sum(1..10)=' + (1+2+3+4+5+6+7+8+9+10));", "55"),
+    "ts": ("console.log('ts: sum(1..10)=' + (1+2+3+4+5+6+7+8+9+10));", "55"),
+    "go": (
+        'package main\n\nimport (\n\t"fmt"\n)\n\nfunc main() {\n\ts := 0\n\t'
+        'for i := 1; i <= 10; i++ {\n\t\ts += i\n\t}\n\t'
+        'fmt.Printf("go: sum(1..10)=%d\\n", s)\n}',
+        "55",
+    ),
+    "java": (
+        "public class Code { public static void main(String[] args){ "
+        'int s=0; for(int i=1;i<=10;i++) s+=i; System.out.println("java: sum(1..10)="+s); } }',
+        "55",
+    ),
+    "c": (
+        '#include <stdio.h>\nint main(){int s=0; for(int i=1;i<=10;i++) s+=i; '
+        'printf("c: sum(1..10)=%d\\n", s); return 0;}',
+        "55",
+    ),
+    "cpp": (
+        "#include <iostream>\nint main(){int s=0; for(int i=1;i<=10;i++) s+=i; "
+        'std::cout << "cpp: sum(1..10)=" << s << std::endl; return 0;}',
+        "55",
+    ),
+    "php": (
+        '<?php $s=0; for($i=1;$i<=10;$i++){ $s+=$i; } echo "php: sum(1..10)=$s\\n";',
+        "55",
+    ),
+    "rs": (
+        "fn main(){ let mut s = 0; for i in 1..=10 { s += i; } "
+        'println!("rs: sum(1..10)={}", s); }',
+        "55",
+    ),
+    "r": ("cat('r: sum(1..10)=', sum(1:10), '\\n')", "55"),
+    "f90": (
+        "program sum\n  integer :: s, i\n  s = 0\n  do i = 1, 10\n     s = s + i\n  end do\n"
+        '  print *, "f90: sum(1..10)=", s\nend program sum\n',
+        "55",
+    ),
+    "d": (
+        'import std.stdio;\nvoid main(){ int s=0; foreach(i; 1..11) s+=i; writeln("d: sum(1..10)=", s); }',
+        "55",
+    ),
+}
+
+
+@pytest.fixture(scope="session")
+def api_base() -> str:
+    """API base URL from the API_BASE setting, with trailing slashes stripped."""
+    return API_BASE.rstrip("/")
+
+
+@pytest.fixture(scope="session")
+def api_key() -> str:
+    """API key taken from the module-level API_KEY setting."""
+    return API_KEY
+
+
+@pytest.fixture(scope="session")
+def auth_headers(api_key: str) -> Dict[str, str]:
+    """Headers for JSON requests: the API key plus a JSON Content-Type."""
+    return {
+        "x-api-key": api_key,
+        "Content-Type": "application/json",
+    }
+
+
+@pytest_asyncio.fixture
+async def async_client(api_base: str) -> AsyncGenerator[httpx.AsyncClient, None]:
+    """Yield an httpx.AsyncClient bound to ``api_base``; close it on teardown.
+
+    TLS verification is disabled so self-signed certificates are accepted.
+    """
+    http = httpx.AsyncClient(base_url=api_base, timeout=API_TIMEOUT, verify=False)
+    try:
+        yield http
+    finally:
+        # Closing can raise RuntimeError ("Event loop is closed") during
+        # teardown; that case is deliberately ignored.
+        try:
+            await http.aclose()
+        except RuntimeError:
+            pass
+
+
+@pytest.fixture
+def unique_session_id() -> str:
+    """Return a fresh "func-test-" ID with 12 random hex characters per test."""
+    return f"func-test-{uuid.uuid4().hex[:12]}"
+
+
+@pytest.fixture
+def unique_entity_id() -> str:
+    """Return a fresh "entity-" ID with 8 random hex characters per test."""
+    return f"entity-{uuid.uuid4().hex[:8]}"
+
+
+@pytest.fixture(params=list(LANGUAGE_SNIPPETS.keys()))
+def language_test_case(request):
+    """Parametrized over every LANGUAGE_SNIPPETS key (12 languages)."""
+    # request.param is the language code selected for this parametrization.
+    snippet, wanted = LANGUAGE_SNIPPETS[request.param]
+    return dict(lang=request.param, code=snippet, expected_output=wanted)
diff --git a/tests/functional/test_exec_languages.py b/tests/functional/test_exec_languages.py
new file mode 100644
index 0000000..52879a2
--- /dev/null
+++ b/tests/functional/test_exec_languages.py
@@ -0,0 +1,112 @@
+"""Functional tests for code execution across all 12 supported languages."""
+
+import time
+
+import pytest
+
+
+class TestLanguageExecution:
+    """Test POST /exec for all supported languages."""
+
+    @pytest.mark.asyncio
+    async def test_language_execution(
+        self, async_client, auth_headers, language_test_case, unique_entity_id
+    ):
+        """Run one language snippet via POST /exec and validate the response.
+
+        Checks, in order: HTTP 200, presence and then types of the four
+        LibreChat-compatible fields, the expected substring in stdout, and
+        a measured latency under 30 seconds.
+        """
+        began = time.perf_counter()
+        # The timer covers request construction and the full HTTP round trip.
+
+        lang = language_test_case["lang"]
+        payload = {
+            "code": language_test_case["code"],
+            "lang": lang,
+            "entity_id": unique_entity_id,
+            "user_id": "functional-test",
+        }
+        response = await async_client.post(
+            "/exec", headers=auth_headers, json=payload
+        )
+
+        latency = time.perf_counter() - began
+
+        # Basic assertion: the call itself must succeed.
+        assert response.status_code == 200, f"Failed for {lang}: {response.text}"
+
+        body = response.json()
+
+        # LibreChat-compatible fields and the type each one must have.
+        schema = (("session_id", str), ("stdout", str), ("stderr", str), ("files", list))
+        for field, _ in schema:
+            assert field in body, f"Response must have {field}"
+        for field, kind in schema:
+            assert isinstance(body[field], kind)
+
+        # The snippet's result ("55") must appear in stdout.
+        wanted = language_test_case["expected_output"]
+        assert wanted in body["stdout"], (
+            f"Expected '{wanted}' in stdout for "
+            f"{lang}, got: {body['stdout']}"
+        )
+
+        # Timing assertion: execution should complete within 30 seconds
+        assert latency < 30.0, f"Execution took {latency:.1f}s, expected < 30s"
+
+
+class TestPythonExecution:
+    """Specific tests for Python execution features."""
+
+    @pytest.mark.asyncio
+    async def test_python_with_imports(self, async_client, auth_headers, unique_entity_id):
+        """Python can import a standard-library module (json) and print its output."""
+        payload = {
+            "code": "import json; print(json.dumps({'ok': True}))",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        response = await async_client.post("/exec", headers=auth_headers, json=payload)
+
+        assert response.status_code == 200
+        lowered = response.json()["stdout"].lower()
+        # Accept the JSON form directly, or the same text once double quotes
+        # have been replaced by single quotes.
+        swapped = lowered.replace('"', "'")
+        assert '{"ok": true}' in lowered or "{'ok': true}" in swapped
+
+    @pytest.mark.asyncio
+    async def test_python_with_numpy(self, async_client, auth_headers, unique_entity_id):
+        """NumPy is importable and the mean of 1..5 is printed as 3.0."""
+        payload = {
+            "code": "import numpy as np; print(f'mean={np.mean([1,2,3,4,5])}')",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        response = await async_client.post(
+            "/exec", headers=auth_headers, json=payload
+        )
+
+        assert response.status_code == 200
+        printed = response.json()["stdout"]
+        assert "mean=3.0" in printed
+
+    @pytest.mark.asyncio
+    async def test_python_error_in_stderr(self, async_client, auth_headers, unique_entity_id):
+        """A raised Python exception is reported in stderr, not as an HTTP error."""
+        payload = {
+            "code": "raise ValueError('test error')",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        response = await async_client.post(
+            "/exec", headers=auth_headers, json=payload
+        )
+
+        # CRITICAL: Should return 200, not 4xx/5xx (LibreChat compatibility)
+        assert response.status_code == 200
+        stderr = response.json()["stderr"]
+        # Either the exception type or its message is enough.
+        assert "ValueError" in stderr or "test error" in stderr
diff --git a/tests/functional/test_exec_workflow.py b/tests/functional/test_exec_workflow.py
new file mode 100644
index 0000000..869643d
--- /dev/null
+++ b/tests/functional/test_exec_workflow.py
@@ -0,0 +1,220 @@
+"""Functional tests for execution workflows: sessions, state, files."""
+
+import pytest
+
+
+class TestSessionWorkflow:
+    """Test session creation and reuse."""
+
+    @pytest.mark.asyncio
+    async def test_execution_creates_session(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """A successful /exec call returns a non-empty session_id."""
+        payload = {"code": "print('hello')", "lang": "py", "entity_id": unique_entity_id}
+        response = await async_client.post(
+            "/exec", headers=auth_headers, json=payload
+        )
+
+        assert response.status_code == 200
+        created = response.json()["session_id"]
+        # Two separate checks: the field must not be null and must not be empty.
+        assert created is not None
+        assert len(created) > 0
+
+    @pytest.mark.asyncio
+    async def test_session_reuse_with_entity_id(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Two executions with the same entity_id report the same session_id.
+
+        Each response must be HTTP 200 before its session_id is read, so an
+        error payload fails with the response text rather than a KeyError.
+        """
+        session_ids = []
+        for code in ("x = 42", "print(x)"):
+            response = await async_client.post(
+                "/exec",
+                headers=auth_headers,
+                json={"code": code, "lang": "py", "entity_id": unique_entity_id},
+            )
+            # Surface the server's reply when the execution request itself failed.
+            assert response.status_code == 200, response.text
+            session_ids.append(response.json()["session_id"])
+
+        first, second = session_ids
+        assert first == second
+
+
+class TestLibreChatCompatibility:
+    """Test LibreChat API response format compatibility."""
+
+    @pytest.mark.asyncio
+    async def test_response_has_librechat_fields(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """The /exec response carries the four LibreChat fields with the right types."""
+        response = await async_client.post(
+            "/exec",
+            headers=auth_headers,
+            json={"code": "print('test')", "lang": "py", "entity_id": unique_entity_id},
+        )
+
+        assert response.status_code == 200
+        body = response.json()
+
+        # LibreChat requires these 4 fields; all are checked for presence, then type.
+        required = (
+            ("session_id", str),
+            ("files", list),
+            ("stdout", str),
+            ("stderr", str),
+        )
+        for field, _ in required:
+            assert field in body
+        for field, kind in required:
+            assert isinstance(body[field], kind)
+
+    @pytest.mark.asyncio
+    async def test_execution_error_returns_200(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Invalid Python still yields HTTP 200, with the error reported in stderr.
+
+        The response must keep the standard four-field format.
+        """
+        # Deliberately unparsable source.
+        payload = {
+            "code": "this is not valid python [[[",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        response = await async_client.post("/exec", headers=auth_headers, json=payload)
+
+        # CRITICAL: Should return 200, not 4xx or 5xx
+        assert response.status_code == 200
+
+        body = response.json()
+        # Should have standard response format with error in stderr
+        for field in ("session_id", "files", "stdout", "stderr"):
+            assert field in body
+
+        # stderr should contain the error
+        assert len(body["stderr"]) > 0
+
+
+class TestStatePersistence:
+    """Test Python state persistence across executions."""
+
+    @pytest.mark.asyncio
+    async def test_variable_persists_across_executions(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """A variable assigned in one execution is readable in the next.
+
+        Both requests use the same entity_id.
+        """
+        # Define variable
+        first = await async_client.post(
+            "/exec",
+            headers=auth_headers,
+            json={"code": "counter = 100", "lang": "py", "entity_id": unique_entity_id},
+        )
+        assert first.status_code == 200
+
+        # Use variable in next execution
+        followup = {
+            "code": "print(counter + 1)",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        second = await async_client.post("/exec", headers=auth_headers, json=followup)
+        assert second.status_code == 200
+        assert "101" in second.json()["stdout"]
+
+    @pytest.mark.asyncio
+    async def test_function_persists_across_executions(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """A function defined in one execution can be called in the next.
+
+        Both requests use the same entity_id.
+        """
+        # Define function
+        definition = {
+            "code": "def greet(name): return f'Hello, {name}!'",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        first = await async_client.post(
+            "/exec", headers=auth_headers, json=definition
+        )
+        assert first.status_code == 200
+        # Call function
+        invocation = {
+            "code": "print(greet('World'))",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        second = await async_client.post(
+            "/exec", headers=auth_headers, json=invocation
+        )
+        assert second.status_code == 200
+        assert "Hello, World!" in second.json()["stdout"]
+
+    @pytest.mark.asyncio
+    async def test_exec_response_includes_state_fields(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Python execution response includes state fields.
+
+        ``has_state`` must always be present; when it is truthy, ``state_size``
+        and ``state_hash`` must be present too (their values may be None).
+        """
+        r = await async_client.post(
+            "/exec",
+            headers=auth_headers,
+            json={"code": "data = [1,2,3]", "lang": "py", "entity_id": unique_entity_id},
+        )
+        assert r.status_code == 200
+        data = r.json()
+        assert "has_state" in data
+        if data.get("has_state"):
+            assert "state_size" in data, "state_size missing although has_state is set"
+            assert "state_hash" in data, "state_hash missing although has_state is set"
+
+    @pytest.mark.asyncio
+    async def test_dataframe_persists_across_executions(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """A pandas DataFrame created in one execution is usable in the next.
+
+        Note: may fail sporadically against a live API because of state
+        persistence timing; re-run the test if that happens.
+        """
+        # Create DataFrame
+        create = {
+            "code": "import pandas as pd; df = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        first = await async_client.post(
+            "/exec", headers=auth_headers, json=create
+        )
+        assert first.status_code == 200
+
+        # Access DataFrame
+        read_back = {
+            "code": "print(f'sum_a={df.a.sum()}, sum_b={df.b.sum()}')",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        second = await async_client.post(
+            "/exec", headers=auth_headers, json=read_back
+        )
+        assert second.status_code == 200
+
+        # Column sums: a = 1+2+3, b = 4+5+6.
+        printed = second.json()["stdout"]
+        for fragment in ("sum_a=6", "sum_b=15"):
+            assert fragment in printed
diff --git a/tests/functional/test_files.py b/tests/functional/test_files.py
new file mode 100644
index 0000000..bfe35e1
--- /dev/null
+++ b/tests/functional/test_files.py
@@ -0,0 +1,289 @@
+"""Functional tests for file management endpoints."""
+
+import pytest
+
+
+class TestFileUpload:
+    """Test POST /upload."""
+
+    @pytest.mark.asyncio
+    async def test_upload_single_file(self, async_client, auth_headers, unique_entity_id):
+        """Upload a single file using 'files' field."""
+        files = {"files": ("test.txt", b"Hello World", "text/plain")}
+        data = {"entity_id": unique_entity_id}
+
+        response = await async_client.post(
+            "/upload",
+            headers={"x-api-key": auth_headers["x-api-key"]},
+            files=files,
+            data=data,
+        )
+
+        assert response.status_code == 200
+        result = response.json()
+
+        assert result["message"] == "success"
+        assert "session_id" in result
+        assert len(result["files"]) == 1
+        assert "fileId" in result["files"][0]
+        assert "filename" in result["files"][0]
+
+    @pytest.mark.asyncio
+    async def test_librechat_upload_format(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Test LibreChat 'file' (singular) field name."""
+        # Only the multipart field name differs from the 'files' upload above.
+        files = {"file": ("document.pdf", b"PDF content here", "application/pdf")}
+        data = {"entity_id": unique_entity_id}
+
+        response = await async_client.post(
+            "/upload",
+            headers={"x-api-key": auth_headers["x-api-key"]},
+            files=files,
+            data=data,
+        )
+
+        assert response.status_code == 200
+        assert response.json()["message"] == "success"
+
+    @pytest.mark.asyncio
+    async def test_upload_returns_session_id(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Upload response includes a non-empty session_id."""
+        files = {"files": ("test.txt", b"content", "text/plain")}
+        data = {"entity_id": unique_entity_id}
+
+        response = await async_client.post(
+            "/upload",
+            headers={"x-api-key": auth_headers["x-api-key"]},
+            files=files,
+            data=data,
+        )
+
+        assert response.status_code == 200, response.text
+        result = response.json()
+        assert result.get("session_id"), "session_id missing or empty"
+
+    @pytest.mark.asyncio
+    async def test_upload_returns_file_info(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Upload response includes file info with fileId and filename."""
+        files = {"files": ("myfile.csv", b"a,b,c\n1,2,3", "text/csv")}
+        data = {"entity_id": unique_entity_id}
+
+        response = await async_client.post(
+            "/upload",
+            headers={"x-api-key": auth_headers["x-api-key"]},
+            files=files,
+            data=data,
+        )
+
+        assert response.status_code == 200, response.text
+        result = response.json()
+        assert len(result["files"]) == 1
+        file_info = result["files"][0]
+        assert "fileId" in file_info
+        assert file_info.get("filename") == "myfile.csv"
+
+
+class TestFileList:
+    """Test GET /files/{session_id}."""
+
+    @pytest.mark.asyncio
+    async def test_list_files_empty_session(
+        self, async_client, auth_headers, unique_session_id
+    ):
+        """A session id with no uploads lists as an empty array."""
+        listing = await async_client.get(
+            f"/files/{unique_session_id}",
+            headers=auth_headers,
+        )
+
+        assert listing.status_code == 200
+        assert listing.json() == []
+
+    @pytest.mark.asyncio
+    async def test_list_files_after_upload(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Listing a session after an upload returns at least one entry."""
+        key_only = {"x-api-key": auth_headers["x-api-key"]}
+        attachment = ("list-test.txt", b"content for list test", "text/plain")
+        uploaded = await async_client.post(
+            "/upload",
+            headers=key_only,
+            files={"files": attachment},
+            data={"entity_id": unique_entity_id},
+        )
+        session_id = uploaded.json()["session_id"]
+
+        # The upload response names the session to list.
+        listing = await async_client.get(
+            f"/files/{session_id}",
+            headers=auth_headers,
+        )
+
+        assert listing.status_code == 200
+        entries = listing.json()
+        assert len(entries) >= 1
+
+    @pytest.mark.asyncio
+    async def test_list_files_detail_simple(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Listing with detail=simple answers 200 with a JSON list."""
+        key_only = {"x-api-key": auth_headers["x-api-key"]}
+        attachment = ("simple-test.txt", b"content", "text/plain")
+        uploaded = await async_client.post(
+            "/upload",
+            headers=key_only,
+            files={"files": attachment},
+            data={"entity_id": unique_entity_id},
+        )
+        session_id = uploaded.json()["session_id"]
+
+        # Only the container type is checked, not the per-entry fields.
+        listing = await async_client.get(
+            f"/files/{session_id}?detail=simple",
+            headers=auth_headers,
+        )
+
+        assert listing.status_code == 200
+        entries = listing.json()
+        assert isinstance(entries, list)
+
+    @pytest.mark.asyncio
+    async def test_list_files_detail_summary(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Listing with detail=summary answers 200 with a JSON list."""
+        key_only = {"x-api-key": auth_headers["x-api-key"]}
+        attachment = ("summary-test.txt", b"content", "text/plain")
+        uploaded = await async_client.post(
+            "/upload",
+            headers=key_only,
+            files={"files": attachment},
+            data={"entity_id": unique_entity_id},
+        )
+        session_id = uploaded.json()["session_id"]
+
+        # Only the container type is checked, not the per-entry fields.
+        listing = await async_client.get(
+            f"/files/{session_id}?detail=summary",
+            headers=auth_headers,
+        )
+
+        assert listing.status_code == 200
+        entries = listing.json()
+        assert isinstance(entries, list)
+
+
+class TestFileDownload:
+    """Test GET /download/{session_id}/{file_id}."""
+
+    @pytest.mark.asyncio
+    async def test_download_uploaded_file(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """A downloaded file is byte-identical to what was uploaded."""
+        expected = b"Download test content - unique data 12345"
+        key_only = {"x-api-key": auth_headers["x-api-key"]}
+
+        uploaded = await async_client.post(
+            "/upload",
+            headers=key_only,
+            files={"files": ("download-test.txt", expected, "text/plain")},
+            data={"entity_id": unique_entity_id},
+        )
+        receipt = uploaded.json()
+        session_id, file_id = receipt["session_id"], receipt["files"][0]["fileId"]
+
+        # Fetch the file back through the ids the upload returned.
+        downloaded = await async_client.get(
+            f"/download/{session_id}/{file_id}",
+            headers=auth_headers,
+        )
+
+        assert downloaded.status_code == 200
+        assert downloaded.content == expected
+
+    @pytest.mark.asyncio
+    async def test_download_nonexistent_returns_404(
+        self, async_client, auth_headers, unique_session_id
+    ):
+        """Requesting a file id that was never uploaded returns 404."""
+        missing = await async_client.get(
+            f"/download/{unique_session_id}/fake-file-id",
+            headers=auth_headers,
+        )
+
+        assert missing.status_code == 404
+
+
+class TestFileDelete:
+    """Test DELETE /files/{session_id}/{file_id}."""
+
+    @pytest.mark.asyncio
+    async def test_delete_file(self, async_client, auth_headers, unique_entity_id):
+        """Delete uploaded file returns 200."""
+        files = {"files": ("delete-test.txt", b"Delete me", "text/plain")}
+
+        upload = await async_client.post(
+            "/upload",
+            headers={"x-api-key": auth_headers["x-api-key"]},
+            files=files,
+            data={"entity_id": unique_entity_id},
+        )
+        assert upload.status_code == 200
+        uploaded = upload.json()
+        session_id, file_id = uploaded["session_id"], uploaded["files"][0]["fileId"]
+
+        # Delete
+        response = await async_client.delete(
+            f"/files/{session_id}/{file_id}",
+            headers=auth_headers,
+        )
+
+        assert response.status_code == 200
+
+    @pytest.mark.asyncio
+    async def test_file_not_in_list_after_delete(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Deleted file no longer appears in file list."""
+        files = {"files": ("delete-verify.txt", b"To be deleted", "text/plain")}
+
+        upload = await async_client.post(
+            "/upload",
+            headers={"x-api-key": auth_headers["x-api-key"]},
+            files=files,
+            data={"entity_id": unique_entity_id},
+        )
+        assert upload.status_code == 200
+        uploaded = upload.json()
+        session_id, file_id = uploaded["session_id"], uploaded["files"][0]["fileId"]
+
+        # Fail at the delete itself rather than at the final membership check.
+        deleted = await async_client.delete(
+            f"/files/{session_id}/{file_id}",
+            headers=auth_headers,
+        )
+        assert deleted.status_code == 200
+
+        listing = await async_client.get(
+            f"/files/{session_id}",
+            headers=auth_headers,
+        )
+        assert listing.status_code == 200
+
+        # Entries may carry the identifier under any of these keys; the list
+        # may also simply be empty once the only file is gone.
+        remaining = {
+            entry.get("id") or entry.get("fileId") or entry.get("file_id")
+            for entry in listing.json()
+        }
+
+        assert file_id not in remaining
diff --git a/tests/functional/test_health.py b/tests/functional/test_health.py
new file mode 100644
index 0000000..b643ef2
--- /dev/null
+++ b/tests/functional/test_health.py
@@ -0,0 +1,59 @@
+"""Functional tests for health check endpoints."""
+
+import time
+
+import pytest
+
+
+class TestBasicHealth:
+    """Tests for GET /health (no auth required)."""
+
+    @pytest.mark.asyncio
+    async def test_health_returns_200(self, async_client):
+        """An unauthenticated GET /health answers 200."""
+        reply = await async_client.get("/health")
+        assert reply.status_code == 200
+
+    @pytest.mark.asyncio
+    async def test_health_under_1s(self, async_client):
+        """GET /health completes in under one second of wall-clock time."""
+        began = time.perf_counter()
+        reply = await async_client.get("/health")
+        elapsed = time.perf_counter() - began
+
+        assert reply.status_code == 200
+        assert elapsed < 1.0, f"Health check took {elapsed:.2f}s, expected < 1s"
+
+    @pytest.mark.asyncio
+    async def test_health_response_fields(self, async_client):
+        """The health body carries status and version, and status is healthy."""
+        body = (await async_client.get("/health")).json()
+
+        # Required fields
+        for field in ("status", "version"):
+            assert field in body, f"Missing field: {field}"
+
+        assert body["status"] == "healthy"
+
+
+class TestDetailedHealth:
+    """Tests for GET /health/detailed (requires auth)."""
+
+    @pytest.mark.asyncio
+    async def test_detailed_health_requires_auth(self, async_client):
+        """Without an API key the detailed endpoint answers 401."""
+        reply = await async_client.get("/health/detailed")
+        assert reply.status_code == 401
+
+    @pytest.mark.asyncio
+    async def test_detailed_health_with_auth(self, async_client, auth_headers):
+        """With an API key the detailed endpoint returns a service report."""
+        reply = await async_client.get("/health/detailed", headers=auth_headers)
+
+        # May be 200 (healthy) or 503 (degraded/unhealthy)
+        assert reply.status_code in (200, 503)
+        body = reply.json()
+
+        # The same top-level keys are expected for either status code.
+        for key in ("status", "services", "summary"):
+            assert key in body
diff --git a/tests/functional/test_state.py b/tests/functional/test_state.py
new file mode 100644
index 0000000..f1286b6
--- /dev/null
+++ b/tests/functional/test_state.py
@@ -0,0 +1,257 @@
+"""Functional tests for state persistence API endpoints.
+
+These are extended functionality beyond LibreChat's current usage.
+LibreChat currently only supports file session persistence, not Python state.
+"""
+
+import pytest
+
+
+class TestStateInfo:
+    """Test GET /state/{session_id}/info."""
+
+    @pytest.mark.asyncio
+    async def test_info_nonexistent_state(
+        self, async_client, auth_headers, unique_session_id
+    ):
+        """Info for a session id with no state reports exists=false."""
+        info = await async_client.get(
+            f"/state/{unique_session_id}/info",
+            headers=auth_headers,
+        )
+
+        assert info.status_code == 200
+        # Identity check: a merely falsy value (null, 0) must not pass.
+        assert info.json()["exists"] is False
+
+    @pytest.mark.asyncio
+    async def test_info_after_execution(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """State info after a Python execution is well-formed."""
+        # Create state via execution
+        seed = {
+            "code": "state_test_var = {'key': 'value', 'number': 42}",
+            "lang": "py",
+            "entity_id": unique_entity_id,
+        }
+        executed = await async_client.post("/exec", headers=auth_headers, json=seed)
+        assert executed.status_code == 200
+        session_id = executed.json()["session_id"]
+
+        info = await async_client.get(
+            f"/state/{session_id}/info",
+            headers=auth_headers,
+        )
+
+        assert info.status_code == 200
+        report = info.json()
+
+        # exists=false is tolerated here; the metadata keys are only
+        # required when the service reports a stored state.
+        assert "exists" in report
+        if not report["exists"]:
+            return
+
+        assert "size_bytes" in report
+        assert "hash" in report
+
+
+class TestStateDownload:
+    """Test GET /state/{session_id}."""
+
+    @pytest.mark.asyncio
+    async def test_download_nonexistent_state(
+        self, async_client, auth_headers, unique_session_id
+    ):
+        """Download state for non-existent session returns 404."""
+        response = await async_client.get(
+            f"/state/{unique_session_id}",
+            headers=auth_headers,
+        )
+
+        assert response.status_code == 404
+
+    @pytest.mark.asyncio
+    async def test_download_state_after_execution(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """Download after a Python execution yields an ETag and a body, if any."""
+        # Create state via execution
+        exec_response = await async_client.post(
+            "/exec",
+            headers=auth_headers,
+            json={
+                "code": "download_test_data = {'key': 'value'}",
+                "lang": "py",
+                "entity_id": unique_entity_id,
+            },
+        )
+        assert exec_response.status_code == 200
+        session_id = exec_response.json()["session_id"]
+
+        # Try to download state
+        state_response = await async_client.get(
+            f"/state/{session_id}",
+            headers=auth_headers,
+        )
+
+        # May be 200 (state exists) or 404 (no state captured)
+        assert state_response.status_code in [200, 404]
+
+        if state_response.status_code == 200:
+            # Should have ETag header
+            assert "etag" in state_response.headers
+            # Should have binary content
+            assert len(state_response.content) > 0
+
+    @pytest.mark.asyncio
+    async def test_state_etag_conditional_request(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """A request replaying the ETag in If-None-Match gets 304 or 200."""
+        # Create state
+        exec_response = await async_client.post(
+            "/exec",
+            headers=auth_headers,
+            json={
+                "code": "etag_test_data = [1, 2, 3, 4, 5]",
+                "lang": "py",
+                "entity_id": unique_entity_id,
+            },
+        )
+        assert exec_response.status_code == 200
+        state_url = f"/state/{exec_response.json()['session_id']}"
+
+        # State may not have been captured: without a 200 and an ETag there
+        # is nothing to revalidate, so the test ends here.
+        first_response = await async_client.get(state_url, headers=auth_headers)
+        etag = first_response.headers.get("etag")
+        if first_response.status_code != 200 or not etag:
+            return
+
+        second_response = await async_client.get(
+            state_url,
+            headers={**auth_headers, "If-None-Match": etag},
+        )
+
+        # 304 Not Modified is the expected answer to a matching If-None-Match;
+        # a full 200 is deliberately tolerated as well.
+        assert second_response.status_code in [200, 304]
+
+
+class TestStateUpload:
+    """Test POST /state/{session_id}."""
+
+    @pytest.mark.asyncio
+    async def test_upload_valid_state(
+        self, async_client, auth_headers, unique_session_id
+    ):
+        """A version-2 blob is accepted with 201 and its size echoed back."""
+        # Leading byte 0x02 marks state format version 2; the 100 filler
+        # bytes stand in for the compressed payload.
+        blob = b"\x02" + b"x" * 100
+        binary_headers = {**auth_headers, "Content-Type": "application/octet-stream"}
+        reply = await async_client.post(
+            f"/state/{unique_session_id}",
+            headers=binary_headers,
+            content=blob,
+        )
+
+        assert reply.status_code == 201
+        body = reply.json()
+        assert body["message"] == "state_uploaded"
+        assert body["size"] == len(blob)
+
+    @pytest.mark.asyncio
+    async def test_upload_invalid_version(
+        self, async_client, auth_headers, unique_session_id
+    ):
+        """A blob with an invalid leading version byte is rejected with 400."""
+        # 0x99 is not a valid version byte.
+        blob = b"\x99invalid_version_data"
+        binary_headers = {**auth_headers, "Content-Type": "application/octet-stream"}
+        reply = await async_client.post(
+            f"/state/{unique_session_id}",
+            headers=binary_headers,
+            content=blob,
+        )
+
+        assert reply.status_code == 400
+
+    @pytest.mark.asyncio
+    async def test_upload_empty_state(
+        self, async_client, auth_headers, unique_session_id
+    ):
+        """A zero-length body is rejected with 400."""
+        reply = await async_client.post(
+            f"/state/{unique_session_id}",
+            headers={**auth_headers, "Content-Type": "application/octet-stream"},
+            content=b"",
+        )
+
+        assert reply.status_code == 400
+
+
+class TestStateDelete:
+    """Test DELETE /state/{session_id}."""
+
+    @pytest.mark.asyncio
+    async def test_delete_state(self, async_client, auth_headers, unique_session_id):
+        """Delete of a freshly uploaded state returns 204."""
+        state_url = f"/state/{unique_session_id}"
+        # Seed a state first so this does not just repeat the non-existent case.
+        seeded = await async_client.post(
+            state_url,
+            headers={**auth_headers, "Content-Type": "application/octet-stream"},
+            content=b"\x02" + b"x" * 100,
+        )
+        assert seeded.status_code == 201
+
+        response = await async_client.delete(state_url, headers=auth_headers)
+        assert response.status_code == 204
+
+    @pytest.mark.asyncio
+    async def test_delete_nonexistent_state(
+        self, async_client, auth_headers, unique_session_id
+    ):
+        """Delete non-existent state still returns 204."""
+        # Nothing is uploaded for this session id before the delete.
+        state_url = f"/state/{unique_session_id}"
+        response = await async_client.delete(state_url, headers=auth_headers)
+        assert response.status_code == 204
+
+    @pytest.mark.asyncio
+    async def test_state_not_found_after_delete(
+        self, async_client, auth_headers, unique_entity_id
+    ):
+        """State info reports exists=false after deletion."""
+        # Create state
+        exec_response = await async_client.post(
+            "/exec",
+            headers=auth_headers,
+            json={
+                "code": "delete_test_data = 'to be deleted'",
+                "lang": "py",
+                "entity_id": unique_entity_id,
+            },
+        )
+        assert exec_response.status_code == 200
+        session_id = exec_response.json()["session_id"]
+        info_url = f"/state/{session_id}/info"
+
+        # State capture may not have happened; only exercise deletion when
+        # the execution actually left a state behind.
+        check_response = await async_client.get(info_url, headers=auth_headers)
+        if not check_response.json().get("exists"):
+            return
+
+        delete_response = await async_client.delete(
+            f"/state/{session_id}",
+            headers=auth_headers,
+        )
+        assert delete_response.status_code == 204
+
+        # Verify state no longer exists
+        info_response = await async_client.get(info_url, headers=auth_headers)
+        assert info_response.json()["exists"] is False
diff --git a/tests/functional/test_timing.py b/tests/functional/test_timing.py
new file mode 100644
index 0000000..d5dbdb9
--- /dev/null
+++ b/tests/functional/test_timing.py
@@ -0,0 +1,129 @@
+"""Functional tests for timing and performance assertions."""
+
+import time
+
+import pytest
+
+
+class TestExecutionTiming:
+    """Test execution timing constraints."""
+
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize("lang,code", [
+        ("py", "print('timing test')"),
+        ("js", "console.log('timing test');"),
+        (
+            "go",
+            'package main\nimport "fmt"\nfunc main() { fmt.Println("timing test") }',
+        ),
+    ])
+    async def test_simple_execution_under_30s(
+        self, async_client, auth_headers, unique_entity_id, lang, code
+    ):
+        """A one-line program in each language finishes in under 30 seconds."""
+        # Build the body first so the timer brackets only the request.
+        payload = {
+            "code": code,
+            "lang": lang,
+            "entity_id": f"{unique_entity_id}-{lang}",
+        }
+        began = time.perf_counter()
+        reply = await async_client.post(
+            "/exec", headers=auth_headers, json=payload
+        )
+        elapsed = time.perf_counter() - began
+
+        assert reply.status_code == 200
+        assert elapsed < 30.0, f"{lang} execution took {elapsed:.1f}s, expected < 30s"
+
+
+class TestHealthTiming:
+    """Test health endpoint timing."""
+
+    @pytest.mark.asyncio
+    async def test_health_under_1s(self, async_client):
+        """GET /health completes in under one second of wall-clock time."""
+        began = time.perf_counter()
+        reply = await async_client.get("/health")
+        elapsed = time.perf_counter() - began
+
+        assert reply.status_code == 200
+        assert elapsed < 1.0, f"Health check took {elapsed:.2f}s, expected < 1s"
+
+    @pytest.mark.asyncio
+    async def test_detailed_health_under_5s(self, async_client, auth_headers):
+        """GET /health/detailed completes in under five seconds."""
+        began = time.perf_counter()
+        reply = await async_client.get("/health/detailed", headers=auth_headers)
+        elapsed = time.perf_counter() - began
+
+        assert reply.status_code in (200, 503)
+        assert elapsed < 5.0, f"Detailed health took {elapsed:.2f}s, expected < 5s"
+
+
+class TestFileTiming:
+    """Test file operation timing."""
+
+    @pytest.mark.asyncio
+    async def test_upload_under_10s(self, async_client, auth_headers, unique_entity_id):
+        """Uploading a 100KB text file completes in under 10 seconds."""
+        key_only = {"x-api-key": auth_headers["x-api-key"]}
+        attachment = ("timing-test.txt", b"x" * 1024 * 100, "text/plain")  # 100KB
+
+        began = time.perf_counter()
+        reply = await async_client.post(
+            "/upload",
+            headers=key_only,
+            files={"files": attachment},
+            data={"entity_id": unique_entity_id},
+        )
+        elapsed = time.perf_counter() - began
+
+        assert reply.status_code == 200
+        assert elapsed < 10.0, f"Upload took {elapsed:.1f}s, expected < 10s"
+
+    @pytest.mark.asyncio
+    async def test_download_under_5s(self, async_client, auth_headers, unique_entity_id):
+        """Downloading a small uploaded file completes in under 5 seconds."""
+        # Upload first
+        key_only = {"x-api-key": auth_headers["x-api-key"]}
+        sample = ("download-timing.txt", b"download timing test content", "text/plain")
+
+        uploaded = await async_client.post(
+            "/upload",
+            headers=key_only,
+            files={"files": sample},
+            data={"entity_id": unique_entity_id},
+        )
+        receipt = uploaded.json()
+        target = f"/download/{receipt['session_id']}/{receipt['files'][0]['fileId']}"
+
+        # Time the download
+        began = time.perf_counter()
+        reply = await async_client.get(
+            target,
+            headers=auth_headers,
+        )
+        elapsed = time.perf_counter() - began
+
+        assert reply.status_code == 200
+        assert elapsed < 5.0, f"Download took {elapsed:.1f}s, expected < 5s"
+
+
+class TestStateTiming:
+    """Test state operation timing."""
+
+    @pytest.mark.asyncio
+    async def test_state_info_under_2s(
+        self, async_client, auth_headers, unique_session_id
+    ):
+        """GET /state/{session_id}/info completes in under two seconds."""
+        began = time.perf_counter()
+        reply = await async_client.get(
+            f"/state/{unique_session_id}/info",
+            headers=auth_headers,
+        )
+        elapsed = time.perf_counter() - began
+
+        assert reply.status_code == 200
+        assert elapsed < 2.0, f"State info took {elapsed:.1f}s, expected < 2s"