diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 71fd888f..5b71a3f7 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -15,7 +15,7 @@ jobs: - uses: actions/checkout@v4 - uses: astral-sh/setup-uv@v4 - run: uv sync --extra dev - - run: uv run pytest --cov-report term --cov-report html --cov-report xml --cov=src/libkernelbot unit-tests -v + - run: uv run pytest --cov-report term --cov-report html --cov-report xml --cov=src/libkernelbot -m "not integration" tests -v - uses: actions/upload-artifact@v4 with: name: coverage @@ -25,13 +25,22 @@ jobs: uses: py-cov-action/python-coverage-comment-action@v3 with: GITHUB_TOKEN: ${{ github.token }} - - name: Store Pull Request comment to be posted uses: actions/upload-artifact@v4 if: steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true' with: - # If you use a different name, update COMMENT_ARTIFACT_NAME accordingly name: python-coverage-comment-action - # If you use a different name, update COMMENT_FILENAME accordingly path: python-coverage-comment-action.txt + integration-tests: + runs-on: ubuntu-latest + timeout-minutes: 30 + env: + MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }} + MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }} + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v4 + - run: uv sync --extra dev + - run: uv run modal token set --token-id ${MODAL_TOKEN_ID} --token-secret ${MODAL_TOKEN_SECRET} + - run: uv run pytest -m integration tests -v diff --git a/examples/vectoradd_py/submission_cuda_inline.py b/examples/vectoradd_py/submission_cuda_inline.py index d505d2a4..51841871 100644 --- a/examples/vectoradd_py/submission_cuda_inline.py +++ b/examples/vectoradd_py/submission_cuda_inline.py @@ -54,7 +54,6 @@ """ - add_module = load_inline( name='add_cuda', cpp_sources=add_cpp_source, @@ -63,62 +62,12 @@ verbose=True, ) + def add(A, B): if not A.is_cuda or not B.is_cuda: raise RuntimeError("Both tensors must be on GPU") return 
add_module.add_cuda(A, B) -def custom_kernel(data: input_t) -> output_t: - """ - Custom implementation of vector addition using CUDA inline function. - Args: - inputs: List of pairs of tensors [A, B] to be added. - Returns: - List of tensors containing element-wise sums. - """ - A, B = data - assert A.is_cuda and B.is_cuda, "Input tensors must be on GPU" - assert A.shape == B.shape, "Input tensors must have the same shape" - assert A.dtype == torch.float16 and B.dtype == torch.float16, "Input tensors must be float16" - - M, N = A.shape - C = torch.empty_like(A) - - n_threads = 256 - n_blocks = (M * N + n_threads - 1) // n_threads - - cuda_source = """ - extern "C" __global__ void add_kernel( - const half* __restrict__ A, - const half* __restrict__ B, - half* __restrict__ C, - const int n_elements - ) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < n_elements) { - C[idx] = __hadd(A[idx], B[idx]); - } - } - """ - - module = torch.utils.cpp_extension.load_inline( - name=f"add_kernel_{M}_{N}", - cpp_sources="", - cuda_sources=cuda_source, - functions=["add_kernel"], - with_cuda=True, - extra_cuda_cflags=["-arch=sm_70"], # Adjust based on your GPU architecture - ) - - module.add_kernel( - cuda_stream=torch.cuda.current_stream(), - args=[ - A.reshape(-1), B.reshape(-1), C.reshape(-1), - M * N, - ], - blocks=n_blocks, - threads=n_threads, - ) - - return C +def custom_kernel(data: input_t) -> output_t: + return add(*data) diff --git a/pyproject.toml b/pyproject.toml index bb72bd06..0c66e0bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,9 @@ exclude_lines = [ [tool.pytest.ini_options] testpaths = ["scripts", "tests"] python_files = ["test_*.py", "*_test.py", "ci_test_*.py"] +markers = [ + "integration: integration tests that need to interact externally, e.g., with modal/github actions/etc" +] [tool.ruff] line-length = 120 diff --git a/src/libkernelbot/launchers/modal.py b/src/libkernelbot/launchers/modal.py index 26adfe33..6c2308ec 
100644 --- a/src/libkernelbot/launchers/modal.py +++ b/src/libkernelbot/launchers/modal.py @@ -32,7 +32,7 @@ async def run_submission( result = await loop.run_in_executor( None, - lambda: modal.Function.lookup("discord-bot-runner", func_name).remote(config=config), + lambda: modal.Function.from_name("discord-bot-runner", func_name).remote(config=config), ) await status.update("✅ Waiting for modal run to finish... Done") diff --git a/src/runners/modal_runner.py b/src/runners/modal_runner.py index eb74d173..bb8d952a 100644 --- a/src/runners/modal_runner.py +++ b/src/runners/modal_runner.py @@ -33,10 +33,10 @@ "PyYAML", ) .pip_install( - "torch~=2.7", + "torch>=2.7.0,<2.8.0", "torchvision~=0.22", - "torchaudio~=2.7", - index_url="https://download.pytorch.org/whl/cu128" + "torchaudio>=2.7.0,<2.8.0", + index_url="https://download.pytorch.org/whl/cu128", ) # other frameworks .pip_install( diff --git a/unit-tests/conftest.py b/tests/conftest.py similarity index 91% rename from unit-tests/conftest.py rename to tests/conftest.py index 170b0196..0408b82e 100644 --- a/unit-tests/conftest.py +++ b/tests/conftest.py @@ -8,14 +8,10 @@ @pytest.fixture(scope="module") -def docker_compose(): - tgt_path = Path.cwd() - if tgt_path.name == "unit-tests": - tgt_path = tgt_path.parent - +def docker_compose(project_root: Path): """Start a test database and run migrations""" subprocess.check_call( - ["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"], cwd=tgt_path + ["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"], cwd=project_root ) try: @@ -25,7 +21,7 @@ def docker_compose(): ["docker", "compose", "-f", "docker-compose.test.yml", "ps", "-q", "migrate-test"], capture_output=True, text=True, - cwd=tgt_path, + cwd=project_root, ) if not result.stdout.strip(): # Container no longer exists @@ -37,7 +33,7 @@ def docker_compose(): ["docker", "compose", "-f", "docker-compose.test.yml", "logs", "migrate-test"], capture_output=True, text=True, - cwd=tgt_path, + 
cwd=project_root, ) if "error" in logs.stdout.lower(): @@ -46,7 +42,7 @@ def docker_compose(): yield finally: subprocess.run( - ["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"], cwd=tgt_path + ["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"], cwd=project_root ) @@ -122,3 +118,8 @@ def task_directory(tmp_path): # Create task.yml Path.write_text(tmp_path / "task.yml", TASK_YAML) return tmp_path + + +@pytest.fixture(scope="session") +def project_root(): + return Path(__file__).parent.parent diff --git a/unit-tests/test_backend.py b/tests/test_backend.py similarity index 100% rename from unit-tests/test_backend.py rename to tests/test_backend.py diff --git a/unit-tests/test_leaderboard_db.py b/tests/test_leaderboard_db.py similarity index 100% rename from unit-tests/test_leaderboard_db.py rename to tests/test_leaderboard_db.py diff --git a/tests/test_modal.py b/tests/test_modal.py new file mode 100644 index 00000000..9fa1725e --- /dev/null +++ b/tests/test_modal.py @@ -0,0 +1,220 @@ +import os +import subprocess +from pathlib import Path +from typing import Tuple + +import pytest + +from libkernelbot.consts import GPU_TO_SM, ModalGPU, SubmissionMode +from libkernelbot.launchers import ModalLauncher +from libkernelbot.report import RunProgressReporter +from libkernelbot.task import build_task_config, make_task_definition + + +class MockProgressReporter(RunProgressReporter): + """Test progress reporter that captures messages.""" + + def __init__(self, title: str = "Test Modal Run"): + super().__init__(title) + self.messages = [] + self.updates = [] + + async def push(self, message: str): + self.messages.append(message) + + async def update(self, message: str): + self.updates.append(message) + + +@pytest.fixture(scope="session") +def modal_deployment(project_root: Path): + """ + Fixture that ensures Modal is deployed before running tests. + Runs once per test session and deploys to the specified Modal environment. 
# Determine Modal environment (default to 'pytest' if not specified)
+ result = subprocess.run( + ["modal", "deploy", "--env", modal_env, "modal_runner_archs.py"], + cwd=project_root / "src" / "runners", + capture_output=True, + text=True, + timeout=600, + ) + if result.returncode != 0: + pytest.fail( + f"Modal deploy failed:\n" + f"STDOUT:\n{result.stdout}\n" + f"STDERR:\n{result.stderr}" + ) + else: + pytest.fail( + f"Modal deploy failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + ) + + print(f"✅ Modal deployment to '{modal_env}' completed successfully") + print(f"Deploy output: {result.stdout}") + + # Set the Modal environment for the session + original_env = os.environ.get("MODAL_ENVIRONMENT") + os.environ["MODAL_ENVIRONMENT"] = modal_env + + yield modal_env + + # Restore original environment + if original_env is not None: + os.environ["MODAL_ENVIRONMENT"] = original_env + elif "MODAL_ENVIRONMENT" in os.environ: + del os.environ["MODAL_ENVIRONMENT"] + + except subprocess.TimeoutExpired as e: + pytest.fail( + f"Modal deploy timed out after 5 minutes:\nstdout: {e.stdout}, stderr:{e.stderr}" + ) + except Exception as e: + pytest.fail(f"Modal deploy failed with exception: {e}") + + +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.parametrize( + "gpu_type", [ModalGPU.T4, ModalGPU.L4, ModalGPU.A100, ModalGPU.H100, ModalGPU.B200] +) +@pytest.mark.parametrize( + "task", + [ + ("vectoradd_py", "submission_cuda_inline.py"), + ("vectoradd_py", "submission_triton.py"), + ], +) +async def test_modal_launcher_python_script( + modal_deployment, project_root: Path, gpu_type: ModalGPU, task: Tuple[str, str] +): + """ + Test ModalLauncher with a real Python script using examples/identity_py. 
+ """ + launcher = ModalLauncher(add_include_dirs=[]) + reporter = MockProgressReporter("progress") + + # Load the real identity_py task + task_path = project_root / "examples" / task[0] + if not task_path.exists(): + pytest.skip("examples/identity_py not found - skipping Modal integration test") + + # Load the task definition + task_definition = make_task_definition(task_path) + + # Use the actual working submission from the examples + submission_content = (task_path / task[1]).read_text() + + config = build_task_config( + task=task_definition.task, + submission_content=submission_content, + arch=GPU_TO_SM[gpu_type.name], + mode=SubmissionMode.TEST, + ) + + result = await launcher.run_submission(config, gpu_type, reporter) + + # Basic structure and success + assert result.success, f"Expected successful run, got: {result.error}" + assert result.error == "" + assert isinstance(result.runs, dict) + + # System info - test actual expected values + assert gpu_type.name in result.system.gpu + assert "Linux" in result.system.platform + assert result.system.torch.startswith("2.7") # update when the image changes + + # Test run structure + assert "test" in result.runs + test_run = result.runs["test"] + + # Run needs to succeed + assert test_run.run.success is True + assert test_run.run.passed is True + assert test_run.run.exit_code == 0 + assert test_run.run.duration > 0 + + # Test need to succeed + assert test_run.run.result["check"] == "pass" + test_count = int(test_run.run.result["test-count"]) + assert test_count == 5 + for i in range(test_count): + assert test_run.run.result[f"test.{i}.status"] == "pass" + assert "size:" in test_run.run.result[f"test.{i}.spec"] + assert "seed:" in test_run.run.result[f"test.{i}.spec"] + + # sanity check for timings + assert test_run.start < test_run.end + + # check messages + assert reporter.messages == ["⏳ Waiting for Modal run to finish..."] + assert reporter.updates == ["✅ Waiting for modal run to finish... 
Done"] + + +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.parametrize("script", ["cheat-fd.py", "cheat-input.py", "cheat-rng.py"]) +async def test_modal_launcher_failing_script(modal_deployment, project_root: Path, script: str): + """ + Test ModalLauncher with a real Python scripts that are designed to be wrong. + """ + launcher = ModalLauncher(add_include_dirs=[]) + reporter = MockProgressReporter("progress") + gpu_type = ModalGPU.T4 + + # Load the real identity_py task + task_path = project_root / "examples" / "identity_py" + if not task_path.exists(): + pytest.skip("examples/identity_py not found - skipping Modal integration test") + + # Load the task definition + task_definition = make_task_definition(task_path) + + # Use the actual working submission from the examples + submission_content = (task_path / script).read_text() + task_definition.task.seed = 653212 + config = build_task_config( + task=task_definition.task, + submission_content=submission_content, + arch=GPU_TO_SM[gpu_type.name], + mode=SubmissionMode.LEADERBOARD, + ) + + result = await launcher.run_submission(config, gpu_type, reporter) + + # Basic structure and success + assert result.success, f"Expected successful run, got: {result.error}" + assert result.error == "" + assert result.runs["test"].run.passed is False or result.runs["benchmark"].run.passed is False diff --git a/unit-tests/test_report.py b/tests/test_report.py similarity index 100% rename from unit-tests/test_report.py rename to tests/test_report.py diff --git a/unit-tests/test_submission.py b/tests/test_submission.py similarity index 100% rename from unit-tests/test_submission.py rename to tests/test_submission.py diff --git a/unit-tests/test_task.py b/tests/test_task.py similarity index 100% rename from unit-tests/test_task.py rename to tests/test_task.py diff --git a/unit-tests/test_utils.py b/tests/test_utils.py similarity index 100% rename from unit-tests/test_utils.py rename to tests/test_utils.py