From f48486a2560a2f6be944c53f52e13dd141536dea Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Fri, 22 Aug 2025 01:33:55 +0200 Subject: [PATCH 1/9] utility fixture --- unit-tests/conftest.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/unit-tests/conftest.py b/unit-tests/conftest.py index 170b0196..0408b82e 100644 --- a/unit-tests/conftest.py +++ b/unit-tests/conftest.py @@ -8,14 +8,10 @@ @pytest.fixture(scope="module") -def docker_compose(): - tgt_path = Path.cwd() - if tgt_path.name == "unit-tests": - tgt_path = tgt_path.parent - +def docker_compose(project_root: Path): """Start a test database and run migrations""" subprocess.check_call( - ["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"], cwd=tgt_path + ["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"], cwd=project_root ) try: @@ -25,7 +21,7 @@ def docker_compose(): ["docker", "compose", "-f", "docker-compose.test.yml", "ps", "-q", "migrate-test"], capture_output=True, text=True, - cwd=tgt_path, + cwd=project_root, ) if not result.stdout.strip(): # Container no longer exists @@ -37,7 +33,7 @@ def docker_compose(): ["docker", "compose", "-f", "docker-compose.test.yml", "logs", "migrate-test"], capture_output=True, text=True, - cwd=tgt_path, + cwd=project_root, ) if "error" in logs.stdout.lower(): @@ -46,7 +42,7 @@ def docker_compose(): yield finally: subprocess.run( - ["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"], cwd=tgt_path + ["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"], cwd=project_root ) @@ -122,3 +118,8 @@ def task_directory(tmp_path): # Create task.yml Path.write_text(tmp_path / "task.yml", TASK_YAML) return tmp_path + + +@pytest.fixture(scope="session") +def project_root(): + return Path(__file__).parent.parent From 9483faa35dd73d91842dd903a8d65db6922fe06d Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Fri, 22 
Aug 2025 01:34:12 +0200 Subject: [PATCH 2/9] pin torch version --- src/runners/modal_runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/runners/modal_runner.py b/src/runners/modal_runner.py index eb74d173..bb8d952a 100644 --- a/src/runners/modal_runner.py +++ b/src/runners/modal_runner.py @@ -33,10 +33,10 @@ "PyYAML", ) .pip_install( - "torch~=2.7", + "torch>=2.7.0,<2.8.0", "torchvision~=0.22", - "torchaudio~=2.7", - index_url="https://download.pytorch.org/whl/cu128" + "torchaudio>=2.7.0,<2.8.0", + index_url="https://download.pytorch.org/whl/cu128", ) # other frameworks .pip_install( From 030d51b7f782a570bb68546836e88a844cd3236a Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Fri, 22 Aug 2025 01:35:48 +0200 Subject: [PATCH 3/9] added basic testing for modal launcher --- unit-tests/test_modal.py | 199 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 unit-tests/test_modal.py diff --git a/unit-tests/test_modal.py b/unit-tests/test_modal.py new file mode 100644 index 00000000..043b63de --- /dev/null +++ b/unit-tests/test_modal.py @@ -0,0 +1,199 @@ +import os +import subprocess +import pytest +from pathlib import Path + +from libkernelbot.launchers import ModalLauncher +from libkernelbot.consts import SubmissionMode, GPU_TO_SM, ModalGPU +from libkernelbot.task import make_task_definition, build_task_config +from libkernelbot.report import RunProgressReporter + + +class MockProgressReporter(RunProgressReporter): + """Test progress reporter that captures messages.""" + + def __init__(self, title: str = "Test Modal Run"): + super().__init__(title) + self.messages = [] + self.updates = [] + + async def push(self, message: str): + self.messages.append(message) + + async def update(self, message: str): + self.updates.append(message) + + +@pytest.fixture(scope="session") +def modal_deployment(project_root: Path): + """ + Fixture that ensures Modal is deployed 
before running tests. + Runs once per test session and deploys to the specified Modal environment. + """ + # Determine Modal environment (default to 'pytest' if not specified) + modal_env = os.getenv("PYTEST_MODAL_ENV", "pytest") + + print(f"🚀 Deploying to Modal environment: {modal_env}") + + # Deploy to Modal with specific environment + try: + result = subprocess.run( + ["modal", "deploy", "--env", modal_env, "modal_runner_archs.py"], + cwd=project_root / "src" / "runners", + capture_output=True, + text=True, + timeout=600 # 10 minute timeout in case image needs to be built + ) + + if result.returncode != 0: + # if it fails simply because the environment does not exist, we can fix that + if "No such environment" in result.stderr: + result = subprocess.run( + ["modal", "environment", "create", modal_env], + cwd=project_root / "src" / "runners", + capture_output=True, + text=True, + timeout=30 + ) + if result.returncode != 0: + pytest.fail(f"Modal environment `{modal_env}` not available, and failed to create: {result.stderr}") + else: + # try again, now that the env exists. 
+ result = subprocess.run( + ["modal", "deploy", "--env", modal_env, "modal_runner_archs.py"], + cwd=project_root / "src" / "runners", + capture_output=True, + text=True, + timeout=300 + ) + if result.returncode != 0: + pytest.fail(f"Modal deploy failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + else: + pytest.fail(f"Modal deploy failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + + print(f"✅ Modal deployment to '{modal_env}' completed successfully") + print(f"Deploy output: {result.stdout}") + + # Set the Modal environment for the session + original_env = os.environ.get("MODAL_ENVIRONMENT") + os.environ["MODAL_ENVIRONMENT"] = modal_env + + yield modal_env + + # Restore original environment + if original_env is not None: + os.environ["MODAL_ENVIRONMENT"] = original_env + elif "MODAL_ENVIRONMENT" in os.environ: + del os.environ["MODAL_ENVIRONMENT"] + + except subprocess.TimeoutExpired as e: + pytest.fail(f"Modal deploy timed out after 5 minutes:\nstdout: {e.stdout}, stderr:{e.stderr}") + except Exception as e: + pytest.fail(f"Modal deploy failed with exception: {e}") + + +@pytest.mark.asyncio +@pytest.mark.parametrize("gpu_type", [ModalGPU.T4, ModalGPU.L4, ModalGPU.A100, ModalGPU.H100, ModalGPU.B200]) +async def test_modal_launcher_python_script(modal_deployment, project_root: Path, gpu_type: ModalGPU): + """ + Test ModalLauncher with a real Python script using examples/identity_py. 
+ """ + launcher = ModalLauncher(add_include_dirs=[]) + reporter = MockProgressReporter("progress") + + # Load the real identity_py task + task_path = project_root / "examples" / "identity_py" + if not task_path.exists(): + pytest.skip("examples/identity_py not found - skipping Modal integration test") + + # Load the task definition + task_definition = make_task_definition(task_path) + + # Use the actual working submission from the examples + submission_content = (task_path / "submission.py").read_text() + + config = build_task_config( + task=task_definition.task, + submission_content=submission_content, + arch=GPU_TO_SM[gpu_type.name], + mode=SubmissionMode.TEST + ) + + result = await launcher.run_submission(config, gpu_type, reporter) + + # Basic structure and success + assert result.success, f"Expected successful run, got: {result.error}" + assert result.error == "" + assert isinstance(result.runs, dict) + + # System info - test actual expected values + assert gpu_type.name in result.system.gpu + assert "Linux" in result.system.platform + assert result.system.torch.startswith("2.7") # update when the image changes + + # Test run structure + assert 'test' in result.runs + test_run = result.runs['test'] + + # For Python runs, compilation is None + assert test_run.compilation is None + + # Run needs to succeed + assert test_run.run.success is True + assert test_run.run.passed is True + assert test_run.run.exit_code == 0 + assert test_run.run.stdout == "" + assert test_run.run.stderr == "" + assert test_run.run.duration > 0 + + # Test need to succeed + assert test_run.run.result['check'] == 'pass' + test_count = int(test_run.run.result['test-count']) + assert test_count == 5 + for i in range(test_count): + assert test_run.run.result[f'test.{i}.status'] == 'pass' + assert 'size:' in test_run.run.result[f'test.{i}.spec'] + assert 'seed:' in test_run.run.result[f'test.{i}.spec'] + + # sanity check for timings + assert test_run.start < test_run.end + + # check messages 
+ assert reporter.messages == ['⏳ Waiting for Modal run to finish...'] + assert reporter.updates == ['✅ Waiting for modal run to finish... Done'] + + +@pytest.mark.asyncio +@pytest.mark.parametrize("script", ["cheat-fd.py", "cheat-input.py", "cheat-rng.py"]) +async def test_modal_launcher_failing_script(modal_deployment, project_root: Path, script: str): + """ + Test ModalLauncher with a real Python scripts that are designed to be wrong. + """ + launcher = ModalLauncher(add_include_dirs=[]) + reporter = MockProgressReporter("progress") + gpu_type = ModalGPU.T4 + + # Load the real identity_py task + task_path = project_root / "examples" / "identity_py" + if not task_path.exists(): + pytest.skip("examples/identity_py not found - skipping Modal integration test") + + # Load the task definition + task_definition = make_task_definition(task_path) + + # Use one of the deliberately cheating submissions from the examples + submission_content = (task_path / script).read_text() + task_definition.task.seed = 653212 + config = build_task_config( + task=task_definition.task, + submission_content=submission_content, + arch=GPU_TO_SM[gpu_type.name], + mode=SubmissionMode.LEADERBOARD, + ) + + result = await launcher.run_submission(config, gpu_type, reporter) + + # Basic structure and success + assert result.success, f"Expected successful run, got: {result.error}" + assert result.error == "" + assert result.runs['test'].run.passed is False or result.runs['benchmark'].run.passed is False From 77e290e3c61ab4b0659bba1910442012f030eab2 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Fri, 22 Aug 2025 10:19:52 +0200 Subject: [PATCH 4/9] modal token --- .github/workflows/testing.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 71fd888f..4b486964 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -11,10 +11,14 @@ jobs: unit-tests: runs-on: ubuntu-latest 
timeout-minutes: 10 + env: + MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }} + MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }} steps: - uses: actions/checkout@v4 - uses: astral-sh/setup-uv@v4 - run: uv sync --extra dev + - run: uv run modal token set --token-id ${MODAL_TOKEN_ID} --token-secret ${MODAL_TOKEN_SECRET} - run: uv run pytest --cov-report term --cov-report html --cov-report xml --cov=src/libkernelbot unit-tests -v - uses: actions/upload-artifact@v4 with: From e87acbfb97b246552452755f410ae5cefa5a5bb9 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Fri, 22 Aug 2025 11:07:00 +0200 Subject: [PATCH 5/9] use new function name --- src/libkernelbot/launchers/modal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libkernelbot/launchers/modal.py b/src/libkernelbot/launchers/modal.py index 26adfe33..6c2308ec 100644 --- a/src/libkernelbot/launchers/modal.py +++ b/src/libkernelbot/launchers/modal.py @@ -32,7 +32,7 @@ async def run_submission( result = await loop.run_in_executor( None, - lambda: modal.Function.lookup("discord-bot-runner", func_name).remote(config=config), + lambda: modal.Function.from_name("discord-bot-runner", func_name).remote(config=config), ) await status.update("✅ Waiting for modal run to finish... 
Done") From f458349db7813631b4ccef6ce9d6896d3ab549bc Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Fri, 22 Aug 2025 11:19:22 +0200 Subject: [PATCH 6/9] split into unit and integration tests --- .github/workflows/testing.yml | 21 +++++++++++++-------- pyproject.toml | 3 +++ unit-tests/test_modal.py | 6 ++++-- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 4b486964..9c101191 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -11,15 +11,11 @@ jobs: unit-tests: runs-on: ubuntu-latest timeout-minutes: 10 - env: - MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }} - MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }} steps: - uses: actions/checkout@v4 - uses: astral-sh/setup-uv@v4 - run: uv sync --extra dev - - run: uv run modal token set --token-id ${MODAL_TOKEN_ID} --token-secret ${MODAL_TOKEN_SECRET} - - run: uv run pytest --cov-report term --cov-report html --cov-report xml --cov=src/libkernelbot unit-tests -v + - run: uv run pytest --cov-report term --cov-report html --cov-report xml --cov=src/libkernelbot -m "not integration" unit-tests -v - uses: actions/upload-artifact@v4 with: name: coverage @@ -29,13 +25,22 @@ jobs: uses: py-cov-action/python-coverage-comment-action@v3 with: GITHUB_TOKEN: ${{ github.token }} - - name: Store Pull Request comment to be posted uses: actions/upload-artifact@v4 if: steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true' with: - # If you use a different name, update COMMENT_ARTIFACT_NAME accordingly name: python-coverage-comment-action - # If you use a different name, update COMMENT_FILENAME accordingly path: python-coverage-comment-action.txt + integration-tests: + runs-on: ubuntu-latest + timeout-minutes: 30 + env: + MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }} + MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }} + steps: + - uses: actions/checkout@v4 + - uses: 
astral-sh/setup-uv@v4 + - run: uv sync --extra dev + - run: uv run modal token set --token-id ${MODAL_TOKEN_ID} --token-secret ${MODAL_TOKEN_SECRET} + - run: uv run pytest -m integration -v diff --git a/pyproject.toml b/pyproject.toml index bb72bd06..0c66e0bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,9 @@ exclude_lines = [ [tool.pytest.ini_options] testpaths = ["scripts", "tests"] python_files = ["test_*.py", "*_test.py", "ci_test_*.py"] +markers = [ + "integration: integration tests that need to interact externally, e.g., with modal/github actions/etc" +] [tool.ruff] line-length = 120 diff --git a/unit-tests/test_modal.py b/unit-tests/test_modal.py index 043b63de..718b086c 100644 --- a/unit-tests/test_modal.py +++ b/unit-tests/test_modal.py @@ -42,7 +42,7 @@ def modal_deployment(project_root: Path): cwd=project_root / "src" / "runners", capture_output=True, text=True, - timeout=600 # 10 minute timeout in case image needs to be built + timeout=600 # 10 minute timeout in case image needs to be built (can be very slow) ) if result.returncode != 0: @@ -64,7 +64,7 @@ def modal_deployment(project_root: Path): cwd=project_root / "src" / "runners", capture_output=True, text=True, - timeout=300 + timeout=600 ) if result.returncode != 0: pytest.fail(f"Modal deploy failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") @@ -92,6 +92,7 @@ def modal_deployment(project_root: Path): pytest.fail(f"Modal deploy failed with exception: {e}") +@pytest.mark.integration @pytest.mark.asyncio @pytest.mark.parametrize("gpu_type", [ModalGPU.T4, ModalGPU.L4, ModalGPU.A100, ModalGPU.H100, ModalGPU.B200]) async def test_modal_launcher_python_script(modal_deployment, project_root: Path, gpu_type: ModalGPU): @@ -163,6 +164,7 @@ async def test_modal_launcher_python_script(modal_deployment, project_root: Path assert reporter.updates == ['✅ Waiting for modal run to finish... 
Done'] +@pytest.mark.integration @pytest.mark.asyncio @pytest.mark.parametrize("script", ["cheat-fd.py", "cheat-input.py", "cheat-rng.py"]) async def test_modal_launcher_failing_script(modal_deployment, project_root: Path, script: str): From 09c9e8ff93267a807a7ab9a066749f2477d70dd2 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Fri, 22 Aug 2025 11:28:26 +0200 Subject: [PATCH 7/9] rename --- .github/workflows/testing.yml | 4 ++-- {unit-tests => tests}/conftest.py | 0 {unit-tests => tests}/test_backend.py | 0 {unit-tests => tests}/test_leaderboard_db.py | 0 {unit-tests => tests}/test_modal.py | 0 {unit-tests => tests}/test_report.py | 0 {unit-tests => tests}/test_submission.py | 0 {unit-tests => tests}/test_task.py | 0 {unit-tests => tests}/test_utils.py | 0 9 files changed, 2 insertions(+), 2 deletions(-) rename {unit-tests => tests}/conftest.py (100%) rename {unit-tests => tests}/test_backend.py (100%) rename {unit-tests => tests}/test_leaderboard_db.py (100%) rename {unit-tests => tests}/test_modal.py (100%) rename {unit-tests => tests}/test_report.py (100%) rename {unit-tests => tests}/test_submission.py (100%) rename {unit-tests => tests}/test_task.py (100%) rename {unit-tests => tests}/test_utils.py (100%) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 9c101191..5b71a3f7 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -15,7 +15,7 @@ jobs: - uses: actions/checkout@v4 - uses: astral-sh/setup-uv@v4 - run: uv sync --extra dev - - run: uv run pytest --cov-report term --cov-report html --cov-report xml --cov=src/libkernelbot -m "not integration" unit-tests -v + - run: uv run pytest --cov-report term --cov-report html --cov-report xml --cov=src/libkernelbot -m "not integration" tests -v - uses: actions/upload-artifact@v4 with: name: coverage @@ -43,4 +43,4 @@ jobs: - uses: astral-sh/setup-uv@v4 - run: uv sync --extra dev - run: uv run modal token set --token-id 
${MODAL_TOKEN_ID} --token-secret ${MODAL_TOKEN_SECRET} - - run: uv run pytest -m integration -v + - run: uv run pytest -m integration tests -v diff --git a/unit-tests/conftest.py b/tests/conftest.py similarity index 100% rename from unit-tests/conftest.py rename to tests/conftest.py diff --git a/unit-tests/test_backend.py b/tests/test_backend.py similarity index 100% rename from unit-tests/test_backend.py rename to tests/test_backend.py diff --git a/unit-tests/test_leaderboard_db.py b/tests/test_leaderboard_db.py similarity index 100% rename from unit-tests/test_leaderboard_db.py rename to tests/test_leaderboard_db.py diff --git a/unit-tests/test_modal.py b/tests/test_modal.py similarity index 100% rename from unit-tests/test_modal.py rename to tests/test_modal.py diff --git a/unit-tests/test_report.py b/tests/test_report.py similarity index 100% rename from unit-tests/test_report.py rename to tests/test_report.py diff --git a/unit-tests/test_submission.py b/tests/test_submission.py similarity index 100% rename from unit-tests/test_submission.py rename to tests/test_submission.py diff --git a/unit-tests/test_task.py b/tests/test_task.py similarity index 100% rename from unit-tests/test_task.py rename to tests/test_task.py diff --git a/unit-tests/test_utils.py b/tests/test_utils.py similarity index 100% rename from unit-tests/test_utils.py rename to tests/test_utils.py From b5abc33023e668b551ab657b92eec4919cd36586 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Fri, 22 Aug 2025 11:38:20 +0200 Subject: [PATCH 8/9] lint --- tests/test_modal.py | 66 ++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/tests/test_modal.py b/tests/test_modal.py index 718b086c..72873cf4 100644 --- a/tests/test_modal.py +++ b/tests/test_modal.py @@ -1,12 +1,13 @@ import os import subprocess -import pytest from pathlib import Path +import pytest + +from libkernelbot.consts import GPU_TO_SM, ModalGPU, 
SubmissionMode from libkernelbot.launchers import ModalLauncher -from libkernelbot.consts import SubmissionMode, GPU_TO_SM, ModalGPU -from libkernelbot.task import make_task_definition, build_task_config from libkernelbot.report import RunProgressReporter +from libkernelbot.task import build_task_config, make_task_definition class MockProgressReporter(RunProgressReporter): @@ -42,7 +43,7 @@ def modal_deployment(project_root: Path): cwd=project_root / "src" / "runners", capture_output=True, text=True, - timeout=600 # 10 minute timeout in case image needs to be built (can be very slow) + timeout=600, # 10 minute timeout in case image needs to be built (can be very slow) ) if result.returncode != 0: @@ -53,10 +54,13 @@ def modal_deployment(project_root: Path): cwd=project_root / "src" / "runners", capture_output=True, text=True, - timeout=30 + timeout=30, ) if result.returncode != 0: - pytest.fail(f"Modal environment `{modal_env}` not available, and failed to create: {result.stderr}") + pytest.fail( + f"Modal environment `{modal_env}` not available, " + f"and failed to create: {result.stderr}" + ) else: # try again, now that the env exists. 
result = subprocess.run( @@ -64,12 +68,18 @@ def modal_deployment(project_root: Path): cwd=project_root / "src" / "runners", capture_output=True, text=True, - timeout=600 + timeout=600, ) if result.returncode != 0: - pytest.fail(f"Modal deploy failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + pytest.fail( + f"Modal deploy failed:\n" + f"STDOUT:\n{result.stdout}\n" + f"STDERR:\n{result.stderr}" + ) else: - pytest.fail(f"Modal deploy failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + pytest.fail( + f"Modal deploy failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + ) print(f"✅ Modal deployment to '{modal_env}' completed successfully") print(f"Deploy output: {result.stdout}") @@ -87,15 +97,21 @@ def modal_deployment(project_root: Path): del os.environ["MODAL_ENVIRONMENT"] except subprocess.TimeoutExpired as e: - pytest.fail(f"Modal deploy timed out after 5 minutes:\nstdout: {e.stdout}, stderr:{e.stderr}") + pytest.fail( + f"Modal deploy timed out after 5 minutes:\nstdout: {e.stdout}, stderr:{e.stderr}" + ) except Exception as e: pytest.fail(f"Modal deploy failed with exception: {e}") @pytest.mark.integration @pytest.mark.asyncio -@pytest.mark.parametrize("gpu_type", [ModalGPU.T4, ModalGPU.L4, ModalGPU.A100, ModalGPU.H100, ModalGPU.B200]) -async def test_modal_launcher_python_script(modal_deployment, project_root: Path, gpu_type: ModalGPU): +@pytest.mark.parametrize( + "gpu_type", [ModalGPU.T4, ModalGPU.L4, ModalGPU.A100, ModalGPU.H100, ModalGPU.B200] +) +async def test_modal_launcher_python_script( + modal_deployment, project_root: Path, gpu_type: ModalGPU +): """ Test ModalLauncher with a real Python script using examples/identity_py. 
""" @@ -117,7 +133,7 @@ async def test_modal_launcher_python_script(modal_deployment, project_root: Path task=task_definition.task, submission_content=submission_content, arch=GPU_TO_SM[gpu_type.name], - mode=SubmissionMode.TEST + mode=SubmissionMode.TEST, ) result = await launcher.run_submission(config, gpu_type, reporter) @@ -133,8 +149,8 @@ async def test_modal_launcher_python_script(modal_deployment, project_root: Path assert result.system.torch.startswith("2.7") # update when the image changes # Test run structure - assert 'test' in result.runs - test_run = result.runs['test'] + assert "test" in result.runs + test_run = result.runs["test"] # For Python runs, compilation is None assert test_run.compilation is None @@ -148,20 +164,20 @@ async def test_modal_launcher_python_script(modal_deployment, project_root: Path assert test_run.run.duration > 0 # Test need to succeed - assert test_run.run.result['check'] == 'pass' - test_count = int(test_run.run.result['test-count']) + assert test_run.run.result["check"] == "pass" + test_count = int(test_run.run.result["test-count"]) assert test_count == 5 for i in range(test_count): - assert test_run.run.result[f'test.{i}.status'] == 'pass' - assert 'size:' in test_run.run.result[f'test.{i}.spec'] - assert 'seed:' in test_run.run.result[f'test.{i}.spec'] + assert test_run.run.result[f"test.{i}.status"] == "pass" + assert "size:" in test_run.run.result[f"test.{i}.spec"] + assert "seed:" in test_run.run.result[f"test.{i}.spec"] # sanity check for timings assert test_run.start < test_run.end # check messages - assert reporter.messages == ['⏳ Waiting for Modal run to finish...'] - assert reporter.updates == ['✅ Waiting for modal run to finish... Done'] + assert reporter.messages == ["⏳ Waiting for Modal run to finish..."] + assert reporter.updates == ["✅ Waiting for modal run to finish... 
Done"] @pytest.mark.integration @@ -169,8 +185,8 @@ async def test_modal_launcher_python_script(modal_deployment, project_root: Path @pytest.mark.parametrize("script", ["cheat-fd.py", "cheat-input.py", "cheat-rng.py"]) async def test_modal_launcher_failing_script(modal_deployment, project_root: Path, script: str): """ - Test ModalLauncher with a real Python scripts that are designed to be wrong. - """ + Test ModalLauncher with a real Python scripts that are designed to be wrong. + """ launcher = ModalLauncher(add_include_dirs=[]) reporter = MockProgressReporter("progress") gpu_type = ModalGPU.T4 @@ -198,4 +214,4 @@ async def test_modal_launcher_failing_script(modal_deployment, project_root: Pat # Basic structure and success assert result.success, f"Expected successful run, got: {result.error}" assert result.error == "" - assert result.runs['test'].run.passed is False or result.runs['benchmark'].run.passed is False + assert result.runs["test"].run.passed is False or result.runs["benchmark"].run.passed is False From c3e57bc2e8781b6ecad338fd695b75ccdf7237aa Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Tue, 26 Aug 2025 00:03:34 +0200 Subject: [PATCH 9/9] more interesting tests --- .../vectoradd_py/submission_cuda_inline.py | 57 +------------------ tests/test_modal.py | 19 ++++--- 2 files changed, 14 insertions(+), 62 deletions(-) diff --git a/examples/vectoradd_py/submission_cuda_inline.py b/examples/vectoradd_py/submission_cuda_inline.py index d505d2a4..51841871 100644 --- a/examples/vectoradd_py/submission_cuda_inline.py +++ b/examples/vectoradd_py/submission_cuda_inline.py @@ -54,7 +54,6 @@ """ - add_module = load_inline( name='add_cuda', cpp_sources=add_cpp_source, @@ -63,62 +62,12 @@ verbose=True, ) + def add(A, B): if not A.is_cuda or not B.is_cuda: raise RuntimeError("Both tensors must be on GPU") return add_module.add_cuda(A, B) -def custom_kernel(data: input_t) -> output_t: - """ - Custom implementation of vector addition 
using CUDA inline function. - Args: - inputs: List of pairs of tensors [A, B] to be added. - Returns: - List of tensors containing element-wise sums. - """ - A, B = data - assert A.is_cuda and B.is_cuda, "Input tensors must be on GPU" - assert A.shape == B.shape, "Input tensors must have the same shape" - assert A.dtype == torch.float16 and B.dtype == torch.float16, "Input tensors must be float16" - - M, N = A.shape - C = torch.empty_like(A) - - n_threads = 256 - n_blocks = (M * N + n_threads - 1) // n_threads - - cuda_source = """ - extern "C" __global__ void add_kernel( - const half* __restrict__ A, - const half* __restrict__ B, - half* __restrict__ C, - const int n_elements - ) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < n_elements) { - C[idx] = __hadd(A[idx], B[idx]); - } - } - """ - - module = torch.utils.cpp_extension.load_inline( - name=f"add_kernel_{M}_{N}", - cpp_sources="", - cuda_sources=cuda_source, - functions=["add_kernel"], - with_cuda=True, - extra_cuda_cflags=["-arch=sm_70"], # Adjust based on your GPU architecture - ) - - module.add_kernel( - cuda_stream=torch.cuda.current_stream(), - args=[ - A.reshape(-1), B.reshape(-1), C.reshape(-1), - M * N, - ], - blocks=n_blocks, - threads=n_threads, - ) - - return C +def custom_kernel(data: input_t) -> output_t: + return add(*data) diff --git a/tests/test_modal.py b/tests/test_modal.py index 72873cf4..9fa1725e 100644 --- a/tests/test_modal.py +++ b/tests/test_modal.py @@ -1,6 +1,7 @@ import os import subprocess from pathlib import Path +from typing import Tuple import pytest @@ -109,8 +110,15 @@ def modal_deployment(project_root: Path): @pytest.mark.parametrize( "gpu_type", [ModalGPU.T4, ModalGPU.L4, ModalGPU.A100, ModalGPU.H100, ModalGPU.B200] ) +@pytest.mark.parametrize( + "task", + [ + ("vectoradd_py", "submission_cuda_inline.py"), + ("vectoradd_py", "submission_triton.py"), + ], +) async def test_modal_launcher_python_script( - modal_deployment, project_root: Path, gpu_type: 
ModalGPU + modal_deployment, project_root: Path, gpu_type: ModalGPU, task: Tuple[str, str] ): """ Test ModalLauncher with a real Python script using examples/identity_py. @@ -119,7 +127,7 @@ async def test_modal_launcher_python_script( reporter = MockProgressReporter("progress") # Load the real identity_py task - task_path = project_root / "examples" / "identity_py" + task_path = project_root / "examples" / task[0] if not task_path.exists(): pytest.skip("examples/identity_py not found - skipping Modal integration test") @@ -127,7 +135,7 @@ async def test_modal_launcher_python_script( task_definition = make_task_definition(task_path) # Use the actual working submission from the examples - submission_content = (task_path / "submission.py").read_text() + submission_content = (task_path / task[1]).read_text() config = build_task_config( task=task_definition.task, @@ -152,15 +160,10 @@ async def test_modal_launcher_python_script( assert "test" in result.runs test_run = result.runs["test"] - # For Python runs, compilation is None - assert test_run.compilation is None - # Run needs to succeed assert test_run.run.success is True assert test_run.run.passed is True assert test_run.run.exit_code == 0 - assert test_run.run.stdout == "" - assert test_run.run.stderr == "" assert test_run.run.duration > 0 # Test need to succeed