From 252762a9d46e39915742795020b336b726423b36 Mon Sep 17 00:00:00 2001
From: ngc92 <7938269+ngc92@users.noreply.github.com>
Date: Sat, 2 Aug 2025 14:02:58 +0200
Subject: [PATCH 1/6] require database configuration using URL

---
 src/kernelbot/env.py               |  5 -----
 src/libkernelbot/backend.py        |  7 +------
 src/libkernelbot/leaderboard_db.py | 17 ++---------------
 3 files changed, 3 insertions(+), 26 deletions(-)

diff --git a/src/kernelbot/env.py b/src/kernelbot/env.py
index b2e457da..b1758b63 100644
--- a/src/kernelbot/env.py
+++ b/src/kernelbot/env.py
@@ -45,10 +45,5 @@ def init_environment():
     env.PROBLEM_DEV_DIR = os.getenv("PROBLEM_DEV_DIR", "examples")

     # PostgreSQL-specific constants
-    env.POSTGRES_HOST = os.getenv("POSTGRES_HOST")
-    env.POSTGRES_DATABASE = os.getenv("POSTGRES_DATABASE")
-    env.POSTGRES_USER = os.getenv("POSTGRES_USER")
-    env.POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
-    env.POSTGRES_PORT = os.getenv("POSTGRES_PORT")
     env.DATABASE_URL = os.getenv("DATABASE_URL")
     env.DISABLE_SSL = os.getenv("DISABLE_SSL")
diff --git a/src/libkernelbot/backend.py b/src/libkernelbot/backend.py
index 3874b142..ec1b8769 100644
--- a/src/libkernelbot/backend.py
+++ b/src/libkernelbot/backend.py
@@ -29,13 +29,8 @@ def __init__(
     ):
         self.debug_mode = debug_mode
         self.db = LeaderboardDB(
-            env.POSTGRES_HOST,
-            env.POSTGRES_DATABASE,
-            env.POSTGRES_USER,
-            env.POSTGRES_PASSWORD,
-            env.POSTGRES_PORT,
             url=env.DATABASE_URL,
-            ssl_mode="require" if not env.DISABLE_SSL else "disable",
+            ssl_mode="require" if not getattr(env, "DISABLE_SSL", "") else "disable",
         )

         try:
diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py
index c322ab68..93284fbc 100644
--- a/src/libkernelbot/leaderboard_db.py
+++ b/src/libkernelbot/leaderboard_db.py
@@ -18,17 +18,8 @@


 class LeaderboardDB:
-    def __init__(
-        self, host: str, database: str, user: str, password: str, port: str, url: str, ssl_mode: str
-    ):
+    def __init__(self, url: str, ssl_mode: str):
         """Initialize database connection parameters"""
-        self.connection_params = {
-            "host": host,
-            "database": database,
-            "user": user,
-            "password": password,
-            "port": port,
-        }
         self.url = url
         self.ssl_mode = ssl_mode
         self.connection: Optional[psycopg2.extensions.connection] = None
@@ -39,11 +30,7 @@ def __init__(
     def connect(self) -> bool:
         """Establish connection to the database"""
         try:
-            self.connection = (
-                psycopg2.connect(self.url, sslmode=self.ssl_mode)
-                if self.url
-                else psycopg2.connect(**self.connection_params)
-            )
+            self.connection = psycopg2.connect(self.url, sslmode=self.ssl_mode)
             self.cursor = self.connection.cursor()
             return True
         except psycopg2.Error as e:

From 824f9b3aa3a6e79a3d0304afc5c22e952e6f8c8c Mon Sep 17 00:00:00 2001
From: ngc92 <7938269+ngc92@users.noreply.github.com>
Date: Sat, 2 Aug 2025 14:03:30 +0200
Subject: [PATCH 2/6] update handling of fixtures using conftest.py

---
 src/libkernelbot/report.py        |   7 +-
 unit-tests/conftest.py            | 124 ++++++++++++++++++++++++++++++
 unit-tests/test_leaderboard_db.py |  68 ----------------
 unit-tests/test_report.py         |  43 +++++++----
 unit-tests/test_task.py           |  38 ---------
 5 files changed, 158 insertions(+), 122 deletions(-)
 create mode 100644 unit-tests/conftest.py

diff --git a/src/libkernelbot/report.py b/src/libkernelbot/report.py
index dd3a2f32..ec52e7bd 100644
--- a/src/libkernelbot/report.py
+++ b/src/libkernelbot/report.py
@@ -33,8 +33,8 @@ class Log:


 class RunResultReport:
-    def __init__(self):
-        self.data: List[Text | Log] = []
+    def __init__(self, data=None):
+        self.data: List[Text | Log] = data or []

     def add_text(self, section: str):
         self.data.append(Text(section))
@@ -42,6 +42,9 @@ def add_text(self, section: str):
     def add_log(self, header: str, log: str):
         self.data.append(Log(header, log))

+    def __repr__(self):
+        return f"RunResultReport(data={self.data})"
+

 def _generate_compile_report(reporter: "RunResultReport", comp: CompileResult):
     message = ""
diff --git a/unit-tests/conftest.py b/unit-tests/conftest.py
new file mode 100644
index 00000000..170b0196
--- /dev/null
+++ b/unit-tests/conftest.py
@@ -0,0 +1,124 @@
+import subprocess
+import time
+from pathlib import Path
+
+import pytest
+
+DATABASE_URL = "postgresql://postgres:postgres@localhost:5433/clusterdev"
+
+
+@pytest.fixture(scope="module")
+def docker_compose():
+    tgt_path = Path.cwd()
+    if tgt_path.name == "unit-tests":
+        tgt_path = tgt_path.parent
+
+    """Start a test database and run migrations"""
+    subprocess.check_call(
+        ["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"], cwd=tgt_path
+    )
+
+    try:
+        # Wait for migrations to finish
+        while True:
+            result = subprocess.run(
+                ["docker", "compose", "-f", "docker-compose.test.yml", "ps", "-q", "migrate-test"],
+                capture_output=True,
+                text=True,
+                cwd=tgt_path,
+            )
+
+            if not result.stdout.strip():  # Container no longer exists
+                break
+            time.sleep(1)
+
+        # Check if migrations succeeded
+        logs = subprocess.run(
+            ["docker", "compose", "-f", "docker-compose.test.yml", "logs", "migrate-test"],
+            capture_output=True,
+            text=True,
+            cwd=tgt_path,
+        )
+
+        if "error" in logs.stdout.lower():
+            raise Exception(f"Migrations failed: {logs.stdout}")
+
+        yield
+    finally:
+        subprocess.run(
+            ["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"], cwd=tgt_path
+        )
+
+
+def _nuke_contents(db):
+    db.cursor.execute(
+        "TRUNCATE leaderboard.code_files, leaderboard.submission, leaderboard.runs, "
+        "leaderboard.leaderboard, leaderboard.user_info, leaderboard.templates, "
+        "leaderboard.gpu_type RESTART IDENTITY CASCADE"
+    )
+    db.connection.commit()
+
+
+@pytest.fixture()
+def database(docker_compose):
+    from libkernelbot import leaderboard_db
+
+    db = leaderboard_db.LeaderboardDB(
+        url=DATABASE_URL,
+        ssl_mode="disable",
+    )
+
+    with db:
+        _nuke_contents(db)
+    yield db
+    with db:
+        _nuke_contents(db)
+
+
+@pytest.fixture()
+def bot(docker_compose, database):
+    from types import SimpleNamespace
+
+    from libkernelbot import backend
+
+    env = SimpleNamespace()
+    env.DATABASE_URL = DATABASE_URL
+    env.DISABLE_SSL = "1"
+    yield backend.KernelBackend(env, False)
+
+
+TASK_YAML = """
+lang: py
+description: "Test task description"
+ranking_by: geom
+test_timeout: 120
+files:
+  - name: "kernel.py"
+    source: "kernel.py"
+  - name: "submission.py"
+    source: "@SUBMISSION@"
+config:
+  main: "kernel.py"
+tests:
+  - input_size: 1000
+    dtype: "float32"
+benchmarks:
+  - input_size: 10000
+    dtype: "float32"
+templates:
+  Python: "template.py"
+  CUDA: "template.cu"
+"""
+
+
+@pytest.fixture
+def task_directory(tmp_path):
+    """Create a temporary directory structure for task definition testing"""
+    # Create source files
+    Path.write_text(tmp_path / "kernel.py", "def kernel(): pass")
+    Path.write_text(tmp_path / "template.py", "# Python template")
+    Path.write_text(tmp_path / "template.cu", "// CUDA template")
+
+    # Create task.yml
+    Path.write_text(tmp_path / "task.yml", TASK_YAML)
+    return tmp_path
diff --git a/unit-tests/test_leaderboard_db.py b/unit-tests/test_leaderboard_db.py
index 939f69b9..741515a8 100644
--- a/unit-tests/test_leaderboard_db.py
+++ b/unit-tests/test_leaderboard_db.py
@@ -1,77 +1,13 @@
 import copy
 import dataclasses
 import datetime
-import subprocess
-import time

 import pytest
 from test_report import sample_compile_result, sample_run_result, sample_system_info
-from test_task import task_directory

 from libkernelbot import leaderboard_db
 from libkernelbot.utils import KernelBotError

-DATABASE_URL = "postgresql://postgres:postgres@localhost:5433/clusterdev"
-
-
-@pytest.fixture(scope="module")
-def docker_compose():
-    """Start a test database and run migrations"""
-    subprocess.check_call(["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"])
-
-    try:
-        # Wait for migrations to finish
-        while True:
-            result = subprocess.run(
-                ["docker", "compose", "-f", "docker-compose.test.yml", "ps", "-q", "migrate-test"],
-                capture_output=True,
-                text=True,
-            )
-
-            if not result.stdout.strip():  # Container no longer exists
-                break
-            time.sleep(1)
-
-        # Check if migrations succeeded
-        logs = subprocess.run(
-            ["docker", "compose", "-f", "docker-compose.test.yml", "logs", "migrate-test"],
-            capture_output=True,
-            text=True,
-        )
-
-        if "error" in logs.stdout.lower():
-            raise Exception(f"Migrations failed: {logs.stdout}")
-
-        yield leaderboard_db.LeaderboardDB(
-            host="",
-            database="",
-            port="",
-            user="",
-            password="",
-            url=DATABASE_URL,
-            ssl_mode="disable",
-        )
-    finally:
-        subprocess.run(["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"])
-
-
-def _nuke_contents(db):
-    db.cursor.execute(
-        "TRUNCATE leaderboard.code_files, leaderboard.submission, leaderboard.runs, "
-        "leaderboard.leaderboard, leaderboard.user_info, leaderboard.templates, "
-        "leaderboard.gpu_type RESTART IDENTITY CASCADE"
-    )
-    db.connection.commit()
-
-
-@pytest.fixture()
-def database(docker_compose):
-    with docker_compose as db:
-        _nuke_contents(db)
-    yield docker_compose
-    with docker_compose as db:
-        _nuke_contents(db)
-

 def _submit_leaderboard(database, task_directory):
     """
@@ -628,7 +564,3 @@ def test_generate_stats(database, submit_leaderboard):
         "sub_waiting": 0,
         "total_runtime.A100": datetime.timedelta(seconds=35),
     }
-
-
-# this is her just to make ruff leave pytest fixtures alone
-__all__ = [task_directory]
diff --git a/unit-tests/test_report.py b/unit-tests/test_report.py
index e9961a9a..ea148267 100644
--- a/unit-tests/test_report.py
+++ b/unit-tests/test_report.py
@@ -38,16 +38,9 @@ def sample_compile_result() -> CompileResult:
     )


-def sample_run_result() -> RunResult:
-    return RunResult(
-        success=True,
-        passed=True,
-        command="./test",
-        exit_code=0,
-        duration=1.5,
-        stdout="All tests passed",
-        stderr="",
-        result={
+def sample_run_result(mode="test") -> RunResult:
+    if mode == "test":
+        result = {
             "test-count": "3",
             "test.0.status": "pass",
             "test.0.spec": "Test addition",
@@ -57,16 +50,38 @@ def sample_run_result() -> RunResult:
             "test.2.status": "fail",
             "test.2.spec": "Test division",
             "test.2.error": "Division by zero",
-        },
+        }
+    elif mode == "benchmark":
+        result = {
+            "benchmark-count": "1",
+            "benchmark.0.status": "pass",
+            "benchmark.0.spec": "Matrix multiplication",
+            "benchmark.0.mean": "1.5",
+            "benchmark.0.err": "0.1",
+            "benchmark.0.best": "1.3",
+            "benchmark.0.worst": "1.8",
+        }
+    else:
+        assert False, f"Invalid mode: {mode}"
+
+    return RunResult(
+        success=True,
+        passed=True,
+        command="./test",
+        exit_code=0,
+        duration=1.5,
+        stdout="log stdout",
+        stderr="",
+        result=result,
     )


-def create_eval_result() -> EvalResult:
+def create_eval_result(mode="test") -> EvalResult:
     return EvalResult(
         start=datetime.datetime.now() - datetime.timedelta(minutes=5),
         end=datetime.datetime.now(),
         compilation=sample_compile_result(),
-        run=sample_run_result(),
+        run=sample_run_result(mode),
     )


@@ -618,7 +633,7 @@ def test_generate_report_leaderboard_failure(sample_full_result: FullResult):
             "./test```\n"
             "**timed out** after 10.00 seconds."
         ),
-        Log(header="Program stdout", content="All tests passed"),
+        Log(header="Program stdout", content="log stdout"),
     ]


diff --git a/unit-tests/test_task.py b/unit-tests/test_task.py
index f3abc138..6bbd73a8 100644
--- a/unit-tests/test_task.py
+++ b/unit-tests/test_task.py
@@ -1,6 +1,5 @@
 import copy
 import json
-from pathlib import Path

 import pytest

@@ -214,43 +213,6 @@ def test_build_task_config_cuda():
     assert result == expected


-TASK_YAML = """
-lang: py
-description: "Test task description"
-ranking_by: geom
-test_timeout: 120
-files:
-  - name: "kernel.py"
-    source: "kernel.py"
-  - name: "submission.py"
-    source: "@SUBMISSION@"
-config:
-  main: "kernel.py"
-tests:
-  - input_size: 1000
-    dtype: "float32"
-benchmarks:
-  - input_size: 10000
-    dtype: "float32"
-templates:
-  Python: "template.py"
-  CUDA: "template.cu"
-"""
-
-
-@pytest.fixture
-def task_directory(tmp_path):
-    """Create a temporary directory structure for task definition testing"""
-    # Create source files
-    Path.write_text(tmp_path / "kernel.py", "def kernel(): pass")
-    Path.write_text(tmp_path / "template.py", "# Python template")
-    Path.write_text(tmp_path / "template.cu", "// CUDA template")
-
-    # Create task.yml
-    Path.write_text(tmp_path / "task.yml", TASK_YAML)
-    return tmp_path
-
-
 def test_make_task_definition(task_directory):
     """Test make_task_definition with a complete YAML structure"""

From 59e744108cb593067bfb9000eef0ce860bb219af Mon Sep 17 00:00:00 2001
From: ngc92 <7938269+ngc92@users.noreply.github.com>
Date: Sat, 2 Aug 2025 14:03:42 +0200
Subject: [PATCH 3/6] add simple backend tests

---
 pyproject.toml             |   5 +-
 unit-tests/test_backend.py | 210 +++++++++++++++++++++++++++++++++++++
 2 files changed, 213 insertions(+), 2 deletions(-)
 create mode 100644 unit-tests/test_backend.py

diff --git a/pyproject.toml b/pyproject.toml
index 2157d2c3..bb72bd06 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,14 +30,15 @@ dev = [
     "ruff",
     "pre-commit",
     "pytest",
-    "pytest-coverage"
+    "pytest-coverage",
+    "pytest-asyncio"
 ]

 [tool.setuptools.packages.find]
 where = ["src"]

 [tool.coverage.run]
-omit = ["src/libkernelbot/backend.py", "src/libkernelbot/run_eval.py", "src/libkernelbot/launchers/*.py"]
+omit = ["src/libkernelbot/run_eval.py", "src/libkernelbot/launchers/*.py"]
 relative_files = true

 [tool.coverage.report]
diff --git a/unit-tests/test_backend.py b/unit-tests/test_backend.py
new file mode 100644
index 00000000..2560d54c
--- /dev/null
+++ b/unit-tests/test_backend.py
@@ -0,0 +1,210 @@
+import datetime
+from decimal import Decimal
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from test_leaderboard_db import _submit_leaderboard
+from test_report import create_eval_result, sample_system_info
+
+from libkernelbot import backend, consts, report
+from libkernelbot.run_eval import FullResult
+
+
+class MockProgressReporter(report.RunProgressReporter):
+    def __init__(self, title):
+        super().__init__(title)
+        self._update_message = AsyncMock()
+        self.display_report = AsyncMock()
+
+
+@pytest.mark.asyncio
+async def test_handle_submission(bot: backend.KernelBackend, task_directory):
+    _submit_leaderboard(bot.db, task_directory)
+    with bot.db as db:
+        task = db.get_leaderboard("submit-leaderboard")["task"]
+    mock_launcher = MagicMock(spec=backend.Launcher)
+    mock_launcher.name = "launcher"
+    mock_launcher.gpus = [consts.ModalGPU.A100]
+    mock_launcher.run_submission = AsyncMock(
+        return_value=FullResult(
+            success=True, error="", system=sample_system_info(), runs={"test": create_eval_result()}
+        )
+    )
+    bot.register_launcher(mock_launcher)
+
+    reporter = MockProgressReporter("report")
+
+    await bot.handle_submission(
+        consts.ModalGPU.A100,
+        reporter,
+        "pass",
+        "submit.py",
+        task,
+        consts.SubmissionMode.LEADERBOARD,
+        -1,
+    )
+
+    assert reporter.title == "report ✅ success"
+    assert reporter.lines == [
+        "> ✅ Compilation successful",
+        "> ✅ Testing successful",
+        "> ❌ Benchmarks missing",
+        "> ❌ Leaderboard missing",
+    ]
+
+    call_args = reporter.display_report.call_args[0]
+    assert call_args[0] == "❌ submit.py on A100 (launcher)"
+    from libkernelbot.report import Log, Text
+
+    assert call_args[1].data == [
+        Text(
+            text="\n"
+            "Running on:\n"
+            "* GPU: `NVIDIA RTX 4090`\n"
+            "* CPU: `Intel i9-12900K`\n"
+            "* Platform: `Linux-5.15.0`\n"
+            "* Torch: `2.0.1+cu118`\n"
+        ),
+        Log(
+            header="✅ Passed 3/3 tests",
+            content="✅ Test addition\n"
+            "> Addition works correctly\n"
+            "✅ Test multiplication\n"
+            "❌ Test division\n"
+            "> Division by zero",
+        ),
+        Log(header="Program stdout", content="log stdout"),
+    ]
+
+    assert mock_launcher.run_submission.call_count == 1
+    assert mock_launcher.run_submission.call_args[0][0] == {
+        "arch": "80",
+        "benchmark_timeout": 180,
+        "benchmarks": [{"dtype": "float32", "input_size": 10000}],
+        "lang": "py",
+        "main": "kernel.py",
+        "mode": "leaderboard",
+        "ranked_timeout": 180,
+        "ranking_by": "geom",
+        "seed": None,
+        "sources": {"kernel.py": "def kernel(): pass", "submission.py": "pass"},
+        "test_timeout": 120,
+        "tests": [{"dtype": "float32", "input_size": 1000}],
+    }
+
+    with bot.db as db:
+        db.cursor.execute("SELECT COUNT(*) FROM leaderboard.runs")
+        assert db.cursor.fetchone()[0] == 0
+
+
+@pytest.mark.asyncio
+async def test_submit_leaderboard(bot: backend.KernelBackend, task_directory):
+    _submit_leaderboard(bot.db, task_directory)
+    submit_time = datetime.datetime.now(tz=datetime.timezone.utc)
+    with bot.db as db:
+        task = db.get_leaderboard("submit-leaderboard")["task"]
+        s_id = db.create_submission(
+            "submit-leaderboard",
+            "submit.py",
+            34,
+            "pass",
+            submit_time,
+        )
+    mock_launcher = MagicMock(spec=backend.Launcher)
+    mock_launcher.name = "launcher"
+    mock_launcher.gpus = [consts.ModalGPU.A100]
+    eval_result = create_eval_result("benchmark")
+    mock_launcher.run_submission = AsyncMock(
+        return_value=FullResult(
+            success=True, error="", system=sample_system_info(), runs={"leaderboard": eval_result}
+        )
+    )
+    bot.register_launcher(mock_launcher)
+
+    reporter = MockProgressReporter("report")
+
+    await bot.submit_leaderboard(
+        s_id,
+        "pass",
+        "submit.py",
+        consts.ModalGPU.A100,
+        reporter,
+        task,
+        consts.SubmissionMode.LEADERBOARD,
+        seed=1337,
+    )
+
+    # make sure we're not messing up the original seed
+    assert task.seed is None
+
+    assert mock_launcher.run_submission.call_count == 1
+    assert mock_launcher.run_submission.call_args[0][0] == {
+        "arch": "80",
+        "benchmark_timeout": 180,
+        "benchmarks": [{"dtype": "float32", "input_size": 10000}],
+        "lang": "py",
+        "main": "kernel.py",
+        "mode": "leaderboard",
+        "ranked_timeout": 180,
+        "ranking_by": "geom",
+        "seed": 1337,
+        "sources": {"kernel.py": "def kernel(): pass", "submission.py": "pass"},
+        "test_timeout": 120,
+        "tests": [{"dtype": "float32", "input_size": 1000}],
+    }
+
+    with bot.db as db:
+        sub = db.get_submission_by_id(s_id)
+    assert sub == {
+        "code": "pass",
+        "done": False,
+        "file_name": "submit.py",
+        "leaderboard_id": s_id,
+        "leaderboard_name": "submit-leaderboard",
+        "runs": [
+            {
+                "compilation": {
+                    "command": "nvcc -o test test.cu",
+                    "exit_code": 0,
+                    "nvcc_found": True,
+                    "nvcc_version": "11.8",
+                    "stderr": "",
+                    "stdout": "",
+                    "success": True,
+                },
+                "end_time": eval_result.end.replace(tzinfo=datetime.timezone.utc),
+                "meta": {
+                    "command": "./test",
+                    "duration": 1.5,
+                    "exit_code": 0,
+                    "stderr": "",
+                    "stdout": "log stdout",
+                    "success": True,
+                },
+                "mode": "leaderboard",
+                "passed": True,
+                "result": {
+                    "benchmark-count": "1",
+                    "benchmark.0.best": "1.3",
+                    "benchmark.0.err": "0.1",
+                    "benchmark.0.mean": "1.5",
+                    "benchmark.0.spec": "Matrix multiplication",
+                    "benchmark.0.status": "pass",
+                    "benchmark.0.worst": "1.8",
+                },
+                "runner": "A100",
+                "score": Decimal("1.5e-9"),
+                "secret": False,
+                "start_time": eval_result.start.replace(tzinfo=datetime.timezone.utc),
+                "system": {
+                    "cpu": "Intel i9-12900K",
+                    "gpu": "NVIDIA RTX 4090",
+                    "platform": "Linux-5.15.0",
+                    "torch": "2.0.1+cu118",
+                },
+            }
+        ],
+        "submission_id": 1,
+        "submission_time": submit_time,
+        "user_id": "34",
+    }

From 6b9e2797dfb8fc3f8ab8036d227417ecd1d9f91b Mon Sep 17 00:00:00 2001
From: ngc92 <7938269+ngc92@users.noreply.github.com>
Date: Sat, 2 Aug 2025 16:54:25 +0200
Subject: [PATCH 4/6] add backend tests

---
 unit-tests/test_backend.py | 190 +++++++++++++++++++++++++++++++++----
 1 file changed, 171 insertions(+), 19 deletions(-)

diff --git a/unit-tests/test_backend.py b/unit-tests/test_backend.py
index 2560d54c..dcc4e8d6 100644
--- a/unit-tests/test_backend.py
+++ b/unit-tests/test_backend.py
@@ -1,6 +1,6 @@
 import datetime
 from decimal import Decimal
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import ANY, AsyncMock, MagicMock

 import pytest
 from test_leaderboard_db import _submit_leaderboard
@@ -17,20 +17,35 @@ def __init__(self, title):
         self.display_report = AsyncMock()


-@pytest.mark.asyncio
-async def test_handle_submission(bot: backend.KernelBackend, task_directory):
-    _submit_leaderboard(bot.db, task_directory)
-    with bot.db as db:
-        task = db.get_leaderboard("submit-leaderboard")["task"]
+class MockMultReporter(report.MultiProgressReporter):
+    def __init__(self):
+        super().__init__()
+        self.show = AsyncMock()
+        self.make_message = MagicMock()
+        self.reporter_list: list[MockProgressReporter] = []
+
+    def add_run(self, title: str) -> "report.RunProgressReporter":
+        self.reporter_list.append(MockProgressReporter(title))
+        return self.reporter_list[-1]
+
+
+def _mock_launcher(bot: backend.KernelBackend, runs: dict, name="launcher"):
     mock_launcher = MagicMock(spec=backend.Launcher)
-    mock_launcher.name = "launcher"
+    mock_launcher.name = name
     mock_launcher.gpus = [consts.ModalGPU.A100]
     mock_launcher.run_submission = AsyncMock(
-        return_value=FullResult(
-            success=True, error="", system=sample_system_info(), runs={"test": create_eval_result()}
-        )
+        return_value=FullResult(success=True, error="", system=sample_system_info(), runs=runs)
     )
     bot.register_launcher(mock_launcher)
+    return mock_launcher
+
+
+@pytest.mark.asyncio
+async def test_handle_submission(bot: backend.KernelBackend, task_directory):
+    _submit_leaderboard(bot.db, task_directory)
+    with bot.db as db:
+        task = db.get_leaderboard("submit-leaderboard")["task"]
+    mock_launcher = _mock_launcher(bot, {"test": create_eval_result()})

     reporter = MockProgressReporter("report")

@@ -110,16 +125,8 @@ async def test_submit_leaderboard(bot: backend.KernelBackend, task_directory):
             "pass",
             submit_time,
         )
-    mock_launcher = MagicMock(spec=backend.Launcher)
-    mock_launcher.name = "launcher"
-    mock_launcher.gpus = [consts.ModalGPU.A100]
     eval_result = create_eval_result("benchmark")
-    mock_launcher.run_submission = AsyncMock(
-        return_value=FullResult(
-            success=True, error="", system=sample_system_info(), runs={"leaderboard": eval_result}
-        )
-    )
-    bot.register_launcher(mock_launcher)
+    mock_launcher = _mock_launcher(bot, {"leaderboard": eval_result})

     reporter = MockProgressReporter("report")

@@ -208,3 +215,148 @@ async def test_submit_leaderboard(bot: backend.KernelBackend, task_directory):
         "submission_time": submit_time,
         "user_id": "34",
     }
+
+
+@pytest.mark.asyncio
+async def test_submit_full(bot: backend.KernelBackend, task_directory):
+    _submit_leaderboard(bot.db, task_directory)
+    with bot.db as db:
+        task = db.get_leaderboard("submit-leaderboard")["task"]
+
+    eval_result = create_eval_result("benchmark")
+    mock_launcher = _mock_launcher(bot, {"leaderboard": eval_result})
+
+    from libkernelbot.submission import ProcessedSubmissionRequest
+
+    req = ProcessedSubmissionRequest(
+        code="pass",
+        file_name="submission.py",
+        user_id=5,
+        user_name="user",
+        gpus=["A100"],
+        leaderboard="submit-leaderboard",
+        task=task,
+        secret_seed=42,
+        task_gpus=["A100", "H100"],
+    )
+    reporter = MockMultReporter()
+    s_id, results = await bot.submit_full(
+        req, mode=consts.SubmissionMode.LEADERBOARD, reporter=reporter
+    )
+
+    expected_result = mock_launcher.run_submission.return_value
+    assert len(results) == 2
+    assert results == [expected_result, expected_result]
+
+    r1, r2 = reporter.reporter_list
+    assert r1.lines == [
+        "> ✅ Compilation successful",
+        "> ❌ Tests missing",
+        "> ❌ Benchmarks missing",
+        "> ✅ Leaderboard run successful",
+    ]
+    assert r2.lines == [
+        "> ✅ Compilation successful",
+        "> ❌ Tests missing",
+        "> ❌ Benchmarks missing",
+        "> ✅ Leaderboard run successful",
+    ]
+    assert r1.title == "A100 on Modal ✅ success"
+    assert r2.title == "A100 on Modal (secret) ✅ success"
+
+    with bot.db as db:
+        db_results = db.get_submission_by_id(s_id)
+    assert db_results == {
+        "code": "pass",
+        "done": True,
+        "file_name": "submission.py",
+        "leaderboard_id": 1,
+        "leaderboard_name": "submit-leaderboard",
+        "runs": [
+            {
+                "compilation": {
+                    "command": "nvcc -o test test.cu",
+                    "exit_code": 0,
+                    "nvcc_found": True,
+                    "nvcc_version": "11.8",
+                    "stderr": "",
+                    "stdout": "",
+                    "success": True,
+                },
+                "end_time": ANY,
+                "meta": {
+                    "command": "./test",
+                    "duration": 1.5,
+                    "exit_code": 0,
+                    "stderr": "",
+                    "stdout": "log stdout",
+                    "success": True,
+                },
+                "mode": "leaderboard",
+                "passed": True,
+                "result": {
+                    "benchmark-count": "1",
+                    "benchmark.0.best": "1.3",
+                    "benchmark.0.err": "0.1",
+                    "benchmark.0.mean": "1.5",
+                    "benchmark.0.spec": "Matrix multiplication",
+                    "benchmark.0.status": "pass",
+                    "benchmark.0.worst": "1.8",
+                },
+                "runner": "A100",
+                "score": Decimal("1.5E-9"),
+                "secret": False,
+                "start_time": ANY,
+                "system": {
+                    "cpu": "Intel i9-12900K",
+                    "gpu": "NVIDIA RTX 4090",
+                    "platform": "Linux-5.15.0",
+                    "torch": "2.0.1+cu118",
+                },
+            },
+            {
+                "compilation": {
+                    "command": "nvcc -o test test.cu",
+                    "exit_code": 0,
+                    "nvcc_found": True,
+                    "nvcc_version": "11.8",
+                    "stderr": "",
+                    "stdout": "",
+                    "success": True,
+                },
+                "end_time": ANY,
+                "meta": {
+                    "command": "./test",
+                    "duration": 1.5,
+                    "exit_code": 0,
+                    "stderr": "",
+                    "stdout": "log stdout",
+                    "success": True,
+                },
+                "mode": "leaderboard",
+                "passed": True,
+                "result": {
+                    "benchmark-count": "1",
+                    "benchmark.0.best": "1.3",
+                    "benchmark.0.err": "0.1",
+                    "benchmark.0.mean": "1.5",
+                    "benchmark.0.spec": "Matrix multiplication",
+                    "benchmark.0.status": "pass",
+                    "benchmark.0.worst": "1.8",
+                },
+                "runner": "A100",
+                "score": Decimal("1.5E-9"),
+                "secret": True,
+                "start_time": ANY,
+                "system": {
+                    "cpu": "Intel i9-12900K",
+                    "gpu": "NVIDIA RTX 4090",
+                    "platform": "Linux-5.15.0",
+                    "torch": "2.0.1+cu118",
+                },
+            },
+        ],
+        "submission_id": 1,
+        "submission_time": ANY,
+        "user_id": "5",
+    }

From 2b30ec0b04ad20595055875bd62585bc3e936454 Mon Sep 17 00:00:00 2001
From: ngc92 <7938269+ngc92@users.noreply.github.com>
Date: Thu, 21 Aug 2025 11:08:41 +0200
Subject: [PATCH 5/6] extract function to libkernelbot

---
 src/kernelbot/cogs/leaderboard_cog.py | 37 +-----------------
 src/libkernelbot/submission.py        | 34 ++++++++++++++++-
 unit-tests/test_report.py             |  2 +-
 unit-tests/test_submission.py         | 55 ++++++++++++++++++++++++++-
 4 files changed, 90 insertions(+), 38 deletions(-)

diff --git a/src/kernelbot/cogs/leaderboard_cog.py b/src/kernelbot/cogs/leaderboard_cog.py
index 6cb8bb40..84dbcc5b 100644
--- a/src/kernelbot/cogs/leaderboard_cog.py
+++ b/src/kernelbot/cogs/leaderboard_cog.py
@@ -19,10 +19,9 @@
 from libkernelbot.leaderboard_db import (
     LeaderboardItem,
     LeaderboardRankedEntry,
-    RunItem,
     SubmissionItem,
 )
-from libkernelbot.submission import SubmissionRequest, prepare_submission
+from libkernelbot.submission import SubmissionRequest, generate_run_verdict, prepare_submission
 from libkernelbot.utils import format_time, setup_logging

 if TYPE_CHECKING:
@@ -57,38 +56,6 @@ async def select_gpu_view(
         await view.wait()
         return view

-    def generate_run_verdict(self, run: RunItem, sub_data: SubmissionItem):
-        medals = {1: "🥇 First", 2: "🥈 Second", 3: "🥉 Third"}
-
-        # get the competition
-        with self.bot.leaderboard_db as db:
-            competition = db.get_leaderboard_submissions(
-                sub_data["leaderboard_name"], run["runner"]
-            )
-        # compare against the competition
-        other_by_user = False
-        run_time = float(run["score"])
-        score_text = format_time(run_time * 1e9)
-
-        for entry in competition:
-            # can we find our own run? Only if it is the fastest submission by this user
-            if entry["submission_id"] == sub_data["submission_id"]:
-                rank = entry["rank"]
-                if 1 <= rank <= 3:
-                    return f"> {medals[rank]} place on {run['runner']}: {score_text}"
-                elif rank <= 10:
-                    return f"> {rank}th place on {run['runner']}: {score_text}"
-                else:
-                    return f"> Personal best on {run['runner']}: {score_text}"
-            elif entry["user_id"] == sub_data["user_id"]:
-                other_by_user = True
-        if other_by_user:
-            # User already has a submission that is faster
-            return f"> Successful on {run['runner']}: {score_text}"
-        else:
-            # no submission by the user exists
-            return f"> 🍾 First successful submission on {run['runner']}: {score_text}"
-
     async def post_submit_hook(self, interaction: discord.Interaction, sub_id: int):
         with self.bot.leaderboard_db as db:
             sub_data: SubmissionItem = db.get_submission_by_id(sub_id)
@@ -100,7 +67,7 @@ async def post_submit_hook(self, interaction: discord.Interaction, sub_id: int):
                 and run["mode"] == SubmissionMode.LEADERBOARD.value
                 and run["passed"]
             ):
-                result_lines.append(self.generate_run_verdict(run, sub_data))
+                result_lines.append(generate_run_verdict(self.bot.backend, run, sub_data))

         if len(result_lines) > 0:
             await send_discord_message(
diff --git a/src/libkernelbot/submission.py b/src/libkernelbot/submission.py
index 67202890..1f7c23cc 100644
--- a/src/libkernelbot/submission.py
+++ b/src/libkernelbot/submission.py
@@ -8,10 +8,11 @@
 from better_profanity import profanity

 from libkernelbot.consts import RankCriterion
+from libkernelbot.db_types import RunItem, SubmissionItem
 from libkernelbot.leaderboard_db import LeaderboardDB, LeaderboardItem
 from libkernelbot.run_eval import FullResult
 from libkernelbot.task import LeaderboardTask
-from libkernelbot.utils import KernelBotError, setup_logging
+from libkernelbot.utils import KernelBotError, format_time, setup_logging

 if typing.TYPE_CHECKING:
     from backend import KernelBackend
@@ -194,3 +195,34 @@ def compute_score(result: FullResult, task: LeaderboardTask, submission_id: int)
         raise KernelBotError(f"Invalid ranking criterion {task.ranking_by}")

     return score
+
+
+def generate_run_verdict(backend: "KernelBackend", run: RunItem, sub_data: SubmissionItem):
+    medals = {1: "🥇 First", 2: "🥈 Second", 3: "🥉 Third"}
+
+    # get the competition
+    with backend.db as db:
+        competition = db.get_leaderboard_submissions(sub_data["leaderboard_name"], run["runner"])
+    # compare against the competition
+    other_by_user = False
+    run_time = float(run["score"])
+    score_text = format_time(run_time * 1e9)
+
+    for entry in competition:
+        # can we find our own run? Only if it is the fastest submission by this user
+        if entry["submission_id"] == sub_data["submission_id"]:
+            rank = entry["rank"]
+            if 1 <= rank <= 3:
+                return f"> {medals[rank]} place on {run['runner']}: {score_text}"
+            elif rank <= 10:
+                return f"> {rank}th place on {run['runner']}: {score_text}"
+            else:
+                return f"> Personal best on {run['runner']}: {score_text}"
+        elif entry["user_id"] == sub_data["user_id"]:
+            other_by_user = True
+    if other_by_user:
+        # User already has a submission that is faster
+        return f"> Successful on {run['runner']}: {score_text}"
+    else:
+        # no submission by the user exists
+        return f"> 🍾 First successful submission on {run['runner']}: {score_text}"
diff --git a/unit-tests/test_report.py b/unit-tests/test_report.py
index ea148267..e9efc41c 100644
--- a/unit-tests/test_report.py
+++ b/unit-tests/test_report.py
@@ -62,7 +62,7 @@ def sample_run_result(mode="test") -> RunResult:
             "benchmark.0.worst": "1.8",
         }
     else:
-        assert False, f"Invalid mode: {mode}"
+        raise AssertionError(f"Invalid mode: {mode}")

     return RunResult(
         success=True,
diff --git a/unit-tests/test_submission.py b/unit-tests/test_submission.py
index 58f57c93..5dc1fd11 100644
--- a/unit-tests/test_submission.py
+++ b/unit-tests/test_submission.py
@@ -6,7 +6,7 @@

 from libkernelbot import submission
 from libkernelbot.consts import RankCriterion
-from libkernelbot.db_types import LeaderboardItem
+from libkernelbot.db_types import LeaderboardItem, RunItem, SubmissionItem
 from libkernelbot.utils import KernelBotError


@@ -344,3 +344,56 @@ def test_compute_score():
     mock_task.ranking_by = "WRONG"
     with pytest.raises(KernelBotError, match="Invalid ranking criterion WRONG"):
         submission.compute_score(mock_result, mock_task, 1)
+
+
+def test_generate_run_verdict(mock_backend):
+    """Test generate_run_verdict function with various ranking scenarios."""
+    from libkernelbot.submission import generate_run_verdict
+
+    # Mock run and submission data
+    run_item: RunItem = {"score": 1.5, "runner": "A100"}
+
+    sub_data: SubmissionItem = {
+        "submission_id": 123,
+        "leaderboard_name": "test_board",
+        "user_id": 42,
+    }
+
+    # Test first place
+    mock_backend.db.get_leaderboard_submissions.return_value = [
+        {"submission_id": 123, "user_id": 42, "rank": 1},
+        {"submission_id": 542, "user_id": 44, "rank": 2},
+    ]
+    result = generate_run_verdict(mock_backend, run_item, sub_data)
+    assert result == "> 🥇 First place on A100: 1500 ms"
+
+    # Test 5th place
+    mock_backend.db.get_leaderboard_submissions.return_value = [
+        {"submission_id": 652, "user_id": 41, "rank": 4},
+        {"submission_id": 123, "user_id": 42, "rank": 5},
+    ]
+    result = generate_run_verdict(mock_backend, run_item, sub_data)
+    assert result == "> 5th place on A100: 1500 ms"
+
+    # Test personal best (rank > 10)
+    mock_backend.db.get_leaderboard_submissions.return_value = [
+        {"submission_id": 123, "user_id": 42, "rank": 15}
+    ]
+    result = generate_run_verdict(mock_backend, run_item, sub_data)
+    assert result == "> Personal best on A100: 1500 ms"
+
+    # This user already has a faster submission
+    mock_backend.db.get_leaderboard_submissions.return_value = [
+        {"submission_id": 999, "user_id": 42, "rank": 3},
+        {"submission_id": 256, "user_id": 41, "rank": 2},
+    ]
+    result = generate_run_verdict(mock_backend, run_item, sub_data)
+    assert result == "> Successful on A100: 1500 ms"
+
+    # Test first submission by user
+    mock_backend.db.get_leaderboard_submissions.return_value = [
+        {"submission_id": 256, "user_id": 41, "rank": 2},
+        {"submission_id": 999, "user_id": 999, "rank": 999},
+    ]
+    result = generate_run_verdict(mock_backend, run_item, sub_data)
+    assert result == "> 🍾 First successful submission on A100: 1500 ms"

From ccdf4f17a3e0ddcba71958a40c3c98860b2136a1 Mon Sep 17 00:00:00 2001
From: ngc92 <7938269+ngc92@users.noreply.github.com>
Date: Fri, 22 Aug 2025 09:20:44 +0200
Subject: [PATCH 6/6] rename unit tests -> testing

---
 .github/workflows/coverage.yml                     | 2 +-
 .github/workflows/{unit_tests.yml => testing.yml}  | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)
 rename .github/workflows/{unit_tests.yml => testing.yml} (98%)

diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index 933ae0a5..8617e2a5 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -1,7 +1,7 @@
 name: Post coverage comment
 on:
   workflow_run:
-    workflows: ["UnitTests"]
+    workflows: ["Testing"]
     types:
       - completed

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/testing.yml
similarity index 98%
rename from .github/workflows/unit_tests.yml
rename to .github/workflows/testing.yml
index cf4a7356..71fd888f 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/testing.yml
@@ -1,4 +1,4 @@
-name: UnitTests
+name: Testing

 on:
   push:
@@ -34,3 +34,4 @@ jobs:
           name: python-coverage-comment-action
           # If you use a different name, update COMMENT_FILENAME accordingly
           path: python-coverage-comment-action.txt
+