From b7e8c8c59214a76f81b3cc69ed32fb104cca830e Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Mon, 25 Aug 2025 19:15:27 +0200 Subject: [PATCH 1/6] Simplify run_single_evaluation This de-duplicates some duplicated code paths. This makes it easier to patch profiling calls into the function later on. --- src/libkernelbot/run_eval.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/libkernelbot/run_eval.py b/src/libkernelbot/run_eval.py index 73e7e374..8f468e39 100644 --- a/src/libkernelbot/run_eval.py +++ b/src/libkernelbot/run_eval.py @@ -301,26 +301,24 @@ def run_single_evaluation( A single runner run, either in the context of test files, or in the context of benchmark files. """ - if mode == "test": - with tempfile.NamedTemporaryFile("w") as tests_file: - tests_file.write(tests) - tests_file.flush() - return run_program( - call + [mode, tests_file.name], seed=seed, timeout=test_timeout, multi_gpu=multi_gpu - ) - elif mode in ["benchmark", "profile", "leaderboard"]: - timeout = ranked_timeout if mode == "leaderboard" else benchmark_timeout - with tempfile.NamedTemporaryFile("w") as bench_file: + with tempfile.NamedTemporaryFile("w") as cases: + if mode == "test": + timeout = test_timeout + cases.write(tests) + elif mode in ["benchmark", "profile", "leaderboard"]: + timeout = ranked_timeout if mode == "leaderboard" else benchmark_timeout if ranking_by == "last": - bench_file.write(benchmarks.splitlines(keepends=True)[-1]) + cases.write(benchmarks.splitlines(keepends=True)[-1]) else: - bench_file.write(benchmarks) - bench_file.flush() - return run_program( - call + [mode, bench_file.name], seed=seed, timeout=timeout, multi_gpu=multi_gpu - ) - else: - raise ValueError(f"Invalid mode {mode}") + cases.write(benchmarks) + else: + raise ValueError(f"Invalid mode {mode}") + + cases.flush() + + call += [mode, cases.name] + + return run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu) def 
make_system_info() -> SystemInfo: From f3dd42e8eba528be1d37c0206fb65e9324982670 Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Mon, 25 Aug 2025 19:59:43 +0200 Subject: [PATCH 2/6] add gpu runtime info to SystemInfo This way we can tell whether we are using CUDA or ROCm later on. This also fixes the ROCm fallback path. --- scripts/ci_test_cuda.py | 4 +++- scripts/ci_test_python.py | 3 ++- src/libkernelbot/report.py | 1 + src/libkernelbot/run_eval.py | 33 +++++++++++++++++++++++++-------- tests/test_backend.py | 4 ++++ tests/test_report.py | 12 +++++++++++- 6 files changed, 46 insertions(+), 11 deletions(-) diff --git a/scripts/ci_test_cuda.py b/scripts/ci_test_cuda.py index ae1a6cd7..c3fa893c 100644 --- a/scripts/ci_test_cuda.py +++ b/scripts/ci_test_cuda.py @@ -4,7 +4,7 @@ import pytest from libkernelbot.consts import ExitCode, SubmissionMode -from libkernelbot.run_eval import compile_cuda_script, run_cuda_script +from libkernelbot.run_eval import compile_cuda_script, make_system_info, run_cuda_script ref = Path("examples/identity_cuda/reference.cuh").read_text() task_h = Path("examples/identity_cuda/task.h").read_text() @@ -19,6 +19,7 @@ def run_cuda_helper(sources: dict, headers: dict = None, arch=None, **kwargs): headers = header_files eval_result = run_cuda_script( + make_system_info(), sources, headers, arch=arch, @@ -194,6 +195,7 @@ def test_include_dirs(tmp_path: Path): # can also use generic flags argument result = run_cuda_script( + make_system_info(), {"eval.cu": eval_cu, "submission.cu": sub}, header_files, flags=["-I.", f"-I{tmp_path}"], diff --git a/scripts/ci_test_python.py b/scripts/ci_test_python.py index 41ac92bd..7cc4fedd 100644 --- a/scripts/ci_test_python.py +++ b/scripts/ci_test_python.py @@ -1,7 +1,7 @@ from pathlib import Path from libkernelbot.consts import ExitCode, SubmissionMode -from libkernelbot.run_eval import run_pytorch_script +from libkernelbot.run_eval import make_system_info, run_pytorch_script ref = 
Path("examples/identity_py/reference.py").read_text() task = Path("examples/identity_py/task.py").read_text() @@ -12,6 +12,7 @@ def run_pytorch_helper(sources: dict, tests=None, **kwargs): result = run_pytorch_script( + make_system_info(), sources, "eval.py", mode=SubmissionMode.TEST.value, diff --git a/src/libkernelbot/report.py b/src/libkernelbot/report.py index ec52e7bd..805a17df 100644 --- a/src/libkernelbot/report.py +++ b/src/libkernelbot/report.py @@ -267,6 +267,7 @@ def generate_system_info(system: SystemInfo): Running on: * GPU: `{system.gpu}` * CPU: `{system.cpu}` +* Runtime: `{system.runtime}` * Platform: `{system.platform}` * Torch: `{system.torch}` """ diff --git a/src/libkernelbot/run_eval.py b/src/libkernelbot/run_eval.py index 8f468e39..6f39d78f 100644 --- a/src/libkernelbot/run_eval.py +++ b/src/libkernelbot/run_eval.py @@ -1,6 +1,7 @@ import dataclasses import datetime import functools +import json import os import shlex import subprocess @@ -46,6 +47,7 @@ class SystemInfo: gpu: str = '' # Model name of the GPU device_count: int = 1 # Number of GPUs cpu: str = '' # Model name of the CPU + runtime: str = '' # Whether CUDA or ROCm platform: str = '' # Platform string of the machine torch: str = '' # Torch version # fmt: on @@ -285,6 +287,7 @@ def run_program( def run_single_evaluation( + system: SystemInfo, call: list[str], mode: str, *, @@ -321,7 +324,7 @@ def run_single_evaluation( return run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu) -def make_system_info() -> SystemInfo: +def make_system_info() -> SystemInfo: # noqa: C901 info = SystemInfo() try: import torch @@ -332,19 +335,29 @@ def make_system_info() -> SystemInfo: if torch.cuda.is_available(): info.gpu = torch.cuda.get_device_name() info.device_count = torch.cuda.device_count() + if torch.version.hip is not None: + info.runtime = "ROCm" + elif torch.version.cuda is not None: + info.runtime = "CUDA" except ImportError: # get GPU info manually try: info.gpu = 
subprocess.check_output( ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], encoding="utf-8" ) + info.device_count = info.gpu.count('\n') + info.runtime = "CUDA" except subprocess.CalledProcessError: # try again for HIP - # TODO suggested by Claude, untested try: - info.gpu = subprocess.check_output( - ["rocm-smi", "--showproductname"], encoding="utf-8" - ) + rocm_info = json.loads(subprocess.check_output( + ["rocm-smi", "--showproductname", "--json"], encoding="utf-8" + )) + if len(rocm_info) > 0: + info.gpu = next(rocm_info.__iter__())["Card Series"] + + info.device_count = len(rocm_info) + info.runtime = "ROCm" except subprocess.CalledProcessError: # OK, no GPU info available pass @@ -373,6 +386,7 @@ def make_system_info() -> SystemInfo: def run_cuda_script( # # noqa: C901 + system: SystemInfo, sources: dict[str, str], headers: Optional[dict[str, str]] = None, arch: Optional[int] = None, @@ -432,7 +446,7 @@ def run_cuda_script( # # noqa: C901 if os.path.exists(f): os.remove(f) - run_result = run_single_evaluation(["./eval.out"], **kwargs) + run_result = run_single_evaluation(system, ["./eval.out"], **kwargs) return EvalResult( start=start, end=datetime.datetime.now(), @@ -442,6 +456,7 @@ def run_cuda_script( # # noqa: C901 def run_pytorch_script( # noqa: C901 + system: SystemInfo, sources: dict[str, str], main: str, **kwargs, @@ -493,7 +508,7 @@ def run_pytorch_script( # noqa: C901 exit_code=e.returncode, ) - run = run_single_evaluation(["python", main], **kwargs) + run = run_single_evaluation(system, ["python", main], **kwargs) return EvalResult( start=start, @@ -556,7 +571,9 @@ def build_test_string(tests: list[dict]): def run_config(config: dict): + system = make_system_info() common_args = { + "system": system, "tests": build_test_string(config.get("tests", [])), "benchmarks": build_test_string(config.get("benchmarks", [])), "seed": config.get("seed", None), @@ -589,4 +606,4 @@ def run_config(config: dict): raise ValueError(f"Invalid language 
{config['lang']}") results = run_evaluation(runner, config["mode"]) - return FullResult(success=True, error="", runs=results, system=make_system_info()) + return FullResult(success=True, error="", runs=results, system=system) diff --git a/tests/test_backend.py b/tests/test_backend.py index 585674cd..94cc2795 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -77,6 +77,7 @@ async def test_handle_submission(bot: backend.KernelBackend, task_directory): "Running on:\n" "* GPU: `NVIDIA RTX 4090`\n" "* CPU: `Intel i9-12900K`\n" + "* Runtime: `CUDA`\n" "* Platform: `Linux-5.15.0`\n" "* Torch: `2.0.1+cu118`\n" ), @@ -210,6 +211,7 @@ async def test_submit_leaderboard(bot: backend.KernelBackend, task_directory): "device_count": 1, "gpu": "NVIDIA RTX 4090", "platform": "Linux-5.15.0", + "runtime": "CUDA", "torch": "2.0.1+cu118", }, } @@ -315,6 +317,7 @@ async def test_submit_full(bot: backend.KernelBackend, task_directory): "device_count": 1, "gpu": "NVIDIA RTX 4090", "platform": "Linux-5.15.0", + "runtime": "CUDA", "torch": "2.0.1+cu118", }, }, @@ -357,6 +360,7 @@ async def test_submit_full(bot: backend.KernelBackend, task_directory): "device_count": 1, "gpu": "NVIDIA RTX 4090", "platform": "Linux-5.15.0", + "runtime": "CUDA", "torch": "2.0.1+cu118", }, }, diff --git a/tests/test_report.py b/tests/test_report.py index e9efc41c..ab62d8e6 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -22,7 +22,11 @@ # define helpers and fixtures that create mock results def sample_system_info() -> SystemInfo: return SystemInfo( - gpu="NVIDIA RTX 4090", cpu="Intel i9-12900K", platform="Linux-5.15.0", torch="2.0.1+cu118" + gpu="NVIDIA RTX 4090", + cpu="Intel i9-12900K", + runtime="CUDA", + platform="Linux-5.15.0", + torch="2.0.1+cu118", ) @@ -491,6 +495,7 @@ def test_generate_report_test_failure(sample_full_result: FullResult): "Running on:\n" "* GPU: `NVIDIA RTX 4090`\n" "* CPU: `Intel i9-12900K`\n" + "* Runtime: `CUDA`\n" "* Platform: `Linux-5.15.0`\n" "* 
Torch: `2.0.1+cu118`\n" ), @@ -522,6 +527,7 @@ def test_generate_report_benchmark_failure(sample_full_result: FullResult): "Running on:\n" "* GPU: `NVIDIA RTX 4090`\n" "* CPU: `Intel i9-12900K`\n" + "* Runtime: `CUDA`\n" "* Platform: `Linux-5.15.0`\n" "* Torch: `2.0.1+cu118`\n" ), @@ -556,6 +562,7 @@ def test_generate_report_benchmark_failure(sample_full_result: FullResult): "Running on:\n" "* GPU: `NVIDIA RTX 4090`\n" "* CPU: `Intel i9-12900K`\n" + "* Runtime: `CUDA`\n" "* Platform: `Linux-5.15.0`\n" "* Torch: `2.0.1+cu118`\n" ), @@ -591,6 +598,7 @@ def test_generate_report_leaderboard_failure(sample_full_result: FullResult): "Running on:\n" "* GPU: `NVIDIA RTX 4090`\n" "* CPU: `Intel i9-12900K`\n" + "* Runtime: `CUDA`\n" "* Platform: `Linux-5.15.0`\n" "* Torch: `2.0.1+cu118`\n" ), @@ -616,6 +624,7 @@ def test_generate_report_leaderboard_failure(sample_full_result: FullResult): "Running on:\n" "* GPU: `NVIDIA RTX 4090`\n" "* CPU: `Intel i9-12900K`\n" + "* Runtime: `CUDA`\n" "* Platform: `Linux-5.15.0`\n" "* Torch: `2.0.1+cu118`\n" ), @@ -653,6 +662,7 @@ def test_generate_report_profile(sample_full_result: FullResult): "Running on:\n" "* GPU: `NVIDIA RTX 4090`\n" "* CPU: `Intel i9-12900K`\n" + "* Runtime: `CUDA`\n" "* Platform: `Linux-5.15.0`\n" "* Torch: `2.0.1+cu118`\n" ), From c4c6b4197b99db2086522fb617ddcbfc114ea3a2 Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Thu, 28 Aug 2025 22:45:13 +0200 Subject: [PATCH 3/6] add 'link' report result type This will be used to communicate external download links such as profiling results. 
--- src/kernelbot/discord_reporter.py | 6 ++++++ src/libkernelbot/report.py | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/kernelbot/discord_reporter.py b/src/kernelbot/discord_reporter.py index f49e5225..3b6fd8c3 100644 --- a/src/kernelbot/discord_reporter.py +++ b/src/kernelbot/discord_reporter.py @@ -2,6 +2,7 @@ from discord_utils import _send_split_log from libkernelbot.report import ( + Link, Log, MultiProgressReporter, RunProgressReporter, @@ -69,6 +70,11 @@ async def display_report(self, title: str, report: RunResultReport): message += part.text elif isinstance(part, Log): message = await _send_split_log(thread, message, part.header, part.content) + elif isinstance(part, Link): + if len(message) > 0: + await thread.send(message) + message = "" + await thread.send(f"{part.title}: [{part.text}]({part.url})") if len(message) > 0: await thread.send(message) diff --git a/src/libkernelbot/report.py b/src/libkernelbot/report.py index 805a17df..1ad52aa0 100644 --- a/src/libkernelbot/report.py +++ b/src/libkernelbot/report.py @@ -32,9 +32,20 @@ class Log: content: str +@dataclasses.dataclass +class Link: + """ + Link represents a link in the profiling report, to result data + which can be downloaded by clicking it. 
+ """ + title: str + text: str + url: str + + class RunResultReport: def __init__(self, data=None): - self.data: List[Text | Log] = data or [] + self.data: List[Text | Log | Link] = data or [] def add_text(self, section: str): self.data.append(Text(section)) @@ -42,6 +53,9 @@ def add_text(self, section: str): def add_log(self, header: str, log: str): self.data.append(Log(header, log)) + def add_link(self, title: str, text: str, url: str): + self.data.append(Link(title, text, url)) + def __repr__(self): return f"RunResultReport(data={self.data})" From 9474255b5bc9c12ee860ef0d68b93c7b425ca47d Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Thu, 28 Aug 2025 22:50:13 +0200 Subject: [PATCH 4/6] add profiling data infrastructure A new ProfileResult type is added to run_eval, which is is returned in the EvalResult type. Among other fields, this contains the `download_url` field which should be used by the user to download profiling data. Note that the actual public download link may not be known in run_eval.py. In this case, it is the intention that the launcher fixes up the `download_url` before returning the results back to libkernelbot. 
--- src/libkernelbot/launchers/github.py | 24 ++++++++++++++++-------- src/libkernelbot/report.py | 7 +++++++ src/libkernelbot/run_eval.py | 22 ++++++++++++++++++---- tests/test_report.py | 18 ++++++++++++++++-- 4 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/libkernelbot/launchers/github.py b/src/libkernelbot/launchers/github.py index be968429..1ba1108d 100644 --- a/src/libkernelbot/launchers/github.py +++ b/src/libkernelbot/launchers/github.py @@ -23,7 +23,14 @@ SubmissionMode, ) from libkernelbot.report import RunProgressReporter -from libkernelbot.run_eval import CompileResult, EvalResult, FullResult, RunResult, SystemInfo +from libkernelbot.run_eval import ( + CompileResult, + EvalResult, + FullResult, + ProfileResult, + RunResult, + SystemInfo, +) from libkernelbot.utils import setup_logging from .launcher import Launcher @@ -123,17 +130,18 @@ async def run_submission( runs = {} # convert json back to EvalResult structures, which requires # special handling for datetime and our dataclasses. 
+ for k, v in data["runs"].items(): - if "compilation" in v and v["compilation"] is not None: - comp = CompileResult(**v["compilation"]) - else: - comp = None - run = RunResult(**v["run"]) + comp_res = None if v.get("compilation") is None else CompileResult(**v["compilation"]) + run_res = None if v.get("run") is None else RunResult(**v["run"]) + profile_res = None if v.get("profile") is None else ProfileResult(**v["profile"]) + res = EvalResult( start=datetime.datetime.fromisoformat(v["start"]), end=datetime.datetime.fromisoformat(v["end"]), - compilation=comp, - run=run, + compilation=comp_res, + run=run_res, + profile=profile_res, ) runs[k] = res diff --git a/src/libkernelbot/report.py b/src/libkernelbot/report.py index 1ad52aa0..25bb27cb 100644 --- a/src/libkernelbot/report.py +++ b/src/libkernelbot/report.py @@ -337,6 +337,13 @@ def generate_report(result: FullResult) -> RunResultReport: # noqa: C901 make_profile_log(prof_run.run), ) + if prof_run.profile is not None and prof_run.profile.download_url is not None: + report.add_link( + f"{prof_run.profile.profiler} profiling output", + "Download from GitHub", + prof_run.profile.download_url, + ) + if "leaderboard" in runs: bench_run = runs["leaderboard"] if _handle_crash_report(report, bench_run): diff --git a/src/libkernelbot/run_eval.py b/src/libkernelbot/run_eval.py index 6f39d78f..e8722ba7 100644 --- a/src/libkernelbot/run_eval.py +++ b/src/libkernelbot/run_eval.py @@ -14,6 +14,16 @@ from libkernelbot.consts import CUDA_FLAGS, ExitCode, Timeout +@dataclasses.dataclass +class ProfileResult: + # fmt: off + profiler: str # The profiler used to gather this data + # Public download URL of all files created by the profiler + # This may also be configured later + download_url: Optional[str] + #fmt: on + + @dataclasses.dataclass class CompileResult: # fmt: off @@ -60,6 +70,7 @@ class EvalResult: end: datetime.datetime # and when did it finish compilation: CompileResult | None # results of compilation run: RunResult | 
None # result of actually running the executable/script + profile: ProfileResult | None # result of profiling the executable # fmt: on @@ -299,7 +310,7 @@ def run_single_evaluation( ranked_timeout: int = Timeout.RANKED, ranking_by: str = "last", seed: Optional[int] = None, -) -> RunResult: +) -> tuple[RunResult, Optional[ProfileResult]]: """ A single runner run, either in the context of test files, or in the context of benchmark files. @@ -321,7 +332,7 @@ def run_single_evaluation( call += [mode, cases.name] - return run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu) + return run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu), None def make_system_info() -> SystemInfo: # noqa: C901 @@ -436,6 +447,7 @@ def run_cuda_script( # # noqa: C901 end=datetime.datetime.now(), compilation=compile_result, run=None, + profile=None, ) # cleaning up all source files _before_ we let the user code run, just in @@ -446,12 +458,13 @@ def run_cuda_script( # # noqa: C901 if os.path.exists(f): os.remove(f) - run_result = run_single_evaluation(system, ["./eval.out"], **kwargs) + run_result, profile_result = run_single_evaluation(system, ["./eval.out"], **kwargs) return EvalResult( start=start, end=datetime.datetime.now(), compilation=compile_result, run=run_result, + profile=profile_result, ) @@ -508,13 +521,14 @@ def run_pytorch_script( # noqa: C901 exit_code=e.returncode, ) - run = run_single_evaluation(system, ["python", main], **kwargs) + run, profile = run_single_evaluation(system, ["python", main], **kwargs) return EvalResult( start=start, end=datetime.datetime.now(), compilation=comp, run=run, + profile=profile, ) finally: for f in sources.keys(): diff --git a/tests/test_report.py b/tests/test_report.py index ab62d8e6..a1964e62 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -16,7 +16,14 @@ make_short_report, make_test_log, ) -from libkernelbot.run_eval import CompileResult, EvalResult, FullResult, RunResult, SystemInfo +from 
libkernelbot.run_eval import (
+    CompileResult,
+    EvalResult,
+    FullResult,
+    ProfileResult,
+    RunResult,
+    SystemInfo,
+)
 
 
 # define helpers and fixtures that create mock results
@@ -86,6 +93,7 @@ def create_eval_result(mode="test") -> EvalResult:
         end=datetime.datetime.now(),
         compilation=sample_compile_result(),
         run=sample_run_result(mode),
+        profile=None,
     )
 
 
@@ -298,6 +306,7 @@ def test_make_short_report_full_success():
             stderr="",
             result={},
         ),
+        profile=None,
     )
 
     result = make_short_report(runs, full=True)
@@ -653,8 +662,12 @@ def test_generate_report_profile(sample_full_result: FullResult):
         "benchmark.0.spec": "Benchmark",
         "benchmark.0.report": base64.b64encode(b"Profile report", b"+*").decode("utf-8"),
     }
+    sample_full_result.runs["profile"].profile = ProfileResult(
+        profiler="NSight",
+        download_url="https://example.com",
+    )
     report = generate_report(sample_full_result)
-    from libkernelbot.report import Log, Text
+    from libkernelbot.report import Link, Log, Text
 
     assert report.data == [
         Text(
@@ -675,6 +688,7 @@ def test_generate_report_profile(sample_full_result: FullResult):
             "> Division by zero",
         ),
         Log(header="Profiling", content="Benchmark\n\n Profile report\n"),
+        Link("NSight profiling output", "Download from GitHub", "https://example.com"),
     ]
 

From a6fe7c5ac6b89e1748151d5d362949db3edae67d Mon Sep 17 00:00:00 2001
From: Robin Voetter
Date: Thu, 28 Aug 2025 23:03:43 +0200
Subject: [PATCH 5/6] github launcher: separate artifact downloading from indexing

The new function `GitHubRun.get_artifact_index` returns a dict of artifacts
available from the run. For each artifact, the GitHub API URL and public
download URL are returned. The latter is not available directly from the
GitHub API, however, it can be easily constructed from the data that is
available in the workflow result.

`download_artifacts` is replaced by a function which downloads a specific
artifact rather than all of them.
Additionally, the function no longer writes to a temp file when downloading the artifact; the results of the download request can be piped directly into zipfile using BytesIO. --- src/libkernelbot/launchers/github.py | 79 +++++++++++++++++----------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/src/libkernelbot/launchers/github.py b/src/libkernelbot/launchers/github.py index 1ba1108d..41c603ab 100644 --- a/src/libkernelbot/launchers/github.py +++ b/src/libkernelbot/launchers/github.py @@ -1,10 +1,11 @@ import asyncio import base64 +import dataclasses import datetime +import io import json import math import pprint -import tempfile import uuid import zipfile import zlib @@ -56,7 +57,7 @@ def __init__(self, repo: str, token: str, branch: str): self.token = token self.branch = branch - async def run_submission( + async def run_submission( # noqa: C901 self, config: dict, gpu_type: GPU, status: RunProgressReporter ) -> FullResult: gpu_vendor = None @@ -113,15 +114,17 @@ async def run_submission( await status.push("Downloading artifacts...") logger.info("Downloading artifacts...") - artifacts = await run.download_artifacts() - if "run-result" not in artifacts: - logger.error("Could not find `run-result` among artifacts: %s", artifacts.keys()) + index = run.get_artifact_index() + + if "run-result" not in index: + logger.error("Could not find `run-result` among artifacts: %s", index.keys()) await status.push("Downloading artifacts... failed") return FullResult( success=False, error="Could not download artifacts", runs={}, system=SystemInfo() ) - logs = artifacts["run-result"]["result.json"].decode("utf-8") + artifact = await run.download_artifact(index["run-result"]) + logs = artifact["result.json"].decode("utf-8") await status.update("Downloading artifacts... done") logger.info("Downloading artifacts... 
done") @@ -155,6 +158,13 @@ async def wait_callback(self, run: "GitHubRun", status: RunProgressReporter): ) +@dataclasses.dataclass +class GitHubArtifact: + name: str + archive_download_url: str + public_download_url: str + + class GitHubRun: def __init__(self, repo: str, token: str, branch: str, workflow_file: str): gh = Github(token) @@ -331,34 +341,43 @@ async def wait_for_completion( logger.error(f"Error waiting for GitHub run {self.run_id}: {e}", exc_info=e) raise # Re-raise other exceptions - async def download_artifacts(self) -> dict: - logger.info("Attempting to download artifacts for run %s", self.run_id) + + def get_artifact_index(self) -> dict[str, GitHubArtifact]: + logger.info("Creating artifact index for run %s", self.run_id) artifacts = self.run.get_artifacts() extracted = {} for artifact in artifacts: - url = artifact.archive_download_url - headers = {"Authorization": f"token {self.token}"} - response = requests.get(url, headers=headers) - - if response.status_code == 200: - with tempfile.NamedTemporaryFile("w+b") as temp: - temp.write(response.content) - temp.flush() - - with zipfile.ZipFile(temp.name) as z: - artifact_dict = {} - for file in z.namelist(): - with z.open(file) as f: - artifact_dict[file] = f.read() - - extracted[artifact.name] = artifact_dict - else: - raise RuntimeError( - f"Failed to download artifact {artifact.name}. " - f"Status code: {response.status_code}" - ) + extracted[artifact.name] = GitHubArtifact( + name=artifact.name, + archive_download_url=artifact.archive_download_url, + # Non-machine users cannot download from the archive_download_url and + # the GitHub API does not give us access to the public download url. 
+ public_download_url=f"{self.repo.html_url}/actions/runs/{self.run_id}/artifacts/{artifact.id}", + ) - logger.info("Download artifacts for run %s: %s", self.run_id, list(extracted.keys())) return extracted + + + async def download_artifact(self, artifact: GitHubArtifact) -> dict: + logger.info("Attempting to download artifact '%s' for run %s", artifact.name, self.run_id) + + url = artifact.archive_download_url + headers = {"Authorization": f"token {self.token}"} + response = requests.get(url, headers=headers) + + if response.status_code == 200: + artifact_dict = {} + with zipfile.ZipFile(io.BytesIO(response.content)) as z: + for file in z.namelist(): + with z.open(file) as f: + artifact_dict[file] = f.read() + + logger.info("Downloaded artifact '%s' for run %s", artifact.name, self.run_id) + return artifact_dict + else: + raise RuntimeError( + f"Failed to download artifact {artifact.name}. " + f"Status code: {response.status_code}" + ) From b44da6434e9da6e6d7d56f3e9ed3fbc1cda8e94e Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Thu, 28 Aug 2025 23:12:11 +0200 Subject: [PATCH 6/6] github runner: yield 'profile_data/*' from job as profile data The idea is that eval_run.py places profiling data in the profile_data/ directory, which is then automatically exported to the user. This is done by uploading that directory as the 'profile-data' artifact, then fetching its public download link and returning that as the ProfileResult.download_url. 
--- .github/workflows/amd_workflow.yml | 17 ++++++++++++----- .github/workflows/nvidia_workflow.yml | 15 ++++++++++----- src/libkernelbot/launchers/github.py | 6 ++++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/.github/workflows/amd_workflow.yml b/.github/workflows/amd_workflow.yml index 8a0014ef..39dc66c5 100644 --- a/.github/workflows/amd_workflow.yml +++ b/.github/workflows/amd_workflow.yml @@ -35,13 +35,13 @@ jobs: run: | # Extract the payload content without printing it PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH) - + # Apply mask to the extracted content echo "::add-mask::$PAYLOAD" - + # Now write to file (won't be logged since it's masked) echo "$PAYLOAD" > payload.json - + - name: Set venv directory based on runner run: | if [[ "${{ github.event.inputs.runner }}" == "amdgpu-mi250-x86-64" ]]; then @@ -77,5 +77,12 @@ jobs: if: always() with: name: run-result - path: | - result.json + path: result.json + + - name: Upload profiling artifacts + uses: actions/upload-artifact@v4 + if: always() + with: + name: profile-data + path: profile_data/* + retention-days: 1 diff --git a/.github/workflows/nvidia_workflow.yml b/.github/workflows/nvidia_workflow.yml index e16cf4d5..b50ec044 100644 --- a/.github/workflows/nvidia_workflow.yml +++ b/.github/workflows/nvidia_workflow.yml @@ -42,10 +42,10 @@ jobs: # Extract the payload content without printing it apt-get update && apt-get install -y jq PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH) - + # Apply mask to the extracted content echo "::add-mask::$PAYLOAD" - + # Now write to file (won't be logged since it's masked) echo "$PAYLOAD" > payload.json @@ -73,15 +73,20 @@ jobs: shell: bash run: | python src/runners/github-runner.py - cat result.json # Debug: show output - name: Upload training artifacts uses: actions/upload-artifact@v4 if: always() with: name: run-result - path: | - result.json + path: result.json + - name: Upload profiling artifacts + uses: actions/upload-artifact@v4 + if: 
always() + with: + name: profile-data + path: profile_data/* + retention-days: 1 env: CUDA_VISIBLE_DEVICES: 0 diff --git a/src/libkernelbot/launchers/github.py b/src/libkernelbot/launchers/github.py index 41c603ab..c748e0fe 100644 --- a/src/libkernelbot/launchers/github.py +++ b/src/libkernelbot/launchers/github.py @@ -139,6 +139,12 @@ async def run_submission( # noqa: C901 run_res = None if v.get("run") is None else RunResult(**v["run"]) profile_res = None if v.get("profile") is None else ProfileResult(**v["profile"]) + # Update profile artifact to the actual download URL. + # For the GitHub launcher the profile_artifact currently just contains + # the name of the artifact. + if profile_res is not None: + profile_res.download_url = index["profile-data"].public_download_url + res = EvalResult( start=datetime.datetime.fromisoformat(v["start"]), end=datetime.datetime.fromisoformat(v["end"]),