Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions .github/workflows/amd_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ jobs:
run: |
# Extract the payload content without printing it
PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH)

# Apply mask to the extracted content
echo "::add-mask::$PAYLOAD"

# Now write to file (won't be logged since it's masked)
echo "$PAYLOAD" > payload.json

- name: Set venv directory based on runner
run: |
if [[ "${{ github.event.inputs.runner }}" == "amdgpu-mi250-x86-64" ]]; then
Expand Down Expand Up @@ -77,5 +77,12 @@ jobs:
if: always()
with:
name: run-result
path: |
result.json
path: result.json

- name: Upload profiling artifacts
uses: actions/upload-artifact@v4
if: always()
with:
name: profile-data
path: profile_data/*
retention-days: 1
15 changes: 10 additions & 5 deletions .github/workflows/nvidia_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ jobs:
# Extract the payload content without printing it
apt-get update && apt-get install -y jq
PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH)

# Apply mask to the extracted content
echo "::add-mask::$PAYLOAD"

# Now write to file (won't be logged since it's masked)
echo "$PAYLOAD" > payload.json

Expand Down Expand Up @@ -73,15 +73,20 @@ jobs:
shell: bash
run: |
python src/runners/github-runner.py
cat result.json # Debug: show output

- name: Upload training artifacts
uses: actions/upload-artifact@v4
if: always()
with:
name: run-result
path: |
result.json
path: result.json

- name: Upload profiling artifacts
uses: actions/upload-artifact@v4
if: always()
with:
name: profile-data
path: profile_data/*
retention-days: 1
env:
CUDA_VISIBLE_DEVICES: 0
4 changes: 3 additions & 1 deletion scripts/ci_test_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

from libkernelbot.consts import ExitCode, SubmissionMode
from libkernelbot.run_eval import compile_cuda_script, run_cuda_script
from libkernelbot.run_eval import compile_cuda_script, make_system_info, run_cuda_script

ref = Path("examples/identity_cuda/reference.cuh").read_text()
task_h = Path("examples/identity_cuda/task.h").read_text()
Expand All @@ -19,6 +19,7 @@ def run_cuda_helper(sources: dict, headers: dict = None, arch=None, **kwargs):
headers = header_files

eval_result = run_cuda_script(
make_system_info(),
sources,
headers,
arch=arch,
Expand Down Expand Up @@ -194,6 +195,7 @@ def test_include_dirs(tmp_path: Path):

# can also use generic flags argument
result = run_cuda_script(
make_system_info(),
{"eval.cu": eval_cu, "submission.cu": sub},
header_files,
flags=["-I.", f"-I{tmp_path}"],
Expand Down
3 changes: 2 additions & 1 deletion scripts/ci_test_python.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path

from libkernelbot.consts import ExitCode, SubmissionMode
from libkernelbot.run_eval import run_pytorch_script
from libkernelbot.run_eval import make_system_info, run_pytorch_script

ref = Path("examples/identity_py/reference.py").read_text()
task = Path("examples/identity_py/task.py").read_text()
Expand All @@ -12,6 +12,7 @@

def run_pytorch_helper(sources: dict, tests=None, **kwargs):
result = run_pytorch_script(
make_system_info(),
sources,
"eval.py",
mode=SubmissionMode.TEST.value,
Expand Down
6 changes: 6 additions & 0 deletions src/kernelbot/discord_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from discord_utils import _send_split_log

from libkernelbot.report import (
Link,
Log,
MultiProgressReporter,
RunProgressReporter,
Expand Down Expand Up @@ -69,6 +70,11 @@ async def display_report(self, title: str, report: RunResultReport):
message += part.text
elif isinstance(part, Log):
message = await _send_split_log(thread, message, part.header, part.content)
elif isinstance(part, Link):
if len(message) > 0:
await thread.send(message)
message = ""
await thread.send(f"{part.title}: [{part.text}]({part.url})")

if len(message) > 0:
await thread.send(message)
Expand Down
109 changes: 71 additions & 38 deletions src/libkernelbot/launchers/github.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import asyncio
import base64
import dataclasses
import datetime
import io
import json
import math
import pprint
import tempfile
import uuid
import zipfile
import zlib
Expand All @@ -23,7 +24,14 @@
SubmissionMode,
)
from libkernelbot.report import RunProgressReporter
from libkernelbot.run_eval import CompileResult, EvalResult, FullResult, RunResult, SystemInfo
from libkernelbot.run_eval import (
CompileResult,
EvalResult,
FullResult,
ProfileResult,
RunResult,
SystemInfo,
)
from libkernelbot.utils import setup_logging

from .launcher import Launcher
Expand All @@ -49,7 +57,7 @@ def __init__(self, repo: str, token: str, branch: str):
self.token = token
self.branch = branch

async def run_submission(
async def run_submission( # noqa: C901
self, config: dict, gpu_type: GPU, status: RunProgressReporter
) -> FullResult:
gpu_vendor = None
Expand Down Expand Up @@ -106,15 +114,17 @@ async def run_submission(
await status.push("Downloading artifacts...")
logger.info("Downloading artifacts...")

artifacts = await run.download_artifacts()
if "run-result" not in artifacts:
logger.error("Could not find `run-result` among artifacts: %s", artifacts.keys())
index = run.get_artifact_index()

if "run-result" not in index:
logger.error("Could not find `run-result` among artifacts: %s", index.keys())
await status.push("Downloading artifacts... failed")
return FullResult(
success=False, error="Could not download artifacts", runs={}, system=SystemInfo()
)

logs = artifacts["run-result"]["result.json"].decode("utf-8")
artifact = await run.download_artifact(index["run-result"])
logs = artifact["result.json"].decode("utf-8")

await status.update("Downloading artifacts... done")
logger.info("Downloading artifacts... done")
Expand All @@ -123,17 +133,24 @@ async def run_submission(
runs = {}
# convert json back to EvalResult structures, which requires
# special handling for datetime and our dataclasses.

for k, v in data["runs"].items():
if "compilation" in v and v["compilation"] is not None:
comp = CompileResult(**v["compilation"])
else:
comp = None
run = RunResult(**v["run"])
comp_res = None if v.get("compilation") is None else CompileResult(**v["compilation"])
run_res = None if v.get("run") is None else RunResult(**v["run"])
profile_res = None if v.get("profile") is None else ProfileResult(**v["profile"])

# Update profile artifact to the actual download URL.
# For the GitHub launcher the profile_artifact currently just contains
# the name of the artifact.
if profile_res is not None:
profile_res.download_url = index["profile-data"].public_download_url

res = EvalResult(
start=datetime.datetime.fromisoformat(v["start"]),
end=datetime.datetime.fromisoformat(v["end"]),
compilation=comp,
run=run,
compilation=comp_res,
run=run_res,
profile=profile_res,
)
runs[k] = res

Expand All @@ -147,6 +164,13 @@ async def wait_callback(self, run: "GitHubRun", status: RunProgressReporter):
)


@dataclasses.dataclass
class GitHubArtifact:
    """Lightweight record describing one artifact attached to a workflow run."""

    # Artifact name as reported by the GitHub API (e.g. "run-result", "profile-data").
    name: str
    # API URL returning the zipped artifact; requires token authentication.
    archive_download_url: str
    # Browser-facing URL a human can click to download the artifact.
    public_download_url: str


class GitHubRun:
def __init__(self, repo: str, token: str, branch: str, workflow_file: str):
gh = Github(token)
Expand Down Expand Up @@ -323,34 +347,43 @@ async def wait_for_completion(
logger.error(f"Error waiting for GitHub run {self.run_id}: {e}", exc_info=e)
raise # Re-raise other exceptions

async def download_artifacts(self) -> dict:
logger.info("Attempting to download artifacts for run %s", self.run_id)

def get_artifact_index(self) -> dict[str, GitHubArtifact]:
    """Build an index of this run's artifacts without downloading them.

    Returns a mapping from artifact name to a :class:`GitHubArtifact`
    carrying both the authenticated archive URL (for programmatic
    download via ``download_artifact``) and a public browser URL.

    Note: the span as displayed contained leftover deleted diff lines
    (the old per-artifact HTTP download/tempfile/zip extraction) which
    shadowed the ``extracted[artifact.name]`` assignment and performed
    dead network work; that logic now lives in ``download_artifact``.
    """
    logger.info("Creating artifact index for run %s", self.run_id)
    artifacts = self.run.get_artifacts()

    extracted = {}

    for artifact in artifacts:
        extracted[artifact.name] = GitHubArtifact(
            name=artifact.name,
            archive_download_url=artifact.archive_download_url,
            # Non-machine users cannot download from the archive_download_url and
            # the GitHub API does not give us access to the public download url.
            public_download_url=f"{self.repo.html_url}/actions/runs/{self.run_id}/artifacts/{artifact.id}",
        )

    logger.info("Download artifacts for run %s: %s", self.run_id, list(extracted.keys()))
    return extracted


async def download_artifact(self, artifact: GitHubArtifact) -> dict:
    """Download one artifact archive and return its files as ``{name: bytes}``.

    Raises:
        RuntimeError: if GitHub responds with a non-200 status code.
    """
    logger.info("Attempting to download artifact '%s' for run %s", artifact.name, self.run_id)

    response = requests.get(
        artifact.archive_download_url,
        headers={"Authorization": f"token {self.token}"},
    )

    # Fail fast on any non-success status so the caller sees a clear error.
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download artifact {artifact.name}. "
            f"Status code: {response.status_code}"
        )

    # Artifacts arrive as a zip archive; unpack fully in memory.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        contents = {member: archive.read(member) for member in archive.namelist()}

    logger.info("Downloaded artifact '%s' for run %s", artifact.name, self.run_id)
    return contents
24 changes: 23 additions & 1 deletion src/libkernelbot/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,30 @@ class Log:
content: str


@dataclasses.dataclass
class Link:
    """
    Link represents a link in the profiling report, to result data
    which can be downloaded by clicking it.
    """

    # Label shown before the hyperlink (e.g. which profiler produced the data).
    title: str
    # Visible hyperlink text (e.g. "Download from GitHub").
    text: str
    # Target URL the link points at.
    url: str


class RunResultReport:
    """Ordered collection of report parts (text sections, logs, and links).

    Parts are appended via the ``add_*`` helpers and later rendered in
    insertion order by the reporters (e.g. the Discord reporter).

    Note: the displayed span kept both the pre- and post-PR annotation
    lines, leaving a duplicated ``self.data`` assignment; only the
    ``Link``-aware version is retained here.
    """

    def __init__(self, data=None):
        # ``data or []`` deliberately allocates a fresh list when no (or an
        # empty) sequence is supplied, avoiding a shared mutable default.
        self.data: List[Text | Log | Link] = data or []

    def add_text(self, section: str):
        """Append a free-form text section."""
        self.data.append(Text(section))

    def add_log(self, header: str, log: str):
        """Append a titled log block."""
        self.data.append(Log(header, log))

    def add_link(self, title: str, text: str, url: str):
        """Append a downloadable-result hyperlink."""
        self.data.append(Link(title, text, url))

    def __repr__(self):
        return f"RunResultReport(data={self.data})"

Expand Down Expand Up @@ -267,6 +281,7 @@ def generate_system_info(system: SystemInfo):
Running on:
* GPU: `{system.gpu}`
* CPU: `{system.cpu}`
* Runtime: `{system.runtime}`
* Platform: `{system.platform}`
* Torch: `{system.torch}`
"""
Expand Down Expand Up @@ -322,6 +337,13 @@ def generate_report(result: FullResult) -> RunResultReport: # noqa: C901
make_profile_log(prof_run.run),
)

if prof_run.profile is not None and prof_run.profile.download_url is not None:
report.add_link(
f"{prof_run.profile.profiler} profiling output",
"Download from GitHub",
prof_run.profile.download_url,
)

if "leaderboard" in runs:
bench_run = runs["leaderboard"]
if _handle_crash_report(report, bench_run):
Expand Down
Loading
Loading