@@ -192,4 +192,72 @@ async def test_github_launcher_failing_script(project_root: Path, github_config:
     test_passed = result.runs.get("test", {}).run.passed if "test" in result.runs else True
     benchmark_passed = result.runs.get("benchmark", {}).run.passed if "benchmark" in result.runs else True
 
-    assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script"
+    assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script"
+
+
+
+
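+# Parametrized over GPU type so further multi-GPU runners can be appended to the list later.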
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize("gpu_type", [GitHubGPU.MI300x8])
+async def test_github_launcher_multi_gpu(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU):
+    """
+    Test GitHubLauncher with a real multi-GPU Python task using real GitHub Actions.
+    Verifies that the multi-GPU runner works and exposes all of its devices.
+    """
+    launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch)
+    reporter = MockProgressReporter("GitHub Integration Test")
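+    # MockProgressReporter records progress messages and updates so they can be asserted on at the end of the test.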
+
+    # Load the real gather task
+    task_path = project_root / "examples" / "gather"
+    if not task_path.exists():
+        pytest.skip("examples/gather not found - skipping GitHub integration test")
+
+    task_definition = make_task_definition(task_path)
+    submission_content = (task_path / "submission.py").read_text()
+
+    config = build_task_config(
+        task=task_definition.task,
+        submission_content=submission_content,
+        arch=0,  # Not used for GitHub launcher
+        mode=SubmissionMode.TEST,
+    )
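+    # SubmissionMode.TEST should request only the correctness tests, so "test" is the sole entry expected in result.runs.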
+
+    result = await launcher.run_submission(config, gpu_type, reporter)
+
+    # Basic structure and success
+    assert result.success, f"Expected successful run, got: {result.error}"
+    assert result.error == ""
+    assert isinstance(result.runs, dict)
+
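+    # An MI300x8 runner exposes eight GPUs, all of which should be visible to the harness.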
+    assert result.system.device_count == 8
+
+    # Test run structure
+    assert "test" in result.runs
+    test_run = result.runs["test"]
+
+    # For Python runs, compilation is None
+    assert test_run.compilation is None
+
+    # Run needs to succeed
+    assert test_run.run.success is True
+    assert test_run.run.passed is True
+    assert test_run.run.exit_code == 0
+    assert test_run.run.duration > 0
+
+    # Test results need to succeed
+    assert test_run.run.result["check"] == "pass"
+    test_count = int(test_run.run.result["test-count"])
+    assert test_count == 1
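+    # The gather example defines a single test case; each case reports a status plus a spec string encoding its size and seed.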
+    for i in range(test_count):
+        assert test_run.run.result[f"test.{i}.status"] == "pass"
+        assert "size:" in test_run.run.result[f"test.{i}.spec"]
+        assert "seed:" in test_run.run.result[f"test.{i}.spec"]
+
+    # Sanity check for timings
+    assert test_run.start < test_run.end
+
+    # Check reporter messages
+    assert any("Waiting for workflow" in msg for msg in reporter.messages)
+    assert any("artifacts" in msg.lower() for msg in reporter.messages)
+    assert any("completed" in update for update in reporter.updates)