Skip to content

Commit edfd3f0

Browse files
committed
implement ROCm profiling
This uses rocPROF to collect profiling data and write it to the profile_data directory; a download link for that directory is then returned to the user.
1 parent 4e25bcf commit edfd3f0

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

src/libkernelbot/run_eval.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,62 @@ def run_program(
297297
)
298298

299299

300+
def profile_program(
    system: SystemInfo,
    call: list[str],
    seed: Optional[int],
    timeout: int,
    multi_gpu: bool,
) -> tuple[RunResult, Optional[ProfileResult]]:
    """Run ``call``, wrapped in a profiler when one is supported for ``system``.

    On a ROCm runtime the command is prefixed with a ``rocprofv3`` invocation
    that writes its traces into the ``profile_data`` directory (relative to
    the current working directory). On every other runtime the command is run
    unmodified and no profile is produced.

    Args:
        system: Description of the machine; only ``system.runtime`` is read.
        call: The command line to execute. The list is not mutated.
        seed: Forwarded unchanged to ``run_program``.
        timeout: Forwarded unchanged to ``run_program``.
        multi_gpu: Forwarded unchanged to ``run_program``.

    Returns:
        A ``(run_result, profile_result)`` pair. ``profile_result`` is a
        ``ProfileResult`` only when profiling was performed and the run
        succeeded; otherwise it is ``None``. Its ``download_url`` is left as
        ``None`` here.
    """
    if system.runtime != "ROCm":
        # TODO: Implement profiling for other platforms
        return run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu), None

    # The runner-specific configuration should implement logic
    # to fetch the data in this directory and return it as
    # ProfileResult.download_url.
    output_dir = Path("profile_data")
    # A previous profiling run in the same working directory may already have
    # created the directory; that must not abort this run.
    output_dir.mkdir(exist_ok=True)

    # Wrap program in rocprof
    rocprof_prefix = [
        "rocprofv3",
        "--log-level",
        "fatal",
        "--hip-trace",
        "--kernel-trace",
        "--rccl-trace",
        "--marker-trace",
        "--memory-copy-trace",
        # TODO(Robin): New? Doesn't work in the runner
        # "--memory-allocation-trace",
        "--scratch-memory-trace",
        # TODO(Robin): The HSA trace is very large. Skip for now, maybe make optional later?
        # "--hsa-trace",
        "--output-format",
        "pftrace",
        "csv",
        "-d",
        str(output_dir),
        # Just store the files as %pid%_tracename.ext instead of putting them in an
        # additional directory named after the hostname.
        "-o",
        "%pid%",
        "--",
    ]

    run_result = run_program(
        rocprof_prefix + call, seed=seed, timeout=timeout, multi_gpu=multi_gpu
    )

    # Only advertise a profile when the wrapped program actually succeeded.
    profile_result = None
    if run_result.success:
        profile_result = ProfileResult(
            profiler='rocPROF',
            download_url=None,
        )

    return run_result, profile_result
355+
300356
def run_single_evaluation(
301357
system: SystemInfo,
302358
call: list[str],
@@ -332,6 +388,9 @@ def run_single_evaluation(
332388

333389
call += [mode, cases.name]
334390

391+
if mode == "profile":
392+
return profile_program(system, call, seed=seed, timeout=timeout, multi_gpu=multi_gpu)
393+
335394
return run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu), None
336395

337396

0 commit comments

Comments
 (0)