@@ -633,6 +633,36 @@ run_ucx_perftest_with_daemon() {
633633 done
634634}
635635
#
# Run the ucx_perftest CUDA device test ("cuda kernel" mode) through
# run_client_server_app.
#
# The test is skipped (return 0) when any of the following holds:
#   - $have_cuda is not "yes"
#   - has_gpunetio_devel fails (reported as "DOCA not available")
#   - get_num_gpus prints 0
#
# Reads: have_cuda, ucx_inst
# Sets:  ucx_inst_ptest, ucx_perftest, ucp_test_args, ucp_client_args
#        (not declared local, so they stay visible after the call)
# Returns: 0 when skipped, otherwise the status of run_client_server_app.
#
run_ucx_perftest_cuda_device() {
    if [ "X$have_cuda" != "Xyes" ]; then
        echo " ==== CUDA not available, skipping CUDA device tests ===="
        return 0
    fi

    if ! has_gpunetio_devel; then
        echo " ==== DOCA not available, skipping CUDA device tests ===="
        return 0
    fi

    if [ "$(get_num_gpus)" -eq 0 ]; then
        echo " ==== No NVIDIA GPUs found, skipping CUDA device tests ===="
        return 0
    fi

    echo " ==== Running ucx_perftest with cuda kernel ===="
    # Paths are joined without intervening whitespace: a space after
    # $ucx_inst would turn the assignment into "VAR=value command" and
    # would corrupt the binary and batch-file paths.
    ucx_inst_ptest="$ucx_inst/share/ucx/perftest"
    ucx_perftest="$ucx_inst/bin/ucx_perftest"
    ucp_test_args="-b $ucx_inst_ptest/test_types_ucp_device_cuda"

    # TODO: Run on all GPUs
    ucp_client_args="-a cuda $(hostname)"

    # NOTE(review): the meaning of the two trailing "0 0" arguments is defined
    # by run_client_server_app, which is not visible here - confirm there.
    run_client_server_app "$ucx_perftest" "$ucp_test_args" "$ucp_client_args" 0 0
}
665+
636666#
637667# Test malloc hooks with mpi
638668#
@@ -1208,6 +1238,7 @@ run_tests() {
12081238 do_distributed_task 3 4 run_ucp_client_server
12091239 do_distributed_task 0 4 test_no_cuda_context
12101240 do_distributed_task 1 4 run_ucx_perftest_with_daemon
1241+ do_distributed_task 1 4 run_ucx_perftest_cuda_device
12111242
12121243 # long devel tests
12131244 do_distributed_task 0 4 run_ucp_hello
0 commit comments