benchmark/kvbench/commands/args.py (+5, -0)

@@ -214,6 +214,11 @@ def nixl_bench_args(func):
         type=str,
         help="Comma-separated GPU CUDA device id to use for communication (only used with GPUNETIO backend)",
     )(func)
+    func = click.option(
+        "--gpunetio_oob_list",
+        type=str,
+        help="OOB network interface name for control path (only used with GPUNETIO backend)",
+    )(func)
     func = click.option(
         "--hf3fs_iopool_size",
         type=int,

benchmark/kvbench/commands/nixlbench.py (+5, -0)

@@ -63,6 +63,7 @@ def __init__(
         benchmark_group="default",
         gds_mt_num_threads=1,
         gpunetio_device_list="0",
+        gpunetio_oob_list="",
         hf3fs_iopool_size=64,
         obj_access_key="",
         obj_secret_key="",

@@ -115,6 +116,7 @@ def __init__(
             worker_type (str, optional): Type of worker. Defaults to "nixl".
             gds_mt_num_threads (int, optional): Number of threads for GDS_MT plugin. Defaults to 1.
             gpunetio_device_list (str, optional): GPU device list for GPUNETIO plugin. Defaults to "0".
+            gpunetio_oob_list (str, optional): OOB network interface name for the control path of the GPUNETIO plugin. Defaults to "".
             hf3fs_iopool_size (int, optional): IO pool size for HF3FS plugin. Defaults to 64.
             obj_access_key (str, optional): Access key for OBJ/S3 plugin. Defaults to "".
             obj_secret_key (str, optional): Secret key for OBJ/S3 plugin. Defaults to "".

@@ -162,6 +164,7 @@ def __init__(
         self.worker_type = worker_type
         self.gds_mt_num_threads = gds_mt_num_threads
         self.gpunetio_device_list = gpunetio_device_list
+        self.gpunetio_oob_list = gpunetio_oob_list
         self.hf3fs_iopool_size = hf3fs_iopool_size
         self.obj_access_key = obj_access_key
         self.obj_secret_key = obj_secret_key

@@ -320,6 +323,7 @@ def _params(self):
             "worker_type": self.worker_type,
             "gds_mt_num_threads": self.gds_mt_num_threads,
             "gpunetio_device_list": self.gpunetio_device_list,
+            "gpunetio_oob_list": self.gpunetio_oob_list,
             "hf3fs_iopool_size": self.hf3fs_iopool_size,
             "obj_access_key": self.obj_access_key,
             "obj_secret_key": self.obj_secret_key,

@@ -379,6 +383,7 @@ def defaults():
         "benchmark_group": "default",
         "gds_mt_num_threads": 1,
         "gpunetio_device_list": "0",
+        "gpunetio_oob_list": "",
         "hf3fs_iopool_size": 64,
         "obj_access_key": "",
         "obj_secret_key": "",

benchmark/nixlbench/src/utils/utils.cpp (+7, -0)

@@ -107,6 +107,9 @@ DEFINE_string (posix_api_type,
 // DOCA GPUNetIO options - only used when backend is DOCA GPUNetIO
 DEFINE_string(gpunetio_device_list, "0", "Comma-separated GPU CUDA device id to use for \
     communication (only used with nixl worker)");
+// DOCA GPUNetIO options - only used when backend is DOCA GPUNetIO
+DEFINE_string(gpunetio_oob_list, "", "Comma-separated OOB network interface name \
+    for control path (only used with nixl worker)");
 
 // OBJ options - only used when backend is OBJ
 DEFINE_string(obj_access_key, "", "Access key for S3 backend");
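
For readers unfamiliar with the flag mechanism above: DEFINE_string comes from gflags and generates a global std::string named FLAGS_<flag>, populated when the command line is parsed. A minimal standalone sketch of the same pattern follows; the file name and main function are illustrative, not part of nixlbench.

// gflags_sketch.cpp: minimal illustration of the DEFINE_string/FLAGS_ pattern.
// Build (assuming gflags is installed): g++ gflags_sketch.cpp -lgflags
#include <gflags/gflags.h>
#include <iostream>

// Defines the flag and a global std::string FLAGS_gpunetio_oob_list.
DEFINE_string(gpunetio_oob_list, "", "OOB network interface name for control path");

int main(int argc, char **argv) {
    // Fills every FLAGS_* global from argv; the final 'true' strips
    // recognized flags out of argv.
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    std::cout << "gpunetio_oob_list = " << FLAGS_gpunetio_oob_list << std::endl;
    return 0;
}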

@@ -155,6 +158,7 @@ int xferBenchConfig::gds_batch_pool_size = 0;
 int xferBenchConfig::gds_batch_limit = 0;
 int xferBenchConfig::gds_mt_num_threads = 0;
 std::string xferBenchConfig::gpunetio_device_list = "";
+std::string xferBenchConfig::gpunetio_oob_list = "";
 std::vector<std::string> devices = { };
 int xferBenchConfig::num_files = 0;
 std::string xferBenchConfig::posix_api_type = "";

@@ -219,6 +223,7 @@ xferBenchConfig::loadFromFlags() {
     // Load DOCA-specific configurations if backend is DOCA
     if (backend == XFERBENCH_BACKEND_GPUNETIO) {
         gpunetio_device_list = FLAGS_gpunetio_device_list;
+        gpunetio_oob_list = FLAGS_gpunetio_oob_list;
     }
 
     // Load HF3FS-specific configurations if backend is HF3FS

@@ -429,6 +434,8 @@ xferBenchConfig::printConfig() {
         if (backend == XFERBENCH_BACKEND_GPUNETIO) {
             printOption ("GPU CUDA Device id list (--device_list=dev1,dev2,...)",
                          gpunetio_device_list);
+            printOption ("OOB network interface name for control path (--oob_list=iface)",
+                         gpunetio_oob_list);
         }
     }
     printOption ("Initiator seg type (--initiator_seg_type=[DRAM,VRAM])", initiator_seg_type);

benchmark/nixlbench/src/utils/utils.h (+1, -0)

@@ -156,6 +156,7 @@ class xferBenchConfig {
     static int gds_batch_limit;
     static int gds_mt_num_threads;
     static std::string gpunetio_device_list;
+    static std::string gpunetio_oob_list;
     static long page_size;
     static std::string obj_access_key;
     static std::string obj_secret_key;
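
Each config option follows the standard C++ split for static data members: a declaration in the header, plus exactly one out-of-class definition in a .cpp file (the utils.cpp hunk above). A minimal sketch of the pattern, with names shortened for illustration:

#include <string>

// config.h: declaration only; no storage is allocated here.
class Config {
public:
    static std::string oob_list;
};

// config.cpp: the single out-of-class definition that allocates storage.
std::string Config::oob_list = "";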

benchmark/nixlbench/src/worker/nixl/nixl_worker.cpp (+5, -2)

@@ -169,10 +169,13 @@ xferBenchNixlWorker::xferBenchNixlWorker(int *argc, char ***argv, std::vector<st
         std::cout << "POSIX backend with API type: " << xferBenchConfig::posix_api_type
                   << std::endl;
     } else if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_GPUNETIO)) {
-        std::cout << "GPUNETIO backend, network device " << devices[0] << " GPU device "
-                  << xferBenchConfig::gpunetio_device_list << std::endl;
+        std::cout << "GPUNETIO backend, network device " << devices[0]
+                  << " GPU device " << xferBenchConfig::gpunetio_device_list
+                  << " OOB interface " << xferBenchConfig::gpunetio_oob_list
+                  << std::endl;
         backend_params["network_devices"] = devices[0];
         backend_params["gpu_devices"] = xferBenchConfig::gpunetio_device_list;
+        backend_params["oob_interface"] = xferBenchConfig::gpunetio_oob_list;
     } else if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_MOONCAKE)) {
         std::cout << "Mooncake backend" << std::endl;
     } else if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_HF3FS)) {

src/plugins/gpunetio/README.md (+1, -0)

@@ -36,6 +36,7 @@ Stream pool mode instead is when applications mostly wants to process data on th
 
 DOCA GPUNetIO backend takes 4 input parameters:
 - network_devices: network device to be used during the execution (e.g. mlx5_0). Current release supports only 1 network device.
+- oob_interface: network interface to be used when exchanging control info during initiator/target connection. Optional parameter, not needed if the network device is set in Ethernet mode.
 - gpu_devices: GPU CUDA ID to be used during the execution (e.g. 0). Current release supports only 1 GPU device.
 - cuda_streams: how many CUDA streams the backend should create at setup time in the internal pool. Relevant only if the application wants to use the "stream pool" mode. If this parameter is not specified, default value is `DOCA_POST_STREAM_NUM`.
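
A hedged sketch of how an application might pass these parameters when creating the backend. It assumes the NIXL C++ agent API from nixl.h, where nixl_b_params_t is a string-to-string map; the agent name, device ids, interface name, and stream count below are placeholders:

#include <nixl.h>

int main() {
    // Agent name and progress-thread setting are illustrative only.
    nixlAgentConfig cfg(true);
    nixlAgent agent("gpunetio_demo", cfg);

    nixl_b_params_t params;                 // std::map<std::string, std::string>
    params["network_devices"] = "mlx5_0";   // single NIC supported
    params["gpu_devices"]     = "0";        // single CUDA device id
    params["oob_interface"]   = "eth0";     // optional; omit in Ethernet mode
    params["cuda_streams"]    = "4";        // only used in "stream pool" mode

    nixlBackendH *backend = nullptr;
    nixl_status_t status = agent.createBackend("GPUNETIO", params, backend);
    return (status == NIXL_SUCCESS) ? 0 : 1;
}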
