From 130fb58608f96472120139c1e723bb737d8f0aa5 Mon Sep 17 00:00:00 2001
From: Jagadish Krishnamoorthy
Date: Tue, 29 Oct 2024 06:47:43 -0700
Subject: [PATCH] [cifar ds training]: Set cuda device during initialization
 of distributed backend. (#931)

* Set cuda device during initialization of distributed backend.

The commit is needed to avoid GPU 0 being set as the compute stream via
torch.cuda.current_stream() during initialization across all GPUs.

Signed-off-by: Jagadish Krishnamoorthy

* Use device-agnostic accelerator API.

Signed-off-by: Jagadish Krishnamoorthy

---------

Signed-off-by: Jagadish Krishnamoorthy
---
 training/cifar/cifar10_deepspeed.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/training/cifar/cifar10_deepspeed.py b/training/cifar/cifar10_deepspeed.py
index 521a75cdf..9888544d5 100755
--- a/training/cifar/cifar10_deepspeed.py
+++ b/training/cifar/cifar10_deepspeed.py
@@ -1,4 +1,5 @@
 import argparse
+import os

 import deepspeed
 import torch
@@ -279,6 +280,8 @@ def test(model_engine, testset, local_device, target_dtype, test_batch_size=4):
 def main(args):
     # Initialize DeepSpeed distributed backend.
     deepspeed.init_distributed()
+    _local_rank = int(os.environ.get("LOCAL_RANK"))
+    get_accelerator().set_device(_local_rank)

     ########################################################################
     # Step1. Data Preparation.
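
Note (not part of the patch): a minimal standalone sketch of the pattern the change introduces, i.e. binding each process to its local device through the device-agnostic accelerator API right after deepspeed.init_distributed(), so streams and tensors created during setup do not all land on GPU 0. The init_for_rank helper name and the "0" fallback for LOCAL_RANK are illustrative assumptions; a launcher such as deepspeed or torchrun is assumed to export LOCAL_RANK and the other distributed environment variables.

    # Sketch: per-rank device binding with DeepSpeed's accelerator abstraction.
    import os

    import deepspeed
    import torch
    from deepspeed.accelerator import get_accelerator


    def init_for_rank():
        # Initialize the DeepSpeed distributed backend (NCCL/RCCL under the hood).
        deepspeed.init_distributed()
        # Pin this process to its local device. Without this, every rank's
        # current device defaults to device 0 during initialization.
        local_rank = int(os.environ.get("LOCAL_RANK", "0"))  # "0" fallback is illustrative
        get_accelerator().set_device(local_rank)
        return local_rank


    if __name__ == "__main__":
        rank = init_for_rank()
        # Tensors created after set_device() land on this rank's own device.
        x = torch.ones(4, device=get_accelerator().device_name(rank))
        print(f"rank {rank}: tensor on {x.device}")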