@@ -12,7 +12,7 @@ use std::sync::atomic::AtomicUsize;
12
12
use std:: sync:: Arc ;
13
13
14
14
use half:: { bf16, f16} ;
15
- use log:: { info, trace, warn, error } ;
15
+ use log:: { error , info, trace, warn} ;
16
16
17
17
use crate :: reduce:: { Reduce , WorkingMemory } ;
18
18
use crate :: utils:: * ;
@@ -103,7 +103,11 @@ fn reduce_loop<T: Float>(
103
103
trace ! ( "rank({})/job({}) reduce wait recv" , i, job_idx) ;
104
104
105
105
loop {
106
- hint:: spin_loop ( ) ;
106
+ if cfg ! ( no_spinloop) {
107
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
108
+ } else {
109
+ hint:: spin_loop ( ) ;
110
+ }
107
111
let send_ready = send_ready. load ( std:: sync:: atomic:: Ordering :: Relaxed ) ;
108
112
let send_expect = ( 1 << args. send_threads ) - 1 ;
109
113
let recv_ready = recv_ready. load ( std:: sync:: atomic:: Ordering :: Relaxed ) ;
@@ -213,7 +217,11 @@ fn send_loop<T: Float>(
213
217
for ( idx, ( readys, send) ) in sends. iter ( ) . enumerate ( ) . cycle ( ) {
214
218
for ready in readys. iter ( ) {
215
219
loop {
216
- hint:: spin_loop ( ) ;
220
+ if cfg ! ( no_spinloop) {
221
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
222
+ } else {
223
+ hint:: spin_loop ( ) ;
224
+ }
217
225
let ready = ready. load ( std:: sync:: atomic:: Ordering :: Relaxed ) ;
218
226
// trace!(
219
227
// "[send] rank({})/job({}) send ready: 0b{:016b}",
@@ -235,7 +243,11 @@ fn send_loop<T: Float>(
235
243
236
244
let mut reqs = vec_of_none ( send. len ( ) ) ;
237
245
loop {
238
- hint:: spin_loop ( ) ;
246
+ if cfg ! ( no_spinloop) {
247
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
248
+ } else {
249
+ hint:: spin_loop ( ) ;
250
+ }
239
251
if rank. load ( std:: sync:: atomic:: Ordering :: Relaxed ) != nrank {
240
252
warn ! ( "rank != nrank" ) ;
241
253
warn ! ( "send thread({}) exit." , i) ;
@@ -260,7 +272,12 @@ fn send_loop<T: Float>(
260
272
let start = std:: time:: Instant :: now ( ) ;
261
273
262
274
loop {
263
- hint:: spin_loop ( ) ;
275
+ if cfg ! ( no_spinloop) {
276
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
277
+ } else {
278
+ hint:: spin_loop ( ) ;
279
+ }
280
+
264
281
if rank. load ( std:: sync:: atomic:: Ordering :: Relaxed ) != nrank {
265
282
warn ! ( "rank != nrank" ) ;
266
283
warn ! ( "send thread({}) exit." , i) ;
@@ -360,7 +377,11 @@ fn recv_loop<T: Float>(
360
377
for ( job_idx, ( readys, recv) ) in recvs. iter_mut ( ) . enumerate ( ) {
361
378
for ready in readys. iter ( ) {
362
379
loop {
363
- hint:: spin_loop ( ) ;
380
+ if cfg ! ( no_spinloop) {
381
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
382
+ } else {
383
+ hint:: spin_loop ( ) ;
384
+ }
364
385
let ready = ready. load ( std:: sync:: atomic:: Ordering :: Relaxed ) ;
365
386
// trace!(
366
387
// "[recv] rank({})/job({}) recv ready: 0b{:016b}",
@@ -382,7 +403,11 @@ fn recv_loop<T: Float>(
382
403
383
404
let mut reqs = vec_of_none ( recv. len ( ) ) ;
384
405
loop {
385
- hint:: spin_loop ( ) ;
406
+ if cfg ! ( no_spinloop) {
407
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
408
+ } else {
409
+ hint:: spin_loop ( ) ;
410
+ }
386
411
if rank. load ( std:: sync:: atomic:: Ordering :: Relaxed ) != nrank {
387
412
warn ! ( "rank != nrank" ) ;
388
413
warn ! ( "recv thread({}) exit." , i) ;
@@ -408,7 +433,12 @@ fn recv_loop<T: Float>(
408
433
let start = std:: time:: Instant :: now ( ) ;
409
434
410
435
loop {
411
- hint:: spin_loop ( ) ;
436
+ if cfg ! ( no_spinloop) {
437
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
438
+ } else {
439
+ hint:: spin_loop ( ) ;
440
+ }
441
+
412
442
if rank. load ( std:: sync:: atomic:: Ordering :: Relaxed ) != nrank {
413
443
warn ! ( "rank != nrank" ) ;
414
444
warn ! ( "recv thread({}) exit." , i) ;
@@ -522,7 +552,12 @@ fn upstream_loop<T: Float>(
522
552
for ( idx, ( send_ready, reduce_readys, buf) ) in jobs. iter_mut ( ) . enumerate ( ) {
523
553
for reduce_ready in reduce_readys. iter ( ) {
524
554
loop {
525
- hint:: spin_loop ( ) ;
555
+ if cfg ! ( no_spinloop) {
556
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
557
+ } else {
558
+ hint:: spin_loop ( ) ;
559
+ }
560
+
526
561
let reduce_ready = reduce_ready. load ( std:: sync:: atomic:: Ordering :: Relaxed ) ;
527
562
if reduce_ready == 0 {
528
563
break ;
@@ -536,7 +571,11 @@ fn upstream_loop<T: Float>(
536
571
}
537
572
538
573
loop {
539
- hint:: spin_loop ( ) ;
574
+ if cfg ! ( no_spinloop) {
575
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
576
+ } else {
577
+ hint:: spin_loop ( ) ;
578
+ }
540
579
let send_ready = send_ready. load ( std:: sync:: atomic:: Ordering :: Relaxed ) ;
541
580
let send_expect = ( 1 << args. send_threads ) - 1 ;
542
581
if send_ready == send_expect {
@@ -554,15 +593,20 @@ fn upstream_loop<T: Float>(
554
593
let mut rrequest: Option < Request > = None ;
555
594
556
595
loop {
596
+ if cfg ! ( no_spinloop) {
597
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
598
+ } else {
599
+ hint:: spin_loop ( ) ;
600
+ }
601
+
557
602
if srequest. is_none ( ) {
558
603
srequest = nccl_net:: isend ( & scomm, send_mh, buf. lock ( ) . as_ref ( ) , tag) . unwrap ( ) ;
559
604
if srequest. is_some ( ) {
560
605
trace ! ( "upstream send : idx: {} start" , idx) ;
561
606
}
562
607
}
563
608
if rrequest. is_none ( ) {
564
- rrequest =
565
- nccl_net:: irecv ( & rcomm, recv_mh, buf. lock ( ) . as_mut ( ) , tag) . unwrap ( ) ;
609
+ rrequest = nccl_net:: irecv ( & rcomm, recv_mh, buf. lock ( ) . as_mut ( ) , tag) . unwrap ( ) ;
566
610
if srequest. is_some ( ) {
567
611
trace ! ( "upstream recv : idx: {} start" , idx) ;
568
612
}
@@ -573,6 +617,12 @@ fn upstream_loop<T: Float>(
573
617
}
574
618
575
619
loop {
620
+ if cfg ! ( no_spinloop) {
621
+ std:: thread:: sleep ( NO_SPINLOOP_INTERVAL ) ;
622
+ } else {
623
+ hint:: spin_loop ( ) ;
624
+ }
625
+
576
626
if srequest. is_some ( ) {
577
627
match nccl_net:: test ( & srequest. as_ref ( ) . unwrap ( ) ) {
578
628
Ok ( ( send_done, _) ) => {
@@ -583,7 +633,7 @@ fn upstream_loop<T: Float>(
583
633
}
584
634
Err ( e) => {
585
635
error ! ( "upstream send : idx: {} error: {:?}" , idx, e) ;
586
- return
636
+ return ;
587
637
}
588
638
}
589
639
}
@@ -597,7 +647,7 @@ fn upstream_loop<T: Float>(
597
647
}
598
648
Err ( e) => {
599
649
error ! ( "upstream recv : idx: {} error: {:?}" , idx, e) ;
600
- return
650
+ return ;
601
651
}
602
652
}
603
653
}
0 commit comments