// scheduler.c
#include "debug.h"
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#ifdef __linux__
#include <sched.h>
#endif
#include <stdio.h>
#include <string.h>
#include <unwind.h>
#ifdef __APPLE__
#include <mach/mach_time.h>
#endif
#include "cilk-internal.h"
#include "cilk2c.h"
#include "closure.h"
#include "fiber-header.h"
#include "fiber.h"
#include "frame.h"
#include "global.h"
#include "jmpbuf.h"
#include "local-hypertable.h"
#include "local.h"
#include "readydeque.h"
#include "scheduler.h"
#include "worker_coord.h"
#include "worker_sleep.h"
// ==============================================
// Global and thread-local variables.
// ==============================================
// Boolean tracking whether the Cilk program is using an extension, e.g.,
// pedigrees.
bool __cilkrts_use_extension = false;
// Boolean tracking whether the execution is currently in a cilkified region.
bool __cilkrts_need_to_cilkify = true;
// TLS pointer to the current worker structure.
__thread __cilkrts_worker *__cilkrts_tls_worker = &default_worker;
// TLS pointer to the current fiber header.
//
// Although we could store the current fiber header in the worker, the code on
// the worker needs to access the current fiber header more frequently than the
// worker itself. Thus, it's notably faster to store a pointer to the current
// fiber header itself in TLS.
__thread struct cilk_fiber *__cilkrts_current_fh = NULL;
// ==============================================
// Misc. helper functions
// ==============================================
/***********************************************************
* Internal random number generator.
***********************************************************/
static void rts_srand(__cilkrts_worker *const w, unsigned int seed) {
w->l->rand_next = seed;
}
static unsigned int update_rand_state(unsigned int state) {
return state * 1103515245 + 12345;
}
static unsigned int get_rand(unsigned int state) {
return state >> 16;
}
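/* Note: update_rand_state is the classic ANSI C example LCG (multiplier
1103515245, increment 12345, modulus 2^32 via unsigned wraparound).
get_rand discards the low-order bits because, for a power-of-two-modulus
LCG, the low-order bits have short periods; using the high bits gives
better-distributed victim choices during random work-stealing. */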
static void worker_change_state(__cilkrts_worker *w,
enum __cilkrts_worker_state s) {
/* TODO: Update statistics based on state change. */
CILK_ASSERT(w->l->state != s);
w->l->state = s;
#if 0
/* This is a valuable assertion but there is no way to make it
work. The reducer map is not moved out of the worker until
long after scheduler state has been entered. */
if (s != WORKER_RUN) {
CILK_ASSERT(w->reducer_map == NULL);
}
#endif
}
/***********************************************************
* Managing the 'E' in the THE protocol
***********************************************************/
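/* Background (a sketch, not the exact compiled code): in the THE
protocol (Frigo et al., PLDI '98), each deque is described by Tail,
Head, and Exception pointers. A thief increments the victim's E
pointer before reading H and T, so that the victim's pop in the
compiled epilogue,

    --tail;
    if (exc >= tail)
        Cilk_exception_handler(...);

conservatively takes the slow path whenever a thief may have claimed
the frame being popped. */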
static void increment_exception_pointer(worker_id self,
__cilkrts_worker *const victim_w,
Closure *cl) {
Closure_assert_ownership(self, cl);
CILK_ASSERT(cl->status == CLOSURE_RUNNING);
__cilkrts_stack_frame **exc =
atomic_load_explicit(&victim_w->exc, memory_order_relaxed);
if (exc != EXCEPTION_INFINITY) {
/* SEQ_CST order is required between increment of exc and test of tail.
Currently do_dekker_on has a fence. */
atomic_store_explicit(&victim_w->exc, exc + 1, memory_order_relaxed);
}
}
static void decrement_exception_pointer(worker_id self,
__cilkrts_worker *const victim_w,
Closure *cl) {
Closure_assert_ownership(self, cl);
__cilkrts_stack_frame **exc =
atomic_load_explicit(&victim_w->exc, memory_order_relaxed);
if (exc != EXCEPTION_INFINITY) {
atomic_store_explicit(&victim_w->exc, exc - 1, memory_order_relaxed);
}
}
static void reset_exception_pointer(__cilkrts_worker *const w, worker_id self,
Closure *cl) {
Closure_assert_ownership(self, cl);
CILK_ASSERT((cl->frame == NULL) || (cl->fiber->worker == w));
atomic_store_explicit(&w->exc,
atomic_load_explicit(&w->head, memory_order_relaxed),
memory_order_release);
}
/* Unused for now but may be helpful later
static void signal_immediate_exception_to_all(__cilkrts_worker *const w) {
int i, active_size = w->g->nworkers;
__cilkrts_worker *curr_w;
for(i=0; i<active_size; i++) {
curr_w = w->g->workers[i];
curr_w->exc = EXCEPTION_INFINITY;
}
// make sure the exception is visible, before we continue
Cilk_fence();
}
*/
static void setup_for_execution(__cilkrts_worker *w, Closure *t) {
cilkrts_alert(SCHED, "(setup_for_execution) closure %p", (void *)t);
struct cilk_fiber *fh = t->fiber;
fh->worker = w;
Closure_set_status(t, CLOSURE_RUNNING);
__cilkrts_stack_frame **init = w->l->shadow_stack;
atomic_store_explicit(&w->head, init, memory_order_relaxed);
atomic_store_explicit(&w->exc, init, memory_order_relaxed);
atomic_store_explicit(&w->tail, init, memory_order_release);
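/* The release store to tail pairs with the acquire load of tail in
do_dekker_on: a thief that observes the new tail value also observes
the matching head and exc values stored above. */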
/* push the first frame on the current_stack_frame */
__cilkrts_stack_frame *sf = t->frame;
fh->current_stack_frame = sf;
sf->fh = fh;
__cilkrts_current_fh = fh;
}
// ANGE: When this is called, either a) a worker is about to pass a sync (though
// not on the right fiber), or b) a worker just performed a provably good steal
// successfully
// JFC: This is called from
// worker_scheduler -> ... -> Closure_return -> provably_good_steal_maybe
// user code -> __cilkrts_sync -> Cilk_sync
static void setup_for_sync(__cilkrts_worker *w, worker_id self, Closure *t) {
Closure_assert_ownership(self, t);
// ANGE: this must be true since in case a) we would have freed it in
// Cilk_sync, or in case b) we would have freed it when we first returned to
// the runtime before doing the provably good steal.
CILK_ASSERT(t->fiber != t->fiber_child);
// ANGE: note that in case a) this fiber won't get freed for a while,
// since we will longjmp back to the original function's fiber and
// not return to the runtime immediately; we only free it once we
// eventually get back to the runtime or encounter a case where we
// need to free it.
if (t->fiber)
cilk_fiber_deallocate_to_pool(w, t->fiber);
t->fiber = t->fiber_child;
t->fiber_child = NULL;
if (USE_EXTENSION) {
if (t->ext_fiber)
cilk_fiber_deallocate_to_pool(w, t->ext_fiber);
t->ext_fiber = t->ext_fiber_child;
t->ext_fiber_child = NULL;
}
CILK_ASSERT(t->fiber);
// __cilkrts_alert(STEAL | ALERT_FIBER,
// "(setup_for_sync) set t %p and t->fiber %p", (void *)t,
// (void *)t->fiber);
__cilkrts_set_synced(t->frame);
struct cilk_fiber *fh = t->fiber;
__cilkrts_current_fh = fh;
t->frame->fh = fh;
fh->worker = w;
CILK_ASSERT_POINTER_EQUAL(fh->current_stack_frame, t->frame);
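/* Restore the stack pointer saved when this frame was first stolen
(promote_child records SP(frame) in orig_rsp), so the frame resumes
at its original depth on the reacquired fiber. */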
SP(t->frame) = (void *)t->orig_rsp;
if (USE_EXTENSION) {
// Set the worker's extension (analogous to updating the worker's stack
// pointer).
w->extension = t->frame->extension;
// Set the worker's extension stack to be the start of the saved
// extension fiber.
w->ext_stack = sysdep_get_stack_start(t->ext_fiber);
}
t->orig_rsp = NULL; // unset once we have sync-ed
}
// ==============================================
// TLS related functions
// ==============================================
CHEETAH_INTERNAL void __cilkrts_set_tls_worker(__cilkrts_worker *w) {
__cilkrts_tls_worker = w;
}
// ==============================================
// Closure return protocol related functions
// ==============================================
/* Doing an "unconditional steal" to steal back the call parent closure */
static Closure *setup_call_parent_resumption(ReadyDeque *deques,
__cilkrts_worker *const w,
worker_id self,
Closure *t) {
deque_assert_ownership(deques, self, self);
Closure_assert_ownership(self, t);
CILK_ASSERT_POINTER_EQUAL(w, __cilkrts_get_tls_worker());
CILK_ASSERT_POINTER_EQUAL(w->head, w->tail);
Closure_change_status(t, CLOSURE_SUSPENDED, CLOSURE_RUNNING);
return t;
}
void Cilk_set_return(__cilkrts_worker *const w) {
Closure *t;
cilkrts_alert(RETURN, "(Cilk_set_return)");
ReadyDeque *deques = w->g->deques;
worker_id self = w->self;
deque_lock_self(deques, self);
t = deque_peek_bottom(deques, self, self);
Closure_lock(self, t);
CILK_ASSERT(t->status == CLOSURE_RUNNING);
CILK_ASSERT(Closure_has_children(t) == 0);
// all hyperobjects from child or right sibling must have been reduced
CILK_ASSERT(t->child_ht == (hyper_table *)NULL &&
t->right_ht == (hyper_table *)NULL);
CILK_ASSERT(t->call_parent);
CILK_ASSERT_NULL(t->spawn_parent);
CILK_ASSERT((t->frame->flags & CILK_FRAME_DETACHED) == 0);
Closure *call_parent = t->call_parent;
Closure *t1 = deque_xtract_bottom(deques, self, self);
USE_UNUSED(t1);
CILK_ASSERT_POINTER_EQUAL(t, t1);
CILK_ASSERT(__cilkrts_stolen(t->frame));
deque_add_bottom(deques, call_parent, self, self);
t->frame = NULL;
Closure_unlock(self, t);
Closure_lock(self, call_parent);
CILK_ASSERT_POINTER_EQUAL(call_parent->fiber, t->fiber);
t->fiber = NULL;
if (USE_EXTENSION) {
CILK_ASSERT_POINTER_EQUAL(call_parent->ext_fiber, t->ext_fiber);
t->ext_fiber = NULL;
}
Closure_remove_callee(call_parent);
setup_call_parent_resumption(deques, w, self, call_parent);
Closure_unlock(self, call_parent);
deque_unlock_self(deques, self);
Closure_destroy(w, t);
}
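/* A "provably good steal" (in the sense of Blumofe and Leiserson)
resumes a suspended parent closure when its last outstanding spawned
child returns, i.e., exactly when the parent's sync is guaranteed to
succeed. */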
static Closure *provably_good_steal_maybe(__cilkrts_worker *const w,
worker_id self, Closure *parent) {
Closure_assert_ownership(self, parent);
local_state *l = w->l;
// cilkrts_alert(STEAL, "(provably_good_steal_maybe) cl %p",
// (void *)parent);
CILK_ASSERT(!l->provably_good_steal);
if (!Closure_has_children(parent) && parent->status == CLOSURE_SUSPENDED) {
// cilkrts_alert(STEAL | ALERT_SYNC,
// "(provably_good_steal_maybe) completing a sync");
CILK_ASSERT(parent->frame != NULL);
/* do a provably-good steal; this is *really* simple */
l->provably_good_steal = true;
setup_for_sync(w, self, parent);
CILK_ASSERT(parent->owner_ready_deque == NO_WORKER);
Closure_make_ready(parent);
cilkrts_alert(STEAL | ALERT_SYNC,
"(provably_good_steal_maybe) returned %p",
(void *)parent);
return parent;
}
return NULL;
}
/***
* Return protocol for a spawned child.
*
* Some notes on reducer implementation (which was taken out):
*
* If any reducer is accessed by the child closure, we need to reduce the
* reducer view with the child's right_ht, and its left sibling's
* right_ht (or parent's child_ht if it's the left most child)
* before we unlink the child from its sibling closure list.
*
* When we modify the sibling links (left_sib / right_sib), we always lock
* the parent and the child. When we retrieve the reducer maps from left
* sibling or parent from their place holders (right_ht / child_ht),
* we always lock the closure from which we are getting the maps.
* The locking order is always parent first then child, right child first,
* then left.
*
* Once we have done the reduce operation, we try to deposit reducers
* from the child to either its left sibling's right_ht or parent's
* child_ht. Note that even though we have performed the reduce, by
* the time we deposit the views, the child's left sibling may have
* changed, or the child may have become the new leftmost child. Similarly,
* the child's right_ht may have something new again. If that's the
* case, we need to do the reduce again.
*
* This function returns a closure to be executed next, or NULL if none.
* The child must not be locked by us and must not be in any deque.
***/
static Closure *Closure_return(__cilkrts_worker *const w, worker_id self,
Closure *child) {
Closure *res = (Closure *)NULL;
Closure *const parent = child->spawn_parent;
CILK_ASSERT(child);
CILK_ASSERT(child->join_counter == 0);
CILK_ASSERT(child->status == CLOSURE_RETURNING);
CILK_ASSERT(child->owner_ready_deque == NO_WORKER);
Closure_assert_alienation(self, child);
CILK_ASSERT(child->has_cilk_callee == 0);
CILK_ASSERT_NULL(child->call_parent);
CILK_ASSERT(parent != NULL);
cilkrts_alert(RETURN, "(Closure_return) child %p, parent %p",
(void *)child, (void *)parent);
/* The frame should have passed a sync successfully meaning it
has not accumulated any maps from its children and the
active map is in the worker rather than the closure. */
CILK_ASSERT(!child->child_ht && !child->user_ht);
/* If in the future the worker's map is not created lazily,
assert it is not null here. */
/* need a loop as multiple siblings can return while we
are performing reductions */
// always lock from top to bottom
Closure_lock(self, parent);
Closure_lock(self, child);
// Deal with reducers.
// Get the current active hypermap.
hyper_table *active_ht = w->hyper_table;
w->hyper_table = NULL;
while (true) {
// invariant: a closure cannot unlink itself w/out lock on parent
// so what this points to cannot change while we have lock on parent
hyper_table *rht = child->right_ht;
child->right_ht = NULL;
// Get the "left" hypermap, which either belongs to a left sibling, if
// it exists, or the parent, otherwise.
hyper_table **lht_ptr;
Closure *const left_sib = child->left_sib;
if (left_sib != NULL) {
lht_ptr = &left_sib->right_ht;
} else {
lht_ptr = &parent->child_ht;
}
hyper_table *lht = *lht_ptr;
*lht_ptr = NULL;
// If we have no hypermaps on either the left or right, deposit the
// active hypermap and break from the loop.
if (lht == NULL && rht == NULL) {
/* deposit views */
*lht_ptr = active_ht;
break;
}
Closure_unlock(self, child);
Closure_unlock(self, parent);
// merge reducers
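// Merge order matters: reducer views must be combined left-to-right,
// so the left hypermap is always the left argument of merge_two_hts
// and the right hypermap is always the right argument.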
if (lht) {
active_ht = merge_two_hts(lht, active_ht);
}
if (rht) {
active_ht = merge_two_hts(active_ht, rht);
}
Closure_lock(self, parent);
Closure_lock(self, child);
}
/* The returning closure and its parent are locked. */
// Execute left-holder logic for stacks.
if (child->left_sib || parent->fiber_child) {
// Case where we are not the leftmost stack.
CILK_ASSERT(parent->fiber_child != child->fiber);
cilk_fiber_deallocate_to_pool(w, child->fiber);
if (USE_EXTENSION && child->ext_fiber) {
cilk_fiber_deallocate_to_pool(w, child->ext_fiber);
}
} else {
// We are leftmost, pass stack/fiber up to parent.
// Thus, no stack/fiber to free.
CILK_ASSERT_POINTER_EQUAL(
parent->frame, child->fiber->current_stack_frame);
parent->fiber_child = child->fiber;
if (USE_EXTENSION) {
parent->ext_fiber_child = child->ext_fiber;
}
}
child->fiber = NULL;
child->ext_fiber = NULL;
// Propagate whether the parent needs to handle an exception. We could
// check the hypermap for an exception reducer, but using a separate boolean
// avoids the expense of a table lookup.
if (child->exception_pending) {
parent->exception_pending = true;
parent->frame->flags |= CILK_FRAME_EXCEPTION_PENDING;
}
Closure_remove_child(self, parent, child); // unlink child from tree
// we have deposited our views and unlinked; we can quit now
// invariant: we can only decide to quit when we see no more maps
// from the right, we have deposited our own views, and have unlinked
// from the tree. All of this is done while holding the lock on the
// parent. Previously, another worker could deposit more views into our
// right_ht slot after we decided to quit, but now this cannot
// occur, because a worker depositing views into our right_ht must
// also hold the lock on the parent to do so.
Closure_unlock(self, child);
/* Closure_unlock(parent);*/
Closure_destroy(w, child);
/* Closure_lock(parent);*/
CILK_ASSERT(parent->status != CLOSURE_RETURNING);
CILK_ASSERT(parent->frame != NULL);
// CILK_ASSERT(parent->frame->magic == CILK_STACKFRAME_MAGIC);
CILK_ASSERT(parent->join_counter);
--parent->join_counter;
res = provably_good_steal_maybe(w, self, parent);
if (res) {
hyper_table *child_ht = parent->child_ht;
hyper_table *active_ht = parent->user_ht;
parent->child_ht = NULL;
parent->user_ht = NULL;
w->hyper_table = merge_two_hts(child_ht, active_ht);
setup_for_execution(w, res);
}
Closure_unlock(self, parent);
return res;
}
/*
* ANGE: t is returning; call the return protocol; see comments above
* Closure_return. res is either the next closure to execute
* (provably-good-steal the parent closure), or NULL if nothing should be
* executed next.
*
* Only called from do_what_it_says when the closure->status =
* CLOSURE_RETURNING
*/
static Closure *return_value(__cilkrts_worker *const w, worker_id self,
Closure *t) {
cilkrts_alert(RETURN, "(return_value) closure %p", (void *)t);
Closure *res = NULL;
CILK_ASSERT(t->status == CLOSURE_RETURNING);
CILK_ASSERT_NULL(t->call_parent);
if (t->call_parent == NULL) {
res = Closure_return(w, self, t);
} /* else {
// ANGE: the ONLY way a closure with call parent can reach here
// is when the user program calls Cilk_exit, leading to global abort
// Not supported at the moment
} */
cilkrts_alert(RETURN, "(return_value) returning closure %p", (void *)t);
return res;
}
/*
* This is called from the user code (in cilk2c_inlined) if E >= T. Two
* possibilities:
* 1. Someone stole the last frame from this worker, hence E >= T when child
* returns.
* 2. Someone invokes signal_immediate_exception with the closure currently
* running on the worker's deque. This is only possible with abort.
*/
void Cilk_exception_handler(__cilkrts_worker *w, char *exn) {
Closure *t;
worker_id self = w->self;
ReadyDeque *deques = w->g->deques;
deque_lock_self(deques, self);
t = deque_peek_bottom(deques, self, self);
CILK_ASSERT(t);
Closure_lock(self, t);
cilkrts_alert(EXCEPT, "(Cilk_exception_handler) closure %p!", (void *)t);
/* Reset the E pointer. */
reset_exception_pointer(w, self, t);
CILK_ASSERT(t->status == CLOSURE_RUNNING ||
// for during abort process
t->status == CLOSURE_RETURNING);
/* These will not change while the deque is locked. */
__cilkrts_stack_frame **head =
atomic_load_explicit(&w->head, memory_order_relaxed);
__cilkrts_stack_frame **tail =
atomic_load_explicit(&w->tail, memory_order_relaxed);
if (head > tail) {
cilkrts_alert(EXCEPT, "(Cilk_exception_handler) this is a steal!");
if (NULL != exn) {
// The spawned child is throwing an exception. Save that exception
// object for later processing.
struct closure_exception *exn_r =
(struct closure_exception *)internal_reducer_lookup(
w, &exception_reducer, sizeof(exception_reducer),
init_exception_reducer, reduce_exception_reducer);
exn_r->exn = exn;
t->exception_pending = true;
}
if (t->status == CLOSURE_RUNNING) {
CILK_ASSERT(Closure_has_children(t) == 0);
Closure_set_status(t, CLOSURE_RETURNING);
}
w->l->returning = true;
Closure_unlock(self, t);
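// Note the asymmetry with the false-alarm path below: the deque
// lock is deliberately not released here. longjmp_to_runtime does
// not return, so the lock is released later, from the runtime, as
// part of the return protocol.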
longjmp_to_runtime(w); // NOT returning back to user code
} else { // not steal, not abort; false alarm
Closure_unlock(self, t);
deque_unlock_self(deques, self);
return;
}
}
// ==============================================
// Steal related functions
// ==============================================
static inline bool trivial_stacklet(const __cilkrts_stack_frame *head) {
assert(head);
bool is_trivial = (head->flags & CILK_FRAME_DETACHED);
return is_trivial;
}
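/* A stacklet is "trivial" when the frame at the top of the deque is
itself detached, i.e., it is a lone spawn helper created by spawning
an expression (see the case analysis in promote_child). */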
/*
* This returns the oldest frame in the stacklet that has not been promoted
* to a full frame (i.e., has never been stolen), or the closest detached
* frame if nothing in this stacklet has been promoted.
*/
static inline __cilkrts_stack_frame *
oldest_non_stolen_frame_in_stacklet(__cilkrts_stack_frame *head) {
__cilkrts_stack_frame *cur = head;
while (cur && (cur->flags & CILK_FRAME_DETACHED) == 0 && cur->call_parent &&
__cilkrts_not_stolen(cur->call_parent)) {
cur = cur->call_parent;
}
return cur;
}
static Closure *setup_call_parent_closure_helper(
__cilkrts_worker *const w, __cilkrts_worker *const victim_w,
__cilkrts_stack_frame *frame, void *extension, Closure *oldest) {
Closure *call_parent, *curr_cl;
if (oldest->frame == frame) {
CILK_ASSERT(__cilkrts_stolen(oldest->frame));
CILK_ASSERT(oldest->fiber);
return oldest;
}
call_parent = setup_call_parent_closure_helper(
w, victim_w, frame->call_parent, extension, oldest);
__cilkrts_set_stolen(frame);
curr_cl = Closure_create(w, frame);
CILK_ASSERT(call_parent->fiber);
Closure_set_status(curr_cl, CLOSURE_SUSPENDED);
curr_cl->fiber = call_parent->fiber;
if (USE_EXTENSION) {
curr_cl->frame->extension = extension;
curr_cl->ext_fiber = call_parent->ext_fiber;
}
Closure_add_callee(call_parent, curr_cl);
return curr_cl;
}
/***
* ANGE: youngest_cl is the spawning parent that the thief is trying to
* extract and resume. Temporarily its call_parent is pointing to the
* oldest closure on top of victim's deque when the steal occurs.
* There may be more frames between them (i.e., the stacklet contains more
* than two frames) that require promotion. This function promotes
* and suspends them.
***/
static void setup_closures_in_stacklet(__cilkrts_worker *const w,
__cilkrts_worker *const victim_w,
Closure *youngest_cl) {
Closure *call_parent;
Closure *oldest_cl = youngest_cl->call_parent;
__cilkrts_stack_frame *youngest, *oldest;
youngest = youngest_cl->frame;
void *extension = USE_EXTENSION ? youngest->extension : NULL;
oldest = oldest_non_stolen_frame_in_stacklet(youngest);
CILK_ASSERT_POINTER_EQUAL(youngest, youngest_cl->frame);
CILK_ASSERT(__cilkrts_stolen(youngest));
CILK_ASSERT((oldest_cl->frame == NULL && oldest != youngest) ||
(oldest_cl->frame == oldest->call_parent &&
__cilkrts_stolen(oldest_cl->frame)));
if (oldest_cl->frame == NULL) {
CILK_ASSERT(__cilkrts_not_stolen(oldest));
CILK_ASSERT(oldest->flags & CILK_FRAME_DETACHED);
__cilkrts_set_stolen(oldest);
oldest_cl->frame = oldest;
if (USE_EXTENSION) {
oldest_cl->frame->extension = extension;
}
}
call_parent = setup_call_parent_closure_helper(
w, victim_w, youngest->call_parent, extension, oldest_cl);
CILK_ASSERT(youngest_cl->fiber != oldest_cl->fiber);
Closure_add_callee(call_parent, youngest_cl);
}
/*
* Do the thief part of Dekker's protocol. Return the head pointer upon
* success, NULL otherwise. The protocol fails when the victim already popped T
* so that E=T.
*/
static __cilkrts_stack_frame **do_dekker_on(worker_id self,
__cilkrts_worker *const victim_w,
Closure *cl) {
Closure_assert_ownership(self, cl);
increment_exception_pointer(self, victim_w, cl);
/* Force a global order between the increment of exc above and any
decrement of tail by the victim. __cilkrts_leave_frame must also
have a SEQ_CST fence or atomic. Additionally the increment of
tail in compiled code has release semantics and needs to be paired
with an acquire load unless there is an intervening fence. */
atomic_thread_fence(memory_order_seq_cst);
/*
* The thief won't steal from this victim if there is only one frame on cl's
* stack
*/
__cilkrts_stack_frame **head =
atomic_load_explicit(&victim_w->head, memory_order_relaxed);
__cilkrts_stack_frame **tail =
atomic_load_explicit(&victim_w->tail, memory_order_acquire);
if (head >= tail) {
decrement_exception_pointer(self, victim_w, cl);
return NULL;
}
return head;
}
/***
* Promote the child frame of parent to a full closure.
* Detach the parent and return it.
*
* Assumptions: the parent is running on victim, and we own
* the locks of both parent and deque[victim].
* The child keeps running on the same cache as the parent.
* The parent's join counter is incremented.
*
* In order to promote a child frame to a closure,
* the parent's frame must be the last in its ready queue.
*
* Returns the child.
*
* ANGE: I don't think this function actually detaches the parent. Whoever
* calls this function has to do deque_xtract_top on the victim's
* deque to get the parent closure. This is the only time I can
* think of where the ready deque contains more than one frame.
***/
static Closure *promote_child(__cilkrts_stack_frame **head, ReadyDeque *deques,
__cilkrts_worker *const w,
__cilkrts_worker *const victim_w, Closure *cl,
Closure **res, worker_id self, worker_id pn) {
deque_assert_ownership(deques, self, pn);
Closure_assert_ownership(self, cl);
CILK_ASSERT(cl->status == CLOSURE_RUNNING);
CILK_ASSERT(cl->owner_ready_deque == pn);
CILK_ASSERT_NULL(cl->next_ready);
/* cl may have a call parent: it might be promoted as its containing
* stacklet is stolen, and its call parent is promoted into a full
* closure and suspended
*/
CILK_ASSERT(cl == w->g->root_closure || cl->spawn_parent ||
cl->call_parent);
Closure *spawn_parent = NULL;
__cilkrts_stack_frame *frame_to_steal = *head;
// ANGE: This must be true if we get this far.
// Note that it can be that H == T here; victim could have done T-- after
// the thief passes Dekker, in which case, thief gets the last frame, and H
// == T. Victim won't be able to proceed further until the thief finishes
// stealing, releasing the deque lock; at which point, the victim will
// realize that it should return back to runtime.
//
// These assertions are commented out because they can impact performance
// noticeably by introducing contention.
/* CILK_ASSERT(head <= victim_w->exc); */
/* CILK_ASSERT(head <= victim_w->tail); */
CILK_ASSERT(frame_to_steal != NULL);
// ANGE: if cl's frame is set AND equal to the frame at *HEAD, cl must be
// either the root frame or have been stolen before. On the other hand, if
// cl's frame is not set, the top stacklet may contain one frame (the
// detached spawn helper resulting from spawning an expression) or more than
// one frame, where the right-most (oldest) frame is a spawn helper that
// called a Cilk function (a regular cilk_spawn of a function).
if (cl->frame == frame_to_steal) { // stolen before
CILK_ASSERT(__cilkrts_stolen(frame_to_steal));
spawn_parent = cl;
} else if (trivial_stacklet(frame_to_steal)) { // spawning expression
CILK_ASSERT(__cilkrts_not_stolen(frame_to_steal));
CILK_ASSERT(frame_to_steal->call_parent &&
__cilkrts_stolen(frame_to_steal->call_parent));
CILK_ASSERT((frame_to_steal->flags & CILK_FRAME_LAST) == 0);
__cilkrts_set_stolen(frame_to_steal);
Closure_set_frame(cl, frame_to_steal);
spawn_parent = cl;
} else { // spawning a function and stacklet never gotten stolen before
// cl->frame could either be NULL or some older frame (e.g.,
// cl->frame was stolen and resumed, it calls another frame which
// spawned, and the spawned frame is the frame_to_steal now). ANGE:
// if this is the case, we must create a new Closure representing
// the left-most frame (the one to be stolen and resume).
spawn_parent = Closure_create(w, frame_to_steal);
__cilkrts_set_stolen(frame_to_steal);
Closure_set_status(spawn_parent, CLOSURE_RUNNING);
// At this point, spawn_parent is a new Closure formally associated with
// the stolen frame, frame_to_steal, meaning that spawn_parent->frame is
// set to point to frame_to_steal. The remainder of this function will
// insert spawn_parent as a callee of cl and create a new Closure,
// spawn_child, for the spawned child computation. The spawn_child
// Closure is nominally associated with the child of the stolen frame,
// but the Closure's frame pointer is not set.
//
// Pictorially, this code path organizes the Closures and stack frames
// as follows, where cl->frame may or may not be set to point to a stack
// frame, as denoted by the dashed arrow.
//
// *Closures* *Stack frames*
// +----+
// | cl | - - - - - - > (called frame or spawn helper)
// +----+ ^
// ^ ... |
// | (zero or more called frames)
// v ... ^
// +--------------+ |
// | spawn_parent | --> (frame_to_steal)
// +--------------+ ^
// ^ |
// | |
// v |
// +-------------+ |
// | spawn_child | (spawn helper)
// +-------------+
//
// There is no need to promote any of the called frames between the
// frame_to_steal and the frame (nominally) associated with cl. All of
// those frames are called frames. When frame_to_steal returns,
// spawn_parent will be popped, returning the closure tree to a familiar
// state: cl will be (nominally) associated with a frame that has called
// frames below it, and a worker will be working on the bottommost of
// those called frames.
Closure_add_callee(cl, spawn_parent);
spawn_parent->call_parent = cl;
// suspend cl & remove it from deque
Closure_suspend_victim(deques, self, pn, cl);
Closure_unlock(self, cl);
Closure_lock(self, spawn_parent);
*res = spawn_parent;
}
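// Record the stolen frame's stack pointer (only on the first steal)
// so that setup_for_sync can restore it when this frame later syncs.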
if (spawn_parent->orig_rsp == NULL) {
spawn_parent->orig_rsp = SP(frame_to_steal);
}
CILK_ASSERT(spawn_parent->has_cilk_callee == 0);
// ANGE: we set this frame lazily
Closure *spawn_child = Closure_create(w, NULL);
spawn_child->spawn_parent = spawn_parent;
Closure_set_status(spawn_child, CLOSURE_RUNNING);
/***
* Register this child, which sets up its sibling links.
* We do this here instead of in finish_promote, because we must set up
* the sibling links for the new child before its pointer escapes.
***/
Closure_add_child(self, spawn_parent, spawn_child);
++spawn_parent->join_counter;
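// Publishing head + 1 with release semantics completes the theft of
// frame_to_steal: the victim's subsequent head/tail comparisons will
// see the advanced head and leave the stolen frame alone.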
atomic_store_explicit(&victim_w->head, head + 1, memory_order_release);
/* insert the closure on the victim processor's deque */
deque_add_bottom(deques, spawn_child, self, pn);
/* at this point the child can be freely executed */
return spawn_child;
}
/***
* Finishes the promotion process. The child is already fully promoted
* and requires no more work (we only use the given pointer to identify
* the child). This function does some more work on the parent to make
* the promotion complete.
*
* ANGE: This includes promoting everything along the stolen stacklet
* into full closures.
***/
static void finish_promote(__cilkrts_worker *const w, worker_id self,
__cilkrts_worker *const victim_w, Closure *parent,
bool has_frames_to_promote) {
Closure_assert_ownership(self, parent);
CILK_ASSERT(parent->has_cilk_callee == 0);
CILK_ASSERT(__cilkrts_stolen(parent->frame));
// ANGE: if there are more frames to promote, the youngest frame that we
// are stealing (i.e., parent) has been promoted and its closure call_parent
// has been set to the closure of the oldest frame in the stacklet
// temporarily, with multiple shadow frames in between that still need
// their own closure. Set those up.
if (has_frames_to_promote) {
setup_closures_in_stacklet(w, victim_w, parent);
}
__cilkrts_set_unsynced(parent->frame);
/* Make the parent ready */
Closure_make_ready(parent);
return;
}
/***
* ANGE: This function promotes all frames in the top-most stacklet into
* their own closures and also creates a new child closure to leave with
* the victim. Normally this is invoked by Closure_steal, but a worker
* may also invoke Closure_steal on itself (for the purpose of race-detecting
* Cilk code with reducers). Thus, this function is written to check for
* that --- if w == victim_w, we don't actually create a new fiber for
* the stolen parent.
*
* NOTE: this function assumes that w holds the lock on victim_w's deque
* and Closure cl and releases them before returning.
***/
static Closure *extract_top_spawning_closure(__cilkrts_stack_frame **head,
ReadyDeque *deques,
__cilkrts_worker *const w,
__cilkrts_worker *const victim_w,
Closure *cl, worker_id self,
worker_id victim_id) {
Closure *res = NULL, *child;
struct cilk_fiber *parent_fiber = cl->fiber;
struct cilk_fiber *parent_ext_fiber = cl->ext_fiber;
deque_assert_ownership(deques, self, victim_id);
Closure_assert_ownership(self, cl);
CILK_ASSERT(parent_fiber);
/*
* if dekker passes, promote the child to a full closure,
* and steal the parent
*/
child = promote_child(head, deques, w, victim_w, cl, &res, self, victim_id);
cilkrts_alert(STEAL,
"(Closure_steal) promote gave cl/res/child = %p/%p/%p",
(void *)cl, (void *)res, (void *)child);
/* detach the parent */
if (res == (Closure *)NULL) {
// ANGE: in this case, the spawning parent to steal / resume
// is simply cl (i.e., there is only one frame in the stacklet),
// so we didn't set res in promote_child.
res = deque_xtract_top(deques, self, victim_id);
CILK_ASSERT_POINTER_EQUAL(cl, res);
}
res->fiber = cilk_fiber_allocate_from_pool(w);
if (USE_EXTENSION) {
res->ext_fiber = cilk_fiber_allocate_from_pool(w);
}
// make sure we are not holding the lock on child
Closure_assert_alienation(self, child);
child->fiber = parent_fiber;
if (USE_EXTENSION) {
child->ext_fiber = parent_ext_fiber;
}
return res;
}
/*
* stealing protocol. Tries to steal from the victim; returns a
* stolen closure, or NULL if none.
*/
static Closure *Closure_steal(__cilkrts_worker **workers,
ReadyDeque *deques,
__cilkrts_worker *const w,
worker_id self, worker_id victim) {
Closure *cl;
Closure *res = (Closure *)NULL;
__cilkrts_worker *victim_w;
victim_w = workers[victim];