Skip to content

Commit

Permalink
VM barrier needs to store GC root
Browse files Browse the repository at this point in the history
While waiting on the VM barrier, the machine context needs to be
stored as a GC root.

It also needs to wait for barrier synchronization correctly with a
`while` loop (to handle consecutive barrier syncs initiated by another ractor).

This is why GC marking misses occurred on ARM machines,
like: https://rubyci.s3.amazonaws.com/fedora40-arm/ruby-master/log/20240702T063002Z.fail.html.gz
  • Loading branch information
ko1 committed Jul 4, 2024
1 parent 478ada0 commit 4949f89
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 14 deletions.
3 changes: 2 additions & 1 deletion thread_pthread.c
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ thread_sched_setup_running_threads(struct rb_thread_sched *sched, rb_ractor_t *c
}

if (add_th) {
if (UNLIKELY(vm->ractor.sched.barrier_waiting)) {
while (UNLIKELY(vm->ractor.sched.barrier_waiting)) {
RUBY_DEBUG_LOG("barrier-wait");

ractor_sched_barrier_join_signal_locked(vm);
Expand All @@ -605,6 +605,7 @@ thread_sched_setup_running_threads(struct rb_thread_sched *sched, rb_ractor_t *c
ccan_list_add(&vm->ractor.sched.running_threads, &add_th->sched.node.running_threads);
vm->ractor.sched.running_cnt++;
sched->is_running = true;
VM_ASSERT(!vm->ractor.sched.barrier_waiting);
}

if (add_timeslice_th) {
Expand Down
40 changes: 27 additions & 13 deletions vm_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,26 @@ rb_vm_locked_p(void)
return vm_locked(GET_VM());
}

/* Report whether a VM barrier is currently being waited on.
 * The flag lives in a different field depending on the thread model:
 * the pthread scheduler keeps it under ractor.sched, other models
 * under ractor.sync. */
static bool
vm_need_barrier_waiting(const rb_vm_t *vm)
{
    bool waiting;
#ifdef RUBY_THREAD_PTHREAD_H
    waiting = vm->ractor.sched.barrier_waiting;
#else
    waiting = vm->ractor.sync.barrier_waiting;
#endif
    return waiting;
}

/* Decide whether the current ractor must join the pending VM barrier.
 * Never joins when the caller explicitly opted out via no_barrier.
 * Under the pthread scheduler, only ractors that have running threads
 * participate in the barrier. */
static bool
vm_need_barrier(bool no_barrier, const rb_ractor_t *cr, const rb_vm_t *vm)
{
    if (no_barrier) {
        return false;
    }
#ifdef RUBY_THREAD_PTHREAD_H
    if (cr->threads.sched.running == NULL) {
        // ractor has no running threads; it does not join the barrier
        return false;
    }
#endif
    return vm_need_barrier_waiting(vm);
}

static void
vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS)
{
Expand All @@ -58,23 +78,17 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsign
VM_ASSERT(vm->ractor.sync.lock_owner == NULL);
VM_ASSERT(vm->ractor.sync.lock_rec == 0);

#ifdef RUBY_THREAD_PTHREAD_H
if (!no_barrier &&
cr->threads.sched.running != NULL // ractor has running threads.
) {
// barrier
if (vm_need_barrier(no_barrier, cr, vm)) {
rb_execution_context_t *ec = GET_EC();
RB_VM_SAVE_MACHINE_CONTEXT(rb_ec_thread_ptr(ec));

while (vm->ractor.sched.barrier_waiting) {
do {
VM_ASSERT(vm_need_barrier_waiting(vm));
RUBY_DEBUG_LOG("barrier serial:%u", vm->ractor.sched.barrier_serial);
rb_ractor_sched_barrier_join(vm, cr);
}
} while (vm_need_barrier_waiting(vm));
}
#else
if (!no_barrier) {
while (vm->ractor.sync.barrier_waiting) {
rb_ractor_sched_barrier_join(vm, cr);
}
}
#endif

VM_ASSERT(vm->ractor.sync.lock_rec == 0);
VM_ASSERT(vm->ractor.sync.lock_owner == NULL);
Expand Down

0 comments on commit 4949f89

Please sign in to comment.