diff --git a/bootstraptest/test_thread.rb b/bootstraptest/test_thread.rb index 0c163d0db02dca..18b4fcd2e99263 100644 --- a/bootstraptest/test_thread.rb +++ b/bootstraptest/test_thread.rb @@ -242,6 +242,20 @@ end } +assert_equal 'true', %{ + Thread.new{}.join + begin + Process.waitpid2 fork{ + Thread.new{ + sleep 0.1 + }.join + } + true + rescue NotImplementedError + true + end +} + assert_equal 'ok', %{ open("zzz_t1.rb", "w") do |f| f.puts <<-END diff --git a/common.mk b/common.mk index b8ae911ef2129e..ab151d0f8c30ee 100644 --- a/common.mk +++ b/common.mk @@ -15323,6 +15323,7 @@ ruby.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h ruby.$(OBJEXT): $(top_srcdir)/internal/serial.h ruby.$(OBJEXT): $(top_srcdir)/internal/static_assert.h ruby.$(OBJEXT): $(top_srcdir)/internal/string.h +ruby.$(OBJEXT): $(top_srcdir)/internal/thread.h ruby.$(OBJEXT): $(top_srcdir)/internal/variable.h ruby.$(OBJEXT): $(top_srcdir)/internal/vm.h ruby.$(OBJEXT): $(top_srcdir)/internal/warnings.h @@ -17536,6 +17537,7 @@ thread.$(OBJEXT): $(top_srcdir)/internal/time.h thread.$(OBJEXT): $(top_srcdir)/internal/variable.h thread.$(OBJEXT): $(top_srcdir)/internal/vm.h thread.$(OBJEXT): $(top_srcdir)/internal/warnings.h +thread.$(OBJEXT): {$(VPATH)}$(COROUTINE_H) thread.$(OBJEXT): {$(VPATH)}assert.h thread.$(OBJEXT): {$(VPATH)}atomic.h thread.$(OBJEXT): {$(VPATH)}backward/2/assume.h @@ -17730,6 +17732,7 @@ thread.$(OBJEXT): {$(VPATH)}thread.h thread.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).c thread.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h thread.$(OBJEXT): {$(VPATH)}thread_native.h +thread.$(OBJEXT): {$(VPATH)}thread_pthread_mn.c thread.$(OBJEXT): {$(VPATH)}thread_sync.c thread.$(OBJEXT): {$(VPATH)}thread_sync.rbinc thread.$(OBJEXT): {$(VPATH)}timev.h diff --git a/configure.ac b/configure.ac index fc468389daeffc..03165b79cab869 100644 --- a/configure.ac +++ b/configure.ac @@ -1342,6 +1342,8 @@ AC_CHECK_HEADERS(syscall.h) AC_CHECK_HEADERS(time.h) AC_CHECK_HEADERS(ucontext.h) 
AC_CHECK_HEADERS(utime.h) +AC_CHECK_HEADERS(sys/epoll.h) + AS_CASE("$target_cpu", [x64|x86_64|i[3-6]86*], [ AC_CHECK_HEADERS(x86intrin.h) ]) diff --git a/debug.c b/debug.c index b65f368fbfc97b..e84e3d602a3eb1 100644 --- a/debug.c +++ b/debug.c @@ -443,6 +443,10 @@ setup_debug_log(void) (ruby_debug_log_mode & ruby_debug_log_memory) ? "[mem]" : "", (ruby_debug_log_mode & ruby_debug_log_stderr) ? "[stderr]" : "", (ruby_debug_log_mode & ruby_debug_log_file) ? "[file]" : ""); + if (debug_log.output_file[0]) { + fprintf(stderr, "RUBY_DEBUG_LOG filename=%s\n", debug_log.output_file); + } + rb_nativethread_lock_initialize(&debug_log.lock); setup_debug_log_filter(); @@ -609,10 +613,11 @@ ruby_debug_log(const char *file, int line, const char *func_name, const char *fm // ractor information if (ruby_single_main_ractor == NULL) { rb_ractor_t *cr = th ? th->ractor : NULL; + rb_vm_t *vm = GET_VM(); if (r && len < MAX_DEBUG_LOG_MESSAGE_LEN) { - r = snprintf(buff + len, MAX_DEBUG_LOG_MESSAGE_LEN - len, "\tr:#%d/%u", - cr ? (int)rb_ractor_id(cr) : -1, GET_VM()->ractor.cnt); + r = snprintf(buff + len, MAX_DEBUG_LOG_MESSAGE_LEN - len, "\tr:#%d/%u (%u)", + cr ? 
(int)rb_ractor_id(cr) : -1, vm->ractor.cnt, vm->ractor.sched.running_cnt); if (r < 0) rb_bug("ruby_debug_log returns %d", r); len += r; diff --git a/dir.c b/dir.c index 9c9f594a78d395..21b1acada3e58f 100644 --- a/dir.c +++ b/dir.c @@ -805,7 +805,7 @@ dir_read(VALUE dir) struct dirent *dp; GetDIR(dir, dirp); - errno = 0; + rb_errno_set(0); if ((dp = READDIR(dirp->dir, dirp->enc)) != NULL) { return rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc); } @@ -1723,7 +1723,7 @@ nogvl_opendir_at(void *ptr) /* fallthrough*/ case 0: if (fd >= 0) close(fd); - errno = e; + rb_errno_set(e); } } #else /* !USE_OPENDIR_AT */ diff --git a/eval.c b/eval.c index 844c537cc41d06..be450a02f5552e 100644 --- a/eval.c +++ b/eval.c @@ -2110,3 +2110,21 @@ Init_eval(void) id_signo = rb_intern_const("signo"); id_status = rb_intern_const("status"); } + +int +rb_errno(void) +{ + return *rb_orig_errno_ptr(); +} + +void +rb_errno_set(int e) +{ + *rb_orig_errno_ptr() = e; +} + +int * +rb_errno_ptr(void) +{ + return rb_orig_errno_ptr(); +} diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h index eb9a7e4d0f88e9..0c1e1c2a6e3a34 100644 --- a/include/ruby/ruby.h +++ b/include/ruby/ruby.h @@ -270,6 +270,24 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 0) */ int ruby_vsnprintf(char *str, size_t n, char const *fmt, va_list ap); +// TODO: doc + +#include + +int rb_errno(void); +void rb_errno_set(int); +int *rb_errno_ptr(void); + +static inline int * +rb_orig_errno_ptr(void) +{ + return &errno; +} + +#define rb_orig_errno errno +#undef errno +#define errno (*rb_errno_ptr()) + /** @cond INTERNAL_MACRO */ #if RBIMPL_HAS_WARNING("-Wgnu-zero-variadic-macro-arguments") # /* Skip it; clang -pedantic doesn't like the following */ diff --git a/internal/thread.h b/internal/thread.h index c41a16c12d7697..647d1c40c5260b 100644 --- a/internal/thread.h +++ b/internal/thread.h @@ -50,6 +50,7 @@ void rb_mutex_allow_trap(VALUE self, int val); VALUE rb_uninterruptible(VALUE (*b_proc)(VALUE), VALUE data); 
VALUE rb_mutex_owned_p(VALUE self); VALUE rb_exec_recursive_outer_mid(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE h, ID mid); +void ruby_mn_threads_params(void); int rb_thread_wait_for_single_fd(int fd, int events, struct timeval * timeout); diff --git a/process.c b/process.c index 85f1fca467a8e5..c215c1f70d5b36 100644 --- a/process.c +++ b/process.c @@ -685,10 +685,16 @@ rb_last_status_set(int status, rb_pid_t pid) GET_THREAD()->last_status = rb_process_status_new(pid, status, 0); } +static void +last_status_clear(rb_thread_t *th) +{ + th->last_status = Qnil; +} + void rb_last_status_clear(void) { - GET_THREAD()->last_status = Qnil; + last_status_clear(GET_THREAD()); } static rb_pid_t @@ -1654,26 +1660,13 @@ before_exec(void) before_exec_async_signal_safe(); } -/* This function should be async-signal-safe. Actually it is. */ static void -after_exec_async_signal_safe(void) -{ -} - -static void -after_exec_non_async_signal_safe(void) +after_exec(void) { rb_thread_reset_timer_thread(); rb_thread_start_timer_thread(); } -static void -after_exec(void) -{ - after_exec_async_signal_safe(); - after_exec_non_async_signal_safe(); -} - #if defined HAVE_WORKING_FORK || defined HAVE_DAEMON static void before_fork_ruby(void) @@ -1686,10 +1679,14 @@ after_fork_ruby(rb_pid_t pid) { rb_threadptr_pending_interrupt_clear(GET_THREAD()); if (pid == 0) { + // child clear_pid_cache(); rb_thread_atfork(); } - after_exec(); + else { + // parent + after_exec(); + } } #endif @@ -4210,16 +4207,19 @@ rb_fork_ruby2(struct rb_process_status *status) while (1) { prefork(); - disable_child_handler_before_fork(&old); + before_fork_ruby(); - pid = rb_fork(); - err = errno; - if (status) { - status->pid = pid; - status->error = err; + disable_child_handler_before_fork(&old); + { + pid = rb_fork(); + err = errno; + if (status) { + status->pid = pid; + status->error = err; + } } - after_fork_ruby(pid); disable_child_handler_fork_parent(&old); /* yes, bad name */ + after_fork_ruby(pid); if (pid 
>= 0) { /* fork succeed */ return pid; @@ -4663,11 +4663,16 @@ static VALUE do_spawn_process(VALUE arg) { struct spawn_args *argp = (struct spawn_args *)arg; + rb_execarg_parent_start1(argp->execarg); + return (VALUE)rb_spawn_process(DATA_PTR(argp->execarg), argp->errmsg.ptr, argp->errmsg.buflen); } +NOINLINE(static rb_pid_t + rb_execarg_spawn(VALUE execarg_obj, char *errmsg, size_t errmsg_buflen)); + static rb_pid_t rb_execarg_spawn(VALUE execarg_obj, char *errmsg, size_t errmsg_buflen) { @@ -4676,8 +4681,10 @@ rb_execarg_spawn(VALUE execarg_obj, char *errmsg, size_t errmsg_buflen) args.execarg = execarg_obj; args.errmsg.ptr = errmsg; args.errmsg.buflen = errmsg_buflen; - return (rb_pid_t)rb_ensure(do_spawn_process, (VALUE)&args, - execarg_parent_end, execarg_obj); + + rb_pid_t r = (rb_pid_t)rb_ensure(do_spawn_process, (VALUE)&args, + execarg_parent_end, execarg_obj); + return r; } static rb_pid_t @@ -4820,13 +4827,14 @@ rb_spawn(int argc, const VALUE *argv) static VALUE rb_f_system(int argc, VALUE *argv, VALUE _) { + rb_thread_t *th = GET_THREAD(); VALUE execarg_obj = rb_execarg_new(argc, argv, TRUE, TRUE); struct rb_execarg *eargp = rb_execarg_get(execarg_obj); struct rb_process_status status = {0}; eargp->status = &status; - rb_last_status_clear(); + last_status_clear(th); // This function can set the thread's last status. // May be different from waitpid_state.pid on exec failure. 
@@ -4834,12 +4842,10 @@ rb_f_system(int argc, VALUE *argv, VALUE _) if (pid > 0) { VALUE status = rb_process_status_wait(pid, 0); - struct rb_process_status *data = rb_check_typeddata(status, &rb_process_status_type); - // Set the last status: rb_obj_freeze(status); - GET_THREAD()->last_status = status; + th->last_status = status; if (data->status == EXIT_SUCCESS) { return Qtrue; diff --git a/ractor.c b/ractor.c index e0919ac56b1804..b4787e7a01d5ee 100644 --- a/ractor.c +++ b/ractor.c @@ -112,18 +112,16 @@ ractor_unlock_self(rb_ractor_t *cr, const char *file, int line) #define RACTOR_LOCK_SELF(r) ractor_lock_self(r, __FILE__, __LINE__) #define RACTOR_UNLOCK_SELF(r) ractor_unlock_self(r, __FILE__, __LINE__) -static void -ractor_cond_wait(rb_ractor_t *r) +void +rb_ractor_lock_self(rb_ractor_t *r) { -#if RACTOR_CHECK_MODE > 0 - VALUE locked_by = r->sync.locked_by; - r->sync.locked_by = Qnil; -#endif - rb_native_cond_wait(&r->sync.cond, &r->sync.lock); + RACTOR_LOCK_SELF(r); +} -#if RACTOR_CHECK_MODE > 0 - r->sync.locked_by = locked_by; -#endif +void +rb_ractor_unlock_self(rb_ractor_t *r) +{ + RACTOR_UNLOCK_SELF(r); } // Ractor status @@ -243,7 +241,9 @@ ractor_free(void *ptr) rb_ractor_t *r = (rb_ractor_t *)ptr; RUBY_DEBUG_LOG("free r:%d", rb_ractor_id(r)); rb_native_mutex_destroy(&r->sync.lock); +#ifdef RUBY_THREAD_WIN32_H rb_native_cond_destroy(&r->sync.cond); +#endif ractor_queue_free(&r->sync.recv_queue); ractor_queue_free(&r->sync.takers_queue); ractor_local_storage_free(r); @@ -531,6 +531,19 @@ ractor_sleeping_by(const rb_ractor_t *r, enum rb_ractor_wait_status wait_status) return (r->sync.wait.status & wait_status) && r->sync.wait.wakeup_status == wakeup_none; } +#ifdef RUBY_THREAD_PTHREAD_H +// thread_*.c +void rb_ractor_sched_wakeup(rb_ractor_t *r); +#else + +static void +rb_ractor_sched_wakeup(rb_ractor_t *r) +{ + rb_native_cond_broadcast(&r->sync.cond); +} +#endif + + static bool ractor_wakeup(rb_ractor_t *r, enum rb_ractor_wait_status wait_status, enum 
rb_ractor_wakeup_status wakeup_status) { @@ -544,7 +557,7 @@ ractor_wakeup(rb_ractor_t *r, enum rb_ractor_wait_status wait_status, enum rb_ra if (ractor_sleeping_by(r, wait_status)) { r->sync.wait.wakeup_status = wakeup_status; - rb_native_cond_broadcast(&r->sync.cond); + rb_ractor_sched_wakeup(r); return true; } else { @@ -552,6 +565,73 @@ ractor_wakeup(rb_ractor_t *r, enum rb_ractor_wait_status wait_status, enum rb_ra } } +static void +ractor_sleep_interrupt(void *ptr) +{ + rb_ractor_t *r = ptr; + + RACTOR_LOCK(r); + { + ractor_wakeup(r, wait_receiving | wait_taking | wait_yielding, wakeup_by_interrupt); + } + RACTOR_UNLOCK(r); +} + +typedef void (*ractor_sleep_cleanup_function)(rb_ractor_t *cr, void *p); + +static void +ractor_check_ints(rb_execution_context_t *ec, rb_ractor_t *cr, ractor_sleep_cleanup_function cf_func, void *cf_data) +{ + if (cr->sync.wait.status != wait_none) { + enum rb_ractor_wait_status prev_wait_status = cr->sync.wait.status; + cr->sync.wait.status = wait_none; + cr->sync.wait.wakeup_status = wakeup_by_interrupt; + + RACTOR_UNLOCK(cr); + { + if (cf_func) { + int state; + EC_PUSH_TAG(ec); + if ((state = EC_EXEC_TAG()) == TAG_NONE) { + rb_thread_check_ints(); + } + EC_POP_TAG(); + + if (state) { + (*cf_func)(cr, cf_data); + EC_JUMP_TAG(ec, state); + } + } + else { + rb_thread_check_ints(); + } + } + + // reachable? 
+ RACTOR_LOCK(cr); + cr->sync.wait.status = prev_wait_status; + } +} + +#ifdef RUBY_THREAD_PTHREAD_H +void rb_ractor_sched_sleep(rb_execution_context_t *ec, rb_ractor_t *cr, rb_unblock_function_t *ubf); +#else + +// win32 +static void +ractor_cond_wait(rb_ractor_t *r) +{ +#if RACTOR_CHECK_MODE > 0 + VALUE locked_by = r->sync.locked_by; + r->sync.locked_by = Qnil; +#endif + rb_native_cond_wait(&r->sync.cond, &r->sync.lock); + +#if RACTOR_CHECK_MODE > 0 + r->sync.locked_by = locked_by; +#endif +} + static void * ractor_sleep_wo_gvl(void *ptr) { @@ -569,18 +649,17 @@ ractor_sleep_wo_gvl(void *ptr) } static void -ractor_sleep_interrupt(void *ptr) +rb_ractor_sched_sleep(rb_execution_context_t *ec, rb_ractor_t *cr, rb_unblock_function_t *ubf) { - rb_ractor_t *r = ptr; - - RACTOR_LOCK(r); + RACTOR_UNLOCK(cr); { - ractor_wakeup(r, wait_receiving | wait_taking | wait_yielding, wakeup_by_interrupt); + rb_nogvl(ractor_sleep_wo_gvl, cr, + ubf, cr, + RB_NOGVL_UBF_ASYNC_SAFE | RB_NOGVL_INTR_FAIL); } - RACTOR_UNLOCK(r); + RACTOR_LOCK(cr); } - -typedef void (*ractor_sleep_cleanup_function)(rb_ractor_t *cr, void *p); +#endif static enum rb_ractor_wakeup_status ractor_sleep_with_cleanup(rb_execution_context_t *ec, rb_ractor_t *cr, enum rb_ractor_wait_status wait_status, @@ -600,40 +679,12 @@ ractor_sleep_with_cleanup(rb_execution_context_t *ec, rb_ractor_t *cr, enum rb_r RUBY_DEBUG_LOG("sleep by %s", wait_status_str(wait_status)); - RACTOR_UNLOCK(cr); - { - rb_nogvl(ractor_sleep_wo_gvl, cr, - ractor_sleep_interrupt, cr, - RB_NOGVL_UBF_ASYNC_SAFE | RB_NOGVL_INTR_FAIL); + while (cr->sync.wait.wakeup_status == wakeup_none) { + rb_ractor_sched_sleep(ec, cr, ractor_sleep_interrupt); + ractor_check_ints(ec, cr, cf_func, cf_data); } - RACTOR_LOCK(cr); - - // rb_nogvl() can be canceled by interrupts - if (cr->sync.wait.status != wait_none) { - cr->sync.wait.status = wait_none; - cr->sync.wait.wakeup_status = wakeup_by_interrupt; - - RACTOR_UNLOCK(cr); - { - if (cf_func) { - int state; - 
EC_PUSH_TAG(ec); - if ((state = EC_EXEC_TAG()) == TAG_NONE) { - rb_thread_check_ints(); - } - EC_POP_TAG(); - if (state) { - (*cf_func)(cr, cf_data); - EC_JUMP_TAG(ec, state); - } - } - else { - rb_thread_check_ints(); - } - } - RACTOR_LOCK(cr); // reachable? - } + cr->sync.wait.status = wait_none; // TODO: multi-thread wakeup_status = cr->sync.wait.wakeup_status; @@ -1943,7 +1994,7 @@ rb_ractor_atfork(rb_vm_t *vm, rb_thread_t *th) } #endif -void rb_thread_sched_init(struct rb_thread_sched *); +void rb_thread_sched_init(struct rb_thread_sched *, bool atfork); void rb_ractor_living_threads_init(rb_ractor_t *r) @@ -1959,11 +2010,15 @@ ractor_init(rb_ractor_t *r, VALUE name, VALUE loc) ractor_queue_setup(&r->sync.recv_queue); ractor_queue_setup(&r->sync.takers_queue); rb_native_mutex_initialize(&r->sync.lock); + rb_native_cond_initialize(&r->barrier_wait_cond); + +#ifdef RUBY_THREAD_WIN32_H rb_native_cond_initialize(&r->sync.cond); rb_native_cond_initialize(&r->barrier_wait_cond); +#endif // thread management - rb_thread_sched_init(&r->threads.sched); + rb_thread_sched_init(&r->threads.sched, false); rb_ractor_living_threads_init(r); // naming @@ -2218,6 +2273,8 @@ ractor_check_blocking(rb_ractor_t *cr, unsigned int remained_thread_cnt, const c } } +void rb_threadptr_remove(rb_thread_t *th); + void rb_ractor_living_threads_remove(rb_ractor_t *cr, rb_thread_t *th) { @@ -2225,6 +2282,8 @@ rb_ractor_living_threads_remove(rb_ractor_t *cr, rb_thread_t *th) RUBY_DEBUG_LOG("r->threads.cnt:%d--", cr->threads.cnt); ractor_check_blocking(cr, cr->threads.cnt - 1, __FILE__, __LINE__); + rb_threadptr_remove(th); + if (cr->threads.cnt == 1) { vm_remove_ractor(th->vm, cr); } @@ -2327,6 +2386,9 @@ ractor_terminal_interrupt_all(rb_vm_t *vm) } } +void rb_add_running_thread(rb_thread_t *th); +void rb_del_running_thread(rb_thread_t *th); + void rb_ractor_terminate_all(void) { @@ -2354,7 +2416,9 @@ rb_ractor_terminate_all(void) // wait for 1sec rb_vm_ractor_blocking_cnt_inc(vm, cr, 
__FILE__, __LINE__); + rb_del_running_thread(rb_ec_thread_ptr(cr->threads.running_ec)); rb_vm_cond_timedwait(vm, &vm->ractor.sync.terminate_cond, 1000 /* ms */); + rb_add_running_thread(rb_ec_thread_ptr(cr->threads.running_ec)); rb_vm_ractor_blocking_cnt_dec(vm, cr, __FILE__, __LINE__); ractor_terminal_interrupt_all(vm); diff --git a/ractor_core.h b/ractor_core.h index 38aded15074bef..36c0e91c7aeef5 100644 --- a/ractor_core.h +++ b/ractor_core.h @@ -103,7 +103,6 @@ struct rb_ractor_sync { #if RACTOR_CHECK_MODE > 0 VALUE locked_by; #endif - rb_nativethread_cond_t cond; bool incoming_port_closed; bool outgoing_port_closed; @@ -120,7 +119,12 @@ struct rb_ractor_sync { struct ractor_wait { enum rb_ractor_wait_status status; enum rb_ractor_wakeup_status wakeup_status; + rb_thread_t *waiting_thread; } wait; + +#ifndef RUBY_THREAD_PTHREAD_H + rb_nativethread_cond_t cond; +#endif }; // created @@ -310,11 +314,13 @@ static inline void rb_ractor_set_current_ec_(rb_ractor_t *cr, rb_execution_context_t *ec, const char *file, int line) { #ifdef RB_THREAD_LOCAL_SPECIFIER + # ifdef __APPLE__ rb_current_ec_set(ec); # else ruby_current_ec = ec; # endif + #else native_tls_set(ruby_current_ec_key, ec); #endif diff --git a/ruby.c b/ruby.c index 9d9ba316cb4d97..1b19d7c9157510 100644 --- a/ruby.c +++ b/ruby.c @@ -53,6 +53,7 @@ #include "internal/loadpath.h" #include "internal/missing.h" #include "internal/object.h" +#include "internal/thread.h" #include "internal/ruby_parser.h" #include "internal/variable.h" #include "ruby/encoding.h" @@ -1431,6 +1432,7 @@ proc_long_options(ruby_cmdline_options_t *opt, const char *s, long argc, char ** else if (strcmp("verbose", s) == 0) { opt->verbose = 1; ruby_verbose = Qtrue; + bp(); } else if (strcmp("jit", s) == 0) { #if !USE_RJIT @@ -2148,6 +2150,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt) #endif ruby_gc_set_params(); + ruby_mn_threads_params(); ruby_init_loadpath(); Init_enc(); diff --git 
a/spec/ruby/core/thread/native_thread_id_spec.rb b/spec/ruby/core/thread/native_thread_id_spec.rb index 8460a1db8cf19f..17a08c8a150730 100644 --- a/spec/ruby/core/thread/native_thread_id_spec.rb +++ b/spec/ruby/core/thread/native_thread_id_spec.rb @@ -19,8 +19,15 @@ main_thread_id = Thread.current.native_thread_id t_thread_id = t.native_thread_id - t_thread_id.should be_kind_of(Integer) + if ruby_version_is "3.3" + # native_thread_id can be nil on a M:N scheduler + t_thread_id.should be_kind_of(Integer) if t_thread_id != nil + else + t_thread_id.should be_kind_of(Integer) + end + main_thread_id.should_not == t_thread_id + t.run t.join t.native_thread_id.should == nil diff --git a/test/ruby/test_thread.rb b/test/ruby/test_thread.rb index bcd65909032ccb..da14c429e67141 100644 --- a/test/ruby/test_thread.rb +++ b/test/ruby/test_thread.rb @@ -1435,7 +1435,8 @@ def test_thread_native_thread_id Thread.pass until th1.stop? # After a thread starts (and execute `sleep`), it returns native_thread_id - assert_instance_of Integer, th1.native_thread_id + native_tid = th1.native_thread_id + assert_instance_of Integer, native_tid if native_tid # it can be nil th1.wakeup Thread.pass while th1.alive? 
diff --git a/thread.c b/thread.c index 7654fc03a45b48..d59e3d877395cd 100644 --- a/thread.c +++ b/thread.c @@ -147,7 +147,6 @@ static const char *thread_status_name(rb_thread_t *th, int detail); static int hrtime_update_expire(rb_hrtime_t *, const rb_hrtime_t); NORETURN(static void async_bug_fd(const char *mesg, int errno_arg, int fd)); static int consume_communication_pipe(int fd); -static int check_signals_nogvl(rb_thread_t *, int sigwait_fd); static volatile int system_working = 1; @@ -260,12 +259,6 @@ timeout_prepare(rb_hrtime_t **to, rb_hrtime_t *rel, rb_hrtime_t *end, MAYBE_UNUSED(NOINLINE(static int thread_start_func_2(rb_thread_t *th, VALUE *stack_start))); -static void -ubf_sigwait(void *ignore) -{ - rb_thread_wakeup_timer_thread(0); -} - #include THREAD_IMPL_SRC /* @@ -646,20 +639,13 @@ static int thread_start_func_2(rb_thread_t *th, VALUE *stack_start) { STACK_GROW_DIR_DETECTION; - enum ruby_tag_type state; - VALUE errinfo = Qnil; - size_t size = th->vm->default_params.thread_vm_stack_size / sizeof(VALUE); - rb_thread_t *ractor_main_th = th->ractor->threads.main; - VALUE * vm_stack = NULL; - VM_ASSERT(th != th->vm->ractor.main_thread); RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); + VM_ASSERT(th != th->vm->ractor.main_thread); - // setup native thread - thread_sched_to_running(TH_SCHED(th), th); - ruby_thread_set_native(th); - - RUBY_DEBUG_LOG("got lock. th:%u", rb_th_serial(th)); + enum ruby_tag_type state; + VALUE errinfo = Qnil; + rb_thread_t *ractor_main_th = th->ractor->threads.main; // setup ractor if (rb_ractor_status_p(th->ractor, ractor_blocking)) { @@ -674,17 +660,6 @@ thread_start_func_2(rb_thread_t *th, VALUE *stack_start) RB_VM_UNLOCK(); } - // This assertion is not passed on win32 env. Check it later. 
- // VM_ASSERT((size * sizeof(VALUE)) <= th->ec->machine.stack_maxsize); - - // setup VM and machine stack - vm_stack = alloca(size * sizeof(VALUE)); - VM_ASSERT(vm_stack); - - rb_ec_initialize_vm_stack(th->ec, vm_stack, size); - th->ec->machine.stack_start = STACK_DIR_UPPER(vm_stack + size, vm_stack); - th->ec->machine.stack_maxsize -= size * sizeof(VALUE); - // Ensure that we are not joinable. VM_ASSERT(UNDEF_P(th->value)); @@ -990,11 +965,11 @@ rb_thread_create(VALUE (*fn)(void *), void *arg) } VALUE -rb_thread_create_ractor(rb_ractor_t *g, VALUE args, VALUE proc) +rb_thread_create_ractor(rb_ractor_t *r, VALUE args, VALUE proc) { struct thread_create_params params = { .type = thread_invoke_type_ractor_proc, - .g = g, + .g = r, .args = args, .proc = proc, }; @@ -1375,14 +1350,14 @@ sleep_forever(rb_thread_t *th, unsigned int fl) void rb_thread_sleep_forever(void) { - RUBY_DEBUG_LOG(""); + RUBY_DEBUG_LOG("forever"); sleep_forever(GET_THREAD(), SLEEP_SPURIOUS_CHECK); } void rb_thread_sleep_deadly(void) { - RUBY_DEBUG_LOG(""); + RUBY_DEBUG_LOG("deadly"); sleep_forever(GET_THREAD(), SLEEP_DEADLOCKABLE|SLEEP_SPURIOUS_CHECK); } @@ -1394,7 +1369,7 @@ rb_thread_sleep_deadly_allow_spurious_wakeup(VALUE blocker, VALUE timeout, rb_hr rb_fiber_scheduler_block(scheduler, blocker, timeout); } else { - RUBY_DEBUG_LOG(""); + RUBY_DEBUG_LOG("..."); if (end) { sleep_hrtime_until(GET_THREAD(), end, SLEEP_SPURIOUS_CHECK); } @@ -1491,7 +1466,7 @@ blocking_region_begin(rb_thread_t *th, struct rb_blocking_region_buffer *region, th->status = THREAD_STOPPED; rb_ractor_blocking_threads_inc(th->ractor, __FILE__, __LINE__); - RUBY_DEBUG_LOG(""); + RUBY_DEBUG_LOG("thread_id:%p", (void *)th->nt->thread_id); RB_VM_SAVE_MACHINE_CONTEXT(th); thread_sched_to_waiting(TH_SCHED(th), th); @@ -1519,8 +1494,12 @@ blocking_region_end(rb_thread_t *th, struct rb_blocking_region_buffer *region) th->status = region->prev_status; } - RUBY_DEBUG_LOG(""); + RUBY_DEBUG_LOG("end"); + +#ifndef _WIN32 + // 
GET_THREAD() clears WSAGetLastError() VM_ASSERT(th == GET_THREAD()); +#endif } void * @@ -1544,14 +1523,11 @@ rb_nogvl(void *(*func)(void *), void *data1, if (flags & RB_NOGVL_UBF_ASYNC_SAFE) { vm->ubf_async_safe = 1; } - else { - ubf_th = rb_thread_start_unblock_thread(); - } } BLOCKING_REGION(th, { val = func(data1); - saved_errno = errno; + saved_errno = rb_errno(); }, ubf, data2, flags & RB_NOGVL_INTR_FAIL); if (is_main_thread) vm->ubf_async_safe = 0; @@ -1564,7 +1540,7 @@ rb_nogvl(void *(*func)(void *), void *data1, thread_value(rb_thread_kill(ubf_th)); } - errno = saved_errno; + rb_errno_set(saved_errno); return val; } @@ -1689,11 +1665,31 @@ rb_thread_io_wake_pending_closer(struct waiting_fd *wfd) } } +static int +waitfd_to_waiting_flag(int wfd_event) +{ + return wfd_event << 1; +} + VALUE -rb_thread_io_blocking_region(rb_blocking_function_t *func, void *data1, int fd) +rb_thread_io_blocking_call(rb_blocking_function_t *func, void *data1, int fd, int events) { - volatile VALUE val = Qundef; /* shouldn't be used */ rb_execution_context_t * volatile ec = GET_EC(); + rb_thread_t *th = rb_ec_thread_ptr(ec); + + RUBY_DEBUG_LOG("th:%u fd:%d ev:%d", rb_th_serial(th), fd, events); + +#ifdef RUBY_THREAD_PTHREAD_H + if (events && !th_has_dedicated_nt(th)) { + VM_ASSERT(events == RB_WAITFD_IN || events == RB_WAITFD_OUT); + + // wait readable/writable + thread_sched_wait_events(TH_SCHED(th), th, fd, waitfd_to_waiting_flag(events), NULL); + RUBY_VM_CHECK_INTS_BLOCKING(ec); + } +#endif + + volatile VALUE val = Qundef; /* shouldn't be used */ volatile int saved_errno = 0; enum ruby_tag_type state; @@ -1746,6 +1742,12 @@ rb_thread_io_blocking_region(rb_blocking_function_t *func, void *data1, int fd) return val; } +VALUE +rb_thread_io_blocking_region(rb_blocking_function_t *func, void *data1, int fd) +{ + return rb_thread_io_blocking_call(func, data1, fd, 0); +} + /* * rb_thread_call_with_gvl - re-enter the Ruby world after GVL release. 
* @@ -2379,15 +2381,12 @@ rb_threadptr_execute_interrupts(rb_thread_t *th, int blocking_timing) /* signal handling */ if (trap_interrupt && (th == th->vm->ractor.main_thread)) { enum rb_thread_status prev_status = th->status; - int sigwait_fd = rb_sigwait_fd_get(th); - if (sigwait_fd >= 0) { - (void)consume_communication_pipe(sigwait_fd); - rb_sigwait_fd_put(th, sigwait_fd); - } th->status = THREAD_RUNNABLE; - while ((sig = rb_get_next_signal()) != 0) { - ret |= rb_signal_exec(th, sig); + { + while ((sig = rb_get_next_signal()) != 0) { + ret |= rb_signal_exec(th, sig); + } } th->status = prev_status; } @@ -2432,7 +2431,7 @@ rb_threadptr_execute_interrupts(rb_thread_t *th, int blocking_timing) limits_us >>= -th->priority; if (th->status == THREAD_RUNNABLE) - th->running_time_us += TIME_QUANTUM_USEC; + th->running_time_us += 10 * 1000; // 10ms = 10_000us // TODO: use macro VM_ASSERT(th->ec->cfp); EXEC_EVENT_HOOK(th->ec, RUBY_INTERNAL_EVENT_SWITCH, th->ec->cfp->self, @@ -3362,7 +3361,7 @@ rb_thread_setname(VALUE thread, VALUE name) name = rb_str_new_frozen(name); } target_th->name = name; - if (threadptr_initialized(target_th)) { + if (threadptr_initialized(target_th) && target_th->has_dedicated_nt) { native_set_another_thread_name(target_th->nt->thread_id, name); } return name; @@ -4148,7 +4147,6 @@ wait_retryable(int *result, int errnum, rb_hrtime_t *rel, rb_hrtime_t end) struct select_set { int max; - int sigwait_fd; rb_thread_t *th; rb_fdset_t *rset; rb_fdset_t *wset; @@ -4164,10 +4162,6 @@ select_set_free(VALUE p) { struct select_set *set = (struct select_set *)p; - if (set->sigwait_fd >= 0) { - rb_sigwait_fd_put(set->th, set->sigwait_fd); - } - rb_fd_term(&set->orig_rset); rb_fd_term(&set->orig_wset); rb_fd_term(&set->orig_eset); @@ -4175,24 +4169,6 @@ select_set_free(VALUE p) return Qfalse; } -static const rb_hrtime_t * -sigwait_timeout(rb_thread_t *th, int sigwait_fd, const rb_hrtime_t *orig, - int *drained_p) -{ - static const rb_hrtime_t quantum = 
TIME_QUANTUM_USEC * 1000; - - if (sigwait_fd >= 0 && (!ubf_threads_empty() || BUSY_WAIT_SIGNALS)) { - *drained_p = check_signals_nogvl(th, sigwait_fd); - if (!orig || *orig > quantum) - return &quantum; - } - - return orig; -} - -#define sigwait_signals_fd(result, cond, sigwait_fd) \ - (result > 0 && (cond) ? (result--, (sigwait_fd)) : -1) - static VALUE do_select(VALUE p) { @@ -4211,28 +4187,18 @@ do_select(VALUE p) TRUE) do { - int drained; lerrno = 0; BLOCKING_REGION(set->th, { - const rb_hrtime_t *sto; struct timeval tv; - sto = sigwait_timeout(set->th, set->sigwait_fd, to, &drained); if (!RUBY_VM_INTERRUPTED(set->th->ec)) { - result = native_fd_select(set->max, set->rset, set->wset, - set->eset, - rb_hrtime2timeval(&tv, sto), set->th); + result = native_fd_select(set->max, + set->rset, set->wset, set->eset, + rb_hrtime2timeval(&tv, to), set->th); if (result < 0) lerrno = errno; } - }, set->sigwait_fd >= 0 ? ubf_sigwait : ubf_select, set->th, TRUE); - - if (set->sigwait_fd >= 0) { - int fd = sigwait_signals_fd(result, - rb_fd_isset(set->sigwait_fd, set->rset), - set->sigwait_fd); - (void)check_signals_nogvl(set->th, fd); - } + }, ubf_select, set->th, TRUE); RUBY_VM_CHECK_INTS_BLOCKING(set->th->ec); /* may raise */ } while (wait_retryable(&result, lerrno, to, end) && do_select_update()); @@ -4244,18 +4210,6 @@ do_select(VALUE p) return (VALUE)result; } -static rb_fdset_t * -init_set_fd(int fd, rb_fdset_t *fds) -{ - if (fd < 0) { - return 0; - } - rb_fd_init(fds); - rb_fd_set(fd, fds); - - return fds; -} - int rb_thread_fd_select(int max, rb_fdset_t * read, rb_fdset_t * write, rb_fdset_t * except, struct timeval *timeout) @@ -4279,16 +4233,6 @@ rb_thread_fd_select(int max, rb_fdset_t * read, rb_fdset_t * write, rb_fdset_t * return 0; } - set.sigwait_fd = rb_sigwait_fd_get(set.th); - if (set.sigwait_fd >= 0) { - if (set.rset) - rb_fd_set(set.sigwait_fd, set.rset); - else - set.rset = init_set_fd(set.sigwait_fd, &set.orig_rset); - if (set.sigwait_fd >= set.max) { - 
set.max = set.sigwait_fd + 1; - } - } #define fd_init_copy(f) do { \ if (set.f) { \ rb_fd_resize(set.max - 1, set.f); \ @@ -4325,19 +4269,35 @@ rb_thread_fd_select(int max, rb_fdset_t * read, rb_fdset_t * write, rb_fdset_t * int rb_thread_wait_for_single_fd(int fd, int events, struct timeval *timeout) { - struct pollfd fds[2]; + struct pollfd fds[1]; int result = 0; - int drained; nfds_t nfds; - rb_unblock_function_t *ubf; struct waiting_fd wfd; int state; volatile int lerrno; - wfd.th = GET_THREAD(); + rb_thread_t *th = wfd.th = GET_THREAD(); wfd.fd = fd; wfd.busy = NULL; +#ifdef RUBY_THREAD_PTHREAD_H + if (!th->nt->dedicated) { + rb_hrtime_t rel, *prel; + + if (timeout) { + rel = rb_timeval2hrtime(timeout); + prel = &rel; + } + else { + prel = NULL; + } + + if (thread_sched_wait_events(TH_SCHED(th), th, fd, waitfd_to_waiting_flag(events), prel)) { + return 0; // timeout + } + } +#endif + RB_VM_LOCK_ENTER(); { ccan_list_add(&wfd.th->vm->waiting_fds, &wfd.wfd_node); @@ -4353,36 +4313,18 @@ rb_thread_wait_for_single_fd(int fd, int events, struct timeval *timeout) fds[0].events = (short)events; fds[0].revents = 0; do { - fds[1].fd = rb_sigwait_fd_get(wfd.th); - - if (fds[1].fd >= 0) { - fds[1].events = POLLIN; - fds[1].revents = 0; - nfds = 2; - ubf = ubf_sigwait; - } - else { - nfds = 1; - ubf = ubf_select; - } + nfds = 1; lerrno = 0; BLOCKING_REGION(wfd.th, { - const rb_hrtime_t *sto; struct timespec ts; - sto = sigwait_timeout(wfd.th, fds[1].fd, to, &drained); if (!RUBY_VM_INTERRUPTED(wfd.th->ec)) { - result = ppoll(fds, nfds, rb_hrtime2timespec(&ts, sto), 0); + result = ppoll(fds, nfds, rb_hrtime2timespec(&ts, to), 0); if (result < 0) lerrno = errno; } - }, ubf, wfd.th, TRUE); + }, ubf_select, wfd.th, TRUE); - if (fds[1].fd >= 0) { - int fd1 = sigwait_signals_fd(result, fds[1].revents, fds[1].fd); - (void)check_signals_nogvl(wfd.th, fd1); - rb_sigwait_fd_put(wfd.th, fds[1].fd); - } RUBY_VM_CHECK_INTS_BLOCKING(wfd.th->ec); } while (wait_retryable(&result, lerrno, 
to, end)); } @@ -4470,6 +4412,18 @@ select_single_cleanup(VALUE ptr) return (VALUE)-1; } +static rb_fdset_t * +init_set_fd(int fd, rb_fdset_t *fds) +{ + if (fd < 0) { + return 0; + } + rb_fd_init(fds); + rb_fd_set(fd, fds); + + return fds; +} + int rb_thread_wait_for_single_fd(int fd, int events, struct timeval *timeout) { @@ -4552,16 +4506,13 @@ consume_communication_pipe(int fd) ssize_t result; int ret = FALSE; /* for rb_sigwait_sleep */ - /* - * disarm UBF_TIMER before we read, because it can become - * re-armed at any time via sighandler and the pipe will refill - * We can disarm it because this thread is now processing signals - * and we do not want unnecessary SIGVTALRM - */ - ubf_timer_disarm(); - while (1) { result = read(fd, buff, sizeof(buff)); +#if USE_EVENTFD + RUBY_DEBUG_LOG("resultf:%d buff:%lu", (int)result, (unsigned long)buff[0]); +#else + RUBY_DEBUG_LOG("result:%d", (int)result); +#endif if (result > 0) { ret = TRUE; if (USE_EVENTFD || result < (ssize_t)sizeof(buff)) { @@ -4588,24 +4539,6 @@ consume_communication_pipe(int fd) } } -static int -check_signals_nogvl(rb_thread_t *th, int sigwait_fd) -{ - rb_vm_t *vm = GET_VM(); /* th may be 0 */ - int ret = sigwait_fd >= 0 ? consume_communication_pipe(sigwait_fd) : FALSE; - ubf_wakeup_all_threads(); - if (rb_signal_buff_size()) { - if (th == vm->ractor.main_thread) { - /* no need to lock + wakeup if already in main thread */ - RUBY_VM_SET_TRAP_INTERRUPT(th->ec); - } - else { - threadptr_trap_interrupt(vm->ractor.main_thread); - } - } - return ret; -} - void rb_thread_stop_timer_thread(void) { @@ -4702,6 +4635,10 @@ rb_thread_atfork_internal(rb_thread_t *th, void (*atfork)(rb_thread_t *, const r rb_ractor_sleeper_threads_clear(th->ractor); rb_clear_coverages(); + // restart timer thread (timer threads access to `vm->waitpid_lock` and so on. 
+ rb_thread_reset_timer_thread(); + rb_thread_start_timer_thread(); + VM_ASSERT(vm->ractor.blocking_cnt == 0); VM_ASSERT(vm->ractor.cnt == 1); } @@ -5467,8 +5404,16 @@ Init_Thread(void) /* main thread setting */ { /* acquire global vm lock */ - struct rb_thread_sched *sched = TH_SCHED(th); - thread_sched_to_running(sched, th); +#ifdef HAVE_PTHREAD_NP_H + VM_ASSERT(TH_SCHED(th)->running == th); +#endif + // thread_sched_to_running() should not be called because + // it assumes blocked by thread_sched_to_waiting(). + // thread_sched_to_running(sched, th); + +#ifdef RB_INTERNAL_THREAD_HOOK + RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_RESUMED); +#endif th->pending_interrupt_queue = rb_ary_hidden_new(0); th->pending_interrupt_queue_checked = 0; @@ -5481,7 +5426,7 @@ Init_Thread(void) Init_thread_sync(); // TODO: Suppress unused function warning for now - if (0) rb_thread_sched_destroy(NULL); + // if (0) rb_thread_sched_destroy(NULL); } int @@ -5511,7 +5456,7 @@ debug_deadlock_check(rb_ractor_t *r, VALUE msg) ccan_list_for_each(&r->threads.set, th, lt_node) { rb_str_catf(msg, "* %+"PRIsVALUE"\n rb_thread_t:%p " "native:%p int:%u", - th->self, (void *)th, thread_id_str(th), th->ec->interrupt_flag); + th->self, (void *)th, th->nt ? 
thread_id_str(th) : "N/A", th->ec->interrupt_flag); if (th->locking_mutex) { rb_mutex_t *mutex = mutex_ptr(th->locking_mutex); @@ -5537,14 +5482,18 @@ rb_check_deadlock(rb_ractor_t *r) { if (GET_THREAD()->vm->thread_ignore_deadlock) return; - int found = 0; - rb_thread_t *th = NULL; +#ifdef RUBY_THREAD_PTHREAD_H + if (r->threads.sched.readyq_cnt > 0) return; +#endif + int sleeper_num = rb_ractor_sleeper_thread_num(r); int ltnum = rb_ractor_living_thread_num(r); if (ltnum > sleeper_num) return; if (ltnum < sleeper_num) rb_bug("sleeper must not be more than vm_living_thread_num(vm)"); - if (patrol_thread && patrol_thread != GET_THREAD()) return; + + int found = 0; + rb_thread_t *th = NULL; ccan_list_for_each(&r->threads.set, th, lt_node) { if (th->status != THREAD_STOPPED_FOREVER || RUBY_VM_INTERRUPTED(th->ec)) { diff --git a/thread_none.c b/thread_none.c index 27c6ddf754a18c..fff6e63e11c509 100644 --- a/thread_none.c +++ b/thread_none.c @@ -42,7 +42,7 @@ thread_sched_yield(struct rb_thread_sched *sched, rb_thread_t *th) } void -rb_thread_sched_init(struct rb_thread_sched *sched) +rb_thread_sched_init(struct rb_thread_sched *sched, bool atfork) { } @@ -134,6 +134,11 @@ Init_native_thread(rb_thread_t *main_th) ruby_thread_set_native(main_th); } +void +ruby_mn_threads_params(void) +{ +} + static void native_thread_destroy(rb_thread_t *th) { @@ -276,9 +281,51 @@ native_fd_select(int n, rb_fdset_t *readfds, rb_fdset_t *writefds, rb_fdset_t *e return rb_fd_select(n, readfds, writefds, exceptfds, timeout); } -static VALUE -rb_thread_start_unblock_thread(void) +static bool +th_has_dedicated_nt(const rb_thread_t *th) +{ + return true; +} + +void +rb_add_running_thread(rb_thread_t *th){ + // do nothing +} + +void +rb_del_running_thread(rb_thread_t *th) +{ + // do nothing +} + +void +rb_threadptr_sched_free(rb_thread_t *th) +{ + // do nothing +} + +void +rb_ractor_sched_barrier_start(rb_vm_t *vm, rb_ractor_t *cr) +{ + // do nothing +} + +void 
+rb_ractor_sched_barrier_join(rb_vm_t *vm, rb_ractor_t *cr) +{ + // do nothing +} + +void +rb_threadptr_remove(rb_thread_t *th) +{ + // do nothing +} + +void +rb_thread_sched_mark_zombies(rb_vm_t *vm) { - return Qfalse; + // do nothing } + #endif /* THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION */ diff --git a/thread_none.h b/thread_none.h index 89f64667f0c158..ac47e52bdaeba6 100644 --- a/thread_none.h +++ b/thread_none.h @@ -16,5 +16,6 @@ struct rb_thread_sched_item {}; struct rb_thread_sched {}; RUBY_EXTERN struct rb_execution_context_struct *ruby_current_ec; +NOINLINE(struct rb_execution_context_struct *rb_current_ec_noinline(void)); // for assertions #endif /* RUBY_THREAD_NONE_H */ diff --git a/thread_pthread.c b/thread_pthread.c index 7c53325240cb64..787b6f437c9943 100644 --- a/thread_pthread.c +++ b/thread_pthread.c @@ -60,6 +60,23 @@ static pthread_condattr_t *condattr_monotonic = &condattr_mono; static const void *const condattr_monotonic = NULL; #endif +#include COROUTINE_H + +#ifndef HAVE_SYS_EPOLL_H +#define HAVE_SYS_EPOLL_H 0 +#else +// force setting for debug +// #undef HAVE_SYS_EPOLL_H +// #define HAVE_SYS_EPOLL_H 0 +#endif + +#if HAVE_SYS_EPOLL_H && !defined(COROUTINE_PTHREAD_CONTEXT) + #include + #define USE_MN_THREADS 1 +#else + #define USE_MN_THREADS 0 +#endif + // native thread wrappers #define NATIVE_MUTEX_LOCK_DEBUG 0 @@ -251,140 +268,7 @@ static rb_serial_t current_fork_gen = 1; /* We can't use GET_VM()->fork_gen */ # define USE_UBF_LIST 1 #endif -/* - * UBF_TIMER and ubf_list both use SIGVTALRM. - * - * UBF_TIMER has NOTHING to do with thread timeslices (TIMER_INTERRUPT_MASK) - * - * UBF_TIMER is to close TOCTTOU signal race on programs where we - * cannot rely on GVL contention (vm->gvl.timer) to perform wakeups - * while a thread is doing blocking I/O on sockets or pipes. With - * rb_thread_call_without_gvl and similar functions: - * - * (1) Check interrupts. - * (2) release GVL. 
- * (2a) signal received - * (3) call func with data1 (blocks for a long time without ubf_timer) - * (4) acquire GVL. - * Other Ruby threads can not run in parallel any more. - * (5) Check interrupts. - * - * We need UBF_TIMER to break out of (3) if (2a) happens. - * - * ubf_list wakeups may be triggered on gvl_yield. - * - * If we have vm->gvl.timer (on GVL contention), we don't need UBF_TIMER - * as it can perform the same tasks while doing timeslices. - */ -#define UBF_TIMER_NONE 0 -#define UBF_TIMER_POSIX 1 -#define UBF_TIMER_PTHREAD 2 - -#ifndef UBF_TIMER -# if defined(HAVE_TIMER_SETTIME) && defined(HAVE_TIMER_CREATE) && \ - defined(CLOCK_MONOTONIC) && defined(USE_UBF_LIST) - /* preferred */ -# define UBF_TIMER UBF_TIMER_POSIX -# elif defined(USE_UBF_LIST) - /* safe, but inefficient */ -# define UBF_TIMER UBF_TIMER_PTHREAD -# else - /* we'll be racy without SIGVTALRM for ubf_list */ -# define UBF_TIMER UBF_TIMER_NONE -# endif -#endif - -enum rtimer_state { - /* alive, after timer_create: */ - RTIMER_DISARM, - RTIMER_ARMING, - RTIMER_ARMED, - - RTIMER_DEAD -}; - -#if UBF_TIMER == UBF_TIMER_POSIX -static const struct itimerspec zero; -static struct { - rb_atomic_t state_; /* rtimer_state */ - rb_serial_t fork_gen; - timer_t timerid; -} timer_posix = { - /* .state = */ RTIMER_DEAD, -}; - -#define TIMER_STATE_DEBUG 0 - -static const char * -rtimer_state_name(enum rtimer_state state) -{ - switch (state) { - case RTIMER_DISARM: return "disarm"; - case RTIMER_ARMING: return "arming"; - case RTIMER_ARMED: return "armed"; - case RTIMER_DEAD: return "dead"; - default: rb_bug("unreachable"); - } -} - -static enum rtimer_state -timer_state_exchange(enum rtimer_state state) -{ - enum rtimer_state prev = ATOMIC_EXCHANGE(timer_posix.state_, state); - if (TIMER_STATE_DEBUG) fprintf(stderr, "state (exc): %s->%s\n", rtimer_state_name(prev), rtimer_state_name(state)); - return prev; -} - -static enum rtimer_state -timer_state_cas(enum rtimer_state expected_prev, enum 
rtimer_state state) -{ - enum rtimer_state prev = ATOMIC_CAS(timer_posix.state_, expected_prev, state); - - if (TIMER_STATE_DEBUG) { - if (prev == expected_prev) { - fprintf(stderr, "state (cas): %s->%s\n", rtimer_state_name(prev), rtimer_state_name(state)); - } - else { - fprintf(stderr, "state (cas): %s (expected:%s)\n", rtimer_state_name(prev), rtimer_state_name(expected_prev)); - } - } - - return prev; -} - -#elif UBF_TIMER == UBF_TIMER_PTHREAD -static void *timer_pthread_fn(void *); -static struct { - int low[2]; - rb_atomic_t armed; /* boolean */ - rb_serial_t fork_gen; - pthread_t thid; -} timer_pthread = { - { -1, -1 }, -}; -#endif - -static const rb_hrtime_t *sigwait_timeout(rb_thread_t *, int sigwait_fd, - const rb_hrtime_t *, - int *drained_p); -static void ubf_timer_disarm(void); static void threadptr_trap_interrupt(rb_thread_t *); -static void ubf_wakeup_all_threads(void); -static int ubf_threads_empty(void); - -#define TIMER_THREAD_CREATED_P() (signal_self_pipe.fork_gen == current_fork_gen) - -/* for testing, and in case we come across a platform w/o pipes: */ -#define BUSY_WAIT_SIGNALS (0) - -/* - * sigwait_th is the thread which owns sigwait_fd and sleeps on it - * (using ppoll). RJIT worker can be sigwait_th==0, so we initialize - * it to THREAD_INVALID at startup and fork time. It is the ONLY thread - * allowed to read from sigwait_fd, otherwise starvation can occur. 
- */ -#define THREAD_INVALID ((const rb_thread_t *)-1) -static const rb_thread_t *sigwait_th; #ifdef HAVE_SCHED_YIELD #define native_thread_yield() (void)sched_yield() @@ -399,363 +283,1436 @@ static const rb_thread_t *sigwait_th; #define TIME_QUANTUM_USEC (TIME_QUANTUM_MSEC * 1000) #define TIME_QUANTUM_NSEC (TIME_QUANTUM_USEC * 1000) -/* - * Designate the next sched.timer thread, favor the last thread in - * the readyq since it will be in readyq longest - */ -static int -designate_timer_thread(struct rb_thread_sched *sched) -{ - rb_thread_t *last; +static void native_thread_dedicated_inc(rb_vm_t *vm, rb_ractor_t *cr, struct rb_native_thread *nt); +static void native_thread_dedicated_dec(rb_vm_t *vm, rb_ractor_t *cr, struct rb_native_thread *nt); +static void native_thread_assign(struct rb_native_thread *nt, rb_thread_t *th); - last = ccan_list_tail(&sched->readyq, rb_thread_t, sched.node.readyq); +static void ractor_sched_enq(rb_vm_t *vm, rb_ractor_t *r); +static void timer_thread_wakeup(void); +static void timer_thread_wakeup_locked(rb_vm_t *vm); +static void timer_thread_wakeup_force(void); +static void thread_sched_switch(rb_thread_t *cth, rb_thread_t *next_th); - if (last) { - rb_native_cond_signal(&last->nt->cond.readyq); - return TRUE; - } - else { - return FALSE; - } +#define thread_sched_dump(s) thread_sched_dump_(__FILE__, __LINE__, s) + +static bool +th_has_dedicated_nt(const rb_thread_t *th) +{ + // TODO: th->has_dedicated_nt + return th->nt->dedicated > 0; } -/* - * We become designated timer thread to kick vm->gvl.owner - * periodically. Continue on old timeout if it expired. - */ +RBIMPL_ATTR_MAYBE_UNUSED() static void -do_gvl_timer(struct rb_thread_sched *sched, rb_thread_t *th) +thread_sched_dump_(const char *file, int line, struct rb_thread_sched *sched) { - rb_vm_t *vm = GET_VM(); - static rb_hrtime_t abs; + fprintf(stderr, "@%s:%d running:%d\n", file, line, sched->running ? 
(int)sched->running->serial : -1); + rb_thread_t *th; + int i = 0; + ccan_list_for_each(&sched->readyq, th, sched.node.readyq) { + i++; if (i>10) rb_bug("too many"); + fprintf(stderr, " ready:%d (%sNT:%d)\n", th->serial, + th->nt ? (th->nt->dedicated ? "D" : "S") : "x", + th->nt ? (int)th->nt->serial : -1); + } +} + +#define ractor_sched_dump(s) ractor_sched_dump_(__FILE__, __LINE__, s) - sched->timer = th; +RBIMPL_ATTR_MAYBE_UNUSED() +static void +ractor_sched_dump_(const char *file, int line, rb_vm_t *vm) +{ + rb_ractor_t *r; - /* take over wakeups from UBF_TIMER */ - ubf_timer_disarm(); + fprintf(stderr, "ractor_sched_dump %s:%d\n", file, line); - if (sched->timer_err == ETIMEDOUT) { - abs = native_cond_timeout(&th->nt->cond.readyq, TIME_QUANTUM_NSEC); + int i = 0; + ccan_list_for_each(&vm->ractor.sched.grq, r, threads.sched.grq_node) { + i++; + if (i>10) rb_bug("!!"); + fprintf(stderr, " %d ready:%d\n", i, rb_ractor_id(r)); } - sched->timer_err = native_cond_timedwait(&th->nt->cond.readyq, &sched->lock, &abs); +} - ubf_wakeup_all_threads(); +#define thread_sched_lock(a, b) thread_sched_lock_(a, b, __FILE__, __LINE__) +#define thread_sched_unlock(a, b) thread_sched_unlock_(a, b, __FILE__, __LINE__) - if (UNLIKELY(rb_signal_buff_size())) { - if (th == vm->ractor.main_thread) { - RUBY_VM_SET_TRAP_INTERRUPT(th->ec); - } - else { - threadptr_trap_interrupt(vm->ractor.main_thread); - } - } +static void +thread_sched_lock_(struct rb_thread_sched *sched, rb_thread_t *th, const char *file, int line) +{ + rb_native_mutex_lock(&sched->lock_); - /* - * Timeslice. 
Warning: the process may fork while this - * thread is contending for GVL: - */ - const rb_thread_t *running; - if ((running = sched->running) != 0) { - // strictly speaking, accessing "running" is not thread-safe - RUBY_VM_SET_TIMER_INTERRUPT(running->ec); - } - sched->timer = 0; +#if VM_CHECK_MODE + RUBY_DEBUG_LOG2(file, line, "th:%u prev_owner:%u", rb_th_serial(th), rb_th_serial(sched->lock_owner)); + VM_ASSERT(sched->lock_owner == NULL); + sched->lock_owner = th; +#else + RUBY_DEBUG_LOG2(file, line, "th:%u", rb_th_serial(th)); +#endif } static void -thread_sched_to_ready_common(struct rb_thread_sched *sched, rb_thread_t *th) +thread_sched_unlock_(struct rb_thread_sched *sched, rb_thread_t *th, const char *file, int line) { - ccan_list_add_tail(&sched->readyq, &th->sched.node.readyq); + RUBY_DEBUG_LOG2(file, line, "th:%u", rb_th_serial(th)); + +#if VM_CHECK_MODE + VM_ASSERT(sched->lock_owner == th); + sched->lock_owner = NULL; +#endif + + rb_native_mutex_unlock(&sched->lock_); } static void -thread_sched_to_running_common(struct rb_thread_sched *sched, rb_thread_t *th) +thread_sched_set_lock_owner(struct rb_thread_sched *sched, rb_thread_t *th) { - RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_READY); - if (sched->running) { - VM_ASSERT(th->unblock.func == 0 && - "we must not be in ubf_list and GVL readyq at the same time"); - - // waiting -> ready - thread_sched_to_ready_common(sched, th); + RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); - // wait for running chance - do { - if (!sched->timer) { - do_gvl_timer(sched, th); - } - else { - rb_native_cond_wait(&th->nt->cond.readyq, &sched->lock); - } - } while (sched->running); +#if VM_CHECK_MODE > 0 + sched->lock_owner = th; +#endif +} - ccan_list_del_init(&th->sched.node.readyq); +static void +ASSERT_thread_sched_locked(struct rb_thread_sched *sched, rb_thread_t *th) +{ + VM_ASSERT(rb_native_mutex_trylock(&sched->lock_) == EBUSY); - if (sched->need_yield) { - sched->need_yield = 0; - 
rb_native_cond_signal(&sched->switch_cond); - } +#if VM_CHECK_MODE + if (th) { + VM_ASSERT(sched->lock_owner == th); } - else { /* reset timer if uncontended */ - sched->timer_err = ETIMEDOUT; + else { + VM_ASSERT(sched->lock_owner != NULL); } +#endif +} - // ready -> running - sched->running = th; - - RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_RESUMED); +#define ractor_sched_lock(a, b) ractor_sched_lock_(a, b, __FILE__, __LINE__) +#define ractor_sched_unlock(a, b) ractor_sched_unlock_(a, b, __FILE__, __LINE__) - if (!sched->timer) { - /* Make sure that this thread is not currently the sigwait_thread before we - decide to wake it up. Otherwise, we can end up in a loop of the following - operations: - * We are in native_sleep -> sigwait_sleep - * A signal arives, kicking this thread out of rb_sigwait_sleep - * We get here because of the call to THREAD_BLOCKING_END() in native_sleep - * write into the sigwait_fd pipe here - * re-loop around in native_sleep() because the desired sleep time has not - actually yet expired - * that calls rb_sigwait_sleep again - * the ppoll() in rb_sigwait_sleep immediately returns because of the byte we - wrote to the sigwait_fd here - * that wakes the thread up again and we end up here again. - Such a loop can only be broken by the main thread waking up and handling the - signal, such that ubf_threads_empty() below becomes true again; however this - loop can actually keep things so busy (and cause so much contention on the - main thread's interrupt_lock) that the main thread doesn't deal with the - signal for many seconds. This seems particuarly likely on FreeBSD 13. 
- */ - if (!designate_timer_thread(sched) && !ubf_threads_empty() && th != sigwait_th) { - rb_thread_wakeup_timer_thread(-1); - } +RBIMPL_ATTR_MAYBE_UNUSED() +static unsigned int +rb_ractor_serial(const rb_ractor_t *r) { + if (r) { + return rb_ractor_id(r); + } + else { + return 0; } } static void -thread_sched_to_running(struct rb_thread_sched *sched, rb_thread_t *th) +ractor_sched_set_locked(rb_vm_t *vm, rb_ractor_t *cr) { - rb_native_mutex_lock(&sched->lock); - thread_sched_to_running_common(sched, th); - rb_native_mutex_unlock(&sched->lock); +#if VM_CHECK_MODE > 0 + VM_ASSERT(vm->ractor.sched.lock_owner == NULL); + VM_ASSERT(vm->ractor.sched.locked == false); + + vm->ractor.sched.lock_owner = cr; + vm->ractor.sched.locked = true; +#endif } -static rb_thread_t * -thread_sched_to_waiting_common(struct rb_thread_sched *sched, rb_thread_t *th) +static void +ractor_sched_set_unlocked(rb_vm_t *vm, rb_ractor_t *cr) { - rb_thread_t *next; - sched->running = NULL; - next = ccan_list_top(&sched->readyq, rb_thread_t, sched.node.readyq); - if (next) rb_native_cond_signal(&next->nt->cond.readyq); +#if VM_CHECK_MODE > 0 + VM_ASSERT(vm->ractor.sched.locked); + VM_ASSERT(vm->ractor.sched.lock_owner == cr); - return next; + vm->ractor.sched.locked = false; + vm->ractor.sched.lock_owner = NULL; +#endif } static void -thread_sched_to_waiting(struct rb_thread_sched *sched, rb_thread_t *th) +ractor_sched_lock_(rb_vm_t *vm, rb_ractor_t *cr, const char *file, int line) { - RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED); - rb_native_mutex_lock(&sched->lock); - thread_sched_to_waiting_common(sched, th); - rb_native_mutex_unlock(&sched->lock); + rb_native_mutex_lock(&vm->ractor.sched.lock); + +#if VM_CHECK_MODE + RUBY_DEBUG_LOG2(file, line, "cr:%u prev_owner:%u", rb_ractor_serial(cr), rb_ractor_serial(vm->ractor.sched.lock_owner)); +#else + RUBY_DEBUG_LOG2(file, line, "cr:%u", rb_ractor_serial(cr)); +#endif + + ractor_sched_set_locked(vm, cr); } static void 
-thread_sched_to_dead(struct rb_thread_sched *sched, rb_thread_t *th) +ractor_sched_unlock_(rb_vm_t *vm, rb_ractor_t *cr, const char *file, int line) { - RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_EXITED); - thread_sched_to_waiting(sched, th); + RUBY_DEBUG_LOG2(file, line, "cr:%u", rb_ractor_serial(cr)); + + ractor_sched_set_unlocked(vm, cr); + rb_native_mutex_unlock(&vm->ractor.sched.lock); } static void -thread_sched_yield(struct rb_thread_sched *sched, rb_thread_t *th) +ASSERT_ractor_sched_locked(rb_vm_t *vm, rb_ractor_t *cr) { - rb_thread_t *next; + VM_ASSERT(rb_native_mutex_trylock(&vm->ractor.sched.lock) == EBUSY); + VM_ASSERT(vm->ractor.sched.locked); + VM_ASSERT(cr == NULL || vm->ractor.sched.lock_owner == cr); +} - /* - * Perhaps other threads are stuck in blocking region w/o GVL, too, - * (perhaps looping in io_close_fptr) so we kick them: - */ - ubf_wakeup_all_threads(); - rb_native_mutex_lock(&sched->lock); - next = thread_sched_to_waiting_common(sched, th); - - /* An another thread is processing GVL yield. */ - if (UNLIKELY(sched->wait_yield)) { - while (sched->wait_yield) - rb_native_cond_wait(&sched->switch_wait_cond, &sched->lock); - } - else if (next) { - /* Wait until another thread task takes GVL. 
*/ - sched->need_yield = 1; - sched->wait_yield = 1; - while (sched->need_yield) - rb_native_cond_wait(&sched->switch_cond, &sched->lock); - sched->wait_yield = 0; - rb_native_cond_broadcast(&sched->switch_wait_cond); - } - else { - rb_native_mutex_unlock(&sched->lock); - native_thread_yield(); - rb_native_mutex_lock(&sched->lock); - rb_native_cond_broadcast(&sched->switch_wait_cond); +RBIMPL_ATTR_MAYBE_UNUSED() +static bool +ractor_sched_running_threads_contain_p(rb_vm_t *vm, rb_thread_t *th) +{ + rb_thread_t *rth; + ccan_list_for_each(&vm->ractor.sched.running_threads, rth, sched.node.running_threads) { + if (rth == th) return true; } - thread_sched_to_running_common(sched, th); - rb_native_mutex_unlock(&sched->lock); + return false; } -void -rb_thread_sched_init(struct rb_thread_sched *sched) +RBIMPL_ATTR_MAYBE_UNUSED() +static unsigned int +ractor_sched_running_threads_size(rb_vm_t *vm) { - rb_native_mutex_initialize(&sched->lock); - rb_native_cond_initialize(&sched->switch_cond); - rb_native_cond_initialize(&sched->switch_wait_cond); - ccan_list_head_init(&sched->readyq); - sched->running = NULL; - sched->timer = 0; - sched->timer_err = ETIMEDOUT; - sched->need_yield = 0; - sched->wait_yield = 0; + rb_thread_t *th; + unsigned int i = 0; + ccan_list_for_each(&vm->ractor.sched.running_threads, th, sched.node.running_threads) { + i++; + } + return i; } -// TODO - -static void clear_thread_cache_altstack(void); - -static void -rb_thread_sched_destroy(struct rb_thread_sched *sched) +RBIMPL_ATTR_MAYBE_UNUSED() +static unsigned int +ractor_sched_timeslice_threads_size(rb_vm_t *vm) { - /* - * only called once at VM shutdown (not atfork), another thread - * may still grab vm->gvl.lock when calling gvl_release at - * the end of thread_start_func_2 - */ - if (0) { - rb_native_cond_destroy(&sched->switch_wait_cond); - rb_native_cond_destroy(&sched->switch_cond); - rb_native_mutex_destroy(&sched->lock); + rb_thread_t *th; + unsigned int i = 0; + 
ccan_list_for_each(&vm->ractor.sched.timeslice_threads, th, sched.node.timeslice_threads) { + i++; } - clear_thread_cache_altstack(); + return i; } -#ifdef RB_THREAD_T_HAS_NATIVE_ID -static int -get_native_thread_id(void) +RBIMPL_ATTR_MAYBE_UNUSED() +static bool +ractor_sched_timeslice_threads_contain_p(rb_vm_t *vm, rb_thread_t *th) { -#ifdef __linux__ - return (int)syscall(SYS_gettid); -#elif defined(__FreeBSD__) - return pthread_getthreadid_np(); -#endif + rb_thread_t *rth; + ccan_list_for_each(&vm->ractor.sched.timeslice_threads, rth, sched.node.timeslice_threads) { + if (rth == th) return true; + } + return false; } -#endif -#if defined(HAVE_WORKING_FORK) -static void thread_cache_reset(void); +static void ractor_sched_barrier_join_signal_locked(rb_vm_t *vm); +static void ractor_sched_barrier_join_wait_locked(rb_vm_t *vm, rb_thread_t *th); + +// setup timeslice signals by the timer thread. static void -thread_sched_atfork(struct rb_thread_sched *sched) +thread_sched_setup_running_threads(struct rb_thread_sched *sched, rb_ractor_t *cr, rb_vm_t *vm, + rb_thread_t *add_th, rb_thread_t *del_th, rb_thread_t *add_timeslice_th) { - current_fork_gen++; - thread_cache_reset(); - rb_thread_sched_init(sched); - thread_sched_to_running(sched, GET_THREAD()); -#ifdef RB_THREAD_T_HAS_NATIVE_ID - GET_THREAD()->nt->tid = get_native_thread_id(); -#endif -} +#if USE_RUBY_DEBUG_LOG + unsigned int prev_running_cnt = vm->ractor.sched.running_cnt; #endif -#ifdef RB_THREAD_LOCAL_SPECIFIER -static RB_THREAD_LOCAL_SPECIFIER rb_thread_t *ruby_native_thread; -#else -static pthread_key_t ruby_native_thread_key; -#endif + rb_thread_t *del_timeslice_th; + + if (del_th && sched->is_running_timeslice) { + del_timeslice_th = del_th; + sched->is_running_timeslice = false; + } + else { + del_timeslice_th = NULL; + } + + RUBY_DEBUG_LOG("+:%u -:%u +ts:%u -ts:%u", + rb_th_serial(add_th), rb_th_serial(del_th), + rb_th_serial(add_timeslice_th), rb_th_serial(del_timeslice_th)); + + 
ractor_sched_lock(vm, cr); + { + // update running_threads + if (del_th) { + VM_ASSERT(ractor_sched_running_threads_contain_p(vm, del_th)); + VM_ASSERT(del_timeslice_th != NULL || + !ractor_sched_timeslice_threads_contain_p(vm, del_th)); + + ccan_list_del_init(&del_th->sched.node.running_threads); + vm->ractor.sched.running_cnt--; + + if (UNLIKELY(vm->ractor.sched.barrier_waiting)) { + ractor_sched_barrier_join_signal_locked(vm); + } + sched->is_running = false; + } + + if (add_th) { + if (UNLIKELY(vm->ractor.sched.barrier_waiting)) { + RUBY_DEBUG_LOG("barrier-wait"); + + ractor_sched_barrier_join_signal_locked(vm); + ractor_sched_barrier_join_wait_locked(vm, add_th); + } + + VM_ASSERT(!ractor_sched_running_threads_contain_p(vm, add_th)); + VM_ASSERT(!ractor_sched_timeslice_threads_contain_p(vm, add_th)); + + ccan_list_add(&vm->ractor.sched.running_threads, &add_th->sched.node.running_threads); + vm->ractor.sched.running_cnt++; + sched->is_running = true; + } + + if (add_timeslice_th) { + // update timeslice threads + int was_empty = ccan_list_empty(&vm->ractor.sched.timeslice_threads); + VM_ASSERT(!ractor_sched_timeslice_threads_contain_p(vm, add_timeslice_th)); + ccan_list_add(&vm->ractor.sched.timeslice_threads, &add_timeslice_th->sched.node.timeslice_threads); + sched->is_running_timeslice = true; + if (was_empty) { + timer_thread_wakeup_locked(vm); + } + } + + if (del_timeslice_th) { + VM_ASSERT(ractor_sched_timeslice_threads_contain_p(vm, del_timeslice_th)); + ccan_list_del_init(&del_timeslice_th->sched.node.timeslice_threads); + } + + VM_ASSERT(ractor_sched_running_threads_size(vm) == vm->ractor.sched.running_cnt); + VM_ASSERT(ractor_sched_timeslice_threads_size(vm) <= vm->ractor.sched.running_cnt); + } + ractor_sched_unlock(vm, cr); + + if (add_th && !del_th && UNLIKELY(vm->ractor.sync.lock_owner != NULL)) { + // it can be after barrier synchronization by another ractor + RB_VM_LOCK_ENTER(); + RB_VM_LOCK_LEAVE(); + } + + //RUBY_DEBUG_LOG("+:%u -:%u +ts:%u 
-ts:%u run:%u->%u", + // rb_th_serial(add_th), rb_th_serial(del_th), + // rb_th_serial(add_timeslice_th), rb_th_serial(del_timeslice_th), + RUBY_DEBUG_LOG("run:%u->%u", prev_running_cnt, vm->ractor.sched.running_cnt); +} static void -null_func(int i) +thread_sched_add_running_thread(struct rb_thread_sched *sched, rb_thread_t *th) { - /* null */ + ASSERT_thread_sched_locked(sched, th); + VM_ASSERT(sched->running == th); + + rb_vm_t *vm = th->vm; + thread_sched_setup_running_threads(sched, th->ractor, vm, th, NULL, ccan_list_empty(&sched->readyq) ? NULL : th); } -rb_thread_t * -ruby_thread_from_native(void) +static void +thread_sched_del_running_thread(struct rb_thread_sched *sched, rb_thread_t *th) { -#ifdef RB_THREAD_LOCAL_SPECIFIER - return ruby_native_thread; -#else - return pthread_getspecific(ruby_native_thread_key); -#endif + ASSERT_thread_sched_locked(sched, th); + + rb_vm_t *vm = th->vm; + thread_sched_setup_running_threads(sched, th->ractor, vm, NULL, th, NULL); } -int -ruby_thread_set_native(rb_thread_t *th) +void +rb_add_running_thread(rb_thread_t *th) { - if (th) { -#ifdef USE_UBF_LIST - ccan_list_node_init(&th->sched.node.ubf); -#endif + struct rb_thread_sched *sched = TH_SCHED(th); + + thread_sched_lock(sched, th); + { + thread_sched_add_running_thread(sched, th); } + thread_sched_unlock(sched, th); +} - // setup TLS +void +rb_del_running_thread(rb_thread_t *th) +{ + struct rb_thread_sched *sched = TH_SCHED(th); - if (th && th->ec) { - rb_ractor_set_current_ec(th->ractor, th->ec); + thread_sched_lock(sched, th); + { + thread_sched_del_running_thread(sched, th); } -#ifdef RB_THREAD_LOCAL_SPECIFIER - ruby_native_thread = th; - return 1; -#else - return pthread_setspecific(ruby_native_thread_key, th) == 0; -#endif + thread_sched_unlock(sched, th); } +// setup current or next running thread +// sched->running should be set only on this function. +// +// if th is NULL, there is no running threads. 
static void -native_thread_init(struct rb_native_thread *nt) +thread_sched_set_running(struct rb_thread_sched *sched, rb_thread_t *th) { -#ifdef RB_THREAD_T_HAS_NATIVE_ID - nt->tid = get_native_thread_id(); -#endif - rb_native_cond_initialize(&nt->cond.readyq); - if (&nt->cond.readyq != &nt->cond.intr) - rb_native_cond_initialize(&nt->cond.intr); + RUBY_DEBUG_LOG("th:%u->th:%u", rb_th_serial(sched->running), rb_th_serial(th)); + VM_ASSERT(sched->running != th); + + sched->running = th; } -void -Init_native_thread(rb_thread_t *main_th) +RBIMPL_ATTR_MAYBE_UNUSED() +static bool +thread_sched_readyq_contain_p(struct rb_thread_sched *sched, rb_thread_t *th) { -#if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) - if (condattr_monotonic) { - int r = pthread_condattr_init(condattr_monotonic); - if (r == 0) { - r = pthread_condattr_setclock(condattr_monotonic, CLOCK_MONOTONIC); - } - if (r) condattr_monotonic = NULL; + rb_thread_t *rth; + ccan_list_for_each(&sched->readyq, rth, sched.node.readyq) { + if (rth == th) return true; } -#endif + return false; +} -#ifndef RB_THREAD_LOCAL_SPECIFIER - if (pthread_key_create(&ruby_native_thread_key, 0) == EAGAIN) { - rb_bug("pthread_key_create failed (ruby_native_thread_key)"); +// deque thread from the ready queue. +// if the ready queue is empty, return NULL. +// +// return deque'ed running thread (or NULL). +static rb_thread_t * +thread_sched_deq(struct rb_thread_sched *sched) +{ + ASSERT_thread_sched_locked(sched, NULL); + rb_thread_t *next_th; + + VM_ASSERT(sched->running != NULL); + + if (ccan_list_empty(&sched->readyq)) { + next_th = NULL; + } + else { + next_th = ccan_list_pop(&sched->readyq, rb_thread_t, sched.node.readyq); + + VM_ASSERT(sched->readyq_cnt > 0); + sched->readyq_cnt--; + ccan_list_node_init(&next_th->sched.node.readyq); + } + + RUBY_DEBUG_LOG("next_th:%u readyq_cnt:%d", rb_th_serial(next_th), sched->readyq_cnt); + + return next_th; +} + +// enqueue ready thread to the ready queue. 
+static void +thread_sched_enq(struct rb_thread_sched *sched, rb_thread_t *ready_th) +{ + ASSERT_thread_sched_locked(sched, NULL); + RUBY_DEBUG_LOG("ready_th:%u readyq_cnt:%d", rb_th_serial(ready_th), sched->readyq_cnt); + + VM_ASSERT(sched->running != NULL); + VM_ASSERT(!thread_sched_readyq_contain_p(sched, ready_th)); + + if (sched->is_running) { + if (ccan_list_empty(&sched->readyq)) { + // add sched->running to timeslice + thread_sched_setup_running_threads(sched, ready_th->ractor, ready_th->vm, NULL, NULL, sched->running); + } + } + else { + VM_ASSERT(!ractor_sched_timeslice_threads_contain_p(ready_th->vm, sched->running)); + } + + ccan_list_add_tail(&sched->readyq, &ready_th->sched.node.readyq); + sched->readyq_cnt++; +} + +// DNT: kick condvar +// SNT: TODO +static void +thread_sched_wakeup_running_thread(struct rb_thread_sched *sched, rb_thread_t *next_th, bool will_switch) +{ + ASSERT_thread_sched_locked(sched, NULL); + VM_ASSERT(sched->running == next_th); + + if (next_th) { + if (next_th->nt) { + if (th_has_dedicated_nt(next_th)) { + RUBY_DEBUG_LOG("pinning th:%u", next_th->serial); + rb_native_cond_signal(&next_th->nt->cond.readyq); + } + else { + // TODO + RUBY_DEBUG_LOG("th:%u is already running.", next_th->serial); + } + } + else { + if (will_switch) { + RUBY_DEBUG_LOG("th:%u (do nothing)", rb_th_serial(next_th)); + } + else { + RUBY_DEBUG_LOG("th:%u (enq)", rb_th_serial(next_th)); + ractor_sched_enq(next_th->vm, next_th->ractor); + } + } + } + else { + RUBY_DEBUG_LOG("no waiting threads%s", ""); + } +} + +// waiting -> ready (locked) +static void +thread_sched_to_ready_common(struct rb_thread_sched *sched, rb_thread_t *th, bool wakeup, bool will_switch) +{ + RUBY_DEBUG_LOG("th:%u running:%u redyq_cnt:%d", rb_th_serial(th), rb_th_serial(sched->running), sched->readyq_cnt); + + VM_ASSERT(sched->running != th); + VM_ASSERT(!thread_sched_readyq_contain_p(sched, th)); + + if (sched->running == NULL) { + thread_sched_set_running(sched, th); + if (wakeup) 
thread_sched_wakeup_running_thread(sched, th, will_switch); + } + else { + thread_sched_enq(sched, th); + } + + RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_READY); +} + +// waiting -> ready +// +// `th` had become "waiting" state by `thread_sched_to_waiting` +// and `thread_sched_to_ready` enqueues `th` to the thread ready queue. +RBIMPL_ATTR_MAYBE_UNUSED() +static void +thread_sched_to_ready(struct rb_thread_sched *sched, rb_thread_t *th) +{ + RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); + + thread_sched_lock(sched, th); + { + thread_sched_to_ready_common(sched, th, true, false); + } + thread_sched_unlock(sched, th); +} + +// wait until sched->running is `th`. +static void +thread_sched_wait_running_turn(struct rb_thread_sched *sched, rb_thread_t *th, bool can_direct_transfer) +{ + RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); + + ASSERT_thread_sched_locked(sched, th); + VM_ASSERT(th == GET_THREAD()); + + if (th != sched->running) { + // already deleted from running threads + // VM_ASSERT(!ractor_sched_running_threads_contain_p(th->vm, th)); // need locking + + // wait for execution right + rb_thread_t *next_th; + while((next_th = sched->running) != th) { + if (th_has_dedicated_nt(th)) { + RUBY_DEBUG_LOG("(nt) sleep th:%u running:%u", rb_th_serial(th), rb_th_serial(sched->running)); + + thread_sched_set_lock_owner(sched, NULL); + { + RUBY_DEBUG_LOG("nt:%d cond:%p", th->nt->serial, &th->nt->cond.readyq); + rb_native_cond_wait(&th->nt->cond.readyq, &sched->lock_); + } + thread_sched_set_lock_owner(sched, th); + + RUBY_DEBUG_LOG("(nt) wakeup %s", sched->running == th ? 
"success" : "failed"); + if (th == sched->running) { + rb_ractor_thread_switch(th->ractor, th); + } + } + else { + // search another ready thread + if (can_direct_transfer && + (next_th = sched->running) != NULL && + !next_th->nt // next_th is running or has dedicated nt + ) { + + RUBY_DEBUG_LOG("th:%u->%u (direct)", rb_th_serial(th), rb_th_serial(next_th)); + + thread_sched_set_lock_owner(sched, NULL); + { + rb_ractor_set_current_ec(th->ractor, NULL); + thread_sched_switch(th, next_th); + } + thread_sched_set_lock_owner(sched, th); + } + else { + // search another ready ractor + struct rb_native_thread *nt = th->nt; + native_thread_assign(NULL, th); + + RUBY_DEBUG_LOG("th:%u->%u (ractor scheduling)", rb_th_serial(th), rb_th_serial(next_th)); + + thread_sched_set_lock_owner(sched, NULL); + { + rb_ractor_set_current_ec(th->ractor, NULL); + coroutine_transfer(th->sched.context, nt->nt_context); + } + thread_sched_set_lock_owner(sched, th); + } + + VM_ASSERT(GET_EC() == th->ec); + } + } + + VM_ASSERT(th->nt != NULL); + VM_ASSERT(GET_EC() == th->ec); + VM_ASSERT(th->sched.waiting_reason.flags == thread_sched_waiting_none); + + // add th to running threads + thread_sched_add_running_thread(sched, th); + } + + // VM_ASSERT(ractor_sched_running_threads_contain_p(th->vm, th)); need locking + RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_RESUMED); +} + +// waiting -> ready -> running (locked) +static void +thread_sched_to_running_common(struct rb_thread_sched *sched, rb_thread_t *th) +{ + RUBY_DEBUG_LOG("th:%u dedicated:%d", rb_th_serial(th), th_has_dedicated_nt(th)); + + VM_ASSERT(sched->running != th); + VM_ASSERT(th_has_dedicated_nt(th)); + VM_ASSERT(GET_THREAD() == th); + + native_thread_dedicated_dec(th->vm, th->ractor, th->nt); + + // waiting -> ready + thread_sched_to_ready_common(sched, th, false, false); + + if (sched->running == th) { + thread_sched_add_running_thread(sched, th); + } + + // TODO: check SNT number + thread_sched_wait_running_turn(sched, th, 
false); +} + +// waiting -> ready -> running +// +// `th` had been waiting by `thread_sched_to_waiting()` +// and run a dedicated task (like waitpid and so on). +// After the dedicated task, this function is called +// to join a normal thread-scheduling. +static void +thread_sched_to_running(struct rb_thread_sched *sched, rb_thread_t *th) +{ + thread_sched_lock(sched, th); + { + thread_sched_to_running_common(sched, th); + } + thread_sched_unlock(sched, th); +} + +// resume a next thread in the thread ready queue. +// +// deque next running thread from the ready thread queue and +// resume this thread if available. +// +// If the next thread has a dedicated native thread, simply signal to resume. +// Otherwise, make the ractor ready and other nt will run the ractor and the thread. +static void +thread_sched_wakeup_next_thread(struct rb_thread_sched *sched, rb_thread_t *th, bool will_switch) +{ + ASSERT_thread_sched_locked(sched, th); + + VM_ASSERT(sched->running == th); + VM_ASSERT(sched->running->nt != NULL); + + rb_thread_t *next_th = thread_sched_deq(sched); + + RUBY_DEBUG_LOG("next_th:%u", rb_th_serial(next_th)); + VM_ASSERT(th != next_th); + + thread_sched_set_running(sched, next_th); + VM_ASSERT(next_th == sched->running); + thread_sched_wakeup_running_thread(sched, next_th, will_switch); + + if (th != next_th) { + thread_sched_del_running_thread(sched, th); + } +} + +// running -> waiting +// +// to_dead: false +// th will run dedicated task. +// run another ready thread. +// to_dead: true +// th will be dead. +// run another ready thread. +static void +thread_sched_to_waiting_common0(struct rb_thread_sched *sched, rb_thread_t *th, bool to_dead) +{ + if (rb_internal_thread_event_hooks) { + rb_thread_execute_hooks(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED); + } + + if (!to_dead) native_thread_dedicated_inc(th->vm, th->ractor, th->nt); + + RUBY_DEBUG_LOG("%sth:%u", to_dead ? "to_dead " : "", rb_th_serial(th)); + + bool can_switch = to_dead ? 
!th_has_dedicated_nt(th) : false; + thread_sched_wakeup_next_thread(sched, th, can_switch); +} + +// running -> dead (locked) +static void +thread_sched_to_dead_common(struct rb_thread_sched *sched, rb_thread_t *th) +{ + RUBY_DEBUG_LOG("dedicated:%d", th->nt->dedicated); + RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_EXITED); + + thread_sched_to_waiting_common0(sched, th, true); +} + +// running -> dead +static void +thread_sched_to_dead(struct rb_thread_sched *sched, rb_thread_t *th) +{ + thread_sched_lock(sched, th); + { + thread_sched_to_dead_common(sched, th); + } + thread_sched_unlock(sched, th); +} + +// running -> waiting (locked) +// +// This thread will run dedicated task (th->nt->dedicated++). +static void +thread_sched_to_waiting_common(struct rb_thread_sched *sched, rb_thread_t *th) +{ + RUBY_DEBUG_LOG("dedicated:%d", th->nt->dedicated); + thread_sched_to_waiting_common0(sched, th, false); +} + +// running -> waiting +// +// This thread will run a dedicated task. +static void +thread_sched_to_waiting(struct rb_thread_sched *sched, rb_thread_t *th) +{ + RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED); + + thread_sched_lock(sched, th); + { + thread_sched_to_waiting_common(sched, th); + } + thread_sched_unlock(sched, th); +} + +// mini utility func +static void +setup_ubf(rb_thread_t *th, rb_unblock_function_t *func, void *arg) +{ + rb_native_mutex_lock(&th->interrupt_lock); + { + th->unblock.func = func; + th->unblock.arg = arg; + } + rb_native_mutex_unlock(&th->interrupt_lock); +} + +static void +ubf_waiting(void *ptr) +{ + rb_thread_t *th = (rb_thread_t *)ptr; + struct rb_thread_sched *sched = TH_SCHED(th); + + // only once. it is safe because th->interrupt_lock is already acquired. + th->unblock.func = NULL; + th->unblock.arg = NULL; + + RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); + + thread_sched_lock(sched, th); + { + if (sched->running == th) { + // not sleeping yet. 
+ } + else { + thread_sched_to_ready_common(sched, th, true, false); + } + } + thread_sched_unlock(sched, th); +} + +// running -> waiting +// +// This thread will sleep until other thread wakeup the thread. +static void +thread_sched_to_waiting_until_wakeup(struct rb_thread_sched *sched, rb_thread_t *th) +{ + RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); + + RB_VM_SAVE_MACHINE_CONTEXT(th); + setup_ubf(th, ubf_waiting, (void *)th); + + thread_sched_lock(sched, th); + { + if (!RUBY_VM_INTERRUPTED(th->ec)) { + bool can_direct_transfer = !th_has_dedicated_nt(th); + thread_sched_wakeup_next_thread(sched, th, can_direct_transfer); + thread_sched_wait_running_turn(sched, th, can_direct_transfer); + } + else { + RUBY_DEBUG_LOG("th:%u interrupted", rb_th_serial(th)); + } + } + thread_sched_unlock(sched, th); + + setup_ubf(th, NULL, NULL); +} + +// run another thread in the ready queue. +// continue to run if there are no ready threads. +static void +thread_sched_yield(struct rb_thread_sched *sched, rb_thread_t *th) +{ + RUBY_DEBUG_LOG("th:%d sched->readyq_cnt:%d", (int)th->serial, sched->readyq_cnt); + + thread_sched_lock(sched, th); + { + if (!ccan_list_empty(&sched->readyq)) { + thread_sched_wakeup_next_thread(sched, th, !th_has_dedicated_nt(th)); + bool can_direct_transfer = !th_has_dedicated_nt(th); + thread_sched_to_ready_common(sched, th, false, can_direct_transfer); + thread_sched_wait_running_turn(sched, th, can_direct_transfer); + } + else { + VM_ASSERT(sched->readyq_cnt == 0); + } + } + thread_sched_unlock(sched, th); +} + +void +rb_thread_sched_init(struct rb_thread_sched *sched, bool atfork) +{ + rb_native_mutex_initialize(&sched->lock_); + ccan_list_head_init(&sched->readyq); + sched->readyq_cnt = 0; + +#if USE_MN_THREADS + if (!atfork) sched->enable_mn_threads = true; // MN is enabled on Ractors +#endif +} + +static void +thread_sched_switch0(struct coroutine_context *current_cont, rb_thread_t *next_th, struct rb_native_thread *nt) +{ + 
VM_ASSERT(!nt->dedicated); + VM_ASSERT(next_th->nt == NULL); + + RUBY_DEBUG_LOG("next_th:%u", rb_th_serial(next_th)); + + ruby_thread_set_native(next_th); + native_thread_assign(nt, next_th); + coroutine_transfer(current_cont, next_th->sched.context); +} + +static void +thread_sched_switch(rb_thread_t *cth, rb_thread_t *next_th) +{ + struct rb_native_thread *nt = cth->nt; + native_thread_assign(NULL, cth); + RUBY_DEBUG_LOG("th:%u->%u on nt:%d", rb_th_serial(cth), rb_th_serial(next_th), nt->serial); + thread_sched_switch0(cth->sched.context, next_th, nt); +} + +#if VM_CHECK_MODE > 0 +RBIMPL_ATTR_MAYBE_UNUSED() +static unsigned int +grq_size(rb_vm_t *vm, rb_ractor_t *cr) +{ + ASSERT_ractor_sched_locked(vm, cr); + + rb_ractor_t *r, *prev_r = NULL; + unsigned int i = 0; + + ccan_list_for_each(&vm->ractor.sched.grq, r, threads.sched.grq_node) { + i++; + + VM_ASSERT(r != prev_r); + prev_r = r; + } + return i; +} +#endif + +static void +ractor_sched_enq(rb_vm_t *vm, rb_ractor_t *r) +{ + struct rb_thread_sched *sched = &r->threads.sched; + rb_ractor_t *cr = NULL; // timer thread can call this function + + VM_ASSERT(sched->running != NULL); + VM_ASSERT(sched->running->nt == NULL); + + ractor_sched_lock(vm, cr); + { +#if VM_CHECK_MODE > 0 + // check if grq contains r + rb_ractor_t *tr; + ccan_list_for_each(&vm->ractor.sched.grq, tr, threads.sched.grq_node) { + VM_ASSERT(r != tr); + } +#endif + + ccan_list_add_tail(&vm->ractor.sched.grq, &sched->grq_node); + vm->ractor.sched.grq_cnt++; + VM_ASSERT(grq_size(vm, cr) == vm->ractor.sched.grq_cnt); + + RUBY_DEBUG_LOG("r:%u th:%u grq_cnt:%u", rb_ractor_id(r), rb_th_serial(sched->running), vm->ractor.sched.grq_cnt); + + rb_native_cond_signal(&vm->ractor.sched.cond); + + // ractor_sched_dump(vm); + } + ractor_sched_unlock(vm, cr); +} + + +#ifndef SNT_KEEP_SECONDS +#define SNT_KEEP_SECONDS 0 +#endif + +#ifndef MINIMUM_SNT +// make at least MINIMUM_SNT snts for debug. 
+#define MINIMUM_SNT 0 +#endif + +static rb_ractor_t * +ractor_sched_deq(rb_vm_t *vm, rb_ractor_t *cr) +{ + rb_ractor_t *r; + + ractor_sched_lock(vm, cr); + { + RUBY_DEBUG_LOG("empty? %d", ccan_list_empty(&vm->ractor.sched.grq)); + // ractor_sched_dump(vm); + + VM_ASSERT(rb_current_execution_context(false) == NULL); + VM_ASSERT(grq_size(vm, cr) == vm->ractor.sched.grq_cnt); + + while ((r = ccan_list_pop(&vm->ractor.sched.grq, rb_ractor_t, threads.sched.grq_node)) == NULL) { + RUBY_DEBUG_LOG("wait grq_cnt:%d", (int)vm->ractor.sched.grq_cnt); + +#if SNT_KEEP_SECONDS > 0 + rb_hrtime_t abs = rb_hrtime_add(rb_hrtime_now(), RB_HRTIME_PER_SEC * SNT_KEEP_SECONDS); + if (native_cond_timedwait(&vm->ractor.sched.cond, &vm->ractor.sched.lock, &abs) == ETIMEDOUT) { + RUBY_DEBUG_LOG("timeout, grq_cnt:%d", (int)vm->ractor.sched.grq_cnt); + VM_ASSERT(r == NULL); + vm->ractor.sched.snt_cnt--; + vm->ractor.sched.running_cnt--; + break; + } + else { + RUBY_DEBUG_LOG("wakeup grq_cnt:%d", (int)vm->ractor.sched.grq_cnt); + } +#else + ractor_sched_set_unlocked(vm, cr); + rb_native_cond_wait(&vm->ractor.sched.cond, &vm->ractor.sched.lock); + ractor_sched_set_locked(vm, cr); + + RUBY_DEBUG_LOG("wakeup grq_cnt:%d", (int)vm->ractor.sched.grq_cnt); +#endif + } + + VM_ASSERT(rb_current_execution_context(false) == NULL); + + if (r) { + VM_ASSERT(vm->ractor.sched.grq_cnt > 0); + vm->ractor.sched.grq_cnt--; + RUBY_DEBUG_LOG("r:%d grq_cnt:%u", (int)rb_ractor_id(r), vm->ractor.sched.grq_cnt); + } + else { + VM_ASSERT(SNT_KEEP_SECONDS > 0); + // timeout + } + } + ractor_sched_unlock(vm, cr); + + return r; +} + +void rb_ractor_lock_self(rb_ractor_t *r); +void rb_ractor_unlock_self(rb_ractor_t *r); + +void +rb_ractor_sched_sleep(rb_execution_context_t *ec, rb_ractor_t *cr, rb_unblock_function_t *ubf) +{ + // ractor lock of cr is acquired + // r is sleeping status + rb_thread_t *th = rb_ec_thread_ptr(ec); + struct rb_thread_sched *sched = TH_SCHED(th); + cr->sync.wait.waiting_thread = th; // TODO: 
multi-thread + + setup_ubf(th, ubf, (void *)cr); + + thread_sched_lock(sched, th); + { + rb_ractor_unlock_self(cr); + { + if (RUBY_VM_INTERRUPTED(th->ec)) { + RUBY_DEBUG_LOG("interrupted"); + } + else if (cr->sync.wait.wakeup_status != wakeup_none) { + RUBY_DEBUG_LOG("awaken:%d", (int)cr->sync.wait.wakeup_status); + } + else { + // sleep + RB_VM_SAVE_MACHINE_CONTEXT(th); + th->status = THREAD_STOPPED_FOREVER; + + bool can_direct_transfer = !th_has_dedicated_nt(th); + thread_sched_wakeup_next_thread(sched, th, can_direct_transfer); + thread_sched_wait_running_turn(sched, th, can_direct_transfer); + th->status = THREAD_RUNNABLE; + // wakeup + } + } + } + thread_sched_unlock(sched, th); + + setup_ubf(th, NULL, NULL); + + rb_ractor_lock_self(cr); + cr->sync.wait.waiting_thread = NULL; +} + +void +rb_ractor_sched_wakeup(rb_ractor_t *r) +{ + rb_thread_t *r_th = r->sync.wait.waiting_thread; + // ractor lock of r is acquired + struct rb_thread_sched *sched = TH_SCHED(r_th); + + VM_ASSERT(r->sync.wait.wakeup_status != 0); + + thread_sched_lock(sched, r_th); + { + if (r_th->status == THREAD_STOPPED_FOREVER) { + thread_sched_to_ready_common(sched, r_th, true, false); + } + } + thread_sched_unlock(sched, r_th); +} + +static bool +ractor_sched_barrier_completed_p(rb_vm_t *vm) +{ + RUBY_DEBUG_LOG("run:%u wait:%u", vm->ractor.sched.running_cnt, vm->ractor.sched.barrier_waiting_cnt); + VM_ASSERT(vm->ractor.sched.running_cnt - 1 >= vm->ractor.sched.barrier_waiting_cnt); + return (vm->ractor.sched.running_cnt - vm->ractor.sched.barrier_waiting_cnt) == 1; +} + +void +rb_ractor_sched_barrier_start(rb_vm_t *vm, rb_ractor_t *cr) +{ + VM_ASSERT(cr == GET_RACTOR()); + VM_ASSERT(vm->ractor.sync.lock_owner == cr); // VM is locked + VM_ASSERT(!vm->ractor.sched.barrier_waiting); + VM_ASSERT(vm->ractor.sched.barrier_waiting_cnt == 0); + + RUBY_DEBUG_LOG("start serial:%u", vm->ractor.sched.barrier_serial); + + unsigned int lock_rec; + + ractor_sched_lock(vm, cr); + { + 
vm->ractor.sched.barrier_waiting = true; + + // release VM lock + lock_rec = vm->ractor.sync.lock_rec; + vm->ractor.sync.lock_rec = 0; + vm->ractor.sync.lock_owner = NULL; + rb_native_mutex_unlock(&vm->ractor.sync.lock); + { + // interrupts all running threads + rb_thread_t *ith; + ccan_list_for_each(&vm->ractor.sched.running_threads, ith, sched.node.running_threads) { + if (ith->ractor != cr) { + RUBY_DEBUG_LOG("barrier int:%u", rb_th_serial(ith)); + RUBY_VM_SET_VM_BARRIER_INTERRUPT(ith->ec); + } + } + + // wait for other ractors + while (!ractor_sched_barrier_completed_p(vm)) { + ractor_sched_set_unlocked(vm, cr); + rb_native_cond_wait(&vm->ractor.sched.barrier_complete_cond, &vm->ractor.sched.lock); + ractor_sched_set_locked(vm, cr); + } + } + } + ractor_sched_unlock(vm, cr); + + // acquire VM lock + rb_native_mutex_lock(&vm->ractor.sync.lock); + vm->ractor.sync.lock_rec = lock_rec; + vm->ractor.sync.lock_owner = cr; + + RUBY_DEBUG_LOG("completed seirial:%u", vm->ractor.sched.barrier_serial); + + ractor_sched_lock(vm, cr); + { + vm->ractor.sched.barrier_waiting = false; + vm->ractor.sched.barrier_serial++; + vm->ractor.sched.barrier_waiting_cnt = 0; + rb_native_cond_broadcast(&vm->ractor.sched.barrier_release_cond); + } + ractor_sched_unlock(vm, cr); +} + +static void +ractor_sched_barrier_join_signal_locked(rb_vm_t *vm) +{ + if (ractor_sched_barrier_completed_p(vm)) { + rb_native_cond_signal(&vm->ractor.sched.barrier_complete_cond); + } +} + +static void +ractor_sched_barrier_join_wait_locked(rb_vm_t *vm, rb_thread_t *th) +{ + VM_ASSERT(vm->ractor.sched.barrier_waiting); + + unsigned int barrier_serial = vm->ractor.sched.barrier_serial; + + while (vm->ractor.sched.barrier_serial == barrier_serial) { + RUBY_DEBUG_LOG("sleep serial:%u", barrier_serial); + RB_VM_SAVE_MACHINE_CONTEXT(th); + + rb_ractor_t *cr = th->ractor; + ractor_sched_set_unlocked(vm, cr); + rb_native_cond_wait(&vm->ractor.sched.barrier_release_cond, &vm->ractor.sched.lock); + 
ractor_sched_set_locked(vm, cr); + + RUBY_DEBUG_LOG("wakeup serial:%u", barrier_serial); + } +} + +void +rb_ractor_sched_barrier_join(rb_vm_t *vm, rb_ractor_t *cr) +{ + VM_ASSERT(cr->threads.sched.running != NULL); // running ractor + VM_ASSERT(cr == GET_RACTOR()); + VM_ASSERT(vm->ractor.sync.lock_owner == NULL); // VM is locked, but owner == NULL + VM_ASSERT(vm->ractor.sched.barrier_waiting); // VM needs barrier sync + +#if USE_RUBY_DEBUG_LOG || VM_CHECK_MODE > 0 + unsigned int barrier_serial = vm->ractor.sched.barrier_serial; +#endif + + RUBY_DEBUG_LOG("join"); + + rb_native_mutex_unlock(&vm->ractor.sync.lock); + { + VM_ASSERT(vm->ractor.sched.barrier_waiting); // VM needs barrier sync + VM_ASSERT(vm->ractor.sched.barrier_serial == barrier_serial); + + ractor_sched_lock(vm, cr); + { + // running_cnt + vm->ractor.sched.barrier_waiting_cnt++; + RUBY_DEBUG_LOG("waiting_cnt:%u serial:%u", vm->ractor.sched.barrier_waiting_cnt, barrier_serial); + + ractor_sched_barrier_join_signal_locked(vm); + ractor_sched_barrier_join_wait_locked(vm, cr->threads.sched.running); + } + ractor_sched_unlock(vm, cr); + } + + rb_native_mutex_lock(&vm->ractor.sync.lock); + // VM locked here +} + +#if 0 +// TODO + +static void clear_thread_cache_altstack(void); + +static void +rb_thread_sched_destroy(struct rb_thread_sched *sched) +{ + /* + * only called once at VM shutdown (not atfork), another thread + * may still grab vm->gvl.lock when calling gvl_release at + * the end of thread_start_func_2 + */ + if (0) { + rb_native_mutex_destroy(&sched->lock); + } + clear_thread_cache_altstack(); +} +#endif + +#ifdef RB_THREAD_T_HAS_NATIVE_ID +static int +get_native_thread_id(void) +{ +#ifdef __linux__ + return (int)syscall(SYS_gettid); +#elif defined(__FreeBSD__) + return pthread_getthreadid_np(); +#endif +} +#endif + +#if defined(HAVE_WORKING_FORK) +static void +thread_sched_atfork(struct rb_thread_sched *sched) +{ + current_fork_gen++; + rb_thread_sched_init(sched, true); + rb_thread_t *th = 
GET_THREAD(); + rb_vm_t *vm = GET_VM(); + + if (th_has_dedicated_nt(th)) { + vm->ractor.sched.snt_cnt = 0; + } + else { + vm->ractor.sched.snt_cnt = 1; + } + vm->ractor.sched.running_cnt = 0; + + // rb_native_cond_destroy(&vm->ractor.sched.cond); + rb_native_cond_initialize(&vm->ractor.sched.cond); + rb_native_cond_initialize(&vm->ractor.sched.barrier_complete_cond); + rb_native_cond_initialize(&vm->ractor.sched.barrier_release_cond); + + ccan_list_head_init(&vm->ractor.sched.grq); + ccan_list_head_init(&vm->ractor.sched.timeslice_threads); + ccan_list_head_init(&vm->ractor.sched.running_threads); + + VM_ASSERT(sched->is_running); + sched->is_running_timeslice = false; + + if (sched->running != th) { + thread_sched_to_running(sched, th); + } + else { + thread_sched_setup_running_threads(sched, th->ractor, vm, th, NULL, NULL); + } + +#ifdef RB_THREAD_T_HAS_NATIVE_ID + if (th->nt) { + th->nt->tid = get_native_thread_id(); + } +#endif +} + +#endif + +#ifdef RB_THREAD_LOCAL_SPECIFIER +static RB_THREAD_LOCAL_SPECIFIER rb_thread_t *ruby_native_thread; +#else +static pthread_key_t ruby_native_thread_key; +#endif + +static void +null_func(int i) +{ + /* null */ + // This function can be called from signal handler + // RUBY_DEBUG_LOG("i:%d", i); +} + +rb_thread_t * +ruby_thread_from_native(void) +{ +#ifdef RB_THREAD_LOCAL_SPECIFIER + return ruby_native_thread; +#else + return pthread_getspecific(ruby_native_thread_key); +#endif +} + +int +ruby_thread_set_native(rb_thread_t *th) +{ + if (th) { +#ifdef USE_UBF_LIST + ccan_list_node_init(&th->sched.node.ubf); +#endif + } + + // setup TLS + + if (th && th->ec) { + rb_ractor_set_current_ec(th->ractor, th->ec); + } +#ifdef RB_THREAD_LOCAL_SPECIFIER + ruby_native_thread = th; + return 1; +#else + return pthread_setspecific(ruby_native_thread_key, th) == 0; +#endif +} + +static void native_thread_setup(struct rb_native_thread *nt); +static void native_thread_setup_on_thread(struct rb_native_thread *nt); + +void 
+Init_native_thread(rb_thread_t *main_th) +{ +#if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) + if (condattr_monotonic) { + int r = pthread_condattr_init(condattr_monotonic); + if (r == 0) { + r = pthread_condattr_setclock(condattr_monotonic, CLOCK_MONOTONIC); + } + if (r) condattr_monotonic = NULL; + } +#endif + +#ifndef RB_THREAD_LOCAL_SPECIFIER + if (pthread_key_create(&ruby_native_thread_key, 0) == EAGAIN) { + rb_bug("pthread_key_create failed (ruby_native_thread_key)"); + } + if (pthread_key_create(&ruby_current_ec_key, 0) == EAGAIN) { + rb_bug("pthread_key_create failed (ruby_current_ec_key)"); + } +#endif + ruby_posix_signal(SIGVTALRM, null_func); + + // setup vm + rb_vm_t *vm = main_th->vm; + rb_native_mutex_initialize(&vm->ractor.sched.lock); + rb_native_cond_initialize(&vm->ractor.sched.cond); + rb_native_cond_initialize(&vm->ractor.sched.barrier_complete_cond); + rb_native_cond_initialize(&vm->ractor.sched.barrier_release_cond); + + ccan_list_head_init(&vm->ractor.sched.grq); + ccan_list_head_init(&vm->ractor.sched.timeslice_threads); + ccan_list_head_init(&vm->ractor.sched.running_threads); + + // setup main thread + main_th->nt->thread_id = pthread_self(); + main_th->nt->serial = 1; +#ifdef RUBY_NT_SERIAL + ruby_nt_serial = 1; +#endif + ruby_thread_set_native(main_th); + native_thread_setup(main_th->nt); + native_thread_setup_on_thread(main_th->nt); + + TH_SCHED(main_th)->running = main_th; + main_th->has_dedicated_nt = 1; + + thread_sched_setup_running_threads(TH_SCHED(main_th), main_th->ractor, vm, main_th, NULL, NULL); + + // setup main NT + main_th->nt->dedicated = 1; + main_th->nt->vm = vm; + + // setup mn + vm->ractor.sched.dnt_cnt = 1; +} + +void +ruby_mn_threads_params(void) +{ + rb_vm_t *vm = GET_VM(); + rb_ractor_t *main_ractor = GET_RACTOR(); + + const char *mn_threads_cstr = getenv("RUBY_MN_THREADS"); + bool enable_mn_threads; + + if (mn_threads_cstr && (enable_mn_threads = atoi(mn_threads_cstr) > 0)) { + if (RTEST(ruby_verbose)) { +#if 
USE_MN_THREADS + fprintf(stderr, "RUBY_MN_THREADS = %s (default: 0)\n", mn_threads_cstr); +#else + fprintf(stderr, "RUBY_MN_THREADS = %s is specified, but MN threads are not implmeented on this executable.", mn_threads_cstr); +#endif + } + } + else { + enable_mn_threads = false; // default: off on main Ractor + } + main_ractor->threads.sched.enable_mn_threads = enable_mn_threads; + + const char *max_cpu_cstr = getenv("RUBY_MAX_CPU"); + const int default_max_cpu = 8; // TODO: CPU num? + int max_cpu; + if (max_cpu_cstr && (max_cpu = atoi(max_cpu_cstr)) > 0) { + if (RTEST(ruby_verbose)) { +#if USE_MN_THREADS + fprintf(stderr, "RUBY_MAX_CPU = %d (default: %d)\n", max_cpu, default_max_cpu); +#else + fprintf(stderr, "RUBY_MAX_CPU = %d is specified, but MN threads are not implmeented on this executable.", max_cpu); +#endif + } + } + else { + max_cpu = default_max_cpu; + } + + vm->ractor.sched.max_cpu = max_cpu; +} + +static void +native_thread_dedicated_inc(rb_vm_t *vm, rb_ractor_t *cr, struct rb_native_thread *nt) +{ + RUBY_DEBUG_LOG("nt:%d %d->%d", nt->serial, nt->dedicated, nt->dedicated + 1); + + if (nt->dedicated == 0) { + ractor_sched_lock(vm, cr); + { + vm->ractor.sched.snt_cnt--; + vm->ractor.sched.dnt_cnt++; + } + ractor_sched_unlock(vm, cr); + } + + nt->dedicated++; +} + +static void +native_thread_dedicated_dec(rb_vm_t *vm, rb_ractor_t *cr, struct rb_native_thread *nt) +{ + RUBY_DEBUG_LOG("nt:%d %d->%d", nt->serial, nt->dedicated, nt->dedicated - 1); + VM_ASSERT(nt->dedicated > 0); + nt->dedicated--; + + if (nt->dedicated == 0) { + ractor_sched_lock(vm, cr); + { + nt->vm->ractor.sched.snt_cnt++; + nt->vm->ractor.sched.dnt_cnt--; + } + ractor_sched_unlock(vm, cr); + } +} + +static void +native_thread_assign(struct rb_native_thread *nt, rb_thread_t *th) +{ +#if USE_RUBY_DEBUG_LOG + if (nt) { + if (th->nt) { + RUBY_DEBUG_LOG("th:%d nt:%d->%d", (int)th->serial, (int)th->nt->serial, (int)nt->serial); + } + else { + RUBY_DEBUG_LOG("th:%d nt:NULL->%d", 
(int)th->serial, (int)nt->serial); + } } - if (pthread_key_create(&ruby_current_ec_key, 0) == EAGAIN) { - rb_bug("pthread_key_create failed (ruby_current_ec_key)"); + else { + if (th->nt) { + RUBY_DEBUG_LOG("th:%d nt:%d->NULL", (int)th->serial, (int)th->nt->serial); + } + else { + RUBY_DEBUG_LOG("th:%d nt:NULL->NULL", (int)th->serial); + } } #endif - ruby_posix_signal(SIGVTALRM, null_func); - // setup main thread - main_th->nt->thread_id = pthread_self(); - ruby_thread_set_native(main_th); - native_thread_init(main_th->nt); + th->nt = nt; } -#if defined(USE_THREAD_CACHE) && !(USE_THREAD_CACHE+0) -# undef USE_THREAD_CACHE -# define USE_THREAD_CACHE 0 -#else -# undef USE_THREAD_CACHE -# define USE_THREAD_CACHE 1 -#endif - static void native_thread_destroy(rb_thread_t *th) { @@ -765,21 +1722,8 @@ native_thread_destroy(rb_thread_t *th) if (&nt->cond.readyq != &nt->cond.intr) rb_native_cond_destroy(&nt->cond.intr); - - /* - * prevent false positive from ruby_thread_has_gvl_p if that - * gets called from an interposing function wrapper - */ - if (USE_THREAD_CACHE) - ruby_thread_set_native(0); } -#if USE_THREAD_CACHE -static rb_thread_t *register_cached_thread_and_wait(void *); -#else -# define register_cached_thread_and_wait(altstack) ((void)(altstack), NULL) -#endif - #if defined HAVE_PTHREAD_GETATTR_NP || defined HAVE_PTHREAD_ATTR_GET_NP #define STACKADDR_AVAILABLE 1 #elif defined HAVE_PTHREAD_GET_STACKADDR_NP && defined HAVE_PTHREAD_GET_STACKSIZE_NP @@ -1069,13 +2013,15 @@ native_thread_init_stack(rb_thread_t *th) } else { #ifdef STACKADDR_AVAILABLE - void *start; - size_t size; - - if (get_stack(&start, &size) == 0) { - uintptr_t diff = (uintptr_t)start - (uintptr_t)&curr; - th->ec->machine.stack_start = (VALUE *)&curr; - th->ec->machine.stack_maxsize = size - diff; + if (th_has_dedicated_nt(th)) { + void *start; + size_t size; + + if (get_stack(&start, &size) == 0) { + uintptr_t diff = (uintptr_t)start - (uintptr_t)&curr; + th->ec->machine.stack_start = (VALUE 
*)&curr; + th->ec->machine.stack_maxsize = size - diff; + } } #else rb_raise(rb_eNotImpError, "ruby engine can initialize only in the main thread"); @@ -1089,199 +2035,292 @@ native_thread_init_stack(rb_thread_t *th) #define USE_NATIVE_THREAD_INIT 1 #endif +struct nt_param { + rb_vm_t *vm; + struct rb_native_thread *nt; +}; + static void * -thread_start_func_1(void *th_ptr) +nt_start(void *ptr); + +static int +native_thread_create0(struct rb_native_thread *nt) { - rb_thread_t *th = th_ptr; + int err = 0; + pthread_attr_t attr; -#if USE_RUBY_DEBUG_LOG && defined(RUBY_NT_SERIAL) - ruby_nt_serial = th->nt->serial; -#endif + const size_t stack_size = nt->vm->default_params.thread_machine_stack_size; + const size_t space = space_size(stack_size); - RB_ALTSTACK_INIT(void *altstack, th->nt->altstack); - do { -#if !defined USE_NATIVE_THREAD_INIT - VALUE stack_start; -#endif + nt->machine_stack_maxsize = stack_size - space; -#if defined USE_NATIVE_THREAD_INIT - native_thread_init_stack(th); +#ifdef USE_SIGALTSTACK + nt->altstack = rb_allocate_sigaltstack(); #endif - native_thread_init(th->nt); + CHECK_ERR(pthread_attr_init(&attr)); - RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_STARTED); +# ifdef PTHREAD_STACK_MIN + RUBY_DEBUG_LOG("stack size: %lu", (unsigned long)stack_size); + CHECK_ERR(pthread_attr_setstacksize(&attr, stack_size)); +# endif - /* run */ -#if defined USE_NATIVE_THREAD_INIT - thread_start_func_2(th, th->ec->machine.stack_start); -#else - thread_start_func_2(th, &stack_start); -#endif - } while ((th = register_cached_thread_and_wait(RB_ALTSTACK(altstack))) != 0); - if (!USE_THREAD_CACHE) { - RB_ALTSTACK_FREE(altstack); - } - return 0; -} +# ifdef HAVE_PTHREAD_ATTR_SETINHERITSCHED + CHECK_ERR(pthread_attr_setinheritsched(&attr, PTHREAD_INHERIT_SCHED)); +# endif + CHECK_ERR(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)); -struct cached_thread_entry { - rb_nativethread_cond_t cond; - rb_nativethread_id_t thread_id; - rb_thread_t *th; - void 
*altstack; - struct ccan_list_node node; -}; + err = pthread_create(&nt->thread_id, &attr, nt_start, nt); + + RUBY_DEBUG_LOG("nt:%d err:%d", (int)nt->serial, err); -#if USE_THREAD_CACHE -static rb_nativethread_lock_t thread_cache_lock = RB_NATIVETHREAD_LOCK_INIT; -static CCAN_LIST_HEAD(cached_thread_head); + CHECK_ERR(pthread_attr_destroy(&attr)); + + return err; +} -# if defined(HAVE_WORKING_FORK) static void -thread_cache_reset(void) +native_thread_setup(struct rb_native_thread *nt) { - rb_native_mutex_initialize(&thread_cache_lock); - ccan_list_head_init(&cached_thread_head); + // init cond + rb_native_cond_initialize(&nt->cond.readyq); + + if (&nt->cond.readyq != &nt->cond.intr) { + rb_native_cond_initialize(&nt->cond.intr); + } } -# endif -/* - * number of seconds to cache for, I think 1-5s is sufficient to obviate - * the need for thread pool in many network programs (taking into account - * worst case network latency across the globe) without wasting memory - */ -#ifndef THREAD_CACHE_TIME -# define THREAD_CACHE_TIME ((rb_hrtime_t)3 * RB_HRTIME_PER_SEC) +static void +native_thread_setup_on_thread(struct rb_native_thread *nt) +{ + // init tid +#ifdef RB_THREAD_T_HAS_NATIVE_ID + nt->tid = get_native_thread_id(); #endif -static rb_thread_t * -register_cached_thread_and_wait(void *altstack) + // init signal handler + RB_ALTSTACK_INIT(nt->altstack, nt->altstack); +} + +static struct rb_native_thread * +native_thread_alloc(void) { - rb_hrtime_t end = THREAD_CACHE_TIME; - struct cached_thread_entry entry; + struct rb_native_thread *nt = ZALLOC(struct rb_native_thread); - rb_native_cond_initialize(&entry.cond); - entry.altstack = altstack; - entry.th = NULL; - entry.thread_id = pthread_self(); - end = native_cond_timeout(&entry.cond, end); +#if USE_MN_THREADS + nt->nt_context = ruby_xmalloc(sizeof(struct coroutine_context)); +#endif - rb_native_mutex_lock(&thread_cache_lock); - { - ccan_list_add(&cached_thread_head, &entry.node); +#if USE_RUBY_DEBUG_LOG + static 
rb_atomic_t nt_serial = 2; + nt->serial = RUBY_ATOMIC_FETCH_ADD(nt_serial, 1); +#endif + return nt; +} - native_cond_timedwait(&entry.cond, &thread_cache_lock, &end); +static int +native_thread_create_dedicated(rb_thread_t *th) +{ + th->nt = native_thread_alloc(); + th->nt->vm = th->vm; + th->nt->running_thread = th; + th->nt->dedicated = 1; + native_thread_setup(th->nt); - if (entry.th == NULL) { /* unused */ - ccan_list_del(&entry.node); - } - } - rb_native_mutex_unlock(&thread_cache_lock); + // vm stack + size_t vm_stack_word_size = th->vm->default_params.thread_vm_stack_size / sizeof(VALUE); + void *vm_stack = ruby_xmalloc(vm_stack_word_size * sizeof(VALUE)); + th->sched.malloc_stack = true; + rb_ec_initialize_vm_stack(th->ec, vm_stack, vm_stack_word_size); + th->sched.context_stack = vm_stack; - rb_native_cond_destroy(&entry.cond); - if (!entry.th) { - RB_ALTSTACK_FREE(entry.altstack); - } + // setup + thread_sched_to_ready(TH_SCHED(th), th); - return entry.th; + return native_thread_create0(th->nt); } + +static void +call_thread_start_func_2(rb_thread_t *th) +{ + RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_STARTED); + +#if defined USE_NATIVE_THREAD_INIT + native_thread_init_stack(th); + thread_start_func_2(th, th->ec->machine.stack_start); #else -# if defined(HAVE_WORKING_FORK) -static void thread_cache_reset(void) { } -# endif -#define thread_cache_lock *(rb_nativethread_lock_t *)NULL -#define cached_thread_head *(struct ccan_list_head *)NULL + VALUE stack_start; + thread_start_func_2(th, &stack_start); #endif +} -static int -use_cached_thread(rb_thread_t *th) +static void * +nt_start(void *ptr) { - if (!USE_THREAD_CACHE) return 0; + struct rb_native_thread *nt = (struct rb_native_thread *)ptr; + rb_vm_t *vm = nt->vm; + + native_thread_setup_on_thread(nt); - struct cached_thread_entry *entry; + // init tid +#ifdef RB_THREAD_T_HAS_NATIVE_ID + nt->tid = get_native_thread_id(); +#endif + +#if USE_RUBY_DEBUG_LOG && defined(RUBY_NT_SERIAL) + ruby_nt_serial 
= nt->serial; +#endif - rb_native_mutex_lock(&thread_cache_lock); - entry = ccan_list_pop(&cached_thread_head, struct cached_thread_entry, node); - if (entry) { - entry->th = th; - /* th->nt->thread_id must be set before signal for Thread#name= */ - th->nt->thread_id = entry->thread_id; - rb_native_cond_signal(&entry->cond); + RUBY_DEBUG_LOG("nt:%u", nt->serial); + + if (!nt->dedicated) { + coroutine_initialize_main(nt->nt_context); } - rb_native_mutex_unlock(&thread_cache_lock); - return !!entry; -} -// TODO -static void -clear_thread_cache_altstack(void) -{ - if (!USE_THREAD_CACHE) return; + while (1) { + if (nt->dedicated) { + // wait running turn + rb_thread_t *th = nt->running_thread; + struct rb_thread_sched *sched = TH_SCHED(th); + + RUBY_DEBUG_LOG("on dedicated th:%u", rb_th_serial(th)); + ruby_thread_set_native(th); - struct cached_thread_entry *entry; + thread_sched_lock(sched, th); + { + if (sched->running == th) { + thread_sched_add_running_thread(sched, th); + } + thread_sched_wait_running_turn(sched, th, false); + } + thread_sched_unlock(sched, th); - rb_native_mutex_lock(&thread_cache_lock); - ccan_list_for_each(&cached_thread_head, entry, node) { - void MAYBE_UNUSED(*altstack) = entry->altstack; - entry->altstack = 0; - RB_ALTSTACK_FREE(altstack); + // start threads + call_thread_start_func_2(th); + break; // TODO: allow to change to the SNT + } + else { + RUBY_DEBUG_LOG("check next"); + rb_ractor_t *r = ractor_sched_deq(vm, NULL); + + if (r) { + struct rb_thread_sched *sched = &r->threads.sched; + + thread_sched_lock(sched, NULL); + { + rb_thread_t *next_th = sched->running; + + if (next_th && next_th->nt == NULL) { + RUBY_DEBUG_LOG("nt:%d next_th:%d", (int)nt->serial, (int)next_th->serial); + thread_sched_switch0(nt->nt_context, next_th, nt); + } + else { + RUBY_DEBUG_LOG("no schedulable threads -- next_th:%p", next_th); + } + } + thread_sched_unlock(sched, NULL); + } + else { + // timeout -> deleted. 
+ break; + } + } } - rb_native_mutex_unlock(&thread_cache_lock); + + return NULL; } -static struct rb_native_thread * -native_thread_alloc(void) -{ - struct rb_native_thread *nt = ZALLOC(struct rb_native_thread); -#if USE_RUBY_DEBUG_LOG - static rb_atomic_t nt_serial = 1; - nt->serial = RUBY_ATOMIC_FETCH_ADD(nt_serial, 1); +static int native_thread_create_shared(rb_thread_t *th); + +#if USE_MN_THREADS +static void nt_free_stack(void *mstack); #endif - return nt; -} -static int -native_thread_create(rb_thread_t *th) +void +rb_threadptr_remove(rb_thread_t *th) { - int err = 0; +#if USE_MN_THREADS + if (th->sched.malloc_stack) { + // dedicated + return; + } + else { + rb_vm_t *vm = th->vm; + th->sched.finished = false; - VM_ASSERT(th->nt == 0); - th->nt = native_thread_alloc(); + RB_VM_LOCK_ENTER(); + { + ccan_list_add(&vm->ractor.sched.zombie_threads, &th->sched.node.zombie_threads); + } + RB_VM_LOCK_LEAVE(); + } +#endif +} - if (use_cached_thread(th)) { - RUBY_DEBUG_LOG("use cached nt. th:%u", rb_th_serial(th)); +void +rb_threadptr_sched_free(rb_thread_t *th) +{ +#if USE_MN_THREADS + if (th->sched.malloc_stack) { + ruby_xfree(th->sched.context_stack); + RB_ALTSTACK_FREE(th->nt->altstack); + ruby_xfree(th->nt->nt_context); + ruby_xfree(th->nt); } else { - pthread_attr_t attr; - const size_t stack_size = th->vm->default_params.thread_machine_stack_size + th->vm->default_params.thread_vm_stack_size; - const size_t space = space_size(stack_size); + nt_free_stack(th->sched.context_stack); + // TODO: how to free nt and nt->altstack? 
+ } -#ifdef USE_SIGALTSTACK - th->nt->altstack = rb_allocate_sigaltstack(); -#endif - th->ec->machine.stack_maxsize = stack_size - space; + if (th->sched.context) { + ruby_xfree(th->sched.context); + VM_ASSERT((th->sched.context = NULL) == NULL); + } - CHECK_ERR(pthread_attr_init(&attr)); + th->nt = NULL; +#else + ruby_xfree(th->sched.context_stack); -# ifdef PTHREAD_STACK_MIN - RUBY_DEBUG_LOG("stack size: %lu", (unsigned long)stack_size); - CHECK_ERR(pthread_attr_setstacksize(&attr, stack_size)); -# endif + struct rb_native_thread *nt = th->nt; + if (nt) { // TODO: not sure why nt is NULL + RB_ALTSTACK_FREE(nt->altstack); + ruby_xfree(nt); + } +#endif +} -# ifdef HAVE_PTHREAD_ATTR_SETINHERITSCHED - CHECK_ERR(pthread_attr_setinheritsched(&attr, PTHREAD_INHERIT_SCHED)); -# endif - CHECK_ERR(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)); +void +rb_thread_sched_mark_zombies(rb_vm_t *vm) +{ + if (!ccan_list_empty(&vm->ractor.sched.zombie_threads)) { + rb_thread_t *zombie_th, *next_zombie_th; + ccan_list_for_each_safe(&vm->ractor.sched.zombie_threads, zombie_th, next_zombie_th, sched.node.zombie_threads) { + if (zombie_th->sched.finished) { + ccan_list_del_init(&zombie_th->sched.node.zombie_threads); + } + else { + rb_gc_mark(zombie_th->self); + } + } + } +} - err = pthread_create(&th->nt->thread_id, &attr, thread_start_func_1, th); +static int +native_thread_create(rb_thread_t *th) +{ + VM_ASSERT(th->nt == 0); + RUBY_DEBUG_LOG("th:%d has_dnt:%d", th->serial, th->has_dedicated_nt); - RUBY_DEBUG_LOG("th:%u err:%d", rb_th_serial(th), err); + if (!th->ractor->threads.sched.enable_mn_threads) { + th->has_dedicated_nt = 1; + } - /* should be done in the created thread */ - CHECK_ERR(pthread_attr_destroy(&attr)); + if (th->has_dedicated_nt) { + return native_thread_create_dedicated(th); + } + else { + return native_thread_create_shared(th); } - return err; } #if USE_NATIVE_THREAD_PRIORITY @@ -1324,7 +2363,7 @@ static void ubf_pthread_cond_signal(void *ptr) { 
rb_thread_t *th = (rb_thread_t *)ptr; - RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); + RUBY_DEBUG_LOG("th:%u on nt:%d", rb_th_serial(th), (int)th->nt->serial); rb_native_cond_signal(&th->nt->cond.intr); } @@ -1389,33 +2428,54 @@ ubf_list_atfork(void) rb_native_mutex_initialize(&ubf_list_lock); } +RBIMPL_ATTR_MAYBE_UNUSED() +static bool +ubf_list_contain_p(rb_thread_t *th) +{ + rb_thread_t *list_th; + ccan_list_for_each(&ubf_list_head, list_th, sched.node.ubf) { + if (list_th == th) return true; + } + return false; +} + /* The thread 'th' is registered to be trying unblock. */ static void register_ubf_list(rb_thread_t *th) { + RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); struct ccan_list_node *node = &th->sched.node.ubf; - if (ccan_list_empty((struct ccan_list_head*)node)) { - rb_native_mutex_lock(&ubf_list_lock); - ccan_list_add(&ubf_list_head, node); - rb_native_mutex_unlock(&ubf_list_lock); + VM_ASSERT(th->unblock.func != NULL); + + rb_native_mutex_lock(&ubf_list_lock); + { + // check not connected yet + if (ccan_list_empty((struct ccan_list_head*)node)) { + VM_ASSERT(!ubf_list_contain_p(th)); + ccan_list_add(&ubf_list_head, node); + } } + rb_native_mutex_unlock(&ubf_list_lock); + + timer_thread_wakeup(); } /* The thread 'th' is unblocked. It no longer need to be registered. 
*/ static void unregister_ubf_list(rb_thread_t *th) { + RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); struct ccan_list_node *node = &th->sched.node.ubf; /* we can't allow re-entry into ubf_list_head */ - VM_ASSERT(th->unblock.func == 0); + VM_ASSERT(th->unblock.func == NULL); if (!ccan_list_empty((struct ccan_list_head*)node)) { rb_native_mutex_lock(&ubf_list_lock); - ccan_list_del_init(node); - if (ccan_list_empty(&ubf_list_head) && !rb_signal_buff_size()) { - ubf_timer_disarm(); + { + VM_ASSERT(ubf_list_contain_p(th)); + ccan_list_del_init(node); } rb_native_mutex_unlock(&ubf_list_lock); } @@ -1428,61 +2488,39 @@ unregister_ubf_list(rb_thread_t *th) static void ubf_wakeup_thread(rb_thread_t *th) { - RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); - pthread_kill(th->nt->thread_id, SIGVTALRM); + RUBY_DEBUG_LOG("th:%u thread_id:%p", rb_th_serial(th), (void *)th->nt->thread_id); + + int r = pthread_kill(th->nt->thread_id, SIGVTALRM); + if (r != 0) { + rb_bug_errno("pthread_kill", r); + } } static void ubf_select(void *ptr) { rb_thread_t *th = (rb_thread_t *)ptr; - struct rb_thread_sched *sched = TH_SCHED(th); - const rb_thread_t *cur = ruby_thread_from_native(); /* may be 0 */ - - register_ubf_list(th); - - /* - * ubf_wakeup_thread() doesn't guarantee to wake up a target thread. - * Therefore, we repeatedly call ubf_wakeup_thread() until a target thread - * exit from ubf function. We must have a timer to perform this operation. - * We use double-checked locking here because this function may be called - * while vm->gvl.lock is held in do_gvl_timer. - * There is also no need to start a timer if we're the designated - * sigwait_th thread, otherwise we can deadlock with a thread - * in unblock_function_clear. - */ - if (cur != sched->timer && cur != sigwait_th) { - /* - * Double-checked locking above was to prevent nested locking - * by the SAME thread. 
We use trylock here to prevent deadlocks - * between DIFFERENT threads - */ - if (rb_native_mutex_trylock(&sched->lock) == 0) { - if (!sched->timer) { - rb_thread_wakeup_timer_thread(-1); - } - rb_native_mutex_unlock(&sched->lock); - } - } - + RUBY_DEBUG_LOG("wakeup th:%u", rb_th_serial(th)); ubf_wakeup_thread(th); + register_ubf_list(th); } -static int +static bool ubf_threads_empty(void) { - return ccan_list_empty(&ubf_list_head); + return ccan_list_empty(&ubf_list_head) != 0; } static void ubf_wakeup_all_threads(void) { if (!ubf_threads_empty()) { - rb_native_mutex_lock(&ubf_list_lock); rb_thread_t *th; - - ccan_list_for_each(&ubf_list_head, th, sched.node.ubf) { - ubf_wakeup_thread(th); + rb_native_mutex_lock(&ubf_list_lock); + { + ccan_list_for_each(&ubf_list_head, th, sched.node.ubf) { + ubf_wakeup_thread(th); + } } rb_native_mutex_unlock(&ubf_list_lock); } @@ -1493,151 +2531,35 @@ ubf_wakeup_all_threads(void) #define unregister_ubf_list(th) (void)(th) #define ubf_select 0 static void ubf_wakeup_all_threads(void) { return; } -static int ubf_threads_empty(void) { return 1; } +static bool ubf_threads_empty(void) { return true; } #define ubf_list_atfork() do {} while (0) #endif /* USE_UBF_LIST */ #define TT_DEBUG 0 #define WRITE_CONST(fd, str) (void)(write((fd),(str),sizeof(str)-1)<0) -static struct { - /* pipes are closed in forked children when owner_process does not match */ - int normal[2]; /* [0] == sigwait_fd */ - int ub_main[2]; /* unblock main thread from native_ppoll_sleep */ - - /* volatile for signal handler use: */ - volatile rb_serial_t fork_gen; -} signal_self_pipe = { - {-1, -1}, - {-1, -1}, -}; - -/* only use signal-safe system calls here */ -static void -rb_thread_wakeup_timer_thread_fd(int fd) -{ -#if USE_EVENTFD - const uint64_t buff = 1; -#else - const char buff = '!'; -#endif - ssize_t result; - - /* already opened */ - if (fd >= 0) { - retry: - if ((result = write(fd, &buff, sizeof(buff))) <= 0) { - int e = errno; - switch (e) { - case 
EINTR: goto retry; - case EAGAIN: -#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN - case EWOULDBLOCK: -#endif - break; - default: - async_bug_fd("rb_thread_wakeup_timer_thread: write", e, fd); - } - } - if (TT_DEBUG) WRITE_CONST(2, "rb_thread_wakeup_timer_thread: write\n"); - } - else { - /* ignore wakeup */ - } -} - -/* - * This ensures we get a SIGVTALRM in TIME_QUANTUM_MSEC if our - * process could not react to the original signal in time. - */ -static void -ubf_timer_arm(rb_serial_t fork_gen) /* async signal safe */ -{ -#if UBF_TIMER == UBF_TIMER_POSIX - if ((!fork_gen || timer_posix.fork_gen == fork_gen) && - timer_state_cas(RTIMER_DISARM, RTIMER_ARMING) == RTIMER_DISARM) { - struct itimerspec it; - - it.it_interval.tv_sec = it.it_value.tv_sec = 0; - it.it_interval.tv_nsec = it.it_value.tv_nsec = TIME_QUANTUM_NSEC; - - if (timer_settime(timer_posix.timerid, 0, &it, 0)) - rb_async_bug_errno("timer_settime (arm)", errno); - - switch (timer_state_cas(RTIMER_ARMING, RTIMER_ARMED)) { - case RTIMER_DISARM: - /* somebody requested a disarm while we were arming */ - /* may race harmlessly with ubf_timer_destroy */ - (void)timer_settime(timer_posix.timerid, 0, &zero, 0); - - case RTIMER_ARMING: return; /* success */ - case RTIMER_ARMED: - /* - * it is possible to have another thread disarm, and - * a third thread arm finish re-arming before we get - * here, so we wasted a syscall with timer_settime but - * probably unavoidable in a signal handler. 
- */ - return; - case RTIMER_DEAD: - /* may race harmlessly with ubf_timer_destroy */ - (void)timer_settime(timer_posix.timerid, 0, &zero, 0); - return; - default: - rb_async_bug_errno("UBF_TIMER_POSIX unknown state", ERANGE); - } - } -#elif UBF_TIMER == UBF_TIMER_PTHREAD - if (!fork_gen || fork_gen == timer_pthread.fork_gen) { - if (ATOMIC_EXCHANGE(timer_pthread.armed, 1) == 0) - rb_thread_wakeup_timer_thread_fd(timer_pthread.low[1]); - } -#endif -} - void -rb_thread_wakeup_timer_thread(int sig) -{ - /* non-sighandler path */ - if (sig <= 0) { - rb_thread_wakeup_timer_thread_fd(signal_self_pipe.normal[1]); - if (sig < 0) { - ubf_timer_arm(0); - } - return; - } - - /* must be safe inside sighandler, so no mutex */ - if (signal_self_pipe.fork_gen == current_fork_gen) { - rb_thread_wakeup_timer_thread_fd(signal_self_pipe.normal[1]); +rb_thread_wakeup_timer_thread(int sig) +{ + // This function can be called from signal handlers so that + // pthread_mutex_lock() should not be used. - /* - * system_working check is required because vm and main_thread are - * freed during shutdown - */ - if (system_working > 0) { - volatile rb_execution_context_t *ec; - rb_vm_t *vm = GET_VM(); - rb_thread_t *mth; + // wakeup timer thread + timer_thread_wakeup_force(); - /* - * FIXME: root VM and main_thread should be static and not - * on heap for maximum safety (and startup/shutdown speed) - */ - if (!vm) return; - mth = vm->ractor.main_thread; - if (!mth || system_working <= 0) return; + // interrupt main thread if main thread is available + if (system_working) { + rb_vm_t *vm = GET_VM(); + rb_thread_t *main_th = vm->ractor.main_thread; - /* this relies on GC for grace period before cont_free */ - ec = ACCESS_ONCE(rb_execution_context_t *, mth->ec); + if (main_th) { + volatile rb_execution_context_t *main_th_ec = ACCESS_ONCE(rb_execution_context_t *, main_th->ec); - if (ec) { - RUBY_VM_SET_TRAP_INTERRUPT(ec); - ubf_timer_arm(current_fork_gen); + if (main_th_ec) { + 
RUBY_VM_SET_TRAP_INTERRUPT(main_th_ec); - /* some ubfs can interrupt single-threaded process directly */ - if (vm->ubf_async_safe && mth->unblock.func) { - (mth->unblock.func)(mth->unblock.arg); + if (vm->ubf_async_safe && main_th->unblock.func) { + (main_th->unblock.func)(main_th->unblock.arg); } } } @@ -1661,12 +2583,12 @@ static void close_invalidate_pair(int fds[2], const char *msg) { if (USE_EVENTFD && fds[0] == fds[1]) { + fds[1] = -1; // disable write port first close_invalidate(&fds[0], msg); - fds[1] = -1; } else { - close_invalidate(&fds[0], msg); close_invalidate(&fds[1], msg); + close_invalidate(&fds[0], msg); } } @@ -1686,15 +2608,15 @@ set_nonblock(int fd) } /* communication pipe with timer thread and signal handler */ -static int +static void setup_communication_pipe_internal(int pipes[2]) { int err; - if (pipes[0] >= 0 || pipes[1] >= 0) { - VM_ASSERT(pipes[0] >= 0); - VM_ASSERT(pipes[1] >= 0); - return 0; + if (pipes[0] > 0 || pipes[1] > 0) { + VM_ASSERT(pipes[0] > 0); + VM_ASSERT(pipes[1] > 0); + return; } /* @@ -1703,23 +2625,21 @@ setup_communication_pipe_internal(int pipes[2]) */ #if USE_EVENTFD && defined(EFD_NONBLOCK) && defined(EFD_CLOEXEC) pipes[0] = pipes[1] = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC); + if (pipes[0] >= 0) { rb_update_max_fd(pipes[0]); - return 0; + return; } #endif err = rb_cloexec_pipe(pipes); if (err != 0) { - rb_warn("pipe creation failed for timer: %s, scheduling broken", - strerror(errno)); - return -1; + rb_bug("can not create communication pipe"); } rb_update_max_fd(pipes[0]); rb_update_max_fd(pipes[1]); set_nonblock(pipes[0]); set_nonblock(pipes[1]); - return 0; } #if !defined(SET_CURRENT_THREAD_NAME) && defined(__linux__) && defined(PR_SET_NAME) @@ -1802,14 +2722,15 @@ native_set_another_thread_name(rb_nativethread_id_t thread_id, VALUE name) static VALUE native_thread_native_thread_id(rb_thread_t *target_th) { + if (!target_th->nt) return Qnil; + #ifdef RB_THREAD_T_HAS_NATIVE_ID int tid = target_th->nt->tid; if (tid 
== 0) return Qnil; return INT2FIX(tid); #elif defined(__APPLE__) uint64_t tid; -# if (!defined(MAC_OS_X_VERSION_10_6) || \ - (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6) || \ +# if ((MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6) || \ defined(__POWERPC__) /* never defined for PowerPC platforms */) const bool no_pthread_threadid_np = true; # define NO_PTHREAD_MACH_THREAD_NP 1 @@ -1838,173 +2759,317 @@ native_thread_native_thread_id(rb_thread_t *target_th) # define USE_NATIVE_THREAD_NATIVE_THREAD_ID 0 #endif -static void -ubf_timer_invalidate(void) +static struct { + rb_serial_t created_fork_gen; + pthread_t pthread_id; + + int comm_fds[2]; // r, w + +#if HAVE_SYS_EPOLL_H && USE_MN_THREADS +#define EPOLL_EVENTS_MAX 0x10 + int epoll_fd; + struct epoll_event finished_events[EPOLL_EVENTS_MAX]; +#endif + + // waiting threads list + struct ccan_list_head waiting; // waiting threads in ractors + pthread_mutex_t waiting_lock; +} timer_th = { + .created_fork_gen = 0, +}; + +#define TIMER_THREAD_CREATED_P() (timer_th.created_fork_gen == current_fork_gen) + +static void timer_thread_check_timeslice(rb_vm_t *vm); +static int timer_thread_set_timeout(rb_vm_t *vm); +static void timer_thread_wakeup_thread(rb_thread_t *th); + +#include "thread_pthread_mn.c" + +static int +timer_thread_set_timeout(rb_vm_t *vm) { -#if UBF_TIMER == UBF_TIMER_PTHREAD - CLOSE_INVALIDATE_PAIR(timer_pthread.low); +#if 0 + return 10; // ms +#else + int timeout = -1; + + ractor_sched_lock(vm, NULL); + { + if ( !ccan_list_empty(&vm->ractor.sched.timeslice_threads) // (1-1) Provide time slice for active NTs + || !ubf_threads_empty() // (1-3) Periodic UBF + || vm->ractor.sched.grq_cnt > 0 // (1-4) Lazy GRQ deq start + ) { + + RUBY_DEBUG_LOG("timeslice:%d ubf:%d grq:%d", + !ccan_list_empty(&vm->ractor.sched.timeslice_threads), + !ubf_threads_empty(), + (vm->ractor.sched.grq_cnt > 0)); + + timeout = 10; // ms + vm->ractor.sched.timeslice_wait_inf = false; + } + else { + 
vm->ractor.sched.timeslice_wait_inf = true; + } + } + ractor_sched_unlock(vm, NULL); + + if (vm->ractor.sched.timeslice_wait_inf) { + rb_native_mutex_lock(&timer_th.waiting_lock); + { + rb_thread_t *th = ccan_list_top(&timer_th.waiting, rb_thread_t, sched.waiting_reason.node); + if (th && (th->sched.waiting_reason.flags & thread_sched_waiting_timeout)) { + rb_hrtime_t now = rb_hrtime_now(); + rb_hrtime_t hrrel = rb_hrtime_sub(th->sched.waiting_reason.data.timeout, now); + + RUBY_DEBUG_LOG("th:%u now:%lu rel:%lu", rb_th_serial(th), (unsigned long)now, (unsigned long)hrrel); + + // TODO: overflow? + timeout = (int)(hrrel / RB_HRTIME_PER_MSEC); // ms + } + } + rb_native_mutex_unlock(&timer_th.waiting_lock); + } + + RUBY_DEBUG_LOG("timeout:%d inf:%d", timeout, (int)vm->ractor.sched.timeslice_wait_inf); + + // fprintf(stderr, "timeout:%d\n", timeout); + return timeout; #endif } static void -ubf_timer_pthread_create(rb_serial_t fork_gen) +timer_thread_check_signal(rb_vm_t *vm) { -#if UBF_TIMER == UBF_TIMER_PTHREAD - int err; - if (timer_pthread.fork_gen == fork_gen) - return; + // ruby_sigchld_handler(vm); TODO - if (setup_communication_pipe_internal(timer_pthread.low) < 0) - return; + int signum = rb_signal_buff_size(); + if (UNLIKELY(signum > 0) && vm->ractor.main_thread) { + RUBY_DEBUG_LOG("signum:%d", signum); + threadptr_trap_interrupt(vm->ractor.main_thread); + } +} - err = pthread_create(&timer_pthread.thid, 0, timer_pthread_fn, GET_VM()); - if (!err) - timer_pthread.fork_gen = fork_gen; - else - rb_warn("pthread_create failed for timer: %s, signals racy", - strerror(err)); -#endif +static bool +timer_thread_check_exceed(rb_hrtime_t abs, rb_hrtime_t now) +{ + if (abs < now) { + return true; + } + else if (abs - now < RB_HRTIME_PER_MSEC) { + return true; // too short time + } + else { + return false; + } } -static void -ubf_timer_create(rb_serial_t fork_gen) +static rb_thread_t * +timer_thread_deq_wakeup(rb_vm_t *vm, rb_hrtime_t now) { -#if UBF_TIMER == 
UBF_TIMER_POSIX -# if defined(__sun) -# define UBF_TIMER_CLOCK CLOCK_REALTIME -# else /* Tested Linux and FreeBSD: */ -# define UBF_TIMER_CLOCK CLOCK_MONOTONIC -# endif + rb_thread_t *th = ccan_list_top(&timer_th.waiting, rb_thread_t, sched.waiting_reason.node); - struct sigevent sev; + if (th != NULL && + (th->sched.waiting_reason.flags & thread_sched_waiting_timeout) && + timer_thread_check_exceed(th->sched.waiting_reason.data.timeout, now)) { - sev.sigev_notify = SIGEV_SIGNAL; - sev.sigev_signo = SIGVTALRM; - sev.sigev_value.sival_ptr = &timer_posix; + RUBY_DEBUG_LOG("wakeup th:%u", rb_th_serial(th)); - if (!timer_create(UBF_TIMER_CLOCK, &sev, &timer_posix.timerid)) { - rb_atomic_t prev = timer_state_exchange(RTIMER_DISARM); + // delete from waiting list + ccan_list_del_init(&th->sched.waiting_reason.node); - if (prev != RTIMER_DEAD) { - rb_bug("timer_posix was not dead: %u", (unsigned)prev); - } - timer_posix.fork_gen = fork_gen; - } - else { - rb_warn("timer_create failed: %s, signals racy", strerror(errno)); + // setup result + th->sched.waiting_reason.flags = thread_sched_waiting_none; + th->sched.waiting_reason.data.result = 0; + + return th; } -#endif - if (UBF_TIMER == UBF_TIMER_PTHREAD) - ubf_timer_pthread_create(fork_gen); + + return NULL; } static void -rb_thread_create_timer_thread(void) +timer_thread_wakeup_thread(rb_thread_t *th) { - /* we only create the pipe, and lazy-spawn */ - rb_serial_t fork_gen = signal_self_pipe.fork_gen; + RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); + struct rb_thread_sched *sched = TH_SCHED(th); - if (fork_gen && fork_gen != current_fork_gen) { - CLOSE_INVALIDATE_PAIR(signal_self_pipe.normal); - CLOSE_INVALIDATE_PAIR(signal_self_pipe.ub_main); - ubf_timer_invalidate(); + thread_sched_lock(sched, th); + { + if (sched->running != th) { + thread_sched_to_ready_common(sched, th, true, false); + } + else { + // will be release the execution right + } } + thread_sched_unlock(sched, th); +} - if 
(setup_communication_pipe_internal(signal_self_pipe.normal) < 0) return; - if (setup_communication_pipe_internal(signal_self_pipe.ub_main) < 0) return; +static void +timer_thread_check_timeout(rb_vm_t *vm) +{ + rb_hrtime_t now = rb_hrtime_now(); + rb_thread_t *th; - ubf_timer_create(current_fork_gen); - if (fork_gen != current_fork_gen) { - /* validate pipe on this process */ - sigwait_th = THREAD_INVALID; - signal_self_pipe.fork_gen = current_fork_gen; + rb_native_mutex_lock(&timer_th.waiting_lock); + { + while ((th = timer_thread_deq_wakeup(vm, now)) != NULL) { + timer_thread_wakeup_thread(th); + } } + rb_native_mutex_unlock(&timer_th.waiting_lock); } static void -ubf_timer_disarm(void) +timer_thread_check_timeslice(rb_vm_t *vm) +{ + // TODO: check time + rb_thread_t *th; + ccan_list_for_each(&vm->ractor.sched.timeslice_threads, th, sched.node.timeslice_threads) { + RUBY_DEBUG_LOG("timeslice th:%u", rb_th_serial(th)); + RUBY_VM_SET_TIMER_INTERRUPT(th->ec); + } +} + +void +rb_assert_sig(void) { -#if UBF_TIMER == UBF_TIMER_POSIX - rb_atomic_t prev; + sigset_t oldmask; + pthread_sigmask(0, NULL, &oldmask); + if (sigismember(&oldmask, SIGVTALRM)) { + rb_bug("!!!"); + } + else { + RUBY_DEBUG_LOG("ok"); + } +} - if (timer_posix.fork_gen && timer_posix.fork_gen != current_fork_gen) return; - prev = timer_state_cas(RTIMER_ARMED, RTIMER_DISARM); - switch (prev) { - case RTIMER_DISARM: return; /* likely */ - case RTIMER_ARMING: return; /* ubf_timer_arm will disarm itself */ - case RTIMER_ARMED: - if (timer_settime(timer_posix.timerid, 0, &zero, 0)) { - int err = errno; +static void * +timer_thread_func(void *ptr) +{ + rb_vm_t *vm = (rb_vm_t *)ptr; +#if defined(RUBY_NT_SERIAL) + ruby_nt_serial = (rb_atomic_t)-1; +#endif - if (err == EINVAL) { - prev = timer_state_cas(RTIMER_DISARM, RTIMER_DISARM); + RUBY_DEBUG_LOG("started%s", ""); - /* main thread may have killed the timer */ - if (prev == RTIMER_DEAD) return; + while (system_working) { + timer_thread_check_signal(vm); + 
timer_thread_check_timeout(vm); + ubf_wakeup_all_threads(); - rb_bug_errno("timer_settime (disarm)", err); - } - } - return; - case RTIMER_DEAD: return; /* stay dead */ - default: - rb_bug("UBF_TIMER_POSIX bad state: %u", (unsigned)prev); + RUBY_DEBUG_LOG("system_working:%d", system_working); + timer_thread_polling(vm); } -#elif UBF_TIMER == UBF_TIMER_PTHREAD - ATOMIC_SET(timer_pthread.armed, 0); -#endif + RUBY_DEBUG_LOG("terminated"); + return NULL; } +/* only use signal-safe system calls here */ static void -ubf_timer_destroy(void) +signal_communication_pipe(int fd) { -#if UBF_TIMER == UBF_TIMER_POSIX - if (timer_posix.fork_gen == current_fork_gen) { - rb_atomic_t expect = RTIMER_DISARM; - size_t i, max = 10000000; +#if USE_EVENTFD + const uint64_t buff = 1; +#else + const char buff = '!'; +#endif + ssize_t result; - /* prevent signal handler from arming: */ - for (i = 0; i < max; i++) { - switch (timer_state_cas(expect, RTIMER_DEAD)) { - case RTIMER_DISARM: - if (expect == RTIMER_DISARM) goto done; - expect = RTIMER_DISARM; - break; - case RTIMER_ARMING: - native_thread_yield(); /* let another thread finish arming */ - expect = RTIMER_ARMED; - break; - case RTIMER_ARMED: - if (expect == RTIMER_ARMED) { - if (timer_settime(timer_posix.timerid, 0, &zero, 0)) - rb_bug_errno("timer_settime (destroy)", errno); - goto done; - } - expect = RTIMER_ARMED; + /* already opened */ + if (fd >= 0) { + retry: + if ((result = write(fd, &buff, sizeof(buff))) <= 0) { + int e = errno; + switch (e) { + case EINTR: goto retry; + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif break; - case RTIMER_DEAD: - rb_bug("RTIMER_DEAD unexpected"); + default: + async_bug_fd("rb_thread_wakeup_timer_thread: write", e, fd); } } - rb_bug("timed out waiting for timer to arm"); -done: - if (timer_delete(timer_posix.timerid) < 0) - rb_sys_fail("timer_delete"); + if (TT_DEBUG) WRITE_CONST(2, "rb_thread_wakeup_timer_thread: write\n"); + } + else { + // ignore 
wakeup + } +} + +static void +timer_thread_wakeup_force(void) +{ + // should not use RUBY_DEBUG_LOG() because it can be called within signal handlers. + signal_communication_pipe(timer_th.comm_fds[1]); +} + +static void +timer_thread_wakeup_locked(rb_vm_t *vm) +{ + // should be locked before. + ASSERT_ractor_sched_locked(vm, NULL); - VM_ASSERT(timer_state_exchange(RTIMER_DEAD) == RTIMER_DEAD); + if (timer_th.created_fork_gen == current_fork_gen) { + if (vm->ractor.sched.timeslice_wait_inf) { + RUBY_DEBUG_LOG("wakeup with fd:%d", timer_th.comm_fds[1]); + timer_thread_wakeup_force(); + } + else { + RUBY_DEBUG_LOG("will be wakeup..."); + } } -#elif UBF_TIMER == UBF_TIMER_PTHREAD - int err; +} + +static void +timer_thread_wakeup(void) +{ + rb_vm_t *vm = GET_VM(); - timer_pthread.fork_gen = 0; - ubf_timer_disarm(); - rb_thread_wakeup_timer_thread_fd(timer_pthread.low[1]); - err = pthread_join(timer_pthread.thid, 0); - if (err) { - rb_raise(rb_eThreadError, "native_thread_join() failed (%d)", err); + ractor_sched_lock(vm, NULL); + { + timer_thread_wakeup_locked(vm); } + ractor_sched_unlock(vm, NULL); +} + +static void +rb_thread_create_timer_thread(void) +{ + rb_serial_t created_fork_gen = timer_th.created_fork_gen; + + RUBY_DEBUG_LOG("fork_gen create:%d current:%d", (int)created_fork_gen, (int)current_fork_gen); + + timer_th.created_fork_gen = current_fork_gen; + + if (created_fork_gen != current_fork_gen) { + if (created_fork_gen != 0) { + RUBY_DEBUG_LOG("forked child process"); + + CLOSE_INVALIDATE_PAIR(timer_th.comm_fds); +#if HAVE_SYS_EPOLL_H && USE_MN_THREADS + close_invalidate(&timer_th.epoll_fd, "close epoll_fd"); #endif + rb_native_mutex_destroy(&timer_th.waiting_lock); + } + + ccan_list_head_init(&timer_th.waiting); + rb_native_mutex_initialize(&timer_th.waiting_lock); + + // open communication channel + setup_communication_pipe_internal(timer_th.comm_fds); + + // open epoll fd + timer_thread_setup_nm(); + } + + pthread_create(&timer_th.pthread_id, NULL, 
timer_thread_func, GET_VM()); } static int @@ -2012,8 +3077,13 @@ native_stop_timer_thread(void) { int stopped; stopped = --system_working <= 0; - if (stopped) - ubf_timer_destroy(); + + if (stopped) { + RUBY_DEBUG_LOG("wakeup send %d", timer_th.comm_fds[1]); + timer_thread_wakeup_force(); + RUBY_DEBUG_LOG("wakeup sent"); + pthread_join(timer_th.pthread_id, NULL); + } if (TT_DEBUG) fprintf(stderr, "stop timer thread\n"); return stopped; @@ -2022,7 +3092,7 @@ native_stop_timer_thread(void) static void native_reset_timer_thread(void) { - if (TT_DEBUG) fprintf(stderr, "reset timer thread\n"); + // } #ifdef HAVE_SIGALTSTACK @@ -2073,22 +3143,26 @@ int rb_reserved_fd_p(int fd) { /* no false-positive if out-of-FD at startup */ - if (fd < 0) - return 0; + if (fd < 0) return 0; -#if UBF_TIMER == UBF_TIMER_PTHREAD - if (fd == timer_pthread.low[0] || fd == timer_pthread.low[1]) - goto check_fork_gen; + if (fd == timer_th.comm_fds[0] || + fd == timer_th.comm_fds[1] +#if HAVE_SYS_EPOLL_H && USE_MN_THREADS + || fd == timer_th.epoll_fd #endif - if (fd == signal_self_pipe.normal[0] || fd == signal_self_pipe.normal[1]) - goto check_fork_gen; - if (fd == signal_self_pipe.ub_main[0] || fd == signal_self_pipe.ub_main[1]) + ) { goto check_fork_gen; + } return 0; -check_fork_gen: - if (signal_self_pipe.fork_gen == current_fork_gen) /* async-signal-safe */ + + check_fork_gen: + if (timer_th.created_fork_gen == current_fork_gen) { + /* async-signal-safe */ return 1; - return 0; + } + else { + return 0; + } } rb_nativethread_id_t @@ -2097,34 +3171,6 @@ rb_nativethread_self(void) return pthread_self(); } -int -rb_sigwait_fd_get(const rb_thread_t *th) -{ - if (signal_self_pipe.normal[0] >= 0) { - VM_ASSERT(signal_self_pipe.fork_gen == current_fork_gen); - /* - * no need to keep firing the timer if any thread is sleeping - * on the signal self-pipe - */ - ubf_timer_disarm(); - - if (ATOMIC_PTR_CAS(sigwait_th, THREAD_INVALID, th) == THREAD_INVALID) { - return signal_self_pipe.normal[0]; - } - 
} - return -1; /* avoid thundering herd and work stealing/starvation */ -} - -void -rb_sigwait_fd_put(const rb_thread_t *th, int fd) -{ - const rb_thread_t *old; - - VM_ASSERT(signal_self_pipe.normal[0] == fd); - old = ATOMIC_PTR_EXCHANGE(sigwait_th, THREAD_INVALID); - if (old != th) assert(old == th); -} - #ifndef HAVE_PPOLL /* TODO: don't ignore sigmask */ static int @@ -2156,61 +3202,6 @@ ruby_ppoll(struct pollfd *fds, nfds_t nfds, # define ppoll(fds,nfds,ts,sigmask) ruby_ppoll((fds),(nfds),(ts),(sigmask)) #endif -void -rb_sigwait_sleep(rb_thread_t *th, int sigwait_fd, const rb_hrtime_t *rel) -{ - struct pollfd pfd; - struct timespec ts; - - pfd.fd = sigwait_fd; - pfd.events = POLLIN; - - if (!BUSY_WAIT_SIGNALS && ubf_threads_empty()) { - (void)ppoll(&pfd, 1, rb_hrtime2timespec(&ts, rel), 0); - check_signals_nogvl(th, sigwait_fd); - } - else { - rb_hrtime_t to = RB_HRTIME_MAX, end = 0; - int n = 0; - - if (rel) { - to = *rel; - end = rb_hrtime_add(rb_hrtime_now(), to); - } - /* - * tricky: this needs to return on spurious wakeup (no auto-retry). - * But we also need to distinguish between periodic quantum - * wakeups, so we care about the result of consume_communication_pipe - * - * We want to avoid spurious wakeup for Mutex#sleep compatibility - * [ruby-core:88102] - */ - for (;;) { - const rb_hrtime_t *sto = sigwait_timeout(th, sigwait_fd, &to, &n); - - if (n) return; - n = ppoll(&pfd, 1, rb_hrtime2timespec(&ts, sto), 0); - if (check_signals_nogvl(th, sigwait_fd)) - return; - if (n || (th && RUBY_VM_INTERRUPTED(th->ec))) - return; - if (rel && hrtime_update_expire(&to, end)) - return; - } - } -} - -/* - * we need to guarantee wakeups from native_ppoll_sleep because - * ubf_select may not be going through ubf_list if other threads - * are all sleeping. 
- */ -static void -ubf_ppoll_sleep(void *ignore) -{ - rb_thread_wakeup_timer_thread_fd(signal_self_pipe.ub_main[1]); -} - /* * Single CPU setups benefit from explicit sched_yield() before ppoll(), * since threads may be too starved to enter the GVL waitqueue for @@ -2222,153 +3213,38 @@ ubf_ppoll_sleep(void *ignore) * [ruby-core:90417] [Bug #15398] */ #define THREAD_BLOCKING_YIELD(th) do { \ - const rb_thread_t *next; \ + const rb_thread_t *next_th; \ struct rb_thread_sched *sched = TH_SCHED(th); \ RB_VM_SAVE_MACHINE_CONTEXT(th); \ - rb_native_mutex_lock(&sched->lock); \ - next = thread_sched_to_waiting_common((sched), (th)); \ - rb_native_mutex_unlock(&sched->lock); \ - if (!next && rb_ractor_living_thread_num(th->ractor) > 1) { \ + thread_sched_to_waiting(sched, (th)); \ + next_th = sched->running; \ + rb_native_mutex_unlock(&sched->lock_); \ + native_thread_yield(); /* TODO: needed? */ \ + if (!next_th && rb_ractor_living_thread_num(th->ractor) > 1) { \ native_thread_yield(); \ } -/* - * This function does not exclusively acquire sigwait_fd, so it - * cannot safely read from it. 
However, it can be woken up in - * 4 ways: - * - * 1) ubf_ppoll_sleep (from another thread) - * 2) rb_thread_wakeup_timer_thread (from signal handler) - * 3) any unmasked signal hitting the process - * 4) periodic ubf timer wakeups (after 3) - */ -static void -native_ppoll_sleep(rb_thread_t *th, rb_hrtime_t *rel) -{ - rb_native_mutex_lock(&th->interrupt_lock); - th->unblock.func = ubf_ppoll_sleep; - rb_native_mutex_unlock(&th->interrupt_lock); - - THREAD_BLOCKING_YIELD(th); - { - if (!RUBY_VM_INTERRUPTED(th->ec)) { - struct pollfd pfd[2]; - struct timespec ts; - - pfd[0].fd = signal_self_pipe.normal[0]; /* sigwait_fd */ - pfd[1].fd = signal_self_pipe.ub_main[0]; - pfd[0].events = pfd[1].events = POLLIN; - if (ppoll(pfd, 2, rb_hrtime2timespec(&ts, rel), 0) > 0) { - if (pfd[1].revents & POLLIN) { - (void)consume_communication_pipe(pfd[1].fd); - } - } - /* - * do not read the sigwait_fd, here, let uplevel callers - * or other threads that, otherwise we may steal and starve - * other threads - */ - } - unblock_function_clear(th); - } - THREAD_BLOCKING_END(th); -} - static void native_sleep(rb_thread_t *th, rb_hrtime_t *rel) { - int sigwait_fd = rb_sigwait_fd_get(th); - rb_ractor_blocking_threads_inc(th->ractor, __FILE__, __LINE__); - + struct rb_thread_sched *sched = TH_SCHED(th); RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED); - if (sigwait_fd >= 0) { - rb_native_mutex_lock(&th->interrupt_lock); - th->unblock.func = ubf_sigwait; - rb_native_mutex_unlock(&th->interrupt_lock); - - THREAD_BLOCKING_YIELD(th); - { - if (!RUBY_VM_INTERRUPTED(th->ec)) { - rb_sigwait_sleep(th, sigwait_fd, rel); - } - else { - check_signals_nogvl(th, sigwait_fd); - } - unblock_function_clear(th); + RUBY_DEBUG_LOG("rel:%d", rel ? 
(int)*rel : 0); + if (rel) { + if (th_has_dedicated_nt(th)) { + native_cond_sleep(th, rel); + } + else { + thread_sched_wait_events(sched, th, -1, thread_sched_waiting_timeout, rel); } - THREAD_BLOCKING_END(th); - - rb_sigwait_fd_put(th, sigwait_fd); - } - else if (th == th->vm->ractor.main_thread) { /* always able to handle signals */ - native_ppoll_sleep(th, rel); } else { - native_cond_sleep(th, rel); - } - - rb_ractor_blocking_threads_dec(th->ractor, __FILE__, __LINE__); -} - -#if UBF_TIMER == UBF_TIMER_PTHREAD -static void * -timer_pthread_fn(void *p) -{ - rb_vm_t *vm = p; - pthread_t main_thread_id = vm->ractor.main_thread->nt->thread_id; - struct pollfd pfd; - int timeout = -1; - int ccp; - - pfd.fd = timer_pthread.low[0]; - pfd.events = POLLIN; - - while (system_working > 0) { - (void)poll(&pfd, 1, timeout); - ccp = consume_communication_pipe(pfd.fd); - - if (system_working > 0) { - if (ATOMIC_CAS(timer_pthread.armed, 1, 1)) { - pthread_kill(main_thread_id, SIGVTALRM); - - if (rb_signal_buff_size() || !ubf_threads_empty()) { - timeout = TIME_QUANTUM_MSEC; - } - else { - ATOMIC_SET(timer_pthread.armed, 0); - timeout = -1; - } - } - else if (ccp) { - pthread_kill(main_thread_id, SIGVTALRM); - ATOMIC_SET(timer_pthread.armed, 0); - timeout = -1; - } - } + thread_sched_to_waiting_until_wakeup(sched, th); } - return 0; -} -#endif /* UBF_TIMER_PTHREAD */ - -static VALUE -ubf_caller(void *ignore) -{ - rb_thread_sleep_forever(); - - return Qfalse; -} - -/* - * Called if and only if one thread is running, and - * the unblock function is NOT async-signal-safe - * This assumes USE_THREAD_CACHE is true for performance reasons - */ -static VALUE -rb_thread_start_unblock_thread(void) -{ - return rb_thread_create(ubf_caller, 0); + RUBY_DEBUG_LOG("wakeup"); + RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_READY); } // thread internal event hooks (only for pthread) diff --git a/thread_pthread.h b/thread_pthread.h index 10d3fcd9280c17..9ccacbf6604615 100644 --- 
a/thread_pthread.h +++ b/thread_pthread.h @@ -19,14 +19,59 @@ // per-Thead scheduler helper data struct rb_thread_sched_item { - union { + struct { struct ccan_list_node ubf; - struct ccan_list_node readyq; // protected by sched->lock + + // connected to ractor->threads.sched.reqdyq + // locked by ractor->threads.sched.lock + struct ccan_list_node readyq; + + // connected to vm->ractor.sched.timeslice_threads + // locked by vm->ractor.sched.lock + struct ccan_list_node timeslice_threads; + + // connected to vm->ractor.sched.running_threads + // locked by vm->ractor.sched.lock + struct ccan_list_node running_threads; + + // connected to vm->ractor.sched.zombie_threads + struct ccan_list_node zombie_threads; } node; + + // this data should be protected by timer_th.waiting_lock + struct { + enum thread_sched_waiting_flag { + thread_sched_waiting_none = 0x00, + thread_sched_waiting_timeout = 0x01, + thread_sched_waiting_io_read = 0x02, + thread_sched_waiting_io_write = 0x08, + thread_sched_waiting_io_force = 0x40, // ignore readable + } flags; + + struct { + // should be compat with hrtime.h +#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL + int128_t timeout; +#else + uint64_t timeout; +#endif + int fd; // -1 for timeout only + int result; + } data; + + // connected to timer_th.waiting + struct ccan_list_node node; + } waiting_reason; + + bool finished; + bool malloc_stack; + void *context_stack; + struct coroutine_context *context; }; struct rb_native_thread { rb_atomic_t serial; + struct rb_vm_struct *vm; rb_nativethread_id_t thread_id; @@ -54,6 +99,11 @@ struct rb_native_thread { #ifdef USE_SIGALTSTACK void *altstack; #endif + + struct coroutine_context *nt_context; + int dedicated; + + size_t machine_stack_maxsize; }; #undef except @@ -63,45 +113,35 @@ struct rb_native_thread { // per-Ractor struct rb_thread_sched { - /* fast path */ - - const struct rb_thread_struct *running; // running thread or NULL - rb_nativethread_lock_t lock; + rb_nativethread_lock_t lock_; +#if 
VM_CHECK_MODE + struct rb_thread_struct *lock_owner; +#endif + struct rb_thread_struct *running; // running thread or NULL + bool is_running; + bool is_running_timeslice; + bool enable_mn_threads; - /* - * slow path, protected by ractor->thread_sched->lock - * - @readyq - FIFO queue of threads waiting for running - * - @timer - it handles timeslices for @current. It is any one thread - * in @waitq, there is no @timer if @waitq is empty, but always - * a @timer if @waitq has entries - * - @timer_err tracks timeslice limit, the timeslice only resets - * when pthread_cond_timedwait returns ETIMEDOUT, so frequent - * switching between contended/uncontended GVL won't reset the - * timer. - */ struct ccan_list_head readyq; - const struct rb_thread_struct *timer; - int timer_err; - - /* yield */ - rb_nativethread_cond_t switch_cond; - rb_nativethread_cond_t switch_wait_cond; - int need_yield; - int wait_yield; + int readyq_cnt; + // ractor scheduling + struct ccan_list_node grq_node; }; #ifdef RB_THREAD_LOCAL_SPECIFIER -# ifdef __APPLE__ -// on Darwin, TLS can not be accessed across .so -struct rb_execution_context_struct *rb_current_ec(void); -void rb_current_ec_set(struct rb_execution_context_struct *); -# else -RUBY_EXTERN RB_THREAD_LOCAL_SPECIFIER struct rb_execution_context_struct *ruby_current_ec; - -// for RUBY_DEBUG_LOG() -RUBY_EXTERN RB_THREAD_LOCAL_SPECIFIER rb_atomic_t ruby_nt_serial; -#define RUBY_NT_SERIAL 1 -# endif + NOINLINE(void rb_current_ec_set(struct rb_execution_context_struct *)); + NOINLINE(struct rb_execution_context_struct *rb_current_ec_noinline(void)); + + # ifdef __APPLE__ + // on Darwin, TLS can not be accessed across .so + struct rb_execution_context_struct *rb_current_ec(void); + # else + RUBY_EXTERN RB_THREAD_LOCAL_SPECIFIER struct rb_execution_context_struct *ruby_current_ec; + + // for RUBY_DEBUG_LOG() + RUBY_EXTERN RB_THREAD_LOCAL_SPECIFIER rb_atomic_t ruby_nt_serial; + #define RUBY_NT_SERIAL 1 + # endif #else typedef pthread_key_t 
native_tls_key_t; diff --git a/thread_pthread_mn.c b/thread_pthread_mn.c new file mode 100644 index 00000000000000..64266683cdb452 --- /dev/null +++ b/thread_pthread_mn.c @@ -0,0 +1,843 @@ +// included by "thread_pthread.c" + +#if USE_MN_THREADS + +static void timer_thread_unregister_waiting(rb_thread_t *th, int fd); + +static bool +timer_thread_cancel_waiting(rb_thread_t *th) +{ + bool canceled = false; + + if (th->sched.waiting_reason.flags) { + rb_native_mutex_lock(&timer_th.waiting_lock); + { + if (th->sched.waiting_reason.flags) { + canceled = true; + ccan_list_del_init(&th->sched.waiting_reason.node); + if (th->sched.waiting_reason.flags & (thread_sched_waiting_io_read | thread_sched_waiting_io_write)) { + timer_thread_unregister_waiting(th, th->sched.waiting_reason.data.fd); + } + th->sched.waiting_reason.flags = thread_sched_waiting_none; + } + } + rb_native_mutex_unlock(&timer_th.waiting_lock); + } + + return canceled; +} + +static void +ubf_event_waiting(void *ptr) +{ + rb_thread_t *th = (rb_thread_t *)ptr; + struct rb_thread_sched *sched = TH_SCHED(th); + + RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); + + VM_ASSERT(th->nt == NULL || !th_has_dedicated_nt(th)); + + // only once. it is safe because th->interrupt_lock is already acquired. 
+ th->unblock.func = NULL; + th->unblock.arg = NULL; + + bool canceled = timer_thread_cancel_waiting(th); + + thread_sched_lock(sched, th); + { + if (sched->running == th) { + RUBY_DEBUG_LOG("not waiting yet"); + } + else if (canceled) { + thread_sched_to_ready_common(sched, th, true, false); + } + else { + RUBY_DEBUG_LOG("already not waiting"); + } + } + thread_sched_unlock(sched, th); +} + +static bool timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags, rb_hrtime_t *rel); + +// return true if timed out +static bool +thread_sched_wait_events(struct rb_thread_sched *sched, rb_thread_t *th, int fd, enum thread_sched_waiting_flag events, rb_hrtime_t *rel) +{ + VM_ASSERT(!th_has_dedicated_nt(th)); // on SNT + + volatile bool timedout = false, need_cancel = false; + + if (timer_thread_register_waiting(th, fd, events, rel)) { + RUBY_DEBUG_LOG("wait fd:%d", fd); + + RB_VM_SAVE_MACHINE_CONTEXT(th); + setup_ubf(th, ubf_event_waiting, (void *)th); + + thread_sched_lock(sched, th); + { + if (th->sched.waiting_reason.flags == thread_sched_waiting_none) { + // already awaken + } + else if (RUBY_VM_INTERRUPTED(th->ec)) { + need_cancel = true; + } + else { + RUBY_DEBUG_LOG("sleep"); + + th->status = THREAD_STOPPED_FOREVER; + thread_sched_wakeup_next_thread(sched, th, true); + thread_sched_wait_running_turn(sched, th, true); + + RUBY_DEBUG_LOG("wakeup"); + } + + timedout = th->sched.waiting_reason.data.result == 0; + } + thread_sched_unlock(sched, th); + + if (need_cancel) { + timer_thread_cancel_waiting(th); + } + + setup_ubf(th, NULL, NULL); // TODO: maybe it is already NULL? 
+ + th->status = THREAD_RUNNABLE; + } + else { + RUBY_DEBUG_LOG("can not wait fd:%d", fd); + return false; + } + + VM_ASSERT(sched->running == th); + + return timedout; +} + +/// stack management + +#define MSTACK_CHUNK_SIZE (512 * 1024 * 1024) // 512MB +#define MSTACK_PAGE_SIZE 4096 +#define MSTACK_CHUNK_PAGE_NUM (MSTACK_CHUNK_SIZE / MSTACK_PAGE_SIZE - 1) // 1 is start redzone + +// 512MB chunk +// 131,072 pages (> 65,536) +// 0th page is Redzone. Start from 1st page. + +/* + * <--> machine stack + vm stack + * ---------------------------------- + * |HD...|RZ| ... |RZ| ... ... |RZ| + * <------------- 512MB -------------> + */ + +static struct nt_stack_chunk_header { + struct nt_stack_chunk_header *prev_chunk; + struct nt_stack_chunk_header *prev_free_chunk; + + uint16_t start_page; + uint16_t stack_count; + uint16_t uninitialized_stack_count; + + uint16_t free_stack_pos; + uint16_t free_stack[]; +} *nt_stack_chunks = NULL, + *nt_free_stack_chunks = NULL; + +struct nt_machine_stack_footer { + struct nt_stack_chunk_header *ch; + size_t index; +}; + +static rb_nativethread_lock_t nt_machine_stack_lock = RB_NATIVETHREAD_LOCK_INIT; + +#include <sys/mman.h> + +// vm_stack_size + machine_stack_size + 1 * (guard page size) +static inline size_t +nt_therad_stack_size(void) +{ + static size_t msz; + if (LIKELY(msz > 0)) return msz; + + rb_vm_t *vm = GET_VM(); + int sz = (int)(vm->default_params.thread_vm_stack_size + vm->default_params.thread_machine_stack_size + MSTACK_PAGE_SIZE); + int page_num = (sz + MSTACK_PAGE_SIZE - 1) / MSTACK_PAGE_SIZE; + msz = page_num * MSTACK_PAGE_SIZE; + return msz; +} + +static struct nt_stack_chunk_header * +nt_alloc_thread_stack_chunk(void) +{ + const char *m = (void *)mmap(NULL, MSTACK_CHUNK_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_STACK, -1, 0); + if (m == MAP_FAILED) { + return NULL; + } + + size_t msz = nt_therad_stack_size(); + int header_page_cnt = 1; + int stack_count = ((MSTACK_CHUNK_PAGE_NUM - header_page_cnt) *
MSTACK_PAGE_SIZE) / msz; + int ch_size = sizeof(struct nt_stack_chunk_header) + sizeof(uint16_t) * stack_count; + + if (ch_size > MSTACK_PAGE_SIZE * header_page_cnt) { + header_page_cnt = (ch_size + MSTACK_PAGE_SIZE - 1) / MSTACK_PAGE_SIZE; + stack_count = ((MSTACK_CHUNK_PAGE_NUM - header_page_cnt) * MSTACK_PAGE_SIZE) / msz; + } + + VM_ASSERT(stack_count <= UINT16_MAX); + + struct nt_stack_chunk_header *ch = (struct nt_stack_chunk_header *)m; + + ch->start_page = header_page_cnt; + ch->prev_chunk = nt_stack_chunks; + ch->prev_free_chunk = nt_free_stack_chunks; + ch->uninitialized_stack_count = ch->stack_count = (uint16_t)stack_count; + ch->free_stack_pos = 0; + + RUBY_DEBUG_LOG("ch:%p start_page:%d stack_cnt:%d stack_size:%d", ch, (int)ch->start_page, (int)ch->stack_count, (int)msz); + + return ch; +} + +static void * +nt_stack_chunk_get_stack_start(struct nt_stack_chunk_header *ch, size_t idx) +{ + const char *m = (char *)ch; + return (void *)(m + ch->start_page * MSTACK_PAGE_SIZE + idx * nt_therad_stack_size()); +} + +static struct nt_machine_stack_footer * +nt_stack_chunk_get_msf(const rb_vm_t *vm, const char *mstack) +{ + // TODO: stack direction + const size_t msz = vm->default_params.thread_machine_stack_size; + return (struct nt_machine_stack_footer *)&mstack[msz - sizeof(struct nt_machine_stack_footer)]; +} + +static void * +nt_stack_chunk_get_stack(const rb_vm_t *vm, struct nt_stack_chunk_header *ch, size_t idx, void **vm_stack, void **machine_stack) +{ + // TODO: only support stack going down + // [VM ... machine stack ...] 
+ + const char *vstack, *mstack; + const char *guard_page; + vstack = nt_stack_chunk_get_stack_start(ch, idx); + guard_page = vstack + vm->default_params.thread_vm_stack_size; + mstack = guard_page + MSTACK_PAGE_SIZE; + + struct nt_machine_stack_footer *msf = nt_stack_chunk_get_msf(vm, mstack); + msf->ch = ch; + msf->index = idx; + +#if 0 + RUBY_DEBUG_LOG("msf:%p vstack:%p-%p guard_page:%p-%p mstack:%p-%p", msf, + vstack, (void *)(guard_page-1), + guard_page, (void *)(mstack-1), + mstack, (void *)(msf)); +#endif + + *vm_stack = (void *)vstack; + *machine_stack = (void *)mstack; + + return (void *)guard_page; +} + +RBIMPL_ATTR_MAYBE_UNUSED() +static void +nt_stack_chunk_dump(void) +{ + struct nt_stack_chunk_header *ch; + int i; + + fprintf(stderr, "** nt_stack_chunks\n"); + ch = nt_stack_chunks; + for (i=0; ch; i++, ch = ch->prev_chunk) { + fprintf(stderr, "%d %p free_pos:%d\n", i, (void *)ch, (int)ch->free_stack_pos); + } + + fprintf(stderr, "** nt_free_stack_chunks\n"); + ch = nt_free_stack_chunks; + for (i=0; ch; i++, ch = ch->prev_free_chunk) { + fprintf(stderr, "%d %p free_pos:%d\n", i, (void *)ch, (int)ch->free_stack_pos); + } +} + +static int +nt_guard_page(const char *p, size_t len) +{ + if (mprotect((void *)p, len, PROT_NONE) != -1) { + return 0; + } + else { + return errno; + } +} + +static int +nt_alloc_stack(rb_vm_t *vm, void **vm_stack, void **machine_stack) +{ + int err = 0; + + rb_native_mutex_lock(&nt_machine_stack_lock); + { + retry: + if (nt_free_stack_chunks) { + struct nt_stack_chunk_header *ch = nt_free_stack_chunks; + if (ch->free_stack_pos > 0) { + RUBY_DEBUG_LOG("free_stack_pos:%d", ch->free_stack_pos); + nt_stack_chunk_get_stack(vm, ch, ch->free_stack[--ch->free_stack_pos], vm_stack, machine_stack); + } + else if (ch->uninitialized_stack_count > 0) { + RUBY_DEBUG_LOG("uninitialized_stack_count:%d", ch->uninitialized_stack_count); + + size_t idx = ch->stack_count - ch->uninitialized_stack_count--; + void *guard_page = 
nt_stack_chunk_get_stack(vm, ch, idx, vm_stack, machine_stack); + err = nt_guard_page(guard_page, MSTACK_PAGE_SIZE); + } + else { + nt_free_stack_chunks = ch->prev_free_chunk; + ch->prev_free_chunk = NULL; + goto retry; + } + } + else { + struct nt_stack_chunk_header *p = nt_alloc_thread_stack_chunk(); + if (p == NULL) { + err = errno; + } + else { + nt_free_stack_chunks = nt_stack_chunks = p; + goto retry; + } + } + } + rb_native_mutex_unlock(&nt_machine_stack_lock); + + return err; +} + +static void +nt_free_stack(void *mstack) +{ + if (!mstack) return; + + rb_native_mutex_lock(&nt_machine_stack_lock); + { + struct nt_machine_stack_footer *msf = nt_stack_chunk_get_msf(GET_VM(), mstack); + struct nt_stack_chunk_header *ch = msf->ch; + int idx = (int)msf->index; + void *stack = nt_stack_chunk_get_stack_start(ch, idx); + + RUBY_DEBUG_LOG("stack:%p mstack:%p ch:%p index:%d", stack, mstack, ch, idx); + + if (ch->prev_free_chunk == NULL) { + ch->prev_free_chunk = nt_free_stack_chunks; + nt_free_stack_chunks = ch; + } + ch->free_stack[ch->free_stack_pos++] = idx; + + // clear the stack pages +#if defined(MADV_FREE) + int r = madvise(stack, nt_therad_stack_size(), MADV_FREE); +#elif defined(MADV_DONTNEED) + int r = madvise(stack, nt_therad_stack_size(), MADV_DONTNEED); +#else + int r = 0; +#endif + + if (r != 0) rb_bug("madvise errno:%d", errno); + } + rb_native_mutex_unlock(&nt_machine_stack_lock); +} + +static int +native_thread_check_and_create_shared(rb_vm_t *vm) +{ + bool need_to_make = false; + + rb_native_mutex_lock(&vm->ractor.sched.lock); + { + unsigned int snt_cnt = vm->ractor.sched.snt_cnt; + if (!vm->ractor.main_ractor->threads.sched.enable_mn_threads) snt_cnt++; // do not need snt for main ractor + + if (((int)snt_cnt < MINIMUM_SNT) || + (snt_cnt < vm->ractor.cnt && + snt_cnt < vm->ractor.sched.max_cpu)) { + + RUBY_DEBUG_LOG("added snt:%u dnt:%u ractor_cnt:%u grq_cnt:%u", + vm->ractor.sched.snt_cnt, + vm->ractor.sched.dnt_cnt, + vm->ractor.cnt, + 
vm->ractor.sched.grq_cnt); + + vm->ractor.sched.snt_cnt++; + need_to_make = true; + } + else { + RUBY_DEBUG_LOG("snt:%d ractor_cnt:%d", (int)vm->ractor.sched.snt_cnt, (int)vm->ractor.cnt); + } + } + rb_native_mutex_unlock(&vm->ractor.sched.lock); + + if (need_to_make) { + struct rb_native_thread *nt = native_thread_alloc(); + nt->vm = vm; + return native_thread_create0(nt); + } + else { + return 0; + } +} + +static COROUTINE +co_start(struct coroutine_context *from, struct coroutine_context *self) +{ + rb_thread_t *th = (rb_thread_t *)self->argument; + struct rb_thread_sched *sched = TH_SCHED(th); + VM_ASSERT(th->nt != NULL); + VM_ASSERT(th == sched->running); + VM_ASSERT(sched->lock_owner == NULL); + + // RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); + + thread_sched_set_lock_owner(sched, th); + thread_sched_add_running_thread(TH_SCHED(th), th); + thread_sched_unlock(sched, th); + { + call_thread_start_func_2(th); + } + thread_sched_lock(sched, NULL); + + RUBY_DEBUG_LOG("terminated th:%d", (int)th->serial); + + // Thread is terminated + + VM_ASSERT(!th_has_dedicated_nt(th)); + + rb_vm_t *vm = th->vm; + bool has_ready_ractor = vm->ractor.sched.grq_cnt > 0; // at least this ractor is not queued + + rb_thread_t *next_th = sched->running; + struct rb_native_thread *nt = th->nt; + native_thread_assign(NULL, th); + rb_ractor_set_current_ec(th->ractor, NULL); + + if (!has_ready_ractor && next_th && !next_th->nt) { + // switch to the next thread + thread_sched_set_lock_owner(sched, NULL); + thread_sched_switch0(th->sched.context, next_th, nt); + th->sched.finished = true; + } + else { + // switch to the next Ractor + th->sched.finished = true; + coroutine_transfer(self, nt->nt_context); + } + rb_bug("unreachable"); +} + +static int +native_thread_create_shared(rb_thread_t *th) +{ + // setup coroutine + rb_vm_t *vm = th->vm; + void *vm_stack = NULL, *machine_stack = NULL; + int err = nt_alloc_stack(vm, &vm_stack, &machine_stack); + if (err) return err; + + VM_ASSERT(vm_stack 
< machine_stack); + + // setup vm stack + size_t vm_stack_words = th->vm->default_params.thread_vm_stack_size/sizeof(VALUE); + rb_ec_initialize_vm_stack(th->ec, vm_stack, vm_stack_words); + + // setup machine stack + size_t machine_stack_size = vm->default_params.thread_machine_stack_size - sizeof(struct nt_machine_stack_footer); + th->ec->machine.stack_start = (void *)((uintptr_t)machine_stack + machine_stack_size); + th->ec->machine.stack_maxsize = machine_stack_size; // TODO + th->sched.context_stack = machine_stack; + + th->sched.context = ruby_xmalloc(sizeof(struct coroutine_context)); + coroutine_initialize(th->sched.context, co_start, machine_stack, machine_stack_size); + th->sched.context->argument = th; + + RUBY_DEBUG_LOG("th:%u vm_stack:%p machine_stack:%p", rb_th_serial(th), vm_stack, machine_stack); + thread_sched_to_ready(TH_SCHED(th), th); + + // setup nt + return native_thread_check_and_create_shared(th->vm); +} + +#else // USE_MN_THREADS + +static int +native_thread_create_shared(rb_thread_t *th) +{ + rb_bug("unreachable"); +} + +static bool +thread_sched_wait_events(struct rb_thread_sched *sched, rb_thread_t *th, int fd, enum thread_sched_waiting_flag events, rb_hrtime_t *rel) +{ + rb_bug("unreachable"); +} + +#endif // USE_MN_THREADS + +/// EPOLL specific code +#if HAVE_SYS_EPOLL_H && USE_MN_THREADS + +static bool +fd_readable_nonblock(int fd) +{ + struct pollfd pfd = { + .fd = fd, + .events = POLLIN, + }; + return poll(&pfd, 1, 0) != 0; +} + +static bool +fd_writable_nonblock(int fd) +{ + struct pollfd pfd = { + .fd = fd, + .events = POLLOUT, + }; + return poll(&pfd, 1, 0) != 0; +} + +static void +verify_waiting_list(void) +{ +#if VM_CHECK_MODE > 0 + rb_thread_t *wth, *prev_wth = NULL; + ccan_list_for_each(&timer_th.waiting, wth, sched.waiting_reason.node) { + // fprintf(stderr, "verify_waiting_list th:%u abs:%lu\n", rb_th_serial(wth), (unsigned long)wth->sched.waiting_reason.data.timeout); + if (prev_wth) { + rb_hrtime_t timeout = 
wth->sched.waiting_reason.data.timeout; + rb_hrtime_t prev_timeout = prev_wth->sched.waiting_reason.data.timeout; + VM_ASSERT(timeout == 0 || prev_timeout <= timeout); + } + prev_wth = wth; + } +#endif +} + +// return false if the fd is not waitable or not need to wait. +static bool +timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags, rb_hrtime_t *rel) +{ + RUBY_DEBUG_LOG("th:%u fd:%d flag:%d rel:%lu", rb_th_serial(th), fd, flags, rel ? (unsigned long)*rel : 0); + + VM_ASSERT(th == NULL || TH_SCHED(th)->running == th); + VM_ASSERT(flags != 0); + + rb_hrtime_t abs = 0; // 0 means no timeout + + if (rel) { + if (*rel > 0) { + flags |= thread_sched_waiting_timeout; + } + else { + return false; + } + } + + if (rel && *rel > 0) { + flags |= thread_sched_waiting_timeout; + } + + __uint32_t epoll_events = 0; + if (flags & thread_sched_waiting_timeout) { + VM_ASSERT(rel != NULL); + abs = rb_hrtime_add(rb_hrtime_now(), *rel); + } + + if (flags & thread_sched_waiting_io_read) { + if (!(flags & thread_sched_waiting_io_force) && fd_readable_nonblock(fd)) { + RUBY_DEBUG_LOG("fd_readable_nonblock"); + return false; + } + else { + VM_ASSERT(fd >= 0); + epoll_events |= EPOLLIN; + } + } + + if (flags & thread_sched_waiting_io_write) { + if (!(flags & thread_sched_waiting_io_force) && fd_writable_nonblock(fd)) { + RUBY_DEBUG_LOG("fd_writable_nonblock"); + return false; + } + else { + VM_ASSERT(fd >= 0); + epoll_events |= EPOLLOUT; + } + } + + rb_native_mutex_lock(&timer_th.waiting_lock); + { + if (epoll_events) { + struct epoll_event event = { + .events = epoll_events, + .data = { + .ptr = (void *)th, + }, + }; + if (epoll_ctl(timer_th.epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1) { + RUBY_DEBUG_LOG("failed (%d)", errno); + + switch (errno) { + case EBADF: + // the fd is closed? 
+ case EPERM: + // the fd doesn't support epoll + case EEXIST: + // the fd is already registerred by another thread + rb_native_mutex_unlock(&timer_th.waiting_lock); + return false; + default: + perror("epoll_ctl"); + rb_bug("register/epoll_ctl failed(fd:%d, errno:%d)", fd, errno); + } + } + RUBY_DEBUG_LOG("epoll_ctl(add, fd:%d, events:%d) success", fd, epoll_events); + } + + if (th) { + VM_ASSERT(th->sched.waiting_reason.flags == thread_sched_waiting_none); + + // setup waiting information + { + th->sched.waiting_reason.flags = flags; + th->sched.waiting_reason.data.timeout = abs; + th->sched.waiting_reason.data.fd = fd; + th->sched.waiting_reason.data.result = 0; + } + + if (abs == 0) { // no timeout + VM_ASSERT(!(flags & thread_sched_waiting_timeout)); + ccan_list_add_tail(&timer_th.waiting, &th->sched.waiting_reason.node); + } + else { + RUBY_DEBUG_LOG("abs:%lu", abs); + VM_ASSERT(flags & thread_sched_waiting_timeout); + + // insert th to sorted list (TODO: O(n)) + rb_thread_t *wth, *prev_wth = NULL; + + ccan_list_for_each(&timer_th.waiting, wth, sched.waiting_reason.node) { + if ((wth->sched.waiting_reason.flags & thread_sched_waiting_timeout) && + wth->sched.waiting_reason.data.timeout < abs) { + prev_wth = wth; + } + else { + break; + } + } + + if (prev_wth) { + ccan_list_add_after(&timer_th.waiting, &prev_wth->sched.waiting_reason.node, &th->sched.waiting_reason.node); + } + else { + ccan_list_add(&timer_th.waiting, &th->sched.waiting_reason.node); + } + + verify_waiting_list(); + + // update timeout seconds + timer_thread_wakeup(); + } + } + else { + VM_ASSERT(abs == 0); + } + } + rb_native_mutex_unlock(&timer_th.waiting_lock); + + return true; +} + +static void +timer_thread_unregister_waiting(rb_thread_t *th, int fd) +{ + RUBY_DEBUG_LOG("th:%u fd:%d", rb_th_serial(th), fd); + + // Linux 2.6.9 or later is needed to pass NULL as data. + if (epoll_ctl(timer_th.epoll_fd, EPOLL_CTL_DEL, fd, NULL) == -1) { + switch (errno) { + case EBADF: + // just ignore. 
maybe fd is closed. + break; + default: + perror("epoll_ctl"); + rb_bug("unregister/epoll_ctl fails. errno:%d", errno); + } + } +} + +static void +timer_thread_setup_nm(void) +{ + if ((timer_th.epoll_fd = epoll_create1(EPOLL_CLOEXEC)) == -1) rb_bug("epoll_create (errno:%d)", errno); + RUBY_DEBUG_LOG("comm_fds:%d/%d", timer_th.comm_fds[0], timer_th.comm_fds[1]); + RUBY_DEBUG_LOG("epoll_fd:%d", timer_th.epoll_fd); + + timer_thread_register_waiting(NULL, timer_th.comm_fds[0], thread_sched_waiting_io_read | thread_sched_waiting_io_force, NULL); +} + +/* + * The purpose of the timer thread: + * + * (1) Periodic checking + * (1-1) Provide time slice for active NTs + * (1-2) Check NT shortage + * (1-3) Periodic UBF (global) + * (1-4) Lazy GRQ deq start + * (2) Receive notification + * (2-1) async I/O termination + * (2-2) timeout + * (2-2-1) sleep(n) + * (2-2-2) timeout(n), I/O, ... + */ +static void +timer_thread_polling(rb_vm_t *vm) +{ + int r = epoll_wait(timer_th.epoll_fd, timer_th.finished_events, EPOLL_EVENTS_MAX, timer_thread_set_timeout(vm)); + + RUBY_DEBUG_LOG("r:%d errno:%d", r, errno); + + switch (r) { + case 0: // timeout + RUBY_DEBUG_LOG("timeout%s", ""); + + ractor_sched_lock(vm, NULL); + { + // (1-1) timeslice + timer_thread_check_timeslice(vm); + + // (1-4) lazy grq deq + if (vm->ractor.sched.grq_cnt > 0) { + RUBY_DEBUG_LOG("GRQ cnt: %u", vm->ractor.sched.grq_cnt); + rb_native_cond_signal(&vm->ractor.sched.cond); + } + } + ractor_sched_unlock(vm, NULL); + + // (1-2) + native_thread_check_and_create_shared(vm); + + break; + + case -1: + switch (errno) { + case EINTR: + // simply retry + break; + default: + perror("epoll_wait"); + rb_bug("epoll_wait errno:%d", errno); + } + break; + + default: + RUBY_DEBUG_LOG("%d event(s)", r); + + for (int i=0; isched.waiting_reason.flags) { + // delete from chain + ccan_list_del_init(&th->sched.waiting_reason.node); + timer_thread_unregister_waiting(th, th->sched.waiting_reason.data.fd); + + th->sched.waiting_reason.flags 
= thread_sched_waiting_none; + th->sched.waiting_reason.data.fd = -1; + th->sched.waiting_reason.data.result = (int)events; + + timer_thread_wakeup_thread(th); + } + else { + // already released + } + } + rb_native_mutex_unlock(&timer_th.waiting_lock); + } + } + } +} + +#else // HAVE_SYS_EPOLL_H + +static void +timer_thread_setup_nm(void) +{ + // do nothing +} + +static void +timer_thread_polling(rb_vm_t *vm) +{ + int timeout = timer_thread_set_timeout(vm); + + struct pollfd pfd = { + .fd = timer_th.comm_fds[0], + .events = POLLIN, + }; + + int r = poll(&pfd, 1, timeout); + + switch (r) { + case 0: // timeout + rb_native_mutex_lock(&vm->ractor.sched.lock); + { + // (1-1) timeslice + timer_thread_check_timeslice(vm); + } + rb_native_mutex_unlock(&vm->ractor.sched.lock); + break; + + case -1: // error + switch (errno) { + case EINTR: + // simply retry + break; + default: + perror("poll"); + rb_bug("poll errno:%d", errno); + break; + } + + case 1: + consume_communication_pipe(timer_th.comm_fds[0]); + break; + + default: + rb_bug("unreachbale"); + } +} + +#endif // HAVE_SYS_EPOLL_H diff --git a/thread_sync.c b/thread_sync.c index ca463c35f1a8b1..825fdde76f5026 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -41,6 +41,8 @@ struct queue_sleep_arg { static void sync_wakeup(struct ccan_list_head *head, long max) { + RUBY_DEBUG_LOG("max:%ld", max); + struct sync_waiter *cur = 0, *next; ccan_list_for_each_safe(head, cur, next, node) { @@ -51,6 +53,7 @@ sync_wakeup(struct ccan_list_head *head, long max) rb_fiber_scheduler_unblock(cur->th->scheduler, cur->self, rb_fiberptr_self(cur->fiber)); } else { + RUBY_DEBUG_LOG("target_th:%u", rb_th_serial(cur->th)); rb_threadptr_interrupt(cur->th); cur->th->status = THREAD_RUNNABLE; } @@ -251,6 +254,8 @@ rb_mutex_trylock(VALUE self) rb_mutex_t *mutex = mutex_ptr(self); if (mutex->fiber == 0) { + RUBY_DEBUG_LOG("%p ok", mutex); + rb_fiber_t *fiber = GET_EC()->fiber_ptr; rb_thread_t *th = GET_THREAD(); mutex->fiber = fiber; @@ -258,17 
+263,12 @@ rb_mutex_trylock(VALUE self) mutex_locked(th, self); return Qtrue; } - - return Qfalse; + else { + RUBY_DEBUG_LOG("%p ng", mutex); + return Qfalse; + } } -/* - * At maximum, only one thread can use cond_timedwait and watch deadlock - * periodically. Multiple polling thread (i.e. concurrent deadlock check) - * introduces new race conditions. [Bug #6278] [ruby-core:44275] - */ -static const rb_thread_t *patrol_thread = NULL; - static VALUE mutex_owned_p(rb_fiber_t *fiber, rb_mutex_t *mutex) { @@ -290,6 +290,8 @@ delete_from_waitq(VALUE value) return Qnil; } +static inline rb_atomic_t threadptr_get_interrupts(rb_thread_t *th); + static VALUE do_mutex_lock(VALUE self, int interruptible_p) { @@ -297,6 +299,7 @@ do_mutex_lock(VALUE self, int interruptible_p) rb_thread_t *th = ec->thread_ptr; rb_fiber_t *fiber = ec->fiber_ptr; rb_mutex_t *mutex = mutex_ptr(self); + rb_atomic_t saved_ints = 0; /* When running trap handler */ if (!FL_TEST_RAW(self, MUTEX_ALLOW_TRAP) && @@ -310,6 +313,8 @@ do_mutex_lock(VALUE self, int interruptible_p) } while (mutex->fiber != fiber) { + VM_ASSERT(mutex->fiber != NULL); + VALUE scheduler = rb_fiber_scheduler_current(); if (scheduler != Qnil) { struct sync_waiter sync_waiter = { @@ -331,51 +336,47 @@ do_mutex_lock(VALUE self, int interruptible_p) rb_raise(rb_eThreadError, "deadlock; lock already owned by another fiber belonging to the same thread"); } - enum rb_thread_status prev_status = th->status; - rb_hrtime_t *timeout = 0; - rb_hrtime_t rel = rb_msec2hrtime(100); - - th->status = THREAD_STOPPED_FOREVER; - th->locking_mutex = self; - rb_ractor_sleeper_threads_inc(th->ractor); - /* - * Carefully! while some contended threads are in native_sleep(), - * ractor->sleeper is unstable value. we have to avoid both deadlock - * and busy loop. 
- */ - if ((rb_ractor_living_thread_num(th->ractor) == rb_ractor_sleeper_thread_num(th->ractor)) && - !patrol_thread) { - timeout = &rel; - patrol_thread = th; - } - struct sync_waiter sync_waiter = { .self = self, .th = th, - .fiber = nonblocking_fiber(fiber) + .fiber = nonblocking_fiber(fiber), }; - ccan_list_add_tail(&mutex->waitq, &sync_waiter.node); + RUBY_DEBUG_LOG("%p wait", mutex); + + // similar code with `sleep_forever`, but + // sleep_forever(SLEEP_DEADLOCKABLE) raises an exception. + // Ensure clause is needed like but `rb_ensure` a bit slow. + // + // begin + // sleep_forever(th, SLEEP_DEADLOCKABLE); + // ensure + // ccan_list_del(&sync_waiter.node); + // end + enum rb_thread_status prev_status = th->status; + th->status = THREAD_STOPPED_FOREVER; + rb_ractor_sleeper_threads_inc(th->ractor); + rb_check_deadlock(th->ractor); - native_sleep(th, timeout); /* release GVL */ + th->locking_mutex = self; + ccan_list_add_tail(&mutex->waitq, &sync_waiter.node); + { + native_sleep(th, NULL); + } ccan_list_del(&sync_waiter.node); + // unlocked by another thread while sleeping if (!mutex->fiber) { mutex->fiber = fiber; } - if (patrol_thread == th) - patrol_thread = NULL; - - th->locking_mutex = Qfalse; - if (mutex->fiber && timeout && !RUBY_VM_INTERRUPTED(th->ec)) { - rb_check_deadlock(th->ractor); - } - if (th->status == THREAD_STOPPED_FOREVER) { - th->status = prev_status; - } rb_ractor_sleeper_threads_dec(th->ractor); + th->status = prev_status; + th->locking_mutex = Qfalse; + th->locking_mutex = Qfalse; + + RUBY_DEBUG_LOG("%p wakeup", mutex); } if (interruptible_p) { @@ -387,11 +388,27 @@ do_mutex_lock(VALUE self, int interruptible_p) mutex->fiber = fiber; } } + else { + // clear interrupt information + if (RUBY_VM_INTERRUPTED(th->ec)) { + // reset interrupts + if (saved_ints == 0) { + saved_ints = threadptr_get_interrupts(th); + } + else { + // ignore additional interrupts + threadptr_get_interrupts(th); + } + } + } } + if (saved_ints) th->ec->interrupt_flag = 
saved_ints; if (mutex->fiber == fiber) mutex_locked(th, self); } + RUBY_DEBUG_LOG("%p locked", mutex); + // assertion if (mutex_owned_p(fiber, mutex) == Qfalse) rb_bug("do_mutex_lock: mutex is not owned."); @@ -435,6 +452,8 @@ rb_mutex_owned_p(VALUE self) static const char * rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_fiber_t *fiber) { + RUBY_DEBUG_LOG("%p", mutex); + if (mutex->fiber == 0) { return "Attempt to unlock a mutex which is not locked"; } @@ -456,13 +475,14 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_fiber_t *fiber) } else { switch (cur->th->status) { - case THREAD_RUNNABLE: /* from someone else calling Thread#run */ - case THREAD_STOPPED_FOREVER: /* likely (rb_mutex_lock) */ + case THREAD_RUNNABLE: /* from someone else calling Thread#run */ + case THREAD_STOPPED_FOREVER: /* likely (rb_mutex_lock) */ + RUBY_DEBUG_LOG("wakeup th:%u", rb_th_serial(cur->th)); rb_threadptr_interrupt(cur->th); return NULL; - case THREAD_STOPPED: /* probably impossible */ + case THREAD_STOPPED: /* probably impossible */ rb_bug("unexpected THREAD_STOPPED"); - case THREAD_KILLED: + case THREAD_KILLED: /* not sure about this, possible in exit GC? 
*/ rb_bug("unexpected THREAD_KILLED"); continue; diff --git a/thread_win32.c b/thread_win32.c index 543a045bd81d06..4190d03abe69aa 100644 --- a/thread_win32.c +++ b/thread_win32.c @@ -148,12 +148,13 @@ thread_sched_yield(struct rb_thread_sched *sched, rb_thread_t *th) } void -rb_thread_sched_init(struct rb_thread_sched *sched) +rb_thread_sched_init(struct rb_thread_sched *sched, bool atfork) { if (GVL_DEBUG) fprintf(stderr, "sched init\n"); sched->lock = w32_mutex_create(); } +// per-ractor void rb_thread_sched_destroy(struct rb_thread_sched *sched) { @@ -202,6 +203,11 @@ Init_native_thread(rb_thread_t *main_th) main_th->nt->interrupt_event); } +void +ruby_mn_threads_params(void) +{ +} + static int w32_wait_events(HANDLE *events, int count, DWORD timeout, rb_thread_t *th) { @@ -637,20 +643,32 @@ thread_start_func_1(void *th_ptr) RUBY_DEBUG_LOG("thread created th:%u, thid: %p, event: %p", rb_th_serial(th), th->nt->thread_id, th->nt->interrupt_event); + thread_sched_to_running(TH_SCHED(th), th); + ruby_thread_set_native(th); + + // kick threads thread_start_func_2(th, th->ec->machine.stack_start); w32_close_handle(thread_id); RUBY_DEBUG_LOG("thread deleted th:%u", rb_th_serial(th)); + return 0; } static int native_thread_create(rb_thread_t *th) { - const size_t stack_size = th->vm->default_params.thread_machine_stack_size + th->vm->default_params.thread_vm_stack_size; + // setup nt + const size_t stack_size = th->vm->default_params.thread_machine_stack_size; th->nt = ZALLOC(struct rb_native_thread); th->nt->thread_id = w32_create_thread(stack_size, thread_start_func_1, th); + // setup vm stack + size_t vm_stack_word_size = th->vm->default_params.thread_vm_stack_size / sizeof(VALUE); + void *vm_stack = ruby_xmalloc(vm_stack_word_size * sizeof(VALUE)); + th->sched.vm_stack = vm_stack; + rb_ec_initialize_vm_stack(th->ec, vm_stack, vm_stack_word_size); + if ((th->nt->thread_id) == 0) { return thread_errno; } @@ -763,12 +781,6 @@ rb_thread_wakeup_timer_thread(int sig) /* 
do nothing */ } -static VALUE -rb_thread_start_unblock_thread(void) -{ - return Qfalse; /* no-op */ -} - static void rb_thread_create_timer_thread(void) { @@ -841,26 +853,6 @@ rb_reserved_fd_p(int fd) return 0; } -int -rb_sigwait_fd_get(rb_thread_t *th) -{ - return -1; /* TODO */ -} - -NORETURN(void rb_sigwait_fd_put(rb_thread_t *, int)); -void -rb_sigwait_fd_put(rb_thread_t *th, int fd) -{ - rb_bug("not implemented, should not be called"); -} - -NORETURN(void rb_sigwait_sleep(const rb_thread_t *, int, const rb_hrtime_t *)); -void -rb_sigwait_sleep(const rb_thread_t *th, int fd, const rb_hrtime_t *rel) -{ - rb_bug("not implemented, should not be called"); -} - rb_nativethread_id_t rb_nativethread_self(void) { @@ -881,4 +873,134 @@ native_thread_native_thread_id(rb_thread_t *th) } #define USE_NATIVE_THREAD_NATIVE_THREAD_ID 1 +void +rb_add_running_thread(rb_thread_t *th){ + // do nothing +} + +void +rb_del_running_thread(rb_thread_t *th) +{ + // do nothing +} + +static bool +th_has_dedicated_nt(const rb_thread_t *th) +{ + return true; +} + +void +rb_threadptr_sched_free(rb_thread_t *th) +{ + ruby_xfree(th->nt); + ruby_xfree(th->sched.vm_stack); +} + +void +rb_threadptr_remove(rb_thread_t *th) +{ + // do nothing +} + +void +rb_thread_sched_mark_zombies(rb_vm_t *vm) +{ + // do nothing +} + +static bool +vm_barrier_finish_p(rb_vm_t *vm) +{ + RUBY_DEBUG_LOG("cnt:%u living:%u blocking:%u", + vm->ractor.blocking_cnt == vm->ractor.cnt, + vm->ractor.sync.barrier_cnt, + vm->ractor.cnt, + vm->ractor.blocking_cnt); + + VM_ASSERT(vm->ractor.blocking_cnt <= vm->ractor.cnt); + return vm->ractor.blocking_cnt == vm->ractor.cnt; +} + +void +rb_ractor_sched_barrier_start(rb_vm_t *vm, rb_ractor_t *cr) +{ + vm->ractor.sync.barrier_waiting = true; + + RUBY_DEBUG_LOG("barrier start. 
cnt:%u living:%u blocking:%u", + vm->ractor.sync.barrier_cnt, + vm->ractor.cnt, + vm->ractor.blocking_cnt); + + rb_vm_ractor_blocking_cnt_inc(vm, cr, __FILE__, __LINE__); + + // send signal + rb_ractor_t *r = 0; + ccan_list_for_each(&vm->ractor.set, r, vmlr_node) { + if (r != cr) { + rb_ractor_vm_barrier_interrupt_running_thread(r); + } + } + + // wait + while (!vm_barrier_finish_p(vm)) { + rb_vm_cond_wait(vm, &vm->ractor.sync.barrier_cond); + } + + RUBY_DEBUG_LOG("cnt:%u barrier success", vm->ractor.sync.barrier_cnt); + + rb_vm_ractor_blocking_cnt_dec(vm, cr, __FILE__, __LINE__); + + vm->ractor.sync.barrier_waiting = false; + vm->ractor.sync.barrier_cnt++; + + ccan_list_for_each(&vm->ractor.set, r, vmlr_node) { + rb_native_cond_signal(&r->barrier_wait_cond); + } +} + +void +rb_ractor_sched_barrier_join(rb_vm_t *vm, rb_ractor_t *cr) +{ + vm->ractor.sync.lock_owner = cr; + unsigned int barrier_cnt = vm->ractor.sync.barrier_cnt; + rb_thread_t *th = GET_THREAD(); + bool running; + + RB_VM_SAVE_MACHINE_CONTEXT(th); + + if (rb_ractor_status_p(cr, ractor_running)) { + rb_vm_ractor_blocking_cnt_inc(vm, cr, __FILE__, __LINE__); + running = true; + } + else { + running = false; + } + VM_ASSERT(rb_ractor_status_p(cr, ractor_blocking)); + + if (vm_barrier_finish_p(vm)) { + RUBY_DEBUG_LOG("wakeup barrier owner"); + rb_native_cond_signal(&vm->ractor.sync.barrier_cond); + } + else { + RUBY_DEBUG_LOG("wait for barrier finish"); + } + + // wait for restart + while (barrier_cnt == vm->ractor.sync.barrier_cnt) { + vm->ractor.sync.lock_owner = NULL; + rb_native_cond_wait(&cr->barrier_wait_cond, &vm->ractor.sync.lock); + VM_ASSERT(vm->ractor.sync.lock_owner == NULL); + vm->ractor.sync.lock_owner = cr; + } + + RUBY_DEBUG_LOG("barrier is released. 
Acquire vm_lock"); + + if (running) { + rb_vm_ractor_blocking_cnt_dec(vm, cr, __FILE__, __LINE__); + } + + vm->ractor.sync.lock_owner = NULL; +} + #endif /* THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION */ diff --git a/thread_win32.h b/thread_win32.h index 0dfe9d46de4ed9..23cd71fcfe08d9 100644 --- a/thread_win32.h +++ b/thread_win32.h @@ -27,7 +27,7 @@ struct rb_native_thread { }; struct rb_thread_sched_item { - char dmy; + void *vm_stack; }; struct rb_thread_sched { diff --git a/vm.c b/vm.c index a1241ce667fe21..1dc2b3f02c4b28 100644 --- a/vm.c +++ b/vm.c @@ -489,13 +489,14 @@ bool ruby_vm_keep_script_lines; #ifdef RB_THREAD_LOCAL_SPECIFIER RB_THREAD_LOCAL_SPECIFIER rb_execution_context_t *ruby_current_ec; + #ifdef RUBY_NT_SERIAL RB_THREAD_LOCAL_SPECIFIER rb_atomic_t ruby_nt_serial; #endif -#ifdef __APPLE__ +// no-inline decl on thread_pthread.h rb_execution_context_t * -rb_current_ec(void) +rb_current_ec_noinline(void) { return ruby_current_ec; } @@ -505,8 +506,16 @@ rb_current_ec_set(rb_execution_context_t *ec) { ruby_current_ec = ec; } -#endif + +#ifdef __APPLE__ +rb_execution_context_t * +rb_current_ec(void) +{ + return ruby_current_ec; +} + +#endif #else native_tls_key_t ruby_current_ec_key; #endif @@ -2805,6 +2814,8 @@ vm_mark_negative_cme(VALUE val, void *dmy) return ID_TABLE_CONTINUE; } +void rb_thread_sched_mark_zombies(rb_vm_t *vm); + void rb_vm_mark(void *ptr) { @@ -2876,6 +2887,7 @@ rb_vm_mark(void *ptr) } } + rb_thread_sched_mark_zombies(vm); rb_rjit_mark(); } @@ -3289,12 +3301,16 @@ thread_mark(void *ptr) RUBY_MARK_LEAVE("thread"); } +void rb_threadptr_sched_free(rb_thread_t *th); // thread_*.c + static void thread_free(void *ptr) { rb_thread_t *th = ptr; RUBY_FREE_ENTER("thread"); + rb_threadptr_sched_free(th); + if (th->locking_mutex != Qfalse) { rb_bug("thread_free: locking_mutex must be NULL (%p:%p)", (void *)th, (void *)th->locking_mutex); } @@ -3308,7 +3324,8 @@ thread_free(void *ptr) RUBY_GC_INFO("MRI main thread\n"); } else { - ruby_xfree(th->nt); 
// TODO + // ruby_xfree(th->nt); + // TODO: MN system collect nt, but without MN system it should be freed here. ruby_xfree(th); } @@ -3429,8 +3446,10 @@ th_init(rb_thread_t *th, VALUE self, rb_vm_t *vm) th->ext_config.ractor_safe = true; #if USE_RUBY_DEBUG_LOG - static rb_atomic_t thread_serial = 0; + static rb_atomic_t thread_serial = 1; th->serial = RUBY_ATOMIC_FETCH_ADD(thread_serial, 1); + + RUBY_DEBUG_LOG("th:%u", th->serial); #endif } @@ -4058,8 +4077,11 @@ Init_BareVM(void) // setup ractor system rb_native_mutex_initialize(&vm->ractor.sync.lock); - rb_native_cond_initialize(&vm->ractor.sync.barrier_cond); rb_native_cond_initialize(&vm->ractor.sync.terminate_cond); + +#ifdef RUBY_THREAD_WIN32_H + rb_native_cond_initialize(&vm->ractor.sync.barrier_cond); +#endif } #ifndef _WIN32 diff --git a/vm_core.h b/vm_core.h index a5c08d3f725dc6..6f2fca6537c457 100644 --- a/vm_core.h +++ b/vm_core.h @@ -634,15 +634,51 @@ typedef struct rb_vm_struct { struct rb_ractor_struct *lock_owner; unsigned int lock_rec; - // barrier - bool barrier_waiting; - unsigned int barrier_cnt; - rb_nativethread_cond_t barrier_cond; - // join at exit rb_nativethread_cond_t terminate_cond; bool terminate_waiting; + +#ifndef RUBY_THREAD_PTHREAD_H + bool barrier_waiting; + unsigned int barrier_cnt; + rb_nativethread_cond_t barrier_cond; +#endif } sync; + + // ractor scheduling + struct { + rb_nativethread_lock_t lock; + struct rb_ractor_struct *lock_owner; + bool locked; + + rb_nativethread_cond_t cond; // GRQ + unsigned int snt_cnt; // count of shared NTs + unsigned int dnt_cnt; // count of dedicated NTs + + unsigned int running_cnt; + + unsigned int max_cpu; + struct ccan_list_head grq; // // Global Ready Queue + unsigned int grq_cnt; + + // running threads + struct ccan_list_head running_threads; + + // threads which switch context by timeslice + struct ccan_list_head timeslice_threads; + + struct ccan_list_head zombie_threads; + + // true if timeslice timer is not enable + bool 
timeslice_wait_inf; + + // barrier + rb_nativethread_cond_t barrier_complete_cond; + rb_nativethread_cond_t barrier_release_cond; + bool barrier_waiting; + unsigned int barrier_waiting_cnt; + unsigned int barrier_serial; + } sched; } ractor; #ifdef USE_SIGALTSTACK @@ -1739,6 +1775,7 @@ rb_vm_living_threads_init(rb_vm_t *vm) ccan_list_head_init(&vm->waiting_fds); ccan_list_head_init(&vm->workqueue); ccan_list_head_init(&vm->ractor.set); + ccan_list_head_init(&vm->ractor.sched.zombie_threads); } typedef int rb_backtrace_iter_func(void *, VALUE, int, VALUE); @@ -1839,6 +1876,20 @@ rb_current_execution_context(bool expect_ec) #else rb_execution_context_t *ec = ruby_current_ec; #endif + + /* On the shared objects, `__tls_get_addr()` is used to access the TLS + * and the address of the `ruby_current_ec` can be stored on a function + * frame. However, this address can be mis-used after native thread + * migration of a coroutine. + * 1) Get `ptr =&ruby_current_ec` op NT1 and store it on the frame. + * 2) Context switch and resume it on the NT2. + * 3) `ptr` is used on NT2 but it accesses to the TLS on NT1. + * This assertion checks such misusage. + * + * To avoid accidents, `GET_EC()` should be called once on the frame. + * Note that inlining can produce the problem. 
+ */ + VM_ASSERT(ec == rb_current_ec_noinline()); #else rb_execution_context_t *ec = native_tls_get(ruby_current_ec_key); #endif diff --git a/vm_sync.c b/vm_sync.c index 01c85053446480..4bef232f20e830 100644 --- a/vm_sync.c +++ b/vm_sync.c @@ -5,7 +5,8 @@ #include "ractor_core.h" #include "vm_debug.h" -static bool vm_barrier_finish_p(rb_vm_t *vm); +void rb_ractor_sched_barrier_start(rb_vm_t *vm, rb_ractor_t *cr); +void rb_ractor_sched_barrier_join(rb_vm_t *vm, rb_ractor_t *cr); static bool vm_locked(rb_vm_t *vm) @@ -52,56 +53,32 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsign // locking ractor and acquire VM lock will cause deadlock VM_ASSERT(cr->sync.locked_by != rb_ractor_self(cr)); #endif - // lock rb_native_mutex_lock(&vm->ractor.sync.lock); VM_ASSERT(vm->ractor.sync.lock_owner == NULL); - vm->ractor.sync.lock_owner = cr; + VM_ASSERT(vm->ractor.sync.lock_rec == 0); + +#ifdef RUBY_THREAD_PTHREAD_H + if (!no_barrier && + cr->threads.sched.running != NULL // ractor has running threads. 
+ ) { + while (vm->ractor.sched.barrier_waiting) { + RUBY_DEBUG_LOG("barrier serial:%u", vm->ractor.sched.barrier_serial); + rb_ractor_sched_barrier_join(vm, cr); + } + } +#else if (!no_barrier) { - // barrier while (vm->ractor.sync.barrier_waiting) { - unsigned int barrier_cnt = vm->ractor.sync.barrier_cnt; - rb_thread_t *th = GET_THREAD(); - bool running; - - RB_VM_SAVE_MACHINE_CONTEXT(th); - - if (rb_ractor_status_p(cr, ractor_running)) { - rb_vm_ractor_blocking_cnt_inc(vm, cr, __FILE__, __LINE__); - running = true; - } - else { - running = false; - } - VM_ASSERT(rb_ractor_status_p(cr, ractor_blocking)); - - if (vm_barrier_finish_p(vm)) { - RUBY_DEBUG_LOG("wakeup barrier owner"); - rb_native_cond_signal(&vm->ractor.sync.barrier_cond); - } - else { - RUBY_DEBUG_LOG("wait for barrier finish"); - } - - // wait for restart - while (barrier_cnt == vm->ractor.sync.barrier_cnt) { - vm->ractor.sync.lock_owner = NULL; - rb_native_cond_wait(&cr->barrier_wait_cond, &vm->ractor.sync.lock); - VM_ASSERT(vm->ractor.sync.lock_owner == NULL); - vm->ractor.sync.lock_owner = cr; - } - - RUBY_DEBUG_LOG("barrier is released. Acquire vm_lock"); - - if (running) { - rb_vm_ractor_blocking_cnt_dec(vm, cr, __FILE__, __LINE__); - } + rb_ractor_sched_barrier_join(vm, cr); } } +#endif VM_ASSERT(vm->ractor.sync.lock_rec == 0); - VM_ASSERT(vm->ractor.sync.lock_owner == cr); + VM_ASSERT(vm->ractor.sync.lock_owner == NULL); + vm->ractor.sync.lock_owner = cr; } vm->ractor.sync.lock_rec++; @@ -114,8 +91,9 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsign static void vm_lock_leave(rb_vm_t *vm, unsigned int *lev APPEND_LOCATION_ARGS) { - RUBY_DEBUG_LOG2(file, line, "rec:%u owner:%u", vm->ractor.sync.lock_rec, - (unsigned int)rb_ractor_id(vm->ractor.sync.lock_owner)); + RUBY_DEBUG_LOG2(file, line, "rec:%u owner:%u%s", vm->ractor.sync.lock_rec, + (unsigned int)rb_ractor_id(vm->ractor.sync.lock_owner), + vm->ractor.sync.lock_rec == 1 ? 
" (leave)" : ""); ASSERT_vm_locking(); VM_ASSERT(vm->ractor.sync.lock_rec > 0); @@ -216,18 +194,6 @@ rb_vm_cond_timedwait(rb_vm_t *vm, rb_nativethread_cond_t *cond, unsigned long ms vm_cond_wait(vm, cond, msec); } -static bool -vm_barrier_finish_p(rb_vm_t *vm) -{ - RUBY_DEBUG_LOG("cnt:%u living:%u blocking:%u", - vm->ractor.sync.barrier_cnt, - vm->ractor.cnt, - vm->ractor.blocking_cnt); - - VM_ASSERT(vm->ractor.blocking_cnt <= vm->ractor.cnt); - return vm->ractor.blocking_cnt == vm->ractor.cnt; -} - void rb_vm_barrier(void) { @@ -239,45 +205,13 @@ rb_vm_barrier(void) } else { rb_vm_t *vm = GET_VM(); - VM_ASSERT(vm->ractor.sync.barrier_waiting == false); + VM_ASSERT(!vm->ractor.sched.barrier_waiting); ASSERT_vm_locking(); - rb_ractor_t *cr = vm->ractor.sync.lock_owner; VM_ASSERT(cr == GET_RACTOR()); VM_ASSERT(rb_ractor_status_p(cr, ractor_running)); - vm->ractor.sync.barrier_waiting = true; - - RUBY_DEBUG_LOG("barrier start. cnt:%u living:%u blocking:%u", - vm->ractor.sync.barrier_cnt, - vm->ractor.cnt, - vm->ractor.blocking_cnt); - - rb_vm_ractor_blocking_cnt_inc(vm, cr, __FILE__, __LINE__); - - // send signal - rb_ractor_t *r = 0; - ccan_list_for_each(&vm->ractor.set, r, vmlr_node) { - if (r != cr) { - rb_ractor_vm_barrier_interrupt_running_thread(r); - } - } - - // wait - while (!vm_barrier_finish_p(vm)) { - rb_vm_cond_wait(vm, &vm->ractor.sync.barrier_cond); - } - - RUBY_DEBUG_LOG("cnt:%u barrier success", vm->ractor.sync.barrier_cnt); - - rb_vm_ractor_blocking_cnt_dec(vm, cr, __FILE__, __LINE__); - - vm->ractor.sync.barrier_waiting = false; - vm->ractor.sync.barrier_cnt++; - - ccan_list_for_each(&vm->ractor.set, r, vmlr_node) { - rb_native_cond_signal(&r->barrier_wait_cond); - } + rb_ractor_sched_barrier_start(vm, cr); } }