From b945cd2728c615981dc0913688681d79647f96ad Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Sat, 17 Sep 2022 15:38:37 +0200 Subject: [PATCH 01/32] add "s_spinlock.h" --- src/s_spinlock.h | 394 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 394 insertions(+) create mode 100644 src/s_spinlock.h diff --git a/src/s_spinlock.h b/src/s_spinlock.h new file mode 100644 index 0000000000..0e6fedb95f --- /dev/null +++ b/src/s_spinlock.h @@ -0,0 +1,394 @@ +/* Copyright (c) 2021 Christof Ressi. + * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. */ + +/* header-only C/C++ spinlock library; can also be used by externals! */ + +#ifndef S_SPINLOCK_H +#define S_SPINLOCK_H + +#include +#include + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +/* C++11 atomics */ +# include +# define ALIGNAS(x) alignas(x) +using std::atomic_int; +using std::atomic_uint; +using std::atomic_load_explicit; +using std::atomic_exchange_explicit; +using std::atomic_fetch_add_explicit; +using std::atomic_fetch_sub_explicit; +using std::memory_order_acquire; +using std::memory_order_release; +using std::memory_order_acq_rel; +using std::memory_order_relaxed; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) \ + && !defined(__STDC_NO_ATOMICS__) +/* C11 atomics */ +# include +# include +# define ALIGNAS(x) _Alignas(x) +#elif defined(_MSC_VER) +/* fallback for MSVC (which doesn't yet provide at the time of writing) */ +# pragma message ("C11 atomics not supported, using fallback for MSVC.") +# include +# define ALIGNAS(x) __declspec(align(x)) +# define MSVC_INTERLOCKED +#else +# error "Missing support for C11/C++11 atomics." 
+#endif + +#define CACHELINE_SIZE 64 + +/* t_spinlock */ + +typedef struct _spinlock +{ +#ifdef MSVC_INTERLOCKED + volatile unsigned long state; /* volatile: the spin loops must re-read it on every iteration */ +#else + atomic_uint state; +#endif +} t_spinlock; + +static inline void spinlock_init(t_spinlock *x); +static inline void spinlock_lock(t_spinlock *x); +static inline int spinlock_trylock(t_spinlock *x); +static inline void spinlock_unlock(t_spinlock *x); + +/* t_padded_spinlock */ + +typedef struct _padded_spinlock +{ + ALIGNAS(CACHELINE_SIZE) t_spinlock lock; + char padding[CACHELINE_SIZE - sizeof(t_spinlock)]; /* fill up the rest of the cache line */ +} t_padded_spinlock; + +#define padded_spinlock_init(x) spinlock_init(&((x)->lock)) +#define padded_spinlock_lock(x) spinlock_lock(&((x)->lock)) +#define padded_spinlock_trylock(x) spinlock_trylock(&((x)->lock)) +#define padded_spinlock_unlock(x) spinlock_unlock(&((x)->lock)) + +/* t_rwspinlock */ + +typedef struct _rwspinlock +{ +#ifdef MSVC_INTERLOCKED + volatile unsigned long state; /* volatile: the spin loops must re-read it on every iteration */ +#else + atomic_uint state; +#endif +} t_rwspinlock; + +static inline void rwspinlock_init(t_rwspinlock *x); +/* writer */ +static inline void rwspinlock_wrlock(t_rwspinlock *x); +static inline int rwspinlock_trywrlock(t_rwspinlock *x); +static inline void rwspinlock_wrunlock(t_rwspinlock *x); +/* reader */ +static inline void rwspinlock_rdlock(t_rwspinlock *x); +static inline int rwspinlock_tryrdlock(t_rwspinlock *x); +static inline void rwspinlock_rdunlock(t_rwspinlock *x); + +/* t_padded_rwspinlock */ + +typedef struct _padded_rwspinlock +{ + ALIGNAS(CACHELINE_SIZE) t_rwspinlock lock; + char padding[CACHELINE_SIZE - sizeof(t_rwspinlock)]; /* fill up the rest of the cache line */ +} t_padded_rwspinlock; + +#define padded_rwspinlock_init(x) rwspinlock_init(&((x)->lock)) +/* writer */ +#define padded_rwspinlock_wrlock(x) rwspinlock_wrlock(&((x)->lock)) +#define padded_rwspinlock_trywrlock(x) rwspinlock_trywrlock(&((x)->lock)) +#define padded_rwspinlock_wrunlock(x) rwspinlock_wrunlock(&((x)->lock)) +/* reader */ +#define padded_rwspinlock_rdlock(x) rwspinlock_rdlock(&((x)->lock)) +#define
padded_rwspinlock_tryrdlock(x) rwspinlock_tryrdlock(&((x)->lock)) +#define padded_rwspinlock_rdunlock(x) rwspinlock_rdunlock(&((x)->lock)) + + +/* ------------------------ implementation --------------------------- */ + +#define CHECK_ALIGNMENT(x) assert((((uintptr_t)&(x)) & (sizeof(x)-1)) == 0) /* asserts that the address bits selected by sizeof(x)-1 are zero; the argument is parenthesized so that any lvalue expression can be passed */ + +/* Intel */ +#if defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64__) || defined(_M_X64) +# define HAVE_PAUSE +# include +/* ARM */ +#elif (defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7M__) || \ + defined(__ARM_ARCH_7S__) || \ + defined(__ARM_ARCH_8A__) || \ + defined(__aarch64__)) +/* the 'yield' instruction is supported from ARMv6k onwards */ +# define HAVE_YIELD +#else +/* fallback */ +# ifdef __cplusplus +# include +# else +# include +# endif +#endif + +static inline void pause_cpu(void) +{ +#if defined(HAVE_PAUSE) + _mm_pause(); +#elif defined(HAVE_YIELD) + __asm__ __volatile__("yield"); +#else /* fallback */ + #warning "architecture does not support yield/pause instruction" +# ifdef __cplusplus + std::this_thread::yield(); +# else + thrd_yield(); +# endif +#endif +} + +/* -------------------- t_spinlock ---------------------- */ + +static inline void spinlock_init(t_spinlock *x) +{ + CHECK_ALIGNMENT(x->state); + x->state = 0; +} + +static inline int spinlock_trylock(t_spinlock *x) +{ +#ifdef MSVC_INTERLOCKED + return _InterlockedExchange(&x->state, 1) == 0; +#else + return atomic_exchange_explicit(&x->state, 1, memory_order_acquire) == 0; +#endif +} + +static inline void spinlock_lock(t_spinlock *x) +{ +#ifdef MSVC_INTERLOCKED + do { + while (x->state != 0) + pause_cpu(); + } while (_InterlockedExchange(&x->state, 1) != 0); +#else + /* only try to modify the shared state if the lock seems to be available.
+ * this should prevent unnecessary cache invalidation. */ + do { + while (atomic_load_explicit(&x->state, memory_order_relaxed) != 0) + pause_cpu(); + } while (atomic_exchange_explicit(&x->state, 1, memory_order_acquire) != 0); +#endif +} + +static inline void spinlock_unlock(t_spinlock *x) +{ +#ifdef MSVC_INTERLOCKED + _InterlockedExchange(&x->state, 0); +#else + atomic_store_explicit(&x->state, 0, memory_order_release); +#endif +} + +/* -------------------------- t_rwspinlock -------------------------- */ + +#define RWSPINLOCK_UNLOCKED 0 +#define RWSPINLOCK_LOCKED 0x80000000 /* writer flag (bit 31); readers add/subtract 1 in the lower bits */ +/* use fetch-and-add version (optimized for readers) */ +#define RWSPINLOCK_FETCH_AND_ADD 1 + +static inline void rwspinlock_init(t_rwspinlock *x) +{ + CHECK_ALIGNMENT(x->state); + x->state = 0; +} + +static inline int rwspinlock_trywrlock(t_rwspinlock *x) +{ /* a single attempt: non-zero if the state went from UNLOCKED to LOCKED, otherwise 0 */ +#ifdef MSVC_INTERLOCKED + return _InterlockedCompareExchange(&x->state, RWSPINLOCK_LOCKED, RWSPINLOCK_UNLOCKED) == RWSPINLOCK_UNLOCKED; +#else + uint32_t expected = RWSPINLOCK_UNLOCKED; + return atomic_compare_exchange_strong_explicit(&x->state, &expected, RWSPINLOCK_LOCKED, + memory_order_acquire, memory_order_relaxed); +#endif +} + +static inline void rwspinlock_wrlock(t_rwspinlock *x) +{ + /* only try to modify the shared state if the lock seems to be available. + * this should prevent unnecessary cache invalidation. */ +#ifdef MSVC_INTERLOCKED + for (;;) + { + if (x->state == RWSPINLOCK_UNLOCKED) + { + /* check if state is UNLOCKED and set LOCKED bit on success. */ + if (_InterlockedCompareExchange(&x->state, RWSPINLOCK_LOCKED, RWSPINLOCK_UNLOCKED) == RWSPINLOCK_UNLOCKED) + return; + /* CAS failed -> retry immediately */ + } else + pause_cpu(); + } +#else + for (;;) + { + if (atomic_load_explicit(&x->state, memory_order_relaxed) == RWSPINLOCK_UNLOCKED) + { + /* check if state is UNLOCKED and set LOCKED bit on success.
*/ + uint32_t expected = RWSPINLOCK_UNLOCKED; + if (atomic_compare_exchange_weak_explicit(&x->state, &expected, RWSPINLOCK_LOCKED, + memory_order_acquire, memory_order_relaxed)) return; + /* CAS failed -> retry immediately */ + } else + pause_cpu(); + } +#endif +} + +static inline void rwspinlock_wrunlock(t_rwspinlock *x) +{ +#if RWSPINLOCK_FETCH_AND_ADD + /* clear "locked" bit, see rwspinlock_tryrdlock() */ +# ifdef MSVC_INTERLOCKED + _InterlockedAnd(&x->state, ~RWSPINLOCK_LOCKED); +# else + atomic_fetch_and_explicit(&x->state, ~RWSPINLOCK_LOCKED, memory_order_release); +# endif +#else /* CAS */ +# ifdef MSVC_INTERLOCKED + _InterlockedExchange(&x->state, RWSPINLOCK_UNLOCKED); +# else + atomic_store_explicit(&x->state, RWSPINLOCK_UNLOCKED, memory_order_release); +# endif +#endif +} + +static inline int rwspinlock_tryrdlock(t_rwspinlock *x) +{ +#if RWSPINLOCK_FETCH_AND_ADD + /* optimistically increment the reader count and then check if the "locked" + * bit is set; if it is, we simply decrement the reader count again. + * This is optimized for the likely case that there's no writer. */ +# ifdef MSVC_INTERLOCKED + unsigned long state = _InterlockedIncrement(&x->state); + if ((state & RWSPINLOCK_LOCKED) == 0) + return 1; + else + { + _InterlockedDecrement(&x->state); + return 0; + } +# else + uint32_t state = atomic_fetch_add_explicit(&x->state, 1, memory_order_acquire); + if ((state & RWSPINLOCK_LOCKED) == 0) + return 1; + else + { + atomic_fetch_sub_explicit(&x->state, 1, memory_order_acq_rel); + return 0; + } +# endif +#else /* CAS */ + /* We need a loop because the CAS can fail if another *reader* acquires/releases + * the lock concurrently. We shouldn't consider this a failure!
*/ +# ifdef MSVC_INTERLOCKED + for (;;) + { + unsigned long state = x->state; + if ((state & RWSPINLOCK_LOCKED) == 0) + { + if (_InterlockedCompareExchange(&x->state, state + 1, state) == state) + return 1; + /* CAS failed -> retry */ + } + else + return 0; + } +# else + uint32_t state = atomic_load_explicit(&x->state, memory_order_relaxed); + for (;;) + { + if ((state & RWSPINLOCK_LOCKED) == 0) + { + if (atomic_compare_exchange_weak_explicit(&x->state, &state, state + 1, + memory_order_acquire, memory_order_relaxed)) return 1; + /* CAS failed -> retry; 'state' has been updated */ + } + else + return 0; + } +# endif +#endif +} + +static inline void rwspinlock_rdlock(t_rwspinlock *x) +{ +#if RWSPINLOCK_FETCH_AND_ADD + /* only try to modify the shared state if the lock seems to be available. + * this should prevent unnecessary cache invalidation. */ + for (;;) + { +# ifdef MSVC_INTERLOCKED + unsigned long state = x->state; +# else + uint32_t state = atomic_load_explicit(&x->state, memory_order_relaxed); +# endif + if (!(state & RWSPINLOCK_LOCKED) && rwspinlock_tryrdlock(x)) + return; + else + pause_cpu(); + } +#else /* CAS */ + /* with RWSPINLOCK_LOCKED masked away, the CAS will fail if the + * spinlock is currently locked. NB: the CAS can also fail if + * another reader acquires/releases the lock concurrently. */ +# ifdef MSVC_INTERLOCKED + for (;;) + { + unsigned long state = x->state & ~RWSPINLOCK_LOCKED; + if (_InterlockedCompareExchange(&x->state, state + 1, state) == state) + return; + else + pause_cpu(); + } +# else + for (;;) + { + uint32_t state = atomic_load_explicit(&x->state, memory_order_relaxed); + state &= ~RWSPINLOCK_LOCKED; + if (atomic_compare_exchange_weak_explicit(&x->state, &state, state + 1, + memory_order_acquire, memory_order_relaxed)) return; + else /* NB: don't use updated 'state', instead read again after pause!
*/ + pause_cpu(); + } +# endif +#endif +} + +static inline void rwspinlock_rdunlock(t_rwspinlock *x) +{ +#ifdef MSVC_INTERLOCKED + _InterlockedDecrement(&x->state); +#else + atomic_fetch_sub_explicit(&x->state, 1, memory_order_release); +#endif +} + +#undef CACHELINE_SIZE +#undef ALIGNAS +/* keep MSVC_INTERLOCKED and CHECK_ALIGNMENT */ + +#endif /* S_SPINLOCK_H */ From acc106853dd8a8c6837abff70460a086323b616f Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Mon, 20 Dec 2021 16:47:17 +0100 Subject: [PATCH 02/32] add s_sync.h / s_sync.c --- src/Makefile.am | 1 + src/makefile.gnu | 2 +- src/makefile.mac | 2 +- src/makefile.mingw | 2 +- src/makefile.msvc | 2 +- src/s_sync.c | 216 +++++++++++++++++++++++++++++++++++++++++++++ src/s_sync.h | 95 ++++++++++++++++++++ 7 files changed, 316 insertions(+), 4 deletions(-) create mode 100644 src/s_sync.c create mode 100644 src/s_sync.h diff --git a/src/Makefile.am b/src/Makefile.am index 0780c4c859..4f1d4328b5 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -156,6 +156,7 @@ pd_SOURCES_core = \ s_net.c \ s_path.c \ s_print.c \ + s_sync.c \ s_utf8.c \ x_acoustics.c \ x_arithmetic.c \ diff --git a/src/makefile.gnu b/src/makefile.gnu index f44d78690c..89c32c6a21 100644 --- a/src/makefile.gnu +++ b/src/makefile.gnu @@ -116,7 +116,7 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ s_main.c s_inter.c s_inter_gui.c s_print.c s_loader.c s_path.c s_entry.c \ - s_audio.c s_audio_paring.c s_midi.c s_net.c s_utf8.c \ + s_audio.c s_audio_paring.c s_midi.c s_net.c s_sync.c s_utf8.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ d_delay.c d_resample.c d_soundfile.c d_soundfile_aiff.c d_soundfile_caf.c \ diff --git a/src/makefile.mac b/src/makefile.mac index a355ebd85a..d34d27dfdc 100644 --- a/src/makefile.mac +++ b/src/makefile.mac @@ -90,7 
+90,7 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ g_editor_extras.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ - s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c \ + s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c s_sync.c \ s_loader.c s_path.c s_entry.c s_audio.c s_midi.c s_net.c s_utf8.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ diff --git a/src/makefile.mingw b/src/makefile.mingw index b42004ef81..4de3ca4e4f 100644 --- a/src/makefile.mingw +++ b/src/makefile.mingw @@ -124,7 +124,7 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ g_editor_extras.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ - s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c \ + s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c s_sync.c \ s_loader.c s_path.c s_entry.c s_audio.c s_midi.c s_net.c s_utf8.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ diff --git a/src/makefile.msvc b/src/makefile.msvc index cd76e1ac3c..9cf819b30e 100644 --- a/src/makefile.msvc +++ b/src/makefile.msvc @@ -87,7 +87,7 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ g_editor_extras.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ - s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c \ + s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c s_sync.c \ s_loader.c s_path.c s_entry.c s_audio.c s_midi.c s_net.c s_utf8.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ diff --git a/src/s_sync.c b/src/s_sync.c new file mode 100644 index 0000000000..8d3ba5ba45 --- /dev/null +++ b/src/s_sync.c @@ -0,0 +1,216 @@ +/* Copyright (c) 2021 Christof Ressi. 
+ * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. */ + +/* thread synchronisation tools. */ + +/* currently, this file is only needed for PD_DSPTHREADS */ +#if PD_DSPTHREADS + +#include "s_sync.h" + +#ifdef _WIN32 +# include +#endif + +/* ----------------------- t_lockfree_stack ---------------------- */ + +void lockfree_stack_init(t_lockfree_stack *x) +{ + CHECK_ALIGNMENT(x->x_head); + x->x_head = NULL; +} + +void lockfree_stack_push(t_lockfree_stack *x, void *y) +{ /* link 'y' in front of the current head; the CAS is retried until the head did not change in between */ + t_lfs_node *node = (t_lfs_node *)y; +#ifdef MSVC_INTERLOCKED + do + { + node->x_next = x->x_head; + } + while (_InterlockedCompareExchangePointer(&x->x_head, node, node->x_next) != node->x_next); +#else + node->x_next = atomic_load_explicit(&x->x_head, memory_order_relaxed); + while (!atomic_compare_exchange_weak_explicit(&x->x_head, &node->x_next, node, + memory_order_release, memory_order_relaxed)) ; +#endif +} + +void * lockfree_stack_pop(t_lockfree_stack *x) +{ /* returns the former head, or NULL if the stack is empty. NOTE(review): head->x_next is read before the CAS confirms that 'head' is still the top node; presumably callers guarantee that a node is not freed or pushed again while another thread may be popping (ABA) - confirm */ +#ifdef MSVC_INTERLOCKED + t_lfs_node *head; + do + { + head = x->x_head; + } + while (head && _InterlockedCompareExchangePointer(&x->x_head, head->x_next, head) != head); +#else + t_lfs_node *head = atomic_load_explicit(&x->x_head, memory_order_relaxed); + while (head && !atomic_compare_exchange_weak_explicit(&x->x_head, &head, + head->x_next, memory_order_acquire, memory_order_relaxed)) ; +#endif + return head; +} + +void * lockfree_stack_release(t_lockfree_stack *x) +{ /* atomically replace the head with NULL and return the former head */ +#ifdef MSVC_INTERLOCKED + return (void *)_InterlockedExchangePointer(&x->x_head, NULL); +#else + return (void *)atomic_exchange(&x->x_head, NULL); +#endif +} + +/* -------------------- t_native_semaphore -------------------- */ + +int native_semaphore_init(t_native_semaphore *x) +{ +#if defined(_WIN32) + return (x->sem = CreateSemaphoreA(0, 0, INT_MAX, 0)) ?
0 : -1; +#elif defined(__APPLE__) + return (semaphore_create(mach_task_self(), &x->sem, SYNC_POLICY_FIFO, 0) + == KERN_SUCCESS) ? 0 : -1; +#else /* posix */ + return sem_init(&x->sem, 0, 0); +#endif +} + +int native_semaphore_destroy(t_native_semaphore *x) +{ +#if defined(_WIN32) + return CloseHandle(x->sem) ? 0 : -1; +#elif defined(__APPLE__) + return (semaphore_destroy(mach_task_self(), x->sem) == KERN_SUCCESS) ? 0 : -1; +#else /* posix */ + return sem_destroy(&x->sem); +#endif +} + +int native_semaphore_post(t_native_semaphore *x) +{ /* 0 on success, -1 on failure */ +#if defined(_WIN32) + return ReleaseSemaphore(x->sem, 1, 0) ? 0 : -1; +#elif defined(__APPLE__) + return (semaphore_signal(x->sem) == KERN_SUCCESS) ? 0 : -1; /* must be negative: native_semaphore_postn() tests for < 0 */ +#else /* posix */ + return sem_post(&x->sem); +#endif +} + +int native_semaphore_postn(t_native_semaphore *x, int count) +{ +#if defined(_WIN32) + return ReleaseSemaphore(x->sem, count, 0) ? 0 : -1; +#else + for (int i = 0; i < count; ++i) + { + if (native_semaphore_post(x) < 0) + return -1; + } + return 0; +#endif +} + +int native_semaphore_wait(t_native_semaphore *x) +{ +#if defined(_WIN32) + return (WaitForSingleObject(x->sem, INFINITE) != WAIT_FAILED) ? 0 : -1; +#elif defined(__APPLE__) + return (semaphore_wait(x->sem) == KERN_SUCCESS) ? 0 : -1; +#else /* posix */ + for (;;) + { + int ret = sem_wait(&x->sem); + if (ret == 0) + return 0; + else if (errno == EINTR) + continue; + else + return -1; + } +#endif +} + +/* t_fast_semaphore */ + +int fast_semaphore_init(t_fast_semaphore *x) +{ + CHECK_ALIGNMENT(x->count); + x->count = 0; + return native_semaphore_init(&x->sem); +} + +int fast_semaphore_destroy(t_fast_semaphore *x){ + return native_semaphore_destroy(&x->sem); +} + +int fast_semaphore_post(t_fast_semaphore *x) +{ +#ifdef MSVC_INTERLOCKED + int old = _InterlockedIncrement(&x->count) - 1; /* returns new value!
*/ +#else + int old = atomic_fetch_add_explicit(&x->count, 1, memory_order_release); +#endif + if (old < 0) /* negative count: fast_semaphore_wait() callers went on to the native semaphore */ + return native_semaphore_post(&x->sem); + else + return 0; +} + +int fast_semaphore_postn(t_fast_semaphore *x, int count) +{ +#ifdef MSVC_INTERLOCKED + int old = _InterlockedExchangeAdd(&x->count, count); /* returns old value */ +#else + int old = atomic_fetch_add_explicit(&x->count, count, memory_order_release); +#endif + if (old < 0) + { + int release = -old < count ? -old : count; /* post min(-old, count) times to the native semaphore */ + return native_semaphore_postn(&x->sem, release); + } + else + return 0; +} + +int fast_semaphore_wait(t_fast_semaphore *x) +{ +#ifdef MSVC_INTERLOCKED + int old = _InterlockedDecrement(&x->count) + 1; /* returns new value! */ +#else + int old = atomic_fetch_sub_explicit(&x->count, 1, memory_order_acquire); +#endif + if (old <= 0) /* the count was not positive -> wait on the native semaphore */ + return native_semaphore_wait(&x->sem); + else + return 0; +} + +/* returns 1 on success, 0 on failure */ +int fast_semaphore_trywait(t_fast_semaphore *x) { +#ifdef MSVC_INTERLOCKED + int value = x->count; +#else + int value = atomic_load_explicit(&x->count, memory_order_relaxed); +#endif + /* NOTE: we need a loop because another thread might decrement the count + * concurrently, which does not necessarily mean that we have failed! */ + while (value > 0) + { + #ifdef MSVC_INTERLOCKED + if (_InterlockedCompareExchange(&x->count, value - 1, value) == value) + return 1; + /* CAS failed -> retry and update */ + value = x->count; + #else + if (atomic_compare_exchange_weak_explicit(&x->count, &value, value - 1, + memory_order_acquire, memory_order_relaxed)) return 1; + /* CAS failed -> retry; 'value' has been updated */ + #endif + } + return 0; +} + +#endif /* PD_DSPTHREADS */ diff --git a/src/s_sync.h b/src/s_sync.h new file mode 100644 index 0000000000..111655620b --- /dev/null +++ b/src/s_sync.h @@ -0,0 +1,95 @@ +/* Copyright (c) 2021 Christof Ressi.
+ * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. */ + +/* thread synchronisation tools */ + +#ifndef S_SYNC_H +#define S_SYNC_H + +/* for atomics */ +#include "s_spinlock.h" + +#ifdef _WIN32 +/* use Win32 Semaphores */ +#elif defined(__APPLE__) +/* macOS doesn't support unnamed Posix semaphores, + * so we use Mach semaphores instead. */ +# include +#elif defined(__linux__) || defined(__FreeBSD__) \ + || defined(__NetBSD__) || defined(__OpenBSD__) +/* Linux or BSD: use Posix semaphores */ +# include +# include +#else +# error "Platform not supported!" +#endif + +/* -------------------- t_lockfree_stack ---------------------- */ + +/* nodes must have t_lfs_node as its first member */ + +typedef struct _lfs_node +{ + struct _lfs_node *x_next; +} t_lfs_node; + +#define lfs_node_init(x) ((t_lfs_node *)(x))->x_next = 0 +#define lfs_node_next(x) ((void *)((t_lfs_node *)(x))->x_next) + +typedef struct _lockfree_stack +{ +#ifdef MSVC_INTERLOCKED + t_lfs_node *x_head; +#else + t_lfs_node * _Atomic x_head; +#endif +} t_lockfree_stack; + +void lockfree_stack_init(t_lockfree_stack *x); +void lockfree_stack_push(t_lockfree_stack *x, void *node); +void * lockfree_stack_pop(t_lockfree_stack *x); +void * lockfree_stack_release(t_lockfree_stack *x); + +/* ------------------- t_native_semaphore -------------------- */ + +typedef struct _native_semaphore +{ +#if defined(_WIN32) + void *sem; +#elif defined(__APPLE__) + semaphore_t sem; +#else /* posix */ + sem_t sem; +#endif +} t_native_semaphore; + +int native_semaphore_init(t_native_semaphore *x); +int native_semaphore_destroy(t_native_semaphore *x); +int native_semaphore_post(t_native_semaphore *x); +int native_semaphore_postn(t_native_semaphore *x, int count); +int native_semaphore_wait(t_native_semaphore *x); + +/* ------------------- t_fast_semaphore ------------------ */ + +/* thanks to 
https://preshing.com/20150316/semaphores-are-surprisingly-versatile */ + +typedef struct _fast_semaphore +{ + t_native_semaphore sem; +#ifdef MSVC_INTERLOCKED + long count; +#else + atomic_int count; +#endif +} t_fast_semaphore; + +int fast_semaphore_init(t_fast_semaphore *x); +int fast_semaphore_destroy(t_fast_semaphore *x); +int fast_semaphore_post(t_fast_semaphore *x); +int fast_semaphore_postn(t_fast_semaphore *x, int count); +int fast_semaphore_wait(t_fast_semaphore *x); +/* returns 1 on success, 0 on failure */ +int fast_semaphore_trywait(t_fast_semaphore *x); + +#endif /* S_SYNC_H */ From 9f4db0c91bb778efc76e8cf8214d490684176f11 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Mon, 27 Dec 2021 01:25:57 +0100 Subject: [PATCH 03/32] add DSP thread pool API * public API to manage the DSP thread pool * private API to create DSP task queues and DSP tasks * PD_DSPTHREADS define to enable/disable the thread pool at compile time --- src/Makefile.am | 1 + src/d_threadpool.c | 707 +++++++++++++++++++++++++++++++++++++++++++++ src/makefile.gnu | 2 +- src/makefile.mac | 2 +- src/makefile.mingw | 2 +- src/makefile.msvc | 2 +- src/s_stuff.h | 31 ++ 7 files changed, 743 insertions(+), 4 deletions(-) create mode 100644 src/d_threadpool.c diff --git a/src/Makefile.am b/src/Makefile.am index 4f1d4328b5..e1d772e2bf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -103,6 +103,7 @@ pd_SOURCES_core = \ d_ctl.c \ d_dac.c \ d_delay.c \ + d_threadpool.c \ d_fft.c \ d_filter.c \ d_global.c \ diff --git a/src/d_threadpool.c b/src/d_threadpool.c new file mode 100644 index 0000000000..cb05a49bae --- /dev/null +++ b/src/d_threadpool.c @@ -0,0 +1,707 @@ +/* Copyright (c) 2021 Christof Ressi. + * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. 
*/ + +#include "m_pd.h" +#include "s_stuff.h" +#include "m_imp.h" + +#include +#include +#include + +#if PD_DSPTHREADS + +#include "s_sync.h" + +#include + +#if defined(_WIN32) +# include +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +# include +# include +#else /* Linux */ +# include +# include +#endif + +/* define for debugging DSP tasks and task queues */ +// #define DEBUG_DSPTHREADS + +/* ----------------------- thread utilities -------------------------- */ + + /* number of logical CPUs; 0: failure */ +static int thread_hardware_concurrency(void) +{ +#if defined(_WIN32) + SYSTEM_INFO info; + memset(&info, 0, sizeof(info)); + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + int count; + size_t size = sizeof(count); + if (sysctlbyname("hw.ncpu", &count, &size, NULL, 0) == 0) + return count; + else + { + fprintf(stderr, "sysctlbyname() failed (%d)\n", errno); + return 0; + } +#elif defined(_SC_NPROCESSORS_ONLN) /* single leading underscore, same name as passed to sysconf() below */ + int count = sysconf(_SC_NPROCESSORS_ONLN); + if (count > 0) + return count; + else + { + fprintf(stderr, "sysconf() failed (%d)\n", errno); + return 0; + } +#elif defined(__linux__) + return get_nprocs(); +#else + #warning "thread_hardware_concurrency() not implemented" + return 0; +#endif +} + + /* 0: failure */ +static int thread_physical_concurrency(void) +{ +#if defined(_WIN32) + typedef BOOL (WINAPI *LPFN_GLPI)( + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + + LPFN_GLPI glpi; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info; + DWORD err, size = 0; + int i, n, count = 0; + + /* available since Windows XP SP3 */ + glpi = (LPFN_GLPI) GetProcAddress( + GetModuleHandleA("kernel32"), "GetLogicalProcessorInformation"); + if (!glpi) + { + fprintf(stderr, "GetLogicalProcessorInformation() not supported;\n" + "fall back to thread_hardware_concurrency\n"); + return thread_hardware_concurrency(); + } + /* call with size 0 to retrieve actual size; + *
ERROR_INSUFFICIENT_BUFFER is expected. */ + glpi(NULL, &size); + if ((err = GetLastError()) != ERROR_INSUFFICIENT_BUFFER) + goto fail; + info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(size); + if (!info || glpi(info, &size) == FALSE) /* also bail out if malloc() failed, instead of passing NULL on */ + { + err = GetLastError(); + free(info); + goto fail; + } + n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + for (i = 0; i < n; ++i) + { + if (info[i].Relationship == RelationProcessorCore) + count++; + } + free(info); + return count; +fail: + fprintf(stderr, "GetLogicalProcessorInformation() failed (%d)\n", err); + return 0; +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + int count; + size_t size = sizeof(count); + if (sysctlbyname("hw.physicalcpu", &count, &size, NULL, 0) == 0) + return count; + else + { + fprintf(stderr, "sysctlbyname() failed (%d)\n", errno); + return 0; + } +#elif defined(__linux__) + /* The file /proc/cpuinfo contains all logical CPUs where + * each entry has a property "physical id" and "core id". + * We filter entries where those properties are the same + * (= SMT), so we end up with the number of physical CPUs.
*/ + #define MAXNUMCPUS 1024 + unsigned int cpus[MAXNUMCPUS]; + /* upper 2 bytes: physical ID, lower 2 bytes: core ID */ + unsigned int current = 0; + FILE *fp; + char *line = 0; + size_t len = 0; /* getline() reads the buffer size, so it must be initialized together with 'line' */ + int num = 0, count = 0; + fp = fopen("/proc/cpuinfo", "r"); + if (!fp) + { + fprintf(stderr, "could not open /proc/cpuinfo\n"); + return 0; + } + while ((getline(&line, &len, fp) >= 0) && (count < MAXNUMCPUS)) + { + const char *colon; + int i, value; + if (len == 0) + continue; + /* "physical id" comes first */ + if (strstr(line, "physical id")) + { + if (!(colon = strchr(line + strlen("physical id"), ':')) || + (sscanf(colon + 1, "%d", &value) < 1)) + { + count = 0; + break; + } + current = ((unsigned int)value) << 16; + } + /* followed by "core id" */ + else if (strstr(line, "core id")) + { + if (!(colon = strchr(line + strlen("core id"), ':')) || + (sscanf(colon + 1, "%d", &value) < 1)) + { + count = 0; + break; + } + current |= (unsigned int)value; + /* now check if this entry already exists */ + for (i = 0; i < count; ++i) + { + if (cpus[i] == current) + goto skip; + } + cpus[count++] = current; + skip: + #if 0 + fprintf(stderr, "CPU %d: physical id: %d, core id: %d\n", + num, current >> 16, current & 0xffff); + #endif + num++; + } + } + if (line) + free(line); + fclose(fp); + if (count == 0) + fprintf(stderr, "/proc/cpuinfo: unexpected format\n"); + return count; +#else + #warning "thread_physical_concurrency() not implemented" + /* fall back to hardware concurrency */ + return thread_hardware_concurrency(); +#endif +} + + /* 1: success, 0: failure */ +static int thread_set_realtime(void) +{ +#if defined(_WIN32) + /* Force high thread priority in case we're not a high priority process. + * This might be necessary for libpd when using the internal thread pool.
*/ + int pc = GetPriorityClass(GetCurrentProcess()); + if (!pc) + { + fprintf(stderr, "GetPriorityClass() failed (%d)\n", GetLastError()); + return 0; + } + if (pc != HIGH_PRIORITY_CLASS && pc != REALTIME_PRIORITY_CLASS) /* the class constants are not ordered by priority, so test for the two high classes explicitly */ + { + if (!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL)) + { + fprintf(stderr, "SetThreadPriority() failed (%d)\n", GetLastError()); + return 0; + } + } + return 1; +#elif defined(__APPLE__) + struct sched_param param; + int policy = SCHED_RR; + int err; + param.sched_priority = 80; /* adjust 0 : 100 */ + + err = pthread_setschedparam(pthread_self(), policy, &param); + if (err) + { + fprintf(stderr, "pthread_setschedparam() failed (%d)\n", err); + return 0; + } + return 1; +#else /* Linux + BSD, see sys_set_priority() */ + struct sched_param par; + int p; +#ifdef USEAPI_JACK + p = sched_get_priority_min(SCHED_FIFO) + 5; +#else + p = sched_get_priority_max(SCHED_FIFO) - 7; +#endif + par.sched_priority = p; + if (sched_setscheduler(0, SCHED_FIFO, &par) < 0) + { + fprintf(stderr, "sched_setscheduler() failed (%d)\n", errno); + return 0; + } + return 1; +#endif +} + +/* -------------------------- helper functions -------------------------- */ + +static int sys_maxnumdspthreads(void) +{ + /* only obtain once per thread (value is fixed) */ + static PERTHREAD int count = -1; + if (count < 0) + { + count = thread_hardware_concurrency(); + if (count <= 0) + { + fprintf(stderr, "thread_hardware_concurrency() failed; default to 1\n"); + count = 1; + } + } + return count; +} + + /* also used in sys_get_audio_settings() */ +int sys_defnumdspthreads(void) +{ + /* only obtain once per thread (value is fixed) */ + static PERTHREAD int count = -1; + if (count < 0) + { + #if 1 + /* use number of physical cores because SMT with + * all available CPUs can lead to worse performance */ + count = thread_physical_concurrency(); + if (count <= 0) + { + fprintf(stderr, "thread_physical_concurrency() failed, " + "use all available CPUs.\n"); + count = sys_maxnumdspthreads(); + } + #else +
/* use all available CPUs. */ + count = sys_maxnumdspthreads(); + #endif + } + return count; +} + +static void dspthread_setrealtime(int index) +{ + if (thread_set_realtime()) + { + if (sys_verbose) + fprintf(stderr, "DSP thread %d: set realtime priority\n", index); + } + else + fprintf(stderr, "DSP thread %d: couldn't set realtime priority\n", index); +} + +/* -------------------------- t_dspthreadpool --------------------------- */ + +typedef struct _dspthreadpool +{ +#ifdef MSVC_INTERLOCKED + long tp_running; +#else + atomic_int tp_running; /* helper threads leave their loop once this is 0 */ +#endif + int tp_n; /* number of helper threads (the main audio thread is not counted) */ + pthread_t *tp_threads; /* internally created helper threads; NULL if the threads are run by the user */ + t_lockfree_stack tp_tasks; /* DSP tasks waiting to be run */ + t_fast_semaphore tp_sem; /* helper threads wait on this when there are no more tasks */ +} t_dspthreadpool; + +static t_dspthreadpool *d_threadpool = NULL; + +static void dspthread_dorun(int index); + +static void * thread_function(void *x) +{ /* entry point of the internal helper threads; 'x' carries the thread index */ + int index = (int)(intptr_t)x; + if (sys_hipriority != 0) /* -1 or 1 */ + dspthread_setrealtime(index); + if (!d_threadpool) + { + bug("DSP thread pool not initialized!"); + return 0; + } + if (index == 0) + { + bug("thread index 0 reserved for main audio thread!"); + return 0; + } + else if (index < 0 || index > d_threadpool->tp_n) + { + bug("thread index %d out of range!", index); + return 0; + } + + dspthread_dorun(index); + + return NULL; +} + +int sys_havedspthreadpool(void) +{ + return 1; +} + + /* called with global lock set!
*/ +static void dspthreadpool_init(void) +{ + if (!d_threadpool) + { + d_threadpool = (t_dspthreadpool *)getbytes(sizeof(t_dspthreadpool)); + d_threadpool->tp_running = 0; + d_threadpool->tp_n = 0; + d_threadpool->tp_threads = 0; + lockfree_stack_init(&d_threadpool->tp_tasks); + fast_semaphore_init(&d_threadpool->tp_sem); + } +} + +void dspthreadpool_stop(int external) +{ + int n = d_threadpool->tp_n; + if (!n) /* no threads or already stopped */ + return; + if (sys_verbose) + fprintf(stderr, "stop DSP thread pool\n"); +#ifdef MSVC_INTERLOCKED + _InterlockedExchange(&d_threadpool->tp_running, 0); +#else + atomic_store(&d_threadpool->tp_running, 0); +#endif + /* wake up helper threads */ + fast_semaphore_postn(&d_threadpool->tp_sem, n); + if (!external) + { + /* join helper threads */ + for (int i = 1; i < n; ++i) + pthread_join(d_threadpool->tp_threads[i], NULL); + } + if (d_threadpool->tp_threads) + freebytes(d_threadpool->tp_threads, sizeof(pthread_t) * n); + d_threadpool->tp_threads = 0; + d_threadpool->tp_n = 0; +} + +int sys_dspthreadpool_start(int *numthreads, int external) +{ + int n, maxnumthreads; + pd_globallock(); /* global lock begin */ + dspthreadpool_init(); + dspthreadpool_stop(external); + /* validate DSP thread count */ + if (!numthreads || *numthreads < 1) + n = sys_defnumdspthreads(); + else + n = *numthreads; + maxnumthreads = sys_maxnumdspthreads(); + if (n > maxnumthreads) + n = maxnumthreads; + if (numthreads) + *numthreads = n; + + if (sys_verbose) + fprintf(stderr, "start DSP thread pool (using %d of %d CPUs)\n", + n, maxnumthreads); + + n--; /* we already have 1 audio thread */ + + d_threadpool->tp_running = 1; + if (external) /* DSP threads are created and run by the user */ + { + d_threadpool->tp_threads = NULL; + d_threadpool->tp_n = n; + } + else /* use internal DSP threads */ + { + if (n > 0) + { + d_threadpool->tp_threads = (pthread_t *)getbytes(sizeof(pthread_t) * n); + d_threadpool->tp_n = n; + /* spawn new threads; thread index 
starts at 1 */ + for (int i = 0; i < n; ++i) + pthread_create(&d_threadpool->tp_threads[i], + NULL, thread_function, (void *)(intptr_t)(i + 1)); + } + else /* single threaded */ + { + d_threadpool->tp_threads = 0; + d_threadpool->tp_n = 0; + } + } + pd_globalunlock(); /* global lock end */ + return 1; +} + +int sys_dspthreadpool_stop(int external) +{ + pd_globallock(); + dspthreadpool_init(); + dspthreadpool_stop(external); + pd_globalunlock(); + return 1; +} + +static void dspthreadpool_push(t_dsptask *task) +{ + lockfree_stack_push(&d_threadpool->tp_tasks, task); +} + +static t_dsptask * dspthreadpool_pop(void) +{ + return lockfree_stack_pop(&d_threadpool->tp_tasks); +} + +static void dsptask_run(t_dsptask *x, int index); + +static void dspthread_dorun(int index) +{ +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "DSP thread %d: start\n", index); +#endif +#ifdef MSVC_INTERLOCKED + while (d_threadpool->tp_running) +#else + while (atomic_load_explicit(&d_threadpool->tp_running, memory_order_relaxed)) +#endif + { + /* run as many tasks as possible */ + t_dsptask *t; + while ((t = dspthreadpool_pop())) + dsptask_run(t, index); + /* wait for more tasks (or quit) */ + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "DSP thread %d: wait\n", index); + #endif + fast_semaphore_wait(&d_threadpool->tp_sem); + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "DSP thread %d: wake up\n", index); + #endif + } +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "DSP thread %d: finish\n", index); +#endif +} + +int sys_dspthread_run(int index) +{ + if (!d_threadpool) + { + fprintf(stderr, "sys_dspthread_run: DSP thread pool not initialized!\n"); + return 0; + } + if (index == 0) + { + fprintf(stderr, "sys_dspthread_run: thread index 0 reserved for main audio thread!\n"); + return 0; + } + else if (index < 0 || index > d_threadpool->tp_n) + { + fprintf(stderr, "sys_dspthread_run: thread index %d out of range!\n", index); + return 0; + } + + dspthread_dorun(index); + + return 1; +} + +/* 
-------------------------- t_dsptaskqueue --------------------------- */ + +struct _dsptaskqueue +{ + int dq_numtasks; /* number of tasks, also doubles as reference count */ +#ifdef MSVC_INTERLOCKED + long dq_remaining; +#else + atomic_int dq_remaining; +#endif + t_fast_semaphore dq_sem; +}; + +t_dsptaskqueue * dsptaskqueue_new(void) +{ + t_dsptaskqueue *x = (t_dsptaskqueue *)getbytes(sizeof(t_dsptaskqueue)); + x->dq_numtasks = 0; + x->dq_remaining = 0; + fast_semaphore_init(&x->dq_sem); + return x; +} + + /* this is also called by dsptask_free(). we only free the queue + * when the reference count drops *below* zero. */ +void dsptaskqueue_release(t_dsptaskqueue *x) +{ + int oldcount = x->dq_numtasks--; + if (oldcount < 0) + bug("dsptaskqueue_free"); + else if (oldcount == 0) + { + fast_semaphore_destroy(&x->dq_sem); + freebytes(x, sizeof(t_dsptaskqueue)); + } +} + +void dsptaskqueue_reset(t_dsptaskqueue *x) +{ + if (x->dq_numtasks > 0) + { + x->dq_remaining = x->dq_numtasks; + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: reset with %d tasks\n", + x, x->dq_numtasks); + #endif + } +} + +void dsptaskqueue_join(t_dsptaskqueue *x) +{ + if (!d_threadpool || !d_threadpool->tp_n) + /* single-threaded -> nothing to do, see dsptask_sched() */ + return; + if (!x->dq_numtasks) /* no tasks */ + return; + /* multi-threaded */ +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: begin join\n", x); +#endif + /* We don't want to put the thread to sleep, so we first try to + * participate in DSP thread pool. + * NB: if PDINSTANCE defined, we might actually run tasks that + * belong to other Pd instances! LATER decide if we should push + * such tasks back to the queue? */ + while (!fast_semaphore_trywait(&x->dq_sem)) + { + /* Pop and run a *single* task, then try again. + * Unlike in dspthread_dorun(), we do not pop tasks in a loop + * because we might end up running tasks that don't belong to + * this queue (and have a much later deadline). 
*/ + t_dsptask *t = dspthreadpool_pop(); + if (t) + dsptask_run(t, 0); + else + { + /* nothing to do, wait */ + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: wait\n", x); + #endif + fast_semaphore_wait(&x->dq_sem); + break; /* ! */ + } + } +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: end join\n", x); +#endif +} + +/* ---------------------------- t_dsptask ----------------------------- */ + +struct _dsptask +{ + t_lfs_node dt_node; +#ifdef PDINSTANCE + t_pdinstance *dt_pdinstance; +#endif + t_dsptaskqueue *dt_queue; + t_dsptaskfn dt_fn; + void *dt_data; +}; + +t_dsptask * dsptask_new(t_dsptaskqueue *queue, t_dsptaskfn fn, void *data) +{ + t_dsptask *x = (t_dsptask *)getbytes(sizeof(t_dsptask)); + lfs_node_init(x); +#ifdef PDINSTANCE + x->dt_pdinstance = pd_this; +#endif + x->dt_queue = queue; + x->dt_fn = fn; + x->dt_data = data; + queue->dq_numtasks++; /* increment refcount */ + return x; +} + +void dsptask_free(t_dsptask *x) +{ + dsptaskqueue_release(x->dt_queue); /* release */ + freebytes(x, sizeof(t_dsptask)); +} + +void dsptask_sched(t_dsptask *x) +{ + if (d_threadpool && d_threadpool->tp_n > 0) + { + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: sched task %p\n", x->dt_queue, x); + #endif + dspthreadpool_push(x); + fast_semaphore_post(&d_threadpool->tp_sem); + } + else + { + /* execute immediately, see dsptaskqueue_join(). + * NOTE: don't use dsptask_run() here! */ + (x->dt_fn)(x->dt_data); + } +} + +static void dsptask_run(t_dsptask *x, int index) +{ + t_dsptaskqueue *queue = x->dt_queue; + int remaining; +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: run task %p on thread %d\n", + queue, x, index); +#endif +#ifdef PDINSTANCE + pd_setinstance(x->dt_pdinstance); +#endif + (x->dt_fn)(x->dt_data); +#ifdef MSVC_INTERLOCKED + remaining = _InterlockedDecrement(&queue->dq_remaining); /* returns new value! 
*/ +#else + remaining = atomic_fetch_sub(&queue->dq_remaining, 1) - 1; +#endif +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: %d remaining tasks\n", queue, remaining); +#endif + if (!remaining) + { + /* last task, notify waiting main audio thread; + * see dsptaskqueue_join() */ + fast_semaphore_post(&queue->dq_sem); + } +} + +#else /* PD_DSPTHREADS */ + +/* dummy implementations of public API functions */ + +int sys_havedspthreadpool(void) +{ + return 0; +} + +int sys_dspthreadpool_start(int *numthreads, int external) +{ + return 0; +} + +int sys_dspthreadpool_stop(int external) +{ + return 0; +} + +int sys_dspthread_run(int index) +{ + return 0; +} + +#endif /* PD_DSPTHREADS */ diff --git a/src/makefile.gnu b/src/makefile.gnu index 89c32c6a21..2bc04a3388 100644 --- a/src/makefile.gnu +++ b/src/makefile.gnu @@ -120,7 +120,7 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ d_delay.c d_resample.c d_soundfile.c d_soundfile_aiff.c d_soundfile_caf.c \ - d_soundfile_next.c d_soundfile_wave.c \ + d_soundfile_next.c d_soundfile_wave.c d_threadpool.c \ x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c \ x_time.c x_acoustics.c x_net.c x_text.c x_gui.c x_list.c x_array.c \ x_file.c x_scalar.c x_vexp.c x_vexp_if.c x_vexp_fun.c \ diff --git a/src/makefile.mac b/src/makefile.mac index d34d27dfdc..2b0ed52ada 100644 --- a/src/makefile.mac +++ b/src/makefile.mac @@ -95,7 +95,7 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ d_delay.c d_resample.c d_soundfile.c d_soundfile_aiff.c d_soundfile_caf.c \ - d_soundfile_next.c d_soundfile_wave.c \ + d_soundfile_next.c d_soundfile_wave.c d_threadpool.c \ x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c 
\ x_time.c x_acoustics.c x_net.c x_text.c x_gui.c x_list.c x_array.c \ x_file.c x_scalar.c x_vexp.c x_vexp_if.c x_vexp_fun.c \ diff --git a/src/makefile.mingw b/src/makefile.mingw index 4de3ca4e4f..1da6b6b05f 100644 --- a/src/makefile.mingw +++ b/src/makefile.mingw @@ -129,7 +129,7 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ d_delay.c d_resample.c d_soundfile.c d_soundfile_aiff.c d_soundfile_caf.c \ - d_soundfile_next.c d_soundfile_wave.c \ + d_soundfile_next.c d_soundfile_wave.c d_threadpool.c \ x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c \ x_time.c x_acoustics.c x_net.c x_text.c x_gui.c x_list.c x_array.c \ x_file.c x_scalar.c x_vexp.c x_vexp_if.c x_vexp_fun.c diff --git a/src/makefile.msvc b/src/makefile.msvc index 9cf819b30e..367d546184 100644 --- a/src/makefile.msvc +++ b/src/makefile.msvc @@ -92,7 +92,7 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ d_delay.c d_resample.c d_soundfile.c d_soundfile_aiff.c d_soundfile_caf.c \ - d_soundfile_next.c d_soundfile_wave.c \ + d_soundfile_next.c d_soundfile_wave.c d_threadpool.c \ x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c \ x_time.c x_acoustics.c x_net.c x_text.c x_gui.c x_list.c x_array.c \ x_file.c x_scalar.c x_vexp.c x_vexp_if.c x_vexp_fun.c \ diff --git a/src/s_stuff.h b/src/s_stuff.h index 580b4ac343..8295e898eb 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -392,6 +392,37 @@ EXTERN void inmidi_polyaftertouch(int portno, /* } jsarlo */ EXTERN int sys_zoom_open; +/* DSP task queue, for documentation see d_threadpool.c */ +#ifndef PD_DSPTHREADS +#define PD_DSPTHREADS 0 +#endif + +EXTERN_STRUCT _dsptaskqueue; +#define t_dsptaskqueue struct _dsptaskqueue + +#if 
PD_DSPTHREADS + +t_dsptaskqueue * dsptaskqueue_new(void); +void dsptaskqueue_release(t_dsptaskqueue *x); +void dsptaskqueue_reset(t_dsptaskqueue *x); +void dsptaskqueue_join(t_dsptaskqueue *x); + +EXTERN_STRUCT _dsptask; +#define t_dsptask struct _dsptask + +typedef void (*t_dsptaskfn) (void *data); + +t_dsptask * dsptask_new(t_dsptaskqueue *queue, t_dsptaskfn fn, void *data); +void dsptask_free(t_dsptask *x); +void dsptask_sched(t_dsptask *x); + +#endif /* PD_DSPTHREADS */ + +EXTERN int sys_havedspthreadpool(void); +EXTERN int sys_dspthreadpool_start(int *numthreads, int external); +EXTERN int sys_dspthreadpool_stop(int external); +EXTERN int sys_dspthread_run(int index); + struct _instancestuff { t_namelist *st_externlist; From 6af56fc9cb5f134f4f7cb6b62b65169d845afdba Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Mon, 27 Dec 2021 01:36:30 +0100 Subject: [PATCH 04/32] implement internal thread pool * add global DSP task queue * reset and join DSP global task queue in dsp_tick() * add "-threads" command line option to set number of DSP threads --- src/d_ugen.c | 16 ++++++++++++++++ src/m_sched.c | 5 +++++ src/s_main.c | 12 ++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/d_ugen.c b/src/d_ugen.c index 647183fa6e..c319a1efa6 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -13,6 +13,7 @@ #include "m_pd.h" #include "m_imp.h" +#include "s_stuff.h" #include extern t_class *vinlet_class, *voutlet_class, *canvas_class, *text_class; @@ -43,6 +44,9 @@ struct _instanceugen int u_phase; int u_loud; struct _dspcontext *u_context; +#if PD_DSPTHREADS + t_dsptaskqueue *u_dspqueue; /* toplevel DSP thread queue */ +#endif }; #define THIS (pd_this->pd_ugen) @@ -53,10 +57,16 @@ void d_ugen_newpdinstance(void) THIS->u_dspchain = 0; THIS->u_dspchainsize = 0; THIS->u_signals = 0; +#if PD_DSPTHREADS + THIS->u_dspqueue = dsptaskqueue_new(); +#endif } void d_ugen_freepdinstance(void) { +#if PD_DSPTHREADS + dsptaskqueue_release(THIS->u_dspqueue); +#endif freebytes(THIS, 
sizeof(*THIS)); } @@ -364,8 +374,14 @@ void dsp_tick(void) if (THIS->u_dspchain) { t_int *ip; + #if PD_DSPTHREADS + dsptaskqueue_reset(THIS->u_dspqueue); + #endif for (ip = THIS->u_dspchain; ip; ) ip = (*(t_perfroutine)(*ip))(ip); THIS->u_phase++; + #if PD_DSPTHREADS + dsptaskqueue_join(THIS->u_dspqueue); + #endif } } diff --git a/src/m_sched.c b/src/m_sched.c index 5d3dc234e9..df29e0d030 100644 --- a/src/m_sched.c +++ b/src/m_sched.c @@ -27,6 +27,7 @@ extern int sys_nosleep; int sys_usecsincelastsleep(void); int sys_sleepgrain; +extern int sys_dspthreads; typedef void (*t_clockmethod)(void *client); @@ -332,6 +333,7 @@ static void m_pollingscheduler(void) { sys_lock(); sys_initmidiqueue(); + sys_dspthreadpool_start(&sys_dspthreads, 0); while (!sys_quit) /* outer loop runs once per tick */ { sys_addhist(0); @@ -384,6 +386,7 @@ static void m_pollingscheduler(void) break; } } + sys_dspthreadpool_stop(0); sys_unlock(); } @@ -403,6 +406,7 @@ void sched_audio_callbackfn(void) static void m_callbackscheduler(void) { sys_initmidiqueue(); + sys_dspthreadpool_start(&sys_dspthreads, 0); while (!sys_quit) { double timewas = pd_this->pd_systime; @@ -421,6 +425,7 @@ static void m_callbackscheduler(void) if (sys_idlehook) sys_idlehook(); } + sys_dspthreadpool_stop(0); } int m_mainloop(void) diff --git a/src/s_main.c b/src/s_main.c index 3fef766200..dce12fbf8f 100644 --- a/src/s_main.c +++ b/src/s_main.c @@ -58,6 +58,7 @@ int sys_guisetportnumber; /* if started from the GUI, this is the port # */ int sys_nosleep = 0; /* skip all "sleep" calls and spin instead */ int sys_defeatrt; /* flag to cancel real-time */ t_symbol *sys_flags; /* more command-line flags */ +int sys_dspthreads = 1; /* number of DSP threads */ const char *sys_guicmd; t_symbol *sys_libdir; @@ -415,6 +416,10 @@ static char *(usagemessage[]) = { "-noaudio -- suppress audio input and output (-nosound is synonym) \n", "-callback -- use callbacks if possible\n", "-nocallback -- use polling-mode (true by default)\n", 
+#if PD_DSPTHREADS +"-threads -- number of audio threads\n" +" 0: use all physical cores (default)\n", +#endif "-listdev -- list audio and MIDI devices\n", #ifdef USEAPI_OSS @@ -1328,6 +1333,13 @@ int sys_argparse(int argc, const char **argv) fprintf(stderr, "Pd compiled without realtime priority-support, ignoring '%s' flag\n", *argv); argc--; argv++; } +#endif +#if PD_DSPTHREADS + else if (!strcmp(*argv, "-threads") && argc > 1) + { + sys_dspthreads = atoi(argv[1]); + argc -= 2; argv += 2; + } #endif else if (!strcmp(*argv, "-sleep")) { From 487f1571e119e05d9a1ef1d1287d98e534ecde6b Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Mon, 27 Dec 2021 01:41:56 +0100 Subject: [PATCH 05/32] make clock scheduling methods thread-safe * add thread local variable for the current DSP thread index * if clocks are set/unset in the main thread (= 0), do as usual * if clocks are set/unset in a DSP helper thread (> 0), cache the desired time point and put the clock on a lock-free stack * at the end of dsp_tick() we take the list of deferred clocks and set/unset them for real. 
--- src/d_threadpool.c | 4 +++ src/d_ugen.c | 23 +++++++++++++++ src/m_pd.h | 10 +++++-- src/m_sched.c | 70 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 103 insertions(+), 4 deletions(-) diff --git a/src/d_threadpool.c b/src/d_threadpool.c index cb05a49bae..324dd01f68 100644 --- a/src/d_threadpool.c +++ b/src/d_threadpool.c @@ -461,11 +461,15 @@ static t_dsptask * dspthreadpool_pop(void) static void dsptask_run(t_dsptask *x, int index); +void dspthread_setindex(int index); + static void dspthread_dorun(int index) { #ifdef DEBUG_DSPTHREADS fprintf(stderr, "DSP thread %d: start\n", index); #endif + dspthread_setindex(index); + #ifdef MSVC_INTERLOCKED while (d_threadpool->tp_running) #else diff --git a/src/d_ugen.c b/src/d_ugen.c index c319a1efa6..2d58faa6e6 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -15,6 +15,9 @@ #include "m_imp.h" #include "s_stuff.h" #include +#if PD_DSPTHREADS +# include "s_sync.h" +#endif extern t_class *vinlet_class, *voutlet_class, *canvas_class, *text_class; @@ -46,6 +49,7 @@ struct _instanceugen struct _dspcontext *u_context; #if PD_DSPTHREADS t_dsptaskqueue *u_dspqueue; /* toplevel DSP thread queue */ + t_lockfree_stack u_clocks; /* only for the main queue */ #endif }; @@ -59,6 +63,7 @@ void d_ugen_newpdinstance(void) THIS->u_signals = 0; #if PD_DSPTHREADS THIS->u_dspqueue = dsptaskqueue_new(); + lockfree_stack_init(&THIS->u_clocks); #endif } @@ -70,6 +75,14 @@ void d_ugen_freepdinstance(void) freebytes(THIS, sizeof(*THIS)); } +#if PD_DSPTHREADS +void clock_defer(t_clock *x) +{ + /* push to main queue */ + lockfree_stack_push(&THIS->u_clocks, x); +} +#endif + t_int *zero_perform(t_int *w) /* zero out a vector */ { t_sample *out = (t_sample *)(w[1]); @@ -369,18 +382,28 @@ void dsp_addv(t_perfroutine f, int n, t_int *vec) THIS->u_dspchainsize = newsize; } +#if PD_DSPTHREADS +void clock_dispatch(t_clock *x); +void dspthread_setindex(int index); +#endif + void dsp_tick(void) { if (THIS->u_dspchain) { t_int *ip; #if 
PD_DSPTHREADS + t_clock *c; + dspthread_setindex(0); /* just to be sure */ dsptaskqueue_reset(THIS->u_dspqueue); #endif for (ip = THIS->u_dspchain; ip; ) ip = (*(t_perfroutine)(*ip))(ip); THIS->u_phase++; #if PD_DSPTHREADS dsptaskqueue_join(THIS->u_dspqueue); + /* dispatch deferred clocks */ + if ((c = lockfree_stack_release(&THIS->u_clocks))) + clock_dispatch(c); #endif } } diff --git a/src/m_pd.h b/src/m_pd.h index 1cf440edd1..2bc8141ddb 100644 --- a/src/m_pd.h +++ b/src/m_pd.h @@ -924,12 +924,16 @@ EXTERN void pd_setinstance(t_pdinstance *x); EXTERN void pdinstance_free(t_pdinstance *x); #endif /* PDINSTANCE */ -#if defined(PDTHREADS) && defined(PDINSTANCE) #ifdef _MSC_VER -#define PERTHREAD __declspec(thread) +#define THREADLOCAL __declspec(thread) +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) +#define THREADLOCAL _Thread_local #else -#define PERTHREAD __thread +#define THREADLOCAL __thread #endif /* _MSC_VER */ + +#if PDTHREADS && defined(PDINSTANCE) +#define PERTHREAD THREADLOCAL #else #define PERTHREAD #endif diff --git a/src/m_sched.c b/src/m_sched.c index df29e0d030..2a34e14cb1 100644 --- a/src/m_sched.c +++ b/src/m_sched.c @@ -9,6 +9,9 @@ #include "s_stuff.h" #ifdef _WIN32 #include +#endif +#if PD_DSPTHREADS +#include "s_sync.h" #endif /* LATER consider making this variable. It's now the LCM of all sample @@ -31,12 +34,27 @@ extern int sys_dspthreads; typedef void (*t_clockmethod)(void *client); +#if PD_DSPTHREADS +/* do not use PERTHREAD! 
*/ +static THREADLOCAL int dspthreadindex = 0; + +void dspthread_setindex(int index) +{ + dspthreadindex = index; +} + +#endif /* PD_DSPTHREADS */ + struct _clock { +#if PD_DSPTHREADS + t_lfs_node c_node; + double c_wanttime; +#endif double c_settime; /* in TIMEUNITS; <0 if unset */ void *c_owner; t_clockmethod c_fn; - struct _clock *c_next; + struct _clock *c_next; /* for the clock list */ t_float c_unit; /* >0 if in TIMEUNITS; <0 if in samples */ }; @@ -47,6 +65,10 @@ struct _clock t_clock *clock_new(void *owner, t_method fn) { t_clock *x = (t_clock *)getbytes(sizeof *x); +#if PD_DSPTHREADS + lfs_node_init(x); + x->c_wanttime = -1; +#endif x->c_settime = -1; x->c_owner = owner; x->c_fn = (t_clockmethod)fn; @@ -55,8 +77,42 @@ t_clock *clock_new(void *owner, t_method fn) return (x); } +#if PD_DSPTHREADS +void clock_defer(t_clock *x); + +/* dispatch clocks scheduled from DSP helper threads */ +void clock_dispatch(t_clock *x) +{ +#if 1 + if (dspthreadindex != 0) + { + sys_lock(); + bug("clock_dispatch"); + sys_unlock(); + return; + } +#endif + for (; x; x = lfs_node_next(x)) + { + if (x->c_wanttime >= 0) + clock_set(x, x->c_wanttime); + else + clock_unset(x); + } +} +#endif /* PD_DSPTHREADS */ + void clock_unset(t_clock *x) { +#if PD_DSPTHREADS + if (dspthreadindex > 0) + { + /* called from DSP helper thread -> defer */ + x->c_wanttime = -1; + clock_defer(x); + return; + } +#endif if (x->c_settime >= 0) { if (x == pd_this->pd_clock_setlist) @@ -75,6 +131,15 @@ void clock_unset(t_clock *x) void clock_set(t_clock *x, double setticks) { if (setticks < pd_this->pd_systime) setticks = pd_this->pd_systime; +#if PD_DSPTHREADS + if (dspthreadindex > 0) + { + /* called from DSP helper thread -> defer */ + x->c_wanttime = setticks; + clock_defer(x); + return; + } +#endif clock_unset(x); x->c_settime = setticks; if (pd_this->pd_clock_setlist && @@ -242,6 +307,9 @@ void sched_tick(void) { double next_sys_time = pd_this->pd_systime + SYSTIMEPERTICK; int countdown = 5000; +#if 
PD_DSPTHREADS + dspthreadindex = 0; /* just to be sure */ +#endif while (pd_this->pd_clock_setlist && pd_this->pd_clock_setlist->c_settime < next_sys_time) { From d8e90606f981dc4963ea468d0d61e7c31e89992c Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Tue, 21 Dec 2021 23:02:45 +0100 Subject: [PATCH 06/32] add t_garrayref API * swapping/resizing an array doesn't require a DSP graph update * in control objects, garrayref can be faster because the garray is cached * in perform routines, garrayref allows thread-safe read or write access to garrays --- src/g_array.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++ src/g_scalar.c | 2 +- src/m_pd.h | 40 +++++++++++++- 3 files changed, 186 insertions(+), 2 deletions(-) diff --git a/src/g_array.c b/src/g_array.c index b78c7a6742..61c79dfd94 100644 --- a/src/g_array.c +++ b/src/g_array.c @@ -6,6 +6,10 @@ #include /* for read/write to files */ #include "m_pd.h" #include "g_canvas.h" +#include "s_stuff.h" +#if PD_DSPTHREADS +# include "s_spinlock.h" +#endif #include /* jsarlo { */ @@ -115,6 +119,9 @@ struct _garray unsigned int x_listviewing:1; /* list view window is open */ unsigned int x_hidename:1; /* don't print name above graph */ unsigned int x_edit:1; /* we can edit the array */ +#if PD_DSPTHREADS + t_rwspinlock x_lock; +#endif }; static t_pd *garray_arraytemplatecanvas; /* written at setup w/ global lock */ @@ -176,6 +183,9 @@ static t_garray *graph_scalar(t_glist *gl, t_symbol *s, t_symbol *templatesym, x->x_edit = 1; glist_add(gl, &x->x_gobj); x->x_glist = gl; +#if PD_DSPTHREADS + rwspinlock_init(&x->x_lock); +#endif return (x); } @@ -753,11 +763,147 @@ const t_widgetbehavior garray_widgetbehavior = /* ----------------------- public functions -------------------- */ +/* legacy, use garrayref methods instead */ void garray_usedindsp(t_garray *x) { x->x_usedindsp = 1; } +void garrayref_init(t_garrayref *x) +{ + x->ar_garray = 0; + x->ar_stub = 0; +} + +void gstub_dis(t_gstub *gs); + +void 
garrayref_unset(t_garrayref *x) +{ + x->ar_garray = 0; + if (x->ar_stub) + { + gstub_dis(x->ar_stub); + x->ar_stub = 0; + } +} + +static int garrayref_findbyname(t_garrayref *x, t_symbol *name, t_object *object) +{ + t_garray *g; + t_array *a; + int npoints; + t_word *vec; + if (!(g = (t_garray *)pd_findbyclass(name, garray_class))) + { + if (object) + pd_error(object, "%s: %s: no such array", + class_getname(object->te_pd), name->s_name); + else + pd_error(0, "%s: no such array", name->s_name); + return 0; + } + if (!garray_getfloatwords(g, &npoints, &vec)) + { + if (object) + pd_error(object, "%s: bad template for %s", + name->s_name, class_getname(object->te_pd)); + else + pd_error(0, "%s: bad template", name->s_name); + return 0; + } + if (!(a = garray_getarray(g))) + return 0; + x->ar_garray = g; + if (x->ar_stub) gstub_dis(x->ar_stub); + x->ar_stub = a->a_stub; + a->a_stub->gs_refcount++; + return 1; +} + +int garrayref_set(t_garrayref *x, t_symbol *arrayname, t_object *object) +{ + /* ignore empty symbol */ + if (!(*arrayname->s_name && garrayref_findbyname(x, arrayname, object))) + { + garrayref_unset(x); + return 0; + } + return 1; +} + +int garrayref_check(t_garrayref *x) +{ + /* do we have a stub, and if yes, has it been cut off? */ + t_gstub *gs = x->ar_stub; + return gs && (gs->gs_which == GP_ARRAY); +} + + /* lazily initialize an garrayref by name and return the array data; + * if 'arrayname' is NULL, just fail silently. */ +int garrayref_get(t_garrayref *x, int *size, t_word **vec, + t_symbol *arrayname, t_object *object) +{ + t_array *a; + if (!garrayref_check(x)) + { + if (!arrayname || !garrayref_findbyname(x, arrayname, object)) + return 0; + } + a = x->ar_stub->gs_un.gs_array; + *vec = (t_word *)a->a_vec; + *size = a->a_n; + return 1; +} + + /* see m_pd.h. 
*/ +#if !PD_PARALLEL +#undef garrayref_write_lock +#undef garrayref_write_unlock +#undef garrayref_read_lock +#undef garrayref_read_unlock +#endif + + /* garrayref_write_lock() and garrayref_read_lock() always fail + * silently if garrayref is empty or if the garray has been removed. + * In practice, adding/removing garrays triggers a DSP graph update, + * so we automatically try to reacquire the garray in our DSP method + * by calling garrayref_set(). + * NOTE: we avoid (un)setting the garrayref in the perform routine + * because it would make things more complicated wrt thread-safety. */ +int garrayref_write_lock(t_garrayref *x, int *size, t_word **vec) +{ + if (!garrayref_get(x, size, vec, 0, 0)) + return 0; +#if PD_DSPTHREADS + rwspinlock_wrlock(&x->ar_garray->x_lock); +#endif + return 1; +} + +void garrayref_write_unlock(t_garrayref *x) +{ +#if PD_DSPTHREADS + rwspinlock_wrunlock(&x->ar_garray->x_lock); +#endif +} + +int garrayref_read_lock(t_garrayref *x, int *size, t_word **vec) +{ + if (!garrayref_get(x, size, vec, 0, 0)) + return 0; +#if PD_DSPTHREADS + rwspinlock_rdlock(&x->ar_garray->x_lock); +#endif + return 1; +} + +void garrayref_read_unlock(t_garrayref *x) +{ +#if PD_DSPTHREADS + rwspinlock_rdunlock(&x->ar_garray->x_lock); +#endif +} + static void garray_doredraw(t_gobj *client, t_glist *glist) { t_garray *x = (t_garray *)client; diff --git a/src/g_scalar.c b/src/g_scalar.c index 9f3d27f917..ee5f906a05 100644 --- a/src/g_scalar.c +++ b/src/g_scalar.c @@ -38,7 +38,7 @@ down the owner) we increase a reference count. The following routine is called whenever a gpointer is unset from pointing here. If the owner is gone and the refcount goes to zero, we can free the gstub safely. 
*/ -static void gstub_dis(t_gstub *gs) +void gstub_dis(t_gstub *gs) { int refcount = --gs->gs_refcount; if ((!refcount) && gs->gs_which == GP_NONE) diff --git a/src/m_pd.h b/src/m_pd.h index 2bc8141ddb..aaee95822e 100644 --- a/src/m_pd.h +++ b/src/m_pd.h @@ -697,12 +697,50 @@ EXTERN int garray_npoints(t_garray *x); EXTERN char *garray_vec(t_garray *x); EXTERN void garray_resize(t_garray *x, t_floatarg f); /* avoid; use this: */ EXTERN void garray_resize_long(t_garray *x, long n); /* better version */ -EXTERN void garray_usedindsp(t_garray *x); +EXTERN void garray_usedindsp(t_garray *x); /* avoid, use garrayref methods instead */ EXTERN void garray_setsaveit(t_garray *x, int saveit); EXTERN t_glist *garray_getglist(t_garray *x); EXTERN t_array *garray_getarray(t_garray *x); EXTERN t_class *scalar_class; +/* t_garrayref is a safe reference to a garray (similar to gpointer). + * The actual array data can be obtained on demand by the functions below. + * (You must not store any pointers to array data because it might become stale!) + * The advantage of using those functions instead of garray_getfloatwords() + * is that you don't have to call garray_usedindsp(), which means the array data + * can change without rebuilding the DSP graph! + * They also speed up and simplify garray access in control objects because you + * do not have to look up the garray every single time. + * Finally, they allow to synchronize array data access in parallel DSP processing. + * See d_array.c for examples. */ +typedef struct _arrayref +{ + t_garray *ar_garray; + t_gstub *ar_stub; +} t_garrayref; + +EXTERN void garrayref_init(t_garrayref *x); +EXTERN void garrayref_unset(t_garrayref *x); +/* set garrayref to a new garray */ +EXTERN int garrayref_set(t_garrayref *x, t_symbol *arrayname, t_object *obj); +/* check if the garrayref is valid. Call before accessing the 'ar_garray' member! */ +EXTERN int garrayref_check(t_garrayref *x); +/* for control objects: safely access array data. 
If the reference is empty or + * stale, (re)acquire the array by name; if 'arrayname' is NULL, fail silently. + * Returns 1 if it could get the array data; otherwise returns 0. + * + * If you want to set the garrayref to another garray, you must either call + * garrayref_set() with the new name, or call garrayref_unset() and lazily initialize + * it in the next call to garrayref_get(). */ +EXTERN int garrayref_get(t_garrayref *x, int *size, t_word **vec, t_symbol *arrayname, t_object *object); +/* for DSP objects: lock/unlock garray for reading/writing in the perform routine. + * Returns 1 if it could get the array data and lock the garray; otherwise returns 0. + * WARNING: do not attempt to unlock the garray if you could not lock it! */ +EXTERN int garrayref_write_lock(t_garrayref *x, int *size, t_word **vec); +EXTERN void garrayref_write_unlock(t_garrayref *x); +EXTERN int garrayref_read_lock(t_garrayref *x, int *size, t_word **vec); +EXTERN void garrayref_read_unlock(t_garrayref *x); + EXTERN t_float *value_get(t_symbol *s); EXTERN void value_release(t_symbol *s); EXTERN int value_getfloat(t_symbol *s, t_float *f); From afe8644f4f82576726b9e6397c7ed2c22a6342e4 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Tue, 28 Dec 2021 18:56:19 +0100 Subject: [PATCH 07/32] make table DSP objects thread-safe --- src/d_array.c | 468 ++++++++++++++++++++++++++------------------------ 1 file changed, 222 insertions(+), 246 deletions(-) diff --git a/src/d_array.c b/src/d_array.c index 9c18620d67..1c617cb340 100644 --- a/src/d_array.c +++ b/src/d_array.c @@ -17,80 +17,74 @@ typedef struct _tabwrite_tilde { t_object x_obj; int x_phase; - int x_nsampsintab; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; + t_clock *x_clock; t_float x_f; } t_tabwrite_tilde; -static void tabwrite_tilde_tick(t_tabwrite_tilde *x); +static void tabwrite_tilde_redraw(t_tabwrite_tilde *x) +{ + if (garrayref_check(&x->x_ref)) + garray_redraw(x->x_ref.ar_garray); +} static void 
*tabwrite_tilde_new(t_symbol *s) { t_tabwrite_tilde *x = (t_tabwrite_tilde *)pd_new(tabwrite_tilde_class); x->x_phase = 0x7fffffff; x->x_arrayname = s; + garrayref_init(&x->x_ref); + x->x_clock = clock_new(x, (t_method)tabwrite_tilde_redraw); x->x_f = 0; return (x); } -static void tabwrite_tilde_redraw(t_tabwrite_tilde *x) +static void tabwrite_tilde_free(t_tabwrite_tilde *x) { - t_garray *a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class); - if (!a) - bug("tabwrite_tilde_redraw"); - else garray_redraw(a); + garrayref_unset(&x->x_ref); + clock_free(x->x_clock); } static t_int *tabwrite_tilde_perform(t_int *w) { t_tabwrite_tilde *x = (t_tabwrite_tilde *)(w[1]); t_sample *in = (t_sample *)(w[2]); - int n = (int)(w[3]), phase = x->x_phase, endphase = x->x_nsampsintab; - if (!x->x_vec) goto bad; - - if (endphase > phase) + t_word *vec; + int n = (int)(w[3]), phase = x->x_phase, endphase; + if (garrayref_write_lock(&x->x_ref, &endphase, &vec)) { - int nxfer = endphase - phase; - t_word *wp = x->x_vec + phase; - if (nxfer > n) nxfer = n; - phase += nxfer; - while (nxfer--) + if (endphase > phase) { - t_sample f = *in++; - if (PD_BIGORSMALL(f)) - f = 0; - (wp++)->w_float = f; - } - if (phase >= endphase) - { - tabwrite_tilde_redraw(x); - phase = 0x7fffffff; + int nxfer = endphase - phase; + t_word *wp = vec + phase; + if (nxfer > n) nxfer = n; + phase += nxfer; + while (nxfer--) + { + t_sample f = *in++; + if (PD_BIGORSMALL(f)) + f = 0; + (wp++)->w_float = f; + } + if (phase >= endphase) + { + clock_delay(x->x_clock, 0); + phase = 0x7fffffff; + } + x->x_phase = phase; } - x->x_phase = phase; + else x->x_phase = 0x7fffffff; + + garrayref_write_unlock(&x->x_ref); } - else x->x_phase = 0x7fffffff; -bad: return (w+4); } static void tabwrite_tilde_set(t_tabwrite_tilde *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) pd_error(x, "tabwrite~: %s: no such array", - 
x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_nsampsintab, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabwrite~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabwrite_tilde_dsp(t_tabwrite_tilde *x, t_signal **sp) @@ -121,7 +115,7 @@ static void tabwrite_tilde_stop(t_tabwrite_tilde *x) static void tabwrite_tilde_setup(void) { tabwrite_tilde_class = class_new(gensym("tabwrite~"), - (t_newmethod)tabwrite_tilde_new, 0, + (t_newmethod)tabwrite_tilde_new, (t_method)tabwrite_tilde_free, sizeof(t_tabwrite_tilde), 0, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabwrite_tilde_class, t_tabwrite_tilde, x_f); class_addmethod(tabwrite_tilde_class, (t_method)tabwrite_tilde_dsp, @@ -144,9 +138,8 @@ typedef struct _tabplay_tilde t_object x_obj; t_outlet *x_bangout; int x_phase; - int x_nsampsintab; int x_limit; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; t_clock *x_clock; } t_tabplay_tilde; @@ -159,6 +152,7 @@ static void *tabplay_tilde_new(t_symbol *s) x->x_clock = clock_new(x, (t_method)tabplay_tilde_tick); x->x_phase = 0x7fffffff; x->x_limit = 0; + garrayref_init(&x->x_ref); x->x_arrayname = s; outlet_new(&x->x_obj, &s_signal); x->x_bangout = outlet_new(&x->x_obj, &s_bang); @@ -169,15 +163,19 @@ static t_int *tabplay_tilde_perform(t_int *w) { t_tabplay_tilde *x = (t_tabplay_tilde *)(w[1]); t_sample *out = (t_sample *)(w[2]); - t_word *wp; - int n = (int)(w[3]), phase = x->x_phase, - endphase = (x->x_nsampsintab < x->x_limit ? - x->x_nsampsintab : x->x_limit), nxfer, n3; - if (!x->x_vec || phase >= endphase) + t_word *vec, *wp; + int n = (int)(w[3]), phase = x->x_phase, endphase, npoints, nxfer, n3; + if (!garrayref_read_lock(&x->x_ref, &npoints, &vec)) + goto zero; + endphase = npoints < x->x_limit ? npoints : x->x_limit; + if (phase >= endphase) + { + garrayref_read_unlock(&x->x_ref); /* ! 
*/ goto zero; + } nxfer = endphase - phase; - wp = x->x_vec + phase; + wp = vec + phase; if (nxfer > n) nxfer = n; n3 = n - nxfer; @@ -193,6 +191,7 @@ static t_int *tabplay_tilde_perform(t_int *w) } else x->x_phase = phase; + garrayref_read_unlock(&x->x_ref); return (w+4); zero: while (n--) *out++ = 0; @@ -201,21 +200,8 @@ static t_int *tabplay_tilde_perform(t_int *w) static void tabplay_tilde_set(t_tabplay_tilde *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) pd_error(x, "tabplay~: %s: no such array", - x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_nsampsintab, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabplay~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, x->x_arrayname, &x->x_obj); } static void tabplay_tilde_dsp(t_tabplay_tilde *x, t_signal **sp) @@ -249,6 +235,7 @@ static void tabplay_tilde_tick(t_tabplay_tilde *x) static void tabplay_tilde_free(t_tabplay_tilde *x) { + garrayref_unset(&x->x_ref); clock_free(x->x_clock); } @@ -273,8 +260,7 @@ static t_class *tabread_tilde_class; typedef struct _tabread_tilde { t_object x_obj; - int x_npoints; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; t_float x_f; } t_tabread_tilde; @@ -283,7 +269,7 @@ static void *tabread_tilde_new(t_symbol *s) { t_tabread_tilde *x = (t_tabread_tilde *)pd_new(tabread_tilde_class); x->x_arrayname = s; - x->x_vec = 0; + garrayref_init(&x->x_ref); outlet_new(&x->x_obj, gensym("signal")); x->x_f = 0; return (x); @@ -295,13 +281,16 @@ static t_int *tabread_tilde_perform(t_int *w) t_sample *in = (t_sample *)(w[2]); t_sample *out = (t_sample *)(w[3]); int n = (int)(w[4]); - int maxindex; - t_word *buf = x->x_vec; - int i; - - maxindex = x->x_npoints - 1; - if(maxindex<0) goto zero; - if (!buf) goto zero; + int maxindex, i, npoints; + t_word *vec; + if 
(!garrayref_read_lock(&x->x_ref, &npoints, &vec)) + goto zero; + maxindex = npoints - 1; + if (maxindex < 0) + { + garrayref_read_unlock(&x->x_ref); /* ! */ + goto zero; + } for (i = 0; i < n; i++) { @@ -310,8 +299,10 @@ static t_int *tabread_tilde_perform(t_int *w) index = 0; else if (index > maxindex) index = maxindex; - *out++ = buf[index].w_float; + *out++ = vec[index].w_float; } + + garrayref_read_unlock(&x->x_ref); return (w+5); zero: while (n--) *out++ = 0; @@ -321,21 +312,8 @@ static t_int *tabread_tilde_perform(t_int *w) static void tabread_tilde_set(t_tabread_tilde *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, "tabread~: %s: no such array", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_npoints, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabread~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabread_tilde_dsp(t_tabread_tilde *x, t_signal **sp) @@ -349,6 +327,7 @@ static void tabread_tilde_dsp(t_tabread_tilde *x, t_signal **sp) static void tabread_tilde_free(t_tabread_tilde *x) { + garrayref_unset(&x->x_ref); } static void tabread_tilde_setup(void) @@ -370,8 +349,7 @@ static t_class *tabread4_tilde_class; typedef struct _tabread4_tilde { t_object x_obj; - int x_npoints; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; t_float x_f; t_float x_onset; @@ -381,7 +359,7 @@ static void *tabread4_tilde_new(t_symbol *s) { t_tabread4_tilde *x = (t_tabread4_tilde *)pd_new(tabread4_tilde_class); x->x_arrayname = s; - x->x_vec = 0; + garrayref_init(&x->x_ref); outlet_new(&x->x_obj, gensym("signal")); floatinlet_new(&x->x_obj, &x->x_onset); x->x_f = 0; @@ -394,16 +372,17 @@ static t_int *tabread4_tilde_perform(t_int *w) t_tabread4_tilde *x = (t_tabread4_tilde *)(w[1]); t_sample *in = (t_sample *)(w[2]); t_sample 
*out = (t_sample *)(w[3]); - int n = (int)(w[4]); - int maxindex; - t_word *buf = x->x_vec, *wp; + int n = (int)(w[4]), maxindex, npoints, i; double onset = x->x_onset; - int i; - - maxindex = x->x_npoints - 3; - if(maxindex<0) goto zero; - - if (!buf) goto zero; + t_word *vec, *wp; + if (!garrayref_read_lock(&x->x_ref, &npoints, &vec)) + goto zero; + maxindex = npoints - 3; + if (maxindex < 0) + { + garrayref_read_unlock(&x->x_ref); /* ! */ + goto zero; + } #if 0 /* test for spam -- I'm not ready to deal with this */ for (i = 0, xmax = 0, xmin = maxindex, fp = in1; i < n; i++, fp++) @@ -431,7 +410,7 @@ static t_int *tabread4_tilde_perform(t_int *w) else if (index > maxindex) index = maxindex, frac = 1; else frac = findex - index; - wp = buf + index; + wp = vec + index; a = wp[-1].w_float; b = wp[0].w_float; c = wp[1].w_float; @@ -443,6 +422,8 @@ static t_int *tabread4_tilde_perform(t_int *w) ) ); } + + garrayref_read_unlock(&x->x_ref); return (w+5); zero: while (n--) *out++ = 0; @@ -452,21 +433,8 @@ static t_int *tabread4_tilde_perform(t_int *w) static void tabread4_tilde_set(t_tabread4_tilde *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, "tabread4~: %s: no such array", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_npoints, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabread4~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabread4_tilde_dsp(t_tabread4_tilde *x, t_signal **sp) @@ -480,6 +448,7 @@ static void tabread4_tilde_dsp(t_tabread4_tilde *x, t_signal **sp) static void tabread4_tilde_free(t_tabread4_tilde *x) { + garrayref_unset(&x->x_ref); } static void tabread4_tilde_setup(void) @@ -544,18 +513,20 @@ typedef struct _tabosc4_tilde t_object x_obj; t_float x_fnpoints; t_float x_finvnpoints; - t_word *x_vec; + 
t_garrayref x_ref; t_symbol *x_arrayname; t_float x_f; - double x_phase; t_float x_conv; + double x_phase; + int x_lasttabsize; } t_tabosc4_tilde; static void *tabosc4_tilde_new(t_symbol *s) { t_tabosc4_tilde *x = (t_tabosc4_tilde *)pd_new(tabosc4_tilde_class); x->x_arrayname = s; - x->x_vec = 0; + garrayref_init(&x->x_ref); + x->x_lasttabsize = 0; x->x_fnpoints = 512.; x->x_finvnpoints = (1./512.); outlet_new(&x->x_obj, gensym("signal")); @@ -564,21 +535,52 @@ static void *tabosc4_tilde_new(t_symbol *s) return (x); } +static void tabosc4_tilde_free(t_tabosc4_tilde *x) +{ + garrayref_unset(&x->x_ref); +} + static t_int *tabosc4_tilde_perform(t_int *w) { t_tabosc4_tilde *x = (t_tabosc4_tilde *)(w[1]); t_sample *in = (t_sample *)(w[2]); t_sample *out = (t_sample *)(w[3]); - int n = (int)(w[4]); - int normhipart; + int n = (int)(w[4]), tabsize, mask, normhipart; union tabfudge tf; - t_float fnpoints = x->x_fnpoints; - int mask = fnpoints - 1; - t_float conv = fnpoints * x->x_conv; - t_word *tab = x->x_vec, *addr; - double dphase = fnpoints * x->x_phase + UNITBIT32; + t_word *vec, *addr; + t_float fnpoints, conv; + double dphase; + if (!garrayref_read_lock(&x->x_ref, &tabsize, &vec)) + goto zero; + + if (tabsize != x->x_lasttabsize) + { + /* check table size */ + int npoints = tabsize - 3; + if (npoints == (1 << ilog2(npoints))) + { + x->x_fnpoints = npoints; + x->x_finvnpoints = 1./npoints; + } + else + { + pd_error(x, "tabosc4~: %s: number of points (%d) not a power of 2 plus three", + x->x_arrayname->s_name, tabsize); + x->x_fnpoints = -1; /* sentinel */ + } + x->x_lasttabsize = tabsize; + } + + if (x->x_fnpoints < 0) /* bad size */ + { + garrayref_read_unlock(&x->x_ref); /* ! 
*/ + goto zero; + } + fnpoints = x->x_fnpoints; + mask = fnpoints - 1; + conv = fnpoints * x->x_conv; + dphase = fnpoints * x->x_phase + UNITBIT32; - if (!tab) goto zero; tf.tf_d = UNITBIT32; normhipart = tf.tf_i[HIOFFSET]; @@ -588,7 +590,7 @@ static t_int *tabosc4_tilde_perform(t_int *w) t_sample frac, a, b, c, d, cminusb; tf.tf_d = dphase; dphase += *in++ * conv; - addr = tab + (tf.tf_i[HIOFFSET] & mask); + addr = vec + (tf.tf_i[HIOFFSET] & mask); tf.tf_i[HIOFFSET] = normhipart; frac = tf.tf_d - UNITBIT32; a = addr[0].w_float; @@ -609,6 +611,8 @@ static t_int *tabosc4_tilde_perform(t_int *w) tf.tf_d = dphase + (UNITBIT32 * fnpoints - UNITBIT32); tf.tf_i[HIOFFSET] = normhipart; x->x_phase = (tf.tf_d - UNITBIT32 * fnpoints) * x->x_finvnpoints; + + garrayref_read_unlock(&x->x_ref); return (w+5); zero: while (n--) *out++ = 0; @@ -618,34 +622,8 @@ static t_int *tabosc4_tilde_perform(t_int *w) static void tabosc4_tilde_set(t_tabosc4_tilde *x, t_symbol *s) { - t_garray *a; - int npoints, pointsinarray; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, "tabosc4~: %s: no such array", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &pointsinarray, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabosc4~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if ((npoints = pointsinarray - 3) != (1 << ilog2(pointsinarray - 3))) - { - pd_error(x, "%s: number of points (%d) not a power of 2 plus three", - x->x_arrayname->s_name, pointsinarray); - x->x_vec = 0; - garray_usedindsp(a); - } - else - { - x->x_fnpoints = npoints; - x->x_finvnpoints = 1./npoints; - garray_usedindsp(a); - } + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabosc4_tilde_ft1(t_tabosc4_tilde *x, t_float f) @@ -665,7 +643,7 @@ static void tabosc4_tilde_dsp(t_tabosc4_tilde *x, t_signal **sp) static void tabosc4_tilde_setup(void) { tabosc4_tilde_class = class_new(gensym("tabosc4~"), - 
(t_newmethod)tabosc4_tilde_new, 0, + (t_newmethod)tabosc4_tilde_new, (t_method)tabosc4_tilde_free, sizeof(t_tabosc4_tilde), 0, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabosc4_tilde_class, t_tabosc4_tilde, x_f); class_addmethod(tabosc4_tilde_class, (t_method)tabosc4_tilde_dsp, @@ -683,72 +661,68 @@ static t_class *tabsend_class; typedef struct _tabsend { t_object x_obj; - t_word *x_vec; + t_garrayref x_ref; int x_graphperiod; int x_graphcount; t_symbol *x_arrayname; + t_clock *x_clock; t_float x_f; - int x_npoints; } t_tabsend; -static void tabsend_tick(t_tabsend *x); +static void tabsend_tick(t_tabsend *x) +{ + if (garrayref_check(&x->x_ref)) + garray_redraw(x->x_ref.ar_garray); +} static void *tabsend_new(t_symbol *s) { t_tabsend *x = (t_tabsend *)pd_new(tabsend_class); + garrayref_init(&x->x_ref); x->x_graphcount = 0; x->x_arrayname = s; + x->x_clock = clock_new(x, (t_method)tabsend_tick); x->x_f = 0; return (x); } +static void tabsend_free(t_tabsend *x) +{ + garrayref_unset(&x->x_ref); + clock_free(x->x_clock); +} + static t_int *tabsend_perform(t_int *w) { t_tabsend *x = (t_tabsend *)(w[1]); t_sample *in = (t_sample *)(w[2]); - int n = (int)w[3]; - t_word *dest = x->x_vec; - int i = x->x_graphcount; - if (!x->x_vec) goto bad; - if (n > x->x_npoints) - n = x->x_npoints; - while (n--) - { - t_sample f = *in++; - if (PD_BIGORSMALL(f)) - f = 0; - (dest++)->w_float = f; - } - if (!i--) + int n = (int)w[3], npoints; + t_word *dest; + if (garrayref_write_lock(&x->x_ref, &npoints, &dest)) { - t_garray *a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class); - if (!a) - bug("tabsend_dsp"); - else garray_redraw(a); - i = x->x_graphperiod; + if (n > npoints) + n = npoints; + while (n--) + { + t_sample f = *in++; + if (PD_BIGORSMALL(f)) + f = 0; + (dest++)->w_float = f; + } + if (!x->x_graphcount--) + { + clock_delay(x->x_clock, 0); + x->x_graphcount = x->x_graphperiod; + } + garrayref_write_unlock(&x->x_ref); } - x->x_graphcount = i; -bad: return (w+4); } static void 
tabsend_set(t_tabsend *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, "tabsend~: %s: no such array", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_npoints, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabsend~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabsend_dsp(t_tabsend *x, t_signal **sp) @@ -765,7 +739,7 @@ static void tabsend_dsp(t_tabsend *x, t_signal **sp) static void tabsend_setup(void) { tabsend_class = class_new(gensym("tabsend~"), (t_newmethod)tabsend_new, - 0, sizeof(t_tabsend), 0, A_DEFSYM, 0); + (t_method)tabsend_free, sizeof(t_tabsend), 0, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabsend_class, t_tabsend, x_f); class_addmethod(tabsend_class, (t_method)tabsend_dsp, gensym("dsp"), A_CANT, 0); @@ -781,52 +755,38 @@ static t_class *tabreceive_class; typedef struct _tabreceive { t_object x_obj; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; - int x_npoints; } t_tabreceive; static t_int *tabreceive_perform(t_int *w) { t_tabreceive *x = (t_tabreceive *)(w[1]); t_sample *out = (t_sample *)(w[2]); - int n = (int)w[3]; - t_word *from = x->x_vec; - if (from) + int n = (int)w[3], npoints; + t_word *from; + if (garrayref_read_lock(&x->x_ref, &npoints, &from)) { - t_int vecsize = x->x_npoints; + int vecsize = npoints; if (vecsize > n) vecsize = n; while (vecsize--) *out++ = (from++)->w_float; - vecsize = n - x->x_npoints; + vecsize = n - npoints; if (vecsize > 0) while (vecsize--) *out++ = 0; + garrayref_read_unlock(&x->x_ref); } - else while (n--) *out++ = 0; + else + while (n--) *out++ = 0; return (w+4); } static void tabreceive_set(t_tabreceive *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, 
"tabreceive~: %s: no such array", - x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_npoints, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabreceive~", - x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabreceive_dsp(t_tabreceive *x, t_signal **sp) @@ -838,15 +798,21 @@ static void tabreceive_dsp(t_tabreceive *x, t_signal **sp) static void *tabreceive_new(t_symbol *s) { t_tabreceive *x = (t_tabreceive *)pd_new(tabreceive_class); + garrayref_init(&x->x_ref); x->x_arrayname = s; outlet_new(&x->x_obj, &s_signal); return (x); } +static void tabreceive_free(t_tabreceive *x) +{ + garrayref_unset(&x->x_ref); +} + static void tabreceive_setup(void) { tabreceive_class = class_new(gensym("tabreceive~"), - (t_newmethod)tabreceive_new, 0, + (t_newmethod)tabreceive_new, (t_method)tabreceive_free, sizeof(t_tabreceive), 0, A_DEFSYM, 0); class_addmethod(tabreceive_class, (t_method)tabreceive_dsp, gensym("dsp"), A_CANT, 0); @@ -862,20 +828,15 @@ static t_class *tabread_class; typedef struct _tabread { t_object x_obj; + t_garrayref x_ref; t_symbol *x_arrayname; } t_tabread; static void tabread_float(t_tabread *x, t_float f) { - t_garray *a; int npoints; t_word *vec; - - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - pd_error(x, "%s: no such array", x->x_arrayname->s_name); - else if (!garray_getfloatwords(a, &npoints, &vec)) - pd_error(x, "%s: bad template for tabread", x->x_arrayname->s_name); - else + if (garrayref_get(&x->x_ref, &npoints, &vec, x->x_arrayname, &x->x_obj)) { int n = f; if (n < 0) n = 0; @@ -887,20 +848,27 @@ static void tabread_float(t_tabread *x, t_float f) static void tabread_set(t_tabread *x, t_symbol *s) { x->x_arrayname = s; + garrayref_unset(&x->x_ref); /* reset */ } static void *tabread_new(t_symbol *s) { t_tabread *x = (t_tabread *)pd_new(tabread_class); x->x_arrayname = s; + garrayref_init(&x->x_ref); 
outlet_new(&x->x_obj, &s_float); return (x); } +static void tabread_free(t_tabread *x) +{ + garrayref_unset(&x->x_ref); +} + static void tabread_setup(void) { tabread_class = class_new(gensym("tabread"), (t_newmethod)tabread_new, - 0, sizeof(t_tabread), 0, A_DEFSYM, 0); + (t_method)tabread_free, sizeof(t_tabread), 0, A_DEFSYM, 0); class_addfloat(tabread_class, (t_method)tabread_float); class_addmethod(tabread_class, (t_method)tabread_set, gensym("set"), A_SYMBOL, 0); @@ -913,20 +881,18 @@ static t_class *tabread4_class; typedef struct _tabread4 { t_object x_obj; + t_garrayref x_ref; t_symbol *x_arrayname; } t_tabread4; static void tabread4_float(t_tabread4 *x, t_float f) { - t_garray *a; int npoints; t_word *vec; + if (!garrayref_get(&x->x_ref, &npoints, &vec, x->x_arrayname, &x->x_obj)) + return; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - pd_error(x, "%s: no such array", x->x_arrayname->s_name); - else if (!garray_getfloatwords(a, &npoints, &vec)) - pd_error(x, "%s: bad template for tabread4", x->x_arrayname->s_name); - else if (npoints < 4) + if (npoints < 4) outlet_float(x->x_obj.ob_outlet, 0); else if (f <= 1) outlet_float(x->x_obj.ob_outlet, vec[1].w_float); @@ -955,20 +921,27 @@ static void tabread4_float(t_tabread4 *x, t_float f) static void tabread4_set(t_tabread4 *x, t_symbol *s) { x->x_arrayname = s; + garrayref_unset(&x->x_ref); /* reset */ } static void *tabread4_new(t_symbol *s) { t_tabread4 *x = (t_tabread4 *)pd_new(tabread4_class); x->x_arrayname = s; + garrayref_init(&x->x_ref); outlet_new(&x->x_obj, &s_float); return (x); } +static void tabread4_free(t_tabread4 *x) +{ + garrayref_unset(&x->x_ref); +} + static void tabread4_setup(void) { tabread4_class = class_new(gensym("tabread4"), (t_newmethod)tabread4_new, - 0, sizeof(t_tabread4), 0, A_DEFSYM, 0); + (t_method)tabread4_free, sizeof(t_tabread4), 0, A_DEFSYM, 0); class_addfloat(tabread4_class, (t_method)tabread4_float); class_addmethod(tabread4_class, 
(t_method)tabread4_set, gensym("set"), A_SYMBOL, 0); @@ -981,6 +954,7 @@ static t_class *tabwrite_class; typedef struct _tabwrite { t_object x_obj; + t_garrayref x_ref; t_symbol *x_arrayname; t_float x_ft1; } t_tabwrite; @@ -988,14 +962,9 @@ typedef struct _tabwrite static void tabwrite_float(t_tabwrite *x, t_float f) { int vecsize; - t_garray *a; t_word *vec; - - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - pd_error(x, "%s: no such array", x->x_arrayname->s_name); - else if (!garray_getfloatwords(a, &vecsize, &vec)) - pd_error(x, "%s: bad template for tabwrite", x->x_arrayname->s_name); - else + if (garrayref_get(&x->x_ref, + &vecsize, &vec, x->x_arrayname, &x->x_obj)) { int n = x->x_ft1; if (n < 0) @@ -1003,13 +972,14 @@ static void tabwrite_float(t_tabwrite *x, t_float f) else if (n >= vecsize) n = vecsize-1; vec[n].w_float = f; - garray_redraw(a); + garray_redraw(x->x_ref.ar_garray); } } static void tabwrite_set(t_tabwrite *x, t_symbol *s) { x->x_arrayname = s; + garrayref_unset(&x->x_ref); /* reset */ } static void *tabwrite_new(t_symbol *s) @@ -1017,14 +987,20 @@ static void *tabwrite_new(t_symbol *s) t_tabwrite *x = (t_tabwrite *)pd_new(tabwrite_class); x->x_ft1 = 0; x->x_arrayname = s; + garrayref_init(&x->x_ref); floatinlet_new(&x->x_obj, &x->x_ft1); return (x); } +static void tabwrite_free(t_tabwrite *x) +{ + garrayref_unset(&x->x_ref); +} + void tabwrite_setup(void) { tabwrite_class = class_new(gensym("tabwrite"), (t_newmethod)tabwrite_new, - 0, sizeof(t_tabwrite), 0, A_DEFSYM, 0); + (t_method)tabwrite_free, sizeof(t_tabwrite), 0, A_DEFSYM, 0); class_addfloat(tabwrite_class, (t_method)tabwrite_float); class_addmethod(tabwrite_class, (t_method)tabwrite_set, gensym("set"), A_SYMBOL, 0); From 3a735f692c50003c23d2a6c95c575f883ac27507 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Tue, 21 Dec 2021 23:03:50 +0100 Subject: [PATCH 08/32] make delay DSP objects thread-safe --- src/d_delay.c | 29 ++++++++++++++++++++++++++++- 1 file 
changed, 28 insertions(+), 1 deletion(-) diff --git a/src/d_delay.c b/src/d_delay.c index 681f5c8937..7b2dd20708 100644 --- a/src/d_delay.c +++ b/src/d_delay.c @@ -5,7 +5,22 @@ /* send~, delread~, throw~, catch~ */ #include "m_pd.h" +#include "s_stuff.h" #include + +#if PD_DSPTHREADS +# include "s_spinlock.h" +# define LOCK(x) rwspinlock_wrlock((t_rwspinlock *)&x) +# define UNLOCK(x) rwspinlock_wrunlock((t_rwspinlock *)&x) +# define LOCK_SHARED(x) rwspinlock_rdlock((t_rwspinlock *)&x) +# define UNLOCK_SHARED(x) rwspinlock_rdunlock((t_rwspinlock *)&x) +#else +# define LOCK(x) +# define UNLOCK(x) +# define LOCK_SHARED(x) +# define UNLOCK_SHARED(x) +#endif + extern int ugen_getsortno(void); #define DEFDELVS 64 /* LATER get this from canvas at DSP time */ @@ -16,9 +31,12 @@ static t_class *sigdelwrite_class; typedef struct delwritectl { - int c_n; t_sample *c_vec; + int c_n; int c_phase; +#if PD_DSPTHREADS + t_spinlock c_lock; +#endif } t_delwritectl; typedef struct _sigdelwrite @@ -86,6 +104,9 @@ static void *sigdelwrite_new(t_symbol *s, t_floatarg msec) x->x_deltime = msec; x->x_cspace.c_n = 0; x->x_cspace.c_vec = getbytes(XTRASAMPS * sizeof(t_sample)); +#if PD_DSPTHREADS + spinlock_init(&x->x_cspace.c_lock); +#endif x->x_sortno = 0; x->x_vecsize = 0; x->x_f = 0; @@ -101,6 +122,7 @@ static t_int *sigdelwrite_perform(t_int *w) t_sample *vp = c->c_vec, *bp = vp + phase, *ep = vp + (c->c_n + XTRASAMPS); phase += n; + LOCK(c->c_lock); while (n--) { t_sample f = *in++; @@ -118,6 +140,7 @@ static t_int *sigdelwrite_perform(t_int *w) } } c->c_phase = phase; + UNLOCK(c->c_lock); return (w+4); } @@ -203,11 +226,13 @@ static t_int *sigdelread_perform(t_int *w) if (phase < 0) phase += nsamps; bp = vp + phase; + LOCK_SHARED(c->c_lock); while (n--) { *out++ = *bp++; if (bp == ep) bp -= nsamps; } + UNLOCK_SHARED(c->c_lock); return (w+5); } @@ -288,6 +313,7 @@ static t_int *sigvd_perform(t_int *w) *out++ = 0; return (w+6); } + LOCK_SHARED(ctl->c_lock); while (n--) { t_sample 
delsamps = x->x_sr * *in++ - zerodel, frac; @@ -314,6 +340,7 @@ static t_int *sigvd_perform(t_int *w) ) ); } + UNLOCK_SHARED(ctl->c_lock); return (w+6); } From 60ce2af377451547693c16af08ce7ec2796f10b4 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Tue, 21 Dec 2021 23:04:26 +0100 Subject: [PATCH 09/32] make send~/receive~ and throw~/catch~ thread-safe --- src/d_global.c | 84 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/src/d_global.c b/src/d_global.c index 83dd45e4ec..4e80c3d2f8 100644 --- a/src/d_global.c +++ b/src/d_global.c @@ -5,8 +5,22 @@ /* send~, receive~, throw~, catch~ */ #include "m_pd.h" +#include "s_stuff.h" #include +#if PD_DSPTHREADS +# include "s_spinlock.h" +# define LOCK(x) rwspinlock_wrlock((t_rwspinlock *)&x) +# define UNLOCK(x) rwspinlock_wrunlock((t_rwspinlock *)&x) +# define LOCK_SHARED(x) rwspinlock_rdlock((t_rwspinlock *)&x) +# define UNLOCK_SHARED(x) rwspinlock_rdunlock((t_rwspinlock *)&x) +#else +# define LOCK(x) +# define UNLOCK(x) +# define LOCK_SHARED(x) +# define UNLOCK_SHARED(x) +#endif + #define DEFSENDVS 64 /* LATER get send to get this from canvas */ /* ----------------------------- send~ ----------------------------- */ @@ -19,6 +33,9 @@ typedef struct _sigsend int x_n; t_sample *x_vec; t_float x_f; +#if PD_DSPTHREADS + t_rwspinlock x_lock; +#endif } t_sigsend; static void *sigsend_new(t_symbol *s) @@ -30,27 +47,33 @@ static void *sigsend_new(t_symbol *s) x->x_vec = (t_sample *)getbytes(DEFSENDVS * sizeof(t_sample)); memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(t_sample)); x->x_f = 0; +#if PD_DSPTHREADS + rwspinlock_init((t_rwspinlock *)&x->x_lock); +#endif return (x); } static t_int *sigsend_perform(t_int *w) { - t_sample *in = (t_sample *)(w[1]); - t_sample *out = (t_sample *)(w[2]); + t_sigsend *x = (t_sigsend *)(w[1]); + t_sample *in = (t_sample *)(w[2]); + t_sample *out = x->x_vec; int n = (int)(w[3]); + LOCK(x->x_lock); while (n--) { *out = 
(PD_BIGORSMALL(*in) ? 0 : *in); out++; in++; } + UNLOCK(x->x_lock); return (w+4); } static void sigsend_dsp(t_sigsend *x, t_signal **sp) { if (x->x_n == sp[0]->s_n) - dsp_add(sigsend_perform, 3, sp[0]->s_vec, x->x_vec, (t_int)sp[0]->s_n); + dsp_add(sigsend_perform, 3, x, sp[0]->s_vec, (t_int)sp[0]->s_n); else pd_error(0, "sigsend %s: unexpected vector size", x->x_sym->s_name); } @@ -80,6 +103,9 @@ typedef struct _sigreceive t_symbol *x_sym; t_sample *x_wherefrom; int x_n; +#if PD_DSPTHREADS + t_rwspinlock *x_lock; +#endif } t_sigreceive; static void *sigreceive_new(t_symbol *s) @@ -88,6 +114,9 @@ static void *sigreceive_new(t_symbol *s) x->x_n = DEFSENDVS; /* LATER find our vector size correctly */ x->x_sym = s; x->x_wherefrom = 0; +#if PD_DSPTHREADS + x->x_lock = 0; +#endif outlet_new(&x->x_obj, &s_signal); return (x); } @@ -100,8 +129,10 @@ static t_int *sigreceive_perform(t_int *w) t_sample *in = x->x_wherefrom; if (in) { + LOCK_SHARED(*x->x_lock); while (n--) *out++ = *in++; + UNLOCK_SHARED(*x->x_lock); } else { @@ -120,11 +151,13 @@ static t_int *sigreceive_perf8(t_int *w) t_sample *in = x->x_wherefrom; if (in) { + LOCK_SHARED(*x->x_lock); for (; n; n -= 8, in += 8, out += 8) { out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; out[3] = in[3]; out[4] = in[4]; out[5] = in[5]; out[6] = in[6]; out[7] = in[7]; } + UNLOCK_SHARED(*x->x_lock); } else { @@ -144,11 +177,19 @@ static void sigreceive_set(t_sigreceive *x, t_symbol *s) if (sender) { if (sender->x_n == x->x_n) + { x->x_wherefrom = sender->x_vec; + #if PD_DSPTHREADS + x->x_lock = &sender->x_lock; + #endif + } else { pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name); x->x_wherefrom = 0; + #if PD_DSPTHREADS + x->x_lock = 0; + #endif } } else @@ -197,6 +238,9 @@ typedef struct _sigcatch t_symbol *x_sym; int x_n; t_sample *x_vec; +#if PD_DSPTHREADS + t_rwspinlock x_lock; +#endif } t_sigcatch; static void *sigcatch_new(t_symbol *s) @@ -207,25 +251,34 @@ static void *sigcatch_new(t_symbol *s) x->x_n 
= DEFSENDVS; x->x_vec = (t_sample *)getbytes(DEFSENDVS * sizeof(t_sample)); memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(t_sample)); +#if PD_DSPTHREADS + rwspinlock_init((t_rwspinlock *)&x->x_lock); +#endif outlet_new(&x->x_obj, &s_signal); return (x); } static t_int *sigcatch_perform(t_int *w) { - t_sample *in = (t_sample *)(w[1]); + t_sigcatch *x = (t_sigcatch *)(w[1]); + t_sample *in = x->x_vec; t_sample *out = (t_sample *)(w[2]); int n = (int)(w[3]); + LOCK(x->x_lock); while (n--) *out++ = *in, *in++ = 0; + UNLOCK(x->x_lock); return (w+4); } /* tb: vectorized catch function */ static t_int *sigcatch_perf8(t_int *w) { - t_sample *in = (t_sample *)(w[1]); + t_sigcatch *x = (t_sigcatch *)(w[1]); + t_sample *in = x->x_vec; t_sample *out = (t_sample *)(w[2]); int n = (int)(w[3]); + /* reading + writing */ + LOCK(x->x_lock); for (; n; n -= 8, in += 8, out += 8) { out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; out[3] = in[3]; @@ -234,6 +287,7 @@ static t_int *sigcatch_perf8(t_int *w) in[0] = 0; in[1] = 0; in[2] = 0; in[3] = 0; in[4] = 0; in[5] = 0; in[6] = 0; in[7] = 0; } + UNLOCK(x->x_lock); return (w+4); } @@ -242,9 +296,9 @@ static void sigcatch_dsp(t_sigcatch *x, t_signal **sp) if (x->x_n == sp[0]->s_n) { if(sp[0]->s_n&7) - dsp_add(sigcatch_perform, 3, x->x_vec, sp[0]->s_vec, (t_int)sp[0]->s_n); + dsp_add(sigcatch_perform, 3, x, sp[0]->s_vec, (t_int)sp[0]->s_n); else - dsp_add(sigcatch_perf8, 3, x->x_vec, sp[0]->s_vec, (t_int)sp[0]->s_n); + dsp_add(sigcatch_perf8, 3, x, sp[0]->s_vec, (t_int)sp[0]->s_n); } else pd_error(0, "sigcatch %s: unexpected vector size", x->x_sym->s_name); } @@ -274,6 +328,9 @@ typedef struct _sigthrow t_sample *x_whereto; int x_n; t_float x_f; +#if PD_DSPTHREADS + t_rwspinlock *x_lock; +#endif } t_sigthrow; static void *sigthrow_new(t_symbol *s) @@ -283,6 +340,9 @@ static void *sigthrow_new(t_symbol *s) x->x_whereto = 0; x->x_n = DEFSENDVS; x->x_f = 0; +#if PD_DSPTHREADS + x->x_lock = 0; +#endif return (x); } @@ -294,12 +354,14 @@ 
static t_int *sigthrow_perform(t_int *w) t_sample *out = x->x_whereto; if (out) { + LOCK(*x->x_lock); while (n--) { *out += (PD_BIGORSMALL(*in) ? 0 : *in); out++; in++; } + UNLOCK(*x->x_lock); } return (w+4); } @@ -311,11 +373,19 @@ static void sigthrow_set(t_sigthrow *x, t_symbol *s) if (catcher) { if (catcher->x_n == x->x_n) + { x->x_whereto = catcher->x_vec; + #if PD_DSPTHREADS + x->x_lock = &catcher->x_lock; + #endif + } else { pd_error(x, "throw~ %s: vector size mismatch", x->x_sym->s_name); x->x_whereto = 0; + #if PD_DSPTHREADS + x->x_lock = 0; + #endif } } else x->x_whereto = 0; /* no match: now no longer considered an error */ From 9d27289dc5f1b556a14aad5ece6a8f1ae47789fc Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Mon, 27 Dec 2021 00:06:00 +0100 Subject: [PATCH 10/32] make dac~ thread-safe protect st_soundout with spinlocks --- src/d_dac.c | 43 +++++++++++++++++++++++++++++++++++++++++-- src/m_class.c | 7 +++++++ src/s_audio.c | 41 +++++++++++++++++++++++++++++++++-------- src/s_stuff.h | 1 + 4 files changed, 82 insertions(+), 10 deletions(-) diff --git a/src/d_dac.c b/src/d_dac.c index 45e0027477..b5aac6c71a 100644 --- a/src/d_dac.c +++ b/src/d_dac.c @@ -8,6 +8,10 @@ #include "m_pd.h" #include "s_stuff.h" +#if PD_DSPTHREADS +#include "s_spinlock.h" +#endif + /* ----------------------------- dac~ --------------------------- */ static t_class *dac_class; @@ -41,6 +45,30 @@ static void *dac_new(t_symbol *s, int argc, t_atom *argv) return (x); } +#if PD_DSPTHREADS +t_int *dac_perform8(t_int *w) +{ + t_sample *in = (t_sample *)(w[1]); + t_sample *out = (t_sample *)(w[2]); + t_spinlock *lock = (t_spinlock *)(w[3]); + int n = DEFDACBLKSIZE; + spinlock_lock(lock); + for (; n; n -= 8, in += 8, out += 8) + { + t_sample f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3]; + t_sample f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7]; + + t_sample g0 = out[0], g1 = out[1], g2 = out[2], g3 = out[3]; + t_sample g4 = out[4], g5 = out[5], g6 = out[6], g7 = out[7]; + + 
out[0] = f0 + g0; out[1] = f1 + g1; out[2] = f2 + g2; out[3] = f3 + g3; + out[4] = f4 + g4; out[5] = f5 + g5; out[6] = f6 + g6; out[7] = f7 + g7; + } + spinlock_unlock(lock); + return w+4; +} +#endif /* PD_DSPTHREADS */ + static void dac_dsp(t_dac *x, t_signal **sp) { t_int i, *ip; @@ -51,8 +79,19 @@ static void dac_dsp(t_dac *x, t_signal **sp) if ((*sp2)->s_n != DEFDACBLKSIZE) pd_error(0, "dac~: bad vector size"); else if (ch >= 0 && ch < sys_get_outchannels()) - dsp_add(plus_perform, 4, STUFF->st_soundout + DEFDACBLKSIZE*ch, - (*sp2)->s_vec, STUFF->st_soundout + DEFDACBLKSIZE*ch, (t_int)DEFDACBLKSIZE); + { + t_sample *in = (*sp2)->s_vec; + t_sample *out = STUFF->st_soundout + DEFDACBLKSIZE*ch; + #if PD_DSPTHREADS + t_spinlock *lock = &STUFF->st_soundout_locks[ch]; + if (!(sp[0]->s_n & 7)) /* always true for DEFDACBLKSIZE */ + dsp_add(dac_perform8, 3, in, out, lock); + else + bug("dac_dsp"); + #else + dsp_add_plus(out, in, out, DEFDACBLKSIZE); + #endif + } } } diff --git a/src/m_class.c b/src/m_class.c index d4bc82a587..495bd9921a 100644 --- a/src/m_class.c +++ b/src/m_class.c @@ -60,13 +60,20 @@ void s_stuff_newpdinstance(void) STUFF->st_externlist = STUFF->st_searchpath = STUFF->st_staticpath = STUFF->st_helppath = STUFF->st_temppath = 0; STUFF->st_schedblocksize = STUFF->st_blocksize = DEFDACBLKSIZE; + STUFF->st_inchannels = STUFF->st_outchannels = 0; STUFF->st_dacsr = DEFDACSAMPLERATE; + STUFF->st_soundin = NULL; + STUFF->st_soundout = NULL; STUFF->st_printhook = sys_printhook; STUFF->st_impdata = NULL; + STUFF->st_soundout_locks = NULL; } +void sys_audio_free(void); + void s_stuff_freepdinstance(void) { + sys_audio_free(); freebytes(STUFF, sizeof(*STUFF)); } diff --git a/src/s_audio.c b/src/s_audio.c index 5e41dd4513..c8181076c6 100644 --- a/src/s_audio.c +++ b/src/s_audio.c @@ -22,6 +22,10 @@ #include #include +#if PD_DSPTHREADS +#include "s_spinlock.h" +#endif + #ifdef _MSC_VER #define snprintf _snprintf #endif @@ -79,6 +83,26 @@ static int 
audio_getfixedblocksize(int api) return 0; } +void sys_audio_free(void) +{ + if (STUFF->st_soundin) + freebytes(STUFF->st_soundin, + (STUFF->st_inchannels ? STUFF->st_inchannels : 2) * + (DEFDACBLKSIZE * sizeof(t_sample))); + STUFF->st_soundin = 0; + if (STUFF->st_soundout) + freebytes(STUFF->st_soundout, + (STUFF->st_outchannels ? STUFF->st_outchannels : 2) * + (DEFDACBLKSIZE * sizeof(t_sample))); + STUFF->st_soundout = 0; +#if PD_DSPTHREADS + if (STUFF->st_soundout_locks) + freebytes(STUFF->st_soundout_locks, + STUFF->st_outchannels * sizeof(t_spinlock)); + STUFF->st_soundout_locks = 0; +#endif +} + /* inform rest of Pd of current channels and sample rate. Do this when opening audio device. This is also called from alsamm but I think that is no longer in use, so in principle this could be static. */ @@ -89,15 +113,10 @@ void sys_setchsr(int chin, int chout, int sr) (DEFDACBLKSIZE*sizeof(t_sample)); int outbytes = (chout ? chout : 2) * (DEFDACBLKSIZE*sizeof(t_sample)); + int i; + + sys_audio_free(); - if (STUFF->st_soundin) - freebytes(STUFF->st_soundin, - (STUFF->st_inchannels? STUFF->st_inchannels : 2) * - (DEFDACBLKSIZE*sizeof(t_sample))); - if (STUFF->st_soundout) - freebytes(STUFF->st_soundout, - (STUFF->st_outchannels? 
STUFF->st_outchannels : 2) * - (DEFDACBLKSIZE*sizeof(t_sample))); STUFF->st_inchannels = chin; STUFF->st_outchannels = chout; if (!audio_isfixedsr(sys_audioapiopened)) @@ -109,6 +128,12 @@ void sys_setchsr(int chin, int chout, int sr) STUFF->st_soundout = (t_sample *)getbytes(outbytes); memset(STUFF->st_soundout, 0, outbytes); +#if PD_DSPTHREADS + STUFF->st_soundout_locks = (t_spinlock *)getbytes(chout * sizeof(t_spinlock)); + for (i = 0; i < chout; i++) + spinlock_init(&STUFF->st_soundout_locks[i]); +#endif + logpost(NULL, PD_VERBOSE, "input channels = %d, output channels = %d", STUFF->st_inchannels, STUFF->st_outchannels); canvas_resume_dsp(canvas_suspend_dsp()); diff --git a/src/s_stuff.h b/src/s_stuff.h index 8295e898eb..15678e0d31 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -440,6 +440,7 @@ struct _instancestuff double st_time_per_dsp_tick; /* obsolete - included for GEM?? */ t_printhook st_printhook; /* set this to override per-instance printing */ void *st_impdata; /* optional implementation-specific data for libpd, etc */ + struct _spinlock *st_soundout_locks; /* spinlocks for dac~ */ }; #define STUFF (pd_this->pd_stuff) From 7767d49291f91fe10322180999bc7dff3a897d61 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Tue, 28 Dec 2021 17:15:02 +0100 Subject: [PATCH 11/32] make FFT thread-safe --- src/d_fft_fftsg.c | 20 ++++++++++++++------ src/d_fft_fftw.c | 13 ++++++++----- src/d_threadpool.c | 5 +++++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/d_fft_fftsg.c b/src/d_fft_fftsg.c index e7517993f1..201e2267d9 100644 --- a/src/d_fft_fftsg.c +++ b/src/d_fft_fftsg.c @@ -21,6 +21,7 @@ for another, more permissive-sounding copyright notice. -MSP /* ---------- Pd interface to OOURA FFT; imitate Mayer API ---------- */ #include "m_pd.h" #include "m_imp.h" +#include "s_stuff.h" #ifdef _WIN32 # include /* MSVC or mingw on windows */ @@ -30,17 +31,24 @@ for another, more permissive-sounding copyright notice. 
-MSP # include /* BSDs for example */ #endif +#if PD_DSPTHREADS +/* always thread-local! */ +#define FFT_PERTHREAD THREADLOCAL +#else +#define FFT_PERTHREAD PERTHREAD +#endif + #define FFTFLT double void cdft(int, int, FFTFLT *, int *, FFTFLT *); void rdft(int, int, FFTFLT *, int *, FFTFLT *); int ilog2(int n); -static PERTHREAD int ooura_maxn; -static PERTHREAD int *ooura_bitrev; -static PERTHREAD int ooura_bitrevsize; -static PERTHREAD FFTFLT *ooura_costab; -static PERTHREAD FFTFLT *ooura_buffer; +static FFT_PERTHREAD int ooura_maxn; +static FFT_PERTHREAD int *ooura_bitrev; +static FFT_PERTHREAD int ooura_bitrevsize; +static FFT_PERTHREAD FFTFLT *ooura_costab; +static FFT_PERTHREAD FFTFLT *ooura_buffer; static int ooura_init( int n) { @@ -101,7 +109,7 @@ static void ooura_term( void) } /* -------- initialization and cleanup -------- */ -static PERTHREAD int mayer_refcount = 0; +static FFT_PERTHREAD int mayer_refcount = 0; void mayer_init( void) { diff --git a/src/d_fft_fftw.c b/src/d_fft_fftw.c index 10d1ce403f..554402b5d9 100644 --- a/src/d_fft_fftw.c +++ b/src/d_fft_fftw.c @@ -7,6 +7,7 @@ /* changes and additions for FFTW3 by Thomas Grill */ #include "m_pd.h" +#include "s_stuff.h" #include int ilog2(int n); @@ -147,14 +148,16 @@ static void rfftw_term(void) } } -static int mayer_refcount = 0; +#if PD_DSPTHREADS +/* always thread-local! 
*/ +static THREADLOCAL int mayer_refcount = 0; +#else +static PERTHREAD int mayer_refcount = 0; +#endif void mayer_init(void) { - if (mayer_refcount++ == 0) - { - /* nothing to do */ - } + mayer_refcount++; } void mayer_term(void) diff --git a/src/d_threadpool.c b/src/d_threadpool.c index 324dd01f68..507c6cfd20 100644 --- a/src/d_threadpool.c +++ b/src/d_threadpool.c @@ -462,6 +462,8 @@ static t_dsptask * dspthreadpool_pop(void) static void dsptask_run(t_dsptask *x, int index); void dspthread_setindex(int index); +void mayer_init(void); +void mayer_term(void); static void dspthread_dorun(int index) { @@ -469,6 +471,7 @@ static void dspthread_dorun(int index) fprintf(stderr, "DSP thread %d: start\n", index); #endif dspthread_setindex(index); + mayer_init(); /* init FFT */ #ifdef MSVC_INTERLOCKED while (d_threadpool->tp_running) @@ -489,6 +492,8 @@ static void dspthread_dorun(int index) fprintf(stderr, "DSP thread %d: wake up\n", index); #endif } + + mayer_term(); /* term FFT */ #ifdef DEBUG_DSPTHREADS fprintf(stderr, "DSP thread %d: finish\n", index); #endif From 3be5d4ac3a4b763a056c0f498889c25781757c32 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Wed, 22 Dec 2021 13:37:33 +0100 Subject: [PATCH 12/32] make sys_vgui() thread-safe (mainly for posting) --- src/s_inter.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/s_inter.c b/src/s_inter.c index 020c895485..50b52daa11 100644 --- a/src/s_inter.c +++ b/src/s_inter.c @@ -125,6 +125,9 @@ struct _instanceinter t_guiqueue *i_guiqueuehead; t_binbuf *i_inbinbuf; char *i_guibuf; +#if PD_DSPTHREADS + pthread_mutex_t i_guimutex; +#endif int i_guihead; int i_guitail; int i_guisize; @@ -793,6 +796,9 @@ void sys_vgui(const char *fmt, ...) if (!sys_havegui()) return; +#if PD_DSPTHREADS + pthread_mutex_lock(&INTER->i_guimutex); +#endif if (!INTER->i_guibuf) { if (!(INTER->i_guibuf = malloc(GUI_ALLOCCHUNK))) @@ -816,6 +822,9 @@ void sys_vgui(const char *fmt, ...) 
{ fprintf(stderr, "Pd: buffer space wasn't sufficient for long GUI string\n"); + #if PD_DSPTHREADS + pthread_mutex_unlock(&INTER->i_guimutex); + #endif return; } if (msglen >= INTER->i_guisize - INTER->i_guihead) @@ -860,6 +869,9 @@ void sys_vgui(const char *fmt, ...) } INTER->i_guihead += msglen; INTER->i_bytessincelastping += msglen; +#if PD_DSPTHREADS + pthread_mutex_unlock(&INTER->i_guimutex); +#endif } void sys_gui(const char *s) @@ -1657,6 +1669,9 @@ void s_inter_newpdinstance(void) pthread_mutex_init(&INTER->i_mutex, NULL); pd_this->pd_islocked = 0; #endif +#if PD_DSPTHREADS + pthread_mutex_init(&INTER->i_guimutex, NULL); +#endif #ifdef _WIN32 INTER->i_freq = 0; #endif @@ -1675,6 +1690,9 @@ void s_inter_free(t_instanceinter *inter) } #if PDTHREADS pthread_mutex_destroy(&INTER->i_mutex); +#endif +#if PD_DSPTHREADS + pthread_mutex_destroy(&INTER->i_guimutex); #endif freebytes(inter, sizeof(*inter)); } From f800d38d6b0752990e5e9ea8bbeada55f5f09265 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Wed, 22 Dec 2021 05:56:54 +0100 Subject: [PATCH 13/32] mark API functions as threadsafe --- src/m_pd.h | 85 +++++++++++++++++++++++++++--------------------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/src/m_pd.h b/src/m_pd.h index aaee95822e..56135b9574 100644 --- a/src/m_pd.h +++ b/src/m_pd.h @@ -111,6 +111,8 @@ typedef unsigned __int64 uint64_t; # error invalid FLOATSIZE: must be 32 or 64 #endif +#define THREADSAFE + typedef PD_LONGINTTYPE t_int; /* pointer-size integer */ typedef PD_FLOATTYPE t_float; /* a float type at most the same size */ typedef PD_FLOATTYPE t_floatarg; /* float type for function calls */ @@ -373,16 +375,15 @@ EXTERN t_symbol *binbuf_realizedollsym(t_symbol *s, int ac, const t_atom *av, /* ------------------ clocks --------------- */ EXTERN t_clock *clock_new(void *owner, t_method fn); -EXTERN void clock_set(t_clock *x, double systime); -EXTERN void clock_delay(t_clock *x, double delaytime); -EXTERN void 
clock_unset(t_clock *x); -EXTERN void clock_setunit(t_clock *x, double timeunit, int sampflag); -EXTERN double clock_getlogicaltime(void); -EXTERN double clock_getsystime(void); /* OBSOLETE; use clock_getlogicaltime() */ -EXTERN double clock_gettimesince(double prevsystime); -EXTERN double clock_gettimesincewithunits(double prevsystime, - double units, int sampflag); -EXTERN double clock_getsystimeafter(double delaytime); +THREADSAFE EXTERN void clock_set(t_clock *x, double systime); +THREADSAFE EXTERN void clock_delay(t_clock *x, double delaytime); +THREADSAFE EXTERN void clock_unset(t_clock *x); +THREADSAFE EXTERN void clock_setunit(t_clock *x, double timeunit, int sampflag); +THREADSAFE EXTERN double clock_getlogicaltime(void); +THREADSAFE EXTERN double clock_getsystime(void); /* OBSOLETE; use clock_getlogicaltime() */ +THREADSAFE EXTERN double clock_gettimesince(double prevsystime); +THREADSAFE EXTERN double clock_gettimesincewithunits(double prevsystime, double units, int sampflag); +THREADSAFE EXTERN double clock_getsystimeafter(double delaytime); EXTERN void clock_free(t_clock *x); /* ----------------- pure data ---------------- */ @@ -533,15 +534,15 @@ EXTERN void class_setfreefn(t_class *c, t_classfreefn fn); /* ------------ printing --------------------------------- */ -EXTERN void post(const char *fmt, ...); -EXTERN void startpost(const char *fmt, ...); -EXTERN void poststring(const char *s); -EXTERN void postfloat(t_floatarg f); -EXTERN void postatom(int argc, const t_atom *argv); -EXTERN void endpost(void); +THREADSAFE EXTERN void post(const char *fmt, ...); +THREADSAFE EXTERN void startpost(const char *fmt, ...); +THREADSAFE EXTERN void poststring(const char *s); +THREADSAFE EXTERN void postfloat(t_floatarg f); +THREADSAFE EXTERN void postatom(int argc, const t_atom *argv); +THREADSAFE EXTERN void endpost(void); -EXTERN void bug(const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(1, 2); -EXTERN void pd_error(const void *object, const char *fmt, ...) 
ATTRIBUTE_FORMAT_PRINTF(2, 3); +THREADSAFE EXTERN void bug(const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(1, 2); +THREADSAFE EXTERN void pd_error(const void *object, const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(2, 3); /* for logpost(); does *not* work with verbose()! */ typedef enum { @@ -552,11 +553,11 @@ typedef enum { PD_VERBOSE } t_loglevel; -EXTERN void logpost(const void *object, int level, const char *fmt, ...) +THREADSAFE EXTERN void logpost(const void *object, int level, const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(3, 4); /* deprecated, use logpost() instead. */ -EXTERN void verbose(int level, const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(2, 3); +THREADSAFE EXTERN void verbose(int level, const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(2, 3); /* ------------ system interface routines ------------------- */ @@ -625,14 +626,14 @@ EXTERN int sys_get_outchannels(void); EXTERN void dsp_add(t_perfroutine f, int n, ...); EXTERN void dsp_addv(t_perfroutine f, int n, t_int *vec); -EXTERN void pd_fft(t_float *buf, int npoints, int inverse); -EXTERN int ilog2(int n); +THREADSAFE EXTERN void pd_fft(t_float *buf, int npoints, int inverse); +THREADSAFE EXTERN int ilog2(int n); -EXTERN void mayer_fht(t_sample *fz, int n); -EXTERN void mayer_fft(int n, t_sample *real, t_sample *imag); -EXTERN void mayer_ifft(int n, t_sample *real, t_sample *imag); -EXTERN void mayer_realfft(int n, t_sample *real); -EXTERN void mayer_realifft(int n, t_sample *real); +THREADSAFE EXTERN void mayer_fht(t_sample *fz, int n); +THREADSAFE EXTERN void mayer_fft(int n, t_sample *real, t_sample *imag); +THREADSAFE EXTERN void mayer_ifft(int n, t_sample *real, t_sample *imag); +THREADSAFE EXTERN void mayer_realfft(int n, t_sample *real); +THREADSAFE EXTERN void mayer_realifft(int n, t_sample *real); EXTERN float *cos_table; #define LOGCOSTABSIZE 9 @@ -669,18 +670,18 @@ EXTERN void resamplefrom_dsp(t_resample *x, t_sample *in, int insize, int outsiz EXTERN void resampleto_dsp(t_resample *x, t_sample 
*out, int insize, int outsize, int method); /* ----------------------- utility functions for signals -------------- */ -EXTERN t_float mtof(t_float); -EXTERN t_float ftom(t_float); -EXTERN t_float rmstodb(t_float); -EXTERN t_float powtodb(t_float); -EXTERN t_float dbtorms(t_float); -EXTERN t_float dbtopow(t_float); - -EXTERN t_float q8_sqrt(t_float); -EXTERN t_float q8_rsqrt(t_float); +THREADSAFE EXTERN t_float mtof(t_float); +THREADSAFE EXTERN t_float ftom(t_float); +THREADSAFE EXTERN t_float rmstodb(t_float); +THREADSAFE EXTERN t_float powtodb(t_float); +THREADSAFE EXTERN t_float dbtorms(t_float); +THREADSAFE EXTERN t_float dbtopow(t_float); + +THREADSAFE EXTERN t_float q8_sqrt(t_float); +THREADSAFE EXTERN t_float q8_rsqrt(t_float); #ifndef N32 -EXTERN t_float qsqrt(t_float); /* old names kept for extern compatibility */ -EXTERN t_float qrsqrt(t_float); +THREADSAFE EXTERN t_float qsqrt(t_float); /* old names kept for extern compatibility */ +THREADSAFE EXTERN t_float qrsqrt(t_float); #endif /* --------------------- data --------------------------------- */ @@ -736,10 +737,10 @@ EXTERN int garrayref_get(t_garrayref *x, int *size, t_word **vec, t_symbol *arra /* for DSP objects: lock/unlock garray for reading/writing in the perform routine. * Returns 1 if it could get the array data and lock the garray; otherwise returns 0. * WARNING: do not attempt to unlock the garray if you could not lock it! 
*/ -EXTERN int garrayref_write_lock(t_garrayref *x, int *size, t_word **vec); -EXTERN void garrayref_write_unlock(t_garrayref *x); -EXTERN int garrayref_read_lock(t_garrayref *x, int *size, t_word **vec); -EXTERN void garrayref_read_unlock(t_garrayref *x); +THREADSAFE EXTERN int garrayref_write_lock(t_garrayref *x, int *size, t_word **vec); +THREADSAFE EXTERN void garrayref_write_unlock(t_garrayref *x); +THREADSAFE EXTERN int garrayref_read_lock(t_garrayref *x, int *size, t_word **vec); +THREADSAFE EXTERN void garrayref_read_unlock(t_garrayref *x); EXTERN t_float *value_get(t_symbol *s); EXTERN void value_release(t_symbol *s); From ac41950fd8cf5355ea765040fe5f0b806cd2c41c Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Wed, 22 Dec 2021 13:54:57 +0100 Subject: [PATCH 14/32] add PD_PARALLEL define to m_pd.h and implement it in class_new() --- src/m_class.c | 1 + src/m_imp.h | 1 + src/m_pd.h | 21 ++++++++++++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/m_class.c b/src/m_class.c index 495bd9921a..782d9c8eca 100644 --- a/src/m_class.c +++ b/src/m_class.c @@ -499,6 +499,7 @@ t_class *class_new(t_symbol *s, t_newmethod newmethod, t_method freemethod, c->c_pwb = 0; c->c_firstin = ((flags & CLASS_NOINLET) == 0); c->c_patchable = (typeflag == CLASS_PATCHABLE); + c->c_threadsafe = (flags & CLASS_THREADSAFE) != 0; c->c_gobj = (typeflag >= CLASS_GOBJ); c->c_drawcommand = 0; c->c_floatsignalin = 0; diff --git a/src/m_imp.h b/src/m_imp.h index 96f1885379..d6d9774808 100644 --- a/src/m_imp.h +++ b/src/m_imp.h @@ -58,6 +58,7 @@ struct _class char c_firstin; /* if patchable, true if draw first inlet */ char c_drawcommand; /* a drawing command for a template */ t_classfreefn c_classfreefn; /* function to call before freeing class */ + char c_threadsafe; /* can be safely used in parallel DSP */ }; /* m_pd.c */ diff --git a/src/m_pd.h b/src/m_pd.h index 56135b9574..c3c6bf7524 100644 --- a/src/m_pd.h +++ b/src/m_pd.h @@ -111,6 +111,11 @@ typedef 
unsigned __int64 uint64_t; # error invalid FLOATSIZE: must be 32 or 64 #endif +/* override for parallel processing support */ +#ifndef PD_PARALLEL +#define PD_PARALLEL 1 +#endif + #define THREADSAFE typedef PD_LONGINTTYPE t_int; /* pointer-size integer */ @@ -462,14 +467,21 @@ EXTERN const t_parentwidgetbehavior *pd_getparentwidget(t_pd *x); /* -------------------- classes -------------- */ -#define CLASS_DEFAULT 0 /* flags for new classes below */ +/* flags for new classes below */ #define CLASS_PD 1 #define CLASS_GOBJ 2 #define CLASS_PATCHABLE 3 +#define CLASS_THREADSAFE 4 #define CLASS_NOINLET 8 #define CLASS_TYPEMASK 3 +#if PD_PARALLEL +# define CLASS_DEFAULT CLASS_THREADSAFE +#else +# define CLASS_DEFAULT 0 +#endif + EXTERN t_class *class_new(t_symbol *name, t_newmethod newmethod, t_method freemethod, size_t size, int flags, t_atomtype arg1, ...); @@ -737,10 +749,17 @@ EXTERN int garrayref_get(t_garrayref *x, int *size, t_word **vec, t_symbol *arra /* for DSP objects: lock/unlock garray for reading/writing in the perform routine. * Returns 1 if it could get the array data and lock the garray; otherwise returns 0. * WARNING: do not attempt to unlock the garray if you could not lock it! 
*/ +#if PD_PARALLEL THREADSAFE EXTERN int garrayref_write_lock(t_garrayref *x, int *size, t_word **vec); THREADSAFE EXTERN void garrayref_write_unlock(t_garrayref *x); THREADSAFE EXTERN int garrayref_read_lock(t_garrayref *x, int *size, t_word **vec); THREADSAFE EXTERN void garrayref_read_unlock(t_garrayref *x); +#else +#define garrayref_write_lock(x, size, vec) garrayref_get(x, size, vec, 0, 0) +#define garrayref_write_unlock(x) +#define garrayref_read_lock(x, size, vec) garrayref_get(x, size, vec, 0, 0) +#define garrayref_read_unlock(x) +#endif /* PD_PARALLEL */ EXTERN t_float *value_get(t_symbol *s); EXTERN void value_release(t_symbol *s); From 30846acbf6238e7fe49e82270375f813aa0b8c08 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Sat, 1 Jan 2022 21:38:40 +0100 Subject: [PATCH 15/32] use CLASS_DEFAULT for all DSP objects expands to CLASS_THREADSAFE if PD_PARALLEL is 1 --- doc/6.externs/dspobj~.c | 2 +- extra/bob~/bob~.c | 2 +- extra/bonk~/bonk~.c | 2 +- extra/fiddle~/fiddle~.c | 2 +- extra/loop~/loop~.c | 2 +- extra/lrshift~/lrshift~.c | 6 +++--- extra/pd~/pd~.c | 2 +- extra/sigmund~/sigmund~.c | 2 +- src/d_arithmetic.c | 24 ++++++++++++------------ src/d_array.c | 14 +++++++------- src/d_ctl.c | 14 +++++++------- src/d_dac.c | 4 ++-- src/d_delay.c | 6 +++--- src/d_fft.c | 10 +++++----- src/d_filter.c | 34 +++++++++++++++++----------------- src/d_global.c | 9 +++++---- src/d_math.c | 28 ++++++++++++++-------------- src/d_misc.c | 4 ++-- src/d_osc.c | 10 +++++----- src/d_soundfile.c | 4 ++-- src/d_ugen.c | 2 +- src/g_canvas.c | 2 +- src/g_clone.c | 2 +- src/g_io.c | 4 ++-- src/x_array.c | 2 +- src/x_scalar.c | 2 +- src/x_vexp_if.c | 4 ++-- 27 files changed, 100 insertions(+), 99 deletions(-) diff --git a/doc/6.externs/dspobj~.c b/doc/6.externs/dspobj~.c index 92da4d3654..c5abb1c5d9 100644 --- a/doc/6.externs/dspobj~.c +++ b/doc/6.externs/dspobj~.c @@ -58,7 +58,7 @@ static void *dspobj_new(void) void dspobj_tilde_setup(void) { dspobj_class = 
class_new(gensym("dspobj~"), (t_newmethod)dspobj_new, 0, - sizeof(t_dspobj), 0, A_DEFFLOAT, 0); + sizeof(t_dspobj), CLASS_DEFAULT, A_DEFFLOAT, 0); /* this is magic to declare that the leftmost, "main" inlet takes signals; other signal inlets are done differently... */ CLASS_MAINSIGNALIN(dspobj_class, t_dspobj, x_f); diff --git a/extra/bob~/bob~.c b/extra/bob~/bob~.c index 0e99e39170..89609e766d 100644 --- a/extra/bob~/bob~.c +++ b/extra/bob~/bob~.c @@ -244,7 +244,7 @@ void bob_tilde_setup(void) { int i; bob_class = class_new(gensym("bob~"), - (t_newmethod)bob_new, 0, sizeof(t_bob), 0, 0); + (t_newmethod)bob_new, 0, sizeof(t_bob), CLASS_DEFAULT, 0); class_addmethod(bob_class, (t_method)bob_saturation, gensym("saturation"), A_FLOAT, 0); class_addmethod(bob_class, (t_method)bob_oversample, gensym("oversample"), diff --git a/extra/bonk~/bonk~.c b/extra/bonk~/bonk~.c index 30191a19ef..3b8e96dbb6 100644 --- a/extra/bonk~/bonk~.c +++ b/extra/bonk~/bonk~.c @@ -1355,7 +1355,7 @@ static void *bonk_new(t_symbol *s, int argc, t_atom *argv) void bonk_tilde_setup(void) { bonk_class = class_new(gensym("bonk~"), (t_newmethod)bonk_new, - (t_method)bonk_free, sizeof(t_bonk), 0, A_GIMME, 0); + (t_method)bonk_free, sizeof(t_bonk), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(bonk_class, nullfn, gensym("signal"), 0); class_addmethod(bonk_class, (t_method)bonk_dsp, gensym("dsp"), A_CANT, 0); class_addbang(bonk_class, bonk_bang); diff --git a/extra/fiddle~/fiddle~.c b/extra/fiddle~/fiddle~.c index 50016565a4..2fd585f73d 100644 --- a/extra/fiddle~/fiddle~.c +++ b/extra/fiddle~/fiddle~.c @@ -1498,7 +1498,7 @@ void *sigfiddle_new(t_floatarg npoints, t_floatarg npitch, void fiddle_tilde_setup(void) { sigfiddle_class = class_new(gensym("fiddle~"), (t_newmethod)sigfiddle_new, - (t_method)sigfiddle_ff, sizeof(t_sigfiddle), 0, + (t_method)sigfiddle_ff, sizeof(t_sigfiddle), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addmethod(sigfiddle_class, (t_method)sigfiddle_dsp, 
gensym("dsp"), 0); diff --git a/extra/loop~/loop~.c b/extra/loop~/loop~.c index 3528a56fb4..f9d97c4964 100644 --- a/extra/loop~/loop~.c +++ b/extra/loop~/loop~.c @@ -158,7 +158,7 @@ static void loop_bang(t_loop *x) void loop_tilde_setup(void) { loop_class = class_new(gensym("loop~"), (t_newmethod)loop_new, 0, - sizeof(t_loop), 0, 0); + sizeof(t_loop), CLASS_DEFAULT, 0); class_addmethod(loop_class, (t_method)loop_dsp, gensym("dsp"), A_CANT, 0); CLASS_MAINSIGNALIN(loop_class, t_loop, x_f); class_addmethod(loop_class, (t_method)loop_set, gensym("set"), diff --git a/extra/lrshift~/lrshift~.c b/extra/lrshift~/lrshift~.c index 377b43fb8a..61a6b9119b 100644 --- a/extra/lrshift~/lrshift~.c +++ b/extra/lrshift~/lrshift~.c @@ -68,9 +68,9 @@ static void *lrshift_tilde_new(t_floatarg f) void lrshift_tilde_setup(void) { lrshift_tilde_class = class_new(gensym("lrshift~"), - (t_newmethod)lrshift_tilde_new, 0, sizeof(t_lrshift_tilde), 0, - A_DEFFLOAT, 0); + (t_newmethod)lrshift_tilde_new, 0, sizeof(t_lrshift_tilde), + CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(lrshift_tilde_class, t_lrshift_tilde, x_f); class_addmethod(lrshift_tilde_class, (t_method)lrshift_tilde_dsp, - gensym("dsp"), 0); + gensym("dsp"), A_CANT, 0); } diff --git a/extra/pd~/pd~.c b/extra/pd~/pd~.c index 84b613af69..071d9c8c05 100644 --- a/extra/pd~/pd~.c +++ b/extra/pd~/pd~.c @@ -1179,7 +1179,7 @@ static void *pd_tilde_new(t_symbol *s, int argc, t_atom *argv) void pd_tilde_setup(void) { pd_tilde_class = class_new(gensym("pd~"), (t_newmethod)pd_tilde_new, - (t_method)pd_tilde_free, sizeof(t_pd_tilde), 0, A_GIMME, 0); + (t_method)pd_tilde_free, sizeof(t_pd_tilde), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(pd_tilde_class, nullfn, gensym("signal"), 0); class_addmethod(pd_tilde_class, (t_method)pd_tilde_dsp, gensym("dsp"), A_CANT, 0); diff --git a/extra/sigmund~/sigmund~.c b/extra/sigmund~/sigmund~.c index 89994eaf6c..3a1ca1316a 100644 --- a/extra/sigmund~/sigmund~.c +++ b/extra/sigmund~/sigmund~.c @@ -1418,7 
+1418,7 @@ static void sigmund_printnext(t_sigmund *x, t_float f) void sigmund_tilde_setup(void) { sigmund_class = class_new(gensym("sigmund~"), (t_newmethod)sigmund_new, - (t_method)sigmund_free, sizeof(t_sigmund), 0, A_GIMME, 0); + (t_method)sigmund_free, sizeof(t_sigmund), CLASS_DEFAULT, A_GIMME, 0); class_addlist(sigmund_class, sigmund_list); class_addmethod(sigmund_class, (t_method)sigmund_dsp, gensym("dsp"), A_CANT, 0); diff --git a/src/d_arithmetic.c b/src/d_arithmetic.c index 78b573ebc0..6857196fef 100644 --- a/src/d_arithmetic.c +++ b/src/d_arithmetic.c @@ -93,12 +93,12 @@ static void scalarplus_dsp(t_scalarplus *x, t_signal **sp) static void plus_setup(void) { plus_class = class_new(gensym("+~"), (t_newmethod)plus_new, 0, - sizeof(t_plus), 0, A_GIMME, 0); + sizeof(t_plus), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(plus_class, (t_method)plus_dsp, gensym("dsp"), A_CANT, 0); CLASS_MAINSIGNALIN(plus_class, t_plus, x_f); class_sethelpsymbol(plus_class, gensym("binops-tilde")); scalarplus_class = class_new(gensym("+~"), 0, 0, - sizeof(t_scalarplus), 0, 0); + sizeof(t_scalarplus), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarplus_class, t_scalarplus, x_f); class_addmethod(scalarplus_class, (t_method)scalarplus_dsp, gensym("dsp"), A_CANT, 0); @@ -223,12 +223,12 @@ static void scalarminus_dsp(t_scalarminus *x, t_signal **sp) static void minus_setup(void) { minus_class = class_new(gensym("-~"), (t_newmethod)minus_new, 0, - sizeof(t_minus), 0, A_GIMME, 0); + sizeof(t_minus), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(minus_class, t_minus, x_f); class_addmethod(minus_class, (t_method)minus_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(minus_class, gensym("sigbinops")); scalarminus_class = class_new(gensym("-~"), 0, 0, - sizeof(t_scalarminus), 0, 0); + sizeof(t_scalarminus), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarminus_class, t_scalarminus, x_f); class_addmethod(scalarminus_class, (t_method)scalarminus_dsp, gensym("dsp"), A_CANT, 0); @@ -354,12 +354,12 
@@ static void scalartimes_dsp(t_scalartimes *x, t_signal **sp) static void times_setup(void) { times_class = class_new(gensym("*~"), (t_newmethod)times_new, 0, - sizeof(t_times), 0, A_GIMME, 0); + sizeof(t_times), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(times_class, t_times, x_f); class_addmethod(times_class, (t_method)times_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(times_class, gensym("sigbinops")); scalartimes_class = class_new(gensym("*~"), 0, 0, - sizeof(t_scalartimes), 0, 0); + sizeof(t_scalartimes), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalartimes_class, t_scalartimes, x_f); class_addmethod(scalartimes_class, (t_method)scalartimes_dsp, gensym("dsp"), A_CANT, 0); @@ -496,12 +496,12 @@ static void scalarover_dsp(t_scalarover *x, t_signal **sp) static void over_setup(void) { over_class = class_new(gensym("/~"), (t_newmethod)over_new, 0, - sizeof(t_over), 0, A_GIMME, 0); + sizeof(t_over), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(over_class, t_over, x_f); class_addmethod(over_class, (t_method)over_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(over_class, gensym("sigbinops")); scalarover_class = class_new(gensym("/~"), 0, 0, - sizeof(t_scalarover), 0, 0); + sizeof(t_scalarover), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarover_class, t_scalarover, x_f); class_addmethod(scalarover_class, (t_method)scalarover_dsp, gensym("dsp"), A_CANT, 0); @@ -638,12 +638,12 @@ static void scalarmax_dsp(t_scalarmax *x, t_signal **sp) static void max_setup(void) { max_class = class_new(gensym("max~"), (t_newmethod)max_new, 0, - sizeof(t_max), 0, A_GIMME, 0); + sizeof(t_max), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(max_class, t_max, x_f); class_addmethod(max_class, (t_method)max_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(max_class, gensym("sigbinops")); scalarmax_class = class_new(gensym("max~"), 0, 0, - sizeof(t_scalarmax), 0, 0); + sizeof(t_scalarmax), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarmax_class, t_scalarmax, x_f); 
class_addmethod(scalarmax_class, (t_method)scalarmax_dsp, gensym("dsp"), A_CANT, 0); @@ -780,12 +780,12 @@ static void scalarmin_dsp(t_scalarmin *x, t_signal **sp) static void min_setup(void) { min_class = class_new(gensym("min~"), (t_newmethod)min_new, 0, - sizeof(t_min), 0, A_GIMME, 0); + sizeof(t_min), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(min_class, t_min, x_f); class_addmethod(min_class, (t_method)min_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(min_class, gensym("sigbinops")); scalarmin_class = class_new(gensym("min~"), 0, 0, - sizeof(t_scalarmin), 0, 0); + sizeof(t_scalarmin), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarmin_class, t_scalarmin, x_f); class_addmethod(scalarmin_class, (t_method)scalarmin_dsp, gensym("dsp"), A_CANT, 0); diff --git a/src/d_array.c b/src/d_array.c index 1c617cb340..68bb86e8ce 100644 --- a/src/d_array.c +++ b/src/d_array.c @@ -116,7 +116,7 @@ static void tabwrite_tilde_setup(void) { tabwrite_tilde_class = class_new(gensym("tabwrite~"), (t_newmethod)tabwrite_tilde_new, (t_method)tabwrite_tilde_free, - sizeof(t_tabwrite_tilde), 0, A_DEFSYM, 0); + sizeof(t_tabwrite_tilde), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabwrite_tilde_class, t_tabwrite_tilde, x_f); class_addmethod(tabwrite_tilde_class, (t_method)tabwrite_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -243,7 +243,7 @@ static void tabplay_tilde_setup(void) { tabplay_tilde_class = class_new(gensym("tabplay~"), (t_newmethod)tabplay_tilde_new, (t_method)tabplay_tilde_free, - sizeof(t_tabplay_tilde), 0, A_DEFSYM, 0); + sizeof(t_tabplay_tilde), CLASS_DEFAULT, A_DEFSYM, 0); class_addmethod(tabplay_tilde_class, (t_method)tabplay_tilde_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(tabplay_tilde_class, (t_method)tabplay_tilde_stop, @@ -334,7 +334,7 @@ static void tabread_tilde_setup(void) { tabread_tilde_class = class_new(gensym("tabread~"), (t_newmethod)tabread_tilde_new, (t_method)tabread_tilde_free, - sizeof(t_tabread_tilde), 0, A_DEFSYM, 0); + 
sizeof(t_tabread_tilde), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabread_tilde_class, t_tabread_tilde, x_f); class_addmethod(tabread_tilde_class, (t_method)tabread_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -455,7 +455,7 @@ static void tabread4_tilde_setup(void) { tabread4_tilde_class = class_new(gensym("tabread4~"), (t_newmethod)tabread4_tilde_new, (t_method)tabread4_tilde_free, - sizeof(t_tabread4_tilde), 0, A_DEFSYM, 0); + sizeof(t_tabread4_tilde), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabread4_tilde_class, t_tabread4_tilde, x_f); class_addmethod(tabread4_tilde_class, (t_method)tabread4_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -644,7 +644,7 @@ static void tabosc4_tilde_setup(void) { tabosc4_tilde_class = class_new(gensym("tabosc4~"), (t_newmethod)tabosc4_tilde_new, (t_method)tabosc4_tilde_free, - sizeof(t_tabosc4_tilde), 0, A_DEFSYM, 0); + sizeof(t_tabosc4_tilde), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabosc4_tilde_class, t_tabosc4_tilde, x_f); class_addmethod(tabosc4_tilde_class, (t_method)tabosc4_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -739,7 +739,7 @@ static void tabsend_dsp(t_tabsend *x, t_signal **sp) static void tabsend_setup(void) { tabsend_class = class_new(gensym("tabsend~"), (t_newmethod)tabsend_new, - (t_method)tabsend_free, sizeof(t_tabsend), 0, A_DEFSYM, 0); + (t_method)tabsend_free, sizeof(t_tabsend), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabsend_class, t_tabsend, x_f); class_addmethod(tabsend_class, (t_method)tabsend_dsp, gensym("dsp"), A_CANT, 0); @@ -813,7 +813,7 @@ static void tabreceive_setup(void) { tabreceive_class = class_new(gensym("tabreceive~"), (t_newmethod)tabreceive_new, (t_method)tabreceive_free, - sizeof(t_tabreceive), 0, A_DEFSYM, 0); + sizeof(t_tabreceive), CLASS_DEFAULT, A_DEFSYM, 0); class_addmethod(tabreceive_class, (t_method)tabreceive_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(tabreceive_class, (t_method)tabreceive_set, diff --git a/src/d_ctl.c b/src/d_ctl.c index 
9802f958ad..56da396e58 100644 --- a/src/d_ctl.c +++ b/src/d_ctl.c @@ -39,7 +39,7 @@ static void *sig_tilde_new(t_floatarg f) static void sig_tilde_setup(void) { sig_tilde_class = class_new(gensym("sig~"), (t_newmethod)sig_tilde_new, 0, - sizeof(t_sig), 0, A_DEFFLOAT, 0); + sizeof(t_sig), CLASS_DEFAULT, A_DEFFLOAT, 0); class_addfloat(sig_tilde_class, (t_method)sig_tilde_float); class_addmethod(sig_tilde_class, (t_method)sig_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -180,7 +180,7 @@ static void *line_tilde_new(void) static void line_tilde_setup(void) { line_tilde_class = class_new(gensym("line~"), line_tilde_new, 0, - sizeof(t_line), 0, 0); + sizeof(t_line), CLASS_DEFAULT, 0); class_addfloat(line_tilde_class, (t_method)line_tilde_float); class_addmethod(line_tilde_class, (t_method)line_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -374,7 +374,7 @@ static void *vline_tilde_new(void) static void vline_tilde_setup(void) { vline_tilde_class = class_new(gensym("vline~"), vline_tilde_new, - (t_method)vline_tilde_stop, sizeof(t_vline), 0, 0); + (t_method)vline_tilde_stop, sizeof(t_vline), CLASS_DEFAULT, 0); class_addfloat(vline_tilde_class, (t_method)vline_tilde_float); class_addmethod(vline_tilde_class, (t_method)vline_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -428,7 +428,7 @@ static void snapshot_tilde_set(t_snapshot *x, t_floatarg f) static void snapshot_tilde_setup(void) { snapshot_tilde_class = class_new(gensym("snapshot~"), snapshot_tilde_new, 0, - sizeof(t_snapshot), 0, 0); + sizeof(t_snapshot), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(snapshot_tilde_class, t_snapshot, x_f); class_addmethod(snapshot_tilde_class, (t_method)snapshot_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -516,7 +516,7 @@ static void vsnapshot_tilde_setup(void) { vsnapshot_tilde_class = class_new(gensym("vsnapshot~"), vsnapshot_tilde_new, (t_method)vsnapshot_tilde_ff, - sizeof(t_vsnapshot), 0, 0); + sizeof(t_vsnapshot), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(vsnapshot_tilde_class, t_vsnapshot, x_f); 
class_addmethod(vsnapshot_tilde_class, (t_method)vsnapshot_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -655,7 +655,7 @@ static void env_tilde_ff(t_sigenv *x) /* cleanup on free */ void env_tilde_setup(void) { env_tilde_class = class_new(gensym("env~"), (t_newmethod)env_tilde_new, - (t_method)env_tilde_ff, sizeof(t_sigenv), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + (t_method)env_tilde_ff, sizeof(t_sigenv), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(env_tilde_class, t_sigenv, x_f); class_addmethod(env_tilde_class, (t_method)env_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -785,7 +785,7 @@ static void threshold_tilde_setup(void) { threshold_tilde_class = class_new(gensym("threshold~"), (t_newmethod)threshold_tilde_new, (t_method)threshold_tilde_ff, - sizeof(t_threshold_tilde), 0, + sizeof(t_threshold_tilde), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(threshold_tilde_class, t_threshold_tilde, x_f); class_addmethod(threshold_tilde_class, (t_method)threshold_tilde_set, diff --git a/src/d_dac.c b/src/d_dac.c index b5aac6c71a..c814abb25d 100644 --- a/src/d_dac.c +++ b/src/d_dac.c @@ -111,7 +111,7 @@ static void dac_free(t_dac *x) static void dac_setup(void) { dac_class = class_new(gensym("dac~"), (t_newmethod)dac_new, - (t_method)dac_free, sizeof(t_dac), 0, A_GIMME, 0); + (t_method)dac_free, sizeof(t_dac), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(dac_class, t_dac, x_f); class_addmethod(dac_class, (t_method)dac_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(dac_class, (t_method)dac_set, gensym("set"), A_GIMME, 0); @@ -181,7 +181,7 @@ static void adc_free(t_adc *x) static void adc_setup(void) { adc_class = class_new(gensym("adc~"), (t_newmethod)adc_new, - (t_method)adc_free, sizeof(t_adc), 0, A_GIMME, 0); + (t_method)adc_free, sizeof(t_adc), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(adc_class, (t_method)adc_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(adc_class, (t_method)adc_set, gensym("set"), A_GIMME, 0); 
class_sethelpsymbol(adc_class, gensym("adc~_dac~")); diff --git a/src/d_delay.c b/src/d_delay.c index 7b2dd20708..b603e2f817 100644 --- a/src/d_delay.c +++ b/src/d_delay.c @@ -163,7 +163,7 @@ static void sigdelwrite_setup(void) { sigdelwrite_class = class_new(gensym("delwrite~"), (t_newmethod)sigdelwrite_new, (t_method)sigdelwrite_free, - sizeof(t_sigdelwrite), 0, A_DEFSYM, A_DEFFLOAT, 0); + sizeof(t_sigdelwrite), CLASS_DEFAULT, A_DEFSYM, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigdelwrite_class, t_sigdelwrite, x_f); class_addmethod(sigdelwrite_class, (t_method)sigdelwrite_dsp, gensym("dsp"), A_CANT, 0); @@ -263,7 +263,7 @@ static void sigdelread_setup(void) { sigdelread_class = class_new(gensym("delread~"), (t_newmethod)sigdelread_new, 0, - sizeof(t_sigdelread), 0, A_DEFSYM, A_DEFFLOAT, 0); + sizeof(t_sigdelread), CLASS_DEFAULT, A_DEFSYM, A_DEFFLOAT, 0); class_addmethod(sigdelread_class, (t_method)sigdelread_dsp, gensym("dsp"), A_CANT, 0); class_addfloat(sigdelread_class, (t_method)sigdelread_float); @@ -368,7 +368,7 @@ static void sigvd_dsp(t_sigvd *x, t_signal **sp) static void sigvd_setup(void) { sigvd_class = class_new(gensym("delread4~"), (t_newmethod)sigvd_new, 0, - sizeof(t_sigvd), 0, A_DEFSYM, 0); + sizeof(t_sigvd), CLASS_DEFAULT, A_DEFSYM, 0); class_addcreator((t_newmethod)sigvd_new, gensym("vd~"), A_DEFSYM, 0); class_addmethod(sigvd_class, (t_method)sigvd_dsp, gensym("dsp"), A_CANT, 0); CLASS_MAINSIGNALIN(sigvd_class, t_sigvd, x_f); diff --git a/src/d_fft.c b/src/d_fft.c index 9b1a1f25a0..a8f3ecd38f 100644 --- a/src/d_fft.c +++ b/src/d_fft.c @@ -134,7 +134,7 @@ static void sigifft_dsp(t_sigfft *x, t_signal **sp) static void sigfft_setup(void) { sigfft_class = class_new(gensym("fft~"), sigfft_new, 0, - sizeof(t_sigfft), 0, 0); + sizeof(t_sigfft), CLASS_DEFAULT, 0); class_setfreefn(sigfft_class, fftclass_cleanup); CLASS_MAINSIGNALIN(sigfft_class, t_sigfft, x_f); class_addmethod(sigfft_class, (t_method)sigfft_dsp, @@ -142,7 +142,7 @@ static void 
sigfft_setup(void) mayer_init(); sigifft_class = class_new(gensym("ifft~"), sigifft_new, 0, - sizeof(t_sigfft), 0, 0); + sizeof(t_sigfft), CLASS_DEFAULT, 0); class_setfreefn(sigifft_class, fftclass_cleanup); CLASS_MAINSIGNALIN(sigifft_class, t_sigfft, x_f); class_addmethod(sigifft_class, (t_method)sigifft_dsp, @@ -202,7 +202,7 @@ static void sigrfft_dsp(t_sigrfft *x, t_signal **sp) static void sigrfft_setup(void) { sigrfft_class = class_new(gensym("rfft~"), sigrfft_new, 0, - sizeof(t_sigrfft), 0, 0); + sizeof(t_sigrfft), CLASS_DEFAULT, 0); class_setfreefn(sigrfft_class, fftclass_cleanup); CLASS_MAINSIGNALIN(sigrfft_class, t_sigrfft, x_f); class_addmethod(sigrfft_class, (t_method)sigrfft_dsp, @@ -265,7 +265,7 @@ static void sigrifft_dsp(t_sigrifft *x, t_signal **sp) static void sigrifft_setup(void) { sigrifft_class = class_new(gensym("rifft~"), sigrifft_new, 0, - sizeof(t_sigrifft), 0, 0); + sizeof(t_sigrifft), CLASS_DEFAULT, 0); class_setfreefn(sigrifft_class, fftclass_cleanup); CLASS_MAINSIGNALIN(sigrifft_class, t_sigrifft, x_f); class_addmethod(sigrifft_class, (t_method)sigrifft_dsp, @@ -358,7 +358,7 @@ static void sigframp_dsp(t_sigframp *x, t_signal **sp) static void sigframp_setup(void) { sigframp_class = class_new(gensym("framp~"), sigframp_new, 0, - sizeof(t_sigframp), 0, 0); + sizeof(t_sigframp), CLASS_DEFAULT, 0); class_setfreefn(sigframp_class, fftclass_cleanup); CLASS_MAINSIGNALIN(sigframp_class, t_sigframp, x_f); class_addmethod(sigframp_class, (t_method)sigframp_dsp, diff --git a/src/d_filter.c b/src/d_filter.c index bdcc87a43f..cc1a04e034 100644 --- a/src/d_filter.c +++ b/src/d_filter.c @@ -127,7 +127,7 @@ static void sighip_clear(t_sighip *x, t_floatarg q) void sighip_setup(void) { sighip_class = class_new(gensym("hip~"), (t_newmethod)sighip_new, 0, - sizeof(t_sighip), 0, A_DEFFLOAT, 0); + sizeof(t_sighip), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sighip_class, t_sighip, x_f); class_addmethod(sighip_class, (t_method)sighip_dsp, 
gensym("dsp"), A_CANT, 0); @@ -215,7 +215,7 @@ static void siglop_dsp(t_siglop *x, t_signal **sp) void siglop_setup(void) { siglop_class = class_new(gensym("lop~"), (t_newmethod)siglop_new, 0, - sizeof(t_siglop), 0, A_DEFFLOAT, 0); + sizeof(t_siglop), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(siglop_class, t_siglop, x_f); class_addmethod(siglop_class, (t_method)siglop_dsp, gensym("dsp"), A_CANT, 0); @@ -347,7 +347,7 @@ static void sigbp_dsp(t_sigbp *x, t_signal **sp) void sigbp_setup(void) { sigbp_class = class_new(gensym("bp~"), (t_newmethod)sigbp_new, 0, - sizeof(t_sigbp), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + sizeof(t_sigbp), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigbp_class, t_sigbp, x_f); class_addmethod(sigbp_class, (t_method)sigbp_dsp, gensym("dsp"), A_CANT, 0); @@ -471,7 +471,7 @@ static void sigbiquad_dsp(t_sigbiquad *x, t_signal **sp) void sigbiquad_setup(void) { sigbiquad_class = class_new(gensym("biquad~"), (t_newmethod)sigbiquad_new, - 0, sizeof(t_sigbiquad), 0, A_GIMME, 0); + 0, sizeof(t_sigbiquad), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(sigbiquad_class, t_sigbiquad, x_f); class_addmethod(sigbiquad_class, (t_method)sigbiquad_dsp, gensym("dsp"), A_CANT, 0); @@ -549,7 +549,7 @@ static void sigsamphold_set(t_sigsamphold *x, t_float f) void sigsamphold_setup(void) { sigsamphold_class = class_new(gensym("samphold~"), - (t_newmethod)sigsamphold_new, 0, sizeof(t_sigsamphold), 0, 0); + (t_newmethod)sigsamphold_new, 0, sizeof(t_sigsamphold), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(sigsamphold_class, t_sigsamphold, x_f); class_addmethod(sigsamphold_class, (t_method)sigsamphold_set, gensym("set"), A_DEFFLOAT, 0); @@ -621,8 +621,8 @@ static void sigrpole_set(t_sigrpole *x, t_float f) void sigrpole_setup(void) { - sigrpole_class = class_new(gensym("rpole~"), - (t_newmethod)sigrpole_new, 0, sizeof(t_sigrpole), 0, A_DEFFLOAT, 0); + sigrpole_class = class_new(gensym("rpole~"), (t_newmethod)sigrpole_new, + 0, sizeof(t_sigrpole), 
CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigrpole_class, t_sigrpole, x_f); class_addmethod(sigrpole_class, (t_method)sigrpole_set, gensym("set"), A_DEFFLOAT, 0); @@ -693,8 +693,8 @@ static void sigrzero_set(t_sigrzero *x, t_float f) void sigrzero_setup(void) { - sigrzero_class = class_new(gensym("rzero~"), - (t_newmethod)sigrzero_new, 0, sizeof(t_sigrzero), 0, A_DEFFLOAT, 0); + sigrzero_class = class_new(gensym("rzero~"), (t_newmethod)sigrzero_new, + 0, sizeof(t_sigrzero), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigrzero_class, t_sigrzero, x_f); class_addmethod(sigrzero_class, (t_method)sigrzero_set, gensym("set"), A_DEFFLOAT, 0); @@ -767,7 +767,7 @@ void sigrzero_rev_setup(void) { sigrzero_rev_class = class_new(gensym("rzero_rev~"), (t_newmethod)sigrzero_rev_new, 0, sizeof(t_sigrzero_rev), - 0, A_DEFFLOAT, 0); + CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigrzero_rev_class, t_sigrzero_rev, x_f); class_addmethod(sigrzero_rev_class, (t_method)sigrzero_rev_set, gensym("set"), A_DEFFLOAT, 0); @@ -859,8 +859,8 @@ static void sigcpole_set(t_sigcpole *x, t_float re, t_float im) void sigcpole_setup(void) { sigcpole_class = class_new(gensym("cpole~"), - (t_newmethod)sigcpole_new, 0, sizeof(t_sigcpole), 0, - A_DEFFLOAT, A_DEFFLOAT, 0); + (t_newmethod)sigcpole_new, 0, sizeof(t_sigcpole), + CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigcpole_class, t_sigcpole, x_f); class_addmethod(sigcpole_class, (t_method)sigcpole_set, gensym("set"), A_DEFFLOAT, A_DEFFLOAT, 0); @@ -949,8 +949,8 @@ static void sigczero_set(t_sigczero *x, t_float re, t_float im) void sigczero_setup(void) { sigczero_class = class_new(gensym("czero~"), - (t_newmethod)sigczero_new, 0, sizeof(t_sigczero), 0, - A_DEFFLOAT, A_DEFFLOAT, 0); + (t_newmethod)sigczero_new, 0, sizeof(t_sigczero), + CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigczero_class, t_sigczero, x_f); class_addmethod(sigczero_class, (t_method)sigczero_set, gensym("set"), A_DEFFLOAT, 
A_DEFFLOAT, 0); @@ -1041,8 +1041,8 @@ static void sigczero_rev_set(t_sigczero_rev *x, t_float re, t_float im) void sigczero_rev_setup(void) { sigczero_rev_class = class_new(gensym("czero_rev~"), - (t_newmethod)sigczero_rev_new, 0, sizeof(t_sigczero_rev), 0, - A_DEFFLOAT, A_DEFFLOAT, 0); + (t_newmethod)sigczero_rev_new, 0, sizeof(t_sigczero_rev), + CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigczero_rev_class, t_sigczero_rev, x_f); class_addmethod(sigczero_rev_class, (t_method)sigczero_rev_set, gensym("set"), A_DEFFLOAT, A_DEFFLOAT, 0); @@ -1151,7 +1151,7 @@ static void slop_tilde_dsp(t_slop_tilde *x, t_signal **sp) void slop_tilde_setup(void) { slop_tilde_class = class_new(gensym("slop~"), (t_newmethod)slop_tilde_new, 0, - sizeof(t_slop_tilde), 0, A_GIMME, 0); + sizeof(t_slop_tilde), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(slop_tilde_class, t_slop_tilde, x_f); class_addmethod(slop_tilde_class, (t_method)slop_tilde_dsp, gensym("dsp"), A_CANT, 0); diff --git a/src/d_global.c b/src/d_global.c index 4e80c3d2f8..860000bd46 100644 --- a/src/d_global.c +++ b/src/d_global.c @@ -86,7 +86,7 @@ static void sigsend_free(t_sigsend *x) static void sigsend_setup(void) { sigsend_class = class_new(gensym("send~"), (t_newmethod)sigsend_new, - (t_method)sigsend_free, sizeof(t_sigsend), 0, A_DEFSYM, 0); + (t_method)sigsend_free, sizeof(t_sigsend), CLASS_DEFAULT, A_DEFSYM, 0); class_addcreator((t_newmethod)sigsend_new, gensym("s~"), A_DEFSYM, 0); CLASS_MAINSIGNALIN(sigsend_class, t_sigsend, x_f); class_addmethod(sigsend_class, (t_method)sigsend_dsp, @@ -220,7 +220,7 @@ static void sigreceive_setup(void) { sigreceive_class = class_new(gensym("receive~"), (t_newmethod)sigreceive_new, 0, - sizeof(t_sigreceive), 0, A_DEFSYM, 0); + sizeof(t_sigreceive), CLASS_DEFAULT, A_DEFSYM, 0); class_addcreator((t_newmethod)sigreceive_new, gensym("r~"), A_DEFSYM, 0); class_addmethod(sigreceive_class, (t_method)sigreceive_set, gensym("set"), A_SYMBOL, 0); @@ -312,7 +312,8 @@ 
static void sigcatch_free(t_sigcatch *x) static void sigcatch_setup(void) { sigcatch_class = class_new(gensym("catch~"), (t_newmethod)sigcatch_new, - (t_method)sigcatch_free, sizeof(t_sigcatch), CLASS_NOINLET, A_DEFSYM, 0); + (t_method)sigcatch_free, sizeof(t_sigcatch), + CLASS_THREADSAFE | CLASS_NOINLET, A_DEFSYM, 0); class_addmethod(sigcatch_class, (t_method)sigcatch_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(sigcatch_class, gensym("throw~-catch~")); @@ -408,7 +409,7 @@ static void sigthrow_dsp(t_sigthrow *x, t_signal **sp) static void sigthrow_setup(void) { sigthrow_class = class_new(gensym("throw~"), (t_newmethod)sigthrow_new, 0, - sizeof(t_sigthrow), 0, A_DEFSYM, 0); + sizeof(t_sigthrow), CLASS_DEFAULT, A_DEFSYM, 0); class_addmethod(sigthrow_class, (t_method)sigthrow_set, gensym("set"), A_SYMBOL, 0); CLASS_MAINSIGNALIN(sigthrow_class, t_sigthrow, x_f); diff --git a/src/d_math.c b/src/d_math.c index 244fd93c0f..c33b571409 100644 --- a/src/d_math.c +++ b/src/d_math.c @@ -58,7 +58,7 @@ static void clip_dsp(t_clip *x, t_signal **sp) static void clip_setup(void) { clip_class = class_new(gensym("clip~"), (t_newmethod)clip_new, 0, - sizeof(t_clip), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + sizeof(t_clip), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(clip_class, t_clip, x_f); class_addmethod(clip_class, (t_method)clip_dsp, gensym("dsp"), A_CANT, 0); } @@ -177,7 +177,7 @@ static void sigrsqrt_dsp(t_sigrsqrt *x, t_signal **sp) void sigrsqrt_setup(void) { sigrsqrt_class = class_new(gensym("rsqrt~"), (t_newmethod)sigrsqrt_new, 0, - sizeof(t_sigrsqrt), 0, 0); + sizeof(t_sigrsqrt), CLASS_DEFAULT, 0); /* an old name for it: */ class_addcreator(sigrsqrt_new, gensym("q8_rsqrt~"), 0); CLASS_MAINSIGNALIN(sigrsqrt_class, t_sigrsqrt, x_f); @@ -236,7 +236,7 @@ static void sigsqrt_dsp(t_sigsqrt *x, t_signal **sp) void sigsqrt_setup(void) { sigsqrt_class = class_new(gensym("sqrt~"), (t_newmethod)sigsqrt_new, 0, - sizeof(t_sigsqrt), 0, 0); + sizeof(t_sigsqrt), 
CLASS_DEFAULT, 0); class_addcreator(sigsqrt_new, gensym("q8_sqrt~"), 0); /* old name */ CLASS_MAINSIGNALIN(sigsqrt_class, t_sigsqrt, x_f); class_addmethod(sigsqrt_class, (t_method)sigsqrt_dsp, @@ -302,7 +302,7 @@ static void sigwrap_dsp(t_sigwrap *x, t_signal **sp) void sigwrap_setup(void) { sigwrap_class = class_new(gensym("wrap~"), (t_newmethod)sigwrap_new, 0, - sizeof(t_sigwrap), 0, 0); + sizeof(t_sigwrap), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(sigwrap_class, t_sigwrap, x_f); class_addmethod(sigwrap_class, (t_method)sigwrap_dsp, gensym("dsp"), A_CANT, 0); @@ -351,7 +351,7 @@ static void mtof_tilde_dsp(t_mtof_tilde *x, t_signal **sp) void mtof_tilde_setup(void) { mtof_tilde_class = class_new(gensym("mtof~"), (t_newmethod)mtof_tilde_new, 0, - sizeof(t_mtof_tilde), 0, 0); + sizeof(t_mtof_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(mtof_tilde_class, t_mtof_tilde, x_f); class_addmethod(mtof_tilde_class, (t_method)mtof_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -395,7 +395,7 @@ static void ftom_tilde_dsp(t_ftom_tilde *x, t_signal **sp) void ftom_tilde_setup(void) { ftom_tilde_class = class_new(gensym("ftom~"), (t_newmethod)ftom_tilde_new, 0, - sizeof(t_ftom_tilde), 0, 0); + sizeof(t_ftom_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(ftom_tilde_class, t_ftom_tilde, x_f); class_addmethod(ftom_tilde_class, (t_method)ftom_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -445,7 +445,7 @@ static void dbtorms_tilde_dsp(t_dbtorms_tilde *x, t_signal **sp) void dbtorms_tilde_setup(void) { dbtorms_tilde_class = class_new(gensym("dbtorms~"), (t_newmethod)dbtorms_tilde_new, 0, - sizeof(t_dbtorms_tilde), 0, 0); + sizeof(t_dbtorms_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(dbtorms_tilde_class, t_dbtorms_tilde, x_f); class_addmethod(dbtorms_tilde_class, (t_method)dbtorms_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -494,7 +494,7 @@ static void rmstodb_tilde_dsp(t_rmstodb_tilde *x, t_signal **sp) void rmstodb_tilde_setup(void) { rmstodb_tilde_class = class_new(gensym("rmstodb~"), - 
(t_newmethod)rmstodb_tilde_new, 0, sizeof(t_rmstodb_tilde), 0, 0); + (t_newmethod)rmstodb_tilde_new, 0, sizeof(t_rmstodb_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(rmstodb_tilde_class, t_rmstodb_tilde, x_f); class_addmethod(rmstodb_tilde_class, (t_method)rmstodb_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -544,7 +544,7 @@ static void dbtopow_tilde_dsp(t_dbtopow_tilde *x, t_signal **sp) void dbtopow_tilde_setup(void) { dbtopow_tilde_class = class_new(gensym("dbtopow~"), (t_newmethod)dbtopow_tilde_new, 0, - sizeof(t_dbtopow_tilde), 0, 0); + sizeof(t_dbtopow_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(dbtopow_tilde_class, t_dbtopow_tilde, x_f); class_addmethod(dbtopow_tilde_class, (t_method)dbtopow_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -593,7 +593,7 @@ static void powtodb_tilde_dsp(t_powtodb_tilde *x, t_signal **sp) void powtodb_tilde_setup(void) { powtodb_tilde_class = class_new(gensym("powtodb~"), (t_newmethod)powtodb_tilde_new, 0, - sizeof(t_powtodb_tilde), 0, 0); + sizeof(t_powtodb_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(powtodb_tilde_class, t_powtodb_tilde, x_f); class_addmethod(powtodb_tilde_class, (t_method)powtodb_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -643,7 +643,7 @@ static void pow_tilde_dsp(t_pow_tilde *x, t_signal **sp) static void pow_tilde_setup(void) { pow_tilde_class = class_new(gensym("pow~"), (t_newmethod)pow_tilde_new, 0, - sizeof(t_pow_tilde), 0, A_DEFFLOAT, 0); + sizeof(t_pow_tilde), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(pow_tilde_class, t_pow_tilde, x_f); class_addmethod(pow_tilde_class, (t_method)pow_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -684,7 +684,7 @@ static void exp_tilde_dsp(t_exp_tilde *x, t_signal **sp) static void exp_tilde_setup(void) { exp_tilde_class = class_new(gensym("exp~"), (t_newmethod)exp_tilde_new, 0, - sizeof(t_exp_tilde), 0, 0); + sizeof(t_exp_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(exp_tilde_class, t_exp_tilde, x_f); class_addmethod(exp_tilde_class, (t_method)exp_tilde_dsp, gensym("dsp"), 
A_CANT, 0); @@ -737,7 +737,7 @@ static void log_tilde_dsp(t_log_tilde *x, t_signal **sp) static void log_tilde_setup(void) { log_tilde_class = class_new(gensym("log~"), (t_newmethod)log_tilde_new, 0, - sizeof(t_log_tilde), 0, A_DEFFLOAT, 0); + sizeof(t_log_tilde), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(log_tilde_class, t_log_tilde, x_f); class_addmethod(log_tilde_class, (t_method)log_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -781,7 +781,7 @@ static void abs_tilde_dsp(t_abs_tilde *x, t_signal **sp) static void abs_tilde_setup(void) { abs_tilde_class = class_new(gensym("abs~"), (t_newmethod)abs_tilde_new, 0, - sizeof(t_abs_tilde), 0, 0); + sizeof(t_abs_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(abs_tilde_class, t_abs_tilde, x_f); class_addmethod(abs_tilde_class, (t_method)abs_tilde_dsp, gensym("dsp"), A_CANT, 0); diff --git a/src/d_misc.c b/src/d_misc.c index 521e10ad0b..88812393a9 100644 --- a/src/d_misc.c +++ b/src/d_misc.c @@ -65,7 +65,7 @@ static void *print_new(t_symbol *s) static void print_setup(void) { print_class = class_new(gensym("print~"), (t_newmethod)print_new, 0, - sizeof(t_print), 0, A_DEFSYM, 0); + sizeof(t_print), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(print_class, t_print, x_f); class_addmethod(print_class, (t_method)print_dsp, gensym("dsp"), A_CANT, 0); class_addbang(print_class, print_bang); @@ -115,7 +115,7 @@ static void *bang_tilde_new(t_symbol *s) static void bang_tilde_setup(void) { bang_tilde_class = class_new(gensym("bang~"), (t_newmethod)bang_tilde_new, - (t_method)bang_tilde_free, sizeof(t_bang), 0, 0); + (t_method)bang_tilde_free, sizeof(t_bang), CLASS_DEFAULT, 0); class_addmethod(bang_tilde_class, (t_method)bang_tilde_dsp, gensym("dsp"), 0); } diff --git a/src/d_osc.c b/src/d_osc.c index b6d3a43cc7..47ef5415d3 100644 --- a/src/d_osc.c +++ b/src/d_osc.c @@ -115,7 +115,7 @@ static void phasor_ft1(t_phasor *x, t_float f) static void phasor_setup(void) { phasor_class = class_new(gensym("phasor~"), 
(t_newmethod)phasor_new, 0, - sizeof(t_phasor), 0, A_DEFFLOAT, 0); + sizeof(t_phasor), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(phasor_class, t_phasor, x_f); class_addmethod(phasor_class, (t_method)phasor_dsp, gensym("dsp"), A_CANT, 0); @@ -230,7 +230,7 @@ static void cos_cleanup(t_class *c) static void cos_setup(void) { cos_class = class_new(gensym("cos~"), (t_newmethod)cos_new, 0, - sizeof(t_cos), 0, A_DEFFLOAT, 0); + sizeof(t_cos), CLASS_DEFAULT, A_DEFFLOAT, 0); class_setfreefn(cos_class, cos_cleanup); CLASS_MAINSIGNALIN(cos_class, t_cos, x_f); class_addmethod(cos_class, (t_method)cos_dsp, gensym("dsp"), A_CANT, 0); @@ -332,7 +332,7 @@ static void osc_ft1(t_osc *x, t_float f) static void osc_setup(void) { osc_class = class_new(gensym("osc~"), (t_newmethod)osc_new, 0, - sizeof(t_osc), 0, A_DEFFLOAT, 0); + sizeof(t_osc), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(osc_class, t_osc, x_f); class_addmethod(osc_class, (t_method)osc_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(osc_class, (t_method)osc_ft1, gensym("ft1"), A_FLOAT, 0); @@ -456,7 +456,7 @@ static void sigvcf_setup(void) { sigvcf_class = class_new(gensym("vcf~"), (t_newmethod)sigvcf_new, 0, - sizeof(t_sigvcf), 0, A_DEFFLOAT, 0); + sizeof(t_sigvcf), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigvcf_class, t_sigvcf, x_f); class_addmethod(sigvcf_class, (t_method)sigvcf_dsp, gensym("dsp"), A_CANT, 0); @@ -514,7 +514,7 @@ static void noise_float(t_noise *x, t_float f) static void noise_setup(void) { noise_class = class_new(gensym("noise~"), (t_newmethod)noise_new, 0, - sizeof(t_noise), 0, A_DEFFLOAT, 0); + sizeof(t_noise), CLASS_DEFAULT, A_DEFFLOAT, 0); class_addmethod(noise_class, (t_method)noise_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(noise_class, (t_method)noise_float, diff --git a/src/d_soundfile.c b/src/d_soundfile.c index 89a092a856..c014740fb0 100644 --- a/src/d_soundfile.c +++ b/src/d_soundfile.c @@ -2232,7 +2232,7 @@ static void readsf_setup(void) { readsf_class = 
class_new(gensym("readsf~"), (t_newmethod)readsf_new, (t_method)readsf_free, - sizeof(t_readsf), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + sizeof(t_readsf), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addfloat(readsf_class, (t_method)readsf_float); class_addmethod(readsf_class, (t_method)readsf_start, gensym("start"), 0); class_addmethod(readsf_class, (t_method)readsf_stop, gensym("stop"), 0); @@ -2725,7 +2725,7 @@ static void writesf_setup(void) { writesf_class = class_new(gensym("writesf~"), (t_newmethod)writesf_new, (t_method)writesf_free, - sizeof(t_writesf), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + sizeof(t_writesf), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addmethod(writesf_class, (t_method)writesf_start, gensym("start"), 0); class_addmethod(writesf_class, (t_method)writesf_stop, gensym("stop"), 0); class_addmethod(writesf_class, (t_method)writesf_dsp, diff --git a/src/d_ugen.c b/src/d_ugen.c index 2d58faa6e6..2a8c4ab52e 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -327,7 +327,7 @@ static void block_dsp(t_block *x, t_signal **sp) void block_tilde_setup(void) { block_class = class_new(gensym("block~"), (t_newmethod)block_new, 0, - sizeof(t_block), 0, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); + sizeof(t_block), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addcreator((t_newmethod)switch_new, gensym("switch~"), A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addmethod(block_class, (t_method)block_set, gensym("set"), diff --git a/src/g_canvas.c b/src/g_canvas.c index 7ecf34711f..1888aaba8a 100644 --- a/src/g_canvas.c +++ b/src/g_canvas.c @@ -2023,7 +2023,7 @@ void g_canvas_setup(void) /* we prevent the user from typing "canvas" in an object box by sending 0 for a creator function. 
*/ canvas_class = class_new(gensym("canvas"), 0, - (t_method)canvas_free, sizeof(t_canvas), CLASS_NOINLET, 0); + (t_method)canvas_free, sizeof(t_canvas), CLASS_THREADSAFE | CLASS_NOINLET, 0); /* here is the real creator function, invoked in patch files by sending the "canvas" message to #N, which is bound to pd_camvasmaker. */ diff --git a/src/g_clone.c b/src/g_clone.c index 4d4397e831..20104e0fe9 100644 --- a/src/g_clone.c +++ b/src/g_clone.c @@ -452,7 +452,7 @@ static void *clone_new(t_symbol *s, int argc, t_atom *argv) void clone_setup(void) { clone_class = class_new(gensym("clone"), (t_newmethod)clone_new, - (t_method)clone_free, sizeof(t_clone), CLASS_NOINLET, A_GIMME, 0); + (t_method)clone_free, sizeof(t_clone), CLASS_THREADSAFE | CLASS_NOINLET, A_GIMME, 0); class_addmethod(clone_class, (t_method)clone_click, gensym("click"), A_FLOAT, A_FLOAT, A_FLOAT, A_FLOAT, A_FLOAT, 0); class_addmethod(clone_class, (t_method)clone_loadbang, gensym("loadbang"), diff --git a/src/g_io.c b/src/g_io.c index 28bdd7ae35..0bb90df187 100644 --- a/src/g_io.c +++ b/src/g_io.c @@ -293,7 +293,7 @@ static void *vinlet_newsig(t_symbol *s, int argc, t_atom *argv) static void vinlet_setup(void) { vinlet_class = class_new(gensym("inlet"), (t_newmethod)vinlet_new, - (t_method)vinlet_free, sizeof(t_vinlet), CLASS_NOINLET, A_DEFSYM, 0); + (t_method)vinlet_free, sizeof(t_vinlet), CLASS_THREADSAFE | CLASS_NOINLET, A_DEFSYM, 0); class_addcreator((t_newmethod)vinlet_newsig, gensym("inlet~"), A_GIMME, 0); class_addbang(vinlet_class, vinlet_bang); class_addpointer(vinlet_class, vinlet_pointer); @@ -597,7 +597,7 @@ static void *voutlet_newsig(t_symbol *s) static void voutlet_setup(void) { voutlet_class = class_new(gensym("outlet"), (t_newmethod)voutlet_new, - (t_method)voutlet_free, sizeof(t_voutlet), CLASS_NOINLET, A_DEFSYM, 0); + (t_method)voutlet_free, sizeof(t_voutlet), CLASS_THREADSAFE | CLASS_NOINLET, A_DEFSYM, 0); class_addcreator((t_newmethod)voutlet_newsig, gensym("outlet~"), A_DEFSYM, 0); 
class_addbang(voutlet_class, voutlet_bang); class_addpointer(voutlet_class, voutlet_pointer); diff --git a/src/x_array.c b/src/x_array.c index a7fedff363..6a6438ae3f 100644 --- a/src/x_array.c +++ b/src/x_array.c @@ -897,7 +897,7 @@ void canvas_add_for_class(t_class *c); void x_array_setup(void) { array_define_class = class_new(gensym("array define"), 0, - (t_method)canvas_free, sizeof(t_canvas), 0, 0); + (t_method)canvas_free, sizeof(t_canvas), CLASS_DEFAULT, 0); canvas_add_for_class(array_define_class); class_addmethod(array_define_class, (t_method)array_define_send, gensym("send"), A_SYMBOL, 0); diff --git a/src/x_scalar.c b/src/x_scalar.c index 92862cdc5a..450a5bb0d1 100644 --- a/src/x_scalar.c +++ b/src/x_scalar.c @@ -188,7 +188,7 @@ void canvas_add_for_class(t_class *c); void x_scalar_setup(void) { scalar_define_class = class_new(gensym("scalar define"), 0, - (t_method)canvas_free, sizeof(t_canvas), 0, 0); + (t_method)canvas_free, sizeof(t_canvas), CLASS_DEFAULT, 0); canvas_add_for_class(scalar_define_class); class_addmethod(scalar_define_class, (t_method)scalar_define_send, gensym("send"), A_SYMBOL, 0); diff --git a/src/x_vexp_if.c b/src/x_vexp_if.c index ebf197e2ae..2606c5f7df 100644 --- a/src/x_vexp_if.c +++ b/src/x_vexp_if.c @@ -800,7 +800,7 @@ expr_setup(void) * expr~ initialization */ expr_tilde_class = class_new(gensym("expr~"), (t_newmethod)expr_new, - (t_method)expr_ff, sizeof(t_expr), 0, A_GIMME, 0); + (t_method)expr_ff, sizeof(t_expr), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(expr_tilde_class, nullfn, gensym("signal"), 0); CLASS_MAINSIGNALIN(expr_tilde_class, t_expr, exp_f); class_addmethod(expr_tilde_class,(t_method)expr_dsp, gensym("dsp"), @@ -812,7 +812,7 @@ expr_setup(void) * fexpr~ initialization */ fexpr_tilde_class = class_new(gensym("fexpr~"), (t_newmethod)expr_new, - (t_method)expr_ff, sizeof(t_expr), 0, A_GIMME, 0); + (t_method)expr_ff, sizeof(t_expr), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(fexpr_tilde_class, nullfn, 
gensym("signal"), 0); CLASS_MAINSIGNALIN(fexpr_tilde_class, t_expr, exp_f); class_addmethod(fexpr_tilde_class,(t_method)expr_start, From 3737cae821f642ce0cb088b801edd854eedc7d7d Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Wed, 29 Dec 2021 16:50:35 +0100 Subject: [PATCH 16/32] configure add --enable-parallel option (disabled by default) --- configure.ac | 24 +++++++++++++++++++----- src/d_threadpool.c | 4 ++++ src/m_pd.h | 9 +++++++-- src/s_stuff.h | 9 +++------ 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/configure.ac b/configure.ac index fce638335b..0a08fd863d 100644 --- a/configure.ac +++ b/configure.ac @@ -287,11 +287,6 @@ AS_IF([test x$debug = xyes],[ PD_CPPFLAGS="-DNDEBUG $PD_CPPFLAGS" ]) -##### macOS version min ##### -AS_IF([test "x$macos_version_min" != "x"],[ - PD_CFLAGS="-mmacosx-version-min=$macos_version_min $PD_CFLAGS" -]) - ######################################### ##### Configure Options ##### @@ -313,6 +308,20 @@ AM_CONDITIONAL(LIBPD_EXTRA, test x$enable_libpd_extra = xyes) AM_CONDITIONAL(LIBPD_MULTIINSTANCE, test x$enable_libpd_instance = xyes) AM_CONDITIONAL(LIBPD_NO_SETLOCALE, test x$enable_libpd_setlocale = xno) +##### parallel processing support ##### +AC_ARG_ENABLE([parallel], + [AS_HELP_STRING([--disable-parallel], [disable parallel processing support])], + [parallel=$enableval], [parallel=yes]) +AS_IF([test x$parallel = xyes],[ + PD_CPPFLAGS="-DPD_DSPTHREADS=1 -DPD_PARALLEL=1 $PD_CPPFLAGS" + AS_IF([test x"$MACOSX" = x"yes"], [ + # for thread local storage support + macos_version_min=10.9 + ]) +],[ + PD_CPPFLAGS="-DPD_DSPTHREADS=0 -DPD_PARALLEL=0 $PD_CPPFLAGS" +]) + ##### Universal/multi architecture build on macOS ##### PD_CHECK_UNIVERSAL(ARCH, [universal=yes], [universal=no]) AM_CONDITIONAL(UNIVERSAL, test x$universal = xyes) @@ -562,6 +571,11 @@ AS_IF([test "x${enable_libpd}" = "xyes"],[ libpd="no" ]) +##### macOS version min ##### +AS_IF([test "x$macos_version_min" != "x"],[ + 
PD_CFLAGS="-mmacosx-version-min=$macos_version_min $PD_CFLAGS" +]) + # pass common flags via @PD_*@ AM variables for use in Makefiles AC_SUBST(PD_CPPFLAGS) AC_SUBST(PD_CFLAGS) diff --git a/src/d_threadpool.c b/src/d_threadpool.c index 507c6cfd20..07a015f7d8 100644 --- a/src/d_threadpool.c +++ b/src/d_threadpool.c @@ -12,6 +12,10 @@ #if PD_DSPTHREADS +#if !PD_PARALLEL +# error PD_DSPTHREADS requires PD_PARALLEL! +#endif + #include "s_sync.h" #include diff --git a/src/m_pd.h b/src/m_pd.h index c3c6bf7524..b582e20935 100644 --- a/src/m_pd.h +++ b/src/m_pd.h @@ -111,11 +111,16 @@ typedef unsigned __int64 uint64_t; # error invalid FLOATSIZE: must be 32 or 64 #endif -/* override for parallel processing support */ +/* externals may override this for parallel processing support. + * You have to use the CLASS_DEFAULT macro in class_new() and + * in your perform routine(s) you must only call API functions + * that are marked as THREADSAFE! */ #ifndef PD_PARALLEL -#define PD_PARALLEL 1 +#define PD_PARALLEL 0 #endif +/* used to mark API functions as thread-safe, meaning that they + * can be safely used in a perform routine. 
*/ #define THREADSAFE typedef PD_LONGINTTYPE t_int; /* pointer-size integer */ diff --git a/src/s_stuff.h b/src/s_stuff.h index 15678e0d31..98380397ea 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -392,16 +392,12 @@ EXTERN void inmidi_polyaftertouch(int portno, /* } jsarlo */ EXTERN int sys_zoom_open; -/* DSP task queue, for documentation see d_threadpool.c */ -#ifndef PD_DSPTHREADS -#define PD_DSPTHREADS 0 -#endif +/* DSP task queue */ +#if PD_DSPTHREADS EXTERN_STRUCT _dsptaskqueue; #define t_dsptaskqueue struct _dsptaskqueue -#if PD_DSPTHREADS - t_dsptaskqueue * dsptaskqueue_new(void); void dsptaskqueue_release(t_dsptaskqueue *x); void dsptaskqueue_reset(t_dsptaskqueue *x); @@ -418,6 +414,7 @@ void dsptask_sched(t_dsptask *x); #endif /* PD_DSPTHREADS */ +/* DSP thread pool API, for documentation see d_threadpool.c */ EXTERN int sys_havedspthreadpool(void); EXTERN int sys_dspthreadpool_start(int *numthreads, int external); EXTERN int sys_dspthreadpool_stop(int external); From 83c1174a01861e3b06d90451215f059be670bc22 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Wed, 29 Dec 2021 03:46:28 +0100 Subject: [PATCH 17/32] DSP threads are now part of audio settings * add a_numthreads to t_audiosettings * add "threads" to preferences * add "Audio threads" option in audio settings dialog --- extra/pd~/pdsched.c | 2 ++ src/m_sched.c | 9 ++++----- src/s_audio.c | 28 ++++++++++++++++++++++++++-- src/s_file.c | 9 +++++++++ src/s_main.c | 3 +-- src/s_stuff.h | 1 + tcl/dialog_audio.tcl | 28 +++++++++++++++++++--------- 7 files changed, 62 insertions(+), 18 deletions(-) diff --git a/extra/pd~/pdsched.c b/extra/pd~/pdsched.c index 0bf913dcc7..25849d6216 100644 --- a/extra/pd~/pdsched.c +++ b/extra/pd~/pdsched.c @@ -115,6 +115,7 @@ int pd_extern_sched(char *flags) /* fprintf(stderr, "Pd plug-in scheduler called, chans %d %d, sr %d\n", chin, chout, (int)rate); */ sys_setchsr(chin, chout, as.a_srate); + sys_dspthreadpool_start(&as.a_numthreads, 0); while (useascii ? 
readasciimessage(b) : readbinmessage(b) ) { t_atom *ap = binbuf_getvec(b); @@ -163,5 +164,6 @@ int pd_extern_sched(char *flags) } } binbuf_free(b); + sys_dspthreadpool_stop(0); return (0); } diff --git a/src/m_sched.c b/src/m_sched.c index 2a34e14cb1..a092df8ae7 100644 --- a/src/m_sched.c +++ b/src/m_sched.c @@ -30,7 +30,6 @@ extern int sys_nosleep; int sys_usecsincelastsleep(void); int sys_sleepgrain; -extern int sys_dspthreads; typedef void (*t_clockmethod)(void *client); @@ -401,7 +400,6 @@ static void m_pollingscheduler(void) { sys_lock(); sys_initmidiqueue(); - sys_dspthreadpool_start(&sys_dspthreads, 0); while (!sys_quit) /* outer loop runs once per tick */ { sys_addhist(0); @@ -454,7 +452,6 @@ static void m_pollingscheduler(void) break; } } - sys_dspthreadpool_stop(0); sys_unlock(); } @@ -474,7 +471,6 @@ void sched_audio_callbackfn(void) static void m_callbackscheduler(void) { sys_initmidiqueue(); - sys_dspthreadpool_start(&sys_dspthreads, 0); while (!sys_quit) { double timewas = pd_this->pd_systime; @@ -493,7 +489,6 @@ static void m_callbackscheduler(void) if (sys_idlehook) sys_idlehook(); } - sys_dspthreadpool_stop(0); } int m_mainloop(void) @@ -518,8 +513,12 @@ int m_mainloop(void) int m_batchmain(void) { + t_audiosettings as; + sys_get_audio_settings(&as); + sys_dspthreadpool_start(&as.a_numthreads, 0); while (sys_quit != SYS_QUIT_QUIT) sched_tick(); + sys_dspthreadpool_stop(0); return (0); } diff --git a/src/s_audio.c b/src/s_audio.c index c8181076c6..866d0333b6 100644 --- a/src/s_audio.c +++ b/src/s_audio.c @@ -46,6 +46,9 @@ static t_audiosettings audio_nextsettings; void sched_audio_callbackfn(void); void sched_reopenmeplease(void); +#if PD_DSPTHREADS +int sys_defnumdspthreads(void); +#endif int audio_isopen(void) { @@ -238,6 +241,11 @@ void sys_get_audio_settings(t_audiosettings *a) audio_nextsettings.a_choutdevvec[0] = SYS_DEFAULTCH; audio_nextsettings.a_advance = DEFAULTADVANCE; audio_nextsettings.a_blocksize = DEFDACBLKSIZE; + #if PD_DSPTHREADS + 
audio_nextsettings.a_numthreads = 0; /* default */ + #else + audio_nextsettings.a_numthreads = -1; /* no threads */ + #endif initted = 1; } *a = audio_nextsettings; @@ -273,6 +281,14 @@ void sys_set_audio_settings(t_audiosettings *a) a->a_blocksize = 1 << ilog2(a->a_blocksize); if (a->a_blocksize < DEFDACBLKSIZE || a->a_blocksize > MAXBLOCKSIZE) a->a_blocksize = DEFDACBLKSIZE; +#if PD_DSPTHREADS + /* 0: default number of threads. */ + if (a->a_numthreads <= 0) + a->a_numthreads = sys_defnumdspthreads(); +#else + /* -1 tells the GUI that PD_DSPTHREADS is disabled. */ + a->a_numthreads = -1; +#endif audio_make_sane(&a->a_noutdev, a->a_outdevvec, &a->a_nchoutdev, a->a_choutdevvec, MAXAUDIOOUTDEV); @@ -295,6 +311,9 @@ void sys_close_audio(void) } if (!audio_isopen()) return; + + sys_dspthreadpool_stop(0); + #ifdef USEAPI_PORTAUDIO if (sys_audioapiopened == API_PORTAUDIO) pa_close_audio(); @@ -368,6 +387,9 @@ void sys_reopen_audio(void) audio_compact_and_count_channels(&as.a_noutdev, as.a_outdevvec, as.a_choutdevvec, &totaloutchans, MAXAUDIOOUTDEV); sys_setchsr(totalinchans, totaloutchans, as.a_srate); + sys_dspthreadpool_start(&as.a_numthreads, 0); + /* save actual (validated) thread count. 
*/ + audio_nextsettings.a_numthreads = as.a_numthreads; if (!as.a_nindev && !as.a_noutdev) { sched_set_using_audio(SCHED_AUDIO_NONE); @@ -669,7 +691,7 @@ void glob_audio_properties(t_pd *dummy, t_floatarg flongform) pdgui_stub_deleteforkey(0); pdgui_stub_vnew(&glob_pdobject, "pdtk_audio_dialog", (void *)glob_audio_properties, - "iiii iiii iiii iiii s ii s i s", + "iiii iiii iiii iiii s ii s i s i", as.a_indevvec [0], as.a_indevvec [1], as.a_indevvec [2], as.a_indevvec [3], as.a_chindevvec [0], as.a_chindevvec [1], as.a_chindevvec [2], as.a_chindevvec [3], as.a_outdevvec [0], as.a_outdevvec [1], as.a_outdevvec [2], as.a_outdevvec [3], @@ -678,7 +700,8 @@ void glob_audio_properties(t_pd *dummy, t_floatarg flongform) as.a_advance, canmulti, callback, (flongform != 0), - blocksize); + blocksize, + as.a_numthreads); } /* new values from dialog window */ @@ -691,6 +714,7 @@ void glob_audio_dialog(t_pd *dummy, t_symbol *s, int argc, t_atom *argv) as.a_advance = atom_getfloatarg(17, argc, argv); as.a_callback = atom_getfloatarg(18, argc, argv); as.a_blocksize = atom_getfloatarg(19, argc, argv); + as.a_numthreads = atom_getfloatarg(20, argc, argv); for (i = 0; i < 4; i++) { diff --git a/src/s_file.c b/src/s_file.c index 82653eeb40..788380e37f 100644 --- a/src/s_file.c +++ b/src/s_file.c @@ -596,6 +596,10 @@ void sys_loadpreferences(const char *filename, int startingup) #ifndef _WIN32 else if (sys_getpreference("blocksize", prefbuf, MAXPDSTRING)) sscanf(prefbuf, "%d", &as.a_blocksize); +#endif +#if PD_DSPTHREADS + if (sys_getpreference("threads", prefbuf, MAXPDSTRING)) + sscanf(prefbuf, "%d", &as.a_numthreads); #endif sys_set_audio_settings(&as); @@ -751,6 +755,11 @@ void sys_savepreferences(const char *filename) sprintf(buf1, "%d", as.a_blocksize); sys_putpreference("audioblocksize", buf1); +#if PD_DSPTHREADS + sprintf(buf1, "%d", as.a_numthreads); + sys_putpreference("threads", buf1); +#endif + /* MIDI settings */ sprintf(buf1, "%d", sys_midiapi); 
sys_putpreference("midiapi", buf1); diff --git a/src/s_main.c b/src/s_main.c index dce12fbf8f..c3f05eed85 100644 --- a/src/s_main.c +++ b/src/s_main.c @@ -58,7 +58,6 @@ int sys_guisetportnumber; /* if started from the GUI, this is the port # */ int sys_nosleep = 0; /* skip all "sleep" calls and spin instead */ int sys_defeatrt; /* flag to cancel real-time */ t_symbol *sys_flags; /* more command-line flags */ -int sys_dspthreads = 1; /* number of DSP threads */ const char *sys_guicmd; t_symbol *sys_libdir; @@ -1337,7 +1336,7 @@ int sys_argparse(int argc, const char **argv) #if PD_DSPTHREADS else if (!strcmp(*argv, "-threads") && argc > 1) { - sys_dspthreads = atoi(argv[1]); + as.a_numthreads = atoi(argv[1]); argc -= 2; argv += 2; } #endif diff --git a/src/s_stuff.h b/src/s_stuff.h index 98380397ea..9a41d3ff21 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -73,6 +73,7 @@ typedef struct _audiosettings int a_advance; int a_callback; int a_blocksize; + int a_numthreads; } t_audiosettings; #define SENDDACS_NO 0 /* return values for sys_send_dacs() */ diff --git a/tcl/dialog_audio.tcl b/tcl/dialog_audio.tcl index c8caead5a0..2e60c2a288 100644 --- a/tcl/dialog_audio.tcl +++ b/tcl/dialog_audio.tcl @@ -17,7 +17,7 @@ proc ::dialog_audio::apply {mytoplevel} { global audio_outdev1 audio_outdev2 audio_outdev3 audio_outdev4 global audio_outchan1 audio_outchan2 audio_outchan3 audio_outchan4 global audio_outenable1 audio_outenable2 audio_outenable3 audio_outenable4 - global audio_sr audio_advance audio_callback audio_blocksize + global audio_sr audio_advance audio_callback audio_blocksize audio_threads pdsend "pd audio-dialog \ $audio_indev1 \ @@ -39,7 +39,8 @@ proc ::dialog_audio::apply {mytoplevel} { $audio_sr \ $audio_advance \ $audio_callback \ - $audio_blocksize" + $audio_blocksize \ + $audio_threads" } proc ::dialog_audio::cancel {mytoplevel} { @@ -102,7 +103,7 @@ proc ::dialog_audio::pdtk_audio_dialog {mytoplevel \ inchan1 inchan2 inchan3 inchan4 \ outdev1 outdev2 outdev3 
outdev4 \ outchan1 outchan2 outchan3 outchan4 sr advance multi callback \ - longform blocksize} { + longform blocksize {threads 0}} { global audio_indev1 audio_indev2 audio_indev3 audio_indev4 global audio_inchan1 audio_inchan2 audio_inchan3 audio_inchan4 global audio_inenable1 audio_inenable2 audio_inenable3 audio_inenable4 @@ -112,7 +113,7 @@ proc ::dialog_audio::pdtk_audio_dialog {mytoplevel \ global audio_sr audio_advance audio_callback audio_blocksize global audio_indevlist audio_outdevlist global pd_indev pd_outdev - global audio_longform + global audio_longform audio_threads set audio_indev1 $indev1 set audio_indev2 $indev2 @@ -147,6 +148,8 @@ proc ::dialog_audio::pdtk_audio_dialog {mytoplevel \ foreach {audio_callback audio_isfixedcallback} [::dialog_audio::isfixed $callback] {} foreach {audio_blocksize audio_isfixedbs} [::dialog_audio::isfixed $blocksize] {} + set audio_threads $threads + toplevel $mytoplevel -class DialogWindow wm withdraw $mytoplevel wm title $mytoplevel [_ "Audio Settings"] @@ -202,12 +205,19 @@ proc ::dialog_audio::pdtk_audio_dialog {mytoplevel \ $mytoplevel.settings.bsc.bs_popup config -state "disabled" } - if {$audio_isfixedcallback} {} else { - frame $mytoplevel.settings.callback - pack $mytoplevel.settings.callback -side bottom -fill x - checkbutton $mytoplevel.settings.callback.c_button -variable audio_callback \ + # callbacks and audio threads + frame $mytoplevel.settings.misc + pack $mytoplevel.settings.misc -side bottom -fill x + if {!$audio_isfixedcallback} { + checkbutton $mytoplevel.settings.misc.c_button -variable audio_callback \ -text [_ "Use callbacks"] - pack $mytoplevel.settings.callback.c_button + pack $mytoplevel.settings.misc.c_button + pack $mytoplevel.settings.misc.c_button -side left + } + if {$threads >= 0} { + label $mytoplevel.settings.misc.t_label -text [_ "Audio threads:"] + entry $mytoplevel.settings.misc.t_entry -textvariable audio_threads -width 4 + pack $mytoplevel.settings.misc.t_entry 
$mytoplevel.settings.misc.t_label -side right } # input devices From 96aa0fd0851f3a7a7b2c1078c0095c117c4da558 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Sun, 26 Dec 2021 16:14:56 +0100 Subject: [PATCH 18/32] add t_signalcontext by default, the DSP graph uses the global signal context. However, if a graph is processed in parallel, it needs its own signal context. For this purpose, we can also temporarily push/pop a new signal context with signalcontext_push() and signalcontext_pop(). --- src/d_ugen.c | 133 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 95 insertions(+), 38 deletions(-) diff --git a/src/d_ugen.c b/src/d_ugen.c index 2a8c4ab52e..66b821ec28 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -34,19 +34,62 @@ void voutlet_dspepilog(struct _voutlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, int downsample, int upsample, int reblock, int switched); +/* ---------------------------- t_signalcontext ----------------------------- */ + +typedef struct _signalcontext +{ + t_signal *sc_signals; /* list of signals used by DSP chain */ + /* list of signals which can be reused, sorted by buffer size */ + t_signal *sc_freelist[MAXLOGSIG+1]; + /* list of reusable "borrowed" signals (which don't own sample buffers) */ + t_signal *sc_freeborrowed; +} t_signalcontext; + +t_signalcontext *signalcontext_new(void) +{ + t_signalcontext *x = (t_signalcontext *)getbytes(sizeof(t_signalcontext)); + x->sc_signals = 0; + return x; +} + + /* call this to free all the signals, e.g. 
before creating a new DSP graph */ +void signalcontext_clear(t_signalcontext *x) +{ + t_signal *sig; + int i; + while ((sig = x->sc_signals)) + { + x->sc_signals = sig->s_nextused; + if (!sig->s_isborrowed) + t_freebytes(sig->s_vec, sig->s_vecsize * sizeof (*sig->s_vec)); + t_freebytes(sig, sizeof(*sig)); + } + for (i = 0; i <= MAXLOGSIG; i++) + x->sc_freelist[i] = 0; + x->sc_freeborrowed = 0; +} + +void signalcontext_free(t_signalcontext *x) +{ + signalcontext_clear(x); + freebytes(x, sizeof(t_signalcontext)); +} + +t_signalcontext *signalcontext_current(void); +t_signalcontext *signalcontext_push(t_signalcontext *newcontext); +void signalcontext_pop(t_signalcontext *oldcontext); + +/* ---------------------------- t_instanceugen ----------------------------- */ + struct _instanceugen { t_int *u_dspchain; /* DSP chain */ int u_dspchainsize; /* number of elements in DSP chain */ - t_signal *u_signals; /* list of signals used by DSP chain */ int u_sortno; /* number of DSP sortings so far */ - /* list of signals which can be reused, sorted by buffer size */ - t_signal *u_freelist[MAXLOGSIG+1]; - /* list of reusable "borrowed" signals (which don't own sample buffers) */ - t_signal *u_freeborrowed; int u_phase; int u_loud; - struct _dspcontext *u_context; + t_signalcontext *u_signals; /* global signal context */ + struct _dspcontext *u_context; /* current DSP context */ #if PD_DSPTHREADS t_dsptaskqueue *u_dspqueue; /* toplevel DSP thread queue */ t_lockfree_stack u_clocks; /* only for the main queue */ @@ -60,7 +103,7 @@ void d_ugen_newpdinstance(void) THIS = getbytes(sizeof(*THIS)); THIS->u_dspchain = 0; THIS->u_dspchainsize = 0; - THIS->u_signals = 0; + THIS->u_signals = signalcontext_new(); #if PD_DSPTHREADS THIS->u_dspqueue = dsptaskqueue_new(); lockfree_stack_init(&THIS->u_clocks); @@ -69,6 +112,7 @@ void d_ugen_newpdinstance(void) void d_ugen_freepdinstance(void) { + signalcontext_free(THIS->u_signals); #if PD_DSPTHREADS dsptaskqueue_release(THIS->u_dspqueue); 
#endif @@ -422,31 +466,14 @@ int ilog2(int n) return (r); } - - /* call this when DSP is stopped to free all the signals */ -static void signal_cleanup(void) -{ - t_signal *sig; - int i; - while ((sig = THIS->u_signals)) - { - THIS->u_signals = sig->s_nextused; - if (!sig->s_isborrowed) - t_freebytes(sig->s_vec, sig->s_vecsize * sizeof (*sig->s_vec)); - t_freebytes(sig, sizeof *sig); - } - for (i = 0; i <= MAXLOGSIG; i++) - THIS->u_freelist[i] = 0; - THIS->u_freeborrowed = 0; -} - /* mark the signal "reusable." */ void signal_makereusable(t_signal *sig) { int logn = ilog2(sig->s_vecsize); + t_signalcontext *context = signalcontext_current(); #if 1 t_signal *s5; - for (s5 = THIS->u_freeborrowed; s5; s5 = s5->s_nextfree) + for (s5 = context->sc_freeborrowed; s5; s5 = s5->s_nextfree) { if (s5 == sig) { @@ -454,7 +481,7 @@ void signal_makereusable(t_signal *sig) return; } } - for (s5 = THIS->u_freelist[logn]; s5; s5 = s5->s_nextfree) + for (s5 = context->sc_freelist[logn]; s5; s5 = s5->s_nextfree) { if (s5 == sig) { @@ -474,16 +501,16 @@ void signal_makereusable(t_signal *sig) s2->s_refcount--; if (!s2->s_refcount) signal_makereusable(s2); - sig->s_nextfree = THIS->u_freeborrowed; - THIS->u_freeborrowed = sig; + sig->s_nextfree = context->sc_freeborrowed; + context->sc_freeborrowed = sig; } else { /* if it's a real signal (not borrowed), put it on the free list so we can reuse it. 
*/ - if (THIS->u_freelist[logn] == sig) bug("signal_free 2"); - sig->s_nextfree = THIS->u_freelist[logn]; - THIS->u_freelist[logn] = sig; + if (context->sc_freelist[logn] == sig) bug("signal_free 2"); + sig->s_nextfree = context->sc_freelist[logn]; + context->sc_freelist[logn] = sig; } } @@ -494,6 +521,7 @@ void signal_makereusable(t_signal *sig) static t_signal *signal_new(int n, t_float sr) { int logn, vecsize = 0; + t_signalcontext *context = signalcontext_current(); t_signal *ret, **whichlist; logn = ilog2(n); if (n) @@ -502,10 +530,10 @@ static t_signal *signal_new(int n, t_float sr) vecsize *= 2; if (logn > MAXLOGSIG) bug("signal buffer too large"); - whichlist = THIS->u_freelist + logn; + whichlist = context->sc_freelist + logn; } else - whichlist = &THIS->u_freeborrowed; + whichlist = &context->sc_freeborrowed; /* first try to reclaim one from the free list */ if ((ret = *whichlist)) @@ -524,8 +552,8 @@ static t_signal *signal_new(int n, t_float sr) ret->s_vec = 0; ret->s_isborrowed = 1; } - ret->s_nextused = THIS->u_signals; - THIS->u_signals = ret; + ret->s_nextused = context->sc_signals; + context->sc_signals = ret; } ret->s_n = n; ret->s_vecsize = vecsize; @@ -603,6 +631,7 @@ struct _dspcontext int dc_ninlets; int dc_noutlets; t_signal **dc_iosigs; + t_signalcontext *dc_signals; t_float dc_srate; int dc_vecsize; /* vector size, power of two */ int dc_calcsize; /* number of elements to calculate */ @@ -620,6 +649,31 @@ t_signal *signal_newfromcontext(int borrowed) THIS->u_context->dc_srate)); } +t_signalcontext *signalcontext_current(void) +{ + return THIS->u_context->dc_signals; +} + +t_signalcontext *signalcontext_push(t_signalcontext *newcontext) +{ + t_signalcontext *old; + if (!THIS->u_context || !((old = THIS->u_context->dc_signals))) + { + bug("signalcontext_push"); + return 0; + } + THIS->u_context->dc_signals = newcontext; + return old; +} + +void signalcontext_pop(t_signalcontext *oldcontext) +{ + if (THIS->u_context && 
THIS->u_context->dc_signals) + THIS->u_context->dc_signals = oldcontext; + else + bug("signalcontext_pop"); +} + void ugen_stop(void) { if (THIS->u_dspchain) @@ -628,8 +682,7 @@ void ugen_stop(void) THIS->u_dspchainsize * sizeof (t_int)); THIS->u_dspchain = 0; } - signal_cleanup(); - + signalcontext_clear(THIS->u_signals); } void ugen_start(void) @@ -692,6 +745,10 @@ t_dspcontext *ugen_start_graph(int toplevel, t_signal **sp, dc->dc_ninlets = ninlets; dc->dc_noutlets = noutlets; dc->dc_parentcontext = THIS->u_context; + /* use parent signal context by default. This might be overridden + * by block~ (see "parallel") or by signalcontext_push(). */ + dc->dc_signals = THIS->u_context ? THIS->u_context->dc_signals + : THIS->u_signals; THIS->u_context = dc; return (dc); } From a584da3ac79a7a3efcd514d4b7c1a63b0c4c0995 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Fri, 24 Dec 2021 13:51:55 +0100 Subject: [PATCH 19/32] fix dsp_done() signature --- src/d_ugen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/d_ugen.c b/src/d_ugen.c index 66b821ec28..a8b60471a5 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -383,7 +383,7 @@ void block_tilde_setup(void) /* ------------------ DSP call list ----------------------- */ -static t_int dsp_done(t_int *w) +t_int *dsp_done(t_int *w) { return (0); } From e26f744738eb2f6c1009fa2cde7d756704b9a361 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Mon, 27 Dec 2021 01:52:07 +0100 Subject: [PATCH 20/32] block~: implement "parallel" method * add global DSP task queue * parallel canvasses use their own signal context * vinlet makes a copy of the parent input signal (in the new context) * voutlet uses double buffering: voutlet_dspprolog() writes last buffer to parent output signal, voutlet_dsp() writes input signal to buffer. 
--- src/d_ugen.c | 201 +++++++++++++++++++++++++++++++++++++++++---------- src/g_io.c | 107 +++++++++++++++++++++------ 2 files changed, 246 insertions(+), 62 deletions(-) diff --git a/src/d_ugen.c b/src/d_ugen.c index a8b60471a5..145330f90e 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -26,13 +26,13 @@ EXTERN_STRUCT _voutlet; void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched); + int downsample, int upsample, int reblock, int switched, int parallel); void voutlet_dspprolog(struct _voutlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched); + int downsample, int upsample, int reblock, int switched, int parallel); void voutlet_dspepilog(struct _voutlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched); + int downsample, int upsample, int reblock, int switched, int parallel); /* ---------------------------- t_signalcontext ----------------------------- */ @@ -91,8 +91,8 @@ struct _instanceugen t_signalcontext *u_signals; /* global signal context */ struct _dspcontext *u_context; /* current DSP context */ #if PD_DSPTHREADS - t_dsptaskqueue *u_dspqueue; /* toplevel DSP thread queue */ - t_lockfree_stack u_clocks; /* only for the main queue */ + t_dsptaskqueue *u_dspqueue; /* global DSP thread queue */ + t_lockfree_stack u_clocks; /* deferred clocks */ #endif }; @@ -190,6 +190,10 @@ overlapping and buffering to deal with vector size changes. If we're switched but not reblocked, the inlet prolog is not needed, and the output epilog is ONLY run when the block is switched off; in this case the epilog code simply copies zeros to all signal outlets. + +Block~ also has a "parallel" method which will process the canvas in parallel. 
+It will run asynchronously with all subsequent canvasses, unless it is joined +by a parent canvas (with the "join" method). */ static t_class *block_class; @@ -210,6 +214,13 @@ typedef struct _block char x_switched; /* true if we're acting as a a switch */ char x_switchon; /* true if we're switched on */ char x_reblock; /* true if inlets and outlets are reblocking */ +#if PD_DSPTHREADS + char x_parallel; /* true if we are processing in parallel */ + t_signalcontext *x_signals; /* signal context for parallel processing */ + t_dsptask *x_task; /* DSP task for parallel processing */ + int x_taskonset; /* beginning of parallel task in the chain */ + int x_tasklength; /* length of parallel task */ +#endif int x_upsample; /* upsampling-factor */ int x_downsample; /* downsampling-factor */ int x_return; /* stop right after this block (for one-shots) */ @@ -227,10 +238,27 @@ static void *block_new(t_floatarg fvecsize, t_floatarg foverlap, x->x_frequency = 1; x->x_switched = 0; x->x_switchon = 1; +#if PD_DSPTHREADS + x->x_parallel = 0; + x->x_signals = 0; + x->x_task = 0; + x->x_taskonset = 0; + x->x_tasklength = 0; +#endif block_set(x, fvecsize, foverlap, fupsample); return (x); } +static void block_free(t_block *x) +{ +#if PD_DSPTHREADS + if (x->x_signals) + signalcontext_free(x->x_signals); + if (x->x_task) + dsptask_free(x->x_task); +#endif +} + static void block_set(t_block *x, t_floatarg fcalcsize, t_floatarg foverlap, t_floatarg fupsample) { @@ -363,6 +391,35 @@ static t_int *block_epilog(t_int *w) else return (w + EPILOGCALL); } +#if PD_DSPTHREADS + +static void block_parallel(t_block *x, t_floatarg f) +{ + int par = f != 0; + if (par != x->x_parallel) + { + x->x_parallel = par; + canvas_update_dsp(); + } +} + +static t_int *block_schedtask(t_int *w) +{ + t_block *x = (t_block *)w[1]; + dsptask_sched(x->x_task); + /* skip the DSP chain performed by block_runtask(). 
*/ + return w + 2 + x->x_tasklength; +} + +static void block_runtask(t_block *x) +{ + t_int *ip = THIS->u_dspchain + x->x_taskonset; + while (ip) + ip = (*(t_perfroutine)(*ip))(ip); +} + +#endif /* PD_DSPTHREADS */ + static void block_dsp(t_block *x, t_signal **sp) { /* do nothing here */ @@ -370,12 +427,15 @@ static void block_dsp(t_block *x, t_signal **sp) void block_tilde_setup(void) { - block_class = class_new(gensym("block~"), (t_newmethod)block_new, 0, + block_class = class_new(gensym("block~"), (t_newmethod)block_new, (t_method)block_free, sizeof(t_block), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addcreator((t_newmethod)switch_new, gensym("switch~"), A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addmethod(block_class, (t_method)block_set, gensym("set"), A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); +#if PD_DSPTHREADS + class_addmethod(block_class, (t_method)block_parallel, gensym("parallel"), A_FLOAT, 0); +#endif class_addmethod(block_class, (t_method)block_dsp, gensym("dsp"), A_CANT, 0); class_addfloat(block_class, block_float); class_addbang(block_class, block_bang); @@ -636,8 +696,12 @@ struct _dspcontext int dc_vecsize; /* vector size, power of two */ int dc_calcsize; /* number of elements to calculate */ char dc_toplevel; /* true if "iosigs" is invalid. */ - char dc_reblock; /* true if we have to reblock inlets/outlets */ - char dc_switched; /* true if we're switched */ + char dc_reblock; /* true if we have to reblock inlets/outlets. */ + char dc_switched; /* true if we're switched. */ + char dc_parallel; /* true if we're parallel. */ +#if PD_DSPTHREADS + t_dsptaskqueue *dc_dspqueue; /* current DSP task queue */ +#endif }; #define t_dspcontext struct _dspcontext @@ -749,6 +813,12 @@ t_dspcontext *ugen_start_graph(int toplevel, t_signal **sp, * by block~ (see "parallel") or by signalcontext_push(). */ dc->dc_signals = THIS->u_context ? 
THIS->u_context->dc_signals : THIS->u_signals; +#if PD_DSPTHREADS + /* use parent DSP task queue by default. This might be overridden + * by block~ (see "join") or by dsptaskqueue_push(). */ + dc->dc_dspqueue = THIS->u_context ? THIS->u_context->dc_dspqueue + : THIS->u_dspqueue; +#endif THIS->u_context = dc; return (dc); } @@ -842,19 +912,19 @@ static void ugen_doit(t_dspcontext *dc, t_ugenbox *u) t_sigoutconnect *oc; t_class *class = pd_class(&u->u_obj->ob_pd); int i, n; - /* suppress creating new signals for the outputs of signal - inlets and subpatches; except in the case we're an inlet and "blocking" - is set. We don't yet know if a subcanvas will be "blocking" so there + /* suppress creating new signals for the outputs of signal inlets and + subpatches; except in the case we're an inlet and "reblock" or "parallel" + is set. We don't yet know if a subcanvas will be "blocking" so there we delay new signal creation, which will be handled by calling signal_setborrowed in the ugen_done_graph routine below. */ int nonewsigs = (class == canvas_class || - ((class == vinlet_class) && !(dc->dc_reblock))); + ((class == vinlet_class) && !(dc->dc_reblock || dc->dc_parallel))); /* when we encounter a subcanvas or a signal outlet, suppress freeing - the input signals as they may be "borrowed" for the super or sub - patch; same exception as above, but also if we're "switched" we - have to do a copy rather than a borrow. */ + the input signals as they may be "borrowed" for the super or sub patch; + same exception as above, but also if we're "switched" we have to do a + copy rather than a borrow. 
*/ int nofreesigs = (class == canvas_class || class == clone_class || - ((class == voutlet_class) && !(dc->dc_reblock || dc->dc_switched))); + ((class == voutlet_class) && !(dc->dc_reblock || dc->dc_parallel || dc->dc_switched))); t_signal **insig, **outsig, **sig, *s1, *s2, *s3; t_ugenbox *u2; @@ -884,10 +954,10 @@ static void ugen_doit(t_dspcontext *dc, t_ugenbox *u) *sig = uin->i_signal; newrefcount = --(*sig)->s_refcount; /* if the reference count went to zero, we free the signal now, - unless it's a subcanvas or outlet; these might keep the - signal around to send to objects connected to them. In this - case we increment the reference count; the corresponding decrement - is in sig_makereusable(). */ + unless it's a subcanvas or voutlet (except reblocked or parallel); + these might keep the signal around to send to objects connected + to them. In this case we increment the reference count; + the corresponding decrement is in sig_makereusable(). */ if (nofreesigs) (*sig)->s_refcount++; else if (!newrefcount) @@ -895,13 +965,15 @@ static void ugen_doit(t_dspcontext *dc, t_ugenbox *u) } for (sig = outsig, uout = u->u_out, i = u->u_nout; i--; sig++, uout++) { - /* similarly, for outlets of subcanvases we delay creating - them; instead we create "borrowed" ones so that the refcount - is known. The subcanvas replaces the fake signal with one showing - where the output data actually is, to avoid having to copy it. - For any other object, we just allocate a new output vector; - since we've already freed the inputs the objects might get called - "in place." */ + /* We delay creating outlets for subcanvasses or vinlets (except + reblocked or parallel); instead we create "borrowed" ones so that + the refcount is known. A subcanvas or vinlet will replace the fake + signal with one showing where the output data actually is, to avoid + having to copy it. 
+ For any other objects, we just allocate a new output vector; since + we've already freed the inputs the objects might get called "in place." + For parallel processing, the signals for vinlet will be created in a + new signal context, so they are independent from the parent canvas. */ if (nonewsigs) { *sig = uout->o_signal = @@ -1006,7 +1078,7 @@ void ugen_done_graph(t_dspcontext *dc) int chainblockbegin; /* DSP chain onset before block prolog code */ int chainblockend; /* and after block epilog code */ int chainafterall; /* and after signal outlet epilog */ - int reblock = 0, switched; + int reblock = 0, switched, parallel; int downsample = 1, upsample = 1; /* debugging printout */ @@ -1080,6 +1152,28 @@ void ugen_done_graph(t_dspcontext *dc) (downsample != 1) || (upsample != 1)) reblock = 1; switched = blk->x_switched; + #if PD_DSPTHREADS + /* always free existing DSP task! */ + if (blk->x_task) + { + dsptask_free(blk->x_task); + blk->x_task = 0; + } + parallel = blk->x_parallel; + if (parallel && reblock && parent_context) + { + /* the code for reblocking is rather complicated and I am not + * ready to combine it with parallel processing, so I + * just disallow it for now. After all, users can simply wrap + * a reblocked canvas in a non-reblocked one. Note that we do + * allow block~ on root canvases (which count as reblocked) + * because we do not have to care about inlets~ and outlets~. */ + pd_error(blk, "reblocking + parallel processing not supported (yet)"); + parallel = 0; + } + #else + parallel = 0; + #endif } else { @@ -1091,20 +1185,21 @@ void ugen_done_graph(t_dspcontext *dc) phase = 0; if (!parent_context) reblock = 1; switched = 0; + parallel = 0; } dc->dc_reblock = reblock; dc->dc_switched = switched; + dc->dc_parallel = parallel; dc->dc_srate = srate; dc->dc_vecsize = vecsize; dc->dc_calcsize = calcsize; - /* if we're reblocking or switched, we now have to create output - signals to fill in for the "borrowed" ones we have now.
This - is also possibly true even if we're not blocked/switched, in - the case that there was a signal loop. But we don't know this - yet. */ + /* if we're reblocking, switched or parallel, we now have to create + output signals to fill in for the "borrowed" ones we have now. + This is also possibly true even if we're not blocked/switched, in + the case that there was a signal loop. But we don't know this yet. */ - if (dc->dc_iosigs && (switched || reblock)) + if (dc->dc_iosigs && (switched || reblock || parallel)) { t_signal **sigp; for (i = 0, sigp = dc->dc_iosigs + dc->dc_ninlets; i < dc->dc_noutlets; @@ -1123,7 +1218,8 @@ void ugen_done_graph(t_dspcontext *dc) } if (THIS->u_loud) - post("reblock %d, switched %d", reblock, switched); + post("reblock %d, switched %d, parallel %d", + reblock, switched, parallel); /* schedule prologs for inlets and outlets. If the "reblock" flag is set, an inlet will put code on the DSP chain to copy its input @@ -1142,11 +1238,11 @@ void ugen_done_graph(t_dspcontext *dc) if (pd_class(zz) == vinlet_class) vinlet_dspprolog((struct _vinlet *)zz, dc->dc_iosigs, vecsize, calcsize, THIS->u_phase, period, frequency, - downsample, upsample, reblock, switched); + downsample, upsample, reblock, switched, parallel); else if (pd_class(zz) == voutlet_class) voutlet_dspprolog((struct _voutlet *)zz, outsigs, vecsize, calcsize, THIS->u_phase, period, frequency, - downsample, upsample, reblock, switched); + downsample, upsample, reblock, switched, parallel); } chainblockbegin = THIS->u_dspchainsize; @@ -1155,6 +1251,23 @@ void ugen_done_graph(t_dspcontext *dc) dsp_add(block_prolog, 1, blk); blk->x_chainonset = THIS->u_dspchainsize - 1; } +#if PD_DSPTHREADS + if (parallel) + { + /* this canvas needs its own private signal context.
*/ + if (!blk->x_signals) + blk->x_signals = signalcontext_new(); /* create lazily */ + else + signalcontext_clear(blk->x_signals); + dc->dc_signals = blk->x_signals; + /* create new DSP task for this canvas on the current queue */ + blk->x_task = dsptask_new(dc->dc_dspqueue, (t_dsptaskfn)block_runtask, blk); + /* schedule task */ + dsp_add(block_schedtask, 1, blk); + blk->x_taskonset = THIS->u_dspchainsize - 1; + } +#endif /* PD_DSPTHREADS */ + /* Initialize for sorting */ for (u = dc->dc_ugenlist; u; u = u->u_next) { @@ -1206,6 +1319,18 @@ void ugen_done_graph(t_dspcontext *dc) break; /* don't need to keep looking. */ } +#if PD_DSPTHREADS + if (parallel) + { + /* add sentinel */ + dsp_add(dsp_done, 0); + /* save chain size, see block_pushtask(). */ + blk->x_tasklength = THIS->u_dspchainsize - blk->x_taskonset - 1; + if (THIS->u_loud) + post("parallel DSP task length: %d", blk->x_tasklength); + } +#endif /* PD_DSPTHREADS */ + if (blk && (reblock || switched)) /* add block DSP epilog */ dsp_add(block_epilog, 1, blk); chainblockend = THIS->u_dspchainsize; @@ -1221,7 +1346,7 @@ void ugen_done_graph(t_dspcontext *dc) if (iosigs) iosigs += dc->dc_ninlets; voutlet_dspepilog((struct _voutlet *)zz, iosigs, vecsize, calcsize, THIS->u_phase, period, frequency, - downsample, upsample, reblock, switched); + downsample, upsample, reblock, switched, parallel); } } diff --git a/src/g_io.c b/src/g_io.c index 0bb90df187..879129e68c 100644 --- a/src/g_io.c +++ b/src/g_io.c @@ -36,6 +36,7 @@ typedef struct _vinlet t_object x_obj; t_canvas *x_canvas; t_inlet *x_inlet; + char x_parallel; int x_bufsize; t_sample *x_buf; /* signal buffer; zero if not a signal */ t_sample *x_endbuf; @@ -54,6 +55,7 @@ static void *vinlet_new(t_symbol *s) t_vinlet *x = (t_vinlet *)pd_new(vinlet_class); x->x_canvas = canvas_getcurrent(); x->x_inlet = canvas_addinlet(x->x_canvas, &x->x_obj.ob_pd, 0); + x->x_parallel = 0; x->x_bufsize = 0; x->x_buf = 0; outlet_new(&x->x_obj, 0); @@ -141,9 +143,14 @@ static 
void vinlet_dsp(t_vinlet *x, t_signal **sp) outsig = sp[0]; if (x->x_directsignal) { - signal_setborrowed(sp[0], x->x_directsignal); + /* fill in fake signal created in ugen_doit(). */ + signal_setborrowed(outsig, x->x_directsignal); } - else + else if (x->x_parallel) /* parallel */ + { + dsp_add_copy(x->x_buf, outsig->s_vec, outsig->s_n); + } + else /* reblocking */ { dsp_add(vinlet_perform, 3, x, outsig->s_vec, (t_int)outsig->s_vecsize); x->x_read = x->x_buf; @@ -175,7 +182,7 @@ int inlet_getsignalindex(t_inlet *x); /* set up prolog DSP code */ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched) + int downsample, int upsample, int reblock, int switched, int parallel) { t_signal *insig; /* no buffer means we're not a signal inlet */ @@ -183,12 +190,13 @@ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, return; x->x_updown.downsample = downsample; x->x_updown.upsample = upsample; + x->x_parallel = parallel; /* if the "reblock" flag is set, arrange to copy data in from the parent. 
*/ if (reblock) { - int parentvecsize, bufsize, oldbufsize, prologphase; + int parentvecsize, bufsize, prologphase; int re_parentvecsize; /* resampled parentvectorsize */ /* this should never happen: */ if (!x->x_buf) return; @@ -212,10 +220,10 @@ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, bufsize = re_parentvecsize; if (bufsize < myvecsize) bufsize = myvecsize; - if (bufsize != (oldbufsize = x->x_bufsize)) + if (bufsize != x->x_bufsize) { t_sample *buf = x->x_buf; - t_freebytes(buf, oldbufsize * sizeof(*buf)); + t_freebytes(buf, x->x_bufsize * sizeof(*buf)); buf = (t_sample *)t_getbytes(bufsize * sizeof(*buf)); memset((char *)buf, 0, bufsize * sizeof(*buf)); x->x_bufsize = bufsize; @@ -240,7 +248,7 @@ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, re_parentvecsize, method); dsp_add(vinlet_doprolog, 3, x, x->x_updown.s_vec, (t_int)re_parentvecsize); - } + } /* if the input signal's reference count is zero, we have to free it here because we didn't in ugen_doit(). */ @@ -250,6 +258,32 @@ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, else memset((char *)(x->x_buf), 0, bufsize * sizeof(*x->x_buf)); x->x_directsignal = 0; } + else if (parallel) + { + if (myvecsize != x->x_bufsize) + { + t_sample *buf = x->x_buf; + t_freebytes(buf, x->x_bufsize * sizeof(*buf)); + buf = (t_sample *)t_getbytes(myvecsize * sizeof(*buf)); + x->x_bufsize = myvecsize; + x->x_buf = buf; + x->x_endbuf = buf + myvecsize; + } + if (parentsigs) + { + insig = parentsigs[inlet_getsignalindex(x->x_inlet)]; + /* copy input signals to buffer. LATER think how to avoid the + * extra copy, see vinlet_dsp(). */ + dsp_add_copy(insig->s_vec, x->x_buf, myvecsize); + /* if the input signal's reference count is zero, we have + * to free it here because we didn't in ugen_doit(). 
*/ + if (!insig->s_refcount) + signal_makereusable(insig); + } + else + memset((char *)(x->x_buf), 0, myvecsize * sizeof(*x->x_buf)); + x->x_directsignal = 0; + } else { /* no reblocking; in this case our output signal is "borrowed" @@ -330,7 +364,8 @@ typedef struct _voutlet /* and here's a flag indicating that we aren't blocked but have to do a copy (because we're switched). */ char x_justcopyout; - t_resample x_updown; + char x_parallel; + t_resample x_updown; } t_voutlet; static void *voutlet_new(t_symbol *s) @@ -443,21 +478,23 @@ static t_int *voutlet_doepilog_resampling(t_int *w) int outlet_getsignalindex(t_outlet *x); - /* prolog for outlets -- store pointer to the outlet on the - parent, which, if "reblock" is false, will want to refer + /* prolog for outlets -- store pointer to the outlet on the parent, + which, if "reblock" and "parallel" is false, will want to refer back to whatever we see on our input during the "dsp" method - called later. */ + called later. If "parallel" is true, we copy the previous buffer + content to the output signals. 
*/ void voutlet_dspprolog(struct _voutlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched) + int downsample, int upsample, int reblock, int switched, int parallel) { /* no buffer means we're not a signal outlet */ if (!x->x_buf) return; x->x_updown.downsample=downsample; x->x_updown.upsample=upsample; - x->x_justcopyout = (switched && !reblock); - if (reblock) + x->x_justcopyout = (switched && !reblock && !parallel); + x->x_parallel = parallel; + if (reblock || parallel) { x->x_directsignal = 0; } @@ -467,6 +504,24 @@ void voutlet_dspprolog(struct _voutlet *x, t_signal **parentsigs, x->x_directsignal = parentsigs[outlet_getsignalindex(x->x_parentoutlet)]; } + if (parallel && parentsigs) + { + t_signal *outsig; + if (myvecsize != x->x_bufsize) + { + t_sample *buf = x->x_buf; + t_freebytes(buf, x->x_bufsize * sizeof(*buf)); + buf = (t_sample *)t_getbytes(myvecsize * sizeof(*buf)); + memset((char *)buf, 0, myvecsize * sizeof(*buf)); + x->x_bufsize = myvecsize; + x->x_buf = buf; + } + outsig = parentsigs[outlet_getsignalindex(x->x_parentoutlet)]; + if (outsig->s_n != myvecsize) bug("voutlet_dspprolog: bad vecsize"); + /* copy previous buffer content to output signals. the following + * DSP chain can now safely write to the buffer, see voutlet_dsp(). */ + dsp_add_copy(x->x_buf, outsig->s_vec, myvecsize); + } } static void voutlet_dsp(t_voutlet *x, t_signal **sp) @@ -474,16 +529,20 @@ static void voutlet_dsp(t_voutlet *x, t_signal **sp) t_signal *insig; if (!x->x_buf) return; insig = sp[0]; - if (x->x_justcopyout) + if (x->x_justcopyout) /* switched, but not reblocked or parallel */ dsp_add_copy(insig->s_vec, x->x_directsignal->s_vec, insig->s_n); else if (x->x_directsignal) { - /* if we're just going to make the signal available on the - parent patch, hand it off to the parent signal. 
*/ - /* this is done elsewhere--> sp[0]->s_refcount++; */ - signal_setborrowed(x->x_directsignal, sp[0]); + /* if we're just going to make the signal available on the + * parent patch, hand it off to the parent signal. + * this is done elsewhere--> sp[0]->s_refcount++; */ + signal_setborrowed(x->x_directsignal, insig); } - else + else if (x->x_parallel) /* parallel processing */ + /* write to buffer. at this point, we have already copied the + * previous buffer to the signal outlets, see voutlet_dspprolog(). */ + dsp_add_copy(insig->s_vec, x->x_buf, insig->s_n); + else /* reblocked */ dsp_add(voutlet_perform, 3, x, insig->s_vec, (t_int)insig->s_n); } @@ -492,7 +551,7 @@ static void voutlet_dsp(t_voutlet *x, t_signal **sp) If we aren't reblocking, there's nothing to do here. */ void voutlet_dspepilog(struct _voutlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched) + int downsample, int upsample, int reblock, int switched, int parallel) { if (!x->x_buf) return; /* this shouldn't be necesssary... 
*/ + x->x_updown.downsample=downsample; @@ -500,7 +559,7 @@ void voutlet_dspepilog(struct _voutlet *x, t_signal **parentsigs, if (reblock) { t_signal *outsig; - int parentvecsize, bufsize, oldbufsize; + int parentvecsize, bufsize; int re_parentvecsize; int bigperiod, epilogphase, blockphase; if (parentsigs) @@ -521,10 +580,10 @@ void voutlet_dspepilog(struct _voutlet *x, t_signal **parentsigs, blockphase = (phase + period - 1) & (bigperiod - 1) & (- period); bufsize = re_parentvecsize; if (bufsize < myvecsize) bufsize = myvecsize; - if (bufsize != (oldbufsize = x->x_bufsize)) + if (bufsize != x->x_bufsize) { t_sample *buf = x->x_buf; - t_freebytes(buf, oldbufsize * sizeof(*buf)); + t_freebytes(buf, x->x_bufsize * sizeof(*buf)); buf = (t_sample *)t_getbytes(bufsize * sizeof(*buf)); memset((char *)buf, 0, bufsize * sizeof(*buf)); x->x_bufsize = bufsize; From 78bcdf205f979ca16c1981d1b77038c833b6e84e Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Mon, 27 Dec 2021 02:00:04 +0100 Subject: [PATCH 21/32] block~: implement "join" method * if "join" is true, block~ has its own DSP task queue * in ugen_done_graph(), we push the DSP task queue, reset it and finally join it. this will synchronize all parallel DSP tasks in subpatches or child abstractions.
--- src/d_threadpool.c | 24 ++++++++++++++++++++++ src/d_ugen.c | 51 +++++++++++++++++++++++++++++++++++++++++++--- src/s_stuff.h | 2 ++ 3 files changed, 74 insertions(+), 3 deletions(-) diff --git a/src/d_threadpool.c b/src/d_threadpool.c index 07a015f7d8..10dc75411a 100644 --- a/src/d_threadpool.c +++ b/src/d_threadpool.c @@ -574,6 +574,18 @@ void dsptaskqueue_reset(t_dsptaskqueue *x) } } +static t_int *dsptaskqueue_doreset(t_int *w) +{ + t_dsptaskqueue *x = (t_dsptaskqueue *)w[1]; + dsptaskqueue_reset(x); + return w + 2; +} + +void dsp_add_reset(t_dsptaskqueue *x) +{ + dsp_add(dsptaskqueue_doreset, 1, x); +} + void dsptaskqueue_join(t_dsptaskqueue *x) { if (!d_threadpool || !d_threadpool->tp_n) @@ -614,6 +626,18 @@ void dsptaskqueue_join(t_dsptaskqueue *x) #endif } +static t_int *dsptaskqueue_dojoin(t_int *w) +{ + t_dsptaskqueue *x = (t_dsptaskqueue *)w[1]; + dsptaskqueue_join(x); + return w + 2; +} + +void dsp_add_join(t_dsptaskqueue *x) +{ + dsp_add(dsptaskqueue_dojoin, 1, x); +} + /* ---------------------------- t_dsptask ----------------------------- */ struct _dsptask diff --git a/src/d_ugen.c b/src/d_ugen.c index 145330f90e..0044cfe72c 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -216,10 +216,12 @@ typedef struct _block char x_reblock; /* true if inlets and outlets are reblocking */ #if PD_DSPTHREADS char x_parallel; /* true if we are processing in parallel */ + char x_join; /* true if this canvas should join DSP tasks of subpatches */ t_signalcontext *x_signals; /* signal context for parallel processing */ t_dsptask *x_task; /* DSP task for parallel processing */ int x_taskonset; /* beginning of parallel task in the chain */ int x_tasklength; /* length of parallel task */ + t_dsptaskqueue *x_dspqueue; /* maintain a DSP task queue and join tasks */ #endif int x_upsample; /* upsampling-factor */ int x_downsample; /* downsampling-factor */ @@ -240,10 +242,12 @@ static void *block_new(t_floatarg fvecsize, t_floatarg foverlap, x->x_switchon = 1; #if 
PD_DSPTHREADS x->x_parallel = 0; + x->x_join = 0; x->x_signals = 0; x->x_task = 0; x->x_taskonset = 0; x->x_tasklength = 0, + x->x_dspqueue = 0; #endif block_set(x, fvecsize, foverlap, fupsample); return (x); @@ -256,6 +260,8 @@ static void block_free(t_block *x) signalcontext_free(x->x_signals); if (x->x_task) dsptask_free(x->x_task); + if (x->x_dspqueue) + dsptaskqueue_release(x->x_dspqueue); #endif } @@ -403,6 +409,19 @@ static void block_parallel(t_block *x, t_floatarg f) } } +static void block_join(t_block *x, t_floatarg f) +{ + int join = f != 0; + if (join != x->x_join) + { + x->x_join = join; + if (x->x_dspqueue) + dsptaskqueue_release(x->x_dspqueue); + x->x_dspqueue = join ? dsptaskqueue_new() : 0; + canvas_update_dsp(); + } +} + static t_int *block_schedtask(t_int *w) { t_block *x = (t_block *)w[1]; @@ -435,6 +454,7 @@ void block_tilde_setup(void) A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); #if PD_DSPTHREADS class_addmethod(block_class, (t_method)block_parallel, gensym("parallel"), A_FLOAT, 0); + class_addmethod(block_class, (t_method)block_join, gensym("join"), A_FLOAT, 0); #endif class_addmethod(block_class, (t_method)block_dsp, gensym("dsp"), A_CANT, 0); class_addfloat(block_class, block_float); @@ -1078,7 +1098,7 @@ void ugen_done_graph(t_dspcontext *dc) int chainblockbegin; /* DSP chain onset before block prolog code */ int chainblockend; /* and after block epilog code */ int chainafterall; /* and after signal outlet epilog */ - int reblock = 0, switched, parallel; + int reblock = 0, switched, parallel, join; int downsample = 1, upsample = 1; /* debugging printout */ @@ -1160,6 +1180,7 @@ void ugen_done_graph(t_dspcontext *dc) blk->x_task = 0; } parallel = blk->x_parallel; + join = blk->x_join; if (parallel && reblock && parent_context) { /* the code for reblocking is rather complicated and I am not @@ -1171,8 +1192,17 @@ void ugen_done_graph(t_dspcontext *dc) pd_error(blk, "reblocking + parallel processing not supported (yet)"); parallel = 0; } + if 
(parallel && join) + { + /* it doesn't make sense to use 'parallel' together with 'join', + * because the latter will force the former to run synchronously, + * preventing any kind of parallelism. */ + logpost(blk, PD_NORMAL, "block~: warning: using 'parallel' " + "and 'join' in the same canvas has no effect."); + } #else parallel = 0; + join = 0; #endif } else @@ -1186,6 +1216,7 @@ void ugen_done_graph(t_dspcontext *dc) if (!parent_context) reblock = 1; switched = 0; parallel = 0; + join = 0; } dc->dc_reblock = reblock; dc->dc_switched = switched; @@ -1218,8 +1249,8 @@ void ugen_done_graph(t_dspcontext *dc) } if (THIS->u_loud) - post("reblock %d, switched %d, parallel %d", - reblock, switched, parallel); + post("reblock %d, switched %d, parallel %d, join %d", + reblock, switched, parallel, join); /* schedule prologs for inlets and outlets. If the "reblock" flag is set, an inlet will put code on the DSP chain to copy its input @@ -1252,6 +1283,13 @@ void ugen_done_graph(t_dspcontext *dc) blk->x_chainonset = THIS->u_dspchainsize - 1; } #if PD_DSPTHREADS + if (join) + { + /* this canvas manages its own DSP task queue. this part comes + * after the prolog, so that it gets skipped if we're switched off. */ + dc->dc_dspqueue = blk->x_dspqueue; + dsp_add_reset(blk->x_dspqueue); + } if (parallel) { /* this canvas needs its own private signal context. */ @@ -1329,6 +1367,13 @@ void ugen_done_graph(t_dspcontext *dc) if (THIS->u_loud) post("parallel DSP task length: %d", blk->x_tasklength); } + + if (join) + { + /* join DSP tasks managed by this canvas. this must come before + * the blockepilog, so that it gets skipped if we're switched off. 
*/ + dsp_add_join(blk->x_dspqueue); + } #endif /* PD_DSPTHREADS */ if (blk && (reblock || switched)) /* add block DSP epilog */ diff --git a/src/s_stuff.h b/src/s_stuff.h index 9a41d3ff21..afd322ec73 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -403,6 +403,8 @@ t_dsptaskqueue * dsptaskqueue_new(void); void dsptaskqueue_release(t_dsptaskqueue *x); void dsptaskqueue_reset(t_dsptaskqueue *x); void dsptaskqueue_join(t_dsptaskqueue *x); +void dsp_add_reset(t_dsptaskqueue *x); +void dsp_add_join(t_dsptaskqueue *x); EXTERN_STRUCT _dsptask; #define t_dsptask struct _dsptask From 54b2a55a7007f76c162b41ede6ad95d4747bff28 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Wed, 30 Mar 2022 16:22:20 +0200 Subject: [PATCH 22/32] handle switched-off parallel canvasses * each DSP task object registers itself with every outer switch~ object. If a switch~ object changes its state, it notifies all its DSP task children, so they can in turn notify their DSP task queue. * DSP tasks in switched-off parallel canvasses are simply not scheduled. * each DSP task queue maintains a count of switched-off DSP tasks, so that it won't lock up in case there is a switch~ *between* the queue and the task.
--- src/d_threadpool.c | 115 ++++++++++++++++++++++++++++++++++++++------- src/d_ugen.c | 54 +++++++++++++++++++++ src/s_stuff.h | 1 + 3 files changed, 154 insertions(+), 16 deletions(-) diff --git a/src/d_threadpool.c b/src/d_threadpool.c index 10dc75411a..8a7eb52860 100644 --- a/src/d_threadpool.c +++ b/src/d_threadpool.c @@ -531,6 +531,7 @@ int sys_dspthread_run(int index) struct _dsptaskqueue { int dq_numtasks; /* number of tasks, also doubles as reference count */ + int dq_numswitchoff; /* number of switched of tasks */ #ifdef MSVC_INTERLOCKED long dq_remaining; #else @@ -543,6 +544,7 @@ t_dsptaskqueue * dsptaskqueue_new(void) { t_dsptaskqueue *x = (t_dsptaskqueue *)getbytes(sizeof(t_dsptaskqueue)); x->dq_numtasks = 0; + x->dq_numswitchoff = 0; x->dq_remaining = 0; fast_semaphore_init(&x->dq_sem); return x; @@ -553,25 +555,43 @@ t_dsptaskqueue * dsptaskqueue_new(void) void dsptaskqueue_release(t_dsptaskqueue *x) { int oldcount = x->dq_numtasks--; - if (oldcount < 0) - bug("dsptaskqueue_free"); - else if (oldcount == 0) + if (oldcount > 0) { + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: %d tasks (%d switched off)\n", + x, oldcount-1, x->dq_numswitchoff); + #endif + } + else if (oldcount == 0) /* release queue */ + { + if (x->dq_numswitchoff != 0) + bug("dsptaskqueue_release: bad switch count (%d)", + x->dq_numswitchoff); + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: release\n"); + #endif fast_semaphore_destroy(&x->dq_sem); freebytes(x, sizeof(t_dsptaskqueue)); } + else if (oldcount < 0) + bug("dsptaskqueue_release: bad refcount (%d)", oldcount); } void dsptaskqueue_reset(t_dsptaskqueue *x) { - if (x->dq_numtasks > 0) + int count = x->dq_numtasks - x->dq_numswitchoff; + if (count > 0) { - x->dq_remaining = x->dq_numtasks; + x->dq_remaining = count; #ifdef DEBUG_DSPTHREADS - fprintf(stderr, "queue %p: reset with %d tasks\n", - x, x->dq_numtasks); + fprintf(stderr, "queue %p: reset with %d active tasks " + "(%d total, %d switched off)\n", + x, 
count, x->dq_numtasks, x->dq_numswitchoff); #endif } + else if (count < 0) + fprintf(stderr, "dsptaskqueue_reset: queue %p: bad task count (%d)\n", + x, count); } static t_int *dsptaskqueue_doreset(t_int *w) @@ -588,12 +608,11 @@ void dsp_add_reset(t_dsptaskqueue *x) void dsptaskqueue_join(t_dsptaskqueue *x) { - if (!d_threadpool || !d_threadpool->tp_n) - /* single-threaded -> nothing to do, see dsptask_sched() */ - return; - if (!x->dq_numtasks) /* no tasks */ + int count = x->dq_numtasks - x->dq_numswitchoff; + assert(count >= 0); + if (!d_threadpool || !d_threadpool->tp_n || !count) + /* single-threaded or no tasks, see also dsptask_sched() */ return; - /* multi-threaded */ #ifdef DEBUG_DSPTHREADS fprintf(stderr, "queue %p: begin join\n", x); #endif @@ -640,6 +659,8 @@ void dsp_add_join(t_dsptaskqueue *x) /* ---------------------------- t_dsptask ----------------------------- */ +void ugen_addtask(t_dsptask *x); + struct _dsptask { t_lfs_node dt_node; @@ -649,6 +670,7 @@ struct _dsptask t_dsptaskqueue *dt_queue; t_dsptaskfn dt_fn; void *dt_data; + int dt_switchoff; }; t_dsptask * dsptask_new(t_dsptaskqueue *queue, t_dsptaskfn fn, void *data) @@ -661,13 +683,26 @@ t_dsptask * dsptask_new(t_dsptaskqueue *queue, t_dsptaskfn fn, void *data) x->dt_queue = queue; x->dt_fn = fn; x->dt_data = data; + x->dt_switchoff = 0; queue->dq_numtasks++; /* increment refcount */ +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: %d tasks (%d switched off)\n", + queue, queue->dq_numtasks, queue->dq_numswitchoff); +#endif + ugen_addtask(x); return x; } void dsptask_free(t_dsptask *x) { - dsptaskqueue_release(x->dt_queue); /* release */ + /* make sure to decrement switch count! 
*/ + if (x->dt_switchoff > 0) + { + if (--x->dt_queue->dq_numswitchoff < 0) + bug("dsptask_free: bad queue switch count (%d)", + x->dt_queue->dq_numswitchoff); + } + dsptaskqueue_release(x->dt_queue); freebytes(x, sizeof(t_dsptask)); } @@ -681,10 +716,10 @@ void dsptask_sched(t_dsptask *x) dspthreadpool_push(x); fast_semaphore_post(&d_threadpool->tp_sem); } - else + else /* single-threaded */ { /* execute immediately, see dsptaskqueue_join(). - * NOTE: don't use dsptask_run() here! */ + * NB: don't use dsptask_run() here! */ (x->dt_fn)(x->dt_data); } } @@ -700,7 +735,10 @@ static void dsptask_run(t_dsptask *x, int index) #ifdef PDINSTANCE pd_setinstance(x->dt_pdinstance); #endif + assert(x->dt_switchoff == 0); + /* execute task */ (x->dt_fn)(x->dt_data); + /* atomically decrement task counter */ #ifdef MSVC_INTERLOCKED remaining = _InterlockedDecrement(&queue->dq_remaining); /* returns new value! */ #else @@ -709,12 +747,57 @@ static void dsptask_run(t_dsptask *x, int index) #ifdef DEBUG_DSPTHREADS fprintf(stderr, "queue %p: %d remaining tasks\n", queue, remaining); #endif - if (!remaining) + if (!remaining) /* last task */ { /* last task, notify waiting main audio thread; * see dsptaskqueue_join() */ fast_semaphore_post(&queue->dq_sem); } + else if (remaining < 0) + fprintf(stderr, "queue: %p: bad remaining task count (%d)\n", + queue, remaining); +} + +/* This is called whenever an enclosing switch~ object has changed state. + * Note that there can be several switch~ objects beyond this task; + * as soon as one of them is switched off, the DSP task won't run and it + * must notify the queue and DSP context to prevent them from locking up. + * Conversely, *all* enclosing switch~ objects must be switched on for + * the task to run (again), i.e. the counter must reach 0. 
*/ +void dsptask_switch(t_dsptask *x, int on) +{ + t_dsptaskqueue *queue = x->dt_queue; + int state, oldstate = x->dt_switchoff > 0; + if (on) + { + if (--x->dt_switchoff < 0) + bug("dsptask_switch: bad switch count (%d)", x->dt_switchoff); + } + else + x->dt_switchoff++; + + state = x->dt_switchoff > 0; + if (oldstate != state) + { + /* only notify if the state has changed! */ + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: switch %s task %p \n", + x->dt_queue, (on ? "on" : "off"), x); + #endif + if (on) /* off -> on */ + { + if (--queue->dq_numswitchoff < 0) + bug("dsptask_switch: bad queue switch count (%d)", + queue->dq_numswitchoff); + } + else /* on -> off */ + { + if (++queue->dq_numswitchoff > queue->dq_numtasks) + bug("dsptask_switch: queue switch count (%d) " + "exceeds queue task count (%d)", + queue->dq_numswitchoff, queue->dq_numtasks); + } + } } #else /* PD_DSPTHREADS */ diff --git a/src/d_ugen.c b/src/d_ugen.c index 0044cfe72c..589cee5e1c 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -222,6 +222,8 @@ typedef struct _block int x_taskonset; /* beginning of parallel task in the chain */ int x_tasklength; /* length of parallel task */ t_dsptaskqueue *x_dspqueue; /* maintain a DSP task queue and join tasks */ + t_dsptask **x_childtasks; /* child DSP tasks */ + int x_numchildtasks; /* number of child DSP tasks */ #endif int x_upsample; /* upsampling-factor */ int x_downsample; /* downsampling-factor */ @@ -248,6 +250,8 @@ static void *block_new(t_floatarg fvecsize, t_floatarg foverlap, x->x_taskonset = 0; x->x_tasklength = 0, x->x_dspqueue = 0; + x->x_childtasks = 0; + x->x_numchildtasks = 0; #endif block_set(x, fvecsize, foverlap, fupsample); return (x); @@ -262,6 +266,8 @@ static void block_free(t_block *x) dsptask_free(x->x_task); if (x->x_dspqueue) dsptaskqueue_release(x->x_dspqueue); + if (x->x_numchildtasks) + freebytes(x->x_childtasks, x->x_numchildtasks * sizeof(t_dsptask *)); #endif } @@ -338,7 +344,20 @@ static void 
*switch_new(t_floatarg fvecsize, t_floatarg foverlap, static void block_float(t_block *x, t_floatarg f) { if (x->x_switched) + { + #if PD_DSPTHREADS + int i, oldstate = x->x_switchon, state = (f != 0); + x->x_switchon = state; + /* only do this if the state has changed! */ + if (state != oldstate) + { + for (i = 0; i < x->x_numchildtasks; i++) + dsptask_switch(x->x_childtasks[i], state); + } + #else x->x_switchon = (f != 0); + #endif + } } static void block_bang(t_block *x) @@ -399,6 +418,16 @@ static t_int *block_epilog(t_int *w) #if PD_DSPTHREADS +static void switch_addtask(t_block *x, t_dsptask *t) +{ + int old = x->x_numchildtasks++; + x->x_childtasks = resizebytes(x->x_childtasks, + old * sizeof(t_dsptask *), x->x_numchildtasks * sizeof(t_dsptask *)); + x->x_childtasks[old] = t; + if (!x->x_switchon) + dsptask_switch(t, 0); /* switch off */ +} + static void block_parallel(t_block *x, t_floatarg f) { int par = f != 0; @@ -721,11 +750,27 @@ struct _dspcontext char dc_parallel; /* true if we're parallel. */ #if PD_DSPTHREADS t_dsptaskqueue *dc_dspqueue; /* current DSP task queue */ + t_block *dc_block; /* block~ object */ #endif }; #define t_dspcontext struct _dspcontext +#if PD_DSPTHREADS + +void ugen_addtask(t_dsptask *x) +{ + t_dspcontext *dc; + /* Add the DSP task to all enclosing switch~ objects */ + for (dc = THIS->u_context; dc; dc = dc->dc_parentcontext) + { + if (dc->dc_block && dc->dc_block->x_switched) /* switch~ */ + switch_addtask(dc->dc_block, x); + } +} + +#endif /* PD_DSPTHREADS */ + /* get a new signal for the current context - used by clone~ object */ t_signal *signal_newfromcontext(int borrowed) { @@ -838,6 +883,7 @@ t_dspcontext *ugen_start_graph(int toplevel, t_signal **sp, * by block~ (see "join") or by dsptaskqueue_push(). */ dc->dc_dspqueue = THIS->u_context ? 
THIS->u_context->dc_dspqueue : THIS->u_dspqueue; + dc->dc_block = 0; #endif THIS->u_context = dc; return (dc); @@ -1173,6 +1219,14 @@ void ugen_done_graph(t_dspcontext *dc) reblock = 1; switched = blk->x_switched; #if PD_DSPTHREADS + dc->dc_block = blk; + /* free old DSP task list */ + if (blk->x_numchildtasks) + { + freebytes(blk->x_childtasks, blk->x_numchildtasks * sizeof(t_dsptask *)); + blk->x_childtasks = 0; + blk->x_numchildtasks = 0; + } /* always free existing DSP task! */ if (blk->x_task) { diff --git a/src/s_stuff.h b/src/s_stuff.h index afd322ec73..c3b0826dce 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -414,6 +414,7 @@ typedef void (*t_dsptaskfn) (void *data); t_dsptask * dsptask_new(t_dsptaskqueue *queue, t_dsptaskfn fn, void *data); void dsptask_free(t_dsptask *x); void dsptask_sched(t_dsptask *x); +void dsptask_switch(t_dsptask *x, int on); #endif /* PD_DSPTHREADS */ From 72ccb5eb1280d55338f7a2b96419e907c44077e8 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Sun, 26 Dec 2021 16:45:31 +0100 Subject: [PATCH 23/32] clone: implement "parallel" method * if "parallel" is true, clone has its own signal context and DSP task queue. * child abstractions are scheduled as DSP tasks and finally joined. * the cloned abstractions don't need to know that they are being processed in parallel, we can carry out all necessary steps in clone_dsp(). 
--- src/d_ugen.c | 31 +++++++ src/g_clone.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 248 insertions(+), 9 deletions(-) diff --git a/src/d_ugen.c b/src/d_ugen.c index 589cee5e1c..1c8ba132e6 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -769,6 +769,27 @@ void ugen_addtask(t_dsptask *x) } } + /* used in clone_dsp() */ +t_dsptaskqueue * dsptaskqueue_push(t_dsptaskqueue *newqueue) +{ + t_dsptaskqueue *old; + if (!THIS->u_context || !((old = THIS->u_context->dc_dspqueue))) + { + bug("dsptaskqueue_push"); + return 0; + } + THIS->u_context->dc_dspqueue = newqueue; + return old; +} + +void dsptaskqueue_pop(t_dsptaskqueue *oldqueue) +{ + if (THIS->u_context && THIS->u_context->dc_dspqueue) + THIS->u_context->dc_dspqueue = oldqueue; + else + bug("dsptaskqueue_pop"); +} + #endif /* PD_DSPTHREADS */ /* get a new signal for the current context - used by clone~ object */ @@ -829,6 +850,16 @@ int ugen_getsortno(void) return (THIS->u_sortno); } +t_int *ugen_getchain(void) +{ + return THIS->u_dspchain; +} + +int ugen_getsize(void) +{ + return THIS->u_dspchainsize; +} + #if 0 void glob_ugen_printstate(void *dummy, t_symbol *s, int argc, t_atom *argv) { diff --git a/src/g_clone.c b/src/g_clone.c index 20104e0fe9..140b04f32e 100644 --- a/src/g_clone.c +++ b/src/g_clone.c @@ -1,6 +1,7 @@ #include "m_pd.h" #include "g_canvas.h" #include "m_imp.h" +#include "s_stuff.h" #include /* ---------- clone - maintain copies of a patch ----------------- */ @@ -22,10 +23,30 @@ t_class *clone_class; static t_class *clone_in_class, *clone_out_class; +#if PD_DSPTHREADS + +typedef struct _signalcontext t_signalcontext; + +t_signalcontext *signalcontext_new(void); +void signalcontext_free(t_signalcontext *x); +void signalcontext_clear(t_signalcontext *x); +t_signalcontext *signalcontext_push(t_signalcontext *newcontext); +void signalcontext_pop(t_signalcontext *oldcontext); + +t_dsptaskqueue * dsptaskqueue_push(t_dsptaskqueue *newqueue); +void 
dsptaskqueue_pop(t_dsptaskqueue *oldqueue); + +#endif /* PD_DSPTHREADS */ + typedef struct _copy { t_glist *c_gl; - int c_on; /* DSP running */ +#if PD_DSPTHREADS + t_dsptask *c_task; + int c_chainonset; + int c_chainlength; + t_signalcontext *c_sigcontext; +#endif } t_copy; typedef struct _in @@ -42,6 +63,9 @@ typedef struct _out t_outlet *o_outlet; int o_signal; int o_n; +#if PD_DSPTHREADS + t_signal *o_outsig; +#endif } t_out; typedef struct _clone @@ -59,6 +83,10 @@ typedef struct _clone int x_phase; int x_startvoice; /* number of first voice, 0 by default */ int x_suppressvoice; /* suppress voice number as $1 arg */ +#if PD_DSPTHREADS + int x_parallel; /* process in parallel */ + t_dsptaskqueue *x_dspqueue; /* DSP task queue */ +#endif } t_clone; int clone_match(t_pd *z, t_symbol *name, t_symbol *dir) @@ -146,6 +174,18 @@ static void clone_in_fwd(t_in *x, t_symbol *s, int argc, t_atom *argv) typedmess(&x->i_pd, argv->a_w.w_symbol, argc-1, argv+1); } +#if PD_DSPTHREADS +static void clone_in_parallel(t_in *x, t_floatarg f) +{ + int par = f != 0; + if (par != x->i_owner->x_parallel) + { + x->i_owner->x_parallel = par; + canvas_update_dsp(); + } +} +#endif /* PD_DSPTHREADS */ + static void clone_out_anything(t_out *x, t_symbol *s, int argc, t_atom *argv) { t_atom *outv; @@ -176,17 +216,29 @@ static void clone_free(t_clone *x) } for (i = 0; i < x->x_n; i++) { - canvas_closebang(x->x_vec[i].c_gl); - pd_free(&x->x_vec[i].c_gl->gl_pd); + t_copy *copy = &x->x_vec[i]; + canvas_closebang(copy->c_gl); + pd_free(©->c_gl->gl_pd); t_freebytes(x->x_outvec[i], x->x_nout * sizeof(*x->x_outvec[i])); + #if PD_DSPTHREADS + if (copy->c_sigcontext) + signalcontext_free(copy->c_sigcontext); + if (copy->c_task) + dsptask_free(copy->c_task); + #endif } t_freebytes(x->x_vec, x->x_n * sizeof(*x->x_vec)); t_freebytes(x->x_argv, x->x_argc * sizeof(*x->x_argv)); t_freebytes(x->x_invec, x->x_nin * sizeof(*x->x_invec)); t_freebytes(x->x_outvec, x->x_n * sizeof(*x->x_outvec)); clone_voicetovis 
= voicetovis; + } +#if PD_DSPTHREADS + if (x->x_dspqueue) + dsptaskqueue_release(x->x_dspqueue); +#endif } static t_canvas *clone_makeone(t_symbol *s, int argc, t_atom *argv) @@ -228,6 +280,7 @@ void clone_setn(t_clone *x, t_floatarg f) { t_canvas *c; t_out *outvec; + t_copy *copy; SETFLOAT(x->x_argv, x->x_startvoice + i); if (!(c = clone_makeone(x->x_s, x->x_argc - x->x_suppressvoice, x->x_argv + x->x_suppressvoice))) @@ -237,8 +290,14 @@ void clone_setn(t_clone *x, t_floatarg f) } x->x_vec = (t_copy *)t_resizebytes(x->x_vec, i * sizeof(t_copy), (i+1) * sizeof(t_copy)); - x->x_vec[i].c_gl = c; - x->x_vec[i].c_on = 0; + copy = &x->x_vec[i]; + copy->c_gl = c; + #if PD_DSPTHREADS + copy->c_task = 0; + copy->c_chainonset = 0; + copy->c_chainlength = 0; + copy->c_sigcontext = 0; + #endif x->x_outvec = (t_out **)t_resizebytes(x->x_outvec, i * sizeof(*x->x_outvec), (i+1) * sizeof(*x->x_outvec)); x->x_outvec[i] = outvec = @@ -249,6 +308,9 @@ void clone_setn(t_clone *x, t_floatarg f) outvec[j].o_signal = obj_issignaloutlet(&x->x_vec[0].c_gl->gl_obj, i); outvec[j].o_n = x->x_startvoice + i; + #if PD_DSPTHREADS + outvec[j].o_outsig = 0; + #endif outvec[j].o_outlet = x->x_outvec[0][j].o_outlet; obj_connect(&x->x_vec[i].c_gl->gl_obj, j, @@ -260,8 +322,16 @@ void clone_setn(t_clone *x, t_floatarg f) { for (i = wantn; i < nwas; i++) { - canvas_closebang(x->x_vec[i].c_gl); - pd_free(&x->x_vec[i].c_gl->gl_pd); + t_copy *copy = &x->x_vec[i]; + canvas_closebang(copy->c_gl); + pd_free(©->c_gl->gl_pd); + t_freebytes(x->x_outvec[i], x->x_nout * sizeof(*x->x_outvec[i])); + #if PD_DSPTHREADS + if (copy->c_sigcontext) + signalcontext_free(copy->c_sigcontext); + if (copy->c_task) + dsptask_free(copy->c_task); + #endif } x->x_vec = (t_copy *)t_resizebytes(x->x_vec, nwas * sizeof(t_copy), wantn * sizeof(*x->x_vec)); @@ -294,10 +364,35 @@ void canvas_dodsp(t_canvas *x, int toplevel, t_signal **sp); t_signal *signal_newfromcontext(int borrowed); void signal_makereusable(t_signal *sig); +#if 
PD_DSPTHREADS +t_int *ugen_getchain(void); +int ugen_getsize(void); +t_int *dsp_done(t_int *w); + +static t_int *clone_schedtask(t_int *w) +{ + t_copy *x = (t_copy *)w[1]; + dsptask_sched(x->c_task); + /* skip the DSP chain performed by clone_runtask(). */ + return w + 2 + x->c_chainlength; +} + +static void clone_runtask(t_copy *x) +{ + t_int *ip = ugen_getchain() + x->c_chainonset; + while (ip) + ip = (*(t_perfroutine)(*ip))(ip); +} + +#endif /* PD_DSPTHREADS */ + static void clone_dsp(t_clone *x, t_signal **sp) { int i, j, nin, nout; t_signal **tempsigs, **tempio; +#if PD_DSPTHREADS + int parallel = x->x_parallel; +#endif if (!x->x_n) return; for (i = nin = 0; i < x->x_nin; i++) @@ -319,6 +414,98 @@ static void clone_dsp(t_clone *x, t_signal **sp) return; } } +#if PD_DSPTHREADS + /* always free existing DSP tasks! */ + for (i = 0; i < x->x_n; i++) + { + if (x->x_vec[i].c_task) + { + dsptask_free(x->x_vec[i].c_task); + x->x_vec[i].c_task = 0; + } + } + if (parallel) + { + /* Every child abstraction gets its own DSP task. Unlike block~ + "parallel", + * cloned abstractions are not aware that they are being processed in parallel. + * Since all DSP tasks are joined by us, there is no need for double buffering + * in voutlet, and consequently there is no delay, either. + * The clone object maintains its own DSP task queue. Each cloned instance also has + * its own signal context because signals must not be reused across child abstractions. + * Each child abstraction starts with new input signals which are copies of our + * input signals, but belong to a dedicated signal context. After we have processed + * and joined all child abstractions, we can simply sum their output signals into + * our output signals. 
*/ + int blocksize = sp[0]->s_n; + t_dsptaskqueue *oldqueue; + if (!x->x_dspqueue) + x->x_dspqueue = dsptaskqueue_new(); + /* push our queue to the current DSP context */ + oldqueue = dsptaskqueue_push(x->x_dspqueue); + /* reset queue */ + dsp_add_reset(x->x_dspqueue); + /* schedule canvases as tasks. */ + for (j = 0; j < x->x_n; j++) + { + t_copy *copy = &x->x_vec[j]; + t_out *outvec = x->x_outvec[j]; + t_signal **tempio; + /* push new signal context, so that signals are not reused concurrently. */ + t_signalcontext *oldsigcontext; + if (!copy->c_sigcontext) + copy->c_sigcontext = signalcontext_new(); /* create lazily */ + else + signalcontext_clear(copy->c_sigcontext); + oldsigcontext = signalcontext_push(copy->c_sigcontext); + tempio = alloca((nin + nout) * sizeof(t_signal *)); + /* create input signals (in the new context) */ + for (i = 0; i < nin; ++i) + tempio[i] = signal_newfromcontext(0); + for (i = 0; i < nout; ++i) + /* create "fake" output signals which will be filled later by voutlet + * in the child abstraction; normally this would be done in ugen_doit(). */ + outvec[i].o_outsig = tempio[nin + i] = signal_newfromcontext(1); + /* create new DSP task */ + copy->c_task = dsptask_new(x->x_dspqueue, (t_dsptaskfn)clone_runtask, copy); + dsp_add(clone_schedtask, 1, copy); + copy->c_chainonset = ugen_getsize() - 1; + /* copy parent input signals to our input signals. we can already do this + * concurrently because nobody is writing to the parent input signal. */ + for (i = 0; i < nin; ++i) + dsp_add_copy(sp[i]->s_vec, tempio[i]->s_vec, blocksize); + /* now we can process the child abstraction. */ + canvas_dodsp(copy->c_gl, 0, tempio); + dsp_add(dsp_done, 0); /* sentinel */ + copy->c_chainlength = ugen_getsize() - copy->c_chainonset - 1; + #if 0 /* not necessary; nobody will (re)use our signals. */ + for (i = 0; i < (nin + nout); ++i) + signal_makereusable(tempio[i]); + #endif + /* restore signal context. 
*/ + signalcontext_pop(oldsigcontext); + } + /* join all tasks */ + dsp_add_join(x->x_dspqueue); + /* Finally we can sum the outputs. Unlike "regular" clone, we can directly write + * to the output signals because the input signals have already been copied. */ + for (j = 0; j < x->x_n; j++) + { + for (i = 0; i < nout; i++) + { + t_sample *from = x->x_outvec[j][i].o_outsig->s_vec; + t_sample *to = sp[nin + i]->s_vec; + if (j == 0) + dsp_add_copy(from, to, blocksize); + else + dsp_add_plus(from, to, to, blocksize); + } + } + /* restore the previous DSP queue */ + dsptaskqueue_pop(oldqueue); + + return; /* done */ + } +#endif /* PD_DSPTHREADS */ tempsigs = (t_signal **)alloca((nin + 2 * nout) * sizeof(*tempsigs)); tempio = tempsigs + nout; /* load input signals into signal vector to send subpatches */ @@ -329,15 +516,19 @@ static void clone_dsp(t_clone *x, t_signal **sp) sp[i]->s_refcount += x->x_n-1; tempio[i] = sp[i]; } - /* for first copy, write output to first nout temp sigs */ + /* create temp signals to safely sum the outputs of each canvas + * without overwriting the input. */ for (i = 0; i < nout; i++) tempsigs[i] = signal_newfromcontext(0); for (j = 0; j < x->x_n; j++) { + /* create "fake" output signals which will be filled later by voutlet + * in the child abstraction; normally this would be done in ugen_doit(). 
*/ for (i = 0; i < nout; i++) tempio[nin + i] = signal_newfromcontext(1); canvas_dodsp(x->x_vec[j].c_gl, 0, tempio); + /* sum output signals to temp signals */ for (i = 0; i < nout; i++) { if (j == 0) @@ -348,7 +539,7 @@ static void clone_dsp(t_clone *x, t_signal **sp) signal_makereusable(tempio[nin + i]); } } - /* copy to output signsls */ + /* copy temp signals to our output signals */ for (i = 0; i < nout; i++) { dsp_add_copy(tempsigs[i]->s_vec, sp[nin+i]->s_vec, tempsigs[i]->s_n); @@ -366,6 +557,10 @@ static void *clone_new(t_symbol *s, int argc, t_atom *argv) x->x_outvec = 0; x->x_startvoice = 0; x->x_suppressvoice = 0; +#if PD_DSPTHREADS + x->x_parallel = 0; + x->x_dspqueue = 0; +#endif clone_voicetovis = -1; if (argc == 0) { @@ -405,6 +600,12 @@ static void *clone_new(t_symbol *s, int argc, t_atom *argv) goto fail; x->x_vec = (t_copy *)getbytes(sizeof(*x->x_vec)); x->x_vec[0].c_gl = c; +#if PD_DSPTHREADS + x->x_vec[0].c_task = 0; + x->x_vec[0].c_chainonset = 0; + x->x_vec[0].c_chainlength = 0; + x->x_vec[0].c_sigcontext = 0; +#endif x->x_n = 1; x->x_nin = obj_ninlets(&x->x_vec[0].c_gl->gl_obj); x->x_invec = (t_in *)getbytes(x->x_nin * sizeof(*x->x_invec)); @@ -430,6 +631,9 @@ static void *clone_new(t_symbol *s, int argc, t_atom *argv) outvec[i].o_signal = obj_issignaloutlet(&x->x_vec[0].c_gl->gl_obj, i); outvec[i].o_n = x->x_startvoice; + #if PD_DSPTHREADS + outvec[i].o_outsig = 0; + #endif outvec[i].o_outlet = outlet_new(&x->x_obj, (outvec[i].o_signal ? 
&s_signal : 0)); obj_connect(&x->x_vec[0].c_gl->gl_obj, i, @@ -474,6 +678,10 @@ void clone_setup(void) A_FLOAT, A_FLOAT, 0); class_addmethod(clone_in_class, (t_method)clone_in_fwd, gensym("fwd"), A_GIMME, 0); +#if PD_DSPTHREADS + class_addmethod(clone_in_class, (t_method)clone_in_parallel, gensym("parallel"), + A_FLOAT, 0); +#endif class_addlist(clone_in_class, (t_method)clone_in_list); clone_out_class = class_new(gensym("clone-outlet"), 0, 0, From 4c83d7ac53cea7dc614e3f453b3f74e209112b5a Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Wed, 29 Dec 2021 18:00:27 +0100 Subject: [PATCH 24/32] add thread-safety checks dsptaskqueue_update() checks if the canvas sub-tree contains non-thread-safe objects. Note that any non-thread-safe objects *outside* the tree are ignored. This function is called in ugen_done_graph() for canvases with block~ + "join" and also in ugen_start() for the toplevel queue. dsptaskqueue_check() checks if the task queue is thread-safe, and if false, posts the first N non-thread-safe objects (only once per queue). This is called in ugen_done_graph() for canvases with block~ + "parallel". Each parallel canvas checks if the enclosing joining canvas is thread-safe and prints an error message if false. As an optimization, we traverse the complete object tree once in ugen_start() and mark every sub-tree (depth first) by setting gl_threadsafe. This speeds up subsequent calls to canvas_isthreadsafe() tremendously. clone is handled specially since it contains both the DSP task queue and the DSP tasks, so the check can be performed all inside clone_dsp(). Thread safety checks can be disabled with the "-nothreadsafe" command line option. 
--- src/d_threadpool.c | 40 +++++++++++++- src/d_ugen.c | 25 ++++++++- src/g_canvas.c | 132 +++++++++++++++++++++++++++++++++++++++++++++ src/g_canvas.h | 1 + src/g_clone.c | 79 +++++++++++++++++++++++++-- src/s_main.c | 15 ++++++ src/s_stuff.h | 9 +++- 7 files changed, 293 insertions(+), 8 deletions(-) diff --git a/src/d_threadpool.c b/src/d_threadpool.c index 8a7eb52860..8e353f4671 100644 --- a/src/d_threadpool.c +++ b/src/d_threadpool.c @@ -538,15 +538,21 @@ struct _dsptaskqueue atomic_int dq_remaining; #endif t_fast_semaphore dq_sem; + t_canvas *dq_owner; /* canvas or NULL */ + char dq_threadsafe; + char dq_warned; }; -t_dsptaskqueue * dsptaskqueue_new(void) +t_dsptaskqueue * dsptaskqueue_new(t_canvas *owner) { t_dsptaskqueue *x = (t_dsptaskqueue *)getbytes(sizeof(t_dsptaskqueue)); x->dq_numtasks = 0; x->dq_numswitchoff = 0; x->dq_remaining = 0; fast_semaphore_init(&x->dq_sem); + x->dq_owner = owner; + x->dq_threadsafe = 0; + x->dq_warned = 0; return x; } @@ -577,6 +583,38 @@ void dsptaskqueue_release(t_dsptaskqueue *x) bug("dsptaskqueue_release: bad refcount (%d)", oldcount); } + /* check if our sub-tree is thread-safe and cache the result. + * Called once per DSP graph update in ugen_start() and + * ugen_done_graph(); see also canvas_markthreadsafe(). */ +void dsptaskqueue_update(t_dsptaskqueue *x) +{ + x->dq_threadsafe = sys_threadsafe ? + canvas_isthreadsafe(x->dq_owner, 0) : 1; /* silent! */ + x->dq_warned = 0; +} + + /* check if our sub-tree is thread-safe, using the cached result + * of dsptaskqueue_update() above. Called by block~ objects + * associated with this queue, see ugen_done_graph(). 
*/ +int dsptaskqueue_check(t_dsptaskqueue *x) +{ + if (x->dq_threadsafe) + return 1; + else + { + #if 1 + if (!x->dq_warned) /* only warn once per DSP task queue */ + #endif + { + if (canvas_isthreadsafe(x->dq_owner, 1)) /* loud */ + /* dq_threadsafe should have been true */ + bug("dsptaskqueue_check"); + x->dq_warned = 1; + } + return 0; + } +} + void dsptaskqueue_reset(t_dsptaskqueue *x) { int count = x->dq_numtasks - x->dq_numswitchoff; diff --git a/src/d_ugen.c b/src/d_ugen.c index 1c8ba132e6..af78ac7694 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -105,7 +105,7 @@ void d_ugen_newpdinstance(void) THIS->u_dspchainsize = 0; THIS->u_signals = signalcontext_new(); #if PD_DSPTHREADS - THIS->u_dspqueue = dsptaskqueue_new(); + THIS->u_dspqueue = dsptaskqueue_new(0); lockfree_stack_init(&THIS->u_clocks); #endif } @@ -217,6 +217,7 @@ typedef struct _block #if PD_DSPTHREADS char x_parallel; /* true if we are processing in parallel */ char x_join; /* true if this canvas should join DSP tasks of subpatches */ + t_canvas *x_owner; /* owning canvas */ t_signalcontext *x_signals; /* signal context for parallel processing */ t_dsptask *x_task; /* DSP task for parallel processing */ int x_taskonset; /* beginning of parallel task in the chain */ @@ -245,6 +246,7 @@ static void *block_new(t_floatarg fvecsize, t_floatarg foverlap, #if PD_DSPTHREADS x->x_parallel = 0; x->x_join = 0; + x->x_owner = canvas_getcurrent(); x->x_signals = 0; x->x_task = 0; x->x_taskonset = 0; @@ -446,7 +448,7 @@ static void block_join(t_block *x, t_floatarg f) x->x_join = join; if (x->x_dspqueue) dsptaskqueue_release(x->x_dspqueue); - x->x_dspqueue = join ? dsptaskqueue_new() : 0; + x->x_dspqueue = join ? 
dsptaskqueue_new(x->x_owner) : 0; canvas_update_dsp(); } } @@ -756,6 +758,7 @@ struct _dspcontext #define t_dspcontext struct _dspcontext + /* for clone object, see clone_dsp() */ #if PD_DSPTHREADS void ugen_addtask(t_dsptask *x) @@ -842,6 +845,12 @@ void ugen_start(void) THIS->u_dspchain = (t_int *)getbytes(sizeof(*THIS->u_dspchain)); THIS->u_dspchain[0] = (t_int)dsp_done; THIS->u_dspchainsize = 1; +#if PD_DSPTHREADS + /* first check and mark canvas tree */ + canvas_markthreadsafe(); + /* then update toplevel queue */ + dsptaskqueue_update(THIS->u_dspqueue); +#endif if (THIS->u_context) bug("ugen_start"); } @@ -1285,6 +1294,18 @@ void ugen_done_graph(t_dspcontext *dc) logpost(blk, PD_NORMAL, "block~: warning: using 'parallel' " "and 'join' in the same canvas has no effect."); } + /* first update queue */ + if (join) + dsptaskqueue_update(blk->x_dspqueue); + /* then check if we can safely run in parallel */ + if (parallel && !dsptaskqueue_check(dc->dc_dspqueue)) + { + /* see also clone_dsp() */ + pd_error(blk, "block~: parallel processing not possible because " + "some DSP objects are not officially thread-safe! Start Pd with " + "with -nothreadsafe to circumvent this check (potentially dangerous!)"); + parallel = 0; + } #else parallel = 0; join = 0; diff --git a/src/g_canvas.c b/src/g_canvas.c index 1888aaba8a..49ed75bace 100644 --- a/src/g_canvas.c +++ b/src/g_canvas.c @@ -500,6 +500,12 @@ t_canvas *canvas_new(void *dummy, t_symbol *sel, int argc, t_atom *argv) canvas_dosetbounds(x, xloc, yloc, xloc + width, yloc + height); x->gl_owner = owner; x->gl_isclone = 0; +#if PD_DSPTHREADS + /* if started with -nothreadsafe, we pretend to be thread-safe. */ + x->gl_threadsafe = !sys_threadsafe; +#else + x->gl_threadsafe = 0; +#endif x->gl_name = (*s->s_name ? s : (THISGUI->i_newfilename ? 
THISGUI->i_newfilename : gensym("Pd"))); canvas_bind(x); @@ -1335,6 +1341,132 @@ void ugen_connect(t_dspcontext *dc, t_object *x1, int outno, t_object *x2, int inno); void ugen_done_graph(t_dspcontext *dc); +#if PD_DSPTHREADS + +int clone_isthreadsafe(t_pd *x, t_symbol *dspsym, int *limit); + + /* also called by clone_isthreadsafe() */ +int obj_isthreadsafe(t_gobj *x, t_symbol *dspsym, int *limit) +{ + if (x->g_pd == canvas_class) + { + t_canvas *c = (t_canvas *)x; + /* -threadsafe -> use cached result of canvas_markthreadsafe(); + * -nothreadsafe -> always true, see canvas_new(). */ + if (c->gl_threadsafe) + return 1; + else if (!limit) + return 0; + else /* find offending objects */ + { + t_canvas *canvas = (t_canvas *)x; + t_gobj *y; + for (y = canvas->gl_list; y && (*limit > 0); y = y->g_next) + obj_isthreadsafe(y, dspsym, limit); + return 0; + } + } + else if (x->g_pd == clone_class) + return clone_isthreadsafe(&x->g_pd, dspsym, limit); + else + { /* zgetfn() comes last because it's the most expensive check */ + if (x->g_pd->c_patchable && !x->g_pd->c_threadsafe + && zgetfn(&x->g_pd, dspsym)) + { + /* LATER get rid of duplicate warnings for the same class */ + if (limit && *limit > 0) + { + logpost(x, PD_NORMAL, "warning: %s is not thread-safe!", + class_getname(x->g_pd)); + if (--(*limit) == 0) /* hit limit */ + logpost(0, PD_NORMAL, "..."); + } + return 0; + } else + return 1; + } +} + +#define THREADSAFE_WARN_MAX 10 + + /* check if all DSP objects starting at the given canvas + * are thread-safe; if 'x' is NULL, check all root canvases. */ +int canvas_isthreadsafe(t_canvas *x, int loud) +{ + t_symbol *dspsym = gensym("dsp"); + int limit = THREADSAFE_WARN_MAX; + if (x) + return obj_isthreadsafe((t_gobj *)x, dspsym, loud ? &limit : 0); + else /* root canvases */ + { + int threadsafe = 1; + t_canvas *y; + for (y = pd_getcanvaslist(); y; y = y->gl_next) + { + if (!obj_isthreadsafe((t_gobj *)y, dspsym, loud ? 
&limit : 0)) + { + threadsafe = 0; + if (!loud || !limit) + break; + } + } + return threadsafe; + } +} + +int clone_markthreadsafe(t_pd *x, t_symbol *dspsym); + + /* also called by clone_markthreadsafe() */ +int obj_markthreadsafe(t_gobj *x, t_symbol *dspsym) +{ + if (x->g_pd == canvas_class) + { + t_canvas *c = (t_canvas *)x; + t_gobj *y; + c->gl_threadsafe = 1; + for (y = c->gl_list; y; y = y->g_next) + { + if (!obj_markthreadsafe(y, dspsym)) + c->gl_threadsafe = 0; /* don't break! */ + } + #if 0 + post("canvas %p (parent: %p) threadsafe: %d", + c, c->gl_owner, c->gl_threadsafe); + #endif + return c->gl_threadsafe; + } + else if (x->g_pd == clone_class) + return clone_markthreadsafe(&x->g_pd, dspsym); + else /* zgetfn() comes last because it's the most expensive check */ + return !(x->g_pd->c_patchable && !x->g_pd->c_threadsafe + && zgetfn(&x->g_pd, dspsym)); +} + + /* traverse canvas tree and mark every sub-tree (depth first). + * This mitigates O(n^2) complexity when calling canvas_isthreadsafe() + * repeatedly via dsptaskqueue_update() and dsptaskqueue_check(). */ +int canvas_markthreadsafe(void) +{ + /* when started with -nothreadsafe, gl_threadsafe will always + * be true, see canvas_new() and clone_new(). */ + if (!sys_threadsafe) + return 1; + else + { + t_symbol *dspsym = gensym("dsp"); + int threadsafe = 1; + t_canvas *y; + for (y = pd_getcanvaslist(); y; y = y->gl_next) + { + if (!obj_markthreadsafe((t_gobj *)y, dspsym)) + threadsafe = 0; /* don't break! */ + } + return threadsafe; + } +} + +#endif /* PD_DSPTHREADS */ + /* schedule one canvas for DSP. This is called below for all "root" canvases, but is also called from the "dsp" method for sub- canvases, which are treated almost like any other tilde object. 
*/ diff --git a/src/g_canvas.h b/src/g_canvas.h index 7822fd40d0..a567b52145 100644 --- a/src/g_canvas.h +++ b/src/g_canvas.h @@ -203,6 +203,7 @@ struct _glist unsigned int gl_hidetext:1; /* hide object-name + args when doing graph on parent */ unsigned int gl_private:1; /* private flag used in x_scalar.c */ unsigned int gl_isclone:1; /* exists as part of a clone object */ + unsigned int gl_threadsafe:1; /* is this canvas and all its subcanvases threadsafe */ int gl_zoom; /* zoom factor (integer zoom-in only) */ void *gl_privatedata; /* private data */ }; diff --git a/src/g_clone.c b/src/g_clone.c index 140b04f32e..23f621f73d 100644 --- a/src/g_clone.c +++ b/src/g_clone.c @@ -82,13 +82,60 @@ typedef struct _clone t_atom *x_argv; int x_phase; int x_startvoice; /* number of first voice, 0 by default */ - int x_suppressvoice; /* suppress voice number as $1 arg */ + char x_suppressvoice; /* suppress voice number as $1 arg */ #if PD_DSPTHREADS - int x_parallel; /* process in parallel */ + char x_parallel; /* process in parallel */ + char x_threadsafe; /* are we thread-safe? */ t_dsptaskqueue *x_dspqueue; /* DSP task queue */ #endif } t_clone; +#if PD_DSPTHREADS + +int obj_markthreadsafe(t_gobj *x, t_symbol *dspsym); + + /* called by obj_markthreadsafe() */ +int clone_markthreadsafe(t_pd *z, t_symbol *dspsym) +{ + t_clone *x = (t_clone *)z; + int i; + x->x_threadsafe = 1; + for (i = 0; i < x->x_n; i++) + { + t_gobj *obj = (t_gobj *)x->x_vec[i].c_gl; + if (!obj_markthreadsafe(obj, dspsym)) + x->x_threadsafe = 0; /* don't break! */ + } + return x->x_threadsafe; +} + +int obj_isthreadsafe(t_gobj *x, t_symbol *dspsym, int *limit); + + /* called by obj_isthreadsafe() */ +int clone_isthreadsafe(t_pd *z, t_symbol *dspsym, int *limit) +{ + t_clone *x = (t_clone *)z; + if (x->x_threadsafe) + return 1; + else if (!limit) + return 0; + else + { + /* only search for the first offending canvas; the loop is + * necessary because of live editing and dynamic patching! 
*/ + int i; + for (i = 0; i < x->x_n; i++) + { + t_gobj *obj = (t_gobj *)x->x_vec[i].c_gl; + if (!obj_isthreadsafe(obj, dspsym, limit)) + break; + } + return 0; + } +} + +#endif /* PD_DSPTHREADS */ + int clone_match(t_pd *z, t_symbol *name, t_symbol *dir) { t_clone *x = (t_clone *)z; @@ -425,6 +472,31 @@ static void clone_dsp(t_clone *x, t_signal **sp) } } if (parallel) + { + if (!x->x_dspqueue) /* create lazily */ + x->x_dspqueue = dsptaskqueue_new(0); + /* check thread-safety; unlike block~ in ugen_done_graph(), + * we don't use dsptaskqueue_update() and dsptaskqueue_check() + * because we already have all the information we need. */ + if (!x->x_threadsafe) + { + /* only search for the first offending canvas; the loop is + * necessary because of live editing and dynamic patching! */ + int i; + for (i = 0; i < x->x_n; i++) + { + if (!canvas_isthreadsafe(x->x_vec[i].c_gl, 1)) /* loud */ + break; + } + /* see also ugen_done_graph() */ + pd_error(x, "clone: parallel processing not possible because " + "some DSP objects are not officially thread-safe! Start Pd with " + "with -nothreadsafe to circumvent this check (potentially dangerous!)"); + + parallel = 0; + } + } + if (parallel) { /* Every child abstraction gets its own DSP task. Unlike block~ + "parallel", * cloned abstractions are not aware that they are being processed in parallel. @@ -438,8 +510,6 @@ static void clone_dsp(t_clone *x, t_signal **sp) * our output signals. 
*/ int blocksize = sp[0]->s_n; t_dsptaskqueue *oldqueue; - if (!x->x_dspqueue) - x->x_dspqueue = dsptaskqueue_new(); /* push our queue to the current DSP context */ oldqueue = dsptaskqueue_push(x->x_dspqueue); /* reset queue */ @@ -559,6 +629,7 @@ static void *clone_new(t_symbol *s, int argc, t_atom *argv) x->x_suppressvoice = 0; #if PD_DSPTHREADS x->x_parallel = 0; + x->x_threadsafe = !sys_threadsafe; /* see canvas_new() */ x->x_dspqueue = 0; #endif clone_voicetovis = -1; diff --git a/src/s_main.c b/src/s_main.c index c3f05eed85..034335913a 100644 --- a/src/s_main.c +++ b/src/s_main.c @@ -57,6 +57,7 @@ int sys_hipriority = -1; /* -1 = not specified; 0 = no; 1 = yes */ int sys_guisetportnumber; /* if started from the GUI, this is the port # */ int sys_nosleep = 0; /* skip all "sleep" calls and spin instead */ int sys_defeatrt; /* flag to cancel real-time */ +int sys_threadsafe = 1; /* only allow thread-safe DSP objects in parallel processing */ t_symbol *sys_flags; /* more command-line flags */ const char *sys_guicmd; @@ -418,6 +419,10 @@ static char *(usagemessage[]) = { #if PD_DSPTHREADS "-threads -- number of audio threads\n" " 0: use all physical cores (default)\n", +"-threadsafe -- check if all DSP objects in a parallel canvas are\n" +" \"officially\" thread-safe (true by default)\n", +"-nothreadsafe -- do not check if DSP objects are thread-safe\n" +" (potentially dangerous!)\n", #endif "-listdev -- list audio and MIDI devices\n", @@ -1339,6 +1344,16 @@ int sys_argparse(int argc, const char **argv) as.a_numthreads = atoi(argv[1]); argc -= 2; argv += 2; } + else if (!strcmp(*argv, "-threadsafe")) + { + sys_threadsafe = 1; + argc--; argv++; + } + else if (!strcmp(*argv, "-nothreadsafe")) + { + sys_threadsafe = 0; + argc--; argv++; + } #endif else if (!strcmp(*argv, "-sleep")) { diff --git a/src/s_stuff.h b/src/s_stuff.h index c3b0826dce..d70370e0c2 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -399,13 +399,18 @@ EXTERN int sys_zoom_open; EXTERN_STRUCT 
_dsptaskqueue; #define t_dsptaskqueue struct _dsptaskqueue -t_dsptaskqueue * dsptaskqueue_new(void); +t_dsptaskqueue * dsptaskqueue_new(t_canvas *owner); void dsptaskqueue_release(t_dsptaskqueue *x); +void dsptaskqueue_update(t_dsptaskqueue *x); +int dsptaskqueue_check(t_dsptaskqueue *x); void dsptaskqueue_reset(t_dsptaskqueue *x); void dsptaskqueue_join(t_dsptaskqueue *x); void dsp_add_reset(t_dsptaskqueue *x); void dsp_add_join(t_dsptaskqueue *x); +int canvas_markthreadsafe(void); +int canvas_isthreadsafe(t_canvas *x, int loud); + EXTERN_STRUCT _dsptask; #define t_dsptask struct _dsptask @@ -424,6 +429,8 @@ EXTERN int sys_dspthreadpool_start(int *numthreads, int external); EXTERN int sys_dspthreadpool_stop(int external); EXTERN int sys_dspthread_run(int index); +EXTERN int sys_threadsafe; /* enable/disable thread-safety checks */ + struct _instancestuff { t_namelist *st_externlist; From d40c6ebbf023b9da5774b08289601625e8262f04 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Sat, 1 Jan 2022 14:36:48 +0100 Subject: [PATCH 25/32] post all non-thread-safe DSP objects if Pd is started with -verbose ... and without -nothreadsafe --- src/m_class.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/m_class.c b/src/m_class.c index 782d9c8eca..dc1a4c025a 100644 --- a/src/m_class.c +++ b/src/m_class.c @@ -618,6 +618,19 @@ void class_addmethod(t_class *c, t_method fn, t_symbol *sel, post("warning: signal method overrides class_mainsignalin"); c->c_floatsignalin = -1; } +#if PD_DSPTHREADS + /* post non-thread-safe DSP objects */ + if (sys_verbose && sys_threadsafe && (sel == gensym("dsp")) + && !c->c_threadsafe) + { + char *slash = strrchr(c->c_externdir->s_name, '/'); + if (slash) /* external */ + logpost(0, PD_VERBOSE, "%s/%s not thread-safe", + slash+1, c->c_name->s_name); + else /* built-in objects should be thread-safe; did we forget one? */ + pd_error(0, "%s not thread-safe", c->c_name); + } +#endif /* check for special cases. 
"Pointer" is missing here so that pd_objectmaker's pointer method can be typechecked differently. */ if (sel == &s_bang) From 260ea0a4aa71756d7d580f0a74b3b50102fa0fdf Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Thu, 6 Jan 2022 18:14:28 +0100 Subject: [PATCH 26/32] parse hardware topology used in thread_physical_concurrency() and later for thread pinning --- src/d_threadpool.c | 353 +++++++++++++++++++++++++++++++-------------- 1 file changed, 246 insertions(+), 107 deletions(-) diff --git a/src/d_threadpool.c b/src/d_threadpool.c index 8e353f4671..7fcb28539b 100644 --- a/src/d_threadpool.c +++ b/src/d_threadpool.c @@ -2,22 +2,28 @@ * For information on usage and redistribution, and for a DISCLAIMER OF ALL * WARRANTIES, see the file, "LICENSE.txt," in this distribution. */ -#include "m_pd.h" -#include "s_stuff.h" -#include "m_imp.h" - -#include -#include -#include - #if PD_DSPTHREADS #if !PD_PARALLEL # error PD_DSPTHREADS requires PD_PARALLEL! #endif +/* This one must be defined before including any headers! 
*/ +#ifdef __linux__ +# ifndef _GNU_SOURCE +# define _GNU_SOURCE +# endif +#endif + +#include "m_pd.h" +#include "m_imp.h" +#include "s_stuff.h" #include "s_sync.h" +#include +#include +#include + #include #if defined(_WIN32) @@ -35,69 +41,101 @@ /* ----------------------- thread utilities -------------------------- */ - /* 0: failure */ -static int thread_hardware_concurrency(void) +typedef struct _cpuinfo { -#if defined(_WIN32) - SYSTEM_INFO info; - memset(&info, 0, sizeof(info)); - GetSystemInfo(&info); - return info.dwNumberOfProcessors; -#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) - int count; - size_t size = sizeof(count); - if (sysctlbyname("hw.ncpu", &count, &size, NULL, 0) == 0) - return count; - else - { - fprintf(stderr, "sysctlbyname() failed (%d)\n", errno); - return 0; - } -#elif defined(__SC_NPROCESSORS_ONLN) - int count = sysconf(_SC_NPROCESSORS_ONLN); - if (count > 0) - return count; - else + int physical_id; + int core_id; + int sibling_id; + int id; +} t_cpuinfo; + + /* convert t_cpuinfo to an uint64_t for comparison. + * We try to keep siblings as far apart as possible, + * followed by physical packages, so that cores of + * the same package are close to each other. 
*/ +static inline uint64_t cpuinfo2number(t_cpuinfo *x) +{ + return ((uint64_t)(x)->sibling_id << 24) | + ((uint64_t)(x)->physical_id << 16) | ((uint64_t)(x)->core_id); +} + +static t_cpuinfo *cpuvec = NULL; +static int numcpus = 0; +static int numcores = 0; +static int numpackages = 0; + +static void cpuinfo_print(void) +{ + int i; + fprintf(stderr, "hardware topology:\n"); + fprintf(stderr, "\tlogical processors: %d\n", numcpus); + fprintf(stderr, "\tCPU cores: %d\n", numcores); + fprintf(stderr, "\tphysical packages: %d\n", numpackages); + fprintf(stderr, "\t---\n"); + for (i = 0; i < numcpus; i++) { - fprintf(stderr, "sysconf() failed (%d)\n", errno); - return 0; + fprintf(stderr, "\t#%d package: %d, core: %d, sibling: %d\n", + i, cpuvec[i].physical_id, cpuvec[i].core_id, cpuvec[i].sibling_id); } -#elif defined(__linux__) - return get_nprocs(); -#else - #warning "thread_hardware_concurrency() not implemented" - return 0; -#endif + fflush(stderr); } - /* 0: failure */ -static int thread_physical_concurrency(void) + /* sort the list so that we can simply pick + * consecutive CPUs for effective thread pinning. */ +static int cpuinfo_sort(const void *x, const void *y) { -#if defined(_WIN32) - typedef BOOL (WINAPI *LPFN_GLPI)( + uint64_t a = cpuinfo2number((t_cpuinfo *)x); + uint64_t b = cpuinfo2number((t_cpuinfo *)y); + return (a > b) ? 1 : (a < b) ? -1 : 0; +} + +static void cpuinfo_done(void) +{ + if (sys_verbose) + cpuinfo_print(); /* print original list */ + /* sort the list */ + qsort(cpuvec, numcpus, sizeof(t_cpuinfo), cpuinfo_sort); +#if 0 + cpuinfo_print(); /* print sorted list (for debugging) */ +#endif +} + + /* 1: success, 0: failure */ +static int parse_hardware_topology(void) +{ + /* Make sure to call this only once. This is not really thread-safe, + * but in practice the function is called for the first time either in + * threadpool_init() or via sys_argparse() -> sys_set_audio_settings(). + * LATER replace with C11 call_once(). 
*/ + static int initted = 0; + if (initted) + return (numcpus > 0); + initted = 1; + +#ifdef _WIN32 /* Windows */ + typedef BOOL (WINAPI *t_func)( PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); - LPFN_GLPI glpi; + t_func fn; PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info; DWORD err, size = 0; - int i, n, count = 0; + int i, n; /* available since Windows XP SP3 */ - glpi = (LPFN_GLPI) GetProcAddress( + fn = (t_func)GetProcAddress( GetModuleHandleA("kernel32"), "GetLogicalProcessorInformation"); - if (!glpi) + if (!fn) { - fprintf(stderr, "GetLogicalProcessorInformation() not supported;\n" - "fall back to thread_hardware_concurrency\n"); - return thread_hardware_concurrency(); + fprintf(stderr, "GetLogicalProcessorInformation() not supported\n"); + return 0; } /* call with size 0 to retrieve actual size; * ERROR_INSUFFICIENT_BUFFER is expected. */ - glpi(NULL, &size); + fn(NULL, &size); if ((err = GetLastError()) != ERROR_INSUFFICIENT_BUFFER) goto fail; info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(size); - if (glpi(info, &size) == FALSE) + if (fn(info, &size) == FALSE) { err = GetLastError(); free(info); @@ -107,90 +145,191 @@ static int thread_physical_concurrency(void) for (i = 0; i < n; ++i) { if (info[i].Relationship == RelationProcessorCore) - count++; + { + /* add all siblings to CPU list */ + int j, nsiblings = 0; + ULONG_PTR mask = info[i].ProcessorMask; + for (j = 0; mask; j++, mask >>= 1) + { + if (mask & 1) + { + t_cpuinfo info = { 0, numcores, nsiblings, j }; + int index = numcpus++; + cpuvec = realloc(cpuvec, sizeof(t_cpuinfo) * numcpus); + cpuvec[index] = info; + nsiblings++; + } + } + numcores++; + } + } + /* loop again for physical packages */ + for (i = 0; i < n; ++i) + { + if (info[i].Relationship == RelationProcessorPackage) + { + int j, k; + ULONG_PTR mask = info[i].ProcessorMask; + /* loop over all processors and find corresponding t_cpuinfo */ + for (j = 0; mask; j++, mask >>= 1) + { + if (mask & 1) + { + for (k = 0; k < numcpus; ++k) + { + 
if (cpuvec[k].id == j) + cpuvec[k].physical_id = numpackages; + } + } + } + numpackages++; + } } free(info); - return count; + cpuinfo_done(); + return 1; fail: fprintf(stderr, "GetLogicalProcessorInformation() failed (%d)\n", err); return 0; -#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) - int count; - size_t size = sizeof(count); - if (sysctlbyname("hw.physicalcpu", &count, &size, NULL, 0) == 0) - return count; - else - { - fprintf(stderr, "sysctlbyname() failed (%d)\n", errno); - return 0; - } -#elif defined(__linux__) +#elif defined(__linux__) /* Linux */ /* The file /proc/cpusinfo contains all logical CPUs where - * each entry has a property "physical id" and "core id". - * We filter entries where those properties are the same - * (= SMT), so we end up with the number of physical CPUs. */ - #define MAXNUMCPUS 1024 - unsigned int cpus[MAXNUMCPUS]; - /* upper 2 bytes: physical ID, lower 2 bytes: core ID */ - unsigned int current = 0; + * each entry has a property "physical id" and "core id". 
*/ + t_cpuinfo cpu; FILE *fp; char *line = 0; size_t len; - int num = 0, count = 0; - fp = fopen("/proc/cpuinfo", "r"); - if (!fp) + if (!(fp = fopen("/proc/cpuinfo", "r"))) { fprintf(stderr, "could not open /proc/cpuinfo\n"); return 0; } - while ((getline(&line, &len, fp) >= 0) && (count < MAXNUMCPUS)) + cpu.physical_id = cpu.core_id = -1; + while (getline(&line, &len, fp) >= 0) { - const char *colon; - int i, value; + const char *pos, *colon; if (len == 0) continue; - /* "physical id" comes first */ - if (strstr(line, "physical id")) + + /* search for "physical id" and "core id" */ + if ((pos = strstr(line, "physical id"))) { - if (!(colon = strchr(line + strlen("physical id"), ':')) || - (sscanf(colon + 1, "%d", &value) < 1)) - { - count = 0; - break; - } - current = ((unsigned int)value) << 16; + if (!(colon = strchr(pos, ':')) || + (sscanf(colon + 1, "%d", &cpu.physical_id) < 1)) + goto fail; + } + else if ((pos = strstr(line, "core id"))) + { + if (!(colon = strchr(pos, ':')) || + (sscanf(colon + 1, "%d", &cpu.core_id) < 1)) + goto fail; } - /* followed by "core id" */ - else if (strstr(line, "core id")) + /* found both */ + if (cpu.physical_id >= 0 && cpu.core_id >= 0) { - if (!(colon = strchr(line + strlen("core id"), ':')) || - (sscanf(colon + 1, "%d", &value) < 1)) + int i, found, index; + cpu.sibling_id = 0; + cpu.id = numcpus; + /* get sibling number */ + for (i = 0; i < numcpus; ++i) { - count = 0; - break; + if ((cpuvec[i].physical_id == cpu.physical_id) && + (cpuvec[i].core_id == cpu.core_id)) + { + cpu.sibling_id++; + } } - current |= (unsigned int)value; - /* now check if this entry already exists */ - for (i = 0; i < count; ++i) + if (cpu.sibling_id == 0) + numcores++; + /* check for new physical package */ + found = 0; + for (i = 0; i < numcpus; ++i) { - if (cpus[i] == current) - goto skip; + if (cpuvec[i].physical_id == cpu.physical_id) + found = 1; } - cpus[count++] = current; - skip: - #if 0 - fprintf(stderr, "CPU %d: physical id: %d, core id: 
%d\n", - num, current >> 16, current & 0xffff); - #endif - num++; + if (!found) + numpackages++; + index = numcpus++; + cpuvec = realloc(cpuvec, sizeof(t_cpuinfo) * numcpus); + cpuvec[index] = cpu; + + cpu.physical_id = cpu.core_id = -1; /* reset for next CPU */ } } if (line) free(line); fclose(fp); - if (count == 0) - fprintf(stderr, "/proc/cpuinfo: unexpected format\n"); - return count; + cpuinfo_done(); + return 1; +fail: + fprintf(stderr, "/proc/cpuinfo: unexpected format\n"); + fclose(fp); + if (line) + free(line); + if (cpuvec) + free(cpuvec); + cpuvec = NULL; + numcpus = 0; + numcores = 0; + numpackages = 0; + return 0; +#else /* Apple, BSDs, etc. */ + fprintf(stderr, "parsse_hardware_topology() not implemented\n"); + return 0; +#endif +} + + /* 0: failure */ +static int thread_hardware_concurrency(void) +{ +#if defined(_WIN32) + SYSTEM_INFO info; + memset(&info, 0, sizeof(info)); + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + int count; + size_t size = sizeof(count); + if (sysctlbyname("hw.ncpu", &count, &size, NULL, 0) == 0) + return count; + else + { + fprintf(stderr, "sysctlbyname() failed (%d)\n", errno); + return 0; + } +#elif defined(__SC_NPROCESSORS_ONLN) + int count = sysconf(_SC_NPROCESSORS_ONLN); + if (count > 0) + return count; + else + { + fprintf(stderr, "sysconf() failed (%d)\n", errno); + return 0; + } +#elif defined(__linux__) + return get_nprocs(); +#else + #warning "thread_hardware_concurrency() not implemented" + return 0; +#endif +} + + /* 0: failure */ +static int thread_physical_concurrency(void) +{ +#if defined(_WIN32) || defined(__linux__) + parse_hardware_topology(); /* see comment */ + return numcores; +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + int count; + size_t size = sizeof(count); + if (sysctlbyname("hw.physicalcpu", &count, &size, NULL, 0) == 0) + return count; + else + { + fprintf(stderr, 
"sysctlbyname() failed (%d)\n", errno); + return 0; + } #else #warning "thread_physical_concurrency() not implemented" /* fall back to hardware concurrency */ From d495bcea5984c329163d002e288450b8d3102683 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Sat, 17 Sep 2022 14:23:02 +0200 Subject: [PATCH 27/32] add spin-waiting instead of going to sleep everytime the task queue is (temporarily) empty, the DSP helper threads only wake up once at the beginning of each DSP tick and then they spin-wait until all tasks have been finished. Pro: minimize wake-up latency = more stable performance Con: burning CPU cycles Can be enabled/disabled with the "-spinwait" resp. "-nospinwait" command line options. --- src/d_threadpool.c | 193 +++++++++++++++++++++++++++++++++++++-------- src/d_ugen.c | 43 +++++++++- src/s_main.c | 19 ++++- src/s_stuff.h | 1 + 4 files changed, 220 insertions(+), 36 deletions(-) diff --git a/src/d_threadpool.c b/src/d_threadpool.c index 7fcb28539b..a1f0470667 100644 --- a/src/d_threadpool.c +++ b/src/d_threadpool.c @@ -445,6 +445,29 @@ static void dspthread_setrealtime(int index) /* -------------------------- t_dspthreadpool --------------------------- */ +typedef struct _backoff +{ + int b_n; +} t_backoff; + +#define BACKOFF_MINLOOPS 16 +#define BACKOFF_MAXLOOPS 4096 + +void backoff_reset(t_backoff *x) +{ + x->b_n = BACKOFF_MINLOOPS; +} + +void backoff_perform(t_backoff *x) +{ + int i, n = x->b_n; + for (i = 0; i < n; i++) + pause_cpu(); + x->b_n *= 2; + if (x->b_n > BACKOFF_MAXLOOPS) + x->b_n = BACKOFF_MAXLOOPS; +} + typedef struct _dspthreadpool { #ifdef MSVC_INTERLOCKED @@ -456,6 +479,11 @@ typedef struct _dspthreadpool pthread_t *tp_threads; t_lockfree_stack tp_tasks; t_fast_semaphore tp_sem; +#ifdef MSVC_INTERLOCKED + long tp_remaining; +#else + atomic_int tp_remaining; +#endif } t_dspthreadpool; static t_dspthreadpool *d_threadpool = NULL; @@ -504,6 +532,10 @@ static void dspthreadpool_init(void) d_threadpool->tp_threads = 0; 
lockfree_stack_init(&d_threadpool->tp_tasks); fast_semaphore_init(&d_threadpool->tp_sem); + d_threadpool->tp_remaining = 0; + /* for thread pinning */ + if (sys_threadaffinity) + parse_hardware_topology(); } } @@ -564,11 +596,11 @@ int sys_dspthreadpool_start(int *numthreads, int external) } else /* use internal DSP threads */ { - if (n > 0) + if (n > 0) /* multi-threaded */ { d_threadpool->tp_threads = (pthread_t *)getbytes(sizeof(pthread_t) * n); d_threadpool->tp_n = n; - /* spawn new threads; thread index starts at 1 */ + /* spawn new threads; index for DSP helper threads starts at 1 */ for (int i = 0; i < n; ++i) pthread_create(&d_threadpool->tp_threads[i], NULL, thread_function, (void *)(intptr_t)(i + 1)); @@ -592,6 +624,30 @@ int sys_dspthreadpool_stop(int external) return 1; } +void dspthreadpool_tick(int ntasks) +{ + if (ntasks > 0 && sys_threadspinwait && d_threadpool && d_threadpool->tp_n) + { + /* use atomic increment, so it also works with PDINSTANCE! */ + #ifdef MSVC_INTERLOCKED + int prev = _InterlockedExchangeAdd(&d_threadpool->tp_remaining, + ntasks); + #else + int prev = atomic_fetch_add(&d_threadpool->tp_remaining, ntasks); + #endif + /* only notify DSP helper threads if necessary */ + if (prev == 0) + fast_semaphore_postn(&d_threadpool->tp_sem, d_threadpool->tp_n); + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "-- DSP thread pool: start tick with %d active tasks\n", ntasks); + #endif + #ifndef PDINSTANCE + if (prev != 0) + pd_error(0, "DSP thread pool: bad task count (%d)", prev); + #endif + } +} + static void dspthreadpool_push(t_dsptask *task) { lockfree_stack_push(&d_threadpool->tp_tasks, task); @@ -619,14 +675,42 @@ static void dspthread_dorun(int index) #ifdef MSVC_INTERLOCKED while (d_threadpool->tp_running) #else - while (atomic_load_explicit(&d_threadpool->tp_running, memory_order_relaxed)) + while (atomic_load_explicit(&d_threadpool->tp_running, + memory_order_relaxed)) #endif { - /* run as many tasks as possible */ + /* run as many tasks 
as possible */ t_dsptask *t; - while ((t = dspthreadpool_pop())) - dsptask_run(t, index); - /* wait for more tasks (or quit) */ + if (sys_threadspinwait) /* spin */ + { + int remaining; + t_backoff backoff; + backoff_reset(&backoff); + tryagain: + while ((t = dspthreadpool_pop())) + { + dsptask_run(t, index); + backoff_reset(&backoff); + } + #ifdef MSVC_INTERLOCKED + remaining = d_threadpool->tp_remaining); + #else + remaining = atomic_load_explicit( + &d_threadpool->tp_remaining, memory_order_acquire); + #endif + if (remaining > 0) + { + backoff_perform(&backoff); + goto tryagain; + } + /* wait for next tick (or quit) */ + } + else /* wait */ + { + while ((t = dspthreadpool_pop())) + dsptask_run(t, index); + /* wait for more tasks (or quit) */ + } #ifdef DEBUG_DSPTHREADS fprintf(stderr, "DSP thread %d: wait\n", index); #endif @@ -676,7 +760,7 @@ struct _dsptaskqueue #else atomic_int dq_remaining; #endif - t_fast_semaphore dq_sem; + t_fast_semaphore dq_sem; /* not needed for spinning */ t_canvas *dq_owner; /* canvas or NULL */ char dq_threadsafe; char dq_warned; @@ -688,7 +772,8 @@ t_dsptaskqueue * dsptaskqueue_new(t_canvas *owner) x->dq_numtasks = 0; x->dq_numswitchoff = 0; x->dq_remaining = 0; - fast_semaphore_init(&x->dq_sem); + if (!sys_threadspinwait) + fast_semaphore_init(&x->dq_sem); x->dq_owner = owner; x->dq_threadsafe = 0; x->dq_warned = 0; @@ -715,7 +800,8 @@ void dsptaskqueue_release(t_dsptaskqueue *x) #ifdef DEBUG_DSPTHREADS fprintf(stderr, "queue %p: release\n"); #endif - fast_semaphore_destroy(&x->dq_sem); + if (!sys_threadspinwait) + fast_semaphore_destroy(&x->dq_sem); freebytes(x, sizeof(t_dsptaskqueue)); } else if (oldcount < 0) @@ -798,23 +884,58 @@ void dsptaskqueue_join(t_dsptaskqueue *x) * NB: if PDINSTANCE defined, we might actually run tasks that * belong to other Pd instances! LATER decide if we should push * such tasks back to the queue? */ - while (!fast_semaphore_trywait(&x->dq_sem)) - { - /* Pop and run a *single* task, then try again. 
- * Unlike in dspthread_dorun(), we do not pop tasks in a loop - * because we might end up running tasks that don't belong to - * this queue (and have a much later deadline). */ - t_dsptask *t = dspthreadpool_pop(); - if (t) - dsptask_run(t, 0); - else + if (sys_threadspinwait) /* spin */ + { + t_backoff backoff; + backoff_reset(&backoff); + #ifdef MSVC_INTERLOCKED + while (x->dq_remaining) + #else + while (atomic_load_explicit(&x->dq_remaining, + memory_order_relaxed)) + #endif { - /* nothing to do, wait */ - #ifdef DEBUG_DSPTHREADS - fprintf(stderr, "queue %p: wait\n", x); - #endif - fast_semaphore_wait(&x->dq_sem); - break; /* ! */ + /* Pop and run a *single* task, then try again. + * Unlike in dspthread_dorun(), we do not pop tasks in a loop + * because we might end up running tasks that don't belong to + * this queue (and have a much later deadline). */ + t_dsptask *t = dspthreadpool_pop(); + if (t) + { + dsptask_run(t, 0); + backoff_reset(&backoff); + } + else + backoff_perform(&backoff); + } + /* decrement global task counter. + * NB: we *could* simply decrement all tasks at once in dsp_tick(), + * but then the DSP helper threads would always spin for the whole + * duration of the tick. By doing it here we make sure that they + * go to sleep as soon as all tasks have finished. */ + #ifdef MSVC_INTERLOCKED + _InterlockedExchangeAdd(&d_threadpool->tp_remaining, -count); + #else + atomic_fetch_sub_explicit(&d_threadpool->tp_remaining, count, + memory_order_release); + #endif + } + else /* wait */ + { + while (!fast_semaphore_trywait(&x->dq_sem)) + { + /* Pop and run a *single* task, see explanation above. */ + t_dsptask *t = dspthreadpool_pop(); + if (t) + dsptask_run(t, 0); + else + { + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: wait\n", x); + #endif + fast_semaphore_wait(&x->dq_sem); + break; /* !
*/ + } } } #ifdef DEBUG_DSPTHREADS @@ -837,6 +958,8 @@ void dsp_add_join(t_dsptaskqueue *x) /* ---------------------------- t_dsptask ----------------------------- */ void ugen_addtask(t_dsptask *x); +void ugen_removetask(t_dsptask *x, int on); +void ugen_switchtask(t_dsptask *x, int on); struct _dsptask { @@ -879,6 +1002,8 @@ void dsptask_free(t_dsptask *x) bug("dsptask_free: bad queue switch count (%d)", x->dt_queue->dq_numswitchoff); } + /* remove and free */ + ugen_removetask(x, x->dt_switchoff == 0); dsptaskqueue_release(x->dt_queue); freebytes(x, sizeof(t_dsptask)); } @@ -891,7 +1016,8 @@ void dsptask_sched(t_dsptask *x) fprintf(stderr, "queue %p: sched task %p\n", x->dt_queue, x); #endif dspthreadpool_push(x); - fast_semaphore_post(&d_threadpool->tp_sem); + if (!sys_threadspinwait) + fast_semaphore_post(&d_threadpool->tp_sem); } else /* single-threaded */ { @@ -926,12 +1052,15 @@ static void dsptask_run(t_dsptask *x, int index) #endif if (!remaining) /* last task */ { - /* last task, notify waiting main audio thread; - * see dsptaskqueue_join() */ - fast_semaphore_post(&queue->dq_sem); + if (!sys_threadspinwait) /* wait */ + { + /* last task, notify waiting main audio thread; + * see dsptaskqueue_join() */ + fast_semaphore_post(&queue->dq_sem); + } } else if (remaining < 0) - fprintf(stderr, "queue: %p: bad remaining task count (%d)\n", + fprintf(stderr, "dsptask_run: queue %p: bad remaining task count (%d)\n", queue, remaining); } @@ -966,6 +1095,7 @@ void dsptask_switch(t_dsptask *x, int on) if (--queue->dq_numswitchoff < 0) bug("dsptask_switch: bad queue switch count (%d)", queue->dq_numswitchoff); + ugen_switchtask(x, 1); } else /* on -> off */ { @@ -973,6 +1103,7 @@ void dsptask_switch(t_dsptask *x, int on) bug("dsptask_switch: queue switch count (%d) " "exceeds queue task count (%d)", queue->dq_numswitchoff, queue->dq_numtasks); + ugen_switchtask(x, 0); } } } diff --git a/src/d_ugen.c b/src/d_ugen.c index af78ac7694..e8d99aff63 100644 --- 
a/src/d_ugen.c +++ b/src/d_ugen.c @@ -93,6 +93,8 @@ struct _instanceugen #if PD_DSPTHREADS t_dsptaskqueue *u_dspqueue; /* global DSP thread queue */ t_lockfree_stack u_clocks; /* deferred clocks */ + int u_numtasks; /* total number of active DSP tasks */ + int u_numswitchtasks; /* number of switched off DSP tasks */ #endif }; @@ -107,6 +109,8 @@ void d_ugen_newpdinstance(void) #if PD_DSPTHREADS THIS->u_dspqueue = dsptaskqueue_new(0); lockfree_stack_init(&THIS->u_clocks); + THIS->u_numtasks = 0; + THIS->u_numswitchtasks = 0; #endif } @@ -540,6 +544,7 @@ void dsp_addv(t_perfroutine f, int n, t_int *vec) #if PD_DSPTHREADS void clock_dispatch(t_clock *x); void dspthread_setindex(int index); +void dspthreadpool_tick(int ntasks); #endif void dsp_tick(void) @@ -549,8 +554,16 @@ void dsp_tick(void) t_int *ip; #if PD_DSPTHREADS t_clock *c; - dspthread_setindex(0); /* just to be sure */ - dsptaskqueue_reset(THIS->u_dspqueue); + int count = THIS->u_numtasks - THIS->u_numswitchtasks; + if (count >= 0) + { + dspthread_setindex(0); /* just to be sure */ + dspthreadpool_tick(count); + dsptaskqueue_reset(THIS->u_dspqueue); + } + else + bug("dsp_tick: bad task count (%d) resp. 
switch count (%d)", + THIS->u_numtasks, THIS->u_numswitchtasks); #endif for (ip = THIS->u_dspchain; ip; ) ip = (*(t_perfroutine)(*ip))(ip); THIS->u_phase++; @@ -764,6 +777,7 @@ struct _dspcontext void ugen_addtask(t_dsptask *x) { t_dspcontext *dc; + THIS->u_numtasks++; /* Add the DSP task to all enclosing switch~ objects */ for (dc = THIS->u_context; dc; dc = dc->dc_parentcontext) { @@ -772,6 +786,31 @@ void ugen_addtask(t_dsptask *x) } } +void ugen_removetask(t_dsptask *x, int on) +{ + if (!on) + { + if (--THIS->u_numswitchtasks < 0) + bug("ugen_removetask: bad switch count (%d)", + THIS->u_numswitchtasks); + } + if (--THIS->u_numtasks < 0) + bug("ugen_removetask: bad task count (%d)", + THIS->u_numtasks); +} + +/* DSP task has been switched on or off */ +void ugen_switchtask(t_dsptask *x, int on) +{ + if (on) /* off -> on */ + { + if (--THIS->u_numswitchtasks < 0) + bug("block_float"); + } + else /* on -> off */ + THIS->u_numswitchtasks++; +} + /* used in clone_dsp() */ t_dsptaskqueue * dsptaskqueue_push(t_dsptaskqueue *newqueue) { diff --git a/src/s_main.c b/src/s_main.c index 034335913a..47fac88250 100644 --- a/src/s_main.c +++ b/src/s_main.c @@ -55,9 +55,10 @@ int sys_noloadbang; static int sys_dontstartgui; int sys_hipriority = -1; /* -1 = not specified; 0 = no; 1 = yes */ int sys_guisetportnumber; /* if started from the GUI, this is the port # */ -int sys_nosleep = 0; /* skip all "sleep" calls and spin instead */ +int sys_nosleep = 0; /* skip all "sleep" calls and spin instead */ int sys_defeatrt; /* flag to cancel real-time */ int sys_threadsafe = 1; /* only allow thread-safe DSP objects in parallel processing */ +int sys_threadspinwait = 0; /* DSP threads spin while waiting for tasks */ t_symbol *sys_flags; /* more command-line flags */ const char *sys_guicmd; @@ -423,7 +424,9 @@ static char *(usagemessage[]) = { " \"officially\" thread-safe (true by default)\n", "-nothreadsafe -- do not check if DSP objects are thread-safe\n" " (potentially 
dangerous!)\n", -#endif +"-spinwait -- audio threads spin while waiting for tasks\n", +"-nospinwait -- audio threads do not spin (true by default)\n", +#endif /* PD_DSPTHREADS */ "-listdev -- list audio and MIDI devices\n", #ifdef USEAPI_OSS @@ -1354,7 +1357,17 @@ int sys_argparse(int argc, const char **argv) sys_threadsafe = 0; argc--; argv++; } -#endif + else if (!strcmp(*argv, "-spinwait")) + { + sys_threadspinwait = 1; + argc--; argv++; + } + else if (!strcmp(*argv, "-nospinwait")) + { + sys_threadspinwait = 0; + argc--; argv++; + } +#endif /* PD_DSPTHREADS */ else if (!strcmp(*argv, "-sleep")) { sys_nosleep = 0; diff --git a/src/s_stuff.h b/src/s_stuff.h index d70370e0c2..e58117b4a5 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -430,6 +430,7 @@ EXTERN int sys_dspthreadpool_stop(int external); EXTERN int sys_dspthread_run(int index); EXTERN int sys_threadsafe; /* enable/disable thread-safety checks */ +EXTERN int sys_threadspinwait; /* spin while waiting for tasks */ struct _instancestuff { From 5b9cfee3392f3a6b46b23b2bfdcd0976f746ec33 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Sat, 17 Sep 2022 14:31:10 +0200 Subject: [PATCH 28/32] add thread pinning option allow to pin DSP threads to dedicated cores; useful for spinwaiting! Can be enabled/disabled with the "-affinity" resp. "-noaffinity" command line options --- src/d_threadpool.c | 107 +++++++++++++++++++++++++++++++++++++++++++++ src/s_main.c | 17 +++++++ src/s_stuff.h | 1 + 3 files changed, 125 insertions(+) diff --git a/src/d_threadpool.c b/src/d_threadpool.c index a1f0470667..0f324aa4a1 100644 --- a/src/d_threadpool.c +++ b/src/d_threadpool.c @@ -34,6 +34,7 @@ #else /* Linux */ # include # include +# include #endif /* define for debugging DSP tasks and task queues */ @@ -359,6 +360,8 @@ static int thread_set_realtime(void) } return 1; #elif defined(__APPLE__) + /* Is SCHED_RR still appropriate? + * Should we use the Mach API instead? 
*/ struct sched_param param; int policy = SCHED_RR; int err; @@ -387,6 +390,70 @@ static int thread_set_realtime(void) } return 1; #endif +} + + /* 1: success, 0: failure */ +static int thread_set_affinity(int i) +{ +#if defined(_WIN32) + static THREADLOCAL DWORD_PTR original = 0; + if (i >= 0) /* pin to the given CPU */ + { + DWORD oldmask, newmask = (DWORD_PTR)1 << i; + oldmask = SetThreadAffinityMask(GetCurrentThread(), newmask); + if (oldmask == 0) + { + fprintf(stderr, "SetThreadAffinityMask() failed (%d)\n", GetLastError()); + return 0; + } + /* store original CPU mask (only the first time!) */ + if (!original) + original = oldmask; + } + else if (original) /* restore original mask */ + { + if (SetThreadAffinityMask(GetCurrentThread(), original) == 0) + { + fprintf(stderr, "SetThreadAffinityMask() failed (%d)\n", GetLastError()); + return 0; + } + } + return 1; +#elif defined(__linux__) + cpu_set_t cpuset, *ptr; + static THREADLOCAL cpu_set_t original; + static THREADLOCAL int initted = 0; + /* store original CPU set when we first enter this function */ + if (initted < 0) + return 0; /* init failed */ + if (!initted) + { + if (sched_getaffinity(0, sizeof(cpu_set_t), &original) != 0) + { + fprintf(stderr, "sched_getaffinity() failed (%d)\n", errno); + initted = -1; + return 0; + } + initted = 1; + } + if (i >= 0) /* pin to the given CPU */ + { + CPU_ZERO(&cpuset); + CPU_SET(i, &cpuset); + ptr = &cpuset; + } + else /* restore original CPU set */ + ptr = &original; + if (sched_setaffinity(0, sizeof(cpu_set_t), ptr) < 0) + { + fprintf(stderr, "sched_setaffinity() failed (%d)\n", errno); + return 0; + } + return 1; +#else + fprintf(stderr, "thread_set_affinity() not implemented\n"); + return 0; +#endif } /* -------------------------- helper functions -------------------------- */ @@ -443,6 +510,41 @@ static void dspthread_setrealtime(int index) fprintf(stderr, "DSP thread %d: couldn't set realtime priority\n", index); } +static void dspthread_pin(int index, int 
pin) +{ + /* We only use thread pinning on Windows and Linux; + * on macOS we want to use audio workgroups instead. */ +#if defined(_WIN32) || defined(__linux__) + if (sys_threadaffinity && (numcpus > 0)) + { + if (index >= 0 && index < numcpus) + { + if (pin) /* pin to thread */ + { + /* see cpuinfo_sort() */ + int cpu = cpuvec[index].id; + if (thread_set_affinity(cpu)) + { + if (sys_verbose) + fprintf(stderr, "DSP thread %d: " + "pinned to CPU %d\n", index, cpu); + } + else + fprintf(stderr, "DSP thread %d: " + "could not pin to CPU %d\n", index, cpu); + } + else /* unpin */ + { + if (!thread_set_affinity(-1)) + fprintf(stderr, "DSP thread %d: could not unpin\n", index); + } + } + else + bug("dspthread_pin"); + } +#endif +} + /* -------------------------- t_dspthreadpool --------------------------- */ typedef struct _backoff @@ -563,6 +665,8 @@ void dspthreadpool_stop(int external) freebytes(d_threadpool->tp_threads, sizeof(pthread_t) * n); d_threadpool->tp_threads = 0; d_threadpool->tp_n = 0; + + dspthread_pin(0, 0); /* unpin */ } int sys_dspthreadpool_start(int *numthreads, int external) @@ -604,6 +708,8 @@ int sys_dspthreadpool_start(int *numthreads, int external) for (int i = 0; i < n; ++i) pthread_create(&d_threadpool->tp_threads[i], NULL, thread_function, (void *)(intptr_t)(i + 1)); + /* only pin main thread if we actually have helper threads */ + dspthread_pin(0, 1); } else /* single threaded */ { @@ -670,6 +776,7 @@ static void dspthread_dorun(int index) fprintf(stderr, "DSP thread %d: start\n", index); #endif dspthread_setindex(index); + dspthread_pin(index, 1); mayer_init(); /* init FFT */ #ifdef MSVC_INTERLOCKED diff --git a/src/s_main.c b/src/s_main.c index 47fac88250..c2fa72c7af 100644 --- a/src/s_main.c +++ b/src/s_main.c @@ -58,6 +58,7 @@ int sys_guisetportnumber; /* if started from the GUI, this is the port # */ int sys_nosleep = 0; /* skip all "sleep" calls and spin instead */ int sys_defeatrt; /* flag to cancel real-time */ int sys_threadsafe = 
1; /* only allow thread-safe DSP objects in parallel processing */ +int sys_threadaffinity = 0; /* pin DSP threads to CPUs */ int sys_threadspinwait = 0; /* DSP threads spin while waiting for tasks */ t_symbol *sys_flags; /* more command-line flags */ @@ -426,6 +427,10 @@ static char *(usagemessage[]) = { " (potentially dangerous!)\n", "-spinwait -- audio threads spin while waiting for tasks\n", "-nospinwait -- audio threads do not spin (true by default)\n", +#if defined(_WIN32) || defined(__linux__) +"-affinity -- pin audio threads to CPUs\n", +"-noaffinity -- do not pin audio threads (true by default)\n", +#endif /* Windows/Linux */ #endif /* PD_DSPTHREADS */ "-listdev -- list audio and MIDI devices\n", @@ -1367,6 +1372,18 @@ int sys_argparse(int argc, const char **argv) sys_threadspinwait = 0; argc--; argv++; } +#if defined(_WIN32) || defined(__linux__) + else if (!strcmp(*argv, "-affinity")) + { + sys_threadaffinity = 1; + argc--; argv++; + } + else if (!strcmp(*argv, "-noaffinity")) + { + sys_threadaffinity = 0; + argc--; argv++; + } +#endif /* Windows/Linux */ #endif /* PD_DSPTHREADS */ else if (!strcmp(*argv, "-sleep")) { diff --git a/src/s_stuff.h b/src/s_stuff.h index e58117b4a5..ba5e005004 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -430,6 +430,7 @@ EXTERN int sys_dspthreadpool_stop(int external); EXTERN int sys_dspthread_run(int index); EXTERN int sys_threadsafe; /* enable/disable thread-safety checks */ +EXTERN int sys_threadaffinity; /* enable/disable thread pinning */ EXTERN int sys_threadspinwait; /* spin while waiting for tasks */ struct _instancestuff From bfde087ea3a9573784b7ea7e3fc003ccf8a891de Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Tue, 29 Mar 2022 17:53:04 +0200 Subject: [PATCH 29/32] update block~-help.pd and clone-help.pd --- doc/5.reference/block~-help.pd | 307 +++++++++++++++++++++++++++++++-- doc/5.reference/clone-help.pd | 7 + 2 files changed, 295 insertions(+), 19 deletions(-) diff --git 
a/doc/5.reference/block~-help.pd b/doc/5.reference/block~-help.pd index 4313a78c71..8f76903b3c 100644 --- a/doc/5.reference/block~-help.pd +++ b/doc/5.reference/block~-help.pd @@ -1,8 +1,8 @@ -#N canvas 576 23 531 684 12; +#N canvas 576 23 531 733 12; #X text 34 248 You may have at most one block~/switch~ object in any window., f 67; -#X text 15 597 see also:; -#X obj 135 649 fft~; +#X text 15 653 see also:; +#X obj 135 705 fft~; #X text 34 367 Pd's default block size is 64 samples. The inlet~ and outlet~ objects reblock signals to adjust for differences between parent and subpatch \, but only power-of-two adjustments are possible. So @@ -10,7 +10,6 @@ for "normal" audio computations \, all blocks should also be power-of-two in size. HOWEVER \, if you have no inlet~ or outlet~ you may specify any other block size. This is intended for later use in video processing. , f 67; -#X text 318 649 updated for Pd version 0.43; #N canvas 424 281 492 272 block-interactions 0; #X text 32 49 Dac~ and adc~ don't work correctly if reblocked \, nor if a parent window is reblocked \, even if the window containing the @@ -25,8 +24,8 @@ than they are \, there might be weirdness.; may be switched with impunity \, but not catch~., f 60; #X text 32 11 INTERACTIONS BETWEEN BLOCK~/SWITCH~ AND OTHER OBJECTS IN PD; -#X restore 149 540 pd block-interactions; -#X text 308 540 <= BUG! block~/switch~ and dac~/adc~ are incompatible +#X restore 149 570 pd block-interactions; +#X text 308 570 <= BUG! block~/switch~ and dac~/adc~ are incompatible , f 27; #N canvas 741 59 537 534 switch-example 0; #X obj 109 380 bang~; @@ -72,7 +71,7 @@ reblocked.; #X connect 7 0 20 0; #X connect 8 0 20 0; #X connect 9 0 20 0; -#X restore 150 489 pd switch-example; +#X restore 150 519 pd switch-example; #N canvas 551 180 567 287 switch-bang 0; #X text 50 15 You can use the switch~ object to single-step dsp in a subpatch. 
This might be useful for block operations that don't want @@ -92,19 +91,19 @@ to be synced to the sample clock: loading a window function in a table #X connect 1 0 7 0; #X connect 2 0 4 0; #X connect 2 0 7 0; -#X restore 149 514 pd switch-bang; -#X text 277 489 <= click and open example; +#X restore 149 544 pd switch-bang; +#X text 277 519 <= click and open example; #X obj 85 68 block~ 64 1 1; #X text 85 95 args: block size \, overlap \, up-downsampling; -#X obj 87 597 ../3.audio.examples/G04.control.blocksize; -#X obj 87 619 ../3.audio.examples/J07.oversampling; -#X obj 46 489 tgl 17 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#X obj 87 653 ../3.audio.examples/G04.control.blocksize; +#X obj 87 675 ../3.audio.examples/J07.oversampling; +#X obj 46 519 tgl 17 0 empty empty empty 17 7 0 10 #fcfcfc #000000 #000000 0 1; -#X msg 46 522 \; pd dsp \$1; -#X text 67 486 <= DSP on/off, f 6; -#X obj 178 649 bang~; -#X text 14 649 and the objects:; -#X text 259 514 <= 'bang' lets you single-step DSP; +#X msg 46 552 \; pd dsp \$1; +#X text 67 516 <= DSP on/off, f 6; +#X obj 178 705 bang~; +#X text 14 705 and the objects:; +#X text 259 544 <= 'bang' lets you single-step DSP; #X text 34 122 The block~ and switch~ objects set the block size \, overlap \, and up/down-sampling ratio for the patch window. 
(The overlap and resampling ratio are relative to the super-patch.), f 67; @@ -150,7 +149,277 @@ up/downsampling)., f 66; #X restore 369 17 pd reference; #X text 5 16 [block~] and [switch~] -; #X text 188 9 set block size and on/off control for DSP, f 22; -#X obj 5 583 cnv 1 520 1 empty empty empty 8 12 0 13 #000000 #000000 +#X obj 5 639 cnv 1 520 1 empty empty empty 8 12 0 13 #000000 #000000 0; #X text 462 18 <= click; -#X connect 14 0 15 0; +#X text 35 474 Block~ also facilitates parallel DSP processing with +the "parallel" and "join" messages., f 68; +#N canvas 617 264 613 446 parallel-dsp 0; +#X obj 43 252 block~; +#X msg 43 62 parallel \$1; +#X obj 43 41 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 #000000 +0 1; +#X obj 60 197 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#000000 0 1; +#X msg 60 217 join \$1; +#X text 40 388 See also the "parallel" message for; +#X obj 296 388 clone; +#N canvas 425 76 499 300 pipelining 0; +#N canvas 75 75 406 256 fx1 0; +#X obj 57 103 inlet~; +#X obj 57 140 bob~; +#X obj 57 178 outlet~; +#X obj 123 103 inlet~; +#X obj 123 140 bob~; +#X obj 123 178 outlet~; +#X obj 212 121 loadbang; +#X msg 212 149 parallel 1; +#X obj 212 178 block~; +#X obj 70 35 loadbang; +#X msg 70 64 800; +#X msg 148 67 1; +#X connect 0 0 1 0; +#X connect 1 0 2 0; +#X connect 3 0 4 0; +#X connect 4 0 5 0; +#X connect 6 0 7 0; +#X connect 7 0 8 0; +#X connect 9 0 10 0; +#X connect 9 0 11 0; +#X connect 10 0 1 1; +#X connect 10 0 4 1; +#X connect 11 0 1 2; +#X connect 11 0 4 2; +#X restore 82 100 pd fx1; +#X text 78 38 asynchronous pipelining:; +#X obj 81 70 noise~; +#X obj 132 70 noise~; +#X obj 81 215 output~; +#X msg 157 236 \; pd dsp \$1; +#X obj 157 214 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#000000 0 1; +#X text 139 101 <= click me; +#X text 142 125 Although the 4 stages are nominally processed in series +\, block~ + "parallel" make them run asynchronously. 
However \, because +outlet~ is buffered \, each stage is delayed by 1 block., f 46; +#N canvas 75 75 406 256 fx2 0; +#X obj 57 103 inlet~; +#X obj 57 140 bob~; +#X obj 57 178 outlet~; +#X obj 123 103 inlet~; +#X obj 123 140 bob~; +#X obj 123 178 outlet~; +#X obj 212 121 loadbang; +#X msg 212 149 parallel 1; +#X obj 212 178 block~; +#X obj 70 35 loadbang; +#X msg 70 64 800; +#X msg 148 66 1; +#X connect 0 0 1 0; +#X connect 1 0 2 0; +#X connect 3 0 4 0; +#X connect 4 0 5 0; +#X connect 6 0 7 0; +#X connect 7 0 8 0; +#X connect 9 0 10 0; +#X connect 9 0 11 0; +#X connect 10 0 1 1; +#X connect 10 0 4 1; +#X connect 11 0 1 2; +#X connect 11 0 4 2; +#X restore 82 128 pd fx2; +#N canvas 75 75 406 256 fx3 0; +#X obj 57 103 inlet~; +#X obj 57 140 bob~; +#X obj 57 178 outlet~; +#X obj 123 103 inlet~; +#X obj 123 140 bob~; +#X obj 123 178 outlet~; +#X obj 212 121 loadbang; +#X msg 212 149 parallel 1; +#X obj 212 178 block~; +#X obj 70 35 loadbang; +#X msg 70 64 800; +#X msg 148 67 0.1; +#X connect 0 0 1 0; +#X connect 1 0 2 0; +#X connect 3 0 4 0; +#X connect 4 0 5 0; +#X connect 6 0 7 0; +#X connect 7 0 8 0; +#X connect 9 0 10 0; +#X connect 9 0 11 0; +#X connect 10 0 1 1; +#X connect 10 0 4 1; +#X connect 11 0 1 2; +#X connect 11 0 4 2; +#X restore 82 156 pd fx3; +#N canvas 75 75 406 256 fx4 0; +#X obj 57 103 inlet~; +#X obj 57 140 bob~; +#X obj 57 178 outlet~; +#X obj 123 103 inlet~; +#X obj 123 140 bob~; +#X obj 123 178 outlet~; +#X obj 212 121 loadbang; +#X msg 212 149 parallel 1; +#X obj 212 178 block~; +#X obj 70 35 loadbang; +#X msg 70 64 800; +#X msg 148 67 1; +#X connect 0 0 1 0; +#X connect 1 0 2 0; +#X connect 3 0 4 0; +#X connect 4 0 5 0; +#X connect 6 0 7 0; +#X connect 7 0 8 0; +#X connect 9 0 10 0; +#X connect 9 0 11 0; +#X connect 10 0 1 1; +#X connect 10 0 4 1; +#X connect 11 0 1 2; +#X connect 11 0 4 2; +#X restore 82 185 pd fx4; +#X connect 0 0 9 0; +#X connect 0 1 9 1; +#X connect 2 0 0 0; +#X connect 3 0 0 1; +#X connect 6 0 5 0; +#X connect 9 0 10 0; 
+#X connect 9 1 10 1; +#X connect 10 0 11 0; +#X connect 10 1 11 1; +#X connect 11 0 4 0; +#X connect 11 1 4 1; +#X restore 425 118 pd pipelining; +#X text 137 85 Signal outlets are buffered \, so they always return +the result of the *previous* block.; +#X text 137 119 This allows for asynchronous pipelining:; +#X text 137 193 Wait for all parallel subpatches/abstractions in this +canvas to finish.; +#X text 65 39 on/off; +#X text 79 194 on/off; +#N canvas 502 173 398 300 fork/join 0; +#X text 52 37 fork/join; +#N canvas 271 107 598 227 source 0; +#X obj 40 140 outlet~; +#N canvas 175 175 450 182 sub1 0; +#X obj 280 103 block~; +#X obj 280 47 loadbang; +#X msg 280 74 parallel 1; +#X obj 62 62 osc~ 440; +#X obj 62 94 throw~ \$0-ch1; +#X obj 169 62 osc~ 440; +#X obj 169 94 throw~ \$0-ch2; +#X connect 1 0 2 0; +#X connect 2 0 0 0; +#X connect 3 0 4 0; +#X connect 5 0 6 0; +#X restore 42 44 pd sub1; +#N canvas 175 175 450 182 sub2 0; +#X obj 280 103 block~; +#X obj 280 47 loadbang; +#X msg 280 74 parallel 1; +#X obj 62 62 osc~ 440; +#X obj 62 94 throw~ \$0-ch1; +#X obj 169 62 osc~ 440; +#X obj 169 94 throw~ \$0-ch2; +#X connect 1 0 2 0; +#X connect 2 0 0 0; +#X connect 3 0 4 0; +#X connect 5 0 6 0; +#X restore 104 44 pd sub2; +#N canvas 175 175 450 182 sub3 0; +#X obj 280 103 block~; +#X obj 280 47 loadbang; +#X msg 280 74 parallel 1; +#X obj 62 62 osc~ 440; +#X obj 62 94 throw~ \$0-ch1; +#X obj 169 62 osc~ 440; +#X obj 169 94 throw~ \$0-ch2; +#X connect 1 0 2 0; +#X connect 2 0 0 0; +#X connect 3 0 4 0; +#X connect 5 0 6 0; +#X restore 42 72 pd sub3; +#N canvas 175 175 450 182 sub4 0; +#X obj 280 103 block~; +#X obj 280 47 loadbang; +#X msg 280 74 parallel 1; +#X obj 62 62 osc~ 440; +#X obj 62 94 throw~ \$0-ch1; +#X obj 169 62 osc~ 440; +#X obj 169 94 throw~ \$0-ch2; +#X connect 1 0 2 0; +#X connect 2 0 0 0; +#X connect 3 0 4 0; +#X connect 5 0 6 0; +#X restore 105 72 pd sub4; +#X obj 172 147 block~; +#X obj 172 46 loadbang; +#X msg 172 120 join \$1; +#X msg 172 70 
1; +#X obj 172 96 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#000000 0 1; +#X text 242 67 disabling the "join" will mess up the output because +the throw~ objects in the parallel subpatches write to the corresponding +catch~ objects at random times \, sometimes before \, sometimes after +the latter are processed., f 46; +#X connect 6 0 8 0; +#X connect 7 0 5 0; +#X connect 8 0 9 0; +#X connect 9 0 7 0; +#X restore 57 73 pd source; +#N canvas 486 479 457 212 fx 0; +#X obj 50 49 inlet~; +#X obj 49 148 outlet~; +#X obj 151 148 outlet~; +#X obj 49 85 catch~ \$0-ch1; +#X obj 151 85 catch~ \$0-ch2; +#X obj 49 118 bob~; +#X obj 151 115 bob~; +#X obj 163 27 loadbang; +#X msg 163 51 12000; +#X connect 3 0 5 0; +#X connect 4 0 6 0; +#X connect 5 0 1 0; +#X connect 6 0 2 0; +#X connect 7 0 8 0; +#X connect 8 0 5 1; +#X connect 8 0 6 1; +#X restore 57 148 pd fx; +#X text 67 107 dummy connection to enforce ordering (to avoid delay) +, f 27; +#X obj 58 179 output~; +#X text 107 148 <= catch and process signals; +#X text 134 72 <= generate signals in parallel and join them, f 24 +; +#X connect 1 0 2 0; +#X connect 2 0 4 0; +#X connect 2 1 4 1; +#X restore 479 261 pd fork/join; +#X text 136 228 This is handy if you want to join parallel subpatches +for further processing. By using throw~/catch~ instead of outlet~ \, +you can even do this without any delay:, f 58; +#N canvas 182 294 450 300 nesting 0; +#X text 139 138 TODO nesting example; +#X restore 297 295 pd nesting; +#X text 36 295 "parallel" and "join" can be nested:; +#X text 40 326 NOTE: "parallel" only works if all DSP objects starting +from the nearest outer "join" point are "officially" thread-safe. You +can circumvent this check by starting Pd with -nothreadsafe., f 74 +; +#X text 138 142 NOTE: "parallel" canvases themselves cannot use reblocking +\, upsampling or overlap \, but subpatches/abstractions can!, f 60 +; +#X text 138 36 Process a canvas in parallel. 
The canvas will effectively +run asynchronously until it is joined by an outer canvas (see below) +- or at the end of the DSP tick.; +#X connect 1 0 0 0; +#X connect 2 0 1 0; +#X connect 3 0 4 0; +#X connect 4 0 0 0; +#X restore 149 606 pd parallel-dsp; +#X text 267 606 <= parallel DSP processing; +#X text 314 705 updated for Pd version 0.52-2; +#X connect 13 0 14 0; diff --git a/doc/5.reference/clone-help.pd b/doc/5.reference/clone-help.pd index ffed7b6bb9..bcca1a46f5 100644 --- a/doc/5.reference/clone-help.pd +++ b/doc/5.reference/clone-help.pd @@ -113,6 +113,11 @@ the sum of all instances' outputs \, and control outlets forward messages with the number of the instance prepended to them., f 95; #X obj 181 512 clone clone-abstraction 16; #X text 78 10 - make multiple copies of an abstraction.; +#X msg 40 466 parallel \$1; +#X obj 40 441 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#000000 0 1; +#X text 38 391 process copies in parallel to utilize more CPU cores. +, f 20; #X connect 0 0 1 0; #X connect 1 0 2 0; #X connect 1 1 21 1; @@ -134,3 +139,5 @@ with the number of the instance prepended to them., f 95; #X connect 33 0 9 0; #X connect 38 0 19 0; #X connect 38 0 19 1; +#X connect 40 0 38 0; +#X connect 41 0 40 0; From fa51f3efbb33a5cc161ece954895368fcaac6179 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Wed, 31 Aug 2022 01:01:38 +0200 Subject: [PATCH 30/32] improve garrayref comments --- src/g_array.c | 5 ++--- src/m_pd.h | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/g_array.c b/src/g_array.c index 61c79dfd94..0b217122bf 100644 --- a/src/g_array.c +++ b/src/g_array.c @@ -866,10 +866,9 @@ int garrayref_get(t_garrayref *x, int *size, t_word **vec, /* garrayref_write_lock() and garrayref_read_lock() always fail * silently if garrayref is empty or if the garray has been removed. 
* In practice, adding/removing garrays triggers a DSP graph update, - * so we automatically try to reacquire the garray in our DSP method - * by calling garrayref_set(). + * so we can (re)acquire the garray in our DSP method with garrayref_set(). * NOTE: we avoid (un)setting the garrayref in the perform routine - * because it would make things more complicated wrt thread-safety. */ + * because it would require additional thread synchronization. */ int garrayref_write_lock(t_garrayref *x, int *size, t_word **vec) { if (!garrayref_get(x, size, vec, 0, 0)) diff --git a/src/m_pd.h b/src/m_pd.h index b582e20935..2398736d2c 100644 --- a/src/m_pd.h +++ b/src/m_pd.h @@ -746,10 +746,9 @@ EXTERN int garrayref_check(t_garrayref *x); /* for control objects: safely access array data. If the reference is empty or * stale, (re)acquire the array by name; if 'arrayname' is NULL, fail silently. * Returns 1 if it could get the array data; otherwise returns 0. - * * If you want to set the garrayref to another garray, you must either call - * garray_set() with the new name, or call garray_unset() and lazily initialize - * it in the next call to garrayref_get(). */ + * garrayref_set() with the new name, or call garrayref_unset() and acquire it lazily + * with the next call to garrayref_get(). */ EXTERN int garrayref_get(t_garrayref *x, int *size, t_word **vec, t_symbol *arrayname, t_object *object); /* for DSP objects: lock/unlock garray for reading/writing in the perform routine. * Returns 1 if it could get the array data and lock the garray; otherwise returns 0.
@@ -760,6 +759,7 @@ THREADSAFE EXTERN void garrayref_write_unlock(t_garrayref *x); THREADSAFE EXTERN int garrayref_read_lock(t_garrayref *x, int *size, t_word **vec); THREADSAFE EXTERN void garrayref_read_unlock(t_garrayref *x); #else +/* optimization for non-parallel builds */ #define garrayref_write_lock(x, size, vec) garrayref_get(x, size, vec, 0, 0) #define garrayref_write_unlock(x) #define garrayref_read_lock(x, size, vec) garrayref_get(x, size, vec, 0, 0) From 29ced3f0eb922a49d91eb3aabf9d2755ea080ce3 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Sat, 17 Sep 2022 17:34:45 +0200 Subject: [PATCH 31/32] enable parallel processing in makefiles --- libpd/Makefile | 8 +++++++- src/makefile.gnu | 5 +++++ src/makefile.mac | 8 ++++++++ src/makefile.mingw | 5 +++++ src/makefile.msvc | 7 +++++++ 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/libpd/Makefile b/libpd/Makefile index 37bddb22cf..657f1427a3 100644 --- a/libpd/Makefile +++ b/libpd/Makefile @@ -9,6 +9,8 @@ LIBPD_IMPLIB = LIBPD_DEF = PLATFORM_ARCH ?= $(shell $(CC) -dumpmachine | sed -e 's,-.*,,') +PARALLEL=true + ifeq ($(UNAME), Darwin) # Mac SOLIB_EXT = dylib PLATFORM_CFLAGS = -DHAVE_LIBDL @@ -62,6 +64,10 @@ VPATH = ../src:\ CPPFLAGS = -I../src -DPD -DHAVE_UNISTD_H -DUSEAPI_DUMMY -DLIBPD_EXTRA \ -DPDINSTANCE +ifeq ($(PARALLEL), true) +CPPFLAGS += -DPD_DSPTHREADS=1 -DPD_PARALLEL=1 +endif + # code generation flags (e.g., optimization). 
CODECFLAGS = -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -O3 @@ -81,7 +87,7 @@ PDSRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ g_editor_extras.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ - s_main.c s_inter.c s_print.c \ + s_main.c s_inter.c s_print.c s_sync.c \ s_loader.c s_path.c s_entry.c s_audio.c s_midi.c s_net.c s_utf8.c \ s_audio_paring.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ diff --git a/src/makefile.gnu b/src/makefile.gnu index 2bc04a3388..0425ddd658 100644 --- a/src/makefile.gnu +++ b/src/makefile.gnu @@ -34,6 +34,7 @@ PDEXEC = $(BIN_DIR)/pd EXT= pd_linux ALSA=true OSS=true +PARALLEL=true prefix = /usr/local exec_prefix = ${prefix} @@ -60,6 +61,10 @@ CPPFLAGS = -DPD -DHAVE_LIBDL -DHAVE_UNISTD_H -DHAVE_ALLOCA_H \ -Wno-unused -Wno-unused-parameter -Wno-parentheses -Wno-switch \ -Wno-cast-function-type -Wno-stringop-truncation -Wno-format-truncation +ifeq ($(PARALLEL), true) +CPPFLAGS += -DPD_DSPTHREADS=1 -DPD_PARALLEL=1 +endif + # code generation flags (e.g., optimization). 
CODECFLAGS = -g -O3 -ffast-math -funroll-loops -fomit-frame-pointer diff --git a/src/makefile.mac b/src/makefile.mac index 2b0ed52ada..127206d068 100644 --- a/src/makefile.mac +++ b/src/makefile.mac @@ -11,6 +11,7 @@ EXT= pd_darwin GUINAME= libPdTcl.dylib ARCH= -arch i386 -arch ppc EXTRAARCH= -arch i386 -arch x86_64 -arch ppc +PARALLEL=true MKDIR_P = mkdir -p @@ -37,6 +38,7 @@ CPPFLAGS = -DPD -DINSTALL_PREFIX=\"$(prefix)\" \ -I$(PADIR)/src/os/mac_osx/ -I$(PMDIR)/pm_common \ -I$(PMDIR)/pm_mac -I$(PMDIR)/porttime \ -DUSEAPI_PORTAUDIO -DPA_USE_COREAUDIO -DNEWBUFFER + ARCH_CFLAGS = $(ARCH) WARN_CFLAGS = -Wall -W -Wstrict-prototypes -Wno-unused -Wno-unused-parameter \ -Wno-parentheses -Wno-switch @@ -46,6 +48,12 @@ LDFLAGS = -Wl -framework CoreAudio -framework AudioUnit \ -framework AudioToolbox -framework Carbon -framework CoreMIDI \ -framework CoreFoundation $(ARCH) \ +ifeq ($(PARALLEL), true) +CPPFLAGS += -DPD_DSPTHREADS=1 -DPD_PARALLEL=1 +MORECFLAGS += -mmacosx-version-min=10.9 +LDFLAGS += -mmacosx-version-min=10.9 +endif + LIB = -ldl -lm -lpthread ifdef JACK diff --git a/src/makefile.mingw b/src/makefile.mingw index 1da6b6b05f..730df9aa90 100644 --- a/src/makefile.mingw +++ b/src/makefile.mingw @@ -39,6 +39,7 @@ PDDLL = $(EXECDIR)/pd.dll PDCOM = $(EXECDIR)/pd.com PDRECEIVE = $(EXECDIR)/pdreceive.exe PDSEND = $(EXECDIR)/pdsend.exe +PARALLEL=true DLLWRAP= dllwrap @@ -70,6 +71,10 @@ WARN_CFLAGS = -Wall -W -Wstrict-prototypes -Wno-unused \ ARCH_CFLAGS = -DPD -DPD_INTERNAL -DPA_USE_ASIO -DPA_USE_WMME -DWINVER=0x0502 \ -DUSEAPI_MMIO -DUSEAPI_PORTAUDIO -mms-bitfields -DWISH='"wish85.exe"' +ifeq ($(PARALLEL), true) +ARCH_CFLAGS += -DPD_DSPTHREADS=1 -DPD_PARALLEL=1 +endif + CFLAGS += $(ARCH_CFLAGS) $(WARN_CFLAGS) $(OPT_CFLAGS) $(MORECFLAGS) STRIP = strip --strip-unneeded -R .note -R .comment diff --git a/src/makefile.msvc b/src/makefile.msvc index 367d546184..eea6da7807 100644 --- a/src/makefile.msvc +++ b/src/makefile.msvc @@ -51,6 +51,8 @@ endif # \ !endif +PARALLEL=true 
+ PDINCLUDE = /I./ $(EXTRA_INCLUDES) PDLIB = /NODEFAULTLIB:libcmt /NODEFAULTLIB:libcpmt /NODEFAULTLIB:oldnames \ @@ -75,6 +77,11 @@ CFLAGS = /nologo \ /DUSEAPI_MMIO /DUSEAPI_PORTAUDIO \ /DPA_LITTLE_ENDIAN /DPA19 \ /D_CRT_SECURE_NO_WARNINGS + +ifeq ($(PARALLEL), true) +CFLAGS += /DPD_DSPTHREADS=1 /DPD_PARALLEL=1 +endif + LFLAGS = /nologo SYSSRC = s_audio_pa.c s_audio_paring.c \ From 0cc438ef5d8ef143e2d8df3e0711e97b695aa435 Mon Sep 17 00:00:00 2001 From: Christof Ressi Date: Sat, 17 Sep 2022 23:47:00 +0200 Subject: [PATCH 32/32] install headers --- src/Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index e1d772e2bf..30020e6063 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -188,11 +188,11 @@ libpdbindir = $(pkglibdir)/bin # these install to ${includedir}/pd pkginclude_HEADERS = m_pd.h m_imp.h g_canvas.h g_undo.h g_all_guis.h s_stuff.h \ - s_net.h x_vexp.h + s_net.h s_spinlock.h x_vexp.h # compatibility: m_pd.h also goes into ${includedir}/ include_HEADERS = m_pd.h -noinst_HEADERS = s_audio_alsa.h s_audio_paring.h s_utf8.h +noinst_HEADERS = s_audio_alsa.h s_audio_paring.h s_sync.h s_utf8.h noinst_HEADERS += z_hooks.h z_ringbuffer.h x_libpdreceive.h if LIBPD