diff --git a/configure.ac b/configure.ac index fce638335b..0a08fd863d 100644 --- a/configure.ac +++ b/configure.ac @@ -287,11 +287,6 @@ AS_IF([test x$debug = xyes],[ PD_CPPFLAGS="-DNDEBUG $PD_CPPFLAGS" ]) -##### macOS version min ##### -AS_IF([test "x$macos_version_min" != "x"],[ - PD_CFLAGS="-mmacosx-version-min=$macos_version_min $PD_CFLAGS" -]) - ######################################### ##### Configure Options ##### @@ -313,6 +308,20 @@ AM_CONDITIONAL(LIBPD_EXTRA, test x$enable_libpd_extra = xyes) AM_CONDITIONAL(LIBPD_MULTIINSTANCE, test x$enable_libpd_instance = xyes) AM_CONDITIONAL(LIBPD_NO_SETLOCALE, test x$enable_libpd_setlocale = xno) +##### parallel processing support ##### +AC_ARG_ENABLE([parallel], + [AS_HELP_STRING([--disable-parallel], [disable parallel processing support])], + [parallel=$enableval], [parallel=yes]) +AS_IF([test x$parallel = xyes],[ + PD_CPPFLAGS="-DPD_DSPTHREADS=1 -DPD_PARALLEL=1 $PD_CPPFLAGS" + AS_IF([test x"$MACOSX" = x"yes"], [ + # for thread local storage support + macos_version_min=10.9 + ]) +],[ + PD_CPPFLAGS="-DPD_DSPTHREADS=0 -DPD_PARALLEL=0 $PD_CPPFLAGS" +]) + ##### Universal/multi architecture build on macOS ##### PD_CHECK_UNIVERSAL(ARCH, [universal=yes], [universal=no]) AM_CONDITIONAL(UNIVERSAL, test x$universal = xyes) @@ -562,6 +571,11 @@ AS_IF([test "x${enable_libpd}" = "xyes"],[ libpd="no" ]) +##### macOS version min ##### +AS_IF([test "x$macos_version_min" != "x"],[ + PD_CFLAGS="-mmacosx-version-min=$macos_version_min $PD_CFLAGS" +]) + # pass common flags via @PD_*@ AM variables for use in Makefiles AC_SUBST(PD_CPPFLAGS) AC_SUBST(PD_CFLAGS) diff --git a/doc/5.reference/block~-help.pd b/doc/5.reference/block~-help.pd index 4313a78c71..8f76903b3c 100644 --- a/doc/5.reference/block~-help.pd +++ b/doc/5.reference/block~-help.pd @@ -1,8 +1,8 @@ -#N canvas 576 23 531 684 12; +#N canvas 576 23 531 733 12; #X text 34 248 You may have at most one block~/switch~ object in any window., f 67; -#X text 15 597 see also:; -#X obj 
135 649 fft~; +#X text 15 653 see also:; +#X obj 135 705 fft~; #X text 34 367 Pd's default block size is 64 samples. The inlet~ and outlet~ objects reblock signals to adjust for differences between parent and subpatch \, but only power-of-two adjustments are possible. So @@ -10,7 +10,6 @@ for "normal" audio computations \, all blocks should also be power-of-two in size. HOWEVER \, if you have no inlet~ or outlet~ you may specify any other block size. This is intended for later use in video processing. , f 67; -#X text 318 649 updated for Pd version 0.43; #N canvas 424 281 492 272 block-interactions 0; #X text 32 49 Dac~ and adc~ don't work correctly if reblocked \, nor if a parent window is reblocked \, even if the window containing the @@ -25,8 +24,8 @@ than they are \, there might be weirdness.; may be switched with impunity \, but not catch~., f 60; #X text 32 11 INTERACTIONS BETWEEN BLOCK~/SWITCH~ AND OTHER OBJECTS IN PD; -#X restore 149 540 pd block-interactions; -#X text 308 540 <= BUG! block~/switch~ and dac~/adc~ are incompatible +#X restore 149 570 pd block-interactions; +#X text 308 570 <= BUG! block~/switch~ and dac~/adc~ are incompatible , f 27; #N canvas 741 59 537 534 switch-example 0; #X obj 109 380 bang~; @@ -72,7 +71,7 @@ reblocked.; #X connect 7 0 20 0; #X connect 8 0 20 0; #X connect 9 0 20 0; -#X restore 150 489 pd switch-example; +#X restore 150 519 pd switch-example; #N canvas 551 180 567 287 switch-bang 0; #X text 50 15 You can use the switch~ object to single-step dsp in a subpatch. 
This might be useful for block operations that don't want @@ -92,19 +91,19 @@ to be synced to the sample clock: loading a window function in a table #X connect 1 0 7 0; #X connect 2 0 4 0; #X connect 2 0 7 0; -#X restore 149 514 pd switch-bang; -#X text 277 489 <= click and open example; +#X restore 149 544 pd switch-bang; +#X text 277 519 <= click and open example; #X obj 85 68 block~ 64 1 1; #X text 85 95 args: block size \, overlap \, up-downsampling; -#X obj 87 597 ../3.audio.examples/G04.control.blocksize; -#X obj 87 619 ../3.audio.examples/J07.oversampling; -#X obj 46 489 tgl 17 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#X obj 87 653 ../3.audio.examples/G04.control.blocksize; +#X obj 87 675 ../3.audio.examples/J07.oversampling; +#X obj 46 519 tgl 17 0 empty empty empty 17 7 0 10 #fcfcfc #000000 #000000 0 1; -#X msg 46 522 \; pd dsp \$1; -#X text 67 486 <= DSP on/off, f 6; -#X obj 178 649 bang~; -#X text 14 649 and the objects:; -#X text 259 514 <= 'bang' lets you single-step DSP; +#X msg 46 552 \; pd dsp \$1; +#X text 67 516 <= DSP on/off, f 6; +#X obj 178 705 bang~; +#X text 14 705 and the objects:; +#X text 259 544 <= 'bang' lets you single-step DSP; #X text 34 122 The block~ and switch~ objects set the block size \, overlap \, and up/down-sampling ratio for the patch window. 
(The overlap and resampling ratio are relative to the super-patch.), f 67; @@ -150,7 +149,277 @@ up/downsampling)., f 66; #X restore 369 17 pd reference; #X text 5 16 [block~] and [switch~] -; #X text 188 9 set block size and on/off control for DSP, f 22; -#X obj 5 583 cnv 1 520 1 empty empty empty 8 12 0 13 #000000 #000000 +#X obj 5 639 cnv 1 520 1 empty empty empty 8 12 0 13 #000000 #000000 0; #X text 462 18 <= click; -#X connect 14 0 15 0; +#X text 35 474 Block~ also facilitates parallel DSP processing with +the "parallel" and "join" messages., f 68; +#N canvas 617 264 613 446 parallel-dsp 0; +#X obj 43 252 block~; +#X msg 43 62 parallel \$1; +#X obj 43 41 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 #000000 +0 1; +#X obj 60 197 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#000000 0 1; +#X msg 60 217 join \$1; +#X text 40 388 See also the "parallel" message for; +#X obj 296 388 clone; +#N canvas 425 76 499 300 pipelining 0; +#N canvas 75 75 406 256 fx1 0; +#X obj 57 103 inlet~; +#X obj 57 140 bob~; +#X obj 57 178 outlet~; +#X obj 123 103 inlet~; +#X obj 123 140 bob~; +#X obj 123 178 outlet~; +#X obj 212 121 loadbang; +#X msg 212 149 parallel 1; +#X obj 212 178 block~; +#X obj 70 35 loadbang; +#X msg 70 64 800; +#X msg 148 67 1; +#X connect 0 0 1 0; +#X connect 1 0 2 0; +#X connect 3 0 4 0; +#X connect 4 0 5 0; +#X connect 6 0 7 0; +#X connect 7 0 8 0; +#X connect 9 0 10 0; +#X connect 9 0 11 0; +#X connect 10 0 1 1; +#X connect 10 0 4 1; +#X connect 11 0 1 2; +#X connect 11 0 4 2; +#X restore 82 100 pd fx1; +#X text 78 38 asynchronous pipelining:; +#X obj 81 70 noise~; +#X obj 132 70 noise~; +#X obj 81 215 output~; +#X msg 157 236 \; pd dsp \$1; +#X obj 157 214 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#000000 0 1; +#X text 139 101 <= click me; +#X text 142 125 Although the 4 stages are nominally processed in series +\, block~ + "parallel" make them run asynchronously. 
However \, because +outlet~ is buffered \, each stage is delayed by 1 block., f 46; +#N canvas 75 75 406 256 fx2 0; +#X obj 57 103 inlet~; +#X obj 57 140 bob~; +#X obj 57 178 outlet~; +#X obj 123 103 inlet~; +#X obj 123 140 bob~; +#X obj 123 178 outlet~; +#X obj 212 121 loadbang; +#X msg 212 149 parallel 1; +#X obj 212 178 block~; +#X obj 70 35 loadbang; +#X msg 70 64 800; +#X msg 148 66 1; +#X connect 0 0 1 0; +#X connect 1 0 2 0; +#X connect 3 0 4 0; +#X connect 4 0 5 0; +#X connect 6 0 7 0; +#X connect 7 0 8 0; +#X connect 9 0 10 0; +#X connect 9 0 11 0; +#X connect 10 0 1 1; +#X connect 10 0 4 1; +#X connect 11 0 1 2; +#X connect 11 0 4 2; +#X restore 82 128 pd fx2; +#N canvas 75 75 406 256 fx3 0; +#X obj 57 103 inlet~; +#X obj 57 140 bob~; +#X obj 57 178 outlet~; +#X obj 123 103 inlet~; +#X obj 123 140 bob~; +#X obj 123 178 outlet~; +#X obj 212 121 loadbang; +#X msg 212 149 parallel 1; +#X obj 212 178 block~; +#X obj 70 35 loadbang; +#X msg 70 64 800; +#X msg 148 67 0.1; +#X connect 0 0 1 0; +#X connect 1 0 2 0; +#X connect 3 0 4 0; +#X connect 4 0 5 0; +#X connect 6 0 7 0; +#X connect 7 0 8 0; +#X connect 9 0 10 0; +#X connect 9 0 11 0; +#X connect 10 0 1 1; +#X connect 10 0 4 1; +#X connect 11 0 1 2; +#X connect 11 0 4 2; +#X restore 82 156 pd fx3; +#N canvas 75 75 406 256 fx4 0; +#X obj 57 103 inlet~; +#X obj 57 140 bob~; +#X obj 57 178 outlet~; +#X obj 123 103 inlet~; +#X obj 123 140 bob~; +#X obj 123 178 outlet~; +#X obj 212 121 loadbang; +#X msg 212 149 parallel 1; +#X obj 212 178 block~; +#X obj 70 35 loadbang; +#X msg 70 64 800; +#X msg 148 67 1; +#X connect 0 0 1 0; +#X connect 1 0 2 0; +#X connect 3 0 4 0; +#X connect 4 0 5 0; +#X connect 6 0 7 0; +#X connect 7 0 8 0; +#X connect 9 0 10 0; +#X connect 9 0 11 0; +#X connect 10 0 1 1; +#X connect 10 0 4 1; +#X connect 11 0 1 2; +#X connect 11 0 4 2; +#X restore 82 185 pd fx4; +#X connect 0 0 9 0; +#X connect 0 1 9 1; +#X connect 2 0 0 0; +#X connect 3 0 0 1; +#X connect 6 0 5 0; +#X connect 9 0 10 0; 
+#X connect 9 1 10 1; +#X connect 10 0 11 0; +#X connect 10 1 11 1; +#X connect 11 0 4 0; +#X connect 11 1 4 1; +#X restore 425 118 pd pipelining; +#X text 137 85 Signal outlets are buffered \, so they always return +the result of the *previous* block.; +#X text 137 119 This allows for asynchronous pipelining:; +#X text 137 193 Wait for all parallel subpatches/abstractions in this +canvas to finish.; +#X text 65 39 on/off; +#X text 79 194 on/off; +#N canvas 502 173 398 300 fork/join 0; +#X text 52 37 fork/join; +#N canvas 271 107 598 227 source 0; +#X obj 40 140 outlet~; +#N canvas 175 175 450 182 sub1 0; +#X obj 280 103 block~; +#X obj 280 47 loadbang; +#X msg 280 74 parallel 1; +#X obj 62 62 osc~ 440; +#X obj 62 94 throw~ \$0-ch1; +#X obj 169 62 osc~ 440; +#X obj 169 94 throw~ \$0-ch2; +#X connect 1 0 2 0; +#X connect 2 0 0 0; +#X connect 3 0 4 0; +#X connect 5 0 6 0; +#X restore 42 44 pd sub1; +#N canvas 175 175 450 182 sub2 0; +#X obj 280 103 block~; +#X obj 280 47 loadbang; +#X msg 280 74 parallel 1; +#X obj 62 62 osc~ 440; +#X obj 62 94 throw~ \$0-ch1; +#X obj 169 62 osc~ 440; +#X obj 169 94 throw~ \$0-ch2; +#X connect 1 0 2 0; +#X connect 2 0 0 0; +#X connect 3 0 4 0; +#X connect 5 0 6 0; +#X restore 104 44 pd sub2; +#N canvas 175 175 450 182 sub3 0; +#X obj 280 103 block~; +#X obj 280 47 loadbang; +#X msg 280 74 parallel 1; +#X obj 62 62 osc~ 440; +#X obj 62 94 throw~ \$0-ch1; +#X obj 169 62 osc~ 440; +#X obj 169 94 throw~ \$0-ch2; +#X connect 1 0 2 0; +#X connect 2 0 0 0; +#X connect 3 0 4 0; +#X connect 5 0 6 0; +#X restore 42 72 pd sub3; +#N canvas 175 175 450 182 sub4 0; +#X obj 280 103 block~; +#X obj 280 47 loadbang; +#X msg 280 74 parallel 1; +#X obj 62 62 osc~ 440; +#X obj 62 94 throw~ \$0-ch1; +#X obj 169 62 osc~ 440; +#X obj 169 94 throw~ \$0-ch2; +#X connect 1 0 2 0; +#X connect 2 0 0 0; +#X connect 3 0 4 0; +#X connect 5 0 6 0; +#X restore 105 72 pd sub4; +#X obj 172 147 block~; +#X obj 172 46 loadbang; +#X msg 172 120 join \$1; +#X msg 172 70 
1; +#X obj 172 96 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#000000 0 1; +#X text 242 67 disabling the "join" will mess up the output because +the throw~ objects in the parallel subpatches write to the corresponding +catch~ objects at random times \, sometimes before \, sometimes after +the latter are processed., f 46; +#X connect 6 0 8 0; +#X connect 7 0 5 0; +#X connect 8 0 9 0; +#X connect 9 0 7 0; +#X restore 57 73 pd source; +#N canvas 486 479 457 212 fx 0; +#X obj 50 49 inlet~; +#X obj 49 148 outlet~; +#X obj 151 148 outlet~; +#X obj 49 85 catch~ \$0-ch1; +#X obj 151 85 catch~ \$0-ch2; +#X obj 49 118 bob~; +#X obj 151 115 bob~; +#X obj 163 27 loadbang; +#X msg 163 51 12000; +#X connect 3 0 5 0; +#X connect 4 0 6 0; +#X connect 5 0 1 0; +#X connect 6 0 2 0; +#X connect 7 0 8 0; +#X connect 8 0 5 1; +#X connect 8 0 6 1; +#X restore 57 148 pd fx; +#X text 67 107 dummy connection to enforce ordering (to avoid delay) +, f 27; +#X obj 58 179 output~; +#X text 107 148 <= catch and process signals; +#X text 134 72 <= generate signals in parallel and join them, f 24 +; +#X connect 1 0 2 0; +#X connect 2 0 4 0; +#X connect 2 1 4 1; +#X restore 479 261 pd fork/join; +#X text 136 228 This is handy if you want to join parallel subpatches +for further processing. By using throw~/catch~ instead of outlet~ \, +you can even do this without any delay:, f 58; +#N canvas 182 294 450 300 nesting 0; +#X text 139 138 TODO nesting example; +#X restore 297 295 pd nesting; +#X text 36 295 "parallel" and "join" can be nested:; +#X text 40 326 NOTE: "parallel" only works if all DSP objects starting +from the nearest outer "join" point are "officially" thread-safe. You +can circumvent this check by starting Pd with -nothreadsafe., f 74 +; +#X text 138 142 NOTE: "parallel" canvases themselves cannot use reblocking +\, upsampling or overlap \, but subpatches/abstractions can!, f 60 +; +#X text 138 36 Process a canvas in parallel. 
The canvas will effectively +run asynchronously until it is joined by an outer canvas (see below) +- or at the end of the DSP tick.; +#X connect 1 0 0 0; +#X connect 2 0 1 0; +#X connect 3 0 4 0; +#X connect 4 0 0 0; +#X restore 149 606 pd parallel-dsp; +#X text 267 606 <= parallel DSP processing; +#X text 314 705 updated for Pd version 0.52-2; +#X connect 13 0 14 0; diff --git a/doc/5.reference/clone-help.pd b/doc/5.reference/clone-help.pd index ffed7b6bb9..bcca1a46f5 100644 --- a/doc/5.reference/clone-help.pd +++ b/doc/5.reference/clone-help.pd @@ -113,6 +113,11 @@ the sum of all instances' outputs \, and control outlets forward messages with the number of the instance prepended to them., f 95; #X obj 181 512 clone clone-abstraction 16; #X text 78 10 - make multiple copies of an abstraction.; +#X msg 40 466 parallel \$1; +#X obj 40 441 tgl 15 0 empty empty empty 17 7 0 10 #fcfcfc #000000 +#000000 0 1; +#X text 38 391 process copies in parallel to utilize more CPU cores. +, f 20; #X connect 0 0 1 0; #X connect 1 0 2 0; #X connect 1 1 21 1; @@ -134,3 +139,5 @@ with the number of the instance prepended to them., f 95; #X connect 33 0 9 0; #X connect 38 0 19 0; #X connect 38 0 19 1; +#X connect 40 0 38 0; +#X connect 41 0 40 0; diff --git a/doc/6.externs/dspobj~.c b/doc/6.externs/dspobj~.c index 92da4d3654..c5abb1c5d9 100644 --- a/doc/6.externs/dspobj~.c +++ b/doc/6.externs/dspobj~.c @@ -58,7 +58,7 @@ static void *dspobj_new(void) void dspobj_tilde_setup(void) { dspobj_class = class_new(gensym("dspobj~"), (t_newmethod)dspobj_new, 0, - sizeof(t_dspobj), 0, A_DEFFLOAT, 0); + sizeof(t_dspobj), CLASS_DEFAULT, A_DEFFLOAT, 0); /* this is magic to declare that the leftmost, "main" inlet takes signals; other signal inlets are done differently... 
*/ CLASS_MAINSIGNALIN(dspobj_class, t_dspobj, x_f); diff --git a/extra/bob~/bob~.c b/extra/bob~/bob~.c index 0e99e39170..89609e766d 100644 --- a/extra/bob~/bob~.c +++ b/extra/bob~/bob~.c @@ -244,7 +244,7 @@ void bob_tilde_setup(void) { int i; bob_class = class_new(gensym("bob~"), - (t_newmethod)bob_new, 0, sizeof(t_bob), 0, 0); + (t_newmethod)bob_new, 0, sizeof(t_bob), CLASS_DEFAULT, 0); class_addmethod(bob_class, (t_method)bob_saturation, gensym("saturation"), A_FLOAT, 0); class_addmethod(bob_class, (t_method)bob_oversample, gensym("oversample"), diff --git a/extra/bonk~/bonk~.c b/extra/bonk~/bonk~.c index 30191a19ef..3b8e96dbb6 100644 --- a/extra/bonk~/bonk~.c +++ b/extra/bonk~/bonk~.c @@ -1355,7 +1355,7 @@ static void *bonk_new(t_symbol *s, int argc, t_atom *argv) void bonk_tilde_setup(void) { bonk_class = class_new(gensym("bonk~"), (t_newmethod)bonk_new, - (t_method)bonk_free, sizeof(t_bonk), 0, A_GIMME, 0); + (t_method)bonk_free, sizeof(t_bonk), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(bonk_class, nullfn, gensym("signal"), 0); class_addmethod(bonk_class, (t_method)bonk_dsp, gensym("dsp"), A_CANT, 0); class_addbang(bonk_class, bonk_bang); diff --git a/extra/fiddle~/fiddle~.c b/extra/fiddle~/fiddle~.c index 50016565a4..2fd585f73d 100644 --- a/extra/fiddle~/fiddle~.c +++ b/extra/fiddle~/fiddle~.c @@ -1498,7 +1498,7 @@ void *sigfiddle_new(t_floatarg npoints, t_floatarg npitch, void fiddle_tilde_setup(void) { sigfiddle_class = class_new(gensym("fiddle~"), (t_newmethod)sigfiddle_new, - (t_method)sigfiddle_ff, sizeof(t_sigfiddle), 0, + (t_method)sigfiddle_ff, sizeof(t_sigfiddle), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addmethod(sigfiddle_class, (t_method)sigfiddle_dsp, gensym("dsp"), 0); diff --git a/extra/loop~/loop~.c b/extra/loop~/loop~.c index 3528a56fb4..f9d97c4964 100644 --- a/extra/loop~/loop~.c +++ b/extra/loop~/loop~.c @@ -158,7 +158,7 @@ static void loop_bang(t_loop *x) void loop_tilde_setup(void) { loop_class = 
class_new(gensym("loop~"), (t_newmethod)loop_new, 0, - sizeof(t_loop), 0, 0); + sizeof(t_loop), CLASS_DEFAULT, 0); class_addmethod(loop_class, (t_method)loop_dsp, gensym("dsp"), A_CANT, 0); CLASS_MAINSIGNALIN(loop_class, t_loop, x_f); class_addmethod(loop_class, (t_method)loop_set, gensym("set"), diff --git a/extra/lrshift~/lrshift~.c b/extra/lrshift~/lrshift~.c index 377b43fb8a..61a6b9119b 100644 --- a/extra/lrshift~/lrshift~.c +++ b/extra/lrshift~/lrshift~.c @@ -68,9 +68,9 @@ static void *lrshift_tilde_new(t_floatarg f) void lrshift_tilde_setup(void) { lrshift_tilde_class = class_new(gensym("lrshift~"), - (t_newmethod)lrshift_tilde_new, 0, sizeof(t_lrshift_tilde), 0, - A_DEFFLOAT, 0); + (t_newmethod)lrshift_tilde_new, 0, sizeof(t_lrshift_tilde), + CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(lrshift_tilde_class, t_lrshift_tilde, x_f); class_addmethod(lrshift_tilde_class, (t_method)lrshift_tilde_dsp, - gensym("dsp"), 0); + gensym("dsp"), A_CANT, 0); } diff --git a/extra/pd~/pdsched.c b/extra/pd~/pdsched.c index 0bf913dcc7..25849d6216 100644 --- a/extra/pd~/pdsched.c +++ b/extra/pd~/pdsched.c @@ -115,6 +115,7 @@ int pd_extern_sched(char *flags) /* fprintf(stderr, "Pd plug-in scheduler called, chans %d %d, sr %d\n", chin, chout, (int)rate); */ sys_setchsr(chin, chout, as.a_srate); + sys_dspthreadpool_start(&as.a_numthreads, 0); while (useascii ? 
readasciimessage(b) : readbinmessage(b) ) { t_atom *ap = binbuf_getvec(b); @@ -163,5 +164,6 @@ int pd_extern_sched(char *flags) } } binbuf_free(b); + sys_dspthreadpool_stop(0); return (0); } diff --git a/extra/pd~/pd~.c b/extra/pd~/pd~.c index 84b613af69..071d9c8c05 100644 --- a/extra/pd~/pd~.c +++ b/extra/pd~/pd~.c @@ -1179,7 +1179,7 @@ static void *pd_tilde_new(t_symbol *s, int argc, t_atom *argv) void pd_tilde_setup(void) { pd_tilde_class = class_new(gensym("pd~"), (t_newmethod)pd_tilde_new, - (t_method)pd_tilde_free, sizeof(t_pd_tilde), 0, A_GIMME, 0); + (t_method)pd_tilde_free, sizeof(t_pd_tilde), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(pd_tilde_class, nullfn, gensym("signal"), 0); class_addmethod(pd_tilde_class, (t_method)pd_tilde_dsp, gensym("dsp"), A_CANT, 0); diff --git a/extra/sigmund~/sigmund~.c b/extra/sigmund~/sigmund~.c index 89994eaf6c..3a1ca1316a 100644 --- a/extra/sigmund~/sigmund~.c +++ b/extra/sigmund~/sigmund~.c @@ -1418,7 +1418,7 @@ static void sigmund_printnext(t_sigmund *x, t_float f) void sigmund_tilde_setup(void) { sigmund_class = class_new(gensym("sigmund~"), (t_newmethod)sigmund_new, - (t_method)sigmund_free, sizeof(t_sigmund), 0, A_GIMME, 0); + (t_method)sigmund_free, sizeof(t_sigmund), CLASS_DEFAULT, A_GIMME, 0); class_addlist(sigmund_class, sigmund_list); class_addmethod(sigmund_class, (t_method)sigmund_dsp, gensym("dsp"), A_CANT, 0); diff --git a/libpd/Makefile b/libpd/Makefile index 37bddb22cf..657f1427a3 100644 --- a/libpd/Makefile +++ b/libpd/Makefile @@ -9,6 +9,8 @@ LIBPD_IMPLIB = LIBPD_DEF = PLATFORM_ARCH ?= $(shell $(CC) -dumpmachine | sed -e 's,-.*,,') +PARALLEL=true + ifeq ($(UNAME), Darwin) # Mac SOLIB_EXT = dylib PLATFORM_CFLAGS = -DHAVE_LIBDL @@ -62,6 +64,10 @@ VPATH = ../src:\ CPPFLAGS = -I../src -DPD -DHAVE_UNISTD_H -DUSEAPI_DUMMY -DLIBPD_EXTRA \ -DPDINSTANCE +ifeq ($(PARALLEL), true) +CPPFLAGS += -DPD_DSPTHREADS=1 -DPD_PARALLEL=1 +endif + # code generation flags (e.g., optimization). 
CODECFLAGS = -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -O3 @@ -81,7 +87,7 @@ PDSRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ g_editor_extras.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ - s_main.c s_inter.c s_print.c \ + s_main.c s_inter.c s_print.c s_sync.c \ s_loader.c s_path.c s_entry.c s_audio.c s_midi.c s_net.c s_utf8.c \ s_audio_paring.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ diff --git a/src/Makefile.am b/src/Makefile.am index 0780c4c859..30020e6063 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -103,6 +103,7 @@ pd_SOURCES_core = \ d_ctl.c \ d_dac.c \ d_delay.c \ + d_threadpool.c \ d_fft.c \ d_filter.c \ d_global.c \ @@ -156,6 +157,7 @@ pd_SOURCES_core = \ s_net.c \ s_path.c \ s_print.c \ + s_sync.c \ s_utf8.c \ x_acoustics.c \ x_arithmetic.c \ @@ -186,11 +188,11 @@ libpdbindir = $(pkglibdir)/bin # these install to ${includedir}/pd pkginclude_HEADERS = m_pd.h m_imp.h g_canvas.h g_undo.h g_all_guis.h s_stuff.h \ - s_net.h x_vexp.h + s_net.h s_spinlock.h x_vexp.h # compatibility: m_pd.h also goes into ${includedir}/ include_HEADERS = m_pd.h -noinst_HEADERS = s_audio_alsa.h s_audio_paring.h s_utf8.h +noinst_HEADERS = s_audio_alsa.h s_audio_paring.h s_sync.h s_utf8.h noinst_HEADERS += z_hooks.h z_ringbuffer.h x_libpdreceive.h if LIBPD diff --git a/src/d_arithmetic.c b/src/d_arithmetic.c index 78b573ebc0..6857196fef 100644 --- a/src/d_arithmetic.c +++ b/src/d_arithmetic.c @@ -93,12 +93,12 @@ static void scalarplus_dsp(t_scalarplus *x, t_signal **sp) static void plus_setup(void) { plus_class = class_new(gensym("+~"), (t_newmethod)plus_new, 0, - sizeof(t_plus), 0, A_GIMME, 0); + sizeof(t_plus), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(plus_class, (t_method)plus_dsp, gensym("dsp"), A_CANT, 0); CLASS_MAINSIGNALIN(plus_class, t_plus, x_f); class_sethelpsymbol(plus_class, gensym("binops-tilde")); scalarplus_class = 
class_new(gensym("+~"), 0, 0, - sizeof(t_scalarplus), 0, 0); + sizeof(t_scalarplus), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarplus_class, t_scalarplus, x_f); class_addmethod(scalarplus_class, (t_method)scalarplus_dsp, gensym("dsp"), A_CANT, 0); @@ -223,12 +223,12 @@ static void scalarminus_dsp(t_scalarminus *x, t_signal **sp) static void minus_setup(void) { minus_class = class_new(gensym("-~"), (t_newmethod)minus_new, 0, - sizeof(t_minus), 0, A_GIMME, 0); + sizeof(t_minus), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(minus_class, t_minus, x_f); class_addmethod(minus_class, (t_method)minus_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(minus_class, gensym("sigbinops")); scalarminus_class = class_new(gensym("-~"), 0, 0, - sizeof(t_scalarminus), 0, 0); + sizeof(t_scalarminus), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarminus_class, t_scalarminus, x_f); class_addmethod(scalarminus_class, (t_method)scalarminus_dsp, gensym("dsp"), A_CANT, 0); @@ -354,12 +354,12 @@ static void scalartimes_dsp(t_scalartimes *x, t_signal **sp) static void times_setup(void) { times_class = class_new(gensym("*~"), (t_newmethod)times_new, 0, - sizeof(t_times), 0, A_GIMME, 0); + sizeof(t_times), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(times_class, t_times, x_f); class_addmethod(times_class, (t_method)times_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(times_class, gensym("sigbinops")); scalartimes_class = class_new(gensym("*~"), 0, 0, - sizeof(t_scalartimes), 0, 0); + sizeof(t_scalartimes), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalartimes_class, t_scalartimes, x_f); class_addmethod(scalartimes_class, (t_method)scalartimes_dsp, gensym("dsp"), A_CANT, 0); @@ -496,12 +496,12 @@ static void scalarover_dsp(t_scalarover *x, t_signal **sp) static void over_setup(void) { over_class = class_new(gensym("/~"), (t_newmethod)over_new, 0, - sizeof(t_over), 0, A_GIMME, 0); + sizeof(t_over), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(over_class, t_over, x_f); 
class_addmethod(over_class, (t_method)over_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(over_class, gensym("sigbinops")); scalarover_class = class_new(gensym("/~"), 0, 0, - sizeof(t_scalarover), 0, 0); + sizeof(t_scalarover), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarover_class, t_scalarover, x_f); class_addmethod(scalarover_class, (t_method)scalarover_dsp, gensym("dsp"), A_CANT, 0); @@ -638,12 +638,12 @@ static void scalarmax_dsp(t_scalarmax *x, t_signal **sp) static void max_setup(void) { max_class = class_new(gensym("max~"), (t_newmethod)max_new, 0, - sizeof(t_max), 0, A_GIMME, 0); + sizeof(t_max), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(max_class, t_max, x_f); class_addmethod(max_class, (t_method)max_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(max_class, gensym("sigbinops")); scalarmax_class = class_new(gensym("max~"), 0, 0, - sizeof(t_scalarmax), 0, 0); + sizeof(t_scalarmax), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarmax_class, t_scalarmax, x_f); class_addmethod(scalarmax_class, (t_method)scalarmax_dsp, gensym("dsp"), A_CANT, 0); @@ -780,12 +780,12 @@ static void scalarmin_dsp(t_scalarmin *x, t_signal **sp) static void min_setup(void) { min_class = class_new(gensym("min~"), (t_newmethod)min_new, 0, - sizeof(t_min), 0, A_GIMME, 0); + sizeof(t_min), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(min_class, t_min, x_f); class_addmethod(min_class, (t_method)min_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(min_class, gensym("sigbinops")); scalarmin_class = class_new(gensym("min~"), 0, 0, - sizeof(t_scalarmin), 0, 0); + sizeof(t_scalarmin), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(scalarmin_class, t_scalarmin, x_f); class_addmethod(scalarmin_class, (t_method)scalarmin_dsp, gensym("dsp"), A_CANT, 0); diff --git a/src/d_array.c b/src/d_array.c index 9c18620d67..68bb86e8ce 100644 --- a/src/d_array.c +++ b/src/d_array.c @@ -17,80 +17,74 @@ typedef struct _tabwrite_tilde { t_object x_obj; int x_phase; - int x_nsampsintab; - t_word *x_vec; + 
t_garrayref x_ref; t_symbol *x_arrayname; + t_clock *x_clock; t_float x_f; } t_tabwrite_tilde; -static void tabwrite_tilde_tick(t_tabwrite_tilde *x); +static void tabwrite_tilde_redraw(t_tabwrite_tilde *x) +{ + if (garrayref_check(&x->x_ref)) + garray_redraw(x->x_ref.ar_garray); +} static void *tabwrite_tilde_new(t_symbol *s) { t_tabwrite_tilde *x = (t_tabwrite_tilde *)pd_new(tabwrite_tilde_class); x->x_phase = 0x7fffffff; x->x_arrayname = s; + garrayref_init(&x->x_ref); + x->x_clock = clock_new(x, (t_method)tabwrite_tilde_redraw); x->x_f = 0; return (x); } -static void tabwrite_tilde_redraw(t_tabwrite_tilde *x) +static void tabwrite_tilde_free(t_tabwrite_tilde *x) { - t_garray *a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class); - if (!a) - bug("tabwrite_tilde_redraw"); - else garray_redraw(a); + garrayref_unset(&x->x_ref); + clock_free(x->x_clock); } static t_int *tabwrite_tilde_perform(t_int *w) { t_tabwrite_tilde *x = (t_tabwrite_tilde *)(w[1]); t_sample *in = (t_sample *)(w[2]); - int n = (int)(w[3]), phase = x->x_phase, endphase = x->x_nsampsintab; - if (!x->x_vec) goto bad; - - if (endphase > phase) + t_word *vec; + int n = (int)(w[3]), phase = x->x_phase, endphase; + if (garrayref_write_lock(&x->x_ref, &endphase, &vec)) { - int nxfer = endphase - phase; - t_word *wp = x->x_vec + phase; - if (nxfer > n) nxfer = n; - phase += nxfer; - while (nxfer--) + if (endphase > phase) { - t_sample f = *in++; - if (PD_BIGORSMALL(f)) - f = 0; - (wp++)->w_float = f; - } - if (phase >= endphase) - { - tabwrite_tilde_redraw(x); - phase = 0x7fffffff; + int nxfer = endphase - phase; + t_word *wp = vec + phase; + if (nxfer > n) nxfer = n; + phase += nxfer; + while (nxfer--) + { + t_sample f = *in++; + if (PD_BIGORSMALL(f)) + f = 0; + (wp++)->w_float = f; + } + if (phase >= endphase) + { + clock_delay(x->x_clock, 0); + phase = 0x7fffffff; + } + x->x_phase = phase; } - x->x_phase = phase; + else x->x_phase = 0x7fffffff; + + garrayref_write_unlock(&x->x_ref); } - else 
x->x_phase = 0x7fffffff; -bad: return (w+4); } static void tabwrite_tilde_set(t_tabwrite_tilde *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) pd_error(x, "tabwrite~: %s: no such array", - x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_nsampsintab, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabwrite~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabwrite_tilde_dsp(t_tabwrite_tilde *x, t_signal **sp) @@ -121,8 +115,8 @@ static void tabwrite_tilde_stop(t_tabwrite_tilde *x) static void tabwrite_tilde_setup(void) { tabwrite_tilde_class = class_new(gensym("tabwrite~"), - (t_newmethod)tabwrite_tilde_new, 0, - sizeof(t_tabwrite_tilde), 0, A_DEFSYM, 0); + (t_newmethod)tabwrite_tilde_new, (t_method)tabwrite_tilde_free, + sizeof(t_tabwrite_tilde), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabwrite_tilde_class, t_tabwrite_tilde, x_f); class_addmethod(tabwrite_tilde_class, (t_method)tabwrite_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -144,9 +138,8 @@ typedef struct _tabplay_tilde t_object x_obj; t_outlet *x_bangout; int x_phase; - int x_nsampsintab; int x_limit; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; t_clock *x_clock; } t_tabplay_tilde; @@ -159,6 +152,7 @@ static void *tabplay_tilde_new(t_symbol *s) x->x_clock = clock_new(x, (t_method)tabplay_tilde_tick); x->x_phase = 0x7fffffff; x->x_limit = 0; + garrayref_init(&x->x_ref); x->x_arrayname = s; outlet_new(&x->x_obj, &s_signal); x->x_bangout = outlet_new(&x->x_obj, &s_bang); @@ -169,15 +163,19 @@ static t_int *tabplay_tilde_perform(t_int *w) { t_tabplay_tilde *x = (t_tabplay_tilde *)(w[1]); t_sample *out = (t_sample *)(w[2]); - t_word *wp; - int n = (int)(w[3]), phase = x->x_phase, - endphase = (x->x_nsampsintab < x->x_limit ? 
- x->x_nsampsintab : x->x_limit), nxfer, n3; - if (!x->x_vec || phase >= endphase) + t_word *vec, *wp; + int n = (int)(w[3]), phase = x->x_phase, endphase, npoints, nxfer, n3; + if (!garrayref_read_lock(&x->x_ref, &npoints, &vec)) + goto zero; + endphase = npoints < x->x_limit ? npoints : x->x_limit; + if (phase >= endphase) + { + garrayref_read_unlock(&x->x_ref); /* ! */ goto zero; + } nxfer = endphase - phase; - wp = x->x_vec + phase; + wp = vec + phase; if (nxfer > n) nxfer = n; n3 = n - nxfer; @@ -193,6 +191,7 @@ static t_int *tabplay_tilde_perform(t_int *w) } else x->x_phase = phase; + garrayref_read_unlock(&x->x_ref); return (w+4); zero: while (n--) *out++ = 0; @@ -201,21 +200,8 @@ static t_int *tabplay_tilde_perform(t_int *w) static void tabplay_tilde_set(t_tabplay_tilde *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) pd_error(x, "tabplay~: %s: no such array", - x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_nsampsintab, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabplay~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, x->x_arrayname, &x->x_obj); } static void tabplay_tilde_dsp(t_tabplay_tilde *x, t_signal **sp) @@ -249,6 +235,7 @@ static void tabplay_tilde_tick(t_tabplay_tilde *x) static void tabplay_tilde_free(t_tabplay_tilde *x) { + garrayref_unset(&x->x_ref); clock_free(x->x_clock); } @@ -256,7 +243,7 @@ static void tabplay_tilde_setup(void) { tabplay_tilde_class = class_new(gensym("tabplay~"), (t_newmethod)tabplay_tilde_new, (t_method)tabplay_tilde_free, - sizeof(t_tabplay_tilde), 0, A_DEFSYM, 0); + sizeof(t_tabplay_tilde), CLASS_DEFAULT, A_DEFSYM, 0); class_addmethod(tabplay_tilde_class, (t_method)tabplay_tilde_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(tabplay_tilde_class, (t_method)tabplay_tilde_stop, @@ -273,8 +260,7 @@ static t_class 
*tabread_tilde_class; typedef struct _tabread_tilde { t_object x_obj; - int x_npoints; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; t_float x_f; } t_tabread_tilde; @@ -283,7 +269,7 @@ static void *tabread_tilde_new(t_symbol *s) { t_tabread_tilde *x = (t_tabread_tilde *)pd_new(tabread_tilde_class); x->x_arrayname = s; - x->x_vec = 0; + garrayref_init(&x->x_ref); outlet_new(&x->x_obj, gensym("signal")); x->x_f = 0; return (x); @@ -295,13 +281,16 @@ static t_int *tabread_tilde_perform(t_int *w) t_sample *in = (t_sample *)(w[2]); t_sample *out = (t_sample *)(w[3]); int n = (int)(w[4]); - int maxindex; - t_word *buf = x->x_vec; - int i; - - maxindex = x->x_npoints - 1; - if(maxindex<0) goto zero; - if (!buf) goto zero; + int maxindex, i, npoints; + t_word *vec; + if (!garrayref_read_lock(&x->x_ref, &npoints, &vec)) + goto zero; + maxindex = npoints - 1; + if (maxindex < 0) + { + garrayref_read_unlock(&x->x_ref); /* ! */ + goto zero; + } for (i = 0; i < n; i++) { @@ -310,8 +299,10 @@ static t_int *tabread_tilde_perform(t_int *w) index = 0; else if (index > maxindex) index = maxindex; - *out++ = buf[index].w_float; + *out++ = vec[index].w_float; } + + garrayref_read_unlock(&x->x_ref); return (w+5); zero: while (n--) *out++ = 0; @@ -321,21 +312,8 @@ static t_int *tabread_tilde_perform(t_int *w) static void tabread_tilde_set(t_tabread_tilde *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, "tabread~: %s: no such array", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_npoints, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabread~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabread_tilde_dsp(t_tabread_tilde *x, t_signal **sp) @@ -349,13 +327,14 @@ static void tabread_tilde_dsp(t_tabread_tilde *x, t_signal **sp) static 
void tabread_tilde_free(t_tabread_tilde *x) { + garrayref_unset(&x->x_ref); } static void tabread_tilde_setup(void) { tabread_tilde_class = class_new(gensym("tabread~"), (t_newmethod)tabread_tilde_new, (t_method)tabread_tilde_free, - sizeof(t_tabread_tilde), 0, A_DEFSYM, 0); + sizeof(t_tabread_tilde), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabread_tilde_class, t_tabread_tilde, x_f); class_addmethod(tabread_tilde_class, (t_method)tabread_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -370,8 +349,7 @@ static t_class *tabread4_tilde_class; typedef struct _tabread4_tilde { t_object x_obj; - int x_npoints; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; t_float x_f; t_float x_onset; @@ -381,7 +359,7 @@ static void *tabread4_tilde_new(t_symbol *s) { t_tabread4_tilde *x = (t_tabread4_tilde *)pd_new(tabread4_tilde_class); x->x_arrayname = s; - x->x_vec = 0; + garrayref_init(&x->x_ref); outlet_new(&x->x_obj, gensym("signal")); floatinlet_new(&x->x_obj, &x->x_onset); x->x_f = 0; @@ -394,16 +372,17 @@ static t_int *tabread4_tilde_perform(t_int *w) t_tabread4_tilde *x = (t_tabread4_tilde *)(w[1]); t_sample *in = (t_sample *)(w[2]); t_sample *out = (t_sample *)(w[3]); - int n = (int)(w[4]); - int maxindex; - t_word *buf = x->x_vec, *wp; + int n = (int)(w[4]), maxindex, npoints, i; double onset = x->x_onset; - int i; - - maxindex = x->x_npoints - 3; - if(maxindex<0) goto zero; - - if (!buf) goto zero; + t_word *vec, *wp; + if (!garrayref_read_lock(&x->x_ref, &npoints, &vec)) + goto zero; + maxindex = npoints - 3; + if (maxindex < 0) + { + garrayref_read_unlock(&x->x_ref); /* ! 
*/ + goto zero; + } #if 0 /* test for spam -- I'm not ready to deal with this */ for (i = 0, xmax = 0, xmin = maxindex, fp = in1; i < n; i++, fp++) @@ -431,7 +410,7 @@ static t_int *tabread4_tilde_perform(t_int *w) else if (index > maxindex) index = maxindex, frac = 1; else frac = findex - index; - wp = buf + index; + wp = vec + index; a = wp[-1].w_float; b = wp[0].w_float; c = wp[1].w_float; @@ -443,6 +422,8 @@ static t_int *tabread4_tilde_perform(t_int *w) ) ); } + + garrayref_read_unlock(&x->x_ref); return (w+5); zero: while (n--) *out++ = 0; @@ -452,21 +433,8 @@ static t_int *tabread4_tilde_perform(t_int *w) static void tabread4_tilde_set(t_tabread4_tilde *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, "tabread4~: %s: no such array", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_npoints, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabread4~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabread4_tilde_dsp(t_tabread4_tilde *x, t_signal **sp) @@ -480,13 +448,14 @@ static void tabread4_tilde_dsp(t_tabread4_tilde *x, t_signal **sp) static void tabread4_tilde_free(t_tabread4_tilde *x) { + garrayref_unset(&x->x_ref); } static void tabread4_tilde_setup(void) { tabread4_tilde_class = class_new(gensym("tabread4~"), (t_newmethod)tabread4_tilde_new, (t_method)tabread4_tilde_free, - sizeof(t_tabread4_tilde), 0, A_DEFSYM, 0); + sizeof(t_tabread4_tilde), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabread4_tilde_class, t_tabread4_tilde, x_f); class_addmethod(tabread4_tilde_class, (t_method)tabread4_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -544,18 +513,20 @@ typedef struct _tabosc4_tilde t_object x_obj; t_float x_fnpoints; t_float x_finvnpoints; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; t_float x_f; - double 
x_phase; t_float x_conv; + double x_phase; + int x_lasttabsize; } t_tabosc4_tilde; static void *tabosc4_tilde_new(t_symbol *s) { t_tabosc4_tilde *x = (t_tabosc4_tilde *)pd_new(tabosc4_tilde_class); x->x_arrayname = s; - x->x_vec = 0; + garrayref_init(&x->x_ref); + x->x_lasttabsize = 0; x->x_fnpoints = 512.; x->x_finvnpoints = (1./512.); outlet_new(&x->x_obj, gensym("signal")); @@ -564,21 +535,52 @@ static void *tabosc4_tilde_new(t_symbol *s) return (x); } +static void tabosc4_tilde_free(t_tabosc4_tilde *x) +{ + garrayref_unset(&x->x_ref); +} + static t_int *tabosc4_tilde_perform(t_int *w) { t_tabosc4_tilde *x = (t_tabosc4_tilde *)(w[1]); t_sample *in = (t_sample *)(w[2]); t_sample *out = (t_sample *)(w[3]); - int n = (int)(w[4]); - int normhipart; + int n = (int)(w[4]), tabsize, mask, normhipart; union tabfudge tf; - t_float fnpoints = x->x_fnpoints; - int mask = fnpoints - 1; - t_float conv = fnpoints * x->x_conv; - t_word *tab = x->x_vec, *addr; - double dphase = fnpoints * x->x_phase + UNITBIT32; + t_word *vec, *addr; + t_float fnpoints, conv; + double dphase; + if (!garrayref_read_lock(&x->x_ref, &tabsize, &vec)) + goto zero; + + if (tabsize != x->x_lasttabsize) + { + /* check table size */ + int npoints = tabsize - 3; + if (npoints == (1 << ilog2(npoints))) + { + x->x_fnpoints = npoints; + x->x_finvnpoints = 1./npoints; + } + else + { + pd_error(x, "tabosc4~: %s: number of points (%d) not a power of 2 plus three", + x->x_arrayname->s_name, tabsize); + x->x_fnpoints = -1; /* sentinel */ + } + x->x_lasttabsize = tabsize; + } + + if (x->x_fnpoints < 0) /* bad size */ + { + garrayref_read_unlock(&x->x_ref); /* ! 
*/ + goto zero; + } + fnpoints = x->x_fnpoints; + mask = fnpoints - 1; + conv = fnpoints * x->x_conv; + dphase = fnpoints * x->x_phase + UNITBIT32; - if (!tab) goto zero; tf.tf_d = UNITBIT32; normhipart = tf.tf_i[HIOFFSET]; @@ -588,7 +590,7 @@ static t_int *tabosc4_tilde_perform(t_int *w) t_sample frac, a, b, c, d, cminusb; tf.tf_d = dphase; dphase += *in++ * conv; - addr = tab + (tf.tf_i[HIOFFSET] & mask); + addr = vec + (tf.tf_i[HIOFFSET] & mask); tf.tf_i[HIOFFSET] = normhipart; frac = tf.tf_d - UNITBIT32; a = addr[0].w_float; @@ -609,6 +611,8 @@ static t_int *tabosc4_tilde_perform(t_int *w) tf.tf_d = dphase + (UNITBIT32 * fnpoints - UNITBIT32); tf.tf_i[HIOFFSET] = normhipart; x->x_phase = (tf.tf_d - UNITBIT32 * fnpoints) * x->x_finvnpoints; + + garrayref_read_unlock(&x->x_ref); return (w+5); zero: while (n--) *out++ = 0; @@ -618,34 +622,8 @@ static t_int *tabosc4_tilde_perform(t_int *w) static void tabosc4_tilde_set(t_tabosc4_tilde *x, t_symbol *s) { - t_garray *a; - int npoints, pointsinarray; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, "tabosc4~: %s: no such array", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &pointsinarray, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabosc4~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if ((npoints = pointsinarray - 3) != (1 << ilog2(pointsinarray - 3))) - { - pd_error(x, "%s: number of points (%d) not a power of 2 plus three", - x->x_arrayname->s_name, pointsinarray); - x->x_vec = 0; - garray_usedindsp(a); - } - else - { - x->x_fnpoints = npoints; - x->x_finvnpoints = 1./npoints; - garray_usedindsp(a); - } + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabosc4_tilde_ft1(t_tabosc4_tilde *x, t_float f) @@ -665,8 +643,8 @@ static void tabosc4_tilde_dsp(t_tabosc4_tilde *x, t_signal **sp) static void tabosc4_tilde_setup(void) { tabosc4_tilde_class = class_new(gensym("tabosc4~"), - 
(t_newmethod)tabosc4_tilde_new, 0, - sizeof(t_tabosc4_tilde), 0, A_DEFSYM, 0); + (t_newmethod)tabosc4_tilde_new, (t_method)tabosc4_tilde_free, + sizeof(t_tabosc4_tilde), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabosc4_tilde_class, t_tabosc4_tilde, x_f); class_addmethod(tabosc4_tilde_class, (t_method)tabosc4_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -683,72 +661,68 @@ static t_class *tabsend_class; typedef struct _tabsend { t_object x_obj; - t_word *x_vec; + t_garrayref x_ref; int x_graphperiod; int x_graphcount; t_symbol *x_arrayname; + t_clock *x_clock; t_float x_f; - int x_npoints; } t_tabsend; -static void tabsend_tick(t_tabsend *x); +static void tabsend_tick(t_tabsend *x) +{ + if (garrayref_check(&x->x_ref)) + garray_redraw(x->x_ref.ar_garray); +} static void *tabsend_new(t_symbol *s) { t_tabsend *x = (t_tabsend *)pd_new(tabsend_class); + garrayref_init(&x->x_ref); x->x_graphcount = 0; x->x_arrayname = s; + x->x_clock = clock_new(x, (t_method)tabsend_tick); x->x_f = 0; return (x); } +static void tabsend_free(t_tabsend *x) +{ + garrayref_unset(&x->x_ref); + clock_free(x->x_clock); +} + static t_int *tabsend_perform(t_int *w) { t_tabsend *x = (t_tabsend *)(w[1]); t_sample *in = (t_sample *)(w[2]); - int n = (int)w[3]; - t_word *dest = x->x_vec; - int i = x->x_graphcount; - if (!x->x_vec) goto bad; - if (n > x->x_npoints) - n = x->x_npoints; - while (n--) - { - t_sample f = *in++; - if (PD_BIGORSMALL(f)) - f = 0; - (dest++)->w_float = f; - } - if (!i--) + int n = (int)w[3], npoints; + t_word *dest; + if (garrayref_write_lock(&x->x_ref, &npoints, &dest)) { - t_garray *a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class); - if (!a) - bug("tabsend_dsp"); - else garray_redraw(a); - i = x->x_graphperiod; + if (n > npoints) + n = npoints; + while (n--) + { + t_sample f = *in++; + if (PD_BIGORSMALL(f)) + f = 0; + (dest++)->w_float = f; + } + if (!x->x_graphcount--) + { + clock_delay(x->x_clock, 0); + x->x_graphcount = x->x_graphperiod; + } + 
garrayref_write_unlock(&x->x_ref); } - x->x_graphcount = i; -bad: return (w+4); } static void tabsend_set(t_tabsend *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, "tabsend~: %s: no such array", x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_npoints, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabsend~", x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabsend_dsp(t_tabsend *x, t_signal **sp) @@ -765,7 +739,7 @@ static void tabsend_dsp(t_tabsend *x, t_signal **sp) static void tabsend_setup(void) { tabsend_class = class_new(gensym("tabsend~"), (t_newmethod)tabsend_new, - 0, sizeof(t_tabsend), 0, A_DEFSYM, 0); + (t_method)tabsend_free, sizeof(t_tabsend), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(tabsend_class, t_tabsend, x_f); class_addmethod(tabsend_class, (t_method)tabsend_dsp, gensym("dsp"), A_CANT, 0); @@ -781,52 +755,38 @@ static t_class *tabreceive_class; typedef struct _tabreceive { t_object x_obj; - t_word *x_vec; + t_garrayref x_ref; t_symbol *x_arrayname; - int x_npoints; } t_tabreceive; static t_int *tabreceive_perform(t_int *w) { t_tabreceive *x = (t_tabreceive *)(w[1]); t_sample *out = (t_sample *)(w[2]); - int n = (int)w[3]; - t_word *from = x->x_vec; - if (from) + int n = (int)w[3], npoints; + t_word *from; + if (garrayref_read_lock(&x->x_ref, &npoints, &from)) { - t_int vecsize = x->x_npoints; + int vecsize = npoints; if (vecsize > n) vecsize = n; while (vecsize--) *out++ = (from++)->w_float; - vecsize = n - x->x_npoints; + vecsize = n - npoints; if (vecsize > 0) while (vecsize--) *out++ = 0; + garrayref_read_unlock(&x->x_ref); } - else while (n--) *out++ = 0; + else + while (n--) *out++ = 0; return (w+4); } static void tabreceive_set(t_tabreceive *x, t_symbol *s) { - t_garray *a; - x->x_arrayname = s; - if 
(!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - { - if (*s->s_name) - pd_error(x, "tabreceive~: %s: no such array", - x->x_arrayname->s_name); - x->x_vec = 0; - } - else if (!garray_getfloatwords(a, &x->x_npoints, &x->x_vec)) - { - pd_error(x, "%s: bad template for tabreceive~", - x->x_arrayname->s_name); - x->x_vec = 0; - } - else garray_usedindsp(a); + garrayref_set(&x->x_ref, s, &x->x_obj); } static void tabreceive_dsp(t_tabreceive *x, t_signal **sp) @@ -838,16 +798,22 @@ static void tabreceive_dsp(t_tabreceive *x, t_signal **sp) static void *tabreceive_new(t_symbol *s) { t_tabreceive *x = (t_tabreceive *)pd_new(tabreceive_class); + garrayref_init(&x->x_ref); x->x_arrayname = s; outlet_new(&x->x_obj, &s_signal); return (x); } +static void tabreceive_free(t_tabreceive *x) +{ + garrayref_unset(&x->x_ref); +} + static void tabreceive_setup(void) { tabreceive_class = class_new(gensym("tabreceive~"), - (t_newmethod)tabreceive_new, 0, - sizeof(t_tabreceive), 0, A_DEFSYM, 0); + (t_newmethod)tabreceive_new, (t_method)tabreceive_free, + sizeof(t_tabreceive), CLASS_DEFAULT, A_DEFSYM, 0); class_addmethod(tabreceive_class, (t_method)tabreceive_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(tabreceive_class, (t_method)tabreceive_set, @@ -862,20 +828,15 @@ static t_class *tabread_class; typedef struct _tabread { t_object x_obj; + t_garrayref x_ref; t_symbol *x_arrayname; } t_tabread; static void tabread_float(t_tabread *x, t_float f) { - t_garray *a; int npoints; t_word *vec; - - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - pd_error(x, "%s: no such array", x->x_arrayname->s_name); - else if (!garray_getfloatwords(a, &npoints, &vec)) - pd_error(x, "%s: bad template for tabread", x->x_arrayname->s_name); - else + if (garrayref_get(&x->x_ref, &npoints, &vec, x->x_arrayname, &x->x_obj)) { int n = f; if (n < 0) n = 0; @@ -887,20 +848,27 @@ static void tabread_float(t_tabread *x, t_float f) static void tabread_set(t_tabread *x, 
t_symbol *s) { x->x_arrayname = s; + garrayref_unset(&x->x_ref); /* reset */ } static void *tabread_new(t_symbol *s) { t_tabread *x = (t_tabread *)pd_new(tabread_class); x->x_arrayname = s; + garrayref_init(&x->x_ref); outlet_new(&x->x_obj, &s_float); return (x); } +static void tabread_free(t_tabread *x) +{ + garrayref_unset(&x->x_ref); +} + static void tabread_setup(void) { tabread_class = class_new(gensym("tabread"), (t_newmethod)tabread_new, - 0, sizeof(t_tabread), 0, A_DEFSYM, 0); + (t_method)tabread_free, sizeof(t_tabread), 0, A_DEFSYM, 0); class_addfloat(tabread_class, (t_method)tabread_float); class_addmethod(tabread_class, (t_method)tabread_set, gensym("set"), A_SYMBOL, 0); @@ -913,20 +881,18 @@ static t_class *tabread4_class; typedef struct _tabread4 { t_object x_obj; + t_garrayref x_ref; t_symbol *x_arrayname; } t_tabread4; static void tabread4_float(t_tabread4 *x, t_float f) { - t_garray *a; int npoints; t_word *vec; + if (!garrayref_get(&x->x_ref, &npoints, &vec, x->x_arrayname, &x->x_obj)) + return; - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - pd_error(x, "%s: no such array", x->x_arrayname->s_name); - else if (!garray_getfloatwords(a, &npoints, &vec)) - pd_error(x, "%s: bad template for tabread4", x->x_arrayname->s_name); - else if (npoints < 4) + if (npoints < 4) outlet_float(x->x_obj.ob_outlet, 0); else if (f <= 1) outlet_float(x->x_obj.ob_outlet, vec[1].w_float); @@ -955,20 +921,27 @@ static void tabread4_float(t_tabread4 *x, t_float f) static void tabread4_set(t_tabread4 *x, t_symbol *s) { x->x_arrayname = s; + garrayref_unset(&x->x_ref); /* reset */ } static void *tabread4_new(t_symbol *s) { t_tabread4 *x = (t_tabread4 *)pd_new(tabread4_class); x->x_arrayname = s; + garrayref_init(&x->x_ref); outlet_new(&x->x_obj, &s_float); return (x); } +static void tabread4_free(t_tabread4 *x) +{ + garrayref_unset(&x->x_ref); +} + static void tabread4_setup(void) { tabread4_class = class_new(gensym("tabread4"), 
(t_newmethod)tabread4_new, - 0, sizeof(t_tabread4), 0, A_DEFSYM, 0); + (t_method)tabread4_free, sizeof(t_tabread4), 0, A_DEFSYM, 0); class_addfloat(tabread4_class, (t_method)tabread4_float); class_addmethod(tabread4_class, (t_method)tabread4_set, gensym("set"), A_SYMBOL, 0); @@ -981,6 +954,7 @@ static t_class *tabwrite_class; typedef struct _tabwrite { t_object x_obj; + t_garrayref x_ref; t_symbol *x_arrayname; t_float x_ft1; } t_tabwrite; @@ -988,14 +962,9 @@ typedef struct _tabwrite static void tabwrite_float(t_tabwrite *x, t_float f) { int vecsize; - t_garray *a; t_word *vec; - - if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class))) - pd_error(x, "%s: no such array", x->x_arrayname->s_name); - else if (!garray_getfloatwords(a, &vecsize, &vec)) - pd_error(x, "%s: bad template for tabwrite", x->x_arrayname->s_name); - else + if (garrayref_get(&x->x_ref, + &vecsize, &vec, x->x_arrayname, &x->x_obj)) { int n = x->x_ft1; if (n < 0) @@ -1003,13 +972,14 @@ static void tabwrite_float(t_tabwrite *x, t_float f) else if (n >= vecsize) n = vecsize-1; vec[n].w_float = f; - garray_redraw(a); + garray_redraw(x->x_ref.ar_garray); } } static void tabwrite_set(t_tabwrite *x, t_symbol *s) { x->x_arrayname = s; + garrayref_unset(&x->x_ref); /* reset */ } static void *tabwrite_new(t_symbol *s) @@ -1017,14 +987,20 @@ static void *tabwrite_new(t_symbol *s) t_tabwrite *x = (t_tabwrite *)pd_new(tabwrite_class); x->x_ft1 = 0; x->x_arrayname = s; + garrayref_init(&x->x_ref); floatinlet_new(&x->x_obj, &x->x_ft1); return (x); } +static void tabwrite_free(t_tabwrite *x) +{ + garrayref_unset(&x->x_ref); +} + void tabwrite_setup(void) { tabwrite_class = class_new(gensym("tabwrite"), (t_newmethod)tabwrite_new, - 0, sizeof(t_tabwrite), 0, A_DEFSYM, 0); + (t_method)tabwrite_free, sizeof(t_tabwrite), 0, A_DEFSYM, 0); class_addfloat(tabwrite_class, (t_method)tabwrite_float); class_addmethod(tabwrite_class, (t_method)tabwrite_set, gensym("set"), A_SYMBOL, 0); diff --git a/src/d_ctl.c 
b/src/d_ctl.c index 9802f958ad..56da396e58 100644 --- a/src/d_ctl.c +++ b/src/d_ctl.c @@ -39,7 +39,7 @@ static void *sig_tilde_new(t_floatarg f) static void sig_tilde_setup(void) { sig_tilde_class = class_new(gensym("sig~"), (t_newmethod)sig_tilde_new, 0, - sizeof(t_sig), 0, A_DEFFLOAT, 0); + sizeof(t_sig), CLASS_DEFAULT, A_DEFFLOAT, 0); class_addfloat(sig_tilde_class, (t_method)sig_tilde_float); class_addmethod(sig_tilde_class, (t_method)sig_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -180,7 +180,7 @@ static void *line_tilde_new(void) static void line_tilde_setup(void) { line_tilde_class = class_new(gensym("line~"), line_tilde_new, 0, - sizeof(t_line), 0, 0); + sizeof(t_line), CLASS_DEFAULT, 0); class_addfloat(line_tilde_class, (t_method)line_tilde_float); class_addmethod(line_tilde_class, (t_method)line_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -374,7 +374,7 @@ static void *vline_tilde_new(void) static void vline_tilde_setup(void) { vline_tilde_class = class_new(gensym("vline~"), vline_tilde_new, - (t_method)vline_tilde_stop, sizeof(t_vline), 0, 0); + (t_method)vline_tilde_stop, sizeof(t_vline), CLASS_DEFAULT, 0); class_addfloat(vline_tilde_class, (t_method)vline_tilde_float); class_addmethod(vline_tilde_class, (t_method)vline_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -428,7 +428,7 @@ static void snapshot_tilde_set(t_snapshot *x, t_floatarg f) static void snapshot_tilde_setup(void) { snapshot_tilde_class = class_new(gensym("snapshot~"), snapshot_tilde_new, 0, - sizeof(t_snapshot), 0, 0); + sizeof(t_snapshot), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(snapshot_tilde_class, t_snapshot, x_f); class_addmethod(snapshot_tilde_class, (t_method)snapshot_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -516,7 +516,7 @@ static void vsnapshot_tilde_setup(void) { vsnapshot_tilde_class = class_new(gensym("vsnapshot~"), vsnapshot_tilde_new, (t_method)vsnapshot_tilde_ff, - sizeof(t_vsnapshot), 0, 0); + sizeof(t_vsnapshot), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(vsnapshot_tilde_class, 
t_vsnapshot, x_f); class_addmethod(vsnapshot_tilde_class, (t_method)vsnapshot_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -655,7 +655,7 @@ static void env_tilde_ff(t_sigenv *x) /* cleanup on free */ void env_tilde_setup(void) { env_tilde_class = class_new(gensym("env~"), (t_newmethod)env_tilde_new, - (t_method)env_tilde_ff, sizeof(t_sigenv), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + (t_method)env_tilde_ff, sizeof(t_sigenv), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(env_tilde_class, t_sigenv, x_f); class_addmethod(env_tilde_class, (t_method)env_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -785,7 +785,7 @@ static void threshold_tilde_setup(void) { threshold_tilde_class = class_new(gensym("threshold~"), (t_newmethod)threshold_tilde_new, (t_method)threshold_tilde_ff, - sizeof(t_threshold_tilde), 0, + sizeof(t_threshold_tilde), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(threshold_tilde_class, t_threshold_tilde, x_f); class_addmethod(threshold_tilde_class, (t_method)threshold_tilde_set, diff --git a/src/d_dac.c b/src/d_dac.c index 45e0027477..c814abb25d 100644 --- a/src/d_dac.c +++ b/src/d_dac.c @@ -8,6 +8,10 @@ #include "m_pd.h" #include "s_stuff.h" +#if PD_DSPTHREADS +#include "s_spinlock.h" +#endif + /* ----------------------------- dac~ --------------------------- */ static t_class *dac_class; @@ -41,6 +45,30 @@ static void *dac_new(t_symbol *s, int argc, t_atom *argv) return (x); } +#if PD_DSPTHREADS +t_int *dac_perform8(t_int *w) +{ + t_sample *in = (t_sample *)(w[1]); + t_sample *out = (t_sample *)(w[2]); + t_spinlock *lock = (t_spinlock *)(w[3]); + int n = DEFDACBLKSIZE; + spinlock_lock(lock); + for (; n; n -= 8, in += 8, out += 8) + { + t_sample f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3]; + t_sample f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7]; + + t_sample g0 = out[0], g1 = out[1], g2 = out[2], g3 = out[3]; + t_sample g4 = out[4], g5 = out[5], g6 = out[6], g7 = out[7]; + + out[0] = f0 + g0; out[1] = f1 + g1; 
out[2] = f2 + g2; out[3] = f3 + g3; + out[4] = f4 + g4; out[5] = f5 + g5; out[6] = f6 + g6; out[7] = f7 + g7; + } + spinlock_unlock(lock); + return w+4; +} +#endif /* PD_DSPTHREADS */ + static void dac_dsp(t_dac *x, t_signal **sp) { t_int i, *ip; @@ -51,8 +79,19 @@ static void dac_dsp(t_dac *x, t_signal **sp) if ((*sp2)->s_n != DEFDACBLKSIZE) pd_error(0, "dac~: bad vector size"); else if (ch >= 0 && ch < sys_get_outchannels()) - dsp_add(plus_perform, 4, STUFF->st_soundout + DEFDACBLKSIZE*ch, - (*sp2)->s_vec, STUFF->st_soundout + DEFDACBLKSIZE*ch, (t_int)DEFDACBLKSIZE); + { + t_sample *in = (*sp2)->s_vec; + t_sample *out = STUFF->st_soundout + DEFDACBLKSIZE*ch; + #if PD_DSPTHREADS + t_spinlock *lock = &STUFF->st_soundout_locks[ch]; + if (!(sp[0]->s_n & 7)) /* always true for DEFDACBLKSIZE */ + dsp_add(dac_perform8, 3, in, out, lock); + else + bug("dac_dsp"); + #else + dsp_add_plus(out, in, out, DEFDACBLKSIZE); + #endif + } } } @@ -72,7 +111,7 @@ static void dac_free(t_dac *x) static void dac_setup(void) { dac_class = class_new(gensym("dac~"), (t_newmethod)dac_new, - (t_method)dac_free, sizeof(t_dac), 0, A_GIMME, 0); + (t_method)dac_free, sizeof(t_dac), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(dac_class, t_dac, x_f); class_addmethod(dac_class, (t_method)dac_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(dac_class, (t_method)dac_set, gensym("set"), A_GIMME, 0); @@ -142,7 +181,7 @@ static void adc_free(t_adc *x) static void adc_setup(void) { adc_class = class_new(gensym("adc~"), (t_newmethod)adc_new, - (t_method)adc_free, sizeof(t_adc), 0, A_GIMME, 0); + (t_method)adc_free, sizeof(t_adc), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(adc_class, (t_method)adc_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(adc_class, (t_method)adc_set, gensym("set"), A_GIMME, 0); class_sethelpsymbol(adc_class, gensym("adc~_dac~")); diff --git a/src/d_delay.c b/src/d_delay.c index 681f5c8937..b603e2f817 100644 --- a/src/d_delay.c +++ b/src/d_delay.c @@ -5,7 +5,22 @@ /* send~, 
delread~, throw~, catch~ */ #include "m_pd.h" +#include "s_stuff.h" #include + +#if PD_DSPTHREADS +# include "s_spinlock.h" +# define LOCK(x) rwspinlock_wrlock((t_rwspinlock *)&x) +# define UNLOCK(x) rwspinlock_wrunlock((t_rwspinlock *)&x) +# define LOCK_SHARED(x) rwspinlock_rdlock((t_rwspinlock *)&x) +# define UNLOCK_SHARED(x) rwspinlock_rdunlock((t_rwspinlock *)&x) +#else +# define LOCK(x) +# define UNLOCK(x) +# define LOCK_SHARED(x) +# define UNLOCK_SHARED(x) +#endif + extern int ugen_getsortno(void); #define DEFDELVS 64 /* LATER get this from canvas at DSP time */ @@ -16,9 +31,12 @@ static t_class *sigdelwrite_class; typedef struct delwritectl { - int c_n; t_sample *c_vec; + int c_n; int c_phase; +#if PD_DSPTHREADS + t_spinlock c_lock; +#endif } t_delwritectl; typedef struct _sigdelwrite @@ -86,6 +104,9 @@ static void *sigdelwrite_new(t_symbol *s, t_floatarg msec) x->x_deltime = msec; x->x_cspace.c_n = 0; x->x_cspace.c_vec = getbytes(XTRASAMPS * sizeof(t_sample)); +#if PD_DSPTHREADS + spinlock_init(&x->x_cspace.c_lock); +#endif x->x_sortno = 0; x->x_vecsize = 0; x->x_f = 0; @@ -101,6 +122,7 @@ static t_int *sigdelwrite_perform(t_int *w) t_sample *vp = c->c_vec, *bp = vp + phase, *ep = vp + (c->c_n + XTRASAMPS); phase += n; + LOCK(c->c_lock); while (n--) { t_sample f = *in++; @@ -118,6 +140,7 @@ static t_int *sigdelwrite_perform(t_int *w) } } c->c_phase = phase; + UNLOCK(c->c_lock); return (w+4); } @@ -140,7 +163,7 @@ static void sigdelwrite_setup(void) { sigdelwrite_class = class_new(gensym("delwrite~"), (t_newmethod)sigdelwrite_new, (t_method)sigdelwrite_free, - sizeof(t_sigdelwrite), 0, A_DEFSYM, A_DEFFLOAT, 0); + sizeof(t_sigdelwrite), CLASS_DEFAULT, A_DEFSYM, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigdelwrite_class, t_sigdelwrite, x_f); class_addmethod(sigdelwrite_class, (t_method)sigdelwrite_dsp, gensym("dsp"), A_CANT, 0); @@ -203,11 +226,13 @@ static t_int *sigdelread_perform(t_int *w) if (phase < 0) phase += nsamps; bp = vp + phase; + LOCK_SHARED(c->c_lock); 
while (n--) { *out++ = *bp++; if (bp == ep) bp -= nsamps; } + UNLOCK_SHARED(c->c_lock); return (w+5); } @@ -238,7 +263,7 @@ static void sigdelread_setup(void) { sigdelread_class = class_new(gensym("delread~"), (t_newmethod)sigdelread_new, 0, - sizeof(t_sigdelread), 0, A_DEFSYM, A_DEFFLOAT, 0); + sizeof(t_sigdelread), CLASS_DEFAULT, A_DEFSYM, A_DEFFLOAT, 0); class_addmethod(sigdelread_class, (t_method)sigdelread_dsp, gensym("dsp"), A_CANT, 0); class_addfloat(sigdelread_class, (t_method)sigdelread_float); @@ -288,6 +313,7 @@ static t_int *sigvd_perform(t_int *w) *out++ = 0; return (w+6); } + LOCK_SHARED(ctl->c_lock); while (n--) { t_sample delsamps = x->x_sr * *in++ - zerodel, frac; @@ -314,6 +340,7 @@ static t_int *sigvd_perform(t_int *w) ) ); } + UNLOCK_SHARED(ctl->c_lock); return (w+6); } @@ -341,7 +368,7 @@ static void sigvd_dsp(t_sigvd *x, t_signal **sp) static void sigvd_setup(void) { sigvd_class = class_new(gensym("delread4~"), (t_newmethod)sigvd_new, 0, - sizeof(t_sigvd), 0, A_DEFSYM, 0); + sizeof(t_sigvd), CLASS_DEFAULT, A_DEFSYM, 0); class_addcreator((t_newmethod)sigvd_new, gensym("vd~"), A_DEFSYM, 0); class_addmethod(sigvd_class, (t_method)sigvd_dsp, gensym("dsp"), A_CANT, 0); CLASS_MAINSIGNALIN(sigvd_class, t_sigvd, x_f); diff --git a/src/d_fft.c b/src/d_fft.c index 9b1a1f25a0..a8f3ecd38f 100644 --- a/src/d_fft.c +++ b/src/d_fft.c @@ -134,7 +134,7 @@ static void sigifft_dsp(t_sigfft *x, t_signal **sp) static void sigfft_setup(void) { sigfft_class = class_new(gensym("fft~"), sigfft_new, 0, - sizeof(t_sigfft), 0, 0); + sizeof(t_sigfft), CLASS_DEFAULT, 0); class_setfreefn(sigfft_class, fftclass_cleanup); CLASS_MAINSIGNALIN(sigfft_class, t_sigfft, x_f); class_addmethod(sigfft_class, (t_method)sigfft_dsp, @@ -142,7 +142,7 @@ static void sigfft_setup(void) mayer_init(); sigifft_class = class_new(gensym("ifft~"), sigifft_new, 0, - sizeof(t_sigfft), 0, 0); + sizeof(t_sigfft), CLASS_DEFAULT, 0); class_setfreefn(sigifft_class, fftclass_cleanup); 
CLASS_MAINSIGNALIN(sigifft_class, t_sigfft, x_f); class_addmethod(sigifft_class, (t_method)sigifft_dsp, @@ -202,7 +202,7 @@ static void sigrfft_dsp(t_sigrfft *x, t_signal **sp) static void sigrfft_setup(void) { sigrfft_class = class_new(gensym("rfft~"), sigrfft_new, 0, - sizeof(t_sigrfft), 0, 0); + sizeof(t_sigrfft), CLASS_DEFAULT, 0); class_setfreefn(sigrfft_class, fftclass_cleanup); CLASS_MAINSIGNALIN(sigrfft_class, t_sigrfft, x_f); class_addmethod(sigrfft_class, (t_method)sigrfft_dsp, @@ -265,7 +265,7 @@ static void sigrifft_dsp(t_sigrifft *x, t_signal **sp) static void sigrifft_setup(void) { sigrifft_class = class_new(gensym("rifft~"), sigrifft_new, 0, - sizeof(t_sigrifft), 0, 0); + sizeof(t_sigrifft), CLASS_DEFAULT, 0); class_setfreefn(sigrifft_class, fftclass_cleanup); CLASS_MAINSIGNALIN(sigrifft_class, t_sigrifft, x_f); class_addmethod(sigrifft_class, (t_method)sigrifft_dsp, @@ -358,7 +358,7 @@ static void sigframp_dsp(t_sigframp *x, t_signal **sp) static void sigframp_setup(void) { sigframp_class = class_new(gensym("framp~"), sigframp_new, 0, - sizeof(t_sigframp), 0, 0); + sizeof(t_sigframp), CLASS_DEFAULT, 0); class_setfreefn(sigframp_class, fftclass_cleanup); CLASS_MAINSIGNALIN(sigframp_class, t_sigframp, x_f); class_addmethod(sigframp_class, (t_method)sigframp_dsp, diff --git a/src/d_fft_fftsg.c b/src/d_fft_fftsg.c index e7517993f1..201e2267d9 100644 --- a/src/d_fft_fftsg.c +++ b/src/d_fft_fftsg.c @@ -21,6 +21,7 @@ for another, more permissive-sounding copyright notice. -MSP /* ---------- Pd interface to OOURA FFT; imitate Mayer API ---------- */ #include "m_pd.h" #include "m_imp.h" +#include "s_stuff.h" #ifdef _WIN32 # include /* MSVC or mingw on windows */ @@ -30,17 +31,24 @@ for another, more permissive-sounding copyright notice. -MSP # include /* BSDs for example */ #endif +#if PD_DSPTHREADS +/* always thread-local! 
*/ +#define FFT_PERTHREAD THREADLOCAL +#else +#define FFT_PERTHREAD PERTHREAD +#endif + #define FFTFLT double void cdft(int, int, FFTFLT *, int *, FFTFLT *); void rdft(int, int, FFTFLT *, int *, FFTFLT *); int ilog2(int n); -static PERTHREAD int ooura_maxn; -static PERTHREAD int *ooura_bitrev; -static PERTHREAD int ooura_bitrevsize; -static PERTHREAD FFTFLT *ooura_costab; -static PERTHREAD FFTFLT *ooura_buffer; +static FFT_PERTHREAD int ooura_maxn; +static FFT_PERTHREAD int *ooura_bitrev; +static FFT_PERTHREAD int ooura_bitrevsize; +static FFT_PERTHREAD FFTFLT *ooura_costab; +static FFT_PERTHREAD FFTFLT *ooura_buffer; static int ooura_init( int n) { @@ -101,7 +109,7 @@ static void ooura_term( void) } /* -------- initialization and cleanup -------- */ -static PERTHREAD int mayer_refcount = 0; +static FFT_PERTHREAD int mayer_refcount = 0; void mayer_init( void) { diff --git a/src/d_fft_fftw.c b/src/d_fft_fftw.c index 10d1ce403f..554402b5d9 100644 --- a/src/d_fft_fftw.c +++ b/src/d_fft_fftw.c @@ -7,6 +7,7 @@ /* changes and additions for FFTW3 by Thomas Grill */ #include "m_pd.h" +#include "s_stuff.h" #include int ilog2(int n); @@ -147,14 +148,16 @@ static void rfftw_term(void) } } -static int mayer_refcount = 0; +#if PD_DSPTHREADS +/* always thread-local! 
*/ +static THREADLOCAL int mayer_refcount = 0; +#else +static PERTHREAD int mayer_refcount = 0; +#endif void mayer_init(void) { - if (mayer_refcount++ == 0) - { - /* nothing to do */ - } + mayer_refcount++; } void mayer_term(void) diff --git a/src/d_filter.c b/src/d_filter.c index bdcc87a43f..cc1a04e034 100644 --- a/src/d_filter.c +++ b/src/d_filter.c @@ -127,7 +127,7 @@ static void sighip_clear(t_sighip *x, t_floatarg q) void sighip_setup(void) { sighip_class = class_new(gensym("hip~"), (t_newmethod)sighip_new, 0, - sizeof(t_sighip), 0, A_DEFFLOAT, 0); + sizeof(t_sighip), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sighip_class, t_sighip, x_f); class_addmethod(sighip_class, (t_method)sighip_dsp, gensym("dsp"), A_CANT, 0); @@ -215,7 +215,7 @@ static void siglop_dsp(t_siglop *x, t_signal **sp) void siglop_setup(void) { siglop_class = class_new(gensym("lop~"), (t_newmethod)siglop_new, 0, - sizeof(t_siglop), 0, A_DEFFLOAT, 0); + sizeof(t_siglop), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(siglop_class, t_siglop, x_f); class_addmethod(siglop_class, (t_method)siglop_dsp, gensym("dsp"), A_CANT, 0); @@ -347,7 +347,7 @@ static void sigbp_dsp(t_sigbp *x, t_signal **sp) void sigbp_setup(void) { sigbp_class = class_new(gensym("bp~"), (t_newmethod)sigbp_new, 0, - sizeof(t_sigbp), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + sizeof(t_sigbp), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigbp_class, t_sigbp, x_f); class_addmethod(sigbp_class, (t_method)sigbp_dsp, gensym("dsp"), A_CANT, 0); @@ -471,7 +471,7 @@ static void sigbiquad_dsp(t_sigbiquad *x, t_signal **sp) void sigbiquad_setup(void) { sigbiquad_class = class_new(gensym("biquad~"), (t_newmethod)sigbiquad_new, - 0, sizeof(t_sigbiquad), 0, A_GIMME, 0); + 0, sizeof(t_sigbiquad), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(sigbiquad_class, t_sigbiquad, x_f); class_addmethod(sigbiquad_class, (t_method)sigbiquad_dsp, gensym("dsp"), A_CANT, 0); @@ -549,7 +549,7 @@ static void sigsamphold_set(t_sigsamphold 
*x, t_float f) void sigsamphold_setup(void) { sigsamphold_class = class_new(gensym("samphold~"), - (t_newmethod)sigsamphold_new, 0, sizeof(t_sigsamphold), 0, 0); + (t_newmethod)sigsamphold_new, 0, sizeof(t_sigsamphold), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(sigsamphold_class, t_sigsamphold, x_f); class_addmethod(sigsamphold_class, (t_method)sigsamphold_set, gensym("set"), A_DEFFLOAT, 0); @@ -621,8 +621,8 @@ static void sigrpole_set(t_sigrpole *x, t_float f) void sigrpole_setup(void) { - sigrpole_class = class_new(gensym("rpole~"), - (t_newmethod)sigrpole_new, 0, sizeof(t_sigrpole), 0, A_DEFFLOAT, 0); + sigrpole_class = class_new(gensym("rpole~"), (t_newmethod)sigrpole_new, + 0, sizeof(t_sigrpole), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigrpole_class, t_sigrpole, x_f); class_addmethod(sigrpole_class, (t_method)sigrpole_set, gensym("set"), A_DEFFLOAT, 0); @@ -693,8 +693,8 @@ static void sigrzero_set(t_sigrzero *x, t_float f) void sigrzero_setup(void) { - sigrzero_class = class_new(gensym("rzero~"), - (t_newmethod)sigrzero_new, 0, sizeof(t_sigrzero), 0, A_DEFFLOAT, 0); + sigrzero_class = class_new(gensym("rzero~"), (t_newmethod)sigrzero_new, + 0, sizeof(t_sigrzero), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigrzero_class, t_sigrzero, x_f); class_addmethod(sigrzero_class, (t_method)sigrzero_set, gensym("set"), A_DEFFLOAT, 0); @@ -767,7 +767,7 @@ void sigrzero_rev_setup(void) { sigrzero_rev_class = class_new(gensym("rzero_rev~"), (t_newmethod)sigrzero_rev_new, 0, sizeof(t_sigrzero_rev), - 0, A_DEFFLOAT, 0); + CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigrzero_rev_class, t_sigrzero_rev, x_f); class_addmethod(sigrzero_rev_class, (t_method)sigrzero_rev_set, gensym("set"), A_DEFFLOAT, 0); @@ -859,8 +859,8 @@ static void sigcpole_set(t_sigcpole *x, t_float re, t_float im) void sigcpole_setup(void) { sigcpole_class = class_new(gensym("cpole~"), - (t_newmethod)sigcpole_new, 0, sizeof(t_sigcpole), 0, - A_DEFFLOAT, A_DEFFLOAT, 0); + 
(t_newmethod)sigcpole_new, 0, sizeof(t_sigcpole), + CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigcpole_class, t_sigcpole, x_f); class_addmethod(sigcpole_class, (t_method)sigcpole_set, gensym("set"), A_DEFFLOAT, A_DEFFLOAT, 0); @@ -949,8 +949,8 @@ static void sigczero_set(t_sigczero *x, t_float re, t_float im) void sigczero_setup(void) { sigczero_class = class_new(gensym("czero~"), - (t_newmethod)sigczero_new, 0, sizeof(t_sigczero), 0, - A_DEFFLOAT, A_DEFFLOAT, 0); + (t_newmethod)sigczero_new, 0, sizeof(t_sigczero), + CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigczero_class, t_sigczero, x_f); class_addmethod(sigczero_class, (t_method)sigczero_set, gensym("set"), A_DEFFLOAT, A_DEFFLOAT, 0); @@ -1041,8 +1041,8 @@ static void sigczero_rev_set(t_sigczero_rev *x, t_float re, t_float im) void sigczero_rev_setup(void) { sigczero_rev_class = class_new(gensym("czero_rev~"), - (t_newmethod)sigczero_rev_new, 0, sizeof(t_sigczero_rev), 0, - A_DEFFLOAT, A_DEFFLOAT, 0); + (t_newmethod)sigczero_rev_new, 0, sizeof(t_sigczero_rev), + CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigczero_rev_class, t_sigczero_rev, x_f); class_addmethod(sigczero_rev_class, (t_method)sigczero_rev_set, gensym("set"), A_DEFFLOAT, A_DEFFLOAT, 0); @@ -1151,7 +1151,7 @@ static void slop_tilde_dsp(t_slop_tilde *x, t_signal **sp) void slop_tilde_setup(void) { slop_tilde_class = class_new(gensym("slop~"), (t_newmethod)slop_tilde_new, 0, - sizeof(t_slop_tilde), 0, A_GIMME, 0); + sizeof(t_slop_tilde), CLASS_DEFAULT, A_GIMME, 0); CLASS_MAINSIGNALIN(slop_tilde_class, t_slop_tilde, x_f); class_addmethod(slop_tilde_class, (t_method)slop_tilde_dsp, gensym("dsp"), A_CANT, 0); diff --git a/src/d_global.c b/src/d_global.c index 83dd45e4ec..860000bd46 100644 --- a/src/d_global.c +++ b/src/d_global.c @@ -5,8 +5,22 @@ /* send~, receive~, throw~, catch~ */ #include "m_pd.h" +#include "s_stuff.h" #include +#if PD_DSPTHREADS +# include "s_spinlock.h" +# define LOCK(x) 
rwspinlock_wrlock((t_rwspinlock *)&x) +# define UNLOCK(x) rwspinlock_wrunlock((t_rwspinlock *)&x) +# define LOCK_SHARED(x) rwspinlock_rdlock((t_rwspinlock *)&x) +# define UNLOCK_SHARED(x) rwspinlock_rdunlock((t_rwspinlock *)&x) +#else +# define LOCK(x) +# define UNLOCK(x) +# define LOCK_SHARED(x) +# define UNLOCK_SHARED(x) +#endif + #define DEFSENDVS 64 /* LATER get send to get this from canvas */ /* ----------------------------- send~ ----------------------------- */ @@ -19,6 +33,9 @@ typedef struct _sigsend int x_n; t_sample *x_vec; t_float x_f; +#if PD_DSPTHREADS + t_rwspinlock x_lock; +#endif } t_sigsend; static void *sigsend_new(t_symbol *s) @@ -30,27 +47,33 @@ static void *sigsend_new(t_symbol *s) x->x_vec = (t_sample *)getbytes(DEFSENDVS * sizeof(t_sample)); memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(t_sample)); x->x_f = 0; +#if PD_DSPTHREADS + rwspinlock_init((t_rwspinlock *)&x->x_lock); +#endif return (x); } static t_int *sigsend_perform(t_int *w) { - t_sample *in = (t_sample *)(w[1]); - t_sample *out = (t_sample *)(w[2]); + t_sigsend *x = (t_sigsend *)(w[1]); + t_sample *in = (t_sample *)(w[2]); + t_sample *out = x->x_vec; int n = (int)(w[3]); + LOCK(x->x_lock); while (n--) { *out = (PD_BIGORSMALL(*in) ? 
0 : *in); out++; in++; } + UNLOCK(x->x_lock); return (w+4); } static void sigsend_dsp(t_sigsend *x, t_signal **sp) { if (x->x_n == sp[0]->s_n) - dsp_add(sigsend_perform, 3, sp[0]->s_vec, x->x_vec, (t_int)sp[0]->s_n); + dsp_add(sigsend_perform, 3, x, sp[0]->s_vec, (t_int)sp[0]->s_n); else pd_error(0, "sigsend %s: unexpected vector size", x->x_sym->s_name); } @@ -63,7 +86,7 @@ static void sigsend_free(t_sigsend *x) static void sigsend_setup(void) { sigsend_class = class_new(gensym("send~"), (t_newmethod)sigsend_new, - (t_method)sigsend_free, sizeof(t_sigsend), 0, A_DEFSYM, 0); + (t_method)sigsend_free, sizeof(t_sigsend), CLASS_DEFAULT, A_DEFSYM, 0); class_addcreator((t_newmethod)sigsend_new, gensym("s~"), A_DEFSYM, 0); CLASS_MAINSIGNALIN(sigsend_class, t_sigsend, x_f); class_addmethod(sigsend_class, (t_method)sigsend_dsp, @@ -80,6 +103,9 @@ typedef struct _sigreceive t_symbol *x_sym; t_sample *x_wherefrom; int x_n; +#if PD_DSPTHREADS + t_rwspinlock *x_lock; +#endif } t_sigreceive; static void *sigreceive_new(t_symbol *s) @@ -88,6 +114,9 @@ static void *sigreceive_new(t_symbol *s) x->x_n = DEFSENDVS; /* LATER find our vector size correctly */ x->x_sym = s; x->x_wherefrom = 0; +#if PD_DSPTHREADS + x->x_lock = 0; +#endif outlet_new(&x->x_obj, &s_signal); return (x); } @@ -100,8 +129,10 @@ static t_int *sigreceive_perform(t_int *w) t_sample *in = x->x_wherefrom; if (in) { + LOCK_SHARED(*x->x_lock); while (n--) *out++ = *in++; + UNLOCK_SHARED(*x->x_lock); } else { @@ -120,11 +151,13 @@ static t_int *sigreceive_perf8(t_int *w) t_sample *in = x->x_wherefrom; if (in) { + LOCK_SHARED(*x->x_lock); for (; n; n -= 8, in += 8, out += 8) { out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; out[3] = in[3]; out[4] = in[4]; out[5] = in[5]; out[6] = in[6]; out[7] = in[7]; } + UNLOCK_SHARED(*x->x_lock); } else { @@ -144,11 +177,19 @@ static void sigreceive_set(t_sigreceive *x, t_symbol *s) if (sender) { if (sender->x_n == x->x_n) + { x->x_wherefrom = sender->x_vec; + #if PD_DSPTHREADS + 
x->x_lock = &sender->x_lock; + #endif + } else { pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name); x->x_wherefrom = 0; + #if PD_DSPTHREADS + x->x_lock = 0; + #endif } } else @@ -179,7 +220,7 @@ static void sigreceive_setup(void) { sigreceive_class = class_new(gensym("receive~"), (t_newmethod)sigreceive_new, 0, - sizeof(t_sigreceive), 0, A_DEFSYM, 0); + sizeof(t_sigreceive), CLASS_DEFAULT, A_DEFSYM, 0); class_addcreator((t_newmethod)sigreceive_new, gensym("r~"), A_DEFSYM, 0); class_addmethod(sigreceive_class, (t_method)sigreceive_set, gensym("set"), A_SYMBOL, 0); @@ -197,6 +238,9 @@ typedef struct _sigcatch t_symbol *x_sym; int x_n; t_sample *x_vec; +#if PD_DSPTHREADS + t_rwspinlock x_lock; +#endif } t_sigcatch; static void *sigcatch_new(t_symbol *s) @@ -207,25 +251,34 @@ static void *sigcatch_new(t_symbol *s) x->x_n = DEFSENDVS; x->x_vec = (t_sample *)getbytes(DEFSENDVS * sizeof(t_sample)); memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(t_sample)); +#if PD_DSPTHREADS + rwspinlock_init((t_rwspinlock *)&x->x_lock); +#endif outlet_new(&x->x_obj, &s_signal); return (x); } static t_int *sigcatch_perform(t_int *w) { - t_sample *in = (t_sample *)(w[1]); + t_sigcatch *x = (t_sigcatch *)(w[1]); + t_sample *in = x->x_vec; t_sample *out = (t_sample *)(w[2]); int n = (int)(w[3]); + LOCK(x->x_lock); while (n--) *out++ = *in, *in++ = 0; + UNLOCK(x->x_lock); return (w+4); } /* tb: vectorized catch function */ static t_int *sigcatch_perf8(t_int *w) { - t_sample *in = (t_sample *)(w[1]); + t_sigcatch *x = (t_sigcatch *)(w[1]); + t_sample *in = x->x_vec; t_sample *out = (t_sample *)(w[2]); int n = (int)(w[3]); + /* reading + writing */ + LOCK(x->x_lock); for (; n; n -= 8, in += 8, out += 8) { out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; out[3] = in[3]; @@ -234,6 +287,7 @@ static t_int *sigcatch_perf8(t_int *w) in[0] = 0; in[1] = 0; in[2] = 0; in[3] = 0; in[4] = 0; in[5] = 0; in[6] = 0; in[7] = 0; } + UNLOCK(x->x_lock); return (w+4); } @@ -242,9 +296,9 @@ 
static void sigcatch_dsp(t_sigcatch *x, t_signal **sp) if (x->x_n == sp[0]->s_n) { if(sp[0]->s_n&7) - dsp_add(sigcatch_perform, 3, x->x_vec, sp[0]->s_vec, (t_int)sp[0]->s_n); + dsp_add(sigcatch_perform, 3, x, sp[0]->s_vec, (t_int)sp[0]->s_n); else - dsp_add(sigcatch_perf8, 3, x->x_vec, sp[0]->s_vec, (t_int)sp[0]->s_n); + dsp_add(sigcatch_perf8, 3, x, sp[0]->s_vec, (t_int)sp[0]->s_n); } else pd_error(0, "sigcatch %s: unexpected vector size", x->x_sym->s_name); } @@ -258,7 +312,8 @@ static void sigcatch_free(t_sigcatch *x) static void sigcatch_setup(void) { sigcatch_class = class_new(gensym("catch~"), (t_newmethod)sigcatch_new, - (t_method)sigcatch_free, sizeof(t_sigcatch), CLASS_NOINLET, A_DEFSYM, 0); + (t_method)sigcatch_free, sizeof(t_sigcatch), + CLASS_THREADSAFE | CLASS_NOINLET, A_DEFSYM, 0); class_addmethod(sigcatch_class, (t_method)sigcatch_dsp, gensym("dsp"), A_CANT, 0); class_sethelpsymbol(sigcatch_class, gensym("throw~-catch~")); @@ -274,6 +329,9 @@ typedef struct _sigthrow t_sample *x_whereto; int x_n; t_float x_f; +#if PD_DSPTHREADS + t_rwspinlock *x_lock; +#endif } t_sigthrow; static void *sigthrow_new(t_symbol *s) @@ -283,6 +341,9 @@ static void *sigthrow_new(t_symbol *s) x->x_whereto = 0; x->x_n = DEFSENDVS; x->x_f = 0; +#if PD_DSPTHREADS + x->x_lock = 0; +#endif return (x); } @@ -294,12 +355,14 @@ static t_int *sigthrow_perform(t_int *w) t_sample *out = x->x_whereto; if (out) { + LOCK(*x->x_lock); while (n--) { *out += (PD_BIGORSMALL(*in) ? 
0 : *in); out++; in++; } + UNLOCK(*x->x_lock); } return (w+4); } @@ -311,11 +374,19 @@ static void sigthrow_set(t_sigthrow *x, t_symbol *s) if (catcher) { if (catcher->x_n == x->x_n) + { x->x_whereto = catcher->x_vec; + #if PD_DSPTHREADS + x->x_lock = &catcher->x_lock; + #endif + } else { pd_error(x, "throw~ %s: vector size mismatch", x->x_sym->s_name); x->x_whereto = 0; + #if PD_DSPTHREADS + x->x_lock = 0; + #endif } } else x->x_whereto = 0; /* no match: now no longer considered an error */ @@ -338,7 +409,7 @@ static void sigthrow_dsp(t_sigthrow *x, t_signal **sp) static void sigthrow_setup(void) { sigthrow_class = class_new(gensym("throw~"), (t_newmethod)sigthrow_new, 0, - sizeof(t_sigthrow), 0, A_DEFSYM, 0); + sizeof(t_sigthrow), CLASS_DEFAULT, A_DEFSYM, 0); class_addmethod(sigthrow_class, (t_method)sigthrow_set, gensym("set"), A_SYMBOL, 0); CLASS_MAINSIGNALIN(sigthrow_class, t_sigthrow, x_f); diff --git a/src/d_math.c b/src/d_math.c index 244fd93c0f..c33b571409 100644 --- a/src/d_math.c +++ b/src/d_math.c @@ -58,7 +58,7 @@ static void clip_dsp(t_clip *x, t_signal **sp) static void clip_setup(void) { clip_class = class_new(gensym("clip~"), (t_newmethod)clip_new, 0, - sizeof(t_clip), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + sizeof(t_clip), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(clip_class, t_clip, x_f); class_addmethod(clip_class, (t_method)clip_dsp, gensym("dsp"), A_CANT, 0); } @@ -177,7 +177,7 @@ static void sigrsqrt_dsp(t_sigrsqrt *x, t_signal **sp) void sigrsqrt_setup(void) { sigrsqrt_class = class_new(gensym("rsqrt~"), (t_newmethod)sigrsqrt_new, 0, - sizeof(t_sigrsqrt), 0, 0); + sizeof(t_sigrsqrt), CLASS_DEFAULT, 0); /* an old name for it: */ class_addcreator(sigrsqrt_new, gensym("q8_rsqrt~"), 0); CLASS_MAINSIGNALIN(sigrsqrt_class, t_sigrsqrt, x_f); @@ -236,7 +236,7 @@ static void sigsqrt_dsp(t_sigsqrt *x, t_signal **sp) void sigsqrt_setup(void) { sigsqrt_class = class_new(gensym("sqrt~"), (t_newmethod)sigsqrt_new, 0, - sizeof(t_sigsqrt), 0, 
0); + sizeof(t_sigsqrt), CLASS_DEFAULT, 0); class_addcreator(sigsqrt_new, gensym("q8_sqrt~"), 0); /* old name */ CLASS_MAINSIGNALIN(sigsqrt_class, t_sigsqrt, x_f); class_addmethod(sigsqrt_class, (t_method)sigsqrt_dsp, @@ -302,7 +302,7 @@ static void sigwrap_dsp(t_sigwrap *x, t_signal **sp) void sigwrap_setup(void) { sigwrap_class = class_new(gensym("wrap~"), (t_newmethod)sigwrap_new, 0, - sizeof(t_sigwrap), 0, 0); + sizeof(t_sigwrap), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(sigwrap_class, t_sigwrap, x_f); class_addmethod(sigwrap_class, (t_method)sigwrap_dsp, gensym("dsp"), A_CANT, 0); @@ -351,7 +351,7 @@ static void mtof_tilde_dsp(t_mtof_tilde *x, t_signal **sp) void mtof_tilde_setup(void) { mtof_tilde_class = class_new(gensym("mtof~"), (t_newmethod)mtof_tilde_new, 0, - sizeof(t_mtof_tilde), 0, 0); + sizeof(t_mtof_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(mtof_tilde_class, t_mtof_tilde, x_f); class_addmethod(mtof_tilde_class, (t_method)mtof_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -395,7 +395,7 @@ static void ftom_tilde_dsp(t_ftom_tilde *x, t_signal **sp) void ftom_tilde_setup(void) { ftom_tilde_class = class_new(gensym("ftom~"), (t_newmethod)ftom_tilde_new, 0, - sizeof(t_ftom_tilde), 0, 0); + sizeof(t_ftom_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(ftom_tilde_class, t_ftom_tilde, x_f); class_addmethod(ftom_tilde_class, (t_method)ftom_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -445,7 +445,7 @@ static void dbtorms_tilde_dsp(t_dbtorms_tilde *x, t_signal **sp) void dbtorms_tilde_setup(void) { dbtorms_tilde_class = class_new(gensym("dbtorms~"), (t_newmethod)dbtorms_tilde_new, 0, - sizeof(t_dbtorms_tilde), 0, 0); + sizeof(t_dbtorms_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(dbtorms_tilde_class, t_dbtorms_tilde, x_f); class_addmethod(dbtorms_tilde_class, (t_method)dbtorms_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -494,7 +494,7 @@ static void rmstodb_tilde_dsp(t_rmstodb_tilde *x, t_signal **sp) void rmstodb_tilde_setup(void) { rmstodb_tilde_class = 
class_new(gensym("rmstodb~"), - (t_newmethod)rmstodb_tilde_new, 0, sizeof(t_rmstodb_tilde), 0, 0); + (t_newmethod)rmstodb_tilde_new, 0, sizeof(t_rmstodb_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(rmstodb_tilde_class, t_rmstodb_tilde, x_f); class_addmethod(rmstodb_tilde_class, (t_method)rmstodb_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -544,7 +544,7 @@ static void dbtopow_tilde_dsp(t_dbtopow_tilde *x, t_signal **sp) void dbtopow_tilde_setup(void) { dbtopow_tilde_class = class_new(gensym("dbtopow~"), (t_newmethod)dbtopow_tilde_new, 0, - sizeof(t_dbtopow_tilde), 0, 0); + sizeof(t_dbtopow_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(dbtopow_tilde_class, t_dbtopow_tilde, x_f); class_addmethod(dbtopow_tilde_class, (t_method)dbtopow_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -593,7 +593,7 @@ static void powtodb_tilde_dsp(t_powtodb_tilde *x, t_signal **sp) void powtodb_tilde_setup(void) { powtodb_tilde_class = class_new(gensym("powtodb~"), (t_newmethod)powtodb_tilde_new, 0, - sizeof(t_powtodb_tilde), 0, 0); + sizeof(t_powtodb_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(powtodb_tilde_class, t_powtodb_tilde, x_f); class_addmethod(powtodb_tilde_class, (t_method)powtodb_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -643,7 +643,7 @@ static void pow_tilde_dsp(t_pow_tilde *x, t_signal **sp) static void pow_tilde_setup(void) { pow_tilde_class = class_new(gensym("pow~"), (t_newmethod)pow_tilde_new, 0, - sizeof(t_pow_tilde), 0, A_DEFFLOAT, 0); + sizeof(t_pow_tilde), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(pow_tilde_class, t_pow_tilde, x_f); class_addmethod(pow_tilde_class, (t_method)pow_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -684,7 +684,7 @@ static void exp_tilde_dsp(t_exp_tilde *x, t_signal **sp) static void exp_tilde_setup(void) { exp_tilde_class = class_new(gensym("exp~"), (t_newmethod)exp_tilde_new, 0, - sizeof(t_exp_tilde), 0, 0); + sizeof(t_exp_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(exp_tilde_class, t_exp_tilde, x_f); class_addmethod(exp_tilde_class, 
(t_method)exp_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -737,7 +737,7 @@ static void log_tilde_dsp(t_log_tilde *x, t_signal **sp) static void log_tilde_setup(void) { log_tilde_class = class_new(gensym("log~"), (t_newmethod)log_tilde_new, 0, - sizeof(t_log_tilde), 0, A_DEFFLOAT, 0); + sizeof(t_log_tilde), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(log_tilde_class, t_log_tilde, x_f); class_addmethod(log_tilde_class, (t_method)log_tilde_dsp, gensym("dsp"), A_CANT, 0); @@ -781,7 +781,7 @@ static void abs_tilde_dsp(t_abs_tilde *x, t_signal **sp) static void abs_tilde_setup(void) { abs_tilde_class = class_new(gensym("abs~"), (t_newmethod)abs_tilde_new, 0, - sizeof(t_abs_tilde), 0, 0); + sizeof(t_abs_tilde), CLASS_DEFAULT, 0); CLASS_MAINSIGNALIN(abs_tilde_class, t_abs_tilde, x_f); class_addmethod(abs_tilde_class, (t_method)abs_tilde_dsp, gensym("dsp"), A_CANT, 0); diff --git a/src/d_misc.c b/src/d_misc.c index 521e10ad0b..88812393a9 100644 --- a/src/d_misc.c +++ b/src/d_misc.c @@ -65,7 +65,7 @@ static void *print_new(t_symbol *s) static void print_setup(void) { print_class = class_new(gensym("print~"), (t_newmethod)print_new, 0, - sizeof(t_print), 0, A_DEFSYM, 0); + sizeof(t_print), CLASS_DEFAULT, A_DEFSYM, 0); CLASS_MAINSIGNALIN(print_class, t_print, x_f); class_addmethod(print_class, (t_method)print_dsp, gensym("dsp"), A_CANT, 0); class_addbang(print_class, print_bang); @@ -115,7 +115,7 @@ static void *bang_tilde_new(t_symbol *s) static void bang_tilde_setup(void) { bang_tilde_class = class_new(gensym("bang~"), (t_newmethod)bang_tilde_new, - (t_method)bang_tilde_free, sizeof(t_bang), 0, 0); + (t_method)bang_tilde_free, sizeof(t_bang), CLASS_DEFAULT, 0); class_addmethod(bang_tilde_class, (t_method)bang_tilde_dsp, gensym("dsp"), 0); } diff --git a/src/d_osc.c b/src/d_osc.c index b6d3a43cc7..47ef5415d3 100644 --- a/src/d_osc.c +++ b/src/d_osc.c @@ -115,7 +115,7 @@ static void phasor_ft1(t_phasor *x, t_float f) static void phasor_setup(void) { phasor_class = 
class_new(gensym("phasor~"), (t_newmethod)phasor_new, 0, - sizeof(t_phasor), 0, A_DEFFLOAT, 0); + sizeof(t_phasor), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(phasor_class, t_phasor, x_f); class_addmethod(phasor_class, (t_method)phasor_dsp, gensym("dsp"), A_CANT, 0); @@ -230,7 +230,7 @@ static void cos_cleanup(t_class *c) static void cos_setup(void) { cos_class = class_new(gensym("cos~"), (t_newmethod)cos_new, 0, - sizeof(t_cos), 0, A_DEFFLOAT, 0); + sizeof(t_cos), CLASS_DEFAULT, A_DEFFLOAT, 0); class_setfreefn(cos_class, cos_cleanup); CLASS_MAINSIGNALIN(cos_class, t_cos, x_f); class_addmethod(cos_class, (t_method)cos_dsp, gensym("dsp"), A_CANT, 0); @@ -332,7 +332,7 @@ static void osc_ft1(t_osc *x, t_float f) static void osc_setup(void) { osc_class = class_new(gensym("osc~"), (t_newmethod)osc_new, 0, - sizeof(t_osc), 0, A_DEFFLOAT, 0); + sizeof(t_osc), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(osc_class, t_osc, x_f); class_addmethod(osc_class, (t_method)osc_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(osc_class, (t_method)osc_ft1, gensym("ft1"), A_FLOAT, 0); @@ -456,7 +456,7 @@ static void sigvcf_setup(void) { sigvcf_class = class_new(gensym("vcf~"), (t_newmethod)sigvcf_new, 0, - sizeof(t_sigvcf), 0, A_DEFFLOAT, 0); + sizeof(t_sigvcf), CLASS_DEFAULT, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigvcf_class, t_sigvcf, x_f); class_addmethod(sigvcf_class, (t_method)sigvcf_dsp, gensym("dsp"), A_CANT, 0); @@ -514,7 +514,7 @@ static void noise_float(t_noise *x, t_float f) static void noise_setup(void) { noise_class = class_new(gensym("noise~"), (t_newmethod)noise_new, 0, - sizeof(t_noise), 0, A_DEFFLOAT, 0); + sizeof(t_noise), CLASS_DEFAULT, A_DEFFLOAT, 0); class_addmethod(noise_class, (t_method)noise_dsp, gensym("dsp"), A_CANT, 0); class_addmethod(noise_class, (t_method)noise_float, diff --git a/src/d_soundfile.c b/src/d_soundfile.c index 89a092a856..c014740fb0 100644 --- a/src/d_soundfile.c +++ b/src/d_soundfile.c @@ -2232,7 +2232,7 @@ static void 
readsf_setup(void) { readsf_class = class_new(gensym("readsf~"), (t_newmethod)readsf_new, (t_method)readsf_free, - sizeof(t_readsf), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + sizeof(t_readsf), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addfloat(readsf_class, (t_method)readsf_float); class_addmethod(readsf_class, (t_method)readsf_start, gensym("start"), 0); class_addmethod(readsf_class, (t_method)readsf_stop, gensym("stop"), 0); @@ -2725,7 +2725,7 @@ static void writesf_setup(void) { writesf_class = class_new(gensym("writesf~"), (t_newmethod)writesf_new, (t_method)writesf_free, - sizeof(t_writesf), 0, A_DEFFLOAT, A_DEFFLOAT, 0); + sizeof(t_writesf), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addmethod(writesf_class, (t_method)writesf_start, gensym("start"), 0); class_addmethod(writesf_class, (t_method)writesf_stop, gensym("stop"), 0); class_addmethod(writesf_class, (t_method)writesf_dsp, diff --git a/src/d_threadpool.c b/src/d_threadpool.c new file mode 100644 index 0000000000..0f324aa4a1 --- /dev/null +++ b/src/d_threadpool.c @@ -0,0 +1,1242 @@ +/* Copyright (c) 2021 Christof Ressi. + * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. */ + +#if PD_DSPTHREADS + +#if !PD_PARALLEL +# error PD_DSPTHREADS requires PD_PARALLEL! +#endif + +/* This one must be defined before including any headers! 
*/ +#ifdef __linux__ +# ifndef _GNU_SOURCE +# define _GNU_SOURCE +# endif +#endif + +#include "m_pd.h" +#include "m_imp.h" +#include "s_stuff.h" +#include "s_sync.h" + +#include +#include +#include + +#include + +#if defined(_WIN32) +# include +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +# include +# include +#else /* Linux */ +# include +# include +# include +#endif + +/* define for debugging DSP tasks and task queues */ +// #define DEBUG_DSPTHREADS + +/* ----------------------- thread utilities -------------------------- */ + +typedef struct _cpuinfo +{ + int physical_id; + int core_id; + int sibling_id; + int id; +} t_cpuinfo; + + /* convert t_cpuinfo to an uint64_t for comparison. + * We try to keep siblings as far apart as possible, + * followed by physical packages, so that cores of + * the same package are close to each other. */ +static inline uint64_t cpuinfo2number(t_cpuinfo *x) +{ + return ((uint64_t)(x)->sibling_id << 24) | + ((uint64_t)(x)->physical_id << 16) | ((uint64_t)(x)->core_id); +} + +static t_cpuinfo *cpuvec = NULL; +static int numcpus = 0; +static int numcores = 0; +static int numpackages = 0; + +static void cpuinfo_print(void) +{ + int i; + fprintf(stderr, "hardware topology:\n"); + fprintf(stderr, "\tlogical processors: %d\n", numcpus); + fprintf(stderr, "\tCPU cores: %d\n", numcores); + fprintf(stderr, "\tphysical packages: %d\n", numpackages); + fprintf(stderr, "\t---\n"); + for (i = 0; i < numcpus; i++) + { + fprintf(stderr, "\t#%d package: %d, core: %d, sibling: %d\n", + i, cpuvec[i].physical_id, cpuvec[i].core_id, cpuvec[i].sibling_id); + } + fflush(stderr); +} + + /* sort the list so that we can simply pick + * consecutive CPUs for effective thread pinning. */ +static int cpuinfo_sort(const void *x, const void *y) +{ + uint64_t a = cpuinfo2number((t_cpuinfo *)x); + uint64_t b = cpuinfo2number((t_cpuinfo *)y); + return (a > b) ? 1 : (a < b) ? 
-1 : 0; +} + +static void cpuinfo_done(void) +{ + if (sys_verbose) + cpuinfo_print(); /* print original list */ + /* sort the list */ + qsort(cpuvec, numcpus, sizeof(t_cpuinfo), cpuinfo_sort); +#if 0 + cpuinfo_print(); /* print sorted list (for debugging) */ +#endif +} + + /* 1: success, 0: failure */ +static int parse_hardware_topology(void) +{ + /* Make sure to call this only once. This is not really thread-safe, + * but in practice the function is called for the first time either in + * threadpool_init() or via sys_argparse() -> sys_set_audio_settings(). + * LATER replace with C11 call_once(). */ + static int initted = 0; + if (initted) + return (numcpus > 0); + initted = 1; + +#ifdef _WIN32 /* Windows */ + typedef BOOL (WINAPI *t_func)( + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + + t_func fn; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info; + DWORD err, size = 0; + int i, n; + + /* available since Windows XP SP3 */ + fn = (t_func)GetProcAddress( + GetModuleHandleA("kernel32"), "GetLogicalProcessorInformation"); + if (!fn) + { + fprintf(stderr, "GetLogicalProcessorInformation() not supported\n"); + return 0; + } + /* call with size 0 to retrieve actual size; + * ERROR_INSUFFICIENT_BUFFER is expected. 
*/ + fn(NULL, &size); + if ((err = GetLastError()) != ERROR_INSUFFICIENT_BUFFER) + goto fail; + info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(size); + if (fn(info, &size) == FALSE) + { + err = GetLastError(); + free(info); + goto fail; + } + n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + for (i = 0; i < n; ++i) + { + if (info[i].Relationship == RelationProcessorCore) + { + /* add all siblings to CPU list */ + int j, nsiblings = 0; + ULONG_PTR mask = info[i].ProcessorMask; + for (j = 0; mask; j++, mask >>= 1) + { + if (mask & 1) + { + t_cpuinfo info = { 0, numcores, nsiblings, j }; + int index = numcpus++; + cpuvec = realloc(cpuvec, sizeof(t_cpuinfo) * numcpus); + cpuvec[index] = info; + nsiblings++; + } + } + numcores++; + } + } + /* loop again for physical packages */ + for (i = 0; i < n; ++i) + { + if (info[i].Relationship == RelationProcessorPackage) + { + int j, k; + ULONG_PTR mask = info[i].ProcessorMask; + /* loop over all processors and find corresponding t_cpuinfo */ + for (j = 0; mask; j++, mask >>= 1) + { + if (mask & 1) + { + for (k = 0; k < numcpus; ++k) + { + if (cpuvec[k].id == j) + cpuvec[k].physical_id = numpackages; + } + } + } + numpackages++; + } + } + free(info); + cpuinfo_done(); + return 1; +fail: + fprintf(stderr, "GetLogicalProcessorInformation() failed (%d)\n", err); + return 0; +#elif defined(__linux__) /* Linux */ + /* The file /proc/cpuinfo lists all logical CPUs, where + * each entry has a property "physical id" and "core id". 
*/ + t_cpuinfo cpu; + FILE *fp; + char *line = 0; + size_t len; + if (!(fp = fopen("/proc/cpuinfo", "r"))) + { + fprintf(stderr, "could not open /proc/cpuinfo\n"); + return 0; + } + cpu.physical_id = cpu.core_id = -1; + while (getline(&line, &len, fp) >= 0) + { + const char *pos, *colon; + if (len == 0) + continue; + + /* search for "physical id" and "core id" */ + if ((pos = strstr(line, "physical id"))) + { + if (!(colon = strchr(pos, ':')) || + (sscanf(colon + 1, "%d", &cpu.physical_id) < 1)) + goto fail; + } + else if ((pos = strstr(line, "core id"))) + { + if (!(colon = strchr(pos, ':')) || + (sscanf(colon + 1, "%d", &cpu.core_id) < 1)) + goto fail; + } + /* found both */ + if (cpu.physical_id >= 0 && cpu.core_id >= 0) + { + int i, found, index; + cpu.sibling_id = 0; + cpu.id = numcpus; + /* get sibling number */ + for (i = 0; i < numcpus; ++i) + { + if ((cpuvec[i].physical_id == cpu.physical_id) && + (cpuvec[i].core_id == cpu.core_id)) + { + cpu.sibling_id++; + } + } + if (cpu.sibling_id == 0) + numcores++; + /* check for new physical package */ + found = 0; + for (i = 0; i < numcpus; ++i) + { + if (cpuvec[i].physical_id == cpu.physical_id) + found = 1; + } + if (!found) + numpackages++; + index = numcpus++; + cpuvec = realloc(cpuvec, sizeof(t_cpuinfo) * numcpus); + cpuvec[index] = cpu; + + cpu.physical_id = cpu.core_id = -1; /* reset for next CPU */ + } + } + if (line) + free(line); + fclose(fp); + cpuinfo_done(); + return 1; +fail: + fprintf(stderr, "/proc/cpuinfo: unexpected format\n"); + fclose(fp); + if (line) + free(line); + if (cpuvec) + free(cpuvec); + cpuvec = NULL; + numcpus = 0; + numcores = 0; + numpackages = 0; + return 0; +#else /* Apple, BSDs, etc. 
*/ + fprintf(stderr, "parse_hardware_topology() not implemented\n"); + return 0; +#endif +} + + /* number of logical processors; 0: failure */ +static int thread_hardware_concurrency(void) +{ +#if defined(_WIN32) + SYSTEM_INFO info; + memset(&info, 0, sizeof(info)); + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + int count; + size_t size = sizeof(count); + if (sysctlbyname("hw.ncpu", &count, &size, NULL, 0) == 0) + return count; + else + { + fprintf(stderr, "sysctlbyname() failed (%d)\n", errno); + return 0; + } +#elif defined(_SC_NPROCESSORS_ONLN) /* same name as passed to sysconf() below */ + int count = sysconf(_SC_NPROCESSORS_ONLN); + if (count > 0) + return count; + else + { + fprintf(stderr, "sysconf() failed (%d)\n", errno); + return 0; + } +#elif defined(__linux__) + return get_nprocs(); +#else + #warning "thread_hardware_concurrency() not implemented" + return 0; +#endif +} + + /* number of physical cores; 0: failure */ +static int thread_physical_concurrency(void) +{ +#if defined(_WIN32) || defined(__linux__) + parse_hardware_topology(); /* fills numcores; only parses on the first call */ + return numcores; +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + int count; + size_t size = sizeof(count); + if (sysctlbyname("hw.physicalcpu", &count, &size, NULL, 0) == 0) + return count; + else + { + fprintf(stderr, "sysctlbyname() failed (%d)\n", errno); + return 0; + } +#else + #warning "thread_physical_concurrency() not implemented" + /* fall back to hardware concurrency */ + return thread_hardware_concurrency(); +#endif +} + + /* 1: success, 0: failure */ +static int thread_set_realtime(void) +{ +#if defined(_WIN32) + /* Force high thread priority in case we're not a high priority process. + * This might be necessary for libpd when using the internal thread pool. 
*/ + int pc = GetPriorityClass(GetCurrentProcess()); + if (!pc) + { + fprintf(stderr, "GetPriorityClass() failed (%d)\n", GetLastError()); + return 0; + } + if (pc != HIGH_PRIORITY_CLASS && pc != REALTIME_PRIORITY_CLASS) /* class constants are not ordered by priority */ + { + if (!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL)) + { + fprintf(stderr, "SetThreadPriority() failed (%d)\n", GetLastError()); + return 0; + } + } + return 1; +#elif defined(__APPLE__) + /* Is SCHED_RR still appropriate? + * Should we use the Mach API instead? */ + struct sched_param param; + int policy = SCHED_RR; + int err; + param.sched_priority = 80; /* adjust 0 : 100 */ + + err = pthread_setschedparam(pthread_self(), policy, &param); + if (err) + { + fprintf(stderr, "pthread_setschedparam() failed (%d)\n", err); + return 0; + } + return 1; +#else /* Linux + BSD, see sys_set_priority() */ + struct sched_param par; + int p; +#ifdef USEAPI_JACK + p = sched_get_priority_min(SCHED_FIFO) + 5; +#else + p = sched_get_priority_max(SCHED_FIFO) - 7; +#endif + par.sched_priority = p; + if (sched_setscheduler(0, SCHED_FIFO, &par) < 0) + { + fprintf(stderr, "sched_setscheduler() failed (%d)\n", errno); + return 0; + } + return 1; +#endif +} + + /* pin the calling thread to CPU 'i', or restore the original + * affinity if 'i' is negative. 1: success, 0: failure */ +static int thread_set_affinity(int i) +{ +#if defined(_WIN32) + static THREADLOCAL DWORD_PTR original = 0; + if (i >= 0) /* pin to the given CPU */ + { + DWORD_PTR oldmask, newmask = (DWORD_PTR)1 << i; /* DWORD would truncate CPUs >= 32 */ + oldmask = SetThreadAffinityMask(GetCurrentThread(), newmask); + if (oldmask == 0) + { + fprintf(stderr, "SetThreadAffinityMask() failed (%d)\n", GetLastError()); + return 0; + } + /* store original CPU mask (only the first time!) 
*/ + if (!original) + original = oldmask; + } + else if (original) /* restore original mask */ + { + if (SetThreadAffinityMask(GetCurrentThread(), original) == 0) + { + fprintf(stderr, "SetThreadAffinityMask() failed (%d)\n", GetLastError()); + return 0; + } + } + return 1; +#elif defined(__linux__) + cpu_set_t cpuset, *ptr; + static THREADLOCAL cpu_set_t original; + static THREADLOCAL int initted = 0; + /* store original CPU set when we first enter this function */ + if (initted < 0) + return 0; /* init failed */ + if (!initted) + { + if (sched_getaffinity(0, sizeof(cpu_set_t), &original) != 0) + { + fprintf(stderr, "sched_getaffinity() failed (%d)\n", errno); + initted = -1; + return 0; + } + initted = 1; + } + if (i >= 0) /* pin to the given CPU */ + { + CPU_ZERO(&cpuset); + CPU_SET(i, &cpuset); + ptr = &cpuset; + } + else /* restore original CPU set */ + ptr = &original; + if (sched_setaffinity(0, sizeof(cpu_set_t), ptr) < 0) + { + fprintf(stderr, "sched_setaffinity() failed (%d)\n", errno); + return 0; + } + return 1; +#else + fprintf(stderr, "thread_set_affinity() not implemented\n"); + return 0; +#endif +} + +/* -------------------------- helper functions -------------------------- */ + +static int sys_maxnumdspthreads(void) +{ + /* only obtain once per thread (value is fixed) */ + static PERTHREAD int count = -1; + if (count < 0) + { + count = thread_hardware_concurrency(); + if (count <= 0) + { + fprintf(stderr, "thread_hardware_concurrency() failed; default to 1\n"); + count = 1; + } + } + return count; +} + + /* also used in sys_get_audio_settings() */ +int sys_defnumdspthreads(void) +{ + /* only obtain once per thread (value is fixed) */ + static PERTHREAD int count = -1; + if (count < 0) + { + #if 1 + /* use number of physical cores because SMT with + * all available CPUs can lead to worse performance */ + count = thread_physical_concurrency(); + if (count <= 0) + { + fprintf(stderr, "thread_physical_concurrency() failed, " + "use all available 
CPUs.\n"); + count = sys_maxnumdspthreads(); + } + #else + /* use all available CPUs. */ + count = sys_maxnumdspthreads(); + #endif + } + return count; +} + +static void dspthread_setrealtime(int index) +{ + if (thread_set_realtime()) + { + if (sys_verbose) + fprintf(stderr, "DSP thread %d: set realtime priority\n", index); + } + else + fprintf(stderr, "DSP thread %d: couldn't set realtime priority\n", index); +} + +static void dspthread_pin(int index, int pin) +{ + /* We only use thread pinning on Windows and Linux; + * on macOS we want to use audio workgroups instead. */ +#if defined(_WIN32) || defined(__linux__) + if (sys_threadaffinity && (numcpus > 0)) + { + if (index >= 0 && index < numcpus) + { + if (pin) /* pin to thread */ + { + /* see cpuinfo_sort() */ + int cpu = cpuvec[index].id; + if (thread_set_affinity(cpu)) + { + if (sys_verbose) + fprintf(stderr, "DSP thread %d: " + "pinned to CPU %d\n", index, cpu); + } + else + fprintf(stderr, "DSP thread %d: " + "could not pin to CPU %d\n", index, cpu); + } + else /* unpin */ + { + if (!thread_set_affinity(-1)) + fprintf(stderr, "DSP thread %d: could not unpin\n", index); + } + } + else + bug("dspthread_pin"); + } +#endif +} + +/* -------------------------- t_dspthreadpool --------------------------- */ + +typedef struct _backoff +{ + int b_n; +} t_backoff; + +#define BACKOFF_MINLOOPS 16 +#define BACKOFF_MAXLOOPS 4096 + +void backoff_reset(t_backoff *x) +{ + x->b_n = BACKOFF_MINLOOPS; +} + +void backoff_perform(t_backoff *x) +{ + int i, n = x->b_n; + for (i = 0; i < n; i++) + pause_cpu(); + x->b_n *= 2; + if (x->b_n > BACKOFF_MAXLOOPS) + x->b_n = BACKOFF_MAXLOOPS; +} + +typedef struct _dspthreadpool +{ +#ifdef MSVC_INTERLOCKED + long tp_running; +#else + atomic_int tp_running; +#endif + int tp_n; + pthread_t *tp_threads; + t_lockfree_stack tp_tasks; + t_fast_semaphore tp_sem; +#ifdef MSVC_INTERLOCKED + long tp_remaining; +#else + atomic_int tp_remaining; +#endif +} t_dspthreadpool; + +static t_dspthreadpool 
*d_threadpool = NULL; + +static void dspthread_dorun(int index); + +static void * thread_function(void *x) +{ + int index = (int)(intptr_t)x; + if (sys_hipriority != 0) /* -1 or 1 */ + dspthread_setrealtime(index); + if (!d_threadpool) + { + bug("DSP thread pool not initialized!"); + return 0; + } + if (index == 0) + { + bug("thread index 0 reserved for main audio thread!"); + return 0; + } + else if (index < 0 || index > d_threadpool->tp_n) + { + bug("thread index %d out of range!", index); + return 0; + } + + dspthread_dorun(index); + + return NULL; +} + +int sys_havedspthreadpool(void) +{ + return 1; +} + + /* called with global lock set! */ +static void dspthreadpool_init(void) +{ + if (!d_threadpool) + { + d_threadpool = (t_dspthreadpool *)getbytes(sizeof(t_dspthreadpool)); + d_threadpool->tp_running = 0; + d_threadpool->tp_n = 0; + d_threadpool->tp_threads = 0; + lockfree_stack_init(&d_threadpool->tp_tasks); + fast_semaphore_init(&d_threadpool->tp_sem); + d_threadpool->tp_remaining = 0; + /* for thread pinning */ + if (sys_threadaffinity) + parse_hardware_topology(); + } +} + +void dspthreadpool_stop(int external) +{ + int n = d_threadpool->tp_n; + if (!n) /* no threads or already stopped */ + return; + if (sys_verbose) + fprintf(stderr, "stop DSP thread pool\n"); +#ifdef MSVC_INTERLOCKED + _InterlockedExchange(&d_threadpool->tp_running, 0); +#else + atomic_store(&d_threadpool->tp_running, 0); +#endif + /* wake up helper threads */ + fast_semaphore_postn(&d_threadpool->tp_sem, n); + if (!external) + { + /* join helper threads */ + for (int i = 1; i < n; ++i) + pthread_join(d_threadpool->tp_threads[i], NULL); + } + if (d_threadpool->tp_threads) + freebytes(d_threadpool->tp_threads, sizeof(pthread_t) * n); + d_threadpool->tp_threads = 0; + d_threadpool->tp_n = 0; + + dspthread_pin(0, 0); /* unpin */ +} + +int sys_dspthreadpool_start(int *numthreads, int external) +{ + int n, maxnumthreads; + pd_globallock(); /* global lock begin */ + dspthreadpool_init(); + 
/* Public API: stop the DSP thread pool.
 * Takes the global lock, makes sure the pool object exists (so that
 * dspthreadpool_stop() has something to operate on), stops the pool
 * and releases the lock again. 'external' is passed on unchanged to
 * dspthreadpool_stop(). Always returns 1. */
int sys_dspthreadpool_stop(int external)
{
    pd_globallock();
    dspthreadpool_init();
    dspthreadpool_stop(external);
    pd_globalunlock();
    return 1;
}
*/ + #ifdef MSVC_INTERLOCKED + int prev = _InterlockedExchangeAdd(&d_threadpool->tp_remaining, + ntasks); + #else + int prev = atomic_fetch_add(&d_threadpool->tp_remaining, ntasks); + #endif + /* only notify DSP helper threads if necessary */ + if (prev == 0) + fast_semaphore_postn(&d_threadpool->tp_sem, d_threadpool->tp_n); + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "-- DSP thread pool: start tick with %d active tasks\n", ntasks); + #endif + #ifndef PDINSTANCE + if (prev != 0) + pd_error(0, "DSP thread pool: bad task count (%d)", prev); + #endif + } +} + +static void dspthreadpool_push(t_dsptask *task) +{ + lockfree_stack_push(&d_threadpool->tp_tasks, task); +} + +static t_dsptask * dspthreadpool_pop(void) +{ + return lockfree_stack_pop(&d_threadpool->tp_tasks); +} + +static void dsptask_run(t_dsptask *x, int index); + +void dspthread_setindex(int index); +void mayer_init(void); +void mayer_term(void); + +static void dspthread_dorun(int index) +{ +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "DSP thread %d: start\n", index); +#endif + dspthread_setindex(index); + dspthread_pin(index, 1); + mayer_init(); /* init FFT */ + +#ifdef MSVC_INTERLOCKED + while (d_threadpool->tp_running) +#else + while (atomic_load_explicit(&d_threadpool->tp_running, + memory_order_relaxed)) +#endif + { + /* run as many tasks as possible */ + t_dsptask *t; + if (sys_threadspinwait) /* spin */ + { + int remaining; + t_backoff backoff; + backoff_reset(&backoff); + tryagain: + while ((t = dspthreadpool_pop())) + { + dsptask_run(t, index); + backoff_reset(&backoff); + } + #ifdef MSVC_INTERLOCKED + remaining = d_threadpool->tp_remaining); + #else + remaining = atomic_load_explicit( + &d_threadpool->tp_remaining, memory_order_acquire); + #endif + if (remaining > 0) + { + backoff_perform(&backoff); + goto tryagain; + } + /* wait for next tick (or quit) */ + } + else /* wait */ + { + while ((t = dspthreadpool_pop())) + dsptask_run(t, index); + /* wait for more tasks (or quit) */ + } + #ifdef 
DEBUG_DSPTHREADS + fprintf(stderr, "DSP thread %d: wait\n", index); + #endif + fast_semaphore_wait(&d_threadpool->tp_sem); + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "DSP thread %d: wake up\n", index); + #endif + } + + mayer_term(); /* term FFT */ +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "DSP thread %d: finish\n", index); +#endif +} + +int sys_dspthread_run(int index) +{ + if (!d_threadpool) + { + fprintf(stderr, "sys_dspthread_run: DSP thread pool not initialized!\n"); + return 0; + } + if (index == 0) + { + fprintf(stderr, "sys_dspthread_run: thread index 0 reserved for main audio thread!\n"); + return 0; + } + else if (index < 0 || index > d_threadpool->tp_n) + { + fprintf(stderr, "sys_dspthread_run: thread index %d out of range!\n", index); + return 0; + } + + dspthread_dorun(index); + + return 1; +} + +/* -------------------------- t_dsptaskqueue --------------------------- */ + +struct _dsptaskqueue +{ + int dq_numtasks; /* number of tasks, also doubles as reference count */ + int dq_numswitchoff; /* number of switched of tasks */ +#ifdef MSVC_INTERLOCKED + long dq_remaining; +#else + atomic_int dq_remaining; +#endif + t_fast_semaphore dq_sem; /* not needed for spinning */ + t_canvas *dq_owner; /* canvas or NULL */ + char dq_threadsafe; + char dq_warned; +}; + +t_dsptaskqueue * dsptaskqueue_new(t_canvas *owner) +{ + t_dsptaskqueue *x = (t_dsptaskqueue *)getbytes(sizeof(t_dsptaskqueue)); + x->dq_numtasks = 0; + x->dq_numswitchoff = 0; + x->dq_remaining = 0; + if (!sys_threadspinwait) + fast_semaphore_init(&x->dq_sem); + x->dq_owner = owner; + x->dq_threadsafe = 0; + x->dq_warned = 0; + return x; +} + + /* this is also called by dsptask_free(). we only free the queue + * when the reference count drops *below* zero. 
*/ +void dsptaskqueue_release(t_dsptaskqueue *x) +{ + int oldcount = x->dq_numtasks--; + if (oldcount > 0) + { + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: %d tasks (%d switched off)\n", + x, oldcount-1, x->dq_numswitchoff); + #endif + } + else if (oldcount == 0) /* release queue */ + { + if (x->dq_numswitchoff != 0) + bug("dsptaskqueue_release: bad switch count (%d)", + x->dq_numswitchoff); + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: release\n"); + #endif + if (!sys_threadspinwait) + fast_semaphore_destroy(&x->dq_sem); + freebytes(x, sizeof(t_dsptaskqueue)); + } + else if (oldcount < 0) + bug("dsptaskqueue_release: bad refcount (%d)", oldcount); +} + + /* check if our sub-tree is thread-safe and cache the result. + * Called once per DSP graph update in ugen_start() and + * ugen_done_graph(); see also canvas_markthreadsafe(). */ +void dsptaskqueue_update(t_dsptaskqueue *x) +{ + x->dq_threadsafe = sys_threadsafe ? + canvas_isthreadsafe(x->dq_owner, 0) : 1; /* silent! */ + x->dq_warned = 0; +} + + /* check if our sub-tree is thread-safe, using the cached result + * of dsptaskqueue_update() above. Called by block~ objects + * associated with this queue, see ugen_done_graph(). 
*/ +int dsptaskqueue_check(t_dsptaskqueue *x) +{ + if (x->dq_threadsafe) + return 1; + else + { + #if 1 + if (!x->dq_warned) /* only warn once per DSP task queue */ + #endif + { + if (canvas_isthreadsafe(x->dq_owner, 1)) /* loud */ + /* dq_threadsafe should have been true */ + bug("dsptaskqueue_check"); + x->dq_warned = 1; + } + return 0; + } +} + +void dsptaskqueue_reset(t_dsptaskqueue *x) +{ + int count = x->dq_numtasks - x->dq_numswitchoff; + if (count > 0) + { + x->dq_remaining = count; + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: reset with %d active tasks " + "(%d total, %d switched off)\n", + x, count, x->dq_numtasks, x->dq_numswitchoff); + #endif + } + else if (count < 0) + fprintf(stderr, "dsptaskqueue_reset: queue %p: bad task count (%d)\n", + x, count); +} + +static t_int *dsptaskqueue_doreset(t_int *w) +{ + t_dsptaskqueue *x = (t_dsptaskqueue *)w[1]; + dsptaskqueue_reset(x); + return w + 2; +} + +void dsp_add_reset(t_dsptaskqueue *x) +{ + dsp_add(dsptaskqueue_doreset, 1, x); +} + +void dsptaskqueue_join(t_dsptaskqueue *x) +{ + int count = x->dq_numtasks - x->dq_numswitchoff; + assert(count >= 0); + if (!d_threadpool || !d_threadpool->tp_n || !count) + /* single-threaded or no tasks, see also dsptask_sched() */ + return; +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: begin join\n", x); +#endif + /* We don't want to put the thread to sleep, so we first try to + * participate in DSP thread pool. + * NB: if PDINSTANCE defined, we might actually run tasks that + * belong to other Pd instances! LATER decide if we should push + * such tasks back to the queue? */ + if (sys_threadspinwait) /* spin */ + { + t_backoff backoff; + backoff_reset(&backoff); + #ifdef MSVC_INTERLOCKED + while (x->dq_remaining) + #else + while (atomic_load_explicit(&x->dq_remaining, + memory_order_relaxed)) + #endif + { + /* Pop and run a *single* task, then try again. 
+ * Unlike in dspthread_dorun(), we do not pop tasks in a loop + * because we might end up running tasks that don't belong to + * this queue (and have a much later deadline). */ + t_dsptask *t = dspthreadpool_pop(); + if (t) + { + dsptask_run(t, 0); + backoff_reset(&backoff); + } + else + backoff_perform(&backoff); + } + /* decrement global task counter. + /* NB: we *could* simply decrement all tasks at once in dsp_tick(), + * but then the DSP helper threads would always spin for the whole + * duration of the tick. By doing it here we make sure that they + * go to sleep as soon as all tasks have finished. */ + #ifdef MSVC_INTERLOCKED + _InterlockedExchangeAdd(&d_threadpool->tp_remaining, -count); + #else + atomic_fetch_sub_explicit(&d_threadpool->tp_remaining, count, + memory_order_release); + #endif + } + else /* wait */ + { + while (!fast_semaphore_trywait(&x->dq_sem)) + { + /* Pop and run a *single* task, see explanation above. */ + t_dsptask *t = dspthreadpool_pop(); + if (t) + dsptask_run(t, 0); + else + { + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: wait\n", x); + #endif + fast_semaphore_wait(&x->dq_sem); + break; /* ! 
*/ + } + } + } +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: end join\n", x); +#endif +} + +static t_int *dsptaskqueue_dojoin(t_int *w) +{ + t_dsptaskqueue *x = (t_dsptaskqueue *)w[1]; + dsptaskqueue_join(x); + return w + 2; +} + +void dsp_add_join(t_dsptaskqueue *x) +{ + dsp_add(dsptaskqueue_dojoin, 1, x); +} + +/* ---------------------------- t_dsptask ----------------------------- */ + +void ugen_addtask(t_dsptask *x); +void ugen_removetask(t_dsptask *x, int on); +void ugen_switchtask(t_dsptask *x, int on); + +struct _dsptask +{ + t_lfs_node dt_node; +#ifdef PDINSTANCE + t_pdinstance *dt_pdinstance; +#endif + t_dsptaskqueue *dt_queue; + t_dsptaskfn dt_fn; + void *dt_data; + int dt_switchoff; +}; + +t_dsptask * dsptask_new(t_dsptaskqueue *queue, t_dsptaskfn fn, void *data) +{ + t_dsptask *x = (t_dsptask *)getbytes(sizeof(t_dsptask)); + lfs_node_init(x); +#ifdef PDINSTANCE + x->dt_pdinstance = pd_this; +#endif + x->dt_queue = queue; + x->dt_fn = fn; + x->dt_data = data; + x->dt_switchoff = 0; + queue->dq_numtasks++; /* increment refcount */ +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: %d tasks (%d switched off)\n", + queue, queue->dq_numtasks, queue->dq_numswitchoff); +#endif + ugen_addtask(x); + return x; +} + +void dsptask_free(t_dsptask *x) +{ + /* make sure to decrement switch count! 
*/ + if (x->dt_switchoff > 0) + { + if (--x->dt_queue->dq_numswitchoff < 0) + bug("dsptask_free: bad queue switch count (%d)", + x->dt_queue->dq_numswitchoff); + } + /* remove and free */ + ugen_removetask(x, x->dt_switchoff == 0); + dsptaskqueue_release(x->dt_queue); + freebytes(x, sizeof(t_dsptask)); +} + +void dsptask_sched(t_dsptask *x) +{ + if (d_threadpool && d_threadpool->tp_n > 0) + { + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: sched task %p\n", x->dt_queue, x); + #endif + dspthreadpool_push(x); + if (!sys_threadspinwait) + fast_semaphore_post(&d_threadpool->tp_sem); + } + else /* single-threaded */ + { + /* execute immediately, see dsptaskqueue_join(). + * NB: don't use dsptask_run() here! */ + (x->dt_fn)(x->dt_data); + } +} + +static void dsptask_run(t_dsptask *x, int index) +{ + t_dsptaskqueue *queue = x->dt_queue; + int remaining; +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: run task %p on thread %d\n", + queue, x, index); +#endif +#ifdef PDINSTANCE + pd_setinstance(x->dt_pdinstance); +#endif + assert(x->dt_switchoff == 0); + /* execute task */ + (x->dt_fn)(x->dt_data); + /* atomically decrement task counter */ +#ifdef MSVC_INTERLOCKED + remaining = _InterlockedDecrement(&queue->dq_remaining); /* returns new value! */ +#else + remaining = atomic_fetch_sub(&queue->dq_remaining, 1) - 1; +#endif +#ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: %d remaining tasks\n", queue, remaining); +#endif + if (!remaining) /* last task */ + { + if (!sys_threadspinwait) /* wait */ + { + /* last task, notify waiting main audio thread; + * see dsptaskqueue_join() */ + fast_semaphore_post(&queue->dq_sem); + } + } + else if (remaining < 0) + fprintf(stderr, "dsptask_run: queue %p: bad remaining task count (%d)\n", + queue, remaining); +} + +/* This is called whenever an enclosing switch~ object has changed state. 
+ * Note that there can be several switch~ objects beyond this task; + * as soon as one of them is switched off, the DSP task won't run and it + * must notify the queue and DSP context to prevent them from locking up. + * Conversely, *all* enclosing switch~ objects must be switched on for + * the task to run (again), i.e. the counter must reach 0. */ +void dsptask_switch(t_dsptask *x, int on) +{ + t_dsptaskqueue *queue = x->dt_queue; + int state, oldstate = x->dt_switchoff > 0; + if (on) + { + if (--x->dt_switchoff < 0) + bug("dsptask_switch: bad switch count (%d)", x->dt_switchoff); + } + else + x->dt_switchoff++; + + state = x->dt_switchoff > 0; + if (oldstate != state) + { + /* only notify if the state has changed! */ + #ifdef DEBUG_DSPTHREADS + fprintf(stderr, "queue %p: switch %s task %p \n", + x->dt_queue, (on ? "on" : "off"), x); + #endif + if (on) /* off -> on */ + { + if (--queue->dq_numswitchoff < 0) + bug("dsptask_switch: bad queue switch count (%d)", + queue->dq_numswitchoff); + ugen_switchtask(x, 1); + } + else /* on -> off */ + { + if (++queue->dq_numswitchoff > queue->dq_numtasks) + bug("dsptask_switch: queue switch count (%d) " + "exceeds queue task count (%d)", + queue->dq_numswitchoff, queue->dq_numtasks); + ugen_switchtask(x, 0); + } + } +} + +#else /* PD_DSPTHREADS */ + +/* dummy implementations of public API functions */ + +int sys_havedspthreadpool(void) +{ + return 0; +} + +int sys_dspthreadpool_start(int *numthreads, int external) +{ + return 0; +} + +int sys_dspthreadpool_stop(int external) +{ + return 0; +} + +int sys_dspthread_run(int index) +{ + return 0; +} + +#endif /* PD_DSPTHREADS */ diff --git a/src/d_ugen.c b/src/d_ugen.c index 647183fa6e..e8d99aff63 100644 --- a/src/d_ugen.c +++ b/src/d_ugen.c @@ -13,7 +13,11 @@ #include "m_pd.h" #include "m_imp.h" +#include "s_stuff.h" #include +#if PD_DSPTHREADS +# include "s_sync.h" +#endif extern t_class *vinlet_class, *voutlet_class, *canvas_class, *text_class; @@ -22,27 +26,76 @@ 
EXTERN_STRUCT _voutlet; void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched); + int downsample, int upsample, int reblock, int switched, int parallel); void voutlet_dspprolog(struct _voutlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched); + int downsample, int upsample, int reblock, int switched, int parallel); void voutlet_dspepilog(struct _voutlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched); + int downsample, int upsample, int reblock, int switched, int parallel); + +/* ---------------------------- t_signalcontext ----------------------------- */ + +typedef struct _signalcontext +{ + t_signal *sc_signals; /* list of signals used by DSP chain */ + /* list of signals which can be reused, sorted by buffer size */ + t_signal *sc_freelist[MAXLOGSIG+1]; + /* list of reusable "borrowed" signals (which don't own sample buffers) */ + t_signal *sc_freeborrowed; +} t_signalcontext; + +t_signalcontext *signalcontext_new(void) +{ + t_signalcontext *x = (t_signalcontext *)getbytes(sizeof(t_signalcontext)); + x->sc_signals = 0; + return x; +} + + /* call this to free all the signals, e.g. 
before creating a new DSP graph */ +void signalcontext_clear(t_signalcontext *x) +{ + t_signal *sig; + int i; + while ((sig = x->sc_signals)) + { + x->sc_signals = sig->s_nextused; + if (!sig->s_isborrowed) + t_freebytes(sig->s_vec, sig->s_vecsize * sizeof (*sig->s_vec)); + t_freebytes(sig, sizeof(*sig)); + } + for (i = 0; i <= MAXLOGSIG; i++) + x->sc_freelist[i] = 0; + x->sc_freeborrowed = 0; +} + +void signalcontext_free(t_signalcontext *x) +{ + signalcontext_clear(x); + freebytes(x, sizeof(t_signalcontext)); +} + +t_signalcontext *signalcontext_current(void); +t_signalcontext *signalcontext_push(t_signalcontext *newcontext); +void signalcontext_pop(t_signalcontext *oldcontext); + +/* ---------------------------- t_instanceugen ----------------------------- */ struct _instanceugen { t_int *u_dspchain; /* DSP chain */ int u_dspchainsize; /* number of elements in DSP chain */ - t_signal *u_signals; /* list of signals used by DSP chain */ int u_sortno; /* number of DSP sortings so far */ - /* list of signals which can be reused, sorted by buffer size */ - t_signal *u_freelist[MAXLOGSIG+1]; - /* list of reusable "borrowed" signals (which don't own sample buffers) */ - t_signal *u_freeborrowed; int u_phase; int u_loud; - struct _dspcontext *u_context; + t_signalcontext *u_signals; /* global signal context */ + struct _dspcontext *u_context; /* current DSP context */ +#if PD_DSPTHREADS + t_dsptaskqueue *u_dspqueue; /* global DSP thread queue */ + t_lockfree_stack u_clocks; /* deferred clocks */ + int u_numtasks; /* total number of active DSP tasks */ + int u_numswitchtasks; /* number of switched off DSP tasks */ +#endif }; #define THIS (pd_this->pd_ugen) @@ -52,14 +105,32 @@ void d_ugen_newpdinstance(void) THIS = getbytes(sizeof(*THIS)); THIS->u_dspchain = 0; THIS->u_dspchainsize = 0; - THIS->u_signals = 0; + THIS->u_signals = signalcontext_new(); +#if PD_DSPTHREADS + THIS->u_dspqueue = dsptaskqueue_new(0); + lockfree_stack_init(&THIS->u_clocks); + THIS->u_numtasks = 
/* Tear down the per-instance ugen state created by d_ugen_newpdinstance():
 * free the global signal context, release the toplevel DSP task queue
 * (only present when built with PD_DSPTHREADS) and finally free the
 * instance structure itself. */
void d_ugen_freepdinstance(void)
{
    signalcontext_free(THIS->u_signals);
#if PD_DSPTHREADS
        /* drops the owner's reference, see dsptaskqueue_release() */
    dsptaskqueue_release(THIS->u_dspqueue);
#endif
    freebytes(THIS, sizeof(*THIS));
}
static void *block_new(t_floatarg fvecsize, t_floatarg foverlap, x->x_frequency = 1; x->x_switched = 0; x->x_switchon = 1; +#if PD_DSPTHREADS + x->x_parallel = 0; + x->x_join = 0; + x->x_owner = canvas_getcurrent(); + x->x_signals = 0; + x->x_task = 0; + x->x_taskonset = 0; + x->x_tasklength = 0, + x->x_dspqueue = 0; + x->x_childtasks = 0; + x->x_numchildtasks = 0; +#endif block_set(x, fvecsize, foverlap, fupsample); return (x); } +static void block_free(t_block *x) +{ +#if PD_DSPTHREADS + if (x->x_signals) + signalcontext_free(x->x_signals); + if (x->x_task) + dsptask_free(x->x_task); + if (x->x_dspqueue) + dsptaskqueue_release(x->x_dspqueue); + if (x->x_numchildtasks) + freebytes(x->x_childtasks, x->x_numchildtasks * sizeof(t_dsptask *)); +#endif +} + static void block_set(t_block *x, t_floatarg fcalcsize, t_floatarg foverlap, t_floatarg fupsample) { @@ -237,7 +350,20 @@ static void *switch_new(t_floatarg fvecsize, t_floatarg foverlap, static void block_float(t_block *x, t_floatarg f) { if (x->x_switched) + { + #if PD_DSPTHREADS + int i, oldstate = x->x_switchon, state = (f != 0); + x->x_switchon = state; + /* only do this if the state has changed! 
*/ + if (state != oldstate) + { + for (i = 0; i < x->x_numchildtasks; i++) + dsptask_switch(x->x_childtasks[i], state); + } + #else x->x_switchon = (f != 0); + #endif + } } static void block_bang(t_block *x) @@ -296,6 +422,58 @@ static t_int *block_epilog(t_int *w) else return (w + EPILOGCALL); } +#if PD_DSPTHREADS + +static void switch_addtask(t_block *x, t_dsptask *t) +{ + int old = x->x_numchildtasks++; + x->x_childtasks = resizebytes(x->x_childtasks, + old * sizeof(t_dsptask *), x->x_numchildtasks * sizeof(t_dsptask *)); + x->x_childtasks[old] = t; + if (!x->x_switchon) + dsptask_switch(t, 0); /* switch off */ +} + +static void block_parallel(t_block *x, t_floatarg f) +{ + int par = f != 0; + if (par != x->x_parallel) + { + x->x_parallel = par; + canvas_update_dsp(); + } +} + +static void block_join(t_block *x, t_floatarg f) +{ + int join = f != 0; + if (join != x->x_join) + { + x->x_join = join; + if (x->x_dspqueue) + dsptaskqueue_release(x->x_dspqueue); + x->x_dspqueue = join ? dsptaskqueue_new(x->x_owner) : 0; + canvas_update_dsp(); + } +} + +static t_int *block_schedtask(t_int *w) +{ + t_block *x = (t_block *)w[1]; + dsptask_sched(x->x_task); + /* skip the DSP chain performed by block_runtask(). 
*/ + return w + 2 + x->x_tasklength; +} + +static void block_runtask(t_block *x) +{ + t_int *ip = THIS->u_dspchain + x->x_taskonset; + while (ip) + ip = (*(t_perfroutine)(*ip))(ip); +} + +#endif /* PD_DSPTHREADS */ + static void block_dsp(t_block *x, t_signal **sp) { /* do nothing here */ @@ -303,12 +481,16 @@ static void block_dsp(t_block *x, t_signal **sp) void block_tilde_setup(void) { - block_class = class_new(gensym("block~"), (t_newmethod)block_new, 0, - sizeof(t_block), 0, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); + block_class = class_new(gensym("block~"), (t_newmethod)block_new, (t_method)block_free, + sizeof(t_block), CLASS_DEFAULT, A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addcreator((t_newmethod)switch_new, gensym("switch~"), A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); class_addmethod(block_class, (t_method)block_set, gensym("set"), A_DEFFLOAT, A_DEFFLOAT, A_DEFFLOAT, 0); +#if PD_DSPTHREADS + class_addmethod(block_class, (t_method)block_parallel, gensym("parallel"), A_FLOAT, 0); + class_addmethod(block_class, (t_method)block_join, gensym("join"), A_FLOAT, 0); +#endif class_addmethod(block_class, (t_method)block_dsp, gensym("dsp"), A_CANT, 0); class_addfloat(block_class, block_float); class_addbang(block_class, block_bang); @@ -316,7 +498,7 @@ void block_tilde_setup(void) /* ------------------ DSP call list ----------------------- */ -static t_int dsp_done(t_int *w) +t_int *dsp_done(t_int *w) { return (0); } @@ -359,13 +541,38 @@ void dsp_addv(t_perfroutine f, int n, t_int *vec) THIS->u_dspchainsize = newsize; } +#if PD_DSPTHREADS +void clock_dispatch(t_clock *x); +void dspthread_setindex(int index); +void dspthreadpool_tick(int ntasks); +#endif + void dsp_tick(void) { if (THIS->u_dspchain) { t_int *ip; + #if PD_DSPTHREADS + t_clock *c; + int count = THIS->u_numtasks - THIS->u_numswitchtasks; + if (count >= 0) + { + dspthread_setindex(0); /* just to be sure */ + dspthreadpool_tick(count); + dsptaskqueue_reset(THIS->u_dspqueue); + } + else + bug("dsp_tick: 
bad task count (%d) resp. switch count (%d)", + THIS->u_numtasks, THIS->u_numswitchtasks); + #endif for (ip = THIS->u_dspchain; ip; ) ip = (*(t_perfroutine)(*ip))(ip); THIS->u_phase++; + #if PD_DSPTHREADS + dsptaskqueue_join(THIS->u_dspqueue); + /* dispatch deferred clocks */ + if ((c = lockfree_stack_release(&THIS->u_clocks))) + clock_dispatch(c); + #endif } } @@ -383,31 +590,14 @@ int ilog2(int n) return (r); } - - /* call this when DSP is stopped to free all the signals */ -static void signal_cleanup(void) -{ - t_signal *sig; - int i; - while ((sig = THIS->u_signals)) - { - THIS->u_signals = sig->s_nextused; - if (!sig->s_isborrowed) - t_freebytes(sig->s_vec, sig->s_vecsize * sizeof (*sig->s_vec)); - t_freebytes(sig, sizeof *sig); - } - for (i = 0; i <= MAXLOGSIG; i++) - THIS->u_freelist[i] = 0; - THIS->u_freeborrowed = 0; -} - /* mark the signal "reusable." */ void signal_makereusable(t_signal *sig) { int logn = ilog2(sig->s_vecsize); + t_signalcontext *context = signalcontext_current(); #if 1 t_signal *s5; - for (s5 = THIS->u_freeborrowed; s5; s5 = s5->s_nextfree) + for (s5 = context->sc_freeborrowed; s5; s5 = s5->s_nextfree) { if (s5 == sig) { @@ -415,7 +605,7 @@ void signal_makereusable(t_signal *sig) return; } } - for (s5 = THIS->u_freelist[logn]; s5; s5 = s5->s_nextfree) + for (s5 = context->sc_freelist[logn]; s5; s5 = s5->s_nextfree) { if (s5 == sig) { @@ -435,16 +625,16 @@ void signal_makereusable(t_signal *sig) s2->s_refcount--; if (!s2->s_refcount) signal_makereusable(s2); - sig->s_nextfree = THIS->u_freeborrowed; - THIS->u_freeborrowed = sig; + sig->s_nextfree = context->sc_freeborrowed; + context->sc_freeborrowed = sig; } else { /* if it's a real signal (not borrowed), put it on the free list so we can reuse it. 
*/ - if (THIS->u_freelist[logn] == sig) bug("signal_free 2"); - sig->s_nextfree = THIS->u_freelist[logn]; - THIS->u_freelist[logn] = sig; + if (context->sc_freelist[logn] == sig) bug("signal_free 2"); + sig->s_nextfree = context->sc_freelist[logn]; + context->sc_freelist[logn] = sig; } } @@ -455,6 +645,7 @@ void signal_makereusable(t_signal *sig) static t_signal *signal_new(int n, t_float sr) { int logn, vecsize = 0; + t_signalcontext *context = signalcontext_current(); t_signal *ret, **whichlist; logn = ilog2(n); if (n) @@ -463,10 +654,10 @@ static t_signal *signal_new(int n, t_float sr) vecsize *= 2; if (logn > MAXLOGSIG) bug("signal buffer too large"); - whichlist = THIS->u_freelist + logn; + whichlist = context->sc_freelist + logn; } else - whichlist = &THIS->u_freeborrowed; + whichlist = &context->sc_freeborrowed; /* first try to reclaim one from the free list */ if ((ret = *whichlist)) @@ -485,8 +676,8 @@ static t_signal *signal_new(int n, t_float sr) ret->s_vec = 0; ret->s_isborrowed = 1; } - ret->s_nextused = THIS->u_signals; - THIS->u_signals = ret; + ret->s_nextused = context->sc_signals; + context->sc_signals = ret; } ret->s_n = n; ret->s_vecsize = vecsize; @@ -564,16 +755,85 @@ struct _dspcontext int dc_ninlets; int dc_noutlets; t_signal **dc_iosigs; + t_signalcontext *dc_signals; t_float dc_srate; int dc_vecsize; /* vector size, power of two */ int dc_calcsize; /* number of elements to calculate */ char dc_toplevel; /* true if "iosigs" is invalid. */ - char dc_reblock; /* true if we have to reblock inlets/outlets */ - char dc_switched; /* true if we're switched */ + char dc_reblock; /* true if we have to reblock inlets/outlets. */ + char dc_switched; /* true if we're switched. */ + char dc_parallel; /* true if we're parallel. 
*/ +#if PD_DSPTHREADS + t_dsptaskqueue *dc_dspqueue; /* current DSP task queue */ + t_block *dc_block; /* block~ object */ +#endif }; #define t_dspcontext struct _dspcontext + /* for clone object, see clone_dsp() */ +#if PD_DSPTHREADS + +void ugen_addtask(t_dsptask *x) +{ + t_dspcontext *dc; + THIS->u_numtasks++; + /* Add the DSP task to all enclosing switch~ objects */ + for (dc = THIS->u_context; dc; dc = dc->dc_parentcontext) + { + if (dc->dc_block && dc->dc_block->x_switched) /* switch~ */ + switch_addtask(dc->dc_block, x); + } +} + +void ugen_removetask(t_dsptask *x, int on) +{ + if (!on) + { + if (--THIS->u_numswitchtasks < 0) + bug("ugen_removetask: bad switch count (%d)", + THIS->u_numswitchtasks); + } + if (--THIS->u_numtasks < 0) + bug("ugen_removetask: bad task count (%d)", + THIS->u_numtasks); +} + +/* DSP task has been switched on or off */ +void ugen_switchtask(t_dsptask *x, int on) +{ + if (on) /* off -> on */ + { + if (--THIS->u_numswitchtasks < 0) + bug("ugen_switchtask"); + } + else /* on -> off */ + THIS->u_numswitchtasks++; +} + + /* used in clone_dsp() */ +t_dsptaskqueue * dsptaskqueue_push(t_dsptaskqueue *newqueue) +{ + t_dsptaskqueue *old; + if (!THIS->u_context || !((old = THIS->u_context->dc_dspqueue))) + { + bug("dsptaskqueue_push"); + return 0; + } + THIS->u_context->dc_dspqueue = newqueue; + return old; +} + +void dsptaskqueue_pop(t_dsptaskqueue *oldqueue) +{ + if (THIS->u_context && THIS->u_context->dc_dspqueue) + THIS->u_context->dc_dspqueue = oldqueue; + else + bug("dsptaskqueue_pop"); +} + +#endif /* PD_DSPTHREADS */ + /* get a new signal for the current context - used by clone~ object */ t_signal *signal_newfromcontext(int borrowed) { @@ -581,6 +841,31 @@ t_signal *signal_newfromcontext(int borrowed) THIS->u_context->dc_srate)); } +t_signalcontext *signalcontext_current(void) +{ + return THIS->u_context->dc_signals; +} + +t_signalcontext *signalcontext_push(t_signalcontext *newcontext) +{ + t_signalcontext *old; + if (!THIS->u_context 
|| !((old = THIS->u_context->dc_signals))) + { + bug("signalcontext_push"); + return 0; + } + THIS->u_context->dc_signals = newcontext; + return old; +} + +void signalcontext_pop(t_signalcontext *oldcontext) +{ + if (THIS->u_context && THIS->u_context->dc_signals) + THIS->u_context->dc_signals = oldcontext; + else + bug("signalcontext_pop"); +} + void ugen_stop(void) { if (THIS->u_dspchain) @@ -589,8 +874,7 @@ void ugen_stop(void) THIS->u_dspchainsize * sizeof (t_int)); THIS->u_dspchain = 0; } - signal_cleanup(); - + signalcontext_clear(THIS->u_signals); } void ugen_start(void) @@ -600,6 +884,12 @@ void ugen_start(void) THIS->u_dspchain = (t_int *)getbytes(sizeof(*THIS->u_dspchain)); THIS->u_dspchain[0] = (t_int)dsp_done; THIS->u_dspchainsize = 1; +#if PD_DSPTHREADS + /* first check and mark canvas tree */ + canvas_markthreadsafe(); + /* then update toplevel queue */ + dsptaskqueue_update(THIS->u_dspqueue); +#endif if (THIS->u_context) bug("ugen_start"); } @@ -608,6 +898,16 @@ int ugen_getsortno(void) return (THIS->u_sortno); } +t_int *ugen_getchain(void) +{ + return THIS->u_dspchain; +} + +int ugen_getsize(void) +{ + return THIS->u_dspchainsize; +} + #if 0 void glob_ugen_printstate(void *dummy, t_symbol *s, int argc, t_atom *argv) { @@ -653,6 +953,17 @@ t_dspcontext *ugen_start_graph(int toplevel, t_signal **sp, dc->dc_ninlets = ninlets; dc->dc_noutlets = noutlets; dc->dc_parentcontext = THIS->u_context; + /* use parent signal context by default. This might be overridden + * by block~ (see "parallel") or by signalcontext_push(). */ + dc->dc_signals = THIS->u_context ? THIS->u_context->dc_signals + : THIS->u_signals; +#if PD_DSPTHREADS + /* use parent DSP task queue by default. This might be overridden + * by block~ (see "join") or by dsptaskqueue_push(). */ + dc->dc_dspqueue = THIS->u_context ? 
THIS->u_context->dc_dspqueue + : THIS->u_dspqueue; + dc->dc_block = 0; +#endif THIS->u_context = dc; return (dc); } @@ -746,19 +1057,19 @@ static void ugen_doit(t_dspcontext *dc, t_ugenbox *u) t_sigoutconnect *oc; t_class *class = pd_class(&u->u_obj->ob_pd); int i, n; - /* suppress creating new signals for the outputs of signal - inlets and subpatches; except in the case we're an inlet and "blocking" - is set. We don't yet know if a subcanvas will be "blocking" so there + /* suppress creating new signals for the outputs of signal inlets and + subpatches; except in the case we're an inlet and "reblock" or "parallel" + is set. We don't yet know if a subcanvas will be "blocking" so there we delay new signal creation, which will be handled by calling signal_setborrowed in the ugen_done_graph routine below. */ int nonewsigs = (class == canvas_class || - ((class == vinlet_class) && !(dc->dc_reblock))); + ((class == vinlet_class) && !(dc->dc_reblock || dc->dc_parallel))); /* when we encounter a subcanvas or a signal outlet, suppress freeing - the input signals as they may be "borrowed" for the super or sub - patch; same exception as above, but also if we're "switched" we - have to do a copy rather than a borrow. */ + the input signals as they may be "borrowed" for the super or sub patch; + same exception as above, but also if we're "switched" we have to do a + copy rather than a borrow. 
*/ int nofreesigs = (class == canvas_class || class == clone_class || - ((class == voutlet_class) && !(dc->dc_reblock || dc->dc_switched))); + ((class == voutlet_class) && !(dc->dc_reblock || dc->dc_parallel || dc->dc_switched))); t_signal **insig, **outsig, **sig, *s1, *s2, *s3; t_ugenbox *u2; @@ -788,10 +1099,10 @@ static void ugen_doit(t_dspcontext *dc, t_ugenbox *u) *sig = uin->i_signal; newrefcount = --(*sig)->s_refcount; /* if the reference count went to zero, we free the signal now, - unless it's a subcanvas or outlet; these might keep the - signal around to send to objects connected to them. In this - case we increment the reference count; the corresponding decrement - is in sig_makereusable(). */ + unless it's a subcanvas or voutlet (except reblocked or parallel); + these might keep the signal around to send to objects connected + to them. In this case we increment the reference count; + the corresponding decrement is in signal_makereusable(). */ if (nofreesigs) (*sig)->s_refcount++; else if (!newrefcount) @@ -799,13 +1110,15 @@ static void ugen_doit(t_dspcontext *dc, t_ugenbox *u) } for (sig = outsig, uout = u->u_out, i = u->u_nout; i--; sig++, uout++) { - /* similarly, for outlets of subcanvases we delay creating - them; instead we create "borrowed" ones so that the refcount - is known. The subcanvas replaces the fake signal with one showing - where the output data actually is, to avoid having to copy it. - For any other object, we just allocate a new output vector; - since we've already freed the inputs the objects might get called - "in place." */ + /* We delay creating outlets for subcanvases or vinlets (except + reblocked or parallel); instead we create "borrowed" ones so that + the refcount is known. A subcanvas or vinlet will replace the fake + signal with one showing where the output data actually is, to avoid + having to copy it. 
+ For any other objects, we just allocate a new output vector; since + we've already freed the inputs the objects might get called "in place." + For parallel processing, the signals for vinlet will be created in a + new signal context, so they are independent from the parent canvas. */ if (nonewsigs) { *sig = uout->o_signal = @@ -910,7 +1223,7 @@ void ugen_done_graph(t_dspcontext *dc) int chainblockbegin; /* DSP chain onset before block prolog code */ int chainblockend; /* and after block epilog code */ int chainafterall; /* and after signal outlet epilog */ - int reblock = 0, switched; + int reblock = 0, switched, parallel, join; int downsample = 1, upsample = 1; /* debugging printout */ @@ -984,6 +1297,58 @@ void ugen_done_graph(t_dspcontext *dc) (downsample != 1) || (upsample != 1)) reblock = 1; switched = blk->x_switched; + #if PD_DSPTHREADS + dc->dc_block = blk; + /* free old DSP task list */ + if (blk->x_numchildtasks) + { + freebytes(blk->x_childtasks, blk->x_numchildtasks * sizeof(t_dsptask *)); + blk->x_childtasks = 0; + blk->x_numchildtasks = 0; + } + /* always free existing DSP task! */ + if (blk->x_task) + { + dsptask_free(blk->x_task); + blk->x_task = 0; + } + parallel = blk->x_parallel; + join = blk->x_join; + if (parallel && reblock && parent_context) + { + /* the code for reblocking is rather complicated and I am not + * ready to combine it with parallel processing, so I + * just disallow it for now. After all, users can simply wrap + * a reblocked canvas in a non-reblocked one. Note that we do + * allow block~ on root canvases (which count as reblocked) + * because we do not have to care about inlets~ and outlets~. */ + pd_error(blk, "reblocking + parallel processing not supported (yet)"); + parallel = 0; + } + if (parallel && join) + { + /* it doesn't make sense to use 'parallel' together with 'join', + * because the latter will force the former to run synchronously, + * preventing any kind of parallelism. 
*/ + logpost(blk, PD_NORMAL, "block~: warning: using 'parallel' " + "and 'join' in the same canvas has no effect."); + } + /* first update queue */ + if (join) + dsptaskqueue_update(blk->x_dspqueue); + /* then check if we can safely run in parallel */ + if (parallel && !dsptaskqueue_check(dc->dc_dspqueue)) + { + /* see also clone_dsp() */ + pd_error(blk, "block~: parallel processing not possible because " + "some DSP objects are not officially thread-safe! Start Pd with " + "-nothreadsafe to circumvent this check (potentially dangerous!)"); + parallel = 0; + } + #else + parallel = 0; + join = 0; + #endif } else { @@ -995,20 +1360,22 @@ void ugen_done_graph(t_dspcontext *dc) phase = 0; if (!parent_context) reblock = 1; switched = 0; + parallel = 0; + join = 0; } dc->dc_reblock = reblock; dc->dc_switched = switched; + dc->dc_parallel = parallel; dc->dc_srate = srate; dc->dc_vecsize = vecsize; dc->dc_calcsize = calcsize; - /* if we're reblocking or switched, we now have to create output - signals to fill in for the "borrowed" ones we have now. This - is also possibly true even if we're not blocked/switched, in - the case that there was a signal loop. But we don't know this - yet. */ + /* if we're reblocking, switched or parallel, we now have to create + output signals to fill in for the "borrowed" ones we have now. + This is also possibly true even if we're not blocked/switched, in + the case that there was a signal loop. But we don't know this yet. */ - if (dc->dc_iosigs && (switched || reblock)) + if (dc->dc_iosigs && (switched || reblock || parallel)) { t_signal **sigp; for (i = 0, sigp = dc->dc_iosigs + dc->dc_ninlets; i < dc->dc_noutlets; @@ -1027,7 +1394,8 @@ void ugen_done_graph(t_dspcontext *dc) } if (THIS->u_loud) - post("reblock %d, switched %d", reblock, switched); + post("reblock %d, switched %d, parallel %d, join %d", + reblock, switched, parallel, join); /* schedule prologs for inlets and outlets. 
If the "reblock" flag is set, an inlet will put code on the DSP chain to copy its input @@ -1046,11 +1414,11 @@ void ugen_done_graph(t_dspcontext *dc) if (pd_class(zz) == vinlet_class) vinlet_dspprolog((struct _vinlet *)zz, dc->dc_iosigs, vecsize, calcsize, THIS->u_phase, period, frequency, - downsample, upsample, reblock, switched); + downsample, upsample, reblock, switched, parallel); else if (pd_class(zz) == voutlet_class) voutlet_dspprolog((struct _voutlet *)zz, outsigs, vecsize, calcsize, THIS->u_phase, period, frequency, - downsample, upsample, reblock, switched); + downsample, upsample, reblock, switched, parallel); } chainblockbegin = THIS->u_dspchainsize; @@ -1059,6 +1427,30 @@ void ugen_done_graph(t_dspcontext *dc) dsp_add(block_prolog, 1, blk); blk->x_chainonset = THIS->u_dspchainsize - 1; } +#if PD_DSPTHREADS + if (join) + { + /* this canvas manages its own DSP task queue. this part comes + * after the prolog, so that it gets skipped if we're switched off. */ + dc->dc_dspqueue = blk->x_dspqueue; + dsp_add_reset(blk->x_dspqueue); + } + if (parallel) + { + /* this canvas needs its own private signal context. */ + if (!blk->x_signals) + blk->x_signals = signalcontext_new(); /* create lazily */ + else + signalcontext_clear(blk->x_signals); + dc->dc_signals = blk->x_signals; + /* create new DSP task for this canvas on the current queue */ + blk->x_task = dsptask_new(dc->dc_dspqueue, (t_dsptaskfn)block_runtask, blk); + /* schedule task */ + dsp_add(block_schedtask, 1, blk); + blk->x_taskonset = THIS->u_dspchainsize - 1; + } +#endif /* PD_DSPTHREADS */ + /* Initialize for sorting */ for (u = dc->dc_ugenlist; u; u = u->u_next) { @@ -1110,6 +1502,25 @@ void ugen_done_graph(t_dspcontext *dc) break; /* don't need to keep looking. */ } +#if PD_DSPTHREADS + if (parallel) + { + /* add sentinel */ + dsp_add(dsp_done, 0); + /* save chain size, see block_pushtask(). 
*/ + blk->x_tasklength = THIS->u_dspchainsize - blk->x_taskonset - 1; + if (THIS->u_loud) + post("parallel DSP task length: %d", blk->x_tasklength); + } + + if (join) + { + /* join DSP tasks managed by this canvas. this must come before + * the blockepilog, so that it gets skipped if we're switched off. */ + dsp_add_join(blk->x_dspqueue); + } +#endif /* PD_DSPTHREADS */ + if (blk && (reblock || switched)) /* add block DSP epilog */ dsp_add(block_epilog, 1, blk); chainblockend = THIS->u_dspchainsize; @@ -1125,7 +1536,7 @@ void ugen_done_graph(t_dspcontext *dc) if (iosigs) iosigs += dc->dc_ninlets; voutlet_dspepilog((struct _voutlet *)zz, iosigs, vecsize, calcsize, THIS->u_phase, period, frequency, - downsample, upsample, reblock, switched); + downsample, upsample, reblock, switched, parallel); } } diff --git a/src/g_array.c b/src/g_array.c index b78c7a6742..0b217122bf 100644 --- a/src/g_array.c +++ b/src/g_array.c @@ -6,6 +6,10 @@ #include /* for read/write to files */ #include "m_pd.h" #include "g_canvas.h" +#include "s_stuff.h" +#if PD_DSPTHREADS +# include "s_spinlock.h" +#endif #include /* jsarlo { */ @@ -115,6 +119,9 @@ struct _garray unsigned int x_listviewing:1; /* list view window is open */ unsigned int x_hidename:1; /* don't print name above graph */ unsigned int x_edit:1; /* we can edit the array */ +#if PD_DSPTHREADS + t_rwspinlock x_lock; +#endif }; static t_pd *garray_arraytemplatecanvas; /* written at setup w/ global lock */ @@ -176,6 +183,9 @@ static t_garray *graph_scalar(t_glist *gl, t_symbol *s, t_symbol *templatesym, x->x_edit = 1; glist_add(gl, &x->x_gobj); x->x_glist = gl; +#if PD_DSPTHREADS + rwspinlock_init(&x->x_lock); +#endif return (x); } @@ -753,11 +763,146 @@ const t_widgetbehavior garray_widgetbehavior = /* ----------------------- public functions -------------------- */ +/* legacy, use garrayref methods instead */ void garray_usedindsp(t_garray *x) { x->x_usedindsp = 1; } +void garrayref_init(t_garrayref *x) +{ + x->ar_garray = 0; + 
x->ar_stub = 0; +} + +void gstub_dis(t_gstub *gs); + +void garrayref_unset(t_garrayref *x) +{ + x->ar_garray = 0; + if (x->ar_stub) + { + gstub_dis(x->ar_stub); + x->ar_stub = 0; + } +} + +static int garrayref_findbyname(t_garrayref *x, t_symbol *name, t_object *object) +{ + t_garray *g; + t_array *a; + int npoints; + t_word *vec; + if (!(g = (t_garray *)pd_findbyclass(name, garray_class))) + { + if (object) + pd_error(object, "%s: %s: no such array", + class_getname(object->te_pd), name->s_name); + else + pd_error(0, "%s: no such array", name->s_name); + return 0; + } + if (!garray_getfloatwords(g, &npoints, &vec)) + { + if (object) + pd_error(object, "%s: bad template for %s", + name->s_name, class_getname(object->te_pd)); + else + pd_error(0, "%s: bad template", name->s_name); + return 0; + } + if (!(a = garray_getarray(g))) + return 0; + x->ar_garray = g; + if (x->ar_stub) gstub_dis(x->ar_stub); + x->ar_stub = a->a_stub; + a->a_stub->gs_refcount++; + return 1; +} + +int garrayref_set(t_garrayref *x, t_symbol *arrayname, t_object *object) +{ + /* ignore empty symbol */ + if (!(*arrayname->s_name && garrayref_findbyname(x, arrayname, object))) + { + garrayref_unset(x); + return 0; + } + return 1; +} + +int garrayref_check(t_garrayref *x) +{ + /* do we have a stub, and if yes, has it been cut off? */ + t_gstub *gs = x->ar_stub; + return gs && (gs->gs_which == GP_ARRAY); +} + + /* lazily initialize an garrayref by name and return the array data; + * if 'arrayname' is NULL, just fail silently. */ +int garrayref_get(t_garrayref *x, int *size, t_word **vec, + t_symbol *arrayname, t_object *object) +{ + t_array *a; + if (!garrayref_check(x)) + { + if (!arrayname || !garrayref_findbyname(x, arrayname, object)) + return 0; + } + a = x->ar_stub->gs_un.gs_array; + *vec = (t_word *)a->a_vec; + *size = a->a_n; + return 1; +} + + /* see m_pd.h. 
*/ +#if !PD_PARALLEL +#undef garrayref_write_lock +#undef garrayref_write_unlock +#undef garrayref_read_lock +#undef garrayref_read_unlock +#endif + + /* garrayref_write_lock() and garrayref_read_lock() always fail + * silently if garrayref is empty or if the garray has been removed. + * In practice, adding/removing garrays triggers a DSP graph update, + * so we can (re)acquire the garray in our DSP method with garrayref_set(). + * NOTE: we avoid (un)setting the garrayref in the perform routine + * because it would require additional thread synchronization. */ +int garrayref_write_lock(t_garrayref *x, int *size, t_word **vec) +{ + if (!garrayref_get(x, size, vec, 0, 0)) + return 0; +#if PD_DSPTHREADS + rwspinlock_wrlock(&x->ar_garray->x_lock); +#endif + return 1; +} + +void garrayref_write_unlock(t_garrayref *x) +{ +#if PD_DSPTHREADS + rwspinlock_wrunlock(&x->ar_garray->x_lock); +#endif +} + +int garrayref_read_lock(t_garrayref *x, int *size, t_word **vec) +{ + if (!garrayref_get(x, size, vec, 0, 0)) + return 0; +#if PD_DSPTHREADS + rwspinlock_rdlock(&x->ar_garray->x_lock); +#endif + return 1; +} + +void garrayref_read_unlock(t_garrayref *x) +{ +#if PD_DSPTHREADS + rwspinlock_rdunlock(&x->ar_garray->x_lock); +#endif +} + static void garray_doredraw(t_gobj *client, t_glist *glist) { t_garray *x = (t_garray *)client; diff --git a/src/g_canvas.c b/src/g_canvas.c index 7ecf34711f..49ed75bace 100644 --- a/src/g_canvas.c +++ b/src/g_canvas.c @@ -500,6 +500,12 @@ t_canvas *canvas_new(void *dummy, t_symbol *sel, int argc, t_atom *argv) canvas_dosetbounds(x, xloc, yloc, xloc + width, yloc + height); x->gl_owner = owner; x->gl_isclone = 0; +#if PD_DSPTHREADS + /* if started with -nothreadsafe, we pretend to be thread-safe. */ + x->gl_threadsafe = !sys_threadsafe; +#else + x->gl_threadsafe = 0; +#endif x->gl_name = (*s->s_name ? s : (THISGUI->i_newfilename ? 
THISGUI->i_newfilename : gensym("Pd"))); canvas_bind(x); @@ -1335,6 +1341,132 @@ void ugen_connect(t_dspcontext *dc, t_object *x1, int outno, t_object *x2, int inno); void ugen_done_graph(t_dspcontext *dc); +#if PD_DSPTHREADS + +int clone_isthreadsafe(t_pd *x, t_symbol *dspsym, int *limit); + + /* also called by clone_isthreadsafe() */ +int obj_isthreadsafe(t_gobj *x, t_symbol *dspsym, int *limit) +{ + if (x->g_pd == canvas_class) + { + t_canvas *c = (t_canvas *)x; + /* -threadsafe -> use cached result of canvas_markthreadsafe(); + * -nothreadsafe -> always true, see canvas_new(). */ + if (c->gl_threadsafe) + return 1; + else if (!limit) + return 0; + else /* find offending objects */ + { + t_canvas *canvas = (t_canvas *)x; + t_gobj *y; + for (y = canvas->gl_list; y && (*limit > 0); y = y->g_next) + obj_isthreadsafe(y, dspsym, limit); + return 0; + } + } + else if (x->g_pd == clone_class) + return clone_isthreadsafe(&x->g_pd, dspsym, limit); + else + { /* zgetfn() comes last because it's the most expensive check */ + if (x->g_pd->c_patchable && !x->g_pd->c_threadsafe + && zgetfn(&x->g_pd, dspsym)) + { + /* LATER get rid of duplicate warnings for the same class */ + if (limit && *limit > 0) + { + logpost(x, PD_NORMAL, "warning: %s is not thread-safe!", + class_getname(x->g_pd)); + if (--(*limit) == 0) /* hit limit */ + logpost(0, PD_NORMAL, "..."); + } + return 0; + } else + return 1; + } +} + +#define THREADSAFE_WARN_MAX 10 + + /* check if all DSP objects starting at the given canvas + * are thread-safe; if 'x' is NULL, check all root canvases. */ +int canvas_isthreadsafe(t_canvas *x, int loud) +{ + t_symbol *dspsym = gensym("dsp"); + int limit = THREADSAFE_WARN_MAX; + if (x) + return obj_isthreadsafe((t_gobj *)x, dspsym, loud ? &limit : 0); + else /* root canvases */ + { + int threadsafe = 1; + t_canvas *y; + for (y = pd_getcanvaslist(); y; y = y->gl_next) + { + if (!obj_isthreadsafe((t_gobj *)y, dspsym, loud ? 
&limit : 0)) + { + threadsafe = 0; + if (!loud || !limit) + break; + } + } + return threadsafe; + } +} + +int clone_markthreadsafe(t_pd *x, t_symbol *dspsym); + + /* also called by clone_markthreadsafe() */ +int obj_markthreadsafe(t_gobj *x, t_symbol *dspsym) +{ + if (x->g_pd == canvas_class) + { + t_canvas *c = (t_canvas *)x; + t_gobj *y; + c->gl_threadsafe = 1; + for (y = c->gl_list; y; y = y->g_next) + { + if (!obj_markthreadsafe(y, dspsym)) + c->gl_threadsafe = 0; /* don't break! */ + } + #if 0 + post("canvas %p (parent: %p) threadsafe: %d", + c, c->gl_owner, c->gl_threadsafe); + #endif + return c->gl_threadsafe; + } + else if (x->g_pd == clone_class) + return clone_markthreadsafe(&x->g_pd, dspsym); + else /* zgetfn() comes last because it's the most expensive check */ + return !(x->g_pd->c_patchable && !x->g_pd->c_threadsafe + && zgetfn(&x->g_pd, dspsym)); +} + + /* traverse canvas tree and mark every sub-tree (depth first). + * This mitigates O(n^2) complexity when calling canvas_isthreadsafe() + * repeatedly via dsptaskqueue_update() and dsptaskqueue_check(). */ +int canvas_markthreadsafe(void) +{ + /* when started with -nothreadsafe, gl_threadsafe will always + * be true, see canvas_new() and clone_new(). */ + if (!sys_threadsafe) + return 1; + else + { + t_symbol *dspsym = gensym("dsp"); + int threadsafe = 1; + t_canvas *y; + for (y = pd_getcanvaslist(); y; y = y->gl_next) + { + if (!obj_markthreadsafe((t_gobj *)y, dspsym)) + threadsafe = 0; /* don't break! */ + } + return threadsafe; + } +} + +#endif /* PD_DSPTHREADS */ + /* schedule one canvas for DSP. This is called below for all "root" canvases, but is also called from the "dsp" method for sub- canvases, which are treated almost like any other tilde object. */ @@ -2023,7 +2155,7 @@ void g_canvas_setup(void) /* we prevent the user from typing "canvas" in an object box by sending 0 for a creator function. 
*/ canvas_class = class_new(gensym("canvas"), 0, - (t_method)canvas_free, sizeof(t_canvas), CLASS_NOINLET, 0); + (t_method)canvas_free, sizeof(t_canvas), CLASS_THREADSAFE | CLASS_NOINLET, 0); /* here is the real creator function, invoked in patch files by sending the "canvas" message to #N, which is bound to pd_camvasmaker. */ diff --git a/src/g_canvas.h b/src/g_canvas.h index 7822fd40d0..a567b52145 100644 --- a/src/g_canvas.h +++ b/src/g_canvas.h @@ -203,6 +203,7 @@ struct _glist unsigned int gl_hidetext:1; /* hide object-name + args when doing graph on parent */ unsigned int gl_private:1; /* private flag used in x_scalar.c */ unsigned int gl_isclone:1; /* exists as part of a clone object */ + unsigned int gl_threadsafe:1; /* is this canvas and all its subcanvases threadsafe */ int gl_zoom; /* zoom factor (integer zoom-in only) */ void *gl_privatedata; /* private data */ }; diff --git a/src/g_clone.c b/src/g_clone.c index 4d4397e831..23f621f73d 100644 --- a/src/g_clone.c +++ b/src/g_clone.c @@ -1,6 +1,7 @@ #include "m_pd.h" #include "g_canvas.h" #include "m_imp.h" +#include "s_stuff.h" #include /* ---------- clone - maintain copies of a patch ----------------- */ @@ -22,10 +23,30 @@ t_class *clone_class; static t_class *clone_in_class, *clone_out_class; +#if PD_DSPTHREADS + +typedef struct _signalcontext t_signalcontext; + +t_signalcontext *signalcontext_new(void); +void signalcontext_free(t_signalcontext *x); +void signalcontext_clear(t_signalcontext *x); +t_signalcontext *signalcontext_push(t_signalcontext *newcontext); +void signalcontext_pop(t_signalcontext *oldcontext); + +t_dsptaskqueue * dsptaskqueue_push(t_dsptaskqueue *newqueue); +void dsptaskqueue_pop(t_dsptaskqueue *oldqueue); + +#endif /* PD_DSPTHREADS */ + typedef struct _copy { t_glist *c_gl; - int c_on; /* DSP running */ +#if PD_DSPTHREADS + t_dsptask *c_task; + int c_chainonset; + int c_chainlength; + t_signalcontext *c_sigcontext; +#endif } t_copy; typedef struct _in @@ -42,6 +63,9 @@ typedef 
struct _out t_outlet *o_outlet; int o_signal; int o_n; +#if PD_DSPTHREADS + t_signal *o_outsig; +#endif } t_out; typedef struct _clone @@ -58,9 +82,60 @@ typedef struct _clone t_atom *x_argv; int x_phase; int x_startvoice; /* number of first voice, 0 by default */ - int x_suppressvoice; /* suppress voice number as $1 arg */ + char x_suppressvoice; /* suppress voice number as $1 arg */ +#if PD_DSPTHREADS + char x_parallel; /* process in parallel */ + char x_threadsafe; /* are we thread-safe? */ + t_dsptaskqueue *x_dspqueue; /* DSP task queue */ +#endif } t_clone; +#if PD_DSPTHREADS + +int obj_markthreadsafe(t_gobj *x, t_symbol *dspsym); + + /* called by obj_markthreadsafe() */ +int clone_markthreadsafe(t_pd *z, t_symbol *dspsym) +{ + t_clone *x = (t_clone *)z; + int i; + x->x_threadsafe = 1; + for (i = 0; i < x->x_n; i++) + { + t_gobj *obj = (t_gobj *)x->x_vec[i].c_gl; + if (!obj_markthreadsafe(obj, dspsym)) + x->x_threadsafe = 0; /* don't break! */ + } + return x->x_threadsafe; +} + +int obj_isthreadsafe(t_gobj *x, t_symbol *dspsym, int *limit); + + /* called by obj_isthreadsafe() */ +int clone_isthreadsafe(t_pd *z, t_symbol *dspsym, int *limit) +{ + t_clone *x = (t_clone *)z; + if (x->x_threadsafe) + return 1; + else if (!limit) + return 0; + else + { + /* only search for the first offending canvas; the loop is + * necessary because of live editing and dynamic patching! 
*/ + int i; + for (i = 0; i < x->x_n; i++) + { + t_gobj *obj = (t_gobj *)x->x_vec[i].c_gl; + if (!obj_isthreadsafe(obj, dspsym, limit)) + break; + } + return 0; + } +} + +#endif /* PD_DSPTHREADS */ + int clone_match(t_pd *z, t_symbol *name, t_symbol *dir) { t_clone *x = (t_clone *)z; @@ -146,6 +221,18 @@ static void clone_in_fwd(t_in *x, t_symbol *s, int argc, t_atom *argv) typedmess(&x->i_pd, argv->a_w.w_symbol, argc-1, argv+1); } +#if PD_DSPTHREADS +static void clone_in_parallel(t_in *x, t_floatarg f) +{ + int par = f != 0; + if (par != x->i_owner->x_parallel) + { + x->i_owner->x_parallel = par; + canvas_update_dsp(); + } +} +#endif /* PD_DSPTHREADS */ + static void clone_out_anything(t_out *x, t_symbol *s, int argc, t_atom *argv) { t_atom *outv; @@ -176,17 +263,29 @@ static void clone_free(t_clone *x) } for (i = 0; i < x->x_n; i++) { - canvas_closebang(x->x_vec[i].c_gl); - pd_free(&x->x_vec[i].c_gl->gl_pd); + t_copy *copy = &x->x_vec[i]; + canvas_closebang(copy->c_gl); + pd_free(&copy->c_gl->gl_pd); t_freebytes(x->x_outvec[i], x->x_nout * sizeof(*x->x_outvec[i])); + #if PD_DSPTHREADS + if (copy->c_sigcontext) + signalcontext_free(copy->c_sigcontext); + if (copy->c_task) + dsptask_free(copy->c_task); + #endif } t_freebytes(x->x_vec, x->x_n * sizeof(*x->x_vec)); t_freebytes(x->x_argv, x->x_argc * sizeof(*x->x_argv)); t_freebytes(x->x_invec, x->x_nin * sizeof(*x->x_invec)); t_freebytes(x->x_outvec, x->x_n * sizeof(*x->x_outvec)); clone_voicetovis = voicetovis; + } +#if PD_DSPTHREADS + if (x->x_dspqueue) + dsptaskqueue_release(x->x_dspqueue); +#endif } static t_canvas *clone_makeone(t_symbol *s, int argc, t_atom *argv) @@ -228,6 +327,7 @@ void clone_setn(t_clone *x, t_floatarg f) { t_canvas *c; t_out *outvec; + t_copy *copy; SETFLOAT(x->x_argv, x->x_startvoice + i); if (!(c = clone_makeone(x->x_s, x->x_argc - x->x_suppressvoice, x->x_argv + x->x_suppressvoice))) @@ -237,8 +337,14 @@ void clone_setn(t_clone *x, t_floatarg f) } x->x_vec = (t_copy *)t_resizebytes(x->x_vec, 
i * sizeof(t_copy), (i+1) * sizeof(t_copy)); - x->x_vec[i].c_gl = c; - x->x_vec[i].c_on = 0; + copy = &x->x_vec[i]; + copy->c_gl = c; + #if PD_DSPTHREADS + copy->c_task = 0; + copy->c_chainonset = 0; + copy->c_chainlength = 0; + copy->c_sigcontext = 0; + #endif x->x_outvec = (t_out **)t_resizebytes(x->x_outvec, i * sizeof(*x->x_outvec), (i+1) * sizeof(*x->x_outvec)); x->x_outvec[i] = outvec = @@ -249,6 +355,9 @@ void clone_setn(t_clone *x, t_floatarg f) outvec[j].o_signal = obj_issignaloutlet(&x->x_vec[0].c_gl->gl_obj, i); outvec[j].o_n = x->x_startvoice + i; + #if PD_DSPTHREADS + outvec[j].o_outsig = 0; + #endif outvec[j].o_outlet = x->x_outvec[0][j].o_outlet; obj_connect(&x->x_vec[i].c_gl->gl_obj, j, @@ -260,8 +369,16 @@ void clone_setn(t_clone *x, t_floatarg f) { for (i = wantn; i < nwas; i++) { - canvas_closebang(x->x_vec[i].c_gl); - pd_free(&x->x_vec[i].c_gl->gl_pd); + t_copy *copy = &x->x_vec[i]; + canvas_closebang(copy->c_gl); + pd_free(&copy->c_gl->gl_pd); + t_freebytes(x->x_outvec[i], x->x_nout * sizeof(*x->x_outvec[i])); + #if PD_DSPTHREADS + if (copy->c_sigcontext) + signalcontext_free(copy->c_sigcontext); + if (copy->c_task) + dsptask_free(copy->c_task); + #endif } x->x_vec = (t_copy *)t_resizebytes(x->x_vec, nwas * sizeof(t_copy), wantn * sizeof(*x->x_vec)); @@ -294,10 +411,35 @@ void canvas_dodsp(t_canvas *x, int toplevel, t_signal **sp); t_signal *signal_newfromcontext(int borrowed); void signal_makereusable(t_signal *sig); +#if PD_DSPTHREADS +t_int *ugen_getchain(void); +int ugen_getsize(void); +t_int *dsp_done(t_int *w); + +static t_int *clone_schedtask(t_int *w) +{ + t_copy *x = (t_copy *)w[1]; + dsptask_sched(x->c_task); + /* skip the DSP chain performed by clone_runtask(). 
*/ + return w + 2 + x->c_chainlength; +} + +static void clone_runtask(t_copy *x) +{ + t_int *ip = ugen_getchain() + x->c_chainonset; + while (ip) + ip = (*(t_perfroutine)(*ip))(ip); +} + +#endif /* PD_DSPTHREADS */ + static void clone_dsp(t_clone *x, t_signal **sp) { int i, j, nin, nout; t_signal **tempsigs, **tempio; +#if PD_DSPTHREADS + int parallel = x->x_parallel; +#endif if (!x->x_n) return; for (i = nin = 0; i < x->x_nin; i++) @@ -319,6 +461,121 @@ static void clone_dsp(t_clone *x, t_signal **sp) return; } } +#if PD_DSPTHREADS + /* always free existing DSP tasks! */ + for (i = 0; i < x->x_n; i++) + { + if (x->x_vec[i].c_task) + { + dsptask_free(x->x_vec[i].c_task); + x->x_vec[i].c_task = 0; + } + } + if (parallel) + { + if (!x->x_dspqueue) /* create lazily */ + x->x_dspqueue = dsptaskqueue_new(0); + /* check thread-safety; unlike block~ in ugen_done_graph(), + * we don't use dsptaskqueue_update() and dsptaskqueue_check() + * because we already have all the information we need. */ + if (!x->x_threadsafe) + { + /* only search for the first offending canvas; the loop is + * necessary because of live editing and dynamic patching! */ + int i; + for (i = 0; i < x->x_n; i++) + { + if (!canvas_isthreadsafe(x->x_vec[i].c_gl, 1)) /* loud */ + break; + } + /* see also ugen_done_graph() */ + pd_error(x, "clone: parallel processing not possible because " + "some DSP objects are not officially thread-safe! Start Pd with " + "-nothreadsafe to circumvent this check (potentially dangerous!)"); + + parallel = 0; + } + } + if (parallel) + { + /* Every child abstraction gets its own DSP task. Unlike block~ + "parallel", + * cloned abstractions are not aware that they are being processed in parallel. + * Since all DSP tasks are joined by us, there is no need for double buffering + * in voutlet, and consequently there is no delay, either. + * The clone object maintains its own DSP task queue. 
Each cloned instance also has + * its own signal context because signals must not be reused across child abstractions. + * Each child abstractions starts with new input signals which are copies of our + * input signals, but belong to a dedicated signal context. After we have processed + * and joined all child abstractions, we can simply sum their output signals into + * our output signals. */ + int blocksize = sp[0]->s_n; + t_dsptaskqueue *oldqueue; + /* push our queue to the current DSP context */ + oldqueue = dsptaskqueue_push(x->x_dspqueue); + /* reset queue */ + dsp_add_reset(x->x_dspqueue); + /* schedule canvases as tasks. */ + for (j = 0; j < x->x_n; j++) + { + t_copy *copy = &x->x_vec[j]; + t_out *outvec = x->x_outvec[j]; + t_signal **tempio; + /* push new signal context, so that signals are not reused concurrently. */ + t_signalcontext *oldsigcontext; + if (!copy->c_sigcontext) + copy->c_sigcontext = signalcontext_new(); /* create lazily */ + else + signalcontext_clear(copy->c_sigcontext); + oldsigcontext = signalcontext_push(copy->c_sigcontext); + tempio = alloca((nin + nout) * sizeof(t_signal *)); + /* create input signals (in the new context) */ + for (i = 0; i < nin; ++i) + tempio[i] = signal_newfromcontext(0); + for (i = 0; i < nout; ++i) + /* create "fake" output signals which will be filled later by voutlet + * in the child abstraction; normally this would be done in ugen_doit(). */ + outvec[i].o_outsig = tempio[nin + i] = signal_newfromcontext(1); + /* create new DSP task */ + copy->c_task = dsptask_new(x->x_dspqueue, (t_dsptaskfn)clone_runtask, copy); + dsp_add(clone_schedtask, 1, copy); + copy->c_chainonset = ugen_getsize() - 1; + /* copy parent input signals to our input signals. we can already do this + * concurrently because nobody is writing to the parent input signal. */ + for (i = 0; i < nin; ++i) + dsp_add_copy(sp[i]->s_vec, tempio[i]->s_vec, blocksize); + /* now we can process the child abstraction. 
*/ + canvas_dodsp(copy->c_gl, 0, tempio); + dsp_add(dsp_done, 0); /* sentinel */ + copy->c_chainlength = ugen_getsize() - copy->c_chainonset - 1; + #if 0 /* not necessary; nobody will (re)use our signals. */ + for (i = 0; i < (nin + nout); ++i) + signal_makereusable(tempio[i]); + #endif + /* restore signal context. */ + signalcontext_pop(oldsigcontext); + } + /* join all tasks */ + dsp_add_join(x->x_dspqueue); + /* Finally we can sum the outputs. Unlike "regular" clone, we can directly write + * to the output signals because the input signals have already been copied. */ + for (j = 0; j < x->x_n; j++) + { + for (i = 0; i < nout; i++) + { + t_sample *from = x->x_outvec[j][i].o_outsig->s_vec; + t_sample *to = sp[nin + i]->s_vec; + if (j == 0) + dsp_add_copy(from, to, blocksize); + else + dsp_add_plus(from, to, to, blocksize); + } + } + /* restore the previous DSP queue */ + dsptaskqueue_pop(oldqueue); + + return; /* done */ + } +#endif /* PD_DSPTHREADS */ tempsigs = (t_signal **)alloca((nin + 2 * nout) * sizeof(*tempsigs)); tempio = tempsigs + nout; /* load input signals into signal vector to send subpatches */ @@ -329,15 +586,19 @@ static void clone_dsp(t_clone *x, t_signal **sp) sp[i]->s_refcount += x->x_n-1; tempio[i] = sp[i]; } - /* for first copy, write output to first nout temp sigs */ + /* create temp signals to safely sum the outputs of each canvas + * without overwriting the input. */ for (i = 0; i < nout; i++) tempsigs[i] = signal_newfromcontext(0); for (j = 0; j < x->x_n; j++) { + /* create "fake" output signals which will be filled later by voutlet + * in the child abstraction; normally this would be done in ugen_doit(). 
*/ for (i = 0; i < nout; i++) tempio[nin + i] = signal_newfromcontext(1); canvas_dodsp(x->x_vec[j].c_gl, 0, tempio); + /* sum output signals to temp signals */ for (i = 0; i < nout; i++) { if (j == 0) @@ -348,7 +609,7 @@ static void clone_dsp(t_clone *x, t_signal **sp) signal_makereusable(tempio[nin + i]); } } - /* copy to output signsls */ + /* copy temp signals to our output signals */ for (i = 0; i < nout; i++) { dsp_add_copy(tempsigs[i]->s_vec, sp[nin+i]->s_vec, tempsigs[i]->s_n); @@ -366,6 +627,11 @@ static void *clone_new(t_symbol *s, int argc, t_atom *argv) x->x_outvec = 0; x->x_startvoice = 0; x->x_suppressvoice = 0; +#if PD_DSPTHREADS + x->x_parallel = 0; + x->x_threadsafe = !sys_threadsafe; /* see canvas_new() */ + x->x_dspqueue = 0; +#endif clone_voicetovis = -1; if (argc == 0) { @@ -405,6 +671,12 @@ static void *clone_new(t_symbol *s, int argc, t_atom *argv) goto fail; x->x_vec = (t_copy *)getbytes(sizeof(*x->x_vec)); x->x_vec[0].c_gl = c; +#if PD_DSPTHREADS + x->x_vec[0].c_task = 0; + x->x_vec[0].c_chainonset = 0; + x->x_vec[0].c_chainlength = 0; + x->x_vec[0].c_sigcontext = 0; +#endif x->x_n = 1; x->x_nin = obj_ninlets(&x->x_vec[0].c_gl->gl_obj); x->x_invec = (t_in *)getbytes(x->x_nin * sizeof(*x->x_invec)); @@ -430,6 +702,9 @@ static void *clone_new(t_symbol *s, int argc, t_atom *argv) outvec[i].o_signal = obj_issignaloutlet(&x->x_vec[0].c_gl->gl_obj, i); outvec[i].o_n = x->x_startvoice; + #if PD_DSPTHREADS + outvec[i].o_outsig = 0; + #endif outvec[i].o_outlet = outlet_new(&x->x_obj, (outvec[i].o_signal ? 
&s_signal : 0)); obj_connect(&x->x_vec[0].c_gl->gl_obj, i, @@ -452,7 +727,7 @@ static void *clone_new(t_symbol *s, int argc, t_atom *argv) void clone_setup(void) { clone_class = class_new(gensym("clone"), (t_newmethod)clone_new, - (t_method)clone_free, sizeof(t_clone), CLASS_NOINLET, A_GIMME, 0); + (t_method)clone_free, sizeof(t_clone), CLASS_THREADSAFE | CLASS_NOINLET, A_GIMME, 0); class_addmethod(clone_class, (t_method)clone_click, gensym("click"), A_FLOAT, A_FLOAT, A_FLOAT, A_FLOAT, A_FLOAT, 0); class_addmethod(clone_class, (t_method)clone_loadbang, gensym("loadbang"), @@ -474,6 +749,10 @@ void clone_setup(void) A_FLOAT, A_FLOAT, 0); class_addmethod(clone_in_class, (t_method)clone_in_fwd, gensym("fwd"), A_GIMME, 0); +#if PD_DSPTHREADS + class_addmethod(clone_in_class, (t_method)clone_in_parallel, gensym("parallel"), + A_FLOAT, 0); +#endif class_addlist(clone_in_class, (t_method)clone_in_list); clone_out_class = class_new(gensym("clone-outlet"), 0, 0, diff --git a/src/g_io.c b/src/g_io.c index 28bdd7ae35..879129e68c 100644 --- a/src/g_io.c +++ b/src/g_io.c @@ -36,6 +36,7 @@ typedef struct _vinlet t_object x_obj; t_canvas *x_canvas; t_inlet *x_inlet; + char x_parallel; int x_bufsize; t_sample *x_buf; /* signal buffer; zero if not a signal */ t_sample *x_endbuf; @@ -54,6 +55,7 @@ static void *vinlet_new(t_symbol *s) t_vinlet *x = (t_vinlet *)pd_new(vinlet_class); x->x_canvas = canvas_getcurrent(); x->x_inlet = canvas_addinlet(x->x_canvas, &x->x_obj.ob_pd, 0); + x->x_parallel = 0; x->x_bufsize = 0; x->x_buf = 0; outlet_new(&x->x_obj, 0); @@ -141,9 +143,14 @@ static void vinlet_dsp(t_vinlet *x, t_signal **sp) outsig = sp[0]; if (x->x_directsignal) { - signal_setborrowed(sp[0], x->x_directsignal); + /* fill in fake signal created in ugen_doit(). 
*/ + signal_setborrowed(outsig, x->x_directsignal); } - else + else if (x->x_parallel) /* parallel */ + { + dsp_add_copy(x->x_buf, outsig->s_vec, outsig->s_n); + } + else /* reblocking */ { dsp_add(vinlet_perform, 3, x, outsig->s_vec, (t_int)outsig->s_vecsize); x->x_read = x->x_buf; @@ -175,7 +182,7 @@ int inlet_getsignalindex(t_inlet *x); /* set up prolog DSP code */ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched) + int downsample, int upsample, int reblock, int switched, int parallel) { t_signal *insig; /* no buffer means we're not a signal inlet */ @@ -183,12 +190,13 @@ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, return; x->x_updown.downsample = downsample; x->x_updown.upsample = upsample; + x->x_parallel = parallel; /* if the "reblock" flag is set, arrange to copy data in from the parent. */ if (reblock) { - int parentvecsize, bufsize, oldbufsize, prologphase; + int parentvecsize, bufsize, prologphase; int re_parentvecsize; /* resampled parentvectorsize */ /* this should never happen: */ if (!x->x_buf) return; @@ -212,10 +220,10 @@ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, bufsize = re_parentvecsize; if (bufsize < myvecsize) bufsize = myvecsize; - if (bufsize != (oldbufsize = x->x_bufsize)) + if (bufsize != x->x_bufsize) { t_sample *buf = x->x_buf; - t_freebytes(buf, oldbufsize * sizeof(*buf)); + t_freebytes(buf, x->x_bufsize * sizeof(*buf)); buf = (t_sample *)t_getbytes(bufsize * sizeof(*buf)); memset((char *)buf, 0, bufsize * sizeof(*buf)); x->x_bufsize = bufsize; @@ -240,7 +248,7 @@ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, re_parentvecsize, method); dsp_add(vinlet_doprolog, 3, x, x->x_updown.s_vec, (t_int)re_parentvecsize); - } + } /* if the input signal's reference count is zero, we have to free it here because we didn't in ugen_doit(). 
*/ @@ -250,6 +258,32 @@ void vinlet_dspprolog(struct _vinlet *x, t_signal **parentsigs, else memset((char *)(x->x_buf), 0, bufsize * sizeof(*x->x_buf)); x->x_directsignal = 0; } + else if (parallel) + { + if (myvecsize != x->x_bufsize) + { + t_sample *buf = x->x_buf; + t_freebytes(buf, x->x_bufsize * sizeof(*buf)); + buf = (t_sample *)t_getbytes(myvecsize * sizeof(*buf)); + x->x_bufsize = myvecsize; + x->x_buf = buf; + x->x_endbuf = buf + myvecsize; + } + if (parentsigs) + { + insig = parentsigs[inlet_getsignalindex(x->x_inlet)]; + /* copy input signals to buffer. LATER think how to avoid the + * extra copy, see vinlet_dsp(). */ + dsp_add_copy(insig->s_vec, x->x_buf, myvecsize); + /* if the input signal's reference count is zero, we have + * to free it here because we didn't in ugen_doit(). */ + if (!insig->s_refcount) + signal_makereusable(insig); + } + else + memset((char *)(x->x_buf), 0, myvecsize * sizeof(*x->x_buf)); + x->x_directsignal = 0; + } else { /* no reblocking; in this case our output signal is "borrowed" @@ -293,7 +327,7 @@ static void *vinlet_newsig(t_symbol *s, int argc, t_atom *argv) static void vinlet_setup(void) { vinlet_class = class_new(gensym("inlet"), (t_newmethod)vinlet_new, - (t_method)vinlet_free, sizeof(t_vinlet), CLASS_NOINLET, A_DEFSYM, 0); + (t_method)vinlet_free, sizeof(t_vinlet), CLASS_THREADSAFE | CLASS_NOINLET, A_DEFSYM, 0); class_addcreator((t_newmethod)vinlet_newsig, gensym("inlet~"), A_GIMME, 0); class_addbang(vinlet_class, vinlet_bang); class_addpointer(vinlet_class, vinlet_pointer); @@ -330,7 +364,8 @@ typedef struct _voutlet /* and here's a flag indicating that we aren't blocked but have to do a copy (because we're switched). 
*/ char x_justcopyout; - t_resample x_updown; + char x_parallel; + t_resample x_updown; } t_voutlet; static void *voutlet_new(t_symbol *s) @@ -443,21 +478,23 @@ static t_int *voutlet_doepilog_resampling(t_int *w) int outlet_getsignalindex(t_outlet *x); - /* prolog for outlets -- store pointer to the outlet on the - parent, which, if "reblock" is false, will want to refer + /* prolog for outlets -- store pointer to the outlet on the parent, + which, if "reblock" and "parallel" is false, will want to refer back to whatever we see on our input during the "dsp" method - called later. */ + called later. If "parallel" is true, we copy the previous buffer + content to the output signals. */ void voutlet_dspprolog(struct _voutlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched) + int downsample, int upsample, int reblock, int switched, int parallel) { /* no buffer means we're not a signal outlet */ if (!x->x_buf) return; x->x_updown.downsample=downsample; x->x_updown.upsample=upsample; - x->x_justcopyout = (switched && !reblock); - if (reblock) + x->x_justcopyout = (switched && !reblock && !parallel); + x->x_parallel = parallel; + if (reblock || parallel) { x->x_directsignal = 0; } @@ -467,6 +504,24 @@ void voutlet_dspprolog(struct _voutlet *x, t_signal **parentsigs, x->x_directsignal = parentsigs[outlet_getsignalindex(x->x_parentoutlet)]; } + if (parallel && parentsigs) + { + t_signal *outsig; + if (myvecsize != x->x_bufsize) + { + t_sample *buf = x->x_buf; + t_freebytes(buf, x->x_bufsize * sizeof(*buf)); + buf = (t_sample *)t_getbytes(myvecsize * sizeof(*buf)); + memset((char *)buf, 0, myvecsize * sizeof(*buf)); + x->x_bufsize = myvecsize; + x->x_buf = buf; + } + outsig = parentsigs[outlet_getsignalindex(x->x_parentoutlet)]; + if (outsig->s_n != myvecsize) bug("voutlet_dspprolog: bad vecsize"); + /* copy previous buffer content to output signals. 
the following + * DSP chain can now safely write to the buffer, see voutlet_dsp(). */ + dsp_add_copy(x->x_buf, outsig->s_vec, myvecsize); + } } static void voutlet_dsp(t_voutlet *x, t_signal **sp) @@ -474,16 +529,20 @@ static void voutlet_dsp(t_voutlet *x, t_signal **sp) t_signal *insig; if (!x->x_buf) return; insig = sp[0]; - if (x->x_justcopyout) + if (x->x_justcopyout) /* switched, but not reblocked or parallel */ dsp_add_copy(insig->s_vec, x->x_directsignal->s_vec, insig->s_n); else if (x->x_directsignal) { - /* if we're just going to make the signal available on the - parent patch, hand it off to the parent signal. */ - /* this is done elsewhere--> sp[0]->s_refcount++; */ - signal_setborrowed(x->x_directsignal, sp[0]); + /* if we're just going to make the signal available on the + * parent patch, hand it off to the parent signal. + * this is done elsewhere--> sp[0]->s_refcount++; */ + signal_setborrowed(x->x_directsignal, insig); } - else + else if (x->x_parallel) /* parallel processing */ + /* write to buffer. at this point, we have already copied the + * previous buffer to the signal outlets, see voutlet_dspprolog(). */ + dsp_add_copy(insig->s_vec, x->x_buf, insig->s_n); + else /* reblocked */ dsp_add(voutlet_perform, 3, x, insig->s_vec, (t_int)insig->s_n); } @@ -492,7 +551,7 @@ static void voutlet_dsp(t_voutlet *x, t_signal **sp) If we aren't reblocking, there's nothing to do here. */ void voutlet_dspepilog(struct _voutlet *x, t_signal **parentsigs, int myvecsize, int calcsize, int phase, int period, int frequency, - int downsample, int upsample, int reblock, int switched) + int downsample, int upsample, int reblock, int switched, int parallel) { if (!x->x_buf) return; /* this shouldn't be necesssary... 
*/ x->x_updown.downsample=downsample; @@ -500,7 +559,7 @@ void voutlet_dspepilog(struct _voutlet *x, t_signal **parentsigs, if (reblock) { t_signal *outsig; - int parentvecsize, bufsize, oldbufsize; + int parentvecsize, bufsize; int re_parentvecsize; int bigperiod, epilogphase, blockphase; if (parentsigs) @@ -521,10 +580,10 @@ void voutlet_dspepilog(struct _voutlet *x, t_signal **parentsigs, blockphase = (phase + period - 1) & (bigperiod - 1) & (- period); bufsize = re_parentvecsize; if (bufsize < myvecsize) bufsize = myvecsize; - if (bufsize != (oldbufsize = x->x_bufsize)) + if (bufsize != x->x_bufsize) { t_sample *buf = x->x_buf; - t_freebytes(buf, oldbufsize * sizeof(*buf)); + t_freebytes(buf, x->x_bufsize * sizeof(*buf)); buf = (t_sample *)t_getbytes(bufsize * sizeof(*buf)); memset((char *)buf, 0, bufsize * sizeof(*buf)); x->x_bufsize = bufsize; @@ -597,7 +656,7 @@ static void *voutlet_newsig(t_symbol *s) static void voutlet_setup(void) { voutlet_class = class_new(gensym("outlet"), (t_newmethod)voutlet_new, - (t_method)voutlet_free, sizeof(t_voutlet), CLASS_NOINLET, A_DEFSYM, 0); + (t_method)voutlet_free, sizeof(t_voutlet), CLASS_THREADSAFE | CLASS_NOINLET, A_DEFSYM, 0); class_addcreator((t_newmethod)voutlet_newsig, gensym("outlet~"), A_DEFSYM, 0); class_addbang(voutlet_class, voutlet_bang); class_addpointer(voutlet_class, voutlet_pointer); diff --git a/src/g_scalar.c b/src/g_scalar.c index 9f3d27f917..ee5f906a05 100644 --- a/src/g_scalar.c +++ b/src/g_scalar.c @@ -38,7 +38,7 @@ down the owner) we increase a reference count. The following routine is called whenever a gpointer is unset from pointing here. If the owner is gone and the refcount goes to zero, we can free the gstub safely. 
*/ -static void gstub_dis(t_gstub *gs) +void gstub_dis(t_gstub *gs) { int refcount = --gs->gs_refcount; if ((!refcount) && gs->gs_which == GP_NONE) diff --git a/src/m_class.c b/src/m_class.c index d4bc82a587..dc1a4c025a 100644 --- a/src/m_class.c +++ b/src/m_class.c @@ -60,13 +60,20 @@ void s_stuff_newpdinstance(void) STUFF->st_externlist = STUFF->st_searchpath = STUFF->st_staticpath = STUFF->st_helppath = STUFF->st_temppath = 0; STUFF->st_schedblocksize = STUFF->st_blocksize = DEFDACBLKSIZE; + STUFF->st_inchannels = STUFF->st_outchannels = 0; STUFF->st_dacsr = DEFDACSAMPLERATE; + STUFF->st_soundin = NULL; + STUFF->st_soundout = NULL; STUFF->st_printhook = sys_printhook; STUFF->st_impdata = NULL; + STUFF->st_soundout_locks = NULL; } +void sys_audio_free(void); + void s_stuff_freepdinstance(void) { + sys_audio_free(); freebytes(STUFF, sizeof(*STUFF)); } @@ -492,6 +499,7 @@ t_class *class_new(t_symbol *s, t_newmethod newmethod, t_method freemethod, c->c_pwb = 0; c->c_firstin = ((flags & CLASS_NOINLET) == 0); c->c_patchable = (typeflag == CLASS_PATCHABLE); + c->c_threadsafe = (flags & CLASS_THREADSAFE) != 0; c->c_gobj = (typeflag >= CLASS_GOBJ); c->c_drawcommand = 0; c->c_floatsignalin = 0; @@ -610,6 +618,19 @@ void class_addmethod(t_class *c, t_method fn, t_symbol *sel, post("warning: signal method overrides class_mainsignalin"); c->c_floatsignalin = -1; } +#if PD_DSPTHREADS + /* post non-thread-safe DSP objects */ + if (sys_verbose && sys_threadsafe && (sel == gensym("dsp")) + && !c->c_threadsafe) + { + char *slash = strrchr(c->c_externdir->s_name, '/'); + if (slash) /* external */ + logpost(0, PD_VERBOSE, "%s/%s not thread-safe", + slash+1, c->c_name->s_name); + else /* built-in objects should be thread-safe; did we forget one? */ + pd_error(0, "%s not thread-safe", c->c_name->s_name); + } +#endif /* check for special cases. "Pointer" is missing here so that pd_objectmaker's pointer method can be typechecked differently. 
*/ if (sel == &s_bang) diff --git a/src/m_imp.h b/src/m_imp.h index 96f1885379..d6d9774808 100644 --- a/src/m_imp.h +++ b/src/m_imp.h @@ -58,6 +58,7 @@ struct _class char c_firstin; /* if patchable, true if draw first inlet */ char c_drawcommand; /* a drawing command for a template */ t_classfreefn c_classfreefn; /* function to call before freeing class */ + char c_threadsafe; /* can be safely used in parallel DSP */ }; /* m_pd.c */ diff --git a/src/m_pd.h b/src/m_pd.h index 1cf440edd1..2398736d2c 100644 --- a/src/m_pd.h +++ b/src/m_pd.h @@ -111,6 +111,18 @@ typedef unsigned __int64 uint64_t; # error invalid FLOATSIZE: must be 32 or 64 #endif +/* externals may override this for parallel processing support. + * You have to use the CLASS_DEFAULT macro in class_new() and + * in your perform routine(s) you must only call API functions + * that are marked as THREADSAFE! */ +#ifndef PD_PARALLEL +#define PD_PARALLEL 0 +#endif + +/* used to mark API functions as thread-safe, meaning that they + * can be safely used in a perform routine. 
*/ +#define THREADSAFE + typedef PD_LONGINTTYPE t_int; /* pointer-size integer */ typedef PD_FLOATTYPE t_float; /* a float type at most the same size */ typedef PD_FLOATTYPE t_floatarg; /* float type for function calls */ @@ -373,16 +385,15 @@ EXTERN t_symbol *binbuf_realizedollsym(t_symbol *s, int ac, const t_atom *av, /* ------------------ clocks --------------- */ EXTERN t_clock *clock_new(void *owner, t_method fn); -EXTERN void clock_set(t_clock *x, double systime); -EXTERN void clock_delay(t_clock *x, double delaytime); -EXTERN void clock_unset(t_clock *x); -EXTERN void clock_setunit(t_clock *x, double timeunit, int sampflag); -EXTERN double clock_getlogicaltime(void); -EXTERN double clock_getsystime(void); /* OBSOLETE; use clock_getlogicaltime() */ -EXTERN double clock_gettimesince(double prevsystime); -EXTERN double clock_gettimesincewithunits(double prevsystime, - double units, int sampflag); -EXTERN double clock_getsystimeafter(double delaytime); +THREADSAFE EXTERN void clock_set(t_clock *x, double systime); +THREADSAFE EXTERN void clock_delay(t_clock *x, double delaytime); +THREADSAFE EXTERN void clock_unset(t_clock *x); +THREADSAFE EXTERN void clock_setunit(t_clock *x, double timeunit, int sampflag); +THREADSAFE EXTERN double clock_getlogicaltime(void); +THREADSAFE EXTERN double clock_getsystime(void); /* OBSOLETE; use clock_getlogicaltime() */ +THREADSAFE EXTERN double clock_gettimesince(double prevsystime); +THREADSAFE EXTERN double clock_gettimesincewithunits(double prevsystime, double units, int sampflag); +THREADSAFE EXTERN double clock_getsystimeafter(double delaytime); EXTERN void clock_free(t_clock *x); /* ----------------- pure data ---------------- */ @@ -461,14 +472,21 @@ EXTERN const t_parentwidgetbehavior *pd_getparentwidget(t_pd *x); /* -------------------- classes -------------- */ -#define CLASS_DEFAULT 0 /* flags for new classes below */ +/* flags for new classes below */ #define CLASS_PD 1 #define CLASS_GOBJ 2 #define CLASS_PATCHABLE 3 
+#define CLASS_THREADSAFE 4 #define CLASS_NOINLET 8 #define CLASS_TYPEMASK 3 +#if PD_PARALLEL +# define CLASS_DEFAULT CLASS_THREADSAFE +#else +# define CLASS_DEFAULT 0 +#endif + EXTERN t_class *class_new(t_symbol *name, t_newmethod newmethod, t_method freemethod, size_t size, int flags, t_atomtype arg1, ...); @@ -533,15 +551,15 @@ EXTERN void class_setfreefn(t_class *c, t_classfreefn fn); /* ------------ printing --------------------------------- */ -EXTERN void post(const char *fmt, ...); -EXTERN void startpost(const char *fmt, ...); -EXTERN void poststring(const char *s); -EXTERN void postfloat(t_floatarg f); -EXTERN void postatom(int argc, const t_atom *argv); -EXTERN void endpost(void); +THREADSAFE EXTERN void post(const char *fmt, ...); +THREADSAFE EXTERN void startpost(const char *fmt, ...); +THREADSAFE EXTERN void poststring(const char *s); +THREADSAFE EXTERN void postfloat(t_floatarg f); +THREADSAFE EXTERN void postatom(int argc, const t_atom *argv); +THREADSAFE EXTERN void endpost(void); -EXTERN void bug(const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(1, 2); -EXTERN void pd_error(const void *object, const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(2, 3); +THREADSAFE EXTERN void bug(const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(1, 2); +THREADSAFE EXTERN void pd_error(const void *object, const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(2, 3); /* for logpost(); does *not* work with verbose()! */ typedef enum { @@ -552,11 +570,11 @@ typedef enum { PD_VERBOSE } t_loglevel; -EXTERN void logpost(const void *object, int level, const char *fmt, ...) +THREADSAFE EXTERN void logpost(const void *object, int level, const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(3, 4); /* deprecated, use logpost() instead. */ -EXTERN void verbose(int level, const char *fmt, ...) ATTRIBUTE_FORMAT_PRINTF(2, 3); +THREADSAFE EXTERN void verbose(int level, const char *fmt, ...) 
ATTRIBUTE_FORMAT_PRINTF(2, 3); /* ------------ system interface routines ------------------- */ @@ -625,14 +643,14 @@ EXTERN int sys_get_outchannels(void); EXTERN void dsp_add(t_perfroutine f, int n, ...); EXTERN void dsp_addv(t_perfroutine f, int n, t_int *vec); -EXTERN void pd_fft(t_float *buf, int npoints, int inverse); -EXTERN int ilog2(int n); +THREADSAFE EXTERN void pd_fft(t_float *buf, int npoints, int inverse); +THREADSAFE EXTERN int ilog2(int n); -EXTERN void mayer_fht(t_sample *fz, int n); -EXTERN void mayer_fft(int n, t_sample *real, t_sample *imag); -EXTERN void mayer_ifft(int n, t_sample *real, t_sample *imag); -EXTERN void mayer_realfft(int n, t_sample *real); -EXTERN void mayer_realifft(int n, t_sample *real); +THREADSAFE EXTERN void mayer_fht(t_sample *fz, int n); +THREADSAFE EXTERN void mayer_fft(int n, t_sample *real, t_sample *imag); +THREADSAFE EXTERN void mayer_ifft(int n, t_sample *real, t_sample *imag); +THREADSAFE EXTERN void mayer_realfft(int n, t_sample *real); +THREADSAFE EXTERN void mayer_realifft(int n, t_sample *real); EXTERN float *cos_table; #define LOGCOSTABSIZE 9 @@ -669,18 +687,18 @@ EXTERN void resamplefrom_dsp(t_resample *x, t_sample *in, int insize, int outsiz EXTERN void resampleto_dsp(t_resample *x, t_sample *out, int insize, int outsize, int method); /* ----------------------- utility functions for signals -------------- */ -EXTERN t_float mtof(t_float); -EXTERN t_float ftom(t_float); -EXTERN t_float rmstodb(t_float); -EXTERN t_float powtodb(t_float); -EXTERN t_float dbtorms(t_float); -EXTERN t_float dbtopow(t_float); - -EXTERN t_float q8_sqrt(t_float); -EXTERN t_float q8_rsqrt(t_float); +THREADSAFE EXTERN t_float mtof(t_float); +THREADSAFE EXTERN t_float ftom(t_float); +THREADSAFE EXTERN t_float rmstodb(t_float); +THREADSAFE EXTERN t_float powtodb(t_float); +THREADSAFE EXTERN t_float dbtorms(t_float); +THREADSAFE EXTERN t_float dbtopow(t_float); + +THREADSAFE EXTERN t_float q8_sqrt(t_float); +THREADSAFE EXTERN t_float 
q8_rsqrt(t_float); #ifndef N32 -EXTERN t_float qsqrt(t_float); /* old names kept for extern compatibility */ -EXTERN t_float qrsqrt(t_float); +THREADSAFE EXTERN t_float qsqrt(t_float); /* old names kept for extern compatibility */ +THREADSAFE EXTERN t_float qrsqrt(t_float); #endif /* --------------------- data --------------------------------- */ @@ -697,12 +715,57 @@ EXTERN int garray_npoints(t_garray *x); EXTERN char *garray_vec(t_garray *x); EXTERN void garray_resize(t_garray *x, t_floatarg f); /* avoid; use this: */ EXTERN void garray_resize_long(t_garray *x, long n); /* better version */ -EXTERN void garray_usedindsp(t_garray *x); +EXTERN void garray_usedindsp(t_garray *x); /* avoid, use garrayref methods instead */ EXTERN void garray_setsaveit(t_garray *x, int saveit); EXTERN t_glist *garray_getglist(t_garray *x); EXTERN t_array *garray_getarray(t_garray *x); EXTERN t_class *scalar_class; +/* t_garrayref is a safe reference to a garray (similar to gpointer). + * The actual array data can be obtained on demand by the functions below. + * (You must not store any pointers to array data because it might become stale!) + * The advantage of using those functions instead of garray_getfloatwords() + * is that you don't have to call garray_usedindsp(), which means the array data + * can change without rebuilding the DSP graph! + * They also speed up and simplify garray access in control objects because you + * do not have to look up the garray every single time. + * Finally, they allow to synchronize array data access in parallel DSP processing. + * See d_array.c for examples. */ +typedef struct _arrayref +{ + t_garray *ar_garray; + t_gstub *ar_stub; +} t_garrayref; + +EXTERN void garrayref_init(t_garrayref *x); +EXTERN void garrayref_unset(t_garrayref *x); +/* set garrayref to a new garray */ +EXTERN int garrayref_set(t_garrayref *x, t_symbol *arrayname, t_object *obj); +/* check if the garrayref is valid. Call before accessing the 'ar_garray' member! 
*/ +EXTERN int garrayref_check(t_garrayref *x); +/* for control objects: safely access array data. If the reference is empty or + * stale, (re)acquire the array by name; if 'arrayname' is NULL, fail silently. + * Returns 1 if it could get the array data; otherwise returns 0. + * If you want to set the garrayref to another garray, you must either call + * garrayref_set() with the new name, or call garrayref_unset() and acquire it lazily + * with the next call to garrayref_get(). */ +EXTERN int garrayref_get(t_garrayref *x, int *size, t_word **vec, t_symbol *arrayname, t_object *object); +/* for DSP objects: lock/unlock garray for reading/writing in the perform routine. + * Returns 1 if it could get the array data and lock the garray; otherwise returns 0. + * WARNING: do not attempt to unlock the garray if you could not lock it! */ +#if PD_PARALLEL +THREADSAFE EXTERN int garrayref_write_lock(t_garrayref *x, int *size, t_word **vec); +THREADSAFE EXTERN void garrayref_write_unlock(t_garrayref *x); +THREADSAFE EXTERN int garrayref_read_lock(t_garrayref *x, int *size, t_word **vec); +THREADSAFE EXTERN void garrayref_read_unlock(t_garrayref *x); +#else +/* optimization for non-parallel builds */ +#define garrayref_write_lock(x, size, vec) garrayref_get(x, size, vec, 0, 0) +#define garrayref_write_unlock(x) +#define garrayref_read_lock(x, size, vec) garrayref_get(x, size, vec, 0, 0) +#define garrayref_read_unlock(x) +#endif /* PD_PARALLEL */ + EXTERN t_float *value_get(t_symbol *s); EXTERN void value_release(t_symbol *s); EXTERN int value_getfloat(t_symbol *s, t_float *f); @@ -924,12 +987,16 @@ EXTERN void pd_setinstance(t_pdinstance *x); EXTERN void pdinstance_free(t_pdinstance *x); #endif /* PDINSTANCE */ -#if defined(PDTHREADS) && defined(PDINSTANCE) #ifdef _MSC_VER -#define PERTHREAD __declspec(thread) +#define THREADLOCAL __declspec(thread) +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) +#define THREADLOCAL _Thread_local #else -#define PERTHREAD __thread 
+#define THREADLOCAL __thread #endif /* _MSC_VER */ + +#if PDTHREADS && defined(PDINSTANCE) +#define PERTHREAD THREADLOCAL #else #define PERTHREAD #endif diff --git a/src/m_sched.c b/src/m_sched.c index 5d3dc234e9..a092df8ae7 100644 --- a/src/m_sched.c +++ b/src/m_sched.c @@ -9,6 +9,9 @@ #include "s_stuff.h" #ifdef _WIN32 #include +#endif +#if PD_DSPTHREADS +#include "s_sync.h" #endif /* LATER consider making this variable. It's now the LCM of all sample @@ -30,12 +33,27 @@ int sys_sleepgrain; typedef void (*t_clockmethod)(void *client); +#if PD_DSPTHREADS +/* do not use PERTHREAD! */ +static THREADLOCAL int dspthreadindex = 0; + +void dspthread_setindex(int index) +{ + dspthreadindex = index; +} + +#endif /* PD_DSPTHREADS */ + struct _clock { +#if PD_DSPTHREADS + t_lfs_node c_node; + double c_wanttime; +#endif double c_settime; /* in TIMEUNITS; <0 if unset */ void *c_owner; t_clockmethod c_fn; - struct _clock *c_next; + struct _clock *c_next; /* for the clock list */ t_float c_unit; /* >0 if in TIMEUNITS; <0 if in samples */ }; @@ -46,6 +64,10 @@ struct _clock t_clock *clock_new(void *owner, t_method fn) { t_clock *x = (t_clock *)getbytes(sizeof *x); +#if PD_DSPTHREADS + lfs_node_init(x); + x->c_wanttime = -1; +#endif x->c_settime = -1; x->c_owner = owner; x->c_fn = (t_clockmethod)fn; @@ -54,8 +76,42 @@ t_clock *clock_new(void *owner, t_method fn) return (x); } +#if PD_DSPTHREADS +void clock_defer(t_clock *x); + +/* dispatch clocks scheduled from DSP helper threads */ +void clock_dispatch(t_clock *x) +{ +#if 1 + if (dspthreadindex != 0) + { + sys_lock(); + bug("clock_dispatch"); + sys_unlock(); + return; + } +#endif + for (; x; x = lfs_node_next(x)) + { + if (x->c_wanttime >= 0) + clock_set(x, x->c_wanttime); + else + clock_unset(x); + } +} +#endif /* PD_DSPTHREADS */ + void clock_unset(t_clock *x) { +#if PD_DSPTHREADS + if (dspthreadindex > 0) + { + /* called from DSP helper thread -> defer */ + x->c_wanttime = -1; + clock_defer(x); + return; + } +#endif if 
(x->c_settime >= 0) { if (x == pd_this->pd_clock_setlist) @@ -74,6 +130,15 @@ void clock_unset(t_clock *x) void clock_set(t_clock *x, double setticks) { if (setticks < pd_this->pd_systime) setticks = pd_this->pd_systime; +#if PD_DSPTHREADS + if (dspthreadindex > 0) + { + /* called from DSP helper thread -> defer */ + x->c_wanttime = setticks; + clock_defer(x); + return; + } +#endif clock_unset(x); x->c_settime = setticks; if (pd_this->pd_clock_setlist && @@ -241,6 +306,9 @@ void sched_tick(void) { double next_sys_time = pd_this->pd_systime + SYSTIMEPERTICK; int countdown = 5000; +#if PD_DSPTHREADS + dspthreadindex = 0; /* just to be sure */ +#endif while (pd_this->pd_clock_setlist && pd_this->pd_clock_setlist->c_settime < next_sys_time) { @@ -445,8 +513,12 @@ int m_mainloop(void) int m_batchmain(void) { + t_audiosettings as; + sys_get_audio_settings(&as); + sys_dspthreadpool_start(&as.a_numthreads, 0); while (sys_quit != SYS_QUIT_QUIT) sched_tick(); + sys_dspthreadpool_stop(0); return (0); } diff --git a/src/makefile.gnu b/src/makefile.gnu index f44d78690c..0425ddd658 100644 --- a/src/makefile.gnu +++ b/src/makefile.gnu @@ -34,6 +34,7 @@ PDEXEC = $(BIN_DIR)/pd EXT= pd_linux ALSA=true OSS=true +PARALLEL=true prefix = /usr/local exec_prefix = ${prefix} @@ -60,6 +61,10 @@ CPPFLAGS = -DPD -DHAVE_LIBDL -DHAVE_UNISTD_H -DHAVE_ALLOCA_H \ -Wno-unused -Wno-unused-parameter -Wno-parentheses -Wno-switch \ -Wno-cast-function-type -Wno-stringop-truncation -Wno-format-truncation +ifeq ($(PARALLEL), true) +CPPFLAGS += -DPD_DSPTHREADS=1 -DPD_PARALLEL=1 +endif + # code generation flags (e.g., optimization). 
CODECFLAGS = -g -O3 -ffast-math -funroll-loops -fomit-frame-pointer @@ -116,11 +121,11 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ s_main.c s_inter.c s_inter_gui.c s_print.c s_loader.c s_path.c s_entry.c \ - s_audio.c s_audio_paring.c s_midi.c s_net.c s_utf8.c \ + s_audio.c s_audio_paring.c s_midi.c s_net.c s_sync.c s_utf8.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ d_delay.c d_resample.c d_soundfile.c d_soundfile_aiff.c d_soundfile_caf.c \ - d_soundfile_next.c d_soundfile_wave.c \ + d_soundfile_next.c d_soundfile_wave.c d_threadpool.c \ x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c \ x_time.c x_acoustics.c x_net.c x_text.c x_gui.c x_list.c x_array.c \ x_file.c x_scalar.c x_vexp.c x_vexp_if.c x_vexp_fun.c \ diff --git a/src/makefile.mac b/src/makefile.mac index a355ebd85a..127206d068 100644 --- a/src/makefile.mac +++ b/src/makefile.mac @@ -11,6 +11,7 @@ EXT= pd_darwin GUINAME= libPdTcl.dylib ARCH= -arch i386 -arch ppc EXTRAARCH= -arch i386 -arch x86_64 -arch ppc +PARALLEL=true MKDIR_P = mkdir -p @@ -37,6 +38,7 @@ CPPFLAGS = -DPD -DINSTALL_PREFIX=\"$(prefix)\" \ -I$(PADIR)/src/os/mac_osx/ -I$(PMDIR)/pm_common \ -I$(PMDIR)/pm_mac -I$(PMDIR)/porttime \ -DUSEAPI_PORTAUDIO -DPA_USE_COREAUDIO -DNEWBUFFER + ARCH_CFLAGS = $(ARCH) WARN_CFLAGS = -Wall -W -Wstrict-prototypes -Wno-unused -Wno-unused-parameter \ -Wno-parentheses -Wno-switch @@ -46,6 +48,12 @@ LDFLAGS = -Wl -framework CoreAudio -framework AudioUnit \ -framework AudioToolbox -framework Carbon -framework CoreMIDI \ -framework CoreFoundation $(ARCH) \ +ifeq ($(PARALLEL), true) +CPPFLAGS += -DPD_DSPTHREADS=1 -DPD_PARALLEL=1 +MORECFLAGS += -mmacosx-version-min=10.9 +LDFLAGS += -mmacosx-version-min=10.9 +endif + LIB = -ldl -lm -lpthread ifdef JACK @@ -90,12 +98,12 @@ SRC = g_canvas.c g_graph.c 
g_text.c g_rtext.c g_array.c g_template.c g_io.c \ g_editor_extras.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ - s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c \ + s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c s_sync.c \ s_loader.c s_path.c s_entry.c s_audio.c s_midi.c s_net.c s_utf8.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ d_delay.c d_resample.c d_soundfile.c d_soundfile_aiff.c d_soundfile_caf.c \ - d_soundfile_next.c d_soundfile_wave.c \ + d_soundfile_next.c d_soundfile_wave.c d_threadpool.c \ x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c \ x_time.c x_acoustics.c x_net.c x_text.c x_gui.c x_list.c x_array.c \ x_file.c x_scalar.c x_vexp.c x_vexp_if.c x_vexp_fun.c \ diff --git a/src/makefile.mingw b/src/makefile.mingw index b42004ef81..730df9aa90 100644 --- a/src/makefile.mingw +++ b/src/makefile.mingw @@ -39,6 +39,7 @@ PDDLL = $(EXECDIR)/pd.dll PDCOM = $(EXECDIR)/pd.com PDRECEIVE = $(EXECDIR)/pdreceive.exe PDSEND = $(EXECDIR)/pdsend.exe +PARALLEL=true DLLWRAP= dllwrap @@ -70,6 +71,10 @@ WARN_CFLAGS = -Wall -W -Wstrict-prototypes -Wno-unused \ ARCH_CFLAGS = -DPD -DPD_INTERNAL -DPA_USE_ASIO -DPA_USE_WMME -DWINVER=0x0502 \ -DUSEAPI_MMIO -DUSEAPI_PORTAUDIO -mms-bitfields -DWISH='"wish85.exe"' +ifeq ($(PARALLEL), true) +ARCH_CFLAGS += -DPD_DSPTHREADS=1 -DPD_PARALLEL=1 +endif + CFLAGS += $(ARCH_CFLAGS) $(WARN_CFLAGS) $(OPT_CFLAGS) $(MORECFLAGS) STRIP = strip --strip-unneeded -R .note -R .comment @@ -124,12 +129,12 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ g_editor_extras.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ - s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c \ + s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c s_sync.c \ s_loader.c s_path.c s_entry.c s_audio.c s_midi.c s_net.c s_utf8.c \ d_ugen.c d_ctl.c 
d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ d_delay.c d_resample.c d_soundfile.c d_soundfile_aiff.c d_soundfile_caf.c \ - d_soundfile_next.c d_soundfile_wave.c \ + d_soundfile_next.c d_soundfile_wave.c d_threadpool.c \ x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c \ x_time.c x_acoustics.c x_net.c x_text.c x_gui.c x_list.c x_array.c \ x_file.c x_scalar.c x_vexp.c x_vexp_if.c x_vexp_fun.c diff --git a/src/makefile.msvc b/src/makefile.msvc index cd76e1ac3c..eea6da7807 100644 --- a/src/makefile.msvc +++ b/src/makefile.msvc @@ -51,6 +51,8 @@ endif # \ !endif +PARALLEL=true + PDINCLUDE = /I./ $(EXTRA_INCLUDES) PDLIB = /NODEFAULTLIB:libcmt /NODEFAULTLIB:libcpmt /NODEFAULTLIB:oldnames \ @@ -75,6 +77,11 @@ CFLAGS = /nologo \ /DUSEAPI_MMIO /DUSEAPI_PORTAUDIO \ /DPA_LITTLE_ENDIAN /DPA19 \ /D_CRT_SECURE_NO_WARNINGS + +ifeq ($(PARALLEL), true) +CFLAGS += /DPD_DSPTHREADS=1 /DPD_PARALLEL=1 +endif + LFLAGS = /nologo SYSSRC = s_audio_pa.c s_audio_paring.c \ @@ -87,12 +94,12 @@ SRC = g_canvas.c g_graph.c g_text.c g_rtext.c g_array.c g_template.c g_io.c \ g_editor_extras.c \ m_pd.c m_class.c m_obj.c m_atom.c m_memory.c m_binbuf.c \ m_conf.c m_glob.c m_sched.c \ - s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c \ + s_main.c s_inter.c s_inter_gui.c s_file.c s_print.c s_sync.c \ s_loader.c s_path.c s_entry.c s_audio.c s_midi.c s_net.c s_utf8.c \ d_ugen.c d_ctl.c d_arithmetic.c d_osc.c d_filter.c d_dac.c d_misc.c \ d_math.c d_fft.c d_fft_fftsg.c d_array.c d_global.c \ d_delay.c d_resample.c d_soundfile.c d_soundfile_aiff.c d_soundfile_caf.c \ - d_soundfile_next.c d_soundfile_wave.c \ + d_soundfile_next.c d_soundfile_wave.c d_threadpool.c \ x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c \ x_time.c x_acoustics.c x_net.c x_text.c x_gui.c x_list.c x_array.c \ x_file.c x_scalar.c x_vexp.c x_vexp_if.c x_vexp_fun.c \ diff --git a/src/s_audio.c b/src/s_audio.c index 5e41dd4513..866d0333b6 100644 --- 
a/src/s_audio.c +++ b/src/s_audio.c @@ -22,6 +22,10 @@ #include #include +#if PD_DSPTHREADS +#include "s_spinlock.h" +#endif + #ifdef _MSC_VER #define snprintf _snprintf #endif @@ -42,6 +46,9 @@ static t_audiosettings audio_nextsettings; void sched_audio_callbackfn(void); void sched_reopenmeplease(void); +#if PD_DSPTHREADS +int sys_defnumdspthreads(void); +#endif int audio_isopen(void) { @@ -79,6 +86,26 @@ static int audio_getfixedblocksize(int api) return 0; } +void sys_audio_free(void) +{ + if (STUFF->st_soundin) + freebytes(STUFF->st_soundin, + (STUFF->st_inchannels ? STUFF->st_inchannels : 2) * + (DEFDACBLKSIZE * sizeof(t_sample))); + STUFF->st_soundin = 0; + if (STUFF->st_soundout) + freebytes(STUFF->st_soundout, + (STUFF->st_outchannels ? STUFF->st_outchannels : 2) * + (DEFDACBLKSIZE * sizeof(t_sample))); + STUFF->st_soundout = 0; +#if PD_DSPTHREADS + if (STUFF->st_soundout_locks) + freebytes(STUFF->st_soundout_locks, + STUFF->st_outchannels * sizeof(t_spinlock)); + STUFF->st_soundout_locks = 0; +#endif +} + /* inform rest of Pd of current channels and sample rate. Do this when opening audio device. This is also called from alsamm but I think that is no longer in use, so in principle this could be static. */ @@ -89,15 +116,10 @@ void sys_setchsr(int chin, int chout, int sr) (DEFDACBLKSIZE*sizeof(t_sample)); int outbytes = (chout ? chout : 2) * (DEFDACBLKSIZE*sizeof(t_sample)); + int i; + + sys_audio_free(); - if (STUFF->st_soundin) - freebytes(STUFF->st_soundin, - (STUFF->st_inchannels? STUFF->st_inchannels : 2) * - (DEFDACBLKSIZE*sizeof(t_sample))); - if (STUFF->st_soundout) - freebytes(STUFF->st_soundout, - (STUFF->st_outchannels? 
STUFF->st_outchannels : 2) * - (DEFDACBLKSIZE*sizeof(t_sample))); STUFF->st_inchannels = chin; STUFF->st_outchannels = chout; if (!audio_isfixedsr(sys_audioapiopened)) @@ -109,6 +131,12 @@ void sys_setchsr(int chin, int chout, int sr) STUFF->st_soundout = (t_sample *)getbytes(outbytes); memset(STUFF->st_soundout, 0, outbytes); +#if PD_DSPTHREADS + STUFF->st_soundout_locks = (t_spinlock *)getbytes(chout * sizeof(t_spinlock)); + for (i = 0; i < chout; i++) + spinlock_init(&STUFF->st_soundout_locks[i]); +#endif + logpost(NULL, PD_VERBOSE, "input channels = %d, output channels = %d", STUFF->st_inchannels, STUFF->st_outchannels); canvas_resume_dsp(canvas_suspend_dsp()); @@ -213,6 +241,11 @@ void sys_get_audio_settings(t_audiosettings *a) audio_nextsettings.a_choutdevvec[0] = SYS_DEFAULTCH; audio_nextsettings.a_advance = DEFAULTADVANCE; audio_nextsettings.a_blocksize = DEFDACBLKSIZE; + #if PD_DSPTHREADS + audio_nextsettings.a_numthreads = 0; /* default */ + #else + audio_nextsettings.a_numthreads = -1; /* no threads */ + #endif initted = 1; } *a = audio_nextsettings; @@ -248,6 +281,14 @@ void sys_set_audio_settings(t_audiosettings *a) a->a_blocksize = 1 << ilog2(a->a_blocksize); if (a->a_blocksize < DEFDACBLKSIZE || a->a_blocksize > MAXBLOCKSIZE) a->a_blocksize = DEFDACBLKSIZE; +#if PD_DSPTHREADS + /* 0: default number of threads. */ + if (a->a_numthreads <= 0) + a->a_numthreads = sys_defnumdspthreads(); +#else + /* -1 tells the GUI that PD_DSPTHREADS is disabled. 
*/ + a->a_numthreads = -1; +#endif audio_make_sane(&a->a_noutdev, a->a_outdevvec, &a->a_nchoutdev, a->a_choutdevvec, MAXAUDIOOUTDEV); @@ -270,6 +311,9 @@ void sys_close_audio(void) } if (!audio_isopen()) return; + + sys_dspthreadpool_stop(0); + #ifdef USEAPI_PORTAUDIO if (sys_audioapiopened == API_PORTAUDIO) pa_close_audio(); @@ -343,6 +387,9 @@ void sys_reopen_audio(void) audio_compact_and_count_channels(&as.a_noutdev, as.a_outdevvec, as.a_choutdevvec, &totaloutchans, MAXAUDIOOUTDEV); sys_setchsr(totalinchans, totaloutchans, as.a_srate); + sys_dspthreadpool_start(&as.a_numthreads, 0); + /* save actual (validated) thread count. */ + audio_nextsettings.a_numthreads = as.a_numthreads; if (!as.a_nindev && !as.a_noutdev) { sched_set_using_audio(SCHED_AUDIO_NONE); @@ -644,7 +691,7 @@ void glob_audio_properties(t_pd *dummy, t_floatarg flongform) pdgui_stub_deleteforkey(0); pdgui_stub_vnew(&glob_pdobject, "pdtk_audio_dialog", (void *)glob_audio_properties, - "iiii iiii iiii iiii s ii s i s", + "iiii iiii iiii iiii s ii s i s i", as.a_indevvec [0], as.a_indevvec [1], as.a_indevvec [2], as.a_indevvec [3], as.a_chindevvec [0], as.a_chindevvec [1], as.a_chindevvec [2], as.a_chindevvec [3], as.a_outdevvec [0], as.a_outdevvec [1], as.a_outdevvec [2], as.a_outdevvec [3], @@ -653,7 +700,8 @@ void glob_audio_properties(t_pd *dummy, t_floatarg flongform) as.a_advance, canmulti, callback, (flongform != 0), - blocksize); + blocksize, + as.a_numthreads); } /* new values from dialog window */ @@ -666,6 +714,7 @@ void glob_audio_dialog(t_pd *dummy, t_symbol *s, int argc, t_atom *argv) as.a_advance = atom_getfloatarg(17, argc, argv); as.a_callback = atom_getfloatarg(18, argc, argv); as.a_blocksize = atom_getfloatarg(19, argc, argv); + as.a_numthreads = atom_getfloatarg(20, argc, argv); for (i = 0; i < 4; i++) { diff --git a/src/s_file.c b/src/s_file.c index 82653eeb40..788380e37f 100644 --- a/src/s_file.c +++ b/src/s_file.c @@ -596,6 +596,10 @@ void sys_loadpreferences(const char 
*filename, int startingup) #ifndef _WIN32 else if (sys_getpreference("blocksize", prefbuf, MAXPDSTRING)) sscanf(prefbuf, "%d", &as.a_blocksize); +#endif +#if PD_DSPTHREADS + if (sys_getpreference("threads", prefbuf, MAXPDSTRING)) + sscanf(prefbuf, "%d", &as.a_numthreads); #endif sys_set_audio_settings(&as); @@ -751,6 +755,11 @@ void sys_savepreferences(const char *filename) sprintf(buf1, "%d", as.a_blocksize); sys_putpreference("audioblocksize", buf1); +#if PD_DSPTHREADS + sprintf(buf1, "%d", as.a_numthreads); + sys_putpreference("threads", buf1); +#endif + /* MIDI settings */ sprintf(buf1, "%d", sys_midiapi); sys_putpreference("midiapi", buf1); diff --git a/src/s_inter.c b/src/s_inter.c index 020c895485..50b52daa11 100644 --- a/src/s_inter.c +++ b/src/s_inter.c @@ -125,6 +125,9 @@ struct _instanceinter t_guiqueue *i_guiqueuehead; t_binbuf *i_inbinbuf; char *i_guibuf; +#if PD_DSPTHREADS + pthread_mutex_t i_guimutex; +#endif int i_guihead; int i_guitail; int i_guisize; @@ -793,6 +796,9 @@ void sys_vgui(const char *fmt, ...) if (!sys_havegui()) return; +#if PD_DSPTHREADS + pthread_mutex_lock(&INTER->i_guimutex); +#endif if (!INTER->i_guibuf) { if (!(INTER->i_guibuf = malloc(GUI_ALLOCCHUNK))) @@ -816,6 +822,9 @@ void sys_vgui(const char *fmt, ...) { fprintf(stderr, "Pd: buffer space wasn't sufficient for long GUI string\n"); + #if PD_DSPTHREADS + pthread_mutex_unlock(&INTER->i_guimutex); + #endif return; } if (msglen >= INTER->i_guisize - INTER->i_guihead) @@ -860,6 +869,9 @@ void sys_vgui(const char *fmt, ...) 
} INTER->i_guihead += msglen; INTER->i_bytessincelastping += msglen; +#if PD_DSPTHREADS + pthread_mutex_unlock(&INTER->i_guimutex); +#endif } void sys_gui(const char *s) @@ -1657,6 +1669,9 @@ void s_inter_newpdinstance(void) pthread_mutex_init(&INTER->i_mutex, NULL); pd_this->pd_islocked = 0; #endif +#if PD_DSPTHREADS + pthread_mutex_init(&INTER->i_guimutex, NULL); +#endif #ifdef _WIN32 INTER->i_freq = 0; #endif @@ -1675,6 +1690,9 @@ void s_inter_free(t_instanceinter *inter) } #if PDTHREADS pthread_mutex_destroy(&INTER->i_mutex); +#endif +#if PD_DSPTHREADS + pthread_mutex_destroy(&INTER->i_guimutex); #endif freebytes(inter, sizeof(*inter)); } diff --git a/src/s_main.c b/src/s_main.c index 3fef766200..c2fa72c7af 100644 --- a/src/s_main.c +++ b/src/s_main.c @@ -55,8 +55,11 @@ int sys_noloadbang; static int sys_dontstartgui; int sys_hipriority = -1; /* -1 = not specified; 0 = no; 1 = yes */ int sys_guisetportnumber; /* if started from the GUI, this is the port # */ -int sys_nosleep = 0; /* skip all "sleep" calls and spin instead */ +int sys_nosleep = 0; /* skip all "sleep" calls and spin instead */ int sys_defeatrt; /* flag to cancel real-time */ +int sys_threadsafe = 1; /* only allow thread-safe DSP objects in parallel processing */ +int sys_threadaffinity = 0; /* pin DSP threads to CPUs */ +int sys_threadspinwait = 0; /* DSP threads spin while waiting for tasks */ t_symbol *sys_flags; /* more command-line flags */ const char *sys_guicmd; @@ -415,6 +418,20 @@ static char *(usagemessage[]) = { "-noaudio -- suppress audio input and output (-nosound is synonym) \n", "-callback -- use callbacks if possible\n", "-nocallback -- use polling-mode (true by default)\n", +#if PD_DSPTHREADS +"-threads -- number of audio threads\n" +" 0: use all physical cores (default)\n", +"-threadsafe -- check if all DSP objects in a parallel canvas are\n" +" \"officially\" thread-safe (true by default)\n", +"-nothreadsafe -- do not check if DSP objects are thread-safe\n" +" (potentially 
dangerous!)\n", +"-spinwait -- audio threads spin while waiting for tasks\n", +"-nospinwait -- audio threads do not spin (true by default)\n", +#if defined(_WIN32) || defined(__linux__) +"-affinity -- pin audio threads to CPUs\n", +"-noaffinity -- do not pin audio threads (true by default)\n", +#endif /* Windows/Linux */ +#endif /* PD_DSPTHREADS */ "-listdev -- list audio and MIDI devices\n", #ifdef USEAPI_OSS @@ -1329,6 +1346,45 @@ int sys_argparse(int argc, const char **argv) argc--; argv++; } #endif +#if PD_DSPTHREADS + else if (!strcmp(*argv, "-threads") && argc > 1) + { + as.a_numthreads = atoi(argv[1]); + argc -= 2; argv += 2; + } + else if (!strcmp(*argv, "-threadsafe")) + { + sys_threadsafe = 1; + argc--; argv++; + } + else if (!strcmp(*argv, "-nothreadsafe")) + { + sys_threadsafe = 0; + argc--; argv++; + } + else if (!strcmp(*argv, "-spinwait")) + { + sys_threadspinwait = 1; + argc--; argv++; + } + else if (!strcmp(*argv, "-nospinwait")) + { + sys_threadspinwait = 0; + argc--; argv++; + } +#if defined(_WIN32) || defined(__linux__) + else if (!strcmp(*argv, "-affinity")) + { + sys_threadaffinity = 1; + argc--; argv++; + } + else if (!strcmp(*argv, "-noaffinity")) + { + sys_threadaffinity = 0; + argc--; argv++; + } +#endif /* Windows/Linux */ +#endif /* PD_DSPTHREADS */ else if (!strcmp(*argv, "-sleep")) { sys_nosleep = 0; diff --git a/src/s_spinlock.h b/src/s_spinlock.h new file mode 100644 index 0000000000..0e6fedb95f --- /dev/null +++ b/src/s_spinlock.h @@ -0,0 +1,394 @@ +/* Copyright (c) 2021 Christof Ressi. + * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. */ + +/* header-only C/C++ spinlock library; can also be used by externals! 
*/ + +#ifndef S_SPINLOCK_H +#define S_SPINLOCK_H + +#include +#include + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +/* C++11 atomics */ +# include +# define ALIGNAS(x) alignas(x) +using std::atomic_int; +using std::atomic_uint; +using std::atomic_load_explicit; +using std::atomic_exchange_explicit; +using std::atomic_fetch_add_explicit; +using std::atomic_fetch_sub_explicit; +using std::memory_order_acquire; +using std::memory_order_release; +using std::memory_order_acq_rel; +using std::memory_order_relaxed; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) \ + && !defined(__STDC_NO_ATOMICS__) +/* C11 atomics */ +# include +# include +# define ALIGNAS(x) _Alignas(x) +#elif defined(_MSC_VER) +/* fallback for MSVC (which doesn't yet provide at the time of writing) */ +# pragma message ("C11 atomics not supported, using fallback for MSVC.") +# include +# define ALIGNAS(x) __declspec(align(x)) +# define MSVC_INTERLOCKED +#else +# error "Missing support for C11/C++11 atomics." 
+#endif + +#define CACHELINE_SIZE 64 + +/* t_spinlock */ + +typedef struct _spinlock +{ +#ifdef MSVC_INTERLOCKED + unsigned long state; +#else + atomic_uint state; +#endif +} t_spinlock; + +static inline void spinlock_init(t_spinlock *x); +static inline void spinlock_lock(t_spinlock *x); +static inline int spinlock_trylock(t_spinlock *x); +static inline void spinlock_unlock(t_spinlock *x); + +/* t_padded_spinlock: spinlock aligned and padded to CACHELINE_SIZE */ + +typedef struct _padded_spinlock +{ + ALIGNAS(CACHELINE_SIZE) t_spinlock lock; + char padding[CACHELINE_SIZE - sizeof(t_spinlock)]; +} t_padded_spinlock; + +#define padded_spinlock_init(x) spinlock_init(&((x)->lock)) +#define padded_spinlock_lock(x) spinlock_lock(&((x)->lock)) +#define padded_spinlock_trylock(x) spinlock_trylock(&((x)->lock)) +#define padded_spinlock_unlock(x) spinlock_unlock(&((x)->lock)) + +/* t_rwspinlock */ + +typedef struct _rwspinlock +{ +#ifdef MSVC_INTERLOCKED + unsigned long state; +#else + atomic_uint state; +#endif +} t_rwspinlock; + +static inline void rwspinlock_init(t_rwspinlock *x); +/* writer */ +static inline void rwspinlock_wrlock(t_rwspinlock *x); +static inline int rwspinlock_trywrlock(t_rwspinlock *x); +static inline void rwspinlock_wrunlock(t_rwspinlock *x); +/* reader */ +static inline void rwspinlock_rdlock(t_rwspinlock *x); +static inline int rwspinlock_tryrdlock(t_rwspinlock *x); +static inline void rwspinlock_rdunlock(t_rwspinlock *x); + +/* t_padded_rwspinlock: rwspinlock aligned and padded to CACHELINE_SIZE */ + +typedef struct _padded_rwspinlock +{ + ALIGNAS(CACHELINE_SIZE) t_rwspinlock lock; + char padding[CACHELINE_SIZE - sizeof(t_rwspinlock)]; +} t_padded_rwspinlock; + +#define padded_rwspinlock_init(x) rwspinlock_init(&((x)->lock)) +/* writer */ +#define padded_rwspinlock_wrlock(x) rwspinlock_wrlock(&((x)->lock)) +#define padded_rwspinlock_trywrlock(x) rwspinlock_trywrlock(&((x)->lock)) +#define padded_rwspinlock_wrunlock(x) rwspinlock_wrunlock(&((x)->lock)) +/* reader */ +#define padded_rwspinlock_rdlock(x) rwspinlock_rdlock(&((x)->lock)) +#define
padded_rwspinlock_tryrdlock(x) rwspinlock_tryrdlock(&((x)->lock)) +#define padded_rwspinlock_rdunlock(x) rwspinlock_rdunlock(&((x)->lock)) + + +/* ------------------------ implementation --------------------------- */ + +#define CHECK_ALIGNMENT(x) assert((((uintptr_t)&x) & (sizeof(x)-1)) == 0) + +/* Intel */ +#if defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64__) || defined(_M_X64) +# define HAVE_PAUSE +# include +/* ARM */ +#elif (defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7M__) || \ + defined(__ARM_ARCH_7S__) || \ + defined(__ARM_ARCH_8A__) || \ + defined(__aarch64__)) +/* the 'yield' instruction is supported from ARMv6k onwards */ +# define HAVE_YIELD +#else +/* fallback */ +# ifdef __cplusplus +# include +# else +# include +# endif +#endif + +static inline void pause_cpu(void) +{ +#if defined(HAVE_PAUSE) + _mm_pause(); +#elif defined(HAVE_YIELD) + __asm__ __volatile__("yield"); +#else /* fallback */ + #warning "architecture does not support yield/pause instruction" +# ifdef __cplusplus + std::this_thread::yield(); +# else + thrd_yield(); +# endif +#endif +} + +/* -------------------- t_spinlock ---------------------- */ + +static inline void spinlock_init(t_spinlock *x) +{ + CHECK_ALIGNMENT(x->state); + x->state = 0; +} + +static inline int spinlock_trylock(t_spinlock *x) +{ +#ifdef MSVC_INTERLOCKED + return _InterlockedExchange(&x->state, 1) == 0; +#else + return atomic_exchange_explicit(&x->state, 1, memory_order_acquire) == 0; +#endif +} + +static inline void spinlock_lock(t_spinlock *x) +{ +#ifdef MSVC_INTERLOCKED + do { + while (x->state != 0) + pause_cpu(); + } while (_InterlockedExchange(&x->state, 1) != 0); +#else + /* only try to modify the shared state if the lock seems to be available. 
+ * this should prevent unnecessary cache invalidation. */ + do { + while (atomic_load_explicit(&x->state, memory_order_relaxed) != 0) + pause_cpu(); + } while (atomic_exchange_explicit(&x->state, 1, memory_order_acquire) != 0); +#endif +} + +static inline void spinlock_unlock(t_spinlock *x) +{ +#ifdef MSVC_INTERLOCKED + _InterlockedExchange(&x->state, 0); +#else + atomic_store_explicit(&x->state, 0, memory_order_release); +#endif +} + +/* -------------------------- t_rwspinlock -------------------------- */ + +#define RWSPINLOCK_UNLOCKED 0 +#define RWSPINLOCK_LOCKED 0x80000000 +/* use fetch-and-add version (optimized for readers) */ +#define RWSPINLOCK_FETCH_AND_ADD 1 + +static inline void rwspinlock_init(t_rwspinlock *x) +{ + CHECK_ALIGNMENT(x->state); + x->state = 0; +} + +static inline int rwspinlock_trywrlock(t_rwspinlock *x) +{ +#ifdef MSVC_INTERLOCKED + return _InterlockedCompareExchange(&x->state, RWSPINLOCK_LOCKED, RWSPINLOCK_UNLOCKED) == RWSPINLOCK_UNLOCKED; +#else + uint32_t expected = RWSPINLOCK_UNLOCKED; + return atomic_compare_exchange_strong_explicit(&x->state, &expected, RWSPINLOCK_LOCKED, + memory_order_acquire, memory_order_relaxed); +#endif +} + +static inline void rwspinlock_wrlock(t_rwspinlock *x) +{ + /* only try to modify the shared state if the lock seems to be available. + * this should prevent unnecessary cache invalidation. */ +#ifdef MSVC_INTERLOCKED + for (;;) + { + if (x->state == RWSPINLOCK_UNLOCKED) + { + /* check if state is UNLOCKED and set LOCKED bit on success. */ + if (_InterlockedCompareExchange(&x->state, RWSPINLOCK_LOCKED, RWSPINLOCK_UNLOCKED) == RWSPINLOCK_UNLOCKED) + return; + /* CAS failed -> retry immediately */ + } else + pause_cpu(); + } +#else + for (;;) + { + if (atomic_load_explicit(&x->state, memory_order_relaxed) == RWSPINLOCK_UNLOCKED) + { + /* check if state is UNLOCKED and set LOCKED bit on success. 
*/ + uint32_t expected = RWSPINLOCK_UNLOCKED; + if (atomic_compare_exchange_weak_explicit(&x->state, &expected, RWSPINLOCK_LOCKED, + memory_order_acquire, memory_order_relaxed)) return; + /* CAS failed -> retry immediately */ + } else + pause_cpu(); + } +#endif +} + +static inline void rwspinlock_wrunlock(t_rwspinlock *x) +{ +#if RWSPINLOCK_FETCH_AND_ADD + /* clear "locked" bit, see rwspinlock_tryrdlock() */ +# ifdef MSVC_INTERLOCKED + _InterlockedAnd(&x->state, ~RWSPINLOCK_LOCKED); +# else + atomic_fetch_and_explicit(&x->state, ~RWSPINLOCK_LOCKED, memory_order_release); +# endif +#else /* CAS */ +# ifdef MSVC_INTERLOCKED + _InterlockedExchange(&x->state, RWSPINLOCK_UNLOCKED); +# else + atomic_store_explicit(&x->state, RWSPINLOCK_UNLOCKED, memory_order_release); +# endif +#endif +} + +static inline int rwspinlock_tryrdlock(t_rwspinlock *x) +{ +#if RWSPINLOCK_FETCH_AND_ADD + /* optimistically increment the reader count and then check the "locked" + * bit: if it is set, we simply decrement the reader count again and fail. + * This is optimized for the likely case that there's no writer. */ +# ifdef MSVC_INTERLOCKED + unsigned long state = _InterlockedIncrement(&x->state); + if ((state & RWSPINLOCK_LOCKED) == 0) + return 1; + else + { + _InterlockedDecrement(&x->state); + return 0; + } +# else + uint32_t state = atomic_fetch_add_explicit(&x->state, 1, memory_order_acquire); + if ((state & RWSPINLOCK_LOCKED) == 0) + return 1; + else + { + atomic_fetch_sub_explicit(&x->state, 1, memory_order_acq_rel); + return 0; + } +# endif +#else /* CAS */ + /* We need a loop because the CAS can fail if another *reader* acquires/releases + * the lock concurrently. We shouldn't consider this a failure!
*/ +# ifdef MSVC_INTERLOCKED + for (;;) + { + unsigned long state = x->state; + if ((state & RWSPINLOCK_LOCKED) == 0) + { + if (_InterlockedCompareExchange(&x->state, state + 1, state) == state) + return 1; + /* CAS failed -> retry */ + } + else + return 0; + } +# else + uint32_t state = atomic_load_explicit(&x->state, memory_order_relaxed); + for (;;) + { + if ((state & RWSPINLOCK_LOCKED) == 0) + { + if (atomic_compare_exchange_weak_explicit(&x->state, &state, state + 1, + memory_order_acquire, memory_order_relaxed)) return 1; + /* CAS failed -> retry; 'state' has been updated */ + } + else + return 0; + } +# endif +#endif +} + +static inline void rwspinlock_rdlock(t_rwspinlock *x) +{ +#if RWSPINLOCK_FETCH_AND_ADD + /* only try to modify the shared state if the lock seems to be available. + * this should prevent unnecessary cache invalidation. */ + for (;;) + { +# ifdef MSVC_INTERLOCKED + unsigned long state = x->state; +# else + uint32_t state = atomic_load_explicit(&x->state, memory_order_relaxed); +# endif + if (!(state & RWSPINLOCK_LOCKED) && rwspinlock_tryrdlock(x)) + return; + else + pause_cpu(); + } +#else /* CAS */ + /* with RWSPINLOCK_LOCKED masked away, the CAS will fail if the + * spinlock is currently locked. NB: the CAS can also fail if + * another reader acquired/releases the lock concurrently. */ +# ifdef MSVC_INTERLOCKED + for (;;) + { + unsigned long state = x->state & ~RWSPINLOCK_LOCKED; + if (_InterlockedCompareExchange(&x->state, state + 1, state) == state) + return; + else + pause_cpu(); + } +# else + for (;;) + { + uint32_t state = atomic_load_explicit(&x->state, memory_order_relaxed); + state &= ~RWSPINLOCK_LOCKED; + if (atomic_compare_exchange_weak_explicit(&x->state, &state, state + 1, + memory_order_acquire, memory_order_relaxed)) return; + else /* NB: don't use updated 'state', instead read again after pause! 
*/ + pause_cpu(); + } +# endif +#endif +} + +static inline void rwspinlock_rdunlock(t_rwspinlock *x) +{ +#ifdef MSVC_INTERLOCKED + _InterlockedDecrement(&x->state); +#else + atomic_fetch_sub_explicit(&x->state, 1, memory_order_release); +#endif +} + +#undef CACHELINE_SIZE +#undef ALIGNAS +/* keep MSVC_INTERLOCKED and CHECK_ALIGNMENT */ + +#endif /* S_SPINLOCK_H */ diff --git a/src/s_stuff.h b/src/s_stuff.h index 580b4ac343..ba5e005004 100644 --- a/src/s_stuff.h +++ b/src/s_stuff.h @@ -73,6 +73,7 @@ typedef struct _audiosettings int a_advance; int a_callback; int a_blocksize; + int a_numthreads; } t_audiosettings; #define SENDDACS_NO 0 /* return values for sys_send_dacs() */ @@ -392,6 +393,46 @@ EXTERN void inmidi_polyaftertouch(int portno, /* } jsarlo */ EXTERN int sys_zoom_open; +/* DSP task queue */ +#if PD_DSPTHREADS + +EXTERN_STRUCT _dsptaskqueue; +#define t_dsptaskqueue struct _dsptaskqueue + +t_dsptaskqueue * dsptaskqueue_new(t_canvas *owner); +void dsptaskqueue_release(t_dsptaskqueue *x); +void dsptaskqueue_update(t_dsptaskqueue *x); +int dsptaskqueue_check(t_dsptaskqueue *x); +void dsptaskqueue_reset(t_dsptaskqueue *x); +void dsptaskqueue_join(t_dsptaskqueue *x); +void dsp_add_reset(t_dsptaskqueue *x); +void dsp_add_join(t_dsptaskqueue *x); + +int canvas_markthreadsafe(void); +int canvas_isthreadsafe(t_canvas *x, int loud); + +EXTERN_STRUCT _dsptask; +#define t_dsptask struct _dsptask + +typedef void (*t_dsptaskfn) (void *data); + +t_dsptask * dsptask_new(t_dsptaskqueue *queue, t_dsptaskfn fn, void *data); +void dsptask_free(t_dsptask *x); +void dsptask_sched(t_dsptask *x); +void dsptask_switch(t_dsptask *x, int on); + +#endif /* PD_DSPTHREADS */ + +/* DSP thread pool API, for documentation see d_threadpool.c */ +EXTERN int sys_havedspthreadpool(void); +EXTERN int sys_dspthreadpool_start(int *numthreads, int external); +EXTERN int sys_dspthreadpool_stop(int external); +EXTERN int sys_dspthread_run(int index); + +EXTERN int sys_threadsafe; /* enable/disable 
thread-safety checks */ +EXTERN int sys_threadaffinity; /* enable/disable thread pinning */ +EXTERN int sys_threadspinwait; /* spin while waiting for tasks */ + struct _instancestuff { t_namelist *st_externlist; @@ -409,6 +450,7 @@ struct _instancestuff double st_time_per_dsp_tick; /* obsolete - included for GEM?? */ t_printhook st_printhook; /* set this to override per-instance printing */ void *st_impdata; /* optional implementation-specific data for libpd, etc */ + struct _spinlock *st_soundout_locks; /* spinlocks for dac~ */ }; #define STUFF (pd_this->pd_stuff) diff --git a/src/s_sync.c b/src/s_sync.c new file mode 100644 index 0000000000..8d3ba5ba45 --- /dev/null +++ b/src/s_sync.c @@ -0,0 +1,216 @@ +/* Copyright (c) 2021 Christof Ressi. + * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. */ + +/* thread synchronisation tools. */ + +/* currently, this file is only needed for PD_DSPTHREADS */ +#if PD_DSPTHREADS + +#include "s_sync.h" + +#ifdef _WIN32 +# include +#endif + +/* ----------------------- t_lockfree_stack ---------------------- */ + +void lockfree_stack_init(t_lockfree_stack *x) +{ + CHECK_ALIGNMENT(x->x_head); + x->x_head = NULL; +} + +void lockfree_stack_push(t_lockfree_stack *x, void *y) +{ + t_lfs_node *node = (t_lfs_node *)y; +#ifdef MSVC_INTERLOCKED + do + { + node->x_next = x->x_head; + } + while (_InterlockedCompareExchangePointer(&x->x_head, node, node->x_next) != node->x_next); +#else + node->x_next = atomic_load_explicit(&x->x_head, memory_order_relaxed); + while (!atomic_compare_exchange_weak_explicit(&x->x_head, &node->x_next, node, + memory_order_release, memory_order_relaxed)) ; +#endif +} + +void * lockfree_stack_pop(t_lockfree_stack *x) +{ +#ifdef MSVC_INTERLOCKED + t_lfs_node *head; + do + { + head = x->x_head; + } + while (head && _InterlockedCompareExchangePointer(&x->x_head, head->x_next, head) != head); +#else + t_lfs_node *head = 
atomic_load_explicit(&x->x_head, memory_order_relaxed); + while (head && !atomic_compare_exchange_weak_explicit(&x->x_head, &head, + head->x_next, memory_order_acquire, memory_order_relaxed)) ; +#endif + return head; +} + +void * lockfree_stack_release(t_lockfree_stack *x) +{ +#ifdef MSVC_INTERLOCKED + return (void *)_InterlockedExchangePointer(&x->x_head, NULL); +#else + return (void *)atomic_exchange(&x->x_head, NULL); +#endif +} + +/* -------------------- t_native_semaphore -------------------- */ + +int native_semaphore_init(t_native_semaphore *x) +{ +#if defined(_WIN32) + return (x->sem = CreateSemaphoreA(0, 0, INT_MAX, 0)) ? 0 : -1; +#elif defined(__APPLE__) + return (semaphore_create(mach_task_self(), &x->sem, SYNC_POLICY_FIFO, 0) + == KERN_SUCCESS) ? 0 : -1; +#else /* posix */ + return sem_init(&x->sem, 0, 0); +#endif +} + +int native_semaphore_destroy(t_native_semaphore *x) +{ +#if defined(_WIN32) + return CloseHandle(x->sem) ? 0 : -1; +#elif defined(__APPLE__) + return (semaphore_destroy(mach_task_self(), x->sem) == KERN_SUCCESS) ? 0 : -1; +#else /* posix */ + return sem_destroy(&x->sem); +#endif +} + +int native_semaphore_post(t_native_semaphore *x) +{ +#if defined(_WIN32) + return ReleaseSemaphore(x->sem, 1, 0) ? 0 : -1; +#elif defined(__APPLE__) + return (semaphore_signal(x->sem) == KERN_SUCCESS) ? 0 : -1; /* -1 on failure, as on the other platforms; native_semaphore_postn() tests for < 0 */ +#else /* posix */ + return sem_post(&x->sem); +#endif +} + +int native_semaphore_postn(t_native_semaphore *x, int count) +{ +#if defined(_WIN32) + return ReleaseSemaphore(x->sem, count, 0) ? 0 : -1; +#else + for (int i = 0; i < count; ++i) + { + if (native_semaphore_post(x) < 0) + return -1; + } + return 0; +#endif +} + +int native_semaphore_wait(t_native_semaphore *x) +{ +#if defined(_WIN32) + return (WaitForSingleObject(x->sem, INFINITE) != WAIT_FAILED) ? 0 : -1; +#elif defined(__APPLE__) + return (semaphore_wait(x->sem) == KERN_SUCCESS) ?
0 : -1; +#else /* posix */ + for (;;) + { + int ret = sem_wait(&x->sem); + if (ret == 0) + return 0; + else if (errno == EINTR) + continue; + else + return -1; + } +#endif +} + +/* t_fast_semaphore */ + +int fast_semaphore_init(t_fast_semaphore *x) +{ + CHECK_ALIGNMENT(x->count); + x->count = 0; + return native_semaphore_init(&x->sem); +} + +int fast_semaphore_destroy(t_fast_semaphore *x){ + return native_semaphore_destroy(&x->sem); +} + +int fast_semaphore_post(t_fast_semaphore *x) +{ +#ifdef MSVC_INTERLOCKED + int old = _InterlockedIncrement(&x->count) - 1; /* returns new value! */ +#else + int old = atomic_fetch_add_explicit(&x->count, 1, memory_order_release); +#endif + if (old < 0) + return native_semaphore_post(&x->sem); + else + return 0; +} + +int fast_semaphore_postn(t_fast_semaphore *x, int count) +{ +#ifdef MSVC_INTERLOCKED + int old = _InterlockedExchangeAdd(&x->count, count); /* returns old value */ +#else + int old = atomic_fetch_add_explicit(&x->count, count, memory_order_release); +#endif + if (old < 0) + { + int release = -old < count ? -old : count; + return native_semaphore_postn(&x->sem, release); + } + else + return 0; +} + +int fast_semaphore_wait(t_fast_semaphore *x) +{ +#ifdef MSVC_INTERLOCKED + int old = _InterlockedDecrement(&x->count) + 1; /* returns new value! */ +#else + int old = atomic_fetch_sub_explicit(&x->count, 1, memory_order_acquire); +#endif + if (old <= 0) + return native_semaphore_wait(&x->sem); + else + return 0; +} + +/* returns 1 on success, 0 on failure */ +int fast_semaphore_trywait(t_fast_semaphore *x) { +#ifdef MSVC_INTERLOCKED + int value = x->count; +#else + int value = atomic_load_explicit(&x->count, memory_order_relaxed); +#endif + /* NOTE: we need a loop because another thread might decrement the count + * concurrently, which does not necessarily mean that we have failed! 
*/ + while (value > 0) + { + #ifdef MSVC_INTERLOCKED + if (_InterlockedCompareExchange(&x->count, value - 1, value) == value) + return 1; + /* CAS failed -> retry and update */ + value = x->count; + #else + if (atomic_compare_exchange_weak_explicit(&x->count, &value, value - 1, + memory_order_acquire, memory_order_relaxed)) return 1; + /* CAS failed -> retry; 'value' has been updated */ + #endif + } + return 0; +} + +#endif /* PD_DSPTHREADS */ diff --git a/src/s_sync.h b/src/s_sync.h new file mode 100644 index 0000000000..111655620b --- /dev/null +++ b/src/s_sync.h @@ -0,0 +1,95 @@ +/* Copyright (c) 2021 Christof Ressi. + * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. */ + +/* thread synchronisation tools */ + +#ifndef S_SYNC_H +#define S_SYNC_H + +/* for atomics */ +#include "s_spinlock.h" + +#ifdef _WIN32 +/* use Win32 Semaphores */ +#elif defined(__APPLE__) +/* macOS doesn't support unnamed Posix semaphores, + * so we use Mach semaphores instead. */ +# include <mach/mach.h> /* semaphore_t, semaphore_create(), mach_task_self() */ +#elif defined(__linux__) || defined(__FreeBSD__) \ + || defined(__NetBSD__) || defined(__OpenBSD__) +/* Linux or BSD: use Posix semaphores */ +# include <semaphore.h> /* sem_t, sem_init(), sem_wait(), sem_post() */ +# include <errno.h> /* errno/EINTR for the sem_wait() retry loop */ +#else +# error "Platform not supported!"
+#endif + +/* -------------------- t_lockfree_stack ---------------------- */ + +/* nodes must have t_lfs_node as its first member */ + +typedef struct _lfs_node +{ + struct _lfs_node *x_next; +} t_lfs_node; + +#define lfs_node_init(x) ((t_lfs_node *)(x))->x_next = 0 +#define lfs_node_next(x) ((void *)((t_lfs_node *)(x))->x_next) + +typedef struct _lockfree_stack +{ +#ifdef MSVC_INTERLOCKED + t_lfs_node *x_head; +#else + t_lfs_node * _Atomic x_head; +#endif +} t_lockfree_stack; + +void lockfree_stack_init(t_lockfree_stack *x); +void lockfree_stack_push(t_lockfree_stack *x, void *node); +void * lockfree_stack_pop(t_lockfree_stack *x); +void * lockfree_stack_release(t_lockfree_stack *x); + +/* ------------------- t_native_semaphore -------------------- */ + +typedef struct _native_semaphore +{ +#if defined(_WIN32) + void *sem; +#elif defined(__APPLE__) + semaphore_t sem; +#else /* posix */ + sem_t sem; +#endif +} t_native_semaphore; + +int native_semaphore_init(t_native_semaphore *x); +int native_semaphore_destroy(t_native_semaphore *x); +int native_semaphore_post(t_native_semaphore *x); +int native_semaphore_postn(t_native_semaphore *x, int count); +int native_semaphore_wait(t_native_semaphore *x); + +/* ------------------- t_fast_semaphore ------------------ */ + +/* thanks to https://preshing.com/20150316/semaphores-are-surprisingly-versatile */ + +typedef struct _fast_semaphore +{ + t_native_semaphore sem; +#ifdef MSVC_INTERLOCKED + long count; +#else + atomic_int count; +#endif +} t_fast_semaphore; + +int fast_semaphore_init(t_fast_semaphore *x); +int fast_semaphore_destroy(t_fast_semaphore *x); +int fast_semaphore_post(t_fast_semaphore *x); +int fast_semaphore_postn(t_fast_semaphore *x, int count); +int fast_semaphore_wait(t_fast_semaphore *x); +/* returns 1 on success, 0 on failure */ +int fast_semaphore_trywait(t_fast_semaphore *x); + +#endif /* S_SYNC_H */ diff --git a/src/x_array.c b/src/x_array.c index a7fedff363..6a6438ae3f 100644 --- a/src/x_array.c +++ 
b/src/x_array.c @@ -897,7 +897,7 @@ void canvas_add_for_class(t_class *c); void x_array_setup(void) { array_define_class = class_new(gensym("array define"), 0, - (t_method)canvas_free, sizeof(t_canvas), 0, 0); + (t_method)canvas_free, sizeof(t_canvas), CLASS_DEFAULT, 0); canvas_add_for_class(array_define_class); class_addmethod(array_define_class, (t_method)array_define_send, gensym("send"), A_SYMBOL, 0); diff --git a/src/x_scalar.c b/src/x_scalar.c index 92862cdc5a..450a5bb0d1 100644 --- a/src/x_scalar.c +++ b/src/x_scalar.c @@ -188,7 +188,7 @@ void canvas_add_for_class(t_class *c); void x_scalar_setup(void) { scalar_define_class = class_new(gensym("scalar define"), 0, - (t_method)canvas_free, sizeof(t_canvas), 0, 0); + (t_method)canvas_free, sizeof(t_canvas), CLASS_DEFAULT, 0); canvas_add_for_class(scalar_define_class); class_addmethod(scalar_define_class, (t_method)scalar_define_send, gensym("send"), A_SYMBOL, 0); diff --git a/src/x_vexp_if.c b/src/x_vexp_if.c index ebf197e2ae..2606c5f7df 100644 --- a/src/x_vexp_if.c +++ b/src/x_vexp_if.c @@ -800,7 +800,7 @@ expr_setup(void) * expr~ initialization */ expr_tilde_class = class_new(gensym("expr~"), (t_newmethod)expr_new, - (t_method)expr_ff, sizeof(t_expr), 0, A_GIMME, 0); + (t_method)expr_ff, sizeof(t_expr), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(expr_tilde_class, nullfn, gensym("signal"), 0); CLASS_MAINSIGNALIN(expr_tilde_class, t_expr, exp_f); class_addmethod(expr_tilde_class,(t_method)expr_dsp, gensym("dsp"), @@ -812,7 +812,7 @@ expr_setup(void) * fexpr~ initialization */ fexpr_tilde_class = class_new(gensym("fexpr~"), (t_newmethod)expr_new, - (t_method)expr_ff, sizeof(t_expr), 0, A_GIMME, 0); + (t_method)expr_ff, sizeof(t_expr), CLASS_DEFAULT, A_GIMME, 0); class_addmethod(fexpr_tilde_class, nullfn, gensym("signal"), 0); CLASS_MAINSIGNALIN(fexpr_tilde_class, t_expr, exp_f); class_addmethod(fexpr_tilde_class,(t_method)expr_start, diff --git a/tcl/dialog_audio.tcl b/tcl/dialog_audio.tcl index 
c8caead5a0..2e60c2a288 100644 --- a/tcl/dialog_audio.tcl +++ b/tcl/dialog_audio.tcl @@ -17,7 +17,7 @@ proc ::dialog_audio::apply {mytoplevel} { global audio_outdev1 audio_outdev2 audio_outdev3 audio_outdev4 global audio_outchan1 audio_outchan2 audio_outchan3 audio_outchan4 global audio_outenable1 audio_outenable2 audio_outenable3 audio_outenable4 - global audio_sr audio_advance audio_callback audio_blocksize + global audio_sr audio_advance audio_callback audio_blocksize audio_threads pdsend "pd audio-dialog \ $audio_indev1 \ @@ -39,7 +39,8 @@ proc ::dialog_audio::apply {mytoplevel} { $audio_sr \ $audio_advance \ $audio_callback \ - $audio_blocksize" + $audio_blocksize \ + $audio_threads" } proc ::dialog_audio::cancel {mytoplevel} { @@ -102,7 +103,7 @@ proc ::dialog_audio::pdtk_audio_dialog {mytoplevel \ inchan1 inchan2 inchan3 inchan4 \ outdev1 outdev2 outdev3 outdev4 \ outchan1 outchan2 outchan3 outchan4 sr advance multi callback \ - longform blocksize} { + longform blocksize {threads 0}} { global audio_indev1 audio_indev2 audio_indev3 audio_indev4 global audio_inchan1 audio_inchan2 audio_inchan3 audio_inchan4 global audio_inenable1 audio_inenable2 audio_inenable3 audio_inenable4 @@ -112,7 +113,7 @@ proc ::dialog_audio::pdtk_audio_dialog {mytoplevel \ global audio_sr audio_advance audio_callback audio_blocksize global audio_indevlist audio_outdevlist global pd_indev pd_outdev - global audio_longform + global audio_longform audio_threads set audio_indev1 $indev1 set audio_indev2 $indev2 @@ -147,6 +148,8 @@ proc ::dialog_audio::pdtk_audio_dialog {mytoplevel \ foreach {audio_callback audio_isfixedcallback} [::dialog_audio::isfixed $callback] {} foreach {audio_blocksize audio_isfixedbs} [::dialog_audio::isfixed $blocksize] {} + set audio_threads $threads + toplevel $mytoplevel -class DialogWindow wm withdraw $mytoplevel wm title $mytoplevel [_ "Audio Settings"] @@ -202,12 +205,19 @@ proc ::dialog_audio::pdtk_audio_dialog {mytoplevel \ $mytoplevel.settings.bsc.bs_popup 
config -state "disabled" } - if {$audio_isfixedcallback} {} else { - frame $mytoplevel.settings.callback - pack $mytoplevel.settings.callback -side bottom -fill x - checkbutton $mytoplevel.settings.callback.c_button -variable audio_callback \ + # callbacks and audio threads + frame $mytoplevel.settings.misc + pack $mytoplevel.settings.misc -side bottom -fill x + if {!$audio_isfixedcallback} { + checkbutton $mytoplevel.settings.misc.c_button -variable audio_callback \ -text [_ "Use callbacks"] - pack $mytoplevel.settings.callback.c_button + pack $mytoplevel.settings.misc.c_button + pack $mytoplevel.settings.misc.c_button -side left + } + if {$threads >= 0} { + label $mytoplevel.settings.misc.t_label -text [_ "Audio threads:"] + entry $mytoplevel.settings.misc.t_entry -textvariable audio_threads -width 4 + pack $mytoplevel.settings.misc.t_entry $mytoplevel.settings.misc.t_label -side right } # input devices