Skip to content

Commit

Permalink
Move to SIMD without NATIVE_ALIASES on (#7834)
Browse files Browse the repository at this point in the history
* Move to SIMD without NATIVE_ALIASES on

using the new macros from basic-blocks

* knock out arm64ec pipeline build for a minute
  • Loading branch information
baconpaul authored Nov 1, 2024
1 parent d9c1cc4 commit b490f99
Show file tree
Hide file tree
Showing 38 changed files with 965 additions and 927 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/build-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ jobs:
cmakeOpt: DEBUG
runTests: false

- name: "windows msvc arm64ec"
os: windows-latest
target: surge-xt_Standalone
cmakeConfig: -G"Visual Studio 17 2022" -A arm64ec -DCMAKE_SYSTEM_VERSION=10 -DSURGE_SKIP_LUA=TRUE
cmakeOpt: DEBUG
runTests: false
#- name: "windows msvc arm64ec"
# os: windows-latest
# target: surge-xt_Standalone
# cmakeConfig: -G"Visual Studio 17 2022" -A arm64ec -DCMAKE_SYSTEM_VERSION=10 -DSURGE_SKIP_LUA=TRUE
# cmakeOpt: DEBUG
# runTests: false

- name: "windows clang"
os: windows-latest
Expand Down
2 changes: 1 addition & 1 deletion libs/sst/sst-filters
Submodule sst-filters updated 46 files
+68 −0 .github/workflows/build-pr.yml
+17 −0 .github/workflows/code-checks.yml
+19 −0 .github/workflows/publish-doxygen.yml
+19 −18 CMakeLists.txt
+0 −122 azure-pipelines.yml
+1,259 −22 cmake/CPM.cmake
+1 −1 examples/CMakeLists.txt
+14 −0 examples/filter_plot_tool/filter_plot_tool.cpp
+14 −0 examples/filters_example_plugin/FilterPlotComponent.cpp
+14 −0 examples/filters_example_plugin/FilterPlotComponent.h
+37 −24 examples/filters_example_plugin/FiltersPlugin.cpp
+14 −0 examples/filters_example_plugin/FiltersPlugin.h
+14 −0 examples/filters_example_plugin/FiltersPluginEditor.cpp
+14 −0 examples/filters_example_plugin/FiltersPluginEditor.h
+62 −36 include-extras/sst/filters/FilterPlotter.h
+8 −5 include/sst/filters/BiquadFilter.h
+41 −36 include/sst/filters/CutoffWarp.h
+92 −78 include/sst/filters/CytomicSVF.h
+50 −52 include/sst/filters/DiodeLadder.h
+4 −4 include/sst/filters/FilterCoefficientMaker_Impl.h
+156 −155 include/sst/filters/HalfRateFilter.h
+25 −25 include/sst/filters/K35Filter.h
+122 −109 include/sst/filters/OBXDFilter.h
+4 −4 include/sst/filters/QuadFilterUnit.h
+418 −372 include/sst/filters/QuadFilterUnit_Impl.h
+9 −9 include/sst/filters/ResonanceWarp.h
+58 −53 include/sst/filters/TriPoleFilter.h
+37 −38 include/sst/filters/VintageLadders.h
+1 −22 include/sst/utilities/globals.h
+9 −9 include/sst/utilities/shared.h
+0 −0 libs/catch2/catch2.hpp
+2 −2 scripts/fix_file_comments.pl
+14 −0 tests/BasicFiltersTest.cpp
+20 −7 tests/BiquadTest.cpp
+3 −2 tests/CMakeLists.txt
+14 −0 tests/CutoffWarpTest.cpp
+14 −3 tests/CytomicSVFTests.cpp
+14 −0 tests/DiodeLadderTest.cpp
+37 −19 tests/HalfRateTest.cpp
+14 −0 tests/K35FilterTest.cpp
+14 −0 tests/OBXDFilterTest.cpp
+14 −0 tests/ResonanceWarpTest.cpp
+35 −20 tests/TestUtils.h
+14 −0 tests/TriPoleFilterTest.cpp
+14 −0 tests/VintageLaddersTest.cpp
+14 −0 tests/tests.cpp
1 change: 1 addition & 0 deletions src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ else()
target_compile_definitions(luajit-5.1 INTERFACE HAS_LUA=0)
endif()

set(SST_BASIC_BLOCKS_SIMD_OMIT_NATIVE_ALIASES ON CACHE BOOL "No Native Aliases for SCXT") # Makes ARM64EC use neon basically
surge_add_lib_subdirectory(sst/sst-basic-blocks)
surge_add_lib_subdirectory(sst/sst-cpputils)
surge_add_lib_subdirectory(sst/sst-plugininfra)
Expand Down
327 changes: 169 additions & 158 deletions src/common/dsp/QuadFilterChain.cpp

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions src/common/dsp/QuadFilterChain.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,15 @@ struct QuadFilterChainState
sst::filters::QuadFilterUnitState FU[4]; // 2 filters left and right
sst::waveshapers::QuadWaveshaperState WSS[2]; // 1 shaper left and right

__m128 Gain, FB, Mix1, Mix2, Drive;
__m128 dGain, dFB, dMix1, dMix2, dDrive;
SIMD_M128 Gain, FB, Mix1, Mix2, Drive;
SIMD_M128 dGain, dFB, dMix1, dMix2, dDrive;

__m128 wsLPF, FBlineL, FBlineR;
SIMD_M128 wsLPF, FBlineL, FBlineR;

__m128 DL[BLOCK_SIZE_OS], DR[BLOCK_SIZE_OS]; // wavedata
SIMD_M128 DL[BLOCK_SIZE_OS], DR[BLOCK_SIZE_OS]; // wavedata

__m128 OutL, OutR, dOutL, dOutR;
__m128 Out2L, Out2R, dOut2L, dOut2R; // fc_stereo only
SIMD_M128 OutL, OutR, dOutL, dOutR;
SIMD_M128 Out2L, Out2R, dOut2L, dOut2R; // fc_stereo only
};

/*
Expand Down
12 changes: 6 additions & 6 deletions src/common/dsp/SurgeVoice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ enum lag_entries
le_pfg,
};

inline void set1f(__m128 &m, int i, float f) { *((float *)&m + i) = f; }
// Overwrites one float lane of a SIMD value in place: the storage of m is
// viewed as an array of float and element i is set to f. No bounds check is
// performed on i.
inline void set1f(SIMD_M128 &m, int i, float f)
{
    float *lanes = reinterpret_cast<float *>(&m);
    lanes[i] = f;
}

inline void set1i(__m128 &m, int e, int i) { *((int *)&m + e) = i; }
inline void set1ui(__m128 &m, int e, unsigned int i) { *((unsigned int *)&m + e) = i; }
// Overwrites one int-sized lane of a SIMD value in place: the storage of m is
// viewed as an array of int and element e is set to i. Note the argument
// order: e is the lane index, i is the value. No bounds check is performed.
inline void set1i(SIMD_M128 &m, int e, int i)
{
    int *lanes = reinterpret_cast<int *>(&m);
    lanes[e] = i;
}
// Unsigned counterpart of set1i: the storage of m is viewed as an array of
// unsigned int and element e is set to i. e is the lane index, i is the
// value. No bounds check is performed.
inline void set1ui(SIMD_M128 &m, int e, unsigned int i)
{
    unsigned int *lanes = reinterpret_cast<unsigned int *>(&m);
    lanes[e] = i;
}

inline float get1f(__m128 m, int i) { return *((float *)&m + i); }
// Reads one float lane out of a SIMD value. m is received by value, so the
// lookup happens on the local copy: its storage is viewed as an array of
// float and element i is returned. No bounds check is performed on i.
inline float get1f(SIMD_M128 m, int i)
{
    const float *lanes = reinterpret_cast<const float *>(&m);
    return lanes[i];
}

float SurgeVoiceState::getPitch(SurgeStorage *storage)
{
Expand Down Expand Up @@ -1262,8 +1262,8 @@ bool SurgeVoice::process_block(QuadFilterChainState &Q, int Qe)

for (int i = 0; i < BLOCK_SIZE_OS; i++)
{
_mm_store_ss(((float *)&Q.DL[i] + Qe), _mm_load_ss(&output[0][i]));
_mm_store_ss(((float *)&Q.DR[i] + Qe), _mm_load_ss(&output[1][i]));
SIMD_MM(store_ss)(((float *)&Q.DL[i] + Qe), SIMD_MM(load_ss)(&output[0][i]));
SIMD_MM(store_ss)(((float *)&Q.DR[i] + Qe), SIMD_MM(load_ss)(&output[1][i]));
}
SetQFB(&Q, Qe);

Expand Down
4 changes: 2 additions & 2 deletions src/common/dsp/effects/BBDEnsembleEffect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ void BBDEnsembleEffect::process_sinc_delays(float *dataL, float *dataR, float de

float waveshaperOuts alignas(16)[4];

_mm_store_ps(waveshaperOuts, waveshaperOutsVec);
SIMD_MM(store_ps)(waveshaperOuts, waveshaperOutsVec);

L[s] = waveshaperOuts[0] + waveshaperOuts[1];
R[s] = waveshaperOuts[2] + waveshaperOuts[3];
Expand Down Expand Up @@ -391,7 +391,7 @@ void BBDEnsembleEffect::process(float *dataL, float *dataR)

float waveshaperOuts alignas(16)[4];

_mm_store_ps(waveshaperOuts, waveshaperOutsVec);
SIMD_MM(store_ps)(waveshaperOuts, waveshaperOutsVec);

L[s] = waveshaperOuts[0] + waveshaperOuts[1];
R[s] = waveshaperOuts[2] + waveshaperOuts[3];
Expand Down
2 changes: 1 addition & 1 deletion src/common/dsp/effects/ChorusEffect.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
template <int v> class ChorusEffect : public Effect
{
lipol_ps_blocksz feedback alignas(16), mix alignas(16), width alignas(16);
__m128 voicepanL4 alignas(16)[v], voicepanR4 alignas(16)[v];
SIMD_M128 voicepanL4 alignas(16)[v], voicepanR4 alignas(16)[v];
float buffer alignas(16)[max_delay_length + FIRipol_N]; // Includes padding so we can use SSE
// interpolation without wrapping

Expand Down
31 changes: 17 additions & 14 deletions src/common/dsp/effects/ChorusEffectImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ template <int v> void ChorusEffect<v>::init()
x = 2.f * x - 1.f;
voicepan[i][0] = sqrt(0.5 - 0.5 * x) * gainscale;
voicepan[i][1] = sqrt(0.5 + 0.5 * x) * gainscale;
voicepanL4[i] = _mm_set1_ps(voicepan[i][0]);
voicepanR4[i] = _mm_set1_ps(voicepan[i][1]);
voicepanL4[i] = SIMD_MM(set1_ps)(voicepan[i][0]);
voicepanR4[i] = SIMD_MM(set1_ps)(voicepan[i][1]);
}

setvars(true);
Expand Down Expand Up @@ -118,7 +118,7 @@ template <int v> void ChorusEffect<v>::process(float *dataL, float *dataR)

for (int k = 0; k < BLOCK_SIZE; k++)
{
__m128 L = _mm_setzero_ps(), R = _mm_setzero_ps();
auto L = SIMD_MM(setzero_ps)(), R = SIMD_MM(setzero_ps)();

for (int j = 0; j < v; j++)
{
Expand All @@ -129,20 +129,23 @@ template <int v> void ChorusEffect<v>::process(float *dataL, float *dataR)
int sinc = FIRipol_N * limit_range((int)(FIRipol_M * (float(i_dtime + 1) - vtime)), 0,
FIRipol_M - 1);

__m128 vo;
vo = _mm_mul_ps(_mm_load_ps(&storage->sinctable1X[sinc]), _mm_loadu_ps(&buffer[rp]));
vo = _mm_add_ps(vo, _mm_mul_ps(_mm_load_ps(&storage->sinctable1X[sinc + 4]),
_mm_loadu_ps(&buffer[rp + 4])));
vo = _mm_add_ps(vo, _mm_mul_ps(_mm_load_ps(&storage->sinctable1X[sinc + 8]),
_mm_loadu_ps(&buffer[rp + 8])));

L = _mm_add_ps(L, _mm_mul_ps(vo, voicepanL4[j]));
R = _mm_add_ps(R, _mm_mul_ps(vo, voicepanR4[j]));
SIMD_M128 vo;
vo = SIMD_MM(mul_ps)(SIMD_MM(load_ps)(&storage->sinctable1X[sinc]),
SIMD_MM(loadu_ps)(&buffer[rp]));
vo = SIMD_MM(add_ps)(vo,
SIMD_MM(mul_ps)(SIMD_MM(load_ps)(&storage->sinctable1X[sinc + 4]),
SIMD_MM(loadu_ps)(&buffer[rp + 4])));
vo = SIMD_MM(add_ps)(vo,
SIMD_MM(mul_ps)(SIMD_MM(load_ps)(&storage->sinctable1X[sinc + 8]),
SIMD_MM(loadu_ps)(&buffer[rp + 8])));

L = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(vo, voicepanL4[j]));
R = SIMD_MM(add_ps)(R, SIMD_MM(mul_ps)(vo, voicepanR4[j]));
}
L = mech::sum_ps_to_ss(L);
R = mech::sum_ps_to_ss(R);
_mm_store_ss(&tbufferL[k], L);
_mm_store_ss(&tbufferR[k], R);
SIMD_MM(store_ss)(&tbufferL[k], L);
SIMD_MM(store_ss)(&tbufferR[k], R);
}

if (!fxdata->p[ch_highcut].deactivated)
Expand Down
20 changes: 10 additions & 10 deletions src/common/dsp/effects/CombulatorEffect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ void CombulatorEffect::setvars(bool init)
}
}

inline void set1f(__m128 &m, int i, float f) { *((float *)&m + i) = f; }
// Sets a single float lane of m: treats the SIMD value's storage as a float
// array and stores f at index i. The index is not range-checked.
inline void set1f(SIMD_M128 &m, int i, float f)
{
    float *lanes = reinterpret_cast<float *>(&m);
    lanes[i] = f;
}

inline float get1f(__m128 m, int i) { return *((float *)&m + i); }
// Returns a single float lane of m: treats the by-value copy's storage as a
// float array and reads index i. The index is not range-checked.
inline float get1f(SIMD_M128 m, int i)
{
    const float *lanes = reinterpret_cast<const float *>(&m);
    return lanes[i];
}

void CombulatorEffect::sampleRateReset()
{
Expand Down Expand Up @@ -270,8 +270,8 @@ void CombulatorEffect::process(float *dataL, float *dataR)
noiseGen[c][0], noiseGen[c][1], 0, storage->rand_pm1());
}

auto l128 = _mm_setzero_ps();
auto r128 = _mm_setzero_ps();
auto l128 = SIMD_MM(setzero_ps)();
auto r128 = SIMD_MM(setzero_ps)();

// FIXME - we want to interpolate the non-integral part if we like this
int panIndex2 = (int)((limit_range(pan2.v, -1.f, 1.f) + 1) * ((PANLAW_SIZE - 1) / 2)) &
Expand All @@ -281,20 +281,20 @@ void CombulatorEffect::process(float *dataL, float *dataR)

if (filtptr)
{
l128 = filtptr(&(qfus[0]), _mm_set1_ps(dataOS[0][s] + noise[0]));
r128 = filtptr(&(qfus[1]), _mm_set1_ps(dataOS[1][s] + noise[1]));
l128 = filtptr(&(qfus[0]), SIMD_MM(set1_ps)(dataOS[0][s] + noise[0]));
r128 = filtptr(&(qfus[1]), SIMD_MM(set1_ps)(dataOS[1][s] + noise[1]));
}
else
{
l128 = _mm_set1_ps(dataOS[0][s]);
r128 = _mm_set1_ps(dataOS[1][s]);
l128 = SIMD_MM(set1_ps)(dataOS[0][s]);
r128 = SIMD_MM(set1_ps)(dataOS[1][s]);
}

float mixl = 0, mixr = 0;
float tl[4], tr[4];

_mm_store_ps(tl, l128);
_mm_store_ps(tr, r128);
SIMD_MM(store_ps)(tl, l128);
SIMD_MM(store_ps)(tr, r128);

for (int i = 0; i < 3; ++i)
{
Expand Down
8 changes: 4 additions & 4 deletions src/common/dsp/effects/DistortionEffect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void DistortionEffect::init()
R = 0.f;

for (int i = 0; i < sst::waveshapers::n_waveshaper_registers; ++i)
wsState.R[i] = _mm_setzero_ps();
wsState.R[i] = SIMD_MM(setzero_ps)();
}

void DistortionEffect::setvars(bool init)
Expand Down Expand Up @@ -151,9 +151,9 @@ void DistortionEffect::process(float *dataL, float *dataR)

sb[0] = L * dInv;
sb[1] = R * dInv;
auto lr128 = _mm_load_ps(sb);
auto wsres = wsop(&wsState, lr128, _mm_set1_ps(dNow));
_mm_store_ps(sb, wsres);
auto lr128 = SIMD_MM(load_ps)(sb);
auto wsres = wsop(&wsState, lr128, SIMD_MM(set1_ps)(dNow));
SIMD_MM(store_ps)(sb, wsres);
L = sb[0];
R = sb[1];

Expand Down
20 changes: 10 additions & 10 deletions src/common/dsp/effects/ResonatorEffect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ void ResonatorEffect::setvars(bool init)
}
}

inline void set1f(__m128 &m, int i, float f) { *((float *)&m + i) = f; }
// Stores f into float lane i of m by addressing the SIMD value's storage as
// a float array. i is used as-is, without any range check.
inline void set1f(SIMD_M128 &m, int i, float f)
{
    float *lanes = reinterpret_cast<float *>(&m);
    lanes[i] = f;
}

inline float get1f(__m128 m, int i) { return *((float *)&m + i); }
// Fetches float lane i of m by addressing the storage of the by-value copy
// as a float array. i is used as-is, without any range check.
inline float get1f(SIMD_M128 m, int i)
{
    const float *lanes = reinterpret_cast<const float *>(&m);
    return lanes[i];
}

void ResonatorEffect::sampleRateReset()
{
Expand Down Expand Up @@ -206,25 +206,25 @@ void ResonatorEffect::process(float *dataL, float *dataR)
dataOS[1][s] =
storage->lookup_waveshape(sst::waveshapers::WaveshaperType::wst_asym, dataOS[1][s]);

auto l128 = _mm_setzero_ps();
auto r128 = _mm_setzero_ps();
auto l128 = SIMD_MM(setzero_ps)();
auto r128 = SIMD_MM(setzero_ps)();

if (filtptr)
{
l128 = filtptr(&(qfus[0]), _mm_set1_ps(dataOS[0][s]));
r128 = filtptr(&(qfus[1]), _mm_set1_ps(dataOS[1][s]));
l128 = filtptr(&(qfus[0]), SIMD_MM(set1_ps)(dataOS[0][s]));
r128 = filtptr(&(qfus[1]), SIMD_MM(set1_ps)(dataOS[1][s]));
}
else
{
l128 = _mm_set1_ps(dataOS[0][s]);
r128 = _mm_set1_ps(dataOS[1][s]);
l128 = SIMD_MM(set1_ps)(dataOS[0][s]);
r128 = SIMD_MM(set1_ps)(dataOS[1][s]);
}

float mixl = 0, mixr = 0;
float tl[4], tr[4];

_mm_store_ps(tl, l128);
_mm_store_ps(tr, r128);
SIMD_MM(store_ps)(tl, l128);
SIMD_MM(store_ps)(tr, r128);

for (int i = 0; i < 3; ++i)
{
Expand Down
10 changes: 5 additions & 5 deletions src/common/dsp/effects/WaveShaperEffect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,10 @@ void WaveShaperEffect::process(float *dataL, float *dataR)

for (int i = 0; i < sst::waveshapers::n_waveshaper_registers; ++i)
{
wss.R[i] = _mm_set1_ps(R[i]);
wss.R[i] = SIMD_MM(set1_ps)(R[i]);
}

wss.init = _mm_cmpneq_ps(_mm_setzero_ps(), _mm_setzero_ps());
wss.init = SIMD_MM(cmpneq_ps)(SIMD_MM(setzero_ps)(), SIMD_MM(setzero_ps)());
}

auto wsptr = sst::waveshapers::GetQuadWaveshaper(lastShape);
Expand All @@ -154,14 +154,14 @@ void WaveShaperEffect::process(float *dataL, float *dataR)
din[0] = hbfComp * scalef * dataOS[0][i] + bias.v;
din[1] = hbfComp * scalef * dataOS[1][i] + bias.v;

auto dat = _mm_load_ps(din);
auto drv = _mm_set1_ps(drive.v);
auto dat = SIMD_MM(load_ps)(din);
auto drv = SIMD_MM(set1_ps)(drive.v);

dat = wsptr(&wss, dat, drv);

float res alignas(16)[4];

_mm_store_ps(res, dat);
SIMD_MM(store_ps)(res, dat);

dataOS[0][i] = res[0] * oscalef;
dataOS[1][i] = res[1] * oscalef;
Expand Down
12 changes: 6 additions & 6 deletions src/common/dsp/effects/chowdsp/bbd_utils/BBDFilterBank.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ constexpr std::complex<float> oFiltPole[] = {
{-51468.0f, -21437.0f}, {-51468.0f, +21437.0f}, {-26276.0f, -59699.0f}, {-26276.0f, +59699.0f}};
} // namespace FilterSpec

inline SSEComplex fast_complex_pow(__m128 angle, float b)
inline SSEComplex fast_complex_pow(SIMD_M128 angle, float b)
{
const __m128 scalar = _mm_set1_ps(b);
auto angle_pow = _mm_mul_ps(angle, scalar);
const auto scalar = SIMD_MM(set1_ps)(b);
auto angle_pow = SIMD_MM(mul_ps)(angle, scalar);
return SSEComplex::fastExp(angle_pow);
}

Expand Down Expand Up @@ -107,7 +107,7 @@ struct InputFilterBank

inline void process(float u)
{
x = pole_corr * x + SSEComplex(_mm_set1_ps(u), _mm_set1_ps(0.0f));
x = pole_corr * x + SSEComplex(SIMD_MM(set1_ps)(u), SIMD_MM(set1_ps)(0.0f));
}

SSEComplex x;
Expand All @@ -118,7 +118,7 @@ struct InputFilterBank
SSEComplex poles;
SSEComplex root_corr;
SSEComplex pole_corr;
__m128 pole_corr_angle;
SIMD_M128 pole_corr_angle;

SSEComplex Aplus;

Expand Down Expand Up @@ -184,7 +184,7 @@ struct OutputFilterBank
SSEComplex poles;
SSEComplex root_corr;
SSEComplex pole_corr;
__m128 pole_corr_angle;
SIMD_M128 pole_corr_angle;

SSEComplex Aplus;

Expand Down
Loading

0 comments on commit b490f99

Please sign in to comment.