From 72f0fb2b594fa0798d3fd82195f4cec64db96a1d Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 7 Jan 2026 23:32:05 -0500 Subject: [PATCH 01/17] modified oaconvolve --- sdp/challenger_sdp.py | 156 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 150 insertions(+), 6 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index ce60e8d..e481852 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -1,4 +1,151 @@ +import math + import numpy as np +import scipy._lib.array_api_extra as xpx + +from scipy.special import lambertw +from scipy.fft import next_fast_len +from scipy._lib._array_api import array_namespace +from scipy import fft as sp_fft +from scipy.signal._signaltools import _apply_conv_mode, _split # , fftconvolve +from scipy.fft._pocketfft.basic import r2c, c2r, r2cn, c2rn + + +def _sliding_dot_product_r2c2r(Q, T): + n = len(T) + m = len(Q) + next_fast_n = next_fast_len(n, real=True) + + tmp = np.empty((2, next_fast_n)) + tmp[0, :m] = Q[::-1] + tmp[0, m:] = 0.0 + tmp[1, :n] = T + tmp[1, n:] = 0.0 + fft_2d = r2c(True, tmp, axis=-1) + + return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] + + +########## +def _rfft_irfft_r2c2rn(Q_2D, T_2D, shape): + + return c2rn( + False, + r2cn(True, T_2D, shape, axis=1) * r2cn(True, Q_2D, shape, axis=1), + n=shape, + axis=1, + ) + + +########## + + +def _freq_domain_conv(T_2D, Q_2D, shape): + sp1 = sp_fft.rfftn(T_2D, shape, axes=1) + sp2 = sp_fft.rfftn(Q_2D, shape, axes=1) + + return sp_fft.irfftn(sp1 * sp2, shape, axes=1) + + +def _calc_oa_lens(n, m): + fallback = (n + m - 1, None, n, m) + if n == m or m >= n: + return fallback + + overlap = m - 1 + opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real + block_size = next_fast_len(math.ceil(opt_size), real=True) # real=True? + + # Use conventional FFT convolve if there is only going to be one block. + if block_size >= n: + return fallback + + T_step = block_size - m + 1 + Q_step = m + + return (block_size, overlap, T_step, Q_step) + + +def _sliding_dot_product(T, Q): + xp = array_namespace(T, Q) + + n = T.shape[0] + m = Q.shape[0] + + # in1 = T + # in2 = Q + # axes = [0] + s1 = (n,) + s2 = (m,) + + shape_final = n + m - 1 + shape_final_lst = [shape_final] + block_size, overlaps, T_step, Q_step = _calc_oa_lens(n, m) + + # block_size_tuple = (block_size,) + # overlaps_tuple = (overlaps,) + # T_step_tuple = (T_step,) + # Q_step_tuple = (Q_step,) + + if T_step == n and Q_step == m: + return _sliding_dot_product_r2c2r(Q, T) + + # flip Q: + Q = Q[::-1] + + curnstep1 = math.ceil((n + 1) / T_step) + if (block_size - overlaps) * curnstep1 < shape_final: + curnstep1 += 1 + + curpad1 = curnstep1 * T_step - n + + # nsteps1 = [curnstep1] + # nsteps2 = [1] + pad_size1 = [(0, curpad1)] + # pad_size2 = [(0, 0)] + + if not all(curpad == (0, 0) for curpad in pad_size1): + T = xpx.pad(T, pad_size1, mode="constant", constant_values=0, xp=xp) + + # Reshape the overlap-add parts to input block sizes. + split_axes = [0] + fft_axes = [1] + + # reshape_size1 = [curnstep1, T_step] # reshape_size1.insert(0, curnstep1) + # reshape_size2 = [1, Q_step] # reshape_size2.insert(0, 1) + + T = xp.reshape(T, (curnstep1, T_step)) + Q = xp.reshape(Q, (1, Q_step)) + + # fft_shape = [block_size] + ret = _freq_domain_conv(T, Q, block_size) + + # Do the overlap-add. + # ax = 0 + ax_fft = 1 + ax_split = 0 + + overlap = overlaps + if overlap is not None: + ret, overpart = _split(ret, [-overlap], 1, xp=xp) + overpart = _split(overpart, [-1], ax_split, xp=xp)[0] + ret_overpart = _split(ret, [overlap], ax_fft, xp=xp)[0] + ret_overpart = _split(ret_overpart, [1], ax_split, xp)[1] + ret_overpart += overpart + + # Reshape back to the correct dimensionality. + shape_ret = [ + ret.shape[i] if i not in fft_axes else ret.shape[i] * ret.shape[i - 1] + for i in range(ret.ndim) + if i not in split_axes + ] + ret = xp.reshape(ret, shape_ret) + + # Slice to the correct size. + slice_final = tuple([slice(islice) for islice in shape_final_lst]) + ret = ret[slice_final] + + return _apply_conv_mode(ret, s1, s2, "valid", [0], xp) def setup(Q, T): @@ -6,9 +153,6 @@ def setup(Q, T): def sliding_dot_product(Q, T): - m = len(Q) - l = T.shape[0] - m + 1 - out = np.empty(l) - for i in range(l): - out[i] = np.dot(Q, T[i : i + m]) - return out + if len(Q) == len(T): + return np.dot(Q, T) + return _sliding_dot_product(T, Q) From be0035e2040003b3e36edc0ebd0db76b90ad9e47 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 8 Jan 2026 00:22:11 -0500 Subject: [PATCH 02/17] update code logic --- sdp/challenger_sdp.py | 54 +++++++++++-------------------------------- 1 file changed, 14 insertions(+), 40 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index e481852..47db55a 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -1,7 +1,8 @@ import math import numpy as np -import scipy._lib.array_api_extra as xpx + +# import scipy._lib.array_api_extra as xpx from scipy.special import lambertw from scipy.fft import next_fast_len @@ -31,9 +32,9 @@ def _rfft_irfft_r2c2rn(Q_2D, T_2D, shape): return c2rn( False, - r2cn(True, T_2D, shape, axis=1) * r2cn(True, Q_2D, shape, axis=1), - n=shape, - axis=1, + r2cn(True, T_2D, shape, axes=1) * r2cn(True, Q_2D, shape, axes=1), + s=shape, + axes=1, ) @@ -81,63 +82,36 @@ def _sliding_dot_product(T, Q): shape_final = n + m - 1 shape_final_lst = [shape_final] block_size, overlaps, T_step, Q_step = _calc_oa_lens(n, m) - - # block_size_tuple = (block_size,) - # overlaps_tuple = (overlaps,) - # T_step_tuple = (T_step,) - # Q_step_tuple = (Q_step,) - if T_step == n and Q_step == m: return _sliding_dot_product_r2c2r(Q, T) - # flip Q: Q = Q[::-1] - curnstep1 = math.ceil((n + 1) / T_step) if (block_size - overlaps) * curnstep1 < shape_final: curnstep1 += 1 curpad1 = curnstep1 * T_step - n - - # nsteps1 = [curnstep1] - # nsteps2 = [1] - pad_size1 = [(0, curpad1)] - # pad_size2 = [(0, 0)] - - if not all(curpad == (0, 0) for curpad in pad_size1): - T = xpx.pad(T, pad_size1, mode="constant", constant_values=0, xp=xp) - - # Reshape the overlap-add parts to input block sizes. - split_axes = [0] - fft_axes = [1] - - # reshape_size1 = [curnstep1, T_step] # reshape_size1.insert(0, curnstep1) - # reshape_size2 = [1, Q_step] # reshape_size2.insert(0, 1) + if curpad1 != 0: + T = np.pad(T, [(0, curpad1)], mode="constant", constant_values=0) T = xp.reshape(T, (curnstep1, T_step)) Q = xp.reshape(Q, (1, Q_step)) + ret = _rfft_irfft_r2c2rn(T, Q, block_size) - # fft_shape = [block_size] - ret = _freq_domain_conv(T, Q, block_size) - - # Do the overlap-add. - # ax = 0 - ax_fft = 1 - ax_split = 0 - + # overlap-add overlap = overlaps if overlap is not None: ret, overpart = _split(ret, [-overlap], 1, xp=xp) - overpart = _split(overpart, [-1], ax_split, xp=xp)[0] - ret_overpart = _split(ret, [overlap], ax_fft, xp=xp)[0] - ret_overpart = _split(ret_overpart, [1], ax_split, xp)[1] + overpart = _split(overpart, [-1], 0, xp=xp)[0] + ret_overpart = _split(ret, [overlap], 1, xp=xp)[0] + ret_overpart = _split(ret_overpart, [1], 0, xp)[1] ret_overpart += overpart # Reshape back to the correct dimensionality. shape_ret = [ - ret.shape[i] if i not in fft_axes else ret.shape[i] * ret.shape[i - 1] + ret.shape[i] if i not in [1] else ret.shape[i] * ret.shape[i - 1] for i in range(ret.ndim) - if i not in split_axes + if i not in [0] ] ret = xp.reshape(ret, shape_ret) From 907e2e23b179ddf12f7ce6c6dbac4292a7c9df2a Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 8 Jan 2026 09:49:37 -0500 Subject: [PATCH 03/17] minor clean ups --- sdp/challenger_sdp.py | 36 ++++++++---------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index 47db55a..a4b27a2 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -2,13 +2,10 @@ import numpy as np -# import scipy._lib.array_api_extra as xpx - from scipy.special import lambertw from scipy.fft import next_fast_len from scipy._lib._array_api import array_namespace -from scipy import fft as sp_fft -from scipy.signal._signaltools import _apply_conv_mode, _split # , fftconvolve +from scipy.signal._signaltools import _apply_conv_mode, _split from scipy.fft._pocketfft.basic import r2c, c2r, r2cn, c2rn @@ -27,7 +24,6 @@ def _sliding_dot_product_r2c2r(Q, T): return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] -########## def _rfft_irfft_r2c2rn(Q_2D, T_2D, shape): return c2rn( @@ -38,26 +34,17 @@ def _rfft_irfft_r2c2rn(Q_2D, T_2D, shape): ) -########## - - -def _freq_domain_conv(T_2D, Q_2D, shape): - sp1 = sp_fft.rfftn(T_2D, shape, axes=1) - sp2 = sp_fft.rfftn(Q_2D, shape, axes=1) - - return sp_fft.irfftn(sp1 * sp2, shape, axes=1) - - def _calc_oa_lens(n, m): fallback = (n + m - 1, None, n, m) - if n == m or m >= n: - return fallback + # assuming n > m + # otherwise, need to add the following: + # if n == m or m >= n: + # return fallback overlap = m - 1 opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real - block_size = next_fast_len(math.ceil(opt_size), real=True) # real=True? + block_size = next_fast_len(math.ceil(opt_size), real=True) - # Use conventional FFT convolve if there is only going to be one block. if block_size >= n: return fallback @@ -73,14 +60,7 @@ def _sliding_dot_product(T, Q): n = T.shape[0] m = Q.shape[0] - # in1 = T - # in2 = Q - # axes = [0] - s1 = (n,) - s2 = (m,) - shape_final = n + m - 1 - shape_final_lst = [shape_final] block_size, overlaps, T_step, Q_step = _calc_oa_lens(n, m) if T_step == n and Q_step == m: return _sliding_dot_product_r2c2r(Q, T) @@ -116,10 +96,10 @@ def _sliding_dot_product(T, Q): ret = xp.reshape(ret, shape_ret) # Slice to the correct size. - slice_final = tuple([slice(islice) for islice in shape_final_lst]) + slice_final = tuple([slice(islice) for islice in [shape_final]]) ret = ret[slice_final] - return _apply_conv_mode(ret, s1, s2, "valid", [0], xp) + return _apply_conv_mode(ret, (n,), (m,), "valid", [0], xp) def setup(Q, T): From 969f187b3278bb38eed4055d6188a5a8e7090d7c Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 9 Jan 2026 10:16:10 -0500 Subject: [PATCH 04/17] major changes to imporve readability --- sdp/challenger_sdp.py | 110 ++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 73 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index a4b27a2..ab9fa8f 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -1,12 +1,32 @@ import math - import numpy as np - from scipy.special import lambertw from scipy.fft import next_fast_len -from scipy._lib._array_api import array_namespace -from scipy.signal._signaltools import _apply_conv_mode, _split -from scipy.fft._pocketfft.basic import r2c, c2r, r2cn, c2rn +from scipy.fft._pocketfft.basic import r2c, c2r + + +def _rfft_irfft_r2c2r_block(Q, T, block_size): + m = Q.shape[0] + n = T.shape[0] + T_step = block_size - (m - 1) + n_step = math.ceil(n / T_step) + last_chunk_start = (n_step - 1) * T_step + + tmp = np.empty((n_step + 1, block_size), dtype=np.float64) + + # fill with T, block-wise + tmp[: n_step - 1, :T_step] = T[:last_chunk_start].reshape(n_step - 1, T_step) + tmp[: n_step - 1, T_step:] = 0.0 + tmp[n_step - 1, : n - last_chunk_start] = T[last_chunk_start:] + tmp[n_step - 1, n - last_chunk_start :] = 0.0 + + # fill with Q[::-1] + tmp[n_step, :m] = Q[::-1] + tmp[n_step, m:] = 0.0 + + fft_2d = r2c(True, tmp, axis=-1) + + return c2r(False, np.multiply(fft_2d[:-1], fft_2d[[-1]]), n=block_size) def _sliding_dot_product_r2c2r(Q, T): @@ -24,82 +44,26 @@ def _sliding_dot_product_r2c2r(Q, T): return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] -def _rfft_irfft_r2c2rn(Q_2D, T_2D, shape): - - return c2rn( - False, - r2cn(True, T_2D, shape, axes=1) * r2cn(True, Q_2D, shape, axes=1), - s=shape, - axes=1, - ) - - -def _calc_oa_lens(n, m): - fallback = (n + m - 1, None, n, m) - # assuming n > m - # otherwise, need to add the following: - # if n == m or m >= n: - # return fallback +def _sliding_dot_product(Q, T): + m = Q.shape[0] + n = T.shape[0] + # compute optimal block size overlap = m - 1 opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real block_size = next_fast_len(math.ceil(opt_size), real=True) - if block_size >= n: - return fallback - - T_step = block_size - m + 1 - Q_step = m - - return (block_size, overlap, T_step, Q_step) - - -def _sliding_dot_product(T, Q): - xp = array_namespace(T, Q) - - n = T.shape[0] - m = Q.shape[0] - - shape_final = n + m - 1 - block_size, overlaps, T_step, Q_step = _calc_oa_lens(n, m) - if T_step == n and Q_step == m: return _sliding_dot_product_r2c2r(Q, T) - Q = Q[::-1] - curnstep1 = math.ceil((n + 1) / T_step) - if (block_size - overlaps) * curnstep1 < shape_final: - curnstep1 += 1 - - curpad1 = curnstep1 * T_step - n - if curpad1 != 0: - T = np.pad(T, [(0, curpad1)], mode="constant", constant_values=0) - - T = xp.reshape(T, (curnstep1, T_step)) - Q = xp.reshape(Q, (1, Q_step)) - ret = _rfft_irfft_r2c2rn(T, Q, block_size) - - # overlap-add - overlap = overlaps - if overlap is not None: - ret, overpart = _split(ret, [-overlap], 1, xp=xp) - overpart = _split(overpart, [-1], 0, xp=xp)[0] - ret_overpart = _split(ret, [overlap], 1, xp=xp)[0] - ret_overpart = _split(ret_overpart, [1], 0, xp)[1] - ret_overpart += overpart - - # Reshape back to the correct dimensionality. - shape_ret = [ - ret.shape[i] if i not in [1] else ret.shape[i] * ret.shape[i - 1] - for i in range(ret.ndim) - if i not in [0] - ] - ret = xp.reshape(ret, shape_ret) + # perform fft-ifft operation on blocks + ret = _rfft_irfft_r2c2r_block(Q, T, block_size) - # Slice to the correct size. - slice_final = tuple([slice(islice) for islice in [shape_final]]) - ret = ret[slice_final] + # overlap-add process + out = ret[:, :-overlap] + out[1:, :overlap] += ret[:-1, -overlap:] + out = np.reshape(out, (-1,)) - return _apply_conv_mode(ret, (n,), (m,), "valid", [0], xp) + return out[m - 1 : n] def setup(Q, T): @@ -109,4 +73,4 @@ def setup(Q, T): def sliding_dot_product(Q, T): if len(Q) == len(T): return np.dot(Q, T) - return _sliding_dot_product(T, Q) + return _sliding_dot_product(Q, T) From b366e35b7a8c06abc82ed8b7882fefbcfab1d910 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 9 Jan 2026 12:31:56 -0500 Subject: [PATCH 05/17] Add param blocksize --- sdp/challenger_sdp.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index ab9fa8f..3ac2b9a 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -44,14 +44,16 @@ def _sliding_dot_product_r2c2r(Q, T): return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] -def _sliding_dot_product(Q, T): +def _sliding_dot_product(Q, T, block_size=None): m = Q.shape[0] n = T.shape[0] - # compute optimal block size overlap = m - 1 - opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real - block_size = next_fast_len(math.ceil(opt_size), real=True) + if block_size is None: + # compute optimal block size + opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real + block_size = next_fast_len(math.ceil(opt_size), real=True) + if block_size >= n: return _sliding_dot_product_r2c2r(Q, T) @@ -70,7 +72,7 @@ def setup(Q, T): return -def sliding_dot_product(Q, T): +def sliding_dot_product(Q, T, block_size=None): if len(Q) == len(T): return np.dot(Q, T) - return _sliding_dot_product(Q, T) + return _sliding_dot_product(Q, T, block_size=block_size) From a3d2e44cfdd4d990dcf61fa516788e22dd4cf20e Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 9 Jan 2026 13:22:16 -0500 Subject: [PATCH 06/17] add temp test for challenger --- test.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test.py b/test.py index d1b80e8..7879e39 100644 --- a/test.py +++ b/test.py @@ -210,3 +210,18 @@ def test_pyfftw_sdp_max_n(): np.testing.assert_allclose(comp, ref) return + + +def test_oaconvolve_sdp_blocksize(): + from sdp.challenger_sdp import sliding_dot_product + + T = np.random.rand(2**10) + Q = np.random.rand(2**8) + block_size = 2**9 + + comp = sliding_dot_product(Q, T, block_size=block_size) + ref = naive_sliding_dot_product(Q, T) + + np.testing.assert_allclose(comp, ref) + + return From 22d233be12b76818f2e798955e499527d9e9f563 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 9 Jan 2026 23:53:01 -0500 Subject: [PATCH 07/17] add func for computing block size --- sdp/challenger_sdp.py | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index 3ac2b9a..67551bb 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -5,6 +5,24 @@ from scipy.fft._pocketfft.basic import r2c, c2r +def _compute_block_size(m, n, block_size=None): + """ + Return a block size for the overlap-add method. + """ + if block_size is None: + # Find optimal block_size based on m and n + if m >= n / 2: + block_size = n # i.e. no blocking + else: + overlap = m - 1 + opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real + block_size = next_fast_len(math.ceil(opt_size), real=True) + + block_size = max(block_size, m) + + return min(block_size, n) + + def _rfft_irfft_r2c2r_block(Q, T, block_size): m = Q.shape[0] n = T.shape[0] @@ -44,23 +62,12 @@ def _sliding_dot_product_r2c2r(Q, T): return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] -def _sliding_dot_product(Q, T, block_size=None): +def _sliding_dot_product(Q, T, block_size): m = Q.shape[0] n = T.shape[0] overlap = m - 1 - if block_size is None: - # compute optimal block size - opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real - block_size = next_fast_len(math.ceil(opt_size), real=True) - - if block_size >= n: - return _sliding_dot_product_r2c2r(Q, T) - - # perform fft-ifft operation on blocks ret = _rfft_irfft_r2c2r_block(Q, T, block_size) - - # overlap-add process out = ret[:, :-overlap] out[1:, :overlap] += ret[:-1, -overlap:] out = np.reshape(out, (-1,)) @@ -73,6 +80,13 @@ def setup(Q, T): def sliding_dot_product(Q, T, block_size=None): - if len(Q) == len(T): + m = Q.shape[0] + n = T.shape[0] + if m == n: return np.dot(Q, T) - return _sliding_dot_product(Q, T, block_size=block_size) + + block_size = _compute_block_size(m, n, block_size=block_size) + if block_size >= n: + return _sliding_dot_product_r2c2r(Q, T) + else: + return _sliding_dot_product(Q, T, block_size) From d6eedfab8083f89400943c40064e8ba66ea7ab50 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 10 Jan 2026 00:46:35 -0500 Subject: [PATCH 08/17] remove redundant code --- sdp/challenger_sdp.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index 67551bb..68c62dd 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -3,6 +3,7 @@ from scipy.special import lambertw from scipy.fft import next_fast_len from scipy.fft._pocketfft.basic import r2c, c2r +from . import pocketfft_r2c_c2r_sdp def _compute_block_size(m, n, block_size=None): @@ -47,21 +48,6 @@ def _rfft_irfft_r2c2r_block(Q, T, block_size): return c2r(False, np.multiply(fft_2d[:-1], fft_2d[[-1]]), n=block_size) -def _sliding_dot_product_r2c2r(Q, T): - n = len(T) - m = len(Q) - next_fast_n = next_fast_len(n, real=True) - - tmp = np.empty((2, next_fast_n)) - tmp[0, :m] = Q[::-1] - tmp[0, m:] = 0.0 - tmp[1, :n] = T - tmp[1, n:] = 0.0 - fft_2d = r2c(True, tmp, axis=-1) - - return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] - - def _sliding_dot_product(Q, T, block_size): m = Q.shape[0] n = T.shape[0] @@ -87,6 +73,6 @@ def sliding_dot_product(Q, T, block_size=None): block_size = _compute_block_size(m, n, block_size=block_size) if block_size >= n: - return _sliding_dot_product_r2c2r(Q, T) + return pocketfft_r2c_c2r_sdp.sliding_dot_product(Q, T) else: return _sliding_dot_product(Q, T, block_size) From 4e23694fa9790a2cc0ce37e3a9c126bbcd2c1a3a Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 11 Jan 2026 14:50:40 -0500 Subject: [PATCH 09/17] added clearer functions --- sdp/challenger_sdp.py | 120 +++++++++++++++++++++++++++++++----------- 1 file changed, 89 insertions(+), 31 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index 68c62dd..bf9522d 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -6,73 +6,131 @@ from . import pocketfft_r2c_c2r_sdp -def _compute_block_size(m, n, block_size=None): +def _compute_block_size(m, n, conv_block_size=None): """ Return a block size for the overlap-add method. """ - if block_size is None: + if conv_block_size is None: # Find optimal block_size based on m and n if m >= n / 2: - block_size = n # i.e. no blocking + conv_block_size = n # i.e. no blocking else: overlap = m - 1 opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real - block_size = next_fast_len(math.ceil(opt_size), real=True) + conv_block_size = next_fast_len(math.ceil(opt_size), real=True) - block_size = max(block_size, m) + conv_block_size = max(conv_block_size, m) - return min(block_size, n) + return min(conv_block_size, n) -def _rfft_irfft_r2c2r_block(Q, T, block_size): +def _pocketfft_oaconvolve_block(Q, T, conv_block_size): m = Q.shape[0] n = T.shape[0] - T_step = block_size - (m - 1) - n_step = math.ceil(n / T_step) - last_chunk_start = (n_step - 1) * T_step - tmp = np.empty((n_step + 1, block_size), dtype=np.float64) + T_chunk_size = conv_block_size - (m - 1) + n_chunks = math.ceil(n / T_chunk_size) + last_chunk_start = (n_chunks - 1) * T_chunk_size + + tmp = np.empty((n_chunks + 1, conv_block_size), dtype=np.float64) # fill with T, block-wise - tmp[: n_step - 1, :T_step] = T[:last_chunk_start].reshape(n_step - 1, T_step) - tmp[: n_step - 1, T_step:] = 0.0 - tmp[n_step - 1, : n - last_chunk_start] = T[last_chunk_start:] - tmp[n_step - 1, n - last_chunk_start :] = 0.0 + tmp[: n_chunks - 1, :T_chunk_size] = T[:last_chunk_start].reshape( + n_chunks - 1, T_chunk_size + ) + tmp[: n_chunks - 1, T_chunk_size:] = 0.0 + tmp[n_chunks - 1, : n - last_chunk_start] = T[last_chunk_start:] + tmp[n_chunks - 1, n - last_chunk_start :] = 0.0 - # fill with Q[::-1] - tmp[n_step, :m] = Q[::-1] - tmp[n_step, m:] = 0.0 + # fill with Q + tmp[n_chunks, :m] = Q + tmp[n_chunks, m:] = 0.0 fft_2d = r2c(True, tmp, axis=-1) - return c2r(False, np.multiply(fft_2d[:-1], fft_2d[[-1]]), n=block_size) - + return c2r(False, np.multiply(fft_2d[:-1], fft_2d[[-1]]), n=conv_block_size) + + +def _pocketfft_oaconvolve(Q, T, conv_block_size): + # Linear convolution between two 1D arrays X and Y + # (of same length N) is an array with length N, + # and its n-th element is defined as: + # out[n] = sum_{i=0}^{N-1} X[i] * Y[(n - i)] + # and, for any out-of-range index, the value is 0. + + # To compute this linear convolution: + # Approach I: Regular Method + # X: 1 2 3 4 5 6 --conv-- Y: b a 0 0 0 0 + + # X_conv_Y: + # [ + # 1b, + # 1a + 2b, + # 2a + 3b, + # 3a + 4b, + # 4a + 5b, + # 5a + 6b + # ] + + # Approach II: Overlap-Add Method + # Step1: Chunk T and compute convolution block-wise + # Choose a block size --> 3 + # Find chunk size --> block_size - (M - 1) = 2 + # Chunk T according to chunk size, and Pad + # each chunk with M-1 zeros at the end + + # X1: 1 2 0 --conv-- Y: b a 0 + # X2: 3 4 0 --conv-- Y: b a 0 + # X3: 5 6 0 --conv-- Y: b a 0 + + # X1_conv_Y: [1b, 1a + 2b, 2a] + # X2_conv_Y: [3b, 3a + 4b, 4a] + # X3_conv_Y: [5b, 5a + 6b, 6a] + + # Step 2: Add the overlapping parts + # Slice the blocks based on `chunk_size`, so: + # [1b, 1a + 2b] + # [3b, 3a + 4b] + # [5b, 5a + 6b] + + # The last `M-1` element(s) of k-th block + # will be added to first `M-1` element(s) of (k+1)-th block. + # Final output: + # [ + # 1b + # 1a + 2b, + # 3b (+ 2a), + # 3a + 4b, + # 5b (+ 4a), + # 5a + 6b + # ] + # Now this is equivalent to X_conv_Y. -def _sliding_dot_product(Q, T, block_size): m = Q.shape[0] n = T.shape[0] - overlap = m - 1 - ret = _rfft_irfft_r2c2r_block(Q, T, block_size) - out = ret[:, :-overlap] - out[1:, :overlap] += ret[:-1, -overlap:] - out = np.reshape(out, (-1,)) + QT_conv_blocks = _pocketfft_oaconvolve_block(Q, T, conv_block_size) + out = QT_conv_blocks[:, :-overlap] + out[1:, :overlap] += QT_conv_blocks[:-1, -overlap:] + return np.reshape(out, (-1,)) + - return out[m - 1 : n] +def _sliding_dot_product(Q, T, conv_block_size): + return _pocketfft_oaconvolve(Q[::-1], T, conv_block_size)[len(Q) - 1 : len(T)] def setup(Q, T): return -def sliding_dot_product(Q, T, block_size=None): +def sliding_dot_product(Q, T, conv_block_size=None): m = Q.shape[0] n = T.shape[0] if m == n: return np.dot(Q, T) - block_size = _compute_block_size(m, n, block_size=block_size) - if block_size >= n: + conv_block_size = _compute_block_size(m, n, conv_block_size=conv_block_size) + if conv_block_size >= n: return pocketfft_r2c_c2r_sdp.sliding_dot_product(Q, T) else: - return _sliding_dot_product(Q, T, block_size) + return _sliding_dot_product(Q, T, conv_block_size) From 01a0e7a5236530a7bb7f99b447b9a647371e27ed Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 11 Jan 2026 14:52:42 -0500 Subject: [PATCH 10/17] minor change --- sdp/challenger_sdp.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index bf9522d..f278392 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -105,10 +105,7 @@ def _pocketfft_oaconvolve(Q, T, conv_block_size): # 5a + 6b # ] # Now this is equivalent to X_conv_Y. - - m = Q.shape[0] - n = T.shape[0] - overlap = m - 1 + overlap = len(Q) - 1 QT_conv_blocks = _pocketfft_oaconvolve_block(Q, T, conv_block_size) out = QT_conv_blocks[:, :-overlap] out[1:, :overlap] += QT_conv_blocks[:-1, -overlap:] From dddb708b77215600500a917c4cefaca1ff361cdc Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 11 Jan 2026 14:53:53 -0500 Subject: [PATCH 11/17] minor change to help with future refactoring --- sdp/pocketfft_r2c_c2r_sdp.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/sdp/pocketfft_r2c_c2r_sdp.py b/sdp/pocketfft_r2c_c2r_sdp.py index 1f14342..524da36 100644 --- a/sdp/pocketfft_r2c_c2r_sdp.py +++ b/sdp/pocketfft_r2c_c2r_sdp.py @@ -3,20 +3,24 @@ from scipy.fft._pocketfft.basic import r2c, c2r -def setup(Q, T): - return - - -def sliding_dot_product(Q, T): +def _pocketfft_convolve(Q, T): n = len(T) m = len(Q) next_fast_n = next_fast_len(n, real=True) tmp = np.empty((2, next_fast_n)) - tmp[0, :m] = Q[::-1] + tmp[0, :m] = Q tmp[0, m:] = 0.0 tmp[1, :n] = T tmp[1, n:] = 0.0 fft_2d = r2c(True, tmp, axis=-1) - return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] + return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n) + + +def setup(Q, T): + return + + +def sliding_dot_product(Q, T): + return _pocketfft_convolve(Q[::-1], T)[len(Q) - 1 : len(T)] From 41db845b0a153fde7254ab5dda6a5570375bdda2 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 11 Jan 2026 14:56:53 -0500 Subject: [PATCH 12/17] minor change --- sdp/challenger_sdp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index f278392..2b24110 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -105,8 +105,8 @@ def _pocketfft_oaconvolve(Q, T, conv_block_size): # 5a + 6b # ] # Now this is equivalent to X_conv_Y. - overlap = len(Q) - 1 QT_conv_blocks = _pocketfft_oaconvolve_block(Q, T, conv_block_size) + overlap = len(Q) - 1 out = QT_conv_blocks[:, :-overlap] out[1:, :overlap] += QT_conv_blocks[:-1, -overlap:] return np.reshape(out, (-1,)) From 99e450b8fd8ea4243258c9751d59356def0f71a4 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 11 Jan 2026 15:44:29 -0500 Subject: [PATCH 13/17] Added reference for finding optimal block size --- sdp/challenger_sdp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index 2b24110..a39b7b0 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -15,6 +15,8 @@ def _compute_block_size(m, n, conv_block_size=None): if m >= n / 2: conv_block_size = n # i.e. no blocking else: + # To minimize Eq. 3 in + # https://en.wikipedia.org/wiki/Overlap–add_method overlap = m - 1 opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real conv_block_size = next_fast_len(math.ceil(opt_size), real=True) From e8fa3315f8bc3cdf005dd9406d1fff272351d629 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 11 Jan 2026 16:53:28 -0500 Subject: [PATCH 14/17] fixed test --- test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index 7879e39..7fee8aa 100644 --- a/test.py +++ b/test.py @@ -217,9 +217,9 @@ def test_oaconvolve_sdp_blocksize(): T = np.random.rand(2**10) Q = np.random.rand(2**8) - block_size = 2**9 + conv_block_size = 2**9 - comp = sliding_dot_product(Q, T, block_size=block_size) + comp = sliding_dot_product(Q, T, conv_block_size=conv_block_size) ref = naive_sliding_dot_product(Q, T) np.testing.assert_allclose(comp, ref) From f45f541e8543e48fc51980a0efca55908ec38143 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 11 Jan 2026 21:41:29 -0500 Subject: [PATCH 15/17] revise comment --- sdp/challenger_sdp.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index a39b7b0..a49fd9e 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -54,19 +54,18 @@ def _pocketfft_oaconvolve_block(Q, T, conv_block_size): def _pocketfft_oaconvolve(Q, T, conv_block_size): - # Linear convolution between two 1D arrays X and Y + # Circular convolution between two 1D arrays X and Y # (of same length N) is an array with length N, # and its n-th element is defined as: - # out[n] = sum_{i=0}^{N-1} X[i] * Y[(n - i)] - # and, for any out-of-range index, the value is 0. + # out[n] = sum_{i=0}^{N-1} X[i] * Y[(n - i) mod N] - # To compute this linear convolution: + # To compute this circular convolution: # Approach I: Regular Method # X: 1 2 3 4 5 6 --conv-- Y: b a 0 0 0 0 # X_conv_Y: # [ - # 1b, + # 1b + 6a, # 1a + 2b, # 2a + 3b, # 3a + 4b, @@ -99,14 +98,14 @@ def _pocketfft_oaconvolve(Q, T, conv_block_size): # will be added to first `M-1` element(s) of (k+1)-th block. # Final output: # [ - # 1b + # 1b # ?? # 1a + 2b, # 3b (+ 2a), # 3a + 4b, # 5b (+ 4a), # 5a + 6b # ] - # Now this is equivalent to X_conv_Y. + # Now this is equivalent to X_conv_Y, for [M-1: N] QT_conv_blocks = _pocketfft_oaconvolve_block(Q, T, conv_block_size) overlap = len(Q) - 1 out = QT_conv_blocks[:, :-overlap] From 39e936ced715438401772af99d2eecacfac138b3 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 13 Jan 2026 13:33:08 -0500 Subject: [PATCH 16/17] removed overlap-add explanation. Created PR#36 instead --- sdp/challenger_sdp.py | 52 ------------------------------------------- 1 file changed, 52 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index a49fd9e..15ccfc0 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -54,58 +54,6 @@ def _pocketfft_oaconvolve_block(Q, T, conv_block_size): def _pocketfft_oaconvolve(Q, T, conv_block_size): - # Circular convolution between two 1D arrays X and Y - # (of same length N) is an array with length N, - # and its n-th element is defined as: - # out[n] = sum_{i=0}^{N-1} X[i] * Y[(n - i) mod N] - - # To compute this circular convolution: - # Approach I: Regular Method - # X: 1 2 3 4 5 6 --conv-- Y: b a 0 0 0 0 - - # X_conv_Y: - # [ - # 1b + 6a, - # 1a + 2b, - # 2a + 3b, - # 3a + 4b, - # 4a + 5b, - # 5a + 6b - # ] - - # Approach II: Overlap-Add Method - # Step1: Chunk T and compute convolution block-wise - # Choose a block size --> 3 - # Find chunk size --> block_size - (M - 1) = 2 - # Chunk T according to chunk size, and Pad - # each chunk with M-1 zeros at the end - - # X1: 1 2 0 --conv-- Y: b a 0 - # X2: 3 4 0 --conv-- Y: b a 0 - # X3: 5 6 0 --conv-- Y: b a 0 - - # X1_conv_Y: [1b, 1a + 2b, 2a] - # X2_conv_Y: [3b, 3a + 4b, 4a] - # X3_conv_Y: [5b, 5a + 6b, 6a] - - # Step 2: Add the overlapping parts - # Slice the blocks based on `chunk_size`, so: - # [1b, 1a + 2b] - # [3b, 3a + 4b] - # [5b, 5a + 6b] - - # The last `M-1` element(s) of k-th block - # will be added to first `M-1` element(s) of (k+1)-th block. - # Final output: - # [ - # 1b # ?? - # 1a + 2b, - # 3b (+ 2a), - # 3a + 4b, - # 5b (+ 4a), - # 5a + 6b - # ] - # Now this is equivalent to X_conv_Y, for [M-1: N] QT_conv_blocks = _pocketfft_oaconvolve_block(Q, T, conv_block_size) overlap = len(Q) - 1 out = QT_conv_blocks[:, :-overlap] From f6fed15598304507a013a4acb2c783d4ed2138b4 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 13 Jan 2026 20:10:12 -0500 Subject: [PATCH 17/17] renaming private functions to reflect valid convolution --- sdp/challenger_sdp.py | 27 +++++++++++++++------------ sdp/pocketfft_r2c_c2r_sdp.py | 8 +++++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index 15ccfc0..af3c890 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -53,16 +53,25 @@ def _pocketfft_oaconvolve_block(Q, T, conv_block_size): return c2r(False, np.multiply(fft_2d[:-1], fft_2d[[-1]]), n=conv_block_size) -def _pocketfft_oaconvolve(Q, T, conv_block_size): +def _pocketfft_valid_oaconvolve(Q, T, conv_block_size): QT_conv_blocks = _pocketfft_oaconvolve_block(Q, T, conv_block_size) overlap = len(Q) - 1 out = QT_conv_blocks[:, :-overlap] out[1:, :overlap] += QT_conv_blocks[:-1, -overlap:] - return np.reshape(out, (-1,)) + return np.reshape(out, (-1,))[len(Q) - 1 : len(T)] -def _sliding_dot_product(Q, T, conv_block_size): - return _pocketfft_oaconvolve(Q[::-1], T, conv_block_size)[len(Q) - 1 : len(T)] + +def _valid_convolve(Q, T, conv_block_size=None): + m = len(Q) + n = len(T) + conv_block_size = _compute_block_size(m, n, conv_block_size=conv_block_size) + if conv_block_size >= n: + out = pocketfft_r2c_c2r_sdp._pocketfft_valid_convolve(Q, T) + else: + out = _pocketfft_valid_oaconvolve(Q, T, conv_block_size) + + return out def setup(Q, T): @@ -70,13 +79,7 @@ def setup(Q, T): def sliding_dot_product(Q, T, conv_block_size=None): - m = Q.shape[0] - n = T.shape[0] - if m == n: + if len(Q) == len(T): return np.dot(Q, T) - - conv_block_size = _compute_block_size(m, n, conv_block_size=conv_block_size) - if conv_block_size >= n: - return pocketfft_r2c_c2r_sdp.sliding_dot_product(Q, T) else: - return _sliding_dot_product(Q, T, conv_block_size) + return _valid_convolve(Q[::-1], T, conv_block_size=conv_block_size) diff --git a/sdp/pocketfft_r2c_c2r_sdp.py b/sdp/pocketfft_r2c_c2r_sdp.py index 524da36..f663d32 100644 --- a/sdp/pocketfft_r2c_c2r_sdp.py +++ b/sdp/pocketfft_r2c_c2r_sdp.py @@ -3,7 +3,7 @@ from scipy.fft._pocketfft.basic import r2c, c2r -def _pocketfft_convolve(Q, T): +def _pocketfft_valid_convolve(Q, T): n = len(T) m = len(Q) next_fast_n = next_fast_len(n, real=True) @@ -15,7 +15,9 @@ def _pocketfft_convolve(Q, T): tmp[1, n:] = 0.0 fft_2d = r2c(True, tmp, axis=-1) - return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n) + return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[ + len(Q) - 1 : len(T) + ] def setup(Q, T): @@ -23,4 +25,4 @@ def setup(Q, T): def sliding_dot_product(Q, T): - return _pocketfft_convolve(Q[::-1], T)[len(Q) - 1 : len(T)] + return _pocketfft_valid_convolve(Q[::-1], T)