diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index ce60e8d..af3c890 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -1,14 +1,85 @@ +import math import numpy as np +from scipy.special import lambertw +from scipy.fft import next_fast_len +from scipy.fft._pocketfft.basic import r2c, c2r +from . import pocketfft_r2c_c2r_sdp -def setup(Q, T): - return +def _compute_block_size(m, n, conv_block_size=None): + """ + Return a block size for the overlap-add method. + """ + if conv_block_size is None: + # Find optimal block_size based on m and n + if m >= n / 2: + conv_block_size = n # i.e. no blocking + else: + # To minimize Eq. 3 in + # https://en.wikipedia.org/wiki/Overlap–add_method + overlap = m - 1 + opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real + conv_block_size = next_fast_len(math.ceil(opt_size), real=True) + + conv_block_size = max(conv_block_size, m) + + return min(conv_block_size, n) + + +def _pocketfft_oaconvolve_block(Q, T, conv_block_size): + m = Q.shape[0] + n = T.shape[0] + + T_chunk_size = conv_block_size - (m - 1) + n_chunks = math.ceil(n / T_chunk_size) + last_chunk_start = (n_chunks - 1) * T_chunk_size + + tmp = np.empty((n_chunks + 1, conv_block_size), dtype=np.float64) + + # fill with T, block-wise + tmp[: n_chunks - 1, :T_chunk_size] = T[:last_chunk_start].reshape( + n_chunks - 1, T_chunk_size + ) + tmp[: n_chunks - 1, T_chunk_size:] = 0.0 + tmp[n_chunks - 1, : n - last_chunk_start] = T[last_chunk_start:] + tmp[n_chunks - 1, n - last_chunk_start :] = 0.0 + + # fill with Q + tmp[n_chunks, :m] = Q + tmp[n_chunks, m:] = 0.0 + + fft_2d = r2c(True, tmp, axis=-1) + return c2r(False, np.multiply(fft_2d[:-1], fft_2d[[-1]]), n=conv_block_size) -def sliding_dot_product(Q, T): + +def _pocketfft_valid_oaconvolve(Q, T, conv_block_size): + QT_conv_blocks = _pocketfft_oaconvolve_block(Q, T, conv_block_size) + overlap = len(Q) - 1 + out = QT_conv_blocks[:, :-overlap] + out[1:, :overlap] += QT_conv_blocks[:-1, -overlap:] + + return np.reshape(out, (-1,))[len(Q) - 1 : len(T)] + + +def _valid_convolve(Q, T, conv_block_size=None): m = len(Q) - l = T.shape[0] - m + 1 - out = np.empty(l) - for i in range(l): - out[i] = np.dot(Q, T[i : i + m]) + n = len(T) + conv_block_size = _compute_block_size(m, n, conv_block_size=conv_block_size) + if conv_block_size >= n: + out = pocketfft_r2c_c2r_sdp._pocketfft_valid_convolve(Q, T) + else: + out = _pocketfft_valid_oaconvolve(Q, T, conv_block_size) + return out + + +def setup(Q, T): + return + + +def sliding_dot_product(Q, T, conv_block_size=None): + if len(Q) == len(T): + return np.dot(Q, T) + else: + return _valid_convolve(Q[::-1], T, conv_block_size=conv_block_size) diff --git a/sdp/pocketfft_r2c_c2r_sdp.py b/sdp/pocketfft_r2c_c2r_sdp.py index 1f14342..f663d32 100644 --- a/sdp/pocketfft_r2c_c2r_sdp.py +++ b/sdp/pocketfft_r2c_c2r_sdp.py @@ -3,20 +3,26 @@ from scipy.fft._pocketfft.basic import r2c, c2r -def setup(Q, T): - return - - -def sliding_dot_product(Q, T): +def _pocketfft_valid_convolve(Q, T): n = len(T) m = len(Q) next_fast_n = next_fast_len(n, real=True) tmp = np.empty((2, next_fast_n)) - tmp[0, :m] = Q[::-1] + tmp[0, :m] = Q tmp[0, m:] = 0.0 tmp[1, :n] = T tmp[1, n:] = 0.0 fft_2d = r2c(True, tmp, axis=-1) - return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] + return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[ + len(Q) - 1 : len(T) + ] + + +def setup(Q, T): + return + + +def sliding_dot_product(Q, T): + return _pocketfft_valid_convolve(Q[::-1], T) diff --git a/test.py b/test.py index d1b80e8..7fee8aa 100644 --- a/test.py +++ b/test.py @@ -210,3 +210,18 @@ def test_pyfftw_sdp_max_n(): np.testing.assert_allclose(comp, ref) return + + +def test_oaconvolve_sdp_blocksize(): + from sdp.challenger_sdp import sliding_dot_product + + T = np.random.rand(2**10) + Q = np.random.rand(2**8) + conv_block_size = 2**9 + + comp = sliding_dot_product(Q, T, conv_block_size=conv_block_size) + ref = naive_sliding_dot_product(Q, T) + + np.testing.assert_allclose(comp, ref) + + return