diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..4bdbc596 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,3 @@ +# format files with black +afe57d2e21ad53c69d67d61a30341bb5a1dc735d + diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml new file mode 100644 index 00000000..81e6a948 --- /dev/null +++ b/.github/workflows/black.yaml @@ -0,0 +1,10 @@ +name: Lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: psf/black@stable diff --git a/pyproject.toml b/pyproject.toml index 7cdf7953..124fe415 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "SMSTools" authors = [ { name="Music Technology Group, Universitat Pompeu Fabra", email="mtg-info@upf.edu" }, ] -version = "0.99.1" +version = "0.90" description = "Sound analysis/synthesis tools for music applications" readme = "README.md" requires-python = ">=3.9" diff --git a/smstools/models/dftModel.py b/smstools/models/dftModel.py index 138934f4..5b9cc6d5 100644 --- a/smstools/models/dftModel.py +++ b/smstools/models/dftModel.py @@ -11,15 +11,15 @@ def dftModel(x, w, N): """ - Analysis/synthesis of a signal using the discrete Fourier transform - x: input signal, w: analysis window, N: FFT size - returns y: output signal - """ + Analysis/synthesis of a signal using the discrete Fourier transform + x: input signal, w: analysis window, N: FFT size + returns y: output signal + """ if not (UF.isPower2(N)): # raise error if N not a power of two raise ValueError("FFT size (N) is not a power of 2") - if (w.size > N): # raise error if window size bigger than fft size + if w.size > N: # raise error if window size bigger than fft size raise ValueError("Window size (M) is bigger than FFT size") if all(x == 0): # if input array is zeros return empty output @@ -35,13 +35,17 @@ def dftModel(x, w, N): fftbuffer[-hM2:] = xw[:hM2] X = fft(fftbuffer) # compute FFT absX = abs(X[:hN]) # compute ansolute value of positive side - absX[absX < np.finfo(float).eps] = np.finfo(float).eps # if zeros add epsilon to handle log + absX[absX < np.finfo(float).eps] = np.finfo( + float + ).eps # if zeros add epsilon to handle log mX = 20 * np.log10(absX) # magnitude spectrum of positive frequencies in dB pX = np.unwrap(np.angle(X[:hN])) # unwrapped phase spectrum of positive frequencies # -----synthesis----- Y = np.zeros(N, dtype=complex) # clean output spectrum Y[:hN] = 10 ** (mX / 20) * np.exp(1j * pX) # generate positive frequencies - Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp(-1j * pX[-2:0:-1]) # generate negative frequencies + Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp( + -1j * pX[-2:0:-1] + ) # generate negative frequencies fftbuffer = np.real(ifft(Y)) # compute inverse FFT y[:hM2] = fftbuffer[-hM2:] # undo zero-phase window y[hM2:] = fftbuffer[:hM1] @@ -50,10 +54,10 @@ def dftModel(x, w, N): def dftAnal(x, w, N): """ - Analysis of a signal using the discrete Fourier transform - x: input signal, w: analysis window, N: FFT size - returns mX, pX: magnitude and phase spectrum - """ + Analysis of a signal using the discrete Fourier transform + x: input signal, w: analysis window, N: FFT size + returns mX, pX: magnitude and phase spectrum + """ if not (UF.isPower2(N)): # raise error if N not a power of two raise ValueError("FFT size (N) is not a power of 2") @@ -71,20 +75,26 @@ def dftAnal(x, w, N): fftbuffer[-hM2:] = xw[:hM2] X = fft(fftbuffer) # compute FFT absX = abs(X[:hN]) # compute ansolute value of positive side - absX[absX < np.finfo(float).eps] = np.finfo(float).eps # if zeros add epsilon to handle log + absX[absX < np.finfo(float).eps] = np.finfo( + float + ).eps # if zeros add epsilon to handle log mX = 20 * np.log10(absX) # magnitude spectrum of positive frequencies in dB - X[:hN].real[np.abs(X[:hN].real) < tol] = 0.0 # for phase calculation set to 0 the small values - X[:hN].imag[np.abs(X[:hN].imag) < tol] = 0.0 # for phase calculation set to 0 the small values + X[:hN].real[ + np.abs(X[:hN].real) < tol + ] = 0.0 # for phase calculation set to 0 the small values + X[:hN].imag[ + np.abs(X[:hN].imag) < tol + ] = 0.0 # for phase calculation set to 0 the small values pX = np.unwrap(np.angle(X[:hN])) # unwrapped phase spectrum of positive frequencies return mX, pX def dftSynth(mX, pX, M): """ - Synthesis of a signal using the discrete Fourier transform - mX: magnitude spectrum, pX: phase spectrum, M: window size - returns y: output signal - """ + Synthesis of a signal using the discrete Fourier transform + mX: magnitude spectrum, pX: phase spectrum, M: window size + returns y: output signal + """ hN = mX.size # size of positive spectrum, it includes sample 0 N = (hN - 1) * 2 # FFT size @@ -96,7 +106,9 @@ def dftSynth(mX, pX, M): y = np.zeros(M) # initialize output array Y = np.zeros(N, dtype=complex) # clean output spectrum Y[:hN] = 10 ** (mX / 20) * np.exp(1j * pX) # generate positive frequencies - Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp(-1j * pX[-2:0:-1]) # generate negative frequencies + Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp( + -1j * pX[-2:0:-1] + ) # generate negative frequencies fftbuffer = np.real(ifft(Y)) # compute inverse FFT y[:hM2] = fftbuffer[-hM2:] # undo zero-phase window y[hM2:] = fftbuffer[:hM1] diff --git a/smstools/models/harmonicModel.py b/smstools/models/harmonicModel.py index 52bc06dd..5c9116d8 100644 --- a/smstools/models/harmonicModel.py +++ b/smstools/models/harmonicModel.py @@ -12,26 +12,28 @@ def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et): """ - Fundamental frequency detection of a sound using twm algorithm - x: input sound; fs: sampling rate; w: analysis window; - N: FFT size; t: threshold in negative dB, - minf0: minimum f0 frequency in Hz, maxf0: maximim f0 frequency in Hz, - f0et: error threshold in the f0 detection (ex: 5), - returns f0: fundamental frequency - """ - if (minf0 < 0): # raise exception if minf0 is smaller than 0 + Fundamental frequency detection of a sound using twm algorithm + x: input sound; fs: sampling rate; w: analysis window; + N: FFT size; t: threshold in negative dB, + minf0: minimum f0 frequency in Hz, maxf0: maximim f0 frequency in Hz, + f0et: error threshold in the f0 detection (ex: 5), + returns f0: fundamental frequency + """ + if minf0 < 0: # raise exception if minf0 is smaller than 0 raise ValueError("Minumum fundamental frequency (minf0) smaller than 0") - if (maxf0 >= 10000): # raise exception if maxf0 is bigger than fs/2 + if maxf0 >= 10000: # raise exception if maxf0 is bigger than fs/2 raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz") - if (H <= 0): # raise error if hop size 0 or negative + if H <= 0: # raise error if hop size 0 or negative raise ValueError("Hop size (H) smaller or equal to 0") hN = N // 2 # size of positive spectrum hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor - x = np.append(np.zeros(hM2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hM1)) # add zeros at the end to analyze last sample pin = hM1 # init sound pointer in middle of anal window pend = x.size - hM1 # last sample to start a frame @@ -41,14 +43,15 @@ def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et): f0t = 0 # initialize f0 track f0stable = 0 # initialize f0 stable while pin < pend: - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect peak locations iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N # convert locations to Hez f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 - if ((f0stable == 0) & (f0t > 0)) \ - or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)): + if ((f0stable == 0) & (f0t > 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 @@ -59,30 +62,36 @@ def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et): def harmonicDetection(pfreq, pmag, pphase, f0, nH, hfreqp, fs, harmDevSlope=0.01): """ - Detection of the harmonics of a frame from a set of spectral peaks using f0 - to the ideal harmonic series built on top of a fundamental frequency - pfreq, pmag, pphase: peak frequencies, magnitudes and phases - f0: fundamental frequency, nH: number of harmonics, - hfreqp: harmonic frequencies of previous frame, - fs: sampling rate; harmDevSlope: slope of change of the deviation allowed to perfect harmonic - returns hfreq, hmag, hphase: harmonic frequencies, magnitudes, phases - """ - - if (f0 <= 0): # if no f0 return no harmonics + Detection of the harmonics of a frame from a set of spectral peaks using f0 + to the ideal harmonic series built on top of a fundamental frequency + pfreq, pmag, pphase: peak frequencies, magnitudes and phases + f0: fundamental frequency, nH: number of harmonics, + hfreqp: harmonic frequencies of previous frame, + fs: sampling rate; harmDevSlope: slope of change of the deviation allowed to perfect harmonic + returns hfreq, hmag, hphase: harmonic frequencies, magnitudes, phases + """ + + if f0 <= 0: # if no f0 return no harmonics return np.zeros(nH), np.zeros(nH), np.zeros(nH) hfreq = np.zeros(nH) # initialize harmonic frequencies hmag = np.zeros(nH) - 100 # initialize harmonic magnitudes hphase = np.zeros(nH) # initialize harmonic phases hf = f0 * np.arange(1, nH + 1) # initialize harmonic frequencies hi = 0 # initialize harmonic index - if len(hfreqp) == 0: # if no incomming harmonic tracks initialize to harmonic series + if ( + len(hfreqp) == 0 + ): # if no incomming harmonic tracks initialize to harmonic series hfreqp = hf while (f0 > 0) and (hi < nH) and (hf[hi] < fs / 2): # find harmonic peaks pei = np.argmin(abs(pfreq - hf[hi])) # closest peak dev1 = abs(pfreq[pei] - hf[hi]) # deviation from perfect harmonic - dev2 = (abs(pfreq[pei] - hfreqp[hi]) if hfreqp[hi] > 0 else fs) # deviation from previous frame + dev2 = ( + abs(pfreq[pei] - hfreqp[hi]) if hfreqp[hi] > 0 else fs + ) # deviation from previous frame threshold = f0 / 3 + harmDevSlope * pfreq[pei] - if ((dev1 < threshold) or (dev2 < threshold)): # accept peak if deviation is small + if (dev1 < threshold) or ( + dev2 < threshold + ): # accept peak if deviation is small hfreq[hi] = pfreq[pei] # harmonic frequencies hmag[hi] = pmag[pei] # harmonic magnitudes hphase[hi] = pphase[pei] # harmonic phases @@ -92,19 +101,21 @@ def harmonicDetection(pfreq, pmag, pphase, f0, nH, hfreqp, fs, harmDevSlope=0.01 def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et): """ - Analysis/synthesis of a sound using the sinusoidal harmonic model - x: input sound, fs: sampling rate, w: analysis window, - N: FFT size (minimum 512), t: threshold in negative dB, - nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, - maxf0: maximim f0 frequency in Hz, - f0et: error threshold in the f0 detection (ex: 5), - returns y: output array sound - """ + Analysis/synthesis of a sound using the sinusoidal harmonic model + x: input sound, fs: sampling rate, w: analysis window, + N: FFT size (minimum 512), t: threshold in negative dB, + nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, + maxf0: maximim f0 frequency in Hz, + f0et: error threshold in the f0 detection (ex: 5), + returns y: output array sound + """ hN = N // 2 # size of positive spectrum hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor - x = np.append(np.zeros(hM2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hM1)) # add zeros at the end to analyze last sample Ns = 512 # FFT size for synthesis (even) H = Ns // 4 # Hop size used for analysis and synthesis @@ -117,57 +128,70 @@ def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et): w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) # initialize synthesis window ow = triang(2 * H) # overlapping window - sw[hNs - H:hNs + H] = ow + sw[hNs - H : hNs + H] = ow bh = blackmanharris(Ns) # synthesis window bh = bh / sum(bh) # normalize synthesis window - sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H] # window for overlap-add + sw[hNs - H : hNs + H] = ( + sw[hNs - H : hNs + H] / bh[hNs - H : hNs + H] + ) # window for overlap-add hfreqp = [] f0t = 0 f0stable = 0 while pin < pend: # -----analysis----- - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect peak locations iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 - if ((f0stable == 0) & (f0t > 0)) \ - or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)): + if ((f0stable == 0) & (f0t > 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 - hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) # find harmonics + hfreq, hmag, hphase = harmonicDetection( + ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs + ) # find harmonics hfreqp = hfreq # -----synthesis----- Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate spec sines fftbuffer = np.real(ifft(Yh)) # inverse FFT - yh[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window - yh[hNs - 1:] = fftbuffer[:hNs + 1] - y[pin - hNs:pin + hNs] += sw * yh # overlap-add + yh[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + yh[hNs - 1 :] = fftbuffer[: hNs + 1] + y[pin - hNs : pin + hNs] += sw * yh # overlap-add pin += H # advance sound pointer - y = np.delete(y, range(hM2)) # delete half of first window which was added in stftAnal - y = np.delete(y, range(y.size - hM1, y.size)) # add zeros at the end to analyze last sample + y = np.delete( + y, range(hM2) + ) # delete half of first window which was added in stftAnal + y = np.delete( + y, range(y.size - hM1, y.size) + ) # add zeros at the end to analyze last sample return y -def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=.02): +def harmonicModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=0.02 +): + """ + Analysis of a sound using the sinusoidal harmonic model + x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512); t: threshold in negative dB, + nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz, + maxf0: maximim f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5), + harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics + returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases """ - Analysis of a sound using the sinusoidal harmonic model - x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512); t: threshold in negative dB, - nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz, - maxf0: maximim f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5), - harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics - returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases - """ - - if (minSineDur < 0): # raise exception if minSineDur is smaller than 0 + + if minSineDur < 0: # raise exception if minSineDur is smaller than 0 raise ValueError("Minimum duration of sine tracks smaller than 0") hN = N // 2 # size of positive spectrum hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor - x = np.append(np.zeros(hM2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hM2)) # add zeros at the end to analyze last sample pin = hM1 # init sound pointer in middle of anal window pend = x.size - hM1 # last sample to start a frame @@ -177,19 +201,21 @@ def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0. f0t = 0 # initialize f0 track f0stable = 0 # initialize f0 stable while pin <= pend: - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect peak locations iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N # convert locations to Hz f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 - if ((f0stable == 0) & (f0t > 0)) \ - or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)): + if ((f0stable == 0) & (f0t > 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 - hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, - harmDevSlope) # find harmonics + hfreq, hmag, hphase = harmonicDetection( + ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, harmDevSlope + ) # find harmonics hfreqp = hfreq if pin == hM1: # first frame xhfreq = np.array([hfreq]) @@ -200,5 +226,7 @@ def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0. xhmag = np.vstack((xhmag, np.array([hmag]))) xhphase = np.vstack((xhphase, np.array([hphase]))) pin += H # advance sound pointer - xhfreq = SM.cleaningSineTracks(xhfreq, round(fs * minSineDur / H)) # delete tracks shorter than minSineDur + xhfreq = SM.cleaningSineTracks( + xhfreq, round(fs * minSineDur / H) + ) # delete tracks shorter than minSineDur return xhfreq, xhmag, xhphase diff --git a/smstools/models/hprModel.py b/smstools/models/hprModel.py index 2c8e2920..f18dc19b 100644 --- a/smstools/models/hprModel.py +++ b/smstools/models/hprModel.py @@ -10,102 +10,113 @@ from smstools.models import utilFunctions as UF from smstools.models import sineModel as SM + def hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope): - """Analysis of a sound using the harmonic plus residual model - x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, - minSineDur: minimum duration of sinusoidal tracks - nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound - maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm - harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation - returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; xr: residual signal - """ + """Analysis of a sound using the harmonic plus residual model + x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, + minSineDur: minimum duration of sinusoidal tracks + nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound + maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm + harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation + returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; xr: residual signal + """ + + # perform harmonic analysis + hfreq, hmag, hphase = HM.harmonicModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur + ) + Ns = 512 + xr = UF.sineSubtraction( + x, Ns, H, hfreq, hmag, hphase, fs + ) # subtract sinusoids from original sound + return hfreq, hmag, hphase, xr - # perform harmonic analysis - hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur) - Ns = 512 - xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs) # subtract sinusoids from original sound - return hfreq, hmag, hphase, xr def hprModelSynth(hfreq, hmag, hphase, xr, N, H, fs): - """ - Synthesis of a sound using the sinusoidal plus residual model - tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; stocEnv: stochastic envelope - N: synthesis FFT size; H: hop size, fs: sampling rate - returns y: output sound, yh: harmonic component - """ + """ + Synthesis of a sound using the sinusoidal plus residual model + tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; stocEnv: stochastic envelope + N: synthesis FFT size; H: hop size, fs: sampling rate + returns y: output sound, yh: harmonic component + """ - yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs) # synthesize sinusoids - y = yh[:min(yh.size, xr.size)]+xr[:min(yh.size, xr.size)] # sum sinusoids and residual components - return y, yh + yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs) # synthesize sinusoids + y = ( + yh[: min(yh.size, xr.size)] + xr[: min(yh.size, xr.size)] + ) # sum sinusoids and residual components + return y, yh -def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et): - """ - Analysis/synthesis of a sound using the harmonic plus residual model - x: input sound, fs: sampling rate, w: analysis window, - N: FFT size (minimum 512), t: threshold in negative dB, - nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, - maxf0: maximim f0 frequency in Hz, - f0et: error threshold in the f0 detection (ex: 5), - maxhd: max. relative deviation in harmonic detection (ex: .2) - returns y: output sound, yh: harmonic component, xr: residual component - """ - hN = N//2 # size of positive spectrum - hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding - hM2 = int(math.floor(w.size/2)) # half analysis window size by floor - Ns = 512 # FFT size for synthesis (even) - H = Ns//4 # Hop size used for analysis and synthesis - hNs = Ns//2 - pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window - pend = x.size - max(hNs, hM1) # last sample to start a frame - yhw = np.zeros(Ns) # initialize output sound frame - xrw = np.zeros(Ns) # initialize output sound frame - yh = np.zeros(x.size) # initialize output array - xr = np.zeros(x.size) # initialize output array - w = w / sum(w) # normalize analysis window - sw = np.zeros(Ns) - ow = triang(2*H) # overlapping window - sw[hNs-H:hNs+H] = ow - bh = blackmanharris(Ns) # synthesis window - bh = bh / sum(bh) # normalize synthesis window - wr = bh # window for residual - sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] - hfreqp = [] - f0t = 0 - f0stable = 0 - while pin0)) \ - or ((f0stable>0)&(np.abs(f0stable-f0t) 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): + f0stable = f0t # consider a stable f0 if it is close to the previous one + else: + f0stable = 0 + hfreq, hmag, hphase = HM.harmonicDetection( + ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs + ) # find harmonics + hfreqp = hfreq + ri = pin - hNs - 1 # input sound pointer for residual analysis + xw2 = x[ri : ri + Ns] * wr # window the input sound + fftbuffer = np.zeros(Ns) # reset buffer + fftbuffer[:hNs] = xw2[hNs:] # zero-phase window in fftbuffer + fftbuffer[hNs:] = xw2[:hNs] + X2 = fft(fftbuffer) # compute FFT of input signal for residual analysis + # -----synthesis----- + Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate sines + Xr = X2 - Yh # get the residual complex spectrum + fftbuffer = np.real(ifft(Yh)) # inverse FFT of harmonic spectrum + yhw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + yhw[hNs - 1 :] = fftbuffer[: hNs + 1] + fftbuffer = np.real(ifft(Xr)) # inverse FFT of residual spectrum + xrw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + xrw[hNs - 1 :] = fftbuffer[: hNs + 1] + yh[ri : ri + Ns] += sw * yhw # overlap-add for sines + xr[ri : ri + Ns] += sw * xrw # overlap-add for residual + pin += H # advance sound pointer + y = yh + xr # sum of harmonic and residual components + return y, yh, xr diff --git a/smstools/models/hpsModel.py b/smstools/models/hpsModel.py index 5ae5e6c9..c3ef7934 100644 --- a/smstools/models/hpsModel.py +++ b/smstools/models/hpsModel.py @@ -13,18 +13,22 @@ from smstools.models import utilFunctions as UF -def hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf): +def hpsModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf +): + """ + Analysis of a sound using the harmonic plus stochastic model + x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, + nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, + maxf0: maximim f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5), + harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics + returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; stocEnv: stochastic residual """ - Analysis of a sound using the harmonic plus stochastic model - x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, - nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, - maxf0: maximim f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5), - harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics - returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; stocEnv: stochastic residual - """ # perform harmonic analysis - hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur) + hfreq, hmag, hphase = HM.harmonicModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur + ) # subtract sinusoids from original sound xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs) # perform stochastic analysis of residual @@ -34,26 +38,28 @@ def hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSin def hpsModelSynth(hfreq, hmag, hphase, stocEnv, N, H, fs): """ - Synthesis of a sound using the harmonic plus stochastic model - hfreq, hmag: harmonic frequencies and amplitudes; stocEnv: stochastic envelope - Ns: synthesis FFT size; H: hop size, fs: sampling rate - returns y: output sound, yh: harmonic component, yst: stochastic component - """ + Synthesis of a sound using the harmonic plus stochastic model + hfreq, hmag: harmonic frequencies and amplitudes; stocEnv: stochastic envelope + Ns: synthesis FFT size; H: hop size, fs: sampling rate + returns y: output sound, yh: harmonic component, yst: stochastic component + """ yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs) # synthesize harmonics yst = STM.stochasticModelSynth(stocEnv, H, H * 2) # synthesize stochastic residual - y = yh[:min(yh.size, yst.size)] + yst[:min(yh.size, yst.size)] # sum harmonic and stochastic components + y = ( + yh[: min(yh.size, yst.size)] + yst[: min(yh.size, yst.size)] + ) # sum harmonic and stochastic components return y, yh, yst def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf): """ - Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking - x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB, - nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximim f0 frequency in Hz, - f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis - returns y: output sound, yh: harmonic component, yst: stochastic component - """ + Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking + x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB, + nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximim f0 frequency in Hz, + f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis + returns y: output sound, yh: harmonic component, yst: stochastic component + """ hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor @@ -69,58 +75,68 @@ def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf): w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) ow = triang(2 * H) # overlapping window - sw[hNs - H:hNs + H] = ow + sw[hNs - H : hNs + H] = ow bh = blackmanharris(Ns) # synthesis window bh = bh / sum(bh) # normalize synthesis window wr = bh # window for residual - sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H] # synthesis window for harmonic component + sw[hNs - H : hNs + H] = ( + sw[hNs - H : hNs + H] / bh[hNs - H : hNs + H] + ) # synthesis window for harmonic component sws = H * hann(Ns) / 2 # synthesis window for stochastic hfreqp = [] f0t = 0 f0stable = 0 while pin < pend: # -----analysis----- - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # find peaks iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N # convert peak locations to Hz f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 - if ((f0stable == 0) & (f0t > 0)) \ - or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)): + if ((f0stable == 0) & (f0t > 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 - hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) # find harmonics + hfreq, hmag, hphase = HM.harmonicDetection( + ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs + ) # find harmonics hfreqp = hfreq ri = pin - hNs - 1 # input sound pointer for residual analysis - xw2 = x[ri:ri + Ns] * wr # window the input sound + xw2 = x[ri : ri + Ns] * wr # window the input sound fftbuffer = np.zeros(Ns) # reset buffer fftbuffer[:hNs] = xw2[hNs:] # zero-phase window in fftbuffer fftbuffer[hNs:] = xw2[:hNs] X2 = fft(fftbuffer) # compute FFT for residual analysis # -----synthesis----- - Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate spec sines of harmonic component + Yh = UF.genSpecSines( + hfreq, hmag, hphase, Ns, fs + ) # generate spec sines of harmonic component Xr = X2 - Yh # get the residual complex spectrum mXr = 20 * np.log10(abs(Xr[:hNs])) # magnitude spectrum of residual - mXrenv = resample(np.maximum(-200, mXr), - mXr.size * stocf) # decimate the magnitude spectrum and avoid -Inf + mXrenv = resample( + np.maximum(-200, mXr), mXr.size * stocf + ) # decimate the magnitude spectrum and avoid -Inf stocEnv = resample(mXrenv, hNs) # interpolate to original size pYst = 2 * np.pi * np.random.rand(hNs) # generate phase random values Yst = np.zeros(Ns, dtype=complex) Yst[:hNs] = 10 ** (stocEnv / 20) * np.exp(1j * pYst) # generate positive freq. - Yst[hNs + 1:] = 10 ** (stocEnv[:0:-1] / 20) * np.exp(-1j * pYst[:0:-1]) # generate negative freq. + Yst[hNs + 1 :] = 10 ** (stocEnv[:0:-1] / 20) * np.exp( + -1j * pYst[:0:-1] + ) # generate negative freq. fftbuffer = np.real(ifft(Yh)) # inverse FFT of harmonic spectrum - yhw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window - yhw[hNs - 1:] = fftbuffer[:hNs + 1] + yhw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + yhw[hNs - 1 :] = fftbuffer[: hNs + 1] fftbuffer = np.real(ifft(Yst)) # inverse FFT of stochastic spectrum - ystw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window - ystw[hNs - 1:] = fftbuffer[:hNs + 1] + ystw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + ystw[hNs - 1 :] = fftbuffer[: hNs + 1] - yh[ri:ri + Ns] += sw * yhw # overlap-add for sines - yst[ri:ri + Ns] += sws * ystw # overlap-add for stochastic + yh[ri : ri + Ns] += sw * yhw # overlap-add for sines + yst[ri : ri + Ns] += sws * ystw # overlap-add for stochastic pin += H # advance sound pointer y = yh + yst # sum of harmonic and stochastic components diff --git a/smstools/models/interface/dftModel_GUI_frame.py b/smstools/models/interface/dftModel_GUI_frame.py index 1079d86a..838cb8f2 100644 --- a/smstools/models/interface/dftModel_GUI_frame.py +++ b/smstools/models/interface/dftModel_GUI_frame.py @@ -5,6 +5,7 @@ from smstools.models import utilFunctions as UF from tkinter import * + class DftModel_frame: def __init__(self, parent): @@ -15,8 +16,10 @@ def __init__(self, parent): def initUI(self): - choose_label = 'Input file (.wav, mono and 44100 sampling rate):' - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2)) + choose_label = "Input file (.wav, mono and 44100 sampling rate):" + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation = Entry(self.parent) @@ -24,31 +27,48 @@ def initUI(self): self.filelocation["width"] = 25 self.filelocation.grid(row=1, column=0, sticky=W, padx=10) self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/piano.wav') + self.filelocation.insert(0, "../../sounds/piano.wav") # BUTTON TO BROWSE SOUND FILE - self.open_file = Button(self.parent, text='Browse...', command=self.browse_file) # see: def browse_file(self) - self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6)) # put it beside the filelocation textbox + self.open_file = Button( + self.parent, text="Browse...", command=self.browse_file + ) # see: def browse_file(self) + self.open_file.grid( + row=1, column=0, sticky=W, padx=(220, 6) + ) # put it beside the filelocation textbox # BUTTON TO PREVIEW SOUND FILE - # self.preview = Button(self.parent, text='>', command=lambda: UF.wavplay(self.filelocation.get()), bg="gray30", fg="white") - self.preview = Button(self.parent, text='>', command=lambda: UF.wavplay(self.filelocation.get())) + # self.preview = Button(self.parent, text='>', command=lambda: UF.wavplay(self.filelocation.get()), bg="gray30", fg="white") + self.preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6)) ## DFT MODEL # ANALYSIS WINDOW TYPE wtype_label = "Window type:" - Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=wtype_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.w_type = StringVar() self.w_type.set("blackman") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hann", "hamming", "blackman", - "blackmanharris") + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hann", + "hamming", + "blackman", + "blackmanharris", + ) window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2)) # WINDOW SIZE M_label = "Window size (M):" - Label(self.parent, text=M_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=M_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.M = Entry(self.parent, justify=CENTER) self.M["width"] = 5 self.M.grid(row=3, column=0, sticky=W, padx=(115, 5), pady=(10, 2)) @@ -57,7 +77,9 @@ def initUI(self): # FFT SIZE N_label = "FFT size (N) (power of two bigger than M):" - Label(self.parent, text=N_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=N_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.N = Entry(self.parent, justify=CENTER) self.N["width"] = 5 self.N.grid(row=4, column=0, sticky=W, padx=(270, 5), pady=(10, 2)) @@ -66,7 +88,9 @@ def initUI(self): # TIME TO START ANALYSIS time_label = "Time in sound (in seconds):" - Label(self.parent, text=time_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=time_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.time = Entry(self.parent, justify=CENTER) self.time["width"] = 5 self.time.grid(row=5, column=0, sticky=W, padx=(180, 5), pady=(10, 2)) @@ -79,10 +103,10 @@ def initUI(self): # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file(self): diff --git a/smstools/models/interface/dftModel_function.py b/smstools/models/interface/dftModel_function.py index 3894045f..0ae748d9 100644 --- a/smstools/models/interface/dftModel_function.py +++ b/smstools/models/interface/dftModel_function.py @@ -6,7 +6,10 @@ from smstools.models import utilFunctions as UF from smstools.models import dftModel as DFT -def main(inputFile = '../../sounds/piano.wav', window = 'blackman', M = 511, N = 1024, time = .2): + +def main( + inputFile="../../sounds/piano.wav", window="blackman", M=511, N=1024, time=0.2 +): """ inputFile: input sound file (monophonic with sampling rate of 44100) window: analysis window type (choice of rectangular, hann, hamming, blackman, blackmanharris) @@ -22,54 +25,55 @@ def main(inputFile = '../../sounds/piano.wav', window = 'blackman', M = 511, N = w = get_window(window, M) # get a fragment of the input sound of size M - sample = int(time*fs) - if (sample+M >= x.size or sample < 0): # raise error if time outside of sound + sample = int(time * fs) + if sample + M >= x.size or sample < 0: # raise error if time outside of sound raise ValueError("Time outside sound boundaries") - x1 = x[sample:sample+M] + x1 = x[sample : sample + M] # compute the dft of the sound fragment mX, pX = DFT.dftAnal(x1, w, N) # compute the inverse dft of the spectrum - y = DFT.dftSynth(mX, pX, w.size)*sum(w) + y = DFT.dftSynth(mX, pX, w.size) * sum(w) # create figure plt.figure(figsize=(9, 6)) # plot the sound fragment - plt.subplot(4,1,1) - plt.plot(time + np.arange(M)/float(fs), x1) - plt.axis([time, time + M/float(fs), min(x1), max(x1)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') + plt.subplot(4, 1, 1) + plt.plot(time + np.arange(M) / float(fs), x1) + plt.axis([time, time + M / float(fs), min(x1), max(x1)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") # plot the magnitude spectrum - plt.subplot(4,1,2) - plt.plot(float(fs)*np.arange(mX.size)/float(N), mX, 'r') - plt.axis([0, fs/2.0, min(mX), max(mX)]) - plt.title ('magnitude spectrum: mX') - plt.ylabel('amplitude (dB)') - plt.xlabel('frequency (Hz)') + plt.subplot(4, 1, 2) + plt.plot(float(fs) * np.arange(mX.size) / float(N), mX, "r") + plt.axis([0, fs / 2.0, min(mX), max(mX)]) + plt.title("magnitude spectrum: mX") + plt.ylabel("amplitude (dB)") + plt.xlabel("frequency (Hz)") # plot the phase spectrum - plt.subplot(4,1,3) - plt.plot(float(fs)*np.arange(pX.size)/float(N), pX, 'c') - plt.axis([0, fs/2.0, min(pX), max(pX)]) - plt.title ('phase spectrum: pX') - plt.ylabel('phase (radians)') - plt.xlabel('frequency (Hz)') + plt.subplot(4, 1, 3) + plt.plot(float(fs) * np.arange(pX.size) / float(N), pX, "c") + plt.axis([0, fs / 2.0, min(pX), max(pX)]) + plt.title("phase spectrum: pX") + plt.ylabel("phase (radians)") + plt.xlabel("frequency (Hz)") # plot the sound resulting from the inverse dft - plt.subplot(4,1,4) - plt.plot(time + np.arange(M)/float(fs), y) - plt.axis([time, time + M/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') + plt.subplot(4, 1, 4) + plt.plot(time + np.arange(M) / float(fs), y) + plt.axis([time, time + M / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") plt.tight_layout() plt.ion() plt.show() + if __name__ == "__main__": main() diff --git a/smstools/models/interface/harmonicModel_GUI_frame.py b/smstools/models/interface/harmonicModel_GUI_frame.py index 096e814f..5e1cf7a4 100644 --- a/smstools/models/interface/harmonicModel_GUI_frame.py +++ b/smstools/models/interface/harmonicModel_GUI_frame.py @@ -19,7 +19,9 @@ def __init__(self, parent): def initUI(self): choose_label = "Input file (.wav, mono and 44100 sampling rate):" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation = Entry(self.parent) @@ -27,30 +29,47 @@ def initUI(self): self.filelocation["width"] = 25 self.filelocation.grid(row=1, column=0, sticky=W, padx=10) self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/vignesh.wav') + self.filelocation.insert(0, "../../sounds/vignesh.wav") # BUTTON TO BROWSE SOUND FILE - self.open_file = Button(self.parent, text="Browse...", command=self.browse_file) # see: def browse_file(self) - self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6)) # put it beside the filelocation textbox + self.open_file = Button( + self.parent, text="Browse...", command=self.browse_file + ) # see: def browse_file(self) + self.open_file.grid( + row=1, column=0, sticky=W, padx=(220, 6) + ) # put it beside the filelocation textbox # BUTTON TO PREVIEW SOUND FILE - self.preview = Button(self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get())) + self.preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6)) ## HARMONIC MODEL # ANALYSIS WINDOW TYPE wtype_label = "Window type:" - Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=wtype_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.w_type = StringVar() self.w_type.set("blackman") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hann", "hamming", "blackman", - "blackmanharris") + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hann", + "hamming", + "blackman", + "blackmanharris", + ) window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2)) # WINDOW SIZE M_label = "Window size (M):" - Label(self.parent, text=M_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=M_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.M = Entry(self.parent, justify=CENTER) self.M["width"] = 5 self.M.grid(row=3, column=0, sticky=W, padx=(115, 5), pady=(10, 2)) @@ -59,7 +78,9 @@ def initUI(self): # FFT SIZE N_label = "FFT size (N) (power of two bigger than M):" - Label(self.parent, text=N_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=N_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.N = Entry(self.parent, justify=CENTER) self.N["width"] = 5 self.N.grid(row=4, column=0, sticky=W, padx=(270, 5), pady=(10, 2)) @@ -68,7 +89,9 @@ def initUI(self): # THRESHOLD MAGNITUDE t_label = "Magnitude threshold (t) (in dB):" - Label(self.parent, text=t_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=t_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.t = Entry(self.parent, justify=CENTER) self.t["width"] = 5 self.t.grid(row=5, column=0, sticky=W, padx=(205, 5), pady=(10, 2)) @@ -77,7 +100,9 @@ def initUI(self): # MIN DURATION SINUSOIDAL TRACKS minSineDur_label = "Minimum duration of harmonic tracks:" - Label(self.parent, text=minSineDur_label).grid(row=6, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=minSineDur_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.minSineDur = Entry(self.parent, justify=CENTER) self.minSineDur["width"] = 5 self.minSineDur.grid(row=6, column=0, sticky=W, padx=(250, 5), pady=(10, 2)) @@ -86,7 +111,9 @@ def initUI(self): # MAX NUMBER OF HARMONICS nH_label = "Maximum number of harmonics:" - Label(self.parent, text=nH_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=nH_label).grid( + row=7, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.nH = Entry(self.parent, justify=CENTER) self.nH["width"] = 5 self.nH.grid(row=7, column=0, sticky=W, padx=(210, 5), pady=(10, 2)) @@ -95,7 +122,9 @@ def initUI(self): # MIN FUNDAMENTAL FREQUENCY minf0_label = "Minimum fundamental frequency:" - Label(self.parent, text=minf0_label).grid(row=8, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=minf0_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.minf0 = Entry(self.parent, justify=CENTER) self.minf0["width"] = 5 self.minf0.grid(row=8, column=0, sticky=W, padx=(220, 5), pady=(10, 2)) @@ -104,7 +133,9 @@ def initUI(self): # MAX FUNDAMENTAL FREQUENCY maxf0_label = "Maximum fundamental frequency:" - Label(self.parent, text=maxf0_label).grid(row=9, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=maxf0_label).grid( + row=9, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.maxf0 = Entry(self.parent, justify=CENTER) self.maxf0["width"] = 5 self.maxf0.grid(row=9, column=0, sticky=W, padx=(220, 5), pady=(10, 2)) @@ -113,7 +144,9 @@ def initUI(self): # MAX ERROR ACCEPTED f0et_label = "Maximum error in f0 detection algorithm:" - Label(self.parent, text=f0et_label).grid(row=10, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=f0et_label).grid( + row=10, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.f0et = Entry(self.parent, justify=CENTER) self.f0et["width"] = 5 self.f0et.grid(row=10, column=0, sticky=W, padx=(265, 5), pady=(10, 2)) @@ -122,7 +155,9 @@ def initUI(self): # ALLOWED DEVIATION OF HARMONIC TRACKS harmDevSlope_label = "Max frequency deviation in harmonic tracks:" - Label(self.parent, text=harmDevSlope_label).grid(row=11, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=harmDevSlope_label).grid( + row=11, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.harmDevSlope = Entry(self.parent, justify=CENTER) self.harmDevSlope["width"] = 5 self.harmDevSlope.grid(row=11, column=0, sticky=W, padx=(285, 5), pady=(10, 2)) @@ -135,17 +170,26 @@ def initUI(self): # BUTTON TO PLAY OUTPUT output_label = "Output:" - Label(self.parent, text=output_label).grid(row=13, column=0, sticky=W, padx=5, pady=(10, 15)) - self.output = Button(self.parent, text=">", command=lambda: UF.wavplay( - 'output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_harmonicModel.wav')) + Label(self.parent, text=output_label).grid( + row=13, column=0, sticky=W, padx=5, pady=(10, 15) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_harmonicModel.wav" + ), + ) self.output.grid(row=13, column=0, padx=(60, 5), pady=(10, 15), sticky=W) # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file(self): @@ -170,7 +214,19 @@ def compute_model(self): f0et = int(self.f0et.get()) harmDevSlope = float(self.harmDevSlope.get()) - harmonicModel_function.main(inputFile, window, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope) + harmonicModel_function.main( + inputFile, + window, + M, + N, + t, + minSineDur, + nH, + minf0, + maxf0, + f0et, + harmDevSlope, + ) except ValueError as errorMessage: messagebox.showerror("Input values error", str(errorMessage)) diff --git a/smstools/models/interface/harmonicModel_function.py b/smstools/models/interface/harmonicModel_function.py index ac307dda..b77b2239 100644 --- a/smstools/models/interface/harmonicModel_function.py +++ b/smstools/models/interface/harmonicModel_function.py @@ -8,78 +8,95 @@ from smstools.models import sineModel as SM from smstools.models import harmonicModel as HM -def main(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90, - minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01): - """ - Analysis and synthesis using the harmonic model - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size; N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks - nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound - maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm - harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # read input sound - (fs, x) = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # detect harmonics of input sound - hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur) - - # synthesize the harmonics - y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs) - - # output sound file (monophonic with sampling rate of 44100) - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_harmonicModel.wav' - - # write the sound resulting from harmonic analysis - UF.wavwrite(y, fs, outputFile) - - # create figure to show plots - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 5000.0 - - # plot the input sound - plt.subplot(3,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - # plot the harmonic frequencies - plt.subplot(3,1,2) - if (hfreq.shape[1] > 0): - numFrames = hfreq.shape[0] - frmTime = H*np.arange(numFrames)/float(fs) - hfreq[hfreq<=0] = np.nan - plt.plot(frmTime, hfreq) - plt.axis([0, x.size/float(fs), 0, maxplotfreq]) - plt.title('frequencies of harmonic tracks') - - # plot the output sound - plt.subplot(3,1,3) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.ion() - plt.show() + +def main( + inputFile="../../sounds/vignesh.wav", + window="blackman", + M=1201, + N=2048, + t=-90, + minSineDur=0.1, + nH=100, + minf0=130, + maxf0=300, + f0et=7, + harmDevSlope=0.01, +): + """ + Analysis and synthesis using the harmonic model + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size; N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks + nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound + maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm + harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # read input sound + (fs, x) = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # detect harmonics of input sound + hfreq, hmag, hphase = HM.harmonicModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur + ) + + # synthesize the harmonics + y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs) + + # output sound file (monophonic with sampling rate of 44100) + outputFile = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_harmonicModel.wav" + ) + + # write the sound resulting from harmonic analysis + UF.wavwrite(y, fs, outputFile) + + # create figure to show plots + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 5000.0 + + # plot the input sound + plt.subplot(3, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + # plot the harmonic frequencies + plt.subplot(3, 1, 2) + if hfreq.shape[1] > 0: + numFrames = hfreq.shape[0] + frmTime = H * np.arange(numFrames) / float(fs) + hfreq[hfreq <= 0] = np.nan + plt.plot(frmTime, hfreq) + plt.axis([0, x.size / float(fs), 0, maxplotfreq]) + plt.title("frequencies of harmonic tracks") + + # plot the output sound + plt.subplot(3, 1, 3) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.ion() + plt.show() + if __name__ == "__main__": - main() + main() diff --git a/smstools/models/interface/hprModel_GUI_frame.py b/smstools/models/interface/hprModel_GUI_frame.py index 9a6930f1..2cf31984 100644 --- a/smstools/models/interface/hprModel_GUI_frame.py +++ b/smstools/models/interface/hprModel_GUI_frame.py @@ -7,6 +7,7 @@ from smstools.models.interface import hprModel_function from smstools.models import utilFunctions as UF + class HprModel_frame: def __init__(self, parent): @@ -17,152 +18,217 @@ def __init__(self, parent): def initUI(self): choose_label = "Input file (.wav, mono and 44100 sampling rate):" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) - #TEXTBOX TO PRINT PATH OF THE SOUND FILE + # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation = Entry(self.parent) self.filelocation.focus_set() self.filelocation["width"] = 25 - self.filelocation.grid(row=1,column=0, sticky=W, padx=10) + self.filelocation.grid(row=1, column=0, sticky=W, padx=10) self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/sax-phrase-short.wav') - - #BUTTON TO BROWSE SOUND FILE - self.open_file = Button(self.parent, text="Browse...", command=self.browse_file) #see: def browse_file(self) - self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE - self.preview = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation.get())) - self.preview.grid(row=1, column=0, sticky=W, padx=(306,6)) + self.filelocation.insert(0, "../../sounds/sax-phrase-short.wav") + + # BUTTON TO BROWSE SOUND FILE + self.open_file = Button( + self.parent, text="Browse...", command=self.browse_file + ) # see: def browse_file(self) + self.open_file.grid( + row=1, column=0, sticky=W, padx=(220, 6) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE + self.preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) + self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6)) ## HARMONIC MODEL - #ANALYSIS WINDOW TYPE + # ANALYSIS WINDOW TYPE wtype_label = "Window type:" - Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=wtype_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.w_type = StringVar() - self.w_type.set("blackman") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hann", "hamming", "blackman", "blackmanharris") - window_option.grid(row=2, column=0, sticky=W, padx=(95,5), pady=(10,2)) - - #WINDOW SIZE + self.w_type.set("blackman") # initial value + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hann", + "hamming", + "blackman", + "blackmanharris", + ) + window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2)) + + # WINDOW SIZE M_label = "Window size (M):" - Label(self.parent, text=M_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=M_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.M = Entry(self.parent, justify=CENTER) self.M["width"] = 5 - self.M.grid(row=3,column=0, sticky=W, padx=(115,5), pady=(10,2)) + self.M.grid(row=3, column=0, sticky=W, padx=(115, 5), pady=(10, 2)) self.M.delete(0, END) self.M.insert(0, "601") - #FFT SIZE + # FFT SIZE N_label = "FFT size (N) (power of two bigger than M):" - Label(self.parent, text=N_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=N_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.N = Entry(self.parent, justify=CENTER) self.N["width"] = 5 - self.N.grid(row=4,column=0, sticky=W, padx=(270,5), pady=(10,2)) + self.N.grid(row=4, column=0, sticky=W, padx=(270, 5), pady=(10, 2)) self.N.delete(0, END) self.N.insert(0, "1024") - #THRESHOLD MAGNITUDE + # THRESHOLD MAGNITUDE t_label = "Magnitude threshold (t) (in dB):" - Label(self.parent, text=t_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=t_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.t = Entry(self.parent, justify=CENTER) self.t["width"] = 5 - self.t.grid(row=5, column=0, sticky=W, padx=(205,5), pady=(10,2)) + self.t.grid(row=5, column=0, sticky=W, padx=(205, 5), pady=(10, 2)) self.t.delete(0, END) self.t.insert(0, "-100") - #MIN DURATION SINUSOIDAL TRACKS + # MIN DURATION SINUSOIDAL TRACKS minSineDur_label = "Minimum duration of harmonic tracks:" - Label(self.parent, text=minSineDur_label).grid(row=6, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=minSineDur_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.minSineDur = Entry(self.parent, justify=CENTER) self.minSineDur["width"] = 5 - self.minSineDur.grid(row=6, column=0, sticky=W, padx=(250,5), pady=(10,2)) + self.minSineDur.grid(row=6, column=0, sticky=W, padx=(250, 5), pady=(10, 2)) self.minSineDur.delete(0, END) self.minSineDur.insert(0, "0.1") - #MAX NUMBER OF HARMONICS + # MAX NUMBER OF HARMONICS nH_label = "Maximum number of harmonics:" - Label(self.parent, text=nH_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=nH_label).grid( + row=7, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.nH = Entry(self.parent, justify=CENTER) self.nH["width"] = 5 - self.nH.grid(row=7, column=0, sticky=W, padx=(215,5), pady=(10,2)) + self.nH.grid(row=7, column=0, sticky=W, padx=(215, 5), pady=(10, 2)) self.nH.delete(0, END) self.nH.insert(0, "100") - #MIN FUNDAMENTAL FREQUENCY + # MIN FUNDAMENTAL FREQUENCY minf0_label = "Minimum fundamental frequency:" - Label(self.parent, text=minf0_label).grid(row=8, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=minf0_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.minf0 = Entry(self.parent, justify=CENTER) self.minf0["width"] = 5 - self.minf0.grid(row=8, column=0, sticky=W, padx=(220,5), pady=(10,2)) + self.minf0.grid(row=8, column=0, sticky=W, padx=(220, 5), pady=(10, 2)) self.minf0.delete(0, END) self.minf0.insert(0, "350") - #MAX FUNDAMENTAL FREQUENCY + # MAX FUNDAMENTAL FREQUENCY maxf0_label = "Maximum fundamental frequency:" - Label(self.parent, text=maxf0_label).grid(row=9, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=maxf0_label).grid( + row=9, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.maxf0 = Entry(self.parent, justify=CENTER) self.maxf0["width"] = 5 - self.maxf0.grid(row=9, column=0, sticky=W, padx=(220,5), pady=(10,2)) + self.maxf0.grid(row=9, column=0, sticky=W, padx=(220, 5), pady=(10, 2)) self.maxf0.delete(0, END) self.maxf0.insert(0, "700") - #MAX ERROR ACCEPTED + # MAX ERROR ACCEPTED f0et_label = "Maximum error in f0 detection algorithm:" - Label(self.parent, text=f0et_label).grid(row=10, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=f0et_label).grid( + row=10, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.f0et = Entry(self.parent, justify=CENTER) self.f0et["width"] = 5 - self.f0et.grid(row=10, column=0, sticky=W, padx=(265,5), pady=(10,2)) + self.f0et.grid(row=10, column=0, sticky=W, padx=(265, 5), pady=(10, 2)) self.f0et.delete(0, END) self.f0et.insert(0, "5") - #ALLOWED DEVIATION OF HARMONIC TRACKS + # ALLOWED DEVIATION OF HARMONIC TRACKS harmDevSlope_label = "Max frequency deviation in harmonic tracks:" - Label(self.parent, text=harmDevSlope_label).grid(row=11, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=harmDevSlope_label).grid( + row=11, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.harmDevSlope = Entry(self.parent, justify=CENTER) self.harmDevSlope["width"] = 5 - self.harmDevSlope.grid(row=11, column=0, sticky=W, padx=(285,5), pady=(10,2)) + self.harmDevSlope.grid(row=11, column=0, sticky=W, padx=(285, 5), pady=(10, 2)) self.harmDevSlope.delete(0, END) self.harmDevSlope.insert(0, "0.01") - #BUTTON TO COMPUTE EVERYTHING + # BUTTON TO COMPUTE EVERYTHING self.compute = Button(self.parent, text="Compute", command=self.compute_model) - self.compute.grid(row=12, column=0, padx=5, pady=(10,2), sticky=W) + self.compute.grid(row=12, column=0, padx=5, pady=(10, 2), sticky=W) - #BUTTON TO PLAY SINE OUTPUT + # BUTTON TO PLAY SINE OUTPUT output_label = "Sinusoidal:" - Label(self.parent, text=output_label).grid(row=13, column=0, sticky=W, padx=5, pady=(10,0)) - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_hprModel_sines.wav')) - self.output.grid(row=13, column=0, padx=(80,5), pady=(10,0), sticky=W) - - #BUTTON TO PLAY RESIDUAL OUTPUT + Label(self.parent, text=output_label).grid( + row=13, column=0, sticky=W, padx=5, pady=(10, 0) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_hprModel_sines.wav" + ), + ) + self.output.grid(row=13, column=0, padx=(80, 5), pady=(10, 0), sticky=W) + + # BUTTON TO PLAY RESIDUAL OUTPUT output_label = "Residual:" - Label(self.parent, text=output_label).grid(row=14, column=0, sticky=W, padx=5, pady=(5,0)) - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_hprModel_residual.wav')) - self.output.grid(row=14, column=0, padx=(80,5), pady=(5,0), sticky=W) - - #BUTTON TO PLAY OUTPUT + Label(self.parent, text=output_label).grid( + row=14, column=0, sticky=W, padx=5, pady=(5, 0) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_hprModel_residual.wav" + ), + ) + self.output.grid(row=14, column=0, padx=(80, 5), pady=(5, 0), sticky=W) + + # BUTTON TO PLAY OUTPUT output_label = "Output:" - Label(self.parent, text=output_label).grid(row=15, column=0, sticky=W, padx=5, pady=(5,15)) - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_hprModel.wav')) - self.output.grid(row=15, column=0, padx=(80,5), pady=(5,15), sticky=W) - + Label(self.parent, text=output_label).grid( + row=15, column=0, sticky=W, padx=5, pady=(5, 15) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_hprModel.wav" + ), + ) + self.output.grid(row=15, column=0, padx=(80, 5), pady=(5, 15), sticky=W) # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file(self): self.filename = filedialog.askopenfilename(**self.file_opt) - #set the text of the self.filelocation + # set the text of the self.filelocation self.filelocation.delete(0, END) - self.filelocation.insert(0,self.filename) + self.filelocation.insert(0, self.filename) def compute_model(self): @@ -179,7 +245,19 @@ def compute_model(self): f0et = int(self.f0et.get()) harmDevSlope = float(self.harmDevSlope.get()) - hprModel_function.main(inputFile, window, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope) + hprModel_function.main( + inputFile, + window, + M, + N, + t, + minSineDur, + nH, + minf0, + maxf0, + f0et, + harmDevSlope, + ) except ValueError as errorMessage: messagebox.showerror("Input values error", str(errorMessage)) diff --git a/smstools/models/interface/hprModel_function.py b/smstools/models/interface/hprModel_function.py index 3a67088c..1f2cd08f 100644 --- a/smstools/models/interface/hprModel_function.py +++ b/smstools/models/interface/hprModel_function.py @@ -8,96 +8,115 @@ from smstools.models import hprModel as HPR from smstools.models import stft as STFT -def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, - minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01): - """ - Perform analysis/synthesis using the harmonic plus residual model - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size; N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks - nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound - maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm - harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # read input sound - (fs, x) = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # find harmonics and residual - hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope) - - # compute spectrogram of residual - mXr, pXr = STFT.stftAnal(xr, w, N, H) - - # synthesize hpr model - y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs) - - # output sound file (monophonic with sampling rate of 44100) - outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav' - outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav' - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav' - - # write sounds files for harmonics, residual, and the sum - UF.wavwrite(yh, fs, outputFileSines) - UF.wavwrite(xr, fs, outputFileResidual) - UF.wavwrite(y, fs, outputFile) - - # create figure to plot - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 5000.0 - - # plot the input sound - plt.subplot(3,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - # plot the magnitude spectrogram of residual - plt.subplot(3,1,2) - maxplotbin = int(N*maxplotfreq/fs) - numFrames = int(mXr[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = np.arange(maxplotbin+1)*float(fs)/N - plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:,:maxplotbin+1])) - plt.autoscale(tight=True) - - # plot harmonic frequencies on residual spectrogram - if (hfreq.shape[1] > 0): - harms = hfreq*np.less(hfreq,maxplotfreq) - harms[harms==0] = np.nan - numFrames = int(harms[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - plt.plot(frmTime, harms, color='k', ms=3, alpha=1) - plt.xlabel('time(s)') - plt.ylabel('frequency(Hz)') - plt.autoscale(tight=True) - plt.title('harmonics + residual spectrogram') - - # plot the output sound - plt.subplot(3,1,3) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.ion() - plt.show() + +def main( + inputFile="../../sounds/sax-phrase-short.wav", + window="blackman", + M=601, + N=1024, + t=-100, + minSineDur=0.1, + nH=100, + minf0=350, + maxf0=700, + f0et=5, + harmDevSlope=0.01, +): + """ + Perform analysis/synthesis using the harmonic plus residual model + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size; N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks + nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound + maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm + harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # read input sound + (fs, x) = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # find harmonics and residual + hfreq, hmag, hphase, xr = HPR.hprModelAnal( + x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope + ) + + # compute spectrogram of residual + mXr, pXr = STFT.stftAnal(xr, w, N, H) + + # synthesize hpr model + y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs) + + # output sound file (monophonic with sampling rate of 44100) + outputFileSines = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hprModel_sines.wav" + ) + outputFileResidual = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hprModel_residual.wav" + ) + outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hprModel.wav" + + # write sounds files for harmonics, residual, and the sum + UF.wavwrite(yh, fs, outputFileSines) + UF.wavwrite(xr, fs, outputFileResidual) + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 5000.0 + + # plot the input sound + plt.subplot(3, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + # plot the magnitude spectrogram of residual + plt.subplot(3, 1, 2) + maxplotbin = int(N * maxplotfreq / fs) + numFrames = int(mXr[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = np.arange(maxplotbin + 1) * float(fs) / N + plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, : maxplotbin + 1])) + plt.autoscale(tight=True) + + # plot harmonic frequencies on residual spectrogram + if hfreq.shape[1] > 0: + harms = hfreq * np.less(hfreq, maxplotfreq) + harms[harms == 0] = np.nan + numFrames = int(harms[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + plt.plot(frmTime, harms, color="k", ms=3, alpha=1) + plt.xlabel("time(s)") + plt.ylabel("frequency(Hz)") + plt.autoscale(tight=True) + plt.title("harmonics + residual spectrogram") + + # plot the output sound + plt.subplot(3, 1, 3) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.ion() + plt.show() + if __name__ == "__main__": - main() + main() diff --git a/smstools/models/interface/hpsModel_GUI_frame.py b/smstools/models/interface/hpsModel_GUI_frame.py index 06c4e8bf..fd6d92f9 100644 --- a/smstools/models/interface/hpsModel_GUI_frame.py +++ b/smstools/models/interface/hpsModel_GUI_frame.py @@ -19,7 +19,9 @@ def __init__(self, parent): def initUI(self): choose_label = "Input file (.wav, mono and 44100 sampling rate):" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation = Entry(self.parent) @@ -27,30 +29,47 @@ def initUI(self): self.filelocation["width"] = 25 self.filelocation.grid(row=1, column=0, sticky=W, padx=10) self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/sax-phrase-short.wav') + self.filelocation.insert(0, "../../sounds/sax-phrase-short.wav") # BUTTON TO BROWSE SOUND FILE - self.open_file = Button(self.parent, text="Browse...", command=self.browse_file) # see: def browse_file(self) - self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6)) # put it beside the filelocation textbox + self.open_file = Button( + self.parent, text="Browse...", command=self.browse_file + ) # see: def browse_file(self) + self.open_file.grid( + row=1, column=0, sticky=W, padx=(220, 6) + ) # put it beside the filelocation textbox # BUTTON TO PREVIEW SOUND FILE - self.preview = Button(self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get())) + self.preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6)) ## HARMONIC MODEL # ANALYSIS WINDOW TYPE wtype_label = "Window type:" - Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=wtype_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.w_type = StringVar() self.w_type.set("blackman") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hann", "hamming", "blackman", - "blackmanharris") + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hann", + "hamming", + "blackman", + "blackmanharris", + ) window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2)) # WINDOW SIZE M_label = "Window size (M):" - Label(self.parent, text=M_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=M_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.M = Entry(self.parent, justify=CENTER) self.M["width"] = 5 self.M.grid(row=4, column=0, sticky=W, padx=(115, 5), pady=(10, 2)) @@ -59,7 +78,9 @@ def initUI(self): # FFT SIZE N_label = "FFT size (N) (power of two bigger than M):" - Label(self.parent, text=N_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=N_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.N = Entry(self.parent, justify=CENTER) self.N["width"] = 5 self.N.grid(row=5, column=0, sticky=W, padx=(270, 5), pady=(10, 2)) @@ -68,7 +89,9 @@ def initUI(self): # THRESHOLD MAGNITUDE t_label = "Magnitude threshold (t) (in dB):" - Label(self.parent, text=t_label).grid(row=6, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=t_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.t = Entry(self.parent, justify=CENTER) self.t["width"] = 5 self.t.grid(row=6, column=0, sticky=W, padx=(205, 5), pady=(10, 2)) @@ -77,7 +100,9 @@ def initUI(self): # MIN DURATION SINUSOIDAL TRACKS minSineDur_label = "Minimum duration of sinusoidal tracks:" - Label(self.parent, text=minSineDur_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=minSineDur_label).grid( + row=7, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.minSineDur = Entry(self.parent, justify=CENTER) self.minSineDur["width"] = 5 self.minSineDur.grid(row=7, column=0, sticky=W, padx=(250, 5), pady=(10, 2)) @@ -86,7 +111,9 @@ def initUI(self): # MAX NUMBER OF HARMONICS nH_label = "Maximum number of harmonics:" - Label(self.parent, text=nH_label).grid(row=8, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=nH_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.nH = Entry(self.parent, justify=CENTER) self.nH["width"] = 5 self.nH.grid(row=8, column=0, sticky=W, padx=(215, 5), pady=(10, 2)) @@ -95,7 +122,9 @@ def initUI(self): # MIN FUNDAMENTAL FREQUENCY minf0_label = "Minimum fundamental frequency:" - Label(self.parent, text=minf0_label).grid(row=9, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=minf0_label).grid( + row=9, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.minf0 = Entry(self.parent, justify=CENTER) self.minf0["width"] = 5 self.minf0.grid(row=9, column=0, sticky=W, padx=(220, 5), pady=(10, 2)) @@ -104,7 +133,9 @@ def initUI(self): # MAX FUNDAMENTAL FREQUENCY maxf0_label = "Maximum fundamental frequency:" - Label(self.parent, text=maxf0_label).grid(row=10, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=maxf0_label).grid( + row=10, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.maxf0 = Entry(self.parent, justify=CENTER) self.maxf0["width"] = 5 self.maxf0.grid(row=10, column=0, sticky=W, padx=(220, 5), pady=(10, 2)) @@ -113,7 +144,9 @@ def initUI(self): # MAX ERROR ACCEPTED f0et_label = "Maximum error in f0 detection algorithm:" - Label(self.parent, text=f0et_label).grid(row=11, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=f0et_label).grid( + row=11, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.f0et = Entry(self.parent, justify=CENTER) self.f0et["width"] = 5 self.f0et.grid(row=11, column=0, sticky=W, padx=(265, 5), pady=(10, 2)) @@ -122,7 +155,9 @@ def initUI(self): # ALLOWED DEVIATION OF HARMONIC TRACKS harmDevSlope_label = "Max frequency deviation in harmonic tracks:" - Label(self.parent, text=harmDevSlope_label).grid(row=12, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=harmDevSlope_label).grid( + row=12, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.harmDevSlope = Entry(self.parent, justify=CENTER) self.harmDevSlope["width"] = 5 self.harmDevSlope.grid(row=12, column=0, sticky=W, padx=(285, 5), pady=(10, 2)) @@ -131,7 +166,9 @@ def initUI(self): # DECIMATION FACTOR stocf_label = "Stochastic approximation factor:" - Label(self.parent, text=stocf_label).grid(row=13, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=stocf_label).grid( + row=13, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.stocf = Entry(self.parent, justify=CENTER) self.stocf["width"] = 5 self.stocf.grid(row=13, column=0, sticky=W, padx=(210, 5), pady=(10, 2)) @@ -144,31 +181,58 @@ def initUI(self): # BUTTON TO PLAY SINE OUTPUT output_label = "Sinusoidal:" - Label(self.parent, text=output_label).grid(row=15, column=0, sticky=W, padx=5, pady=(10, 0)) - self.output = Button(self.parent, text=">", command=lambda: UF.wavplay( - 'output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_hpsModel_sines.wav')) + Label(self.parent, text=output_label).grid( + row=15, column=0, sticky=W, padx=5, pady=(10, 0) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_hpsModel_sines.wav" + ), + ) self.output.grid(row=15, column=0, padx=(80, 5), pady=(10, 0), sticky=W) # BUTTON TO PLAY STOCHASTIC OUTPUT output_label = "Stochastic:" - Label(self.parent, text=output_label).grid(row=16, column=0, sticky=W, padx=5, pady=(5, 0)) - self.output = Button(self.parent, text=">", command=lambda: UF.wavplay( - 'output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_hpsModel_stochastic.wav')) + Label(self.parent, text=output_label).grid( + row=16, column=0, sticky=W, padx=5, pady=(5, 0) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_hpsModel_stochastic.wav" + ), + ) self.output.grid(row=16, column=0, padx=(80, 5), pady=(5, 0), sticky=W) # BUTTON TO PLAY OUTPUT output_label = "Output:" - Label(self.parent, text=output_label).grid(row=17, column=0, sticky=W, padx=5, pady=(5, 15)) - self.output = Button(self.parent, text=">", command=lambda: UF.wavplay( - 'output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_hpsModel.wav')) + Label(self.parent, text=output_label).grid( + row=17, column=0, sticky=W, padx=5, pady=(5, 15) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_hpsModel.wav" + ), + ) self.output.grid(row=17, column=0, padx=(80, 5), pady=(5, 15), sticky=W) # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file(self): @@ -194,7 +258,20 @@ def compute_model(self): harmDevSlope = float(self.harmDevSlope.get()) stocf = float(self.stocf.get()) - hpsModel_function.main(inputFile, window, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope, stocf) + hpsModel_function.main( + inputFile, + window, + M, + N, + t, + minSineDur, + nH, + minf0, + maxf0, + f0et, + harmDevSlope, + stocf, + ) except ValueError as errorMessage: messagebox.showerror("Input values error", str(errorMessage)) diff --git a/smstools/models/interface/hpsModel_function.py b/smstools/models/interface/hpsModel_function.py index e5951a04..d30a528a 100644 --- a/smstools/models/interface/hpsModel_function.py +++ b/smstools/models/interface/hpsModel_function.py @@ -7,93 +7,117 @@ from smstools.models import utilFunctions as UF from smstools.models import hpsModel as HPS -def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, - minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1): - """ - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size; N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks - nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound - maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm - harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation - stocf: decimation factor used for the stochastic approximation - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # read input sound - (fs, x) = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # compute the harmonic plus stochastic model of the whole sound - hfreq, hmag, hphase, stocEnv = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf) - - # synthesize a sound from the harmonic plus stochastic representation - y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, stocEnv, Ns, H, fs) - - # output sound file (monophonic with sampling rate of 44100) - outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel_sines.wav' - outputFileStochastic = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel_stochastic.wav' - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel.wav' - - # write sounds files for harmonics, stochastic, and the sum - UF.wavwrite(yh, fs, outputFileSines) - UF.wavwrite(yst, fs, outputFileStochastic) - UF.wavwrite(y, fs, outputFile) - - # create figure to plot - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 15000.0 - - # plot the input sound - plt.subplot(3,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - # plot spectrogram stochastic component - plt.subplot(3,1,2) - numFrames = int(stocEnv[:,0].size) - sizeEnv = int(stocEnv[0,:].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv - plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv[:,:int(sizeEnv*maxplotfreq/(.5*fs)+1)])) - plt.autoscale(tight=True) - - # plot harmonic on top of stochastic spectrogram - if (hfreq.shape[1] > 0): - harms = hfreq*np.less(hfreq,maxplotfreq) - harms[harms==0] = np.nan - numFrames = harms.shape[0] - frmTime = H*np.arange(numFrames)/float(fs) - plt.plot(frmTime, harms, color='k', ms=3, alpha=1) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.autoscale(tight=True) - plt.title('harmonics + stochastic spectrogram') - - # plot the output sound - plt.subplot(3,1,3) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.ion() - plt.show() + +def main( + inputFile="../../sounds/sax-phrase-short.wav", + window="blackman", + M=601, + N=1024, + t=-100, + minSineDur=0.1, + nH=100, + minf0=350, + maxf0=700, + f0et=5, + harmDevSlope=0.01, + stocf=0.1, +): + """ + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size; N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks + nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound + maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm + harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation + stocf: decimation factor used for the stochastic approximation + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # read input sound + (fs, x) = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # compute the harmonic plus stochastic model of the whole sound + hfreq, hmag, hphase, stocEnv = HPS.hpsModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf + ) + + # synthesize a sound from the harmonic plus stochastic representation + y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, stocEnv, Ns, H, fs) + + # output sound file (monophonic with sampling rate of 44100) + outputFileSines = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel_sines.wav" + ) + outputFileStochastic = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel_stochastic.wav" + ) + outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel.wav" + + # write sounds files for harmonics, stochastic, and the sum + UF.wavwrite(yh, fs, outputFileSines) + UF.wavwrite(yst, fs, outputFileStochastic) + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 15000.0 + + # plot the input sound + plt.subplot(3, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + # plot spectrogram stochastic component + plt.subplot(3, 1, 2) + numFrames = int(stocEnv[:, 0].size) + sizeEnv = int(stocEnv[0, :].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = (0.5 * fs) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs)) / sizeEnv + plt.pcolormesh( + frmTime, + binFreq, + np.transpose(stocEnv[:, : int(sizeEnv * maxplotfreq / (0.5 * fs) + 1)]), + ) + plt.autoscale(tight=True) + + # plot harmonic on top of stochastic spectrogram + if hfreq.shape[1] > 0: + harms = hfreq * np.less(hfreq, maxplotfreq) + harms[harms == 0] = np.nan + numFrames = harms.shape[0] + frmTime = H * np.arange(numFrames) / float(fs) + plt.plot(frmTime, harms, color="k", ms=3, alpha=1) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.autoscale(tight=True) + plt.title("harmonics + stochastic spectrogram") + + # plot the output sound + plt.subplot(3, 1, 3) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.ion() + plt.show() + if __name__ == "__main__": - main() + main() diff --git a/smstools/models/interface/models_GUI.py b/smstools/models/interface/models_GUI.py index 4d5f8952..2f11fdb7 100644 --- a/smstools/models/interface/models_GUI.py +++ b/smstools/models/interface/models_GUI.py @@ -1,5 +1,6 @@ import matplotlib -matplotlib.use('TkAgg') + +matplotlib.use("TkAgg") from .notebook import * # window with tabs from .dftModel_GUI_frame import * @@ -13,8 +14,10 @@ from .hpsModel_GUI_frame import * root = Tk() -root.title('sms-tools models GUI') -nb = notebook(root, TOP) # make a few diverse frames (panels), each using the NB as 'master': +root.title("sms-tools models GUI") +nb = notebook( + root, TOP +) # make a few diverse frames (panels), each using the NB as 'master': # uses the notebook's frame f1 = Frame(nb()) @@ -56,5 +59,5 @@ nb.display(f1) -root.geometry('+0+0') +root.geometry("+0+0") root.mainloop() diff --git a/smstools/models/interface/notebook.py b/smstools/models/interface/notebook.py index 78eb3b89..1e5455c7 100644 --- a/smstools/models/interface/notebook.py +++ b/smstools/models/interface/notebook.py @@ -1,13 +1,15 @@ -from tkinter import * # notice lowercase 't' in tkinter here +from tkinter import * # notice lowercase 't' in tkinter here -class notebook(object): + +class notebook(object): def __init__(self, master, side=LEFT): self.active_fr = None self.count = 0 self.choice = IntVar() if side in (TOP, BOTTOM): self.side = LEFT - else: self.side = TOP + else: + self.side = TOP self.rb_fr = Frame(master, borderwidth=2, relief=GROOVE) self.rb_fr.pack(side=side, fill=BOTH) self.screen_fr = Frame(master, borderwidth=2, relief=FLAT) @@ -17,7 +19,14 @@ def __call__(self): return self.screen_fr def add_screen(self, fr, title): - b = Radiobutton(self.rb_fr, text=title, indicatoron=0, variable=self.choice, value=self.count, command=lambda: self.display(fr)) + b = Radiobutton( + self.rb_fr, + text=title, + indicatoron=0, + variable=self.choice, + value=self.count, + command=lambda: self.display(fr), + ) b.pack(fill=BOTH, side=self.side) if not self.active_fr: fr.pack(fill=BOTH, expand=1) @@ -25,6 +34,6 @@ def add_screen(self, fr, title): self.count += 1 def display(self, fr): - self.active_fr.forget( ) + self.active_fr.forget() fr.pack(fill=BOTH, expand=1) self.active_fr = fr diff --git a/smstools/models/interface/sineModel_GUI_frame.py b/smstools/models/interface/sineModel_GUI_frame.py index 8aceb8ae..40e60a70 100644 --- a/smstools/models/interface/sineModel_GUI_frame.py +++ b/smstools/models/interface/sineModel_GUI_frame.py @@ -19,7 +19,9 @@ def __init__(self, parent): def initUI(self): choose_label = "Input file (.wav, mono and 44100 sampling rate):" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation = Entry(self.parent) @@ -27,30 +29,47 @@ def initUI(self): self.filelocation["width"] = 25 self.filelocation.grid(row=1, column=0, sticky=W, padx=10) self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/bendir.wav') + self.filelocation.insert(0, "../../sounds/bendir.wav") # BUTTON TO BROWSE SOUND FILE - self.open_file = Button(self.parent, text="Browse...", command=self.browse_file) # see: def browse_file(self) - self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6)) # put it beside the filelocation textbox + self.open_file = Button( + self.parent, text="Browse...", command=self.browse_file + ) # see: def browse_file(self) + self.open_file.grid( + row=1, column=0, sticky=W, padx=(220, 6) + ) # put it beside the filelocation textbox # BUTTON TO PREVIEW SOUND FILE - self.preview = Button(self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get())) + self.preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6)) ## SINE MODEL # ANALYSIS WINDOW TYPE wtype_label = "Window type:" - Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=wtype_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.w_type = StringVar() self.w_type.set("hamming") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hann", "hamming", "blackman", - "blackmanharris") + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hann", + "hamming", + "blackman", + "blackmanharris", + ) window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2)) # WINDOW SIZE M_label = "Window size (M):" - Label(self.parent, text=M_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=M_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.M = Entry(self.parent, justify=CENTER) self.M["width"] = 5 self.M.grid(row=3, column=0, sticky=W, padx=(115, 5), pady=(10, 2)) @@ -59,7 +78,9 @@ def initUI(self): # FFT SIZE N_label = "FFT size (N) (power of two bigger than M):" - Label(self.parent, text=N_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=N_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.N = Entry(self.parent, justify=CENTER) self.N["width"] = 5 self.N.grid(row=4, column=0, sticky=W, padx=(270, 5), pady=(10, 2)) @@ -68,7 +89,9 @@ def initUI(self): # THRESHOLD MAGNITUDE t_label = "Magnitude threshold (t) (in dB):" - Label(self.parent, text=t_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=t_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.t = Entry(self.parent, justify=CENTER) self.t["width"] = 5 self.t.grid(row=5, column=0, sticky=W, padx=(205, 5), pady=(10, 2)) @@ -77,7 +100,9 @@ def initUI(self): # MIN DURATION SINUSOIDAL TRACKS minSineDur_label = "Minimum duration of sinusoidal tracks:" - Label(self.parent, text=minSineDur_label).grid(row=6, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=minSineDur_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.minSineDur = Entry(self.parent, justify=CENTER) self.minSineDur["width"] = 5 self.minSineDur.grid(row=6, column=0, sticky=W, padx=(250, 5), pady=(10, 2)) @@ -86,7 +111,9 @@ def initUI(self): # MAX NUMBER PARALLEL SINUSOIDS maxnSines_label = "Maximum number of parallel sinusoids:" - Label(self.parent, text=maxnSines_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=maxnSines_label).grid( + row=7, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.maxnSines = Entry(self.parent, justify=CENTER) self.maxnSines["width"] = 5 self.maxnSines.grid(row=7, column=0, sticky=W, padx=(250, 5), pady=(10, 2)) @@ -94,8 +121,12 @@ def initUI(self): self.maxnSines.insert(0, "150") # FREQUENCY DEVIATION ALLOWED - freqDevOffset_label = "Max frequency deviation in sinusoidal tracks (at freq 0):" - Label(self.parent, text=freqDevOffset_label).grid(row=8, column=0, sticky=W, padx=5, pady=(10, 2)) + freqDevOffset_label = ( + "Max frequency deviation in sinusoidal tracks (at freq 0):" + ) + Label(self.parent, text=freqDevOffset_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.freqDevOffset = Entry(self.parent, justify=CENTER) self.freqDevOffset["width"] = 5 self.freqDevOffset.grid(row=8, column=0, sticky=W, padx=(350, 5), pady=(10, 2)) @@ -104,7 +135,9 @@ def initUI(self): # SLOPE OF THE FREQ DEVIATION freqDevSlope_label = "Slope of the frequency deviation (as function of freq):" - Label(self.parent, text=freqDevSlope_label).grid(row=9, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=freqDevSlope_label).grid( + row=9, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.freqDevSlope = Entry(self.parent, justify=CENTER) self.freqDevSlope["width"] = 5 self.freqDevSlope.grid(row=9, column=0, sticky=W, padx=(340, 5), pady=(10, 2)) @@ -117,17 +150,26 @@ def initUI(self): # BUTTON TO PLAY OUTPUT output_label = "Output:" - Label(self.parent, text=output_label).grid(row=11, column=0, sticky=W, padx=5, pady=(10, 15)) - self.output = Button(self.parent, text=">", command=lambda: UF.wavplay( - 'output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_sineModel.wav')) + Label(self.parent, text=output_label).grid( + row=11, column=0, sticky=W, padx=5, pady=(10, 15) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_sineModel.wav" + ), + ) self.output.grid(row=11, column=0, padx=(60, 5), pady=(10, 15), sticky=W) # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file(self): @@ -150,7 +192,17 @@ def compute_model(self): freqDevOffset = int(self.freqDevOffset.get()) freqDevSlope = float(self.freqDevSlope.get()) - sineModel_function.main(inputFile, window, M, N, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope) + sineModel_function.main( + inputFile, + window, + M, + N, + t, + minSineDur, + maxnSines, + freqDevOffset, + freqDevSlope, + ) except ValueError as errorMessage: messagebox.showerror("Input values error", str(errorMessage)) diff --git a/smstools/models/interface/sineModel_function.py b/smstools/models/interface/sineModel_function.py index 84f27e70..dfb0b1f6 100644 --- a/smstools/models/interface/sineModel_function.py +++ b/smstools/models/interface/sineModel_function.py @@ -7,80 +7,91 @@ from smstools.models import utilFunctions as UF from smstools.models import sineModel as SM -def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02, - maxnSines=150, freqDevOffset=10, freqDevSlope=0.001): - """ - Perform analysis/synthesis using the sinusoidal model - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size; N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks - maxnSines: maximum number of parallel sinusoids - freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0 - freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # read input sound - fs, x = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # analyze the sound with the sinusoidal model - tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope) - - # synthesize the output sound from the sinusoidal representation - y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs) - - # output sound file name - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav' - - # write the synthesized sound obtained from the sinusoidal synthesis - UF.wavwrite(y, fs, outputFile) - - # create figure to show plots - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 5000.0 - - # plot the input sound - plt.subplot(3,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - # plot the sinusoidal frequencies - plt.subplot(3,1,2) - if (tfreq.shape[1] > 0): - numFrames = tfreq.shape[0] - frmTime = H*np.arange(numFrames)/float(fs) - tfreq[tfreq<=0] = np.nan - plt.plot(frmTime, tfreq) - plt.axis([0, x.size/float(fs), 0, maxplotfreq]) - plt.title('frequencies of sinusoidal tracks') - - # plot the output sound - plt.subplot(3,1,3) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.ion() - plt.show() +def main( + inputFile="../../sounds/bendir.wav", + window="hamming", + M=2001, + N=2048, + t=-80, + minSineDur=0.02, + maxnSines=150, + freqDevOffset=10, + freqDevSlope=0.001, +): + """ + Perform analysis/synthesis using the sinusoidal model + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size; N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks + maxnSines: maximum number of parallel sinusoids + freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0 + freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # read input sound + fs, x = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # analyze the sound with the sinusoidal model + tfreq, tmag, tphase = SM.sineModelAnal( + x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope + ) + + # synthesize the output sound from the sinusoidal representation + y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs) + + # output sound file name + outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_sineModel.wav" + + # write the synthesized sound obtained from the sinusoidal synthesis + UF.wavwrite(y, fs, outputFile) + + # create figure to show plots + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 5000.0 + + # plot the input sound + plt.subplot(3, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + # plot the sinusoidal frequencies + plt.subplot(3, 1, 2) + if tfreq.shape[1] > 0: + numFrames = tfreq.shape[0] + frmTime = H * np.arange(numFrames) / float(fs) + tfreq[tfreq <= 0] = np.nan + plt.plot(frmTime, tfreq) + plt.axis([0, x.size / float(fs), 0, maxplotfreq]) + plt.title("frequencies of sinusoidal tracks") + + # plot the output sound + plt.subplot(3, 1, 3) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.ion() + plt.show() -if __name__ == "__main__": - main() +if __name__ == "__main__": + main() diff --git a/smstools/models/interface/sprModel_GUI_frame.py b/smstools/models/interface/sprModel_GUI_frame.py index 2831b069..b0fb63cb 100644 --- a/smstools/models/interface/sprModel_GUI_frame.py +++ b/smstools/models/interface/sprModel_GUI_frame.py @@ -7,6 +7,7 @@ from smstools.models.interface import sprModel_function from smstools.models import utilFunctions as UF + class SprModel_frame: def __init__(self, parent): @@ -17,133 +18,197 @@ def __init__(self, parent): def initUI(self): choose_label = "Input file (.wav, mono and 44100 sampling rate):" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) - #TEXTBOX TO PRINT PATH OF THE SOUND FILE + # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation = Entry(self.parent) self.filelocation.focus_set() self.filelocation["width"] = 25 - self.filelocation.grid(row=1,column=0, sticky=W, padx=10) + self.filelocation.grid(row=1, column=0, sticky=W, padx=10) self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/bendir.wav') - - #BUTTON TO BROWSE SOUND FILE - self.open_file = Button(self.parent, text="Browse...", command=self.browse_file) #see: def browse_file(self) - self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE - self.preview = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation.get())) - self.preview.grid(row=1, column=0, sticky=W, padx=(306,6)) + self.filelocation.insert(0, "../../sounds/bendir.wav") + + # BUTTON TO BROWSE SOUND FILE + self.open_file = Button( + self.parent, text="Browse...", command=self.browse_file + ) # see: def browse_file(self) + self.open_file.grid( + row=1, column=0, sticky=W, padx=(220, 6) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE + self.preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) + self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6)) ## SPR MODEL - #ANALYSIS WINDOW TYPE + # ANALYSIS WINDOW TYPE wtype_label = "Window type:" - Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=wtype_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.w_type = StringVar() - self.w_type.set("hamming") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hann", "hamming", "blackman", "blackmanharris") - window_option.grid(row=2, column=0, sticky=W, padx=(95,5), pady=(10,2)) - - #WINDOW SIZE + self.w_type.set("hamming") # initial value + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hann", + "hamming", + "blackman", + "blackmanharris", + ) + window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2)) + + # WINDOW SIZE M_label = "Window size (M):" - Label(self.parent, text=M_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=M_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.M = Entry(self.parent, justify=CENTER) self.M["width"] = 5 - self.M.grid(row=3,column=0, sticky=W, padx=(115,5), pady=(10,2)) + self.M.grid(row=3, column=0, sticky=W, padx=(115, 5), pady=(10, 2)) self.M.delete(0, END) self.M.insert(0, "2001") - #FFT SIZE + # FFT SIZE N_label = "FFT size (N) (power of two bigger than M):" - Label(self.parent, text=N_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=N_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.N = Entry(self.parent, justify=CENTER) self.N["width"] = 5 - self.N.grid(row=4,column=0, sticky=W, padx=(270,5), pady=(10,2)) + self.N.grid(row=4, column=0, sticky=W, padx=(270, 5), pady=(10, 2)) self.N.delete(0, END) self.N.insert(0, "2048") - #THRESHOLD MAGNITUDE + # THRESHOLD MAGNITUDE t_label = "Magnitude threshold (t) (in dB):" - Label(self.parent, text=t_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=t_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.t = Entry(self.parent, justify=CENTER) self.t["width"] = 5 - self.t.grid(row=5, column=0, sticky=W, padx=(205,5), pady=(10,2)) + self.t.grid(row=5, column=0, sticky=W, padx=(205, 5), pady=(10, 2)) self.t.delete(0, END) self.t.insert(0, "-80") - #MIN DURATION SINUSOIDAL TRACKS + # MIN DURATION SINUSOIDAL TRACKS minSineDur_label = "Minimum duration of sinusoidal tracks:" - Label(self.parent, text=minSineDur_label).grid(row=6, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=minSineDur_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.minSineDur = Entry(self.parent, justify=CENTER) self.minSineDur["width"] = 5 - self.minSineDur.grid(row=6, column=0, sticky=W, padx=(250,5), pady=(10,2)) + self.minSineDur.grid(row=6, column=0, sticky=W, padx=(250, 5), pady=(10, 2)) self.minSineDur.delete(0, END) self.minSineDur.insert(0, "0.02") - #MAX NUMBER PARALLEL SINUSOIDS + # MAX NUMBER PARALLEL SINUSOIDS maxnSines_label = "Maximum number of parallel sinusoids:" - Label(self.parent, text=maxnSines_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=maxnSines_label).grid( + row=7, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.maxnSines = Entry(self.parent, justify=CENTER) self.maxnSines["width"] = 5 - self.maxnSines.grid(row=7, column=0, sticky=W, padx=(250,5), pady=(10,2)) + self.maxnSines.grid(row=7, column=0, sticky=W, padx=(250, 5), pady=(10, 2)) self.maxnSines.delete(0, END) self.maxnSines.insert(0, "150") - #FREQUENCY DEVIATION ALLOWED - freqDevOffset_label = "Max frequency deviation in sinusoidal tracks (at freq 0):" - Label(self.parent, text=freqDevOffset_label).grid(row=8, column=0, sticky=W, padx=5, pady=(10,2)) + # FREQUENCY DEVIATION ALLOWED + freqDevOffset_label = ( + "Max frequency deviation in sinusoidal tracks (at freq 0):" + ) + Label(self.parent, text=freqDevOffset_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.freqDevOffset = Entry(self.parent, justify=CENTER) self.freqDevOffset["width"] = 5 - self.freqDevOffset.grid(row=8, column=0, sticky=W, padx=(350,5), pady=(10,2)) + self.freqDevOffset.grid(row=8, column=0, sticky=W, padx=(350, 5), pady=(10, 2)) self.freqDevOffset.delete(0, END) self.freqDevOffset.insert(0, "10") - #SLOPE OF THE FREQ DEVIATION + # SLOPE OF THE FREQ DEVIATION freqDevSlope_label = "Slope of the frequency deviation (as function of freq):" - Label(self.parent, text=freqDevSlope_label).grid(row=9, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=freqDevSlope_label).grid( + row=9, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.freqDevSlope = Entry(self.parent, justify=CENTER) self.freqDevSlope["width"] = 5 - self.freqDevSlope.grid(row=9, column=0, sticky=W, padx=(340,5), pady=(10,2)) + self.freqDevSlope.grid(row=9, column=0, sticky=W, padx=(340, 5), pady=(10, 2)) self.freqDevSlope.delete(0, END) self.freqDevSlope.insert(0, "0.001") - #BUTTON TO COMPUTE EVERYTHING + # BUTTON TO COMPUTE EVERYTHING self.compute = Button(self.parent, text="Compute", command=self.compute_model) - self.compute.grid(row=10, column=0, padx=5, pady=(10,2), sticky=W) + self.compute.grid(row=10, column=0, padx=5, pady=(10, 2), sticky=W) - #BUTTON TO PLAY SINE OUTPUT + # BUTTON TO PLAY SINE OUTPUT output_label = "Sinusoidal:" - Label(self.parent, text=output_label).grid(row=11, column=0, sticky=W, padx=5, pady=(10,0)) - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_sprModel_sines.wav')) - self.output.grid(row=11, column=0, padx=(80,5), pady=(10,0), sticky=W) - - #BUTTON TO PLAY RESIDUAL OUTPUT + Label(self.parent, text=output_label).grid( + row=11, column=0, sticky=W, padx=5, pady=(10, 0) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_sprModel_sines.wav" + ), + ) + self.output.grid(row=11, column=0, padx=(80, 5), pady=(10, 0), sticky=W) + + # BUTTON TO PLAY RESIDUAL OUTPUT output_label = "Residual:" - Label(self.parent, text=output_label).grid(row=12, column=0, sticky=W, padx=5, pady=(5,0)) - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_sprModel_residual.wav')) - self.output.grid(row=12, column=0, padx=(80,5), pady=(5,0), sticky=W) - - #BUTTON TO PLAY OUTPUT + Label(self.parent, text=output_label).grid( + row=12, column=0, sticky=W, padx=5, pady=(5, 0) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_sprModel_residual.wav" + ), + ) + self.output.grid(row=12, column=0, padx=(80, 5), pady=(5, 0), sticky=W) + + # BUTTON TO PLAY OUTPUT output_label = "Output:" - Label(self.parent, text=output_label).grid(row=13, column=0, sticky=W, padx=5, pady=(5,15)) - self.output = Button(self.parent, text=">", command=lambda: UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_sprModel.wav')) - self.output.grid(row=13, column=0, padx=(80,5), pady=(5,15), sticky=W) + Label(self.parent, text=output_label).grid( + row=13, column=0, sticky=W, padx=5, pady=(5, 15) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_sprModel.wav" + ), + ) + self.output.grid(row=13, column=0, padx=(80, 5), pady=(5, 15), sticky=W) # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file(self): self.filename = filedialog.askopenfilename(**self.file_opt) - #set the text of the self.filelocation + # set the text of the self.filelocation self.filelocation.delete(0, END) - self.filelocation.insert(0,self.filename) + self.filelocation.insert(0, self.filename) def compute_model(self): @@ -158,7 +223,17 @@ def compute_model(self): freqDevOffset = int(self.freqDevOffset.get()) freqDevSlope = float(self.freqDevSlope.get()) - sprModel_function.main(inputFile, window, M, N, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope) + sprModel_function.main( + inputFile, + window, + M, + N, + t, + minSineDur, + maxnSines, + freqDevOffset, + freqDevSlope, + ) except ValueError as errorMessage: messagebox.showerror("Input values error", str(errorMessage)) diff --git a/smstools/models/interface/sprModel_function.py b/smstools/models/interface/sprModel_function.py index 2fb2239b..0c8e07ef 100644 --- a/smstools/models/interface/sprModel_function.py +++ b/smstools/models/interface/sprModel_function.py @@ -8,94 +8,110 @@ from smstools.models import sprModel as SPR from smstools.models import stft as STFT -def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, - minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001): - """ - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size - N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks - minSineDur: minimum duration of sinusoidal tracks - maxnSines: maximum number of parallel sinusoids - freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0 - freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # read input sound - (fs, x) = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # perform sinusoidal plus residual analysis - tfreq, tmag, tphase, xr = SPR.sprModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope) - - # compute spectrogram of residual - mXr, pXr = STFT.stftAnal(xr, w, N, H) - - # sum sinusoids and residual - y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs) - - # output sound file (monophonic with sampling rate of 44100) - outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sprModel_sines.wav' - outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sprModel_residual.wav' - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sprModel.wav' - - # write sounds files for sinusoidal, residual, and the sum - UF.wavwrite(ys, fs, outputFileSines) - UF.wavwrite(xr, fs, outputFileResidual) - UF.wavwrite(y, fs, outputFile) - - # create figure to show plots - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 5000.0 - - # plot the input sound - plt.subplot(3,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - # plot the magnitude spectrogram of residual - plt.subplot(3,1,2) - maxplotbin = int(N*maxplotfreq/fs) - numFrames = int(mXr[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = np.arange(maxplotbin+1)*float(fs)/N - plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:,:maxplotbin+1])) - plt.autoscale(tight=True) - - # plot the sinusoidal frequencies on top of the residual spectrogram - if (tfreq.shape[1] > 0): - tracks = tfreq*np.less(tfreq, maxplotfreq) - tracks[tracks<=0] = np.nan - plt.plot(frmTime, tracks, color='k') - plt.title('sinusoidal tracks + residual spectrogram') - plt.autoscale(tight=True) - - # plot the output sound - plt.subplot(3,1,3) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - - plt.tight_layout() - plt.ion() - plt.show() + +def main( + inputFile="../../sounds/bendir.wav", + window="hamming", + M=2001, + N=2048, + t=-80, + minSineDur=0.02, + maxnSines=150, + freqDevOffset=10, + freqDevSlope=0.001, +): + """ + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size + N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks + minSineDur: minimum duration of sinusoidal tracks + maxnSines: maximum number of parallel sinusoids + freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0 + freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # read input sound + (fs, x) = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # perform sinusoidal plus residual analysis + tfreq, tmag, tphase, xr = SPR.sprModelAnal( + x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope + ) + + # compute spectrogram of residual + mXr, pXr = STFT.stftAnal(xr, w, N, H) + + # sum sinusoids and residual + y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs) + + # output sound file (monophonic with sampling rate of 44100) + outputFileSines = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_sprModel_sines.wav" + ) + outputFileResidual = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_sprModel_residual.wav" + ) + outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_sprModel.wav" + + # write sounds files for sinusoidal, residual, and the sum + UF.wavwrite(ys, fs, outputFileSines) + UF.wavwrite(xr, fs, outputFileResidual) + UF.wavwrite(y, fs, outputFile) + + # create figure to show plots + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 5000.0 + + # plot the input sound + plt.subplot(3, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + # plot the magnitude spectrogram of residual + plt.subplot(3, 1, 2) + maxplotbin = int(N * maxplotfreq / fs) + numFrames = int(mXr[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = np.arange(maxplotbin + 1) * float(fs) / N + plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, : maxplotbin + 1])) + plt.autoscale(tight=True) + + # plot the sinusoidal frequencies on top of the residual spectrogram + if tfreq.shape[1] > 0: + tracks = tfreq * np.less(tfreq, maxplotfreq) + tracks[tracks <= 0] = np.nan + plt.plot(frmTime, tracks, color="k") + plt.title("sinusoidal tracks + residual spectrogram") + plt.autoscale(tight=True) + + # plot the output sound + plt.subplot(3, 1, 3) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.ion() + plt.show() + if __name__ == "__main__": - main() + main() diff --git a/smstools/models/interface/spsModel_GUI_frame.py b/smstools/models/interface/spsModel_GUI_frame.py index c6afbd1a..7d36faa9 100644 --- a/smstools/models/interface/spsModel_GUI_frame.py +++ b/smstools/models/interface/spsModel_GUI_frame.py @@ -7,151 +7,218 @@ from smstools.models.interface import spsModel_function from smstools.models import utilFunctions as UF + class SpsModel_frame: def __init__(self, parent): - self.parent = parent - self.initUI() + self.parent = parent + self.initUI() def initUI(self): choose_label = "Input file (.wav, mono and 44100 sampling rate):" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) - #TEXTBOX TO PRINT PATH OF THE SOUND FILE + # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation = Entry(self.parent) self.filelocation.focus_set() self.filelocation["width"] = 25 - self.filelocation.grid(row=1,column=0, sticky=W, padx=10) + self.filelocation.grid(row=1, column=0, sticky=W, padx=10) self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/bendir.wav') - - #BUTTON TO BROWSE SOUND FILE - self.open_file = Button(self.parent, text="Browse...", command=self.browse_file) #see: def browse_file(self) - self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE - self.preview = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation.get())) - self.preview.grid(row=1, column=0, sticky=W, padx=(306,6)) + self.filelocation.insert(0, "../../sounds/bendir.wav") + + # BUTTON TO BROWSE SOUND FILE + self.open_file = Button( + self.parent, text="Browse...", command=self.browse_file + ) # see: def browse_file(self) + self.open_file.grid( + row=1, column=0, sticky=W, padx=(220, 6) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE + self.preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) + self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6)) ## SPS MODEL - #ANALYSIS WINDOW TYPE + # ANALYSIS WINDOW TYPE wtype_label = "Window type:" - Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=wtype_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.w_type = StringVar() - self.w_type.set("hamming") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hann", "hamming", "blackman", "blackmanharris") - window_option.grid(row=2, column=0, sticky=W, padx=(95,5), pady=(10,2)) - - #WINDOW SIZE + self.w_type.set("hamming") # initial value + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hann", + "hamming", + "blackman", + "blackmanharris", + ) + window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2)) + + # WINDOW SIZE M_label = "Window size (M):" - Label(self.parent, text=M_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=M_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.M = Entry(self.parent, justify=CENTER) self.M["width"] = 5 - self.M.grid(row=3,column=0, sticky=W, padx=(115,5), pady=(10,2)) + self.M.grid(row=3, column=0, sticky=W, padx=(115, 5), pady=(10, 2)) self.M.delete(0, END) self.M.insert(0, "2001") - #FFT SIZE + # FFT SIZE N_label = "FFT size (N) (power of two bigger than M):" - Label(self.parent, text=N_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=N_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.N = Entry(self.parent, justify=CENTER) self.N["width"] = 5 - self.N.grid(row=4,column=0, sticky=W, padx=(270,5), pady=(10,2)) + self.N.grid(row=4, column=0, sticky=W, padx=(270, 5), pady=(10, 2)) self.N.delete(0, END) self.N.insert(0, "2048") - #THRESHOLD MAGNITUDE + # THRESHOLD MAGNITUDE t_label = "Magnitude threshold (t) (in dB):" - Label(self.parent, text=t_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=t_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.t = Entry(self.parent, justify=CENTER) self.t["width"] = 5 - self.t.grid(row=5, column=0, sticky=W, padx=(205,5), pady=(10,2)) + self.t.grid(row=5, column=0, sticky=W, padx=(205, 5), pady=(10, 2)) self.t.delete(0, END) self.t.insert(0, "-80") - #MIN DURATION SINUSOIDAL TRACKS + # MIN DURATION SINUSOIDAL TRACKS minSineDur_label = "Minimum duration of sinusoidal tracks:" - Label(self.parent, text=minSineDur_label).grid(row=6, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=minSineDur_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.minSineDur = Entry(self.parent, justify=CENTER) self.minSineDur["width"] = 5 - self.minSineDur.grid(row=6, column=0, sticky=W, padx=(250,5), pady=(10,2)) + self.minSineDur.grid(row=6, column=0, sticky=W, padx=(250, 5), pady=(10, 2)) self.minSineDur.delete(0, END) self.minSineDur.insert(0, "0.02") - #MAX NUMBER PARALLEL SINUSOIDS + # MAX NUMBER PARALLEL SINUSOIDS maxnSines_label = "Maximum number of parallel sinusoids:" - Label(self.parent, text=maxnSines_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=maxnSines_label).grid( + row=7, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.maxnSines = Entry(self.parent, justify=CENTER) self.maxnSines["width"] = 5 - self.maxnSines.grid(row=7, column=0, sticky=W, padx=(250,5), pady=(10,2)) + self.maxnSines.grid(row=7, column=0, sticky=W, padx=(250, 5), pady=(10, 2)) self.maxnSines.delete(0, END) self.maxnSines.insert(0, "150") - #FREQUENCY DEVIATION ALLOWED - freqDevOffset_label = "Max frequency deviation in sinusoidal tracks (at freq 0):" - Label(self.parent, text=freqDevOffset_label).grid(row=8, column=0, sticky=W, padx=5, pady=(10,2)) + # FREQUENCY DEVIATION ALLOWED + freqDevOffset_label = ( + "Max frequency deviation in sinusoidal tracks (at freq 0):" + ) + Label(self.parent, text=freqDevOffset_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.freqDevOffset = Entry(self.parent, justify=CENTER) self.freqDevOffset["width"] = 5 - self.freqDevOffset.grid(row=8, column=0, sticky=W, padx=(350,5), pady=(10,2)) + self.freqDevOffset.grid(row=8, column=0, sticky=W, padx=(350, 5), pady=(10, 2)) self.freqDevOffset.delete(0, END) self.freqDevOffset.insert(0, "10") - #SLOPE OF THE FREQ DEVIATION + # SLOPE OF THE FREQ DEVIATION freqDevSlope_label = "Slope of the frequency deviation (as function of freq):" - Label(self.parent, text=freqDevSlope_label).grid(row=9, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=freqDevSlope_label).grid( + row=9, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.freqDevSlope = Entry(self.parent, justify=CENTER) self.freqDevSlope["width"] = 5 - self.freqDevSlope.grid(row=9, column=0, sticky=W, padx=(340,5), pady=(10,2)) + self.freqDevSlope.grid(row=9, column=0, sticky=W, padx=(340, 5), pady=(10, 2)) self.freqDevSlope.delete(0, END) self.freqDevSlope.insert(0, "0.001") - #DECIMATION FACTOR + # DECIMATION FACTOR stocf_label = "Stochastic approximation factor:" - Label(self.parent, text=stocf_label).grid(row=10, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=stocf_label).grid( + row=10, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.stocf = Entry(self.parent, justify=CENTER) self.stocf["width"] = 5 - self.stocf.grid(row=10, column=0, sticky=W, padx=(210,5), pady=(10,2)) + self.stocf.grid(row=10, column=0, sticky=W, padx=(210, 5), pady=(10, 2)) self.stocf.delete(0, END) self.stocf.insert(0, "0.2") - #BUTTON TO COMPUTE EVERYTHING + # BUTTON TO COMPUTE EVERYTHING self.compute = Button(self.parent, text="Compute", command=self.compute_model) - self.compute.grid(row=11, column=0, padx=5, pady=(10,2), sticky=W) + self.compute.grid(row=11, column=0, padx=5, pady=(10, 2), sticky=W) - #BUTTON TO PLAY SINE OUTPUT + # BUTTON TO PLAY SINE OUTPUT output_label = "Sinusoidal:" - Label(self.parent, text=output_label).grid(row=12, column=0, sticky=W, padx=5, pady=(10,0)) - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_spsModel_sines.wav')) - self.output.grid(row=12, column=0, padx=(80,5), pady=(10,0), sticky=W) - - #BUTTON TO PLAY STOCHASTIC OUTPUT + Label(self.parent, text=output_label).grid( + row=12, column=0, sticky=W, padx=5, pady=(10, 0) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_spsModel_sines.wav" + ), + ) + self.output.grid(row=12, column=0, padx=(80, 5), pady=(10, 0), sticky=W) + + # BUTTON TO PLAY STOCHASTIC OUTPUT output_label = "Stochastic:" - Label(self.parent, text=output_label).grid(row=22, column=0, sticky=W, padx=5, pady=(5,0)) - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_spsModel_stochastic.wav')) - self.output.grid(row=22, column=0, padx=(80,5), pady=(5,0), sticky=W) - - #BUTTON TO PLAY OUTPUT + Label(self.parent, text=output_label).grid( + row=22, column=0, sticky=W, padx=5, pady=(5, 0) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_spsModel_stochastic.wav" + ), + ) + self.output.grid(row=22, column=0, padx=(80, 5), pady=(5, 0), sticky=W) + + # BUTTON TO PLAY OUTPUT output_label = "Output:" - Label(self.parent, text=output_label).grid(row=23, column=0, sticky=W, padx=5, pady=(5,15)) - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_spsModel.wav')) + Label(self.parent, text=output_label).grid( + row=23, column=0, sticky=W, padx=5, pady=(5, 15) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_spsModel.wav" + ), + ) # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file(self): self.filename = filedialog.askopenfilename(**self.file_opt) - #set the text of the self.filelocation + # set the text of the self.filelocation self.filelocation.delete(0, END) - self.filelocation.insert(0,self.filename) + self.filelocation.insert(0, self.filename) def compute_model(self): @@ -167,7 +234,18 @@ def compute_model(self): freqDevSlope = float(self.freqDevSlope.get()) stocf = float(self.stocf.get()) - spsModel_function.main(inputFile, window, M, N, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf) + spsModel_function.main( + inputFile, + window, + M, + N, + t, + minSineDur, + maxnSines, + freqDevOffset, + freqDevSlope, + stocf, + ) except ValueError as errorMessage: messagebox.showerror("Input values error", str(errorMessage)) diff --git a/smstools/models/interface/spsModel_function.py b/smstools/models/interface/spsModel_function.py index bd4bc7cc..3cb3b1c7 100644 --- a/smstools/models/interface/spsModel_function.py +++ b/smstools/models/interface/spsModel_function.py @@ -7,92 +7,115 @@ from smstools.models import spsModel as SPS from smstools.models import utilFunctions as UF -def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02, - maxnSines=150, freqDevOffset=10, freqDevSlope=0.001, stocf=0.2): - """ - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size; N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks - maxnSines: maximum number of parallel sinusoids - freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0 - freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation - stocf: decimation factor used for the stochastic approximation - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # read input sound - (fs, x) = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # perform sinusoidal+sotchastic analysis - tfreq, tmag, tphase, stocEnv = SPS.spsModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf) - - # synthesize sinusoidal+stochastic model - y, ys, yst = SPS.spsModelSynth(tfreq, tmag, tphase, stocEnv, Ns, H, fs) - - # output sound file (monophonic with sampling rate of 44100) - outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_spsModel_sines.wav' - outputFileStochastic = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_spsModel_stochastic.wav' - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_spsModel.wav' - - # write sounds files for sinusoidal, residual, and the sum - UF.wavwrite(ys, fs, outputFileSines) - UF.wavwrite(yst, fs, outputFileStochastic) - UF.wavwrite(y, fs, outputFile) - - # create figure to plot - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 10000.0 - - # plot the input sound - plt.subplot(3,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - plt.subplot(3,1,2) - numFrames = int(stocEnv[:,0].size) - sizeEnv = int(stocEnv[0,:].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv - plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv[:,:int(sizeEnv*maxplotfreq/(.5*fs)+1)]), shading='auto') - plt.autoscale(tight=True) - - # plot sinusoidal frequencies on top of stochastic component - if (tfreq.shape[1] > 0): - sines = tfreq*np.less(tfreq,maxplotfreq) - sines[sines==0] = np.nan - numFrames = int(sines[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - plt.plot(frmTime, sines, color='k', ms=3, alpha=1) - plt.xlabel('time(s)') - plt.ylabel('Frequency(Hz)') - plt.autoscale(tight=True) - plt.title('sinusoidal + stochastic spectrogram') - - # plot the output sound - plt.subplot(3,1,3) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.ion() - plt.show() + +def main( + inputFile="../../sounds/bendir.wav", + window="hamming", + M=2001, + N=2048, + t=-80, + minSineDur=0.02, + maxnSines=150, + freqDevOffset=10, + freqDevSlope=0.001, + stocf=0.2, +): + """ + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size; N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks + maxnSines: maximum number of parallel sinusoids + freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0 + freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation + stocf: decimation factor used for the stochastic approximation + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # read input sound + (fs, x) = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # perform sinusoidal+sotchastic analysis + tfreq, tmag, tphase, stocEnv = SPS.spsModelAnal( + x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf + ) + + # synthesize sinusoidal+stochastic model + y, ys, yst = SPS.spsModelSynth(tfreq, tmag, tphase, stocEnv, Ns, H, fs) + + # output sound file (monophonic with sampling rate of 44100) + outputFileSines = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_spsModel_sines.wav" + ) + outputFileStochastic = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_spsModel_stochastic.wav" + ) + outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_spsModel.wav" + + # write sounds files for sinusoidal, residual, and the sum + UF.wavwrite(ys, fs, outputFileSines) + UF.wavwrite(yst, fs, outputFileStochastic) + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 10000.0 + + # plot the input sound + plt.subplot(3, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + plt.subplot(3, 1, 2) + numFrames = int(stocEnv[:, 0].size) + sizeEnv = int(stocEnv[0, :].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = (0.5 * fs) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs)) / sizeEnv + plt.pcolormesh( + frmTime, + binFreq, + np.transpose(stocEnv[:, : int(sizeEnv * maxplotfreq / (0.5 * fs) + 1)]), + shading="auto", + ) + plt.autoscale(tight=True) + + # plot sinusoidal frequencies on top of stochastic component + if tfreq.shape[1] > 0: + sines = tfreq * np.less(tfreq, maxplotfreq) + sines[sines == 0] = np.nan + numFrames = int(sines[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + plt.plot(frmTime, sines, color="k", ms=3, alpha=1) + plt.xlabel("time(s)") + plt.ylabel("Frequency(Hz)") + plt.autoscale(tight=True) + plt.title("sinusoidal + stochastic spectrogram") + + # plot the output sound + plt.subplot(3, 1, 3) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.ion() + plt.show() + if __name__ == "__main__": - main() + main() diff --git a/smstools/models/interface/stft_GUI_frame.py b/smstools/models/interface/stft_GUI_frame.py index 619a1401..ab55d6bc 100644 --- a/smstools/models/interface/stft_GUI_frame.py +++ b/smstools/models/interface/stft_GUI_frame.py @@ -17,7 +17,9 @@ def __init__(self, parent): def initUI(self): choose_label = "Input file (.wav, mono and 44100 sampling rate):" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation = Entry(self.parent) @@ -25,30 +27,47 @@ def initUI(self): self.filelocation["width"] = 25 self.filelocation.grid(row=1, column=0, sticky=W, padx=10) self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/piano.wav') + self.filelocation.insert(0, "../../sounds/piano.wav") # BUTTON TO BROWSE SOUND FILE - self.open_file = Button(self.parent, text="Browse...", command=self.browse_file) # see: def browse_file(self) - self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6)) # put it beside the filelocation textbox + self.open_file = Button( + self.parent, text="Browse...", command=self.browse_file + ) # see: def browse_file(self) + self.open_file.grid( + row=1, column=0, sticky=W, padx=(220, 6) + ) # put it beside the filelocation textbox # BUTTON TO PREVIEW SOUND FILE - self.preview = Button(self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get())) + self.preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6)) ## STFT # ANALYSIS WINDOW TYPE wtype_label = "Window type:" - Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=wtype_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.w_type = StringVar() self.w_type.set("hamming") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hann", "hamming", "blackman", - "blackmanharris") + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hann", + "hamming", + "blackman", + "blackmanharris", + ) window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2)) # WINDOW SIZE M_label = "Window size (M):" - Label(self.parent, text=M_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=M_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.M = Entry(self.parent, justify=CENTER) self.M["width"] = 5 self.M.grid(row=3, column=0, sticky=W, padx=(115, 5), pady=(10, 2)) @@ -57,7 +76,9 @@ def initUI(self): # FFT SIZE N_label = "FFT size (N) (power of two bigger than M):" - Label(self.parent, text=N_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=N_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.N = Entry(self.parent, justify=CENTER) self.N["width"] = 5 self.N.grid(row=4, column=0, sticky=W, padx=(270, 5), pady=(10, 2)) @@ -66,7 +87,9 @@ def initUI(self): # HOP SIZE H_label = "Hop size (H):" - Label(self.parent, text=H_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10, 2)) + Label(self.parent, text=H_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.H = Entry(self.parent, justify=CENTER) self.H["width"] = 5 self.H.grid(row=5, column=0, sticky=W, padx=(95, 5), pady=(10, 2)) @@ -79,17 +102,26 @@ def initUI(self): # BUTTON TO PLAY OUTPUT output_label = "Output:" - Label(self.parent, text=output_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10, 15)) - self.output = Button(self.parent, text=">", command=lambda: UF.wavplay( - 'output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_stft.wav')) + Label(self.parent, text=output_label).grid( + row=7, column=0, sticky=W, padx=5, pady=(10, 15) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_stft.wav" + ), + ) self.output.grid(row=7, column=0, padx=(60, 5), pady=(10, 15), sticky=W) # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file(self): diff --git a/smstools/models/interface/stft_function.py b/smstools/models/interface/stft_function.py index 4c27a515..2875a0fb 100644 --- a/smstools/models/interface/stft_function.py +++ b/smstools/models/interface/stft_function.py @@ -7,82 +7,89 @@ from smstools.models import utilFunctions as UF from smstools.models import stft as STFT -def main(inputFile = '../../sounds/piano.wav', window = 'hamming', M = 1024, N = 1024, H = 512): - """ - analysis/synthesis using the STFT - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size - N: fft size (power of two, bigger or equal than M) - H: hop size (at least 1/2 of analysis window size to have good overlap-add) - """ - - # read input sound (monophonic with sampling rate of 44100) - fs, x = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # compute the magnitude and phase spectrogram - mX, pX = STFT.stftAnal(x, w, N, H) - - # perform the inverse stft - y = STFT.stftSynth(mX, pX, M, H) - - # output sound file (monophonic with sampling rate of 44100) - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stft.wav' - - # write the sound resulting from the inverse stft - UF.wavwrite(y, fs, outputFile) - - # create figure to plot - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 5000.0 - - # plot the input sound - plt.subplot(4,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - # plot magnitude spectrogram - plt.subplot(4,1,2) - numFrames = int(mX[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = fs*np.arange(N*maxplotfreq/fs)/N - plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:int(N*maxplotfreq/fs+1)])) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.title('magnitude spectrogram') - plt.autoscale(tight=True) - - # plot the phase spectrogram - plt.subplot(4,1,3) - numFrames = int(pX[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = fs*np.arange(N*maxplotfreq//fs)//N - plt.pcolormesh(frmTime, binFreq, np.transpose(np.diff(pX[:,:int(N*maxplotfreq/fs+1)],axis=1))) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.title('phase spectrogram (derivative)') - plt.autoscale(tight=True) - - # plot the output sound - plt.subplot(4,1,4) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.ion() - plt.show() + +def main(inputFile="../../sounds/piano.wav", window="hamming", M=1024, N=1024, H=512): + """ + analysis/synthesis using the STFT + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size + N: fft size (power of two, bigger or equal than M) + H: hop size (at least 1/2 of analysis window size to have good overlap-add) + """ + + # read input sound (monophonic with sampling rate of 44100) + fs, x = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # compute the magnitude and phase spectrogram + mX, pX = STFT.stftAnal(x, w, N, H) + + # perform the inverse stft + y = STFT.stftSynth(mX, pX, M, H) + + # output sound file (monophonic with sampling rate of 44100) + outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_stft.wav" + + # write the sound resulting from the inverse stft + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 5000.0 + + # plot the input sound + plt.subplot(4, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + # plot magnitude spectrogram + plt.subplot(4, 1, 2) + numFrames = int(mX[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = fs * np.arange(N * maxplotfreq / fs) / N + plt.pcolormesh( + frmTime, binFreq, np.transpose(mX[:, : int(N * maxplotfreq / fs + 1)]) + ) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.title("magnitude spectrogram") + plt.autoscale(tight=True) + + # plot the phase spectrogram + plt.subplot(4, 1, 3) + numFrames = int(pX[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = fs * np.arange(N * maxplotfreq // fs) // N + plt.pcolormesh( + frmTime, + binFreq, + np.transpose(np.diff(pX[:, : int(N * maxplotfreq / fs + 1)], axis=1)), + ) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.title("phase spectrogram (derivative)") + plt.autoscale(tight=True) + + # plot the output sound + plt.subplot(4, 1, 4) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.ion() + plt.show() if __name__ == "__main__": - main() + main() diff --git a/smstools/models/interface/stochasticModel_GUI_frame.py b/smstools/models/interface/stochasticModel_GUI_frame.py index f1609928..0f55f456 100644 --- a/smstools/models/interface/stochasticModel_GUI_frame.py +++ b/smstools/models/interface/stochasticModel_GUI_frame.py @@ -7,6 +7,7 @@ from smstools.models.interface import stochasticModel_function from smstools.models import utilFunctions as UF + class StochasticModel_frame: def __init__(self, parent): @@ -17,95 +18,123 @@ def __init__(self, parent): def initUI(self): choose_label = "Input file (.wav, mono and 44100 sampling rate):" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) - #TEXTBOX TO PRINT PATH OF THE SOUND FILE + # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation = Entry(self.parent) self.filelocation.focus_set() self.filelocation["width"] = 25 - self.filelocation.grid(row=1,column=0, sticky=W, padx=10) + self.filelocation.grid(row=1, column=0, sticky=W, padx=10) self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/ocean.wav') - - #BUTTON TO BROWSE SOUND FILE - self.open_file = Button(self.parent, text="Browse...", command=self.browse_file) #see: def browse_file(self) - self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE - self.preview = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation.get())) - self.preview.grid(row=1, column=0, sticky=W, padx=(306,6)) + self.filelocation.insert(0, "../../sounds/ocean.wav") + + # BUTTON TO BROWSE SOUND FILE + self.open_file = Button( + self.parent, text="Browse...", command=self.browse_file + ) # see: def browse_file(self) + self.open_file.grid( + row=1, column=0, sticky=W, padx=(220, 6) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE + self.preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) + self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6)) ## STOCHASTIC MODEL - #HOP SIZE + # HOP SIZE H_label = "Hop size (H):" - Label(self.parent, text=H_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=H_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.H = Entry(self.parent, justify=CENTER) self.H["width"] = 5 - self.H.grid(row=2, column=0, sticky=W, padx=(90,5), pady=(10,2)) + self.H.grid(row=2, column=0, sticky=W, padx=(90, 5), pady=(10, 2)) self.H.delete(0, END) self.H.insert(0, "256") - #FFT size + # FFT size N_label = "FFT size (N):" - Label(self.parent, text=N_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=N_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.N = Entry(self.parent, justify=CENTER) self.N["width"] = 5 - self.N.grid(row=3, column=0, sticky=W, padx=(90,5), pady=(10,2)) + self.N.grid(row=3, column=0, sticky=W, padx=(90, 5), pady=(10, 2)) self.N.delete(0, END) self.N.insert(0, "512") - #DECIMATION FACTOR + # DECIMATION FACTOR stocf_label = "Decimation factor (bigger than 0, max of 1):" - Label(self.parent, text=stocf_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=stocf_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.stocf = Entry(self.parent, justify=CENTER) self.stocf["width"] = 5 - self.stocf.grid(row=4, column=0, sticky=W, padx=(285,5), pady=(10,2)) + self.stocf.grid(row=4, column=0, sticky=W, padx=(285, 5), pady=(10, 2)) self.stocf.delete(0, END) self.stocf.insert(0, "0.1") - #MEl SCALE + # MEl SCALE melScale_label = "Approximation scale (0: linear, 1: mel):" - Label(self.parent, text=melScale_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=melScale_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.melScale = Entry(self.parent, justify=CENTER) self.melScale["width"] = 5 - self.melScale.grid(row=5, column=0, sticky=W, padx=(285,5), pady=(10,2)) + self.melScale.grid(row=5, column=0, sticky=W, padx=(285, 5), pady=(10, 2)) self.melScale.delete(0, END) self.melScale.insert(0, "1") - #NORMALIZATION + # NORMALIZATION normalization_label = "Amplitude normalization (0: no, 1: yes):" - Label(self.parent, text=normalization_label).grid(row=6, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=normalization_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(10, 2) + ) self.normalization = Entry(self.parent, justify=CENTER) self.normalization["width"] = 5 - self.normalization.grid(row=6, column=0, sticky=W, padx=(285,5), pady=(10,2)) + self.normalization.grid(row=6, column=0, sticky=W, padx=(285, 5), pady=(10, 2)) self.normalization.delete(0, END) self.normalization.insert(0, "1") - #BUTTON TO COMPUTE EVERYTHING + # BUTTON TO COMPUTE EVERYTHING self.compute = Button(self.parent, text="Compute", command=self.compute_model) - self.compute.grid(row=7, column=0, padx=5, pady=(10,2), sticky=W) + self.compute.grid(row=7, column=0, padx=5, pady=(10, 2), sticky=W) - #BUTTON TO PLAY OUTPUT + # BUTTON TO PLAY OUTPUT output_label = "Stochastic:" - Label(self.parent, text=output_label).grid(row=8, column=0, sticky=W, padx=5, pady=(10,15)) - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_stochasticModel.wav')) - self.output.grid(row=8, column=0, padx=(80,5), pady=(10,15), sticky=W) + Label(self.parent, text=output_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(10, 15) + ) + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_stochasticModel.wav" + ), + ) + self.output.grid(row=8, column=0, padx=(80, 5), pady=(10, 15), sticky=W) # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file(self): self.filename = filedialog.askopenfilename(**self.file_opt) - #set the text of the self.filelocation + # set the text of the self.filelocation self.filelocation.delete(0, END) - self.filelocation.insert(0,self.filename) + self.filelocation.insert(0, self.filename) def compute_model(self): @@ -117,7 +146,9 @@ def compute_model(self): melScale = int(self.melScale.get()) normalization = int(self.normalization.get()) - stochasticModel_function.main(inputFile, H, N, stocf, melScale, normalization) + stochasticModel_function.main( + inputFile, H, N, stocf, melScale, normalization + ) except ValueError as errorMessage: messagebox.showerror("Input values error", errorMessage) diff --git a/smstools/models/interface/stochasticModel_function.py b/smstools/models/interface/stochasticModel_function.py index 1d22a31a..7414e074 100644 --- a/smstools/models/interface/stochasticModel_function.py +++ b/smstools/models/interface/stochasticModel_function.py @@ -8,65 +8,80 @@ from smstools.models import stochasticModel as STM from smstools.models import stft as STFT -def main(inputFile='../../sounds/ocean.wav', H=256, N=512, stocf=.1, melScale=1, normalization=1): - """ - inputFile: input sound file (monophonic with sampling rate of 44100) - H: hop size, N: fft size - stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1) - melScale: frequency approximation scale (0: linear approximation, 1: mel frequency approximation) - normalization: amplitude normalization of output (0: no normalization, 1: normalization to input amplitude) - """ - - # read input sound - (fs, x) = UF.wavread(inputFile) - - # compute stochastic model - stocEnv = STM.stochasticModelAnal(x, H, N, stocf, fs, melScale) - - # synthesize sound from stochastic model - y = STM.stochasticModelSynth(stocEnv, H, N, fs, melScale) - - if (normalization==1): - y = y * max(x)/max(y) - - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stochasticModel.wav' - - # write output sound - UF.wavwrite(y, fs, outputFile) - - # create figure to plot - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 10000.0 - - # plot input spectrogram - plt.subplot(2,1,1) - mX, pX = STFT.stftAnal(x, hann(N), N, H) - numFrames = int(mX[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = fs*np.arange(N*maxplotfreq/fs)/N - plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:int(N*maxplotfreq/fs+1)])) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.title('input magnitude spectrogram') - plt.autoscale(tight=True) - - # plot the output sound - plt.subplot(2,1,2) - mY, pY = STFT.stftAnal(y, hann(N), N, H) - numFrames = int(mY[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = fs*np.arange(N*maxplotfreq/fs)/N - plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:,:int(N*maxplotfreq/fs+1)])) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.title('input magnitude spectrogram') - plt.autoscale(tight=True) - - plt.tight_layout() - plt.ion() - plt.show() + +def main( + inputFile="../../sounds/ocean.wav", + H=256, + N=512, + stocf=0.1, + melScale=1, + normalization=1, +): + """ + inputFile: input sound file (monophonic with sampling rate of 44100) + H: hop size, N: fft size + stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1) + melScale: frequency approximation scale (0: linear approximation, 1: mel frequency approximation) + normalization: amplitude normalization of output (0: no normalization, 1: normalization to input amplitude) + """ + + # read input sound + (fs, x) = UF.wavread(inputFile) + + # compute stochastic model + stocEnv = STM.stochasticModelAnal(x, H, N, stocf, fs, melScale) + + # synthesize sound from stochastic model + y = STM.stochasticModelSynth(stocEnv, H, N, fs, melScale) + + if normalization == 1: + y = y * max(x) / max(y) + + outputFile = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_stochasticModel.wav" + ) + + # write output sound + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 10000.0 + + # plot input spectrogram + plt.subplot(2, 1, 1) + mX, pX = STFT.stftAnal(x, hann(N), N, H) + numFrames = int(mX[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = fs * np.arange(N * maxplotfreq / fs) / N + plt.pcolormesh( + frmTime, binFreq, np.transpose(mX[:, : int(N * maxplotfreq / fs + 1)]) + ) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.title("input magnitude spectrogram") + plt.autoscale(tight=True) + + # plot the output sound + plt.subplot(2, 1, 2) + mY, pY = STFT.stftAnal(y, hann(N), N, H) + numFrames = int(mY[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = fs * np.arange(N * maxplotfreq / fs) / N + plt.pcolormesh( + frmTime, binFreq, np.transpose(mY[:, : int(N * maxplotfreq / fs + 1)]) + ) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.title("input magnitude spectrogram") + plt.autoscale(tight=True) + + plt.tight_layout() + plt.ion() + plt.show() + if __name__ == "__main__": - main() + main() diff --git a/smstools/models/sineModel.py b/smstools/models/sineModel.py index 0028f7a7..ca29700f 100644 --- a/smstools/models/sineModel.py +++ b/smstools/models/sineModel.py @@ -11,19 +11,21 @@ def sineTracking(pfreq, pmag, pphase, tfreq, freqDevOffset=20, freqDevSlope=0.01): """ - Tracking sinusoids from one frame to the next - pfreq, pmag, pphase: frequencies and magnitude of current frame - tfreq: frequencies of incoming tracks from previous frame - freqDevOffset: maximum frequency deviation at 0Hz - freqDevSlope: slope increase of maximum frequency deviation - returns tfreqn, tmagn, tphasen: frequency, magnitude and phase of tracks - """ + Tracking sinusoids from one frame to the next + pfreq, pmag, pphase: frequencies and magnitude of current frame + tfreq: frequencies of incoming tracks from previous frame + freqDevOffset: maximum frequency deviation at 0Hz + freqDevSlope: slope increase of maximum frequency deviation + returns tfreqn, tmagn, tphasen: frequency, magnitude and phase of tracks + """ tfreqn = np.zeros(tfreq.size) # initialize array for output frequencies tmagn = np.zeros(tfreq.size) # initialize array for output magnitudes tphasen = np.zeros(tfreq.size) # initialize array for output phases pindexes = np.array(np.nonzero(pfreq), dtype=int)[0] # indexes of current peaks - incomingTracks = np.array(np.nonzero(tfreq), dtype=int)[0] # indexes of incoming tracks + incomingTracks = np.array(np.nonzero(tfreq), dtype=int)[ + 0 + ] # indexes of incoming tracks newTracks = np.zeros(tfreq.size, dtype=int) - 1 # initialize to -1 new tracks magOrder = np.argsort(-pmag[pindexes]) # order current peaks by magnitude pfreqt = np.copy(pfreq) # copy current peaks to temporary array @@ -35,12 +37,22 @@ def sineTracking(pfreq, pmag, pphase, tfreq, freqDevOffset=20, freqDevSlope=0.01 for i in magOrder: # iterate over current peaks if incomingTracks.size == 0: # break when no more incoming tracks break - track = np.argmin(abs(pfreqt[i] - tfreq[incomingTracks])) # closest incoming track to peak - freqDistance = abs(pfreq[i] - tfreq[incomingTracks[track]]) # measure freq distance - if freqDistance < (freqDevOffset + freqDevSlope * pfreq[i]): # choose track if distance is small + track = np.argmin( + abs(pfreqt[i] - tfreq[incomingTracks]) + ) # closest incoming track to peak + freqDistance = abs( + pfreq[i] - tfreq[incomingTracks[track]] + ) # measure freq distance + if freqDistance < ( + freqDevOffset + freqDevSlope * pfreq[i] + ): # choose track if distance is small newTracks[incomingTracks[track]] = i # assign peak index to track index - incomingTracks = np.delete(incomingTracks, track) # delete index of track in incomming tracks - indext = np.array(np.nonzero(newTracks != -1), dtype=int)[0] # indexes of assigned tracks + incomingTracks = np.delete( + incomingTracks, track + ) # delete index of track in incomming tracks + indext = np.array(np.nonzero(newTracks != -1), dtype=int)[ + 0 + ] # indexes of assigned tracks if indext.size > 0: indexp = newTracks[indext] # indexes of assigned peaks tfreqn[indext] = pfreqt[indexp] # output freq tracks @@ -51,29 +63,33 @@ def sineTracking(pfreq, pmag, pphase, tfreq, freqDevOffset=20, freqDevSlope=0.01 pphaset = np.delete(pphaset, indexp) # delete used peaks # create new tracks from non used peaks - emptyt = np.array(np.nonzero(tfreq == 0), dtype=int)[0] # indexes of empty incoming tracks + emptyt = np.array(np.nonzero(tfreq == 0), dtype=int)[ + 0 + ] # indexes of empty incoming tracks peaksleft = np.argsort(-pmagt) # sort left peaks by magnitude - if ((peaksleft.size > 0) & (emptyt.size >= peaksleft.size)): # fill empty tracks - tfreqn[emptyt[:peaksleft.size]] = pfreqt[peaksleft] - tmagn[emptyt[:peaksleft.size]] = pmagt[peaksleft] - tphasen[emptyt[:peaksleft.size]] = pphaset[peaksleft] - elif ((peaksleft.size > 0) & (emptyt.size < peaksleft.size)): # add more tracks if necessary - tfreqn[emptyt] = pfreqt[peaksleft[:emptyt.size]] - tmagn[emptyt] = pmagt[peaksleft[:emptyt.size]] - tphasen[emptyt] = pphaset[peaksleft[:emptyt.size]] - tfreqn = np.append(tfreqn, pfreqt[peaksleft[emptyt.size:]]) - tmagn = np.append(tmagn, pmagt[peaksleft[emptyt.size:]]) - tphasen = np.append(tphasen, pphaset[peaksleft[emptyt.size:]]) + if (peaksleft.size > 0) & (emptyt.size >= peaksleft.size): # fill empty tracks + tfreqn[emptyt[: peaksleft.size]] = pfreqt[peaksleft] + tmagn[emptyt[: peaksleft.size]] = pmagt[peaksleft] + tphasen[emptyt[: peaksleft.size]] = pphaset[peaksleft] + elif (peaksleft.size > 0) & ( + emptyt.size < peaksleft.size + ): # add more tracks if necessary + tfreqn[emptyt] = pfreqt[peaksleft[: emptyt.size]] + tmagn[emptyt] = pmagt[peaksleft[: emptyt.size]] + tphasen[emptyt] = pphaset[peaksleft[: emptyt.size]] + tfreqn = np.append(tfreqn, pfreqt[peaksleft[emptyt.size :]]) + tmagn = np.append(tmagn, pmagt[peaksleft[emptyt.size :]]) + tphasen = np.append(tphasen, pphaset[peaksleft[emptyt.size :]]) return tfreqn, tmagn, tphasen def cleaningSineTracks(tfreq, minTrackLength=3): """ - Delete short fragments of a collection of sinusoidal tracks - tfreq: frequency of tracks - minTrackLength: minimum duration of tracks in number of frames - returns tfreqn: output frequency of tracks - """ + Delete short fragments of a collection of sinusoidal tracks + tfreq: frequency of tracks + minTrackLength: minimum duration of tracks in number of frames + returns tfreqn: output frequency of tracks + """ if tfreq.shape[1] == 0: # if no tracks return input return tfreq @@ -81,27 +97,37 @@ def cleaningSineTracks(tfreq, minTrackLength=3): nTracks = tfreq[0, :].size # number of tracks in a frame for t in range(nTracks): # iterate over all tracks trackFreqs = tfreq[:, t] # frequencies of one track - trackBegs = np.nonzero((trackFreqs[:nFrames - 1] <= 0) # begining of track contours - & (trackFreqs[1:] > 0))[0] + 1 + trackBegs = ( + np.nonzero( + (trackFreqs[: nFrames - 1] <= 0) # begining of track contours + & (trackFreqs[1:] > 0) + )[0] + + 1 + ) if trackFreqs[0] > 0: trackBegs = np.insert(trackBegs, 0, 0) - trackEnds = np.nonzero((trackFreqs[:nFrames - 1] > 0) # end of track contours - & (trackFreqs[1:] <= 0))[0] + 1 + trackEnds = ( + np.nonzero( + (trackFreqs[: nFrames - 1] > 0) # end of track contours + & (trackFreqs[1:] <= 0) + )[0] + + 1 + ) if trackFreqs[nFrames - 1] > 0: trackEnds = np.append(trackEnds, nFrames - 1) trackLengths = 1 + trackEnds - trackBegs # lengths of trach contours for i, j in zip(trackBegs, trackLengths): # delete short track contours if j <= minTrackLength: - trackFreqs[i:i + j] = 0 + trackFreqs[i : i + j] = 0 return tfreq def sineModel(x, fs, w, N, t): """ - Analysis/synthesis of a sound using the sinusoidal model, without sine tracking - x: input array sound, w: analysis window, N: size of complex spectrum, t: threshold in negative dB - returns y: output array sound - """ + Analysis/synthesis of a sound using the sinusoidal model, without sine tracking + x: input array sound, w: analysis window, N: size of complex spectrum, t: threshold in negative dB + returns y: output array sound + """ hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor @@ -115,64 +141,93 @@ def sineModel(x, fs, w, N, t): w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) # initialize synthesis window ow = triang(2 * H) # triangular window - sw[hNs - H:hNs + H] = ow # add triangular window + sw[hNs - H : hNs + H] = ow # add triangular window bh = blackmanharris(Ns) # blackmanharris window bh = bh / sum(bh) # normalized blackmanharris window - sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H] # normalized synthesis window + sw[hNs - H : hNs + H] = ( + sw[hNs - H : hNs + H] / bh[hNs - H : hNs + H] + ) # normalized synthesis window while pin < pend: # while input sound pointer is within sound # -----analysis----- - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect locations of peaks - iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation + iploc, ipmag, ipphase = UF.peakInterp( + mX, pX, ploc + ) # refine peak values by interpolation ipfreq = fs * iploc / float(N) # convert peak locations to Hertz # -----synthesis----- - Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs) # generate sines in the spectrum + Y = UF.genSpecSines( + ipfreq, ipmag, ipphase, Ns, fs + ) # generate sines in the spectrum fftbuffer = np.real(ifft(Y)) # compute inverse FFT - yw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window - yw[hNs - 1:] = fftbuffer[:hNs + 1] - y[pin - hNs:pin + hNs] += sw * yw # overlap-add and apply a synthesis window + yw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + yw[hNs - 1 :] = fftbuffer[: hNs + 1] + y[pin - hNs : pin + hNs] += sw * yw # overlap-add and apply a synthesis window pin += H # advance sound pointer return y -def sineModelAnal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01): +def sineModelAnal( + x, + fs, + w, + N, + H, + t, + maxnSines=100, + minSineDur=0.01, + freqDevOffset=20, + freqDevSlope=0.01, +): """ - Analysis of a sound using the sinusoidal model with sine tracking - x: input array sound, w: analysis window, N: size of complex spectrum, H: hop-size, t: threshold in negative dB - maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds - freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation - returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks - """ - - if (minSineDur < 0): # raise error if minSineDur is smaller than 0 + Analysis of a sound using the sinusoidal model with sine tracking + x: input array sound, w: analysis window, N: size of complex spectrum, H: hop-size, t: threshold in negative dB + maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds + freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation + returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks + """ + + if minSineDur < 0: # raise error if minSineDur is smaller than 0 raise ValueError("Minimum duration of sine tracks smaller than 0") hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor - x = np.append(np.zeros(hM2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hM2)) # add zeros at the end to analyze last sample pin = hM1 # initialize sound pointer in middle of analysis window pend = x.size - hM1 # last sample to start a frame w = w / sum(w) # normalize analysis window tfreq = np.array([]) while pin < pend: # while input sound pointer is within sound - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect locations of peaks - iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation + iploc, ipmag, ipphase = UF.peakInterp( + mX, pX, ploc + ) # refine peak values by interpolation ipfreq = fs * iploc / float(N) # convert peak locations to Hertz # perform sinusoidal tracking by adding peaks to trajectories - tfreq, tmag, tphase = sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope) - tfreq = np.resize(tfreq, min(maxnSines, tfreq.size)) # limit number of tracks to maxnSines - tmag = np.resize(tmag, min(maxnSines, tmag.size)) # limit number of tracks to maxnSines - tphase = np.resize(tphase, min(maxnSines, tphase.size)) # limit number of tracks to maxnSines + tfreq, tmag, tphase = sineTracking( + ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope + ) + tfreq = np.resize( + tfreq, min(maxnSines, tfreq.size) + ) # limit number of tracks to maxnSines + tmag = np.resize( + tmag, min(maxnSines, tmag.size) + ) # limit number of tracks to maxnSines + tphase = np.resize( + tphase, min(maxnSines, tphase.size) + ) # limit number of tracks to maxnSines jtfreq = np.zeros(maxnSines) # temporary output array jtmag = np.zeros(maxnSines) # temporary output array jtphase = np.zeros(maxnSines) # temporary output array - jtfreq[:tfreq.size] = tfreq # save track frequencies to temporary array - jtmag[:tmag.size] = tmag # save track magnitudes to temporary array - jtphase[:tphase.size] = tphase # save track magnitudes to temporary array + jtfreq[: tfreq.size] = tfreq # save track frequencies to temporary array + jtmag[: tmag.size] = tmag # save track magnitudes to temporary array + jtphase[: tphase.size] = tphase # save track magnitudes to temporary array if pin == hM1: # if first frame initialize output sine tracks xtfreq = jtfreq xtmag = jtmag @@ -189,11 +244,11 @@ def sineModelAnal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffse def sineModelSynth(tfreq, tmag, tphase, N, H, fs): """ - Synthesis of a sound using the sinusoidal model - tfreq,tmag,tphase: frequencies, magnitudes and phases of sinusoids - N: synthesis FFT size, H: hop size, fs: sampling rate - returns y: output array sound - """ + Synthesis of a sound using the sinusoidal model + tfreq,tmag,tphase: frequencies, magnitudes and phases of sinusoids + N: synthesis FFT size, H: hop size, fs: sampling rate + returns y: output array sound + """ hN = N // 2 # half of FFT size for synthesis L = tfreq.shape[0] # number of frames @@ -202,23 +257,29 @@ def sineModelSynth(tfreq, tmag, tphase, N, H, fs): y = np.zeros(ysize) # initialize output array sw = np.zeros(N) # initialize synthesis window ow = triang(2 * H) # triangular window - sw[hN - H:hN + H] = ow # add triangular window + sw[hN - H : hN + H] = ow # add triangular window bh = blackmanharris(N) # blackmanharris window bh = bh / sum(bh) # normalized blackmanharris window - sw[hN - H:hN + H] = sw[hN - H:hN + H] / bh[hN - H:hN + H] # normalized synthesis window + sw[hN - H : hN + H] = ( + sw[hN - H : hN + H] / bh[hN - H : hN + H] + ) # normalized synthesis window lastytfreq = tfreq[0, :] # initialize synthesis frequencies - ytphase = 2 * np.pi * np.random.rand(tfreq[0, :].size) # initialize synthesis phases + ytphase = ( + 2 * np.pi * np.random.rand(tfreq[0, :].size) + ) # initialize synthesis phases for l in range(L): # iterate over all frames - if (tphase.size > 0): # if no phases generate them + if tphase.size > 0: # if no phases generate them ytphase = tphase[l, :] else: ytphase += (np.pi * (lastytfreq + tfreq[l, :]) / fs) * H # propagate phases - # Y = UF.genSpecSines_p(tfreq[l,:], tmag[l,:], ytphase, N, fs) # generate sines in the spectrum (python version) - Y = UF.genSpecSines(tfreq[l, :], tmag[l, :], ytphase, N, fs) # generate sines in the spectrum + # Y = UF.genSpecSines_p(tfreq[l,:], tmag[l,:], ytphase, N, fs) # generate sines in the spectrum (python version) + Y = UF.genSpecSines( + tfreq[l, :], tmag[l, :], ytphase, N, fs + ) # generate sines in the spectrum lastytfreq = tfreq[l, :] # save frequency for phase propagation ytphase = ytphase % (2 * np.pi) # make phase inside 2*pi yw = np.real(fftshift(ifft(Y))) # compute inverse FFT - y[pout:pout + N] += sw * yw # overlap-add and apply a synthesis window + y[pout : pout + N] += sw * yw # overlap-add and apply a synthesis window pout += H # advance sound pointer y = np.delete(y, range(hN)) # delete half of first window y = np.delete(y, range(y.size - hN, y.size)) # delete half of the last window diff --git a/smstools/models/sprModel.py b/smstools/models/sprModel.py index 40450b08..38eb7572 100644 --- a/smstools/models/sprModel.py +++ b/smstools/models/sprModel.py @@ -9,6 +9,7 @@ from smstools.models import sineModel as SM from smstools.models import utilFunctions as UF + def sprModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope): """ Analysis of a sound using the sinusoidal plus residual model @@ -21,11 +22,16 @@ def sprModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDe """ # perform sinusoidal analysis - tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope) + tfreq, tmag, tphase = SM.sineModelAnal( + x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope + ) Ns = 512 - xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs) # subtract sinusoids from original sound + xr = UF.sineSubtraction( + x, Ns, H, tfreq, tmag, tphase, fs + ) # subtract sinusoids from original sound return tfreq, tmag, tphase, xr + def sprModelSynth(tfreq, tmag, tphase, xr, N, H, fs): """ Synthesis of a sound using the sinusoidal plus residual model @@ -34,10 +40,13 @@ def sprModelSynth(tfreq, tmag, tphase, xr, N, H, fs): returns y: output sound, y: sinusoidal component """ - ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs) # synthesize sinusoids - y = ys[:min(ys.size, xr.size)]+xr[:min(ys.size, xr.size)] # sum sinusoids and residual components + ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs) # synthesize sinusoids + y = ( + ys[: min(ys.size, xr.size)] + xr[: min(ys.size, xr.size)] + ) # sum sinusoids and residual components return y, ys + def sprModel(x, fs, w, N, t): """ Analysis/synthesis of a sound using the sinusoidal plus residual model, one frame at a time @@ -46,49 +55,53 @@ def sprModel(x, fs, w, N, t): returns y: output sound, ys: sinusoidal component, xr: residual component """ - hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding - hM2 = int(math.floor(w.size/2)) # half analysis window size by floor - Ns = 512 # FFT size for synthesis (even) - H = Ns//4 # Hop size used for analysis and synthesis - hNs = Ns//2 - pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window - pend = x.size - max(hNs, hM1) # last sample to start a frame - ysw = np.zeros(Ns) # initialize output sound frame - xrw = np.zeros(Ns) # initialize output sound frame - ys = np.zeros(x.size) # initialize output array - xr = np.zeros(x.size) # initialize output array - w = w / sum(w) # normalize analysis window + hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding + hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor + Ns = 512 # FFT size for synthesis (even) + H = Ns // 4 # Hop size used for analysis and synthesis + hNs = Ns // 2 + pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window + pend = x.size - max(hNs, hM1) # last sample to start a frame + ysw = np.zeros(Ns) # initialize output sound frame + xrw = np.zeros(Ns) # initialize output sound frame + ys = np.zeros(x.size) # initialize output array + xr = np.zeros(x.size) # initialize output array + w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) - ow = triang(2*H) # overlapping window - sw[hNs-H:hNs+H] = ow - bh = blackmanharris(Ns) # synthesis window - bh = bh / sum(bh) # normalize synthesis window - wr = bh # window for residual - sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] - while pin 1): # raise exception if decimation factor too big + if stocf > 1: # raise exception if decimation factor too big raise ValueError("Stochastic decimation factor above 1") - if (H <= 0): # raise error if hop size 0 or negative + if H <= 0: # raise error if hop size 0 or negative raise ValueError("Hop size (H) smaller or equal to 0") if not (UF.isPower2(N)): # raise error if N not a power of two raise ValueError("FFT size (N) is not a power of 2") w = hann(N) # analysis window - x = np.append(np.zeros(No2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(No2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(No2)) # add zeros at the end to analyze last sample pin = No2 # initialize sound pointer in middle of analysis window pend = x.size - No2 # last sample to start a frame if melScale == 1: - binFreqsMel = hertz_to_mel(np.arange(hN)*fs/float(N)) + binFreqsMel = hertz_to_mel(np.arange(hN) * fs / float(N)) uniformMelFreq = np.linspace(binFreqsMel[0], binFreqsMel[-1], hN) while pin <= pend: - xw = x[pin - No2:pin + No2] * w # window the input sound + xw = x[pin - No2 : pin + No2] * w # window the input sound X = fft(xw) # compute FFT mX = 20 * np.log10(abs(X[:hN])) # magnitude spectrum of positive frequencies if melScale == 1: spl = splrep(binFreqsMel, np.maximum(-200, mX)) - mY = resample(splev(uniformMelFreq, spl), int(stocf * hN)) # decimate the mag spectrum + mY = resample( + splev(uniformMelFreq, spl), int(stocf * hN) + ) # decimate the mag spectrum else: - mY = resample(np.maximum(-200, mX), int(stocf * hN)) # decimate the mag spectrum + mY = resample( + np.maximum(-200, mX), int(stocf * hN) + ) # decimate the mag spectrum if pin == No2: # first frame stocEnv = np.array([mY]) else: # rest of frames @@ -71,12 +80,12 @@ def stochasticModelAnal(x, H, N, stocf, fs=44100, melScale=1): def stochasticModelSynth(stocEnv, H, N, fs=44100, melScale=1): """ - Stochastic synthesis of a sound - stocEnv: stochastic envelope; H: hop size; N: fft size - fs: sampling rate - melScale: choose between linear scale, 0, or mel scale, 1 (should match the analysis) - returns y: output sound - """ + Stochastic synthesis of a sound + stocEnv: stochastic envelope; H: hop size; N: fft size + fs: sampling rate + melScale: choose between linear scale, 0, or mel scale, 1 (should match the analysis) + returns y: output sound + """ if not (UF.isPower2(N)): # raise error if N not a power of two raise ValueError("N is not a power of two") @@ -89,7 +98,7 @@ def stochasticModelSynth(stocEnv, H, N, fs=44100, melScale=1): ws = 2 * hann(N) # synthesis window pout = 0 # output sound pointer if melScale == 1: - binFreqsMel = hertz_to_mel(np.arange(hN)*fs/float(N)) + binFreqsMel = hertz_to_mel(np.arange(hN) * fs / float(N)) uniformMelFreq = np.linspace(binFreqsMel[0], binFreqsMel[-1], hN) for l in range(L): mY = resample(stocEnv[l, :], hN) # interpolate to original size @@ -99,9 +108,11 @@ def stochasticModelSynth(stocEnv, H, N, fs=44100, melScale=1): pY = 2 * np.pi * np.random.rand(hN) # generate phase random values Y = np.zeros(N, dtype=complex) # initialize synthesis spectrum Y[:hN] = 10 ** (mY / 20) * np.exp(1j * pY) # generate positive freq. - Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp(-1j * pY[-2:0:-1]) # generate negative freq. + Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp( + -1j * pY[-2:0:-1] + ) # generate negative freq. fftbuffer = np.real(ifft(Y)) # inverse FFT - y[pout:pout + N] += ws * fftbuffer # overlap-add + y[pout : pout + N] += ws * fftbuffer # overlap-add pout += H y = np.delete(y, range(No2)) # delete half of first window y = np.delete(y, range(y.size - No2, y.size)) # delete half of the last window @@ -110,46 +121,52 @@ def stochasticModelSynth(stocEnv, H, N, fs=44100, melScale=1): def stochasticModel(x, H, N, stocf, fs=44100, melScale=1): """ - Stochastic analysis/synthesis of a sound, one frame at a time - x: input array sound, H: hop size, N: fft size - stocf: decimation factor of mag spectrum for stochastic analysis, bigger than 0, maximum of 1 - fs: sampling rate - melScale: choose between linear scale, 0, or mel scale, 1 (should match the analysis) - returns y: output sound - """ + Stochastic analysis/synthesis of a sound, one frame at a time + x: input array sound, H: hop size, N: fft size + stocf: decimation factor of mag spectrum for stochastic analysis, bigger than 0, maximum of 1 + fs: sampling rate + melScale: choose between linear scale, 0, or mel scale, 1 (should match the analysis) + returns y: output sound + """ hN = N // 2 + 1 # positive size of fft No2 = N // 2 # half of N - if (hN * stocf < 3): # raise exception if decimation factor too small + if hN * stocf < 3: # raise exception if decimation factor too small raise ValueError("Stochastic decimation factor too small") - if (stocf > 1): # raise exception if decimation factor too big + if stocf > 1: # raise exception if decimation factor too big raise ValueError("Stochastic decimation factor above 1") - if (H <= 0): # raise error if hop size 0 or negative + if H <= 0: # raise error if hop size 0 or negative raise ValueError("Hop size (H) smaller or equal to 0") if not (UF.isPower2(N)): # raise error if N not a power of twou raise ValueError("FFT size (N) is not a power of 2") w = hann(N) # analysis/synthesis window - x = np.append(np.zeros(No2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(No2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(No2)) # add zeros at the end to analyze last sample pin = No2 # initialize sound pointer in middle of analysis window pend = x.size - No2 # last sample to start a frame y = np.zeros(x.size) # initialize output array if melScale == 1: - binFreqsMel = hertz_to_mel(np.arange(hN)*fs/float(N)) + binFreqsMel = hertz_to_mel(np.arange(hN) * fs / float(N)) uniformMelFreq = np.linspace(binFreqsMel[0], binFreqsMel[-1], hN) while pin <= pend: # -----analysis----- - xw = x[pin - No2:pin + No2] * w # window the input sound + xw = x[pin - No2 : pin + No2] * w # window the input sound X = fft(xw) # compute FFT mX = 20 * np.log10(abs(X[:hN])) # magnitude spectrum of positive frequencies if melScale == 1: spl = splrep(binFreqsMel, np.maximum(-200, mX)) - stocEnv = resample(splev(uniformMelFreq, spl), int(stocf * hN)) # decimate the mag spectrum + stocEnv = resample( + splev(uniformMelFreq, spl), int(stocf * hN) + ) # decimate the mag spectrum else: - stocEnv = resample(np.maximum(-200, mX), int(stocf * hN)) # decimate the mag spectrum + stocEnv = resample( + np.maximum(-200, mX), int(stocf * hN) + ) # decimate the mag spectrum # -----synthesis----- mY = resample(stocEnv, hN) # interpolate to original size if melScale == 1: @@ -158,10 +175,14 @@ def stochasticModel(x, H, N, stocf, fs=44100, melScale=1): pY = 2 * np.pi * np.random.rand(hN) # generate phase random values Y = np.zeros(N, dtype=complex) Y[:hN] = 10 ** (mY / 20) * np.exp(1j * pY) # generate positive freq. - Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp(-1j * pY[-2:0:-1]) # generate negative freq. + Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp( + -1j * pY[-2:0:-1] + ) # generate negative freq. fftbuffer = np.real(ifft(Y)) # inverse FFT - y[pin - No2:pin + No2] += w * fftbuffer # overlap-add + y[pin - No2 : pin + No2] += w * fftbuffer # overlap-add pin += H # advance sound pointer y = np.delete(y, range(No2)) # delete half of first window which was added - y = np.delete(y, range(y.size - No2, y.size)) # delete half of last window which was added + y = np.delete( + y, range(y.size - No2, y.size) + ) # delete half of last window which was added return y diff --git a/smstools/models/utilFunctions.py b/smstools/models/utilFunctions.py index 69a8a90c..422876c4 100644 --- a/smstools/models/utilFunctions.py +++ b/smstools/models/utilFunctions.py @@ -13,13 +13,17 @@ from smstools.models import utilFunctions_C as UF_C except ImportError: print("\n") - print("-------------------------------------------------------------------------------") + print( + "-------------------------------------------------------------------------------" + ) print("Warning:") print("Cython modules for some of the core functions were not imported.") print("Please refer to the README.md file in the 'sms-tools' directory,") print("for the instructions to compile the cython modules.") print("Exiting the code!!") - print("-------------------------------------------------------------------------------") + print( + "-------------------------------------------------------------------------------" + ) print("\n") sys.exit(0) @@ -35,33 +39,39 @@ def isPower2(num): """ - Check if num is power of two - """ + Check if num is power of two + """ return ((num & (num - 1)) == 0) and num > 0 -INT16_FAC = (2 ** 15) -INT32_FAC = (2 ** 31) -INT64_FAC = (2 ** 63) -norm_fact = {'int16': INT16_FAC, 'int32': INT32_FAC, 'int64': INT64_FAC, 'float32': 1.0, 'float64': 1.0} +INT16_FAC = 2**15 +INT32_FAC = 2**31 +INT64_FAC = 2**63 +norm_fact = { + "int16": INT16_FAC, + "int32": INT32_FAC, + "int64": INT64_FAC, + "float32": 1.0, + "float64": 1.0, +} def wavread(filename): """ - Read a sound file and convert it to a normalized floating point array - filename: name of file to read - returns fs: sampling rate of file, x: floating point array - """ + Read a sound file and convert it to a normalized floating point array + filename: name of file to read + returns fs: sampling rate of file, x: floating point array + """ - if (os.path.isfile(filename) == False): # raise error if wrong input file + if os.path.isfile(filename) == False: # raise error if wrong input file raise ValueError("Input file is wrong") fs, x = read(filename) - if (len(x.shape) != 1): # raise error if more than one channel + if len(x.shape) != 1: # raise error if more than one channel raise ValueError("Audio file should be mono") - if (fs != 44100): # raise error if more than one channel + if fs != 44100: # raise error if more than one channel raise ValueError("Sampling rate of input sound should be 44100") # scale down and convert audio into floating point number in range of -1 to 1 @@ -71,11 +81,13 @@ def wavread(filename): def wavplay(filename): """ - Play a wav audio file from system using OS calls - filename: name of file to read - """ - if (os.path.isfile(filename) == False): # raise error if wrong input file - print("Input file does not exist. Make sure you computed the analysis/synthesis") + Play a wav audio file from system using OS calls + filename: name of file to read + """ + if os.path.isfile(filename) == False: # raise error if wrong input file + print( + "Input file does not exist. Make sure you computed the analysis/synthesis" + ) else: if sys.platform == "linux" or sys.platform == "linux2": # linux @@ -95,10 +107,10 @@ def wavplay(filename): def wavwrite(y, fs, filename): """ - Write a sound file from an array with the sound and the sampling rate - y: floating point array of one dimension, fs: sampling rate - filename: name of file to create - """ + Write a sound file from an array with the sound and the sampling rate + y: floating point array of one dimension, fs: sampling rate + filename: name of file to create + """ x = copy.deepcopy(y) # copy array x *= INT16_FAC # scaling floating point -1 to 1 range signal to int16 range @@ -108,14 +120,18 @@ def wavwrite(y, fs, filename): def peakDetection(mX, t): """ - Detect spectral peak locations - mX: magnitude spectrum, t: threshold - returns ploc: peak locations - """ + Detect spectral peak locations + mX: magnitude spectrum, t: threshold + returns ploc: peak locations + """ thresh = np.where(np.greater(mX[1:-1], t), mX[1:-1], 0) # locations above threshold - next_minor = np.where(mX[1:-1] > mX[2:], mX[1:-1], 0) # locations higher than the next one - prev_minor = np.where(mX[1:-1] > mX[:-2], mX[1:-1], 0) # locations higher than the previous one + next_minor = np.where( + mX[1:-1] > mX[2:], mX[1:-1], 0 + ) # locations higher than the next one + prev_minor = np.where( + mX[1:-1] > mX[:-2], mX[1:-1], 0 + ) # locations higher than the previous one ploc = thresh * next_minor * prev_minor # locations fulfilling the three criteria ploc = ploc.nonzero()[0] + 1 # add 1 to compensate for previous steps return ploc @@ -123,26 +139,28 @@ def peakDetection(mX, t): def peakInterp(mX, pX, ploc): """ - Interpolate peak values using parabolic interpolation - mX, pX: magnitude and phase spectrum, ploc: locations of peaks - returns iploc, ipmag, ipphase: interpolated peak location, magnitude and phase values - """ + Interpolate peak values using parabolic interpolation + mX, pX: magnitude and phase spectrum, ploc: locations of peaks + returns iploc, ipmag, ipphase: interpolated peak location, magnitude and phase values + """ val = mX[ploc] # magnitude of peak bin lval = mX[ploc - 1] # magnitude of bin at left rval = mX[ploc + 1] # magnitude of bin at right iploc = ploc + 0.5 * (lval - rval) / (lval - 2 * val + rval) # center of parabola ipmag = val - 0.25 * (lval - rval) * (iploc - ploc) # magnitude of peaks - ipphase = np.interp(iploc, np.arange(0, pX.size), pX) # phase of peaks by linear interpolation + ipphase = np.interp( + iploc, np.arange(0, pX.size), pX + ) # phase of peaks by linear interpolation return iploc, ipmag, ipphase def sinc(x, N): """ - Generate the main lobe of a sinc function (Dirichlet kernel) - x: array of indexes to compute; N: size of FFT to simulate - returns y: samples of the main lobe of a sinc function - """ + Generate the main lobe of a sinc function (Dirichlet kernel) + x: array of indexes to compute; N: size of FFT to simulate + returns y: samples of the main lobe of a sinc function + """ y = np.sin(N * x / 2) / np.sin(x / 2) # compute the sinc function y[np.isnan(y)] = N # avoid NaN if x == 0 @@ -151,10 +169,10 @@ def sinc(x, N): def genBhLobe(x): """ - Generate the main lobe of a Blackman-Harris window - x: bin positions to compute (real values) - returns y: main lobe os spectrum of a Blackman-Harris window - """ + Generate the main lobe of a Blackman-Harris window + x: bin positions to compute (real values) + returns y: main lobe os spectrum of a Blackman-Harris window + """ N = 512 # size of fft to use f = x * np.pi * 2 / N # frequency sampling @@ -162,18 +180,20 @@ def genBhLobe(x): y = np.zeros(x.size) # initialize window consts = [0.35875, 0.48829, 0.14128, 0.01168] # window constants for m in range(0, 4): # iterate over the four sincs to sum - y += consts[m] / 2 * (sinc(f - df * m, N) + sinc(f + df * m, N)) # sum of scaled sinc functions + y += ( + consts[m] / 2 * (sinc(f - df * m, N) + sinc(f + df * m, N)) + ) # sum of scaled sinc functions y = y / N / consts[0] # normalize return y def genSpecSines(ipfreq, ipmag, ipphase, N, fs): """ - Generate a spectrum from a series of sine values, calling a C function - ipfreq, ipmag, ipphase: sine peaks frequencies, magnitudes and phases - N: size of the complex spectrum to generate; fs: sampling frequency - returns Y: generated complex spectrum of sines - """ + Generate a spectrum from a series of sine values, calling a C function + ipfreq, ipmag, ipphase: sine peaks frequencies, magnitudes and phases + N: size of the complex spectrum to generate; fs: sampling frequency + returns Y: generated complex spectrum of sines + """ Y = UF_C.genSpecSines(N * ipfreq / float(fs), ipmag, ipphase, N) return Y @@ -181,41 +201,50 @@ def genSpecSines(ipfreq, ipmag, ipphase, N, fs): def genSpecSines_p(ipfreq, ipmag, ipphase, N, fs): """ - Generate a spectrum from a series of sine values - iploc, ipmag, ipphase: sine peaks locations, magnitudes and phases - N: size of the complex spectrum to generate; fs: sampling rate - returns Y: generated complex spectrum of sines - """ + Generate a spectrum from a series of sine values + iploc, ipmag, ipphase: sine peaks locations, magnitudes and phases + N: size of the complex spectrum to generate; fs: sampling rate + returns Y: generated complex spectrum of sines + """ Y = np.zeros(N, dtype=complex) # initialize output complex spectrum hN = N // 2 # size of positive freq. spectrum for i in range(0, ipfreq.size): # generate all sine spectral lobes loc = N * ipfreq[i] / fs # it should be in range ]0,hN-1[ - if loc == 0 or loc > hN - 1: continue + if loc == 0 or loc > hN - 1: + continue binremainder = round(loc) - loc - lb = np.arange(binremainder - 4, binremainder + 5) # main lobe (real value) bins to read - lmag = genBhLobe(lb) * 10 ** (ipmag[i] / 20) # lobe magnitudes of the complex exponential - b = np.arange(round(loc) - 4, round(loc) + 5, dtype='int') + lb = np.arange( + binremainder - 4, binremainder + 5 + ) # main lobe (real value) bins to read + lmag = genBhLobe(lb) * 10 ** ( + ipmag[i] / 20 + ) # lobe magnitudes of the complex exponential + b = np.arange(round(loc) - 4, round(loc) + 5, dtype="int") for m in range(0, 9): if b[m] < 0: # peak lobe crosses DC bin Y[-b[m]] += lmag[m] * np.exp(-1j * ipphase[i]) elif b[m] > hN: # peak lobe croses Nyquist bin Y[b[m]] += lmag[m] * np.exp(-1j * ipphase[i]) elif b[m] == 0 or b[m] == hN: # peak lobe in the limits of the spectrum - Y[b[m]] += lmag[m] * np.exp(1j * ipphase[i]) + lmag[m] * np.exp(-1j * ipphase[i]) + Y[b[m]] += lmag[m] * np.exp(1j * ipphase[i]) + lmag[m] * np.exp( + -1j * ipphase[i] + ) else: # peak lobe in positive freq. range Y[b[m]] += lmag[m] * np.exp(1j * ipphase[i]) - Y[hN + 1:] = Y[hN - 1:0:-1].conjugate() # fill the negative part of the spectrum + Y[hN + 1 :] = Y[ + hN - 1 : 0 : -1 + ].conjugate() # fill the negative part of the spectrum return Y def sinewaveSynth(freqs, amp, H, fs): """ - Synthesis of one sinusoid with time-varying frequency - freqs, amps: array of frequencies and amplitudes of sinusoids - H: hop size, fs: sampling rate - returns y: output array sound - """ + Synthesis of one sinusoid with time-varying frequency + freqs, amps: array of frequencies and amplitudes of sinusoids + H: hop size, fs: sampling rate + returns y: output array sound + """ t = np.arange(H) / float(fs) # time array lastphase = 0 # initialize synthesis phase @@ -230,7 +259,7 @@ def sinewaveSynth(freqs, amp, H, fs): freq = np.ones(H) * freqs[l] elif (lastfreq > 0) & (freqs[l] > 0): # if freqs in boundaries use both A = np.ones(H) * amp - if (lastfreq == freqs[l]): + if lastfreq == freqs[l]: freq = np.ones(H) * lastfreq else: freq = np.arange(lastfreq, freqs[l], (freqs[l] - lastfreq) / H) @@ -240,75 +269,93 @@ def sinewaveSynth(freqs, amp, H, fs): phase = 2 * np.pi * freq * t + lastphase # generate phase values yh = A * np.cos(phase) # compute sine for one frame lastfreq = freqs[l] # save frequency for phase propagation - lastphase = np.remainder(phase[H - 1], 2 * np.pi) # save phase to be use for next frame + lastphase = np.remainder( + phase[H - 1], 2 * np.pi + ) # save phase to be use for next frame y = np.append(y, yh) # append frame to previous one return y def cleaningTrack(track, minTrackLength=3): """ - Delete fragments of one single track smaller than minTrackLength - track: array of values; minTrackLength: minimum duration of tracks in number of frames - returns cleanTrack: array of clean values - """ + Delete fragments of one single track smaller than minTrackLength + track: array of values; minTrackLength: minimum duration of tracks in number of frames + returns cleanTrack: array of clean values + """ nFrames = track.size # number of frames cleanTrack = np.copy(track) # copy array - trackBegs = np.nonzero((track[:nFrames - 1] <= 0) # beginning of track contours - & (track[1:] > 0))[0] + 1 + trackBegs = ( + np.nonzero( + (track[: nFrames - 1] <= 0) & (track[1:] > 0) # beginning of track contours + )[0] + + 1 + ) if track[0] > 0: trackBegs = np.insert(trackBegs, 0, 0) - trackEnds = np.nonzero((track[:nFrames - 1] > 0) & (track[1:] <= 0))[0] + 1 + trackEnds = np.nonzero((track[: nFrames - 1] > 0) & (track[1:] <= 0))[0] + 1 if track[nFrames - 1] > 0: trackEnds = np.append(trackEnds, nFrames - 1) trackLengths = 1 + trackEnds - trackBegs # lengths of trach contours for i, j in zip(trackBegs, trackLengths): # delete short track contours if j <= minTrackLength: - cleanTrack[i:i + j] = 0 + cleanTrack[i : i + j] = 0 return cleanTrack def f0Twm(pfreq, pmag, ef0max, minf0, maxf0, f0t=0): """ - Function that wraps the f0 detection function TWM, selecting the possible f0 candidates - and calling the function TWM with them - pfreq, pmag: peak frequencies and magnitudes, - ef0max: maximum error allowed, minf0, maxf0: minimum and maximum f0 - f0t: f0 of previous frame if stable - returns f0: fundamental frequency in Hz - """ - if (minf0 < 0): # raise exception if minf0 is smaller than 0 + Function that wraps the f0 detection function TWM, selecting the possible f0 candidates + and calling the function TWM with them + pfreq, pmag: peak frequencies and magnitudes, + ef0max: maximum error allowed, minf0, maxf0: minimum and maximum f0 + f0t: f0 of previous frame if stable + returns f0: fundamental frequency in Hz + """ + if minf0 < 0: # raise exception if minf0 is smaller than 0 raise ValueError("Minimum fundamental frequency (minf0) smaller than 0") - if (maxf0 >= 10000): # raise exception if maxf0 is bigger than 10000Hz + if maxf0 >= 10000: # raise exception if maxf0 is bigger than 10000Hz raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz") - if (pfreq.size < 3) & (f0t == 0): # return 0 if less than 3 peaks and not previous f0 + if (pfreq.size < 3) & ( + f0t == 0 + ): # return 0 if less than 3 peaks and not previous f0 return 0 - f0c = np.argwhere((pfreq > minf0) & (pfreq < maxf0))[:, 0] # use only peaks within given range - if (f0c.size == 0): # return 0 if no peaks within range + f0c = np.argwhere((pfreq > minf0) & (pfreq < maxf0))[ + :, 0 + ] # use only peaks within given range + if f0c.size == 0: # return 0 if no peaks within range return 0 f0cf = pfreq[f0c] # frequencies of peak candidates f0cm = pmag[f0c] # magnitude of peak candidates if f0t > 0: # if stable f0 in previous frame - shortlist = np.argwhere(np.abs(f0cf - f0t) < f0t / 2.0)[:, 0] # use only peaks close to it + shortlist = np.argwhere(np.abs(f0cf - f0t) < f0t / 2.0)[ + :, 0 + ] # use only peaks close to it maxc = np.argmax(f0cm) maxcfd = f0cf[maxc] % f0t if maxcfd > f0t / 2: maxcfd = f0t - maxcfd - if (maxc not in shortlist) and (maxcfd > (f0t / 4)): # or the maximum magnitude peak is not a harmonic + if (maxc not in shortlist) and ( + maxcfd > (f0t / 4) + ): # or the maximum magnitude peak is not a harmonic shortlist = np.append(maxc, shortlist) f0cf = f0cf[shortlist] # frequencies of candidates - if (f0cf.size == 0): # return 0 if no peak candidates + if f0cf.size == 0: # return 0 if no peak candidates return 0 - f0, f0error = UF_C.twm(pfreq, pmag, f0cf) # call the TWM function with peak candidates, cython version - # f0, f0error = TWM_p(pfreq, pmag, f0cf) # call the TWM function with peak candidates, python version + f0, f0error = UF_C.twm( + pfreq, pmag, f0cf + ) # call the TWM function with peak candidates, cython version + # f0, f0error = TWM_p(pfreq, pmag, f0cf) # call the TWM function with peak candidates, python version - if (f0 > 0) and (f0error < ef0max): # accept and return f0 if below max error allowed + if (f0 > 0) and ( + f0error < ef0max + ): # accept and return f0 if below max error allowed return f0 else: return 0 @@ -316,12 +363,12 @@ def f0Twm(pfreq, pmag, ef0max, minf0, maxf0, f0t=0): def TWM_p(pfreq, pmag, f0c): """ - Two-way mismatch algorithm for f0 detection (by Beauchamp&Maher) - [better to use the C version of this function: UF_C.twm] - pfreq, pmag: peak frequencies in Hz and magnitudes, - f0c: frequencies of f0 candidates - returns f0, f0Error: fundamental frequency detected and its error - """ + Two-way mismatch algorithm for f0 detection (by Beauchamp&Maher) + [better to use the C version of this function: UF_C.twm] + pfreq, pmag: peak frequencies in Hz and magnitudes, + f0c: frequencies of f0 candidates + returns f0, f0Error: fundamental frequency detected and its error + """ p = 0.5 # weighting by frequency value q = 1.4 # weighting related to magnitude of peaks @@ -363,60 +410,74 @@ def TWM_p(pfreq, pmag, f0c): def sineSubtraction(x, N, H, sfreq, smag, sphase, fs): """ - Subtract sinusoids from a sound - x: input sound, N: fft-size, H: hop-size - sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases - returns xr: residual sound - """ + Subtract sinusoids from a sound + x: input sound, N: fft-size, H: hop-size + sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases + returns xr: residual sound + """ hN = N // 2 # half of fft size - x = np.append(np.zeros(hN), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hN), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hN)) # add zeros at the end to analyze last sample bh = blackmanharris(N) # blackman harris window w = bh / sum(bh) # normalize window sw = np.zeros(N) # initialize synthesis window - sw[hN - H:hN + H] = triang(2 * H) / w[hN - H:hN + H] # synthesis window + sw[hN - H : hN + H] = triang(2 * H) / w[hN - H : hN + H] # synthesis window L = sfreq.shape[0] # number of frames, this works if no sines xr = np.zeros(x.size) # initialize output array pin = 0 for l in range(L): - xw = x[pin:pin + N] * w # window the input sound + xw = x[pin : pin + N] * w # window the input sound X = fft(fftshift(xw)) # compute FFT - Yh = UF_C.genSpecSines(N * sfreq[l, :] / fs, smag[l, :], sphase[l, :], N) # generate spec sines, cython version - # Yh = genSpecSines_p(N*sfreq[l,:]/fs, smag[l,:], sphase[l,:], N, fs) # generate spec sines, python version + Yh = UF_C.genSpecSines( + N * sfreq[l, :] / fs, smag[l, :], sphase[l, :], N + ) # generate spec sines, cython version + # Yh = genSpecSines_p(N*sfreq[l,:]/fs, smag[l,:], sphase[l,:], N, fs) # generate spec sines, python version Xr = X - Yh # subtract sines from original spectrum xrw = np.real(fftshift(ifft(Xr))) # inverse FFT - xr[pin:pin + N] += xrw * sw # overlap-add + xr[pin : pin + N] += xrw * sw # overlap-add pin += H # advance sound pointer - xr = np.delete(xr, range(hN)) # delete half of first window which was added in stftAnal - xr = np.delete(xr, range(xr.size - hN, xr.size)) # delete half of last window which was added in stftAnal + xr = np.delete( + xr, range(hN) + ) # delete half of first window which was added in stftAnal + xr = np.delete( + xr, range(xr.size - hN, xr.size) + ) # delete half of last window which was added in stftAnal return xr def stochasticResidualAnal(x, N, H, sfreq, smag, sphase, fs, stocf): """ - Subtract sinusoids from a sound and approximate the residual with an envelope - x: input sound, N: fft size, H: hop-size - sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases - fs: sampling rate; stocf: stochastic factor, used in the approximation - returns stocEnv: stochastic approximation of residual - """ + Subtract sinusoids from a sound and approximate the residual with an envelope + x: input sound, N: fft size, H: hop-size + sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases + fs: sampling rate; stocf: stochastic factor, used in the approximation + returns stocEnv: stochastic approximation of residual + """ hN = N // 2 # half of fft size - x = np.append(np.zeros(hN), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hN), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hN)) # add zeros at the end to analyze last sample bh = blackmanharris(N) # synthesis window w = bh / sum(bh) # normalize synthesis window L = sfreq.shape[0] # number of frames, this works if no sines pin = 0 for l in range(L): - xw = x[pin:pin + N] * w # window the input sound + xw = x[pin : pin + N] * w # window the input sound X = fft(fftshift(xw)) # compute FFT - Yh = UF_C.genSpecSines(N * sfreq[l, :] / fs, smag[l, :], sphase[l, :], N) # generate spec sines, cython version - # Yh = genSpecSines_p(N*sfreq[l,:]/fs, smag[l,:], sphase[l,:], N, fs) # generate spec sines, python version + Yh = UF_C.genSpecSines( + N * sfreq[l, :] / fs, smag[l, :], sphase[l, :], N + ) # generate spec sines, cython version + # Yh = genSpecSines_p(N*sfreq[l,:]/fs, smag[l,:], sphase[l,:], N, fs) # generate spec sines, python version Xr = X - Yh # subtract sines from original spectrum mXr = 20 * np.log10(abs(Xr[:hN])) # magnitude spectrum of residual - mXrenv = resample(np.maximum(-200, mXr), mXr.size * stocf) # decimate the mag spectrum + mXrenv = resample( + np.maximum(-200, mXr), mXr.size * stocf + ) # decimate the mag spectrum if l == 0: # if first frame stocEnv = np.array([mXrenv]) else: # rest of frames diff --git a/smstools/transformations/harmonicTransformations.py b/smstools/transformations/harmonicTransformations.py index 2872adcf..862bf4c1 100644 --- a/smstools/transformations/harmonicTransformations.py +++ b/smstools/transformations/harmonicTransformations.py @@ -4,27 +4,33 @@ from scipy.interpolate import interp1d -def harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs): +def harmonicFreqScaling( + hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs +): """ - Frequency scaling of the harmonics of a sound - hfreq, hmag: frequencies and magnitudes of input harmonics - freqScaling: scaling factors, in time-value pairs (value of 1 no scaling) - freqStretching: stretching factors, in time-value pairs (value of 1 no stretching) - timbrePreservation: 0 no timbre preservation, 1 timbre preservation - fs: sampling rate of input sound - returns yhfreq, yhmag: frequencies and magnitudes of output harmonics - """ - if (freqScaling.size % 2 != 0): # raise exception if array not even length + Frequency scaling of the harmonics of a sound + hfreq, hmag: frequencies and magnitudes of input harmonics + freqScaling: scaling factors, in time-value pairs (value of 1 no scaling) + freqStretching: stretching factors, in time-value pairs (value of 1 no stretching) + timbrePreservation: 0 no timbre preservation, 1 timbre preservation + fs: sampling rate of input sound + returns yhfreq, yhmag: frequencies and magnitudes of output harmonics + """ + if freqScaling.size % 2 != 0: # raise exception if array not even length raise ValueError("Frequency scaling array does not have an even size") - if (freqStretching.size % 2 != 0): # raise exception if array not even length + if freqStretching.size % 2 != 0: # raise exception if array not even length raise ValueError("Frequency stretching array does not have an even size") L = hfreq.shape[0] # number of frames # create interpolation object with the scaling values - freqScalingEnv = np.interp(np.arange(L), L * freqScaling[::2] / freqScaling[-2], freqScaling[1::2]) + freqScalingEnv = np.interp( + np.arange(L), L * freqScaling[::2] / freqScaling[-2], freqScaling[1::2] + ) # create interpolation object with the stretching values - freqStretchingEnv = np.interp(np.arange(L), L * freqStretching[::2] / freqStretching[-2], freqStretching[1::2]) + freqStretchingEnv = np.interp( + np.arange(L), L * freqStretching[::2] / freqStretching[-2], freqStretching[1::2] + ) yhfreq = np.zeros_like(hfreq) # create empty output matrix yhmag = np.zeros_like(hmag) # create empty output matrix for l in range(L): # go through all frames @@ -36,11 +42,19 @@ def harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreserva x_vals = np.append(np.append(0, hfreq[l, ind_valid]), fs / 2) # values of harmonic magnitudes to be considered for interpolation y_vals = np.append(np.append(hmag[l, 0], hmag[l, ind_valid]), hmag[l, -1]) - specEnvelope = interp1d(x_vals, y_vals, kind='linear', bounds_error=False, fill_value=-100) - yhfreq[l, ind_valid] = hfreq[l, ind_valid] * freqScalingEnv[l] # scale frequencies - yhfreq[l, ind_valid] = yhfreq[l, ind_valid] * (freqStretchingEnv[l] ** ind_valid) # stretch frequencies + specEnvelope = interp1d( + x_vals, y_vals, kind="linear", bounds_error=False, fill_value=-100 + ) + yhfreq[l, ind_valid] = ( + hfreq[l, ind_valid] * freqScalingEnv[l] + ) # scale frequencies + yhfreq[l, ind_valid] = yhfreq[l, ind_valid] * ( + freqStretchingEnv[l] ** ind_valid + ) # stretch frequencies if (timbrePreservation == 1) & (ind_valid.size > 1): # if timbre preservation - yhmag[l, ind_valid] = specEnvelope(yhfreq[l, ind_valid]) # change amplitudes to maintain timbre + yhmag[l, ind_valid] = specEnvelope( + yhfreq[l, ind_valid] + ) # change amplitudes to maintain timbre else: yhmag[l, ind_valid] = hmag[l, ind_valid] # use same amplitudes as input return yhfreq, yhmag diff --git a/smstools/transformations/hpsTransformations.py b/smstools/transformations/hpsTransformations.py index e3bfa98c..a1009736 100644 --- a/smstools/transformations/hpsTransformations.py +++ b/smstools/transformations/hpsTransformations.py @@ -3,82 +3,104 @@ import numpy as np from scipy.interpolate import interp1d + def hpsTimeScale(hfreq, hmag, stocEnv, timeScaling): - """ - Time scaling of the harmonic plus stochastic representation - hfreq, hmag: harmonic frequencies and magnitudes, stocEnv: residual envelope - timeScaling: scaling factors, in time-value pairs - returns yhfreq, yhmag, ystocEnv: hps output representation - """ + """ + Time scaling of the harmonic plus stochastic representation + hfreq, hmag: harmonic frequencies and magnitudes, stocEnv: residual envelope + timeScaling: scaling factors, in time-value pairs + returns yhfreq, yhmag, ystocEnv: hps output representation + """ + + if timeScaling.size % 2 != 0: # raise exception if array not even length + raise ValueError("Time scaling array does not have an even size") + + L = hfreq[:, 0].size # number of input frames + maxInTime = max(timeScaling[::2]) # maximum value used as input times + maxOutTime = max(timeScaling[1::2]) # maximum value used in output times + outL = int(L * maxOutTime / maxInTime) # number of output frames + inFrames = (L - 1) * timeScaling[::2] / maxInTime # input time values in frames + outFrames = outL * timeScaling[1::2] / maxOutTime # output time values in frames + timeScalingEnv = interp1d( + outFrames, inFrames, fill_value=0 + ) # interpolation function + indexes = timeScalingEnv(np.arange(outL)) # generate frame indexes for the output + yhfreq = np.zeros((indexes.shape[0], hfreq.shape[1])) # allocate space for yhfreq + yhmag = np.zeros((indexes.shape[0], hmag.shape[1])) # allocate space for yhmag + ystocEnv = np.zeros( + (indexes.shape[0], stocEnv.shape[1]) + ) # allocate space for ystocEnv + frameIdx = 0 + for l in indexes[1:]: # iterate over all output frame indexes + yhfreq[frameIdx, :] = hfreq[int(round(l)), :] # get the closest input frame + yhmag[frameIdx, :] = hmag[int(round(l)), :] # get the closest input frame + ystocEnv[frameIdx, :] = stocEnv[int(round(l)), :] # get the closest input frame + frameIdx += 1 + return yhfreq, yhmag, ystocEnv + + +def hpsMorph( + hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp +): + """ + Morph between two sounds using the harmonic plus stochastic model + hfreq1, hmag1, stocEnv1: hps representation of sound 1 + hfreq2, hmag2, stocEnv2: hps representation of sound 2 + hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) + hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) + stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) + returns yhfreq, yhmag, ystocEnv: hps output representation + """ + + if hfreqIntp.size % 2 != 0: # raise exception if array not even length + raise ValueError( + "Harmonic frequencies interpolation array does not have an even size" + ) + + if hmagIntp.size % 2 != 0: # raise exception if array not even length + raise ValueError("Harmonic magnitudes interpolation does not have an even size") - if (timeScaling.size % 2 != 0): # raise exception if array not even length - raise ValueError("Time scaling array does not have an even size") - - L = hfreq[:,0].size # number of input frames - maxInTime = max(timeScaling[::2]) # maximum value used as input times - maxOutTime = max(timeScaling[1::2]) # maximum value used in output times - outL = int(L*maxOutTime/maxInTime) # number of output frames - inFrames = (L-1)*timeScaling[::2]/maxInTime # input time values in frames - outFrames = outL*timeScaling[1::2]/maxOutTime # output time values in frames - timeScalingEnv = interp1d(outFrames, inFrames, fill_value=0) # interpolation function - indexes = timeScalingEnv(np.arange(outL)) # generate frame indexes for the output - yhfreq = np.zeros((indexes.shape[0], hfreq.shape[1])) # allocate space for yhfreq - yhmag = np.zeros((indexes.shape[0], hmag.shape[1])) # allocate space for yhmag - ystocEnv = np.zeros((indexes.shape[0], stocEnv.shape[1]))# allocate space for ystocEnv - frameIdx = 0 - for l in indexes[1:]: # iterate over all output frame indexes - yhfreq[frameIdx,:] = hfreq[int(round(l)),:] # get the closest input frame - yhmag[frameIdx,:] = hmag[int(round(l)),:] # get the closest input frame - ystocEnv[frameIdx,:] = stocEnv[int(round(l)),:] # get the closest input frame - frameIdx += 1 - return yhfreq, yhmag, ystocEnv - - -def hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp): - """ - Morph between two sounds using the harmonic plus stochastic model - hfreq1, hmag1, stocEnv1: hps representation of sound 1 - hfreq2, hmag2, stocEnv2: hps representation of sound 2 - hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) - hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) - stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) - returns yhfreq, yhmag, ystocEnv: hps output representation - """ + if stocIntp.size % 2 != 0: # raise exception if array not even length + raise ValueError("Stochastic component array does not have an even size") - if (hfreqIntp.size % 2 != 0): # raise exception if array not even length - raise ValueError("Harmonic frequencies interpolation array does not have an even size") - - if (hmagIntp.size % 2 != 0): # raise exception if array not even length - raise ValueError("Harmonic magnitudes interpolation does not have an even size") - - if (stocIntp.size % 2 != 0): # raise exception if array not even length - raise ValueError("Stochastic component array does not have an even size") - - L1 = hfreq1[:,0].size # number of frames of sound 1 - L2 = hfreq2[:,0].size # number of frames of sound 2 - hfreqIntp[::2] = (L1-1)*hfreqIntp[::2]/hfreqIntp[-2] # normalize input values - hmagIntp[::2] = (L1-1)*hmagIntp[::2]/hmagIntp[-2] # normalize input values - stocIntp[::2] = (L1-1)*stocIntp[::2]/stocIntp[-2] # normalize input values - hfreqIntpEnv = interp1d(hfreqIntp[0::2], hfreqIntp[1::2], fill_value=0) # interpolation function - hfreqIndexes = hfreqIntpEnv(np.arange(L1)) # generate frame indexes for the output - hmagIntpEnv = interp1d(hmagIntp[0::2], hmagIntp[1::2], fill_value=0) # interpolation function - hmagIndexes = hmagIntpEnv(np.arange(L1)) # generate frame indexes for the output - stocIntpEnv = interp1d(stocIntp[0::2], stocIntp[1::2], fill_value=0) # interpolation function - stocIndexes = stocIntpEnv(np.arange(L1)) # generate frame indexes for the output - yhfreq = np.zeros_like(hfreq1) # create empty output matrix - yhmag = np.zeros_like(hmag1) # create empty output matrix - ystocEnv = np.zeros_like(stocEnv1) # create empty output matrix - - for l in range(L1): # generate morphed frames - dataIndex = int(round(((L2-1)*l)/float(L1-1))) - # identify harmonics that are present in both frames - harmonics = np.intersect1d(np.array(np.nonzero(hfreq1[l,:]), dtype=np.int)[0], np.array(np.nonzero(hfreq2[dataIndex,:]), dtype=np.int)[0]) - # interpolate the frequencies of the existing harmonics - yhfreq[l,harmonics] = (1-hfreqIndexes[l])* hfreq1[l,harmonics] + hfreqIndexes[l] * hfreq2[dataIndex,harmonics] - # interpolate the magnitudes of the existing harmonics - yhmag[l,harmonics] = (1-hmagIndexes[l])* hmag1[l,harmonics] + hmagIndexes[l] * hmag2[dataIndex,harmonics] - # interpolate the stochastic envelopes of both frames - ystocEnv[l,:] = (1-stocIndexes[l])* stocEnv1[l,:] + stocIndexes[l] * stocEnv2[dataIndex,:] - return yhfreq, yhmag, ystocEnv - + L1 = hfreq1[:, 0].size # number of frames of sound 1 + L2 = hfreq2[:, 0].size # number of frames of sound 2 + hfreqIntp[::2] = (L1 - 1) * hfreqIntp[::2] / hfreqIntp[-2] # normalize input values + hmagIntp[::2] = (L1 - 1) * hmagIntp[::2] / hmagIntp[-2] # normalize input values + stocIntp[::2] = (L1 - 1) * stocIntp[::2] / stocIntp[-2] # normalize input values + hfreqIntpEnv = interp1d( + hfreqIntp[0::2], hfreqIntp[1::2], fill_value=0 + ) # interpolation function + hfreqIndexes = hfreqIntpEnv(np.arange(L1)) # generate frame indexes for the output + hmagIntpEnv = interp1d( + hmagIntp[0::2], hmagIntp[1::2], fill_value=0 + ) # interpolation function + hmagIndexes = hmagIntpEnv(np.arange(L1)) # generate frame indexes for the output + stocIntpEnv = interp1d( + stocIntp[0::2], stocIntp[1::2], fill_value=0 + ) # interpolation function + stocIndexes = stocIntpEnv(np.arange(L1)) # generate frame indexes for the output + yhfreq = np.zeros_like(hfreq1) # create empty output matrix + yhmag = np.zeros_like(hmag1) # create empty output matrix + ystocEnv = np.zeros_like(stocEnv1) # create empty output matrix + for l in range(L1): # generate morphed frames + dataIndex = int(round(((L2 - 1) * l) / float(L1 - 1))) + # identify harmonics that are present in both frames + harmonics = np.intersect1d( + np.array(np.nonzero(hfreq1[l, :]), dtype=np.int)[0], + np.array(np.nonzero(hfreq2[dataIndex, :]), dtype=np.int)[0], + ) + # interpolate the frequencies of the existing harmonics + yhfreq[l, harmonics] = (1 - hfreqIndexes[l]) * hfreq1[ + l, harmonics + ] + hfreqIndexes[l] * hfreq2[dataIndex, harmonics] + # interpolate the magnitudes of the existing harmonics + yhmag[l, harmonics] = (1 - hmagIndexes[l]) * hmag1[l, harmonics] + hmagIndexes[ + l + ] * hmag2[dataIndex, harmonics] + # interpolate the stochastic envelopes of both frames + ystocEnv[l, :] = (1 - stocIndexes[l]) * stocEnv1[l, :] + stocIndexes[ + l + ] * stocEnv2[dataIndex, :] + return yhfreq, yhmag, ystocEnv diff --git a/smstools/transformations/interface/harmonicTransformations_GUI_frame.py b/smstools/transformations/interface/harmonicTransformations_GUI_frame.py index 7a4967dd..2181b7cf 100644 --- a/smstools/transformations/interface/harmonicTransformations_GUI_frame.py +++ b/smstools/transformations/interface/harmonicTransformations_GUI_frame.py @@ -8,233 +8,333 @@ from smstools.transformations.interface import harmonicTransformations_function as hT from smstools.models import utilFunctions as UF + class HarmonicTransformations_frame: - def __init__(self, parent): - - self.parent = parent - self.initUI() - - def initUI(self): - - choose_label = "inputFile:" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) - - #TEXTBOX TO PRINT PATH OF THE SOUND FILE - self.filelocation = Entry(self.parent) - self.filelocation.focus_set() - self.filelocation["width"] = 32 - self.filelocation.grid(row=0,column=0, sticky=W, padx=(70, 5), pady=(10,2)) - self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/vignesh.wav') - - #BUTTON TO BROWSE SOUND FILE - open_file = Button(self.parent, text="...", command=self.browse_file) #see: def browse_file(self) - open_file.grid(row=0, column=0, sticky=W, padx=(340, 6), pady=(10,2)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE - preview = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation.get())) - preview.grid(row=0, column=0, sticky=W, padx=(385,6), pady=(10,2)) - - ## HARMONIC TRANSFORMATIONS ANALYSIS - - #ANALYSIS WINDOW TYPE - wtype_label = "window:" - Label(self.parent, text=wtype_label).grid(row=1, column=0, sticky=W, padx=5, pady=(10,2)) - self.w_type = StringVar() - self.w_type.set("blackman") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hanning", "hamming", "blackman", "blackmanharris") - window_option.grid(row=1, column=0, sticky=W, padx=(65,5), pady=(10,2)) - - #WINDOW SIZE - M_label = "M:" - Label(self.parent, text=M_label).grid(row=1, column=0, sticky=W, padx=(180, 5), pady=(10,2)) - self.M = Entry(self.parent, justify=CENTER) - self.M["width"] = 5 - self.M.grid(row=1,column=0, sticky=W, padx=(200,5), pady=(10,2)) - self.M.delete(0, END) - self.M.insert(0, "1201") - - #FFT SIZE - N_label = "N:" - Label(self.parent, text=N_label).grid(row=1, column=0, sticky=W, padx=(255, 5), pady=(10,2)) - self.N = Entry(self.parent, justify=CENTER) - self.N["width"] = 5 - self.N.grid(row=1,column=0, sticky=W, padx=(275,5), pady=(10,2)) - self.N.delete(0, END) - self.N.insert(0, "2048") - - #THRESHOLD MAGNITUDE - t_label = "t:" - Label(self.parent, text=t_label).grid(row=1, column=0, sticky=W, padx=(330,5), pady=(10,2)) - self.t = Entry(self.parent, justify=CENTER) - self.t["width"] = 5 - self.t.grid(row=1, column=0, sticky=W, padx=(348,5), pady=(10,2)) - self.t.delete(0, END) - self.t.insert(0, "-90") - - #MIN DURATION SINUSOIDAL TRACKS - minSineDur_label = "minSineDur:" - Label(self.parent, text=minSineDur_label).grid(row=2, column=0, sticky=W, padx=(5, 5), pady=(10,2)) - self.minSineDur = Entry(self.parent, justify=CENTER) - self.minSineDur["width"] = 5 - self.minSineDur.grid(row=2, column=0, sticky=W, padx=(87,5), pady=(10,2)) - self.minSineDur.delete(0, END) - self.minSineDur.insert(0, "0.1") - - #MAX NUMBER OF HARMONICS - nH_label = "nH:" - Label(self.parent, text=nH_label).grid(row=2, column=0, sticky=W, padx=(145,5), pady=(10,2)) - self.nH = Entry(self.parent, justify=CENTER) - self.nH["width"] = 5 - self.nH.grid(row=2, column=0, sticky=W, padx=(172,5), pady=(10,2)) - self.nH.delete(0, END) - self.nH.insert(0, "100") - - #MIN FUNDAMENTAL FREQUENCY - minf0_label = "minf0:" - Label(self.parent, text=minf0_label).grid(row=2, column=0, sticky=W, padx=(227,5), pady=(10,2)) - self.minf0 = Entry(self.parent, justify=CENTER) - self.minf0["width"] = 5 - self.minf0.grid(row=2, column=0, sticky=W, padx=(275,5), pady=(10,2)) - self.minf0.delete(0, END) - self.minf0.insert(0, "130") - - #MAX FUNDAMENTAL FREQUENCY - maxf0_label = "maxf0:" - Label(self.parent, text=maxf0_label).grid(row=2, column=0, sticky=W, padx=(330,5), pady=(10,2)) - self.maxf0 = Entry(self.parent, justify=CENTER) - self.maxf0["width"] = 5 - self.maxf0.grid(row=2, column=0, sticky=W, padx=(380,5), pady=(10,2)) - self.maxf0.delete(0, END) - self.maxf0.insert(0, "300") - - #MAX ERROR ACCEPTED - f0et_label = "f0et:" - Label(self.parent, text=f0et_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10,2)) - self.f0et = Entry(self.parent, justify=CENTER) - self.f0et["width"] = 3 - self.f0et.grid(row=3, column=0, sticky=W, padx=(42,5), pady=(10,2)) - self.f0et.delete(0, END) - self.f0et.insert(0, "7") - - #ALLOWED DEVIATION OF HARMONIC TRACKS - harmDevSlope_label = "harmDevSlope:" - Label(self.parent, text=harmDevSlope_label).grid(row=3, column=0, sticky=W, padx=(90,5), pady=(10,2)) - self.harmDevSlope = Entry(self.parent, justify=CENTER) - self.harmDevSlope["width"] = 5 - self.harmDevSlope.grid(row=3, column=0, sticky=W, padx=(190,5), pady=(10,2)) - self.harmDevSlope.delete(0, END) - self.harmDevSlope.insert(0, "0.01") - - #BUTTON TO DO THE ANALYSIS OF THE SOUND - self.compute = Button(self.parent, text="Analysis/Synthesis", command=self.analysis) - self.compute.grid(row=4, column=0, padx=5, pady=(10,5), sticky=W) - - #BUTTON TO PLAY ANALYSIS/SYNTHESIS OUTPUT - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_harmonicModel.wav')) - self.output.grid(row=4, column=0, padx=(145,5), pady=(10,5), sticky=W) - - ### - #SEPARATION LINE - Frame(self.parent,height=1,width=50,bg="black").grid(row=5, pady=5, sticky=W+E) - ### - - #FREQUENCY SCALING FACTORS - freqScaling_label = "Frequency scaling factors (time, value pairs):" - Label(self.parent, text=freqScaling_label).grid(row=6, column=0, sticky=W, padx=5, pady=(5,2)) - self.freqScaling = Entry(self.parent, justify=CENTER) - self.freqScaling["width"] = 35 - self.freqScaling.grid(row=7, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.freqScaling.delete(0, END) - self.freqScaling.insert(0, "[0, 2.0, 1, 0.3]") - - #FREQUENCY STRETCHING FACTORSharmonicModelTransformation - freqStretching_label = "Frequency stretching factors (time, value pairs):" - Label(self.parent, text=freqStretching_label).grid(row=8, column=0, sticky=W, padx=5, pady=(5,2)) - self.freqStretching = Entry(self.parent, justify=CENTER) - self.freqStretching["width"] = 35 - self.freqStretching.grid(row=9, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.freqStretching.delete(0, END) - self.freqStretching.insert(0, "[0, 1, 1, 1.5]") - - #TIMBRE PRESERVATION - timbrePreservation_label = "Timbre preservation (1 preserves original timbre, 0 it does not):" - Label(self.parent, text=timbrePreservation_label).grid(row=10, column=0, sticky=W, padx=5, pady=(5,2)) - self.timbrePreservation = Entry(self.parent, justify=CENTER) - self.timbrePreservation["width"] = 2 - self.timbrePreservation.grid(row=10, column=0, sticky=W+E, padx=(395,5), pady=(5,2)) - self.timbrePreservation.delete(0, END) - self.timbrePreservation.insert(0, "1") - - #TIME SCALING FACTORS - timeScaling_label = "Time scaling factors (time, value pairs):" - Label(self.parent, text=timeScaling_label).grid(row=11, column=0, sticky=W, padx=5, pady=(5,2)) - self.timeScaling = Entry(self.parent, justify=CENTER) - self.timeScaling["width"] = 35 - self.timeScaling.grid(row=12, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.timeScaling.delete(0, END) - self.timeScaling.insert(0, "[0, 0, 0.671, 0.671, 1.978, 1.978+1.0]") - - #BUTTON TO DO THE SYNTHESIS - self.compute = Button(self.parent, text="Apply Transformation", command=self.transformation_synthesis) - self.compute.grid(row=13, column=0, padx=5, pady=(10,15), sticky=W) - - #BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT - self.transf_output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_harmonicModelTransformation.wav')) - self.transf_output.grid(row=13, column=0, padx=(165,5), pady=(10,15), sticky=W) - - # define options for opening file - self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' - - def browse_file(self, tkFileDialog=None): - - self.filename = tkFileDialog.askopenfilename(**self.file_opt) - - #set the text of the self.filelocation - self.filelocation.delete(0, END) - self.filelocation.insert(0,self.filename) - - def analysis(self, tkMessageBox=None): - - try: - inputFile = self.filelocation.get() - window = self.w_type.get() - M = int(self.M.get()) - N = int(self.N.get()) - t = int(self.t.get()) - minSineDur = float(self.minSineDur.get()) - nH = int(self.nH.get()) - minf0 = int(self.minf0.get()) - maxf0 = int(self.maxf0.get()) - f0et = int(self.f0et.get()) - harmDevSlope = float(self.harmDevSlope.get()) - - self.inputFile, self.fs, self.hfreq, self.hmag = hT.analysis(inputFile, window, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope) - - except ValueError: - tkMessageBox.showerror("Input values error", "Some parameters are incorrect") - - def transformation_synthesis(self): - - try: - inputFile = self.inputFile - fs = self.fs - hfreq = self.hfreq - hmag = self.hmag - freqScaling = np.array(eval(self.freqScaling.get())) - freqStretching = np.array(eval(self.freqStretching.get())) - timbrePreservation = int(self.timbrePreservation.get()) - timeScaling = np.array(eval(self.timeScaling.get())) - - hT.transformation_synthesis(inputFile, fs, hfreq, hmag, freqScaling, freqStretching, timbrePreservation, timeScaling) - - except ValueError as errorMessage: - messagebox.showerror("Input values error", errorMessage) - - except AttributeError: - messagebox.showerror("Analysis not computed", "First you must analyse the sound!") + def __init__(self, parent): + + self.parent = parent + self.initUI() + + def initUI(self): + + choose_label = "inputFile:" + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) + + # TEXTBOX TO PRINT PATH OF THE SOUND FILE + self.filelocation = Entry(self.parent) + self.filelocation.focus_set() + self.filelocation["width"] = 32 + self.filelocation.grid(row=0, column=0, sticky=W, padx=(70, 5), pady=(10, 2)) + self.filelocation.delete(0, END) + self.filelocation.insert(0, "../../sounds/vignesh.wav") + + # BUTTON TO BROWSE SOUND FILE + open_file = Button( + self.parent, text="...", command=self.browse_file + ) # see: def browse_file(self) + open_file.grid( + row=0, column=0, sticky=W, padx=(340, 6), pady=(10, 2) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE + preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) + preview.grid(row=0, column=0, sticky=W, padx=(385, 6), pady=(10, 2)) + + ## HARMONIC TRANSFORMATIONS ANALYSIS + + # ANALYSIS WINDOW TYPE + wtype_label = "window:" + Label(self.parent, text=wtype_label).grid( + row=1, column=0, sticky=W, padx=5, pady=(10, 2) + ) + self.w_type = StringVar() + self.w_type.set("blackman") # initial value + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hanning", + "hamming", + "blackman", + "blackmanharris", + ) + window_option.grid(row=1, column=0, sticky=W, padx=(65, 5), pady=(10, 2)) + + # WINDOW SIZE + M_label = "M:" + Label(self.parent, text=M_label).grid( + row=1, column=0, sticky=W, padx=(180, 5), pady=(10, 2) + ) + self.M = Entry(self.parent, justify=CENTER) + self.M["width"] = 5 + self.M.grid(row=1, column=0, sticky=W, padx=(200, 5), pady=(10, 2)) + self.M.delete(0, END) + self.M.insert(0, "1201") + + # FFT SIZE + N_label = "N:" + Label(self.parent, text=N_label).grid( + row=1, column=0, sticky=W, padx=(255, 5), pady=(10, 2) + ) + self.N = Entry(self.parent, justify=CENTER) + self.N["width"] = 5 + self.N.grid(row=1, column=0, sticky=W, padx=(275, 5), pady=(10, 2)) + self.N.delete(0, END) + self.N.insert(0, "2048") + + # THRESHOLD MAGNITUDE + t_label = "t:" + Label(self.parent, text=t_label).grid( + row=1, column=0, sticky=W, padx=(330, 5), pady=(10, 2) + ) + self.t = Entry(self.parent, justify=CENTER) + self.t["width"] = 5 + self.t.grid(row=1, column=0, sticky=W, padx=(348, 5), pady=(10, 2)) + self.t.delete(0, END) + self.t.insert(0, "-90") + + # MIN DURATION SINUSOIDAL TRACKS + minSineDur_label = "minSineDur:" + Label(self.parent, text=minSineDur_label).grid( + row=2, column=0, sticky=W, padx=(5, 5), pady=(10, 2) + ) + self.minSineDur = Entry(self.parent, justify=CENTER) + self.minSineDur["width"] = 5 + self.minSineDur.grid(row=2, column=0, sticky=W, padx=(87, 5), pady=(10, 2)) + self.minSineDur.delete(0, END) + self.minSineDur.insert(0, "0.1") + + # MAX NUMBER OF HARMONICS + nH_label = "nH:" + Label(self.parent, text=nH_label).grid( + row=2, column=0, sticky=W, padx=(145, 5), pady=(10, 2) + ) + self.nH = Entry(self.parent, justify=CENTER) + self.nH["width"] = 5 + self.nH.grid(row=2, column=0, sticky=W, padx=(172, 5), pady=(10, 2)) + self.nH.delete(0, END) + self.nH.insert(0, "100") + + # MIN FUNDAMENTAL FREQUENCY + minf0_label = "minf0:" + Label(self.parent, text=minf0_label).grid( + row=2, column=0, sticky=W, padx=(227, 5), pady=(10, 2) + ) + self.minf0 = Entry(self.parent, justify=CENTER) + self.minf0["width"] = 5 + self.minf0.grid(row=2, column=0, sticky=W, padx=(275, 5), pady=(10, 2)) + self.minf0.delete(0, END) + self.minf0.insert(0, "130") + + # MAX FUNDAMENTAL FREQUENCY + maxf0_label = "maxf0:" + Label(self.parent, text=maxf0_label).grid( + row=2, column=0, sticky=W, padx=(330, 5), pady=(10, 2) + ) + self.maxf0 = Entry(self.parent, justify=CENTER) + self.maxf0["width"] = 5 + self.maxf0.grid(row=2, column=0, sticky=W, padx=(380, 5), pady=(10, 2)) + self.maxf0.delete(0, END) + self.maxf0.insert(0, "300") + + # MAX ERROR ACCEPTED + f0et_label = "f0et:" + Label(self.parent, text=f0et_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) + self.f0et = Entry(self.parent, justify=CENTER) + self.f0et["width"] = 3 + self.f0et.grid(row=3, column=0, sticky=W, padx=(42, 5), pady=(10, 2)) + self.f0et.delete(0, END) + self.f0et.insert(0, "7") + + # ALLOWED DEVIATION OF HARMONIC TRACKS + harmDevSlope_label = "harmDevSlope:" + Label(self.parent, text=harmDevSlope_label).grid( + row=3, column=0, sticky=W, padx=(90, 5), pady=(10, 2) + ) + self.harmDevSlope = Entry(self.parent, justify=CENTER) + self.harmDevSlope["width"] = 5 + self.harmDevSlope.grid(row=3, column=0, sticky=W, padx=(190, 5), pady=(10, 2)) + self.harmDevSlope.delete(0, END) + self.harmDevSlope.insert(0, "0.01") + + # BUTTON TO DO THE ANALYSIS OF THE SOUND + self.compute = Button( + self.parent, text="Analysis/Synthesis", command=self.analysis + ) + self.compute.grid(row=4, column=0, padx=5, pady=(10, 5), sticky=W) + + # BUTTON TO PLAY ANALYSIS/SYNTHESIS OUTPUT + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_harmonicModel.wav" + ), + ) + self.output.grid(row=4, column=0, padx=(145, 5), pady=(10, 5), sticky=W) + + ### + # SEPARATION LINE + Frame(self.parent, height=1, width=50, bg="black").grid( + row=5, pady=5, sticky=W + E + ) + ### + + # FREQUENCY SCALING FACTORS + freqScaling_label = "Frequency scaling factors (time, value pairs):" + Label(self.parent, text=freqScaling_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.freqScaling = Entry(self.parent, justify=CENTER) + self.freqScaling["width"] = 35 + self.freqScaling.grid(row=7, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.freqScaling.delete(0, END) + self.freqScaling.insert(0, "[0, 2.0, 1, 0.3]") + + # FREQUENCY STRETCHING FACTORSharmonicModelTransformation + freqStretching_label = "Frequency stretching factors (time, value pairs):" + Label(self.parent, text=freqStretching_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.freqStretching = Entry(self.parent, justify=CENTER) + self.freqStretching["width"] = 35 + self.freqStretching.grid(row=9, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.freqStretching.delete(0, END) + self.freqStretching.insert(0, "[0, 1, 1, 1.5]") + + # TIMBRE PRESERVATION + timbrePreservation_label = ( + "Timbre preservation (1 preserves original timbre, 0 it does not):" + ) + Label(self.parent, text=timbrePreservation_label).grid( + row=10, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.timbrePreservation = Entry(self.parent, justify=CENTER) + self.timbrePreservation["width"] = 2 + self.timbrePreservation.grid( + row=10, column=0, sticky=W + E, padx=(395, 5), pady=(5, 2) + ) + self.timbrePreservation.delete(0, END) + self.timbrePreservation.insert(0, "1") + + # TIME SCALING FACTORS + timeScaling_label = "Time scaling factors (time, value pairs):" + Label(self.parent, text=timeScaling_label).grid( + row=11, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.timeScaling = Entry(self.parent, justify=CENTER) + self.timeScaling["width"] = 35 + self.timeScaling.grid(row=12, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.timeScaling.delete(0, END) + self.timeScaling.insert(0, "[0, 0, 0.671, 0.671, 1.978, 1.978+1.0]") + + # BUTTON TO DO THE SYNTHESIS + self.compute = Button( + self.parent, + text="Apply Transformation", + command=self.transformation_synthesis, + ) + self.compute.grid(row=13, column=0, padx=5, pady=(10, 15), sticky=W) + + # BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT + self.transf_output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_harmonicModelTransformation.wav" + ), + ) + self.transf_output.grid( + row=13, column=0, padx=(165, 5), pady=(10, 15), sticky=W + ) + + # define options for opening file + self.file_opt = options = {} + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" + + def browse_file(self, tkFileDialog=None): + + self.filename = tkFileDialog.askopenfilename(**self.file_opt) + + # set the text of the self.filelocation + self.filelocation.delete(0, END) + self.filelocation.insert(0, self.filename) + + def analysis(self, tkMessageBox=None): + + try: + inputFile = self.filelocation.get() + window = self.w_type.get() + M = int(self.M.get()) + N = int(self.N.get()) + t = int(self.t.get()) + minSineDur = float(self.minSineDur.get()) + nH = int(self.nH.get()) + minf0 = int(self.minf0.get()) + maxf0 = int(self.maxf0.get()) + f0et = int(self.f0et.get()) + harmDevSlope = float(self.harmDevSlope.get()) + + self.inputFile, self.fs, self.hfreq, self.hmag = hT.analysis( + inputFile, + window, + M, + N, + t, + minSineDur, + nH, + minf0, + maxf0, + f0et, + harmDevSlope, + ) + + except ValueError: + tkMessageBox.showerror( + "Input values error", "Some parameters are incorrect" + ) + + def transformation_synthesis(self): + + try: + inputFile = self.inputFile + fs = self.fs + hfreq = self.hfreq + hmag = self.hmag + freqScaling = np.array(eval(self.freqScaling.get())) + freqStretching = np.array(eval(self.freqStretching.get())) + timbrePreservation = int(self.timbrePreservation.get()) + timeScaling = np.array(eval(self.timeScaling.get())) + + hT.transformation_synthesis( + inputFile, + fs, + hfreq, + hmag, + freqScaling, + freqStretching, + timbrePreservation, + timeScaling, + ) + + except ValueError as errorMessage: + messagebox.showerror("Input values error", errorMessage) + + except AttributeError: + messagebox.showerror( + "Analysis not computed", "First you must analyse the sound!" + ) diff --git a/smstools/transformations/interface/harmonicTransformations_function.py b/smstools/transformations/interface/harmonicTransformations_function.py index a3092aea..2737821f 100644 --- a/smstools/transformations/interface/harmonicTransformations_function.py +++ b/smstools/transformations/interface/harmonicTransformations_function.py @@ -1,4 +1,6 @@ -block=False# function call to the transformation functions of relevance for the hpsModel +block = ( + False # function call to the transformation functions of relevance for the hpsModel +) import numpy as np import matplotlib.pyplot as plt @@ -10,158 +12,185 @@ from smstools.transformations import harmonicTransformations as HT from smstools.models import utilFunctions as UF -def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90, - minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01): - """ - Analyze a sound with the harmonic model - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size - N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks - minSineDur: minimum duration of sinusoidal tracks - nH: maximum number of harmonics - minf0: minimum fundamental frequency in sound - maxf0: maximum fundamental frequency in sound - f0et: maximum error accepted in f0 detection algorithm - harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation - returns inputFile: input file name; fs: sampling rate of input file, tfreq, - tmag: sinusoidal frequencies and magnitudes - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # read input sound - fs, x = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # compute the harmonic model of the whole sound - hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur) - - # synthesize the sines without original phases - y = SM.sineModelSynth(hfreq, hmag, np.array([]), Ns, H, fs) - - # output sound file (monophonic with sampling rate of 44100) - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_harmonicModel.wav' - - # write the sound resulting from the inverse stft - UF.wavwrite(y, fs, outputFile) - - # create figure to show plots - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 5000.0 - - # plot the input sound - plt.subplot(3,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - if (hfreq.shape[1] > 0): - plt.subplot(3,1,2) - tracks = np.copy(hfreq) - numFrames = tracks.shape[0] - frmTime = H*np.arange(numFrames)/float(fs) - tracks[tracks<=0] = np.nan - plt.plot(frmTime, tracks) - plt.axis([0, x.size/float(fs), 0, maxplotfreq]) - plt.title('frequencies of harmonic tracks') - - # plot the output sound - plt.subplot(3,1,3) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.show(block=False) - - return inputFile, fs, hfreq, hmag - - -def transformation_synthesis(inputFile, fs, hfreq, hmag, freqScaling = np.array([0, 2.0, 1, .3]), - freqStretching = np.array([0, 1, 1, 1.5]), timbrePreservation = 1, - timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])): - """ - Transform the analysis values returned by the analysis function and synthesize the sound - inputFile: name of input file - fs: sampling rate of input file - tfreq, tmag: sinusoidal frequencies and magnitudes - freqScaling: frequency scaling factors, in time-value pairs - freqStretchig: frequency stretching factors, in time-value pairs - timbrePreservation: 1 preserves original timbre, 0 it does not - timeScaling: time scaling factors, in time-value pairs - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # frequency scaling of the harmonics - yhfreq, yhmag = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs) - - # time scale the sound - yhfreq, yhmag = ST.sineTimeScaling(yhfreq, yhmag, timeScaling) - - # synthesis - y = SM.sineModelSynth(yhfreq, yhmag, np.array([]), Ns, H, fs) - - # write output sound - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_harmonicModelTransformation.wav' - UF.wavwrite(y, fs, outputFile) - - # create figure to plot - plt.figure(figsize=(12, 6)) - - # frequency range to plot - maxplotfreq = 15000.0 - - # plot the transformed sinusoidal frequencies - plt.subplot(2,1,1) - if (yhfreq.shape[1] > 0): - tracks = np.copy(yhfreq) - tracks = tracks*np.less(tracks, maxplotfreq) - tracks[tracks<=0] = np.nan - numFrames = int(tracks[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - plt.plot(frmTime, tracks) - plt.title('transformed harmonic tracks') - plt.autoscale(tight=True) - - # plot the output sound - plt.subplot(2,1,2) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.show() -if __name__ == "__main__": - - # analysis - inputFile, fs, hfreq, hmag = analysis() +def analysis( + inputFile="../../sounds/vignesh.wav", + window="blackman", + M=1201, + N=2048, + t=-90, + minSineDur=0.1, + nH=100, + minf0=130, + maxf0=300, + f0et=7, + harmDevSlope=0.01, +): + """ + Analyze a sound with the harmonic model + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size + N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks + minSineDur: minimum duration of sinusoidal tracks + nH: maximum number of harmonics + minf0: minimum fundamental frequency in sound + maxf0: maximum fundamental frequency in sound + f0et: maximum error accepted in f0 detection algorithm + harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation + returns inputFile: input file name; fs: sampling rate of input file, tfreq, + tmag: sinusoidal frequencies and magnitudes + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # read input sound + fs, x = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # compute the harmonic model of the whole sound + hfreq, hmag, hphase = HM.harmonicModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur + ) + + # synthesize the sines without original phases + y = SM.sineModelSynth(hfreq, hmag, np.array([]), Ns, H, fs) + + # output sound file (monophonic with sampling rate of 44100) + outputFile = ( + "output_sounds/" + os.path.basename(inputFile)[:-4] + "_harmonicModel.wav" + ) + + # write the sound resulting from the inverse stft + UF.wavwrite(y, fs, outputFile) + + # create figure to show plots + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 5000.0 + + # plot the input sound + plt.subplot(3, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + if hfreq.shape[1] > 0: + plt.subplot(3, 1, 2) + tracks = np.copy(hfreq) + numFrames = tracks.shape[0] + frmTime = H * np.arange(numFrames) / float(fs) + tracks[tracks <= 0] = np.nan + plt.plot(frmTime, tracks) + plt.axis([0, x.size / float(fs), 0, maxplotfreq]) + plt.title("frequencies of harmonic tracks") + + # plot the output sound + plt.subplot(3, 1, 3) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.show(block=False) + + return inputFile, fs, hfreq, hmag + + +def transformation_synthesis( + inputFile, + fs, + hfreq, + hmag, + freqScaling=np.array([0, 2.0, 1, 0.3]), + freqStretching=np.array([0, 1, 1, 1.5]), + timbrePreservation=1, + timeScaling=np.array([0, 0.0, 0.671, 0.671, 1.978, 1.978 + 1.0]), +): + """ + Transform the analysis values returned by the analysis function and synthesize the sound + inputFile: name of input file + fs: sampling rate of input file + tfreq, tmag: sinusoidal frequencies and magnitudes + freqScaling: frequency scaling factors, in time-value pairs + freqStretchig: frequency stretching factors, in time-value pairs + timbrePreservation: 1 preserves original timbre, 0 it does not + timeScaling: time scaling factors, in time-value pairs + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # frequency scaling of the harmonics + yhfreq, yhmag = HT.harmonicFreqScaling( + hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs + ) + + # time scale the sound + yhfreq, yhmag = ST.sineTimeScaling(yhfreq, yhmag, timeScaling) + + # synthesis + y = SM.sineModelSynth(yhfreq, yhmag, np.array([]), Ns, H, fs) + + # write output sound + outputFile = ( + "output_sounds/" + + os.path.basename(inputFile)[:-4] + + "_harmonicModelTransformation.wav" + ) + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(12, 6)) + + # frequency range to plot + maxplotfreq = 15000.0 + + # plot the transformed sinusoidal frequencies + plt.subplot(2, 1, 1) + if yhfreq.shape[1] > 0: + tracks = np.copy(yhfreq) + tracks = tracks * np.less(tracks, maxplotfreq) + tracks[tracks <= 0] = np.nan + numFrames = int(tracks[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + plt.plot(frmTime, tracks) + plt.title("transformed harmonic tracks") + plt.autoscale(tight=True) + + # plot the output sound + plt.subplot(2, 1, 2) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.show() - # transformation and synthesis - transformation_synthesis (inputFile, fs, hfreq, hmag) - plt.show() +if __name__ == "__main__": + # analysis + inputFile, fs, hfreq, hmag = analysis() + # transformation and synthesis + transformation_synthesis(inputFile, fs, hfreq, hmag) + plt.show() diff --git a/smstools/transformations/interface/hpsMorph_GUI_frame.py b/smstools/transformations/interface/hpsMorph_GUI_frame.py index 984a3ae8..97530c24 100644 --- a/smstools/transformations/interface/hpsMorph_GUI_frame.py +++ b/smstools/transformations/interface/hpsMorph_GUI_frame.py @@ -8,367 +8,516 @@ from smstools.transformations.interface import hpsMorph_function as hM from smstools.models import utilFunctions as UF + class HpsMorph_frame: - def __init__(self, parent): - - self.parent = parent - self.initUI() - - def initUI(self): - - ## INPUT FILE 1 - choose1_label = "inputFile1:" - Label(self.parent, text=choose1_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) - - #TEXTBOX TO PRINT PATH OF THE SOUND FILE - self.filelocation1 = Entry(self.parent) - self.filelocation1.focus_set() - self.filelocation1["width"] = 30 - self.filelocation1.grid(row=0,column=0, sticky=W, padx=(75, 5), pady=(10,2)) - self.filelocation1.delete(0, END) - self.filelocation1.insert(0, '../../sounds/violin-B3.wav') - - #BUTTON TO BROWSE SOUND FILE 1 - open_file1 = Button(self.parent, text="...", command=self.browse_file1) #see: def browse_file(self) - open_file1.grid(row=0, column=0, sticky=W, padx=(330, 6), pady=(10,2)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE 1 - preview1 = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation1.get())) - preview1.grid(row=0, column=0, sticky=W, padx=(375,6), pady=(10,2)) - - #ANALYSIS WINDOW TYPE SOUND 1 - wtype1_label = "window1:" - Label(self.parent, text=wtype1_label).grid(row=1, column=0, sticky=W, padx=5, pady=(4,2)) - self.w1_type = StringVar() - self.w1_type.set("blackman") # initial value - window1_option = OptionMenu(self.parent, self.w1_type, "rectangular", "hanning", "hamming", "blackman", "blackmanharris") - window1_option.grid(row=1, column=0, sticky=W, padx=(68,5), pady=(4,2)) - - #WINDOW SIZE SOUND 1 - M1_label = "M1:" - Label(self.parent, text=M1_label).grid(row=1, column=0, sticky=W, padx=(180, 5), pady=(4,2)) - self.M1 = Entry(self.parent, justify=CENTER) - self.M1["width"] = 5 - self.M1.grid(row=1,column=0, sticky=W, padx=(208,5), pady=(4,2)) - self.M1.delete(0, END) - self.M1.insert(0, "1001") - - #FFT SIZE SOUND 1 - N1_label = "N1:" - Label(self.parent, text=N1_label).grid(row=1, column=0, sticky=W, padx=(265, 5), pady=(4,2)) - self.N1 = Entry(self.parent, justify=CENTER) - self.N1["width"] = 5 - self.N1.grid(row=1,column=0, sticky=W, padx=(290,5), pady=(4,2)) - self.N1.delete(0, END) - self.N1.insert(0, "1024") - - #THRESHOLD MAGNITUDE SOUND 1 - t1_label = "t1:" - Label(self.parent, text=t1_label).grid(row=1, column=0, sticky=W, padx=(343,5), pady=(4,2)) - self.t1 = Entry(self.parent, justify=CENTER) - self.t1["width"] = 5 - self.t1.grid(row=1, column=0, sticky=W, padx=(370,5), pady=(4,2)) - self.t1.delete(0, END) - self.t1.insert(0, "-100") - - #MIN DURATION SINUSOIDAL TRACKS SOUND 1 - minSineDur1_label = "minSineDur1:" - Label(self.parent, text=minSineDur1_label).grid(row=2, column=0, sticky=W, padx=(5, 5), pady=(4,2)) - self.minSineDur1 = Entry(self.parent, justify=CENTER) - self.minSineDur1["width"] = 5 - self.minSineDur1.grid(row=2, column=0, sticky=W, padx=(92,5), pady=(4,2)) - self.minSineDur1.delete(0, END) - self.minSineDur1.insert(0, "0.05") - - #MIN FUNDAMENTAL FREQUENCY SOUND 1 - minf01_label = "minf01:" - Label(self.parent, text=minf01_label).grid(row=2, column=0, sticky=W, padx=(157,5), pady=(4,2)) - self.minf01 = Entry(self.parent, justify=CENTER) - self.minf01["width"] = 5 - self.minf01.grid(row=2, column=0, sticky=W, padx=(208,5), pady=(4,2)) - self.minf01.delete(0, END) - self.minf01.insert(0, "200") - - #MAX FUNDAMENTAL FREQUENCY SOUND 1 - maxf01_label = "maxf01:" - Label(self.parent, text=maxf01_label).grid(row=2, column=0, sticky=W, padx=(270,5), pady=(4,2)) - self.maxf01 = Entry(self.parent, justify=CENTER) - self.maxf01["width"] = 5 - self.maxf01.grid(row=2, column=0, sticky=W, padx=(325,5), pady=(4,2)) - self.maxf01.delete(0, END) - self.maxf01.insert(0, "300") - - #MAX ERROR ACCEPTED SOUND 1 - f0et1_label = "f0et1:" - Label(self.parent, text=f0et1_label).grid(row=3, column=0, sticky=W, padx=5, pady=(4,2)) - self.f0et1 = Entry(self.parent, justify=CENTER) - self.f0et1["width"] = 3 - self.f0et1.grid(row=3, column=0, sticky=W, padx=(45,5), pady=(4,2)) - self.f0et1.delete(0, END) - self.f0et1.insert(0, "10") - - #ALLOWED DEVIATION OF HARMONIC TRACKS SOUND 1 - harmDevSlope1_label = "harmDevSlope1:" - Label(self.parent, text=harmDevSlope1_label).grid(row=3, column=0, sticky=W, padx=(108,5), pady=(4,2)) - self.harmDevSlope1 = Entry(self.parent, justify=CENTER) - self.harmDevSlope1["width"] = 5 - self.harmDevSlope1.grid(row=3, column=0, sticky=W, padx=(215,5), pady=(4,2)) - self.harmDevSlope1.delete(0, END) - self.harmDevSlope1.insert(0, "0.01") - - ### - #SEPARATION LINE - Frame(self.parent,height=1,width=50,bg="black").grid(row=4, pady=5, sticky=W+E) - ### - - ## INPUT FILE 2 - choose2_label = "inputFile2:" - Label(self.parent, text=choose2_label).grid(row=5, column=0, sticky=W, padx=5, pady=(2,2)) - - #TEXTBOX TO PRINT PATH OF THE SOUND FILE - self.filelocation2 = Entry(self.parent) - self.filelocation2.focus_set() - self.filelocation2["width"] = 30 - self.filelocation2.grid(row=5,column=0, sticky=W, padx=(75, 5), pady=(2,2)) - self.filelocation2.delete(0, END) - self.filelocation2.insert(0, '../../sounds/soprano-E4.wav') - - #BUTTON TO BROWSE SOUND FILE 2 - open_file2 = Button(self.parent, text="...", command=self.browse_file2) #see: def browse_file(self) - open_file2.grid(row=5, column=0, sticky=W, padx=(330, 6), pady=(2,2)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE 2 - preview2 = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation2.get())) - preview2.grid(row=5, column=0, sticky=W, padx=(375,6), pady=(2,2)) - - - #ANALYSIS WINDOW TYPE SOUND 2 - wtype2_label = "window2:" - Label(self.parent, text=wtype2_label).grid(row=6, column=0, sticky=W, padx=5, pady=(4,2)) - self.w2_type = StringVar() - self.w2_type.set("hamming") # initial value - window2_option = OptionMenu(self.parent, self.w2_type, "rectangular", "hanning", "hamming", "blackman", "blackmanharris") - window2_option.grid(row=6, column=0, sticky=W, padx=(68,5), pady=(4,2)) - - #WINDOW SIZE SOUND 2 - M2_label = "M2:" - Label(self.parent, text=M2_label).grid(row=6, column=0, sticky=W, padx=(180, 5), pady=(4,2)) - self.M2 = Entry(self.parent, justify=CENTER) - self.M2["width"] = 5 - self.M2.grid(row=6,column=0, sticky=W, padx=(208,5), pady=(4,2)) - self.M2.delete(0, END) - self.M2.insert(0, "901") - - #FFT SIZE SOUND 2 - N2_label = "N2:" - Label(self.parent, text=N2_label).grid(row=6, column=0, sticky=W, padx=(265, 5), pady=(4,2)) - self.N2 = Entry(self.parent, justify=CENTER) - self.N2["width"] = 5 - self.N2.grid(row=6,column=0, sticky=W, padx=(290,5), pady=(4,2)) - self.N2.delete(0, END) - self.N2.insert(0, "1024") - - #THRESHOLD MAGNITUDE SOUND 2 - t2_label = "t2:" - Label(self.parent, text=t2_label).grid(row=6, column=0, sticky=W, padx=(343,5), pady=(4,2)) - self.t2 = Entry(self.parent, justify=CENTER) - self.t2["width"] = 5 - self.t2.grid(row=6, column=0, sticky=W, padx=(370,5), pady=(4,2)) - self.t2.delete(0, END) - self.t2.insert(0, "-100") - - #MIN DURATION SINUSOIDAL TRACKS SOUND 2 - minSineDur2_label = "minSineDur2:" - Label(self.parent, text=minSineDur2_label).grid(row=7, column=0, sticky=W, padx=(5, 5), pady=(4,2)) - self.minSineDur2 = Entry(self.parent, justify=CENTER) - self.minSineDur2["width"] = 5 - self.minSineDur2.grid(row=7, column=0, sticky=W, padx=(92,5), pady=(4,2)) - self.minSineDur2.delete(0, END) - self.minSineDur2.insert(0, "0.05") - - #MIN FUNDAMENTAL FREQUENCY SOUND 2 - minf02_label = "minf02:" - Label(self.parent, text=minf02_label).grid(row=7, column=0, sticky=W, padx=(157,5), pady=(4,2)) - self.minf02 = Entry(self.parent, justify=CENTER) - self.minf02["width"] = 5 - self.minf02.grid(row=7, column=0, sticky=W, padx=(208,5), pady=(4,2)) - self.minf02.delete(0, END) - self.minf02.insert(0, "250") - - #MAX FUNDAMENTAL FREQUENCY SOUND 2 - maxf02_label = "maxf02:" - Label(self.parent, text=maxf02_label).grid(row=7, column=0, sticky=W, padx=(270,5), pady=(4,2)) - self.maxf02 = Entry(self.parent, justify=CENTER) - self.maxf02["width"] = 5 - self.maxf02.grid(row=7, column=0, sticky=W, padx=(325,5), pady=(4,2)) - self.maxf02.delete(0, END) - self.maxf02.insert(0, "500") - - #MAX ERROR ACCEPTED SOUND 2 - f0et2_label = "f0et2:" - Label(self.parent, text=f0et2_label).grid(row=8, column=0, sticky=W, padx=5, pady=(4,2)) - self.f0et2 = Entry(self.parent, justify=CENTER) - self.f0et2["width"] = 3 - self.f0et2.grid(row=8, column=0, sticky=W, padx=(45,5), pady=(4,2)) - self.f0et2.delete(0, END) - self.f0et2.insert(0, "10") - - #ALLOWED DEVIATION OF HARMONIC TRACKS SOUND 2 - harmDevSlope2_label = "harmDevSlope2:" - Label(self.parent, text=harmDevSlope2_label).grid(row=8, column=0, sticky=W, padx=(108,5), pady=(4,2)) - self.harmDevSlope2 = Entry(self.parent, justify=CENTER) - self.harmDevSlope2["width"] = 5 - self.harmDevSlope2.grid(row=8, column=0, sticky=W, padx=(215,5), pady=(4,2)) - self.harmDevSlope2.delete(0, END) - self.harmDevSlope2.insert(0, "0.01") - - ### - #SEPARATION LINE - Frame(self.parent,height=1,width=50,bg="black").grid(row=9, pady=5, sticky=W+E) - ### - - #MAX NUMBER OF HARMONICS SOUND 1 - nH_label = "nH:" - Label(self.parent, text=nH_label).grid(row=10, column=0, sticky=W, padx=(5,5), pady=(2,2)) - self.nH = Entry(self.parent, justify=CENTER) - self.nH["width"] = 5 - self.nH.grid(row=10, column=0, sticky=W, padx=(35,5), pady=(2,2)) - self.nH.delete(0, END) - self.nH.insert(0, "60") - - #DECIMATION FACTOR SOUND 1 - stocf_label = "stocf:" - Label(self.parent, text=stocf_label).grid(row=10, column=0, sticky=W, padx=(98,5), pady=(2,2)) - self.stocf = Entry(self.parent, justify=CENTER) - self.stocf["width"] = 5 - self.stocf.grid(row=10, column=0, sticky=W, padx=(138,5), pady=(2,2)) - self.stocf.delete(0, END) - self.stocf.insert(0, "0.1") - - #BUTTON TO DO THE ANALYSIS OF THE SOUND - self.compute = Button(self.parent, text="Analysis", command=self.analysis) - self.compute.grid(row=10, column=0, padx=(210, 5), pady=(2,2), sticky=W) - - ### - #SEPARATION LINE - Frame(self.parent,height=1,width=50,bg="black").grid(row=11, pady=5, sticky=W+E) - ### - - # - hfreqIntp_label = "harmonic frequencies interpolation factors, 0 to 1 (time,value pairs)" - Label(self.parent, text=hfreqIntp_label).grid(row=12, column=0, sticky=W, padx=5, pady=(2,2)) - self.hfreqIntp = Entry(self.parent, justify=CENTER) - self.hfreqIntp["width"] = 35 - self.hfreqIntp.grid(row=13, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.hfreqIntp.delete(0, END) - self.hfreqIntp.insert(0, "[0, 0, .1, 0, .9, 1, 1, 1]") - - # - hmagIntp_label = "harmonic magnitudes interpolation factors, 0 to 1 (time,value pairs)" - Label(self.parent, text=hmagIntp_label).grid(row=14, column=0, sticky=W, padx=5, pady=(5,2)) - self.hmagIntp = Entry(self.parent, justify=CENTER) - self.hmagIntp["width"] = 35 - self.hmagIntp.grid(row=15, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.hmagIntp.delete(0, END) - self.hmagIntp.insert(0, "[0, 0, .1, 0, .9, 1, 1, 1]") - - # - stocIntp_label = "stochastic component interpolation factors, 0 to 1 (time,value pairs)" - Label(self.parent, text=stocIntp_label).grid(row=16, column=0, sticky=W, padx=5, pady=(5,2)) - self.stocIntp = Entry(self.parent, justify=CENTER) - self.stocIntp["width"] = 35 - self.stocIntp.grid(row=17, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.stocIntp.delete(0, END) - self.stocIntp.insert(0, "[0, 0, .1, 0, .9, 1, 1, 1]") - - - #BUTTON TO DO THE SYNTHESIS - self.compute = Button(self.parent, text="Apply Transformation", command=self.transformation_synthesis) - self.compute.grid(row=18, column=0, padx=5, pady=(10,15), sticky=W) - - #BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT - self.transf_output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation1.get())[:-4] + '_hpsMorph.wav')) - self.transf_output.grid(row=18, column=0, padx=(165,5), pady=(10,15), sticky=W) - - # define options for opening file - self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' - - def browse_file1(self, tkFileDialog=None): - - self.filename1 = tkFileDialog.askopenfilename(**self.file_opt) - - #set the text of the self.filelocation - self.filelocation1.delete(0, END) - self.filelocation1.insert(0,self.filename1) - - def browse_file2(self, tkFileDialog=None): - - self.filename2 = tkFileDialog.askopenfilename(**self.file_opt) - - #set the text of the self.filelocation - self.filelocation2.delete(0, END) - self.filelocation2.insert(0,self.filename2) - - def analysis(self, tkMessageBox=None): - - try: - inputFile1 = self.filelocation1.get() - window1 = self.w1_type.get() - M1 = int(self.M1.get()) - N1 = int(self.N1.get()) - t1 = int(self.t1.get()) - minSineDur1 = float(self.minSineDur1.get()) - minf01 = int(self.minf01.get()) - maxf01 = int(self.maxf01.get()) - f0et1 = int(self.f0et1.get()) - harmDevSlope1 = float(self.harmDevSlope1.get()) - - nH = int(self.nH.get()) - stocf = float(self.stocf.get()) - - inputFile2 = self.filelocation2.get() - window2 = self.w2_type.get() - M2 = int(self.M2.get()) - N2 = int(self.N2.get()) - t2 = int(self.t2.get()) - minSineDur2 = float(self.minSineDur2.get()) - minf02 = int(self.minf02.get()) - maxf02 = int(self.maxf02.get()) - f0et2 = int(self.f0et2.get()) - harmDevSlope2 = float(self.harmDevSlope2.get()) - - self.inputFile1, self.fs1, self.hfreq1, self.hmag1, self.stocEnv1, \ - self.inputFile2, self.hfreq2, self.hmag2, self.stocEnv2 = hM.analysis(inputFile1, window1, M1, N1, t1, \ - minSineDur1, nH, minf01, maxf01, f0et1, harmDevSlope1, stocf, inputFile2, window2, M2, N2, t2, minSineDur2, minf02, maxf02, f0et2, harmDevSlope2) - - except ValueError as errorMessage: - tkMessageBox.showerror("Input values error", errorMessage) - - def transformation_synthesis(self): - - try: - inputFile1 = self.inputFile1 - fs = self.fs1 - hfreq1 = self.hfreq1 - hmag1 = self.hmag1 - stocEnv1 = self.stocEnv1 - inputFile2 = self.inputFile2 - hfreq2 = self.hfreq2 - hmag2 = self.hmag2 - stocEnv2 = self.stocEnv2 - hfreqIntp = np.array(eval(self.hfreqIntp.get())) - hmagIntp = np.array(eval(self.hmagIntp.get())) - stocIntp = np.array(eval(self.stocIntp.get())) - - hM.transformation_synthesis(inputFile1, fs, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp) - - except ValueError as errorMessage: - messagebox.showerror("Input values error", errorMessage) - - except AttributeError: - messagebox.showerror("Analysis not computed", "First you must analyse the sound!") + def __init__(self, parent): + + self.parent = parent + self.initUI() + + def initUI(self): + + ## INPUT FILE 1 + choose1_label = "inputFile1:" + Label(self.parent, text=choose1_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) + + # TEXTBOX TO PRINT PATH OF THE SOUND FILE + self.filelocation1 = Entry(self.parent) + self.filelocation1.focus_set() + self.filelocation1["width"] = 30 + self.filelocation1.grid(row=0, column=0, sticky=W, padx=(75, 5), pady=(10, 2)) + self.filelocation1.delete(0, END) + self.filelocation1.insert(0, "../../sounds/violin-B3.wav") + + # BUTTON TO BROWSE SOUND FILE 1 + open_file1 = Button( + self.parent, text="...", command=self.browse_file1 + ) # see: def browse_file(self) + open_file1.grid( + row=0, column=0, sticky=W, padx=(330, 6), pady=(10, 2) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE 1 + preview1 = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation1.get()) + ) + preview1.grid(row=0, column=0, sticky=W, padx=(375, 6), pady=(10, 2)) + + # ANALYSIS WINDOW TYPE SOUND 1 + wtype1_label = "window1:" + Label(self.parent, text=wtype1_label).grid( + row=1, column=0, sticky=W, padx=5, pady=(4, 2) + ) + self.w1_type = StringVar() + self.w1_type.set("blackman") # initial value + window1_option = OptionMenu( + self.parent, + self.w1_type, + "rectangular", + "hanning", + "hamming", + "blackman", + "blackmanharris", + ) + window1_option.grid(row=1, column=0, sticky=W, padx=(68, 5), pady=(4, 2)) + + # WINDOW SIZE SOUND 1 + M1_label = "M1:" + Label(self.parent, text=M1_label).grid( + row=1, column=0, sticky=W, padx=(180, 5), pady=(4, 2) + ) + self.M1 = Entry(self.parent, justify=CENTER) + self.M1["width"] = 5 + self.M1.grid(row=1, column=0, sticky=W, padx=(208, 5), pady=(4, 2)) + self.M1.delete(0, END) + self.M1.insert(0, "1001") + + # FFT SIZE SOUND 1 + N1_label = "N1:" + Label(self.parent, text=N1_label).grid( + row=1, column=0, sticky=W, padx=(265, 5), pady=(4, 2) + ) + self.N1 = Entry(self.parent, justify=CENTER) + self.N1["width"] = 5 + self.N1.grid(row=1, column=0, sticky=W, padx=(290, 5), pady=(4, 2)) + self.N1.delete(0, END) + self.N1.insert(0, "1024") + + # THRESHOLD MAGNITUDE SOUND 1 + t1_label = "t1:" + Label(self.parent, text=t1_label).grid( + row=1, column=0, sticky=W, padx=(343, 5), pady=(4, 2) + ) + self.t1 = Entry(self.parent, justify=CENTER) + self.t1["width"] = 5 + self.t1.grid(row=1, column=0, sticky=W, padx=(370, 5), pady=(4, 2)) + self.t1.delete(0, END) + self.t1.insert(0, "-100") + + # MIN DURATION SINUSOIDAL TRACKS SOUND 1 + minSineDur1_label = "minSineDur1:" + Label(self.parent, text=minSineDur1_label).grid( + row=2, column=0, sticky=W, padx=(5, 5), pady=(4, 2) + ) + self.minSineDur1 = Entry(self.parent, justify=CENTER) + self.minSineDur1["width"] = 5 + self.minSineDur1.grid(row=2, column=0, sticky=W, padx=(92, 5), pady=(4, 2)) + self.minSineDur1.delete(0, END) + self.minSineDur1.insert(0, "0.05") + + # MIN FUNDAMENTAL FREQUENCY SOUND 1 + minf01_label = "minf01:" + Label(self.parent, text=minf01_label).grid( + row=2, column=0, sticky=W, padx=(157, 5), pady=(4, 2) + ) + self.minf01 = Entry(self.parent, justify=CENTER) + self.minf01["width"] = 5 + self.minf01.grid(row=2, column=0, sticky=W, padx=(208, 5), pady=(4, 2)) + self.minf01.delete(0, END) + self.minf01.insert(0, "200") + + # MAX FUNDAMENTAL FREQUENCY SOUND 1 + maxf01_label = "maxf01:" + Label(self.parent, text=maxf01_label).grid( + row=2, column=0, sticky=W, padx=(270, 5), pady=(4, 2) + ) + self.maxf01 = Entry(self.parent, justify=CENTER) + self.maxf01["width"] = 5 + self.maxf01.grid(row=2, column=0, sticky=W, padx=(325, 5), pady=(4, 2)) + self.maxf01.delete(0, END) + self.maxf01.insert(0, "300") + + # MAX ERROR ACCEPTED SOUND 1 + f0et1_label = "f0et1:" + Label(self.parent, text=f0et1_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(4, 2) + ) + self.f0et1 = Entry(self.parent, justify=CENTER) + self.f0et1["width"] = 3 + self.f0et1.grid(row=3, column=0, sticky=W, padx=(45, 5), pady=(4, 2)) + self.f0et1.delete(0, END) + self.f0et1.insert(0, "10") + + # ALLOWED DEVIATION OF HARMONIC TRACKS SOUND 1 + harmDevSlope1_label = "harmDevSlope1:" + Label(self.parent, text=harmDevSlope1_label).grid( + row=3, column=0, sticky=W, padx=(108, 5), pady=(4, 2) + ) + self.harmDevSlope1 = Entry(self.parent, justify=CENTER) + self.harmDevSlope1["width"] = 5 + self.harmDevSlope1.grid(row=3, column=0, sticky=W, padx=(215, 5), pady=(4, 2)) + self.harmDevSlope1.delete(0, END) + self.harmDevSlope1.insert(0, "0.01") + + ### + # SEPARATION LINE + Frame(self.parent, height=1, width=50, bg="black").grid( + row=4, pady=5, sticky=W + E + ) + ### + + ## INPUT FILE 2 + choose2_label = "inputFile2:" + Label(self.parent, text=choose2_label).grid( + row=5, column=0, sticky=W, padx=5, pady=(2, 2) + ) + + # TEXTBOX TO PRINT PATH OF THE SOUND FILE + self.filelocation2 = Entry(self.parent) + self.filelocation2.focus_set() + self.filelocation2["width"] = 30 + self.filelocation2.grid(row=5, column=0, sticky=W, padx=(75, 5), pady=(2, 2)) + self.filelocation2.delete(0, END) + self.filelocation2.insert(0, "../../sounds/soprano-E4.wav") + + # BUTTON TO BROWSE SOUND FILE 2 + open_file2 = Button( + self.parent, text="...", command=self.browse_file2 + ) # see: def browse_file(self) + open_file2.grid( + row=5, column=0, sticky=W, padx=(330, 6), pady=(2, 2) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE 2 + preview2 = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation2.get()) + ) + preview2.grid(row=5, column=0, sticky=W, padx=(375, 6), pady=(2, 2)) + + # ANALYSIS WINDOW TYPE SOUND 2 + wtype2_label = "window2:" + Label(self.parent, text=wtype2_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(4, 2) + ) + self.w2_type = StringVar() + self.w2_type.set("hamming") # initial value + window2_option = OptionMenu( + self.parent, + self.w2_type, + "rectangular", + "hanning", + "hamming", + "blackman", + "blackmanharris", + ) + window2_option.grid(row=6, column=0, sticky=W, padx=(68, 5), pady=(4, 2)) + + # WINDOW SIZE SOUND 2 + M2_label = "M2:" + Label(self.parent, text=M2_label).grid( + row=6, column=0, sticky=W, padx=(180, 5), pady=(4, 2) + ) + self.M2 = Entry(self.parent, justify=CENTER) + self.M2["width"] = 5 + self.M2.grid(row=6, column=0, sticky=W, padx=(208, 5), pady=(4, 2)) + self.M2.delete(0, END) + self.M2.insert(0, "901") + + # FFT SIZE SOUND 2 + N2_label = "N2:" + Label(self.parent, text=N2_label).grid( + row=6, column=0, sticky=W, padx=(265, 5), pady=(4, 2) + ) + self.N2 = Entry(self.parent, justify=CENTER) + self.N2["width"] = 5 + self.N2.grid(row=6, column=0, sticky=W, padx=(290, 5), pady=(4, 2)) + self.N2.delete(0, END) + self.N2.insert(0, "1024") + + # THRESHOLD MAGNITUDE SOUND 2 + t2_label = "t2:" + Label(self.parent, text=t2_label).grid( + row=6, column=0, sticky=W, padx=(343, 5), pady=(4, 2) + ) + self.t2 = Entry(self.parent, justify=CENTER) + self.t2["width"] = 5 + self.t2.grid(row=6, column=0, sticky=W, padx=(370, 5), pady=(4, 2)) + self.t2.delete(0, END) + self.t2.insert(0, "-100") + + # MIN DURATION SINUSOIDAL TRACKS SOUND 2 + minSineDur2_label = "minSineDur2:" + Label(self.parent, text=minSineDur2_label).grid( + row=7, column=0, sticky=W, padx=(5, 5), pady=(4, 2) + ) + self.minSineDur2 = Entry(self.parent, justify=CENTER) + self.minSineDur2["width"] = 5 + self.minSineDur2.grid(row=7, column=0, sticky=W, padx=(92, 5), pady=(4, 2)) + self.minSineDur2.delete(0, END) + self.minSineDur2.insert(0, "0.05") + + # MIN FUNDAMENTAL FREQUENCY SOUND 2 + minf02_label = "minf02:" + Label(self.parent, text=minf02_label).grid( + row=7, column=0, sticky=W, padx=(157, 5), pady=(4, 2) + ) + self.minf02 = Entry(self.parent, justify=CENTER) + self.minf02["width"] = 5 + self.minf02.grid(row=7, column=0, sticky=W, padx=(208, 5), pady=(4, 2)) + self.minf02.delete(0, END) + self.minf02.insert(0, "250") + + # MAX FUNDAMENTAL FREQUENCY SOUND 2 + maxf02_label = "maxf02:" + Label(self.parent, text=maxf02_label).grid( + row=7, column=0, sticky=W, padx=(270, 5), pady=(4, 2) + ) + self.maxf02 = Entry(self.parent, justify=CENTER) + self.maxf02["width"] = 5 + self.maxf02.grid(row=7, column=0, sticky=W, padx=(325, 5), pady=(4, 2)) + self.maxf02.delete(0, END) + self.maxf02.insert(0, "500") + + # MAX ERROR ACCEPTED SOUND 2 + f0et2_label = "f0et2:" + Label(self.parent, text=f0et2_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(4, 2) + ) + self.f0et2 = Entry(self.parent, justify=CENTER) + self.f0et2["width"] = 3 + self.f0et2.grid(row=8, column=0, sticky=W, padx=(45, 5), pady=(4, 2)) + self.f0et2.delete(0, END) + self.f0et2.insert(0, "10") + + # ALLOWED DEVIATION OF HARMONIC TRACKS SOUND 2 + harmDevSlope2_label = "harmDevSlope2:" + Label(self.parent, text=harmDevSlope2_label).grid( + row=8, column=0, sticky=W, padx=(108, 5), pady=(4, 2) + ) + self.harmDevSlope2 = Entry(self.parent, justify=CENTER) + self.harmDevSlope2["width"] = 5 + self.harmDevSlope2.grid(row=8, column=0, sticky=W, padx=(215, 5), pady=(4, 2)) + self.harmDevSlope2.delete(0, END) + self.harmDevSlope2.insert(0, "0.01") + + ### + # SEPARATION LINE + Frame(self.parent, height=1, width=50, bg="black").grid( + row=9, pady=5, sticky=W + E + ) + ### + + # MAX NUMBER OF HARMONICS SOUND 1 + nH_label = "nH:" + Label(self.parent, text=nH_label).grid( + row=10, column=0, sticky=W, padx=(5, 5), pady=(2, 2) + ) + self.nH = Entry(self.parent, justify=CENTER) + self.nH["width"] = 5 + self.nH.grid(row=10, column=0, sticky=W, padx=(35, 5), pady=(2, 2)) + self.nH.delete(0, END) + self.nH.insert(0, "60") + + # DECIMATION FACTOR SOUND 1 + stocf_label = "stocf:" + Label(self.parent, text=stocf_label).grid( + row=10, column=0, sticky=W, padx=(98, 5), pady=(2, 2) + ) + self.stocf = Entry(self.parent, justify=CENTER) + self.stocf["width"] = 5 + self.stocf.grid(row=10, column=0, sticky=W, padx=(138, 5), pady=(2, 2)) + self.stocf.delete(0, END) + self.stocf.insert(0, "0.1") + + # BUTTON TO DO THE ANALYSIS OF THE SOUND + self.compute = Button(self.parent, text="Analysis", command=self.analysis) + self.compute.grid(row=10, column=0, padx=(210, 5), pady=(2, 2), sticky=W) + + ### + # SEPARATION LINE + Frame(self.parent, height=1, width=50, bg="black").grid( + row=11, pady=5, sticky=W + E + ) + ### + + # + hfreqIntp_label = ( + "harmonic frequencies interpolation factors, 0 to 1 (time,value pairs)" + ) + Label(self.parent, text=hfreqIntp_label).grid( + row=12, column=0, sticky=W, padx=5, pady=(2, 2) + ) + self.hfreqIntp = Entry(self.parent, justify=CENTER) + self.hfreqIntp["width"] = 35 + self.hfreqIntp.grid(row=13, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.hfreqIntp.delete(0, END) + self.hfreqIntp.insert(0, "[0, 0, .1, 0, .9, 1, 1, 1]") + + # + hmagIntp_label = ( + "harmonic magnitudes interpolation factors, 0 to 1 (time,value pairs)" + ) + Label(self.parent, text=hmagIntp_label).grid( + row=14, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.hmagIntp = Entry(self.parent, justify=CENTER) + self.hmagIntp["width"] = 35 + self.hmagIntp.grid(row=15, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.hmagIntp.delete(0, END) + self.hmagIntp.insert(0, "[0, 0, .1, 0, .9, 1, 1, 1]") + + # + stocIntp_label = ( + "stochastic component interpolation factors, 0 to 1 (time,value pairs)" + ) + Label(self.parent, text=stocIntp_label).grid( + row=16, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.stocIntp = Entry(self.parent, justify=CENTER) + self.stocIntp["width"] = 35 + self.stocIntp.grid(row=17, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.stocIntp.delete(0, END) + self.stocIntp.insert(0, "[0, 0, .1, 0, .9, 1, 1, 1]") + + # BUTTON TO DO THE SYNTHESIS + self.compute = Button( + self.parent, + text="Apply Transformation", + command=self.transformation_synthesis, + ) + self.compute.grid(row=18, column=0, padx=5, pady=(10, 15), sticky=W) + + # BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT + self.transf_output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation1.get())[:-4] + + "_hpsMorph.wav" + ), + ) + self.transf_output.grid( + row=18, column=0, padx=(165, 5), pady=(10, 15), sticky=W + ) + + # define options for opening file + self.file_opt = options = {} + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" + + def browse_file1(self, tkFileDialog=None): + + self.filename1 = tkFileDialog.askopenfilename(**self.file_opt) + + # set the text of the self.filelocation + self.filelocation1.delete(0, END) + self.filelocation1.insert(0, self.filename1) + + def browse_file2(self, tkFileDialog=None): + + self.filename2 = tkFileDialog.askopenfilename(**self.file_opt) + + # set the text of the self.filelocation + self.filelocation2.delete(0, END) + self.filelocation2.insert(0, self.filename2) + + def analysis(self, tkMessageBox=None): + + try: + inputFile1 = self.filelocation1.get() + window1 = self.w1_type.get() + M1 = int(self.M1.get()) + N1 = int(self.N1.get()) + t1 = int(self.t1.get()) + minSineDur1 = float(self.minSineDur1.get()) + minf01 = int(self.minf01.get()) + maxf01 = int(self.maxf01.get()) + f0et1 = int(self.f0et1.get()) + harmDevSlope1 = float(self.harmDevSlope1.get()) + + nH = int(self.nH.get()) + stocf = float(self.stocf.get()) + + inputFile2 = self.filelocation2.get() + window2 = self.w2_type.get() + M2 = int(self.M2.get()) + N2 = int(self.N2.get()) + t2 = int(self.t2.get()) + minSineDur2 = float(self.minSineDur2.get()) + minf02 = int(self.minf02.get()) + maxf02 = int(self.maxf02.get()) + f0et2 = int(self.f0et2.get()) + harmDevSlope2 = float(self.harmDevSlope2.get()) + + ( + self.inputFile1, + self.fs1, + self.hfreq1, + self.hmag1, + self.stocEnv1, + self.inputFile2, + self.hfreq2, + self.hmag2, + self.stocEnv2, + ) = hM.analysis( + inputFile1, + window1, + M1, + N1, + t1, + minSineDur1, + nH, + minf01, + maxf01, + f0et1, + harmDevSlope1, + stocf, + inputFile2, + window2, + M2, + N2, + t2, + minSineDur2, + minf02, + maxf02, + f0et2, + harmDevSlope2, + ) + + except ValueError as errorMessage: + tkMessageBox.showerror("Input values error", errorMessage) + + def transformation_synthesis(self): + + try: + inputFile1 = self.inputFile1 + fs = self.fs1 + hfreq1 = self.hfreq1 + hmag1 = self.hmag1 + stocEnv1 = self.stocEnv1 + inputFile2 = self.inputFile2 + hfreq2 = self.hfreq2 + hmag2 = self.hmag2 + stocEnv2 = self.stocEnv2 + hfreqIntp = np.array(eval(self.hfreqIntp.get())) + hmagIntp = np.array(eval(self.hmagIntp.get())) + stocIntp = np.array(eval(self.stocIntp.get())) + + hM.transformation_synthesis( + inputFile1, + fs, + hfreq1, + hmag1, + stocEnv1, + inputFile2, + hfreq2, + hmag2, + stocEnv2, + hfreqIntp, + hmagIntp, + stocIntp, + ) + + except ValueError as errorMessage: + messagebox.showerror("Input values error", errorMessage) + + except AttributeError: + messagebox.showerror( + "Analysis not computed", "First you must analyse the sound!" + ) diff --git a/smstools/transformations/interface/hpsMorph_function.py b/smstools/transformations/interface/hpsMorph_function.py index 07ae91d2..a1007bab 100644 --- a/smstools/transformations/interface/hpsMorph_function.py +++ b/smstools/transformations/interface/hpsMorph_function.py @@ -8,171 +8,253 @@ from smstools.transformations import hpsTransformations as HPST from smstools.models import utilFunctions as UF -def analysis(inputFile1='../../sounds/violin-B3.wav', window1='blackman', M1=1001, N1=1024, t1=-100, - minSineDur1=0.05, nH=60, minf01=200, maxf01=300, f0et1=10, harmDevSlope1=0.01, stocf=0.1, - inputFile2='../../sounds/soprano-E4.wav', window2='blackman', M2=901, N2=1024, t2=-100, - minSineDur2=0.05, minf02=250, maxf02=500, f0et2=10, harmDevSlope2=0.01): - """ - Analyze two sounds with the harmonic plus stochastic model - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size - N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks - minSineDur: minimum duration of sinusoidal tracks - nH: maximum number of harmonics - minf0: minimum fundamental frequency in sound - maxf0: maximum fundamental frequency in sound - f0et: maximum error accepted in f0 detection algorithm - harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation - stocf: decimation factor used for the stochastic approximation - returns inputFile: input file name; fs: sampling rate of input file, - hfreq, hmag: harmonic frequencies, magnitude; stocEnv: stochastic residual - """ - - # size of fft used in synthesis - Ns = 512 - # hop size (has to be 1/4 of Ns) - H = 128 - # read input sounds - (fs1, x1) = UF.wavread(inputFile1) - (fs2, x2) = UF.wavread(inputFile2) - # compute analysis windows - w1 = get_window(window1, M1) - w2 = get_window(window2, M2) - # compute the harmonic plus stochastic models - hfreq1, hmag1, hphase1, stocEnv1 = HPS.hpsModelAnal(x1, fs1, w1, N1, H, t1, nH, minf01, maxf01, f0et1, harmDevSlope1, minSineDur1, Ns, stocf) - hfreq2, hmag2, hphase2, stocEnv2 = HPS.hpsModelAnal(x2, fs2, w2, N2, H, t2, nH, minf02, maxf02, f0et2, harmDevSlope2, minSineDur2, Ns, stocf) - - # create figure to plot - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 15000.0 - - # plot spectrogram stochastic component of sound 1 - plt.subplot(2,1,1) - numFrames = int(stocEnv1[:,0].size) - sizeEnv = int(stocEnv1[0,:].size) - frmTime = H*np.arange(numFrames)/float(fs1) - binFreq = (.5*fs1)*np.arange(sizeEnv*maxplotfreq/(.5*fs1))/sizeEnv - plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv1[:,:int(sizeEnv*maxplotfreq/(.5*fs1))+1])) - plt.autoscale(tight=True) - - # plot harmonic on top of stochastic spectrogram of sound 1 - if (hfreq1.shape[1] > 0): - harms = np.copy(hfreq1) - harms = harms*np.less(harms,maxplotfreq) - harms[harms==0] = np.nan - numFrames = int(harms[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs1) - plt.plot(frmTime, harms, color='k', ms=3, alpha=1) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.autoscale(tight=True) - plt.title('harmonics + stochastic spectrogram of sound 1') - - # plot spectrogram stochastic component of sound 2 - plt.subplot(2,1,2) - numFrames = int(stocEnv2[:,0].size) - sizeEnv = int(stocEnv2[0,:].size) - frmTime = H*np.arange(numFrames)/float(fs2) - binFreq = (.5*fs2)*np.arange(sizeEnv*maxplotfreq/(.5*fs2))/sizeEnv - plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv2[:,:int(sizeEnv*maxplotfreq/(.5*fs2))+1])) - plt.autoscale(tight=True) - - # plot harmonic on top of stochastic spectrogram of sound 2 - if (hfreq2.shape[1] > 0): - harms = np.copy(hfreq2) - harms = harms*np.less(harms,maxplotfreq) - harms[harms==0] = np.nan - numFrames = int(harms[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs2) - plt.plot(frmTime, harms, color='k', ms=3, alpha=1) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.autoscale(tight=True) - plt.title('harmonics + stochastic spectrogram of sound 2') - - plt.tight_layout() - plt.show(block=False) - - return inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2 - -def transformation_synthesis(inputFile1, fs, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2, - hfreqIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1]), hmagIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1]), stocIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])): - """ - Transform the analysis values returned by the analysis function and synthesize the sound - inputFile1: name of input file 1 - fs: sampling rate of input file 1 - hfreq1, hmag1, stocEnv1: hps representation of sound 1 - inputFile2: name of input file 2 - hfreq2, hmag2, stocEnv2: hps representation of sound 2 - hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) - hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) - stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) - """ - - # size of fft used in synthesis - Ns = 512 - # hop size (has to be 1/4 of Ns) - H = 128 - - # morph the two sounds - yhfreq, yhmag, ystocEnv = HPST.hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp) - - # synthesis - y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs) - - # write output sound - outputFile = 'output_sounds/' + os.path.basename(inputFile1)[:-4] + '_hpsMorph.wav' - UF.wavwrite(y, fs, outputFile) - - # create figure to plot - plt.figure(figsize=(12, 9)) - - # frequency range to plot - maxplotfreq = 15000.0 - - # plot spectrogram of transformed stochastic compoment - plt.subplot(2,1,1) - numFrames = int(ystocEnv[:,0].size) - sizeEnv = int(ystocEnv[0,:].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv - plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:,:int(sizeEnv*maxplotfreq/(.5*fs))+1])) - plt.autoscale(tight=True) - - # plot transformed harmonic on top of stochastic spectrogram - if (yhfreq.shape[1] > 0): - harms = np.copy(yhfreq) - harms = harms*np.less(harms,maxplotfreq) - harms[harms==0] = np.nan - numFrames = int(harms[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - plt.plot(frmTime, harms, color='k', ms=3, alpha=1) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.autoscale(tight=True) - plt.title('harmonics + stochastic spectrogram') - - # plot the output sound - plt.subplot(2,1,2) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.show() + +def analysis( + inputFile1="../../sounds/violin-B3.wav", + window1="blackman", + M1=1001, + N1=1024, + t1=-100, + minSineDur1=0.05, + nH=60, + minf01=200, + maxf01=300, + f0et1=10, + harmDevSlope1=0.01, + stocf=0.1, + inputFile2="../../sounds/soprano-E4.wav", + window2="blackman", + M2=901, + N2=1024, + t2=-100, + minSineDur2=0.05, + minf02=250, + maxf02=500, + f0et2=10, + harmDevSlope2=0.01, +): + """ + Analyze two sounds with the harmonic plus stochastic model + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size + N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks + minSineDur: minimum duration of sinusoidal tracks + nH: maximum number of harmonics + minf0: minimum fundamental frequency in sound + maxf0: maximum fundamental frequency in sound + f0et: maximum error accepted in f0 detection algorithm + harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation + stocf: decimation factor used for the stochastic approximation + returns inputFile: input file name; fs: sampling rate of input file, + hfreq, hmag: harmonic frequencies, magnitude; stocEnv: stochastic residual + """ + + # size of fft used in synthesis + Ns = 512 + # hop size (has to be 1/4 of Ns) + H = 128 + # read input sounds + (fs1, x1) = UF.wavread(inputFile1) + (fs2, x2) = UF.wavread(inputFile2) + # compute analysis windows + w1 = get_window(window1, M1) + w2 = get_window(window2, M2) + # compute the harmonic plus stochastic models + hfreq1, hmag1, hphase1, stocEnv1 = HPS.hpsModelAnal( + x1, + fs1, + w1, + N1, + H, + t1, + nH, + minf01, + maxf01, + f0et1, + harmDevSlope1, + minSineDur1, + Ns, + stocf, + ) + hfreq2, hmag2, hphase2, stocEnv2 = HPS.hpsModelAnal( + x2, + fs2, + w2, + N2, + H, + t2, + nH, + minf02, + maxf02, + f0et2, + harmDevSlope2, + minSineDur2, + Ns, + stocf, + ) + + # create figure to plot + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 15000.0 + + # plot spectrogram stochastic component of sound 1 + plt.subplot(2, 1, 1) + numFrames = int(stocEnv1[:, 0].size) + sizeEnv = int(stocEnv1[0, :].size) + frmTime = H * np.arange(numFrames) / float(fs1) + binFreq = (0.5 * fs1) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs1)) / sizeEnv + plt.pcolormesh( + frmTime, + binFreq, + np.transpose(stocEnv1[:, : int(sizeEnv * maxplotfreq / (0.5 * fs1)) + 1]), + ) + plt.autoscale(tight=True) + + # plot harmonic on top of stochastic spectrogram of sound 1 + if hfreq1.shape[1] > 0: + harms = np.copy(hfreq1) + harms = harms * np.less(harms, maxplotfreq) + harms[harms == 0] = np.nan + numFrames = int(harms[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs1) + plt.plot(frmTime, harms, color="k", ms=3, alpha=1) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.autoscale(tight=True) + plt.title("harmonics + stochastic spectrogram of sound 1") + + # plot spectrogram stochastic component of sound 2 + plt.subplot(2, 1, 2) + numFrames = int(stocEnv2[:, 0].size) + sizeEnv = int(stocEnv2[0, :].size) + frmTime = H * np.arange(numFrames) / float(fs2) + binFreq = (0.5 * fs2) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs2)) / sizeEnv + plt.pcolormesh( + frmTime, + binFreq, + np.transpose(stocEnv2[:, : int(sizeEnv * maxplotfreq / (0.5 * fs2)) + 1]), + ) + plt.autoscale(tight=True) + + # plot harmonic on top of stochastic spectrogram of sound 2 + if hfreq2.shape[1] > 0: + harms = np.copy(hfreq2) + harms = harms * np.less(harms, maxplotfreq) + harms[harms == 0] = np.nan + numFrames = int(harms[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs2) + plt.plot(frmTime, harms, color="k", ms=3, alpha=1) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.autoscale(tight=True) + plt.title("harmonics + stochastic spectrogram of sound 2") + + plt.tight_layout() + plt.show(block=False) + + return inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2 + + +def transformation_synthesis( + inputFile1, + fs, + hfreq1, + hmag1, + stocEnv1, + inputFile2, + hfreq2, + hmag2, + stocEnv2, + hfreqIntp=np.array([0, 0, 0.1, 0, 0.9, 1, 1, 1]), + hmagIntp=np.array([0, 0, 0.1, 0, 0.9, 1, 1, 1]), + stocIntp=np.array([0, 0, 0.1, 0, 0.9, 1, 1, 1]), +): + """ + Transform the analysis values returned by the analysis function and synthesize the sound + inputFile1: name of input file 1 + fs: sampling rate of input file 1 + hfreq1, hmag1, stocEnv1: hps representation of sound 1 + inputFile2: name of input file 2 + hfreq2, hmag2, stocEnv2: hps representation of sound 2 + hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) + hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) + stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) + """ + + # size of fft used in synthesis + Ns = 512 + # hop size (has to be 1/4 of Ns) + H = 128 + + # morph the two sounds + yhfreq, yhmag, ystocEnv = HPST.hpsMorph( + hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp + ) + + # synthesis + y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs) + + # write output sound + outputFile = "output_sounds/" + os.path.basename(inputFile1)[:-4] + "_hpsMorph.wav" + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(12, 9)) + + # frequency range to plot + maxplotfreq = 15000.0 + + # plot spectrogram of transformed stochastic compoment + plt.subplot(2, 1, 1) + numFrames = int(ystocEnv[:, 0].size) + sizeEnv = int(ystocEnv[0, :].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = (0.5 * fs) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs)) / sizeEnv + plt.pcolormesh( + frmTime, + binFreq, + np.transpose(ystocEnv[:, : int(sizeEnv * maxplotfreq / (0.5 * fs)) + 1]), + ) + plt.autoscale(tight=True) + + # plot transformed harmonic on top of stochastic spectrogram + if yhfreq.shape[1] > 0: + harms = np.copy(yhfreq) + harms = harms * np.less(harms, maxplotfreq) + harms[harms == 0] = np.nan + numFrames = int(harms[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + plt.plot(frmTime, harms, color="k", ms=3, alpha=1) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.autoscale(tight=True) + plt.title("harmonics + stochastic spectrogram") + + # plot the output sound + plt.subplot(2, 1, 2) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.show() if __name__ == "__main__": - # analysis - inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2 = analysis() + # analysis + inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2 = ( + analysis() + ) - # transformation and synthesis - transformation_synthesis (inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2) + # transformation and synthesis + transformation_synthesis( + inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2 + ) - plt.show() + plt.show() diff --git a/smstools/transformations/interface/hpsTransformations_GUI_frame.py b/smstools/transformations/interface/hpsTransformations_GUI_frame.py index f5e0b724..5751dd63 100644 --- a/smstools/transformations/interface/hpsTransformations_GUI_frame.py +++ b/smstools/transformations/interface/hpsTransformations_GUI_frame.py @@ -8,244 +8,346 @@ from smstools.transformations.interface import hpsTransformations_function as hT from smstools.models import utilFunctions as UF + class HpsTransformations_frame: - def __init__(self, parent): - - self.parent = parent - self.initUI() - - def initUI(self): - - choose_label = "inputFile:" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) - - #TEXTBOX TO PRINT PATH OF THE SOUND FILE - self.filelocation = Entry(self.parent) - self.filelocation.focus_set() - self.filelocation["width"] = 32 - self.filelocation.grid(row=0,column=0, sticky=W, padx=(70, 5), pady=(10,2)) - self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/sax-phrase-short.wav') - - #BUTTON TO BROWSE SOUND FILE - open_file = Button(self.parent, text="...", command=self.browse_file) #see: def browse_file(self) - open_file.grid(row=0, column=0, sticky=W, padx=(340, 6), pady=(10,2)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE - preview = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation.get())) - preview.grid(row=0, column=0, sticky=W, padx=(385,6), pady=(10,2)) - - ## HPS TRANSFORMATIONS ANALYSIS - - #ANALYSIS WINDOW TYPE - wtype_label = "window:" - Label(self.parent, text=wtype_label).grid(row=1, column=0, sticky=W, padx=5, pady=(10,2)) - self.w_type = StringVar() - self.w_type.set("blackman") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hanning", "hamming", "blackman", "blackmanharris") - window_option.grid(row=1, column=0, sticky=W, padx=(65,5), pady=(10,2)) - - #WINDOW SIZE - M_label = "M:" - Label(self.parent, text=M_label).grid(row=1, column=0, sticky=W, padx=(180, 5), pady=(10,2)) - self.M = Entry(self.parent, justify=CENTER) - self.M["width"] = 5 - self.M.grid(row=1,column=0, sticky=W, padx=(200,5), pady=(10,2)) - self.M.delete(0, END) - self.M.insert(0, "601") - - #FFT SIZE - N_label = "N:" - Label(self.parent, text=N_label).grid(row=1, column=0, sticky=W, padx=(255, 5), pady=(10,2)) - self.N = Entry(self.parent, justify=CENTER) - self.N["width"] = 5 - self.N.grid(row=1,column=0, sticky=W, padx=(275,5), pady=(10,2)) - self.N.delete(0, END) - self.N.insert(0, "1024") - - #THRESHOLD MAGNITUDE - t_label = "t:" - Label(self.parent, text=t_label).grid(row=1, column=0, sticky=W, padx=(330,5), pady=(10,2)) - self.t = Entry(self.parent, justify=CENTER) - self.t["width"] = 5 - self.t.grid(row=1, column=0, sticky=W, padx=(348,5), pady=(10,2)) - self.t.delete(0, END) - self.t.insert(0, "-100") - - #MIN DURATION SINUSOIDAL TRACKS - minSineDur_label = "minSineDur:" - Label(self.parent, text=minSineDur_label).grid(row=2, column=0, sticky=W, padx=(5, 5), pady=(10,2)) - self.minSineDur = Entry(self.parent, justify=CENTER) - self.minSineDur["width"] = 5 - self.minSineDur.grid(row=2, column=0, sticky=W, padx=(87,5), pady=(10,2)) - self.minSineDur.delete(0, END) - self.minSineDur.insert(0, "0.1") - - #MAX NUMBER OF HARMONICS - nH_label = "nH:" - Label(self.parent, text=nH_label).grid(row=2, column=0, sticky=W, padx=(145,5), pady=(10,2)) - self.nH = Entry(self.parent, justify=CENTER) - self.nH["width"] = 5 - self.nH.grid(row=2, column=0, sticky=W, padx=(172,5), pady=(10,2)) - self.nH.delete(0, END) - self.nH.insert(0, "100") - - #MIN FUNDAMENTAL FREQUENCY - minf0_label = "minf0:" - Label(self.parent, text=minf0_label).grid(row=2, column=0, sticky=W, padx=(227,5), pady=(10,2)) - self.minf0 = Entry(self.parent, justify=CENTER) - self.minf0["width"] = 5 - self.minf0.grid(row=2, column=0, sticky=W, padx=(275,5), pady=(10,2)) - self.minf0.delete(0, END) - self.minf0.insert(0, "350") - - #MAX FUNDAMENTAL FREQUENCY - maxf0_label = "maxf0:" - Label(self.parent, text=maxf0_label).grid(row=2, column=0, sticky=W, padx=(330,5), pady=(10,2)) - self.maxf0 = Entry(self.parent, justify=CENTER) - self.maxf0["width"] = 5 - self.maxf0.grid(row=2, column=0, sticky=W, padx=(380,5), pady=(10,2)) - self.maxf0.delete(0, END) - self.maxf0.insert(0, "700") - - #MAX ERROR ACCEPTED - f0et_label = "f0et:" - Label(self.parent, text=f0et_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10,2)) - self.f0et = Entry(self.parent, justify=CENTER) - self.f0et["width"] = 3 - self.f0et.grid(row=3, column=0, sticky=W, padx=(42,5), pady=(10,2)) - self.f0et.delete(0, END) - self.f0et.insert(0, "7") - - #ALLOWED DEVIATION OF HARMONIC TRACKS - harmDevSlope_label = "harmDevSlope:" - Label(self.parent, text=harmDevSlope_label).grid(row=3, column=0, sticky=W, padx=(90,5), pady=(10,2)) - self.harmDevSlope = Entry(self.parent, justify=CENTER) - self.harmDevSlope["width"] = 5 - self.harmDevSlope.grid(row=3, column=0, sticky=W, padx=(190,5), pady=(10,2)) - self.harmDevSlope.delete(0, END) - self.harmDevSlope.insert(0, "0.01") - - #DECIMATION FACTOR - stocf_label = "stocf:" - Label(self.parent, text=stocf_label).grid(row=3, column=0, sticky=W, padx=(250,5), pady=(10,2)) - self.stocf = Entry(self.parent, justify=CENTER) - self.stocf["width"] = 5 - self.stocf.grid(row=3, column=0, sticky=W, padx=(290,5), pady=(10,2)) - self.stocf.delete(0, END) - self.stocf.insert(0, "0.1") - - #BUTTON TO DO THE ANALYSIS OF THE SOUND - self.compute = Button(self.parent, text="Analysis/Synthesis", command=self.analysis) - self.compute.grid(row=4, column=0, padx=5, pady=(10,5), sticky=W) - - #BUTTON TO PLAY ANALYSIS/SYNTHESIS OUTPUT - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_hpsModel.wav')) - self.output.grid(row=4, column=0, padx=(145,5), pady=(10,5), sticky=W) - - ### - #SEPARATION LINE - Frame(self.parent,height=1,width=50,bg="black").grid(row=5, pady=5, sticky=W+E) - ### - - #FREQUENCY SCALING FACTORS - freqScaling_label = "Frequency scaling factors (time, value pairs):" - Label(self.parent, text=freqScaling_label).grid(row=6, column=0, sticky=W, padx=5, pady=(5,2)) - self.freqScaling = Entry(self.parent, justify=CENTER) - self.freqScaling["width"] = 35 - self.freqScaling.grid(row=7, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.freqScaling.delete(0, END) - self.freqScaling.insert(0, "[0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]") - - #FREQUENCY STRETCHING FACTORS - freqStretching_label = "Frequency stretching factors (time, value pairs):" - Label(self.parent, text=freqStretching_label).grid(row=8, column=0, sticky=W, padx=5, pady=(5,2)) - self.freqStretching = Entry(self.parent, justify=CENTER) - self.freqStretching["width"] = 35 - self.freqStretching.grid(row=9, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.freqStretching.delete(0, END) - self.freqStretching.insert(0, "[0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]") - - #TIMBRE PRESERVATION - timbrePreservation_label = "Timbre preservation (1 preserves original timbre, 0 it does not):" - Label(self.parent, text=timbrePreservation_label).grid(row=10, column=0, sticky=W, padx=5, pady=(5,2)) - self.timbrePreservation = Entry(self.parent, justify=CENTER) - self.timbrePreservation["width"] = 2 - self.timbrePreservation.grid(row=10, column=0, sticky=W+E, padx=(395,5), pady=(5,2)) - self.timbrePreservation.delete(0, END) - self.timbrePreservation.insert(0, "1") - - #TIME SCALING FACTORS - timeScaling_label = "Time scaling factors (time, value pairs):" - Label(self.parent, text=timeScaling_label).grid(row=11, column=0, sticky=W, padx=5, pady=(5,2)) - self.timeScaling = Entry(self.parent, justify=CENTER) - self.timeScaling["width"] = 35 - self.timeScaling.grid(row=12, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.timeScaling.delete(0, END) - self.timeScaling.insert(0, "[0, 0, 2.138, 2.138-1.0, 3.146, 3.146]") - - #BUTTON TO DO THE SYNTHESIS - self.compute = Button(self.parent, text="Apply Transformation", command=self.transformation_synthesis) - self.compute.grid(row=13, column=0, padx=5, pady=(10,15), sticky=W) - - #BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT - self.transf_output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_hpsModelTransformation.wav')) - self.transf_output.grid(row=13, column=0, padx=(165,5), pady=(10,15), sticky=W) - - # define options for opening file - self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' - - def browse_file(self): - - self.filename = filedialog.askopenfilename(**self.file_opt) - - #set the text of the self.filelocation - self.filelocation.delete(0, END) - self.filelocation.insert(0,self.filename) - - def analysis(self): - - try: - inputFile = self.filelocation.get() - window = self.w_type.get() - M = int(self.M.get()) - N = int(self.N.get()) - t = int(self.t.get()) - minSineDur = float(self.minSineDur.get()) - nH = int(self.nH.get()) - minf0 = int(self.minf0.get()) - maxf0 = int(self.maxf0.get()) - f0et = int(self.f0et.get()) - harmDevSlope = float(self.harmDevSlope.get()) - stocf = float(self.stocf.get()) - - self.inputFile, self.fs, self.hfreq, self.hmag, self.mYst = hT.analysis(inputFile, window, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope, stocf) - - except ValueError: - messagebox.showerror("Input values error", "Some parameters are incorrect") - - def transformation_synthesis(self): - - try: - inputFile = self.inputFile - fs = self.fs - hfreq = self.hfreq - hmag = self.hmag - mYst = self.mYst - freqScaling = np.array(eval(self.freqScaling.get())) - freqStretching = np.array(eval(self.freqStretching.get())) - timbrePreservation = int(self.timbrePreservation.get()) - timeScaling = np.array(eval(self.timeScaling.get())) - - hT.transformation_synthesis(inputFile, fs, hfreq, hmag, mYst, freqScaling, freqStretching, timbrePreservation, timeScaling) - - except ValueError as errorMessage: - messagebox.showerror("Input values error", errorMessage) - - except AttributeError: - messagebox.showerror("Analysis not computed", "First you must analyse the sound!") + def __init__(self, parent): + + self.parent = parent + self.initUI() + + def initUI(self): + + choose_label = "inputFile:" + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) + + # TEXTBOX TO PRINT PATH OF THE SOUND FILE + self.filelocation = Entry(self.parent) + self.filelocation.focus_set() + self.filelocation["width"] = 32 + self.filelocation.grid(row=0, column=0, sticky=W, padx=(70, 5), pady=(10, 2)) + self.filelocation.delete(0, END) + self.filelocation.insert(0, "../../sounds/sax-phrase-short.wav") + + # BUTTON TO BROWSE SOUND FILE + open_file = Button( + self.parent, text="...", command=self.browse_file + ) # see: def browse_file(self) + open_file.grid( + row=0, column=0, sticky=W, padx=(340, 6), pady=(10, 2) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE + preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) + preview.grid(row=0, column=0, sticky=W, padx=(385, 6), pady=(10, 2)) + + ## HPS TRANSFORMATIONS ANALYSIS + + # ANALYSIS WINDOW TYPE + wtype_label = "window:" + Label(self.parent, text=wtype_label).grid( + row=1, column=0, sticky=W, padx=5, pady=(10, 2) + ) + self.w_type = StringVar() + self.w_type.set("blackman") # initial value + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hanning", + "hamming", + "blackman", + "blackmanharris", + ) + window_option.grid(row=1, column=0, sticky=W, padx=(65, 5), pady=(10, 2)) + + # WINDOW SIZE + M_label = "M:" + Label(self.parent, text=M_label).grid( + row=1, column=0, sticky=W, padx=(180, 5), pady=(10, 2) + ) + self.M = Entry(self.parent, justify=CENTER) + self.M["width"] = 5 + self.M.grid(row=1, column=0, sticky=W, padx=(200, 5), pady=(10, 2)) + self.M.delete(0, END) + self.M.insert(0, "601") + + # FFT SIZE + N_label = "N:" + Label(self.parent, text=N_label).grid( + row=1, column=0, sticky=W, padx=(255, 5), pady=(10, 2) + ) + self.N = Entry(self.parent, justify=CENTER) + self.N["width"] = 5 + self.N.grid(row=1, column=0, sticky=W, padx=(275, 5), pady=(10, 2)) + self.N.delete(0, END) + self.N.insert(0, "1024") + + # THRESHOLD MAGNITUDE + t_label = "t:" + Label(self.parent, text=t_label).grid( + row=1, column=0, sticky=W, padx=(330, 5), pady=(10, 2) + ) + self.t = Entry(self.parent, justify=CENTER) + self.t["width"] = 5 + self.t.grid(row=1, column=0, sticky=W, padx=(348, 5), pady=(10, 2)) + self.t.delete(0, END) + self.t.insert(0, "-100") + + # MIN DURATION SINUSOIDAL TRACKS + minSineDur_label = "minSineDur:" + Label(self.parent, text=minSineDur_label).grid( + row=2, column=0, sticky=W, padx=(5, 5), pady=(10, 2) + ) + self.minSineDur = Entry(self.parent, justify=CENTER) + self.minSineDur["width"] = 5 + self.minSineDur.grid(row=2, column=0, sticky=W, padx=(87, 5), pady=(10, 2)) + self.minSineDur.delete(0, END) + self.minSineDur.insert(0, "0.1") + + # MAX NUMBER OF HARMONICS + nH_label = "nH:" + Label(self.parent, text=nH_label).grid( + row=2, column=0, sticky=W, padx=(145, 5), pady=(10, 2) + ) + self.nH = Entry(self.parent, justify=CENTER) + self.nH["width"] = 5 + self.nH.grid(row=2, column=0, sticky=W, padx=(172, 5), pady=(10, 2)) + self.nH.delete(0, END) + self.nH.insert(0, "100") + + # MIN FUNDAMENTAL FREQUENCY + minf0_label = "minf0:" + Label(self.parent, text=minf0_label).grid( + row=2, column=0, sticky=W, padx=(227, 5), pady=(10, 2) + ) + self.minf0 = Entry(self.parent, justify=CENTER) + self.minf0["width"] = 5 + self.minf0.grid(row=2, column=0, sticky=W, padx=(275, 5), pady=(10, 2)) + self.minf0.delete(0, END) + self.minf0.insert(0, "350") + + # MAX FUNDAMENTAL FREQUENCY + maxf0_label = "maxf0:" + Label(self.parent, text=maxf0_label).grid( + row=2, column=0, sticky=W, padx=(330, 5), pady=(10, 2) + ) + self.maxf0 = Entry(self.parent, justify=CENTER) + self.maxf0["width"] = 5 + self.maxf0.grid(row=2, column=0, sticky=W, padx=(380, 5), pady=(10, 2)) + self.maxf0.delete(0, END) + self.maxf0.insert(0, "700") + + # MAX ERROR ACCEPTED + f0et_label = "f0et:" + Label(self.parent, text=f0et_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(10, 2) + ) + self.f0et = Entry(self.parent, justify=CENTER) + self.f0et["width"] = 3 + self.f0et.grid(row=3, column=0, sticky=W, padx=(42, 5), pady=(10, 2)) + self.f0et.delete(0, END) + self.f0et.insert(0, "7") + + # ALLOWED DEVIATION OF HARMONIC TRACKS + harmDevSlope_label = "harmDevSlope:" + Label(self.parent, text=harmDevSlope_label).grid( + row=3, column=0, sticky=W, padx=(90, 5), pady=(10, 2) + ) + self.harmDevSlope = Entry(self.parent, justify=CENTER) + self.harmDevSlope["width"] = 5 + self.harmDevSlope.grid(row=3, column=0, sticky=W, padx=(190, 5), pady=(10, 2)) + self.harmDevSlope.delete(0, END) + self.harmDevSlope.insert(0, "0.01") + + # DECIMATION FACTOR + stocf_label = "stocf:" + Label(self.parent, text=stocf_label).grid( + row=3, column=0, sticky=W, padx=(250, 5), pady=(10, 2) + ) + self.stocf = Entry(self.parent, justify=CENTER) + self.stocf["width"] = 5 + self.stocf.grid(row=3, column=0, sticky=W, padx=(290, 5), pady=(10, 2)) + self.stocf.delete(0, END) + self.stocf.insert(0, "0.1") + + # BUTTON TO DO THE ANALYSIS OF THE SOUND + self.compute = Button( + self.parent, text="Analysis/Synthesis", command=self.analysis + ) + self.compute.grid(row=4, column=0, padx=5, pady=(10, 5), sticky=W) + + # BUTTON TO PLAY ANALYSIS/SYNTHESIS OUTPUT + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_hpsModel.wav" + ), + ) + self.output.grid(row=4, column=0, padx=(145, 5), pady=(10, 5), sticky=W) + + ### + # SEPARATION LINE + Frame(self.parent, height=1, width=50, bg="black").grid( + row=5, pady=5, sticky=W + E + ) + ### + + # FREQUENCY SCALING FACTORS + freqScaling_label = "Frequency scaling factors (time, value pairs):" + Label(self.parent, text=freqScaling_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.freqScaling = Entry(self.parent, justify=CENTER) + self.freqScaling["width"] = 35 + self.freqScaling.grid(row=7, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.freqScaling.delete(0, END) + self.freqScaling.insert(0, "[0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]") + + # FREQUENCY STRETCHING FACTORS + freqStretching_label = "Frequency stretching factors (time, value pairs):" + Label(self.parent, text=freqStretching_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.freqStretching = Entry(self.parent, justify=CENTER) + self.freqStretching["width"] = 35 + self.freqStretching.grid(row=9, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.freqStretching.delete(0, END) + self.freqStretching.insert(0, "[0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]") + + # TIMBRE PRESERVATION + timbrePreservation_label = ( + "Timbre preservation (1 preserves original timbre, 0 it does not):" + ) + Label(self.parent, text=timbrePreservation_label).grid( + row=10, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.timbrePreservation = Entry(self.parent, justify=CENTER) + self.timbrePreservation["width"] = 2 + self.timbrePreservation.grid( + row=10, column=0, sticky=W + E, padx=(395, 5), pady=(5, 2) + ) + self.timbrePreservation.delete(0, END) + self.timbrePreservation.insert(0, "1") + + # TIME SCALING FACTORS + timeScaling_label = "Time scaling factors (time, value pairs):" + Label(self.parent, text=timeScaling_label).grid( + row=11, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.timeScaling = Entry(self.parent, justify=CENTER) + self.timeScaling["width"] = 35 + self.timeScaling.grid(row=12, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.timeScaling.delete(0, END) + self.timeScaling.insert(0, "[0, 0, 2.138, 2.138-1.0, 3.146, 3.146]") + + # BUTTON TO DO THE SYNTHESIS + self.compute = Button( + self.parent, + text="Apply Transformation", + command=self.transformation_synthesis, + ) + self.compute.grid(row=13, column=0, padx=5, pady=(10, 15), sticky=W) + + # BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT + self.transf_output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_hpsModelTransformation.wav" + ), + ) + self.transf_output.grid( + row=13, column=0, padx=(165, 5), pady=(10, 15), sticky=W + ) + + # define options for opening file + self.file_opt = options = {} + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" + + def browse_file(self): + + self.filename = filedialog.askopenfilename(**self.file_opt) + + # set the text of the self.filelocation + self.filelocation.delete(0, END) + self.filelocation.insert(0, self.filename) + + def analysis(self): + + try: + inputFile = self.filelocation.get() + window = self.w_type.get() + M = int(self.M.get()) + N = int(self.N.get()) + t = int(self.t.get()) + minSineDur = float(self.minSineDur.get()) + nH = int(self.nH.get()) + minf0 = int(self.minf0.get()) + maxf0 = int(self.maxf0.get()) + f0et = int(self.f0et.get()) + harmDevSlope = float(self.harmDevSlope.get()) + stocf = float(self.stocf.get()) + + self.inputFile, self.fs, self.hfreq, self.hmag, self.mYst = hT.analysis( + inputFile, + window, + M, + N, + t, + minSineDur, + nH, + minf0, + maxf0, + f0et, + harmDevSlope, + stocf, + ) + + except ValueError: + messagebox.showerror("Input values error", "Some parameters are incorrect") + + def transformation_synthesis(self): + + try: + inputFile = self.inputFile + fs = self.fs + hfreq = self.hfreq + hmag = self.hmag + mYst = self.mYst + freqScaling = np.array(eval(self.freqScaling.get())) + freqStretching = np.array(eval(self.freqStretching.get())) + timbrePreservation = int(self.timbrePreservation.get()) + timeScaling = np.array(eval(self.timeScaling.get())) + + hT.transformation_synthesis( + inputFile, + fs, + hfreq, + hmag, + mYst, + freqScaling, + freqStretching, + timbrePreservation, + timeScaling, + ) + + except ValueError as errorMessage: + messagebox.showerror("Input values error", errorMessage) + + except AttributeError: + messagebox.showerror( + "Analysis not computed", "First you must analyse the sound!" + ) diff --git a/smstools/transformations/interface/hpsTransformations_function.py b/smstools/transformations/interface/hpsTransformations_function.py index 72b15125..0165d587 100644 --- a/smstools/transformations/interface/hpsTransformations_function.py +++ b/smstools/transformations/interface/hpsTransformations_function.py @@ -9,175 +9,213 @@ from smstools.transformations import harmonicTransformations as HT from smstools.models import utilFunctions as UF -def analysis(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, - minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1): - """ - Analyze a sound with the harmonic plus stochastic model - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size - N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks - minSineDur: minimum duration of sinusoidal tracks - nH: maximum number of harmonics - minf0: minimum fundamental frequency in sound - maxf0: maximum fundamental frequency in sound - f0et: maximum error accepted in f0 detection algorithm - harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation - stocf: decimation factor used for the stochastic approximation - returns inputFile: input file name; fs: sampling rate of input file, - hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # read input sound - (fs, x) = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # compute the harmonic plus stochastic model of the whole sound - hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf) - - # synthesize the harmonic plus stochastic model without original phases - y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs) - - # write output sound - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel.wav' - UF.wavwrite(y,fs, outputFile) - - # create figure to plot - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 15000.0 - - # plot the input sound - plt.subplot(3,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - # plot spectrogram stochastic compoment - plt.subplot(3,1,2) - numFrames = int(mYst[:,0].size) - sizeEnv = int(mYst[0,:].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv - plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:,:int(sizeEnv*maxplotfreq/(.5*fs))+1])) - plt.autoscale(tight=True) - - # plot harmonic on top of stochastic spectrogram - if (hfreq.shape[1] > 0): - harms = hfreq*np.less(hfreq,maxplotfreq) - harms[harms==0] = np.nan - numFrames = int(harms[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - plt.plot(frmTime, harms, color='k', ms=3, alpha=1) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.autoscale(tight=True) - plt.title('harmonics + stochastic spectrogram') - - # plot the output sound - plt.subplot(3,1,3) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.show(block=False) - - return inputFile, fs, hfreq, hmag, mYst - - -def transformation_synthesis(inputFile, fs, hfreq, hmag, mYst, freqScaling = np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]), - freqStretching = np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), timbrePreservation = 1, - timeScaling = np.array([0, 0, 2.138, 2.138-1.0, 3.146, 3.146])): - """ - transform the analysis values returned by the analysis function and synthesize the sound - inputFile: name of input file - fs: sampling rate of input file - hfreq, hmag: harmonic frequencies and magnitudes - mYst: stochastic residual - freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling) - freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching) - timbrePreservation: 1 preserves original timbre, 0 it does not - timeScaling: time scaling factors, in time-value pairs - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # frequency scaling of the harmonics - hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs) - - # time scaling the sound - yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreqt, hmagt, mYst, timeScaling) - - # synthesis from the trasformed hps representation - y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs) - - # write output sound - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModelTransformation.wav' - UF.wavwrite(y,fs, outputFile) - - # create figure to plot - plt.figure(figsize=(12, 6)) - - # frequency range to plot - maxplotfreq = 15000.0 - - # plot spectrogram of transformed stochastic compoment - plt.subplot(2,1,1) - numFrames = int(ystocEnv[:,0].size) - sizeEnv = int(ystocEnv[0,:].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv - plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:,:int(sizeEnv*maxplotfreq/(.5*fs))+1])) - plt.autoscale(tight=True) - - # plot transformed harmonic on top of stochastic spectrogram - if (yhfreq.shape[1] > 0): - harms = yhfreq*np.less(yhfreq,maxplotfreq) - harms[harms==0] = np.nan - numFrames = int(harms[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - plt.plot(frmTime, harms, color='k', ms=3, alpha=1) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.autoscale(tight=True) - plt.title('harmonics + stochastic spectrogram') - - # plot the output sound - plt.subplot(2,1,2) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.show() + +def analysis( + inputFile="../../sounds/sax-phrase-short.wav", + window="blackman", + M=601, + N=1024, + t=-100, + minSineDur=0.1, + nH=100, + minf0=350, + maxf0=700, + f0et=5, + harmDevSlope=0.01, + stocf=0.1, +): + """ + Analyze a sound with the harmonic plus stochastic model + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size + N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks + minSineDur: minimum duration of sinusoidal tracks + nH: maximum number of harmonics + minf0: minimum fundamental frequency in sound + maxf0: maximum fundamental frequency in sound + f0et: maximum error accepted in f0 detection algorithm + harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation + stocf: decimation factor used for the stochastic approximation + returns inputFile: input file name; fs: sampling rate of input file, + hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # read input sound + (fs, x) = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # compute the harmonic plus stochastic model of the whole sound + hfreq, hmag, hphase, mYst = HPS.hpsModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf + ) + + # synthesize the harmonic plus stochastic model without original phases + y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs) + + # write output sound + outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel.wav" + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 15000.0 + + # plot the input sound + plt.subplot(3, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + # plot spectrogram stochastic compoment + plt.subplot(3, 1, 2) + numFrames = int(mYst[:, 0].size) + sizeEnv = int(mYst[0, :].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = (0.5 * fs) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs)) / sizeEnv + plt.pcolormesh( + frmTime, + binFreq, + np.transpose(mYst[:, : int(sizeEnv * maxplotfreq / (0.5 * fs)) + 1]), + ) + plt.autoscale(tight=True) + + # plot harmonic on top of stochastic spectrogram + if hfreq.shape[1] > 0: + harms = hfreq * np.less(hfreq, maxplotfreq) + harms[harms == 0] = np.nan + numFrames = int(harms[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + plt.plot(frmTime, harms, color="k", ms=3, alpha=1) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.autoscale(tight=True) + plt.title("harmonics + stochastic spectrogram") + + # plot the output sound + plt.subplot(3, 1, 3) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.show(block=False) + + return inputFile, fs, hfreq, hmag, mYst + + +def transformation_synthesis( + inputFile, + fs, + hfreq, + hmag, + mYst, + freqScaling=np.array([0, 1.2, 2.01, 1.2, 2.679, 0.7, 3.146, 0.7]), + freqStretching=np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), + timbrePreservation=1, + timeScaling=np.array([0, 0, 2.138, 2.138 - 1.0, 3.146, 3.146]), +): + """ + transform the analysis values returned by the analysis function and synthesize the sound + inputFile: name of input file + fs: sampling rate of input file + hfreq, hmag: harmonic frequencies and magnitudes + mYst: stochastic residual + freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling) + freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching) + timbrePreservation: 1 preserves original timbre, 0 it does not + timeScaling: time scaling factors, in time-value pairs + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # frequency scaling of the harmonics + hfreqt, hmagt = HT.harmonicFreqScaling( + hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs + ) + + # time scaling the sound + yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreqt, hmagt, mYst, timeScaling) + + # synthesis from the trasformed hps representation + y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs) + + # write output sound + outputFile = ( + "output_sounds/" + + os.path.basename(inputFile)[:-4] + + "_hpsModelTransformation.wav" + ) + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(12, 6)) + + # frequency range to plot + maxplotfreq = 15000.0 + + # plot spectrogram of transformed stochastic compoment + plt.subplot(2, 1, 1) + numFrames = int(ystocEnv[:, 0].size) + sizeEnv = int(ystocEnv[0, :].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = (0.5 * fs) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs)) / sizeEnv + plt.pcolormesh( + frmTime, + binFreq, + np.transpose(ystocEnv[:, : int(sizeEnv * maxplotfreq / (0.5 * fs)) + 1]), + ) + plt.autoscale(tight=True) + + # plot transformed harmonic on top of stochastic spectrogram + if yhfreq.shape[1] > 0: + harms = yhfreq * np.less(yhfreq, maxplotfreq) + harms[harms == 0] = np.nan + numFrames = int(harms[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + plt.plot(frmTime, harms, color="k", ms=3, alpha=1) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.autoscale(tight=True) + plt.title("harmonics + stochastic spectrogram") + + # plot the output sound + plt.subplot(2, 1, 2) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.show() + if __name__ == "__main__": - # analysis - inputFile, fs, hfreq, hmag, mYst = analysis() + # analysis + inputFile, fs, hfreq, hmag, mYst = analysis() - # transformation and synthesis - transformation_synthesis(inputFile, fs, hfreq, hmag, mYst) + # transformation and synthesis + transformation_synthesis(inputFile, fs, hfreq, hmag, mYst) - plt.show() + plt.show() diff --git a/smstools/transformations/interface/notebook.py b/smstools/transformations/interface/notebook.py index f9530d73..11523b86 100644 --- a/smstools/transformations/interface/notebook.py +++ b/smstools/transformations/interface/notebook.py @@ -1,18 +1,20 @@ try: # for Python2 - from Tkinter import * # notice capitalized T in Tkinter + from Tkinter import * # notice capitalized T in Tkinter except ImportError: # for Python3 - from tkinter import * # notice lowercase 't' in tkinter here + from tkinter import * # notice lowercase 't' in tkinter here -class notebook(object): + +class notebook(object): def __init__(self, master, side=LEFT): self.active_fr = None self.count = 0 self.choice = IntVar() if side in (TOP, BOTTOM): self.side = LEFT - else: self.side = TOP + else: + self.side = TOP self.rb_fr = Frame(master, borderwidth=2, relief=GROOVE) self.rb_fr.pack(side=side, fill=BOTH) self.screen_fr = Frame(master, borderwidth=2, relief=FLAT) @@ -22,7 +24,14 @@ def __call__(self): return self.screen_fr def add_screen(self, fr, title): - b = Radiobutton(self.rb_fr, text=title, indicatoron=0, variable=self.choice, value=self.count, command=lambda: self.display(fr)) + b = Radiobutton( + self.rb_fr, + text=title, + indicatoron=0, + variable=self.choice, + value=self.count, + command=lambda: self.display(fr), + ) b.pack(fill=BOTH, side=self.side) if not self.active_fr: fr.pack(fill=BOTH, expand=1) @@ -30,6 +39,6 @@ def add_screen(self, fr, title): self.count += 1 def display(self, fr): - self.active_fr.forget( ) + self.active_fr.forget() fr.pack(fill=BOTH, expand=1) self.active_fr = fr diff --git a/smstools/transformations/interface/sineTransformations_GUI_frame.py b/smstools/transformations/interface/sineTransformations_GUI_frame.py index 25064520..79fcfda2 100644 --- a/smstools/transformations/interface/sineTransformations_GUI_frame.py +++ b/smstools/transformations/interface/sineTransformations_GUI_frame.py @@ -8,193 +8,270 @@ from smstools.transformations.interface import sineTransformations_function as sT from smstools.models import utilFunctions as UF + class SineTransformations_frame: - def __init__(self, parent): - - self.parent = parent - self.initUI() - - def initUI(self): - - choose_label = "inputFile:" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) - - #TEXTBOX TO PRINT PATH OF THE SOUND FILE - self.filelocation = Entry(self.parent) - self.filelocation.focus_set() - self.filelocation["width"] = 32 - self.filelocation.grid(row=0,column=0, sticky=W, padx=(70, 5), pady=(10,2)) - self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/mridangam.wav') - - #BUTTON TO BROWSE SOUND FILE - open_file = Button(self.parent, text="...", command=self.browse_file) #see: def browse_file(self) - open_file.grid(row=0, column=0, sticky=W, padx=(340, 6), pady=(10,2)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE - preview = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation.get())) - preview.grid(row=0, column=0, sticky=W, padx=(385,6), pady=(10,2)) - - ## SINE TRANSFORMATIONS ANALYSIS - - #ANALYSIS WINDOW TYPE - wtype_label = "window:" - Label(self.parent, text=wtype_label).grid(row=1, column=0, sticky=W, padx=5, pady=(10,2)) - self.w_type = StringVar() - self.w_type.set("hamming") # initial value - window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hanning", "hamming", "blackman", "blackmanharris") - window_option.grid(row=1, column=0, sticky=W, padx=(65,5), pady=(10,2)) - - #WINDOW SIZE - M_label = "M:" - Label(self.parent, text=M_label).grid(row=1, column=0, sticky=W, padx=(180, 5), pady=(10,2)) - self.M = Entry(self.parent, justify=CENTER) - self.M["width"] = 5 - self.M.grid(row=1,column=0, sticky=W, padx=(200,5), pady=(10,2)) - self.M.delete(0, END) - self.M.insert(0, "801") - - #FFT SIZE - N_label = "N:" - Label(self.parent, text=N_label).grid(row=1, column=0, sticky=W, padx=(255, 5), pady=(10,2)) - self.N = Entry(self.parent, justify=CENTER) - self.N["width"] = 5 - self.N.grid(row=1,column=0, sticky=W, padx=(275,5), pady=(10,2)) - self.N.delete(0, END) - self.N.insert(0, "2048") - - #THRESHOLD MAGNITUDE - t_label = "t:" - Label(self.parent, text=t_label).grid(row=1, column=0, sticky=W, padx=(330,5), pady=(10,2)) - self.t = Entry(self.parent, justify=CENTER) - self.t["width"] = 5 - self.t.grid(row=1, column=0, sticky=W, padx=(348,5), pady=(10,2)) - self.t.delete(0, END) - self.t.insert(0, "-90") - - #MIN DURATION SINUSOIDAL TRACKS - minSineDur_label = "minSineDur:" - Label(self.parent, text=minSineDur_label).grid(row=2, column=0, sticky=W, padx=(5, 5), pady=(10,2)) - self.minSineDur = Entry(self.parent, justify=CENTER) - self.minSineDur["width"] = 5 - self.minSineDur.grid(row=2, column=0, sticky=W, padx=(87,5), pady=(10,2)) - self.minSineDur.delete(0, END) - self.minSineDur.insert(0, "0.01") - - #MAX NUMBER OF SINES - maxnSines_label = "maxnSines:" - Label(self.parent, text=maxnSines_label).grid(row=2, column=0, sticky=W, padx=(145,5), pady=(10,2)) - self.maxnSines = Entry(self.parent, justify=CENTER) - self.maxnSines["width"] = 5 - self.maxnSines.grid(row=2, column=0, sticky=W, padx=(220,5), pady=(10,2)) - self.maxnSines.delete(0, END) - self.maxnSines.insert(0, "150") - - #FREQUENCY DEVIATION ALLOWED - freqDevOffset_label = "freqDevOffset:" - Label(self.parent, text=freqDevOffset_label).grid(row=2, column=0, sticky=W, padx=(280,5), pady=(10,2)) - self.freqDevOffset = Entry(self.parent, justify=CENTER) - self.freqDevOffset["width"] = 5 - self.freqDevOffset.grid(row=2, column=0, sticky=W, padx=(372,5), pady=(10,2)) - self.freqDevOffset.delete(0, END) - self.freqDevOffset.insert(0, "20") - - #SLOPE OF THE FREQUENCY DEVIATION - freqDevSlope_label = "freqDevSlope:" - Label(self.parent, text=freqDevSlope_label).grid(row=3, column=0, sticky=W, padx=(5,5), pady=(10,2)) - self.freqDevSlope = Entry(self.parent, justify=CENTER) - self.freqDevSlope["width"] = 5 - self.freqDevSlope.grid(row=3, column=0, sticky=W, padx=(98,5), pady=(10,2)) - self.freqDevSlope.delete(0, END) - self.freqDevSlope.insert(0, "0.02") - - #BUTTON TO DO THE ANALYSIS OF THE SOUND - self.compute = Button(self.parent, text="Analysis/Synthesis", command=self.analysis) - self.compute.grid(row=4, column=0, padx=5, pady=(10,5), sticky=W) - - #BUTTON TO PLAY ANALYSIS/SYNTHESIS OUTPUT - self.output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_sineModel.wav')) - self.output.grid(row=4, column=0, padx=(145,5), pady=(10,5), sticky=W) - - ### - #SEPARATION LINE - Frame(self.parent,height=1,width=50,bg="black").grid(row=5, pady=5, sticky=W+E) - ### - - #FREQUENCY SCALING FACTORS - freqScaling_label = "Frequency scaling factors (time, value pairs):" - Label(self.parent, text=freqScaling_label).grid(row=6, column=0, sticky=W, padx=5, pady=(5,2)) - self.freqScaling = Entry(self.parent, justify=CENTER) - self.freqScaling["width"] = 35 - self.freqScaling.grid(row=7, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.freqScaling.delete(0, END) - self.freqScaling.insert(0, "[0, 2.0, 1, .3]") - - #TIME SCALING FACTORS - timeScaling_label = "Time scaling factors (in time, value pairs):" - Label(self.parent, text=timeScaling_label).grid(row=8, column=0, sticky=W, padx=5, pady=(5,2)) - self.timeScaling = Entry(self.parent, justify=CENTER) - self.timeScaling["width"] = 35 - self.timeScaling.grid(row=9, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.timeScaling.delete(0, END) - self.timeScaling.insert(0, "[0, .0, .671, .671, 1.978, 1.978+1.0]") - - #BUTTON TO DO THE SYNTHESIS - self.compute = Button(self.parent, text="Apply Transformation", command=self.transformation_synthesis) - self.compute.grid(row=13, column=0, padx=5, pady=(10,15), sticky=W) - - #BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT - self.transf_output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_sineModelTransformation.wav')) - self.transf_output.grid(row=13, column=0, padx=(165,5), pady=(10,15), sticky=W) - - # define options for opening file - self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' - - def browse_file(self): - - self.filename = filedialog.askopenfilename(**self.file_opt) - - #set the text of the self.filelocation - self.filelocation.delete(0, END) - self.filelocation.insert(0,self.filename) - - def analysis(self): - - try: - inputFile = self.filelocation.get() - window = self.w_type.get() - M = int(self.M.get()) - N = int(self.N.get()) - t = int(self.t.get()) - minSineDur = float(self.minSineDur.get()) - maxnSines = int(self.maxnSines.get()) - freqDevOffset = int(self.freqDevOffset.get()) - freqDevSlope = float(self.freqDevSlope.get()) - - self.inputFile, self.fs, self.tfreq, self.tmag = sT.analysis(inputFile, window, M, N, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope) - - except ValueError: - messagebox.showerror("Input values error", "Some parameters are incorrect") - - def transformation_synthesis(self): - - try: - inputFile = self.inputFile - fs = self.fs - tfreq = self.tfreq - tmag = self.tmag - freqScaling = np.array(eval(self.freqScaling.get())) - timeScaling = np.array(eval(self.timeScaling.get())) - - sT.transformation_synthesis(inputFile, fs, tfreq, tmag, freqScaling, timeScaling) - - except ValueError as errorMessage: - messagebox.showerror("Input values error", errorMessage) - - except AttributeError: - messagebox.showerror("Analysis not computed", "First you must analyse the sound!") + def __init__(self, parent): + + self.parent = parent + self.initUI() + + def initUI(self): + + choose_label = "inputFile:" + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) + + # TEXTBOX TO PRINT PATH OF THE SOUND FILE + self.filelocation = Entry(self.parent) + self.filelocation.focus_set() + self.filelocation["width"] = 32 + self.filelocation.grid(row=0, column=0, sticky=W, padx=(70, 5), pady=(10, 2)) + self.filelocation.delete(0, END) + self.filelocation.insert(0, "../../sounds/mridangam.wav") + + # BUTTON TO BROWSE SOUND FILE + open_file = Button( + self.parent, text="...", command=self.browse_file + ) # see: def browse_file(self) + open_file.grid( + row=0, column=0, sticky=W, padx=(340, 6), pady=(10, 2) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE + preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) + preview.grid(row=0, column=0, sticky=W, padx=(385, 6), pady=(10, 2)) + + ## SINE TRANSFORMATIONS ANALYSIS + + # ANALYSIS WINDOW TYPE + wtype_label = "window:" + Label(self.parent, text=wtype_label).grid( + row=1, column=0, sticky=W, padx=5, pady=(10, 2) + ) + self.w_type = StringVar() + self.w_type.set("hamming") # initial value + window_option = OptionMenu( + self.parent, + self.w_type, + "rectangular", + "hanning", + "hamming", + "blackman", + "blackmanharris", + ) + window_option.grid(row=1, column=0, sticky=W, padx=(65, 5), pady=(10, 2)) + + # WINDOW SIZE + M_label = "M:" + Label(self.parent, text=M_label).grid( + row=1, column=0, sticky=W, padx=(180, 5), pady=(10, 2) + ) + self.M = Entry(self.parent, justify=CENTER) + self.M["width"] = 5 + self.M.grid(row=1, column=0, sticky=W, padx=(200, 5), pady=(10, 2)) + self.M.delete(0, END) + self.M.insert(0, "801") + + # FFT SIZE + N_label = "N:" + Label(self.parent, text=N_label).grid( + row=1, column=0, sticky=W, padx=(255, 5), pady=(10, 2) + ) + self.N = Entry(self.parent, justify=CENTER) + self.N["width"] = 5 + self.N.grid(row=1, column=0, sticky=W, padx=(275, 5), pady=(10, 2)) + self.N.delete(0, END) + self.N.insert(0, "2048") + + # THRESHOLD MAGNITUDE + t_label = "t:" + Label(self.parent, text=t_label).grid( + row=1, column=0, sticky=W, padx=(330, 5), pady=(10, 2) + ) + self.t = Entry(self.parent, justify=CENTER) + self.t["width"] = 5 + self.t.grid(row=1, column=0, sticky=W, padx=(348, 5), pady=(10, 2)) + self.t.delete(0, END) + self.t.insert(0, "-90") + + # MIN DURATION SINUSOIDAL TRACKS + minSineDur_label = "minSineDur:" + Label(self.parent, text=minSineDur_label).grid( + row=2, column=0, sticky=W, padx=(5, 5), pady=(10, 2) + ) + self.minSineDur = Entry(self.parent, justify=CENTER) + self.minSineDur["width"] = 5 + self.minSineDur.grid(row=2, column=0, sticky=W, padx=(87, 5), pady=(10, 2)) + self.minSineDur.delete(0, END) + self.minSineDur.insert(0, "0.01") + + # MAX NUMBER OF SINES + maxnSines_label = "maxnSines:" + Label(self.parent, text=maxnSines_label).grid( + row=2, column=0, sticky=W, padx=(145, 5), pady=(10, 2) + ) + self.maxnSines = Entry(self.parent, justify=CENTER) + self.maxnSines["width"] = 5 + self.maxnSines.grid(row=2, column=0, sticky=W, padx=(220, 5), pady=(10, 2)) + self.maxnSines.delete(0, END) + self.maxnSines.insert(0, "150") + + # FREQUENCY DEVIATION ALLOWED + freqDevOffset_label = "freqDevOffset:" + Label(self.parent, text=freqDevOffset_label).grid( + row=2, column=0, sticky=W, padx=(280, 5), pady=(10, 2) + ) + self.freqDevOffset = Entry(self.parent, justify=CENTER) + self.freqDevOffset["width"] = 5 + self.freqDevOffset.grid(row=2, column=0, sticky=W, padx=(372, 5), pady=(10, 2)) + self.freqDevOffset.delete(0, END) + self.freqDevOffset.insert(0, "20") + + # SLOPE OF THE FREQUENCY DEVIATION + freqDevSlope_label = "freqDevSlope:" + Label(self.parent, text=freqDevSlope_label).grid( + row=3, column=0, sticky=W, padx=(5, 5), pady=(10, 2) + ) + self.freqDevSlope = Entry(self.parent, justify=CENTER) + self.freqDevSlope["width"] = 5 + self.freqDevSlope.grid(row=3, column=0, sticky=W, padx=(98, 5), pady=(10, 2)) + self.freqDevSlope.delete(0, END) + self.freqDevSlope.insert(0, "0.02") + + # BUTTON TO DO THE ANALYSIS OF THE SOUND + self.compute = Button( + self.parent, text="Analysis/Synthesis", command=self.analysis + ) + self.compute.grid(row=4, column=0, padx=5, pady=(10, 5), sticky=W) + + # BUTTON TO PLAY ANALYSIS/SYNTHESIS OUTPUT + self.output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_sineModel.wav" + ), + ) + self.output.grid(row=4, column=0, padx=(145, 5), pady=(10, 5), sticky=W) + + ### + # SEPARATION LINE + Frame(self.parent, height=1, width=50, bg="black").grid( + row=5, pady=5, sticky=W + E + ) + ### + + # FREQUENCY SCALING FACTORS + freqScaling_label = "Frequency scaling factors (time, value pairs):" + Label(self.parent, text=freqScaling_label).grid( + row=6, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.freqScaling = Entry(self.parent, justify=CENTER) + self.freqScaling["width"] = 35 + self.freqScaling.grid(row=7, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.freqScaling.delete(0, END) + self.freqScaling.insert(0, "[0, 2.0, 1, .3]") + + # TIME SCALING FACTORS + timeScaling_label = "Time scaling factors (in time, value pairs):" + Label(self.parent, text=timeScaling_label).grid( + row=8, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.timeScaling = Entry(self.parent, justify=CENTER) + self.timeScaling["width"] = 35 + self.timeScaling.grid(row=9, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.timeScaling.delete(0, END) + self.timeScaling.insert(0, "[0, .0, .671, .671, 1.978, 1.978+1.0]") + + # BUTTON TO DO THE SYNTHESIS + self.compute = Button( + self.parent, + text="Apply Transformation", + command=self.transformation_synthesis, + ) + self.compute.grid(row=13, column=0, padx=5, pady=(10, 15), sticky=W) + + # BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT + self.transf_output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_sineModelTransformation.wav" + ), + ) + self.transf_output.grid( + row=13, column=0, padx=(165, 5), pady=(10, 15), sticky=W + ) + + # define options for opening file + self.file_opt = options = {} + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" + + def browse_file(self): + + self.filename = filedialog.askopenfilename(**self.file_opt) + + # set the text of the self.filelocation + self.filelocation.delete(0, END) + self.filelocation.insert(0, self.filename) + + def analysis(self): + + try: + inputFile = self.filelocation.get() + window = self.w_type.get() + M = int(self.M.get()) + N = int(self.N.get()) + t = int(self.t.get()) + minSineDur = float(self.minSineDur.get()) + maxnSines = int(self.maxnSines.get()) + freqDevOffset = int(self.freqDevOffset.get()) + freqDevSlope = float(self.freqDevSlope.get()) + + self.inputFile, self.fs, self.tfreq, self.tmag = sT.analysis( + inputFile, + window, + M, + N, + t, + minSineDur, + maxnSines, + freqDevOffset, + freqDevSlope, + ) + + except ValueError: + messagebox.showerror("Input values error", "Some parameters are incorrect") + + def transformation_synthesis(self): + + try: + inputFile = self.inputFile + fs = self.fs + tfreq = self.tfreq + tmag = self.tmag + freqScaling = np.array(eval(self.freqScaling.get())) + timeScaling = np.array(eval(self.timeScaling.get())) + + sT.transformation_synthesis( + inputFile, fs, tfreq, tmag, freqScaling, timeScaling + ) + + except ValueError as errorMessage: + messagebox.showerror("Input values error", errorMessage) + + except AttributeError: + messagebox.showerror( + "Analysis not computed", "First you must analyse the sound!" + ) diff --git a/smstools/transformations/interface/sineTransformations_function.py b/smstools/transformations/interface/sineTransformations_function.py index c0ada491..3a8a3617 100644 --- a/smstools/transformations/interface/sineTransformations_function.py +++ b/smstools/transformations/interface/sineTransformations_function.py @@ -8,150 +8,172 @@ from smstools.transformations import sineTransformations as ST from smstools.models import utilFunctions as UF -def analysis(inputFile='../../sounds/mridangam.wav', window='hamming', M=801, N=2048, t=-90, - minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02): - """ - Analyze a sound with the sine model - inputFile: input sound file (monophonic with sampling rate of 44100) - window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) - M: analysis window size; N: fft size (power of two, bigger or equal than M) - t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks - maxnSines: maximum number of parallel sinusoids - freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0 - freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation - returns inputFile: input file name; fs: sampling rate of input file, - tfreq, tmag: sinusoidal frequencies and magnitudes - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # read input sound - (fs, x) = UF.wavread(inputFile) - - # compute analysis window - w = get_window(window, M) - - # compute the sine model of the whole sound - tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope) - - # synthesize the sines without original phases - y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs) - - # output sound file (monophonic with sampling rate of 44100) - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav' - - # write the sound resulting from the inverse stft - UF.wavwrite(y, fs, outputFile) - - # create figure to show plots - plt.figure(figsize=(9, 6)) - - # frequency range to plot - maxplotfreq = 5000.0 - - # plot the input sound - plt.subplot(3,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - # plot the sinusoidal frequencies - if (tfreq.shape[1] > 0): - plt.subplot(3,1,2) - tracks = np.copy(tfreq) - tracks = tracks*np.less(tracks, maxplotfreq) - tracks[tracks<=0] = np.nan - numFrames = int(tracks[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - plt.plot(frmTime, tracks) - plt.axis([0, x.size/float(fs), 0, maxplotfreq]) - plt.title('frequencies of sinusoidal tracks') - - # plot the output sound - plt.subplot(3,1,3) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.show(block=False) - - return inputFile, fs, tfreq, tmag - - -def transformation_synthesis(inputFile, fs, tfreq, tmag, freqScaling = np.array([0, 2.0, 1, .3]), - timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])): - """ - Transform the analysis values returned by the analysis function and synthesize the sound - inputFile: name of input file; fs: sampling rate of input file - tfreq, tmag: sinusoidal frequencies and magnitudes - freqScaling: frequency scaling factors, in time-value pairs - timeScaling: time scaling factors, in time-value pairs - """ - - # size of fft used in synthesis - Ns = 512 - - # hop size (has to be 1/4 of Ns) - H = 128 - - # frequency scaling of the sinusoidal tracks - ytfreq = ST.sineFreqScaling(tfreq, freqScaling) - - # time scale the sinusoidal tracks - ytfreq, ytmag = ST.sineTimeScaling(ytfreq, tmag, timeScaling) - - # synthesis - y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs) - - # write output sound - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModelTransformation.wav' - UF.wavwrite(y,fs, outputFile) - - # create figure to plot - plt.figure(figsize=(12, 6)) - - # frequency range to plot - maxplotfreq = 15000.0 - - # plot the transformed sinusoidal frequencies - if (ytfreq.shape[1] > 0): - plt.subplot(2,1,1) - tracks = np.copy(ytfreq) - tracks = tracks*np.less(tracks, maxplotfreq) - tracks[tracks<=0] = np.nan - numFrames = int(tracks[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - plt.plot(frmTime, tracks) - plt.title('transformed sinusoidal tracks') - plt.autoscale(tight=True) - - # plot the output sound - plt.subplot(2,1,2) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') - - plt.tight_layout() - plt.show() -if __name__ == "__main__": +def analysis( + inputFile="../../sounds/mridangam.wav", + window="hamming", + M=801, + N=2048, + t=-90, + minSineDur=0.01, + maxnSines=150, + freqDevOffset=20, + freqDevSlope=0.02, +): + """ + Analyze a sound with the sine model + inputFile: input sound file (monophonic with sampling rate of 44100) + window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) + M: analysis window size; N: fft size (power of two, bigger or equal than M) + t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks + maxnSines: maximum number of parallel sinusoids + freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0 + freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation + returns inputFile: input file name; fs: sampling rate of input file, + tfreq, tmag: sinusoidal frequencies and magnitudes + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # read input sound + (fs, x) = UF.wavread(inputFile) + + # compute analysis window + w = get_window(window, M) + + # compute the sine model of the whole sound + tfreq, tmag, tphase = SM.sineModelAnal( + x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope + ) + + # synthesize the sines without original phases + y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs) + + # output sound file (monophonic with sampling rate of 44100) + outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_sineModel.wav" + + # write the sound resulting from the inverse stft + UF.wavwrite(y, fs, outputFile) + + # create figure to show plots + plt.figure(figsize=(9, 6)) + + # frequency range to plot + maxplotfreq = 5000.0 + + # plot the input sound + plt.subplot(3, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + # plot the sinusoidal frequencies + if tfreq.shape[1] > 0: + plt.subplot(3, 1, 2) + tracks = np.copy(tfreq) + tracks = tracks * np.less(tracks, maxplotfreq) + tracks[tracks <= 0] = np.nan + numFrames = int(tracks[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + plt.plot(frmTime, tracks) + plt.axis([0, x.size / float(fs), 0, maxplotfreq]) + plt.title("frequencies of sinusoidal tracks") + + # plot the output sound + plt.subplot(3, 1, 3) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.show(block=False) + + return inputFile, fs, tfreq, tmag + + +def transformation_synthesis( + inputFile, + fs, + tfreq, + tmag, + freqScaling=np.array([0, 2.0, 1, 0.3]), + timeScaling=np.array([0, 0.0, 0.671, 0.671, 1.978, 1.978 + 1.0]), +): + """ + Transform the analysis values returned by the analysis function and synthesize the sound + inputFile: name of input file; fs: sampling rate of input file + tfreq, tmag: sinusoidal frequencies and magnitudes + freqScaling: frequency scaling factors, in time-value pairs + timeScaling: time scaling factors, in time-value pairs + """ + + # size of fft used in synthesis + Ns = 512 + + # hop size (has to be 1/4 of Ns) + H = 128 + + # frequency scaling of the sinusoidal tracks + ytfreq = ST.sineFreqScaling(tfreq, freqScaling) + + # time scale the sinusoidal tracks + ytfreq, ytmag = ST.sineTimeScaling(ytfreq, tmag, timeScaling) + + # synthesis + y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs) + + # write output sound + outputFile = ( + "output_sounds/" + + os.path.basename(inputFile)[:-4] + + "_sineModelTransformation.wav" + ) + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(12, 6)) + + # frequency range to plot + maxplotfreq = 15000.0 + + # plot the transformed sinusoidal frequencies + if ytfreq.shape[1] > 0: + plt.subplot(2, 1, 1) + tracks = np.copy(ytfreq) + tracks = tracks * np.less(tracks, maxplotfreq) + tracks[tracks <= 0] = np.nan + numFrames = int(tracks[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + plt.plot(frmTime, tracks) + plt.title("transformed sinusoidal tracks") + plt.autoscale(tight=True) + + # plot the output sound + plt.subplot(2, 1, 2) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") + + plt.tight_layout() + plt.show() + - # analysis - inputFile, fs, tfreq, tmag = analysis() +if __name__ == "__main__": - # transformation and synthesis - transformation_synthesis (inputFile, fs, tfreq, tmag) + # analysis + inputFile, fs, tfreq, tmag = analysis() - plt.show() + # transformation and synthesis + transformation_synthesis(inputFile, fs, tfreq, tmag) + plt.show() diff --git a/smstools/transformations/interface/stftMorph_GUI_frame.py b/smstools/transformations/interface/stftMorph_GUI_frame.py index 39c0f311..054bb700 100644 --- a/smstools/transformations/interface/stftMorph_GUI_frame.py +++ b/smstools/transformations/interface/stftMorph_GUI_frame.py @@ -8,6 +8,7 @@ from smstools.transformations.interface import stftMorph_function as sT from smstools.models import utilFunctions as UF + class StftMorph_frame: def __init__(self, parent): @@ -19,166 +20,239 @@ def initUI(self): ## INPUT FILE 1 choose1_label = "inputFile1:" - Label(self.parent, text=choose1_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) + Label(self.parent, text=choose1_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) - #TEXTBOX TO PRINT PATH OF THE SOUND FILE + # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation1 = Entry(self.parent) self.filelocation1.focus_set() self.filelocation1["width"] = 30 - self.filelocation1.grid(row=0,column=0, sticky=W, padx=(75, 5), pady=(10,2)) + self.filelocation1.grid(row=0, column=0, sticky=W, padx=(75, 5), pady=(10, 2)) self.filelocation1.delete(0, END) - self.filelocation1.insert(0, '../../sounds/ocean.wav') - - #BUTTON TO BROWSE SOUND FILE 1 - open_file1 = Button(self.parent, text="...", command=self.browse_file1) #see: def browse_file(self) - open_file1.grid(row=0, column=0, sticky=W, padx=(330, 6), pady=(10,2)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE 1 - preview1 = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation1.get())) - preview1.grid(row=0, column=0, sticky=W, padx=(375,6), pady=(10,2)) - - #ANALYSIS WINDOW TYPE SOUND 1 + self.filelocation1.insert(0, "../../sounds/ocean.wav") + + # BUTTON TO BROWSE SOUND FILE 1 + open_file1 = Button( + self.parent, text="...", command=self.browse_file1 + ) # see: def browse_file(self) + open_file1.grid( + row=0, column=0, sticky=W, padx=(330, 6), pady=(10, 2) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE 1 + preview1 = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation1.get()) + ) + preview1.grid(row=0, column=0, sticky=W, padx=(375, 6), pady=(10, 2)) + + # ANALYSIS WINDOW TYPE SOUND 1 wtype1_label = "window1:" - Label(self.parent, text=wtype1_label).grid(row=1, column=0, sticky=W, padx=5, pady=(4,2)) + Label(self.parent, text=wtype1_label).grid( + row=1, column=0, sticky=W, padx=5, pady=(4, 2) + ) self.w1_type = StringVar() - self.w1_type.set("hamming") # initial value - window1_option = OptionMenu(self.parent, self.w1_type, "rectangular", "hanning", "hamming", "blackman", "blackmanharris") - window1_option.grid(row=1, column=0, sticky=W, padx=(68,5), pady=(4,2)) - - #WINDOW SIZE SOUND 1 + self.w1_type.set("hamming") # initial value + window1_option = OptionMenu( + self.parent, + self.w1_type, + "rectangular", + "hanning", + "hamming", + "blackman", + "blackmanharris", + ) + window1_option.grid(row=1, column=0, sticky=W, padx=(68, 5), pady=(4, 2)) + + # WINDOW SIZE SOUND 1 M1_label = "M1:" - Label(self.parent, text=M1_label).grid(row=1, column=0, sticky=W, padx=(180, 5), pady=(4,2)) + Label(self.parent, text=M1_label).grid( + row=1, column=0, sticky=W, padx=(180, 5), pady=(4, 2) + ) self.M1 = Entry(self.parent, justify=CENTER) self.M1["width"] = 5 - self.M1.grid(row=1,column=0, sticky=W, padx=(208,5), pady=(4,2)) + self.M1.grid(row=1, column=0, sticky=W, padx=(208, 5), pady=(4, 2)) self.M1.delete(0, END) self.M1.insert(0, "1024") - #FFT SIZE SOUND 1 + # FFT SIZE SOUND 1 N1_label = "N1:" - Label(self.parent, text=N1_label).grid(row=1, column=0, sticky=W, padx=(265, 5), pady=(4,2)) + Label(self.parent, text=N1_label).grid( + row=1, column=0, sticky=W, padx=(265, 5), pady=(4, 2) + ) self.N1 = Entry(self.parent, justify=CENTER) self.N1["width"] = 5 - self.N1.grid(row=1,column=0, sticky=W, padx=(290,5), pady=(4,2)) + self.N1.grid(row=1, column=0, sticky=W, padx=(290, 5), pady=(4, 2)) self.N1.delete(0, END) self.N1.insert(0, "1024") - #HOP SIZE SOUND 1 + # HOP SIZE SOUND 1 H1_label = "H1:" - Label(self.parent, text=H1_label).grid(row=1, column=0, sticky=W, padx=(343,5), pady=(4,2)) + Label(self.parent, text=H1_label).grid( + row=1, column=0, sticky=W, padx=(343, 5), pady=(4, 2) + ) self.H1 = Entry(self.parent, justify=CENTER) self.H1["width"] = 5 - self.H1.grid(row=1, column=0, sticky=W, padx=(370,5), pady=(4,2)) + self.H1.grid(row=1, column=0, sticky=W, padx=(370, 5), pady=(4, 2)) self.H1.delete(0, END) self.H1.insert(0, "256") ### - #SEPARATION LINE - Frame(self.parent,height=1,width=50,bg="black").grid(row=2, pady=15, sticky=W+E) + # SEPARATION LINE + Frame(self.parent, height=1, width=50, bg="black").grid( + row=2, pady=15, sticky=W + E + ) ### ## INPUT FILE 2 choose2_label = "inputFile2:" - Label(self.parent, text=choose2_label).grid(row=3, column=0, sticky=W, padx=5, pady=(2,2)) + Label(self.parent, text=choose2_label).grid( + row=3, column=0, sticky=W, padx=5, pady=(2, 2) + ) - #TEXTBOX TO PRINT PATH OF THE SOUND FILE + # TEXTBOX TO PRINT PATH OF THE SOUND FILE self.filelocation2 = Entry(self.parent) self.filelocation2.focus_set() self.filelocation2["width"] = 30 - self.filelocation2.grid(row=3,column=0, sticky=W, padx=(75, 5), pady=(2,2)) + self.filelocation2.grid(row=3, column=0, sticky=W, padx=(75, 5), pady=(2, 2)) self.filelocation2.delete(0, END) - self.filelocation2.insert(0, '../../sounds/speech-male.wav') - - #BUTTON TO BROWSE SOUND FILE 2 - open_file2 = Button(self.parent, text="...", command=self.browse_file2) #see: def browse_file(self) - open_file2.grid(row=3, column=0, sticky=W, padx=(330, 6), pady=(2,2)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE 2 - preview2 = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation2.get())) - preview2.grid(row=3, column=0, sticky=W, padx=(375,6), pady=(2,2)) - - - #ANALYSIS WINDOW TYPE SOUND 2 + self.filelocation2.insert(0, "../../sounds/speech-male.wav") + + # BUTTON TO BROWSE SOUND FILE 2 + open_file2 = Button( + self.parent, text="...", command=self.browse_file2 + ) # see: def browse_file(self) + open_file2.grid( + row=3, column=0, sticky=W, padx=(330, 6), pady=(2, 2) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE 2 + preview2 = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation2.get()) + ) + preview2.grid(row=3, column=0, sticky=W, padx=(375, 6), pady=(2, 2)) + + # ANALYSIS WINDOW TYPE SOUND 2 wtype2_label = "window2:" - Label(self.parent, text=wtype2_label).grid(row=4, column=0, sticky=W, padx=5, pady=(4,2)) + Label(self.parent, text=wtype2_label).grid( + row=4, column=0, sticky=W, padx=5, pady=(4, 2) + ) self.w2_type = StringVar() - self.w2_type.set("hamming") # initial value - window2_option = OptionMenu(self.parent, self.w2_type, "rectangular", "hanning", "hamming", "blackman", "blackmanharris") - window2_option.grid(row=4, column=0, sticky=W, padx=(68,5), pady=(4,2)) - - #WINDOW SIZE SOUND 2 + self.w2_type.set("hamming") # initial value + window2_option = OptionMenu( + self.parent, + self.w2_type, + "rectangular", + "hanning", + "hamming", + "blackman", + "blackmanharris", + ) + window2_option.grid(row=4, column=0, sticky=W, padx=(68, 5), pady=(4, 2)) + + # WINDOW SIZE SOUND 2 M2_label = "M2:" - Label(self.parent, text=M2_label).grid(row=4, column=0, sticky=W, padx=(180, 5), pady=(4,2)) + Label(self.parent, text=M2_label).grid( + row=4, column=0, sticky=W, padx=(180, 5), pady=(4, 2) + ) self.M2 = Entry(self.parent, justify=CENTER) self.M2["width"] = 5 - self.M2.grid(row=4,column=0, sticky=W, padx=(208,5), pady=(4,2)) + self.M2.grid(row=4, column=0, sticky=W, padx=(208, 5), pady=(4, 2)) self.M2.delete(0, END) self.M2.insert(0, "1024") - #FFT SIZE SOUND 2 + # FFT SIZE SOUND 2 N2_label = "N2:" - Label(self.parent, text=N2_label).grid(row=4, column=0, sticky=W, padx=(265, 5), pady=(4,2)) + Label(self.parent, text=N2_label).grid( + row=4, column=0, sticky=W, padx=(265, 5), pady=(4, 2) + ) self.N2 = Entry(self.parent, justify=CENTER) self.N2["width"] = 5 - self.N2.grid(row=4,column=0, sticky=W, padx=(290,5), pady=(4,2)) + self.N2.grid(row=4, column=0, sticky=W, padx=(290, 5), pady=(4, 2)) self.N2.delete(0, END) self.N2.insert(0, "1024") ### - #SEPARATION LINE - Frame(self.parent,height=1,width=50,bg="black").grid(row=5, pady=15, sticky=W+E) + # SEPARATION LINE + Frame(self.parent, height=1, width=50, bg="black").grid( + row=5, pady=15, sticky=W + E + ) ### - #SMOOTHING FACTOR - smoothf_label1 = "Smooth factor of sound 2 (bigger than 0 to max of 1, where 1 is no" - Label(self.parent, text=smoothf_label1).grid(row=6, column=0, sticky=W, padx=(5, 5), pady=(2,2)) + # SMOOTHING FACTOR + smoothf_label1 = ( + "Smooth factor of sound 2 (bigger than 0 to max of 1, where 1 is no" + ) + Label(self.parent, text=smoothf_label1).grid( + row=6, column=0, sticky=W, padx=(5, 5), pady=(2, 2) + ) smoothf_label2 = "smothing):" - Label(self.parent, text=smoothf_label2).grid(row=7, column=0, sticky=W, padx=(5, 5), pady=(0,2)) + Label(self.parent, text=smoothf_label2).grid( + row=7, column=0, sticky=W, padx=(5, 5), pady=(0, 2) + ) self.smoothf = Entry(self.parent, justify=CENTER) self.smoothf["width"] = 5 - self.smoothf.grid(row=8, column=0, sticky=W, padx=(5,5), pady=(2,2)) + self.smoothf.grid(row=8, column=0, sticky=W, padx=(5, 5), pady=(2, 2)) self.smoothf.delete(0, END) self.smoothf.insert(0, "0.5") - #BALANCE FACTOR - balancef_label = "Balance factor (from 0 to 1, where 0 is sound 1 and 1 is sound 2):" - Label(self.parent, text=balancef_label).grid(row=9, column=0, sticky=W, padx=(5,5), pady=(10,2)) + # BALANCE FACTOR + balancef_label = ( + "Balance factor (from 0 to 1, where 0 is sound 1 and 1 is sound 2):" + ) + Label(self.parent, text=balancef_label).grid( + row=9, column=0, sticky=W, padx=(5, 5), pady=(10, 2) + ) self.balancef = Entry(self.parent, justify=CENTER) self.balancef["width"] = 5 - self.balancef.grid(row=10, column=0, sticky=W, padx=(5,5), pady=(2,2)) + self.balancef.grid(row=10, column=0, sticky=W, padx=(5, 5), pady=(2, 2)) self.balancef.delete(0, END) self.balancef.insert(0, "0.2") - #BUTTON TO DO THE SYNTHESIS - self.compute = Button(self.parent, text="Apply Transformation", command=self.transformation_synthesis) - self.compute.grid(row=11, column=0, padx=5, pady=(10,15), sticky=W) - - #BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT - self.transf_output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation1.get())[:-4] + '_stftMorph.wav')) - self.transf_output.grid(row=11, column=0, padx=(165,5), pady=(10,15), sticky=W) + # BUTTON TO DO THE SYNTHESIS + self.compute = Button( + self.parent, + text="Apply Transformation", + command=self.transformation_synthesis, + ) + self.compute.grid(row=11, column=0, padx=5, pady=(10, 15), sticky=W) + + # BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT + self.transf_output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation1.get())[:-4] + + "_stftMorph.wav" + ), + ) + self.transf_output.grid( + row=11, column=0, padx=(165, 5), pady=(10, 15), sticky=W + ) # define options for opening file self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" def browse_file1(self): self.filename1 = filedialog.askopenfilename(**self.file_opt) - #set the text of the self.filelocation + # set the text of the self.filelocation self.filelocation1.delete(0, END) - self.filelocation1.insert(0,self.filename1) + self.filelocation1.insert(0, self.filename1) def browse_file2(self): self.filename2 = filedialog.askopenfilename(**self.file_opt) - #set the text of the self.filelocation + # set the text of the self.filelocation self.filelocation2.delete(0, END) - self.filelocation2.insert(0,self.filename2) + self.filelocation2.insert(0, self.filename2) def transformation_synthesis(self): @@ -195,7 +269,19 @@ def transformation_synthesis(self): smoothf = float(self.smoothf.get()) balancef = float(self.balancef.get()) - sT.main(inputFile1, inputFile2, window1, window2, M1, M2, N1, N2, H1, smoothf, balancef) + sT.main( + inputFile1, + inputFile2, + window1, + window2, + M1, + M2, + N1, + N2, + H1, + smoothf, + balancef, + ) except ValueError as errorMessage: messagebox.showerror("Input values error", errorMessage) diff --git a/smstools/transformations/interface/stftMorph_function.py b/smstools/transformations/interface/stftMorph_function.py index c16dcd20..d228e77e 100644 --- a/smstools/transformations/interface/stftMorph_function.py +++ b/smstools/transformations/interface/stftMorph_function.py @@ -10,20 +10,30 @@ from smstools.transformations import stftTransformations as STFTT -def main(inputFile1='../../sounds/ocean.wav', inputFile2='../../sounds/speech-male.wav', window1='hamming', - window2='hamming', - M1=1024, M2=1024, N1=1024, N2=1024, H1=256, smoothf=.5, balancef=0.2): +def main( + inputFile1="../../sounds/ocean.wav", + inputFile2="../../sounds/speech-male.wav", + window1="hamming", + window2="hamming", + M1=1024, + M2=1024, + N1=1024, + N2=1024, + H1=256, + smoothf=0.5, + balancef=0.2, +): + """ + Function to perform a morph between two sounds + inputFile1: name of input sound file to be used as source + inputFile2: name of input sound file to be used as filter + window1 and window2: windows for both files + M1 and M2: window sizes for both files + N1 and N2: fft sizes for both sounds + H1: hop size for sound 1 (the one for sound 2 is computed automatically) + smoothf: smoothing factor to be applyed to magnitude spectrum of sound 2 before morphing + balancef: balance factor between booth sounds, 0 is sound 1 and 1 is sound 2 """ - Function to perform a morph between two sounds - inputFile1: name of input sound file to be used as source - inputFile2: name of input sound file to be used as filter - window1 and window2: windows for both files - M1 and M2: window sizes for both files - N1 and N2: fft sizes for both sounds - H1: hop size for sound 1 (the one for sound 2 is computed automatically) - smoothf: smoothing factor to be applyed to magnitude spectrum of sound 2 before morphing - balancef: balance factor between booth sounds, 0 is sound 1 and 1 is sound 2 - """ # read input sounds (fs, x1) = UF.wavread(inputFile1) @@ -43,7 +53,7 @@ def main(inputFile1='../../sounds/ocean.wav', inputFile2='../../sounds/speech-ma mY, pY = STFT.stftAnal(y, w1, N1, H1) # write output sound - outputFile = 'output_sounds/' + os.path.basename(inputFile1)[:-4] + '_stftMorph.wav' + outputFile = "output_sounds/" + os.path.basename(inputFile1)[:-4] + "_stftMorph.wav" UF.wavwrite(y, fs, outputFile) # create figure to plot @@ -56,19 +66,21 @@ def main(inputFile1='../../sounds/ocean.wav', inputFile2='../../sounds/speech-ma plt.subplot(4, 1, 1) plt.plot(np.arange(x1.size) / float(fs), x1) plt.axis([0, x1.size / float(fs), min(x1), max(x1)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") # plot magnitude spectrogram of sound 1 plt.subplot(4, 1, 2) numFrames = int(mX1[:, 0].size) frmTime = H1 * np.arange(numFrames) / float(fs) binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1 - plt.pcolormesh(frmTime, binFreq, np.transpose(mX1[:, :int(N1 * maxplotfreq / fs) + 1])) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.title('magnitude spectrogram of x') + plt.pcolormesh( + frmTime, binFreq, np.transpose(mX1[:, : int(N1 * maxplotfreq / fs) + 1]) + ) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.title("magnitude spectrogram of x") plt.autoscale(tight=True) # plot magnitude spectrogram of morphed sound @@ -76,23 +88,25 @@ def main(inputFile1='../../sounds/ocean.wav', inputFile2='../../sounds/speech-ma numFrames = int(mY[:, 0].size) frmTime = H1 * np.arange(numFrames) / float(fs) binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1 - plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:, :int(N1 * maxplotfreq / fs) + 1])) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.title('magnitude spectrogram of y') + plt.pcolormesh( + frmTime, binFreq, np.transpose(mY[:, : int(N1 * maxplotfreq / fs) + 1]) + ) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.title("magnitude spectrogram of y") plt.autoscale(tight=True) # plot the morphed sound plt.subplot(4, 1, 4) plt.plot(np.arange(y.size) / float(fs), y) plt.axis([0, y.size / float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('output sound: y') + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("output sound: y") plt.tight_layout() plt.show() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/smstools/transformations/interface/stochasticTransformations_GUI_frame.py b/smstools/transformations/interface/stochasticTransformations_GUI_frame.py index 81aac040..ef7c028e 100644 --- a/smstools/transformations/interface/stochasticTransformations_GUI_frame.py +++ b/smstools/transformations/interface/stochasticTransformations_GUI_frame.py @@ -9,85 +9,112 @@ from smstools.transformations.interface import stochasticTransformations_function as sT from smstools.models import utilFunctions as UF -class StochasticTransformations_frame: - - def __init__(self, parent): - - self.parent = parent - self.initUI() - - def initUI(self): - - choose_label = "inputFile:" - Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10,2)) - - #TEXTBOX TO PRINT PATH OF THE SOUND FILE - self.filelocation = Entry(self.parent) - self.filelocation.focus_set() - self.filelocation["width"] = 25 - self.filelocation.grid(row=0,column=0, sticky=W, padx=(70, 5), pady=(10,2)) - self.filelocation.delete(0, END) - self.filelocation.insert(0, '../../sounds/rain.wav') - - #BUTTON TO BROWSE SOUND FILE - open_file = Button(self.parent, text="...", command=self.browse_file) #see: def browse_file(self) - open_file.grid(row=0, column=0, sticky=W, padx=(280, 6), pady=(10,2)) #put it beside the filelocation textbox - - #BUTTON TO PREVIEW SOUND FILE - preview = Button(self.parent, text=">", command=lambda:UF.wavplay(self.filelocation.get())) - preview.grid(row=0, column=0, sticky=W, padx=(325,6), pady=(10,2)) - - ## STOCHASTIC TRANSFORMATIONS ANALYSIS - - #DECIMATION FACTOR - stocf_label = "stocf:" - Label(self.parent, text=stocf_label).grid(row=1, column=0, sticky=W, padx=(5,5), pady=(10,2)) - self.stocf = Entry(self.parent, justify=CENTER) - self.stocf["width"] = 5 - self.stocf.grid(row=1, column=0, sticky=W, padx=(47,5), pady=(10,2)) - self.stocf.delete(0, END) - self.stocf.insert(0, "0.1") - #TIME SCALING FACTORS - timeScaling_label = "Time scaling factors (time, value pairs):" - Label(self.parent, text=timeScaling_label).grid(row=2, column=0, sticky=W, padx=5, pady=(5,2)) - self.timeScaling = Entry(self.parent, justify=CENTER) - self.timeScaling["width"] = 35 - self.timeScaling.grid(row=3, column=0, sticky=W+E, padx=5, pady=(0,2)) - self.timeScaling.delete(0, END) - self.timeScaling.insert(0, "[0, 0, 1, 2]") - - #BUTTON TO DO THE SYNTHESIS - self.compute = Button(self.parent, text="Apply Transformation", command=self.transformation_synthesis) - self.compute.grid(row=13, column=0, padx=5, pady=(10,15), sticky=W) - - #BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT - self.transf_output = Button(self.parent, text=">", command=lambda:UF.wavplay('output_sounds/' + os.path.basename(self.filelocation.get())[:-4] + '_stochasticModelTransformation.wav')) - self.transf_output.grid(row=13, column=0, padx=(165,5), pady=(10,15), sticky=W) - - # define options for opening file - self.file_opt = options = {} - options['defaultextension'] = '.wav' - options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')] - options['initialdir'] = '../../sounds/' - options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz' - - def browse_file(self): - - self.filename = filedialog.askopenfilename(**self.file_opt) - - #set the text of the self.filelocation - self.filelocation.delete(0, END) - self.filelocation.insert(0,self.filename) - - def transformation_synthesis(self): - - try: - inputFile = self.filelocation.get() - stocf = float(self.stocf.get()) - timeScaling = np.array(eval(self.timeScaling.get())) - - sT.main(inputFile, stocf, timeScaling) +class StochasticTransformations_frame: - except ValueError as errorMessage: - messagebox.showerror("Input values error", errorMessage) + def __init__(self, parent): + + self.parent = parent + self.initUI() + + def initUI(self): + + choose_label = "inputFile:" + Label(self.parent, text=choose_label).grid( + row=0, column=0, sticky=W, padx=5, pady=(10, 2) + ) + + # TEXTBOX TO PRINT PATH OF THE SOUND FILE + self.filelocation = Entry(self.parent) + self.filelocation.focus_set() + self.filelocation["width"] = 25 + self.filelocation.grid(row=0, column=0, sticky=W, padx=(70, 5), pady=(10, 2)) + self.filelocation.delete(0, END) + self.filelocation.insert(0, "../../sounds/rain.wav") + + # BUTTON TO BROWSE SOUND FILE + open_file = Button( + self.parent, text="...", command=self.browse_file + ) # see: def browse_file(self) + open_file.grid( + row=0, column=0, sticky=W, padx=(280, 6), pady=(10, 2) + ) # put it beside the filelocation textbox + + # BUTTON TO PREVIEW SOUND FILE + preview = Button( + self.parent, text=">", command=lambda: UF.wavplay(self.filelocation.get()) + ) + preview.grid(row=0, column=0, sticky=W, padx=(325, 6), pady=(10, 2)) + + ## STOCHASTIC TRANSFORMATIONS ANALYSIS + + # DECIMATION FACTOR + stocf_label = "stocf:" + Label(self.parent, text=stocf_label).grid( + row=1, column=0, sticky=W, padx=(5, 5), pady=(10, 2) + ) + self.stocf = Entry(self.parent, justify=CENTER) + self.stocf["width"] = 5 + self.stocf.grid(row=1, column=0, sticky=W, padx=(47, 5), pady=(10, 2)) + self.stocf.delete(0, END) + self.stocf.insert(0, "0.1") + + # TIME SCALING FACTORS + timeScaling_label = "Time scaling factors (time, value pairs):" + Label(self.parent, text=timeScaling_label).grid( + row=2, column=0, sticky=W, padx=5, pady=(5, 2) + ) + self.timeScaling = Entry(self.parent, justify=CENTER) + self.timeScaling["width"] = 35 + self.timeScaling.grid(row=3, column=0, sticky=W + E, padx=5, pady=(0, 2)) + self.timeScaling.delete(0, END) + self.timeScaling.insert(0, "[0, 0, 1, 2]") + + # BUTTON TO DO THE SYNTHESIS + self.compute = Button( + self.parent, + text="Apply Transformation", + command=self.transformation_synthesis, + ) + self.compute.grid(row=13, column=0, padx=5, pady=(10, 15), sticky=W) + + # BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT + self.transf_output = Button( + self.parent, + text=">", + command=lambda: UF.wavplay( + "output_sounds/" + + os.path.basename(self.filelocation.get())[:-4] + + "_stochasticModelTransformation.wav" + ), + ) + self.transf_output.grid( + row=13, column=0, padx=(165, 5), pady=(10, 15), sticky=W + ) + + # define options for opening file + self.file_opt = options = {} + options["defaultextension"] = ".wav" + options["filetypes"] = [("All files", ".*"), ("Wav files", ".wav")] + options["initialdir"] = "../../sounds/" + options["title"] = "Open a mono audio file .wav with sample frequency 44100 Hz" + + def browse_file(self): + + self.filename = filedialog.askopenfilename(**self.file_opt) + + # set the text of the self.filelocation + self.filelocation.delete(0, END) + self.filelocation.insert(0, self.filename) + + def transformation_synthesis(self): + + try: + inputFile = self.filelocation.get() + stocf = float(self.stocf.get()) + timeScaling = np.array(eval(self.timeScaling.get())) + + sT.main(inputFile, stocf, timeScaling) + + except ValueError as errorMessage: + messagebox.showerror("Input values error", errorMessage) diff --git a/smstools/transformations/interface/stochasticTransformations_function.py b/smstools/transformations/interface/stochasticTransformations_function.py index 60eda305..f6129317 100644 --- a/smstools/transformations/interface/stochasticTransformations_function.py +++ b/smstools/transformations/interface/stochasticTransformations_function.py @@ -7,75 +7,83 @@ from smstools.models import utilFunctions as UF from smstools.transformations import stochasticTransformations as STCT -def main (inputFile='../../sounds/rain.wav', stocf=0.1, timeScaling = np.array([0, 0, 1, 2])): - """ - function to perform a time scaling using the stochastic model - inputFile: name of input sound file - stocf: decimation factor used for the stochastic approximation - timeScaling: time scaling factors, in time-value pairs - """ - - # hop size - H = 128 - - # read input sound - (fs, x) = UF.wavread(inputFile) - - # perform stochastic analysis - mYst = STC.stochasticModelAnal(x, H, H*2, stocf) - - # perform time scaling of stochastic representation - ystocEnv = STCT.stochasticTimeScale(mYst, timeScaling) - - # synthesize output sound - y = STC.stochasticModelSynth(ystocEnv, H, H*2) - - # write output sound - outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stochasticModelTransformation.wav' - UF.wavwrite(y, fs, outputFile) - - # create figure to plot - plt.figure(figsize=(9, 6)) - - # plot the input sound - plt.subplot(4,1,1) - plt.plot(np.arange(x.size)/float(fs), x) - plt.axis([0, x.size/float(fs), min(x), max(x)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - plt.title('input sound: x') - - # plot stochastic representation - plt.subplot(4,1,2) - numFrames = int(mYst[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = np.arange(int(stocf*H))*float(fs)/(stocf*2*H) - plt.pcolormesh(frmTime, binFreq, np.transpose(mYst)) - plt.autoscale(tight=True) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.title('stochastic approximation') - - # plot modified stochastic representation - plt.subplot(4,1,3) - numFrames = int(ystocEnv[:,0].size) - frmTime = H*np.arange(numFrames)/float(fs) - binFreq = np.arange(int(stocf*H))*float(fs)/(stocf*2*H) - plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv)) - plt.autoscale(tight=True) - plt.xlabel('time (sec)') - plt.ylabel('frequency (Hz)') - plt.title('modified stochastic approximation') - - # plot the output sound - plt.subplot(4,1,4) - plt.plot(np.arange(y.size)/float(fs), y) - plt.axis([0, y.size/float(fs), min(y), max(y)]) - plt.ylabel('amplitude') - plt.xlabel('time (sec)') - - plt.tight_layout() - plt.show() - -if __name__ == '__main__': - main() + +def main( + inputFile="../../sounds/rain.wav", stocf=0.1, timeScaling=np.array([0, 0, 1, 2]) +): + """ + function to perform a time scaling using the stochastic model + inputFile: name of input sound file + stocf: decimation factor used for the stochastic approximation + timeScaling: time scaling factors, in time-value pairs + """ + + # hop size + H = 128 + + # read input sound + (fs, x) = UF.wavread(inputFile) + + # perform stochastic analysis + mYst = STC.stochasticModelAnal(x, H, H * 2, stocf) + + # perform time scaling of stochastic representation + ystocEnv = STCT.stochasticTimeScale(mYst, timeScaling) + + # synthesize output sound + y = STC.stochasticModelSynth(ystocEnv, H, H * 2) + + # write output sound + outputFile = ( + "output_sounds/" + + os.path.basename(inputFile)[:-4] + + "_stochasticModelTransformation.wav" + ) + UF.wavwrite(y, fs, outputFile) + + # create figure to plot + plt.figure(figsize=(9, 6)) + + # plot the input sound + plt.subplot(4, 1, 1) + plt.plot(np.arange(x.size) / float(fs), x) + plt.axis([0, x.size / float(fs), min(x), max(x)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + plt.title("input sound: x") + + # plot stochastic representation + plt.subplot(4, 1, 2) + numFrames = int(mYst[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = np.arange(int(stocf * H)) * float(fs) / (stocf * 2 * H) + plt.pcolormesh(frmTime, binFreq, np.transpose(mYst)) + plt.autoscale(tight=True) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.title("stochastic approximation") + + # plot modified stochastic representation + plt.subplot(4, 1, 3) + numFrames = int(ystocEnv[:, 0].size) + frmTime = H * np.arange(numFrames) / float(fs) + binFreq = np.arange(int(stocf * H)) * float(fs) / (stocf * 2 * H) + plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv)) + plt.autoscale(tight=True) + plt.xlabel("time (sec)") + plt.ylabel("frequency (Hz)") + plt.title("modified stochastic approximation") + + # plot the output sound + plt.subplot(4, 1, 4) + plt.plot(np.arange(y.size) / float(fs), y) + plt.axis([0, y.size / float(fs), min(y), max(y)]) + plt.ylabel("amplitude") + plt.xlabel("time (sec)") + + plt.tight_layout() + plt.show() + + +if __name__ == "__main__": + main() diff --git a/smstools/transformations/interface/transformations_GUI.py b/smstools/transformations/interface/transformations_GUI.py index f0cfeb22..6faeeb06 100644 --- a/smstools/transformations/interface/transformations_GUI.py +++ b/smstools/transformations/interface/transformations_GUI.py @@ -1,7 +1,7 @@ import sys import matplotlib -matplotlib.use('TkAgg') +matplotlib.use("TkAgg") from tkinter import * from .notebook import * # window with tabs from .stftMorph_GUI_frame import * @@ -12,8 +12,10 @@ from .hpsMorph_GUI_frame import * root = Tk() -root.title('sms-tools transformations GUI') -nb = notebook(root, TOP) # make a few diverse frames (panels), each using the NB as 'master': +root.title("sms-tools transformations GUI") +nb = notebook( + root, TOP +) # make a few diverse frames (panels), each using the NB as 'master': # uses the notebook's frame f1 = Frame(nb()) @@ -43,5 +45,5 @@ nb.display(f1) -root.geometry('+0+0') +root.geometry("+0+0") root.mainloop() diff --git a/smstools/transformations/sineTransformations.py b/smstools/transformations/sineTransformations.py index 71c2cbd8..2c1086a0 100644 --- a/smstools/transformations/sineTransformations.py +++ b/smstools/transformations/sineTransformations.py @@ -3,48 +3,60 @@ import numpy as np from scipy.interpolate import interp1d + def sineTimeScaling(sfreq, smag, timeScaling): - """ - Time scaling of sinusoidal tracks - sfreq, smag: frequencies and magnitudes of input sinusoidal tracks - timeScaling: scaling factors, in time-value pairs - returns ysfreq, ysmag: frequencies and magnitudes of output sinusoidal tracks - """ - if (timeScaling.size % 2 != 0): # raise exception if array not even length - raise ValueError("Time scaling array does not have an even size") - - L = sfreq.shape[0] # number of input frames - maxInTime = max(timeScaling[::2]) # maximum value used as input times - maxOutTime = max(timeScaling[1::2]) # maximum value used in output times - outL = int(L*maxOutTime/maxInTime) # number of output frames - inFrames = (L-1)*timeScaling[::2]/maxInTime # input time values in frames - outFrames = outL*timeScaling[1::2]/maxOutTime # output time values in frames - timeScalingEnv = interp1d(outFrames, inFrames, fill_value=0) # interpolation function - indexes = timeScalingEnv(np.arange(outL)) # generate frame indexes for the output - ysfreq = sfreq[int(round(indexes[0])),:] # first output frame - ysmag = smag[int(round(indexes[0])),:] # first output frame - for l in indexes[1:]: # generate frames for output sine tracks - ysfreq = np.vstack((ysfreq, sfreq[int(round(l)),:])) # get closest frame to scaling value - ysmag = np.vstack((ysmag, smag[int(round(l)),:])) # get closest frame to scaling value - return ysfreq, ysmag + """ + Time scaling of sinusoidal tracks + sfreq, smag: frequencies and magnitudes of input sinusoidal tracks + timeScaling: scaling factors, in time-value pairs + returns ysfreq, ysmag: frequencies and magnitudes of output sinusoidal tracks + """ + if timeScaling.size % 2 != 0: # raise exception if array not even length + raise ValueError("Time scaling array does not have an even size") + + L = sfreq.shape[0] # number of input frames + maxInTime = max(timeScaling[::2]) # maximum value used as input times + maxOutTime = max(timeScaling[1::2]) # maximum value used in output times + outL = int(L * maxOutTime / maxInTime) # number of output frames + inFrames = (L - 1) * timeScaling[::2] / maxInTime # input time values in frames + outFrames = outL * timeScaling[1::2] / maxOutTime # output time values in frames + timeScalingEnv = interp1d( + outFrames, inFrames, fill_value=0 + ) # interpolation function + indexes = timeScalingEnv(np.arange(outL)) # generate frame indexes for the output + ysfreq = sfreq[int(round(indexes[0])), :] # first output frame + ysmag = smag[int(round(indexes[0])), :] # first output frame + for l in indexes[1:]: # generate frames for output sine tracks + ysfreq = np.vstack( + (ysfreq, sfreq[int(round(l)), :]) + ) # get closest frame to scaling value + ysmag = np.vstack( + (ysmag, smag[int(round(l)), :]) + ) # get closest frame to scaling value + return ysfreq, ysmag + def sineFreqScaling(sfreq, freqScaling): - """ - Frequency scaling of sinusoidal tracks - sfreq: frequencies of input sinusoidal tracks - freqScaling: scaling factors, in time-value pairs (value of 1 is no scaling) - returns ysfreq: frequencies of output sinusoidal tracks - """ - if (freqScaling.size % 2 != 0): # raise exception if array not even length - raise ValueError("Frequency scaling array does not have an even size") - - L = sfreq.shape[0] # number of input frames - # create interpolation object from the scaling values - freqScalingEnv = np.interp(np.arange(L), L*freqScaling[::2]/freqScaling[-2], freqScaling[1::2]) - ysfreq = np.zeros_like(sfreq) # create empty output matrix - for l in range(L): # go through all frames - ind_valid = np.where(sfreq[l,:]!=0)[0] # check if there are frequency values - if ind_valid.size == 0: # if no values go to next frame - continue - ysfreq[l,ind_valid] = sfreq[l,ind_valid] * freqScalingEnv[l] # scale of frequencies - return ysfreq + """ + Frequency scaling of sinusoidal tracks + sfreq: frequencies of input sinusoidal tracks + freqScaling: scaling factors, in time-value pairs (value of 1 is no scaling) + returns ysfreq: frequencies of output sinusoidal tracks + """ + if freqScaling.size % 2 != 0: # raise exception if array not even length + raise ValueError("Frequency scaling array does not have an even size") + + L = sfreq.shape[0] # number of input frames + # create interpolation object from the scaling values + freqScalingEnv = np.interp( + np.arange(L), L * freqScaling[::2] / freqScaling[-2], freqScaling[1::2] + ) + ysfreq = np.zeros_like(sfreq) # create empty output matrix + for l in range(L): # go through all frames + ind_valid = np.where(sfreq[l, :] != 0)[0] # check if there are frequency values + if ind_valid.size == 0: # if no values go to next frame + continue + ysfreq[l, ind_valid] = ( + sfreq[l, ind_valid] * freqScalingEnv[l] + ) # scale of frequencies + return ysfreq diff --git a/smstools/transformations/stftTransformations.py b/smstools/transformations/stftTransformations.py index 447b30fe..8b649c3f 100644 --- a/smstools/transformations/stftTransformations.py +++ b/smstools/transformations/stftTransformations.py @@ -5,88 +5,107 @@ from scipy.signal import resample from smstools.models import dftModel as DFT + def stftFiltering(x, fs, w, N, H, filter): - """ - Apply a filter to a sound by using the STFT - x: input sound, w: analysis window, N: FFT size, H: hop size - filter: magnitude response of filter with frequency-magnitude pairs (in dB) - returns y: output sound - """ + """ + Apply a filter to a sound by using the STFT + x: input sound, w: analysis window, N: FFT size, H: hop size + filter: magnitude response of filter with frequency-magnitude pairs (in dB) + returns y: output sound + """ - M = w.size # size of analysis window - hM1 = int(math.floor((M+1)/2)) # half analysis window size by rounding - hM2 = int(math.floor(M/2)) # half analysis window size by floor - x = np.append(np.zeros(hM2),x) # add zeros at beginning to center first window at sample 0 - x = np.append(x,np.zeros(hM1)) # add zeros at the end to analyze last sample - pin = hM1 # initialize sound pointer in middle of analysis window - pend = x.size-hM1 # last sample to start a frame - w = w / sum(w) # normalize analysis window - y = np.zeros(x.size) # initialize output array - while pin<=pend: # while sound pointer is smaller than last sample - #-----analysis----- - x1 = x[pin-hM1:pin+hM2] # select one frame of input sound - mX, pX = DFT.dftAnal(x1, w, N) # compute dft - #------transformation----- - mY = mX + filter # filter input magnitude spectrum - #-----synthesis----- - y1 = DFT.dftSynth(mY, pX, M) # compute idft - y[pin-hM1:pin+hM2] += H*y1 # overlap-add to generate output sound - pin += H # advance sound pointer - y = np.delete(y, range(hM2)) # delete half of first window which was added in stftAnal - y = np.delete(y, range(y.size-hM1, y.size)) # add zeros at the end to analyze last sample - return y + M = w.size # size of analysis window + hM1 = int(math.floor((M + 1) / 2)) # half analysis window size by rounding + hM2 = int(math.floor(M / 2)) # half analysis window size by floor + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 + x = np.append(x, np.zeros(hM1)) # add zeros at the end to analyze last sample + pin = hM1 # initialize sound pointer in middle of analysis window + pend = x.size - hM1 # last sample to start a frame + w = w / sum(w) # normalize analysis window + y = np.zeros(x.size) # initialize output array + while pin <= pend: # while sound pointer is smaller than last sample + # -----analysis----- + x1 = x[pin - hM1 : pin + hM2] # select one frame of input sound + mX, pX = DFT.dftAnal(x1, w, N) # compute dft + # ------transformation----- + mY = mX + filter # filter input magnitude spectrum + # -----synthesis----- + y1 = DFT.dftSynth(mY, pX, M) # compute idft + y[pin - hM1 : pin + hM2] += H * y1 # overlap-add to generate output sound + pin += H # advance sound pointer + y = np.delete( + y, range(hM2) + ) # delete half of first window which was added in stftAnal + y = np.delete( + y, range(y.size - hM1, y.size) + ) # add zeros at the end to analyze last sample + return y def stftMorph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef): - """ - Morph of two sounds using the STFT - x1, x2: input sounds, fs: sampling rate - w1, w2: analysis windows, N1, N2: FFT sizes, H1: hop size - smoothf: smooth factor of sound 2, bigger than 0 to max of 1, where 1 is no smothing, - balancef: balance between the 2 sounds, from 0 to 1, where 0 is sound 1 and 1 is sound 2 - returns y: output sound - """ + """ + Morph of two sounds using the STFT + x1, x2: input sounds, fs: sampling rate + w1, w2: analysis windows, N1, N2: FFT sizes, H1: hop size + smoothf: smooth factor of sound 2, bigger than 0 to max of 1, where 1 is no smothing, + balancef: balance between the 2 sounds, from 0 to 1, where 0 is sound 1 and 1 is sound 2 + returns y: output sound + """ - if (N2/2*smoothf < 3): # raise exception if decimation factor too small - raise ValueError("Smooth factor too small") + if N2 / 2 * smoothf < 3: # raise exception if decimation factor too small + raise ValueError("Smooth factor too small") - if (smoothf > 1): # raise exception if decimation factor too big - raise ValueError("Smooth factor above 1") + if smoothf > 1: # raise exception if decimation factor too big + raise ValueError("Smooth factor above 1") - if (balancef > 1 or balancef < 0): # raise exception if balancef outside 0-1 - raise ValueError("Balance factor outside range") + if balancef > 1 or balancef < 0: # raise exception if balancef outside 0-1 + raise ValueError("Balance factor outside range") - if (H1 <= 0): # raise error if hop size 0 or negative - raise ValueError("Hop size (H1) smaller or equal to 0") + if H1 <= 0: # raise error if hop size 0 or negative + raise ValueError("Hop size (H1) smaller or equal to 0") - M1 = w1.size # size of analysis window - hM1_1 = int(math.floor((M1+1)/2)) # half analysis window size by rounding - hM1_2 = int(math.floor(M1/2)) # half analysis window size by floor - L = int(x1.size/H1) # number of frames for x1 - x1 = np.append(np.zeros(hM1_2),x1) # add zeros at beginning to center first window at sample 0 - x1 = np.append(x1,np.zeros(hM1_1)) # add zeros at the end to analyze last sample - pin1 = hM1_1 # initialize sound pointer in middle of analysis window - w1 = w1 / sum(w1) # normalize analysis window - M2 = w2.size # size of analysis window - hM2_1 = int(math.floor((M2+1)/2)) # half analysis window size by rounding - hM2_2 = int(math.floor(M2/2)) # half analysis window size by floor2 - H2 = int(x2.size/L) # hop size for second sound - x2 = np.append(np.zeros(hM2_2),x2) # add zeros at beginning to center first window at sample 0 - x2 = np.append(x2,np.zeros(hM2_1)) # add zeros at the end to analyze last sample - pin2 = hM2_1 # initialize sound pointer in middle of analysis window - y = np.zeros(x1.size) # initialize output array - for l in range(L): - #-----analysis----- - mX1, pX1 = DFT.dftAnal(x1[pin1-hM1_1:pin1+hM1_2], w1, N1) # compute dft - mX2, pX2 = DFT.dftAnal(x2[pin2-hM2_1:pin2+hM2_2], w2, N2) # compute dft - #-----transformation----- - mX2smooth = resample(np.maximum(-200, mX2), int(mX2.size*smoothf)) # smooth spectrum of second sound - mX2 = resample(mX2smooth, mX1.size) # generate back the same size spectrum - mY = balancef * mX2 + (1-balancef) * mX1 # generate output spectrum - #-----synthesis----- - y[pin1-hM1_1:pin1+hM1_2] += H1*DFT.dftSynth(mY, pX1, M1) # overlap-add to generate output sound - pin1 += H1 # advance sound pointer - pin2 += H2 # advance sound pointer - y = np.delete(y, range(hM1_2)) # delete half of first window which was added in stftAnal - y = np.delete(y, range(y.size-hM1_1, y.size)) # add zeros at the end to analyze last sample - return y + M1 = w1.size # size of analysis window + hM1_1 = int(math.floor((M1 + 1) / 2)) # half analysis window size by rounding + hM1_2 = int(math.floor(M1 / 2)) # half analysis window size by floor + L = int(x1.size / H1) # number of frames for x1 + x1 = np.append( + np.zeros(hM1_2), x1 + ) # add zeros at beginning to center first window at sample 0 + x1 = np.append(x1, np.zeros(hM1_1)) # add zeros at the end to analyze last sample + pin1 = hM1_1 # initialize sound pointer in middle of analysis window + w1 = w1 / sum(w1) # normalize analysis window + M2 = w2.size # size of analysis window + hM2_1 = int(math.floor((M2 + 1) / 2)) # half analysis window size by rounding + hM2_2 = int(math.floor(M2 / 2)) # half analysis window size by floor2 + H2 = int(x2.size / L) # hop size for second sound + x2 = np.append( + np.zeros(hM2_2), x2 + ) # add zeros at beginning to center first window at sample 0 + x2 = np.append(x2, np.zeros(hM2_1)) # add zeros at the end to analyze last sample + pin2 = hM2_1 # initialize sound pointer in middle of analysis window + y = np.zeros(x1.size) # initialize output array + for l in range(L): + # -----analysis----- + mX1, pX1 = DFT.dftAnal(x1[pin1 - hM1_1 : pin1 + hM1_2], w1, N1) # compute dft + mX2, pX2 = DFT.dftAnal(x2[pin2 - hM2_1 : pin2 + hM2_2], w2, N2) # compute dft + # -----transformation----- + mX2smooth = resample( + np.maximum(-200, mX2), int(mX2.size * smoothf) + ) # smooth spectrum of second sound + mX2 = resample(mX2smooth, mX1.size) # generate back the same size spectrum + mY = balancef * mX2 + (1 - balancef) * mX1 # generate output spectrum + # -----synthesis----- + y[pin1 - hM1_1 : pin1 + hM1_2] += H1 * DFT.dftSynth( + mY, pX1, M1 + ) # overlap-add to generate output sound + pin1 += H1 # advance sound pointer + pin2 += H2 # advance sound pointer + y = np.delete( + y, range(hM1_2) + ) # delete half of first window which was added in stftAnal + y = np.delete( + y, range(y.size - hM1_1, y.size) + ) # add zeros at the end to analyze last sample + return y diff --git a/smstools/transformations/stochasticTransformations.py b/smstools/transformations/stochasticTransformations.py index 94b7bf5a..359f91dd 100644 --- a/smstools/transformations/stochasticTransformations.py +++ b/smstools/transformations/stochasticTransformations.py @@ -5,21 +5,27 @@ def stochasticTimeScale(stocEnv, timeScaling): - """ - Time scaling of the stochastic representation of a sound - stocEnv: stochastic envelope - timeScaling: scaling factors, in time-value pairs - returns ystocEnv: stochastic envelope - """ - if (timeScaling.size % 2 != 0): # raise exception if array not even length - raise ValueError("Time scaling array does not have an even size") - - L = stocEnv[:,0].size # number of input frames - outL = int(L*timeScaling[-1]/timeScaling[-2]) # number of synthesis frames - # create interpolation object with the time scaling values - timeScalingEnv = interp1d(timeScaling[::2]/timeScaling[-2], timeScaling[1::2]/timeScaling[-1]) - indexes = (L-1)*timeScalingEnv(np.arange(outL)/float(outL)) # generate output time indexes - ystocEnv = stocEnv[0,:] # first output frame is same than input - for l in indexes[1:]: # step through the output frames - ystocEnv = np.vstack((ystocEnv, stocEnv[int(round(l)),:])) # get the closest input frame - return ystocEnv + """ + Time scaling of the stochastic representation of a sound + stocEnv: stochastic envelope + timeScaling: scaling factors, in time-value pairs + returns ystocEnv: stochastic envelope + """ + if timeScaling.size % 2 != 0: # raise exception if array not even length + raise ValueError("Time scaling array does not have an even size") + + L = stocEnv[:, 0].size # number of input frames + outL = int(L * timeScaling[-1] / timeScaling[-2]) # number of synthesis frames + # create interpolation object with the time scaling values + timeScalingEnv = interp1d( + timeScaling[::2] / timeScaling[-2], timeScaling[1::2] / timeScaling[-1] + ) + indexes = (L - 1) * timeScalingEnv( + np.arange(outL) / float(outL) + ) # generate output time indexes + ystocEnv = stocEnv[0, :] # first output frame is same than input + for l in indexes[1:]: # step through the output frames + ystocEnv = np.vstack( + (ystocEnv, stocEnv[int(round(l)), :]) + ) # get the closest input frame + return ystocEnv