diff --git a/README.md b/README.md index 3d201e4..2d4d262 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,71 @@ # pop-to-8bit -This is a Python version implementaion of the [paper](https:///lemonatsu.github.io/files/su17icassp.pdf), and you can also see some informations in our [website](https://lemonatsu.github.io). +This is a Python implementation of the [paper](https://lemonatsu.github.io/files/su17icassp.pdf), and you can also see some information on our [website](https://lemonatsu.github.io). -Note that this version may generate slightly different result in compare to the original version, and the processing technique in [section 2.3](https://lemonatsu.github.io/pdf/su17icassp.pdf) of paper is omitted due to the fact that it can be achieved by tuning the parameter of [pYIN](https://code.soundsoftware.ac.uk/projects/pyin) plug-in. +Note that this version may generate slightly different results compared to the original version, and the processing technique in [section 2.3](https://lemonatsu.github.io/pdf/su17icassp.pdf) of the paper is omitted due to the fact that it can be achieved by tuning the pYIN parameters. -The NMF constraint is also not implemented in this version due to its ineffectiveness of improving the conversion result. +The NMF constraint is also not implemented in this version due to its ineffectiveness in improving the conversion result. ## Prerequisites -- Python 3.4+ -- [pYIN vamp plug-in](https://code.soundsoftware.ac.uk/projects/pyin) +- Python 3.8+ - [LibROSA](http://librosa.github.io/librosa/) -- [Pypropack](https://github.com/jakevdp/pypropack) -- Numpy +- [SciPy](https://scipy.org/) +- NumPy If you use Archlinux, there is an [AUR](https://aur.archlinux.org/packages/pop-to-8bit) package available. ## Installation -You can install the package with +You can install the package with pip: - python3 setup.py install +```bash +pip install . +``` + +Or for development: + +```bash +pip install -e . 
+``` ## Usage You can simply convert your audio with: ```console popto8bit [-h] [-s SAMPLE_RATE] [--block_size BLOCK_SIZE] - [--step_size STEP_SIZE] - audio_path output_path ``` +### Example + +```bash +popto8bit input.wav output_8bit.wav +``` + Tuning the ``step_size`` and ``block_size`` can help reach a more accurate pitch result. +## Changes from Original + +This fork includes the following updates for compatibility with modern Python and libraries: + +- **Removed external pYIN vamp plugin dependency**: Now uses LibROSA's built-in `pyin` function for pitch detection +- **Removed pypropack dependency**: Replaced with `scipy.sparse.linalg.svds` for truncated SVD +- **Updated scipy imports**: Replaced deprecated `scipy.signal.hamming` with `scipy.signal.windows.hamming` +- **Updated librosa API**: Fixed `librosa.load()` to use keyword arguments +- **Simplified dependencies**: Only requires `librosa` and `scipy` (NumPy is installed as a dependency of these) + ## Acknowledgement -- [pYIN vamp plug-in](https://code.soundsoftware.ac.uk/projects/pyin) : Matthias Mauch, Dixon, Simon -- [LibROSA: 0.4.1](http://librosa.github.io/librosa/) : +- [pYIN algorithm](https://code.soundsoftware.ac.uk/projects/pyin) : Matthias Mauch, Simon Dixon (now integrated via LibROSA) +- [LibROSA](http://librosa.github.io/librosa/) : Brian McFee; Matt McVicar; Colin Raffel; Dawen Liang; Oriol Nieto; Eric Battenberg; Josh Moore; Dan Ellis; Ryuichi YAMAMOTO; Rachel Bittner; Douglas Repetto; Petr Viktorin; João Felipe Santos; Adrian Holovaty -- [Pypropack](https://github.com/jakevdp/pypropack) : Jake Vanderplas +- [SciPy](https://scipy.org/) : For sparse linear algebra (replacement for pypropack) - [robust-matrix-decomposition](https://kastnerkyle.github.io/posts/robust-matrix-decomposition/) : Kyle Kastner - [RPCA](https://github.com/apapanico/RPCA) : Alex Pananicolaou +## License + +This project is licensed under the GNU Affero General Public License v3.0 or later (AGPL-3.0-or-later). 
+ diff --git a/popto8bit/__init__.py b/popto8bit/__init__.py index 8fab382..76b75f4 100755 --- a/popto8bit/__init__.py +++ b/popto8bit/__init__.py @@ -49,7 +49,7 @@ def main(): print(f'block_size : {args.block_size}, step_size : {args.step_size}') audio, fs = librosa.load(args.audio_path, - args.sample_rate, + sr=args.sample_rate, mono=False) audio_8bit = core.convert(audio, diff --git a/popto8bit/py8bits/core.py b/popto8bit/py8bits/core.py index c1aef27..cef5441 100644 --- a/popto8bit/py8bits/core.py +++ b/popto8bit/py8bits/core.py @@ -2,7 +2,7 @@ import numpy as np import librosa import scipy.io as spio -from scipy.signal import hamming +from scipy.signal.windows import hamming from .nmf import nmf from .pyin import pYIN from .svs import svs diff --git a/popto8bit/py8bits/ialm_rpca.py b/popto8bit/py8bits/ialm_rpca.py index 4863822..beee581 100644 --- a/popto8bit/py8bits/ialm_rpca.py +++ b/popto8bit/py8bits/ialm_rpca.py @@ -1,6 +1,6 @@ import numpy as np from numpy.linalg import svd, norm -from pypropack import svdp +from scipy.sparse.linalg import svds def ialm_RPCA(D, l=None, @@ -78,7 +78,10 @@ def ialm_RPCA(D, T = D - A_hat + (1. / u) * Y E_hat = np.maximum(T - (l / u), 0) + np.minimum(T + (l / u), 0) if choosvd(n, sv): - U, S, V = svdp(D - E_hat + (1. / u) *Y, sv, kmax=sv*kmax) + U, S, V = svds(D - E_hat + (1. / u) *Y, k=min(sv, min(m, n) - 1)) + # svds returns in ascending order, reverse to descending + idx = np.argsort(S)[::-1] + U, S, V = U[:, idx], S[idx], V[idx, :] else: U, S, V = svd(D-E_hat + (1. 
/ u) * Y, full_matrices=False) diff --git a/popto8bit/py8bits/pyin.py b/popto8bit/py8bits/pyin.py index bb5539f..798f2f7 100644 --- a/popto8bit/py8bits/pyin.py +++ b/popto8bit/py8bits/pyin.py @@ -1,11 +1,10 @@ import numpy as np -import vamp import librosa def pYIN(audio, fs=44100., hop_size=1024, block_size=2048, step_size=1024, lowampsuppression=.1, onsetsensitivity=.7, prunethresh=.09): """ - This function will call the pYIN vamp plug-in to conduct the pitch analysis, + This function uses librosa's built-in pYIN to conduct pitch analysis, and convert the pitch estimates into an activation matrix. Tuning the parameters here can improve the resulting 8-bit music. @@ -19,15 +18,15 @@ def pYIN(audio, fs=44100., hop_size=1024, block_size=2048, step_size=1024, hop_size : int Hop size for the resulting activation matrix. block_size : int - Block size for pYIN. + Block size for pYIN (frame_length). step_size : int - Step size for pYin. + Step size for pYin (hop_length for pyin). lowampsuppression : float - The threshold for pYIN to suppress pithches that have low amplitude. + The threshold for pYIN to suppress pitches that have low amplitude. onsetsensitivity : float - Onset sensitivity for pYIN. + Onset sensitivity for pYIN (not used in librosa's pyin). prunethresh : float - Prune threshold for pYIN. + Prune threshold for pYIN (not used in librosa's pyin). 
Return ------ @@ -36,38 +35,49 @@ def pYIN(audio, fs=44100., hop_size=1024, block_size=2048, step_size=1024, """ - length = len(audio) audio = np.asarray(audio) - parameters = { - 'prunethresh' : prunethresh, - 'lowampsuppression' : lowampsuppression, - 'onsetsensitivity' : onsetsensitivity, - } + # Use librosa's built-in pyin + # fmin and fmax cover typical singing voice range + fmin = librosa.note_to_hz('C2') # ~65 Hz + fmax = librosa.note_to_hz('C7') # ~2093 Hz + + f0, voiced_flag, voiced_probs = librosa.pyin( + audio, + fmin=fmin, + fmax=fmax, + sr=fs, + frame_length=block_size, + hop_length=step_size + ) - data = vamp.collect(audio, fs, 'vamp-pyin-f0:pyin', 'notes', - parameters=parameters, block_size=block_size, step_size=step_size)['list'] - actl = proc_frame(data, length, fs=fs, hop_size=hop_size) + # Convert f0 to activation matrix format + actl = proc_frame_librosa(f0, voiced_flag, length, fs=fs, + hop_size=hop_size, step_size=step_size) return actl -def proc_frame(data, length, fs=44100., hop_size=1024, offset=34-1): +def proc_frame_librosa(f0, voiced_flag, length, fs=44100., hop_size=1024, + step_size=1024, offset=34-1): """ - Parse the pYIN result and generate a corresponding activation matrix. + Parse the librosa pYIN result and generate a corresponding activation matrix. Parameters ---------- - data : array - Array of dictionary such that each dictionary contains the duration - and timestamp of a pitch. + f0 : ndarray + Fundamental frequency estimates from librosa.pyin. + voiced_flag : ndarray + Boolean array indicating voiced frames. length : int Length of the audio input. It will be used to calculate the size of resulting activation matrix. fs : float Sample rate. hop_size : int - Hop size of the activation matix. + Hop size of the activation matrix. + step_size : int + Step size used in pyin analysis. 
offset : int The offset is used to offset the note number in order to match the pre-recorded 8-bit template, due to the fact that the index @@ -82,16 +92,19 @@ def proc_frame(data, length, fs=44100., hop_size=1024, offset=34-1): flen = int(length / hop_size) - 1 frames = np.zeros(flen) - samples = np.zeros(length, dtype=np.int) - hz_samples = np.zeros(length, dtype=np.int) - - for d in data: - dur = int(float(d['duration']) * fs) - st = int(float(d['timestamp']) * fs) - midi = int(np.round(librosa.hz_to_midi(float(d['values'])) - offset)) - samples[st : st+dur] = midi - hz_samples[st : st+dur] = float(d['values']) - + samples = np.zeros(length, dtype=int) + + # Convert f0 to midi and fill samples array + for i, (freq, voiced) in enumerate(zip(f0, voiced_flag)): + if voiced and not np.isnan(freq) and freq > 0: + st = i * step_size + dur = step_size + midi = int(np.round(librosa.hz_to_midi(freq) - offset)) + midi = max(0, midi) # Ensure non-negative + end_idx = min(st + dur, length) + samples[st:end_idx] = midi + + # Convert samples to frames for i in range(0, flen): d = samples[i * hop_size : (i + 1) * hop_size] counts = np.bincount(d) diff --git a/setup.py b/setup.py index 56e3329..812fb5d 100644 --- a/setup.py +++ b/setup.py @@ -6,8 +6,6 @@ classifiers = [ "Programming Language :: Python :: 3", - ("License :: OSI Approved :: " - "GNU Affero General Public License v3 or later (AGPLv3+)"), "Operating System :: Unix"] setup_kwargs = { @@ -23,8 +21,7 @@ "include_package_data": True, "entry_points": {'console_scripts': ['popto8bit = popto8bit:main']}, "install_requires": ['librosa', - 'pypropack', - 'vamp'], + 'scipy'], "classifiers": classifiers }