|
103 | 103 | ],
|
104 | 104 | "source": [
|
105 | 105 | "import os\n",
|
106 |
| - "import mindaudio\n", |
| 106 | + "import mindaudio.data.io as io\n", |
107 | 107 | "import scipy.io\n",
|
108 | 108 | "\n",
|
109 | 109 | "# Get a multi-channel audio file from the tests/data directory.\n",
|
110 | 110 | "data_dir = os.path.join(os.path.dirname(scipy.io.__file__), \"tests\", \"data\")\n",
|
111 | 111 | "wav_fname = os.path.join(data_dir, \"test-44100Hz-2ch-32bit-float-be.wav\")\n",
|
112 | 112 | "\n",
|
113 | 113 | "# Load the .wav file contents.\n",
|
114 |
| - "audio, sr = mindaudio.read(wav_fname)\n", |
| 114 | + "audio, sr = io.read(wav_fname)\n", |
115 | 115 | "print(f\"number of channels = {audio.shape[1]}\")\n",
|
116 | 116 | "\n",
|
117 | 117 | "length = audio.shape[0] / sr\n",
|
|
146 | 146 | "outputs": [],
|
147 | 147 | "source": [
|
148 | 148 | "import numpy as np\n",
|
149 |
| - "import mindaudio\n", |
| 149 | + "import mindaudio.data.io as io\n", |
150 | 150 | "\n",
|
151 | 151 | "samplerate = 44100\n",
|
152 | 152 | "fs = 100\n",
|
153 | 153 | "t = np.linspace(0., 1., samplerate)\n",
|
154 | 154 | "amplitude = np.iinfo(np.int16).max\n",
|
155 | 155 | "data = amplitude * np.sin(2. * np.pi * fs * t)\n",
|
156 |
| - "mindaudio.write(\"example.wav\", data, samplerate)" |
| 156 | + "io.write(\"example.wav\", data, samplerate)" |
157 | 157 | ]
|
158 | 158 | },
|
159 | 159 | {
|
|
194 | 194 | ],
|
195 | 195 | "source": [
|
196 | 196 | "import numpy as np\n",
|
197 |
| - "import mindaudio\n", |
| 197 | + "import mindaudio.data.io as io\n", |
| 198 | + "import mindaudio.data.features as features\n", |
198 | 199 | "\n",
|
199 |
| - "test_data, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
| 200 | + "test_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
200 | 201 | "# Carry out data feature extraction\n",
|
201 | 202 | "n_fft = 512\n",
|
202 |
| - "matrix = mindaudio.fbank(test_data, n_fft=n_fft)\n", |
| 203 | + "matrix = features.fbank(test_data, n_fft=n_fft)\n", |
203 | 204 | "data_shape = matrix.shape\n",
|
204 | 205 | "\n",
|
205 | 206 | "# Drawing display\n",
|
|
270 | 271 | }
|
271 | 272 | ],
|
272 | 273 | "source": [
|
273 |
| - "import mindaudio\n", |
| 274 | + "import mindaudio.data.io as io\n", |
| 275 | + "import mindaudio.data.filters as filters\n", |
274 | 276 | "import matplotlib.pyplot as plt\n",
|
275 | 277 | "import numpy as np\n",
|
276 | 278 | "import IPython.display as ipd\n",
|
277 | 279 | "\n",
|
278 |
| - "audio, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
| 280 | + "audio, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
279 | 281 | "ipd.display(ipd.Audio(audio, rate=sr))\n",
|
280 | 282 | "\n",
|
281 | 283 | "cutoff_freq = 1000\n",
|
282 |
| - "out_waveform = mindaudio.low_pass_filter(audio, sr, cutoff_freq)\n", |
| 284 | + "out_waveform = filters.low_pass_filter(audio, sr, cutoff_freq)\n", |
283 | 285 | "ipd.display(ipd.Audio(out_waveform, rate=sr))"
|
284 | 286 | ]
|
285 | 287 | },
|
|
342 | 344 | }
|
343 | 345 | ],
|
344 | 346 | "source": [
|
345 |
| - "import mindaudio\n", |
| 347 | + "import mindaudio.data.io as io\n", |
| 348 | + "import mindaudio.data.augment as augment\n", |
346 | 349 | "import IPython.display as ipd\n",
|
347 | 350 | "\n",
|
348 |
| - "samples, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
| 351 | + "samples, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
349 | 352 | "ipd.display(ipd.Audio(samples, rate=sr))\n",
|
350 | 353 | "background_list = [\"../tests/samples/ASR/BAC009S0002W0123.wav\"]\n",
|
351 | 354 | "\n",
|
352 | 355 | "# test add noise for 1d\n",
|
353 |
| - "noisy_wav = mindaudio.add_noise(samples, background_list, 3, 30, 1.0)\n", |
| 356 | + "noisy_wav = augment.add_noise(samples, background_list, 3, 30, 1.0)\n", |
354 | 357 | "# display\n",
|
355 | 358 | "ipd.display(ipd.Audio(noisy_wav, rate=sr))"
|
356 | 359 | ]
|
|
406 | 409 | }
|
407 | 410 | ],
|
408 | 411 | "source": [
|
409 |
| - "import mindaudio\n", |
| 412 | + "import mindaudio.data.io as io\n", |
| 413 | + "import mindaudio.data.augment as augment\n", |
410 | 414 | "import IPython.display as ipd\n",
|
411 | 415 | "\n",
|
412 |
| - "samples, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
413 |
| - "ipd.display(ipd.Audio(samples, rate=sr))\n", |
| 416 | + "audio_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
| 417 | + "ipd.display(ipd.Audio(audio_data, rate=sr))\n", |
414 | 418 | "rir_list = [\"../tests/samples/rir/air_binaural_aula_carolina_0_1_1_90_3_16k.wav\"]\n",
|
415 | 419 | "\n",
|
416 | 420 | "# test add reverb for 1d\n",
|
417 |
| - "rir_wav = mindaudio.add_reverb(samples, rir_list, 1.0)\n", |
| 421 | + "rir_wav = augment.add_reverb(audio_data, rir_list, 1.0)\n", |
418 | 422 | "# display\n",
|
419 | 423 | "ipd.display(ipd.Audio(rir_wav, rate=sr))"
|
420 | 424 | ]
|
|
452 | 456 | }
|
453 | 457 | ],
|
454 | 458 | "source": [
|
455 |
| - "import mindaudio\n", |
| 459 | + "import mindaudio.data.processing as processing\n", |
456 | 460 | "\n",
|
457 | 461 | "waveform = np.random.random([1, 441000])\n",
|
458 |
| - "y_8k = mindaudio.resample(waveform, orig_freq=44100, new_freq=16000)\n", |
| 462 | + "y_8k = processing.resample(waveform, orig_freq=44100, new_freq=16000)\n", |
459 | 463 | "print(waveform.shape)\n",
|
460 | 464 | "print(y_8k.shape)"
|
461 | 465 | ]
|
|
511 | 515 | }
|
512 | 516 | ],
|
513 | 517 | "source": [
|
514 |
| - "import mindaudio\n", |
| 518 | + "import mindaudio.data.io as io\n", |
| 519 | + "import mindaudio.data.processing as processing\n", |
515 | 520 | "\n",
|
516 |
| - "waveform, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
517 |
| - "ipd.display(ipd.Audio(waveform, rate=sr))\n", |
| 521 | + "audio_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
| 522 | + "ipd.display(ipd.Audio(audio_data, rate=sr))\n", |
518 | 523 | "\n",
|
519 | 524 | "offset_factor = 0.2\n",
|
520 | 525 | "duration_factor = 0.3\n",
|
521 | 526 | "# do clip\n",
|
522 |
| - "out_waveform = mindaudio.clip(waveform, offset_factor, duration_factor)\n", |
| 527 | + "out_waveform = processing.clip(audio_data, offset_factor, duration_factor)\n", |
523 | 528 | "ipd.display(ipd.Audio(out_waveform, rate=sr))"
|
524 | 529 | ]
|
525 | 530 | },
|
|
555 | 560 | }
|
556 | 561 | ],
|
557 | 562 | "source": [
|
558 |
| - "import mindaudio\n", |
| 563 | + "import mindaudio.data.io as io\n", |
| 564 | + "import mindaudio.data.spectrum as spectrum\n", |
559 | 565 | "\n",
|
560 | 566 | "# Read audio file\n",
|
561 |
| - "test_data, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
562 |
| - "# Carry out data feature extraction\n", |
| 567 | + "test_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
| 568 | + "# data feature extraction\n", |
563 | 569 | "n_fft = 512\n",
|
564 |
| - "matrix = mindaudio.stft(test_data, n_fft=n_fft)\n", |
| 570 | + "matrix = spectrum.stft(test_data, n_fft=n_fft)\n", |
565 | 571 | "print(matrix.shape)"
|
566 | 572 | ]
|
567 | 573 | },
|
|
603 | 609 | "source": [
|
604 | 610 | "import numpy as np\n",
|
605 | 611 | "import matplotlib.pyplot as plt\n",
|
606 |
| - "import mindaudio\n", |
| 612 | + "import mindaudio.data.io as io\n", |
| 613 | + "import mindaudio.data.spectrum as spectrum\n", |
607 | 614 | "\n",
|
608 | 615 | "# Read audio file\n",
|
609 |
| - "test_data, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
610 |
| - "# Carry out data feature extraction\n", |
| 616 | + "audio_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n", |
| 617 | + "# data feature extraction\n", |
611 | 618 | "n_fft = 512\n",
|
612 |
| - "matrix = mindaudio.stft(test_data, n_fft=n_fft)\n", |
613 |
| - "magnitude, _ = mindaudio.magphase(matrix, 1)\n", |
| 619 | + "matrix = spectrum.stft(audio_data, n_fft=n_fft)\n", |
| 620 | + "magnitude, _ = spectrum.magphase(matrix, 1)\n", |
614 | 621 | "print(magnitude.shape)\n",
|
615 | 622 | "# Drawing display\n",
|
616 | 623 | "x = [i for i in range(0, int(n_fft/2*magnitude.shape[1]), int(n_fft/2))]\n",
|
|
0 commit comments