Skip to content

Commit 9f78dac

Browse files
committed
modify tutorials for audio data processing
1 parent 9ce3895 commit 9f78dac

File tree

3 files changed

+41
-32
lines changed

3 files changed

+41
-32
lines changed

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mindspore==2.0.0
12
numpy>=1.17.0
23
scipy>=1.6.0
34
pyyaml>=5.3

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mindspore==2.0.0
12
numpy>=1.17.0
23
scipy>=1.6.0
34
pyyaml>=5.3

tutorials/MindAudio数据处理快速入门.ipynb renamed to tutorials/audio_data_processing_with_mindaudio.ipynb

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,15 @@
103103
],
104104
"source": [
105105
"import os\n",
106-
"import mindaudio\n",
106+
"import mindaudio.data.io as io\n",
107107
"import scipy.io\n",
108108
"\n",
109109
"# Get a multi-channel audio file from the tests/data directory.\n",
110110
"data_dir = os.path.join(os.path.dirname(scipy.io.__file__), \"tests\", \"data\")\n",
111111
"wav_fname = os.path.join(data_dir, \"test-44100Hz-2ch-32bit-float-be.wav\")\n",
112112
"\n",
113113
"# Load the .wav file contents.\n",
114-
"audio, sr = mindaudio.read(wav_fname)\n",
114+
"audio, sr = io.read(wav_fname)\n",
115115
"print(f\"number of channels = {audio.shape[1]}\")\n",
116116
"\n",
117117
"length = audio.shape[0] / sr\n",
@@ -146,14 +146,14 @@
146146
"outputs": [],
147147
"source": [
148148
"import numpy as np\n",
149-
"import mindaudio\n",
149+
"import mindaudio.data.io as io\n",
150150
"\n",
151151
"samplerate = 44100\n",
152152
"fs = 100\n",
153153
"t = np.linspace(0., 1., samplerate)\n",
154154
"amplitude = np.iinfo(np.int16).max\n",
155155
"data = amplitude * np.sin(2. * np.pi * fs * t)\n",
156-
"mindaudio.write(\"example.wav\", data, samplerate)"
156+
"io.write(\"example.wav\", data, samplerate)"
157157
]
158158
},
159159
{
@@ -194,12 +194,13 @@
194194
],
195195
"source": [
196196
"import numpy as np\n",
197-
"import mindaudio\n",
197+
"import mindaudio.data.io as io\n",
198+
"import mindaudio.data.features as features\n",
198199
"\n",
199-
"test_data, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
200+
"test_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
200201
"# Carry out data feature extraction\n",
201202
"n_fft = 512\n",
202-
"matrix = mindaudio.fbank(test_data, n_fft=n_fft)\n",
203+
"matrix = features.fbank(test_data, n_fft=n_fft)\n",
203204
"data_shape = matrix.shape\n",
204205
"\n",
205206
"# Drawing display\n",
@@ -270,16 +271,17 @@
270271
}
271272
],
272273
"source": [
273-
"import mindaudio\n",
274+
"import mindaudio.data.io as io\n",
275+
"import mindaudio.data.filters as filters\n",
274276
"import matplotlib.pyplot as plt\n",
275277
"import numpy as np\n",
276278
"import IPython.display as ipd\n",
277279
"\n",
278-
"audio, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
280+
"audio, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
279281
"ipd.display(ipd.Audio(audio, rate=sr))\n",
280282
"\n",
281283
"cutoff_freq = 1000\n",
282-
"out_waveform = mindaudio.low_pass_filter(audio, sr, cutoff_freq)\n",
284+
"out_waveform = filters.low_pass_filter(audio, sr, cutoff_freq)\n",
283285
"ipd.display(ipd.Audio(out_waveform, rate=sr))"
284286
]
285287
},
@@ -342,15 +344,16 @@
342344
}
343345
],
344346
"source": [
345-
"import mindaudio\n",
347+
"import mindaudio.data.io as io\n",
348+
"import mindaudio.data.augment as augment\n",
346349
"import IPython.display as ipd\n",
347350
"\n",
348-
"samples, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
351+
"samples, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
349352
"ipd.display(ipd.Audio(samples, rate=sr))\n",
350353
"background_list = [\"../tests/samples/ASR/BAC009S0002W0123.wav\"]\n",
351354
"\n",
352355
"# test add noise for 1d\n",
353-
"noisy_wav = mindaudio.add_noise(samples, background_list, 3, 30, 1.0)\n",
356+
"noisy_wav = augment.add_noise(samples, background_list, 3, 30, 1.0)\n",
354357
"# display\n",
355358
"ipd.display(ipd.Audio(noisy_wav, rate=sr))"
356359
]
@@ -406,15 +409,16 @@
406409
}
407410
],
408411
"source": [
409-
"import mindaudio\n",
412+
"import mindaudio.data.io as io\n",
413+
"import mindaudio.data.augment as augment\n",
410414
"import IPython.display as ipd\n",
411415
"\n",
412-
"samples, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
413-
"ipd.display(ipd.Audio(samples, rate=sr))\n",
416+
"audio_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
417+
"ipd.display(ipd.Audio(audio_data, rate=sr))\n",
414418
"rir_list = [\"../tests/samples/rir/air_binaural_aula_carolina_0_1_1_90_3_16k.wav\"]\n",
415419
"\n",
416420
"# test add reverb for 1d\n",
417-
"rir_wav = mindaudio.add_reverb(samples, rir_list, 1.0)\n",
421+
"rir_wav = augment.add_reverb(audio_data, rir_list, 1.0)\n",
418422
"# display\n",
419423
"ipd.display(ipd.Audio(rir_wav, rate=sr))"
420424
]
@@ -452,10 +456,10 @@
452456
}
453457
],
454458
"source": [
455-
"import mindaudio\n",
459+
"import mindaudio.data.processing as processing\n",
456460
"\n",
457461
"waveform = np.random.random([1, 441000])\n",
458-
"y_8k = mindaudio.resample(waveform, orig_freq=44100, new_freq=16000)\n",
462+
"y_8k = processing.resample(waveform, orig_freq=44100, new_freq=16000)\n",
459463
"print(waveform.shape)\n",
460464
"print(y_8k.shape)"
461465
]
@@ -511,15 +515,16 @@
511515
}
512516
],
513517
"source": [
514-
"import mindaudio\n",
518+
"import mindaudio.data.io as io\n",
519+
"import mindaudio.data.processing as processing\n",
515520
"\n",
516-
"waveform, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
517-
"ipd.display(ipd.Audio(waveform, rate=sr))\n",
521+
"audio_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
522+
"ipd.display(ipd.Audio(audio_data, rate=sr))\n",
518523
"\n",
519524
"offset_factor = 0.2\n",
520525
"duration_factor = 0.3\n",
521526
"# do clip\n",
522-
"out_waveform = mindaudio.clip(waveform, offset_factor, duration_factor)\n",
527+
"out_waveform = processing.clip(audio_data, offset_factor, duration_factor)\n",
523528
"ipd.display(ipd.Audio(out_waveform, rate=sr))"
524529
]
525530
},
@@ -555,13 +560,14 @@
555560
}
556561
],
557562
"source": [
558-
"import mindaudio\n",
563+
"import mindaudio.data.io as io\n",
564+
"import mindaudio.data.spectrum as spectrum\n",
559565
"\n",
560566
"# Read audio file\n",
561-
"test_data, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
562-
"# Carry out data feature extraction\n",
567+
"test_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
568+
"# data feature extraction\n",
563569
"n_fft = 512\n",
564-
"matrix = mindaudio.stft(test_data, n_fft=n_fft)\n",
570+
"matrix = spectrum.stft(test_data, n_fft=n_fft)\n",
565571
"print(matrix.shape)"
566572
]
567573
},
@@ -603,14 +609,15 @@
603609
"source": [
604610
"import numpy as np\n",
605611
"import matplotlib.pyplot as plt\n",
606-
"import mindaudio\n",
612+
"import mindaudio.data.io as io\n",
613+
"import mindaudio.data.spectrum as spectrum\n",
607614
"\n",
608615
"# Read audio file\n",
609-
"test_data, sr = mindaudio.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
610-
"# Carry out data feature extraction\n",
616+
"audio_data, sr = io.read(\"../tests/samples/ASR/BAC009S0002W0122.wav\")\n",
617+
"# data feature extraction\n",
611618
"n_fft = 512\n",
612-
"matrix = mindaudio.stft(test_data, n_fft=n_fft)\n",
613-
"magnitude, _ = mindaudio.magphase(matrix, 1)\n",
619+
"matrix = spectrum.stft(audio_data, n_fft=n_fft)\n",
620+
"magnitude, _ = spectrum.magphase(matrix, 1)\n",
614621
"print(magnitude.shape)\n",
615622
"# Drawing display\n",
616623
"x = [i for i in range(0, int(n_fft/2*magnitude.shape[1]), int(n_fft/2))]\n",

0 commit comments

Comments
 (0)