diff --git a/PrepareDataset.ipynb b/PrepareDataset.ipynb
index 2cee886..6a3edd0 100644
--- a/PrepareDataset.ipynb
+++ b/PrepareDataset.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 9,
    "id": "b0992ed3-6fb2-4f7c-b5b2-6c1aaf632b18",
    "metadata": {},
    "outputs": [],
@@ -11,21 +11,24 @@
     "import os\n",
     "import shutil\n",
     "from tqdm import tqdm\n",
+    "import torchaudio\n",
+    "import torchaudio.transforms as T\n",
+    "import glob\n",
     "\n",
     "SRC_PATH = r'F:\\Hackathon_Dataset'\n",
-    "DST_PATH = r'C:\\Users\\amitli\\Repo\\SpeakerVerification2024\\Dataset_SR32000'\n",
+    "DST_PATH = r'C:\\Users\\amitli\\Repo\\SpeakerVerification2024\\Dataset_SR16000'\n",
     "df = pd.read_csv(r'F:\\Hackathon_Dataset\\Hackathon_final_without_outliers.csv')\n",
-    "df = df[df.language == 'arabic']\n"
+    "df = df[df.language == 'arabic']\n",
+    "df = df[df.hackathon_type == 'tactic']"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 10,
    "id": "23bedaf6-6695-42b4-99f4-e632d0a77ef7",
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
     "total_collect = 0\n",
     "l_speaker = []\n",
     "l_fname = []\n",
@@ -45,44 +48,19 @@
     "\n",
     "    l_speaker.append(speaker)\n",
     "    l_fname .append(file_name) \n",
-    "    shutil.copy(rf'{SRC_PATH}/{file_name}', rf'{DST_PATH}/{file_name}')\n",
+    "    #shutil.copy(rf'{SRC_PATH}/{file_name}', rf'{DST_PATH}/{file_name}')\n",
+    "\n",
+    "    # Load the audio file \n",
+    "    waveform, sample_rate = torchaudio.load(rf'{SRC_PATH}/{file_name}')\n",
+    "    if sample_rate != 16000: \n",
+    "        resampler = T.Resample(orig_freq=sample_rate, new_freq=16000)\n",
+    "        waveform = resampler(waveform)\n",
+    "    torchaudio.save(rf'{DST_PATH}/{file_name}', waveform, 16000) \n",
+    " \n",
     "\n",
     "df_results = pd.DataFrame({'speaker': l_speaker, 'file_name': l_fname})\n",
     "df_results.to_csv(rf\"{DST_PATH}/speakers.csv\")"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "270f4f53-fd46-4eb9-a05c-a636d34f0873",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 166.34it/s]\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torchaudio\n",
-    "import torchaudio.transforms as T\n",
-    "import glob\n",
-    "\n",
-    "\n",
-    "DST_PATH_16000 = r'C:\\Users\\amitli\\Repo\\SpeakerVerification2024\\Dataset_SR16000'\n",
-    "l_files = glob.glob(rf'{DST_PATH}\\*.wav')\n",
-    "for file in tqdm(l_files):\n",
-    "\n",
-    "    # Load the audio file \n",
-    "    waveform, sample_rate = torchaudio.load(file)\n",
-    "    if sample_rate != 16000: \n",
-    "        resampler = T.Resample(orig_freq=sample_rate, new_freq=16000)\n",
-    "        waveform = resampler(waveform)\n",
-    "\n",
-    "    torchaudio.save(rf'{DST_PATH_16000}\\{os.path.basename(file)}', waveform, 16000) \n"
-   ]
   }
  ],
  "metadata": {