Skip to content

Commit b613162

Browse files
committed Feb 28, 2021
Added last small changes.
1 parent 2ef4227 commit b613162

7 files changed

+42
-9
lines changed
 

‎Data_Preprocessing.ipynb

+15-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,22 @@
11
{
22
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Data Preprocessing\n",
8+
"* needs to run once before training several models\n",
9+
"* downloads all files, which are not downloaded so far\n",
10+
"* saves a dataset consisting of already split log-mel specs"
11+
]
12+
},
313
{
414
"cell_type": "code",
515
"execution_count": null,
616
"metadata": {},
717
"outputs": [],
818
"source": [
9-
"# set to True for the first time\n",
19+
"# set to \"True\" for the first time\n",
1020
"need_to_download_gtzan = False"
1121
]
1222
},
@@ -28,6 +38,7 @@
2838
"metadata": {},
2939
"outputs": [],
3040
"source": [
41+
"# used for dynamic changes within the code\n",
3142
"%reload_ext autoreload\n",
3243
"%autoreload 2"
3344
]
@@ -38,6 +49,7 @@
3849
"metadata": {},
3950
"outputs": [],
4051
"source": [
52+
"# allow for gpu growth and read the config for the data preparation\n",
4153
"from utils import load_config, allow_growth\n",
4254
"allow_growth()\n",
4355
"\n",
@@ -60,7 +72,7 @@
6072
"metadata": {},
6173
"outputs": [],
6274
"source": [
63-
"''' Some small amount of reproducibility '''\n",
75+
"# some small amount of reproducibility (splitting the data)\n",
6476
"import tensorflow as tf\n",
6577
"import numpy as np\n",
6678
"tf.random.set_seed(data_parameter[\"seed\"])\n",
@@ -73,6 +85,7 @@
7385
"metadata": {},
7486
"outputs": [],
7587
"source": [
88+
"# loads & preprocesses all datasets (FSD, GTZAN, FMA) and save them to disk\n",
7689
"from preprocessor import Preprocessor\n",
7790
"prep = Preprocessor(config=data_parameter)\n",
7891
"prep.create_logger()\n",

‎README.md

+11-5
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,15 @@
2020
* ```wandb login```
2121

2222
## How to execute
23-
* Execute "Data_Preprocessing.ipynb" once (1 dataset)
24-
* Execute "Training.ipynb" once per training
25-
* Execute "ANN_Inference.ipynb" once per trained model to evalute embedding and classify genres/songs
23+
* Execute "Data_Preprocessing.ipynb" once (saves the dataset e.g. in /tmp/ folder)
24+
* Execute "Training.ipynb" once per training (trains model and saves it into models/ folder)
25+
* Execute "ANN_Inference.ipynb" once per trained model to evaluate embedding and classify genres/songs (creates ANN/Plots/Metrics of a model)
26+
27+
28+
## How to configure
29+
* The data preparation and the model training are configured via the "config.json"
30+
* You can also use command line arguments (also via W&B) and overwrite config settings during the execution
31+
* Leave it as it is, to reproduce our results
2632

2733
## W&B alternative login for CI/CD (silent login):
2834
* ```export WANDB_ENTITY=dl4aed```
@@ -35,11 +41,11 @@
3541

3642
1. Integrate classifier model into training loop. ✅
3743
2. Build Inference pipeline: embed -> song-recognition. ✅
38-
3. Inspect & maybe implement Transfer Learning (for spectrograms?).
44+
3. Inspect & maybe implement Transfer Learning (for spectrograms?). 🔜
3945
4. Prepare slides. ✅
4046
5. Build ResNet-Model. ✅
4147
6. Test different dataset. ✅
42-
7. Extending scope lenght of each image (1 image should have longer temporal view than ~3 sec.).
48+
7. Extending scope length of each image (1 image should have longer temporal view than ~3 sec.). 🔜
4349

4450
## Git LFS
4551
* Install it on your local system: https://www.atlassian.com/git/tutorials/git-lfs#installing-git-lfs

‎Training.ipynb

+10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
{
22
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Training Procedure\n",
8+
"* loads the saved data from the data preprocessing notebook\n",
9+
"* creates the autoencoder model & trains it\n",
10+
"* creates the numpy embeddings of the GTZAN testset and FMA dataset"
11+
]
12+
},
313
{
414
"cell_type": "code",
515
"execution_count": null,

‎prep_utils.py

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import time
55

6+
''' Util functions used for the tf.data.Dataset pipeline '''
67
#============================== PREPROCESSING ==============================
78

89
def wrapper_cast(x):
@@ -133,6 +134,7 @@ def wrapper_roll(x, roll_val):
133134
x['mel_noise'] = tf.roll(x['mel_noise'], roll_tensor, axis=0)
134135
return x
135136

137+
# TODO: The RMS calculation might still be off
136138
def get_noise_from_sound(signal, noise, SNR):
137139
# current RMS of signal
138140
centered_signal = signal-tf.reduce_mean(signal)

‎preprocessor.py

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import tensorflow_datasets as tfds
66

77
AUTOTUNE = tf.data.AUTOTUNE
8+
''' The preprocessor class which does everything related to data (loading, preprocessing, etc.)'''
89

910
class Preprocessor():
1011
'''

‎resnet.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#https://github.com/YunYang1994/TensorFlow2.0-Examples/blob/master/3-Neural_Network_Architecture/resnet.py
2-
2+
''' A copy of the above code used to possibly customize the ResNet architecture'''
33

44
import tensorflow as tf
55

‎resnet_decoder.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# https://github.com/YunYang1994/TensorFlow2.0-Examples/blob/master/3-Neural_Network_Architecture/resnet.py
2+
''' A *modified* copy of the above code used to possibly customize the ResNet architecture (use as Decoder)'''
23

34
import tensorflow as tf
45

5-
6+
# TODO: adapt to be usable as Decoder
67
class BasicBlock_Transposed(tf.keras.Model):
78
expansion = 1
89

0 commit comments

Comments
 (0)
Please sign in to comment.