Skip to content

Commit b613162

Browse files
committed Feb 28, 2021
Added last small changes.
1 parent 2ef4227 commit b613162

7 files changed

+42
-9
lines changed
 

‎Data_Preprocessing.ipynb

+15-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,22 @@
11
{
22
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Data Preprocessing\n",
8+
"* needs to run once before training several models\n",
9+
"* downloads all files, which are not downloaded so far\n",
10+
"* saves a dataset consisting of already split log-mel specs"
11+
]
12+
},
313
{
414
"cell_type": "code",
515
"execution_count": null,
616
"metadata": {},
717
"outputs": [],
818
"source": [
9-
"# set to True for the first time\n",
19+
"# set to \"True\" for the first time\n",
1020
"need_to_download_gtzan = False"
1121
]
1222
},
@@ -28,6 +38,7 @@
2838
"metadata": {},
2939
"outputs": [],
3040
"source": [
41+
"# used for dynamic changes within the code\n",
3142
"%reload_ext autoreload\n",
3243
"%autoreload 2"
3344
]
@@ -38,6 +49,7 @@
3849
"metadata": {},
3950
"outputs": [],
4051
"source": [
52+
"# allow for gpu growth and read the config for the data preparation\n",
4153
"from utils import load_config, allow_growth\n",
4254
"allow_growth()\n",
4355
"\n",
@@ -60,7 +72,7 @@
6072
"metadata": {},
6173
"outputs": [],
6274
"source": [
63-
"''' Some small amount of reproducibility '''\n",
75+
"# some small amount of reproducibility (splitting the data)\n",
6476
"import tensorflow as tf\n",
6577
"import numpy as np\n",
6678
"tf.random.set_seed(data_parameter[\"seed\"])\n",
@@ -73,6 +85,7 @@
7385
"metadata": {},
7486
"outputs": [],
7587
"source": [
88+
"# loads & preprocesses all datasets (FSD, GTZAN, FMA) and save them to disk\n",
7689
"from preprocessor import Preprocessor\n",
7790
"prep = Preprocessor(config=data_parameter)\n",
7891
"prep.create_logger()\n",

‎README.md

+11-5
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,15 @@
2020
* ```wandb login```
2121

2222
## How to execute
23-
* Execute "Data_Preprocessing.ipynb" once (1 dataset)
24-
* Execute "Training.ipynb" once per training
25-
* Execute "ANN_Inference.ipynb" once per trained model to evalute embedding and classify genres/songs
23+
* Execute "Data_Preprocessing.ipynb" once (saves the dataset e.g. in /tmp/ folder)
24+
* Execute "Training.ipynb" once per training (trains model and saves it into models/ folder)
25+
* Execute "ANN_Inference.ipynb" once per trained model to evaluate embedding and classify genres/songs (creates ANN/Plots/Metrics of a model)
26+
27+
28+
## How to configure
29+
* The data preparation and the model training are configured via the "config.json"
30+
* You can also use command line arguments (also via W&B) and overwrite config settings during the execution
31+
* Leave it as it is, to reproduce our results
2632

2733
## W&B alternative login for CI/CD (silent login):
2834
* ```export WANDB_ENTITY=dl4aed```
@@ -35,11 +41,11 @@
3541

3642
1. Integrate classifier model into training loop. ✅
3743
2. Build Inference pipeline: embed -> song-recognition. ✅
38-
3. Inspect & maybe implement Transfer Learning (for spectrograms?).
44+
3. Inspect & maybe implement Transfer Learning (for spectrograms?). 🔜
3945
4. Prepare slides. ✅
4046
5. Build ResNet-Model. ✅
4147
6. Test different dataset. ✅
42-
7. Extending scope lenght of each image (1 image should have longer temporal view than ~3 sec.).
48+
7. Extending scope length of each image (1 image should have longer temporal view than ~3 sec.). 🔜
4349

4450
## Git LFS
4551
* Install it on your local system: https://www.atlassian.com/git/tutorials/git-lfs#installing-git-lfs

‎Training.ipynb

+10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
{
22
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Training Procedure\n",
8+
"* loads the saved data from the data preprocessing notebook\n",
9+
"* creates the autoencoder model & trains it\n",
10+
"* creates the numpy embeddings of the GTZAN testset and FMA dataset"
11+
]
12+
},
313
{
414
"cell_type": "code",
515
"execution_count": null,

‎prep_utils.py

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import time
55

6+
''' Util functions used for the tf.data.Dataset pipeline '''
67
#============================== PREPROCESSING ==============================
78

89
def wrapper_cast(x):
@@ -133,6 +134,7 @@ def wrapper_roll(x, roll_val):
133134
x['mel_noise'] = tf.roll(x['mel_noise'], roll_tensor, axis=0)
134135
return x
135136

137+
# TODO: The RMS calculation might still be off
136138
def get_noise_from_sound(signal, noise, SNR):
137139
# current RMS of signal
138140
centered_signal = signal-tf.reduce_mean(signal)

‎preprocessor.py

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import tensorflow_datasets as tfds
66

77
AUTOTUNE = tf.data.AUTOTUNE
8+
''' The preprocessor class which does everything related to data (loading, preprocessing, etc.)'''
89

910
class Preprocessor():
1011
'''

‎resnet.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#https://github.com/YunYang1994/TensorFlow2.0-Examples/blob/master/3-Neural_Network_Architecture/resnet.py
2-
2+
''' A copy of the above code used to possibly customize the ResNet architecture'''
33

44
import tensorflow as tf
55

‎resnet_decoder.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# https://github.com/YunYang1994/TensorFlow2.0-Examples/blob/master/3-Neural_Network_Architecture/resnet.py
2+
''' A *modified* copy of the above code used to possibly customize the ResNet architecture (use as Decoder)'''
23

34
import tensorflow as tf
45

5-
6+
# TODO: adapt to be usable as Decoder
67
class BasicBlock_Transposed(tf.keras.Model):
78
expansion = 1
89

0 commit comments

Comments
 (0)
Please sign in to comment.