|
1 | 1 | {
|
2 | 2 | "cells": [
|
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": {}, |
| 6 | + "source": [ |
| 7 | + "# Data Preprocessing\n", |
| 8 | + "* needs to be run once before training several models\n", |
| 9 | + "* downloads all files that have not been downloaded yet\n", |
| 10 | + "* saves a dataset consisting of already split log-mel spectrograms" |
| 11 | + ] |
| 12 | + }, |
3 | 13 | {
|
4 | 14 | "cell_type": "code",
|
5 | 15 | "execution_count": null,
|
6 | 16 | "metadata": {},
|
7 | 17 | "outputs": [],
|
8 | 18 | "source": [
|
9 |
| - "# set to True for the first time\n", |
| 19 | + "# set to True (the boolean, not a string) for the first run\n", |
10 | 20 | "need_to_download_gtzan = False"
|
11 | 21 | ]
|
12 | 22 | },
|
|
28 | 38 | "metadata": {},
|
29 | 39 | "outputs": [],
|
30 | 40 | "source": [
|
| 41 | + "# automatically reload imported modules when their source code changes\n", |
31 | 42 | "%reload_ext autoreload\n",
|
32 | 43 | "%autoreload 2"
|
33 | 44 | ]
|
|
38 | 49 | "metadata": {},
|
39 | 50 | "outputs": [],
|
40 | 51 | "source": [
|
| 52 | + "# allow GPU memory growth and read the config for the data preparation\n", |
41 | 53 | "from utils import load_config, allow_growth\n",
|
42 | 54 | "allow_growth()\n",
|
43 | 55 | "\n",
|
|
60 | 72 | "metadata": {},
|
61 | 73 | "outputs": [],
|
62 | 74 | "source": [
|
63 |
| - "''' Some small amount of reproducibility '''\n", |
| 75 | + "# seed the random number generators for some reproducibility (e.g. when splitting the data)\n", |
64 | 76 | "import tensorflow as tf\n",
|
65 | 77 | "import numpy as np\n",
|
66 | 78 | "tf.random.set_seed(data_parameter[\"seed\"])\n",
|
|
73 | 85 | "metadata": {},
|
74 | 86 | "outputs": [],
|
75 | 87 | "source": [
|
| 88 | + "# loads & preprocesses all datasets (FSD, GTZAN, FMA) and saves them to disk\n", |
76 | 89 | "from preprocessor import Preprocessor\n",
|
77 | 90 | "prep = Preprocessor(config=data_parameter)\n",
|
78 | 91 | "prep.create_logger()\n",
|
|
0 commit comments