seth814 · pradyunkumar · Oct 27, 2022 · Nov 10, 2022 · Nov 15, 2022 · Nov 15, 2022
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 __pycache__
 .ipynb_checkpoints
 clean
+.DS_Store
diff --git a/app.py b/app.py
@@ -0,0 +1,75 @@
+import json
+import os
+from flask import Flask, render_template, request, jsonify
+from werkzeug.utils import secure_filename
+from predict import predict
+import subprocess
+
+# Create App
+app = Flask(__name__)
+
+# Select Route
+@app.route('/', methods=['POST', 'GET'])
+@app.route('/index.html')
+def index():
+    """Serve the landing page for both ``/`` and ``/index.html``."""
+    page = render_template("index.html")
+    return page
+
+@app.route('/about-us')
+@app.route('/about-us.html')
+def about_us():
+    """Serve the "about us" page under both of its URLs."""
+    page = render_template("about-us.html")
+    return page
+
+@app.route('/more')
+@app.route('/more.html')
+def more():
+    """Serve the "more" page under both of its URLs."""
+    page = render_template("more.html")
+    return page
+
+@app.route('/works')
+@app.route('/works.html')
+def works():
+    """Serve the "works" page under both of its URLs."""
+    page = render_template("works.html")
+    return page
+
+@app.route('/uploader', methods=['GET', 'POST'])
+def upload_file():
+    """Store an uploaded file under ``incoming/idk/`` and run the predictor.
+
+    Returns:
+        The rendered index page. A GET request (no upload) simply gets the
+        index page; a POST whose file name is empty after sanitising gets a
+        400 response.
+    """
+    if request.method != 'POST':
+        # The route accepts GET, but the original body returned None for it,
+        # which Flask rejects as an invalid view response.
+        return render_template("index.html")
+
+    f = request.files['file']
+    filename = secure_filename(f.filename)
+    if not filename:
+        # secure_filename() can strip a name down to '', in which case the
+        # save target would be the directory itself.
+        return "No file selected", 400
+
+    f.save(os.path.join('incoming/idk', filename))
+    return prediction(f)
+
+def prediction(file):
+    """Run ``predict()``, print its result, and render the index page.
+
+    ``file`` is accepted for the caller's convenience but is not read here;
+    ``predict()`` is called without arguments.
+    """
+    outcome = predict()
+    print(str(outcome))
+    page = render_template("index.html")
+    return page
+
+@app.route("/receive", methods=['POST'])
+def form():
+    """Split an uploaded recording into 15 s WAV segments and classify it.
+
+    The upload is saved in the working directory under its sanitised name,
+    ffmpeg writes the segments to ``incoming/idk/outNNNNNNNNN.wav``,
+    ``predict()`` is run, and the temporary files are removed again.
+
+    Returns:
+        A JSON response containing whatever ``predict()`` returned, or a
+        400 JSON error when the upload has no usable file name.
+    """
+    import glob
+
+    file = request.files['file']
+    upload_path = secure_filename(file.filename)
+    if not upload_path:
+        return jsonify({'error': 'missing file name'}), 400
+    file.save(upload_path)
+    print(file)
+
+    command = ['ffmpeg', '-i', upload_path, '-f', 'segment', '-segment_time', '15', 'incoming/idk/out%9d.wav']
+    try:
+        subprocess.run(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+        result = predict()
+    finally:
+        # Remove the upload and every segment ffmpeg produced. Recordings
+        # longer than one segment create out000000001.wav and so on, which
+        # previously stayed behind in the prediction folder. Running this in
+        # ``finally`` also cleans up when predict() raises.
+        segments = glob.glob('incoming/idk/out' + '[0-9]' * 9 + '.wav')
+        for path in [upload_path, *segments]:
+            try:
+                os.remove(path)
+            except FileNotFoundError:
+                pass
+
+    return jsonify(result)
+
+# def format_output(res):
+#     results = res['results']
+#     results.sort(key=lambda x: x['hashes_matched_in_input'], reverse=True)
+#     # return str({i: res['results'][i]['song_name'] for i in range(len(res['results']))})
+#     return res['results'][0]['song_name']
+
+# Start Flask's built-in server when this file is executed directly.
+# host="0.0.0.0" makes it listen on all network interfaces.
+if __name__ == "__main__":
+    app.run(debug=False, host="0.0.0.0")
+
+
+# hi
diff --git a/augmented_data/.gitignore b/augmented_data/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/clean.py b/clean.py
@@ -71,6 +71,8 @@ def split_wavs(args):
         target_dir = os.path.join(dst_root, _cls)
         check_dir(target_dir)
         src_dir = os.path.join(src_root, _cls)
+        if not os.path.isdir(src_dir):
+            continue
         for fn in tqdm(os.listdir(src_dir)):
             src_fn = os.path.join(src_dir, fn)
             rate, wav = downsample_mono(src_fn, args.sr)
@@ -117,13 +119,13 @@ def test_threshold(args):
 if __name__ == '__main__':
 
     parser = argparse.ArgumentParser(description='Cleaning audio data')
-    parser.add_argument('--src_root', type=str, default='wavfiles',
+    parser.add_argument('--src_root', type=str, default='augmented_data',
                         help='directory of audio files in total duration')
-    parser.add_argument('--dst_root', type=str, default='clean',
+    parser.add_argument('--dst_root', type=str, default='cleaned_data',
                         help='directory to put audio files split by delta_time')
-    parser.add_argument('--delta_time', '-dt', type=float, default=1.0,
+    parser.add_argument('--delta_time', '-dt', type=float, default=7.0,
                         help='time in seconds to sample audio')
-    parser.add_argument('--sr', type=int, default=16000,
+    parser.add_argument('--sr', type=int, default=22050,
                         help='rate to downsample audio')
 
     parser.add_argument('--fn', type=str, default='3a3d0279',

diff --git a/cleaned_data/.gitignore b/cleaned_data/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -0,0 +1,30 @@
+# Two-service stack: a Postgres database (`db`) and the Python application
+# container (`python`), joined by the `db_network` network.
+version: '3'
+services:
+  # Postgres image built from ./docker/postgres; data is kept in the named
+  # volume `db`.
+  db:
+    ports:
+      - 5432:5432
+      # NOTE(review): nothing visible here shows what listens on 8000 inside
+      # the db container - confirm this mapping is intended.
+      - 8000:8000
+      # - 5000:5000
+    build:
+      context: ./docker/postgres
+    volumes:
+      - db:/var/lib/postgresql/data
+    environment:
+      - POSTGRES_DB=dejavu
+      - POSTGRES_USER=postgres
+      # NOTE(review): credentials are hard-coded; acceptable for local
+      # development only.
+      - POSTGRES_PASSWORD=password
+    networks:
+      - db_network
+  # Application image built from ./docker/python; the repository root is
+  # bind-mounted at /app. No host ports are published for this service.
+  python:
+    build:
+      context: './docker/python'
+    volumes:
+      - .:/app
+    depends_on:
+      - db
+    networks:
+      - db_network
+networks:
+  db_network:
+volumes:
+  db:
diff --git a/docker/.gitkeep b/docker/.gitkeep
diff --git a/docker/postgres/Dockerfile b/docker/postgres/Dockerfile
@@ -0,0 +1,2 @@
+# Postgres image used as the build context of the compose `db` service.
+# init.sql is copied into the image's /docker-entrypoint-initdb.d/ directory.
+FROM postgres:10.7-alpine
+COPY init.sql /docker-entrypoint-initdb.d/
diff --git a/docker/postgres/init.sql b/docker/postgres/init.sql
@@ -0,0 +1,2 @@
+-- put any SQL you'd like to run on creation of the image
+-- in this file :)
diff --git a/docker/python/Dockerfile b/docker/python/Dockerfile
@@ -0,0 +1,13 @@
+# Application image: Python 3.8 plus the audio/system libraries and Python
+# packages the project imports.
+FROM python:3.8
+# Refresh the package index in the same layer as the install so a cached
+# `apt-get update` layer is never combined with a newer install request.
+RUN apt-get update -y && apt-get upgrade -y && apt-get install -y \
+    gcc nano \
+    ffmpeg libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0 \
+    postgresql postgresql-contrib
+# `scikit-learn` is the real distribution; the `sklearn` name on PyPI is a
+# deprecated placeholder. `soundfile` is listed because noise.py imports it.
+RUN pip install scipy matplotlib pydub pyaudio psycopg2 numpy flask tensorflow kapre scikit-learn pandas librosa soundfile tqdm wavio
+WORKDIR /app
+COPY . /app
+
+# Port 5000 is Flask's default port.
+EXPOSE 5000
+# CMD ["python", "-m" , "flask", "run", "--host=0.0.0.0"]
+# CMD ["python", "example_docker_postgres.py"]
diff --git a/docs/conf_mat.png b/docs/conf_mat.png
diff --git a/docs/kapre.pdf b/docs/kapre.pdf
diff --git a/docs/mel_spectrograms.png b/docs/mel_spectrograms.png
diff --git a/docs/model_history.png b/docs/model_history.png
diff --git a/docs/roc.png b/docs/roc.png
diff --git a/docs/signal_envelope.png b/docs/signal_envelope.png
diff --git a/docs/time_series.png b/docs/time_series.png
diff --git a/logs/conv2d_history.csv b/logs/conv2d_history.csv
@@ -1,9 +1,11 @@
 epoch,accuracy,loss,val_accuracy,val_loss
-0,0.3958333432674408,1.6769458055496216,0.625,1.1900074481964111
-1,0.7413194179534912,0.9228217005729675,0.7578125,0.8368486166000366
-2,0.8350694179534912,0.6329742670059204,0.828125,0.6942646503448486
-3,0.8880208134651184,0.4653857946395874,0.859375,0.5436592102050781
-4,0.9236111044883728,0.3417181372642517,0.875,0.4703327417373657
-5,0.9539930820465088,0.28213703632354736,0.890625,0.4489744305610657
-6,0.96875,0.2299293875694275,0.9140625,0.38280224800109863
-7,0.9826388955116272,0.17747408151626587,0.921875,0.3396981358528137
+0,0.8849009871482849,0.3325098156929016,1.0,0.07005131244659424
+1,1.0,0.056749843060970306,1.0,0.044334087520837784
+2,1.0,0.040281377732753754,1.0,0.03505175933241844
+3,1.0,0.03274228423833847,1.0,0.029564393684267998
+4,1.0,0.028036946430802345,1.0,0.025668691843748093
+5,1.0,0.024560758844017982,1.0,0.02288614585995674
+6,1.0,0.0219702310860157,1.0,0.02055056206882
+7,1.0,0.019765833392739296,1.0,0.018730701878666878
+8,1.0,0.018027806654572487,1.0,0.017003608867526054
+9,1.0,0.01654466614127159,1.0,0.015729038044810295
diff --git a/logs/lstm_history.csv b/logs/lstm_history.csv
@@ -1,6 +1,31 @@
 epoch,accuracy,loss,val_accuracy,val_loss
-0,0.4557291567325592,1.5647531747817993,0.6796875,1.0939487218856812
-1,0.7829861044883728,0.7867996096611023,0.7890625,0.7798664569854736
-2,0.8723958134651184,0.5168218612670898,0.78125,0.7349932789802551
-3,0.9079861044883728,0.38693663477897644,0.8203125,0.626288652420044
-4,0.9461805820465088,0.2926805913448334,0.8515625,0.6554898023605347
+0,0.9811262488365173,0.08876163512468338,1.0,0.030933193862438202
+1,1.0,0.026136912405490875,1.0,0.02131524868309498
+2,1.0,0.018989909440279007,1.0,0.016451247036457062
+3,1.0,0.014996059238910675,1.0,0.013288840651512146
+4,1.0,0.012410617433488369,1.0,0.011248855851590633
+5,1.0,0.010610337369143963,1.0,0.009705700911581516
+6,1.0,0.009278601966798306,1.0,0.008588694967329502
+7,1.0,0.00823213905096054,1.0,0.007705947384238243
+8,1.0,0.007404371630400419,1.0,0.00697157671675086
+9,1.0,0.006726888008415699,1.0,0.006349526811391115
+10,1.0,0.006195608526468277,1.0,0.00587928481400013
+11,1.0,0.005678337533026934,1.0,0.0054064965806901455
+12,1.0,0.005265973042696714,1.0,0.0050956192426383495
+13,1.0,0.0049118525348603725,1.0,0.004678372293710709
+14,1.0,0.004568244330585003,1.0,0.004367395304143429
+15,1.0,0.004268248099833727,1.0,0.004098730161786079
+16,1.0,0.003996027167886496,1.0,0.0038535133935511112
+17,1.0,0.003743569366633892,1.0,0.0036182308103889227
+18,1.0,0.0035073852632194757,1.0,0.0033817195799201727
+19,1.0,0.0032807623501867056,1.0,0.003142411820590496
+20,1.0,0.0030841054394841194,1.0,0.0029559850227087736
+21,1.0,0.002883247332647443,1.0,0.002759976079687476
+22,1.0,0.0027159592136740685,1.0,0.0026271624956279993
+23,1.0,0.0025536995381116867,1.0,0.002461410127580166
+24,1.0,0.0024143008049577475,1.0,0.0023202775046229362
+25,1.0,0.002279494656249881,1.0,0.0021944318432360888
+26,1.0,0.0021636777091771364,1.0,0.002097269520163536
+27,1.0,0.0020486365538090467,1.0,0.001977000618353486
+28,1.0,0.00194443145301193,1.0,0.0018732781754806638
+29,1.0,0.0018497094279155135,1.0,0.001791474991478026
diff --git a/logs/y_pred.npy b/logs/y_pred.npy
diff --git a/models.py b/models.py
@@ -63,6 +63,7 @@ def Conv2D(N_CLASSES=10, SR=16000, DT=1.0):
     x = layers.Conv2D(32, kernel_size=(3,3), activation='relu', padding='same', name='conv2d_relu_3')(x)
     x = layers.MaxPooling2D(pool_size=(2,2), padding='same', name='max_pool_2d_4')(x)
     x = layers.Conv2D(32, kernel_size=(3,3), activation='relu', padding='same', name='conv2d_relu_4')(x)
+
     x = layers.Flatten(name='flatten')(x)
     x = layers.Dropout(rate=0.2, name='dropout')(x)
     x = layers.Dense(64, activation='relu', activity_regularizer=l2(0.001), name='dense')(x)
@@ -74,6 +75,7 @@ def Conv2D(N_CLASSES=10, SR=16000, DT=1.0):
     return model
 
 
+
 def LSTM(N_CLASSES=10, SR=16000, DT=1.0):
     input_shape = (int(SR*DT), 1)
     i = get_melspectrogram_layer(input_shape=input_shape,

diff --git a/noise.py b/noise.py
@@ -0,0 +1,53 @@
+import numpy as np
+import os
+import librosa
+from tqdm import tqdm
+import soundfile as sf
+
+def shift(data, sampling_rate, shift_max, shift_direction):
+    """Shift ``data`` by a random number of samples and silence the wrapped part.
+
+    Args:
+        data: Array of audio samples (assumed 1-D). It is not modified; a new
+            array is returned.
+        sampling_rate: Samples per second of ``data``.
+        shift_max: Exclusive upper bound of the shift, in seconds.
+        shift_direction: ``'right'`` negates the drawn offset, ``'both'``
+            negates it with probability 1/2, any other value keeps it
+            positive.
+            NOTE(review): with ``np.roll`` a negative offset moves samples
+            towards the start of the array, so ``'right'`` moves audio
+            earlier - confirm the naming is intended.
+
+    Returns:
+        A copy of ``data`` rolled by the offset, with the samples that
+        wrapped around replaced by zeros. When the offset is zero, or the
+        maximum shift is shorter than one sample, an unchanged copy is
+        returned.
+    """
+    max_offset = int(sampling_rate * shift_max)
+    if max_offset <= 0:
+        # np.random.randint(0) raises ValueError; there is nothing to shift.
+        return np.array(data, copy=True)
+
+    offset = np.random.randint(max_offset)
+    if shift_direction == 'right':
+        offset = -offset
+    elif shift_direction == 'both':
+        if np.random.randint(0, 2) == 1:
+            offset = -offset
+
+    augmented_data = np.roll(data, offset)
+    # Silence the samples that wrapped around. A zero offset must be left
+    # alone: ``augmented_data[0:] = 0`` would wipe the whole signal.
+    if offset > 0:
+        augmented_data[:offset] = 0
+    elif offset < 0:
+        augmented_data[offset:] = 0
+    return augmented_data
+
+def manipulate(data, noise_factor):
+    """Return ``data`` with Gaussian noise added, keeping its dtype.
+
+    Args:
+        data: Array of audio samples (assumed 1-D).
+        noise_factor: Multiplier applied to standard-normal noise before it
+            is added to ``data``.
+
+    Returns:
+        A new array ``data + noise_factor * noise`` cast back to the dtype
+        of ``data``. Empty input yields an empty array.
+    """
+    samples = np.asarray(data)
+    noise = np.random.randn(len(samples))
+    augmented_data = samples + noise_factor * noise
+    # Cast back using the array dtype rather than type(data[0]), which
+    # raises IndexError on empty input.
+    return augmented_data.astype(samples.dtype)
+
+def augment(x=50, src_root='original_data', dst_root='augmented_data'):
+    """Write noisy and time-shifted variants of every recording in ``src_root``.
+
+    ``src_root`` is expected to hold one sub-directory per class. For each
+    audio file in a class directory, ``x`` rounds of augmentation are run and
+    each round writes three files to ``dst_root/<class>/``:
+    ``<i>_0.wav`` (noise added), ``<i>_1.wav`` (shifted) and ``<i>_2.wav``
+    (noise added, then shifted), where ``i`` counts rounds across the class.
+
+    Args:
+        x: Number of augmentation rounds per source file.
+        src_root: Directory containing the per-class input folders.
+        dst_root: Directory receiving the per-class output folders.
+    """
+    for label in tqdm(os.listdir(src_root)):
+        class_dir = os.path.join(src_root, label)
+        # Skip stray files such as .DS_Store. The check must be made on the
+        # path inside src_root, not on the bare name relative to the CWD.
+        if not os.path.isdir(class_dir):
+            continue
+
+        out_dir = os.path.join(dst_root, label)
+        os.makedirs(out_dir, exist_ok=True)
+
+        i = 0
+        for f in os.listdir(class_dir):
+            print(f)
+            data, sr = librosa.core.load(os.path.join(class_dir, f))
+            for _ in range(x):
+                d = manipulate(data, 0.05)
+                d1, d2 = shift(data, sr, 3, 'both'), shift(d, sr, 3, 'both')
+                sf.write(f'{out_dir}/{i}_1.wav', d1, sr)
+                sf.write(f'{out_dir}/{i}_2.wav', d2, sr)
+                sf.write(f'{out_dir}/{i}_0.wav', d, sr)
+                i += 1
+
+# Run the augmentation with its default settings when executed as a script.
+if __name__ == "__main__":
+    augment()
+
+
diff --git a/notebooks/Confusion Matrix and ROC.ipynb b/notebooks/Confusion Matrix and ROC.ipynb
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		FROM postgres:10.7-alpine
		COPY init.sql /docker-entrypoint-initdb.d/
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		-- put any SQL you'd like to run on creation of the image
		-- in this file :)