"""Extract per-frame VGG16 features from videos.

For every video under ``data/testing_data/video`` this script samples 80
frames uniformly, feeds them through VGG16 truncated at its penultimate
fully-connected (fc2) layer, and saves the resulting ``(80, 4096)`` feature
matrix as ``<video>.npy`` under ``data/testing_data/feat``.
"""
import os
import shutil

import cv2
import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model

# Root of the data set; every path below is derived from it.
DATA_DIR = "data/testing_data"
# Number of frames sampled per video; downstream consumers expect exactly 80.
NUM_FRAMES = 80


def video_to_frames(video):
    """Decode *video* into JPEG frames in a temporary directory.

    :param video: file name of the video inside ``DATA_DIR/video``
    :return: ordered list of paths to the extracted frame images
    """
    temp_dir = os.path.join(DATA_DIR, 'temporary_images')
    # Start from a clean directory so stale frames from a previous video
    # cannot leak into this video's frame list.
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir)

    video_path = os.path.join(DATA_DIR, 'video', video)
    cap = cv2.VideoCapture(video_path)
    image_list = []
    try:
        count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or the decoder failed).
                break
            frame_path = os.path.join(temp_dir, 'frame%d.jpg' % count)
            cv2.imwrite(frame_path, frame)
            image_list.append(frame_path)
            count += 1
    finally:
        # Release the decoder even if imwrite raises mid-loop
        # (original leaked the capture handle on an exception).
        cap.release()
    return image_list


def model_cnn_load():
    """Return VGG16 truncated at the penultimate fully-connected layer.

    The returned model maps a (224, 224, 3) image to its 4096-dim fc2
    activation vector.
    """
    base = VGG16(weights="imagenet", include_top=True, input_shape=(224, 224, 3))
    return Model(inputs=base.input, outputs=base.layers[-2].output)


def load_image(path):
    """Read the image at *path* and resize it to VGG16's 224x224 input."""
    img = cv2.imread(path)
    return cv2.resize(img, (224, 224))


def extract_features(video, model):
    """Extract fc2 features for NUM_FRAMES uniformly sampled frames of *video*.

    :param video: file name of the video whose frames are to be featurized
    :param model: the pretrained truncated VGG16 from :func:`model_cnn_load`
    :return: numpy array of shape (80, 4096)  # note: (frames, features),
             not 4096x80 as the original docstring claimed
    :raises ValueError: if no frames could be decoded from the video
    """
    video_id = video.split(".")[0]
    print(video_id)
    print(f'Processing video {video}')

    image_list = video_to_frames(video)
    if not image_list:
        # Original crashed with an opaque IndexError here
        # (np.linspace(0, -1, 80) indexing an empty list).
        raise ValueError(f'No frames could be extracted from {video}')

    # Uniformly sample NUM_FRAMES indices across the full frame range;
    # frames repeat when the video is shorter than NUM_FRAMES.
    samples = np.round(np.linspace(0, len(image_list) - 1, NUM_FRAMES)).astype(int)
    images = np.zeros((NUM_FRAMES, 224, 224, 3))
    for i, frame_idx in enumerate(samples):
        images[i] = load_image(image_list[frame_idx])

    fc_feats = model.predict(images, batch_size=128)
    img_feats = np.asarray(fc_feats)
    # Clean up the temporary frame dump now that features are in memory.
    shutil.rmtree(os.path.join(DATA_DIR, 'temporary_images'))
    return img_feats


def extract_feats_pretrained_cnn():
    """Compute and save .npy features for every video under ``DATA_DIR/video``."""
    model = model_cnn_load()
    print('Model loaded')

    feat_dir = os.path.join(DATA_DIR, 'feat')
    os.makedirs(feat_dir, exist_ok=True)

    video_list = os.listdir(os.path.join(DATA_DIR, 'video'))
    # When running on Colab a '.ipynb_checkpoints' entry appears in the
    # listing and breaks processing later on, so drop it. The original
    # called list.remove() unconditionally, which raises ValueError on
    # every run where the entry is absent (i.e. anywhere but Colab).
    if '.ipynb_checkpoints' in video_list:
        video_list.remove('.ipynb_checkpoints')

    for video in video_list:
        outfile = os.path.join(feat_dir, video + '.npy')
        img_feats = extract_features(video, model)
        np.save(outfile, img_feats)


if __name__ == "__main__":
    extract_feats_pretrained_cnn()