
Commit

Add files via upload
ruizhecao96 authored Jan 31, 2021
1 parent a1ec06b commit 4260417
Showing 39 changed files with 2,762 additions and 0 deletions.
12 changes: 12 additions & 0 deletions batch.sh
@@ -0,0 +1,12 @@
#!/bin/bash -l

# Slurm parameters
#SBATCH --job-name=team08_tuning
#SBATCH --output=team08_tuning-%j.%N.out
#SBATCH --time=1-00:00:00
#SBATCH --gpus=1

# Activate everything you need
module load cuda/10.1
# Run your python code
python3 tune.py
22 changes: 22 additions & 0 deletions configs/config.gin
@@ -0,0 +1,22 @@
# Architectures
DenseNet121.dropout_rate = 0.4691184943574922
DenseNet121.dense_units = 11
DenseNet121.idx_layer = 233


# Training
Trainer.total_steps = 5000
Trainer.total_steps_ft = 694
Trainer.log_interval = 200
Trainer.ckpt_interval = 1000
Trainer.lr = 0.00934198826999423
Trainer.lr_ft = 1.6939236757572164e-05
Trainer.ft_layer_idx = 129


# Input pipeline
load.name = 'idrid'
# Directory containing the IDRID dataset; change this to your own path
load.dataset_dir='/misc/home/RUS_CIP/st170042/Dataset'
load.BATCH_SIZE = 32
load.tfrecord = True
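
A brief hedged note (not part of this commit): gin bindings such as the ones above take effect once the target functions are registered as configurable and the config file is parsed. In the sketch below, the body of load() is a hypothetical stand-in for the repository's input pipeline, and skip_unknown=True is only needed because the sketch does not register DenseNet121 or Trainer.

import gin

@gin.configurable
def load(name, dataset_dir, BATCH_SIZE, tfrecord):
    # Hypothetical stand-in for the project's input pipeline entry point;
    # all four parameters are injected from configs/config.gin.
    print(name, dataset_dir, BATCH_SIZE, tfrecord)

# skip_unknown=True skips bindings (DenseNet121.*, Trainer.*) whose
# configurables are registered elsewhere in the project, not in this sketch.
gin.parse_config_file('configs/config.gin', skip_unknown=True)
load()  # parameters are filled in from config.gin
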
Binary file added evaluation/__pycache__/eval.cpython-37.pyc
Binary file added evaluation/__pycache__/metrics.cpython-37.pyc
40 changes: 40 additions & 0 deletions evaluation/eval.py
@@ -0,0 +1,40 @@
import tensorflow as tf
import logging
import seaborn as sns
import matplotlib.pyplot as plt
from evaluation.metrics import ConfusionMatrix


def evaluate(model, ds_test):

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
    test_AUC = tf.keras.metrics.AUC(name='test_AUC')
    cm = ConfusionMatrix(2)

    for image, label in ds_test:
        predictions = model(image, training=False)
        t_loss = loss_object(label, predictions)
        test_loss(t_loss)
        test_accuracy(label, predictions)
        cm.update_state(label, predictions)
        # AUC is computed from the hard class predictions (argmax)
        predictions = tf.math.argmax(predictions, axis=1)
        test_AUC(label, predictions)

    template = 'Test Loss: {}, Test Accuracy: {}, Confusion Matrix: {}, Test AUC: {}'
    logging.info(template.format(
        test_loss.result(),
        test_accuracy.result() * 100,
        cm.result(),
        test_AUC.result()
    ))
    visualize_cm(cm.result())

    return test_accuracy.result().numpy()


def visualize_cm(cm):
    # Visualize the confusion matrix as a heatmap
    sns.heatmap(cm, annot=True)
    plt.show()
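
A minimal smoke-test sketch (not part of this commit): it calls evaluate() on an untrained two-class model and a tiny random dataset, just to exercise the metrics and the confusion-matrix plot; the image size and batch size here are arbitrary assumptions.

import tensorflow as tf
from evaluation.eval import evaluate

# Untrained stand-in model with a two-class softmax head.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(256, 256, 3)),
    tf.keras.layers.Dense(2, activation='softmax'),
])

# Eight random images with binary labels, batched like ds_test.
images = tf.random.uniform((8, 256, 256, 3))
labels = tf.random.uniform((8,), maxval=2, dtype=tf.int32)
ds_test = tf.data.Dataset.from_tensor_slices((images, labels)).batch(4)

print('test accuracy:', evaluate(model, ds_test))
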
24 changes: 24 additions & 0 deletions evaluation/metrics.py
@@ -0,0 +1,24 @@
import tensorflow as tf


class ConfusionMatrix(tf.keras.metrics.Metric):

    def __init__(self, num_classes, **kwargs):
        super(ConfusionMatrix, self).__init__(name="confusion_matrix", **kwargs)
        self.num_classes = num_classes
        self.total_cm = self.add_weight("total", shape=(num_classes, num_classes), initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None, **kwargs):
        # convert predictions from class probabilities to class indices
        y_pred = tf.math.argmax(y_pred, axis=1)
        # accumulate this batch's confusion matrix
        cm = tf.math.confusion_matrix(y_true, y_pred, dtype=tf.float32, num_classes=self.num_classes)
        self.total_cm.assign_add(cm)

    def reset_states(self):
        for s in self.variables:
            s.assign(tf.zeros(shape=s.shape))

    def result(self):
        return self.total_cm
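
A short usage sketch (not part of this commit): accumulate the metric over two small batches of softmax-style outputs and read back the 2x2 matrix.

import tensorflow as tf
from evaluation.metrics import ConfusionMatrix

cm = ConfusionMatrix(num_classes=2)
cm.update_state([0, 1, 1], [[0.9, 0.1], [0.2, 0.8], [0.7, 0.3]])
cm.update_state([0, 0], [[0.6, 0.4], [0.3, 0.7]])
print(cm.result().numpy())  # rows are true labels, columns are predicted labels
cm.reset_states()           # clear the accumulated counts, e.g. between epochs
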
184 changes: 184 additions & 0 deletions grad_cam.py
@@ -0,0 +1,184 @@
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras import backend as K
from input_pipeline.preprocessing import preprocess, crop_image_from_gray

class GradCAM:
    # Source: https://www.kaggle.com/nguyenhoa/dog-cat-classifier-gradcam-with-tensorflow-2-0
    def __init__(self, model, layerName=None):

        self.model = model
        self.layerName = layerName

    def compute_heatmap(self, image, classIdx, upsample_size, eps=1e-5):
        gradModel = Model(
            inputs=[self.model.inputs],
            outputs=[self.model.get_layer(self.layerName).output, self.model.output]
        )

        # record operations for automatic differentiation
        with tf.GradientTape() as tape:
            inputs = tf.cast(image, tf.float32)
            (convOuts, preds) = gradModel(inputs)  # preds after softmax
            loss = preds[:, classIdx]

        # compute gradients with automatic differentiation
        grads = tape.gradient(loss, convOuts)
        # discard the batch dimension
        convOuts = convOuts[0]
        grads = grads[0]
        norm_grads = tf.divide(grads, tf.reduce_mean(tf.square(grads)) + tf.constant(eps))

        # compute channel weights and the class activation map
        weights = tf.reduce_mean(norm_grads, axis=(0, 1))
        cam = tf.reduce_sum(tf.multiply(weights, convOuts), axis=-1)

        # apply ReLU and normalize
        cam = np.maximum(cam, 0)
        cam = cam / np.max(cam)
        cam = cv2.resize(cam, upsample_size, interpolation=cv2.INTER_LINEAR)

        # convert to a 3-channel heatmap
        cam3 = np.expand_dims(cam, axis=2)
        cam3 = np.tile(cam3, [1, 1, 3])

        return cam3


def overlay_gradCAM(img, cam3):
    cam3 = np.uint8(255 * cam3)
    cam3 = cv2.applyColorMap(cam3, cv2.COLORMAP_JET)

    new_img = 0.3 * cam3 + 0.5 * img

    return (new_img * 255.0 / new_img.max()).astype("uint8")


@tf.custom_gradient
def guidedRelu(x):
    def grad(dy):
        return tf.cast(dy > 0, "float32") * tf.cast(x > 0, "float32") * dy

    return tf.nn.relu(x), grad



class GuidedBackprop:
    def __init__(self, model, layerName=None):
        self.model = model
        self.layerName = layerName
        self.gbModel = self.build_guided_model()

    def build_guided_model(self):
        gbModel = Model(
            inputs=[self.model.inputs],
            outputs=[self.model.get_layer(self.layerName).output]
        )
        # replace standard ReLU activations with the guided ReLU defined above
        layer_dict = [layer for layer in gbModel.layers[1:] if hasattr(layer, "activation")]
        for layer in layer_dict:
            if layer.activation == tf.keras.activations.relu:
                layer.activation = guidedRelu

        return gbModel

    def guided_backprop(self, images, upsample_size):
        """Guided Backpropagation method for visualizing input saliency."""
        with tf.GradientTape() as tape:
            inputs = tf.cast(images, tf.float32)
            tape.watch(inputs)
            outputs = self.gbModel(inputs)

        grads = tape.gradient(outputs, inputs)[0]

        saliency = cv2.resize(np.asarray(grads), upsample_size)

        return saliency


def deprocess_image(x):

    # normalize tensor: center on 0., ensure std is 0.25
    x = x.copy()
    x -= x.mean()
    x /= (x.std() + K.epsilon())
    x *= 0.25

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    if K.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x


def show_gradCAMs(model, gradCAM, GuidedBP, data_dir, n):

    plt.subplots(figsize=(30, 10*n))
    k = 1

    # show the first n images from the data directory
    for i, image_dir in enumerate(os.listdir(data_dir)):
        img = cv2.imread(os.path.join(data_dir, image_dir))
        img = crop_image_from_gray(img)
        upsample_size = (img.shape[1], img.shape[0])

        # Show the original image
        plt.subplot(n, 3, k)
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.title("Filename: {}".format(image_dir), fontsize=20)
        plt.axis("off")

        # Show the Grad-CAM overlay
        plt.subplot(n, 3, k+1)
        im = img_to_array(load_img(os.path.join(data_dir, image_dir)))
        x = preprocess(im)
        x = x/255.
        x = np.expand_dims(x, axis=0)
        preds = model.predict(x)
        idx = preds.argmax()
        cam3 = gradCAM.compute_heatmap(image=x, classIdx=idx, upsample_size=upsample_size)
        new_img = overlay_gradCAM(img, cam3)
        new_img = cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB)
        plt.imshow(new_img)
        plt.title("GradCAM-Prediction: {}".format(idx), fontsize=20)
        plt.axis("off")

        # Show guided Grad-CAM
        plt.subplot(n, 3, k+2)
        gb = GuidedBP.guided_backprop(x, upsample_size)
        guided_gradcam = deprocess_image(gb * cam3)
        guided_gradcam = cv2.cvtColor(guided_gradcam, cv2.COLOR_BGR2RGB)
        plt.imshow(guided_gradcam)
        plt.title("Guided GradCAM", fontsize=20)
        plt.axis("off")
        k += 3

        if i == n-1:
            break

    plt.show()


# Change data_dir to your own dataset directory
data_dir = r'E:\idrid\IDRID_dataset\train'
# Change model_dir to the directory of your saved model
model_dir = r"D:\Uni Stuttgart\Deep learning lab\Diabetic Retinopathy Detection\dl-lab-2020-team08\diabetic_retinopathy\logs\20201221-225335\saved_model_ft"

densenet = tf.keras.models.load_model(model_dir)

densenet_logit = Model(inputs=densenet.inputs, outputs=densenet.get_layer('dense_1').output)

guidedBP = GuidedBackprop(model=densenet, layerName="conv4_block14_0_relu")  # last convolutional output of the model
gradCAM = GradCAM(model=densenet_logit, layerName="conv4_block14_0_relu")

show_gradCAMs(densenet, gradCAM, guidedBP, data_dir=data_dir, n=2)
56 changes: 56 additions & 0 deletions input_pipeline/create_tfrecord.py
@@ -0,0 +1,56 @@
import os
import tensorflow as tf
import pandas as pd


def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()  # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _float_feature(value):
    """Returns a float_list from a float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def image_example(image_string, label):
    image_shape = tf.image.decode_jpeg(image_string).shape
    feature = {
        'height': _int64_feature(image_shape[0]),
        'width': _int64_feature(image_shape[1]),
        'depth': _int64_feature(image_shape[2]),
        'label': _int64_feature(label),
        'image_raw': _bytes_feature(image_string),
    }

    return tf.train.Example(features=tf.train.Features(feature=feature))


def tfrecord(train_df_total, test_df, dataset_dir):
    train_df = train_df_total.iloc[:351, :]  # training split: 351 images (85/15 train/validation split)
    valid_df = train_df_total.iloc[351:, :]  # validation split: 62 images

    with tf.io.TFRecordWriter(os.path.join(dataset_dir, "train_image.tfrecords")) as writer:
        for index, row in train_df.iterrows():
            img_string = open(os.path.join(dataset_dir, 'train', row['Image name']+'.jpg'), 'rb').read()
            example = image_example(img_string, row['Retinopathy grade'])
            writer.write(example.SerializeToString())

    with tf.io.TFRecordWriter(os.path.join(dataset_dir, "valid_image.tfrecords")) as writer:
        for index, row in valid_df.iterrows():
            img_string = open(os.path.join(dataset_dir, 'train', row['Image name']+'.jpg'), 'rb').read()
            example = image_example(img_string, row['Retinopathy grade'])
            writer.write(example.SerializeToString())

    with tf.io.TFRecordWriter(os.path.join(dataset_dir, "test_image.tfrecords")) as writer:
        for index, row in test_df.iterrows():
            img_string = open(os.path.join(dataset_dir, 'test', row['Image name']+'.jpg'), 'rb').read()
            example = image_example(img_string, row['Retinopathy grade'])
            writer.write(example.SerializeToString())
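
A reader-side sketch (an assumption, not this repository's actual loading code): parsing one of the TFRecord files written above back into (image, label) pairs using the same feature keys.

import tensorflow as tf

feature_description = {
    'height': tf.io.FixedLenFeature([], tf.int64),
    'width': tf.io.FixedLenFeature([], tf.int64),
    'depth': tf.io.FixedLenFeature([], tf.int64),
    'label': tf.io.FixedLenFeature([], tf.int64),
    'image_raw': tf.io.FixedLenFeature([], tf.string),
}

def parse_example(serialized):
    # Decode the feature dict and the JPEG bytes written by image_example().
    parsed = tf.io.parse_single_example(serialized, feature_description)
    image = tf.io.decode_jpeg(parsed['image_raw'], channels=3)
    return image, parsed['label']

ds_train = tf.data.TFRecordDataset('train_image.tfrecords').map(parse_example)
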
52 changes: 52 additions & 0 deletions input_pipeline/data_EDA.py
@@ -0,0 +1,52 @@
import pandas as pd
from create_tfrecord import tfrecord
import os
import matplotlib.pyplot as plt

def label_transfer(dataset_dir):

    # Convert the labels from 5 classes to 2 (binary) classes

    train_dir = os.path.join(dataset_dir, r"labels\train.csv")
    test_dir = os.path.join(dataset_dir, r"labels\test.csv")

    df_train = pd.read_csv(train_dir)
    df_test = pd.read_csv(test_dir)

    for index, row in df_train.iterrows():
        if row['Retinopathy grade'] <= 1:
            df_train.loc[index, 'Retinopathy grade'] = 0
        else:
            df_train.loc[index, 'Retinopathy grade'] = 1

    for index, row in df_test.iterrows():
        if row['Retinopathy grade'] <= 1:
            df_test.loc[index, 'Retinopathy grade'] = 0
        else:
            df_test.loc[index, 'Retinopathy grade'] = 1

    df_train.to_csv(os.path.join(dataset_dir, r"train_binary.csv"), index=False)
    df_test.to_csv(os.path.join(dataset_dir, r"test_binary.csv"), index=False)
    return df_train, df_test


def EDA(data):

    # Visualize the label distribution of the dataset

    data = data['Retinopathy grade']
    data_value = data.value_counts()
    plt.bar(data_value.index, data_value)
    plt.xticks(data_value.index, data_value.index.values)
    plt.xlabel("labels")
    plt.ylabel("Frequency")
    plt.title('Distribution of diabetic retinopathy grades')
    plt.show()


# Change dataset_dir to your own dataset directory
dataset_dir = r"E:\idrid\IDRID_dataset"
# Convert the 5-class labels to binary labels
train_dataset, test_dataset = label_transfer(dataset_dir)
# Create TFRecord files
tfrecord(train_dataset, test_dataset, dataset_dir)
# Visualize the data distribution
EDA(train_dataset)
EDA(test_dataset)