-
Notifications
You must be signed in to change notification settings - Fork 0
/
finalcode.py
200 lines (172 loc) · 9.55 KB
/
finalcode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import numpy as np # Import numpy for numerical operations
import pandas as pd # Import pandas for data manipulation
import os # Import os for file path operations
from PIL import Image # Import PIL for image processing
import tensorflow as tf # Import TensorFlow for deep learning
from tensorflow.keras.models import Sequential # Import Sequential model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout # Import layers
from tensorflow.keras.utils import to_categorical # Import utility for one-hot encoding
import matplotlib.pyplot as plt # Import matplotlib for plotting
from sklearn.metrics import roc_curve, auc, precision_score, recall_score, f1_score, accuracy_score # Import metrics
from tqdm import tqdm # Import tqdm for progress bars
import csv # Import csv for writing results to CSV files
# Base directory for images
base_image_dir = "C:/Users/nikhil/Desktop/CS412/Fiji Data/Trial"
# Load CSV files
train_csv_path = os.path.join(base_image_dir, 'Train.csv')
validation_csv_path = os.path.join(base_image_dir, 'Validation.csv') # Validation CSV file
test_csv_paths = [
os.path.join(base_image_dir, 'Day.csv'),
os.path.join(base_image_dir, 'Night.csv'),
os.path.join(base_image_dir, 'M5Day.csv'),
os.path.join(base_image_dir, 'M5Night.csv'),
os.path.join(base_image_dir, 'M10Day.csv'),
os.path.join(base_image_dir, 'M10Night.csv'),
os.path.join(base_image_dir, 'M15Day.csv'),
os.path.join(base_image_dir, 'M15Night.csv'),
os.path.join(base_image_dir, 'M20Day.csv'),
os.path.join(base_image_dir, 'M20Night.csv'),
os.path.join(base_image_dir, 'BlurDay-High.csv'),
os.path.join(base_image_dir, 'BlurDay-Medium.csv'),
os.path.join(base_image_dir, 'BlurNight-High.csv'),
os.path.join(base_image_dir, 'BlurNight-Medium.csv'),
os.path.join(base_image_dir, 'FogDay - High.csv'),
os.path.join(base_image_dir, 'FogDay - Low.csv'),
os.path.join(base_image_dir, 'FogDay-Medium.csv'),
os.path.join(base_image_dir, 'FogNight-High.csv'),
os.path.join(base_image_dir, 'FogNight-Low.csv'),
os.path.join(base_image_dir, 'FogNight-Medium.csv')
]
train_df = pd.read_csv(train_csv_path) # Load training data
validation_df = pd.read_csv(validation_csv_path) # Load validation data
# Function to load images from a dataframe
def load_images(dataframe, base_dir):
data = [] # Initialize list for image data
labels = [] # Initialize list for labels
error_count = 0 # Initialize error counter
for idx, row in tqdm(dataframe.iterrows(), total=dataframe.shape[0], desc="Loading images"):
relative_path = row['Path'].replace('\\', '/') # Normalize file path
img_path = os.path.join(base_dir, relative_path).replace('\\', '/')
if not os.path.exists(img_path): # Check if file exists
print(f"File not found: {img_path}")
continue # Skip if not found
try:
image = Image.open(img_path).convert('RGB') # Open and convert image to RGB
image = image.resize((30, 30)) # Resize image
data.append(np.array(image)) # Add image data to list
labels.append(row['ClassId'] - 1) # Add label to list (adjusted to start from 0)
except Exception as e: # Handle exceptions
print(f"Error loading image {img_path}: {e}")
error_count += 1
continue
print(f"Total images loaded: {len(data)}") # Print total images loaded
print(f"Total loading errors: {error_count}") # Print total errors
return np.array(data), np.array(labels) # Return data and labels as numpy arrays
# Function to train, evaluate, and save results
def train_and_evaluate(run_number, metrics_writer):
# Load training data
X_train, y_train = load_images(train_df, base_image_dir)
print(f"Loaded training data shape: {X_train.shape}")
print(f"Number of training images loaded: {X_train.shape[0]}")
# Load validation data
X_val, y_val = load_images(validation_df, base_image_dir)
print(f"Loaded validation data shape: {X_val.shape}")
print(f"Number of validation images loaded: {X_val.shape[0]}")
# Verify class labels
unique_labels = np.unique(y_train)
print(f"Unique class labels in training data: {unique_labels}")
# Ensure all labels are within the range [0, 5]
assert np.all(unique_labels < 6), "Error: Found class labels outside the range [0, 5]"
# Converting the labels into one hot encoding
y_train = to_categorical(y_train, 6) # Updated to 6 classes
y_val = to_categorical(y_val, 6) # Updated to 6 classes for validation
with tf.device('/GPU:0'): # Use GPU for training
# Model Building
model = Sequential([
Conv2D(32, (5, 5), activation='relu', input_shape=X_train.shape[1:]), # First convolutional layer
Conv2D(32, (5, 5), activation='relu'), # Second convolutional layer
MaxPool2D(pool_size=(2, 2)), # First max pooling layer
Dropout(0.25), # First dropout layer
Conv2D(64, (3, 3), activation='relu'), # Third convolutional layer
Conv2D(64, (3, 3), activation='relu'), # Fourth convolutional layer
MaxPool2D(pool_size=(2, 2)), # Second max pooling layer
Dropout(0.25), # Second dropout layer
Flatten(), # Flatten layer
Dense(256, activation='relu'), # Dense layer with 256 units
Dropout(0.5), # Third dropout layer
Dense(6, activation='softmax') # Output layer with 6 units (classes)
])
# Compilation of the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Displaying the model summary
model.summary()
# Training the model
epochs = 15
history = model.fit(X_train, y_train, batch_size=32, epochs=epochs, validation_data=(X_val, y_val))
# Save training and validation accuracy and loss
train_acc = history.history['accuracy'][-1]
val_acc = history.history['val_accuracy'][-1]
train_loss = history.history['loss'][-1]
val_loss = history.history['val_loss'][-1]
# Function to load and evaluate a test dataset and plot ROC curve
def evaluate_test_dataset(test_csv_path, model, base_dir, run_number, metrics_writer):
test_df = pd.read_csv(test_csv_path) # Load test data
X_test, y_test = load_images(test_df, base_dir) # Load images
y_test_cat = to_categorical(y_test, 6) # Updated to 6 classes
# Predict probabilities for ROC curve
y_pred_prob = model.predict(X_test)
# Predict classes
y_pred = np.argmax(y_pred_prob, axis=1)
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test_cat, verbose=2)
print(f"Test accuracy for {os.path.basename(test_csv_path)}: {test_acc}")
# Compute ROC curve and AUC for each class
fpr = {}
tpr = {}
roc_auc = {}
for i in range(6): # For each class
fpr[i], tpr[i], _ = roc_curve(y_test == i, y_pred_prob[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
# Plot ROC curve for each class
plt.figure()
for i in range(6): # For each class
plt.plot(fpr[i], tpr[i], label=f'Class {i} (area = {roc_auc[i]:.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title(f'ROC Curve for {os.path.basename(test_csv_path)} - Run {run_number}')
plt.legend(loc='lower right')
plt.savefig(f'roc_curve_{os.path.basename(test_csv_path)}_run_{run_number}.png')
plt.close()
# Calculate additional metrics
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
accuracy = accuracy_score(y_test, y_pred)
sensitivity = recall # In multi-class classification, sensitivity is equivalent to recall
print(f"Metrics for {os.path.basename(test_csv_path)} - Run {run_number}:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Sensitivity: {sensitivity:.4f}")
# Save metrics to the CSV file
metrics_writer.writerow([run_number, os.path.basename(test_csv_path), train_acc, val_acc, train_loss, val_loss,
precision, recall, f1, accuracy, sensitivity, test_loss, test_acc])
# Evaluate the model on each test dataset
for test_csv_path in test_csv_paths:
evaluate_test_dataset(test_csv_path, model, base_image_dir, run_number, metrics_writer)
# Save the model
model.save(f'traffic_classifier_run_{run_number}.h5')
# Open CSV file for writing metrics
with open('model_metrics.csv', mode='w', newline='') as file:
writer = csv.writer(file)
writer.writerow(['Run', 'Test_Set', 'Train_Accuracy', 'Validation_Accuracy', 'Train_Loss', 'Validation_Loss',
'Precision', 'Recall', 'F1_Score', 'Accuracy', 'Sensitivity', 'Test_Loss', 'Test_Accuracy'])
# Run the process 30 times
for run in range(1, 2): # Run only once for demonstration
print(f"Starting run {run}...")
train_and_evaluate(run, writer)
print(f"Completed run {run}.\n")