tensorflow_test_computer_vision_food101.py
import argparse
import os
from timeit import default_timer as timer
from pathlib import Path
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from helper_functions import get_nvidia_gpu_name
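# Note: get_nvidia_gpu_name is a helper local to this repo; judging by how GPU_NAME
# is used below, it's assumed to return a falsy value (e.g. None) when no NVIDIA GPU is found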
try:
    import cpuinfo
    CPU_PROCESSOR = cpuinfo.get_cpu_info().get('brand_raw').replace(" ", "_")
    print(f"[INFO] CPU Processor: {CPU_PROCESSOR}")
except Exception as e:
    CPU_PROCESSOR = None  # fall back to None so save_results doesn't hit a NameError
    print(f"Error: {e}, failed to get CPU_PROCESSOR name from cpuinfo, please install py-cpuinfo or set CPU_PROCESSOR manually")
# Create argument parser
parser = argparse.ArgumentParser()
parser.add_argument("--batch_sizes", default="32, 64, 128", help="Delimited list input of batch sizes to test, defaults to '32, 64, 128'", type=str)
parser.add_argument("--epochs", type=int, default=3, help="Number of epochs to train for, default is 3")
args = parser.parse_args()
# Convert batch_sizes to list
batch_size_args = [int(item.strip()) for item in args.batch_sizes.split(",")]
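# e.g. the default "32, 64, 128" -> [32, 64, 128]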
# Set constants
GPU_NAME = get_nvidia_gpu_name()
DATASET_NAME = "FOOD101"
IMAGE_SIZE = 224
INPUT_SHAPE = (IMAGE_SIZE, IMAGE_SIZE, 3)
BATCH_SIZES = batch_size_args
MODEL_NAME = "ResNet50"
EPOCHS = args.epochs
BACKEND = "tensorflow"
print(f"[INFO] Testing model: {MODEL_NAME} on {DATASET_NAME} dataset with input shape {INPUT_SHAPE} for {EPOCHS} epochs across batch sizes: {BATCH_SIZES}")
# Load the dataset
# Note: this will store a ~5GB file in ./data, so make sure to delete it afterwards if you want to free up space
print(f"[INFO] Loading {DATASET_NAME} dataset, note: this will store a ~5GB file in ./data, so make sure to delete it afterwards if you want to free up space")
(train_data, test_data), dataset_info = tfds.load(
    'food101',
    split=['train', 'validation'],
    as_supervised=True,
    with_info=True,
    data_dir="./data"
)
print(f"[INFO] Finished loading {DATASET_NAME} dataset.")
# Create preprocess layer
preprocess_layer = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1/255.),
    tf.keras.layers.Resizing(height=IMAGE_SIZE, width=IMAGE_SIZE)
])
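# Note: Food101 images come in varying sizes with uint8 pixel values, so the layer above
# rescales pixels to [0, 1] and resizes every image to IMAGE_SIZE x IMAGE_SIZE (224x224)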
# Setup training
def train_and_time(batch_sizes=BATCH_SIZES,
                   epochs=EPOCHS,
                   train_data=train_data,
                   test_data=test_data):
    batch_size_training_results = []
    for batch_size in batch_sizes:
        print(f"[INFO] Training with batch size {batch_size} for {epochs} epochs...")
        # Map preprocessing function to the data and turn it into batches
        train_data_batched = train_data.map(lambda image, label: (preprocess_layer(image), label)).shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
        # Don't shuffle the test data (we're not using it anyway); map into a new
        # variable so preprocessing isn't re-applied on every loop iteration
        test_data_preprocessed = test_data.map(lambda image, label: (preprocess_layer(image), label))
        # Print shape of first training batch
        for image_batch, label_batch in train_data_batched.take(1):
            print(f"[INFO] Training batch shape: {image_batch.shape}, label batch shape: {label_batch.shape}")
        # Create model
        model = tf.keras.applications.ResNet50(
            include_top=True,
            weights=None,
            input_shape=INPUT_SHAPE,
            classes=101)
        # Create loss function and compile model (ResNet50's top layer applies a
        # softmax, so the loss takes probabilities, not logits)
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
        model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])
        try:
            start_time = timer()
            model.fit(train_data_batched,  # already batched, so batch_size isn't passed (Keras docs say not to specify batch_size for dataset inputs)
                      epochs=epochs,
                      validation_data=None)  # no validation, just testing training speed
            end_time = timer()
            total_training_time = end_time - start_time
            avg_time_per_epoch = total_training_time / epochs
            batch_size_training_results.append({"batch_size": batch_size,
                                                "avg_time_per_epoch": avg_time_per_epoch})
            print(f"[INFO] Finished training with batch size {batch_size} for {epochs} epochs, total time: {round(total_training_time, 3)} seconds, avg time per epoch: {round(avg_time_per_epoch, 3)} seconds\n\n")
            save_results(batch_size_training_results)
        except Exception as e:
            print(f"[INFO] Error: {e}")
            print(f"[INFO] Failed training with batch size {batch_size} for {epochs} epochs...\n\n")
            batch_size_training_results.append({"batch_size": batch_size,
                                                "avg_time_per_epoch": "FAILED"})
            save_results(batch_size_training_results)
            break  # larger batch sizes will likely fail too (e.g. out of memory)
    return batch_size_training_results
def save_results(batch_size_training_results, results_dir="results", target_dir="results_tensorflow_cv"):
    # Create CSV filename (use the GPU name if one was found, otherwise the CPU name)
    if GPU_NAME:
        csv_filename = f"{GPU_NAME.replace(' ', '_')}_{DATASET_NAME}_{MODEL_NAME}_{INPUT_SHAPE[0]}_{BACKEND}_results.csv"
    else:
        csv_filename = f"{CPU_PROCESSOR}_{DATASET_NAME}_{MODEL_NAME}_{INPUT_SHAPE[0]}_{BACKEND}_results.csv"
    # Make the target results directory if it doesn't exist (including parents)
    results_path = Path(results_dir) / target_dir
    results_path.mkdir(parents=True, exist_ok=True)
    csv_filepath = results_path / csv_filename
    # Turn results into a DataFrame
    df = pd.DataFrame(batch_size_training_results)
    # Save to CSV
    print(f"[INFO] Saving results to: {csv_filepath}")
    df.to_csv(csv_filepath, index=False)
if __name__ == "__main__":
    batch_size_training_results = train_and_time()
    print(f"[INFO] Results:\n{batch_size_training_results}")