feature_extractor_batch_vgg16.py
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm
import time

start_time = time.time()

folder_path = 'lake'
save_file = 'img_features_lake.csv'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the pre-trained VGG16 model
vgg16 = models.vgg16(pretrained=True).to(device)

# # Remove the last layer (the classifier)
# modules = list(vgg16.children())[:-1]
# vgg16 = nn.Sequential(*modules)
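# # Alternative sketch (an assumption, not used in this script): drop only the final
# # Linear layer of the classifier, so the model returns 4096-d penultimate features
# # instead of the 1000 class scores produced by the full network:
# # vgg16.classifier = nn.Sequential(*list(vgg16.classifier.children())[:-1])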

# Set the model to evaluation mode
vgg16.eval()

# Define the transformation to be applied to each image
transform = transforms.Compose([
    transforms.Grayscale(3),  # replicate the grayscale channel to the 3 input channels VGG16 expects
    transforms.Resize(224),
    # transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet statistics
])


def extract_features(img_path: str) -> list:
    """
    :param img_path: Path to the image file
    :return: List of extracted features
    """
    # Load the image
    img = Image.open(img_path)
    # Apply the transformation and add a batch dimension
    img_tensor = transform(img).unsqueeze(0).to(device)
    # Extract the features using the VGG16 model
    with torch.no_grad():
        features = vgg16(img_tensor)
    # Flatten the features and convert to a 1D numpy array
    features = features.squeeze().to('cpu').numpy()
    features = features.flatten()
    # Print the shape of the features array
    # print(features.shape)
    return list(features)
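
# Example usage for a single image (commented sketch; 'lake/example.jpg' is a hypothetical path):
# single_feats = extract_features('lake/example.jpg')
# print(len(single_feats))  # 1000 values - the full VGG16 classifier output, since no layers are removed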


def extract_features_batch(img_folder_path: str):
    """
    Extract features for every image in a folder and save them to a CSV,
    one column per image file.
    """
    img_files = os.listdir(img_folder_path)
    num_imgs = len(img_files)
    features_df = pd.DataFrame()
    for i in tqdm(range(num_imgs)):
        img_path = os.path.join(img_folder_path, img_files[i])
        img_features = extract_features(img_path)
        features_df[img_files[i]] = img_features
    features_df.to_csv(save_file, index=False)


extract_features_batch(folder_path)

end_time = time.time()
print(f'Time taken for computation - {np.round((end_time - start_time) / 60, decimals=2)} mins')
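
# Optional sanity check (a minimal sketch): reload the saved CSV and report how many
# feature vectors were written and their length (one column per image file).
features_check = pd.read_csv(save_file)
print(f'Saved {features_check.shape[1]} feature vectors of length {features_check.shape[0]} to {save_file}')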