# utils.py
import clip
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch.optim as optim
# Datasets and dataloaders
class BaselineDataset(Dataset):
    # Serves precomputed CLIP image features and description embeddings.
    # Note: despite the parameter name, json_path points to a torch-serialized
    # dict (read with torch.load), not a JSON file.
    def __init__(self, json_path):
        self.root_dir = json_path
        self.samples = self._load_samples()

    def _load_samples(self):
        samples = []
        data_dict = torch.load(self.root_dir)
        for key in data_dict:
            samples.append([data_dict[key]['image_features'][0],
                            data_dict[key]['description_embeddings'][0],
                            data_dict[key]['target_index']])
        return samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        image_features, description_embeddings, target_index = self.samples[idx]
        return image_features, description_embeddings, target_index


def dataloader_baseline(root_dir, batch_size, BaselineDataset):
    # The dataset class is passed in as an argument so the same helper can be
    # reused with other Dataset implementations.
    dataset = BaselineDataset(root_dir)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)
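
# A minimal usage sketch (the .pt path and batch size are placeholders, not
# values from this repo):
#
#   train_loader = dataloader_baseline("train_features.pt", batch_size=32,
#                                      BaselineDataset=BaselineDataset)
#   for image_feats, desc_embeds, target_idx in train_loader:
#       ...  # all three come straight from the precomputed feature dict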
# Tuning version: stores image paths and preprocesses images on the fly.
class TuningDataset(Dataset):
    def __init__(self, json_path):
        self.root_dir = json_path
        self.samples = self._load_samples()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Only the preprocessing transform is used here; the loaded CLIP model
        # itself is discarded.
        _, self.preprocess_clip = clip.load('ViT-B/16', self.device)

    def _load_samples(self):
        samples = []
        data_dict = torch.load(self.root_dir)
        for key in data_dict:
            # In the tuning setting, 'image_features' holds the image path.
            samples.append([data_dict[key]['image_features'],
                            data_dict[key]['description_embeddings'][0],
                            data_dict[key]['target_index']])
        return samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        image_path, description_embeddings, target_index = self.samples[idx]
        # Decode and preprocess the image here; moving it to self.device is
        # left to the training loop.
        image = self.preprocess_clip(Image.open(image_path).convert("RGB"))
        return image, description_embeddings, target_index


def dataloader_Tuning(root_dir, batch_size, TuningDataset):
    dataset = TuningDataset(root_dir)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)
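
# The tuning loader follows the same pattern, but each __getitem__ opens and
# preprocesses an image from disk, so iteration is slower than the baseline
# loader (the path below is a placeholder):
#
#   tune_loader = dataloader_Tuning("train_paths.pt", batch_size=32,
#                                   TuningDataset=TuningDataset)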
def build_optimizer(projection_model, optimizer, learning_rate, momentum, version):
    # Parameter groups shared by every training version.
    params1 = {"params": projection_model.description_encoder.parameters(), "lr": learning_rate,
               "momentum": momentum}
    params2 = {"params": projection_model.logit_scale_CLIP, "lr": learning_rate, "momentum": momentum}
    params3 = {"params": projection_model.logit_scale_LLaVA, "lr": learning_rate, "momentum": momentum}
    scheduler = None  # Initialize the scheduler as None; only the fine-tuning versions set one.
    if optimizer == "sgd":
        if version == 'base':
            optimizer = optim.SGD([params1, params2, params3], lr=learning_rate, momentum=momentum)
        elif version == 'projection':
            # Also train the CLIP image/text projection heads.
            params4 = {"params": projection_model.proyection_Img_CLIP.parameters(), "lr": learning_rate,
                       "momentum": momentum}
            params5 = {"params": projection_model.proyection_txt_CLIP.parameters(), "lr": learning_rate,
                       "momentum": momentum}
            optimizer = optim.SGD([params1, params2, params3, params4, params5], lr=learning_rate,
                                  momentum=momentum)
        elif version == 'fine_tuning':
            # Fine-tune the whole CLIP visual tower.
            params6 = {"params": projection_model.model_clip.visual.parameters(), "lr": learning_rate,
                       "momentum": momentum}
            optimizer = optim.SGD([params1, params2, params3, params6], lr=learning_rate,
                                  momentum=momentum)  # weight_decay=0.2 was tried here
            T_max = 50
            eta_min = learning_rate
            # Note: with eta_min equal to the initial lr, this cosine schedule
            # keeps the learning rate constant.
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=T_max, eta_min=eta_min)
        elif version == 'fine_tuning_last_layer':
            # Fine-tune only CLIP's final visual projection matrix.
            params7 = {"params": projection_model.model_clip.visual.proj, "lr": learning_rate, "momentum": momentum}
            optimizer = optim.SGD([params1, params2, params3, params7], lr=learning_rate,
                                  momentum=momentum, weight_decay=0.2)
            T_max = 50
            eta_min = learning_rate
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=T_max, eta_min=eta_min)
    return optimizer, scheduler
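

if __name__ == "__main__":
    # Smoke-test sketch for build_optimizer. The real projection model is
    # defined elsewhere in the repo; this hypothetical stub only mirrors the
    # attributes that the 'base' version touches.
    import torch.nn as nn

    class _StubProjectionModel(nn.Module):
        def __init__(self, dim=512):
            super().__init__()
            self.description_encoder = nn.Linear(dim, dim)
            self.logit_scale_CLIP = nn.Parameter(torch.ones([]))
            self.logit_scale_LLaVA = nn.Parameter(torch.ones([]))

    opt, sched = build_optimizer(_StubProjectionModel(), optimizer="sgd",
                                 learning_rate=1e-3, momentum=0.9, version='base')
    print(type(opt).__name__, sched)  # -> "SGD None": no scheduler for 'base'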