-
Notifications
You must be signed in to change notification settings - Fork 5
/
linear_probe.py
128 lines (102 loc) · 6.01 KB
/
linear_probe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from utils.utils import copy_log_from_pueue
def main(args):
    """Run few-shot linear probing on pre-extracted features.

    For each shot count in ``[1, 2, 4, 8, 16]`` and each run id in
    ``1..args.num_run``:

    1. Seed NumPy's global RNG with the run id and sample ``num_shot``
       training examples per class, plus a small per-class validation set.
    2. Fit an L2-regularised logistic regression for every ``C`` on a coarse
       log-spaced grid and keep the ``C`` with the best validation accuracy.
    3. Refine ``C`` for ``args.num_step`` rounds by repeatedly halving (in
       log10 space) the interval ``[0.1 * C, 10 * C]`` towards the endpoint
       with the better validation accuracy, recording the test accuracy of
       the better endpoint after every round.

    After all runs of a shot count, the mean and standard deviation of the
    test accuracy of the *last* refinement round are printed and appended to
    a summary file.

    Attributes read from ``args``: ``dataset_name``, ``output_dir``,
    ``proj_name``, ``exp_name``, ``num_run``, ``num_step``.

    NOTE(review): the validation samples are drawn from the *test* split
    (``val_feature``/``val_label`` alias the test arrays), so the choice of
    ``C`` sees test data -- confirm this is intended.

    Raises:
        AssertionError: if ``args.dataset_name`` is not a supported dataset.
    """
    dataset = args.dataset_name
    assert dataset in ['modelnet40', 'scanobjectnn'], f"unsupported dataset: {dataset!r}"
    s1 = {'modelnet40': 'mn40', 'scanobjectnn': 'sonn'}[dataset]

    proj_dir = os.path.join(args.output_dir, args.proj_name)
    train_feature, train_label = _load_feature_file(
        os.path.join(proj_dir, f'fs-{s1}-train-feat-pointbert-1', "train.npz"))
    test_feature, test_label = _load_feature_file(
        os.path.join(proj_dir, f'fs-{s1}-test-feat-pointbert-1', "test.npz"))
    val_feature, val_label = test_feature, test_label

    # Number of validation samples per class used for each training shot count.
    val_shot_list = {1: 1, 2: 2, 4: 4, 8: 4, 16: 4}

    # Make sure the result directory exists before the first append below.
    exp_dir = os.path.join(proj_dir, args.exp_name)
    os.makedirs(exp_dir, exist_ok=True)
    details_path = os.path.join(
        exp_dir, "{}-run{}-step{}_details.txt".format(s1, args.num_run, args.num_step))
    summary_path = os.path.join(
        exp_dir, "{}-run{}-step{}.txt".format(s1, args.num_run, args.num_step))

    # Loop invariants: the class set and the coarse grid of C values.
    all_label_list = np.unique(train_label)
    search_list = [1e6, 1e4, 1e2, 1, 1e-2, 1e-4, 1e-6]

    for num_shot in [1, 2, 4, 8, 16]:
        # Row = run (seed - 1), column = refinement round.
        test_acc_step_list = np.zeros([args.num_run, args.num_step])
        val_num_shot = val_shot_list[num_shot]

        for seed in range(1, args.num_run + 1):  # `seed` as running id
            np.random.seed(seed)
            print(f"-- Seed: {seed} --------------------------------------------------------------")

            # Sampling. Train is sampled before val so the RNG stream is
            # consumed in a fixed order for a given seed.
            train_idx = _sample_per_class(train_label, all_label_list, num_shot)
            fewshot_train_feature = train_feature[train_idx]
            fewshot_train_label = train_label[train_idx]

            val_idx = _sample_per_class(val_label, all_label_list, val_num_shot)
            fewshot_val_feature = val_feature[val_idx]
            fewshot_val_label = val_label[val_idx]

            # Search initialisation: validation accuracy on the coarse grid.
            acc_list = [
                _fit_and_score(c_weight, fewshot_train_feature, fewshot_train_label,
                               fewshot_val_feature, fewshot_val_label)[1]
                for c_weight in search_list
            ]
            print(acc_list, flush=True)

            # Binary search around the best grid point (argmax keeps the
            # first, i.e. largest, C on ties).
            c_peak = search_list[np.argmax(acc_list)]
            c_left, c_right = 1e-1 * c_peak, 1e1 * c_peak

            for step in range(args.num_step):
                print(f"{dataset}, {num_shot} Shot, Round {step}: c_left/c_right -> {c_left}/{c_right}", flush=True)

                clf_left, acc_left = _fit_and_score(
                    c_left, fewshot_train_feature, fewshot_train_label,
                    fewshot_val_feature, fewshot_val_label)
                print("Val accuracy (Left): {:.2f}".format(100 * acc_left), flush=True)

                clf_right, acc_right = _fit_and_score(
                    c_right, fewshot_train_feature, fewshot_train_label,
                    fewshot_val_feature, fewshot_val_label)
                print("Val accuracy (Right): {:.2f}".format(100 * acc_right), flush=True)

                # Keep the better endpoint and move the other one to the
                # midpoint of the interval in log10 space.
                log_left, log_right = np.log10(c_left), np.log10(c_right)
                log_mid = 0.5 * (log_right + log_left)
                if acc_left < acc_right:
                    c_final, clf_final = c_right, clf_right
                    log_left = log_mid
                else:
                    c_final, clf_final = c_left, clf_left
                    log_right = log_mid

                pred = clf_final.predict(test_feature)
                test_acc = 100 * np.sum(pred == test_label) / len(pred)
                print("Test Accuracy: {:.2f}".format(test_acc), flush=True)
                test_acc_step_list[seed - 1, step] = test_acc

                saveline = "{}, seed {}, {} shot, weight {}, test_acc {:.2f}\n".format(
                    dataset, seed, num_shot, c_final, test_acc)
                with open(details_path, "a+") as writer:
                    writer.write(saveline)

                # Range for the next round, back in linear space.
                c_left, c_right = np.power(10, log_left), np.power(10, log_right)

        # Save results of the last refinement round, aggregated over all runs.
        test_acc_list = test_acc_step_list[:, -1]
        acc_mean = np.mean(test_acc_list)
        acc_std = np.std(test_acc_list)
        save_line = "{}, {} Shot, Test acc stat: {:.2f} ({:.2f})\n".format(dataset, num_shot, acc_mean, acc_std)
        print(save_line, flush=True)
        with open(summary_path, "a+") as writer:
            writer.write(save_line)

    # Project helper from utils.utils; presumably collects the pueue run log
    # into the experiment directory -- verify against its definition.
    copy_log_from_pueue(args.output_dir, args.proj_name, args.exp_name, 'run.log')


def _load_feature_file(path):
    """Return ``(feature_list, label_list)`` from the ``.npz`` file at *path*, closing it afterwards."""
    with np.load(path) as npz_file:
        return npz_file["feature_list"], npz_file["label_list"]


def _sample_per_class(labels, classes, num_samples):
    """Return indices of *num_samples* entries of *labels* per class, drawn without replacement.

    Uses NumPy's global RNG; classes are visited in the order given.
    """
    selected_idx_list = []
    for label in classes:
        label_collection = np.where(labels == label)[0]
        selected_idx_list.extend(
            np.random.choice(label_collection, size=num_samples, replace=False))
    return selected_idx_list


def _fit_and_score(c_weight, train_x, train_y, val_x, val_y):
    """Fit an L2 logistic regression with ``C=c_weight``; return ``(classifier, validation accuracy in [0, 1])``."""
    clf = LogisticRegression(solver="lbfgs", max_iter=1000, penalty="l2", C=c_weight).fit(train_x, train_y)
    acc = np.sum(clf.predict(val_x) == val_y) / len(val_y)
    return clf, acc
if __name__ == "__main__":
    # Script entry point. `parser` is imported only when run as a script;
    # presumably it is the project-local module that exposes the parsed
    # command-line namespace as `args` -- confirm against the repository.
    from parser import args

    main(args)