#!/usr/bin/env python
import itertools

import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import (
    accuracy_score,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    roc_auc_score,
)


REG_METRICS = {
    'r2_score': {'callable': r2_score, 'input': 'raw'},
    'mean_absolute_error': {'callable': mean_absolute_error, 'input': 'raw'},
    'mean_squared_error': {'callable': mean_squared_error, 'input': 'raw'},
    'pearsonr': {'callable': pearsonr, 'input': 'raw'},
    'spearmanr': {'callable': spearmanr, 'input': 'raw'},
}

CLA_METRICS = {
    'accuracy': {'callable': accuracy_score, 'input': 'bin'},
    'roc_auc': {'callable': roc_auc_score, 'input': 'raw'},
}
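
# Each registry entry maps a metric name to its callable and the kind of
# prediction it expects: 'raw' for continuous model outputs, 'bin' for
# binarized labels. Illustrative lookup (a sketch, not part of the original
# workflow; `true_values` and `predicted_values` are hypothetical arrays):
#   met = REG_METRICS['r2_score']
#   score = met['callable'](true_values, predicted_values)
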
def compute_regression_metrics(
    true,
    pred,
    metrics=['r2_score', 'mean_absolute_error', 'mean_squared_error', 'pearsonr', 'spearmanr'],
):
    '''compute accuracy metrics for a regression problem
    Args:
        true (np.ndarray): array of ground truth regression target values
        pred (np.ndarray): array of regressor predictions
        metrics (list): list of regression metrics to evaluate
    '''
    results = {}
    for metric in metrics:
        print('>>> ', metric)
        met = REG_METRICS[metric]
        if met['input'] == 'raw':
            val = met['callable'](true.ravel(), pred.ravel())
            if metric in ['pearsonr', 'spearmanr']:
                # scipy correlation functions return (statistic, p-value)
                results[metric] = val[0]
            else:
                results[metric] = val
        else:
            raise ValueError(f'unexpected input type "{met["input"]}" for metric "{metric}"')
    return results
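
# Illustrative usage (a minimal sketch; the arrays are made-up stand-ins and
# not values from any benchmark run):
#   true = np.array([0.1, 0.4, 0.8])
#   pred = np.array([0.15, 0.35, 0.82])
#   reg_results = compute_regression_metrics(true, pred)
#   # -> dict with keys 'r2_score', 'mean_absolute_error', ...
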
def compute_feasibility_metrics(
    true,
    pred,
    metrics=['accuracy', 'roc_auc'],
):
    '''compute accuracy metrics for a classification problem
    Args:
        true (np.ndarray): array of ground truth binary labels
        pred (np.ndarray): array of raw classifier outputs
        metrics (list): list of classification metrics to evaluate
    '''
    results = {}
    for metric in metrics:
        print('>>> ', metric)
        met = CLA_METRICS[metric]
        if met['input'] == 'raw':
            # the numbers here are reversed (reversed between Gryffin source
            # code and the manuscript)
            pred_transform = 1.0 - pred
        elif met['input'] == 'bin':
            # the numbers here are reversed (reversed between Gryffin source
            # code and the manuscript)
            pred_transform = np.where(pred >= 0.5, 0, 1)
        val = met['callable'](true, pred_transform)
        results[metric] = val
    return results
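
# Illustrative usage (a minimal sketch; `pred` is assumed to hold raw
# feasibility scores in [0, 1], and the arrays are hypothetical):
#   true = np.array([0, 1, 0, 1])
#   pred = np.array([0.9, 0.2, 0.7, 0.1])
#   cla_results = compute_feasibility_metrics(true, pred)
#   # -> dict with keys 'accuracy' and 'roc_auc'
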
def to_binary(val):
    if np.isnan(val):
        return 1  # infeasible measurement
    else:
        return 0  # feasible measurement
def build_datasets(observations, df_results, param_names, obj_names, is_feas_name):
    ''' build the datasets of observed and unobserved inputs for the regression and
    classification problems
    Args:
        observations (list): list of current observations in Gryffin format
        df_results (pd.DataFrame): lookup dataframe
        param_names (list): list of parameter names
        obj_names (list): list of objective names
        is_feas_name (str): lookup dataframe column with feasibility value
    '''
    # add param names columns for more rapid lookup
    df_results['params'] = df_results[param_names[0]].str.cat(
        [df_results[param_names[i+1]] for i in range(len(param_names)-1)], sep='.',
    )
    # get the set of observations that are feasible
    obs_reg_params = []
    obs_cla_params = []  # all the observations
    obs_reg_values = []
    obs_cla_values = []  # all the observations
    for obs_ix, obs in enumerate(observations):
        if np.isnan(obs[obj_names[0]]):  # either all objectives are nan or none are (might change in future)
            pass
        else:
            obs_reg_params.append({param_name: obs[param_name] for param_name in param_names})
            obs_reg_values.append([obs[obj_name] for obj_name in obj_names])
        obs_cla_params.append({param_name: obs[param_name] for param_name in param_names})
        obs_cla_values.append([to_binary(obs[obj_name]) for obj_name in obj_names])
    obs_reg_values = np.array(obs_reg_values)
    obs_cla_values = np.array(obs_cla_values)
    # get the unobserved values
    obs_cla_params_cat = ['.'.join([obs[p] for p in param_names]) for obs in obs_cla_params]
    obs_reg_params_cat = ['.'.join([obs[p] for p in param_names]) for obs in obs_reg_params]
    unobs_cla_df = df_results[~(df_results.params.isin(obs_cla_params_cat))]  # predict on all remaining points
    unobs_reg_df = df_results[~(df_results.params.isin(obs_reg_params_cat)) & (df_results[is_feas_name] == 0)]  # only predict on feasible points
    unobs_cla_params = [row.to_dict() for _, row in unobs_cla_df[param_names].iterrows()]
    unobs_cla_values = unobs_cla_df[is_feas_name].values
    unobs_reg_params = [row.to_dict() for _, row in unobs_reg_df[param_names].iterrows()]
    unobs_reg_values = unobs_reg_df[obj_names].values
    return {
        'obs_cla_params': obs_cla_params, 'obs_cla_values': obs_cla_values,
        'obs_reg_params': obs_reg_params, 'obs_reg_values': obs_reg_values,
        'unobs_cla_params': unobs_cla_params, 'unobs_cla_values': unobs_cla_values,
        'unobs_reg_params': unobs_reg_params, 'unobs_reg_values': unobs_reg_values,
    }
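
# Illustrative usage (a minimal sketch; the observation format follows the
# Gryffin convention of one dict per observation, and all names below are
# hypothetical examples):
#   observations = [{'x0': 'x_3', 'x1': 'x_7', 'obj': 0.42}, ...]
#   datasets = build_datasets(observations, df_results,
#                             param_names=['x0', 'x1'], obj_names=['obj'],
#                             is_feas_name='is_feas')
#   # datasets['obs_reg_values'] holds the feasible (non-NaN) objective values
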
def make_df_results_synthetic(surface_obj, param_type, num_dims=2, num_opts=21, res=50):
    ''' make a lookup table with all possible parameter options
    param naming convention is "x0", "x1", "x2", ...
    single objective is always called "obj"
    convention for options is "x_0", "x_1", "x_2", ...
    Args:
        surface_obj (obj): Olympus-like surface object with constraints
        param_type (str): "continuous" or "categorical"
        num_dims (int): number of parameter space dimensions
        num_opts (int): number of options per parameter space dimension
        res (int): per-dimension resolution for the evaluation grid (for continuous surfaces only)
    '''
    if param_type == 'continuous':
        # continuous surfaces are not handled yet
        raise NotImplementedError
    elif param_type == 'categorical':
        # build the cartesian product space (feas and infeas)
        options = [f'x_{i}' for i in range(num_opts)]
        dim_options = [options for _ in range(num_dims)]
        cart_product = list(itertools.product(*dim_options))
        all_obs = []
        for params in cart_product:
            sample = {f'x{i}': p for i, p in enumerate(params)}
            sample = surface_obj.eval_merit(sample)
            if np.isnan(sample['obj']):
                sample['is_feas'] = 1
            else:
                sample['is_feas'] = 0
            all_obs.append(sample)
    else:
        raise NotImplementedError
    return pd.DataFrame(all_obs)
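

if __name__ == '__main__':
    # Minimal smoke test for the metric helpers. This is an illustrative
    # sketch only: the arrays below are randomly generated stand-ins and do
    # not correspond to any surface or benchmark from the original study.
    rng = np.random.default_rng(0)

    # regression metrics on a noisy copy of a random target
    true_reg = rng.normal(size=50)
    pred_reg = true_reg + 0.1 * rng.normal(size=50)
    print(compute_regression_metrics(true_reg, pred_reg))

    # feasibility metrics on alternating labels with random raw scores
    true_cla = np.array([0, 1] * 25)
    pred_cla = rng.random(size=50)
    print(compute_feasibility_metrics(true_cla, pred_cla))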