Skip to content

Commit 99e9e5f

Browse files
committed
Add more arguments to the regression tools
1 parent 58b7c4d commit 99e9e5f

File tree

3 files changed

+41
-25
lines changed

3 files changed

+41
-25
lines changed

qstack/regression/final_error.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,34 @@
55
from qstack.regression.kernel_utils import get_kernel, defaults
66
from qstack.tools import correct_num_threads
77

8-
def final_error(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, akernel=defaults.kernel,
                test_size=defaults.test_size,
                random_state=defaults.random_state,
                return_pred=False, save_alpha=None):
    """Fit a kernel regression on a train split and return the test-set absolute errors.

    The data are split once with ``train_test_split``; the regression
    coefficients are obtained by solving ``(K + eta*I) alpha = y_train`` and
    are then used to predict the held-out targets.

    Args:
        X: Representations passed to the kernel function, or, when
            ``read_kernel`` is true, a precomputed kernel matrix that is
            indexed as ``X[np.ix_(rows, cols)]`` over all samples.
        y: Target values; split together with ``X`` (or with the sample indices).
        read_kernel: If true, treat ``X`` as a precomputed kernel instead of
            computing one from representations.
        sigma: Kernel width; the kernel is called with ``1.0/sigma``.
        eta: Value added to the diagonal of the training kernel before solving.
        akernel: Kernel identifier handed to ``get_kernel``
            (unused when ``read_kernel`` is true).
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.
        return_pred: If true, also return the test-set predictions.
        save_alpha: If truthy, file passed to ``np.save`` to store the
            regression coefficients.

    Returns:
        The absolute errors ``|y_test - y_pred|``, or the tuple
        ``(absolute errors, y_pred)`` when ``return_pred`` is true.
    """
    # Truthiness (not identity with False) so that numpy.bool_(False) or 0
    # also select the "compute the kernel" path.
    if not read_kernel:
        kernel = get_kernel(akernel)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
        K_all = kernel(X_train, X_train, 1.0/sigma)
        Ks_all = kernel(X_test, X_train, 1.0/sigma)
    else:
        # Split sample indices rather than rows so that both the train/train
        # and the test/train sub-blocks can be cut out of the full kernel.
        idx_train, idx_test, y_train, y_test = train_test_split(np.arange(len(y)), y, test_size=test_size, random_state=random_state)
        # np.ix_ indexing yields copies, so the in-place diagonal update
        # below does not modify the caller's matrix.
        K_all = X[np.ix_(idx_train, idx_train)]
        Ks_all = X[np.ix_(idx_test, idx_train)]

    # Regularize: add eta to the diagonal of the training kernel.
    K_all[np.diag_indices_from(K_all)] += eta
    alpha = scipy.linalg.solve(K_all, y_train, assume_a='pos')
    y_kf_predict = np.dot(Ks_all, alpha)
    aes = np.abs(y_test-y_kf_predict)

    if save_alpha:
        np.save(save_alpha, alpha)
    if return_pred:
        return aes, y_kf_predict
    return aes
2436

2537
def main():
2638
import sys
@@ -34,12 +46,13 @@ def main():
3446
parser.add_argument('--kernel', type=str, dest='kernel', default=defaults.kernel, help='kernel type (G for Gaussian, L for Laplacian, myL for Laplacian for open-shell systems) (default '+defaults.kernel+')')
3547
parser.add_argument('--save-alpha', type=str, dest='save_alpha', default=None, help='file to write the regression coefficients to (default None)')
3648
parser.add_argument('--ll', action='store_true', dest='ll', default=False, help='if correct for the numper of threads')
49+
parser.add_argument('--random_state', type=int, dest='random_state', default=defaults.random_state, help='random state for test / train splitting')
3750
args = parser.parse_args()
3851
print(vars(args))
3952
if(args.ll): correct_num_threads()
4053
X = np.load(args.repr)
4154
y = np.loadtxt(args.prop)
42-
aes = final_error(X, y, sigma=args.sigma, eta=args.eta, akernel=args.kernel, test_size=args.test_size, save_alpha=args.save_alpha)
55+
# The split seed comes from the parsed --random_state option; a bare `random_state` name is undefined here.
aes = final_error(X, y, sigma=args.sigma, eta=args.eta, akernel=args.kernel, test_size=args.test_size, save_alpha=args.save_alpha, random_state=args.random_state)
4356
np.savetxt(sys.stdout, aes, fmt='%e')
4457

4558
if __name__ == "__main__":

qstack/regression/kernel_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ def __call__(self, parser, namespace, values, option_string=None):
3030
train_size=[0.125, 0.25, 0.5, 0.75, 1.0],
3131
etaarr=list(numpy.logspace(-10, 0, 5)),
3232
sigmaarr=list(numpy.logspace(0,6, 13)),
33-
sigmaarr_mult=list(numpy.logspace(0,2, 5))
33+
sigmaarr_mult=list(numpy.logspace(0,2, 5)),
34+
random_state=0,
3435
)
3536

3637

qstack/regression/regression.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta,
1212
akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict,
1313
test_size=defaults.test_size, train_size=defaults.train_size, n_rep=defaults.n_rep,
14+
random_state=defaults.random_state,
1415
sparse=None, debug=False):
1516
"""
1617
@@ -19,11 +20,11 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta,
1920
"""
2021
if read_kernel is False:
2122
kernel = get_kernel(akernel, [gkernel, gdict])
22-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=0)
23+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
2324
K_all = kernel(X_train, X_train, 1.0/sigma)
2425
Ks_all = kernel(X_test, X_train, 1.0/sigma)
2526
else:
26-
idx_train, idx_test, y_train, y_test = train_test_split(np.arange(len(y)), y, test_size=test_size, random_state=0)
27+
idx_train, idx_test, y_train, y_test = train_test_split(np.arange(len(y)), y, test_size=test_size, random_state=random_state)
2728
K_all = X[np.ix_(idx_train,idx_train)]
2829
Ks_all = X[np.ix_(idx_test, idx_train)]
2930

@@ -68,20 +69,21 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta,
6869
def main():
6970
import argparse
7071
parser = argparse.ArgumentParser(description='This program computes the learning curve.')
71-
parser.add_argument('--x', type=str, dest='repr', required=True, help='path to the representations file')
72-
parser.add_argument('--y', type=str, dest='prop', required=True, help='path to the properties file')
73-
parser.add_argument('--test', type=float, dest='test_size', default=defaults.test_size, help='test set fraction (default='+str(defaults.test_size)+')')
74-
parser.add_argument('--eta', type=float, dest='eta', default=defaults.eta, help='eta hyperparameter (default='+str(defaults.eta)+')')
75-
parser.add_argument('--sigma', type=float, dest='sigma', default=defaults.sigma, help='sigma hyperparameter (default='+str(defaults.sigma)+')')
76-
parser.add_argument('--akernel', type=str, dest='akernel', default=defaults.kernel, help='local kernel type (G for Gaussian, L for Laplacian, myL for Laplacian for open-shell systems) (default '+defaults.kernel+')')
77-
parser.add_argument('--gkernel', type=str, dest='gkernel', default=defaults.gkernel, help='global kernel type (avg for average kernel, rem for REMatch kernel) (default '+str(defaults.gkernel)+')')
78-
parser.add_argument('--gdict', nargs='*', action=ParseKwargs, dest='gdict', default=defaults.gdict, help='dictionary like input string to initialize global kernel parameters')
79-
parser.add_argument('--splits', type=int, dest='splits', default=defaults.n_rep, help='number of splits (default='+str(defaults.n_rep)+')')
80-
parser.add_argument('--train', type=float, dest='train_size', default=defaults.train_size, nargs='+', help='training set fractions')
81-
parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='enable debug')
82-
parser.add_argument('--ll', action='store_true', dest='ll', default=False, help='if correct for the numper of threads')
83-
parser.add_argument('--readkernel', action='store_true', dest='readk', default=False, help='if X is kernel')
84-
parser.add_argument('--sparse', type=int, dest='sparse', default=None, help='regression basis size for sparse learning')
72+
parser.add_argument('--x', type=str, dest='repr', required=True, help='path to the representations file')
73+
parser.add_argument('--y', type=str, dest='prop', required=True, help='path to the properties file')
74+
parser.add_argument('--test', type=float, dest='test_size', default=defaults.test_size, help='test set fraction (default='+str(defaults.test_size)+')')
75+
parser.add_argument('--eta', type=float, dest='eta', default=defaults.eta, help='eta hyperparameter (default='+str(defaults.eta)+')')
76+
parser.add_argument('--sigma', type=float, dest='sigma', default=defaults.sigma, help='sigma hyperparameter (default='+str(defaults.sigma)+')')
77+
parser.add_argument('--akernel', type=str, dest='akernel', default=defaults.kernel, help='local kernel type (G for Gaussian, L for Laplacian, myL for Laplacian for open-shell systems) (default '+defaults.kernel+')')
78+
parser.add_argument('--gkernel', type=str, dest='gkernel', default=defaults.gkernel, help='global kernel type (avg for average kernel, rem for REMatch kernel) (default '+str(defaults.gkernel)+')')
79+
parser.add_argument('--gdict', nargs='*', action=ParseKwargs, dest='gdict', default=defaults.gdict, help='dictionary like input string to initialize global kernel parameters')
80+
parser.add_argument('--splits', type=int, dest='splits', default=defaults.n_rep, help='number of splits (default='+str(defaults.n_rep)+')')
81+
parser.add_argument('--train', type=float, dest='train_size', default=defaults.train_size, nargs='+', help='training set fractions')
82+
parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='enable debug')
83+
parser.add_argument('--ll', action='store_true', dest='ll', default=False, help='if correct for the numper of threads')
84+
parser.add_argument('--readkernel', action='store_true', dest='readk', default=False, help='if X is kernel')
85+
parser.add_argument('--sparse', type=int, dest='sparse', default=None, help='regression basis size for sparse learning')
86+
parser.add_argument('--random_state', type=int, dest='random_state', default=defaults.random_state, help='random state for test / train splitting')
8587
args = parser.parse_args()
8688
print(vars(args))
8789
if(args.ll): correct_num_threads()

0 commit comments

Comments
 (0)