diff --git a/dft_comparison/dft_comparison_with_GPR.py b/dft_comparison/dft_comparison_with_GPR.py
index de17fb9..c0e46fc 100644
--- a/dft_comparison/dft_comparison_with_GPR.py
+++ b/dft_comparison/dft_comparison_with_GPR.py
@@ -16,15 +16,15 @@ from kernels import Tanimoto
 
 
-def main(path, path_to_dft_dataset, task, representation, theory_level):
+def main(path, path_to_dft_dataset, representation, theory_level):
     """
     :param path: str specifying path to photoswitches.csv file.
     :param path_to_dft_dataset: str specifying path to dft_comparison.csv file.
-    :param task: str specifying the task. e_iso_pi only supported task for the TD-DFT comparison.
     :param representation: str specifying the molecular representation. One of ['fingerprints, 'fragments', 'fragprints']
     :param theory_level: str giving the level of theory to compare against - CAM-B3LYP or PBE0 ['CAM-B3LYP', 'PBE0']
     """
 
+    task = 'e_iso_pi'  # e_iso_pi is the only task supported for the TD-DFT comparison
     data_loader = TaskDataLoader(task, path)
     smiles_list, _, pbe0_vals, cam_vals, experimental_vals = data_loader.load_dft_comparison_data(path_to_dft_dataset)
 
     X = featurise_mols(smiles_list, representation)
@@ -145,8 +145,6 @@ def objective_closure():
                         help='Path to the photoswitches.csv file.')
     parser.add_argument('-pd', '--path_to_dft_dataset', type=str, default='../dataset/dft_comparison.csv',
                         help='str giving path to dft_comparison.csv file')
-    parser.add_argument('-t', '--task', type=str, default='e_iso_pi',
-                        help='str specifying the task. e_iso_pi only task supported for the TD-DFT comparison.')
     parser.add_argument('-r', '--representation', type=str, default='fragprints',
                         help='str specifying the molecular representation. '
                              'One of [fingerprints, fragments, fragprints].')
@@ -155,4 +153,4 @@ def objective_closure():
 
     args = parser.parse_args()
 
-    main(args.path, args.path_to_dft_dataset, args.task, args.representation, args.theory_level)
+    main(args.path, args.path_to_dft_dataset, args.representation, args.theory_level)
diff --git a/dft_comparison/dft_comparison_with_multioutput_GPR.py b/dft_comparison/dft_comparison_with_multioutput_GPR.py
new file mode 100644
index 0000000..1170c5c
--- /dev/null
+++ b/dft_comparison/dft_comparison_with_multioutput_GPR.py
@@ -0,0 +1,205 @@
+# Author: Ryan-Rhys Griffiths
+"""
+Property prediction comparison against DFT error. 99 molecules with DFT-computed values at the CAM-B3LYP level of
+theory and 114 molecules with DFT-computed values at the PBE0 level of theory.
+"""
+
+import argparse
+
+import gpflow
+from gpflow.ci_utils import ci_niter
+from gpflow.mean_functions import Constant
+from gpflow.utilities import print_summary
+import numpy as np
+from sklearn.metrics import mean_squared_error
+
+from data_utils import TaskDataLoader, featurise_mols
+from kernels import Tanimoto
+
+
+def main(path, path_to_dft_dataset, representation, theory_level):
+    """
+    :param path: str specifying path to photoswitches.csv file.
+    :param path_to_dft_dataset: str specifying path to dft_comparison.csv file.
+    :param representation: str specifying the molecular representation. One of ['fingerprints', 'fragments', 'fragprints']
+    :param theory_level: str giving the level of theory to compare against - CAM-B3LYP or PBE0 ['CAM-B3LYP', 'PBE0']
+    """
+
+    task = 'e_iso_pi'  # e_iso_pi is the only task supported for the TD-DFT comparison
+    data_loader = TaskDataLoader(task, path)
+    smiles_list, _, pbe0_vals, cam_vals, experimental_vals = data_loader.load_dft_comparison_data(path_to_dft_dataset)
+
+    X = featurise_mols(smiles_list, representation)
+
+    # Keep only non-duplicate entries because we're not considering effects of solvent
+
+    non_duplicate_indices = np.array([i for i, smiles in enumerate(smiles_list) if smiles not in smiles_list[:i]])
+    X = X[non_duplicate_indices, :]
+    experimental_vals = experimental_vals[non_duplicate_indices]
+    pbe0_vals = pbe0_vals[non_duplicate_indices]
+    cam_vals = cam_vals[non_duplicate_indices]
+
+    # molecules with dft values to be split into train/test
+    if theory_level == 'CAM-B3LYP':
+        X_with_dft = np.delete(X, np.argwhere(np.isnan(cam_vals)), axis=0)
+        y_with_dft = np.delete(experimental_vals, np.argwhere(np.isnan(cam_vals)))
+        # DFT values for the CAM-B3LYP level of theory
+        dft_vals = np.delete(cam_vals, np.argwhere(np.isnan(cam_vals)))
+        # molecules with no dft vals must go into the training set.
+        X_no_dft = np.delete(X, np.argwhere(~np.isnan(cam_vals)), axis=0)
+        y_no_dft = np.delete(experimental_vals, np.argwhere(~np.isnan(cam_vals)))
+    else:
+        X_with_dft = np.delete(X, np.argwhere(np.isnan(pbe0_vals)), axis=0)
+        y_with_dft = np.delete(experimental_vals, np.argwhere(np.isnan(pbe0_vals)))
+        # DFT values for the PBE0 level of theory
+        dft_vals = np.delete(pbe0_vals, np.argwhere(np.isnan(pbe0_vals)))
+        # molecules with no dft vals must go into the training set.
+        X_no_dft = np.delete(X, np.argwhere(~np.isnan(pbe0_vals)), axis=0)
+        y_no_dft = np.delete(experimental_vals, np.argwhere(~np.isnan(pbe0_vals)))
+
+    # Load in the other property values for multitask learning. e_iso_pi is always the task in this instance.
+
+    data_loader_z_iso_pi = TaskDataLoader('z_iso_pi', path)
+    data_loader_e_iso_n = TaskDataLoader('e_iso_n', path)
+    data_loader_z_iso_n = TaskDataLoader('z_iso_n', path)
+
+    smiles_list_z_iso_pi, y_z_iso_pi = data_loader_z_iso_pi.load_property_data()
+    smiles_list_e_iso_n, y_e_iso_n = data_loader_e_iso_n.load_property_data()
+    smiles_list_z_iso_n, y_z_iso_n = data_loader_z_iso_n.load_property_data()
+
+    y_z_iso_pi = y_z_iso_pi.reshape(-1, 1)
+    y_e_iso_n = y_e_iso_n.reshape(-1, 1)
+    y_z_iso_n = y_z_iso_n.reshape(-1, 1)
+
+    X_z_iso_pi = featurise_mols(smiles_list_z_iso_pi, representation)
+    X_e_iso_n = featurise_mols(smiles_list_e_iso_n, representation)
+    X_z_iso_n = featurise_mols(smiles_list_z_iso_n, representation)
+
+    output_dim = 4  # Number of outputs
+    rank = 1  # Rank of W
+    feature_dim = len(X_no_dft[0, :])
+
+    tanimoto_active_dims = [i for i in range(feature_dim)]  # active dims for Tanimoto base kernel.
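+
+    # Note on the model fitted in the loop below: it is an intrinsic coregionalisation model,
+    # i.e. the kernel over (molecule, task) pairs factorises as
+    # k((x, i), (x', j)) = k_Tanimoto(x, x') * B[i, j], where B = W W^T + diag(kappa) is the
+    # 4 x 4 task covariance learned by gpflow.kernels.Coregion with rank(W) = 1.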
+
+    mae_list = []
+    dft_mae_list = []
+
+    # We define the Gaussian Process optimisation objective
+
+    m = None
+
+    def objective_closure():
+        return -m.log_marginal_likelihood()
+
+    print('\nBeginning training loop...')
+
+    for i in range(len(y_with_dft)):
+
+        X_train = np.delete(X_with_dft, i, axis=0)
+        y_train = np.delete(y_with_dft, i)
+        X_test = X_with_dft[i].reshape(1, -1)
+        y_test = y_with_dft[i]
+        dft_test = dft_vals[i]
+
+        X_train = np.concatenate((X_train, X_no_dft))
+        y_train = np.concatenate((y_train, y_no_dft))
+        y_train = y_train.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        X_train = X_train.astype(np.float64)
+        X_test = X_test.astype(np.float64)
+
+        # Augment the input with zeroes, ones, twos, threes to indicate the required output dimension
+        X_augmented = np.vstack((np.append(X_train, np.zeros((len(X_train), 1)), axis=1),
+                                 np.append(X_z_iso_pi, np.ones((len(X_z_iso_pi), 1)), axis=1),
+                                 np.append(X_e_iso_n, np.ones((len(X_e_iso_n), 1)) * 2, axis=1),
+                                 np.append(X_z_iso_n, np.ones((len(X_z_iso_n), 1)) * 3, axis=1)))
+
+        X_test = np.append(X_test, np.zeros((len(X_test), 1)), axis=1)
+        X_train = np.append(X_train, np.zeros((len(X_train), 1)), axis=1)
+
+        # Augment the Y data with zeroes, ones, twos and threes that specify a likelihood from the list of likelihoods
+        Y_augmented = np.vstack((np.hstack((y_train, np.zeros_like(y_train))),
+                                 np.hstack((y_z_iso_pi, np.ones_like(y_z_iso_pi))),
+                                 np.hstack((y_e_iso_n, np.ones_like(y_e_iso_n) * 2)),
+                                 np.hstack((y_z_iso_n, np.ones_like(y_z_iso_n) * 3))))
+
+        y_test = np.hstack((y_test, np.zeros_like(y_test)))
+
+        # Base kernel
+        k = Tanimoto(active_dims=tanimoto_active_dims)
+        # set_trainable(k.variance, False)
+
+        # Coregion kernel
+        coreg = gpflow.kernels.Coregion(output_dim=output_dim, rank=rank, active_dims=[feature_dim])
+
+        # Create product kernel
+        kern = k * coreg
+
+        # This likelihood switches between Gaussian noise with different variances for each f_i:
+        lik = gpflow.likelihoods.SwitchedLikelihood([gpflow.likelihoods.Gaussian(), gpflow.likelihoods.Gaussian(),
+                                                     gpflow.likelihoods.Gaussian(), gpflow.likelihoods.Gaussian()])
+
+        # now build the GP model as normal
+        m = gpflow.models.VGP((X_augmented, Y_augmented), mean_function=Constant(np.mean(y_train[:, 0])), kernel=kern, likelihood=lik)
+
+        # fit the covariance function parameters
+        maxiter = ci_niter(1000)
+        gpflow.optimizers.Scipy().minimize(m.training_loss, m.trainable_variables, options=dict(maxiter=maxiter), method="L-BFGS-B")
+        print_summary(m)
+
+        # Output RMSE on the train set (outputs are left unstandardised here)
+
+        y_pred_train, _ = m.predict_f(X_train)
+        train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
+        print("\nTrain RMSE: {:.3f}".format(train_rmse))
+
+        # mean and variance GP prediction
+
+        y_pred, y_var = m.predict_f(X_test)
+
+        # Output MAE for this trial
+
+        mae = abs(y_test[:, 0] - y_pred)
+
+        print("MAE: {}".format(mae))
+
+        # Store values in order to compute the mean and standard error of the statistics across trials
+
+        mae_list.append(mae)
+
+        # DFT prediction scores on the same trial
+
+        dft_mae = abs(y_test[:, 0] - dft_test)
+
+        dft_mae_list.append(dft_mae)
+
+    mae_list = np.array(mae_list)
+    dft_mae_list = np.array(dft_mae_list)
+
+    print("\nmean GP-Tanimoto MAE: {:.4f} +- {:.4f}\n".format(np.mean(mae_list), np.std(mae_list)/np.sqrt(len(mae_list))))
+    print("mean {} MAE: {:.4f} +- {:.4f}\n".format(theory_level,
+                                                   np.mean(dft_mae_list), np.std(dft_mae_list)/np.sqrt(len(dft_mae_list))))
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('-p', '--path', type=str, default='../dataset/photoswitches.csv',
+                        help='Path to the photoswitches.csv file.')
+    parser.add_argument('-pd', '--path_to_dft_dataset', type=str, default='../dataset/dft_comparison.csv',
+                        help='str giving path to dft_comparison.csv file')
+    parser.add_argument('-r', '--representation', type=str, default='fragprints',
+                        help='str specifying the molecular representation. '
+                             'One of [fingerprints, fragments, fragprints].')
+    parser.add_argument('-th', '--theory_level', type=str, default='PBE0',
+                        help='level of theory to compare against - CAM-B3LYP or PBE0 [CAM-B3LYP, PBE0]')
+
+    args = parser.parse_args()
+
+    main(args.path, args.path_to_dft_dataset, args.representation, args.theory_level)
diff --git a/human_comparison/human_performance_comparison.py b/human_comparison/human_performance_comparison.py
index 9535d57..59cc3e2 100644
--- a/human_comparison/human_performance_comparison.py
+++ b/human_comparison/human_performance_comparison.py
@@ -1,7 +1,7 @@
 # Copyright Ryan-Rhys Griffiths and Aditya Raymond Thawani 2020
 # Author: Ryan-Rhys Griffiths
 """
-Script for comparing against human performance on a set of 5 molecules.
+Script for comparing against human performance on a set of 5 molecules with Tanimoto GP.
 """
 
 import argparse
@@ -16,13 +16,13 @@ from kernels import Tanimoto
 
 
-def main(path, task, representation):
+def main(path, representation):
     """
     :param path: str specifying path to dataset.
-    :param task: str specifying the task. Always e_iso_pi in the case of the human performance comparison
     :param representation: str specifying the molecular representation. One of ['fingerprints, 'fragments', 'fragprints']
     """
 
+    task = 'e_iso_pi'  # Always e_iso_pi for human performance comparison
     data_loader = TaskDataLoader(task, path)
     smiles_list, y = data_loader.load_property_data()
     X = featurise_mols(smiles_list, representation)
 
@@ -112,12 +112,10 @@ def objective_closure():
 
     parser.add_argument('-p', '--path', type=str, default='../dataset/photoswitches.csv',
                         help='Path to the photoswitches.csv file.')
-    parser.add_argument('-t', '--task', type=str, default='e_iso_pi',
-                        help='str specifying the task. Always e_iso_pi in the case of the human performance comparison')
     parser.add_argument('-r', '--representation', type=str, default='fragprints',
                         help='str specifying the molecular representation. '
                              'One of [fingerprints, fragments, fragprints].')
 
     args = parser.parse_args()
 
-    main(args.path, args.task, args.representation)
+    main(args.path, args.representation)
diff --git a/human_comparison/human_performance_comparison_MOGP.py b/human_comparison/human_performance_comparison_MOGP.py
new file mode 100644
index 0000000..2a094a0
--- /dev/null
+++ b/human_comparison/human_performance_comparison_MOGP.py
@@ -0,0 +1,170 @@
+# Copyright Ryan-Rhys Griffiths and Aditya Raymond Thawani 2020
+# Author: Ryan-Rhys Griffiths
+"""
+Script for comparing against human performance on a set of 5 molecules with Tanimoto MOGP.
+""" + +import argparse + +import gpflow +from gpflow.ci_utils import ci_niter +from gpflow.mean_functions import Constant +from gpflow.utilities import print_summary +import numpy as np +from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error + +from data_utils import transform_data, TaskDataLoader, featurise_mols +from kernels import Tanimoto + + +def main(path, representation): + """ + :param path: str specifying path to dataset. + :param representation: str specifying the molecular representation. One of ['fingerprints, 'fragments', 'fragprints'] + """ + + task = 'e_iso_pi' # task always e_iso_pi with human performance comparison + data_loader = TaskDataLoader(task, path) + smiles_list, y = data_loader.load_property_data() + X = featurise_mols(smiles_list, representation) + + # 5 test molecules + + test_smiles = ['BrC1=CC=C(/N=N/C2=CC=CC=C2)C=C1', + 'O=[N+]([O-])C1=CC=C(/N=N/C2=CC=CC=C2)C=C1', + 'CC(C=C1)=CC=C1/N=N/C2=CC=C(N(C)C)C=C2', + 'BrC1=CC([N+]([O-])=O)=CC([N+]([O-])=O)=C1/N=N/C2=CC([H])=C(C=C2[H])N(CC)CC', + 'ClC%11=CC([N+]([O-])=O)=CC(C#N)=C%11/N=N/C%12=CC([H])=C(C=C%12OC)N(CC)CC'] + + # and their indices in the loaded data + test_smiles_indices = [116, 131, 168, 221, 229] + + X_train = np.delete(X, np.array(test_smiles_indices), axis=0) + y_train = np.delete(y, np.array(test_smiles_indices)) + X_test = X[[116, 131, 168, 221, 229]] + + # experimental wavelength values in EtOH. Main csv file has 400nm instead of 407nm because measurement was + # under a different solvent + y_test = y[[116, 131, 168, 221, 229]] + y_test[2] = 407. + + y_train = y_train.reshape(-1, 1) + y_test = y_test.reshape(-1, 1) + + # # We standardise the outputs but leave the inputs unchanged + # + # _, y_train, _, y_test, y_scaler = transform_data(X_train, y_train, X_test, y_test) + + X_train = X_train.astype(np.float64) + X_test = X_test.astype(np.float64) + + data_loader_z_iso_pi = TaskDataLoader('z_iso_pi', path) + data_loader_e_iso_n = TaskDataLoader('e_iso_n', path) + data_loader_z_iso_n = TaskDataLoader('z_iso_n', path) + + smiles_list_z_iso_pi, y_z_iso_pi = data_loader_z_iso_pi.load_property_data() + smiles_list_e_iso_n, y_e_iso_n = data_loader_e_iso_n.load_property_data() + smiles_list_z_iso_n, y_z_iso_n = data_loader_z_iso_n.load_property_data() + + y_z_iso_pi = y_z_iso_pi.reshape(-1, 1) + y_e_iso_n = y_e_iso_n.reshape(-1, 1) + y_z_iso_n = y_z_iso_n.reshape(-1, 1) + + X_z_iso_pi = featurise_mols(smiles_list_z_iso_pi, representation) + X_e_iso_n = featurise_mols(smiles_list_e_iso_n, representation) + X_z_iso_n = featurise_mols(smiles_list_z_iso_n, representation) + + output_dim = 4 # Number of outputs + rank = 1 # Rank of W + feature_dim = len(X_train[0, :]) + + tanimoto_active_dims = [i for i in range(feature_dim)] # active dims for Tanimoto base kernel. 
+
+    # We define the Gaussian Process Regression Model using the Tanimoto kernel
+
+    m = None
+
+    def objective_closure():
+        return -m.log_marginal_likelihood()
+
+    # Augment the input with zeroes, ones, twos, threes to indicate the required output dimension
+    X_augmented = np.vstack((np.append(X_train, np.zeros((len(X_train), 1)), axis=1),
+                             np.append(X_z_iso_pi, np.ones((len(X_z_iso_pi), 1)), axis=1),
+                             np.append(X_e_iso_n, np.ones((len(X_e_iso_n), 1)) * 2, axis=1),
+                             np.append(X_z_iso_n, np.ones((len(X_z_iso_n), 1)) * 3, axis=1)))
+
+    X_test = np.append(X_test, np.zeros((len(X_test), 1)), axis=1)
+    X_train = np.append(X_train, np.zeros((len(X_train), 1)), axis=1)
+
+    # Augment the Y data with zeroes, ones, twos and threes that specify a likelihood from the list of likelihoods
+    Y_augmented = np.vstack((np.hstack((y_train, np.zeros_like(y_train))),
+                             np.hstack((y_z_iso_pi, np.ones_like(y_z_iso_pi))),
+                             np.hstack((y_e_iso_n, np.ones_like(y_e_iso_n) * 2)),
+                             np.hstack((y_z_iso_n, np.ones_like(y_z_iso_n) * 3))))
+
+    y_test = np.hstack((y_test, np.zeros_like(y_test)))
+
+    # Base kernel
+    k = Tanimoto(active_dims=tanimoto_active_dims)
+    # set_trainable(k.variance, False)
+
+    # Coregion kernel
+    coreg = gpflow.kernels.Coregion(output_dim=output_dim, rank=rank, active_dims=[feature_dim])
+
+    # Create product kernel
+    kern = k * coreg
+
+    # This likelihood switches between Gaussian noise with different variances for each f_i:
+    lik = gpflow.likelihoods.SwitchedLikelihood([gpflow.likelihoods.Gaussian(), gpflow.likelihoods.Gaussian(),
+                                                 gpflow.likelihoods.Gaussian(), gpflow.likelihoods.Gaussian()])
+
+    # now build the GP model as normal
+    m = gpflow.models.VGP((X_augmented, Y_augmented), mean_function=Constant(np.mean(y_train[:, 0])), kernel=kern,
+                          likelihood=lik)
+
+    # fit the covariance function parameters
+    maxiter = ci_niter(1000)
+    gpflow.optimizers.Scipy().minimize(m.training_loss, m.trainable_variables, options=dict(maxiter=maxiter),
+                                       method="L-BFGS-B")
+    print_summary(m)
+
+    # mean and variance GP prediction
+
+    y_pred, y_var = m.predict_f(X_test)
+
+    # Output RMSE on the train set (outputs are left unstandardised here)
+
+    y_pred_train, _ = m.predict_f(X_train)
+    train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
+    print("\nTrain RMSE: {:.3f}".format(train_rmse))
+
+    r2 = r2_score(y_test[:, 0], y_pred)
+    rmse = np.sqrt(mean_squared_error(y_test[:, 0], y_pred))
+    mae = mean_absolute_error(y_test[:, 0], y_pred)
+    per_molecule = np.diag(abs(y_pred - y_test[:, 0]))
+
+    print("\nAveraged test statistics are")
+    print("\nR^2: {:.3f}".format(r2))
+    print("RMSE: {:.3f}".format(rmse))
+    print("MAE: {:.3f}".format(mae))
+    print("\nAbsolute error per molecule is {} ".format(per_molecule))
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('-p', '--path', type=str, default='../dataset/photoswitches.csv',
+                        help='Path to the photoswitches.csv file.')
+    parser.add_argument('-r', '--representation', type=str, default='fragprints',
+                        help='str specifying the molecular representation. '
+                             'One of [fingerprints, fragments, fragprints].')
+
+    args = parser.parse_args()
+
+    main(args.path, args.representation)
\ No newline at end of file
diff --git a/property_prediction/plot_box.py b/property_prediction/plot_box.py
new file mode 100644
index 0000000..3d89b01
--- /dev/null
+++ b/property_prediction/plot_box.py
@@ -0,0 +1,67 @@
+# Copyright Ryan-Rhys Griffiths and Aditya Raymond Thawani 2021
+# Author: Ryan-Rhys Griffiths
+"""
+Script for plotting marginal box plots.
+"""
+
+import plotly.graph_objects as go
+
+if __name__ == '__main__':
+
+    fig = go.Figure()
+
+    # Defining x axis (raw strings so the LaTeX backslashes survive)
+    x = [r'$\mathrm{\LARGE{E-Isomer}} \: \LARGE{\pi - \pi^*}$', r'$\mathrm{\LARGE{E-Isomer}} \: \LARGE{\pi - \pi^*}$',
+         r'$\mathrm{\LARGE{E-Isomer}} \: \LARGE{\pi - \pi^*}$', r'$\mathrm{\LARGE{E-Isomer}} \: \LARGE{\pi - \pi^*}$',
+         r'$\mathrm{\LARGE{E-Isomer}} \: \LARGE{n - \pi^*}$', r'$\mathrm{\LARGE{E-Isomer}} \: \LARGE{n - \pi^*}$',
+         r'$\mathrm{\LARGE{E-Isomer}} \: \LARGE{n - \pi^*}$', r'$\mathrm{\LARGE{E-Isomer}} \: \LARGE{n - \pi^*}$',
+         r'$\mathrm{\LARGE{Z-Isomer}} \: \LARGE{\pi - \pi^*}$', r'$\mathrm{\LARGE{Z-Isomer}} \: \LARGE{\pi - \pi^*}$',
+         r'$\mathrm{\LARGE{Z-Isomer}} \: \LARGE{\pi - \pi^*}$', r'$\mathrm{\LARGE{Z-Isomer}} \: \LARGE{\pi - \pi^*}$',
+         r'$\mathrm{\LARGE{Z-Isomer}} \: \LARGE{n - \pi^*}$', r'$\mathrm{\LARGE{Z-Isomer}} \: \LARGE{n - \pi^*}$',
+         r'$\mathrm{\LARGE{Z-Isomer}} \: \LARGE{n - \pi^*}$', r'$\mathrm{\LARGE{Z-Isomer}} \: \LARGE{n - \pi^*}$']
+
+    fig.add_trace(go.Box(
+
+        # y values corresponding
+        # to the x-axis categories
+        y=[16.4, 17.3, 17.2, 17.4, 8.5, 8.6, 8.9, 9.4, 12.2, 11.5, 11.9, 12.3, 9.0, 8.2, 8.5, 8.9],
+        x=x,
+        name='Fragments',
+        marker_color='paleturquoise',
+        boxpoints='outliers'
+    ))
+
+    fig.add_trace(go.Box(
+        y=[15.5, 15.2, 14.4, 17.9, 7.3, 8.4, 8.5, 10.1, 10.1, 9.8, 9.6, 10.0, 6.6, 6.9, 6.9, 7.2],
+        x=x,
+        name='Morgan',
+        marker_color='darksalmon',
+        boxpoints='outliers'
+    ))
+
+    fig.add_trace(go.Box(
+        y=[13.9, 13.3, 13.1, 18.1, 7.7, 8.2, 8.3, 8.6, 10.0, 9.8, 8.8, 10.4, 6.8, 7.1, 7.1, 7.0],
+        x=x,
+        name='Fragprints',
+        marker_color='sienna',
+        boxpoints='outliers'
+    ))
+
+    fig.update_layout(
+
+        # group together boxes of the different
+        # traces for each value of x
+        yaxis_title="MAE (nm)",
+        font=dict(
+            family="roman",
+            size=40),
+        boxmode='group',
+        legend=dict(font=dict(family="roman", size=50, color="black")),
+        boxgap=0.2,
+        boxgroupgap=0.1,
+        yaxis=dict(tickfont=dict(size=30)),
+        xaxis=dict(tickfont=dict(size=50)
+        )
+    )
+    fig.update_xaxes(title_font_family="Arial")
+    fig.show()
diff --git a/property_prediction/predict_with_multioutput_GPR.py b/property_prediction/predict_with_multioutput_GPR.py
index a69b1c7..88ea4da 100644
--- a/property_prediction/predict_with_multioutput_GPR.py
+++ b/property_prediction/predict_with_multioutput_GPR.py
@@ -228,7 +228,7 @@ def main(path, task, representation, use_pca, n_trials, test_set_size):
 
     parser.add_argument('-p', '--path', type=str, default='../dataset/photoswitches.csv',
                         help='Path to the photoswitches.csv file.')
-    parser.add_argument('-t', '--task', type=str, default='e_iso_pi',
+    parser.add_argument('-t', '--task', type=str, default='z_iso_n',
                         help='str specifying the task. One of [e_iso_pi, z_iso_pi, e_iso_n, z_iso_n].')
     parser.add_argument('-r', '--representation', type=str, default='fragprints',
                         help='str specifying the molecular representation. '