From 89f57c0899a252a3aa169d857798c3b2658e8ff7 Mon Sep 17 00:00:00 2001 From: Jana Pazurikova Date: Fri, 3 Jun 2016 12:41:51 +0200 Subject: [PATCH] Add guided minimization as params method --- src/eem.c | 6 +++++- src/kappa.c | 8 +++++++- src/settings.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++--- src/settings.h | 7 +++++++ 4 files changed, 67 insertions(+), 5 deletions(-) diff --git a/src/eem.c b/src/eem.c index c4f888a..5ab6d15 100644 --- a/src/eem.c +++ b/src/eem.c @@ -199,7 +199,11 @@ void calculate_charges(struct subset * const ss, struct kappa_data * const kd) { starts[0] = 0; for(int i = 1; i < ts.molecules_count; i++) starts[i] = starts[i - 1] + ts.molecules[i - 1].atoms_count; - int nt = s.max_threads/s.de_threads; + int nt = s.max_threads; + if (s.params_method == PARAMS_DE) + nt /= s.de_threads; + if (s.params_method == PARAMS_GM) + nt /= s.gm_threads; int nthreads = ts.molecules_count < nt ? ts.molecules_count : nt; #pragma omp parallel for num_threads(nthreads) for(int i = 0; i < ts.molecules_count; i++) { diff --git a/src/kappa.c b/src/kappa.c index b05189b..ccc3814 100644 --- a/src/kappa.c +++ b/src/kappa.c @@ -20,6 +20,7 @@ #include "statistics.h" #include "structures.h" #include "diffevolution.h" +#include "guidedmin.h" extern const struct training_set ts; extern const struct settings s; @@ -229,6 +230,11 @@ void find_the_best_parameters_for_subset(struct subset * const ss) { //runs a differential evolution algorithm to find the best parameters, ss->best is set after the call run_diff_evolution(ss); } + if (s.params_method == PARAMS_GM) { + //runs a guided minimization algorithm, ss->best is set after the call + run_guided_min(ss); + } + /* Determine the best parameters for computed data */ if(s.params_method == PARAMS_LR_FULL) { @@ -238,7 +244,7 @@ void find_the_best_parameters_for_subset(struct subset * const ss) { /* If Brent is used, the maximum is stored in the last item */ ss->best = &ss->data[ss->kappa_data_count - 1]; } - else if 
(s.params_method == PARAMS_DE) { + else if (s.params_method == PARAMS_DE || s.params_method == PARAMS_GM) { //well, nothing, the best structure has been already set } diff --git a/src/settings.c b/src/settings.c index 2ca07b1..04eaadd 100644 --- a/src/settings.c +++ b/src/settings.c @@ -61,6 +61,10 @@ static struct option long_options[] = { {"de-fix-kappa",required_argument, 0, 188}, {"de-threads",required_argument, 0, 189}, {"de-polish", required_argument, 0, 190}, + {"gm-size", required_argument, 0, 191}, + {"gm-iterations-beg", required_argument, 0, 192}, + {"gm-iterations-end", required_argument, 0, 193}, + {"gm-threads", required_argument, 0, 194}, {NULL, 0, 0, 0} }; @@ -95,6 +99,10 @@ void s_init(void) { s.limit_de_iters = NO_LIMIT_ITERS; s.limit_de_time = NO_LIMIT_TIME; s.polish = 0; //0 off, 1 only result, 2 result + during evolve, 3 result, evolve and some structures in initial population + s.gm_size = 100; + s.gm_iterations_beg = 1000; + s.gm_iterations_end = 2000; + s.gm_threads = 1; s.sort_by = SORT_R2; s.at_customization = AT_CUSTOM_ELEMENT_BOND; s.discard = DISCARD_OFF; @@ -128,7 +136,7 @@ static void print_help(void) { printf(" --version display version information and exit\n"); printf(" --max-threads N use up to N threads to solve EEM system in parallel\n"); printf(" -m, --mode MODE set mode for the NEEMP. Valid choices are: info, params, charges, cross, cover (required)\n"); - printf(" -p, --params-method METHOD set optimization method used for calculation of parameters. Valid choices are: lr-full, lr-full-brent, de (optional)\n"); + printf(" -p, --params-method METHOD set optimization method used for calculation of parameters. Valid choices are: lr-full, lr-full-brent, de, gm (optional)\n"); printf(" --sdf-file FILE SDF file (required)\n"); printf(" --atom-types-by METHOD classify atoms according to the METHOD. 
Valid choices are: Element, ElemBond.\n"); printf(" --list-omitted-molecules list names of molecules for which we don't have charges or parameters loaded (mode dependent).\n"); @@ -150,6 +158,10 @@ static void print_help(void) { printf(" --de-dither set the mutation constant to random value from [0.5;1] for ech iteration (optional).\n"); printf(" --de-polish VALUE apply polishing on parameters. Valid choices: 0 (off), 1 (result), 2 (during evolving), 3 (at the beginning). Strongly recommend to keep the default value.\n"); printf(" --de-fix-kappa set kappa to one fixed value (optional).\n"); + printf(" --gm-size set number of randomly generated vectors of parameters, those with reasonable stats will be minimized (optional).\n"); + printf(" --gm-iterations-beg set number of minimization iterations for each reasonable vector of parameters (optional).\n"); + printf(" --gm-iterations-end set number of minimization iterations for the best to polish the final result (optional).\n"); + printf(" --gm-threads set number of threads used for parallel minimization (optional).\n"); printf("Other options:\n"); printf(" --par-out-file FILE output the parameters to the FILE\n"); printf(" -d, --discard METHOD perform discarding with METHOD. Valid choices are: iterative, simple and off. Default is off.\n"); @@ -167,8 +179,8 @@ static void print_help(void) { printf("neemp -m params --sdf-file molecules.sdf --chg-file charges.chg --kappa-max 1.0 --fs-precision 0.2 --sort-by RMSD --fs-only.\n\ Compute parameters for the given molecules in file molecules.sdf and ab-initio charges in charges.chg. 
Set maximum value for kappa to 1.0, step for the full scan to 0.2, no iterative refinement, sort results according to the relative mean square deviation.\n"); - printf("neemp -m params -p de --sdf-file molecules.sdf --chg-file charges.chg --sort-by RMSD_avg --de-pop-size 250 --de-iters-max 500 -vv.\n\ - Compute parameters for the given molecules in file molecules.sdf and ab-initio charges in charges.chg. The chosen optimization method: differential evolution will create population of 250 sets of parameters and evolve these in maximum of 500 iterations. The fitness function evaluating the set of parameters is average per atom RMSD.\n"); + printf("neemp -m params -p gm --sdf-file molecules.sdf --chg-file charges.chg --sort-by RMSD_avg --gm-size 250 --gm-iterations-beg 1000 --gm-iterations-end 500 --random-seed 1234 -vv.\n\ + Compute parameters for the given molecules in file molecules.sdf and ab-initio charges in charges.chg. The chosen optimization method: guided minimization will create 250 vectors (each vector consists of all parameters) and minimize reasonably good ones for 1000 iterations. 
The best of them will be minimized again, for 500 iterations.\n"); printf("neemp -m charges --sdf-file molecules.sdf --par-file parameters --chg-out-file output.chg\n\ Calculate and store EEM charges to the file output.chg\n"); @@ -209,6 +221,8 @@ void parse_options(int argc, char **argv) { s.params_method = PARAMS_LR_FULL_BRENT; else if (!strcmp(optarg, "de")) s.params_method = PARAMS_DE; + else if (!strcmp(optarg, "gm")) + s.params_method = PARAMS_GM; else EXIT_ERROR(ARG_ERROR, "Invalid params-method: %s\n", optarg); break; @@ -397,6 +411,19 @@ void parse_options(int argc, char **argv) { case 190: s.polish = atoi(optarg); break; + //GM settings + case 191: + s.gm_size = atoi(optarg); + break; + case 192: + s.gm_iterations_beg = atoi(optarg); + break; + case 193: + s.gm_iterations_end = atoi(optarg); + break; + case 194: + s.gm_threads = atoi(optarg); + break; case '?': EXIT_ERROR(ARG_ERROR, "%s", "Try -h/--help.\n"); default: @@ -465,6 +492,15 @@ void check_settings(void) { } + if (s.params_method == PARAMS_GM) { //all settings are optional, so check for mistakes + if (s.gm_size < 1) + EXIT_ERROR(ARG_ERROR, "%s", "Size of GM set has to be positive.\n"); + if (s.gm_iterations_beg < 1 || s.gm_iterations_end < 1) + EXIT_ERROR(ARG_ERROR, "%s", "Number of minimization iterations for GM has to be positive.\n"); + if (s.gm_threads < 1 || s.gm_threads > s.max_threads) + EXIT_ERROR(ARG_ERROR, "%s", "Number of threads for minimization must be between 1 and maximum number of threads.\n"); + } + if (s.random_seed == -1) s.random_seed = 123; @@ -685,6 +721,15 @@ void print_settings(void) { } + + if (s.params_method == PARAMS_GM) { + printf("\nGuided minimization settings:\n"); + printf("\t - set size %d\n", s.gm_size); + printf("\t - iterations for set at the beginning %d\n", s.gm_iterations_beg); + printf("\t - iterations for the result at the end %d\n", s.gm_iterations_end); + printf("\t - threads used for minimization %d\n", s.gm_threads); + + } } printf("\n"); diff --git 
a/src/settings.h b/src/settings.h index a4a37b3..2930d99 100644 --- a/src/settings.h +++ b/src/settings.h @@ -27,6 +27,7 @@ enum params_calc_method { PARAMS_LR_FULL, PARAMS_LR_FULL_BRENT, PARAMS_DE, + PARAMS_GM, //guided minimization PARAMS_NOT_SET }; @@ -95,6 +96,12 @@ struct settings { time_t limit_de_time; int polish; //use NEWUOA minimization to polish trial or results + //settings regarding PARAMS_GM optimization method + int gm_size; + int gm_iterations_beg; + int gm_iterations_end; + int gm_threads; + //other settings int random_seed; enum verbosity_levels verbosity;