Skip to content

Commit

Permalink
Add guided minimization as params method
Browse files Browse the repository at this point in the history
  • Loading branch information
Jana Pazurikova committed Jun 3, 2016
1 parent 894e4d1 commit 89f57c0
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 5 deletions.
6 changes: 5 additions & 1 deletion src/eem.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,11 @@ void calculate_charges(struct subset * const ss, struct kappa_data * const kd) {
starts[0] = 0;
for(int i = 1; i < ts.molecules_count; i++)
starts[i] = starts[i - 1] + ts.molecules[i - 1].atoms_count;
int nt = s.max_threads/s.de_threads;
int nt = s.max_threads;
if (s.params_method == PARAMS_DE)
nt /= s.de_threads;
if (s.params_method == PARAMS_GM)
nt /= s.gm_threads;
int nthreads = ts.molecules_count < nt ? ts.molecules_count : nt;
#pragma omp parallel for num_threads(nthreads)
for(int i = 0; i < ts.molecules_count; i++) {
Expand Down
8 changes: 7 additions & 1 deletion src/kappa.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "statistics.h"
#include "structures.h"
#include "diffevolution.h"
#include "guidedmin.h"

extern const struct training_set ts;
extern const struct settings s;
Expand Down Expand Up @@ -229,6 +230,11 @@ void find_the_best_parameters_for_subset(struct subset * const ss) {
//runs a differential evolution algorithm to find the best parameters, ss->best is set after the call
run_diff_evolution(ss);
}
if (s.params_method == PARAMS_GM) {
//runs a guided minimization algorithm, ss->best is set after the call
run_guided_min(ss);
}

/* Determine the best parameters for computed data */

if(s.params_method == PARAMS_LR_FULL) {
Expand All @@ -238,7 +244,7 @@ void find_the_best_parameters_for_subset(struct subset * const ss) {
/* If Brent is used, the maximum is stored in the last item */
ss->best = &ss->data[ss->kappa_data_count - 1];
}
else if (s.params_method == PARAMS_DE) {
else if (s.params_method == PARAMS_DE || s.params_method == PARAMS_GM) {
//nothing to do here: ss->best has already been set by the optimization call above
}

Expand Down
51 changes: 48 additions & 3 deletions src/settings.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ static struct option long_options[] = {
{"de-fix-kappa",required_argument, 0, 188},
{"de-threads",required_argument, 0, 189},
{"de-polish", required_argument, 0, 190},
{"gm-size", required_argument, 0, 191},
{"gm-iterations-beg", required_argument, 0, 192},
{"gm-iterations-end", required_argument, 0, 193},
{"gm-threads", required_argument, 0, 194},
{NULL, 0, 0, 0}
};

Expand Down Expand Up @@ -95,6 +99,10 @@ void s_init(void) {
s.limit_de_iters = NO_LIMIT_ITERS;
s.limit_de_time = NO_LIMIT_TIME;
s.polish = 0; //0 off, 1 only result, 2 result + during evolve, 3 result, evolve and some structures in initial population
s.gm_size = 100;
s.gm_iterations_beg = 1000;
s.gm_iterations_end = 2000;
s.gm_threads = 1;
s.sort_by = SORT_R2;
s.at_customization = AT_CUSTOM_ELEMENT_BOND;
s.discard = DISCARD_OFF;
Expand Down Expand Up @@ -128,7 +136,7 @@ static void print_help(void) {
printf(" --version display version information and exit\n");
printf(" --max-threads N use up to N threads to solve EEM system in parallel\n");
printf(" -m, --mode MODE set mode for the NEEMP. Valid choices are: info, params, charges, cross, cover (required)\n");
printf(" -p, --params-method METHOD set optimization method used for calculation of parameters. Valid choices are: lr-full, lr-full-brent, de (optional)\n");
printf(" -p, --params-method METHOD set optimization method used for calculation of parameters. Valid choices are: lr-full, lr-full-brent, de, gm (optional)\n");
printf(" --sdf-file FILE SDF file (required)\n");
printf(" --atom-types-by METHOD classify atoms according to the METHOD. Valid choices are: Element, ElemBond.\n");
printf(" --list-omitted-molecules list names of molecules for which we don't have charges or parameters loaded (mode dependent).\n");
Expand All @@ -150,6 +158,10 @@ static void print_help(void) {
printf(" --de-dither set the mutation constant to random value from [0.5;1] for each iteration (optional).\n");
printf(" --de-polish VALUE apply polishing on parameters. Valid choices: 0 (off), 1 (result), 2 (during evolving), 3 (at the beginning). Strongly recommend to keep the default value.\n");
printf(" --de-fix-kappa set kappa to one fixed value (optional).\n");
printf(" --gm-size set number of randomly generated vectors of parameters, those with reasonable stats will be minimized (optional).\n");
printf(" --gm-iterations-beg set number of minimization iterations for each reasonable vector of parameters (optional).\n");
printf(" --gm-iterations-end set number of minimization iterations for the best to polish the final result (optional).\n");
printf(" --gm-threads set number of threads used for parallel minimization (optional).\n");
printf("Other options:\n");
printf(" --par-out-file FILE output the parameters to the FILE\n");
printf(" -d, --discard METHOD perform discarding with METHOD. Valid choices are: iterative, simple and off. Default is off.\n");
Expand All @@ -167,8 +179,8 @@ static void print_help(void) {

printf("neemp -m params --sdf-file molecules.sdf --chg-file charges.chg --kappa-max 1.0 --fs-precision 0.2 --sort-by RMSD --fs-only.\n\
Compute parameters for the given molecules in file molecules.sdf and ab-initio charges in charges.chg. Set maximum value for kappa to 1.0, step for the full scan to 0.2, no iterative refinement, sort results according to the relative mean square deviation.\n");
printf("neemp -m params -p de --sdf-file molecules.sdf --chg-file charges.chg --sort-by RMSD_avg --de-pop-size 250 --de-iters-max 500 -vv.\n\
Compute parameters for the given molecules in file molecules.sdf and ab-initio charges in charges.chg. The chosen optimization method: differential evolution will create population of 250 sets of parameters and evolve these in maximum of 500 iterations. The fitness function evaluating the set of parameters is average per atom RMSD.\n");
printf("neemp -m params -p gm --sdf-file molecules.sdf --chg-file charges.chg --sort-by RMSD_avg --gm-size 250 --gm-iterations-beg 1000 --gm-iterations-end 500 --random-seed 1234 -vv.\n\
Compute parameters for the given molecules in file molecules.sdf and ab-initio charges in charges.chg. The chosen optimization method: guided minimization will create 250 vectors (each vector consists of all parameters) and minimize reasonably good ones for 1000 iterations. The best of them will be minimized again, for 500 iterations.\n");

printf("neemp -m charges --sdf-file molecules.sdf --par-file parameters --chg-out-file output.chg\n\
Calculate and store EEM charges to the file output.chg\n");
Expand Down Expand Up @@ -209,6 +221,8 @@ void parse_options(int argc, char **argv) {
s.params_method = PARAMS_LR_FULL_BRENT;
else if (!strcmp(optarg, "de"))
s.params_method = PARAMS_DE;
else if (!strcmp(optarg, "gm"))
s.params_method = PARAMS_GM;
else
EXIT_ERROR(ARG_ERROR, "Invalid params-method: %s\n", optarg);
break;
Expand Down Expand Up @@ -397,6 +411,19 @@ void parse_options(int argc, char **argv) {
case 190:
s.polish = atoi(optarg);
break;
//GM settings
case 191:
s.gm_size = atoi(optarg);
break;
case 192:
s.gm_iterations_beg = atoi(optarg);
break;
case 193:
s.gm_iterations_end = atoi(optarg);
break;
case 194:
s.gm_threads = atoi(optarg);
break;
case '?':
EXIT_ERROR(ARG_ERROR, "%s", "Try -h/--help.\n");
default:
Expand Down Expand Up @@ -465,6 +492,15 @@ void check_settings(void) {

}

if (s.params_method == PARAMS_GM) { //all settings are optional, so check for mistakes
if (s.gm_size < 1)
EXIT_ERROR(ARG_ERROR, "%s", "Size of GM set has to be positive.\n");
if (s.gm_iterations_beg < 1 || s.gm_iterations_end < 1)
EXIT_ERROR(ARG_ERROR, "%s", "Number of minimization iterations for GM has to be positive.\n");
if (s.gm_threads < 1 || s.gm_threads > s.max_threads)
EXIT_ERROR(ARG_ERROR, "%s", "Number of threads for minimization must be between 1 and maximum number of threads.\n");
}

if (s.random_seed == -1)
s.random_seed = 123;

Expand Down Expand Up @@ -685,6 +721,15 @@ void print_settings(void) {


}

if (s.params_method == PARAMS_GM) {
printf("\nGuided minimization settings:\n");
printf("\t - set size %d\n", s.gm_size);
printf("\t - iterations for set at the beginning %d\n", s.gm_iterations_beg);
printf("\t - iterations for the result at the end %d\n", s.gm_iterations_end);
printf("\t - threads used for minimization %d\n", s.gm_threads);

}
}

printf("\n");
Expand Down
7 changes: 7 additions & 0 deletions src/settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ enum params_calc_method {
PARAMS_LR_FULL,
PARAMS_LR_FULL_BRENT,
PARAMS_DE,
PARAMS_GM, //guided minimization
PARAMS_NOT_SET
};

Expand Down Expand Up @@ -95,6 +96,12 @@ struct settings {
time_t limit_de_time;
int polish; //use NEWUOA minimization to polish trial or results

//settings regarding PARAMS_GM optimization method
int gm_size;
int gm_iterations_beg;
int gm_iterations_end;
int gm_threads;

//other settings
int random_seed;
enum verbosity_levels verbosity;
Expand Down

0 comments on commit 89f57c0

Please sign in to comment.