-
Notifications
You must be signed in to change notification settings - Fork 0
/
fast.cpp
1018 lines (901 loc) · 31.4 KB
/
fast.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/// test
/// @mainpage FastCodeML
///
/// @section intro_sect Introduction
///
/// FastCodeML is a rewrite of CodeML based directly on the pseudocode document.
/// It incorporates various parallelization strategies to be able to exploit
/// modern HPC machines architecture.
/// For this reason there are various parts of the code that can be selected at
/// compile time or run time to experiment with various, possible solutions.
///
/// @section contacts_sect Contacts
///
/// Contact us if you want more information on the project, want to collaborate
/// or suggest new ideas.
///
///- Ing. <a href="mailto:mvalle@cscs.ch">Mario Valle</a> - Swiss National
///Supercomputing Centre (CSCS) - Switzerland
///- The HP2C <a href="mailto:selectome@hp2c.ch">Selectome</a> Project Group -
///Mainly based in University of Lausanne - Switzerland
///
#include <iostream>
#include <iomanip>
#include <limits>
#include <string>
#include "CmdLine.h"
#include "Newick.h"
#include "Phylip.h"
#include "BayesTest.h"
#include "Forest.h"
#include "Exceptions.h"
#include "BranchSiteModel.h"
#include "ParseParameters.h"
#include "VerbosityLevels.h"
#include "WriteResults.h"
#include "MathSupport.h"
#ifndef VTRACE
#ifdef _OPENMP
#include <omp.h>
#endif
#endif
#ifdef USE_MKL_VML
#include <mkl_vml.h>
#endif
#include "Timer.h"
#ifdef USE_MPI
#include "HighLevelCoordinator.h"
#endif
/// Main program for FastCodeML.
///
/// @author Mario Valle - Swiss National Supercomputing Centre
///(CSCS)
/// @date 2010-12-22 (initial version)
/// @version 1.1
///
/// @param[in] aRgc Number of command line parameters
/// @param[in] aRgv Command line parameters
///
const char *version = "1.3.0";
int main(int aRgc, char **aRgv) {
Timer timer_app;
timer_app.start();
try {
#ifdef USE_MKL_VML
// If used, intitialize the MKL VML library
vmlSetMode(VML_HA | VML_DOUBLE_CONSISTENT);
#endif
#ifdef USE_MPI
// Start the high level parallel executor (based on MPI)
HighLevelCoordinator hlc(&aRgc, &aRgv);
#endif
// Parse the command line
CmdLine cmd;
cmd.parseCmdLine(aRgc, aRgv);
// Adjust and report the number of threads that will be used
#ifdef _OPENMP
int num_threads = omp_get_max_threads();
//std::cout<<"max num of thr: "<< num_threads <<std::endl;
if ((cmd.mNumThreads >= 1) &&
(cmd.mNumThreads <= (unsigned int)num_threads))
num_threads = cmd.mNumThreads;
//else
//num_threads = num_threads/2 + 1;// to prevent possible false sharing when it is set to maximum
// std::cout<<"num of thr: "<< num_threads <<std::endl;
omp_set_num_threads(num_threads);
//std::cout<<"current num of thr: "<< num_threads <<", command line num of thr: "<<cmd.mNumThreads<<std::endl;
/*if (num_threads < 2)
cmd.mForceSerial = true;
else
cmd.mForceSerial = false;*/
#else
cmd.mNumThreads = 1;
int num_threads = 1;
cmd.mForceSerial = true;
#endif
/*#ifdef _OPENMP
int num_threads = omp_get_max_threads();
if(num_threads < 2 || cmd.mForceSerial)
{
cmd.mForceSerial = true;
num_threads = 1;
omp_set_num_threads(1);
}
#else
cmd.mForceSerial = true;
int num_threads = 1;
#endif*/
#ifdef USE_MPI
// Shutdown messages from all MPI processes except the master
if (!hlc.isMaster())
cmd.mVerboseLevel = VERBOSE_NONE;
#endif
// std::cout <<std::endl<<"------------------"<< std::endl<<"FastCodeML
//V"<<version<<std::endl<<"------------------"<<std::endl;
// Write out command line parameters (if not quiet i.e. if verbose level >
// 0)
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
std::cout << "------------------------------------" << std::endl;
std::cout << "FastCodeML V" << version << std::endl;
std::cout << "------------------------------------" << std::endl;
std::cout << std::endl;
std::cout << "Tree file: " << cmd.mTreeFile << std::endl;
std::cout << "Gene file: " << cmd.mGeneFile << std::endl;
std::cout << "Verbose level: " << cmd.mVerboseLevel << " ("
<< decodeVerboseLevel(cmd.mVerboseLevel) << ')'
<< std::endl;
if (cmd.mSeed)
std::cout << "Seed: " << cmd.mSeed << std::endl;
if (cmd.mBranchFromFile)
std::cout << "Branch: From tree file" << std::endl;
else if (cmd.mBranchAll)
std::cout << "FG Branches: All (internals + leaves) "
<< std::endl;
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchStart ==
// cmd.mBranchEnd)
// std::cout
//<< "Branch: " << cmd.mBranchStart << std::endl;
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd == UINT_MAX)
// std::cout
//<< "Branches: " << cmd.mBranchStart << "-end" << std::endl;
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd != UINT_MAX)
// std::cout
//<< "Branches: " << cmd.mBranchStart << '-' <<
//cmd.mBranchEnd << std::endl;
if (!cmd.mStopIfNotLRT)
std::cout << "H0 pre stop: No" << std::endl;
if (cmd.mIgnoreFreq)
std::cout << "Codon freq.: Ignore" << std::endl;
if (cmd.mDoNotReduceForest)
std::cout << "Reduce forest: Do not reduce" << std::endl;
else
std::cout << "Reduce forest: Aggressive" << std::endl;
if (cmd.mInitH0fromH1)
std::cout << "Starting val.: From H1" << std::endl;
else if (cmd.mInitFromParams && cmd.mBranchLengthsFromFile)
std::cout
<< "Starting val.: Times from tree file and params from "
"const (see below)" << std::endl;
else if (cmd.mInitFromParams)
std::cout << "Starting val.: Params from const (see below)"
<< std::endl;
else if (cmd.mBranchLengthsFromFile)
std::cout << "Starting val.: Times from tree file"
<< std::endl;
if (cmd.mNoMaximization)
std::cout << "Maximization: No" << std::endl;
if (cmd.mTrace)
std::cout << "Trace: On" << std::endl;
if (cmd.mCleanData)
std::cout << "Clean data: On" << std::endl;
else
std::cout << "Clean data: Off" << std::endl;
if (cmd.mGraphFile)
std::cout << "Graph file: " << cmd.mGraphFile << std::endl;
if (cmd.mGraphFile && cmd.mExportComputedTimes != UINT_MAX)
std::cout << "Graph times: From H" << cmd.mExportComputedTimes
<< std::endl;
if (!cmd.mNoMaximization)
std::cout << "Optimizer: " << cmd.mOptimizationAlgo
<< std::endl;
if (cmd.mMaxIterations != MAX_ITERATIONS)
std::cout << "Max iterations: " << cmd.mMaxIterations
<< std::endl;
if (cmd.mDeltaValueForGradient > 0.0)
std::cout << "Delta value: " << cmd.mDeltaValueForGradient
<< std::endl;
std::cout << "Relative error: " << cmd.mRelativeError << std::endl;
if (cmd.mResultsFile)
std::cout << "Results file: " << cmd.mResultsFile
<< std::endl;
//if (cmd.mNumThreads)
// std::cout << "Number of threads: " << cmd.mNumThreads
// << std::endl;
if (cmd.mFixedBranchLength)
std::cout << "Branch lengths: fixed" << std::endl;
#ifdef _OPENMP
if (num_threads > 1) {
std::cout << "Current num. cores: " << num_threads << std::endl
<< "Total num. cores: " << omp_get_num_procs() << std::endl;
} else
#endif
{
std::cout << "Current num. cores: 1 serial" << std::endl
<< "Total num. cores: 1" << std::endl;
}
#ifdef USE_MPI
if (hlc.numJobs() > 2)
std::cout << "Num. MPI proc: 1 (master) + " << hlc.numJobs() - 1
<< " (workers)" << std::endl;
else
std::cout << "Num. MPI proc: Insufficient, single task execution"
<< std::endl;
#endif
std::cout << "Compiled with: ";
#ifdef _OPENMP
std::cout << "USE_OPENMP ";
#endif
#ifdef USE_MPI
std::cout << "USE_MPI ";
#endif
#ifdef USE_CPV_SCALING
std::cout << "USE_CPV_SCALING ";
#endif
#ifdef NEW_LIKELIHOOD
std::cout << "NEW_LIKELIHOOD ";
#endif
#ifdef NON_RECURSIVE_VISIT
std::cout << "NON_RECURSIVE_VISIT ";
#endif
#ifdef USE_DAG
std::cout << "USE_DAG ";
#endif
#ifdef USE_ORIGINAL_PROPORTIONS
std::cout << "USE_ORIGINAL_PROPORTIONS ";
#endif
#ifdef USE_LAPACK
std::cout << "USE_LAPACK ";
#endif
#ifdef USE_MKL_VML
std::cout << "USE_MKL_VML";
#endif
std::cout << std::endl << std::endl;
if (cmd.mInitFromParams) {
std::cout << "Param init. values: " << std::endl << std::endl
<< ParseParameters::getInstance();
}
}
// Initialize the random number generator (0 means it is not set on the command
// line)
#ifdef USE_MPI
// Insure that each MPI process starts with a different seed
if (cmd.mSeed == 0)
cmd.mSeed = static_cast<unsigned int>(time(NULL)) +
static_cast<unsigned int>(hlc.getRank()) * 1000;
#else
if (cmd.mSeed == 0)
cmd.mSeed = static_cast<unsigned int>(time(NULL));
#endif
if ((cmd.mFixedBranchLength) and (cmd.mInitH0fromH1))
{
std::cout << " Either remove -bl option or remove -i1 option. Setting both at the same time is not supported !" << std::endl ;
return 0;
}
// srand(cmd.mSeed); // fastcodeml seed
SetSeedCodeml(cmd.mSeed, 0); // codeml seed is 1
// Verify the optimizer algorithm selected on the command line
if (!cmd.mNoMaximization)
BranchSiteModel::verifyOptimizerAlgo(cmd.mOptimizationAlgo);
// Start a timer (to measure serial part over parallel one)
Timer timer;
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start();
// Create the forest
Forest forest(cmd.mVerboseLevel);
// Enclose file loading into a block so temporary structures could be
// deleted when no more needed
//{
// Load the multiple sequence alignment (MSA)
Phylip msa(cmd.mVerboseLevel);
msa.readFile(cmd.mGeneFile, cmd.mCleanData);
// Load the phylogenetic tree
Newick tree(cmd.mVerboseLevel);
tree.readFile(cmd.mTreeFile);
// Check coherence between the two files
msa.checkNameCoherence(tree.getSpecies());
// Check root and unrooting if tree is rooted
tree.checkRootBranches();
// If times from file then check for null branch lengths for any leaf
if (cmd.mBranchLengthsFromFile) {
int zero_on_leaf_cnt = 0;
int zero_on_int_cnt = 0;
tree.countNullBranchLengths(zero_on_leaf_cnt, zero_on_int_cnt);
if (zero_on_leaf_cnt > 0 || zero_on_int_cnt > 0) {
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout
<< "Found null or missing branch length in tree file: on "
<< zero_on_leaf_cnt << " leave(s) and on "
<< zero_on_int_cnt << " internal branch(es)."
<< std::endl;
}
}
}
// Print the tree with the numbering of internal branches
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
tree.printTreeAnnotated(std::cout);
std::cout<<std::endl;
// Load the forest
forest.loadTreeAndGenes(tree, msa,
cmd.mIgnoreFreq ?
CodonFrequencies::CODON_FREQ_MODEL_UNIF :
CodonFrequencies::CODON_FREQ_MODEL_F3X4);
// Reduce the forest merging common subtrees. Add also more reduction, then
// clean the no more useful data.
if (!cmd.mDoNotReduceForest) {
// bool sts = forest.reduceSubtrees(cmd.mNumReductionBlocks);
forest.reduceSubtrees();
#ifndef NEW_LIKELIHOOD
forest.addAggressiveReduction();
#endif
forest.cleanReductionWorkingData();
#ifdef NEW_LIKELIHOOD
forest.prepareNewReduction();
#endif
}
#ifdef NEW_LIKELIHOOD
else {
forest.prepareNewReductionNoReuse();
}
#endif
#ifdef NON_RECURSIVE_VISIT
// Prepare the pointers to visit the trees without recursion
forest.prepareNonRecursiveVisit();
#endif
// Subdivide the trees in groups based on dependencies
// forest.prepareDependencies(cmd.mForceSerial || cmd.mDoNotReduceForest);
#ifdef USE_DAG
// Load the forest into a DAG
forest.loadForestIntoDAG(Nt);
#endif
// Get the time needed by data preprocessing
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
timer.stop();
std::cout << std::endl << "TIMER (preproc.): " << timer.get() << " ms" << std::endl << std::endl;
}
// Print few statistics
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
std::cout << forest;
#ifdef USE_MPI
// Distribute the work. If run under MPI then finish, else return to the
// standard execution flow
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start();
bool has_run_under_MPI = hlc.startWork(forest, cmd);
// If executed under MPI report the time spent, otherwise stop the timer so
// it can be restarted around the serial execution
if (has_run_under_MPI) {
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
timer.stop();
std::cout << std::endl
<< "TIMER (processing) ncores: " << std::setw(2)
<< num_threads * (hlc.numJobs() - 1) + 1
<< " time: " << timer.get() << std::endl;
}
return 0;
} else {
timer.stop();
}
#endif
// Compute the range of branches to mark as foreground
size_t branch_start, branch_end;
std::set<int> fg_set; // to save a list of fg branches from the
// getBranchRange function
std::set<int> ib_set; // to save a list of internal branches from the
// getBranchRange function
std::vector<double> mVar; // to save optimization variables
forest.getBranchRange(cmd, branch_start, branch_end, fg_set, ib_set); // fgset is added to save a list of fg branches
// for (std::set<int>::iterator it=ib_set.begin(); it!=ib_set.end(); ++it)
// std::cout << " " << *it << ",";
// Start timing parallel part
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start();
double lnl0, lnl1 = 0.;
if (!fg_set.empty()) // in case of marked fg branches (one or multiple fg)
{
//initial the output results object
WriteResultsMfg output_results_mfg(cmd.mResultsFile);
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS)
std::cout << std::endl
<< "Doing foreground branch(es) from tree file "
<< std::endl;
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS)
std::cout << "-------------------------------------------"
<< std::endl;
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout << "Doing foreground branch(es) ";
for (std::set<int>::iterator it = fg_set.begin();
it != fg_set.end(); ++it)
std::cout << " " << *it << " ";
std::cout << std::endl;
}
// Initialize the models
MfgBranchSiteModelNullHyp h0(forest, cmd);
MfgBranchSiteModelAltHyp h1(forest, cmd);
// Initialize the test
MfgBayesTest beb(forest, cmd.mVerboseLevel, cmd.mDoNotReduceForest);
// Compute the alternate model maximum loglikelihood
double lnl1 = 0.;
if (cmd.mComputeHypothesis != 0) {
if (cmd.mInitFromParams)
h1.initFromParams();
if (cmd.mBranchLengthsFromFile)
h1.initFromTree();
lnl1 = h1(fg_set);
// h1.saveComputedTimes();
// std::cout << "lnl1 = " << lnl1 << std::endl;
// Save the value for formatted output
output_results_mfg.saveLnL(fg_set, lnl1, 1);
}
// Compute the null model maximum loglikelihood
double lnl0 = 0.;
if (cmd.mComputeHypothesis != 1) {
if (cmd.mInitH0fromH1)
h0.initFromResult(h1.getVariables());
else {
if (cmd.mInitFromParams)
h0.initFromParams();
if (cmd.mBranchLengthsFromFile)
h0.initFromTree();
}
lnl0 = h0(fg_set,
cmd.mStopIfNotLRT && cmd.mComputeHypothesis != 0,
lnl1 - THRESHOLD_FOR_LRT);
// std::cout << "lnl0 = " << lnl0 << std::endl;
// Save the value for formatted output (only if has not be forced to
// stop)
if(lnl0 < DBL_MAX) output_results_mfg.saveLnL(fg_set, lnl0, 0);
}
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout << std::endl;
if (cmd.mComputeHypothesis != 1) {
std::cout << "LnL0: ";
if (lnl0 == std::numeric_limits<double>::infinity())
std::cout << "**Invalid result**";
else if (lnl0 < DBL_MAX)
std::cout << std::setprecision(15) << std::fixed
<< lnl0;
else
std::cout << "(Doesn't pass LRT, skipping)";
std::cout << " Function calls: " << h0.getNumEvaluations()
<< " ";
std::cout << std::endl << std::endl;
if (lnl0 != std::numeric_limits<double>::infinity()) {
std::string s0 = h0.printFinalVars(std::cout);
// std::cout<<"EDW0: "<< s0 <<std::endl;
output_results_mfg.saveParameters(fg_set, s0, 0);
}
std::cout << std::endl;
}
if (cmd.mComputeHypothesis != 0) {
std::cout << "LnL1: ";
if (lnl1 == std::numeric_limits<double>::infinity())
std::cout << "**Invalid result**";
else
std::cout << std::setprecision(15) << std::fixed
<< lnl1;
std::cout << " Function calls: " << h1.getNumEvaluations();
std::cout << std::endl << std::endl;
if (lnl1 != std::numeric_limits<double>::infinity()) {
std::string s1 = h1.printFinalVars(std::cout);
// std::cout<<"EDW1: "<< s1 <<std::endl;
output_results_mfg.saveParameters(fg_set, s1, 1);
}
std::cout << std::endl;
}
if (cmd.mComputeHypothesis > 1) {
if (lnl0 == std::numeric_limits<double>::infinity()
|| lnl1 == std::numeric_limits<double>::infinity())
std::cout << "LRT: **Invalid result**";
else if (lnl0 < DBL_MAX)
std::cout << "LRT: " << std::setprecision(15)
<< std::fixed << lnl1 - lnl0 << " (threshold: "
<< std::setprecision(15) << std::fixed
<< THRESHOLD_FOR_LRT << ')';
else
std::cout << "LRT: < " << std::setprecision(15)
<< std::fixed << THRESHOLD_FOR_LRT;
std::cout << std::endl;
}
}
// If requested set the time in the forest and export to a graph
// visualization tool
if (cmd.mGraphFile) {
switch (cmd.mExportComputedTimes) {
case 0:
h0.saveComputedTimes();
break;
case 1:
h1.saveComputedTimes();
break;
default:
break;
}
// Use the forest export class
ForestExport fe(forest);
fe.exportForest(cmd.mGraphFile, 0);
}
// If the two hypothesis are computed, H0 has not been stopped and the run
// passes the LRT, then compute the BEB
if (cmd.mComputeHypothesis > 1 && lnl0 < DBL_MAX
&& BranchSiteModel::performLRT(lnl0, lnl1)) {
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS)
std::cout << std::endl
<< "LRT is significant. Computing sites under positive "
"selection ... " << std::endl;
// Get the scale values from the latest optimized h1.
std::vector<double> scales(2);
h1.getScales(scales);
// Run the BEB test
// if(cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) std::cout << std::endl
// << "LRT is significant. Computing sites under positive selection ...
// " << std::endl ;
beb.computeBEB(h1.getVariables(), fg_set, scales);
// Output the sites under positive selection (if any)
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS)
beb.printPositiveSelSites(fg_set);
// Get the sites under positive selection for printing in the results
// file (if defined)
if(output_results_mfg.isWriteResultsEnabled())
{
std::vector<unsigned int> positive_sel_sites;
std::vector<double> positive_sel_sites_prob;
beb.extractPositiveSelSites(positive_sel_sites,
positive_sel_sites_prob);
//if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
//std::cout << std::endl
//<< "Positively selected sites and their probabilities : ";
//std::cout << std::endl;
//for (std::vector<unsigned int>::iterator it =
//positive_sel_sites.begin();
//it != positive_sel_sites.end(); ++it)
//std::cout << " " << *it << ",";
//std::cout << std::endl;
//for (std::vector<double>::iterator it =
//positive_sel_sites_prob.begin();
//it != positive_sel_sites_prob.end(); ++it)
//std::cout << " " << *it << ",";
//std::cout << std::endl;
//}
output_results_mfg.savePositiveSelSites(fg_set, positive_sel_sites,
positive_sel_sites_prob);
}
}
// if branches are fixed
if (cmd.mFixedBranchLength)
{
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout << std::endl << "Final ";
tree.printTreeAnnotated(std::cout, NULL, 0, true);
std::cout << std::endl;
tree.printTreeAnnotated(std::cout, NULL, 0, true, false);
}
}
else
{
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
if (cmd.mComputeHypothesis == 0)
mVar = h0.getVariables();
else
mVar = h1.getVariables();
if (cmd.mComputeHypothesis == 0)
std::cout << std::endl << "H0 Final ";
else
std::cout << std::endl << "H1 Final ";
tree.printTreeAnnotatedWithEstLens(std::cout, NULL, 0, true,
&mVar);
std::cout << std::endl;
tree.printTreeAnnotatedWithEstLens(std::cout, NULL, 0, true,
&mVar, false);
}
// tree.printTreeAnnotatedWithEstLens(std::cout, NULL, 0, true,
// &h1.getVariables());
// std :: cout << std::endl;
}
/*if(cmd.mInitFromParams) h0.initFromParams();
if(cmd.mBranchLengthsFromFile) h0.initFromTree();
lnl0 = h0(fg_set, cmd.mStopIfNotLRT && cmd.mComputeHypothesis != 0,
0-THRESHOLD_FOR_LRT);
std::cout << "lnl0 (multiple fg) = " << lnl0 << std::endl;
if(cmd.mInitFromParams) h1.initFromParams();
if(cmd.mBranchLengthsFromFile) h1.initFromTree();
lnl1 = h1(fg_set);
std::cout << "lnl1 (multiple fg) = " << lnl1 << std::endl;*/
timer_app.stop();
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout << std::endl << "Time used: " << timer_app.get() / 60000
<< "m:" << (timer_app.get() / 1000) % 60 << "s" << std::endl;
std::cout << "Cores used: " << num_threads << std::endl;
}
// Output the results
output_results_mfg.outputResults(fg_set);
return 0;
}
// Else for all requested internal branches
// Initialize the output results file (if the argument is null, no file is
// created)
WriteResults output_results(cmd.mResultsFile);
// Initialize the models
BranchSiteModelNullHyp h0(forest, cmd);
BranchSiteModelAltHyp h1(forest, cmd);
// Initialize the test
BayesTest beb(forest, cmd.mVerboseLevel, cmd.mDoNotReduceForest);
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
if (cmd.mBranchAll)
std::cout << std::endl << "Doing for all branches (internal + leaf) as foreground"
<< std::endl;
else
std::cout << std::endl << "Doing for all internal branches as foreground"
<< std::endl;
std::cout << "------------------------------------" << std::endl;
}
for (size_t fg_branch = branch_start; fg_branch <= branch_end;
++fg_branch) {
if (cmd.mBranchAll
or (!cmd.mBranchAll
and ib_set.find(fg_branch) != ib_set.end()))
{
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS)
std::cout << "Doing foreground branch " << fg_branch
<< std::endl;
// Compute the alternate model maximum loglikelihood
double lnl1 = 0.;
if (cmd.mComputeHypothesis != 0) {
if (cmd.mInitFromParams)
h1.initFromParams();
if (cmd.mBranchLengthsFromFile)
h1.initFromTree();
lnl1 = h1(fg_branch);
// h1.saveComputedTimes();
// h1.mBranches
// Save the value for formatted output
output_results.saveLnL(fg_branch, lnl1, 1);
}
// Compute the null model maximum loglikelihood
double lnl0 = 0.;
if (cmd.mComputeHypothesis != 1) {
if (cmd.mInitH0fromH1)
h0.initFromResult(h1.getVariables());
else {
if (cmd.mInitFromParams)
h0.initFromParams();
if (cmd.mBranchLengthsFromFile)
h0.initFromTree();
}
lnl0 = h0(fg_branch,
cmd.mStopIfNotLRT && cmd.mComputeHypothesis != 0,
lnl1 - THRESHOLD_FOR_LRT);
// Save the value for formatted output (only if has not be forced to
// stop)
if(lnl0 < DBL_MAX) output_results.saveLnL(fg_branch, lnl0, 0);
}
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout << std::endl;
if (cmd.mComputeHypothesis != 1) {
std::cout << "LnL0: ";
if (lnl0 == std::numeric_limits<double>::infinity())
std::cout << "**Invalid result**";
else if (lnl0 < DBL_MAX)
std::cout << std::setprecision(15) << std::fixed
<< lnl0;
else
std::cout << "(Doesn't pass LRT, skipping)";
std::cout << " Function calls: "
<< h0.getNumEvaluations() << " ";
std::cout << std::endl << std::endl;
if (lnl0 != std::numeric_limits<double>::infinity()) {
std::string s0 = h0.printFinalVars(std::cout);
output_results.saveParameters(fg_branch, s0, 0);
}
std::cout << std::endl;
}
if (cmd.mComputeHypothesis != 0) {
std::cout << "LnL1: ";
if (lnl1 == std::numeric_limits<double>::infinity())
std::cout << "**Invalid result**";
else
std::cout << std::setprecision(15) << std::fixed
<< lnl1;
std::cout << " Function calls: "
<< h1.getNumEvaluations();
std::cout << std::endl << std::endl;
if (lnl1 != std::numeric_limits<double>::infinity()) {
std::string s1 = h1.printFinalVars(std::cout);
output_results.saveParameters(fg_branch, s1, 1);
}
std::cout << std::endl;
}
if (cmd.mComputeHypothesis > 1) {
if (lnl0 == std::numeric_limits<double>::infinity()
|| lnl1
== std::numeric_limits<double>::infinity())
std::cout << "LRT: **Invalid result**";
else if (lnl0 < DBL_MAX)
std::cout << "LRT: " << std::setprecision(15)
<< std::fixed << lnl1 - lnl0
<< " (threshold: " << std::setprecision(15)
<< std::fixed << THRESHOLD_FOR_LRT << ')';
else
std::cout << "LRT: < " << std::setprecision(15)
<< std::fixed << THRESHOLD_FOR_LRT;
std::cout << std::endl;
}
}
// If requested set the time in the forest and export to a graph
// visualization tool
if (cmd.mGraphFile) {
switch (cmd.mExportComputedTimes) {
case 0:
h0.saveComputedTimes();
break;
case 1:
h1.saveComputedTimes();
break;
default:
break;
}
// Use the forest export class
ForestExport fe(forest);
fe.exportForest(cmd.mGraphFile, fg_branch);
}
// If the two hypothesis are computed, H0 has not been stopped and the
// run passes the LRT, then compute the BEB
if (cmd.mComputeHypothesis > 1 && lnl0 < DBL_MAX
&& BranchSiteModel::performLRT(lnl0, lnl1)) {
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS)
std::cout << std::endl
<< "LRT is significant. Computing sites under positive "
"selection ... " << std::endl;
// Get the scale values from the latest optimized h1.
std::vector<double> scales(2);
h1.getScales(scales);
// Run the BEB test
// if(cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) std::cout <<
// std::endl << "LRT is significant. Computing sites under positive
// selection ... " << std::endl ;
beb.computeBEB(h1.getVariables(), fg_branch, scales);
// Output the sites under positive selection (if any)
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS)
beb.printPositiveSelSites(fg_branch);
// Get the sites under positive selection for printing in the results
// file (if defined)
if (output_results.isWriteResultsEnabled()) {
std::vector<unsigned int> positive_sel_sites;
std::vector<double> positive_sel_sites_prob;
beb.extractPositiveSelSites(positive_sel_sites,
positive_sel_sites_prob);
output_results.savePositiveSelSites(fg_branch,
positive_sel_sites, positive_sel_sites_prob);
}
}
if (cmd.mFixedBranchLength)
{
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout << std::endl << "Final ";
tree.printTreeAnnotated(std::cout, NULL, 0, true);
std::cout << std::endl;
tree.printTreeAnnotated(std::cout, NULL, 0, true, false);
std::cout << std::endl;
}
}
else {
// std :: cout << std::endl;
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
if (cmd.mComputeHypothesis == 0)
mVar = h0.getVariables();
else
mVar = h1.getVariables();
if (cmd.mComputeHypothesis == 0)
std::cout << std::endl << "H0 Final ";
else
std::cout << std::endl << "H1 Final ";
tree.printTreeAnnotatedWithEstLens(std::cout, NULL, 0,
true, &mVar);
std::cout << std::endl;
tree.printTreeAnnotatedWithEstLens(std::cout, NULL, 0,
true, &mVar, false);
std::cout << std::endl;
}
}
}
}
// Get the time needed by the parallel part
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
timer.stop();
std::cout << std::endl << "TIMER (processing) ncores: "
<< std::setw(2) << num_threads << " time: " << timer.get()
<< std::endl;
}
timer_app.stop();
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout << std::endl << "Time used: " << timer_app.get() / 60000
<< "m:" << (timer_app.get() / 1000) % 60 << "s" << std::endl;
std::cout << "Cores used: " << num_threads << std::endl;
}
// Output the results
output_results.outputResults(ib_set,cmd.mBranchAll);
// Catch all exceptions
} catch (const FastCodeMLSuccess &) {
return 0;
} catch (const FastCodeMLFatal &e) {
// If a message associated (i.e. no empty string), display it
if (e.what()[0])
std::cout << std::endl << e.what() << std::endl << std::endl;
return 1;
} catch (const FastCodeMLMemoryError &e) {
std::cout << std::endl << e.what() << std::endl << std::endl;
return 1;
} catch (const std::bad_alloc &e) {
std::cout << std::endl << e.what() << std::endl << std::endl;
return 1;
} catch (...) {
std::cout << std::endl << "Default exception caught." << std::endl
<< std::endl;
return 1;
}
return 0;
}
/// @page cppstd_page C++ Coding Standard
/// Here are collected few rules for coding this project.
///
/// @section cnames_sect Class names
/// Class names are CamelCase with first letter uppercase.
///
/// Ex: %PhyloTree
///
/// @section cmeth_sect Class methods
/// Class methods names are CamelCase with the first letter lowercase.
/// Only very common and specific names should be all lowercase, like read,
/// clean, size.
///
/// Ex: testFillQ
///
/// @section cdatamemb_sect Class data members
/// Class member variables names start with 'm' followed by CamelCase name.
///
/// Ex: mFgBranch
///
/// @section carg_sect Function arguments
/// Function arguments names start with 'a' followed by CamelCase name.
///
/// Ex: aFgBranch
///
/// @section const_sect Constants and enumeration
/// Constants and enumerations are all uppercase with words separated by '_'.
/// The first letters specify the kind of constant (like: STS_ for status, OPT_
/// for option value).
///
/// Ex: STS_CANT_OPEN
///
/// @section stack_sect Temporary variables
/// All the other variables are all lower case with parts separated by '_'.
///
/// Ex: branch_list
///
/// @section misc_sect Miscellaneous rules
/// In case of error main should return 1.
///
/// Array sizes and corresponding indexes should be size_t. The remaining
/// counters should be unsigned int.
///
/// The null pointer should be written as NULL, not 0 to make clear its purpose.
///
/// @page vampir_page Using Vampir for profiling
/// On Linux we use VampirTrace to collect profile data and Vampir to display
/// the results (http://www.vampir.eu/).
///
/// Before invoking CMAKE define CXX=vtCC
///
/// Define CMAKE_BUILD_TYPE as: RelWithDebInfo
///
/// Run CMAKE and configure.
///
/// Then define CMAKE_CXX_FLAGS as: -vt:mt -vt:noopari
/// If you want to trace also the OpenMP calls then change it to: -vt:mt