This repository has been archived by the owner on Jul 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
iCANDY.py
executable file
·5933 lines (4595 loc) · 210 KB
/
iCANDY.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
#################################
# Import some necessary modules #
#################################
import os, sys
import dendropy
import string, re
import random
from math import sqrt, pow, log, floor, sin, log10, ceil
from numpy import repeat, convolve, mean, median
from optparse import OptionParser, OptionGroup
from Bio.Nexus import Trees, Nodes
import shlex, subprocess
from colorsys import hsv_to_rgb
from modules.Si_general import *
from modules.Si_SeqIO import *
#from Si_nexus import midpoint_root, tree_to_string
from Bio import SeqIO
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import GenBank
from Bio.Graphics.GenomeDiagram._Colors import ColorTranslator
from math import floor
import imp
from reportlab.lib import colors
from reportlab.lib.units import inch
from reportlab.lib import pagesizes
from reportlab.graphics.shapes import *
from reportlab.pdfgen.canvas import Canvas
from reportlab.graphics.charts.textlabels import Label
from reportlab.graphics.charts.lineplots import LinePlot
from reportlab.graphics.widgets.markers import makeMarker
from reportlab.graphics.charts.axes import XValueAxis, Label
from reportlab.graphics.charts.legends import Legend, LineLegend
from reportlab.graphics.charts.barcharts import VerticalBarChart
from reportlab.graphics.widgets.grids import ShadedRect
from reportlab.graphics.widgets.signsandsymbols import ArrowOne
from reportlab.graphics import renderPDF
import pysam
from itertools import izip
import datetime
#################################
# Set up some global converters #
#################################
pagesizeconverter={'A0':pagesizes.A0, 'A1':pagesizes.A1, 'A2':pagesizes.A2, 'A3':pagesizes.A3, 'A4':pagesizes.A4, 'A5':pagesizes.A5, 'A6':pagesizes.A6, 'B0':pagesizes.B0, 'B1':pagesizes.B1, 'B2':pagesizes.B2, 'B3':pagesizes.B3, 'B4':pagesizes.B4, 'B5':pagesizes.B5, 'B6':pagesizes.B6, 'LEGAL':pagesizes.LEGAL, 'LETTER':pagesizes.LETTER, 'landscape':pagesizes.landscape, 'legal':pagesizes.legal, 'letter':pagesizes.letter, 'portrait':pagesizes.portrait}
colourconverter={'aliceblue':colors.aliceblue, 'antiquewhite':colors.antiquewhite, 'aqua':colors.aqua, 'aquamarine':colors.aquamarine, 'azure':colors.azure, 'beige':colors.beige, 'bisque':colors.bisque, 'black':colors.black, 'blanchedalmond':colors.blanchedalmond, 'blue':colors.blue, 'blueviolet':colors.blueviolet, 'brown':colors.brown, 'burlywood':colors.burlywood, 'cadetblue':colors.cadetblue, 'chartreuse':colors.chartreuse, 'chocolate':colors.chocolate, 'coral':colors.coral, 'cornflower':colors.cornflower, 'cornflowerblue':colors.cornflowerblue, 'cornsilk':colors.cornsilk, 'crimson':colors.crimson, 'cyan':colors.cyan, 'darkblue':colors.darkblue, 'darkcyan':colors.darkcyan, 'darkgoldenrod':colors.darkgoldenrod, 'darkgray':colors.darkgray, 'darkgreen':colors.darkgreen, 'darkgrey':colors.darkgrey, 'darkkhaki':colors.darkkhaki, 'darkmagenta':colors.darkmagenta, 'darkolivegreen':colors.darkolivegreen, 'darkorange':colors.darkorange, 'darkorchid':colors.darkorchid, 'darkred':colors.darkred, 'darksalmon':colors.darksalmon, 'darkseagreen':colors.darkseagreen, 'darkslateblue':colors.darkslateblue, 'darkslategray':colors.darkslategray, 'darkslategrey':colors.darkslategrey, 'darkturquoise':colors.darkturquoise, 'darkviolet':colors.darkviolet, 'deeppink':colors.deeppink, 'deepskyblue':colors.deepskyblue, 'dimgray':colors.dimgray, 'dimgrey':colors.dimgrey, 'dodgerblue':colors.dodgerblue, 'fidblue':colors.fidblue, 'fidlightblue':colors.fidlightblue, 'fidred':colors.fidred, 'firebrick':colors.floralwhite, 'floralwhite':colors.floralwhite, 'forestgreen':colors.forestgreen, 'fuchsia':colors.fuchsia, 'gainsboro':colors.gainsboro, 'ghostwhite':colors.ghostwhite, 'gold':colors.gold, 'goldenrod':colors.goldenrod, 'gray':colors.gray, 'green':colors.green, 'greenyellow':colors.greenyellow, 'grey':colors.grey, 'honeydew':colors.honeydew, 'hotpink':colors.hotpink, 'indianred':colors.indianred, 'indigo':colors.indigo, 'ivory':colors.ivory, 'khaki':colors.khaki, 'lavender':colors.lavender, 'lavenderblush':colors.lavenderblush, 'lawngreen':colors.lawngreen, 'lemonchiffon':colors.lemonchiffon, 'lightblue':colors.lightblue, 'lightcoral':colors.lightcoral, 'lightcyan':colors.lightcyan, 'lightgoldenrodyellow':colors.lightgoldenrodyellow, 'lightgreen':colors.lightgreen, 'lightgrey':colors.lightgrey, 'lightpink':colors.lightpink, 'lightsalmon':colors.lightsalmon, 'lightseagreen':colors.lightseagreen, 'lightskyblue':colors.lightskyblue, 'lightslategray':colors.lightslategray, 'lightslategrey':colors.lightslategrey, 'lightsteelblue':colors.lightsteelblue, 'lightyellow':colors.lightyellow, 'lime':colors.lime, 'limegreen':colors.limegreen, 'linen':colors.linen, 'magenta':colors.magenta, 'maroon':colors.maroon, 'mediumaquamarine':colors.mediumaquamarine, 'mediumblue':colors.mediumblue, 'mediumorchid':colors.mediumorchid, 'mediumpurple':colors.mediumpurple, 'mediumseagreen':colors.mediumseagreen, 'mediumslateblue':colors.mediumslateblue, 'mediumspringgreen':colors.mediumspringgreen, 'mediumturquoise':colors.mediumturquoise, 'mediumvioletred':colors.mediumvioletred, 'midnightblue':colors.midnightblue, 'mintcream':colors.mintcream, 'mistyrose':colors.mistyrose, 'moccasin':colors.moccasin, 'navajowhite':colors.navajowhite, 'navy':colors.navy , 'oldlace':colors.oldlace, 'olive':colors.olive, 'olivedrab':colors.olivedrab, 'orange':colors.orange, 'orangered':colors.orangered, 'orchid':colors.orchid, 'palegoldenrod':colors.palegoldenrod, 'palegreen':colors.palegreen, 'paleturquoise':colors.paleturquoise, 'palevioletred':colors.palevioletred, 'papayawhip':colors.papayawhip, 'peachpuff':colors.peachpuff, 'peru':colors.peru, 'pink':colors.pink, 'plum':colors.plum, 'powderblue':colors.powderblue, 'purple':colors.purple, 'red':colors.red, 'rosybrown':colors.rosybrown, 'royalblue':colors.royalblue, 'saddlebrown':colors.saddlebrown, 'salmon':colors.salmon, 'sandybrown':colors.sandybrown, 'seagreen':colors.seagreen, 'seashell':colors.seashell, 'sienna':colors.sienna, 'silver':colors.silver, 'skyblue':colors.skyblue, 'slateblue':colors.slateblue, 'slategray':colors.slategray, 'slategrey':colors.slategrey, 'snow':colors.snow, 'springgreen':colors.springgreen, 'steelblue':colors.steelblue, 'tan':colors.tan, 'teal':colors.teal, 'thistle':colors.thistle, 'tomato':colors.tomato, 'turquoise':colors.turquoise, 'violet':colors.violet, 'wheat':colors.wheat, 'white':colors.white, 'whitesmoke':colors.whitesmoke, 'yellow':colors.yellow, 'yellowgreen':colors.yellowgreen}
default_rgb_colours=["blue", "red", "limegreen", "aqua", "fuchsia", "orange", "green", "gray", "purple", "olive", "teal", "silver", "navy", "white", "black", "maroon", "yellow"]
default_plot_colours=["red", "blue", "limegreen", "aqua", "fuchsia", "orange", "green", "gray", "purple", "olive", "teal", "silver", "navy", "black", "maroon", "yellow"]
########################################################
# Up the default recursion limit to allow bigger trees #
########################################################
sys.setrecursionlimit(10000)
################################
# Get the command line options #
################################
def main():
usage = "usage: %prog [options] args"
parser = OptionParser(usage=usage)
group = OptionGroup(parser, "General Output Options")
group.add_option("-o", "--output", action="store", dest="outputfile", help="output file name [default= %default]", type="string", metavar="FILE", default="test.pdf")
group.add_option("-O", "--orientation", action="store", choices=['landscape', 'portrait'], dest="orientation", help="page orientation [default= %default]", type="choice", default="landscape")
group.add_option("-p", "--pagesize", action="store", choices=['A0', 'A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'B0', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'LEGAL', 'LETTER', 'legal', 'letter', 'custom'], dest="page", help="page size [default= %default]", type="choice", default="A4")
group.add_option("-5", "--custompagesize", action="store", dest="custompage", help="custom page size. Must be in the form of x,y in mm. Only applicable with the custom -p option", default="")
group.add_option("-P", "--npages", action="store", dest="npages", help="number of pages to split picture over [default= %default]", type="int", default=1)
parser.add_option_group(group)
group = OptionGroup(parser, "Tree Output Options")
group.add_option("-t", "--tree", action="store", dest="tree", help="tree file to align tab files to", default="")
group.add_option("-2", "--proportion", action="store", dest="treeproportion", help="Proportion of page to take up with the tree", default=0.3, type='float')
group.add_option("-s", "--support", action="store", dest="tree_support", help="Scale tree branch widths by value. For newick trees this can be any value stored in the tree. Otherwise, use 'support' to scale by branch support values (if present)", default="")
group.add_option("-7", "--height_HPD", action="store_true", dest="show_height_HPD", help="show branch 95% HPD heights (if present in tree) [default= %default]", default=False)
group.add_option("-6", "--brlabels", action="store", dest="branch_labels", help="Label branches with value. For newick trees this can be any value stored in the tree. Otherwise, use 'support' to label with branch support values (if present), or 'brlen' to label with branch lengths.", default="")
group.add_option("-M", "--midpoint", action="store_true", dest="midpoint", help="Midpoint root tree", default=False)
group.add_option("-L", "--ladderise", action="store", choices=['right', 'left'], dest="ladderise", help="ladderise tree (choose from right or left) [default= %default]", type="choice", default=None)
group.add_option("-z", "--names_as_shapes", action="store", choices=['circle', 'square', 'rectangle', 'auto'], dest="names_as_shapes", help="Use shapes rather than taxon names in tree (choose from circle) [default= %default]", type="choice", default="auto")
group.add_option("-1", "--logbranches", action="store_true", dest="log_branches", help="log branch lengths [default= %default]", default=False)
group.add_option("-D", "--date_separator", action="store", dest="date_separator", help="For trees with dating information, the script will read these from the taxon names if they are a suffix separated from the rest of the taxon name by a particular character. To do this you need to choose the character used to separate the dates with this option", default="")
group.add_option("-8", "--most_recent_isolation_date", action="store", dest="most_recent", help="Most recent isolation date for samples in tree. This must be in the same units used in the BEAST tree, and as defined by the -I option.", default=0, type="float")
group.add_option("-I", "--time_type", action="store", choices=['days', 'years'], dest="time_type", help="Unit of time used in trees with dating information (choose from days or years) [default= %default]", type="choice", default="years")
group.add_option("-j", "--Figtree_colours", action="store_true", dest="figtree", help="Use colours in tree saved from Figtree [default= %default]", default=False)
group.add_option("-J", "--colour_by_annotation", action="store", dest="colour_by_annotation", help="Colour tree based on annotations (e.g. BEAST trait states). You must specify the annotation to use.", default="")
parser.add_option_group(group)
group = OptionGroup(parser, "Output Options For Tracks Without Tree")
group.add_option("-x", "--taxon_list", action="store", dest="taxon_list", help="File with ordered taxa", default="")
group.add_option("-n", "--names", action="store_true", dest="track_names", help="Show track names", default=False)
parser.add_option_group(group)
group = OptionGroup(parser, "Names Options")
group.add_option("-a", "--aligntaxa", action="store", dest="aligntaxa", help="Align taxon names (0=no, 1=align left, 2=align right, 3=name on tree but metadata aligned to right) [default= %default]", default=0, type="int")
group.add_option("-N", "--taxanames", action="store_false", dest="taxon_names", help="Do not show taxa names on tree. [default=show names]", default=True)
group.add_option("-m", "--metadata", action="store", dest="metadata", help="metadata file in csv format. Note that this file must have a header row ontaining titles for each column", default="")
group.add_option("-c", "--columns", action="store", dest="columns", help="column(s) from metadata file to use for track name (comma separated list) [default=%default]", default="1")
group.add_option("-C", "--colourbycolumns", action="store", dest="colour_columns", help="column(s) from metadata file to use to colour track name and blocks next to name (comma separated list). If names are being shown, the first column will be used to colour names. All following columns will be added as coloured shapes as defined by the -z option. [default=%default]", default=False)
group.add_option("-k", "--nometadatakey", action="store_false", dest="show_metadata_key", help="Do not show metadata keys. [default=show keys]", default=True)
group.add_option("-r", "--parsimony_reconstruction", action="store", dest="transformation", help="Reconstruct colours across branches using parsimony. Select from acctran or deltran transformations [default=%default]", default=None, type="choice", choices=['acctran', 'deltran'])
group.add_option("-i", "--suffix", action="store", dest="suffix", help="suffix to remove from filenames", default="")
group.add_option("-W", "--listcolours", action="store_true", dest="printcolours", help="Print list of available colours for columns and exit", default=False)
parser.add_option_group(group)
group = OptionGroup(parser, "Annotation Track Options")
group.add_option("-b", "--beginning", action="store", dest="beginning", help="Start position", default=0, metavar="int", type="int")
group.add_option("-e", "--end", action="store", dest="end", help="End position", default=-1, metavar="int", type="int")
group.add_option("-f", "--fragments", action="store", dest="fragments", help="number of fragments (lines on which to split genome) for linear diagram [default= %default]", type="int", default=1, metavar="int")
group.add_option("-F", "--fragment_separation", action="store", dest="fragment_separation", help="Number of blank tracks between fragments [default= %default]", default=1, metavar="int", type="int")
group.add_option("-T", "--tracksize", action="store", dest="tracksize", help="Proportion of space available to each track that should be used in drawing", default=0.8, metavar="float", type="float")
group.add_option("-E", "--emblheight", action="store", dest="emblheight", help="Relative track height of embl tracks to other tracks [default= %default]", default=2, metavar="int", type="int")
group.add_option("-l", "--labels", action="store", dest="labels", help="Relative track height of track for CDS labels (0 = do not show labels) [default= %default]", default=0, metavar="int", type="int")
group.add_option("-g", "--label_angle", action="store", dest="label_angle", help="Angle of CDS labels (anticlockwise from horizontal) [default= %default]", default=45, metavar="int", type="int")
group.add_option("-B", "--outline_features", action="store_true", dest="outline_features", help="Outline all embl features in black", default=False)
group.add_option("-G", "--greytracks", action="store_true", dest="greytracks", help="Make tracks behind features grey", default=False)
group.add_option("-q", "--qualifier", action="store", dest="qualifier", help="qualifier in tab file containing strain names to sort by (must be the same as in the tree provided) [e.g. note]", default="")
group.add_option("-A", "--arrowheads", action="store", choices=['0', '1', '2'], dest="arrows", help="add arrowheads to features on forward and reverse strands. 0=No arrows, 1=arrow style 1, 2=arrow style 2 [default= %default]", type="choice", default=0)
group.add_option("-S", "--oneline", action="store_true", dest="oneline", help="Draw forward and reverse embl objects on one line", default=False)
parser.add_option_group(group)
group = OptionGroup(parser, "Bam options")
group.add_option("-u", "--base_qual_filter", action="store", dest="base_qual_filter", help="Base quality filter for bam file mapping plots [default= %default]", default=0, type="float")
group.add_option("-U", "--mapping_qual_filter", action="store", dest="mapping_qual_filter", help="Mapping quality filter for bam file plots [default= %default]", default=0, type="float")
parser.add_option_group(group)
group = OptionGroup(parser, "Bcf options")
group.add_option("-v", "--varianttype", action="store", dest="bcfvariants", choices=["a","A","i","I","s","S", "h", "H"], help="bcf variants to show. Letter code for types of variant to include: a/A=include all i/I:just indels, s/S:just SNPs, h/H just heterozygotous sites. Lower case means exclude non-variant sites, upper case means include them [default= %default]", default="A", type="choice")
parser.add_option_group(group)
group = OptionGroup(parser, "Plot Options")
group.add_option("-H", "--plotheight", action="store", dest="plotheight", help="Relative track height of plot tracks to other tracks [default= %default]", default=2, metavar="int", type="int")
group.add_option("-d", "--default_plot_type", choices=["hist", "heat", "bar", "line", "area", "stackedarea"], type="choice", action="store", dest="plottype", help="Set the default plot type (for plots called plot or graph and bam files). Choose from "+", ".join(["hist", "heat", "bar", "line", "area", "stackedarea"])+" [default= %default]", default="line")
group.add_option("-X", "--scale_plots", action="store_true", dest="scale_plots_same", help="Use the same max and min scale for all plots", default=False)
group.add_option("-y", "--plot_max", action="store", dest="plot_max", help="Set a maximum value for plots. Can be a number or coverage relative to the median (e.g. 2x)", default="Inf", type="string")
group.add_option("-Y", "--plot_min", action="store", dest="plot_min", help="Set a minimum value for plots. Can be a number or coverage relative to the median (e.g. 1x)", default="Inf", type="string")
group.add_option("-Z", "--log_plot", action="store_true", dest="log_plots", help="Show plots on a log scale (doesn't work yet)", default=False)
group.add_option("-w", "--plot_scales", action="store_true", dest="plot_scales", help="Show legend on heatmaps", default=False)
group.add_option("-3", "--heatmap_colour", default="bluered", choices=["redblue", "bluered", "blackwhite", "whiteblack", "redandblue"], type="choice", action="store", dest="heat_colour", help="Set the default plot type (for plots called plot or graph and bam files). Choose from "+", ".join(["redblue", "bluered", "blackwhite", "whiteblack"]))
group.add_option("-4", "--windows", action="store", dest="windows", help="Number of windows per line (be careful, making this value too high will make things very slow and memory intensive) [default= %default]", default=501, type="float")
parser.add_option_group(group)
return parser.parse_args()
###############################################
# Check the command line options are sensible #
###############################################
#############################
# Remove values from a list #
#############################
def remove_zeros_from_list(the_list):
return [value for value in the_list if value != 0]
##########################################
# Function to read in Excel date formats #
##########################################
def xldate_as_datetime(xldate, datemode=0):
if datemode not in (0, 1):
print "Error with Excel date mode", xldate
sys.exit()
xldays = int(xldate)+1
if xldays >= 1000000:
print "Error with Excel date", xldate
print "Cannot handle dates above 25/11/4637 (equals 999999 days post 1900)"
sys.exit()
if (xldays + 693594 + (1462 * datemode))<=0:
print "Error with Excel date", xldate
print "Cannot handle dates below 1/1/1 (equals 693594 days pre 1900)"
sys.exit()
return datetime.datetime.fromordinal(xldays + 693594 + (1462 * datemode))
#######################################################################
# Function to reconstruct a character across the tree using parsimony #
#######################################################################
def parsimony_reconstruction(treeObject, namecolours, colours, transformation="acctran"):
def make_transistion_matrix():
for name in namecolours:
if not namecolours[name] in transition_matrix:
transition_matrix[colours[namecolours[name]]]={}
if not (0,0,0) in transition_matrix:
transition_matrix[(0,0,0)]={}
for colour in transition_matrix:
for colourb in transition_matrix:
if colour==colourb:
transition_matrix[colour][colourb]=0
else:
transition_matrix[colour][colourb]=1
def sankoff(treeObject, node=-1):
if node==-1:
node=treeObject.root
daughters=treeObject.node(node).get_succ()
if treeObject.is_terminal(node):
node_data=treeObject.node(node).get_data()
node_data.comment["sankoff"]={}
if "name_colour" in node_data.comment:
node_data.comment["branch_colour"]=node_data.comment["name_colour"]
else:
node_data.comment["branch_colour"]=(0,0,0)
for state in transition_matrix.keys():
node_data.comment["sankoff"][state]=100
node_data.comment["sankoff"][node_data.comment["branch_colour"]]=0
treeObject.node(node).set_data(node_data)
else:
node_data=treeObject.node(node).get_data()
node_data.comment["sankoff"]={}
for state in transition_matrix.keys():
node_data.comment["sankoff"][state]=0
for daughter in daughters:
treeObject=sankoff(treeObject, daughter)
daughter_sankoff=treeObject.node(daughter).get_data().comment["sankoff"]
for state in transition_matrix.keys():
min_cost=100000
for comparison_state in daughter_sankoff.keys():
cost=transition_matrix[state][comparison_state]+daughter_sankoff[comparison_state]
if cost<min_cost:
min_cost=cost
node_data.comment["sankoff"][state]+=min_cost
treeObject.node(node).set_data(node_data)
return treeObject
def sankoff_second_traversal(treeObject, node=-1, transformation="acctran"):
if node==-1:
node=treeObject.root
daughters=treeObject.node(node).get_succ()
if node==treeObject.root:
min_cost_states=set()
min_cost=100000
for state in transition_matrix.keys():
cost=treeObject.node(node).get_data().comment["sankoff"][state]
if cost<min_cost:
min_cost=cost
min_cost_states=set()
min_cost_states.add(state)
elif cost==min_cost:
min_cost_states.add(state)
root_states=set()
if len(min_cost_states)>1 and (0,0,0) in min_cost_states:
min_cost_states.remove((0,0,0))
if len(min_cost_states)>1:# and transformation=="deltran":
parent_states_min_cost={}
for state in min_cost_states:
parent_states_min_cost[state]=[set()]
daughter_min_cost_states=set()
daughter_min_cost=100000
for comparison_state in transition_matrix.keys():
cost=0
for daughter in daughters:
cost+=transition_matrix[state][comparison_state]+treeObject.node(daughter).get_data().comment["sankoff"][comparison_state]
if cost<daughter_min_cost:
daughter_min_cost=cost
daughter_min_cost_states=set()
daughter_min_cost_states.add(comparison_state)
parent_states_min_cost[state]=[set(),cost]
parent_states_min_cost[state][0].add(comparison_state)
elif cost==daughter_min_cost:
daughter_min_cost_states.add(comparison_state)
parent_states_min_cost[state][0].add(comparison_state)
min_cost=100000
for state in parent_states_min_cost:
if parent_states_min_cost[state][1]<min_cost:
min_cost=parent_states_min_cost[state][1]
root_states=set()
for state in parent_states_min_cost:
if parent_states_min_cost[state][1]==min_cost:
for base in parent_states_min_cost[state][0]:
root_states.add(base)
if len(root_states)>0:
min_cost_states=root_states
node_data=treeObject.node(node).get_data()
possible_states=list(min_cost_states)
#shuffle(possible_states)
if len(possible_states)>1:
#print sitenumber, possible_states
min_cost_state=possible_states[0]
min_cost=100000
for state in possible_states:
for comparison_state in transition_matrix.keys():
cost=transition_matrix[state][comparison_state]+treeObject.node(daughters[0]).get_data().comment["sankoff"][comparison_state]
if cost<min_cost:
min_cost=cost
min_cost_state=state
possible_states=[min_cost_state]
#print sitenumber, possible_states
node_data.comment["branch_colour"]=possible_states[0]
treeObject.node(node).set_data(node_data)
node_data=treeObject.node(node).get_data()
node_state=node_data.comment["branch_colour"]
node_state_set=set(node_state)
for daughter in daughters:
daughter_data=treeObject.node(daughter).get_data()
daughter_sankoff=daughter_data.comment["sankoff"]
min_cost_states=set()
min_cost=100000
for comparison_state in daughter_sankoff.keys():
cost=transition_matrix[node_state][comparison_state]+daughter_sankoff[comparison_state]
if cost<min_cost:
min_cost=cost
min_cost_states=set()
min_cost_states.add(comparison_state)
elif cost==min_cost:
min_cost_states.add(comparison_state)
if len(min_cost_states)>1 and (0,0,0) in min_cost_states:
min_cost_states.remove((0,0,0))
if len(min_cost_states)>1:
if transformation=="acctran":
min_cost_states=min_cost_states.difference(node_state_set)
elif transformation=="deltran":
min_cost_states_new=min_cost_states.intersection(node_state_set)
if len(min_cost_states_new)==0:
min_cost_states=min_cost_states
else:
min_cost_states=min_cost_states_new
possible_states=list(min_cost_states)
shuffle(possible_states)
daughter_data.comment["branch_colour"]=possible_states[0]
treeObject.node(daughter).set_data(daughter_data)
sankoff_second_traversal(treeObject, daughter, transformation=transformation)
return treeObject
def print_node_sankoff(node):
for daughter in treeObject.node(node).get_succ():
print_node_sankoff(daughter)
if "branch_colour" in treeObject.node(node).get_data().comment:
print node, treeObject.node(node).get_data().comment["branch_colour"]
else:
print node, "unknown"
def print_count_of_changes_to_state(treeObject):
node=treeObject.root
changes={}
print "here"
def get_node_change(node):
for daughter in treeObject.node(node).get_succ():
if treeObject.node(node).get_data().comment["branch_colour"]!= treeObject.node(daughter).get_data().comment["branch_colour"]:
if not treeObject.node(daughter).get_data().comment["branch_colour"] in changes:
changes[treeObject.node(daughter).get_data().comment["branch_colour"]]=0
changes[treeObject.node(daughter).get_data().comment["branch_colour"]]+=1
get_node_change(daughter)
print_node_sankoff(node)
for change in changes:
print change, changes[change]
transition_matrix={}
make_transistion_matrix()
treeObject=sankoff(treeObject, treeObject.root)
treeObject=sankoff_second_traversal(treeObject, treeObject.root, transformation=transformation)
print_count_of_changes_to_state(treeObject)
return treeObject
####################################################
# Function to round floats to n significant digits #
####################################################
def round_to_n(x, n):
if n < 1:
raise ValueError("number of significant digits must be >= 1")
# Use %e format to get the n most significant digits, as a string.
format = "%." + str(n-1) + "e"
as_string = format % x
if x>=10 or x<=-10:
return int(float(as_string))
else:
return float(as_string)
##############################################################################################################
# Function to convert features with subfeatures (e.g. pseudogenes) to a list of locations of the subfeatures #
##############################################################################################################
def iterate_subfeatures(feature, locations):
if hasattr(feature, "sub_features") and len(feature.sub_features)>0:
for subfeature in feature.sub_features:
locations=iterate_subfeatures(subfeature, locations)
else:
locations.append((feature.location.start.position+1, feature.location.end.position+1))
return locations
####################################################
# Function to get the pixel width of a text string #
####################################################
def get_text_width(font, size, text):
c = Canvas(test,pagesize=pagesize)
length= c.stringWidth(str(text),font,size)
return length
###############################################################################
# Function to find the best font size for a string to fit in a certain length #
###############################################################################
def set_text_width(font, size, length, text):
c = Canvas(test,pagesize=pagesize)
strlen=float("Inf")
while strlen>length:
strlen= c.stringWidth(text,font,size)
size-=0.1
return size
##########################################
# Function to produce colours for a list #
##########################################
def get_colours_for_list(data_list, data_type="discrete", data_max=float("-Inf"), data_min=float("Inf"), direction="anticlockwise", start_angle=0.0, end_angle=240, start_saturation=0.8, end_saturation=0.8, start_value=0.9, end_value=0.9):
s_start=control.metadata_colour_start_saturation
s_end=control.metadata_colour_end_saturation
v_start=control.metadata_colour_start_value
v_end=control.metadata_colour_end_value
# start_angle=control.metadata_colour_start_angle
# end_angle=control.metadata_colour_end_angle
# direction=control.metadata_colour_direction
print start_angle, end_angle, direction
colour_db={}
if data_type=="continuous" and data_max==float("-Inf"):
data_max=max(data_list)
if data_type=="continuous" and data_min==float("Inf"):
data_min=min(data_list)
if direction=="clockwise" and start_angle>end_angle:
rotation_degrees=start_angle-end_angle
direction_multiplier=-1
elif direction=="clockwise":
rotation_degrees=start_angle+(360-end_angle)
direction_multiplier=-1
elif direction in ["anticlockwise", "counterclockwise"] and start_angle>end_angle:
rotation_degrees=(360-start_angle)+end_angle
direction_multiplier=1
else:
rotation_degrees=end_angle-start_angle
direction_multiplier=1
if len(data_list)==1 and data_type=="discrete":
colour_db[data_list[0]]=colors.Color(1, 0, 0)
elif len(data_list)==2 and data_type=="discrete":
colour_db[data_list[0]]=colors.Color(0, 0, 1)
colour_db[data_list[1]]=colors.Color(1, 0, 0)
elif data_type=="continuous":
for y, name in enumerate(data_list):
value=name
if value<data_min:
value=data_min
elif value>data_max:
value=data_max
proportion=((float(value)-data_min)/((data_max-data_min)))
h=(start_angle/360)+(direction_multiplier*(((proportion/360)*rotation_degrees)))
v=v_start+(proportion*(v_end-v_start))
s=s_start+(proportion*(s_end-s_start))
red, green, blue = hsv_to_rgb(h,s,v)
colour_db[name]=colors.Color(float(red), float(green), float(blue))
elif len(data_list)>2:
if data_type=="discrete":
for y, name in enumerate(data_list):
proportion=(float(y)/(len(data_list)-1))
h=(start_angle/360)+(direction_multiplier*(((proportion/360)*rotation_degrees)))
v=v_start+(proportion*(v_end-v_start))
s=s_start+(proportion*(s_end-s_start))
red, green, blue = hsv_to_rgb(h,s,v)
colour_db[name]=colors.Color(float(red), float(green), float(blue))
return colour_db
##############################################
# Function to convert RGB ints to RGB tuples #
##############################################
def rgbint2rgbtuple(RGBint):
blue = RGBint & 255
green = (RGBint >> 8) & 255
red = (RGBint >> 16) & 255
return (red, green, blue)
def hex_to_rgb(value):
value = value.lstrip('#')
lv = len(value)
return tuple(int(value[i:i+lv/3], 16) for i in range(0, lv, lv/3))
##############################################
# Function toread a tree file using dendropy #
##############################################
def read_dendropy_tree(treefile):
def log_branchlengths(t):
print "Log transforming branch lengths"
for node in t.postorder_node_iter():
if node.edge_length!=None:
node.edge_length=log(node.edge_length+1)
#Some fixes to dendropy to make it read annotations properly
dendropy.dataio.nexustokenizer.NHX_COMMENT_FIELD_PATTERN = re.compile(r'(.+?)=({.+?}|.+?)(,|$)')
dendropy.dataio.nexustokenizer.FIGTREE_COMMENT_FIELD_PATTERN = re.compile(r'(.+?)=({.+?}|.+?)(,|$)')
#And to reroot properly if the tree is already rooted on the midpoint
def reroot_at_midpoint(self, update_splits=False, delete_outdegree_one=True):
"""
Reroots the tree at the the mid-point of the longest distance between
two taxa in a tree.
Sets the rooted flag on the tree to True.
If `update_splits` is True, then the edges' `split_bitmask` and the tree's
`split_edges` attributes will be updated.
If the *old* root of the tree had an outdegree of 2, then after this
operation, it will have an outdegree of one. In this case, unless
`delete_outdegree_one` is False, then it will be
removed from the tree.
"""
from dendropy import treecalc
pdm = treecalc.PatristicDistanceMatrix(self)
n1, n2 = pdm.max_dist_nodes
plen = float(pdm.max_dist) / 2
mrca_node = pdm.mrca(n1.taxon, n2.taxon)
#assert mrca_node is self.mrca(taxa=[n1.taxon, n2.taxon])
#mrca_node = self.mrca(taxa=[n1.taxon, n2.taxon])
cur_node = n1
break_on_node = None # populated *iff* midpoint is exactly at an existing node
target_edge = None
head_node_edge_len = None
# going up ...
while cur_node is not mrca_node:
if str(cur_node.edge.length)==str(plen):
break_on_node = cur_node
#FIX
break #when find the midpoint, it should break the loop
elif cur_node.edge.length > plen:
target_edge = cur_node.edge
head_node_edge_len = plen #cur_node.edge.length - plen
plen = 0
break
elif cur_node.edge.length < plen:
plen -= cur_node.edge.length
cur_node = cur_node.parent_node
else:
print "Error midpoint rooting tree"
sys.exit()
assert break_on_node is not None or target_edge is not None
if break_on_node:
self.reseed_at(break_on_node, update_splits=False, delete_outdegree_one=delete_outdegree_one)
new_seed_node = break_on_node
else:
tail_node_edge_len = target_edge.length - head_node_edge_len
old_head_node = target_edge.head_node
old_tail_node = target_edge.tail_node
old_tail_node.remove_child(old_head_node)
new_seed_node = dendropy.dataobject.Node()
new_seed_node.add_child(old_head_node, edge_length=head_node_edge_len)
old_tail_node.add_child(new_seed_node, edge_length=tail_node_edge_len)
self.reseed_at(new_seed_node, update_splits=False, delete_outdegree_one=delete_outdegree_one)
self.is_rooted = True
if update_splits:
self.update_splits(delete_outdegree_one=False)
return self.seed_node
dendropy.dataobject.Tree.reroot_at_midpoint=reroot_at_midpoint
def _parse_taxlabels_statement(self, taxon_set=None):
"""
Processes a TAXLABELS command. Assumes that the file reader is
positioned right after the "TAXLABELS" token in a TAXLABELS command.
"""
if taxon_set is None:
taxon_set = self._get_taxon_set()
token = self.stream_tokenizer.read_next_token()
while token != ';':
label = token
if len(taxon_set) >= self.file_specified_ntax and not self.attached_taxon_set:
raise self.too_many_taxa_error(taxon_set=taxon_set, label=label)
taxon = taxon_set.require_taxon(label=label)
self.stream_tokenizer.store_comment_metadata(taxon)
self.stream_tokenizer.store_comments(taxon)
token = self.stream_tokenizer.read_next_token()
dendropy.dataio.nexusreader_py.NexusReader._parse_taxlabels_statement=_parse_taxlabels_statement
#Try opening the tree using various schemas
opened=False
for treeschema in ["nexus", "newick"]:#["beast-summary-tree", "nexus", "newick"]:
try:
t = dendropy.Tree.get_from_path(treefile, schema=treeschema, as_rooted=True, preserve_underscores=True, case_insensitive_taxon_labels=False, set_node_attributes=True, extract_comment_metadata=True)
opened=True
t.schema=treeschema
break
except dendropy.utility.error.DataParseError:
continue
except ValueError as e:
print "Encountered ValueError while trying to read tree file as", treeschema+":", e
continue
if not opened:
print "Failed to open tree file"
sys.exit()
# for node in t.postorder_node_iter():
# print node.label, node.edge.length
#log the branch lengths if the option has been chosen
if options.log_branches:
log_branchlengths(t)
#some code here to make the branch lengths equal - not actually possible to choose this yet
t.draw_scale=True
if options.branch_labels=="brlen":
t.draw_scale=False
equal_branches=False
if equal_branches:
for node in t.postorder_node_iter():
if node.edge_length!=None:
node.edge_length=0.1
t.draw_scale=False
#Midpoint root if the option is selected
if options.midpoint:
print "Midpoint rooting tree"
if t.schema=="beast-summary-tree":
print "Warning: Midpoint rooting a BEAST tree may destroy temporal information represented in node positions"
t.reroot_at_midpoint(update_splits=True)
#Ladderise the tree if the option is selected
if options.ladderise=="left":
print "ladderising tree to the left"
#ladderise the tree right
t.ladderize(ascending=False)
elif options.ladderise=="right":
print "ladderising tree to the right"
#ladderise the tree right
t.ladderize(ascending=True)
#Make sure the tree is rooted on an edge rather than a node
if t.is_unrooted:
print "Tree is unrooted. Rooting it now."
t.is_rooted=True
t.update_splits(delete_outdegree_one=False)
root = t.seed_node
root_children = root.child_nodes()
if len(root_children) != 2:
print "Tree rooted at node. Rerooting on first edge from that node."
t.reroot_at_edge(root_children[0].edge, update_splits=True)
#print the tree in ascii as a cladogram
#print(t.as_ascii_plot())
#print the tree in ascii including branch lengths
#print(t.as_ascii_plot(plot_metric='length'))
for node in t.postorder_node_iter():
if node.edge_length==None:
node.edge_length=0
for node in t.postorder_node_iter():
for x, a in enumerate(node.annotations):
if isinstance(a.value, str):
a.value=a.value.replace('"','')
try:
node.annotations[x].value=float(a.value)
except:
node.annotations[x].value=a.value
# print node.annotations[x]
elif isinstance(a.value, list):
for y in xrange(len(a.value)):
if isinstance(a.value[y], str):
a.value[y]=a.value[y].replace('"','')
node.annotations[x].value[y]=a.value[y]
try:
node.annotations[x].value=map(float,node.annotations[x].value)
except:
if a.name=="!hilight" and len(a.value)==3 and len(a.value[2])>1 and a.value[2][0]=="#":
try:
rgbint=int(a.value[2][1:])
r,g,b=rgbint2rgbtuple(rgbint)
except:
try:
r,g,b=hex_to_rgb(a.value[2])
except:
print a.value[1:]
break
node.annotations[x].name="Figtree_hilight"
node.annotations[x].value=colors.Color(float(r)/255,float(g)/255,float(b)/255, 0.5)
if isinstance(a.value, str):
if a.name=="!color" and len (a.value)>1 and a.value[0]=="#":
try:
rgbint=int(a.value[1:])
r,g,b=rgbint2rgbtuple(rgbint)
if g!=0:
print r,g,b, a.value[1:]
except:
try:
r,g,b=hex_to_rgb(a.value)
except:
print "Figtree hilight value not a valid colour:", a.value[1:]
break
node.annotations[x].name="Figtree_colour"
node.annotations[x].value=colors.Color(float(r)/255,float(g)/255,float(b)/255)
#parse leaf annotation information
for leaf in t.leaf_iter():
#print leaf.taxon
if hasattr(leaf, "taxon"):
if hasattr(leaf.taxon, "annotations"):
for x, a in enumerate(leaf.taxon.annotations):
if isinstance(a.value, str):
a.value=a.value.replace('"','')
try:
leaf.taxon.annotations[x].value=float(a.value)
except:
leaf.taxon.annotations[x].value=a.value
elif isinstance(a.value, list):
for y in xrange(len(a.value)):
if isinstance(a.value[y], str):
a.value[y]=a.value[y].replace('"','')
leaf.taxon.annotations[x].value[y]=a.value[y]
try:
leaf.taxon.annotations[x].value=map(float,leaf.taxon.annotations[x].value)
except:
continue
# print leaf.taxon.label, a.name, a.value
if isinstance(a.value, str) and a.name=="!color" and len (a.value)>1 and a.value[0]=="#":
try:
rgbint=int(a.value[1:])
r,g,b=rgbint2rgbtuple(rgbint)
except:
try:
r,g,b=hex_to_rgb(a.value)
except:
print a.value[1:]
break
leaf.taxon.annotations[x].name="Figtree_colour"
leaf.taxon.annotations[x].value=colors.Color(float(r)/255,float(g)/255,float(b)/255)
#Check if tree is from BEAST (not sure best way to check this, but will check from height on root node)
root=t.seed_node
t.is_BEAST=False
if hasattr(root, "annotations"):
for a in root.annotations:
if a.name=="height" or a.name=="CI":
t.is_BEAST=True
break
if t.is_BEAST and options.date_separator!="":
min_leaf_sampling_date=float("Inf")
max_leaf_sampling_date=float("-Inf")
for leaf in t.leaf_iter():
try:
last_part_of_name=str(leaf.taxon).split(options.date_separator)[-1]
except:
"Found name that failed to split on", options.date_separator+":", leaf.taxon
try:
leaf.sampling_date=int(float(last_part_of_name))
if leaf.sampling_date>max_leaf_sampling_date:
max_leaf_sampling_date=leaf.sampling_date
if leaf.sampling_date<min_leaf_sampling_date:
min_leaf_sampling_date=leaf.sampling_date
except:
if last_part_of_name=="NA":
leaf.sampling_date=None
else:
"Failed to find date when splitting on", options.date_separator+":", leaf.taxon
if min_leaf_sampling_date!=float("Inf") and max_leaf_sampling_date!=float("-Inf"):
t.min_leaf_sampling_date=min_leaf_sampling_date
t.max_leaf_sampling_date=max_leaf_sampling_date
else:
print "Failed to find any dates for taxa"
if t.is_BEAST and options.most_recent>0:
t.max_leaf_sampling_date=options.most_recent
min_height=float("Inf")
max_height=float("-Inf")
for leaf in t.leaf_iter():
if hasattr(leaf, "annotations"):
for a in leaf.annotations:
if a.name=="height":
if a.value>max_height:
max_height=a.value
elif a.value<min_height:
min_height=a.value
t.min_leaf_sampling_date=options.most_recent-max_height
#colour branches by annotation
if options.colour_by_annotation!="":
annotation_list=[]
for node in t.postorder_node_iter():
if hasattr(node, "annotations"):
for a in node.annotations:
if a.name==options.colour_by_annotation and a.value not in annotation_list:
annotation_list.append(a.value)
annotation_list.sort()
colour_dictionary=get_colours_for_list(annotation_list, data_type="continuous", start_angle=240.0, end_angle=0.0)
for node in t.postorder_node_iter():
if hasattr(node, "annotations"):
for a in node.annotations:
if a.name==options.colour_by_annotation and a.value in colour_dictionary:
node.edge_colour=colour_dictionary[a.value]
return t
def get_leaf_nodes(t):
leaves=[]
for leaf in t.leaf_iter():
leaves.append(leaf)
return leaves
def get_vertical_positions_of_leaves(t):
#Set the vertical position of the leaves
vert_scaling_factor=float(height-20)/leaf_count
count=1.0
for leaf in t.leaf_iter():
leaf.vertpos=vert_scaling_factor*count
count+=1
return
mycolours=[(1,0,0),(0,1,0),(0,0,1)]
for leaf in t.leaf_iter():
leaf.edge_colours=[set([mycolours[randrange(0, 3)]])]
postorder_node_list=[]
for node in t.postorder_node_iter():
postorder_node_list.append(node)
preorder_node_list=[]
for node in t.preorder_node_iter():
preorder_node_list.append(node)