1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ ###############################>GENERAL-INFORMATION<###############################
4
+ """
5
+ Built with Python 3.6.5+
6
+
7
+ Author:
8
+ Filipe Dezordi
9
+ zimmer.filipe@gmail.com
10
+ https://dezordi.github.io/
11
+
12
+ """
13
+ ###############################>LIBRARIES<###############################
14
+ import pandas as pd
15
+ import numpy as np
16
+ import argparse , re
17
+ import seaborn as sns
18
+ import matplotlib .pyplot as plt
19
+ import matplotlib .style as style
20
+ from matplotlib .colors import ListedColormap
21
+
22
+
23
+ ###############################>ARGUMENTS<###############################
24
parser = argparse.ArgumentParser(description='Create beeswarm plots of .tax files')

# One (flags, keyword options) entry per command-line option, registered in
# this order so the generated --help output keeps the same layout.
_ARGUMENT_TABLE = [
    (("-in", "--input"), {
        'help': "TSV file (mos_tax.tsv)",
        'required': True,
    }),
    (("-md", "--mode"), {
        'help': "Create plots about Genus, Subgenus, Specie or Year? default = Genus",
        'default': 'Genus',
        'choices': ['Genus', 'Subgenus', 'Specie', 'Year'],
    }),
    (("-st", "--specifictax"), {
        'help': "Pass specific taxon to generate plots, e.g. Aedes Culex",
        'nargs': '+',
    }),
    (("-pt", "--plottype"), {
        'help': "Choose the type of graph, default = Bar",
        'default': 'Bar',
        'choices': ['Donut', 'Bar', 'Hist'],
    }),
    (("-gb", "--groupby"), {
        'help': "Treshold value to group taxon in 'Others' category, default = 30",
        'default': 30,
        'type': int,
    }),
    (("-cm", "--colormap"), {
        'help': "Choose seaborn colors, default=rainbow",
        'default': 'rainbow',
    }),
]
for _flags, _options in _ARGUMENT_TABLE:
    parser.add_argument(*_flags, **_options)

# Parsed once at module level; the names below are read as globals by the
# plotting function and by the main routine.
args = parser.parse_args()
input_file, plot_mode, plot_type = args.input, args.mode, args.plottype
specific_tax, group_by, color_palette = args.specifictax, args.groupby, args.colormap
###############################>SNS-STYLE<###############################
sns.set(style="ticks")
# Matplotlib colormap built from the chosen seaborn palette (used by the donut plot).
my_cmap = ListedColormap(sns.color_palette(color_palette))
41
+ ###############################>Functions<###############################
42
# Credit line drawn on the bar and donut figures.
_SOURCES_NOTE = ('Sources: plot(https://github.com/dezordi/mosquitotax), '
                 'taxonomy(http://mosquito-taxonomic-inventory.info/).')


def _count_table():
    """Return a one-column ('Number') frame counting rows per ``plot_mode`` value."""
    counts = pd.DataFrame(df[plot_mode].value_counts())
    counts.columns = ['Number']
    return counts


def _save_bar_plot(out_prefix):
    """Draw the per-taxon bar chart and save it as ``<out_prefix>.bar_plot.pdf``."""
    counts = _count_table()
    bar_plot = sns.barplot(x=counts.index.values, y='Number', data=counts,
                           palette=color_palette)
    sns.despine(fig=None, top=True, right=True, left=False, bottom=False,
                offset=None, trim=False)
    bar_plot.set_ylabel('Number of species')
    bar_plot.set_xlabel(plot_mode)
    bar_plot.set_xticklabels(bar_plot.get_xticklabels(), rotation=90)
    bar_plot.annotate(_SOURCES_NOTE, xy=(1, 0), xycoords='axes fraction',
                      fontsize=6, xytext=(0, 265), textcoords='offset points',
                      ha='right', va='top')
    plt.savefig(out_prefix + '.bar_plot.pdf', dpi=300, bbox_inches='tight')
    plt.clf()


def _save_donut_plot(out_prefix):
    """Draw the per-taxon donut chart and save it as ``<out_prefix>.donut_plot.pdf``."""
    counts = _count_table()
    hole = plt.Circle((0, 0), 0.5, color='white')
    donut_plot = counts.plot.pie(y='Number', cmap=my_cmap, radius=1.7, labels=None)
    donut_plot.set_ylabel('')
    # Legend entries are "<taxon> <count>". Building them directly (instead of
    # regex-scrubbing the repr of a list) also works for non-string taxa such
    # as years and for names that contain quote characters.
    labels = ['{} {}'.format(taxon, number)
              for taxon, number in zip(counts.index, counts['Number'])]
    donut_plot.legend(loc='center left', bbox_to_anchor=(1.2, 0.5), ncol=1,
                      fancybox=True, prop={'size': 12},
                      title=plot_mode + ' and Nº species', labels=labels)
    donut_plot.annotate(_SOURCES_NOTE, xy=(1, 0), xycoords='axes fraction',
                        fontsize=8, xytext=(30, -65), textcoords='offset points',
                        ha='right', va='top')
    # A white circle over the centre of the pie turns it into a donut.
    plt.gcf().gca().add_artist(hole)
    plt.savefig(out_prefix + '.donut_plot.pdf', dpi=300, bbox_inches='tight')
    plt.clf()


def _save_hist_plot(out_prefix):
    """Draw the distribution of the 'Year' column and save ``<out_prefix>.hist_plot.pdf``."""
    # Rows with any missing value are ignored. Work on a derived Series so the
    # shared module-level frame is not modified.
    years = df.dropna()['Year']
    # Collapse early years into coarse bins. Series.mask returns a new Series,
    # so the binning does not rely on chained assignment reaching the frame.
    years = years.mask(years <= 1800, 1800)
    years = years.mask((years > 1800) & (years <= 1850), 1850)
    years = years.mask((years > 1850) & (years <= 1900), 1900)
    data = [int(year) for year in years]
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # kept here so the figure and the supported seaborn versions are unchanged.
    sns.distplot(data)
    plt.savefig(out_prefix + '.hist_plot.pdf', dpi=300, bbox_inches='tight')
    plt.clf()


def taxonomy_distribuition(tsv_file, plot_type):
    """Render one plot of the loaded taxonomy table and save it as a PDF.

    The data and styling are read from module-level names: ``df`` (the table),
    ``plot_mode`` (the column to count), ``color_palette`` and ``my_cmap``.

    Parameters:
        tsv_file: path of the input file; used as the prefix of the output
            file name (``<tsv_file>.<kind>_plot.pdf``).
        plot_type: 'Bar', 'Donut' or 'Hist'. Any other value draws nothing.

    Returns:
        None. The figure is written to disk and the current figure is cleared.
    """
    if plot_type == 'Bar':
        _save_bar_plot(tsv_file)
    elif plot_type == 'Donut':
        _save_donut_plot(tsv_file)
    elif plot_type == 'Hist':
        _save_hist_plot(tsv_file)
88
+
89
+
90
+
91
if __name__ == '__main__':
    # Main routine: executed only when the script is started from the
    # command line.

    ###############################>DATAFRAME<###############################
    # `df` is deliberately a module-level name: taxonomy_distribuition reads
    # it as a global.
    # The input is tab-separated; a single-character separator also keeps
    # pandas from interpreting the separator as a regular expression.
    df = pd.read_csv(input_file, sep='\t', header=0)
    if specific_tax is not None:
        # Copy the filtered rows so the column assignment below acts on an
        # independent frame rather than on a slice of the original.
        df = df.loc[df[plot_mode].isin(specific_tax)].copy()
    # The histogram only uses the numeric 'Year' column; relabelling rare
    # values as 'Others' there (when --mode Year) would mix strings into it
    # and break the year binning, so grouping is skipped for 'Hist'.
    if plot_type != 'Hist':
        tax_count = df[plot_mode].value_counts()
        # Taxa with at most `group_by` rows are merged into one 'Others' category.
        rare_taxa = tax_count[tax_count <= group_by].index
        df[plot_mode] = df[plot_mode].mask(df[plot_mode].isin(rare_taxa), 'Others')

    taxonomy_distribuition(input_file, plot_type)
0 commit comments