-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLG_PLOT.py
executable file
·151 lines (100 loc) · 3.54 KB
/
LG_PLOT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#author=Tyler Fording
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def _marker_counts(cpL_path):
    '''
    Count the entries makedict() stores for each linkage group of one cpL file.

    :param cpL_path: PATH to cpL file
    :return: dictionary mapping linkage group id (int) -> number of stored entries
    '''
    return {int(group): len(entries) for group, entries in makedict(cpL_path).items()}


def makeDataFrame(cpL_mother, cpL_father):
    '''
    Build the per-linkage-group marker table for both parents and plot it.

    The table has one row per linkage group id found in either file, sorted
    numerically, with the columns 'Linkage Group' (label 'Group #<id>'),
    'Mother' and 'Father' (entry counts). A group that is present in only one
    file gets a count of 0 for the other parent, so the number of groups is
    taken from the data instead of being fixed at 64.

    :param cpL_mother: PATH to the mother's cpL file
    :param cpL_father: PATH to the father's cpL file
    :return: None; the table is handed to barplot()
    '''
    mother_counts = _marker_counts(cpL_mother)
    father_counts = _marker_counts(cpL_father)

    # Union of ids keeps both parents aligned row-by-row even when one file
    # lacks a group the other one has.
    groups = sorted(set(mother_counts) | set(father_counts))

    df = pd.DataFrame(
        {
            'Linkage Group': ['Group #' + str(group) for group in groups],
            'Mother': [mother_counts.get(group, 0) for group in groups],
            'Father': [father_counts.get(group, 0) for group in groups],
        },
        columns=['Linkage Group', 'Mother', 'Father'],
    )
    barplot(df)
def barplot(df):
    '''
    Draw a grouped bar chart of marker counts per linkage group and display it.

    :param df: DataFrame with the columns 'Linkage Group' (tick labels),
               'Mother' and 'Father' (bar heights)
    :return: None; the figure is shown with plt.show()
    :raises ValueError: if df has no rows
    '''
    pos = list(range(len(df['Linkage Group'])))
    if not pos:
        raise ValueError('no linkage groups to plot')

    width = 0.40
    fig, ax = plt.subplots(figsize=(20, 10))

    # Mother bars sit on the integer positions, father bars one bar-width to
    # the right of them. The labels feed the legend directly.
    ax.bar(pos, df['Mother'], width, alpha=.70, color='#EE3224', label='Mother')
    ax.bar([p + width for p in pos], df['Father'], width,
           alpha=.70, color='#F78F1E', label='Father')

    ax.set_ylabel('Number of Markers')
    ax.set_title('Number of Markers Per Linkage Group')

    # Ticks (and therefore the vertical grid lines) are placed two bar-widths
    # to the right of each mother bar's centre.
    ax.set_xticks([p + 2 * width for p in pos])
    ax.set_xticklabels(df['Linkage Group'], rotation=45, ha='right')

    ax.set_xlim(min(pos) - width, max(pos) + width * 4)
    # Headroom of 6000 above the tallest bar of EITHER parent, so father bars
    # are never clipped when they exceed the mother's maximum.
    tallest = max(df['Mother'].max(), df['Father'].max())
    ax.set_ylim([0, tallest + 6000])

    ax.legend(loc='upper right')
    # Fixed vertical divider just right of the 23rd group's tick (22.80).
    # NOTE(review): what this boundary separates is not shown here - confirm.
    ax.axvline(x=22.90, color='k')
    ax.grid()
    plt.show()
def makedict(cpL_path):
    '''
    Group the lines of a tab-separated cpL (CHR POS LG) file by linkage group.

    Each line is split on tabs and its third field (index 2) becomes the
    dictionary key, kept as the string read from the file. The value is a list
    of the whole, unsplit lines carrying that key, in file order, so the
    length of a value is the number of lines in that group. Blank lines are
    skipped.

    :param cpL_path: PATH to cpL file
    :return: dictionary mapping LG (str) -> list of whole lines (str)
    :raises IndexError: if a non-blank line has fewer than three tab-separated fields
    '''
    dataDict = {}
    # The context manager closes the file even if a malformed line raises.
    with open(cpL_path, 'r') as fh:
        for raw in fh:
            # Drop the line terminator in one pass so '\r\n' endings are
            # handled as well as '\n'.
            wline = raw.rstrip('\r\n')
            if not wline.strip():
                continue
            group = wline.split('\t')[2]
            # The first line of a group is stored the same way as later ones:
            # as the whole line, inside a list.
            dataDict.setdefault(group, []).append(wline)
    return dataDict
if __name__ == "__main__":
    # Command-line entry point. Both paths are required positionals, so a
    # missing argument produces an argparse usage error instead of passing
    # None on to open().
    parser = argparse.ArgumentParser(
        description="Plot the number of markers per linkage group for a mother and a father cpL file")
    # cpL refers to CHR POS LG (.tsv)
    parser.add_argument("cpL_mother", type=str, help="Enter file path to a cpL file")
    parser.add_argument("cpL_father", type=str, help="Enter file path to the cpL to be parsed")
    args = parser.parse_args()
    makeDataFrame(args.cpL_mother, args.cpL_father)