-
Notifications
You must be signed in to change notification settings - Fork 2
/
PhosMHCpred.py
executable file
·158 lines (132 loc) · 4.29 KB
/
PhosMHCpred.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python
######
##
##
## PhosMHCpred is a predictor for HLA-I - phosphorylated ligand interactions.
##
## CITATION:
## If you use PhosMHCpred in a publication, please cite:
## Solleder et al., BioRxiv (2018).
##
## FOR-PROFIT USERS:
## If you plan to use PhosMHCpred or any data provided with the script in any for-profit
## application, you are required to obtain a separate license. To do so, please contact
## eauffarth@licr.org or lfoit@licr.org at the Ludwig Institute for Cancer Research Ltd.
##
## CONTACT:
## Marthe Solleder (marthe.solleder@unil.ch) and David Gfeller (david.gfeller@unil.ch).
##
##
######
## import packages
import argparse, os, sys
from operator import itemgetter
import numpy as np
## import function for predictor
from lib.PhosMHCpred_score import main as scoreF
## path to predictor directory
path = 'Path/To/Predictor/'
pathTD = path + '/trainingData/'
## inpput arguments
parser = argparse.ArgumentParser()
parser.add_argument( '-p',
'--peptides',
help='Give a list of peptides for prediction. \
Accpted format is a text file with one peptide per line or comma-separated in \
the command line (i.e. ASEILPPtL,EMDPVtQLY,FTDIEtLKQ)',
required=True )
parser.add_argument( '-a',
'--alleles',
help='Give a list of alleles for which the peptides should be predicted, \
text file with linewise list of alleles or comma-separated in the command line.',
required=True )
parser.add_argument( '-o',
'--output',
help='Location for output file. Default: results.txt is stored in current directory.',
default='' )
args = parser.parse_args()
## read peptides
listPeptides = []
if '.txt' in args.peptides:
listPeptides = open(args.peptides, 'r').read().split('\n')
listPeptides = list(filter(None, listPeptides))
else:
listPeptides = args.peptides.split(',')
## checking residues
aa = ['A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','s','t','y']
for p in listPeptides:
if len(p) != 9:
print 'Only 9-mer peptides allowed.'
print p, 'is length', len(p)
exit(1)
for r in p:
if r not in aa:
print 'Not supported residue present in peptides: ', r
print 'Allowed alphabet: ACDEFGHIKLMNPQRSTVWYsty.'
exit(1)
## length of peptides
length = str(len(listPeptides[0]))
## read / define alleles
listAlleles = []
if '.txt' in args.alleles:
listAlleles = open(args.alleles, 'r').read().split('\n')
listAlleles = list(filter(None, listAlleles))
else:
listAlleles = args.alleles.split(',')
listAlleles = sorted(listAlleles)
## check if requested alleles are in training data
allelesTD = open( pathTD + 'alleles.txt', 'r' ).read().split('\n')
for x in listAlleles:
if x not in allelesTD:
print 'Allele', x, 'not provided in training data...'
print 'Check alleles.txt in trainingData directory for allowed alleles.'
exit(1)
## path for output [optional]
if args.output == '':
pathOutput = './results.txt'
else:
pathOutput = args.output
## start predictions
print 'Predicting ... '
predictionResults = {}
predictionResultsRanks = {}
for hla in listAlleles:
print hla
lScores = []
## read pwm
pwm = np.loadtxt(pathTD + length + 'mers/' + hla + '_PWM.txt')
## calculate peptide score
for peptide in listPeptides:
if peptide not in predictionResults:
predictionResults[peptide] = {}
if hla not in predictionResults[peptide]:
predictionResults[peptide][hla] = 0
score = scoreF( peptide,
pwm )
predictionResults[peptide][hla] = score
lScores.append(score)
temp = np.asarray(lScores).argsort()
ranks = np.empty_like(temp)
ranks[temp] = np.arange(len(lScores))
for p in range(len(listPeptides)):
peptide = listPeptides[p]
if peptide not in predictionResultsRanks:
predictionResultsRanks[peptide] = {}
if hla not in predictionResultsRanks[peptide]:
predictionResultsRanks[peptide][hla] = 0
predictionResultsRanks[peptide][hla] = -1 * (ranks[p] - len(listPeptides))
## print results
output = open(pathOutput, 'w')
output.write('peptide')
for a in listAlleles:
output.write('\t' + a + '_score')
output.write('\t' + a + '_rank')
output.write('\n')
for p in listPeptides:
output.write(p)
for a in listAlleles:
output.write( '\t' + str(predictionResults[p][a]) )
output.write( '\t' + str(predictionResultsRanks[p][a]) )
output.write('\n')
output.close()
print '... done!'