-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathRelabelChains.py
executable file
·128 lines (103 loc) · 5.88 KB
/
RelabelChains.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#! /usr/bin/env python
# Copyright (c) 2015 William Lees
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# pdb files created from an Amber trajectory do not contain chain IDs
# this script will reconstruct the chain IDs by consulting a reference pdb file
__author__ = 'William Lees'
__docformat__ = "restructuredtext en"
import sys
import argparse
def main(argv):
parser = argparse.ArgumentParser(description='Label the chains in an unlabelled pdb file, by consulting a reference.')
parser.add_argument('infile', help='file to process')
parser.add_argument('outfile', help='file to produce')
parser.add_argument('reference', help='reference file, including chain labels')
parser.add_argument('-c', '--chain', help='unlabelled file contains the specified chain only')
parser.add_argument('-r', '--replace_md_res', help='replace CIS, HIS in file with CYX, HID etc in reference, if found', action='store_true')
parser.add_argument('-d', '--delete_unreferenced', help='delete records found past the end of the reference file', action='store_true')
args = parser.parse_args()
res = []
old_resnum = -1
# Read reference file
with open(args.reference, "r") as rf:
for line in rf:
if line[0:6] == "ATOM ":
resnum = line[22:27]
resname = line[17:20]
chain = line[21]
# Add each residue to the list exactly once
if resnum != old_resnum:
if args.chain is None or chain == args.chain:
res.append([resnum, resname, chain])
old_resnum = resnum
res_iter = iter(res)
old_resnum = ""
old_ref_resnum = None
old_chain = ""
ref_chain_startnum = ""
inf_chain_startnum = ""
ref_res = None
last_warning = ''
error = None
print "Chain Ref. Start Ref. End Inf. Start Inf. End"
with open(args.infile, "r") as fi, open(args.outfile, "w") as fo:
try:
for line in fi:
if line[0:6] == "ATOM ":
resnum = line[22:27]
resname = line[17:20]
# For each residue, exactly once...
if resnum != old_resnum:
old_ref_resnum = ref_res[0] if ref_res else None
# Find the corresponding reference residue
ref_res = res_iter.next()
# Track chain changes
if old_chain != ref_res[2]:
if old_chain != "":
print "%5s %11s %8s %10s %8s" % (old_chain, ref_chain_startnum, old_ref_resnum, inf_chain_startnum, old_resnum)
ref_chain_startnum = ref_res[0]
inf_chain_startnum = resnum
old_chain = ref_res[2]
old_resnum = resnum
l = list(line)
# Here the algorithm expects that the infile and the reference file have the same ordering of the
# residue names and just differ in their chain and residue numbering (this also means that the chain
# ordering has to be the same and also that no new residues have been added in the ATOM records
# which might be the case with modelled loops)
# if there is a different order we output the following warnings or changes the resname to the pdb
# convention
if resname != ref_res[1]:
if args.replace_md_res and resname == 'HIS' and ref_res[1] in ['HID', 'HIE']:
l[17:20] = ref_res[1]
elif args.replace_md_res and resname == 'CYS' and ref_res[1] == 'CYX':
l[17:20] = ref_res[1]
else:
if resnum != last_warning:
print "Warning: at residue %s in infile, residue %s in infile differs from %s in reference." % (resnum, resname, ref_res[1])
last_warning = resnum
# Use reference residue number and chain
l[22:27] = ref_res[0]
l[21] = ref_res[2]
line = "".join(l)
fo.write(line)
except StopIteration:
if args.delete_unreferenced:
error = '\nInput file contains additional residues past the end of the reference file (possibly solvents): these have been omitted.'
fo.write('END')
else:
for line in fi:
fo.write(line)
print "%5s %11s %8s %10s %8s" % (old_chain, ref_chain_startnum, old_ref_resnum, inf_chain_startnum, old_resnum)
if error:
print error
if __name__ == "__main__":
main(sys.argv)