-
Notifications
You must be signed in to change notification settings - Fork 0
/
protein_alignment.py
103 lines (75 loc) · 3.2 KB
/
protein_alignment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import pymol
import os
# Usage (no GUI, quiet start-up): pymol -cq this_script_name.py
# Build a lookup from mutation id to the list of positions it affects.
# Each non-blank line of the TSV is expected to hold exactly two
# tab-separated fields: the mutation id and a ';'-separated list of
# amino-acid mutations.
mutation_id_to_mutation_aa = dict()
with open('./data/mutation_info.tsv', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines instead of ending the parse: stopping here
            # would silently drop every mutation listed after the gap.
            continue
        mutation_id, mutations_aa = line.split('\t')
        # Keep only the part between the 3-character prefix and the final
        # character of each entry (a string shaped like 'p.V600E' yields
        # '600'); presumably entries are HGVS-style protein changes.
        mutation_id_to_mutation_aa[mutation_id] = [
            mutation_aa[3:-1] for mutation_aa in mutations_aa.split(';')
        ]
# Group the file names found in data/structures by gene key.
# A name with two or more underscores is keyed by its first two
# '_'-separated fields; any other name is keyed by the file name minus its
# last four characters (the extension, for names ending in '.pdb').
cwd = os.getcwd()
structures_path = os.path.join(cwd, 'data/structures')
list_structure_3d = os.listdir(structures_path)
dict_structure_3d = dict()
for structure in list_structure_3d:
    fields = structure.split('_')
    gene_name = '_'.join(fields[:2]) if len(fields) > 2 else structure[:-4]
    dict_structure_3d.setdefault(gene_name, []).append(structure)
def super_prots():
    """Superimpose every mutant structure on its wild type and render a PNG.

    For each gene key in ``dict_structure_3d`` the reference structure is
    ``data/structures/<gene>.pdb``. Every file listed for that gene whose
    name contains ``_COSM`` is loaded next to it, the mutated residues
    recorded in ``mutation_id_to_mutation_aa`` are drawn as red spheres, the
    two structures are superimposed with ``cmd.super`` and a ray-traced
    image is written to ``./data/alignments/aligned_<mutant name>.png``.
    The PyMOL session is reinitialized after each image.

    Raises:
        KeyError: if the identifier taken from a mutant file name is not a
            key of ``mutation_id_to_mutation_aa``.
    """
    path = 'data/structures/'
    # Make sure the output directory exists before any image is written.
    os.makedirs('./data/alignments', exist_ok=True)

    for protein in dict_structure_3d:
        prot_1 = f'{path}{protein}.pdb'
        for protein_mut in dict_structure_3d[protein]:
            if '_COSM' not in protein_mut:
                # Only mutant structures are aligned against the reference.
                continue

            prot_2 = f'{path}{protein_mut}'
            # The mutation id is the last '_'-separated field of the file
            # name with its 4-character extension removed.
            mutation_id = prot_2.split('_')[-1][:-4]
            print(prot_2)
            pos = mutation_id_to_mutation_aa[mutation_id]
            pos = '+'.join(pos)

            # If you are using already downloaded pdb files, use load instead:
            pymol.cmd.load(prot_1, 'prot_1')
            pymol.cmd.load(prot_2, 'prot_2')
            # The parsed positions are residue numbers, so select with
            # `resi`; the `id` selector matches atom serial numbers and
            # would highlight unrelated atoms.
            pymol.cmd.select('SNP', f'resi {pos}')
            pymol.cmd.hide('all')
            pymol.cmd.set('ray_opaque_background', 0)

            # Show cartoon and paint them different
            pymol.cmd.show('cartoon', 'prot_1')
            pymol.cmd.color('orange', 'prot_1')
            pymol.cmd.show('cartoon', 'prot_2')
            pymol.cmd.color('lightblue', 'prot_2')
            pymol.cmd.show('sphere', 'SNP')
            pymol.cmd.color('red', 'SNP')

            # Align both proteins
            pymol.cmd.super('prot_1', 'prot_2')

            # Improve image quality, and get png.
            pymol.cmd.set('depth_cue', 0)
            pymol.cmd.set('spec_reflect', 0)
            pymol.cmd.set('cartoon_sampling', 15)
            pymol.cmd.set('ribbon_sampling', 15)
            pymol.cmd.set('antialias', 2)
            pymol.cmd.space('cmyk')

            # This creates high-quality images, but might take a long time to
            # process. Adjust parameters accordingly.
            alignment_name = 'aligned_' + prot_2.split('/')[-1][:-4]
            print(alignment_name)
            pymol.cmd.png(
                f'./data/alignments/{alignment_name}.png',
                width=500,
                dpi=100,
                ray=1,
            )
            # Start the next pair from an empty session.
            pymol.cmd.reinitialize()
# Run the alignment: super_prots() is called unconditionally, so the images
# are rendered as soon as this script is executed.
super_prots()