# main.py -- 65 lines (49 loc), 1.98 KB
# (Web-page navigation text and the line-number gutter left over from the
# page export were removed here; they were not part of the program.)
import json
import wget
from rcsbsearch import Attr
import csv
def download(path, assembly_id=None):
    """Download one ``.pdb`` file from files.rcsb.org using ``wget``.

    Args:
        path: Destination prefix. It is concatenated directly with the file
            name, so a directory must end with a path separator.
        assembly_id: Identifier of the structure to fetch. When omitted, the
            module-level ``assemblyid`` variable is used, which is what
            existing ``download(path)`` call sites rely on.

    Raises:
        NameError: If ``assembly_id`` is omitted and no module-level
            ``assemblyid`` has been assigned yet.
    """
    if assembly_id is None:
        # Backward compatibility: older callers set the global loop variable
        # ``assemblyid`` and then call download(path) without an identifier.
        assembly_id = assemblyid
    file_name = assembly_id + ".pdb"
    url_download = "https://files.rcsb.org/download/" + file_name
    wget.download(url_download, path + file_name)
# Destination prefix for downloaded structures; download() concatenates it
# directly with the file name, so the trailing slash is required.
PDB_DIR = '/Users/sruthikurada/Documents/MIT PRIMES/Ergothionine/'

# RCSB search attributes used to match an entry by UniProt accession.
ACCESSION_ATTR = (
    "rcsb_polymer_entity_container_identifiers"
    ".reference_sequence_identifiers.database_accession"
)
DATABASE_NAME_ATTR = (
    "rcsb_polymer_entity_container_identifiers"
    ".reference_sequence_identifiers.database_name"
)
PROTEIN_COUNT_ATTR = "rcsb_entry_info.polymer_entity_count_protein"


def _build_uniprot_query(accession, single_protein_only):
    """Build the RCSB query matching entries for a UniProt accession.

    When ``single_protein_only`` is true the query is additionally
    restricted to entries whose protein polymer entity count equals 1.
    """
    query = (
        Attr(ACCESSION_ATTR).exact_match(accession)
        .and_(DATABASE_NAME_ATTR).exact_match("UniProt")
    )
    if single_protein_only:
        query = query.and_(PROTEIN_COUNT_ATTR).equals(1)
    return query


with open('mobidb_result.json', encoding='utf-8') as data_file:
    data = json.load(data_file)

# Open the CSV exactly once. Reopening it in 'w' mode for every row truncates
# the file each time, which discards the header and all earlier rows.
with open('jobs-sruthi.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["", "protein_file", "start", "end", "acc", "name"])

    row_num = 1
    for entry in data:
        acc = entry["acc"]
        final_assembly_id = None

        # Try the strict query (single protein entity) first, then fall back
        # to any entry that references the accession.
        for single_protein_only in (True, False):
            results = _build_uniprot_query(acc, single_protein_only).exec("entry")
            # The loop variable must remain the module-level name
            # `assemblyid`: download(path) reads it to know what to fetch.
            for assemblyid in results:
                download(PDB_DIR)
                final_assembly_id = assemblyid
                break  # only the first hit is used
            if final_assembly_id is not None:
                break

        if final_assembly_id is None:
            # No structure available: skip instead of failing on
            # `None + '.pdb'` and aborting the remaining accessions.
            print("No PDB entry found for %s; skipping" % acc)
            continue

        writer.writerow([
            row_num,
            final_assembly_id + '.pdb',
            1,  # start: the job always begins at position 1
            entry["length"],
            acc,
            entry["name"],
        ])
        row_num += 1