Skip to content

Commit

Permalink
support fetch pdb method
Browse files Browse the repository at this point in the history
  • Loading branch information
minhuanli committed Sep 26, 2023
1 parent ab7e57c commit 8f3f936
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 2 deletions.
2 changes: 1 addition & 1 deletion SFC_Torch/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Top level API
from .Fmodel import SFcalculator
from .io import PDBParser
from .io import PDBParser, fetch_pdb

# Submodules
from . import utils
Expand Down
66 changes: 66 additions & 0 deletions SFC_Torch/io.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import gemmi
import torch
import numpy as np
import urllib.request, os
from tqdm import tqdm
import pandas as pd

from .utils import try_gpu

Expand Down Expand Up @@ -276,3 +279,66 @@ def from_atom_slices(self, atom_slices, inplace=False):
def savePDB(self, savefilename, include_header=True):
    '''
    Write the current model to a PDB file

    Parameters
    ----------
    savefilename : str
        Path of the PDB file to write
    include_header : bool, default True
        Forwarded to self.to_gemmi
    '''
    # Convert to a gemmi structure and let it serialize itself to disk
    self.to_gemmi(include_header=include_header).write_pdb(savefilename)

def _fetch_url_to_file(url, destination):
    '''
    Download url to destination, return 1 on success and 0 on failure
    '''
    try:
        urllib.request.urlretrieve(url, destination)
    except Exception:
        # Best-effort download: a missing entry or a network problem is
        # recorded as a failure instead of aborting the whole batch.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        return 0
    return 1


def fetch_pdb(idlist, outpath):
    '''
    Fetch pdb and mtz files from Protein Data Bank, with static urllib

    Parameters
    ----------
    idlist : [str] or str
        List of PDB ids, case insensitive. A single id given as a plain
        string is treated as a one-element list.
    outpath : str
        Root output folder, subfolders are created when missing

    Returns
    -------
    DataFrame of fetch stats, with columns
        code     : lower-case PDB id
        with_pdb : 1 if the pdb file was downloaded, 0 otherwise
        with_mtz : 1 if the mtz file was downloaded, 0 otherwise
    pdb files will be saved at outpath/models/
    mtz files will be saved at outpath/reflections/
    Record csv file will be saved at outpath/fetchpdb.csv
    '''
    # A bare string would otherwise be iterated character by character
    if isinstance(idlist, str):
        idlist = [idlist]

    model_path = os.path.join(outpath, 'models/')
    reflection_path = os.path.join(outpath, 'reflections/')
    for folder in [model_path, reflection_path]:
        if os.path.exists(folder):
            print(f"{folder:<80}" + f"{'already exists': >20}")
        else:
            os.makedirs(folder, exist_ok=True)
            print(f"{folder:<80}" + f"{'created': >20}")

    codes = []
    with_pdb = []
    with_mtz = []
    for pdb_code in tqdm(idlist):
        # Normalize the id: files and records always use the lower-case code
        valid_code = pdb_code.strip().lower()

        # The model url uses the upper-case id, the map coefficients url the lower-case one
        pdblink = "https://files.rcsb.org/download/" + valid_code.upper() + ".pdb"
        mtzlink = "https://edmaps.rcsb.org/coefficients/" + valid_code + ".mtz"

        codes.append(valid_code)
        with_pdb.append(
            _fetch_url_to_file(pdblink, os.path.join(model_path, valid_code + ".pdb"))
        )
        with_mtz.append(
            _fetch_url_to_file(mtzlink, os.path.join(reflection_path, valid_code + ".mtz"))
        )

    stat_df = pd.DataFrame({
        "code": codes,
        "with_pdb": with_pdb,
        "with_mtz": with_mtz,
    })
    stat_df.to_csv(os.path.join(outpath, "fetchpdb.csv"))
    return stat_df







11 changes: 10 additions & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import gemmi
import numpy as np

from SFC_Torch.io import PDBParser
from SFC_Torch.io import PDBParser, fetch_pdb


def test_setdata(data_pdb):
Expand Down Expand Up @@ -80,3 +80,12 @@ def test_fromatomslices(data_pdb, inplace):
assert b.cell == a.cell
assert b.spacegroup.hm == a.spacegroup.hm
assert len(b.atom_pos) == 55

def test_fetchpdb(tmp_path):
    '''
    fetch_pdb should normalize ids, report per-file success and write the files

    Downloads go into pytest's per-test temporary directory, so the test
    does not depend on the working directory and leaves no files behind.
    NOTE: this test needs network access to the RCSB servers.
    '''
    df = fetch_pdb(['4lZt', '1cTS'], outpath=str(tmp_path))

    # Ids are recorded in lower case regardless of the input casing
    assert df['code'].tolist() == ['4lzt', '1cts']
    assert df['with_pdb'].tolist() == [1, 1]
    # Expectation encoded here: 4lzt has an mtz available, 1cts does not
    assert df['with_mtz'].tolist() == [1, 0]

    assert (tmp_path / "models" / "4lzt.pdb").exists()
    assert (tmp_path / "models" / "1cts.pdb").exists()
    assert (tmp_path / "reflections" / "4lzt.mtz").exists()

0 comments on commit 8f3f936

Please sign in to comment.