@@ -21,6 +21,7 @@ class Papyrus(DataSource):
21
21
Attributes:
22
22
DEFAULT_DIR (str): default directory for Papyrus database and the extracted data
23
23
dataDir (str): storage directory for Papyrus database and the extracted data
24
+ _papyrusDir (str): directory where the Papyrus database is located, os.path.join(dataDir, "papyrus")
24
25
version (list): Papyrus database version
25
26
descriptors (list, str, None): descriptors to download if not already present
26
27
stereo (bool): use version with stereochemistry
@@ -60,6 +61,7 @@ def __init__(
60
61
use only plusplus version, only high quality data
61
62
"""
62
63
self .dataDir = data_dir
64
+ self ._papyrusDir = os .path .join (self .dataDir , "papyrus" )
63
65
self .version = version
64
66
self .descriptors = descriptors
65
67
self .stereo = stereo
@@ -73,8 +75,8 @@ def download(self):
73
75
Only newly requested data is downloaded. Remove the files if you want to
74
76
reload the data completely.
75
77
"""
76
- os .makedirs (self .dataDir , exist_ok = True )
77
- if not os .path . exists ( os . path . join ( self .dataDir , "papyrus" )):
78
+ if not os .path . exists (self ._papyrusDir ):
79
+ os .makedirs ( self .dataDir , exist_ok = True )
78
80
logger .info ("Downloading Papyrus database..." )
79
81
download_papyrus (
80
82
outdir = self .dataDir ,
@@ -87,10 +89,8 @@ def download(self):
87
89
)
88
90
else :
89
91
logger .info (
90
- f"Papyrus database already"
91
- f" downloaded. Using existing data. "
92
- f"Delete the following folder to reload the data: "
93
- f"{ os .path .join (self .dataDir , 'papyrus' )} "
92
+ "Papyrus database already downloaded. Using existing data. "
93
+ f"Delete the following folder to reload the data: { self ._papyrusDir } "
94
94
)
95
95
96
96
def getData (
@@ -121,7 +121,7 @@ def getData(
121
121
Returns:
122
122
MoleculeTable: the filtered data set
123
123
"""
124
- logger .debug (f "Getting data from Papyrus data source..." )
124
+ logger .debug ("Getting data from Papyrus data source..." )
125
125
assert acc_keys is not None , "Please provide a list of accession keys."
126
126
name = name or "papyrus"
127
127
self .download ()
@@ -143,7 +143,7 @@ def getData(
143
143
plusplus = self .plusplus ,
144
144
papyrus_dir = self .dataDir ,
145
145
)
146
- logger .debug (f "Finished filtering Papyrus data set." )
146
+ logger .debug ("Finished filtering Papyrus data set." )
147
147
logger .debug (f"Creating MoleculeTable from '{ path } '." )
148
148
ret = MoleculeTable .fromTableFile (name , path , store_dir = output_dir , ** kwargs )
149
149
logger .debug (f"Finished creating MoleculeTable from '{ path } '." )
@@ -175,7 +175,9 @@ def getProteinData(
175
175
if os .path .exists (path ) and use_existing :
176
176
return pd .read_table (path )
177
177
else :
178
- protein_data = papyrus_scripts .read_protein_set (version = self .version )
178
+ protein_data = papyrus_scripts .read_protein_set (
179
+ source_path = self .dataDir , version = self .version
180
+ )
179
181
protein_data ["accession" ] = protein_data ["target_id" ].apply (
180
182
lambda x : x .split ("_" )[0 ]
181
183
)
0 commit comments