Skip to content

Commit 9132cc1

Browse files
committed
fix read_protein_set to use source_path=self.dataDir
1 parent 0d4c498 commit 9132cc1

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

qsprpred/data/sources/papyrus/papyrus_class.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class Papyrus(DataSource):
2121
Attributes:
2222
DEFAULT_DIR (str): default directory for Papyrus database and the extracted data
2323
dataDir (str): storage directory for Papyrus database and the extracted data
24+
_papyrusDir (str): directory where the Papyrus database is located, os.path.join(dataDir, "papyrus")
2425
version (list): Papyrus database version
2526
descriptors (list, str, None): descriptors to download if not already present
2627
stereo (bool): use version with stereochemistry
@@ -60,6 +61,7 @@ def __init__(
6061
use only plusplus version, only high quality data
6162
"""
6263
self.dataDir = data_dir
64+
self._papyrusDir = os.path.join(self.dataDir, "papyrus")
6365
self.version = version
6466
self.descriptors = descriptors
6567
self.stereo = stereo
@@ -73,8 +75,8 @@ def download(self):
7375
Only newly requested data is downloaded. Remove the files if you want to
7476
reload the data completely.
7577
"""
76-
os.makedirs(self.dataDir, exist_ok=True)
77-
if not os.path.exists(os.path.join(self.dataDir, "papyrus")):
78+
if not os.path.exists(self._papyrusDir):
79+
os.makedirs(self.dataDir, exist_ok=True)
7880
logger.info("Downloading Papyrus database...")
7981
download_papyrus(
8082
outdir=self.dataDir,
@@ -87,10 +89,8 @@ def download(self):
8789
)
8890
else:
8991
logger.info(
90-
f"Papyrus database already"
91-
f" downloaded. Using existing data. "
92-
f"Delete the following folder to reload the data: "
93-
f"{os.path.join(self.dataDir, 'papyrus')}"
92+
"Papyrus database already downloaded. Using existing data. "
93+
f"Delete the following folder to reload the data: {self._papyrusDir}"
9494
)
9595

9696
def getData(
@@ -121,7 +121,7 @@ def getData(
121121
Returns:
122122
MoleculeTable: the filtered data set
123123
"""
124-
logger.debug(f"Getting data from Papyrus data source...")
124+
logger.debug("Getting data from Papyrus data source...")
125125
assert acc_keys is not None, "Please provide a list of accession keys."
126126
name = name or "papyrus"
127127
self.download()
@@ -143,7 +143,7 @@ def getData(
143143
plusplus=self.plusplus,
144144
papyrus_dir=self.dataDir,
145145
)
146-
logger.debug(f"Finished filtering Papyrus data set.")
146+
logger.debug("Finished filtering Papyrus data set.")
147147
logger.debug(f"Creating MoleculeTable from '{path}'.")
148148
ret = MoleculeTable.fromTableFile(name, path, store_dir=output_dir, **kwargs)
149149
logger.debug(f"Finished creating MoleculeTable from '{path}'.")
@@ -175,7 +175,9 @@ def getProteinData(
175175
if os.path.exists(path) and use_existing:
176176
return pd.read_table(path)
177177
else:
178-
protein_data = papyrus_scripts.read_protein_set(version=self.version)
178+
protein_data = papyrus_scripts.read_protein_set(
179+
source_path=self.dataDir, version=self.version
180+
)
179181
protein_data["accession"] = protein_data["target_id"].apply(
180182
lambda x: x.split("_")[0]
181183
)

0 commit comments

Comments
 (0)