Skip to content

Commit 73c9d88

Browse files
authored
Merge pull request #34 from EBI-Metagenomics/develop
Prep new release - EBI private data
2 parents 681fbdd + 4860f96 commit 73c9d88

File tree

4 files changed

+50
-18
lines changed

4 files changed

+50
-18
lines changed

config/fetchdata-config-template.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
"url_max_attempts": 5,
33
"ena_api_username": "",
44
"ena_api_password": "",
5-
"aspera_bin": "",
6-
"aspera_cert": ""
5+
"fire_access_key_id": "",
6+
"fire_secret_access_key": ""
77
}

config/testing.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
{
22
"url_max_attempts": 5,
33
"ena_api_username": "",
4-
"ena_api_password": ""
4+
"ena_api_password": "",
5+
"fire_access_key_id": "",
6+
"fire_secret_access_key": ""
57
}

fetchtool/abstract_fetch.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,13 @@ def __init__(self, argv=sys.argv[1:]):
8383
self.create_output_dir(self.args.dir)
8484
self.base_dir = self.args.dir
8585

86+
self.interactive_mode = self.args.interactive
87+
self.private_mode = self.args.private
88+
self.force_mode = self.args.force
89+
self.desc_file_only = self.args.fix_desc_file
90+
self.ignore_errors = self.args.ignore_errors
91+
self.ebi = self.args.ebi
92+
8693
self.config = {}
8794
self._load_default_config_values()
8895

@@ -97,13 +104,6 @@ def __init__(self, argv=sys.argv[1:]):
97104
self.ENA_API_USER = self.config["ena_api_username"]
98105
self.ENA_API_PASSWORD = self.config["ena_api_password"]
99106

100-
self.interactive_mode = self.args.interactive
101-
self.private_mode = self.args.private
102-
self.force_mode = self.args.force
103-
self.desc_file_only = self.args.fix_desc_file
104-
self.ignore_errors = self.args.ignore_errors
105-
self.ebi = self.args.ebi
106-
107107
self._process_additional_args()
108108
if self.args.projects or self.args.project_list:
109109
self.projects = self._get_project_accessions(self.args)
@@ -178,8 +178,10 @@ def _load_default_config_values(self):
178178
self.config["ena_api_username"] = ""
179179
self.config["ena_api_password"] = ""
180180
self.config["url_max_attempts"] = 5
181-
self.config["fire_endpoint"] = "http://hl.fire.sdo.ebi.ac.uk"
182-
self.config["fire_ena_bucket"] = "era-public"
181+
self.config["fire_endpoint"] = "https://hl.fire.sdo.ebi.ac.uk"
182+
self.config["fire_ena_bucket"] = "era-private" if self.private_mode else "era-public"
183+
self.config["fire_access_key_id"] = ""
184+
self.config["fire_secret_access_key"] = ""
183185

184186
@staticmethod
185187
def add_arguments(parser):
@@ -278,8 +280,8 @@ def download_raw_file(self, dl_file, dest, dl_md5s):
278280
if not self._is_file_valid(dest, dl_md5s) or self.force_mode:
279281
silent_remove(dest)
280282
try:
281-
# Copying data from NFS within EBI infrastructure only works for public data
282-
if not self.private_mode and self.ebi:
283+
# Copy data from EBI's Fire storage when running within EBI infrastructure (public and private data)
284+
if self.ebi:
283285
logging.info("Downloading using EBI's Fire AWS compatible storage")
284286
file_downloaded = self.download_fire(dest, dl_file)
285287
if not file_downloaded:
@@ -599,14 +601,36 @@ def download_lftp(self, dest, url):
599601
def download_fire(self, dest: str, url: str) -> bool:
600602
"""Copy the file using the aws cli to access EBI Fire. Only works within EBI Network
601603
Usage example, to get file path and names from full FTP URL
602-
- url = ftp.sra.ebi.ac.uk/vol1/sequence/ERZ166/ERZ1669403/contig.fa.gz
604+
- url = ftp.sra.ebi.ac.uk/vol1/sequence/ERZ166/ERZ1669403/contig.fa.gz (ftp.dcc-private.ebi.ac.uk/vol1/ for private)
603605
- dest = destination path
604606
"""
605-
fire_path = url.replace("ftp.sra.ebi.ac.uk/vol1/", "")
607+
# Remove the public and private prefixes
608+
fire_path = url.replace("ftp.sra.ebi.ac.uk/vol1/", "").replace("ftp.dcc-private.ebi.ac.uk/vol1/", "")
606609
fire_endpoint = self.config["fire_endpoint"]
607610
ena_bucket_name = self.config["fire_ena_bucket"]
611+
fire_access_key_id = self.config.get("fire_access_key_id")
612+
fire_secret_access_key = self.config.get("fire_secret_access_key")
608613
try:
609-
s3 = boto3.client("s3", endpoint_url=fire_endpoint, config=Config(signature_version=UNSIGNED))
614+
s3_args = {
615+
"endpoint_url": fire_endpoint,
616+
}
617+
if self.private_mode:
618+
if not fire_access_key_id:
619+
logging.error("Can't use Fire as the 'fire_access_key_id' is empty")
620+
return False
621+
if not fire_secret_access_key:
622+
logging.error("Can't use Fire as the 'fire_secret_access_key' is empty")
623+
return False
624+
s3_args.update(
625+
{
626+
"aws_access_key_id": fire_access_key_id,
627+
"aws_secret_access_key": fire_secret_access_key,
628+
}
629+
)
630+
else:
631+
# Requests to the public bucket are sent unsigned (no credentials required)
632+
s3_args.update({"config": Config(signature_version=UNSIGNED)})
633+
s3 = boto3.client("s3", **s3_args)
610634
object_key = fire_path
611635
s3.download_file(ena_bucket_name, object_key, dest)
612636
logging.info("File downloaded successfully")

tests/unit/test_config_loading.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@ def test_config_defaults(self):
2828
"ena_api_username": "",
2929
"ena_api_password": "",
3030
"url_max_attempts": 5,
31-
"fire_endpoint": "http://hl.fire.sdo.ebi.ac.uk",
31+
"fire_endpoint": "https://hl.fire.sdo.ebi.ac.uk",
3232
"fire_ena_bucket": "era-public",
33+
"fire_access_key_id": "",
34+
"fire_secret_access_key": "",
3335
}
3436

3537
def test_config_override_with_json_file(self):
@@ -40,6 +42,8 @@ def test_config_override_with_json_file(self):
4042
"url_max_attempts": 10,
4143
"fire_endpoint": "fake_endpoint",
4244
"fire_ena_bucket": "fake_bucket",
45+
"fire_access_key_id": "",
46+
"fire_secret_access_key": "",
4347
}
4448

4549
def test_config_override_partial_with_json(self):
@@ -50,4 +54,6 @@ def test_config_override_partial_with_json(self):
5054
"url_max_attempts": 8,
5155
"fire_endpoint": "fake_endpoint",
5256
"fire_ena_bucket": "fake_bucket",
57+
"fire_access_key_id": "",
58+
"fire_secret_access_key": "",
5359
}

0 commit comments

Comments
 (0)