Skip to content

Commit ea547cb

Browse files
committed
add example code to download fasta and fastq files
1 parent 4983f5a commit ea547cb

File tree

3 files changed

+225
-1
lines changed

3 files changed

+225
-1
lines changed

source/codes/get_fasta_fastq.rst

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
.. _codes_get_fasta_fastq:
2+
3+
4+
Downloading Files: FASTA and FASTQ
5+
===================================
6+
7+
The example below is a CLI script that downloads FASTA and FASTQ files from
8+
the output of two plugins for a given report (Results) ID. One of the plugins
9+
produces an output file whose name is non-deterministic.
10+
11+
12+
.. code-block:: bash
13+
14+
~$ python get_fasta_and_fastq.py -h
15+
usage: get_plugin_fasta_fastq_data.py [-h] [--host [HOST]]
16+
[--username [USERNAME]]
17+
[--password [PASSWORD]]
18+
[--resultPK [RESULTPK]]
19+
20+
Get the FASTA and FASTQ from generateConsensus and FileExporter for all the
21+
barcodes for the requested Result id
22+
23+
optional arguments:
24+
-h, --help show this help message and exit
25+
--host [HOST] target host to download files. (default: None)
26+
--username [USERNAME]
27+
TB account username. (default: ionadmin)
28+
--password [PASSWORD]
29+
TB account password. (default: ionadmin)
30+
--resultPK [RESULTPK]
31+
the result primary key or ID. Case insensitive.
32+
33+
34+
35+
36+
.. code-block:: python
37+
38+
# Copyright (C) 2021 Thermo Fisher Scientific. All Rights Reserved.
39+
40+
import os
41+
import sys
42+
import json
43+
import argparse
44+
import requests
45+
from bs4 import BeautifulSoup
46+
47+
# Extra usage notes printed by the script when a required option is missing.
# Both --host and --resultPK are mandatory: the entry point refuses to run
# unless each of them has a value.
help_text = """
Requirement:
    both of these options must be used: '--host', '--resultPK'.

Logic:
    - Downloads all Sample Consensus Sequences FASTA from the Generate Consensus
    - Downloads compressed Zip which contains all FASTQ from FileExporter
    - Make sure requests, bs4(BeautifulSoup4) modules are installed via pip
"""
56+
57+
58+
class GetPluginResults:
    """Download FASTA and FASTQ plugin output for a single result (report).

    The class queries the ``results`` REST resource of the target host, walks
    the plugin results attached to the requested result and downloads:

    * the consensus FASTA written by the ``generateConsensus`` plugin, and
    * the FASTQ zip archive written by the ``FileExporter`` plugin (its file
      name is not known in advance, so it is looked up in the report's file
      listing page).

    Files are written to the current working directory.
    """

    # Name of the REST resource that holds the result record.
    api_name = "results"
    # Not used by this class; kept so existing references keep working.
    deleted_count = 0
    # Not used by this class; kept so existing references keep working.
    ignored_names = []

    def __init__(self, inputArgs):
        """Store connection settings.

        :param inputArgs: mapping with the keys ``host``, ``username``,
            ``password`` and ``resultPK``.
        """
        self.host = inputArgs["host"]
        self.auth = (inputArgs["username"], inputArgs["password"])
        self.resultPK = inputArgs["resultPK"]

    @staticmethod
    def _join_url(*parts):
        """Join URL segments with exactly one "/" between them.

        ``os.path.join`` is avoided on purpose: it inserts the platform path
        separator, which is a backslash on Windows.
        """
        return "/".join(str(part).strip("/") for part in parts)

    def _absolute_url(self, path):
        """Prefix a server-relative *path* with the host, keeping the path's tail."""
        return self.host.rstrip("/") + "/" + path.lstrip("/")

    def get_objects(self, url):
        """GET *url* and return the decoded JSON body.

        :returns: the parsed JSON, or ``None`` when the server answers with
            an error status or the connection fails (a message is printed in
            both cases).
        """
        try:
            out = requests.get(url, auth=self.auth)
        except requests.ConnectionError:
            print(">>>> Unable to connect %s" % (url))
            return None

        if out.ok:
            return out.json()

        print(
            ">>>> (Status Code: %d) Unable to retrieve %s"
            % (out.status_code, out.url)
        )
        return None

    def getFastaFastq(self):
        """Download the FASTA and FASTQ output of every matching plugin result.

        Does nothing when no result ID was given or the result record cannot
        be retrieved; plugin results that cannot be fetched are skipped.
        """
        if not self.resultPK:
            return

        url = self._join_url(
            self.host, "rundb/api/v1", self.api_name, self.resultPK
        )
        result = self.get_objects(url)
        if not result:
            # get_objects() already reported why the lookup failed.
            return

        # "pluginresults" is presumably a list of server-relative resource
        # URIs; each one is resolved against the host. TODO confirm schema.
        for obj in result.get("pluginresults") or []:
            pluginOut = self.get_objects(self._absolute_url(str(obj)))
            if not pluginOut:
                continue

            pluginName = pluginOut.get("pluginName")
            if pluginName == "generateConsensus":
                self.download_fasta(pluginOut)
            elif pluginName == "FileExporter":
                self.download_fastq(pluginOut)

    def getStartPluginJson(self, pluginOut):
        """Fetch and decode ``startplugin.json`` from the plugin output folder.

        :param pluginOut: plugin result record; its ``URL`` entry is the
            server-relative location of the plugin output folder.
        """
        startPluginUrl = self._absolute_url(
            pluginOut.get("URL") + "startplugin.json"
        )
        req = requests.get(startPluginUrl, auth=self.auth)
        return req.json()

    def download_fasta(self, pluginOut):
        """Download the consensus FASTA of a ``generateConsensus`` result.

        The local copy carries the plugin result ID in its name so that
        several runs of the plugin do not overwrite each other.
        """
        print("Starting FASTA download...")
        data = self.getStartPluginJson(pluginOut)
        stem = data.get("expmeta").get("output_file_name_stem")
        consensusFastaIn = stem + ".consensus.fasta"
        consensusFastaOut = "%s_%s.consensus.fasta" % (stem, pluginOut.get("id"))
        file_url = self._absolute_url(pluginOut.get("URL") + consensusFastaIn)

        try:
            req = requests.get(file_url, auth=self.auth)
            # Do not save an HTML error page under a .fasta name.
            req.raise_for_status()
            with open(consensusFastaOut, "wb") as f:
                f.write(req.content)
        except (requests.RequestException, OSError) as Err:
            print("FASTA download failed. Please check %s" % Err)
            return

        print(consensusFastaIn)
        print("Completed FASTA download")

    @staticmethod
    def _find_zip_name(html):
        """Return the text of the first table link containing "zip", else ``None``."""
        soup = BeautifulSoup(html, features="html.parser")
        table = soup.find("table")
        if table is None:
            return None

        for row in table.find_all("tr"):
            link = row.find("a")
            if link is None:
                # Header or spacer row without a file link.
                continue
            fileName = link.get_text()
            if "zip" in fileName:
                return fileName
        return None

    def download_fastq(self, pluginOut):
        """Download the FASTQ zip archive of a ``FileExporter`` result.

        The archive name is not predictable, so it is taken from the file
        listing page of the plugin output folder. The local copy is prefixed
        with the plugin result ID.
        """
        print("Starting the FASTQ download...")
        resultDirPath = (
            self.getStartPluginJson(pluginOut)
            .get("runinfo")
            .get("results_dir")
            .rstrip("/")  # a trailing "/" would shift the last two segments
            .split("/")
        )
        metal_url = self._join_url(
            self.host,
            "report",
            str(self.resultPK),
            "metal",
            resultDirPath[-2],
            resultDirPath[-1],
        )

        req = requests.get(metal_url, auth=self.auth)
        fastq_zip = self._find_zip_name(req.content)
        if not fastq_zip:
            print("FASTQ download did not complete")
            return

        zipUrlIn = self._absolute_url(pluginOut.get("URL") + fastq_zip)
        zipUrlOut = str(pluginOut.get("id")) + "_" + fastq_zip
        try:
            # Stream to disk: the archive can be far larger than memory.
            with requests.get(zipUrlIn, stream=True, auth=self.auth) as response:
                response.raise_for_status()
                with open(zipUrlOut, "wb") as zip_file:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        if chunk:  # filter out keep-alive new chunks
                            zip_file.write(chunk)
        except (requests.RequestException, OSError) as Err:
            print("FASTQ download failed. Please check %s" % Err)
            return

        print(fastq_zip)
        print("Completed FASTQ download")
167+
168+
169+
def build_parser():
    """Create the command-line parser for the FASTA/FASTQ download script."""
    parser = argparse.ArgumentParser(
        prog="get_plugin_fasta_fastq_data.py",
        description="Get the FASTA and FASTQ from generateConsensus and FileExporter "
        "for all the barcodes for the requested "
        "Result id",
    )

    parser.add_argument(
        "--host",
        nargs="?",
        help="target host to download files. (default: %(default)s)",
    )

    parser.add_argument(
        "--username",
        nargs="?",
        default="ionadmin",
        help="TB account username. (default: %(default)s)",
    )

    parser.add_argument(
        "--password",
        nargs="?",
        default="ionadmin",
        help="TB account password. (default: %(default)s)",
    )

    parser.add_argument(
        "--resultPK",
        nargs="?",
        help="the result primary key or ID. Case insensitive.",
    )
    return parser


def main(argv=None):
    """Parse *argv* and run the download.

    :param argv: argument list without the program name; ``None`` means
        ``sys.argv[1:]``.
    :returns: process exit status, ``0`` after a run and ``1`` when the
        arguments are incomplete or the host has no HTTP/HTTPS scheme.
    """
    parser = build_parser()
    args = vars(parser.parse_args(argv))

    if not (args.get("host") and args.get("resultPK")):
        print("Error: one of the required options is not used")
        print(help_text)
        # print_help() rather than parse_args(["-h"]): the latter terminates
        # the process with status 0, which hid the error from the caller.
        parser.print_help()
        return 1

    # Require an explicit scheme at the start of the host; a plain substring
    # test would accept values such as "myhttphost".
    if not args["host"].lower().startswith(("http://", "https://")):
        print("need to specify HTTP or HTTPS")
        return 1

    pluginResultData = GetPluginResults(args)
    pluginResultData.getFastaFastq()
    return 0


if __name__ == "__main__":
    sys.exit(main())

source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343

4444
# General information about the project.
4545
project = u'Ion Torrent SDK'
46-
copyright = u'2019 Thermo Fisher Scientific Inc. All rights reserved'
46+
copyright = u'2021 Thermo Fisher Scientific Inc. All rights reserved'
4747

4848
# The version info for the project you're documenting, acts as replacement for
4949
# |version| and |release|, also used in various other places throughout the

source/index.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ This section describes all of the REST API end points that |TS| makes available
3535
api/examples
3636

3737

38+
.. toctree::
39+
:maxdepth: 2
40+
:caption: Script Examples
41+
42+
codes/get_fasta_fastq
43+
44+
3845
Legal
3946
-----
4047

0 commit comments

Comments
 (0)