Skip to content

Commit f824b3e

Browse files
authored
Merge pull request #8 from usegalaxy-eu/vir-support
Support for viral sample template
2 parents 03f6c3a + 86c4745 commit f824b3e

File tree

5 files changed

+107
-15
lines changed

5 files changed

+107
-15
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ optional arguments:
6262
--password PASSWORD the password of your Webin account
6363
--secret SECRET .secret file containing the password of your Webin account
6464
-d, --dev Flag to use the dev/sandbox endpoint of ENA.
65+
--vir Flag to use the viral sample template.
6566
```
6667

6768
Mandatory arguments: --action, --center, --webin_id, --password or --secret.
@@ -127,3 +128,7 @@ test command: **add metadata and sequence data**
127128
test command using a **.secret file**
128129

129130
`ena-upload-cli --action add --center 'your_center_name' --webin_id your_id --study example_tables/ENA_template_studies.tsv --sample example_tables/ENA_template_samples.tsv --experiment example_tables/ENA_template_experiments.tsv --run example_tables/ENA_template_runs.tsv --data example_data/*gz --dev --secret .secret`
131+
132+
test command for **viral data**
133+
134+
`ena-upload-cli --action add --center 'your_center_name' --webin_id your_id --password your_password --study example_tables/ENA_template_studies.tsv --sample example_tables/ENA_template_samples_vir.tsv --experiment example_tables/ENA_template_experiments.tsv --run example_tables/ENA_template_runs.tsv --data example_data/*gz --dev --vir`

ena_upload/ena_upload.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def construct_xml(schema, stream, xsd):
159159
return xml_file
160160

161161

162-
def actors(template_path):
162+
def actors(template_path, vir):
163163
''':return: the filenames of schema definitions and templates
164164
'''
165165

@@ -173,19 +173,25 @@ def add_path(dic, path):
173173
'submission': 'SRA.submission.xsd',
174174
'sample': 'SRA.sample.xsd',
175175
'study': 'SRA.study.xsd'}
176-
177-
templates = {'run': 'ENA_template_runs.xml',
178-
'experiment': 'ENA_template_experiments.xml',
179-
'submission': 'ENA_template_submission.xml',
180-
'sample': 'ENA_template_samples.xml',
181-
'study': 'ENA_template_studies.xml'}
176+
if vir:
177+
templates = {'run': 'ENA_template_runs.xml',
178+
'experiment': 'ENA_template_experiments.xml',
179+
'submission': 'ENA_template_submission.xml',
180+
'sample': 'ENA_template_vir_sample.xml',
181+
'study': 'ENA_template_studies.xml'}
182+
else:
183+
templates = {'run': 'ENA_template_runs.xml',
184+
'experiment': 'ENA_template_experiments.xml',
185+
'submission': 'ENA_template_submission.xml',
186+
'sample': 'ENA_template_samples.xml',
187+
'study': 'ENA_template_studies.xml'}
182188

183189
xsds = add_path(xsds, template_path)
184190

185191
return xsds, templates
186192

187193

188-
def run_construct(template_path, schema_targets, center):
194+
def run_construct(template_path, schema_targets, center, vir):
189195
'''construct XMLs for schema in schema_targets
190196
191197
:param schema_targets: dictionary of 'schema:targets' generated
@@ -196,7 +202,7 @@ def run_construct(template_path, schema_targets, center):
196202
:return schema_xmls: dictionary of 'schema:filename'
197203
'''
198204

199-
xsds, templates = actors(template_path)
205+
xsds, templates = actors(template_path, vir)
200206

201207
schema_xmls = {}
202208

@@ -211,7 +217,7 @@ def run_construct(template_path, schema_targets, center):
211217
return schema_xmls
212218

213219

214-
def construct_submission(template_path, action, submission_input, center):
220+
def construct_submission(template_path, action, submission_input, center, vir):
215221
'''construct XML for submission
216222
217223
:param action: action for submission -
@@ -224,7 +230,7 @@ def construct_submission(template_path, action, submission_input, center):
224230
:return submission_xml: filename of submission XML
225231
'''
226232

227-
xsds, templates = actors(template_path)
233+
xsds, templates = actors(template_path, vir)
228234

229235
template = templates['submission']
230236
loader = TemplateLoader(search_path=template_path)
@@ -569,6 +575,8 @@ def process_args():
569575
help='.secret file containing the password of your Webin account')
570576

571577
parser.add_argument('-d', '--dev', help="Flag to use the dev/sandbox endpoint of ENA.", action="store_true")
578+
579+
parser.add_argument('--vir', help="Flag to use the viral sample template.", action="store_true")
572580

573581
args = parser.parse_args()
574582

@@ -621,6 +629,7 @@ def main ():
621629
action = args.action.upper()
622630
center = args.center_name
623631
dev = args.dev
632+
vir = args.vir
624633
webin_id = args.webin_id
625634
password = ""
626635

@@ -695,18 +704,18 @@ def main ():
695704
# when ADD/MODIFY,
696705
# requires source XMLs for 'run', 'experiment', 'sample', 'study'
697706
# schema_xmls record XMLs for all these schema and following 'submission'
698-
schema_xmls = run_construct(template_path, schema_targets, center)
707+
schema_xmls = run_construct(template_path, schema_targets, center, vir)
699708

700709
submission_xml = construct_submission(template_path, action,
701-
schema_xmls, center)
710+
schema_xmls, center, vir)
702711

703712
elif action in ['CANCEL', 'RELEASE']:
704713
# when CANCEL/RELEASE, only accessions needed
705714
# schema_xmls only used to record the following 'submission'
706715
schema_xmls = {}
707716

708717
submission_xml = construct_submission(template_path, action,
709-
schema_targets, center)
718+
schema_targets, center, vir)
710719

711720
schema_xmls['submission'] = submission_xml
712721

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<SAMPLE_SET xmlns:py="http://genshi.edgewall.org/"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:noNamespaceSchemaLocation="ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/SRA.sample.xsd">
5+
<py:for each="index, row in df.iterrows()">
6+
<SAMPLE alias="${row.alias}" center_name="${center}">
7+
<!--TODO: UNIQUE NAME FOR SAMPLE-->
8+
<!--TODO: CENTER NAME-->
9+
<TITLE>${row.title}</TITLE>
10+
<!--TODO: A SHORT INFORMATIVE DESCRIPTION OF THE SAMPLE-->
11+
<SAMPLE_NAME>
12+
<TAXON_ID>${row.taxon_id}</TAXON_ID>
13+
<!--TODO: PROVIDE NCBI TAXID FOR ORGANISM (e.g. 9606 for human)-->
14+
<!-- For complete prokaryotic genomes, a taxid should be generated for the strain.
15+
Please contact us so we can generate this on your behalf. -->
16+
<SCIENTIFIC_NAME>${row.scientific_name}</SCIENTIFIC_NAME>
17+
<!--TODO: SCIENTIFIC NAME AS APPEARS IN NCBI TAXONOMY FOR THE
18+
TAXON_ID (e.g. homo sapiens)-->
19+
</SAMPLE_NAME>
20+
<DESCRIPTION>${row.sample_description}</DESCRIPTION>
21+
<!--TODO: A LONGER DESCRIPTION OF SAMPLE AND HOW IT DIFFERS FROM
22+
OTHER SAMPLES-->
23+
<SAMPLE_ATTRIBUTES>
24+
<SAMPLE_ATTRIBUTE>
25+
<TAG>collection date</TAG>
26+
<VALUE>${row.collection_date}</VALUE>
27+
</SAMPLE_ATTRIBUTE>
28+
<SAMPLE_ATTRIBUTE>
29+
<TAG>geographic location (country and/or sea)</TAG>
30+
<VALUE>${row.geographic_location}</VALUE>
31+
</SAMPLE_ATTRIBUTE>
32+
<SAMPLE_ATTRIBUTE>
33+
<TAG>sample capture status</TAG>
34+
<VALUE>active surveillance in response to outbreak</VALUE>
35+
</SAMPLE_ATTRIBUTE>
36+
<SAMPLE_ATTRIBUTE>
37+
<TAG>host common name</TAG>
38+
<VALUE>${row.host_common_name}</VALUE>
39+
</SAMPLE_ATTRIBUTE>
40+
<SAMPLE_ATTRIBUTE>
41+
<TAG>host subject id</TAG>
42+
<VALUE>${row.host_subject_id}</VALUE>
43+
</SAMPLE_ATTRIBUTE>
44+
<SAMPLE_ATTRIBUTE>
45+
<TAG>host health state</TAG>
46+
<VALUE>${row.host_health_state}</VALUE>
47+
</SAMPLE_ATTRIBUTE>
48+
<SAMPLE_ATTRIBUTE>
49+
<TAG>host sex</TAG>
50+
<VALUE>${row.host_sex}</VALUE>
51+
</SAMPLE_ATTRIBUTE>
52+
<SAMPLE_ATTRIBUTE>
53+
<TAG>host scientific name</TAG>
54+
<VALUE>${row.host_scientific_name}</VALUE>
55+
</SAMPLE_ATTRIBUTE>
56+
<SAMPLE_ATTRIBUTE>
57+
<TAG>collector name</TAG>
58+
<VALUE>${row.collector_name}</VALUE>
59+
</SAMPLE_ATTRIBUTE>
60+
<SAMPLE_ATTRIBUTE>
61+
<TAG>collecting institution</TAG>
62+
<VALUE>${row.collecting_institution}</VALUE>
63+
</SAMPLE_ATTRIBUTE>
64+
<SAMPLE_ATTRIBUTE>
65+
<TAG>isolate</TAG>
66+
<VALUE>${row.isolate}</VALUE>
67+
</SAMPLE_ATTRIBUTE>
68+
<SAMPLE_ATTRIBUTE>
69+
<TAG>ENA-CHECKLIST</TAG>
70+
<VALUE>ERC000033</VALUE>
71+
</SAMPLE_ATTRIBUTE>
72+
</SAMPLE_ATTRIBUTES>
73+
</SAMPLE>
74+
</py:for>
75+
</SAMPLE_SET>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
alias status accession title scientific_name taxon_id sample_description collection_date geographic_location host_common_name host_subject_id host_health_state host_sex host_scientific_name collector_name collecting_institution isolate submission_date
2+
sample_alias_4 add update_by_ENA sample_title_2 Severe acute respiratory syndrome coronavirus 2 update_by_ENA sample_description_1 2020-10-11 Argentina host_common_name_1 host_subject_id_1 diseased male host_scientific_name_1 collector_name_1 collecting_institution_1 isolate_1 update_by_ENA
3+
sample_alias_5 add update_by_ENA sample_title_3 Severe acute respiratory syndrome coronavirus 2 update_by_ENA sample_description_2 2008-01-24 Belgium host_common_name_2 host_subject_id_2 restricted access female host_scientific_name_2 collector_name_2 collecting_institution_2 isolate_2 update_by_ENA

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
setup(
1010
name='ena-upload-cli',
11-
version='0.1.7',
11+
version='0.1.8',
1212
keywords=["pip", "ena-upload-cli", "cli", "ENA", "upload"],
1313
description='Command Line Interface to upload data to the European Nucleotide Archive',
1414
author="Dilmurat Yusuf",

0 commit comments

Comments
 (0)