Skip to content

Commit

Permalink
Create sample.xml objects to send to ENA using PHA4GE's metadata fiel…
Browse files Browse the repository at this point in the history
…d mapping. Keep state in sample_table. Send Slack notifications if submission fails.
  • Loading branch information
anna-parker committed Aug 14, 2024
1 parent 2cfe26e commit 8872d39
Show file tree
Hide file tree
Showing 11 changed files with 885 additions and 194 deletions.
15 changes: 15 additions & 0 deletions ena-submission/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,21 @@ rule create_project:
project_created=touch("results/project_created"),
params:
log_level=LOG_LEVEL,
shell:
"""
python {input.script} \
--config-file {input.config} \
--log-level {params.log_level} \
"""

rule create_sample:
input:
script="scripts/create_sample.py",
config="results/config.yaml",
output:
sample_created=touch("results/sample_created"),
params:
log_level=LOG_LEVEL,
shell:
"""
python {input.script} \
Expand Down
43 changes: 43 additions & 0 deletions ena-submission/config/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,46 @@ ena_submission_url: https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit # TODO(h
github_username: fake_username
github_pat: fake_pat
github_url: https://api.github.com/repos/pathoplexus/ena-submission/contents/test/approved_ena_submission_list.json?ref=main # TODO(https://github.com/loculus-project/loculus/issues/2425): update in production
# Mapping from a sample attribute name (the key) to the Loculus metadata
# field(s) listed under `loculus_fields`. An entry may additionally carry a
# `function` and `args`.
# NOTE(review): the keys look like ENA sample-checklist attribute names —
# confirm against the checklist used for submission.
# NOTE(review): how multiple `loculus_fields` are combined into one value is
# not defined here — verify against the script that consumes this mapping.
metadata_mapping:
# 'subject exposure' and 'type exposure' both read the same field (exposure_event).
'subject exposure':
loculus_fields: [exposure_event]
'type exposure':
loculus_fields: [exposure_event]
# Only entry with `function`/`args`; presumably the value is derived by
# matching host_health_state against "Hospital" — TODO confirm in consumer.
hospitalisation:
loculus_fields: [host_health_state]
function: match
args: [Hospital]
'illness symptoms':
loculus_fields: [signs_and_symptoms]
'collection date':
loculus_fields: [sample_collection_date]
'geographic location (country and/or sea)':
loculus_fields: [geo_loc_country]
'geographic location (region and locality)':
loculus_fields: [geo_loc_admin_1]
'sample capture status':
loculus_fields: [purpose_of_sampling]
'host disease outcome':
loculus_fields: [host_health_outcome]
'host common name':
loculus_fields: [host_name_common]
'host age':
loculus_fields: [host_age]
# host_health_state is also used by `hospitalisation` above.
'host health state':
loculus_fields: [host_health_state]
'host sex':
loculus_fields: [host_gender]
'host scientific name':
loculus_fields: [host_name_scientific]
'isolate':
loculus_fields: [specimen_collector_sample_id]
'collecting institution':
loculus_fields: [sequenced_by_organization, author_affiliations]
# NOTE(review): the field names in the next three entries contain spaces,
# unlike the snake_case names used by every other entry — confirm these match
# the actual Loculus metadata field names.
'receipt date':
loculus_fields: [received date]
'isolation source host-associated':
loculus_fields: [anatomical material, anatomical part, body product]
'isolation source non-host-associated':
loculus_fields: [environmental site, environmental material]
'authors':
loculus_fields: [authors]
2 changes: 2 additions & 0 deletions ena-submission/flyway/sql/V1__Initial_Schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ CREATE TABLE submission_table (
finished_at timestamp,
metadata jsonb,
unaligned_nucleotide_sequences jsonb,
center_name text,
external_metadata jsonb,
primary key (accession, version)
);
Expand All @@ -22,6 +23,7 @@ CREATE TABLE project_table (
status text not null,
started_at timestamp not null,
finished_at timestamp,
center_name text,
result jsonb,
primary key (group_id, organism)
);
Expand Down
10 changes: 8 additions & 2 deletions ena-submission/scripts/create_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,10 @@ def submission_table_start(db_config):
)
if len(corresponding_project) == 1:
if corresponding_project[0]["status"] == str(Status.SUBMITTED):
update_values = {"status_all": StatusAll.SUBMITTED_PROJECT}
update_values = {
"status_all": StatusAll.SUBMITTED_PROJECT,
"center_name": corresponding_project[0]["center_name"],
}
update_db_where_conditions(
db_config,
table_name="submission_table",
Expand Down Expand Up @@ -203,7 +206,10 @@ def submission_table_update(db_config):
if len(corresponding_project) == 1 and corresponding_project[0]["status"] == str(
Status.SUBMITTED
):
update_values = {"status_all": StatusAll.SUBMITTED_PROJECT}
update_values = {
"status_all": StatusAll.SUBMITTED_PROJECT,
"center_name": corresponding_project[0]["center_name"],
}
update_db_where_conditions(
db_config,
table_name="submission_table",
Expand Down
Loading

0 comments on commit 8872d39

Please sign in to comment.