Skip to content

Commit

Permalink
Refactor to make test a flag and local testing clearer.
Browse files Browse the repository at this point in the history
  • Loading branch information
anna-parker committed Sep 22, 2024
1 parent 0828e8a commit 25396ae
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 25 deletions.
24 changes: 24 additions & 0 deletions ena-submission/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,24 @@ with open("results/config.yaml", "w") as f:
f.write(yaml.dump(config))

LOG_LEVEL = config.get("log_level", "INFO")

# Production submission is opt-in: unless `submit_to_ena_prod` is set to a
# truthy value in the config, everything targets the ENA dev environment.
SUBMIT_TO_ENA_PROD = config.get("submit_to_ena_prod", False)
SUBMIT_TO_ENA_DEV = not SUBMIT_TO_ENA_PROD

# The two environments are mutually exclusive, so pick the endpoint set with
# a single if/else instead of two independent checks on complementary flags.
if SUBMIT_TO_ENA_PROD:
    print("WARNING: Submitting to ENA production")
    config["ena_submission_url"] = "https://www.ebi.ac.uk/ena/submit/drop-box/submit"
    config["github_url"] = (
        "https://raw.githubusercontent.com/pathoplexus/ena-submission/main/approved/approved_ena_submission_list.json"
    )
    config["ena_reports_service_url"] = "https://www.ebi.ac.uk/ena/submit/report"
else:
    print("Submitting to ENA dev environment")
    config["ena_submission_url"] = "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit"
    config["github_url"] = (
        "https://raw.githubusercontent.com/pathoplexus/ena-submission/main/test/approved_ena_submission_list.json"
    )
    config["ena_reports_service_url"] = "https://wwwdev.ebi.ac.uk/ena/submit/report"

# results/config.yaml is dumped just above this section, i.e. before the
# endpoint keys were added to `config`. Write it again so the on-disk config
# also carries the selected URLs.
# NOTE(review): presumably the rules pass results/config.yaml to the scripts
# via --config-file; confirm against the rule inputs.
with open("results/config.yaml", "w") as f:
    f.write(yaml.dump(config))


rule all:
Expand Down Expand Up @@ -88,11 +106,13 @@ rule create_project:
project_created=touch("results/project_created"),
params:
log_level=LOG_LEVEL,
test_flag="--test" if SUBMIT_TO_ENA_DEV else "",
shell:
"""
python {input.script} \
--config-file {input.config} \
--log-level {params.log_level} \
{params.test_flag}
"""


Expand All @@ -104,11 +124,13 @@ rule create_sample:
sample_created=touch("results/sample_created"),
params:
log_level=LOG_LEVEL,
test_flag="--test" if SUBMIT_TO_ENA_DEV else "",
shell:
"""
python {input.script} \
--config-file {input.config} \
--log-level {params.log_level} \
{params.test_flag}
"""


Expand All @@ -120,11 +142,13 @@ rule create_assembly:
sample_created=touch("results/assembly_created"),
params:
log_level=LOG_LEVEL,
test_flag="--test" if SUBMIT_TO_ENA_DEV else "",
shell:
"""
python {input.script} \
--config-file {input.config} \
--log-level {params.log_level} \
{params.test_flag}
"""


Expand Down
4 changes: 1 addition & 3 deletions ena-submission/config/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ db_name: Loculus
unique_project_suffix: Loculus
ena_submission_username: fake-user
ena_submission_password: fake-password
ena_submission_url: https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit # TODO(https://github.com/loculus-project/loculus/issues/2425): update in production
github_url: https://raw.githubusercontent.com/pathoplexus/ena-submission/main/test/approved_ena_submission_list.json # TODO(https://github.com/loculus-project/loculus/issues/2425): update in production
ena_reports_service_url: https://wwwdev.ebi.ac.uk/ena/submit/report # TODO(https://github.com/loculus-project/loculus/issues/2425): update in production
submit_to_ena_prod: False # TODO(https://github.com/loculus-project/loculus/issues/2425): update in production
#ena_checklist: ERC000033 - do not use until all fields are mapped to ENA accepted options
metadata_mapping:
'subject exposure':
Expand Down
32 changes: 27 additions & 5 deletions ena-submission/scripts/create_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,17 @@ def create_manifest_object(
group_key: dict[str, str],
test=False,
) -> AssemblyManifest:
"""
Create an AssemblyManifest object for an entry in the assembly table using:
- the corresponding ena_sample_accession and bioproject_accession
- the organism metadata from the config file
- sequencing metadata from the corresponding submission table entry
- unaligned nucleotide sequences from the corresponding submission table entry,
these are used to create chromosome files and fasta files which are passed to the manifest.
If test=True add a timestamp to the alias suffix to allow for multiple submissions of the same
manifest for testing.
"""
sample_accession = sample_table_entry["result"]["ena_sample_accession"]
study_accession = project_table_entry["result"]["bioproject_accession"]

Expand Down Expand Up @@ -276,13 +287,18 @@ def submission_table_update(db_config: SimpleConnectionPool):
raise RuntimeError(error_msg)


def assembly_table_create(db_config: SimpleConnectionPool, config: Config, retry_number: int = 3):
def assembly_table_create(
db_config: SimpleConnectionPool, config: Config, retry_number: int = 3, test: bool = False
):
"""
1. Find all entries in assembly_table in state READY
2. Create temporary files: chromosome_list_file, fasta_file, manifest_file
3. Update assembly_table to state SUBMITTING (only proceed if update succeeds)
4. If (create_ena_assembly succeeds): update state to SUBMITTED with results
5. Else update state to HAS_ERRORS with error messages
If test=True: add a timestamp to the alias suffix to allow for multiple submissions of the same
manifest for testing AND use the test ENA webin-cli endpoint for submission.
"""
ena_config = get_ena_config(
config.ena_submission_username,
Expand Down Expand Up @@ -333,7 +349,7 @@ def assembly_table_create(db_config: SimpleConnectionPool, config: Config, retry
sample_data_in_submission_table[0],
seq_key,
group_key,
test=True, # TODO(https://github.com/loculus-project/loculus/issues/2425): remove in production
test,
)
manifest_file = create_manifest(manifest_object)

Expand All @@ -354,7 +370,7 @@ def assembly_table_create(db_config: SimpleConnectionPool, config: Config, retry
logger.info(f"Starting assembly creation for accession {row["accession"]}")
segment_order = get_segment_order(sample_data_in_submission_table[0]["unaligned_sequences"])
assembly_creation_results: CreationResults = create_ena_assembly(
ena_config, manifest_file, center_name=center_name
ena_config, manifest_file, center_name=center_name, test=test
)
if assembly_creation_results.results:
assembly_creation_results.results["segment_order"] = segment_order
Expand Down Expand Up @@ -519,7 +535,13 @@ def assembly_table_handle_errors(
required=True,
type=click.Path(exists=True),
)
def create_assembly(log_level, config_file):
@click.option(
"--test",
is_flag=True,
default=False,
help="Allow multiple submissions of the same project for testing AND use the webin-cli test endpoint",
)
def create_assembly(log_level, config_file, test=False):
logger.setLevel(log_level)
logging.getLogger("requests").setLevel(logging.INFO)

Expand All @@ -540,7 +562,7 @@ def create_assembly(log_level, config_file):
submission_table_start(db_config)
submission_table_update(db_config)

assembly_table_create(db_config, config, retry_number=3)
assembly_table_create(db_config, config, retry_number=3, test=test)
assembly_table_update(db_config, config)
assembly_table_handle_errors(db_config, config, slack_config)
time.sleep(2)
Expand Down
21 changes: 16 additions & 5 deletions ena-submission/scripts/create_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def construct_project_set_object(
if test:
alias = XmlAttribute(
f"{entry["group_id"]}:{entry["organism"]}:{config.unique_project_suffix}:{datetime.now(tz=pytz.utc)}"
) # TODO(https://github.com/loculus-project/loculus/issues/2425): remove in production
)
else:
alias = XmlAttribute(
f"{entry["group_id"]}:{entry["organism"]}:{config.unique_project_suffix}"
Expand Down Expand Up @@ -217,13 +217,18 @@ def submission_table_update(db_config: SimpleConnectionPool):
raise RuntimeError(error_msg)


def project_table_create(db_config: SimpleConnectionPool, config: Config, retry_number: int = 3):
def project_table_create(
db_config: SimpleConnectionPool, config: Config, retry_number: int = 3, test: bool = False
):
"""
1. Find all entries in project_table in state READY
2. Create project_set: get_group_info from loculus, use entry and config for other fields
3. Update project_table to state SUBMITTING (only proceed if update succeeds)
4. If (create_ena_project succeeds): update state to SUBMITTED with results
5. Else update state to HAS_ERRORS with error messages
If test=True add a timestamp to the alias suffix to allow for multiple submissions of the same
project for testing.
"""
ena_config = get_ena_config(
config.ena_submission_username,
Expand All @@ -245,7 +250,7 @@ def project_table_create(db_config: SimpleConnectionPool, config: Config, retry_
logger.error(f"Was unable to get group info for group: {row["group_id"]}, {e}")
continue

project_set = construct_project_set_object(group_info, config, row, test=True)
project_set = construct_project_set_object(group_info, config, row, test)
update_values = {
"status": Status.SUBMITTING,
"started_at": datetime.now(tz=pytz.utc),
Expand Down Expand Up @@ -358,7 +363,13 @@ def project_table_handle_errors(
required=True,
type=click.Path(exists=True),
)
def create_project(log_level, config_file):
@click.option(
"--test",
is_flag=True,
default=False,
help="Allow multiple submissions of the same project for testing",
)
def create_project(log_level, config_file, test=False):
logger.setLevel(log_level)
logging.getLogger("requests").setLevel(logging.INFO)

Expand All @@ -379,7 +390,7 @@ def create_project(log_level, config_file):
submission_table_start(db_config)
submission_table_update(db_config)

project_table_create(db_config, config)
project_table_create(db_config, config, test=test)
project_table_handle_errors(db_config, config, slack_config)
time.sleep(2)

Expand Down
24 changes: 16 additions & 8 deletions ena-submission/scripts/create_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def construct_sample_set_object(
if test:
alias = XmlAttribute(
f"{entry["accession"]}:{organism}:{config.unique_project_suffix}:{datetime.now(tz=pytz.utc)}"
) # TODO(https://github.com/loculus-project/loculus/issues/2425): remove in production
)
else:
alias = XmlAttribute(f"{entry["accession"]}:{organism}:{config.unique_project_suffix}")
list_sample_attributes = get_sample_attributes(config, sample_metadata, entry)
Expand Down Expand Up @@ -268,14 +268,19 @@ def submission_table_update(db_config: SimpleConnectionPool):
raise RuntimeError(error_msg)


def sample_table_create(db_config: SimpleConnectionPool, config: Config, retry_number: int = 3):
def sample_table_create(
db_config: SimpleConnectionPool, config: Config, retry_number: int = 3, test: bool = False
):
"""
1. Find all entries in sample_table in state READY
2. Create sample_set_object: use metadata, center_name, organism, and ingest fields
from submission_table
3. Update sample_table to state SUBMITTING (only proceed if update succeeds)
4. If (create_ena_sample succeeds): update state to SUBMITTED with results
5. Else update state to HAS_ERRORS with error messages
If test=True add a timestamp to the alias suffix to allow for multiple submissions of the same
sample for testing.
"""
ena_config = get_ena_config(
config.ena_submission_username,
Expand All @@ -295,10 +300,7 @@ def sample_table_create(db_config: SimpleConnectionPool, config: Config, retry_n
)

sample_set = construct_sample_set_object(
config,
sample_data_in_submission_table[0],
row,
test=True, # TODO(https://github.com/loculus-project/loculus/issues/2425): remove in production
config, sample_data_in_submission_table[0], row, test
)
update_values = {
"status": Status.SUBMITTING,
Expand Down Expand Up @@ -408,7 +410,13 @@ def sample_table_handle_errors(
required=True,
type=click.Path(exists=True),
)
def create_sample(log_level, config_file):
@click.option(
"--test",
is_flag=True,
default=False,
help="Allow multiple submissions of the same project for testing",
)
def create_sample(log_level, config_file, test=False):
logger.setLevel(log_level)
logging.getLogger("requests").setLevel(logging.INFO)

Expand All @@ -429,7 +437,7 @@ def create_sample(log_level, config_file):
submission_table_start(db_config)
submission_table_update(db_config)

sample_table_create(db_config, config)
sample_table_create(db_config, config, test=test)
sample_table_handle_errors(db_config, config, slack_config)
time.sleep(2)

Expand Down
9 changes: 5 additions & 4 deletions ena-submission/scripts/ena_submission_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def create_manifest(manifest: AssemblyManifest) -> str:


def post_webin_cli(
config: ENAConfig, manifest_filename, center_name=None
config: ENAConfig, manifest_filename, center_name=None, test=True
) -> subprocess.CompletedProcess:
subprocess_args = [
"java",
Expand All @@ -311,8 +311,8 @@ def post_webin_cli(
"-manifest",
manifest_filename,
"-submit",
"-test", # TODO(https://github.com/loculus-project/loculus/issues/2425): remove in prod
]
subprocess_args.append("-test") if test else None
if center_name:
subprocess_args.extend(["-centername", center_name])
return subprocess.run(
Expand All @@ -324,16 +324,17 @@ def post_webin_cli(


def create_ena_assembly(
config: ENAConfig, manifest_filename: str, center_name=None
config: ENAConfig, manifest_filename: str, center_name=None, test=True
) -> CreationResults:
"""
This is equivalent to running:
webin-cli -username {params.ena_submission_username} -password {params.ena_submission_password}
-context genome -manifest {manifest_file} -submit
test=True, adds the `-test` flag which means submissions will use the ENA dev endpoint.
"""
errors = []
warnings = []
response = post_webin_cli(config, manifest_filename, center_name=center_name)
response = post_webin_cli(config, manifest_filename, center_name=center_name, test=test)
logger.info(response.stdout)
if response.returncode != 0:
error_message = (
Expand Down

0 comments on commit 25396ae

Please sign in to comment.