From 81905356a4501a7db3f98e7236b0a5673a7da66b Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Tue, 19 Mar 2024 16:08:17 +0100 Subject: [PATCH] Update --- ingest/Snakefile | 18 +++++- ingest/scripts/submit_to_loculus.py | 96 ++++++++++++++++------------- 2 files changed, 70 insertions(+), 44 deletions(-) diff --git a/ingest/Snakefile b/ingest/Snakefile index b76fa280a..10fd0c737 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -1,4 +1,6 @@ TAXON_ID = 10244 # Mpox +BRANCH = "cfgbl-prepro" +NUMBER_OF_SAMPLES = 500 rule all: @@ -56,7 +58,13 @@ rule rename_columns: ncbi_dataset_tsv="data/metadata.tsv", shell: """ - sed '1s/Accession/submissionId/; 1s/Isolate Collection date/collection_date/' \ + sed '1s/Accession/submissionId/;\ + 1s/Isolate Collection date/collection_date/; \ + 1s/Release date/ncbi_release_date/; \ + 1s/Geographic Location/country/; \ + 1s/Submitter Affiliation/author_affiliation/; \ + 1s/Submitter Names/authors/; \ + 1s/Isolate Lineage/isolate_name/;' \ {input.ncbi_dataset_tsv} \ > {output.ncbi_dataset_tsv} """ @@ -71,7 +79,7 @@ rule subsample: ncbi_dataset_sequences="data/sequences_sample.fasta", strains="data/strains_sample.txt", params: - number_of_samples=100, + number_of_samples=NUMBER_OF_SAMPLES, shell: """ tsv-sample -H -n {params.number_of_samples} \ @@ -93,19 +101,25 @@ rule submit_to_loculus: submit_config="config/submit_config.yaml", output: ids="result/loculus_ids.tsv", + params: + branch=BRANCH, shell: """ python scripts/submit_to_loculus.py \ --mode submit \ --metadata {input.metadata} \ + --branch {params.branch} \ --sequences {input.sequences} \ --output-ids {output.ids} """ rule approve: + params: + branch=BRANCH, shell: """ python scripts/submit_to_loculus.py \ --mode approve + --branch {params.branch} """ diff --git a/ingest/scripts/submit_to_loculus.py b/ingest/scripts/submit_to_loculus.py index 3aee63332..f021fcf35 100644 --- a/ingest/scripts/submit_to_loculus.py +++ b/ingest/scripts/submit_to_loculus.py @@ -1,9 +1,10 @@ -import click import json -import requests import logging -BRANCH="main" +import click +import requests + +BRANCH="demo-prepro" GROUP_NAME="insdc_ingest_group" USERNAME="insdc_ingest_user" PASSWORD="insdc_ingest_user" @@ -14,7 +15,10 @@ # Create the ingest user on instance start in kubernetes/loculus/templates/keycloak-config-map.yaml -def get_jwt(username, password): +def backend_url(branch): + return f"https://backend-{branch}.loculus.org" + +def get_jwt(username, password, branch): """ Get a JWT token for the given username and password """ @@ -27,17 +31,19 @@ def get_jwt(username, password): } headers = {'Content-Type': 'application/x-www-form-urlencoded'} - response = requests.post(KEYCLOAK_TOKEN_URL, data=data, headers=headers) + keycloak_token_url = f"https://authentication-{branch}.loculus.org/realms/loculus/protocol/openid-connect/token" + + response = requests.post(keycloak_token_url, data=data, headers=headers) response.raise_for_status() jwt_keycloak = response.json() jwt = jwt_keycloak['access_token'] return jwt -def create_group(group_name): +def create_group(group_name, branch): # Create the ingest group - url = f"{BACKEND_URL}/groups" - token = get_jwt(USERNAME, PASSWORD) + url = f"{backend_url(branch)}/groups" + token = get_jwt(USERNAME, PASSWORD, branch) group_name = group_name headers = { @@ -47,6 +53,16 @@ def create_group(group_name): data = { "groupName": group_name, + "institution": "NA", + "address": { + "line1": "1234 Loculus Street", + "line2": "NA", + "city": "Dortmund", + "state": "NRW", + "postalCode": "12345", + "country": "Germany" + }, + "contactEmail": "something@loculus.org" } response = requests.post(url, json=data, headers=headers) @@ -55,17 +71,18 @@ def create_group(group_name): print("Group already exists") # raise if not 409 and not happy 2xx elif not response.ok: + print(f"Error creating group: {response.json()}") response.raise_for_status() -def submit(metadata, sequences): +def submit(metadata, sequences, branch): """ Submit data to Loculus. """ - jwt = get_jwt(USERNAME, PASSWORD) + jwt = get_jwt(USERNAME, PASSWORD, branch) # Endpoint URL - url = f'{BACKEND_URL}/{ORGANISM}/submit' + url = f'{backend_url(branch)}/{ORGANISM}/submit' # Headers with Bearer Authentication headers = { @@ -94,16 +111,16 @@ def submit(metadata, sequences): return response.json() -def approve(): +def approve(branch): """ Get sequences that were preprocessed successfully and approve them. 1. Get the ids of the sequences that were preprocessed successfully /ORGANISM/get-sequences 2. Approve the sequences """ - jwt = get_jwt(USERNAME, PASSWORD) + jwt = get_jwt(USERNAME, PASSWORD, branch) - url = f'{BACKEND_URL}/{ORGANISM}/get-sequences' + url = f'{backend_url(branch)}/{ORGANISM}/get-sequences' # Headers with Bearer Authentication @@ -116,69 +133,64 @@ def approve(): response = requests.get(url, headers=headers) response.raise_for_status() - # Get sequences to approve - # Roughly of this shape: {'accession': '182', 'version': 1, 'status': 'AWAITING_APPROVAL', 'isRevocation': False}, - to_approve = [] - for sequence in response.json(): - # Get sequences where status is AWAITING_APPROVAL - # Approve them by adding them to list with {'accession': '182', 'version': 1} - if sequence['status'] == 'AWAITING_APPROVAL': - to_approve.append({'accession': sequence['accession'], 'version': sequence['version']}) + # logging.info(f"Response: {response.json()}") - payload = {"accessionVersions": to_approve} + # # Get sequences to approve + # # Roughly of this shape: {'accession': '182', 'version': 1, 'status': 'AWAITING_APPROVAL', 'isRevocation': False}, + # to_approve = [] + # for sequence in response.json()["sequenceEntries"]: + # # Get sequences where status is AWAITING_APPROVAL + # # Approve them by adding them to list with {'accession': '182', 'version': 1} + # if sequence['status'] == 'AWAITING_APPROVAL': + # to_approve.append({'accession': sequence['accession'], 'version': sequence['version']}) - url = f'{BACKEND_URL}/{ORGANISM}/approve-processed-data' + payload = {"scope": "ALL"} + + url = f'{backend_url(branch)}/{ORGANISM}/approve-processed-data' response = requests.post(url, headers=headers, json=payload) response.raise_for_status() - return to_approve - - - - - - # Submit - # %% @click.command() @click.option('--metadata', required=False, type=click.Path(exists=True), help='Path to the metadata file') @click.option('--sequences', required=False, type=click.Path(exists=True), help='Path to the sequences file') +@click.option('--branch', required=False, type=click.STRING, help='Branch to submit to', default="main") @click.option('--output-ids', required=False, type=click.Path(), help='Path to the output IDs file') @click.option('--mode', required=True, type=click.Choice(['submit', 'approve']), help='Mode to run in') @click.option('--log-level', default='INFO', type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), help='Log level') # @click.option('--submit-config', required=True, type=click.Path(exists=True), help='Path to the submit configuration file') # def submit_to_loculus(metadata, sequences, submit_config, output_ids): -def submit_to_loculus(metadata, sequences, output_ids, mode, log_level): +def submit_to_loculus(metadata, sequences, branch: str, output_ids, mode, log_level): """ Submit data to Loculus. """ logging.basicConfig(level=log_level) if mode == 'submit': - logging.info(f"Submitting to Loculus") + logging.info("Submitting to Loculus") logging.debug(f"Args: {metadata}, {sequences}, {output_ids}") # Create group if it doesn't exist logging.info(f"Creating group {GROUP_NAME}") - create_group(GROUP_NAME) + create_group(GROUP_NAME, branch) logging.info(f"Group {GROUP_NAME} created") # Submit - logging.info(f"Starting submission") - response = submit(metadata, sequences) - logging.info(f"Submission complete") + logging.info("Starting submission") + response = submit(metadata, sequences, branch) + logging.info("Submission complete") json.dump(response, open(output_ids, 'w'), indent=4) logging.info(f"IDs written to {output_ids}") if mode == 'approve': - logging.info(f"Approving sequences") - response = approve() + logging.info("Approving sequences") + response = approve(branch) logging.debug(f"Approved: {response}") - logging.info(f"Approving sequences complete") + logging.info("Approving sequences complete") if __name__ == '__main__': - submit_to_loculus() \ No newline at end of file + submit_to_loculus()