Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
corneliusroemer committed Mar 19, 2024
1 parent b746876 commit 8190535
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 44 deletions.
18 changes: 16 additions & 2 deletions ingest/Snakefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Taxon identifier of the organism being ingested (its use is outside the
# visible part of this file — presumably the NCBI download; TODO confirm).
TAXON_ID = 10244 # Mpox
# Loculus deployment branch; forwarded as `--branch` to
# scripts/submit_to_loculus.py by the submit and approve rules.
BRANCH = "cfgbl-prepro"
# Number of records kept by the `subsample` rule (passed to `tsv-sample -n`).
NUMBER_OF_SAMPLES = 500


rule all:
Expand Down Expand Up @@ -56,7 +58,13 @@ rule rename_columns:
ncbi_dataset_tsv="data/metadata.tsv",
shell:
"""
sed '1s/Accession/submissionId/; 1s/Isolate Collection date/collection_date/' \
sed '1s/Accession/submissionId/;\
1s/Isolate Collection date/collection_date/; \
1s/Release date/ncbi_release_date/; \
1s/Geographic Location/country/; \
1s/Submitter Affiliation/author_affiliation/; \
1s/Submitter Names/authors/; \
1s/Isolate Lineage/isolate_name/;' \
{input.ncbi_dataset_tsv} \
> {output.ncbi_dataset_tsv}
"""
Expand All @@ -71,7 +79,7 @@ rule subsample:
ncbi_dataset_sequences="data/sequences_sample.fasta",
strains="data/strains_sample.txt",
params:
number_of_samples=100,
number_of_samples=NUMBER_OF_SAMPLES,
shell:
"""
tsv-sample -H -n {params.number_of_samples} \
Expand All @@ -93,19 +101,25 @@ rule submit_to_loculus:
submit_config="config/submit_config.yaml",
output:
ids="result/loculus_ids.tsv",
params:
branch=BRANCH,
shell:
"""
python scripts/submit_to_loculus.py \
--mode submit \
--metadata {input.metadata} \
--branch {params.branch} \
--sequences {input.sequences} \
--output-ids {output.ids}
"""


# Approve processed data on the Loculus instance selected by BRANCH.
rule approve:
    params:
        branch=BRANCH,
    shell:
        # Every argument line except the last needs a trailing backslash;
        # without it the shell runs `--branch ...` as a separate command and
        # the script never receives the branch.
        """
        python scripts/submit_to_loculus.py \
            --mode approve \
            --branch {params.branch}
        """
96 changes: 54 additions & 42 deletions ingest/scripts/submit_to_loculus.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import click
import json
import requests
import logging

BRANCH="main"
import click
import requests

BRANCH="demo-prepro"
GROUP_NAME="insdc_ingest_group"
USERNAME="insdc_ingest_user"
PASSWORD="insdc_ingest_user"
Expand All @@ -14,7 +15,10 @@

# Create the ingest user on instance start in kubernetes/loculus/templates/keycloak-config-map.yaml

def get_jwt(username, password):
def backend_url(branch):
    """Return the base URL of the Loculus backend deployed for ``branch``."""
    host = "backend-{}.loculus.org".format(branch)
    return "https://" + host

def get_jwt(username, password, branch):
"""
Get a JWT token for the given username and password
"""
Expand All @@ -27,17 +31,19 @@ def get_jwt(username, password):
}
headers = {'Content-Type': 'application/x-www-form-urlencoded'}

response = requests.post(KEYCLOAK_TOKEN_URL, data=data, headers=headers)
keycloak_token_url = f"https://authentication-{branch}.loculus.org/realms/loculus/protocol/openid-connect/token"

response = requests.post(keycloak_token_url, data=data, headers=headers)
response.raise_for_status()

jwt_keycloak = response.json()
jwt = jwt_keycloak['access_token']
return jwt

def create_group(group_name):
def create_group(group_name, branch):
# Create the ingest group
url = f"{BACKEND_URL}/groups"
token = get_jwt(USERNAME, PASSWORD)
url = f"{backend_url(branch)}/groups"
token = get_jwt(USERNAME, PASSWORD, branch)
group_name = group_name

headers = {
Expand All @@ -47,6 +53,16 @@ def create_group(group_name):

data = {
"groupName": group_name,
"institution": "NA",
"address": {
"line1": "1234 Loculus Street",
"line2": "NA",
"city": "Dortmund",
"state": "NRW",
"postalCode": "12345",
"country": "Germany"
},
"contactEmail": "something@loculus.org"
}

response = requests.post(url, json=data, headers=headers)
Expand All @@ -55,17 +71,18 @@ def create_group(group_name):
print("Group already exists")
# raise if not 409 and not happy 2xx
elif not response.ok:
print(f"Error creating group: {response.json()}")
response.raise_for_status()

def submit(metadata, sequences):
def submit(metadata, sequences, branch):
"""
Submit data to Loculus.
"""

jwt = get_jwt(USERNAME, PASSWORD)
jwt = get_jwt(USERNAME, PASSWORD, branch)

# Endpoint URL
url = f'{BACKEND_URL}/{ORGANISM}/submit'
url = f'{backend_url(branch)}/{ORGANISM}/submit'

# Headers with Bearer Authentication
headers = {
Expand Down Expand Up @@ -94,16 +111,16 @@ def submit(metadata, sequences):

return response.json()

def approve():
def approve(branch):
"""
Get sequences that were preprocessed successfully and approve them.
1. Get the ids of the sequences that were preprocessed successfully
/ORGANISM/get-sequences
2. Approve the sequences
"""
jwt = get_jwt(USERNAME, PASSWORD)
jwt = get_jwt(USERNAME, PASSWORD, branch)

url = f'{BACKEND_URL}/{ORGANISM}/get-sequences'
url = f'{backend_url(branch)}/{ORGANISM}/get-sequences'


# Headers with Bearer Authentication
Expand All @@ -116,69 +133,64 @@ def approve():
response = requests.get(url, headers=headers)
response.raise_for_status()

# Get sequences to approve
# Roughly of this shape: {'accession': '182', 'version': 1, 'status': 'AWAITING_APPROVAL', 'isRevocation': False},
to_approve = []
for sequence in response.json():
# Get sequences where status is AWAITING_APPROVAL
# Approve them by adding them to list with {'accession': '182', 'version': 1}
if sequence['status'] == 'AWAITING_APPROVAL':
to_approve.append({'accession': sequence['accession'], 'version': sequence['version']})
# logging.info(f"Response: {response.json()}")

payload = {"accessionVersions": to_approve}
# # Get sequences to approve
# # Roughly of this shape: {'accession': '182', 'version': 1, 'status': 'AWAITING_APPROVAL', 'isRevocation': False},
# to_approve = []
# for sequence in response.json()["sequenceEntries"]:
# # Get sequences where status is AWAITING_APPROVAL
# # Approve them by adding them to list with {'accession': '182', 'version': 1}
# if sequence['status'] == 'AWAITING_APPROVAL':
# to_approve.append({'accession': sequence['accession'], 'version': sequence['version']})

url = f'{BACKEND_URL}/{ORGANISM}/approve-processed-data'
payload = {"scope": "ALL"}

url = f'{backend_url(branch)}/{ORGANISM}/approve-processed-data'

response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()

return to_approve





# Submit


# %%

# Command-line entry point. `--mode submit` creates the ingest group (if
# needed), uploads metadata and sequences to the backend for `--branch`, and
# writes the backend's JSON response to `--output-ids`. `--mode approve`
# approves processed data on that backend.
@click.command()
@click.option('--metadata', required=False, type=click.Path(exists=True), help='Path to the metadata file')
@click.option('--sequences', required=False, type=click.Path(exists=True), help='Path to the sequences file')
@click.option('--branch', required=False, type=click.STRING, help='Branch to submit to', default="main")
@click.option('--output-ids', required=False, type=click.Path(), help='Path to the output IDs file')
@click.option('--mode', required=True, type=click.Choice(['submit', 'approve']), help='Mode to run in')
@click.option('--log-level', default='INFO', type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), help='Log level')
def submit_to_loculus(metadata, sequences, branch: str, output_ids, mode, log_level):
    """
    Submit data to Loculus.
    """
    logging.basicConfig(level=log_level)
    if mode == 'submit':
        # --output-ids is optional at the CLI level because approve mode does
        # not need it; reject a missing value here, before any network call,
        # instead of failing after the upload has already succeeded.
        if output_ids is None:
            raise click.UsageError("--output-ids is required when --mode is 'submit'")

        logging.info("Submitting to Loculus")
        logging.debug(f"Args: {metadata}, {sequences}, {output_ids}")
        # Create group if it doesn't exist
        logging.info(f"Creating group {GROUP_NAME}")
        create_group(GROUP_NAME, branch)
        logging.info(f"Group {GROUP_NAME} created")

        # Submit
        logging.info("Starting submission")
        response = submit(metadata, sequences, branch)
        logging.info("Submission complete")

        # Context manager so the file is flushed and closed deterministically.
        with open(output_ids, 'w') as ids_file:
            json.dump(response, ids_file, indent=4)
        logging.info(f"IDs written to {output_ids}")

    if mode == 'approve':
        logging.info("Approving sequences")
        response = approve(branch)
        logging.debug(f"Approved: {response}")

        logging.info("Approving sequences complete")


if __name__ == '__main__':
submit_to_loculus()
submit_to_loculus()

0 comments on commit 8190535

Please sign in to comment.