From e3161789eba25ef285f91d70c772d5c549c198c9 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Tue, 20 Aug 2024 20:13:25 +0200 Subject: [PATCH 01/19] Update config, remove deployment, add revoke cronjob --- ingest/scripts/prepare_files.py | 5 +- .../loculus/templates/ingest-deployment.yaml | 111 ++++++++---------- 2 files changed, 55 insertions(+), 61 deletions(-) diff --git a/ingest/scripts/prepare_files.py b/ingest/scripts/prepare_files.py index 468a6444b..ac2813128 100644 --- a/ingest/scripts/prepare_files.py +++ b/ingest/scripts/prepare_files.py @@ -13,6 +13,7 @@ @dataclass class Config: + organism: str segmented: str nucleotide_sequences: list[str] slack_hook: str @@ -52,8 +53,8 @@ def revocation_notification(config: Config, to_revoke: dict[str, dict[str, str]] text = ( f"{config.backend_url}: Ingest pipeline wants to add the following sequences" f" which will lead to revocations: {to_revoke}. " - "If you agree with this run the regroup_and_revoke rule in the ingest pod:" - " `kubectl exec -it INGEST_POD_NAME -- snakemake regroup_and_revoke`." + "If you agree with this manually run the regroup_and_revoke cronjob:" + f" `kubectl create job --from=cronjob/loculus-revoke-and-regroup-cronjob-{config.organism} `." 
) notify(config, text) diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml index 3147c52c1..c2fb26204 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -3,74 +3,66 @@ {{- range $key, $value := (.Values.organisms | default .Values.defaultOrganisms) }} {{- if $value.ingest }} --- -apiVersion: apps/v1 -kind: Deployment +apiVersion: batch/v1 +kind: CronJob metadata: - name: loculus-ingest-{{ $key }} - annotations: - argocd.argoproj.io/sync-options: Replace=true - reloader.stakater.com/auto: "true" + name: loculus-ingest-cronjob-{{ $key }} spec: - replicas: 1 - selector: - matchLabels: - app: loculus - component: loculus-ingest-{{ $key }} - template: - metadata: - labels: - app: loculus - component: loculus-ingest-{{ $key }} + schedule: "*/1 * * * *" # ingest every minute but forbid concurrency have jobs run only for Values.ingestLimitSeconds + startingDeadlineSeconds: 60 + concurrencyPolicy: Forbid + jobTemplate: spec: - containers: - - name: ingest-{{ $key }} - image: {{ $value.ingest.image}}:{{ $dockerTag }} - imagePullPolicy: Always - resources: - requests: - memory: "80Mi" - cpu: "10m" - limits: - memory: "10Gi" - env: - - name: KEYCLOAK_INGEST_PASSWORD - valueFrom: - secretKeyRef: - name: service-accounts - key: insdcIngestUserPassword - - name: NCBI_API_KEY - valueFrom: - secretKeyRef: - name: ingest-ncbi - key: api-key - - name: SLACK_HOOK - valueFrom: - secretKeyRef: - name: slack-notifications - key: slack-hook - args: - - snakemake - - results/approved - - results/submitted # Remove in production, see #1777 - - results/revised # Remove in production, see #1777 - - --all-temp # Reduce disk usage by not keeping files around - {{- if $value.ingest.configFile }} - volumeMounts: + activeDeadlineSeconds: {{ $.Values.ingestLimitSeconds }} + template: + metadata: + labels: + app: loculus + component: loculus-ingest-cronjob-{{ 
$key }} + annotations: + argocd.argoproj.io/sync-options: Replace=true + reloader.stakater.com/auto: "true" + spec: + restartPolicy: Never + containers: + - name: ingest-{{ $key }} + image: {{ $value.ingest.image}}:{{ $dockerTag }} + imagePullPolicy: Always + resources: + requests: + memory: "1Gi" + cpu: "200m" + limits: + cpu: "200m" + memory: "10Gi" + env: + - name: KEYCLOAK_INGEST_PASSWORD + valueFrom: + secretKeyRef: + name: service-accounts + key: insdcIngestUserPassword + args: + - snakemake + - results/submitted + - results/revised + - --all-temp # Reduce disk usage by not keeping files around + {{- if $value.ingest.configFile }} + volumeMounts: + - name: loculus-ingest-config-volume-{{ $key }} + mountPath: /package/config/config.yaml + subPath: config.yaml + volumes: - name: loculus-ingest-config-volume-{{ $key }} - mountPath: /package/config/config.yaml - subPath: config.yaml - volumes: - - name: loculus-ingest-config-volume-{{ $key }} - configMap: - name: loculus-ingest-config-{{ $key }} - {{- end }} + configMap: + name: loculus-ingest-config-{{ $key }} + {{- end }} --- apiVersion: batch/v1 kind: CronJob metadata: - name: loculus-ingest-cronjob-{{ $key }} + name: loculus-revoke-and-regroup-cronjob-{{ $key }} spec: - schedule: "*/30 * * * *" # ingest every 30 minutes (not more often to be kind to NCBI) + schedule: "0 0 31 2 *" # Never runs without manual trigger startingDeadlineSeconds: 60 concurrencyPolicy: Forbid jobTemplate: @@ -107,6 +99,7 @@ spec: - snakemake - results/submitted - results/revised + - results/revoked - --all-temp # Reduce disk usage by not keeping files around {{- if $value.ingest.configFile }} volumeMounts: From bca1e6f31dff723691aff9be4e493d8f87d9fba9 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 09:02:04 +0200 Subject: [PATCH 02/19] Check if having the startingDeadlineSeconds half the length of the schedule frequency helps --- 
kubernetes/loculus/templates/ingest-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml index c2fb26204..acc48ee8b 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -8,7 +8,7 @@ kind: CronJob metadata: name: loculus-ingest-cronjob-{{ $key }} spec: - schedule: "*/1 * * * *" # ingest every minute but forbid concurrency have jobs run only for Values.ingestLimitSeconds + schedule: "*/2 * * * *" # ingest every 2 minutes but forbid concurrency, have jobs run only for Values.ingestLimitSeconds startingDeadlineSeconds: 60 concurrencyPolicy: Forbid jobTemplate: From 0fb90fd6fc0da0eddef48ac64f1b065966fa195f Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 09:12:15 +0200 Subject: [PATCH 03/19] Add approval step --- kubernetes/loculus/templates/ingest-deployment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml index acc48ee8b..f5e533ea9 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -45,6 +45,7 @@ spec: - snakemake - results/submitted - results/revised + - results/approved - --all-temp # Reduce disk usage by not keeping files around {{- if $value.ingest.configFile }} volumeMounts: From 820e3ed7058234322afa0de13b52dcd4e022d41c Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 09:12:59 +0200 Subject: [PATCH 04/19] Also add approve to regroup and revoke cronjob --- kubernetes/loculus/templates/ingest-deployment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml 
b/kubernetes/loculus/templates/ingest-deployment.yaml index f5e533ea9..42458e8ea 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -101,6 +101,7 @@ spec: - results/submitted - results/revised - results/revoked + - results/approved - --all-temp # Reduce disk usage by not keeping files around {{- if $value.ingest.configFile }} volumeMounts: From bf6a70e5eaa7bdf8ed127a58e289b6a0ef3d6131 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 11:17:41 +0200 Subject: [PATCH 05/19] Test out timeout --- kubernetes/loculus/templates/ingest-deployment.yaml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml index 42458e8ea..e680fc712 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -97,12 +97,11 @@ spec: name: service-accounts key: insdcIngestUserPassword args: - - snakemake - - results/submitted - - results/revised - - results/revoked - - results/approved - - --all-temp # Reduce disk usage by not keeping files around + - timeout + - 25m + - bash + - -c + - "snakemake results/submitted results/revised results/revoked results/approved --all-temp" # Reduce disk usage by not keeping files around {{- if $value.ingest.configFile }} volumeMounts: - name: loculus-ingest-config-volume-{{ $key }} From b31923a6a66f23f9eeb82b3e56cbb2b68163f035 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 11:26:43 +0200 Subject: [PATCH 06/19] Add timeout only to approve --- ingest/Snakefile | 4 ++-- kubernetes/loculus/templates/ingest-deployment.yaml | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ingest/Snakefile b/ingest/Snakefile index 
3d583a944..03a6f5611 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -446,8 +446,8 @@ rule approve: log_level=LOG_LEVEL, shell: """ - python {input.script} \ + timeout 25m bash -c "python {input.script} \ --mode approve \ --config-file {input.config} \ - --log-level {params.log_level} \ + --log-level {params.log_level}" \ """ diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml index e680fc712..42458e8ea 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -97,11 +97,12 @@ spec: name: service-accounts key: insdcIngestUserPassword args: - - timeout - - 25m - - bash - - -c - - "snakemake results/submitted results/revised results/revoked results/approved --all-temp" # Reduce disk usage by not keeping files around + - snakemake + - results/submitted + - results/revised + - results/revoked + - results/approved + - --all-temp # Reduce disk usage by not keeping files around {{- if $value.ingest.configFile }} volumeMounts: - name: loculus-ingest-config-volume-{{ $key }} From 7eb29de215a14abf97225420a7e8c9d2e1754ff0 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 12:29:30 +0200 Subject: [PATCH 07/19] force replacement of cronjobs when sync is called --- kubernetes/loculus/templates/ingest-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml index 42458e8ea..49500eb6c 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -20,7 +20,7 @@ spec: app: loculus component: loculus-ingest-cronjob-{{ $key }} annotations: - argocd.argoproj.io/sync-options: Replace=true + argocd.argoproj.io/sync-options: Force=true,Replace=true reloader.stakater.com/auto: "true" 
spec: restartPolicy: Never From 0ba9133982eb6fa569bfbd244e88959b6cbb7db3 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 13:08:20 +0200 Subject: [PATCH 08/19] check if removing restartpolicy will let jobs be removed by argocd --- kubernetes/loculus/templates/ingest-deployment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml index 49500eb6c..e856bda52 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -23,7 +23,6 @@ spec: argocd.argoproj.io/sync-options: Force=true,Replace=true reloader.stakater.com/auto: "true" spec: - restartPolicy: Never containers: - name: ingest-{{ $key }} image: {{ $value.ingest.image}}:{{ $dockerTag }} From f761c708c9a86686e465d411c5fe7c76076520d7 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 13:18:53 +0200 Subject: [PATCH 09/19] Add back restart policy and do not throw error on timeout --- ingest/Snakefile | 5 ++++- kubernetes/loculus/templates/ingest-deployment.yaml | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ingest/Snakefile b/ingest/Snakefile index 03a6f5611..eb5dbb542 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -25,6 +25,7 @@ COLUMN_MAPPING = config["column_mapping"] LOG_LEVEL = config.get("log_level", "INFO") NCBI_API_KEY = os.getenv("NCBI_API_KEY") FILTER_FASTA_HEADERS = config.get("filter_fasta_headers", None) +APPROVE_TIMEOUT = config.get("approve_timeout", '25m') def rename_columns(input_file, output_file, mapping=COLUMN_MAPPING): @@ -444,9 +445,11 @@ rule approve: approved=touch("results/approved"), params: log_level=LOG_LEVEL, + approve_timeout=APPROVE_TIMEOUT, shell: """ - timeout 25m bash -c "python {input.script} \ + timeout --preserve-status -s SIGTERM 
{params.approve_timeout} \ + bash -c "python {input.script} \ --mode approve \ --config-file {input.config} \ --log-level {params.log_level}" \ diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml index e856bda52..49500eb6c 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -23,6 +23,7 @@ spec: argocd.argoproj.io/sync-options: Force=true,Replace=true reloader.stakater.com/auto: "true" spec: + restartPolicy: Never containers: - name: ingest-{{ $key }} image: {{ $value.ingest.image}}:{{ $dockerTag }} From dca0767a45a6dbed6912986276375a490253e170 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 13:22:00 +0200 Subject: [PATCH 10/19] lower approve_timeout for debugging --- ingest/Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest/Snakefile b/ingest/Snakefile index eb5dbb542..8df92aaf4 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -25,7 +25,7 @@ COLUMN_MAPPING = config["column_mapping"] LOG_LEVEL = config.get("log_level", "INFO") NCBI_API_KEY = os.getenv("NCBI_API_KEY") FILTER_FASTA_HEADERS = config.get("filter_fasta_headers", None) -APPROVE_TIMEOUT = config.get("approve_timeout", '25m') +APPROVE_TIMEOUT = config.get("approve_timeout", '1m') def rename_columns(input_file, output_file, mapping=COLUMN_MAPPING): From 8b932a52280efac7453715d23b1ce2c3646535c3 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:16:02 +0200 Subject: [PATCH 11/19] Just add approve_timeout to the python script --- ingest/Snakefile | 9 +++++---- ingest/scripts/call_loculus.py | 18 +++++++++++++++++- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/ingest/Snakefile b/ingest/Snakefile index 8df92aaf4..585749d23 100644 --- a/ingest/Snakefile +++ 
b/ingest/Snakefile @@ -25,7 +25,7 @@ COLUMN_MAPPING = config["column_mapping"] LOG_LEVEL = config.get("log_level", "INFO") NCBI_API_KEY = os.getenv("NCBI_API_KEY") FILTER_FASTA_HEADERS = config.get("filter_fasta_headers", None) -APPROVE_TIMEOUT = config.get("approve_timeout", '1m') +APPROVE_TIMEOUT = config.get("approve_timeout", '1') #time in minutes def rename_columns(input_file, output_file, mapping=COLUMN_MAPPING): @@ -448,9 +448,10 @@ rule approve: approve_timeout=APPROVE_TIMEOUT, shell: """ - timeout --preserve-status -s SIGTERM {params.approve_timeout} \ - bash -c "python {input.script} \ + python {input.script} \ --mode approve \ --config-file {input.config} \ - --log-level {params.log_level}" \ + --log-level {params.log_level} \ + --approve-timeout {params.approve_timeout} + touch results/approved """ diff --git a/ingest/scripts/call_loculus.py b/ingest/scripts/call_loculus.py index 0d10390ed..6b7a37c80 100644 --- a/ingest/scripts/call_loculus.py +++ b/ingest/scripts/call_loculus.py @@ -3,6 +3,7 @@ import os from collections import defaultdict from dataclasses import dataclass +from datetime import datetime, timedelta from http import HTTPMethod from pathlib import Path from time import sleep @@ -10,6 +11,7 @@ import click import jsonlines +import pytz import requests import yaml @@ -21,6 +23,8 @@ datefmt="%H:%M:%S", ) +_start_time: datetime | None = None + @dataclass class Config: @@ -413,10 +417,18 @@ def get_submitted(config: Config): required=False, type=click.Path(exists=True), ) -def submit_to_loculus(metadata, sequences, mode, log_level, config_file, output, revoke_map): +@click.option( + "--approve-timeout", + required=False, + type=int, +) +def submit_to_loculus( + metadata, sequences, mode, log_level, config_file, output, revoke_map, approve_timeout +): """ Submit data to Loculus. 
""" + global _start_time logger.setLevel(log_level) logging.getLogger("requests").setLevel(logging.WARNING) logging.getLogger("urllib3").setLevel(logging.WARNING) @@ -449,10 +461,14 @@ def record_factory(*args, **kwargs): if mode == "approve": while True: + if not _start_time: + _start_time = datetime.now(tz=pytz.utc) logger.info("Approving sequences") response = approve(config) logger.info(f"Approved: {len(response)} sequences") sleep(30) + if datetime.now(tz=pytz.utc) - timedelta(minutes=approve_timeout) > _start_time: + break if mode == "regroup-and-revoke": try: From 36f0f51472fdacb727fc8749018f1cee9bde338e Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:23:27 +0200 Subject: [PATCH 12/19] Remove double touch --- ingest/Snakefile | 1 - 1 file changed, 1 deletion(-) diff --git a/ingest/Snakefile b/ingest/Snakefile index 585749d23..80093b6d6 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -453,5 +453,4 @@ rule approve: --config-file {input.config} \ --log-level {params.log_level} \ --approve-timeout {params.approve_timeout} - touch results/approved """ From 9097e9ea4198fd19cf018a72cc4a3274e899ff1d Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:34:03 +0200 Subject: [PATCH 13/19] Increase approve timeout to 5min for testing --- ingest/Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest/Snakefile b/ingest/Snakefile index 80093b6d6..81d688671 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -25,7 +25,7 @@ COLUMN_MAPPING = config["column_mapping"] LOG_LEVEL = config.get("log_level", "INFO") NCBI_API_KEY = os.getenv("NCBI_API_KEY") FILTER_FASTA_HEADERS = config.get("filter_fasta_headers", None) -APPROVE_TIMEOUT = config.get("approve_timeout", '1') #time in minutes +APPROVE_TIMEOUT = config.get("approve_timeout", '5') #time in minutes def rename_columns(input_file, 
output_file, mapping=COLUMN_MAPPING): From 6b61d26f71355bc14268ae0946f6d1ef7b0621a7 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:37:05 +0200 Subject: [PATCH 14/19] force a sync by adding an echo statement --- ingest/Snakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/ingest/Snakefile b/ingest/Snakefile index 81d688671..1c3adb773 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -453,4 +453,5 @@ rule approve: --config-file {input.config} \ --log-level {params.log_level} \ --approve-timeout {params.approve_timeout} + echo "testing argocd can kill pods" """ From dd2a1f85fdffcd1c1d13d7fc4503aa1e2badaa08 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:48:34 +0200 Subject: [PATCH 15/19] Remove reloader.stakater.com/auto: "true" just to check --- kubernetes/loculus/templates/ingest-deployment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/kubernetes/loculus/templates/ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml index 49500eb6c..aca1c48ba 100644 --- a/kubernetes/loculus/templates/ingest-deployment.yaml +++ b/kubernetes/loculus/templates/ingest-deployment.yaml @@ -21,7 +21,6 @@ spec: component: loculus-ingest-cronjob-{{ $key }} annotations: argocd.argoproj.io/sync-options: Force=true,Replace=true - reloader.stakater.com/auto: "true" spec: restartPolicy: Never containers: From eeac2b4b180df286e82ac3914f7fb111824c888d Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:53:34 +0200 Subject: [PATCH 16/19] Remove echo see if we can force sync --- ingest/Snakefile | 1 - 1 file changed, 1 deletion(-) diff --git a/ingest/Snakefile b/ingest/Snakefile index 1c3adb773..81d688671 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -453,5 +453,4 @@ rule approve: --config-file {input.config} \ 
--log-level {params.log_level} \ --approve-timeout {params.approve_timeout} - echo "testing argocd can kill pods" """ From 9c29689cb9830d2b8dd9f572427a762660b10462 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Wed, 21 Aug 2024 15:28:11 +0200 Subject: [PATCH 17/19] Empty commit From 4912db7f428939ed8ba8744b51aeafa7a35609ad Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Wed, 21 Aug 2024 15:34:43 +0200 Subject: [PATCH 18/19] Try out 60min --- ingest/Snakefile | 19 +++++++++++++------ ingest/config/defaults.yaml | 1 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/ingest/Snakefile b/ingest/Snakefile index 81d688671..a464a2f5f 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -25,7 +25,7 @@ COLUMN_MAPPING = config["column_mapping"] LOG_LEVEL = config.get("log_level", "INFO") NCBI_API_KEY = os.getenv("NCBI_API_KEY") FILTER_FASTA_HEADERS = config.get("filter_fasta_headers", None) -APPROVE_TIMEOUT = config.get("approve_timeout", '5') #time in minutes +APPROVE_TIMEOUT_MIN = config.get("approve_timeout_min") # time in minutes def rename_columns(input_file, output_file, mapping=COLUMN_MAPPING): @@ -59,7 +59,7 @@ rule fetch_ncbi_dataset_package: dataset_package="results/ncbi_dataset.zip", params: taxon_id=TAXON_ID, - api_key=NCBI_API_KEY + api_key=NCBI_API_KEY, shell: """ datasets download virus genome taxon {params.taxon_id} \ @@ -114,7 +114,7 @@ rule extract_ncbi_dataset_sequences: > {output.ncbi_dataset_sequences} """ - + rule calculate_sequence_hashes: """Output JSON: {insdc_accession: md5_sequence_hash, ...}""" input: @@ -131,7 +131,9 @@ rule calculate_sequence_hashes: --output-sequences {output.sequence_json} """ + if FILTER_FASTA_HEADERS: + rule filter_fasta_headers: input: sequences="results/sequences.fasta", @@ -154,7 +156,11 @@ if FILTER_FASTA_HEADERS: rule align: input: - sequences="results/sequences_filtered.fasta" if FILTER_FASTA_HEADERS else "results/sequences.fasta", + sequences=( + "results/sequences_filtered.fasta" + if 
FILTER_FASTA_HEADERS + else "results/sequences.fasta" + ), output: results="results/nextclade_{segment}.tsv", params: @@ -412,6 +418,7 @@ rule revise: fi """ + rule regroup_and_revoke: input: script="scripts/call_loculus.py", @@ -445,12 +452,12 @@ rule approve: approved=touch("results/approved"), params: log_level=LOG_LEVEL, - approve_timeout=APPROVE_TIMEOUT, + approve_timeout_min=APPROVE_TIMEOUT_MIN, shell: """ python {input.script} \ --mode approve \ --config-file {input.config} \ --log-level {params.log_level} \ - --approve-timeout {params.approve_timeout} + --approve-timeout {params.approve_timeout_min} """ diff --git a/ingest/config/defaults.yaml b/ingest/config/defaults.yaml index 6f7facb24..c2a916f1a 100644 --- a/ingest/config/defaults.yaml +++ b/ingest/config/defaults.yaml @@ -75,3 +75,4 @@ username: insdc_ingest_user password: insdc_ingest_user keycloak_client_id: backend-client subsample_fraction: 1.0 +approve_timeout_min: "60" \ No newline at end of file From 9f1f938a843ed44bde88c6d191f36c5b61bcdb8f Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Thu, 22 Aug 2024 10:12:23 +0200 Subject: [PATCH 19/19] Set approve timeout back to 25min. --- ingest/config/defaults.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest/config/defaults.yaml b/ingest/config/defaults.yaml index c2a916f1a..e77117381 100644 --- a/ingest/config/defaults.yaml +++ b/ingest/config/defaults.yaml @@ -75,4 +75,4 @@ username: insdc_ingest_user password: insdc_ingest_user keycloak_client_id: backend-client subsample_fraction: 1.0 -approve_timeout_min: "60" \ No newline at end of file +approve_timeout_min: "25" # Timeout in minutes. Cronjobs run every 30 min; make approve stop on its own before it is forcibly stopped by argocd