diff --git a/ingest/Snakefile b/ingest/Snakefile index 97e010c..2569ce6 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -25,3 +25,8 @@ rule clean: """ rm -rfv {params.targets} """ + +# Import custom rules provided via the config. +if "custom_rules" in config: + for rule_file in config["custom_rules"]: + include: rule_file diff --git a/ingest/build-configs/nextstrain-automation/README.md b/ingest/build-configs/nextstrain-automation/README.md deleted file mode 100644 index 9175569..0000000 --- a/ingest/build-configs/nextstrain-automation/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# Nextstrain automation - -> [!NOTE] -> External users can ignore this directory! -> This build config/customization is tailored for the internal Nextstrain team -> to extend the core ingest workflow for automated workflows. - -## Update the config - -Update the [config.yaml][] for your pathogen: - -1. Edit the `s3_dst` param to add the pathogen repository name. -2. Edit the `files_to_upload` param to a mapping of files you need to - upload for your pathogen. - -The default includes suggested files for uploading curated data and -Nextclade outputs. - -## Run the workflow - -Provide the additional config file to the Snakemake options in order -to include the custom rules from [upload.smk][] in the workflow. -Specify the `upload_all` target in order to run the additional upload -rules. - -The upload rules will require AWS credentials for a user that has -permissions to upload to the Nextstrain data bucket. - -The customized workflow can be run from the top level pathogen repo -directory with: - -```bash -nextstrain build \ - --env AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY \ - ingest \ - upload_all \ - --configfile build-configs/nextstrain-automation/config.yaml -``` - -## Automated GitHub Action workflows - -Additional instructions on how to use this with the shared -`pathogen-repo-build` GitHub Action workflow to come! - -[config.yaml]: ./config.yaml -[upload.smk]: ./upload.smk diff --git a/ingest/build-configs/nextstrain-automation/config.yaml b/ingest/build-configs/nextstrain-automation/config.yaml index c24191c..aefd2fa 100644 --- a/ingest/build-configs/nextstrain-automation/config.yaml +++ b/ingest/build-configs/nextstrain-automation/config.yaml @@ -1,9 +1,5 @@ -# This configuration file should contain all required configuration -# parameters for the ingest workflow to run with additional Nextstrain -# automation rules. - -# Custom rules to run as part of the Nextstrain automated workflow The -# paths should be relative to the ingest directory. +# Custom rules to run as part of the Nextstrain automated workflow. +# The paths should be relative to the ingest directory. custom_rules: - build-configs/nextstrain-automation/upload.smk @@ -13,7 +9,7 @@ custom_rules: cloudfront_domain: "data.nextstrain.org" # Nextstrain AWS S3 Bucket with pathogen prefix -s3_dst: "s3://nextstrain-data/files/workflows/seasonal-cov" +s3_dst: "s3://nextstrain-data/files/workflows/yellow-fever" # Mapping of files to upload files_to_upload: @@ -21,4 +17,3 @@ files_to_upload: metadata.tsv.zst: results/metadata.tsv sequences.fasta.zst: results/sequences.fasta alignments.fasta.zst: results/alignment.fasta - translations.zip: results/translations.zip diff --git a/ingest/build-configs/nextstrain-automation/upload.smk b/ingest/build-configs/nextstrain-automation/upload.smk index cf1c2c5..0c21284 100644 --- a/ingest/build-configs/nextstrain-automation/upload.smk +++ b/ingest/build-configs/nextstrain-automation/upload.smk @@ -28,7 +28,7 @@ rule upload_to_s3: s3_dst=config["s3_dst"], cloudfront_domain=config["cloudfront_domain"], shell: - """ + r""" ./vendored/upload-to-s3 \ {params.quiet} \ {input.file_to_upload:q} \ @@ -39,6 +39,9 @@ rule upload_to_s3: rule upload_all: input: - uploads=[f"results/upload/{remote_file}.upload" for remote_file in config["files_to_upload"].keys()], + uploads=[ + f"results/upload/{remote_file}.upload" + for remote_file in config["files_to_upload"].keys() + ], output: touch("results/upload_all.done"),