From a2dfb6d795484f45dcfc361a66407d2e0ad567a8 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Tue, 22 Oct 2024 11:28:26 -0700 Subject: [PATCH 1/6] nextstrain_profiles: Remove unused `use_nextalign` param The config param `use_nextalign` has not been used since --- nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml | 1 - nextstrain_profiles/nextstrain-gisaid/builds.yaml | 1 - nextstrain_profiles/nextstrain-open/builds.yaml | 1 - 3 files changed, 3 deletions(-) diff --git a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml index 1431850a7..d9d9c3968 100644 --- a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml @@ -18,7 +18,6 @@ slack_token: ~ slack_channel: "#ncov-gisaid-updates" genes: ["ORF1a", "ORF1b", "S", "ORF3a", "E", "M", "ORF6", "ORF7a", "ORF7b", "ORF8", "N", "ORF9b"] -use_nextalign: true include_hcov19_prefix: True files: diff --git a/nextstrain_profiles/nextstrain-gisaid/builds.yaml b/nextstrain_profiles/nextstrain-gisaid/builds.yaml index d80b7f521..03dce7edf 100644 --- a/nextstrain_profiles/nextstrain-gisaid/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid/builds.yaml @@ -17,7 +17,6 @@ slack_token: ~ slack_channel: "#ncov-gisaid-updates" genes: ["ORF1a", "ORF1b", "S", "ORF3a", "E", "M", "ORF6", "ORF7a", "ORF7b", "ORF8", "N", "ORF9b"] -use_nextalign: true include_hcov19_prefix: True files: diff --git a/nextstrain_profiles/nextstrain-open/builds.yaml b/nextstrain_profiles/nextstrain-open/builds.yaml index 7c19b72eb..9d4e60aec 100644 --- a/nextstrain_profiles/nextstrain-open/builds.yaml +++ b/nextstrain_profiles/nextstrain-open/builds.yaml @@ -17,7 +17,6 @@ slack_token: ~ slack_channel: "#ncov-genbank-updates" genes: ["ORF1a", "ORF1b", "S", "ORF3a", "E", "M", "ORF6", "ORF7a", "ORF7b", "ORF8", "N", "ORF9b"] -use_nextalign: true include_hcov19_prefix: False files: From bd35325442c6f954273fa1aa5cc182a7f4b33b77 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Tue, 22 Oct 2024 12:15:44 -0700 Subject: [PATCH 2/6] Remove unused mutations summary rules & script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit removes the rules `mutation_summary` and `build_mutation_summary`, both of which called the removed script `scripts/mutation_summary.py`. I was looking into updating the script to work with Nextclade v3 outputs but it seems like the rules using the script are not actively used in the Snakemake workflow! In Feb 2021, the script and rule `mutation_summary` were added and the only use of the output `results/mutation_summary_{origin}.tsv.xz` was to be uploaded it as an intermediate file of the builds.¹ In Jan 2022, the upload of the intermediate file was removed² and I cannot find any other uses of the file so the rule `mutation_summary` seems unused. In March 2021, the rule `build_mutation_summary` was added to support the rule `add_mutation_counts`.³ However, `add_mutation_counts` was later updated to use `augur distance`⁴ which does not require the mutation summary output. I cannot find any other uses of the output file `results/{build_name}/mutation_summary.tsv`, so the rule `build_mutation_summary` seems unused. ¹ ² ³ --- docs/src/reference/workflow-config-file.rst | 4 +- scripts/mutation_summary.py | 106 ------------------ .../snakemake_rules/export_for_nextstrain.smk | 32 ------ workflow/snakemake_rules/main_workflow.smk | 28 ----- 4 files changed, 2 insertions(+), 168 deletions(-) delete mode 100644 scripts/mutation_summary.py diff --git a/docs/src/reference/workflow-config-file.rst b/docs/src/reference/workflow-config-file.rst index e317ab36b..489274785 100644 --- a/docs/src/reference/workflow-config-file.rst +++ b/docs/src/reference/workflow-config-file.rst @@ -515,7 +515,7 @@ alignment_reference - type: string - description: Path to a FASTA-formatted sequence to use for alignment with ``nextalign`` - default: ``defaults/reference_seq.fasta`` -- used in rules: ``align``, ``proximity_score`` (subsampling), ``build_align``, ``build_mutation_summary`` +- used in rules: ``align``, ``proximity_score`` (subsampling), ``build_align`` annotation ~~~~~~~~~~ @@ -523,7 +523,7 @@ annotation - type: string - description: Path to a GFF-formatted annotation of gene coordinates (e.g., a “gene map”) for use by ``nextalign`` and mutation summaries. - default: ``defaults/annotation.gff`` -- used in rules: ``align``, ``build_align``, ``build_mutation_summary`` +- used in rules: ``align``, ``build_align`` outgroup ~~~~~~~~ diff --git a/scripts/mutation_summary.py b/scripts/mutation_summary.py deleted file mode 100644 index a1db4def4..000000000 --- a/scripts/mutation_summary.py +++ /dev/null @@ -1,106 +0,0 @@ -import argparse, os, glob -from augur.io import open_file -from Bio import SeqIO, SeqFeature, Seq -from Bio.SeqIO.FastaIO import SimpleFastaParser -import numpy as np -import pandas as pd - -def read_reference(fname, genemap): - try: - ref = str(SeqIO.read(fname, 'fasta').seq) - except: - with open(fname, 'r') as fh: - ref = "".join([x.strip() for x in fh]) - - translations = {} - with open(genemap, 'r') as fh: - for line in fh: - if line[0]=='#': - continue - entries = [x.strip() for x in line.strip().split('\t')] - start = int(entries[3]) - end = int(entries[4]) - strand = entries[6] - attributes = {x.split('=')[0]:'='.join(x.split('=')[1:]) for x in entries[8].split(';')} - if 'gene_name' in attributes: - name = attributes['gene_name'].strip('"') - else: - name = None - translation = Seq.translate(SeqFeature.SeqFeature(SeqFeature.FeatureLocation(start-1, end, strand=-1 if strand=='-' else 1)).extract(ref)) - translations[name] = str(translation) - - return {"nuc":ref, "translations":translations} - -def summarise_differences(ref, query, isAA): - """ - Summarise the differences between a provided reference and a query - (both of which are numpy arrays with dtype int8) - Returns a string of comma-seperated mutations - """ - # in int8: A = 65 T = 84 C = 67 G = 71 N = 78 - = 45 X = 88 - ambiguous = 88 if isAA else 78 # 'X' or 'N' - # replace all leading and trailing gaps with the ambiguous character - idx_not_gaps = np.where(query!=45)[0] # 45 is '-' (gap) - if idx_not_gaps.size: - query[0:idx_not_gaps[0]] = ambiguous - query[idx_not_gaps[-1]+1:len(query)] = ambiguous - else: - # the query is nothing but gaps! We don't report any mutations here - return "" - # sometimes the query length is longer than the reference. In this case we preserve previous behavior - # by discarding extra characters in the query - if query.size>ref.size: - query = query[0:ref.size] - # find indicies where the query differs from the reference, and is not ambiguous - changes = np.logical_and(ref!=query, query!=ambiguous).nonzero()[0] - # save these as a comma-seperated list of , where the base (position) is 1-based - return ",".join([f"{chr(ref[idx])}{idx+1}{chr(query[idx])}" for idx in changes]) - -def to_numpy_array(input_string): - return np.frombuffer(input_string.upper().encode('utf-8'), dtype=np.int8).copy() - -def to_mutations(aln_file, ref, aa=False): - res = {} - ref_array = to_numpy_array(ref) - with open_file(aln_file, 'r') as fh: - for name, seq in SimpleFastaParser(fh): - res[name] = summarise_differences(ref_array, to_numpy_array(seq), aa) - return res - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="transform nextalign output to sparse format", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument('--directory', type=str, required=True, help="directory with nextalign output") - parser.add_argument('--alignment', type=str, required=False, help="nucleotide alignment (if not part of default pattern)") - parser.add_argument('--insertions', type=str, required=False, help="insertions (if not part of default pattern)") - parser.add_argument('--basename', type=str, required=True, help="output pattern") - parser.add_argument('--reference', type=str, required=True, help="reference sequence") - parser.add_argument('--genes', nargs="+", required=True, help="list of gene names to summarize mutations for") - parser.add_argument('--genemap', type=str, required=True, help="annotation") - parser.add_argument('--output', type=str, required=True, help="output tsv file") - args = parser.parse_args() - - res = read_reference(args.reference, args.genemap) - ref = res['nuc'] - translations = res['translations'] - - nucleotide_alignment = args.alignment or os.path.join(args.directory, args.basename+'.aligned.fasta*') - insertions = os.path.join(args.directory, args.basename+'.insertions.csv') - - genes = set(args.genes) - gene_files = glob.glob(os.path.join(args.directory, args.basename+'.gene.*.fasta*')) - - compressed = {} - res = to_mutations(nucleotide_alignment, ref) - compressed = {'nucleotide_mutations': pd.DataFrame(res.values(), index=res.keys(), columns=['nucleotide'])} - for fname in gene_files: - # Find the gene name in the current gene file, since the filename may have multiple suffixes. - gene = (set(fname.split('.')) & genes).pop() - res = to_mutations(fname, translations[gene], aa=True) - compressed[gene] = pd.DataFrame(res.values(), index=res.keys(), columns=[gene]) - - res = pd.concat(compressed.values(), axis=1) - res.to_csv(args.output, sep='\t') diff --git a/workflow/snakemake_rules/export_for_nextstrain.smk b/workflow/snakemake_rules/export_for_nextstrain.smk index 723d00804..b9a8f19bd 100644 --- a/workflow/snakemake_rules/export_for_nextstrain.smk +++ b/workflow/snakemake_rules/export_for_nextstrain.smk @@ -93,38 +93,6 @@ rule export_all_regions: """ -rule mutation_summary: - message: "Summarizing {input.alignment}" - input: - alignment = rules.align.output.alignment, - insertions = rules.align.output.insertions, - translations = rules.align.output.translations, - reference = config["files"]["alignment_reference"], - genemap = config["files"]["annotation"] - output: - mutation_summary = "results/mutation_summary_{origin}.tsv.xz" - log: - "logs/mutation_summary_{origin}.txt" - benchmark: - "benchmarks/mutation_summary_{origin}.txt" - params: - outdir = "results/translations", - basename = "seqs_{origin}", - genes=config["genes"], - conda: config["conda_environment"] - shell: - r""" - python3 scripts/mutation_summary.py \ - --alignment {input.alignment} \ - --insertions {input.insertions} \ - --directory {params.outdir} \ - --basename {params.basename} \ - --reference {input.reference} \ - --genes {params.genes:q} \ - --genemap {input.genemap} \ - --output {output.mutation_summary} 2>&1 | tee {log} - """ - # # Rule for generating a per-build auspice config # diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk index 4b3f92f12..c50c2f792 100644 --- a/workflow/snakemake_rules/main_workflow.smk +++ b/workflow/snakemake_rules/main_workflow.smk @@ -936,34 +936,6 @@ rule translate: --output {output.node_data} 2>&1 | tee {log} """ -rule build_mutation_summary: - message: "Summarizing {input.alignment}" - input: - alignment = rules.build_align.output.alignment, - insertions = rules.build_align.output.insertions, - translations = rules.build_align.output.translations, - reference = config["files"]["alignment_reference"], - genemap = config["files"]["annotation"] - output: - mutation_summary = "results/{build_name}/mutation_summary.tsv" - log: - "logs/mutation_summary_{build_name}.txt" - params: - outdir = "results/{build_name}/translations", - basename = "aligned" - conda: config["conda_environment"] - shell: - r""" - python3 scripts/mutation_summary.py \ - --alignment {input.alignment} \ - --insertions {input.insertions} \ - --directory {params.outdir} \ - --basename {params.basename} \ - --reference {input.reference} \ - --genemap {input.genemap} \ - --output {output.mutation_summary} 2>&1 | tee {log} - """ - rule distances: input: tree = rules.refine.output.tree, From 852eaf8d9837d04b71e0fd83b9c96f5b9db95ee3 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Thu, 24 Oct 2024 11:11:20 -0700 Subject: [PATCH 3/6] Remove unused outputs for `align` The only downstream use of the insertions and translations outputs of `align` rule was the `mutation_summary` rule which was removed in the previous commit. Even though this is no longer doing translations, we are still keeping the `annotation` config param and the `defaults/annotation.gff` file for codon-aware nucleotide alignment. --- docs/src/reference/workflow-config-file.rst | 4 ++-- workflow/snakemake_rules/main_workflow.smk | 11 +++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/docs/src/reference/workflow-config-file.rst b/docs/src/reference/workflow-config-file.rst index 489274785..4c3ef0754 100644 --- a/docs/src/reference/workflow-config-file.rst +++ b/docs/src/reference/workflow-config-file.rst @@ -521,9 +521,9 @@ annotation ~~~~~~~~~~ - type: string -- description: Path to a GFF-formatted annotation of gene coordinates (e.g., a “gene map”) for use by ``nextalign`` and mutation summaries. +- description: Path to a GFF-formatted annotation of gene coordinates (e.g., a “gene map”) for use by ``nextalign`` for codon-aware alignment. - default: ``defaults/annotation.gff`` -- used in rules: ``align``, ``build_align`` +- used in rules: ``align`` outgroup ~~~~~~~~ diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk index c50c2f792..e5f7858a9 100644 --- a/workflow/snakemake_rules/main_workflow.smk +++ b/workflow/snakemake_rules/main_workflow.smk @@ -65,15 +65,13 @@ rule align: """ input: sequences = lambda wildcards: _get_path_for_input("sequences", wildcards.origin), + # Annotation used for codon-aware nucleotide alignment + # genemap = config["files"]["annotation"], reference = config["files"]["alignment_reference"] output: alignment = "results/aligned_{origin}.fasta.xz", - insertions = "results/insertions_{origin}.tsv", - translations = expand("results/translations/seqs_{{origin}}.gene.{gene}.fasta.xz", gene=config.get('genes', ['S'])) params: - output_translations = lambda w: f"results/translations/seqs_{w.origin}.gene.{{gene}}.fasta", - output_translations_toxz = "results/translations/seqs_{origin}.gene.*.fasta", strain_prefixes=config["strip_strain_prefixes"], # Strip the compression suffix for the intermediate output from the aligner. uncompressed_alignment=lambda wildcards, output: Path(output.alignment).with_suffix(""), @@ -96,11 +94,8 @@ rule align: --jobs={threads} \ --reference {input.reference} \ --genemap {input.genemap} \ - --output-translations {params.output_translations} \ - --output-fasta {params.uncompressed_alignment} \ - --output-insertions {output.insertions} > {log} 2>&1; + --output-fasta {params.uncompressed_alignment} > {log} 2>&1; xz -2 -T {threads} {params.uncompressed_alignment}; - xz -2 -T {threads} {params.output_translations_toxz} """ def _get_subsampling_settings(wildcards): From 8d80e6f9a661d9a21e72147dfc06c061a1c3b9f5 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 23 Oct 2024 09:50:04 -0700 Subject: [PATCH 4/6] Migrate to Nextclade v3 Major change includes the removal of the insertions.csv output from the `build_align` rule. This output was only used by the `build_mutation_summary` rule, which was removed in a previous commit. Note: Using `nextclade` instead of `nextclade3` to allow the workflow to continue to work with Snakemake's `--use-conda` interface. --- workflow/envs/nextstrain.yaml | 3 +-- workflow/snakemake_rules/main_workflow.smk | 20 +++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/workflow/envs/nextstrain.yaml b/workflow/envs/nextstrain.yaml index 080c872f1..bf59f7072 100644 --- a/workflow/envs/nextstrain.yaml +++ b/workflow/envs/nextstrain.yaml @@ -7,8 +7,7 @@ dependencies: - augur=22.4.0 - epiweeks=2.1.2 - iqtree=2.2.0.3 - - nextalign=2.14.0 - - nextclade=2.14.0 + - nextclade=3.9.0 - pangolin=3.1.20 - pangolearn=2022.01.20 - python>=3.8* diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk index e5f7858a9..4e9ac00a4 100644 --- a/workflow/snakemake_rules/main_workflow.smk +++ b/workflow/snakemake_rules/main_workflow.smk @@ -67,7 +67,7 @@ rule align: sequences = lambda wildcards: _get_path_for_input("sequences", wildcards.origin), # Annotation used for codon-aware nucleotide alignment # - genemap = config["files"]["annotation"], + annotation = config["files"]["annotation"], reference = config["files"]["alignment_reference"] output: alignment = "results/aligned_{origin}.fasta.xz", @@ -90,10 +90,10 @@ rule align: --sequences {input.sequences} \ --strip-prefixes {params.strain_prefixes:q} \ --output /dev/stdout 2> {params.sanitize_log} \ - | nextalign run \ + | nextclade run \ --jobs={threads} \ - --reference {input.reference} \ - --genemap {input.genemap} \ + --input-ref {input.reference} \ + --input-annotation {input.annotation} \ --output-fasta {params.uncompressed_alignment} > {log} 2>&1; xz -2 -T {threads} {params.uncompressed_alignment}; """ @@ -461,8 +461,8 @@ rule prepare_nextclade: conda: config["conda_environment"] shell: r""" - nextclade2 --version - nextclade2 dataset get --name {params.name} --output-zip {output.nextclade_dataset} + nextclade --version + nextclade dataset get --name {params.name} --output-zip {output.nextclade_dataset} """ rule build_align: @@ -476,14 +476,13 @@ rule build_align: nextclade_dataset = "data/sars-cov-2-nextclade-defaults.zip", output: alignment = "results/{build_name}/aligned.fasta", - insertions = "results/{build_name}/insertions.tsv", nextclade_qc = 'results/{build_name}/nextclade_qc.tsv', translations = expand("results/{{build_name}}/translations/aligned.gene.{gene}.fasta", gene=config.get('genes', ['S'])) params: outdir = "results/{build_name}/translations", strain_prefixes=config["strip_strain_prefixes"], sanitize_log="logs/sanitize_sequences_before_nextclade_{build_name}.txt", - output_translations = lambda w: f"results/{w.build_name}/translations/aligned.gene.{{gene}}.fasta" + output_translations = lambda w: f"results/{w.build_name}/translations/aligned.gene.{{cds}}.fasta" log: "logs/align_{build_name}.txt" benchmark: @@ -498,13 +497,12 @@ rule build_align: --sequences {input.sequences} \ --strip-prefixes {params.strain_prefixes:q} \ --output /dev/stdout 2> {params.sanitize_log} \ - | nextclade2 run \ + | nextclade run \ --jobs {threads} \ --input-dataset {input.nextclade_dataset} \ --output-tsv {output.nextclade_qc} \ --output-fasta {output.alignment} \ - --output-translations {params.output_translations} \ - --output-insertions {output.insertions} 2>&1 | tee {log} + --output-translations {params.output_translations} 2>&1 | tee {log} """ rule join_metadata_and_nextclade_qc: From ef94e27a4e779dbe58b2675e9bfae62c789f0c9d Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 23 Oct 2024 14:14:52 -0700 Subject: [PATCH 5/6] docs: replace `nextalign` with `nextclade` --- defaults/parameters.yaml | 2 +- docs/src/reference/workflow-config-file.rst | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/defaults/parameters.yaml b/defaults/parameters.yaml index 0e16b8bd9..472831243 100644 --- a/defaults/parameters.yaml +++ b/defaults/parameters.yaml @@ -71,7 +71,7 @@ files: mutational_fitness_distance_map: "defaults/mutational_fitness_distance_map.json" sites_to_mask: "defaults/sites_ignored_for_tree_topology.txt" -# Define genes to translate during alignment by nextalign. +# Define genes to translate during alignment by nextclade. genes: ["ORF1a", "ORF1b", "S", "ORF3a", "E", "M", "ORF6", "ORF7a", "ORF7b", "ORF8", "N", "ORF9b"] # Filter settings diff --git a/docs/src/reference/workflow-config-file.rst b/docs/src/reference/workflow-config-file.rst index 4c3ef0754..60a54d823 100644 --- a/docs/src/reference/workflow-config-file.rst +++ b/docs/src/reference/workflow-config-file.rst @@ -431,7 +431,7 @@ genes ----- - type: array -- description: A list of genes for which ``nextalign`` should generate amino acid sequences during the alignment process. Gene names must match the names provided in the gene map from the ``annotation`` parameter. +- description: A list of genes for which ``nextclade`` should generate amino acid sequences during the alignment process. Gene names must match the names provided in the gene map from the ``annotation`` parameter. - default: ``["ORF1a", "ORF1b", "S", "ORF3a", "M", "N"]`` - used in rules: ``align``, ``build_align``, ``translate``, ``mutational_fitness`` @@ -513,15 +513,15 @@ alignment_reference ~~~~~~~~~~~~~~~~~~~ - type: string -- description: Path to a FASTA-formatted sequence to use for alignment with ``nextalign`` +- description: Path to a FASTA-formatted sequence to use for alignment with ``nextclade`` - default: ``defaults/reference_seq.fasta`` -- used in rules: ``align``, ``proximity_score`` (subsampling), ``build_align`` +- used in rules: ``align``, ``proximity_score`` (subsampling) annotation ~~~~~~~~~~ - type: string -- description: Path to a GFF-formatted annotation of gene coordinates (e.g., a “gene map”) for use by ``nextalign`` for codon-aware alignment. +- description: Path to a GFF-formatted annotation of gene coordinates (e.g., a “gene map”) for use by ``nextclade`` for codon-aware alignment. - default: ``defaults/annotation.gff`` - used in rules: ``align`` From f269e69b4921f93c37a0964accd4bdfc418ea1c5 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 23 Oct 2024 15:33:33 -0700 Subject: [PATCH 6/6] Update changelog for update to Nextclade v3 --- docs/src/reference/change_log.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/src/reference/change_log.md b/docs/src/reference/change_log.md index ddb6334d8..8d5f61194 100644 --- a/docs/src/reference/change_log.md +++ b/docs/src/reference/change_log.md @@ -5,6 +5,10 @@ We also use this change log to document new features that maintain backward comp ## New features since last version update +## v14 (23 October 2024) + +- 23 October 2024: Update workflow to use Nextclade v3. This includes the removal of unused mutation summary script and rules that expected Nextclade v2 outputs. Dropping the mutation summary rules removed the need for the full alignment rule `align` to produce the insertions and translations outputs, so they have been removed. The `build_align` rule no longer produces a separate `insertions.tsv` since insertions are now included in the `nextclade_qc.tsv`. [PR 1160](https://github.com/nextstrain/ncov/pull/1160) + - 2 October 2024: Include a new parameter for `clade_recency` under `colors`. This parameter is used to define which clades should receive a color from the standard rainbow palette. A value of `6M` will cause clades with strains in the tree sampled within the last 6 months to be colored and earlier strains to not receive a color (and be colored in a palette of grays by Auspice). This `clade_recency` parameter is used in `builds.yaml` in `nextstrain_profiles` to color clades according for the `1m`, `2m`, `6m` and `all-time` timepoints. If `clade_recency` is not supplied then all clades will be colored. [PR 1132](https://github.com/nextstrain/ncov/pull/1132) - 30 September 2024: Use population-based weighted sampling for `nextstrain_profiles`. This requires a minimum Augur version of 25.3.0. PRs [1106](https://github.com/nextstrain/ncov/pull/1106), [1150](https://github.com/nextstrain/ncov/pull/1150), [1151](https://github.com/nextstrain/ncov/pull/1151)