Skip to content

Commit

Permalink
fix: include mehari cmd and version info in VCF header (#673)
Browse files Browse the repository at this point in the history
  • Loading branch information
tedil authored Jan 24, 2025
1 parent 1d3c230 commit 8b9fa2e
Showing 1 changed file with 42 additions and 2 deletions.
44 changes: 42 additions & 2 deletions src/annotate/seqvars/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Annotation of sequence variants.
use std::collections::{HashMap, HashSet};
use std::env;
use std::fmt::Display;
use std::fs::File;
use std::io::{Cursor, Read, Write};
Expand All @@ -16,6 +17,8 @@ use biocommons_bioutils::assemblies::Assembly;
use clap::{Args as ClapArgs, Parser};
use flate2::write::GzEncoder;
use flate2::Compression;
use itertools::Itertools;
use noodles::vcf::header::record::key;

Check warning on line 21 in src/annotate/seqvars/mod.rs

View workflow job for this annotation

GitHub Actions / clippy

unused import: `noodles::vcf::header::record::key`

warning: unused import: `noodles::vcf::header::record::key` --> src/annotate/seqvars/mod.rs:21:5 | 21 | use noodles::vcf::header::record::key; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: `#[warn(unused_imports)]` on by default
use noodles::vcf::header::record::value::map::format::Number as FormatNumber;
use noodles::vcf::header::record::value::map::format::Type as FormatType;
use noodles::vcf::header::record::value::map::info::Number;
Expand Down Expand Up @@ -207,7 +210,7 @@ pub struct PathOutput {
pub path_output_tsv: Option<String>,
}

fn build_header(header_in: &VcfHeader) -> VcfHeader {
fn build_header(header_in: &VcfHeader, additional_records: &[(String, String)]) -> VcfHeader {
let mut header_out = header_in.clone();

header_out.infos_mut().insert(
Expand Down Expand Up @@ -326,6 +329,15 @@ fn build_header(header_in: &VcfHeader) -> VcfHeader {
Map::<Info>::new(Number::Count(1), InfoType::String, "ClinVar VCV accession"),
);

for (key, value) in additional_records {
header_out
.insert(
key.parse().expect("invalid key"),
noodles::vcf::header::record::Value::from(value.as_ref()),
)
.unwrap();
}

header_out
}

Expand Down Expand Up @@ -1410,6 +1422,24 @@ struct Annotator {
annotators: Vec<AnnotatorEnum>,
}

impl Annotator {
    /// Collects the data-version records to be written into the output VCF header.
    ///
    /// Returns a single `("mehariTxDbVersion", <version>)` pair taken from the
    /// first consequence annotator whose predictor reports a data version, or an
    /// empty vector when no annotator yields one.
    fn versions_for_vcf_header(&self) -> Vec<(String, String)> {
        // TODO also extract version information for frequencies and clinvar

        // Only consequence annotators are consulted; a consequence annotator
        // without a data version does not stop the search.
        let first_version = self.annotators.iter().find_map(|annotator| match annotator {
            AnnotatorEnum::Consequence(consequence) => consequence.predictor.data_version(),
            _ => None,
        });

        match first_version {
            Some(version) => vec![("mehariTxDbVersion".to_string(), version)],
            None => Vec::new(),
        }
    }
}

pub struct FrequencyAnnotator {
db: DBWithThreadMode<MultiThreaded>,
}
Expand Down Expand Up @@ -1865,7 +1895,6 @@ async fn run_with_writer(
let mut reader = open_variant_reader(&args.path_input_vcf).await?;

let mut header_in = reader.read_header().await?;
let header_out = build_header(&header_in);

// Work around glnexus issue with RNC.
if let Some(format) = header_in.formats_mut().get_mut("RNC") {
Expand All @@ -1883,6 +1912,11 @@ async fn run_with_writer(
tracing::info!("Determined input assembly to be {:?}", &assembly);

let annotator = setup_annotator(args)?;
let mut additional_header_info = annotator.versions_for_vcf_header();
additional_header_info.push(("mehariCmd".into(), env::args().join(" ")));
additional_header_info.push(("mehariVersion".into(), env!("CARGO_PKG_VERSION").into()));

let header_out = build_header(&header_in, &additional_header_info);

// Perform the VCF annotation.
tracing::info!("Annotating VCF ...");
Expand Down Expand Up @@ -2037,6 +2071,12 @@ mod test {
run(&args_common, &args).await?;

let actual = std::fs::read_to_string(args.output.path_output_vcf.unwrap())?;
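// Drop the `##mehari*` header lines (e.g. command line and version); they differ
// between invocations and releases and would make the snapshot unstable.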
let actual = actual
.lines()
.filter(|line| !line.starts_with("##mehari"))
.collect::<Vec<_>>()
.join("\n");
insta::assert_snapshot!(actual);

Ok(())
Expand Down

0 comments on commit 8b9fa2e

Please sign in to comment.