diff --git a/openapi.schema.yaml b/openapi.schema.yaml index 132a81a6..f91bdac3 100644 --- a/openapi.schema.yaml +++ b/openapi.schema.yaml @@ -56,6 +56,58 @@ paths: application/json: schema: $ref: '#/components/schemas/CustomError' + /api/v1/seqvars/clinvar: + get: + tags: + - seqvars_clinvar + summary: Query for ClinVar information of a variant. + operationId: seqvarsClinvar + parameters: + - name: genome_release + in: query + description: The assembly. + required: true + schema: + $ref: '#/components/schemas/GenomeRelease' + - name: chromosome + in: query + description: SPDI sequence. + required: true + schema: + type: string + - name: position + in: query + description: SPDI position. + required: true + schema: + type: integer + format: int32 + minimum: 0 + - name: reference + in: query + description: SPDI deletion. + required: true + schema: + type: string + - name: alternative + in: query + description: SPDI insertion. + required: true + schema: + type: string + responses: + '200': + description: Clinvar information. + content: + application/json: + schema: + $ref: '#/components/schemas/ClinvarResponse' + '500': + description: Internal server error. + content: + application/json: + schema: + $ref: '#/components/schemas/CustomError' /api/v1/seqvars/csq: get: tags: @@ -115,6 +167,58 @@ paths: application/json: schema: $ref: '#/components/schemas/CustomError' + /api/v1/seqvars/frequency: + get: + tags: + - seqvars_frequencies + summary: Query for gnomAD frequencies of a variant. + operationId: seqvarsFrequency + parameters: + - name: genome_release + in: query + description: The assembly. + required: true + schema: + $ref: '#/components/schemas/GenomeRelease' + - name: chromosome + in: query + description: SPDI sequence. + required: true + schema: + type: string + - name: position + in: query + description: SPDI position. + required: true + schema: + type: integer + format: int32 + minimum: 0 + - name: reference + in: query + description: SPDI deletion. + required: true + schema: + type: string + - name: alternative + in: query + description: SPDI insertion. + required: true + schema: + type: string + responses: + '200': + description: Frequency information. + content: + application/json: + schema: + $ref: '#/components/schemas/FrequencyResponse' + '500': + description: Internal server error. + content: + application/json: + schema: + $ref: '#/components/schemas/CustomError' /api/v1/strucvars/csq: get: tags: @@ -198,6 +302,98 @@ components: enum: - grch37 - grch38 + AutosomalResultEntry: + type: object + required: + - gnomad_exomes_an + - gnomad_exomes_hom + - gnomad_exomes_het + - gnomad_genomes_an + - gnomad_genomes_hom + - gnomad_genomes_het + properties: + gnomad_exomes_an: + type: integer + format: int32 + minimum: 0 + gnomad_exomes_hom: + type: integer + format: int32 + minimum: 0 + gnomad_exomes_het: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_an: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_hom: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_het: + type: integer + format: int32 + minimum: 0 + ClinvarQuery: + type: object + description: Query parameters of the `/api/v1/seqvars/clinvar` endpoint. + required: + - genome_release + - chromosome + - position + - reference + - alternative + properties: + genome_release: + $ref: '#/components/schemas/GenomeRelease' + chromosome: + type: string + description: SPDI sequence. + position: + type: integer + format: int32 + description: SPDI position. + minimum: 0 + reference: + type: string + description: SPDI deletion. + alternative: + type: string + description: SPDI insertion. + ClinvarResponse: + type: object + description: Response of the `/api/v1/seqvars/clinvar` endpoint. + required: + - version + - query + - result + properties: + version: + $ref: '#/components/schemas/VersionsInfoResponse' + query: + $ref: '#/components/schemas/ClinvarQuery' + result: + type: array + items: + $ref: '#/components/schemas/ClinvarResultEntry' + description: The resulting records for the scored genes. + ClinvarResultEntry: + type: object + description: One entry in `ClinvarResponse`. + required: + - clinvar_vcv + - clinvar_germline_classification + properties: + clinvar_vcv: + type: array + items: + type: string + clinvar_germline_classification: + type: array + items: + type: string Consequence: type: string description: Putative impact. @@ -339,6 +535,70 @@ components: value: type: string description: Enum for `AnnField::feature_type`. + FrequencyQuery: + type: object + description: Query parameters of the `/api/v1/seqvars/frequency` endpoint. + required: + - genome_release + - chromosome + - position + - reference + - alternative + properties: + genome_release: + $ref: '#/components/schemas/GenomeRelease' + chromosome: + type: string + description: SPDI sequence. + position: + type: integer + format: int32 + description: SPDI position. + minimum: 0 + reference: + type: string + description: SPDI deletion. + alternative: + type: string + description: SPDI insertion. + FrequencyResponse: + type: object + description: Response of the `/api/v1/seqvars/frequency` endpoint. + required: + - version + - query + - result + properties: + version: + $ref: '#/components/schemas/VersionsInfoResponse' + query: + $ref: '#/components/schemas/FrequencyQuery' + result: + type: array + items: + $ref: '#/components/schemas/FrequencyResultEntry' + description: The resulting records for the scored genes. + FrequencyResultEntry: + oneOf: + - type: object + required: + - Autosomal + properties: + Autosomal: + $ref: '#/components/schemas/AutosomalResultEntry' + - type: object + required: + - Gonosomal + properties: + Gonosomal: + $ref: '#/components/schemas/GonosomalResultEntry' + - type: object + required: + - Mitochondrial + properties: + Mitochondrial: + $ref: '#/components/schemas/MitochondrialResultEntry' + description: One entry in `FrequencyResponse`. GenesTranscriptsListQuery: type: object description: Query arguments for the `/api/v1/genes/transcripts` endpoint. @@ -412,6 +672,50 @@ components: enum: - grch37 - grch38 + GonosomalResultEntry: + type: object + required: + - gnomad_exomes_an + - gnomad_exomes_hom + - gnomad_exomes_het + - gnomad_exomes_hemi + - gnomad_genomes_an + - gnomad_genomes_hom + - gnomad_genomes_het + - gnomad_genomes_hemi + properties: + gnomad_exomes_an: + type: integer + format: int32 + minimum: 0 + gnomad_exomes_hom: + type: integer + format: int32 + minimum: 0 + gnomad_exomes_het: + type: integer + format: int32 + minimum: 0 + gnomad_exomes_hemi: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_an: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_hom: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_het: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_hemi: + type: integer + format: int32 + minimum: 0 Message: type: string description: A message to be used in `AnnField::messages`. @@ -426,6 +730,40 @@ components: - info_realign_three_prime - info_compound_annotation - info_non_reference_annotation + MitochondrialResultEntry: + type: object + required: + - helix_an + - helix_hom + - helix_het + - gnomad_genomes_an + - gnomad_genomes_hom + - gnomad_genomes_het + properties: + helix_an: + type: integer + format: int32 + minimum: 0 + helix_hom: + type: integer + format: int32 + minimum: 0 + helix_het: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_an: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_hom: + type: integer + format: int32 + minimum: 0 + gnomad_genomes_het: + type: integer + format: int32 + minimum: 0 Pos: type: object description: Position, optionally with total length. @@ -519,6 +857,7 @@ components: - feature_id - feature_biotype - feature_tag + - strand properties: consequences: type: array @@ -574,6 +913,10 @@ components: format: int32 description: Distance to feature. nullable: true + strand: + type: integer + format: int32 + description: Strand of the alignment messages: type: array items: diff --git a/src/annotate/cli.rs b/src/annotate/cli.rs new file mode 100644 index 00000000..ed76b084 --- /dev/null +++ b/src/annotate/cli.rs @@ -0,0 +1,123 @@ +use clap::Args as ClapArgs; +use strum::{Display, VariantArray}; + +#[derive(Debug, ClapArgs)] +#[group(required = true, multiple = true)] +pub struct Sources { + /// Transcript database containing the transcript information. + /// + /// Pre-built databases are available at https://github.com/varfish-org/mehari-data-tx/releases + #[arg(long)] + pub transcripts: Option>, + + /// Frequency database. + /// + /// The frequency database contains gnomAD frequencies for the variants. + /// Pre-built databases are available at TODO + #[arg(long)] + pub frequencies: Option>, + + /// ClinVar database. + /// + /// The ClinVar database contains clinical significance information for the variants. + /// Pre-built databases are available at https://github.com/varfish-org/annonars-data-clinvar/releases + #[arg(long)] + pub clinvar: Option>, +} + +#[derive(Debug, ClapArgs, Default, Clone)] +pub struct TranscriptSettings { + /// The transcript source. + #[arg(long, value_enum, default_value_t = TranscriptSource::Both)] + pub transcript_source: TranscriptSource, + + /// Whether to report only the most severe consequence, grouped by gene, transcript, or allele. + #[arg(long)] + pub report_most_severe_consequence_by: Option, + + /// Which kind of transcript to pick / restrict to. Default is not to pick at all. + /// + /// Depending on `--pick-transcript-mode`, if multiple transcripts match the selection, + /// either the first one is kept or all are kept. + #[arg(long)] + pub pick_transcript: Vec, + + /// Determines how to handle multiple transcripts. Default is to keep all. + /// + /// When transcript picking is enabled via `--pick-transcript`, + /// either keep the first one found or keep all that match. + #[arg(long, default_value = "all")] + pub pick_transcript_mode: TranscriptPickMode, +} + +#[derive( + Debug, + Copy, + Clone, + PartialEq, + Eq, + PartialOrd, + Ord, + Display, + clap::ValueEnum, + VariantArray, + parse_display::FromStr, +)] +pub enum ConsequenceBy { + Gene, + Transcript, + // or "Variant"? + Allele, +} + +#[derive( + Debug, + Copy, + Clone, + PartialEq, + Eq, + PartialOrd, + Ord, + Display, + clap::ValueEnum, + VariantArray, + parse_display::FromStr, +)] +pub enum TranscriptPickType { + ManeSelect, + ManePlusClinical, + Length, + EnsemblCanonical, + RefSeqSelect, + GencodePrimary, + Basic, +} + +#[derive(Debug, Copy, Clone, Display, clap::ValueEnum, Default)] +pub enum TranscriptPickMode { + #[default] + First, + All, +} + +/// Enum that allows to select the transcript source. +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + Default, + serde::Deserialize, + serde::Serialize, + clap::ValueEnum, +)] +pub enum TranscriptSource { + /// ENSEMBL + Ensembl, + /// RefSeq + RefSeq, + /// Both + #[default] + Both, +} diff --git a/src/annotate/mod.rs b/src/annotate/mod.rs index 302471c2..70fad2fe 100644 --- a/src/annotate/mod.rs +++ b/src/annotate/mod.rs @@ -4,6 +4,7 @@ use noodles::vcf::header::FileFormat; use noodles::vcf::variant::record::samples::series::value::genotype::Phasing; use noodles::vcf::variant::record_buf::samples::sample::value::Genotype; +pub(crate) mod cli; pub mod seqvars; pub mod strucvars; diff --git a/src/annotate/seqvars/csq.rs b/src/annotate/seqvars/csq.rs index a905c38f..9a32a8b8 100644 --- a/src/annotate/seqvars/csq.rs +++ b/src/annotate/seqvars/csq.rs @@ -1,4 +1,9 @@ //! Compute molecular consequence of variants. +use super::{ + ann::{Allele, AnnField, Consequence, FeatureBiotype, FeatureType, Pos, Rank, SoFeature}, + provider::Provider as MehariProvider, +}; +use crate::annotate::cli::{ConsequenceBy, TranscriptSource}; use crate::pbs::txs::{GenomeAlignment, Strand, TranscriptBiotype, TranscriptTag}; use enumflags2::BitFlags; use hgvs::parser::{NoRef, ProteinEdit, UncertainLengthChange}; @@ -14,12 +19,6 @@ use std::cmp::Ordering; use std::ops::Range; use std::{collections::HashMap, sync::Arc}; -use super::{ - ann::{Allele, AnnField, Consequence, FeatureBiotype, FeatureType, Pos, Rank, SoFeature}, - provider::Provider as MehariProvider, - ConsequenceBy, -}; - /// A variant description how VCF would do it. #[derive(Debug, PartialEq, Eq, Clone, Default)] pub struct VcfVariant { @@ -33,28 +32,6 @@ pub struct VcfVariant { pub alternative: String, } -/// Enum that allows to select the transcript source. -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - Default, - serde::Deserialize, - serde::Serialize, - clap::ValueEnum, -)] -pub enum TranscriptSource { - /// ENSEMBL - Ensembl, - /// RefSeq - RefSeq, - /// Both - #[default] - Both, -} - /// Configuration for consequence prediction. #[derive(Debug, Clone, derive_builder::Builder)] #[builder(pattern = "immutable")] @@ -84,7 +61,7 @@ impl Default for Config { pub struct ConsequencePredictor { /// The internal transcript provider for locating transcripts. #[derivative(Debug = "ignore")] - provider: Arc, + pub(crate) provider: Arc, /// Assembly mapper for variant consequence prediction. #[derivative(Debug = "ignore")] mapper: assembly::Mapper, @@ -1261,10 +1238,10 @@ impl ConsequencePredictor { #[cfg(test)] mod test { use super::*; + use crate::annotate::cli::{TranscriptPickType, TranscriptSettings}; use crate::annotate::seqvars::provider::ConfigBuilder as MehariProviderConfigBuilder; use crate::annotate::seqvars::{ load_tx_db, run_with_writer, Args, AsyncAnnotatedVariantWriter, PathOutput, - TranscriptPickType, }; use crate::common::noodles::{open_variant_reader, open_variant_writer, NoodlesVariantReader}; use csv::ReaderBuilder; @@ -1743,10 +1720,11 @@ mod test { path_output_vcf: Some(output.as_ref().to_str().unwrap().into()), path_output_tsv: None, }, - transcript_source: Default::default(), - report_most_severe_consequence_by: Some(ConsequenceBy::Allele), - pick_transcript: vec![TranscriptPickType::ManeSelect], - pick_transcript_mode: Default::default(), + transcript_settings: TranscriptSettings { + report_most_severe_consequence_by: Some(ConsequenceBy::Allele), + pick_transcript: vec![TranscriptPickType::ManeSelect], + ..Default::default() + }, max_var_count: None, hgnc: None, sources: crate::annotate::seqvars::Sources { diff --git a/src/annotate/seqvars/mod.rs b/src/annotate/seqvars/mod.rs index 238662f4..9cf1bd7b 100644 --- a/src/annotate/seqvars/mod.rs +++ b/src/annotate/seqvars/mod.rs @@ -9,6 +9,20 @@ use std::str::FromStr; use std::sync::Arc; use std::time::Instant; +use self::ann::{AnnField, Consequence, FeatureBiotype}; +use crate::annotate::cli::{Sources, TranscriptSettings}; +use crate::annotate::genotype_string; +use crate::annotate::seqvars::csq::{ + ConfigBuilder as ConsequencePredictorConfigBuilder, ConsequencePredictor, VcfVariant, +}; +use crate::annotate::seqvars::provider::{ + ConfigBuilder as MehariProviderConfigBuilder, Provider as MehariProvider, +}; +use crate::common::noodles::{open_variant_reader, open_variant_writer, NoodlesVariantReader}; +use crate::common::{guess_assembly, GenomeRelease}; +use crate::db::merge::merge_transcript_databases; +use crate::pbs::txs::TxSeqDatabase; +use crate::ped::{PedigreeByName, Sex}; use annonars::common::cli::is_canonical; use annonars::common::keys; use annonars::freqs::serialized::{auto, mt, xy}; @@ -18,7 +32,6 @@ use clap::{Args as ClapArgs, Parser}; use flate2::write::GzEncoder; use flate2::Compression; use itertools::Itertools; -use noodles::vcf::header::record::key; use noodles::vcf::header::record::value::map::format::Number as FormatNumber; use noodles::vcf::header::record::value::map::format::Type as FormatType; use noodles::vcf::header::record::value::map::info::Number; @@ -38,25 +51,10 @@ use prost::Message; use rocksdb::{DBWithThreadMode, MultiThreaded}; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; -use strum::{Display, VariantArray}; +use strum::Display; use thousands::Separable; use tokio::io::AsyncWriteExt; -use crate::annotate::genotype_string; -use crate::annotate::seqvars::csq::{ - ConfigBuilder as ConsequencePredictorConfigBuilder, ConsequencePredictor, VcfVariant, -}; -use crate::annotate::seqvars::provider::{ - ConfigBuilder as MehariProviderConfigBuilder, Provider as MehariProvider, -}; -use crate::common::noodles::{open_variant_reader, open_variant_writer, NoodlesVariantReader}; -use crate::common::{guess_assembly, GenomeRelease}; -use crate::db::merge::merge_transcript_databases; -use crate::pbs::txs::TxSeqDatabase; -use crate::ped::{PedigreeByName, Sex}; - -use self::ann::{AnnField, Consequence, FeatureBiotype}; - pub mod ann; pub mod binning; pub mod csq; @@ -94,28 +92,6 @@ pub struct Args { #[command(flatten)] pub output: PathOutput, - /// The transcript source. - #[arg(long, value_enum, default_value_t = csq::TranscriptSource::Both)] - pub transcript_source: csq::TranscriptSource, - - /// Whether to report only the worst consequence for each picked transcript. - #[arg(long)] - pub report_most_severe_consequence_by: Option, - - /// Which kind of transcript to pick / restrict to. Default is not to pick at all. - /// - /// Depending on `--pick-transcript-mode`, if multiple transcripts match the selection, - /// either the first one is kept or all are kept. - #[arg(long)] - pub pick_transcript: Vec, - - /// Determines how to handle multiple transcripts. Default is to keep all. - /// - /// When transcript picking is enabled via `--pick-transcript`, - /// either keep the first one found or keep all that match. - #[arg(long, default_value = "all", requires = "pick_transcript")] - pub pick_transcript_mode: TranscriptPickMode, - /// For debug purposes, maximal number of variants to annotate. #[arg(long)] pub max_var_count: Option, @@ -127,67 +103,10 @@ pub struct Args { /// What to annotate and which source to use. #[command(flatten)] pub sources: Sources, -} - -#[derive( - Debug, - Copy, - Clone, - PartialEq, - Eq, - PartialOrd, - Ord, - Display, - clap::ValueEnum, - VariantArray, - parse_display::FromStr, -)] -pub enum ConsequenceBy { - Gene, - Transcript, - // or "Variant"? - Allele, -} - -#[derive( - Debug, - Copy, - Clone, - PartialEq, - Eq, - PartialOrd, - Ord, - Display, - clap::ValueEnum, - VariantArray, - parse_display::FromStr, -)] -pub enum TranscriptPickType { - ManeSelect, - ManePlusClinical, - Length, - EnsemblCanonical, - RefSeqSelect, - GencodePrimary, - Basic, -} -#[derive(Debug, Copy, Clone, Display, clap::ValueEnum, Default)] -pub enum TranscriptPickMode { - #[default] - First, - All, -} - -#[derive(Debug, ClapArgs)] -#[group(required = true, multiple = true)] -pub struct Sources { - #[arg(long)] - transcripts: Option>, - #[arg(long)] - frequencies: Option, - #[arg(long)] - clinvar: Option, + /// Transcript annotation related settings + #[command(flatten)] + pub transcript_settings: TranscriptSettings, } #[derive(Debug, Display, Copy, Clone, clap::ValueEnum, PartialEq, Eq, parse_display::FromStr)] @@ -1402,7 +1321,7 @@ impl AsyncAnnotatedVariantWriter for VarFishSeqvarTsvWriter { } #[allow(clippy::large_enum_variant)] -enum AnnotatorEnum { +pub(crate) enum AnnotatorEnum { Frequency(FrequencyAnnotator), Clinvar(ClinvarAnnotator), Consequence(ConsequenceAnnotator), @@ -1418,7 +1337,7 @@ impl AnnotatorEnum { } } -struct Annotator { +pub(crate) struct Annotator { annotators: Vec, } @@ -1440,6 +1359,7 @@ impl Annotator { } } +#[derive(Debug)] pub struct FrequencyAnnotator { db: DBWithThreadMode, } @@ -1448,7 +1368,7 @@ impl FrequencyAnnotator { Self { db } } - fn from_path(path: impl AsRef + Display) -> anyhow::Result { + pub(crate) fn from_path(path: impl AsRef + Display) -> anyhow::Result { // Open the frequency RocksDB database in read only mode. tracing::info!("Opening frequency database"); tracing::debug!("RocksDB path = {}", &path); @@ -1511,7 +1431,7 @@ impl FrequencyAnnotator { Ok(()) } - /// Annotate record on gonomosomal chromosome with gnomAD exomes/genomes. + /// Annotate record on gonosomal chromosome with gnomAD exomes/genomes. pub fn annotate_record_xy( &self, key: &[u8], @@ -1521,51 +1441,47 @@ impl FrequencyAnnotator { .db .get_cf(self.db.cf_handle("gonosomal").as_ref().unwrap(), key)? { - let auto_record = xy::Record::from_buf(&freq); + let xy_record = xy::Record::from_buf(&freq); vcf_record.info_mut().insert( "gnomad_exomes_an".into(), - Some(field::Value::Integer(auto_record.gnomad_exomes.an as i32)), + Some(field::Value::Integer(xy_record.gnomad_exomes.an as i32)), ); vcf_record.info_mut().insert( "gnomad_exomes_hom".into(), - Some(field::Value::Integer( - auto_record.gnomad_exomes.ac_hom as i32, - )), + Some(field::Value::Integer(xy_record.gnomad_exomes.ac_hom as i32)), ); vcf_record.info_mut().insert( "gnomad_exomes_het".into(), - Some(field::Value::Integer( - auto_record.gnomad_exomes.ac_het as i32, - )), + Some(field::Value::Integer(xy_record.gnomad_exomes.ac_het as i32)), ); vcf_record.info_mut().insert( "gnomad_exomes_hemi".into(), Some(field::Value::Integer( - auto_record.gnomad_exomes.ac_hemi as i32, + xy_record.gnomad_exomes.ac_hemi as i32, )), ); vcf_record.info_mut().insert( "gnomad_genomes_an".into(), - Some(field::Value::Integer(auto_record.gnomad_genomes.an as i32)), + Some(field::Value::Integer(xy_record.gnomad_genomes.an as i32)), ); vcf_record.info_mut().insert( "gnomad_genomes_hom".into(), Some(field::Value::Integer( - auto_record.gnomad_genomes.ac_hom as i32, + xy_record.gnomad_genomes.ac_hom as i32, )), ); vcf_record.info_mut().insert( "gnomad_genomes_het".into(), Some(field::Value::Integer( - auto_record.gnomad_genomes.ac_het as i32, + xy_record.gnomad_genomes.ac_het as i32, )), ); vcf_record.info_mut().insert( "gnomad_genomes_hemi".into(), Some(field::Value::Integer( - auto_record.gnomad_genomes.ac_hemi as i32, + xy_record.gnomad_genomes.ac_hemi as i32, )), ); }; @@ -1635,8 +1551,98 @@ impl FrequencyAnnotator { } Ok(()) } + + pub(crate) fn annotate_variant( + &self, + vcf_var: &VcfVariant, + ) -> anyhow::Result< + Option, + > { + // Only attempt lookups into RocksDB for canonical contigs. + if !is_canonical(&vcf_var.chromosome) { + return Ok(None); + } + + // Build key for RocksDB database + let vcf_var = keys::Var::from( + &vcf_var.chromosome, + vcf_var.position, + &vcf_var.reference, + &vcf_var.alternative, + ); + let key: Vec = vcf_var.clone().into(); + use crate::server::run::actix_server::seqvars_frequencies::*; + // Annotate with frequency. + if CHROM_AUTO.contains(vcf_var.chrom.as_str()) { + if let Some(freq) = self + .db + .get_cf(self.db.cf_handle("autosomal").as_ref().unwrap(), key)? + { + let val = auto::Record::from_buf(&freq); + Ok(Some(FrequencyResultEntry::Autosomal( + AutosomalResultEntry { + gnomad_exomes_an: val.gnomad_exomes.an, + gnomad_exomes_hom: val.gnomad_exomes.ac_hom, + gnomad_exomes_het: val.gnomad_exomes.ac_het, + gnomad_genomes_an: val.gnomad_genomes.an, + gnomad_genomes_hom: val.gnomad_genomes.ac_hom, + gnomad_genomes_het: val.gnomad_genomes.ac_het, + }, + ))) + } else { + Err(anyhow!("No frequency data found for variant {:?}", vcf_var)) + } + } else if CHROM_XY.contains(vcf_var.chrom.as_str()) { + if let Some(freq) = self + .db + .get_cf(self.db.cf_handle("gonosomal").as_ref().unwrap(), key)? + { + let val = xy::Record::from_buf(&freq); + Ok(Some(FrequencyResultEntry::Gonosomal( + GonosomalResultEntry { + gnomad_exomes_an: val.gnomad_exomes.an, + gnomad_exomes_hom: val.gnomad_exomes.ac_hom, + gnomad_exomes_het: val.gnomad_exomes.ac_het, + gnomad_exomes_hemi: val.gnomad_exomes.ac_hemi, + gnomad_genomes_an: val.gnomad_genomes.an, + gnomad_genomes_hom: val.gnomad_genomes.ac_hom, + gnomad_genomes_het: val.gnomad_genomes.ac_het, + gnomad_genomes_hemi: val.gnomad_genomes.ac_hemi, + }, + ))) + } else { + Err(anyhow!("No frequency data found for variant {:?}", vcf_var)) + } + } else if CHROM_MT.contains(vcf_var.chrom.as_str()) { + if let Some(freq) = self + .db + .get_cf(self.db.cf_handle("mitochondrial").as_ref().unwrap(), key)? + { + let val = mt::Record::from_buf(&freq); + Ok(Some(FrequencyResultEntry::Mitochondrial( + MitochondrialResultEntry { + helix_an: val.helixmtdb.an, + helix_hom: val.helixmtdb.ac_hom, + helix_het: val.helixmtdb.ac_het, + gnomad_genomes_an: val.gnomad_mtdna.an, + gnomad_genomes_hom: val.gnomad_mtdna.ac_hom, + gnomad_genomes_het: val.gnomad_mtdna.ac_het, + }, + ))) + } else { + Err(anyhow!("No frequency data found for variant {:?}", vcf_var)) + } + } else { + tracing::trace!( + "Record @{:?} on non-canonical chromosome, skipping.", + &vcf_var + ); + Ok(None) + } + } } +#[derive(Debug)] pub struct ClinvarAnnotator { db: DBWithThreadMode, } @@ -1668,7 +1674,7 @@ impl ClinvarAnnotator { Self { db } } - fn from_path(path: impl AsRef + Display) -> anyhow::Result { + pub(crate) fn from_path(path: impl AsRef + Display) -> anyhow::Result { tracing::info!("Opening ClinVar database"); tracing::debug!("RocksDB path = {}", &path); let options = rocksdb::Options::default(); @@ -1747,10 +1753,68 @@ impl ClinvarAnnotator { } Ok(()) } + + pub(crate) fn annotate_variant( + &self, + vcf_var: &VcfVariant, + ) -> anyhow::Result> + { + // Only attempt lookups into RocksDB for canonical contigs. + if !is_canonical(&vcf_var.chromosome) { + return Ok(None); + } + + // Build key for RocksDB database + let vcf_var = keys::Var::from( + &vcf_var.chromosome, + vcf_var.position, + &vcf_var.reference, + &vcf_var.alternative, + ); + let key: Vec = vcf_var.clone().into(); + + if let Some(raw_value) = self + .db + .get_cf(self.db.cf_handle("clinvar").as_ref().unwrap(), key)? + { + let record_list = annonars::pbs::clinvar::minimal::ExtractedVcvRecordList::decode( + &mut Cursor::new(&raw_value), + )?; + + let mut clinvar_vcvs = Vec::new(); + let mut clinvar_germline_classifications = Vec::new(); + for clinvar_record in record_list.records.iter() { + let accession = clinvar_record.accession.as_ref().expect("must have VCV"); + let vcv = format!("{}.{}", accession.accession, accession.version); + let classifications = clinvar_record + .classifications + .as_ref() + .expect("must have classifications"); + if let Some(germline_classification) = &classifications.germline_classification { + let description = germline_classification + .description + .as_ref() + .expect("description missing") + .to_string(); + clinvar_vcvs.push(vcv); + clinvar_germline_classifications.push(description); + } + } + + Ok(Some( + crate::server::run::actix_server::seqvars_clinvar::ClinvarResultEntry { + clinvar_vcv: clinvar_vcvs, + clinvar_germline_classification: clinvar_germline_classifications, + }, + )) + } else { + Ok(None) + } + } } -struct ConsequenceAnnotator { - predictor: ConsequencePredictor, +pub(crate) struct ConsequenceAnnotator { + pub(crate) predictor: ConsequencePredictor, } impl ConsequenceAnnotator { @@ -1758,7 +1822,11 @@ impl ConsequenceAnnotator { Self { predictor } } - fn from_db_and_args(tx_db: TxSeqDatabase, args: &Args) -> anyhow::Result { + pub(crate) fn from_db_and_settings( + tx_db: TxSeqDatabase, + transcript_settings: &TranscriptSettings, + ) -> anyhow::Result { + let args = transcript_settings; let provider = Arc::new(MehariProvider::new( tx_db, MehariProviderConfigBuilder::default() @@ -1806,7 +1874,7 @@ impl ConsequenceAnnotator { } impl Annotator { - fn new(annotators: Vec) -> Self { + pub(crate) fn new(annotators: Vec) -> Self { Self { annotators } } @@ -1911,7 +1979,8 @@ async fn run_with_writer( writer.set_assembly(assembly); tracing::info!("Determined input assembly to be {:?}", &assembly); - let annotator = setup_annotator(args)?; + let annotator = + setup_seqvars_annotator(&args.sources, &args.transcript_settings, Some(assembly))?; let mut additional_header_info = annotator.versions_for_vcf_header(); additional_header_info.push(("mehariCmd".into(), env::args().join(" "))); additional_header_info.push(("mehariVersion".into(), env!("CARGO_PKG_VERSION").into())); @@ -1975,39 +2044,142 @@ async fn run_with_writer( Ok(()) } -fn setup_annotator(args: &Args) -> Result { +pub(crate) fn proto_assembly_from(assembly: &Assembly) -> Option { + crate::pbs::txs::Assembly::from_str_name(&format!( + "ASSEMBLY_{}", + match assembly { + Assembly::Grch38 => "GRCH38", + _ => "GRCH37", + } + )) +} + +pub(crate) fn setup_seqvars_annotator( + sources: &Sources, + transcript_settings: &TranscriptSettings, + assembly: Option, +) -> Result { let mut annotators = vec![]; // Add the frequency annotator if requested. - if let Some(rocksdb_path) = &args.sources.frequencies { - annotators.push(FrequencyAnnotator::from_path(rocksdb_path).map(AnnotatorEnum::Frequency)?) + if let Some(rocksdb_paths) = &sources.frequencies { + let freq_dbs = initialize_frequency_annotators_for_assembly(rocksdb_paths, assembly)?; + for freq_db in freq_dbs { + annotators.push(AnnotatorEnum::Frequency(freq_db)) + } } // Add the ClinVar annotator if requested. - if let Some(rocksdb_path) = &args.sources.clinvar { - annotators.push(ClinvarAnnotator::from_path(rocksdb_path).map(AnnotatorEnum::Clinvar)?) + if let Some(rocksdb_paths) = &sources.clinvar { + let clinvar_dbs = initialize_clinvar_annotators_for_assembly(rocksdb_paths, assembly)?; + for clinvar_db in clinvar_dbs { + annotators.push(AnnotatorEnum::Clinvar(clinvar_db)) + } } // Add the consequence annotator if requested. - if let Some(tx_sources) = &args.sources.transcripts { + if let Some(tx_sources) = &sources.transcripts { tracing::info!("Opening transcript database(s)"); - let tx_db = merge_transcript_databases( - tx_sources - .iter() - .map(load_tx_db) - .collect::>>()?, - )?; + let databases = load_transcript_dbs_for_assembly(tx_sources, assembly)?; - annotators.push( - ConsequenceAnnotator::from_db_and_args(tx_db, args).map(AnnotatorEnum::Consequence)?, - ); + if databases.is_empty() { + tracing::warn!("No suitable transcript databases found for requested assembly {:?}, therefore no consequence prediction will occur.", &assembly); + } else { + let tx_db = merge_transcript_databases(databases)?; + tracing::info!( + "Loaded transcript database(s) from {}", + &tx_sources.join(", ") + ); + annotators.push( + ConsequenceAnnotator::from_db_and_settings(tx_db, transcript_settings) + .map(AnnotatorEnum::Consequence)?, + ); + } } let annotator = Annotator::new(annotators); Ok(annotator) } +pub(crate) fn initialize_clinvar_annotators_for_assembly( + rocksdb_paths: &[String], + assembly: Option, +) -> Result, Error> { + rocksdb_paths + .iter() + .filter_map(|rocksdb_path| { + let skip = assembly.map_or(false, |a| !rocksdb_path.contains(path_component(a))); + if !skip { + tracing::info!( + "Loading ClinVar database for assembly {:?} from {}", + &assembly, + &rocksdb_path + ); + Some(ClinvarAnnotator::from_path(rocksdb_path)) + } else { + tracing::warn!( + "Skipping ClinVar database as its assembly does not match the requested one ({:?})", + &assembly + ); + None + } + }) + .collect() +} + +pub(crate) fn initialize_frequency_annotators_for_assembly( + rocksdb_paths: &[String], + assembly: Option, +) -> Result, Error> { + rocksdb_paths.iter().filter_map(|rocksdb_path| { + let skip = assembly.map_or(false, |a| !rocksdb_path.contains(path_component(a))); + if !skip { + tracing::info!( + "Loading frequency database for assembly {:?} from {}", + &assembly, + &rocksdb_path + ); + Some(FrequencyAnnotator::from_path(rocksdb_path)) + } else { + tracing::warn!("Skipping frequency database as its assembly does not match the requested one ({:?})", &assembly); + None + } + }).collect() +} + +pub(crate) fn load_transcript_dbs_for_assembly( + tx_sources: &Vec, + assembly: Option, +) -> Result, Error> { + let pb_assembly = assembly.as_ref().and_then(proto_assembly_from); + + // Filter out any transcript databases that do not match the requested assembly. + let check_assembly = |db: &TxSeqDatabase, assembly: crate::pbs::txs::Assembly| { + db.source_version + .iter() + .map(|s| s.assembly) + .any(|a| a == i32::from(assembly)) + }; + let databases = tx_sources + .iter() + .enumerate() + .map(|(i, path)| (i, load_tx_db(path))) + .filter_map(|(i, txdb)| match txdb { + Ok(db) => match pb_assembly { + Some(assembly) if check_assembly(&db, assembly) => Some(Ok(db)), + Some(_) => { + tracing::info!("Skipping transcript database {} as its version {:?} does not support the requested assembly ({:?})", &tx_sources[i], &db.source_version, &assembly); + None + }, + None => Some(Ok(db)), + }, + Err(_) => Some(txdb), + }) + .collect::>>()?; + Ok(databases) +} + /// Create for all alternate alleles from the given VCF record. pub fn from_vcf_allele(value: &noodles::vcf::variant::RecordBuf, allele_no: usize) -> keys::Var { let chrom = value.reference_sequence_name().to_string(); @@ -2027,14 +2199,14 @@ pub fn from_vcf_allele(value: &noodles::vcf::variant::RecordBuf, allele_no: usiz #[cfg(test)] mod test { + use super::binning::bin_from_range; + use super::{run, Args, PathOutput}; + use crate::annotate::cli::ConsequenceBy; + use crate::annotate::cli::{Sources, TranscriptSettings}; use clap_verbosity_flag::Verbosity; use pretty_assertions::assert_eq; use temp_testdir::TempDir; - use super::binning::bin_from_range; - use super::{csq::TranscriptSource, run, Args, ConsequenceBy, PathOutput}; - use crate::annotate::seqvars::Sources; - #[tokio::test] async fn smoke_test_output_vcf() -> Result<(), anyhow::Error> { let temp = TempDir::default(); @@ -2047,10 +2219,10 @@ mod test { let assembly = "grch37"; let args = Args { genome_release: None, - report_most_severe_consequence_by: Some(ConsequenceBy::Gene), - transcript_source: TranscriptSource::Both, - pick_transcript: vec![], - pick_transcript_mode: Default::default(), + transcript_settings: TranscriptSettings { + report_most_severe_consequence_by: Some(ConsequenceBy::Gene), + ..Default::default() + }, path_input_vcf: String::from("tests/data/annotate/seqvars/brca1.examples.vcf"), output: PathOutput { path_output_vcf: Some(path_out.into_os_string().into_string().unwrap()), @@ -2061,8 +2233,8 @@ mod test { "tests/data/annotate/seqvars/brca1.examples.ped", )), sources: Sources { - frequencies: Some(format!("{prefix}/{assembly}/seqvars/freqs")), - clinvar: Some(format!("{prefix}/{assembly}/seqvars/clinvar")), + frequencies: Some(vec![format!("{prefix}/{assembly}/seqvars/freqs")]), + clinvar: Some(vec![format!("{prefix}/{assembly}/seqvars/clinvar")]), transcripts: Some(vec![format!("{prefix}/{assembly}/txs.bin.zst")]), }, hgnc: None, @@ -2094,10 +2266,10 @@ mod test { let assembly = "grch37"; let args = Args { genome_release: None, - report_most_severe_consequence_by: Some(ConsequenceBy::Gene), - transcript_source: TranscriptSource::Both, - pick_transcript: vec![], - pick_transcript_mode: Default::default(), + transcript_settings: TranscriptSettings { + report_most_severe_consequence_by: Some(ConsequenceBy::Gene), + ..Default::default() + }, path_input_vcf: String::from("tests/data/annotate/seqvars/brca1.examples.vcf"), output: PathOutput { path_output_vcf: None, @@ -2108,8 +2280,8 @@ mod test { "tests/data/annotate/seqvars/brca1.examples.ped", )), sources: Sources { - frequencies: Some(format!("{prefix}/{assembly}/seqvars/freqs")), - clinvar: Some(format!("{prefix}/{assembly}/seqvars/clinvar")), + frequencies: Some(vec![format!("{prefix}/{assembly}/seqvars/freqs")]), + clinvar: Some(vec![format!("{prefix}/{assembly}/seqvars/clinvar")]), transcripts: Some(vec![format!("{prefix}/{assembly}/txs.bin.zst")]), }, hgnc: Some(format!("{prefix}/hgnc.tsv")), @@ -2147,10 +2319,10 @@ mod test { let assembly = "grch37"; let args = Args { genome_release: None, - report_most_severe_consequence_by: Some(ConsequenceBy::Gene), - transcript_source: TranscriptSource::Both, - pick_transcript: vec![], - pick_transcript_mode: Default::default(), + transcript_settings: TranscriptSettings { + report_most_severe_consequence_by: Some(ConsequenceBy::Gene), + ..Default::default() + }, path_input_vcf: String::from("tests/data/annotate/seqvars/badly_formed_vcf_entry.vcf"), output: PathOutput { path_output_vcf: None, @@ -2161,8 +2333,8 @@ mod test { "tests/data/annotate/seqvars/badly_formed_vcf_entry.ped", )), sources: Sources { - frequencies: Some(format!("{prefix}/{assembly}/seqvars/freqs")), - clinvar: Some(format!("{prefix}/{assembly}/seqvars/clinvar")), + frequencies: Some(vec![format!("{prefix}/{assembly}/seqvars/freqs")]), + clinvar: Some(vec![format!("{prefix}/{assembly}/seqvars/clinvar")]), transcripts: Some(vec![format!("{prefix}/{assembly}/txs.bin.zst")]), }, hgnc: Some(format!("{prefix}/hgnc.tsv")), @@ -2194,10 +2366,10 @@ mod test { let assembly = "grch37"; let args = Args { genome_release: None, - report_most_severe_consequence_by: Some(ConsequenceBy::Gene), - transcript_source: TranscriptSource::Both, - pick_transcript: vec![], - pick_transcript_mode: Default::default(), + transcript_settings: TranscriptSettings { + report_most_severe_consequence_by: Some(ConsequenceBy::Gene), + ..Default::default() + }, path_input_vcf: String::from("tests/data/annotate/seqvars/mitochondrial_variants.vcf"), output: PathOutput { path_output_vcf: None, @@ -2208,8 +2380,8 @@ mod test { "tests/data/annotate/seqvars/mitochondrial_variants.ped", )), sources: Sources { - frequencies: Some(format!("{prefix}/{assembly}/seqvars/freqs")), - clinvar: Some(format!("{prefix}/{assembly}/seqvars/clinvar")), + frequencies: Some(vec![format!("{prefix}/{assembly}/seqvars/freqs")]), + clinvar: Some(vec![format!("{prefix}/{assembly}/seqvars/clinvar")]), transcripts: Some(vec![format!("{prefix}/{assembly}/txs.bin.zst")]), }, hgnc: Some(format!("{prefix}/hgnc.tsv")), @@ -2243,10 +2415,10 @@ mod test { let assembly = "grch37"; let args = Args { genome_release: None, - report_most_severe_consequence_by: Some(ConsequenceBy::Gene), - transcript_source: TranscriptSource::Both, - pick_transcript: vec![], - pick_transcript_mode: Default::default(), + transcript_settings: TranscriptSettings { + report_most_severe_consequence_by: Some(ConsequenceBy::Gene), + ..Default::default() + }, path_input_vcf: String::from("tests/data/annotate/seqvars/clair3-glnexus-min.vcf"), output: PathOutput { path_output_vcf: None, @@ -2257,8 +2429,8 @@ mod test { "tests/data/annotate/seqvars/clair3-glnexus-min.ped", )), sources: Sources { - frequencies: Some(format!("{prefix}/{assembly}/seqvars/freqs")), - clinvar: Some(format!("{prefix}/{assembly}/seqvars/clinvar")), + frequencies: Some(vec![format!("{prefix}/{assembly}/seqvars/freqs")]), + clinvar: Some(vec![format!("{prefix}/{assembly}/seqvars/clinvar")]), transcripts: Some(vec![format!("{prefix}/{assembly}/txs.bin.zst")]), }, hgnc: Some(format!("{prefix}/hgnc.tsv")), @@ -2292,10 +2464,10 @@ mod test { let assembly = "grch38"; let args = Args { genome_release: None, - report_most_severe_consequence_by: Some(ConsequenceBy::Gene), - transcript_source: TranscriptSource::Both, - pick_transcript: vec![], - pick_transcript_mode: Default::default(), + transcript_settings: TranscriptSettings { + report_most_severe_consequence_by: Some(ConsequenceBy::Gene), + ..Default::default() + }, path_input_vcf: String::from("tests/data/annotate/seqvars/brca2_zar1l/brca2_zar1l.vcf"), output: PathOutput { path_output_vcf: None, @@ -2306,8 +2478,8 @@ mod test { "tests/data/annotate/seqvars/brca2_zar1l/brca2_zar1l.ped", )), sources: Sources { - frequencies: Some(format!("{prefix}/{assembly}/seqvars/freqs")), - clinvar: Some(format!("{prefix}/{assembly}/seqvars/clinvar")), + frequencies: Some(vec![format!("{prefix}/{assembly}/seqvars/freqs")]), + clinvar: Some(vec![format!("{prefix}/{assembly}/seqvars/clinvar")]), transcripts: Some(vec![format!("{prefix}/{assembly}/txs.bin.zst")]), }, hgnc: Some(format!("{prefix}/hgnc.tsv")), diff --git a/src/annotate/seqvars/provider.rs b/src/annotate/seqvars/provider.rs index e01a9f84..6ffe561c 100644 --- a/src/annotate/seqvars/provider.rs +++ b/src/annotate/seqvars/provider.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; -use crate::annotate::seqvars::{TranscriptPickMode, TranscriptPickType}; +use crate::annotate::cli::{TranscriptPickMode, TranscriptPickType}; use crate::db::create::Reason; use crate::db::TranscriptDatabase; use crate::{ diff --git a/src/annotate/strucvars/mod.rs b/src/annotate/strucvars/mod.rs index e85fcda9..69ada2b5 100644 --- a/src/annotate/strucvars/mod.rs +++ b/src/annotate/strucvars/mod.rs @@ -58,16 +58,14 @@ mod maelstrom; #[derive(Parser, Debug, Clone)] #[command(about = "Annotate structural variant VCF files", long_about = None)] pub struct Args { - /// Path to the mehari database folder. - #[arg(long)] - pub path_db: String, - /// Genome release to use, default is to auto-detect. #[arg(long, value_enum)] pub genome_release: Option, + /// Path to the input PED file. #[arg(long)] pub path_input_ped: String, + /// Path to the input VCF files. #[arg(long, required = true)] pub path_input_vcf: Vec, @@ -87,9 +85,11 @@ pub struct Args { /// Minimal reciprocal overlap to require. #[arg(long, default_value_t = 0.8)] pub min_overlap: f32, + /// Slack to use around break-ends. #[arg(long, default_value_t = 50)] pub slack_bnd: i32, + /// Slack to use around insertions. #[arg(long, default_value_t = 50)] pub slack_ins: i32, @@ -97,6 +97,7 @@ pub struct Args { /// Seed for random number generator (UUIDs), if any. #[arg(long)] pub rng_seed: Option, + /// Optionally, value to write to `##fileDate`. #[arg(long)] pub file_date: Option, @@ -4170,7 +4171,6 @@ mod test { let out_path = temp.join(format!("out{}", suffix)); let args = Args { - path_db: String::from("tests/data/db/create"), genome_release: Some(GenomeRelease::Grch37), path_input_ped: String::from("tests/data/annotate/strucvars/maelstrom/delly2-min.ped"), path_input_vcf: vec![String::from( @@ -4227,7 +4227,6 @@ mod test { let out_path = temp.join("out.vcf"); let args = Args { - path_db: String::from("tests/data/db/create"), genome_release: Some(GenomeRelease::Grch38), path_input_ped: String::from("tests/data/annotate/strucvars/test.order.ped"), path_input_vcf: vec![String::from("tests/data/annotate/strucvars/test.order.vcf")], diff --git a/src/common/noodles.rs b/src/common/noodles.rs index ac79ad19..55b14151 100644 --- a/src/common/noodles.rs +++ b/src/common/noodles.rs @@ -60,7 +60,7 @@ pub trait NoodlesVariantReader { async fn records<'a>( &'a mut self, header: &'a Header, - ) -> LocalBoxStream>; + ) -> LocalBoxStream<'a, std::io::Result>; } impl NoodlesVariantReader for VariantReader { @@ -76,7 +76,7 @@ impl NoodlesVariantReader for VariantReader { async fn records<'a>( &'a mut self, header: &'a Header, - ) -> LocalBoxStream> { + ) -> LocalBoxStream<'a, std::io::Result> { match self { VariantReader::Vcf(r) => r.record_bufs(header).boxed_local(), VariantReader::Bcf(r) => r diff --git a/src/server/run/actix_server/mod.rs b/src/server/run/actix_server/mod.rs index e888cce0..1119692a 100644 --- a/src/server/run/actix_server/mod.rs +++ b/src/server/run/actix_server/mod.rs @@ -6,11 +6,14 @@ use actix_web::ResponseError; use utoipa::OpenApi as _; use crate::annotate::seqvars::provider::Provider as MehariProvider; +use crate::annotate::seqvars::{ClinvarAnnotator, FrequencyAnnotator}; use crate::annotate::strucvars::csq::ConsequencePredictor as StrucvarConsequencePredictor; use crate::{annotate::seqvars::csq::ConsequencePredictor, common::GenomeRelease}; pub mod gene_txs; +pub mod seqvars_clinvar; pub mod seqvars_csq; +pub mod seqvars_frequencies; pub mod strucvars_csq; pub mod versions; @@ -42,11 +45,19 @@ pub struct WebServerData { /// `MehariProvider` to provide the transcript info. #[derivative(Debug = "ignore")] pub provider: std::collections::HashMap>, + /// The sequence variant consequence predictors for each assembly. pub seqvars_predictors: std::collections::HashMap, - /// The structural variant consequence predictors for eacha ssembly. + + /// The structural variant consequence predictors for each assembly. pub strucvars_predictors: std::collections::HashMap, + + /// The frequency annotators for each assembly. + pub frequency_annotators: std::collections::HashMap, + + /// The clinvar annotators for each assembly. + pub clinvar_annotators: std::collections::HashMap, } /// Main entry point for running the REST server. @@ -64,6 +75,10 @@ pub async fn main( .service(seqvars_csq::handle_with_openapi) .service(strucvars_csq::handle) .service(strucvars_csq::handle_with_openapi) + .service(seqvars_frequencies::handle) + .service(seqvars_frequencies::handle_with_openapi) + .service(seqvars_clinvar::handle) + .service(seqvars_clinvar::handle_with_openapi) .service(versions::handle) .service( utoipa_swagger_ui::SwaggerUi::new("/swagger-ui/{_:.*}") diff --git a/src/server/run/actix_server/seqvars_clinvar.rs b/src/server/run/actix_server/seqvars_clinvar.rs new file mode 100644 index 00000000..50f1eca2 --- /dev/null +++ b/src/server/run/actix_server/seqvars_clinvar.rs @@ -0,0 +1,132 @@ +//! Implementation of endpoint `/api/v1/seqvars/clinvar`. +//! +//! Also includes the implementation of the `/seqvars/clinvar` endpoint (deprecated). + +use actix_web::{ + get, + web::{self, Data, Json, Path}, +}; + +use crate::{annotate::seqvars::csq::VcfVariant, common::GenomeRelease}; + +use super::{versions::VersionsInfoResponse, CustomError}; + +/// Query parameters of the `/api/v1/seqvars/clinvar` endpoint. +#[derive( + Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::IntoParams, utoipa::ToSchema, +)] +#[serde(rename_all = "snake_case")] +#[serde_with::skip_serializing_none] +pub(crate) struct ClinvarQuery { + /// The assembly. + pub genome_release: GenomeRelease, + /// SPDI sequence. + pub chromosome: String, + /// SPDI position. + pub position: u32, + /// SPDI deletion. + pub reference: String, + /// SPDI insertion. + pub alternative: String, +} + +/// One entry in `ClinvarResponse`. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +pub(crate) struct ClinvarResultEntry { + pub clinvar_vcv: Vec, + pub clinvar_germline_classification: Vec, +} + +/// Response of the `/api/v1/seqvars/clinvar` endpoint. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +pub(crate) struct ClinvarResponse { + /// Version information. + pub version: VersionsInfoResponse, + + /// The original query records. + pub query: ClinvarQuery, + + /// The resulting records for the scored genes. + pub result: Vec, +} + +/// Implementation of endpoints. +async fn handle_impl( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, super::CustomError> { + let ClinvarQuery { + genome_release, + chromosome, + position, + reference, + alternative, + } = query.clone().into_inner(); + + let annotator = data + .clinvar_annotators + .get(&genome_release) + .ok_or_else(|| { + super::CustomError::new(anyhow::anyhow!( + "genome release not supported: {:?}", + &query.genome_release + )) + })?; + + let mut result = Vec::new(); + let g_var = VcfVariant { + chromosome, + position: position as i32, + reference, + alternative, + }; + let annotations = annotator + .annotate_variant(&g_var) + .map_err(|e| super::CustomError::new(anyhow::anyhow!("annotation failed: {}", &e)))?; + if let Some(annotations) = annotations { + result.push(annotations); + } + + let result = ClinvarResponse { + version: VersionsInfoResponse::from_web_server_data(data.into_inner().as_ref()) + .map_err(|e| CustomError::new(anyhow::anyhow!("Problem determining version: {}", e)))?, + query: query.into_inner(), + result, + }; + + Ok(Json(result)) +} + +/// Query for ClinVar information of a variant. +#[allow(clippy::unused_async)] +#[get("/seqvars/clinvar")] +async fn handle( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, super::CustomError> { + handle_impl(data, _path, query).await +} + +/// Query for ClinVar information of a variant. +#[allow(clippy::unused_async)] +#[utoipa::path( + get, + operation_id = "seqvarsClinvar", + params( + ClinvarQuery + ), + responses( + (status = 200, description = "Clinvar information.", body = ClinvarResponse), + (status = 500, description = "Internal server error.", body = CustomError) + ) +)] +#[get("/api/v1/seqvars/clinvar")] +async fn handle_with_openapi( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, super::CustomError> { + handle_impl(data, _path, query).await +} diff --git a/src/server/run/actix_server/seqvars_csq.rs b/src/server/run/actix_server/seqvars_csq.rs index 9ffb91c1..e25ff51b 100644 --- a/src/server/run/actix_server/seqvars_csq.rs +++ b/src/server/run/actix_server/seqvars_csq.rs @@ -73,6 +73,8 @@ pub(crate) struct SeqvarsCsqResultEntry { pub protein_pos: Option, /// Distance to feature. pub distance: Option, + /// Strand of the alignment + pub strand: i32, /// Optional list of warnings and error messages. pub messages: Option>, } @@ -146,6 +148,7 @@ async fn handle_impl( cds_pos, protein_pos, distance, + strand, messages, .. } = ann_field; @@ -173,6 +176,7 @@ async fn handle_impl( cds_pos, protein_pos, distance, + strand, messages, }; result.push(entry); diff --git a/src/server/run/actix_server/seqvars_frequencies.rs b/src/server/run/actix_server/seqvars_frequencies.rs new file mode 100644 index 00000000..c0477c6f --- /dev/null +++ b/src/server/run/actix_server/seqvars_frequencies.rs @@ -0,0 +1,180 @@ +//! Implementation of endpoint `/api/v1/seqvars/frequency`. +//! +//! Also includes the implementation of the `/seqvars/frequency` endpoint (deprecated). + +use actix_web::{ + get, + web::{self, Data, Json, Path}, +}; + +use crate::{annotate::seqvars::csq::VcfVariant, common::GenomeRelease}; + +use super::{versions::VersionsInfoResponse, CustomError}; + +/// Query parameters of the `/api/v1/seqvars/frequency` endpoint. +#[derive( + Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::IntoParams, utoipa::ToSchema, +)] +#[serde(rename_all = "snake_case")] +#[serde_with::skip_serializing_none] +pub(crate) struct FrequencyQuery { + /// The assembly. + pub genome_release: GenomeRelease, + /// SPDI sequence. + pub chromosome: String, + /// SPDI position. + pub position: u32, + /// SPDI deletion. + pub reference: String, + /// SPDI insertion. + pub alternative: String, +} + +/// One entry in `FrequencyResponse`. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +pub(crate) enum FrequencyResultEntry { + Autosomal(AutosomalResultEntry), + Gonosomal(GonosomalResultEntry), + Mitochondrial(MitochondrialResultEntry), +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +pub(crate) struct AutosomalResultEntry { + pub gnomad_exomes_an: u32, + + pub gnomad_exomes_hom: u32, + + pub gnomad_exomes_het: u32, + + pub gnomad_genomes_an: u32, + + pub gnomad_genomes_hom: u32, + + pub gnomad_genomes_het: u32, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +pub(crate) struct GonosomalResultEntry { + pub gnomad_exomes_an: u32, + + pub gnomad_exomes_hom: u32, + + pub gnomad_exomes_het: u32, + + pub gnomad_exomes_hemi: u32, + + pub gnomad_genomes_an: u32, + + pub gnomad_genomes_hom: u32, + + pub gnomad_genomes_het: u32, + + pub gnomad_genomes_hemi: u32, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +pub(crate) struct MitochondrialResultEntry { + pub helix_an: u32, + + pub helix_hom: u32, + + pub helix_het: u32, + + pub gnomad_genomes_an: u32, + + pub gnomad_genomes_hom: u32, + + pub gnomad_genomes_het: u32, +} + +/// Response of the `/api/v1/seqvars/frequency` endpoint. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +pub(crate) struct FrequencyResponse { + /// Version information. + pub version: VersionsInfoResponse, + /// The original query records. + pub query: FrequencyQuery, + /// The resulting records for the scored genes. + pub result: Vec, +} + +/// Implementation of endpoints. +async fn handle_impl( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, super::CustomError> { + let FrequencyQuery { + genome_release, + chromosome, + position, + reference, + alternative, + } = query.clone().into_inner(); + + let annotator = data + .frequency_annotators + .get(&genome_release) + .ok_or_else(|| { + super::CustomError::new(anyhow::anyhow!( + "genome release not supported: {:?}", + &query.genome_release + )) + })?; + + let mut result = Vec::new(); + let g_var = VcfVariant { + chromosome, + position: position as i32, + reference, + alternative, + }; + let frequencies = annotator + .annotate_variant(&g_var) + .map_err(|e| super::CustomError::new(anyhow::anyhow!("annotation failed: {}", &e)))?; + if let Some(frequencies) = frequencies { + result.push(frequencies); + } + + let result = FrequencyResponse { + version: VersionsInfoResponse::from_web_server_data(data.into_inner().as_ref()) + .map_err(|e| CustomError::new(anyhow::anyhow!("Problem determining version: {}", e)))?, + query: query.into_inner(), + result, + }; + + Ok(Json(result)) +} + +/// Query for gnomAD frequencies of a variant. +#[allow(clippy::unused_async)] +#[get("/seqvars/frequency")] +async fn handle( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, super::CustomError> { + handle_impl(data, _path, query).await +} + +/// Query for gnomAD frequencies of a variant. +#[allow(clippy::unused_async)] +#[utoipa::path( + get, + operation_id = "seqvarsFrequency", + params( + FrequencyQuery + ), + responses( + (status = 200, description = "Frequency information.", body = FrequencyResponse), + (status = 500, description = "Internal server error.", body = CustomError) + ) +)] +#[get("/api/v1/seqvars/frequency")] +async fn handle_with_openapi( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, super::CustomError> { + handle_impl(data, _path, query).await +} diff --git a/src/server/run/mod.rs b/src/server/run/mod.rs index 6e0dadc6..d7994568 100644 --- a/src/server/run/mod.rs +++ b/src/server/run/mod.rs @@ -1,15 +1,20 @@ -use std::sync::Arc; - +use crate::annotate::cli::{Sources, TranscriptSettings}; +use crate::annotate::seqvars::csq::ConfigBuilder; +use crate::annotate::seqvars::{ + initialize_clinvar_annotators_for_assembly, initialize_frequency_annotators_for_assembly, + load_transcript_dbs_for_assembly, ConsequenceAnnotator, +}; +use crate::db::merge::merge_transcript_databases; use crate::{ annotate::{ - seqvars::{ - csq::ConsequencePredictor as SeqvarConsequencePredictor, load_tx_db, path_component, - provider::Provider as MehariProvider, - }, + seqvars::csq::ConsequencePredictor as SeqvarConsequencePredictor, strucvars::csq::ConsequencePredictor as StrucvarConsequencePredictor, }, common::GenomeRelease, }; +use clap::ValueEnum; +use std::collections::HashMap; +use strum::EnumString; /// Implementation of Actix server. pub mod actix_server; @@ -28,9 +33,16 @@ pub mod openapi { ExonAlignment, GenesTranscriptsListQuery, GenesTranscriptsListResponse, GenomeAlignment, Strand, Transcript, TranscriptBiotype, TranscriptTag, }; + use crate::server::run::actix_server::seqvars_clinvar::{ + ClinvarQuery, ClinvarResponse, ClinvarResultEntry, + }; use crate::server::run::actix_server::seqvars_csq::{ SeqvarsCsqQuery, SeqvarsCsqResponse, SeqvarsCsqResultEntry, }; + use crate::server::run::actix_server::seqvars_frequencies::{ + AutosomalResultEntry, FrequencyQuery, FrequencyResponse, FrequencyResultEntry, + GonosomalResultEntry, MitochondrialResultEntry, + }; use crate::server::run::actix_server::strucvars_csq::{ StrucvarsCsqQuery, StrucvarsCsqResponse, }; @@ -38,7 +50,10 @@ pub mod openapi { Assembly, DataVersionEntry, SoftwareVersions, VersionsInfoResponse, }; - use super::actix_server::{gene_txs, seqvars_csq, strucvars_csq, versions, CustomError}; + use super::actix_server::{ + gene_txs, seqvars_clinvar, seqvars_csq, seqvars_frequencies, strucvars_csq, versions, + CustomError, + }; /// Utoipa-based `OpenAPI` generation helper. #[derive(utoipa::OpenApi)] @@ -48,7 +63,9 @@ pub mod openapi { gene_txs::handle_with_openapi, seqvars_csq::handle_with_openapi, strucvars_csq::handle_with_openapi, - strucvars_csq::handle_with_openapi + strucvars_csq::handle_with_openapi, + seqvars_frequencies::handle_with_openapi, + seqvars_clinvar::handle_with_openapi, ), components(schemas( Assembly, @@ -81,6 +98,15 @@ pub mod openapi { Transcript, TranscriptBiotype, TranscriptTag, + FrequencyQuery, + FrequencyResponse, + FrequencyResultEntry, + AutosomalResultEntry, + GonosomalResultEntry, + MitochondrialResultEntry, + ClinvarQuery, + ClinvarResponse, + ClinvarResultEntry, )) )] pub struct ApiDoc; @@ -90,9 +116,13 @@ pub mod openapi { #[derive(clap::Parser, Debug)] #[command(about = "Run Mehari REST API server", long_about = None)] pub struct Args { - /// Path to the mehari database folder. - #[arg(long)] - pub path_db: String, + /// What to annotate and which source to use. + #[command(flatten)] + pub sources: Sources, + + /// Transcript related settings. + #[command(flatten)] + pub transcript_settings: TranscriptSettings, /// Whether to suppress printing hints. #[arg(long, default_value_t = false)] @@ -101,13 +131,21 @@ pub struct Args { /// IP to listen on. #[arg(long, default_value = "127.0.0.1")] pub listen_host: String, + /// Port to listen on. #[arg(long, default_value_t = 8080)] pub listen_port: u16, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumString)] +enum Endpoint { + Transcripts, + Frequency, + Clinvar, +} + /// Print some hints via `tracing::info!`. -pub fn print_hints(args: &Args) { +fn print_hints(args: &Args, enabled_sources: &[(GenomeRelease, Endpoint)]) { tracing::info!( "Launching server main on http://{}:{} ...", args.listen_host.as_str(), @@ -119,41 +157,63 @@ pub fn print_hints(args: &Args) { return; } - // The endpoint `/genes/txs` provides transcript information. - tracing::info!( - " try: http://{}:{}/genes/txs?hgncId=HGNC:1100&\ - genomeBuild=GENOME_BUILD_GRCH37", - args.listen_host.as_str(), - args.listen_port - ); - // The endpoint `/tx/csq` to comput ethe consequence of a variant; without and with filtering - // for HGNC gene ID. - tracing::info!( - " try: http://{}:{}/seqvars/csq?genome_release=grch37\ - &chromosome=17&position=48275363&reference=C&alternative=A", - args.listen_host.as_str(), - args.listen_port - ); - tracing::info!( - " try: http://{}:{}/seqvars/csq?genome_release=grch37\ - &chromosome=17&position=48275363&reference=C&alternative=A&hgnc_id=HGNC:2197", - args.listen_host.as_str(), - args.listen_port - ); - // The endpoint `/strucvars/csq` computes the consequence of an SV. - tracing::info!( - " try: http://{}:{}/strucvars/csq?genome_release=grch37\ - &chromosome=17&start=48275360&&stop=48275370&sv_type=DEL", - args.listen_host.as_str(), - args.listen_port - ); - // The endpoint `/structvars/csq` computes the consequence of an SV. - tracing::info!( - " try: http://{}:{}/strucvars/csq?genome_release=grch37\ - &chromosome=17&start=48275360&&stop=48275370&sv_type=DEL", - args.listen_host.as_str(), - args.listen_port + use Endpoint::*; + use GenomeRelease::*; + + let prefix = format!( + "try: http://{host}:{port}/api/v1/", + host = args.listen_host, + port = args.listen_port ); + let examples: HashMap<(GenomeRelease, Endpoint), Vec<&str>> = HashMap::from([ + ( + (Grch37, Transcripts), + vec![ + r#"genes/transcripts?hgnc_id=HGNC:1100&genome_build=grch37"#, + r#"seqvars/csq?genome_release=grch37&chromosome=17&position=48275363&reference=C&alternative=A"#, + r#"seqvars/csq?genome_release=grch37&chromosome=17&position=48275363&reference=C&alternative=A&hgnc_id=HGNC:2197"#, + r#"strucvars/csq?genome_release=grch37&chromosome=17&start=48275360&&stop=48275370&sv_type=DEL""#, + ], + ), + ( + (Grch37, Frequency), + vec![ + r#"seqvars/frequency?genome_release=grch37&chromosome=17&position=48275363&reference=C&alternative=A"#, + ], + ), + ( + (Grch37, Clinvar), + vec![ + r#"seqvars/clinvar?genome_release=grch37&chromosome=17&position=48275363&reference=C&alternative=A"#, + ], + ), + ( + (Grch38, Transcripts), + vec![ + r#"genes/transcripts?hgnc_id=HGNC:1100&genome_build=grch38"#, + r#"seqvars/csq?genome_release=grch38&chromosome=2&position=26364839&reference=C&alternative=T"#, + ], + ), + ( + (Grch38, Frequency), + vec![ + r#"seqvars/frequency?genome_release=grch38&chromosome=2&position=26364839&reference=C&alternative=T"#, + ], + ), + ( + (Grch38, Clinvar), + vec![ + r#"seqvars/clinvar?genome_release=grch38&chromosome=2&position=26364839&reference=C&alternative=T"#, + ], + ), + ]); + for (genome_release, endpoint) in enabled_sources { + if let Some(examples) = examples.get(&(*genome_release, *endpoint)) { + for example in examples { + tracing::info!("{}{}", prefix, example); + } + } + } } /// Main entry point for `server run` sub command. @@ -174,34 +234,95 @@ pub async fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), a tracing::info!("Loading database..."); let before_loading = std::time::Instant::now(); let mut data = actix_server::WebServerData::default(); - for genome_release in [GenomeRelease::Grch37, GenomeRelease::Grch38] { + + let mut enabled_sources = vec![]; + use Endpoint::*; + + for genome_release in GenomeRelease::value_variants().iter().copied() { + tracing::info!("Loading genome release {:?}", genome_release); let assembly = genome_release.into(); - let path = format!("{}/{}/txs.bin.zst", &args.path_db, path_component(assembly)); - if !std::path::Path::new(&path).exists() { - tracing::warn!("No transcript database found at {}", &path); - continue; + + if let Some(tx_db_paths) = args.sources.transcripts.as_ref() { + tracing::info!(" - building seqvars predictors"); + let tx_dbs = load_transcript_dbs_for_assembly(tx_db_paths, Some(assembly))?; + if tx_dbs.is_empty() { + tracing::warn!( + "No transcript databases loaded, respective endpoint will be unavailable." + ); + } else { + let tx_db = merge_transcript_databases(tx_dbs)?; + let annotator = + ConsequenceAnnotator::from_db_and_settings(tx_db, &args.transcript_settings)?; + let config = ConfigBuilder::default() + .report_most_severe_consequence_by( + args.transcript_settings.report_most_severe_consequence_by, + ) + .transcript_source(args.transcript_settings.transcript_source) + .build()?; + + let provider = annotator.predictor.provider.clone(); + data.provider.insert(genome_release, provider.clone()); + data.seqvars_predictors.insert( + genome_release, + SeqvarConsequencePredictor::new(provider.clone(), config), + ); + + tracing::info!(" - building strucvars predictors"); + data.strucvars_predictors.insert( + genome_release, + StrucvarConsequencePredictor::new(provider.clone(), assembly), + ); + enabled_sources.push((genome_release, Transcripts)); + } + } else { + tracing::warn!( + "No predictors for genome release {:?}, respective endpoint will be unavailable.", + genome_release + ); + } + + if let Some(paths) = args.sources.frequencies.as_ref() { + let annotators = initialize_frequency_annotators_for_assembly(paths, Some(assembly))?; + + match annotators.len() { + 0 => tracing::warn!( + "No frequency databases loaded, respective endpoint will be unavailable." + ), + 1 => { + let frequency_db = annotators.into_iter().next().unwrap(); + data.frequency_annotators + .insert(genome_release, frequency_db); + enabled_sources.push((genome_release, Frequency)); + } + _ => tracing::warn!( + "Multiple frequency databases loaded. This is not supported. The respective endpoint will be unavailable." + ), + } + } + + if let Some(paths) = args.sources.clinvar.as_ref() { + let annotators = initialize_clinvar_annotators_for_assembly(paths, Some(assembly))?; + + match annotators.len() { + 0 => tracing::warn!( + "No clinvar databases loaded, respective endpoint will be unavailable." + ), + 1 => { + let annotator = annotators.into_iter().next().unwrap(); + data.clinvar_annotators.insert(genome_release, annotator); + enabled_sources.push((genome_release, Clinvar)); + } + _ => tracing::warn!( + "Multiple clinvar databases specified. This is not supported. The respective endpoint will be unavailable." + ), + } } - tracing::info!(" - loading {}", &path); - let tx_db = load_tx_db(&path)?; - tracing::info!(" - building interval trees"); - let provider = Arc::new(MehariProvider::new(tx_db, Default::default())); - data.provider.insert(genome_release, provider.clone()); - tracing::info!(" - building seqvars predictors"); - data.seqvars_predictors.insert( - genome_release, - SeqvarConsequencePredictor::new(provider.clone(), Default::default()), - ); - tracing::info!(" - building strucvars predictors"); - data.strucvars_predictors.insert( - genome_release, - StrucvarConsequencePredictor::new(provider.clone(), assembly), - ); } let data = actix_web::web::Data::new(data); - tracing::info!("...done loading data {:?}", before_loading.elapsed()); + tracing::info!("... done loading data {:?}", before_loading.elapsed()); // Print the server URL and some hints (the latter: unless suppressed). - print_hints(args); + print_hints(args, &enabled_sources); // Launch the Actix web server. actix_server::main(args, data).await?; diff --git a/src/verify/seqvars.rs b/src/verify/seqvars.rs index 97462026..89bbfca0 100644 --- a/src/verify/seqvars.rs +++ b/src/verify/seqvars.rs @@ -7,11 +7,11 @@ use std::{ time::Instant, }; +use crate::annotate::cli::{ConsequenceBy, TranscriptPickMode, TranscriptPickType}; use crate::annotate::seqvars::{ csq::{ConfigBuilder as ConsequencePredictorConfigBuilder, ConsequencePredictor, VcfVariant}, load_tx_db, path_component, provider::{ConfigBuilder as MehariProviderConfigBuilder, Provider as MehariProvider}, - ConsequenceBy, TranscriptPickMode, TranscriptPickType, }; use biocommons_bioutils::assemblies::Assembly; use clap::Parser; diff --git a/utils/docker/entrypoint.sh b/utils/docker/entrypoint.sh index 90b34d11..e0979adf 100644 --- a/utils/docker/entrypoint.sh +++ b/utils/docker/entrypoint.sh @@ -7,16 +7,23 @@ set -euo pipefail # # PATH_DB -- path to the database directory containing, # e.g., `grch3{7,8}/*.zst`. -# default: /data/hpo +# default: /data/mehari/db # HTTP_HOST -- host to listen on # default: 0.0.0.0 # HTTP_PORT -- port # default: 8080 -PATH_DB=${PATH_DB-/data/mehari} +PATH_DB=${PATH_DB-/data/mehari/db} HTTP_HOST=${HTTP_HOST-0.0.0.0} HTTP_PORT=${HTTP_PORT-8080} +PATH_TRANSCRIPTS_37=$PATH_DB/grch37/seqvars/txs.bin.zst +PATH_TRANSCRIPTS_38=$PATH_DB/grch38/seqvars/txs.bin.zst +PATH_FREQUENCIES_37=$PATH_DB/grch37/seqvars/freqs +PATH_FREQUENCIES_38=$PATH_DB/grch38/seqvars/freqs +PATH_CLINVAR_37=$PATH_DB/grch37/seqvars/clinvar +PATH_CLINVAR_38=$PATH_DB/grch38/seqvars/clinvar + first=${1-} if [ "$first" == exec ]; then @@ -26,7 +33,12 @@ else exec \ mehari \ server run \ - --path-db "$PATH_DB" \ + $(test -e "$PATH_TRANSCRIPTS_37" && echo --transcripts "$PATH_TRANSCRIPTS_37") \ + $(test -e "$PATH_TRANSCRIPTS_38" && echo --transcripts "$PATH_TRANSCRIPTS_38") \ + $(test -e "$PATH_FREQUENCIES_37" && echo --frequencies "$PATH_FREQUENCIES_37") \ + $(test -e "$PATH_FREQUENCIES_38" && echo --frequencies "$PATH_FREQUENCIES_38") \ + $(test -e "$PATH_CLINVAR_37" && echo --clinvar "$PATH_CLINVAR_37") \ + $(test -e "$PATH_CLINVAR_38" && echo --clinvar "$PATH_CLINVAR_38") \ --listen-host "$HTTP_HOST" \ --listen-port "$HTTP_PORT" fi