Skip to content

Commit

Permalink
fix: make "db mk-inhouse" work with BND/INS (#322)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Mar 7, 2024
1 parent 6c8bcdc commit 816de4d
Show file tree
Hide file tree
Showing 12 changed files with 48 additions and 59 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release-please.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ on:
push:
branches:
- main
- releases/*
- release/*

name: release-please

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ on:
push:
branches:
- main
- releases/*
- release/*
pull_request:
branches:
- main
- releases/*
- release/*

jobs:
Formatting:
Expand Down
6 changes: 3 additions & 3 deletions src/db/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ fn copy_db_def(

debug!("Compute and verify MD5 checksum");
// First create path to `.md5` file, then read in the MD5 string.
let md5_path = base_path.join(&format!("{file_name}{file_ext}.md5"));
let md5_str = read_md5_file(&md5_path)?.to_lowercase();
let md5_path = base_path.join(format!("{file_name}{file_ext}.md5"));
let md5_str = read_md5_file(md5_path)?.to_lowercase();

let file_path = base_path.join(&format!("{file_name}{file_ext}"));
let file_path = base_path.join(format!("{file_name}{file_ext}"));
let chk_md5 = md5sum(&file_path)?.to_lowercase();
if md5_str != chk_md5 {
return Err(anyhow::anyhow!(
Expand Down
2 changes: 1 addition & 1 deletion src/db/genes/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ pub mod dbnsfp_gene {
}

/// Serialize `Vec<String>`, counterpart to `deserialize_vec`.
fn serialize_vec<S>(x: &Vec<String>, s: S) -> Result<S::Ok, S::Error>
fn serialize_vec<S>(x: &[String], s: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
Expand Down
37 changes: 20 additions & 17 deletions src/db/mk_inhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ pub mod input {
/// SV type of the record
#[serde(deserialize_with = "from_varfish_sv_type")]
pub sv_type: SvType,
/// Field with postgres-serialized JSON.
pub info: String,
/// number of hom. alt. carriers
pub num_hom_alt: u32,
/// number of hom. ref. carriers
Expand Down Expand Up @@ -276,22 +278,29 @@ fn merge_to_out(
// Read in all records and perform the "merge compression"
let mut reader = JsonLinesReader::new(reader);
while let Ok(Some(record)) = reader.read::<input::Record>() {
let info = record.info.replace("\"\"\"", "\"");
let record = {
let mut record = output::Record::from_db_record(record);
record.begin -= 1; // need to get 0-based coordinates for DB
record.end = match record.sv_type {
SvType::Bnd => {
// Obtain "pos2" from JSON-encoded info field.
let info = serde_json::from_str::<serde_json::Value>(&info)?;
info.as_object()
.and_then(|o| o.get("pos2"))
.and_then(|v| v.as_i64())
.ok_or_else(|| anyhow::anyhow!("Cannot find 'pos2' in info field"))?
as i32
}
SvType::Ins => record.begin + 1,
_ => record.end,
};
record
};
let begin = match record.sv_type {
SvType::Bnd => record.begin - 1 - args.slack_bnd,
SvType::Ins => record.begin - 1 - args.slack_ins,
_ => record.begin,
let query = match record.sv_type {
SvType::Bnd => record.begin..(record.begin + 1),
_ => record.begin..record.end,
};
let end = match record.sv_type {
SvType::Bnd => record.begin + args.slack_bnd,
SvType::Ins => record.begin + args.slack_ins,
_ => record.end,
};
let query = begin..end;
let mut found_any_cluster = false;
for mut it_tree in tree.find_mut(&query) {
let cluster_idx = *it_tree.data();
Expand All @@ -317,13 +326,7 @@ fn merge_to_out(
}
if !found_any_cluster {
// create new cluster
tree.insert(
match record.sv_type {
SvType::Bnd | SvType::Ins => (record.begin - 1)..record.begin,
_ => (record.begin - 1)..record.end,
},
clusters.len(),
);
tree.insert(query, clusters.len());
clusters.push(vec![records.len()]);
}
// always register the record
Expand Down
5 changes: 0 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,6 @@ fn main() -> Result<(), anyhow::Error> {
.compact()
.finish();

// Common variables for CLI commands from annonars.
let annonars_common = annonars::common::cli::Args {
verbose: cli.common.verbose.clone(),
};

// Install collector and go into sub commands.
let term = Term::stderr();
tracing::subscriber::with_default(collector, || {
Expand Down
5 changes: 2 additions & 3 deletions src/pheno/algos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,10 @@ mod test {
.omim_disease(&OmimDiseaseId::from(154700))
.expect("marfan symdrome must be in HPO");
let hpo_marfan = HpoGroup::from_iter(
omim_marfan
&omim_marfan
.to_hpo_set(&hpo)
.child_nodes()
.without_modifier()
.into_iter(),
.without_modifier(),
);

let score = phenomizer::score(&prepare(query), &hpo_marfan, &hpo);
Expand Down
13 changes: 3 additions & 10 deletions src/pheno/prepare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ fn run_simulation(
let path = std::path::Path::new(path_gene_logs).join(num_terms.to_string());
std::fs::create_dir_all(&path).expect("cannot create logs directory");
Some(
std::fs::File::create(&format!("{}/{}.txt", path.display(), gene.symbol()))
std::fs::File::create(format!("{}/{}.txt", path.display(), gene.symbol()))
.expect("could not open file"),
)
} else {
Expand Down Expand Up @@ -130,10 +130,7 @@ fn run_simulation(
let s = phenomizer::score(
&ts,
&HpoGroup::from_iter(
gene.to_hpo_set(ontology)
.child_nodes()
.without_modifier()
.into_iter(),
&gene.to_hpo_set(ontology).child_nodes().without_modifier(),
),
ontology,
);
Expand All @@ -147,11 +144,7 @@ fn run_simulation(
s,
gene.symbol(),
ts.iter()
.map(|t| format!(
"{} ({})",
t.to_string(),
ontology.hpo(t).unwrap().name()
))
.map(|t| format!("{} ({})", t, ontology.hpo(t).unwrap().name()))
.collect::<Vec<_>>()
.join(", ")
)
Expand Down
2 changes: 1 addition & 1 deletion src/pheno/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow:
let path_rocksdb = format!("{}/resnik", args.path_hpo_dir);
let db = rocksdb::DB::open_cf_for_read_only(
&rocksdb::Options::default(),
&path_rocksdb,
path_rocksdb,
["meta", "resnik_pvalues"],
true,
)?;
Expand Down
3 changes: 2 additions & 1 deletion src/server/annos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ pub mod actix_server {
#[serde_with::serde_as]
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "kebab-case")]
#[allow(dead_code)]
struct Request {
/// Genome release version.
pub genome_release: String,
Expand Down Expand Up @@ -730,7 +731,7 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow:
tracing::info!("Opening databases...");
let before_opening = Instant::now();
// Argument lists from the command line with the corresponding database enum value.
let paths_db_pairs = vec![
let paths_db_pairs = [
(&args.path_cadd, AnnoDb::Cadd),
(&args.path_dbnsfp, AnnoDb::Dbnsfp),
(&args.path_dbsnp, AnnoDb::Dbsnp),
Expand Down
26 changes: 12 additions & 14 deletions src/server/pheno.rs
Original file line number Diff line number Diff line change
Expand Up @@ -494,20 +494,18 @@ pub mod actix_server {
) -> Self {
let hpo_terms = if hpo_terms {
Some(
HpoGroup::from_iter(
omim_disease.to_hpo_set(ontology).child_nodes().into_iter(),
)
.into_iter()
.map(|term_id| ontology.hpo(term_id))
.filter(|term| term.is_some())
.map(|term| {
let term = term.expect("filtered above");
ResultHpoTerm {
term_id: term.id().to_string(),
name: term.name().to_string(),
}
})
.collect(),
HpoGroup::from_iter(&omim_disease.to_hpo_set(ontology).child_nodes())
.into_iter()
.map(|term_id| ontology.hpo(term_id))
.filter(|term| term.is_some())
.map(|term| {
let term = term.expect("filtered above");
ResultHpoTerm {
term_id: term.id().to_string(),
name: term.name().to_string(),
}
})
.collect(),
)
} else {
None
Expand Down
2 changes: 1 addition & 1 deletion src/server/rest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow:
} else {
PathBuf::from_str(&args.path_db)?.join("conf.toml")
};
let toml_str = std::fs::read_to_string(&path_conf)?;
let toml_str = std::fs::read_to_string(path_conf)?;
toml::from_str(&toml_str)?
};

Expand Down

0 comments on commit 816de4d

Please sign in to comment.