Skip to content

Commit feaefc1

Browse files
authored
feat: port over code from worker (#1) (#2)
1 parent 904302e commit feaefc1

File tree

78 files changed

+10890
-30
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+10890
-30
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/tests/** filter=lfs diff=lfs merge=lfs -text

.github/workflows/release-please.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ jobs:
2626
toolchain: stable
2727
override: true
2828

29+
- name: Setup protoc
30+
uses: arduino/setup-protoc@v1.1.2
31+
with:
32+
repo-token: ${{ secrets.GITHUB_TOKEN }}
33+
2934
- uses: Swatinem/rust-cache@v2
3035
if: ${{ steps.release.outputs.release_created }}
3136

.github/workflows/rust.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ jobs:
2222
override: true
2323
components: rustfmt
2424

25+
- name: Setup protoc
26+
uses: arduino/setup-protoc@v1.1.2
27+
with:
28+
repo-token: ${{ secrets.GITHUB_TOKEN }}
29+
2530
- name: Check format
2631
run: |
2732
cargo fmt -- --check
@@ -39,6 +44,11 @@ jobs:
3944
override: true
4045
components: clippy
4146

47+
- name: Setup protoc
48+
uses: arduino/setup-protoc@v1.1.2
49+
with:
50+
repo-token: ${{ secrets.GITHUB_TOKEN }}
51+
4252
- name: Lint with clippy
4353
uses: actions-rs/clippy-check@v1
4454
with:
@@ -59,6 +69,11 @@ jobs:
5969
toolchain: stable
6070
override: true
6171

72+
- name: Setup protoc
73+
uses: arduino/setup-protoc@v1.1.2
74+
with:
75+
repo-token: ${{ secrets.GITHUB_TOKEN }}
76+
6277
- uses: Swatinem/rust-cache@v2
6378

6479
- name: Run cargo-tarpaulin

Cargo.toml

Lines changed: 66 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,76 @@ license = "Apache-2.0"
88
homepage = "https://github.com/bihealth/viguno"
99
readme = "README.md"
1010

11-
[lib]
12-
name = "the_repo_name"
13-
path = "src/lib.rs"
14-
1511
[dependencies]
12+
actix-files = "0.6"
13+
actix-web = "4.3"
14+
anyhow = "1.0"
15+
base16ct = "0.2"
16+
bgzip = "0.3"
17+
bio = "1.1"
18+
byte-unit = "4.0"
19+
chrono = { version = "0.4", features = ["serde"] }
20+
clap = { version = "4.1", features = ["derive", "help", "env"] }
21+
clap-verbosity-flag = "2.0"
22+
console = "0.15"
23+
csv = "1.1"
24+
derive_more = "0.99"
25+
enum-map = { version = "2.4", features = ["serde"] }
26+
env_logger = "0.10"
27+
fastrand = "1.9"
28+
flate2 = "1.0"
29+
hgvs = "0.8"
30+
hpo = "0.8"
31+
indexmap = { version = "1.9", features = ["serde"] }
32+
indicatif = { version = "0.17", features = ["rayon"] }
33+
itertools = "0.10"
34+
lazy_static = "1.4"
35+
linked-hash-map = { version = "0.5", features = ["serde", "serde_impl"] }
36+
log = "0.4"
37+
md-5 = "0.10"
38+
multimap = "0.9"
39+
noodles-bgzf = "0.22"
40+
noodles-core = "0.11"
41+
noodles-csi = "0.19"
42+
noodles-tabix = "0.22"
43+
noodles-vcf = "0.31"
44+
once_cell = "1.18"
45+
pretty_assertions = "1.3"
46+
procfs = "0.15"
47+
prost = "0.11"
48+
rayon = "1.7"
49+
regex = "1.7"
50+
result = "1.0"
51+
rocksdb = { version = "0.21", features = ["multi-threaded-cf"] }
52+
rocksdb-utils-lookup = "0.1"
53+
serde = { version = "1.0", features = ["serde_derive"] }
54+
serde-jsonlines = "0.4"
55+
serde_json = "1.0"
56+
serde_with = "3.0"
57+
sha2 = "0.10"
58+
shellexpand = "3.0"
59+
strum = "0.24"
60+
strum_macros = "0.24"
61+
temp_testdir = "0.2"
62+
tempdir = "0.3"
63+
test-log = "0.2"
1664
thiserror = "1.0"
65+
thousands = "0.2"
66+
toml = { version = "0.7", features = ["preserve_order", "parse", "display"] }
1767
tracing = "0.1"
68+
tracing-subscriber = "0.3"
69+
uuid = { version = "1.2", features = ["v4", "fast-rng", "serde"] }
70+
71+
[build-dependencies]
72+
prost-build = "0.11"
1873

1974
[dev-dependencies]
20-
anyhow = "1.0"
21-
clap = { version = "4.1", features = ["derive", "env"] }
22-
clap-verbosity-flag = {version = "2.0"}
23-
env_logger = "0.10"
75+
file_diff = "1.0"
76+
insta = { version = "1.29", features = ["yaml"] }
2477
pretty_assertions = "1.3"
78+
serde_test = "1.0"
2579
temp_testdir = "0.2"
26-
test-log = "0.2"
27-
tracing-subscriber = {version = "0.3" }
80+
81+
[[bin]]
82+
name = "viguno"
83+
path = "src/main.rs"

build.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// The custom build script, needed as we use protocolbuffers.
2+
3+
fn main() {
4+
prost_build::Config::new()
5+
.protoc_arg("-Isrc/proto")
6+
// Add serde serialization and deserialization to the generated code.
7+
.type_attribute(".", "#[derive(serde::Serialize, serde::Deserialize)]")
8+
// Skip serializing `None` values.
9+
.type_attribute(".", "#[serde_with::skip_serializing_none]")
10+
// Define the protobuf files to compile.
11+
.compile_protos(&["viguno/v1/simulation.proto"], &["src/"])
12+
.unwrap();
13+
}

src/algos/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
//! Phenotype-related algorithms.
2+
3+
pub mod phenomizer;

src/algos/phenomizer.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
//! Similarity computation using the Phenomizer method.
2+
3+
use hpo::{
4+
similarity::{Builtins, Similarity},
5+
term::{HpoGroup, InformationContentKind},
6+
Ontology,
7+
};
8+
9+
// TODO: this is equivalent to `StandardCombiner::FunSimAvg`; use that instead.
10+
11+
/// Compute symmetric similarity score.
12+
pub fn score(q: &HpoGroup, d: &HpoGroup, o: &Ontology) -> f32 {
13+
let s = Builtins::Resnik(InformationContentKind::Gene);
14+
(score_dir(q, d, o, &s) + score_dir(d, q, o, &s)) / 2.0
15+
}
16+
17+
/// "Directed" score part of phenomizer score.
18+
///
19+
/// # Panics
20+
///
21+
/// If there are more query terms than fit an `f32` value.
22+
fn score_dir(qs: &HpoGroup, ds: &HpoGroup, o: &Ontology, s: &impl Similarity) -> f32 {
23+
// Handle case of empty `qs`.
24+
if qs.is_empty() {
25+
return 0f32;
26+
}
27+
28+
// For each `q in qs` compute max similarity to any `d in ds`.
29+
let mut tmp: Vec<f32> = Vec::new();
30+
for q in qs {
31+
if let Some(q) = o.hpo(q) {
32+
tmp.push(
33+
ds.iter()
34+
.filter_map(|d| o.hpo(d).map(|d| q.similarity_score(&d, s)))
35+
.max_by(|a, b| a.partial_cmp(b).expect("try to compare NaN"))
36+
.unwrap_or_default(),
37+
);
38+
}
39+
}
40+
41+
// NB: we allow loss of precision in this function for the following statement.
42+
let len: u16 = qs.len().try_into().expect("more than 2^16 query terms");
43+
let len: f32 = len.try_into().expect("too many query terms for f32");
44+
tmp.iter().sum::<f32>() / len
45+
}
46+
47+
#[cfg(test)]
mod test {
    use super::*;
    use hpo::{annotations::OmimDiseaseId, term::HpoGroup, HpoTermId, Ontology};

    /// Load the ontology from the test data directory.
    fn load_hpo() -> Result<Ontology, anyhow::Error> {
        Ok(Ontology::from_standard("tests/data/hpo")?)
    }

    /// Build an `HpoGroup` from textual HPO term identifiers.
    fn prepare(terms: &[&str]) -> HpoGroup {
        HpoGroup::from(
            terms
                .iter()
                .map(|s| HpoTermId::from((*s).to_string()))
                .collect::<Vec<_>>(),
        )
    }

    /// Score a two-term query against the terms annotated to OMIM disease 154700.
    #[test]
    fn phenomizer_score_gene() -> Result<(), anyhow::Error> {
        let hpo = load_hpo()?;

        let query = &[
            // slender build
            "HP:0001533",
            // high, narrow palate
            "HP:0002705",
        ];
        let omim_marfan = hpo
            .omim_disease(&OmimDiseaseId::from(154_700))
            .expect("marfan syndrome must be in HPO");
        let hpo_marfan = omim_marfan
            .to_hpo_set(&hpo)
            .child_nodes()
            .without_modifier()
            .into_iter()
            .collect::<HpoGroup>();

        let score = score(&prepare(query), &hpo_marfan, &hpo);

        assert!((score - 1.770_859_7).abs() < 0.00001, "score = {score}");

        Ok(())
    }
}

src/common.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
//! Functionality shared between all commands.
2+
3+
use clap::Parser;
4+
use clap_verbosity_flag::{InfoLevel, Verbosity};
5+
6+
/// Shared command line arguments.
7+
#[derive(Parser, Debug)]
8+
pub struct Args {
9+
/// Verbosity of the program
10+
#[clap(flatten)]
11+
pub verbose: Verbosity<InfoLevel>,
12+
}
13+
14+
/// Construct the `indicatif` style for progress bars.
15+
///
16+
/// # Panics
17+
///
18+
/// In the case when writing the ETA seconds could not be written to the progress bar.
19+
pub fn indicatif_style() -> indicatif::ProgressStyle {
20+
let tpl = "{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] \
21+
{human_pos}/{human_len} ({per_sec})";
22+
indicatif::ProgressStyle::with_template(tpl)
23+
.unwrap()
24+
.with_key(
25+
"eta",
26+
|state: &indicatif::ProgressState, w: &mut dyn std::fmt::Write| {
27+
write!(w, "{:.1}s", state.eta().as_secs_f64())
28+
.expect("could not write the ETA as seconds to progress bar");
29+
},
30+
)
31+
.progress_chars("#>-")
32+
}
33+
34+
/// Construct an `indicatif` progress bar with the common style.
35+
///
36+
/// Also, we will enable a steady tick every 0.1s and hide in tests.
37+
pub fn progress_bar(#[allow(unused_variables)] len: usize) -> indicatif::ProgressBar {
38+
#[cfg(test)]
39+
let pb = indicatif::ProgressBar::hidden();
40+
#[cfg(not(test))]
41+
let pb = indicatif::ProgressBar::new(len as u64).with_style(indicatif_style());
42+
pb.enable_steady_tick(std::time::Duration::from_millis(100));
43+
pb
44+
}
45+
46+
/// Load HPO either from binary `$path_hpo/hpo.bin` if it exist, otherwise load as
47+
/// standard directory from `$path_hpo`.
48+
///
49+
/// # Errors
50+
///
51+
/// In the case of loading failure.
52+
pub fn load_hpo<P: AsRef<std::path::Path>>(path: P) -> Result<hpo::Ontology, anyhow::Error> {
53+
if path.as_ref().join("hpo.bin").exists() {
54+
tracing::info!(
55+
" attempting to load binary HPO file from {}",
56+
path.as_ref().display()
57+
);
58+
Ok(hpo::Ontology::from_binary(path.as_ref().join("hpo.bin"))?)
59+
} else {
60+
tracing::info!(
61+
" attempting to load HPO from standard file {}",
62+
path.as_ref().display()
63+
);
64+
Ok(hpo::Ontology::from_standard(&format!(
65+
"{}",
66+
path.as_ref().display()
67+
))?)
68+
}
69+
}

src/convert/mod.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//! Conversion of HPO text files to `hpo` binary format.
2+
3+
use std::io::Write;
4+
5+
use clap::Parser;
6+
7+
/// Command line arguments for `convert` sub command.
8+
#[derive(Parser, Debug)]
9+
#[command(author, version, about = "Convert HPO text files to binary format", long_about = None)]
10+
pub struct Args {
11+
/// Path to the directory with the HPO files.
12+
#[arg(long, required = true)]
13+
pub path_hpo_dir: String,
14+
/// Path to the output binary file.
15+
#[arg(long, required = true)]
16+
pub path_out_bin: String,
17+
}
18+
19+
/// Main entry point for `convert` command.
20+
///
21+
/// # Errors
22+
///
23+
/// In the case of query execution failure.
24+
///
25+
/// # Panics
26+
///
27+
/// In the case of term lookup failure.
28+
pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow::Error> {
29+
tracing::info!("args_common = {:?}", &args_common);
30+
tracing::info!("args = {:?}", &args);
31+
32+
if let Some(level) = args_common.verbose.log_level() {
33+
match level {
34+
log::Level::Trace | log::Level::Debug => {
35+
std::env::set_var("RUST_LOG", "debug");
36+
env_logger::init_from_env(env_logger::Env::new().default_filter_or("info"));
37+
}
38+
_ => (),
39+
}
40+
}
41+
42+
tracing::info!("Loading HPO...");
43+
let before_loading = std::time::Instant::now();
44+
let hpo = crate::common::load_hpo(&args.path_hpo_dir)?;
45+
tracing::info!("...done loading HPO in {:?}", before_loading.elapsed());
46+
tracing::info!("Ontology [{}] with {} terms", hpo.hpo_version(), hpo.len());
47+
48+
tracing::info!("Writing binary file...");
49+
let before_writing = std::time::Instant::now();
50+
let filename = &args.path_out_bin;
51+
let mut fh = std::fs::File::create(filename).expect("Cannot create file");
52+
fh.write_all(&hpo.as_bytes())?;
53+
tracing::info!("...done writing binary in {:?}", before_writing.elapsed());
54+
55+
tracing::info!("All done. Have a nice day!");
56+
57+
Ok(())
58+
}

src/error.rs

Lines changed: 0 additions & 10 deletions
This file was deleted.

src/lib.rs

Lines changed: 0 additions & 9 deletions
This file was deleted.

0 commit comments

Comments
 (0)