Skip to content

Commit

Permalink
Add benchmark CLI argument 'fp' and 'id_type'
Browse files Browse the repository at this point in the history
  • Loading branch information
khb7840 committed Nov 1, 2024
1 parent 92e3de7 commit 48cf44b
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 9 deletions.
4 changes: 3 additions & 1 deletion src/cli/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ fn parse_arg() -> Result<AppArgs, Box<dyn std::error::Error>> {
plddt_cutoff: args.value_from_str(["-l", "--plddt"]).unwrap_or(0.0),
node_count: args.value_from_str("--node").unwrap_or(2),
header: args.contains("--header"),
serial_query: args.contains("--serial-query"),
serial_query: args.contains("--serial"),
output: args.value_from_str(["-o", "--output"]).unwrap_or("".into()),
verbose: args.contains(["-v", "--verbose"]),
help: args.contains(["-h", "--help"]),
Expand All @@ -69,6 +69,8 @@ fn parse_arg() -> Result<AppArgs, Box<dyn std::error::Error>> {
answer: args.opt_value_from_str(["-a", "--answer"])?,
index: args.opt_value_from_str(["-i", "--index"])?,
format: args.value_from_str(["-f", "--format"]).unwrap_or("tsv".into()),
fp: args.opt_value_from_str("--fp")?,
id_type: args.value_from_str("--id").unwrap_or("filename".into()),
}),
Some("test") => Ok(AppArgs::Test {
index_path: args.value_from_str(["-i", "--index"])?,
Expand Down
2 changes: 2 additions & 0 deletions src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ pub enum AppArgs {
answer: Option<String>,
index: Option<String>,
format: String,
fp: Option<f64>,
id_type: String,
},
Test {
index_path: String,
Expand Down
26 changes: 21 additions & 5 deletions src/cli/workflows/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ use std::collections::HashSet;
use std::io::BufRead;

use crate::cli::*;
use crate::controller::mode::{parse_path_set_by_id_type, parse_path_vec_by_id_type, IdType};
use crate::index::lookup::load_lookup_from_file;
use crate::prelude::*;

use crate::cli::config::read_index_config_from_file;
use crate::utils::benchmark::measure_up_to_k_fp;

// usage: folddisco benchmark -r <result.tsv> -a <answer.tsv> -i <index> -f tsv
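// usage: folddisco benchmark -r <result.tsv> -a <answer.tsv> -i <index> -f default
// usage: folddisco benchmark -r <result.tsv> -a <answer.tsv> -i <index> -f tsv --fp <fp> --id <id_type>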
Expand All @@ -16,6 +18,8 @@ pub fn benchmark(env: AppArgs) {
answer,
index,
format,
fp,
id_type,
} => {
if result.is_none() || answer.is_none() || index.is_none() {
print_log_msg(FAIL, "Result, answer, and index files must be provided");
Expand All @@ -26,18 +30,26 @@ pub fn benchmark(env: AppArgs) {
let index_path = index.unwrap();
let lookup_path = format!("{}.lookup", index_path);
let config_path = format!("{}.type", index_path);

let format = format.as_str();
let id_type = IdType::get_with_str(&id_type);

// let result = read_one_column_of_tsv(&result_path, 0);
let result = read_one_column_of_tsv_as_vec(&result_path, 0);
let answer = read_one_column_of_tsv(&answer_path, 0);
let lookup = load_lookup_from_file(&lookup_path);
let lookup = lookup.into_iter().map(|(id, _, _, _)| id).collect::<HashSet<_>>();
let config = read_index_config_from_file(&config_path);
// Parse path by id type
let result = parse_path_vec_by_id_type(&result, &id_type);
let answer = parse_path_set_by_id_type(&answer, &id_type);
let lookup = parse_path_set_by_id_type(&lookup, &id_type);

let result = HashSet::from_iter(result);
let metric = compare_target_answer_set(&result, &answer, &lookup);
// let metric = measure_up_to_k_fp(&result, &answer, &lookup, 5.0);
let config = read_index_config_from_file(&config_path);
let result_set = HashSet::from_iter(result.iter().cloned());
let metric = if let Some(fp) = fp {
measure_up_to_k_fp(&result, &answer, &lookup, fp)
} else {
compare_target_answer_set(&result_set, &answer, &lookup)
};

match format {
"tsv" => {
Expand Down Expand Up @@ -140,11 +152,15 @@ mod tests {
let answer = Some("data/zinc_answer.tsv".to_string());
let index = Some("analysis/h_sapiens/d16a4/index_id".to_string());
let format = "tsv";
let fp = None;
let id_type = String::from("filename");
let env = AppArgs::Benchmark {
result,
answer,
index,
format: format.to_string(),
fp,
id_type
};
benchmark(env);
}
Expand Down
2 changes: 1 addition & 1 deletion src/cli/workflows/build_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ pub fn build_index(env: AppArgs) {
IndexMode::Big => {}
}
let lookup_path = format!("{}.lookup", index_path);
let id_vec = parse_path_vec_by_id_type(&fold_disco.path_vec, id_type.clone());
let id_vec = parse_path_vec_by_id_type(&fold_disco.path_vec, &id_type);
measure_time!(save_lookup_to_file(
&lookup_path, &id_vec, &fold_disco.numeric_id_vec,
Some(&fold_disco.nres_vec), Some(&fold_disco.plddt_vec)
Expand Down
13 changes: 11 additions & 2 deletions src/controller/mode.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@

use std::collections::HashSet;
use std::fs;
use std::path::Path;

Expand Down Expand Up @@ -200,14 +201,22 @@ pub fn parse_path_by_id_type_with_string(path: &str, id_type: &IdType, string: &
}


/// Converts every path in `path_vec` with `parse_path_by_id_type` and returns
/// the converted strings in the same order as the input.
///
/// NOTE(review): this span is rendered from a diff, so the signature and the
/// `push` line each appear twice; presumably the first of each pair is the
/// pre-change line (`id_type` by value) and the second the post-change line
/// (`id_type` by reference) — confirm against the actual file.
pub fn parse_path_vec_by_id_type(path_vec: &Vec<String>, id_type: IdType) -> Vec<String> {
pub fn parse_path_vec_by_id_type(path_vec: &Vec<String>, id_type: &IdType) -> Vec<String> {
// Pre-size the output: exactly one entry is pushed per input path.
let mut parsed_path_vec = Vec::with_capacity(path_vec.len());
for path in path_vec {
parsed_path_vec.push(parse_path_by_id_type(&path, &id_type));
parsed_path_vec.push(parse_path_by_id_type(&path, id_type));
}
parsed_path_vec
}

/// Builds a new set by running every entry of `path_set` through
/// `parse_path_by_id_type` with the given `id_type`.
///
/// The result is a `HashSet`, so entries that convert to the same string
/// collapse into a single element and the output may be smaller than the input.
pub fn parse_path_set_by_id_type(path_set: &HashSet<String>, id_type: &IdType) -> HashSet<String> {
    path_set
        .iter()
        .map(|raw_path| parse_path_by_id_type(raw_path, id_type))
        .collect()
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexMode {
Id,
Expand Down

0 comments on commit 48cf44b

Please sign in to comment.