Skip to content

Commit

Permalink
fix: -c and -i flags malfunctioning when querying a large genus
Browse files Browse the repository at this point in the history
  • Loading branch information
Ebedthan committed Jul 31, 2024
1 parent c72a871 commit 21a7ac3
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 65 deletions.
7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,12 @@ cargo-dist-version = "0.19.1"
# The installers to generate for each app
installers = []
# Target platforms to build apps for (Rust target-triple syntax)
targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"]
targets = [
"aarch64-apple-darwin",
"x86_64-apple-darwin",
"x86_64-unknown-linux-gnu",
"x86_64-pc-windows-msvc",
]
# CI backends to support
ci = "github"
# Publish jobs to run in CI
Expand Down
4 changes: 2 additions & 2 deletions src/api/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,14 +169,14 @@ mod tests {
.set_ncbi_type_material_only(true)
.set_outfmt("json");

let expected_url = "https://api.gtdb.ecogenomic.org/search/gtdb?search=test_search&page=1&itemsPerPage=100&searchField=all&gtdbSpeciesRepOnly=true&ncbiTypeMaterialOnly=true";
let expected_url = "https://api.gtdb.ecogenomic.org/search/gtdb?search=test_search&page=1&itemsPerPage=1000000000&searchField=all&gtdbSpeciesRepOnly=true&ncbiTypeMaterialOnly=true";
assert_eq!(api.request(), expected_url);
}

#[test]
fn test_search_api_request_default() {
let api = SearchAPI::default();
let expected_url = "https://api.gtdb.ecogenomic.org/search/gtdb/csv?page=1&itemsPerPage=100&searchField=all";
let expected_url = "https://api.gtdb.ecogenomic.org/search/gtdb/csv?page=1&itemsPerPage=1000000000&searchField=all";
assert_eq!(api.request(), expected_url);
}
}
14 changes: 13 additions & 1 deletion src/cli/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,19 @@ impl SearchArgs {
if args.contains_id("out") {
search_args.set_output(args.get_one::<String>("out").cloned());
}
search_args.set_outfmt(args.get_one::<String>("outfmt").unwrap().to_string());
if args.get_flag("count") || args.get_flag("id") {
// If the user sets the --count or --id flag, automatically set
// --outfmt=json.
// This avoids a failure that arises when the queried taxon has so much
// data that the response cannot be read into a string (which is how the
// CSV and TSV outputs are represented).
// An example of such a taxon is Escherichia. Before this fix, launching
// xgt search -ki g__Escherichia
// would fail with: Error: response too big for into_string
} else {
search_args.set_outfmt(args.get_one::<String>("outfmt").unwrap().to_string());
}

search_args.set_disable_certificate_verification(args.get_flag("insecure"));

Expand Down
101 changes: 40 additions & 61 deletions src/cmd/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,46 +244,12 @@ pub fn search(args: cli::search::SearchArgs) -> Result<()> {
}
};

// Handle response for OutputFormat::CSV and OutputFormat::TSV
// based on the args supplied by user
let handle_response = |result: String| -> Result<()> {
// If --count
if args.is_only_num_entries() {
utils::write_to_output(
result
.trim_end()
.split("\r\n")
.skip(1)
.count()
.to_string()
.as_bytes(),
args.get_output().clone(),
)
// If --id
} else if args.is_only_print_ids() {
let ids = result
.split("\r\n")
.skip(1)
.map(|l| {
l.split(if args.get_outfmt() == OutputFormat::Tsv {
'\t'
} else {
','
})
.next()
.unwrap_or("")
})
.collect::<Vec<&str>>()
.join("\n");
utils::write_to_output(ids.as_bytes(), args.get_output().clone())
} else {
// Everything else
utils::write_to_output(result.as_bytes(), args.get_output().clone())
}
};

// Handle response for OutputFormat::Json
if args.get_outfmt() == OutputFormat::Json {
// If -c or -i is set, use the JSON output format to count entries or
// return the list of ids, because converting the response with into_string
// can fail when the response is too big to fit in a string, especially
// when querying data for a large genus such as Escherichia.
// See cli/search.rs#L166-L178
if args.is_only_print_ids() || args.is_only_num_entries() {
let mut search_result: SearchResults = response.into_json()?;
if args.is_whole_words_matching() {
search_result.filter_json(needle.clone(), args.get_search_field());
Expand All @@ -293,37 +259,50 @@ pub fn search(args: cli::search::SearchArgs) -> Result<()> {
search_result.get_total_rows() != 0,
"No matching data found in GTDB"
);

let result_str = if args.is_only_num_entries() {
search_result.get_total_rows().to_string()
} else if args.is_only_print_ids() {
search_result
.rows
.iter()
.map(|x| x.gid.clone())
.collect::<Vec<String>>()
.join("\n")
} else {
search_result
.rows
.iter()
.map(|x| serde_json::to_string_pretty(x).unwrap())
.map(|x| x.gid.clone())
.collect::<Vec<String>>()
.join("\n")
};
utils::write_to_output(result_str.as_bytes(), args.get_output().clone())?;
} else {
// Handle the response for the CSV and TSV output formats
let result = response.into_string()?;
if args.is_whole_words_matching() {
filter_xsv(
result.clone(),
needle,
args.get_search_field(),
args.get_outfmt(),
// Handle response for OutputFormat::Json
if args.get_outfmt() == OutputFormat::Json {
let mut search_result: SearchResults = response.into_json()?;
if args.is_whole_words_matching() {
search_result.filter_json(needle.clone(), args.get_search_field());
}

ensure!(
search_result.get_total_rows() != 0,
"No matching data found in GTDB"
);

let result_str = search_result
.rows
.iter()
.map(|x| serde_json::to_string_pretty(x).unwrap())
.collect::<Vec<String>>()
.join("\n");
utils::write_to_output(result_str.as_bytes(), args.get_output().clone())?;
} else {
// Handle the response for the CSV and TSV output formats
let result = response.into_string()?;
if args.is_whole_words_matching() {
filter_xsv(
result.clone(),
needle,
args.get_search_field(),
args.get_outfmt(),
);
}
utils::write_to_output(result.as_bytes(), args.get_output().clone())?;
}
handle_response(result)?;
}
}

Expand Down Expand Up @@ -405,6 +384,7 @@ mod tests {
args.add_needle("g__Azorhizobium");
args.set_id(true);
args.set_output(Some("test3.txt".to_string()));
args.set_outfmt("json".to_string());
args.set_disable_certificate_verification(true);
let res = search(args.clone());
assert!(res.is_ok());
Expand All @@ -420,9 +400,8 @@ GCA_023448105.1
GCF_000010525.1
GCF_000473085.1
GCF_004364705.1
GCF_014635325.1
"#
.to_string(),
GCF_014635325.1"#
.to_string(),
expected
);
std::fs::remove_file("test3.txt").unwrap();
Expand Down

0 comments on commit 21a7ac3

Please sign in to comment.