Skip to content

Commit a3d2b25

Browse files
author
dagou
committed
bug fix
1 parent 609a086 commit a3d2b25

File tree

5 files changed

+33
-37
lines changed

5 files changed

+33
-37
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,11 @@ Options:
346346
Minimum quality score for FASTQ data [default: 0]
347347
-p, --num-threads <NUM_THREADS>
348348
The number of threads to use [default: 10]
349-
--batch-size <BATCH_SIZE>
349+
--buffer-size <BUFFER_SIZE>
350350
[default: 16777216]
351+
--batch-size <BATCH_SIZE>
352+
The size of each batch for processing taxid match results, used to control memory usage
353+
[default: 16]
351354
-T, --confidence-threshold <CONFIDENCE_THRESHOLD>
352355
Confidence score threshold [default: 0]
353356
-g, --minimum-hit-groups <MINIMUM_HIT_GROUPS>
@@ -358,8 +361,6 @@ Options:
358361
In comb. w/ -R, provide minimizer information in report
359362
-z, --report-zero-counts
360363
In comb. w/ -R, report taxa w/ 0 count
361-
--full-output
362-
output file contains all unclassified sequence
363364
-h, --help
364365
Print help (see more with '--help')
365366
-V, --version

kr2r/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "kr2r"
3-
version = "0.6.3"
3+
version = "0.6.8"
44
edition = "2021"
55
authors = ["eric9n@gmail.com"]
66

kr2r/src/args.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,9 @@ pub struct ClassifyArgs {
117117
#[clap(short = 'z', long, value_parser, default_value_t = false)]
118118
pub report_zero_counts: bool,
119119

120-
/// output file contains all unclassified sequence
121-
#[clap(long, value_parser, default_value_t = false)]
122-
pub full_output: bool,
123-
120+
// /// output file contains all unclassified sequence
121+
// #[clap(long, value_parser, default_value_t = false)]
122+
// pub full_output: bool,
124123
/// A list of input file paths (FASTA/FASTQ) to be processed by the classify program.
125124
/// Supports fasta or fastq format files (e.g., .fasta, .fastq) and gzip compressed files (e.g., .fasta.gz, .fastq.gz).
126125
// #[clap(short = 'F', long = "files")]

kr2r/src/bin/kun.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ impl From<ClassifyArgs> for resolve::Args {
9797
kraken_output_dir: item.kraken_output_dir,
9898
report_kmer_data: item.report_kmer_data,
9999
report_zero_counts: item.report_zero_counts,
100-
full_output: item.full_output,
100+
// full_output: item.full_output,
101101
num_threads: item.num_threads,
102102
}
103103
}

kr2r/src/bin/resolve.rs

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use kr2r::utils::{find_and_trans_bin_files, find_and_trans_files, open_file};
88
use kr2r::HitGroup;
99
// use rayon::prelude::*;
1010
use seqkmer::{buffer_map_parallel, trim_pair_info, OptionPair};
11-
use std::collections::{HashMap, HashSet};
11+
use std::collections::HashMap;
1212
use std::fs::File;
1313
use std::io::{self, BufRead, BufReader, BufWriter, Read, Result, Write};
1414
use std::path::{Path, PathBuf};
@@ -65,9 +65,9 @@ pub struct Args {
6565
#[clap(long = "output-dir", value_parser)]
6666
pub kraken_output_dir: Option<PathBuf>,
6767

68-
/// output file contains all unclassified sequence
69-
#[clap(long, value_parser, default_value_t = false)]
70-
pub full_output: bool,
68+
// /// output file contains all unclassified sequence
69+
// #[clap(long, value_parser, default_value_t = false)]
70+
// pub full_output: bool,
7171
/// Confidence score threshold, default is 0.0.
7272
#[clap(
7373
short = 'T',
@@ -120,8 +120,7 @@ fn process_batch<P: AsRef<Path>>(
120120
id_map: &HashMap<u32, (String, String, usize, Option<usize>)>,
121121
writer: &mut Box<dyn Write + Send>,
122122
value_mask: usize,
123-
) -> Result<(TaxonCountersDash, usize, HashSet<u32>)> {
124-
let hit_seq_id_set = HashSet::new();
123+
) -> Result<(TaxonCountersDash, usize)> {
125124
let confidence_threshold = args.confidence_threshold;
126125
let minimum_hit_groups = args.minimum_hit_groups;
127126

@@ -138,6 +137,7 @@ fn process_batch<P: AsRef<Path>>(
138137
if let Some(item) = id_map.get(&k) {
139138
let mut rows = rows.to_owned();
140139
rows.sort_unstable();
140+
141141
let dna_id = trim_pair_info(&item.0);
142142
let range =
143143
OptionPair::from(((0, item.2), item.3.map(|size| (item.2, size + item.2))));
@@ -179,11 +179,7 @@ fn process_batch<P: AsRef<Path>>(
179179
.expect("failed");
180180
}
181181

182-
Ok((
183-
cur_taxon_counts,
184-
classify_counter.load(Ordering::SeqCst),
185-
hit_seq_id_set,
186-
))
182+
Ok((cur_taxon_counts, classify_counter.load(Ordering::SeqCst)))
187183
}
188184

189185
pub fn run(args: Args) -> Result<()> {
@@ -218,7 +214,7 @@ pub fn run(args: Args) -> Result<()> {
218214
}
219215
None => Box::new(BufWriter::new(io::stdout())) as Box<dyn Write + Send>,
220216
};
221-
let (thread_taxon_counts, thread_classified, hit_seq_set) = process_batch::<PathBuf>(
217+
let (thread_taxon_counts, thread_classified) = process_batch::<PathBuf>(
222218
sam_files,
223219
&args,
224220
&taxo,
@@ -227,22 +223,22 @@ pub fn run(args: Args) -> Result<()> {
227223
value_mask,
228224
)?;
229225

230-
if args.full_output {
231-
sample_id_map
232-
.iter()
233-
.filter(|(key, _)| !hit_seq_set.contains(key))
234-
.for_each(|(_, value)| {
235-
let dna_id = trim_pair_info(&value.0); // 假设 key 是 &str 类型
236-
let output_line = format!(
237-
"U\t{}\t0\t{}\t{}\n",
238-
dna_id,
239-
value.1,
240-
if value.3.is_none() { "" } else { " |:| " }
241-
);
242-
243-
writer.write_all(output_line.as_bytes()).unwrap();
244-
});
245-
}
226+
// if args.full_output {
227+
// sample_id_map
228+
// .iter()
229+
// .filter(|(key, _)| !hit_seq_set.contains(key))
230+
// .for_each(|(_, value)| {
231+
// let dna_id = trim_pair_info(&value.0); // 假设 key 是 &str 类型
232+
// let output_line = format!(
233+
// "U\t{}\t0\t{}\t{}\n",
234+
// dna_id,
235+
// value.1,
236+
// if value.3.is_none() { "" } else { " |:| " }
237+
// );
238+
239+
// writer.write_all(output_line.as_bytes()).unwrap();
240+
// });
241+
// }
246242

247243
let mut sample_taxon_counts: HashMap<
248244
u64,

0 commit comments

Comments
 (0)