Skip to content

Commit

Permalink
seq kmer
Browse files Browse the repository at this point in the history
  • Loading branch information
dagou committed Jun 22, 2024
1 parent e6dfa89 commit 836a343
Show file tree
Hide file tree
Showing 11 changed files with 17 additions and 31 deletions.
2 changes: 1 addition & 1 deletion kr2r/src/bin/annotate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ pub fn run(args: Args) -> Result<()> {

// 开始计时
let start = Instant::now();
println!("start...");
println!("annotate start...");
for chunk_file in chunk_files {
println!("chunk_file {:?}", chunk_file);
process_chunk_file(&args, chunk_file, &hash_files)?;
Expand Down
15 changes: 6 additions & 9 deletions kr2r/src/bin/build_k2_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use kr2r::db::{
write_config_to_file,
};
use kr2r::utils::{
create_partition_files, create_partition_writers, find_library_fna_files, format_bytes,
get_file_limit, read_id_to_taxon_map,
create_partition_files, create_partition_writers, find_library_fna_files, get_file_limit,
read_id_to_taxon_map,
};
use kr2r::IndexOptions;
use std::fs::remove_file;
Expand All @@ -25,10 +25,9 @@ pub struct Args {
#[clap(long, value_parser = parse_size, default_value = "1G", help = "Specifies the hash file capacity.\nAcceptable formats include numeric values followed by 'K', 'M', or 'G' (e.g., '1.5G', '250M', '1024K').\nNote: The specified capacity affects the index size, with a factor of 4 applied.\nFor example, specifying '1G' results in an index size of '4G'.\nDefault: 1G (capacity 1G = file size 4G)")]
pub hash_capacity: usize,

/// chunk temp directory
#[clap(long)]
pub chunk_dir: PathBuf,

// chunk temp directory
// #[clap(long)]
// pub chunk_dir: PathBuf,
/// 包含原始配置
#[clap(flatten)]
pub build: Build,
Expand Down Expand Up @@ -83,11 +82,9 @@ pub fn run(args: Args, required_capacity: usize) -> Result<(), Box<dyn std::erro
panic!("Exceeds File Number Limit");
}

let chunk_files = create_partition_files(partition, &args.chunk_dir, "chunk");
let chunk_files = create_partition_files(partition, &k2d_dir, "chunk");
let mut writers = create_partition_writers(&chunk_files);

println!("chunk_size {}", format_bytes(chunk_size as f64));

let fna_files = find_library_fna_files(args.build.database);

for fna_file in &fna_files {
Expand Down
3 changes: 1 addition & 2 deletions kr2r/src/bin/classify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ where

let _ = read_parallel(
reader,
args.num_threads as usize - 2,
args.num_threads as usize,
&meros,
|seqs| {
Expand Down Expand Up @@ -341,7 +340,7 @@ pub fn run(args: Args) -> Result<()> {
if hash_config.hash_capacity == 0 {
panic!("`hash_capacity` can't be zero!");
}
println!("start...");
println!("classify start...");
let start = Instant::now();
let meros = idx_opts.as_meros();
let hash_files = find_and_sort_files(&args.k2d_dir, "hash", ".k2d")?;
Expand Down
3 changes: 2 additions & 1 deletion kr2r/src/bin/estimate_capacity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ fn process_sequence(
read_parallel(
&mut reader,
args.threads,
args.threads - 2,
&meros,
|record_set| {
let mut minimizer_set = HashSet::new();
Expand Down Expand Up @@ -152,6 +151,8 @@ pub fn run(args: Args) -> usize {
panic!("Error: No library.fna files found in the specified directory. Please ensure that the directory contains at least one library.fna file and try again.");
}

println!("estimate start... ");

for fna_file in fna_files {
let args_clone = Args {
database: source.clone(),
Expand Down
2 changes: 1 addition & 1 deletion kr2r/src/bin/hashshard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ pub fn run(args: Args) -> IOResult<()> {
let hash_config = HashConfig::from_hash_header(index_filename)?;

let partition = (hash_config.capacity + args.hash_capacity - 1) / args.hash_capacity;
println!("start...");
println!("hashshard start...");
// 开始计时
let start = Instant::now();

Expand Down
11 changes: 2 additions & 9 deletions kr2r/src/bin/kun.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,7 @@ struct BuildArgs {
#[clap(long)]
pub k2d_dir: Option<PathBuf>,

/// chunk directory
#[clap(long)]
chunk_dir: PathBuf,

#[clap(long, value_parser = parse_size, default_value = "1G", help = "Specifies the hash file capacity.\nAcceptable formats include numeric values followed by 'K', 'M', or 'G' (e.g., '1.5G', '250M', '1024K').\nNote: The specified capacity affects the index size, with a factor of 4 applied.\nFor example, specifying '1G' results in an index size of '4G'.\nDefault: 1G (capacity 1G = file size 4G)")]
pub hash_capacity: usize,

// chunk_dir: PathBuf,
#[clap(flatten)]
pub build: Build,

Expand Down Expand Up @@ -117,8 +111,7 @@ impl From<BuildArgs> for build_k2_db::Args {
build: item.build,
k2d_dir: item.k2d_dir,
taxo: item.taxo,
chunk_dir: item.chunk_dir,
hash_capacity: item.hash_capacity,
hash_capacity: parse_size("1G").unwrap(),
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion kr2r/src/bin/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ pub fn run(args: Args) -> Result<()> {

// 开始计时
let start = Instant::now();
println!("start...");
println!("resolve start...");

for i in 0..partition {
let sample_file = &sample_files[i];
Expand Down
3 changes: 1 addition & 2 deletions kr2r/src/bin/splitr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ where

read_parallel(
reader,
args.num_threads as usize - 2,
args.num_threads as usize,
&meros,
|seqs| {
Expand Down Expand Up @@ -268,7 +267,7 @@ pub fn run(args: Args) -> Result<()> {
if hash_config.hash_capacity == 0 {
panic!("`hash_capacity` can't be zero!");
}
println!("start...");
println!("splitr start...");
let file_num_limit = get_file_limit();
if hash_config.partition >= file_num_limit {
panic!("Exceeds File Number Limit");
Expand Down
2 changes: 0 additions & 2 deletions kr2r/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,12 @@ pub fn convert_fna_to_k2_format<P: AsRef<Path>>(
threads: u32,
) {
let mut reader = FastaReader::from_path(fna_file, 1).unwrap();
let queue_len = (threads - 2) as usize;
let value_bits = hash_config.value_bits;
let cell_size = std::mem::size_of::<Slot<u32>>();

read_parallel(
&mut reader,
threads as usize,
queue_len,
&meros,
|seqs| {
let mut k2_cell_list = Vec::new();
Expand Down
2 changes: 1 addition & 1 deletion seqkmer/src/fastq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ where
{
fn next(&mut self) -> Result<Option<Vec<Base<Vec<u8>>>>> {
let seqs: Vec<Base<Vec<u8>>> = (0..self.batch_size)
.filter_map(|_| self.read_next().transpose()) // 将 Result<Option<_>, _> 转换为 Option<Result<_, _>>
.filter_map(|_| self.read_next().transpose())
.collect::<Result<Vec<_>>>()?;

Ok(Some(seqs).filter(|v| !v.is_empty()))
Expand Down
3 changes: 1 addition & 2 deletions seqkmer/src/parallel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ pub fn create_reader(
pub fn read_parallel<R, W, O, F, Out>(
reader: &mut R,
n_threads: usize,
buffer_len: usize,
meros: &Meros,
work: W,
func: F,
Expand All @@ -54,7 +53,7 @@ where
F: FnOnce(&mut ParallelResult<Option<O>>) -> Out + Send,
{
assert!(n_threads > 2);
assert!(n_threads <= buffer_len);
let buffer_len = n_threads + 2;
let (sender, receiver) = bounded::<Vec<Base<Vec<u8>>>>(buffer_len);
let (done_send, done_recv) = bounded::<Option<O>>(buffer_len);
let receiver = Arc::new(receiver); // 使用 Arc 来共享 receiver
Expand Down

0 comments on commit 836a343

Please sign in to comment.