From 9b045fb9a02598e02d515214223338bfa9cbb07c Mon Sep 17 00:00:00 2001 From: Ken Chou Date: Sat, 17 May 2025 03:18:44 +0800 Subject: [PATCH 1/3] progress bar --- Cargo.toml | 1 + src/main.rs | 45 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7bc3503..a12d0cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ clap = { version = "4.5.38", features = ["cargo", "derive", "string"] } colored = "2.2.0" dirs-next = "2.0.0" fancy-regex = "0.13.0" +indicatif = "0.17.11" itertools = "0.13.0" md-5 = { version = "0.10.6", features = ["md5-asm"] } nary_tree = "0.4.3" diff --git a/src/main.rs b/src/main.rs index e4835eb..88655e4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ use std::path::PathBuf; use std::sync::Arc; use colored::*; +use indicatif::{ProgressBar, ProgressStyle}; use rayon::prelude::*; use walkdir::WalkDir; @@ -30,7 +31,19 @@ fn main() -> std::io::Result<()> { println!("{:#?}", pattern_matcher); } + println!("正在扫描文件..."); + let spinner = ProgressBar::new_spinner(); + spinner.set_style( + ProgressStyle::default_spinner() + .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ ") + .template("{spinner:.green} {msg}") + .unwrap() + ); + spinner.set_message("scanning files..."); + spinner.enable_steady_tick(std::time::Duration::from_millis(100)); + // 仅扫描一次文件系统,收集所有路径 + let mut file_count = 0; let entries: Vec<_> = WalkDir::new(&app_options.target_path) .sort_by(|a, b| { let depth_a = a.depth(); @@ -47,17 +60,44 @@ fn main() -> std::io::Result<()> { }) .into_iter() .filter_entry(|e| !app_options.skip_parent_tmp || util::is_not_hidden(e)) - .filter_map(|e| e.ok()) + .filter_map(|e| { + if let Ok(_) = &e { + file_count += 1; + if file_count % 1000 == 0 { + spinner.set_message(format!("已扫描 {} 个文件...", file_count)); + } + } + e.ok() + }) .collect(); + spinner.finish_with_message(format!("扫描完成,共 {} 个文件", file_count)); // 并行处理文件信息 let options_ref = &app_options; let matcher_ref = &pattern_matcher; + println!("正在处理文件..."); + let process_bar = ProgressBar::new(entries.len() as u64); + process_bar.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {pos}/{len} {msg}") + .unwrap() + .progress_chars("█▓▒░ ") + ); + let file_info_results: Vec<_> = entries .par_iter() .filter_map(|entry| { let filepath = entry.path(); + // 更新进度条 + process_bar.inc(1); + // 显示当前处理的文件名 + if let Some(name) = filepath.file_name().and_then(|n| n.to_str()) { + if process_bar.position() % 100 == 0 { + process_bar.set_message(format!("处理: {}", name)); + } + } + // 处理无效文件名:输出警告并跳过 let filename = match entry.file_name().to_str() { Some(name) => name, @@ -74,6 +114,7 @@ fn main() -> std::io::Result<()> { let p = pattern.unwrap(); return Some((filepath.to_path_buf(), (p, data::Operation::Delete))); } else if options_ref.enable_hash_matching { + process_bar.set_message(format!("计算MD5: {}", filename)); (matched, pattern) = matcher_ref.match_remove_hash(filepath.to_str().unwrap()); if matched { let p = pattern.unwrap(); @@ -114,6 +155,8 @@ fn main() -> std::io::Result<()> { )) }) .collect(); + // 完成进度条 + process_bar.finish_with_message("文件处理完成"); // 构建文件信息映射 let mut file_info: HashMap = HashMap::new(); From 25887682a8d4454692dab7b2039909b10ef6bd64 Mon Sep 17 00:00:00 2001 From: Ken Chou Date: Sat, 17 May 2025 03:33:13 +0800 Subject: [PATCH 2/3] update progress --- src/main.rs | 7 +++---- src/pmatcher.rs | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 88655e4..e7533d9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -114,8 +114,7 @@ fn main() -> std::io::Result<()> { let p = pattern.unwrap(); return Some((filepath.to_path_buf(), (p, data::Operation::Delete))); } else if options_ref.enable_hash_matching { - process_bar.set_message(format!("计算MD5: {}", filename)); - (matched, pattern) = matcher_ref.match_remove_hash(filepath.to_str().unwrap()); + (matched, pattern) = matcher_ref.match_remove_hash_with_progress(filepath.to_str().unwrap(), Some(&process_bar)); if matched { let p = pattern.unwrap(); return Some((filepath.to_path_buf(), (p, data::Operation::Delete))); @@ -155,8 +154,6 @@ fn main() -> std::io::Result<()> { )) }) .collect(); - // 完成进度条 - process_bar.finish_with_message("文件处理完成"); // 构建文件信息映射 let mut file_info: HashMap = HashMap::new(); @@ -250,6 +247,8 @@ fn main() -> std::io::Result<()> { } } } + // 完成进度条 + process_bar.finish_with_message("文件处理完成"); // 执行删除操作 if app_options.enable_deletion { diff --git a/src/pmatcher.rs b/src/pmatcher.rs index 5987b3d..838cc90 100644 --- a/src/pmatcher.rs +++ b/src/pmatcher.rs @@ -4,6 +4,7 @@ use std::io::{self, BufReader, Read}; use std::path::{Path, PathBuf}; use fancy_regex::Regex; +use indicatif::ProgressBar; use md5::{Digest, Md5}; use crate::fnmatch_regex; @@ -40,6 +41,7 @@ impl PatternMatcher { (false, None) // return } + #[allow(dead_code)] pub fn match_remove_hash(&self, test_file: &str) -> (bool, Option) { let filename = Path::new(test_file).file_name().unwrap().to_str().unwrap(); for (re, hash_list) in &self.patterns_to_remove_with_hash { @@ -55,6 +57,25 @@ impl PatternMatcher { (false, None) } + #[allow(dead_code)] + pub fn match_remove_hash_with_progress(&self, test_file: &str, progress: Option<&ProgressBar>) -> (bool, Option) { + let filename = Path::new(test_file).file_name().unwrap().to_str().unwrap(); + for (re, hash_list) in &self.patterns_to_remove_with_hash { + if re.is_match(filename).unwrap() { + if let Some(pb) = progress { + pb.set_message(format!("计算MD5: {}", filename)); + } + + if let Ok(hash) = calculate_md5(test_file) { + if hash_list.contains(&hash) { + return (true, Some(format!("{}:{}", re, hash))); + } + } + } + } + (false, None) + } + pub fn clean_filename(&self, filename: &str) -> String { let mut new_filename = PathBuf::from(filename.to_string()) .file_name() From aa4d4d060b8fc5364a054ac090e0f3661f83e7ce Mon Sep 17 00:00:00 2001 From: Ken Chou Date: Sat, 17 May 2025 13:16:49 +0800 Subject: [PATCH 3/3] refactor: Optimize file processing and improve progress handling --- src/main.rs | 125 ++++++++++++++++++++++++++++-------------------- src/pmatcher.rs | 54 ++++++++++++++++++--- 2 files changed, 120 insertions(+), 59 deletions(-) diff --git a/src/main.rs b/src/main.rs index e7533d9..c03613a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -37,7 +37,7 @@ fn main() -> std::io::Result<()> { ProgressStyle::default_spinner() .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ ") .template("{spinner:.green} {msg}") - .unwrap() + .unwrap(), ); spinner.set_message("scanning files..."); spinner.enable_steady_tick(std::time::Duration::from_millis(100)); @@ -80,9 +80,11 @@ fn main() -> std::io::Result<()> { let process_bar = ProgressBar::new(entries.len() as u64); process_bar.set_style( ProgressStyle::default_bar() - .template("{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {pos}/{len} {msg}") + .template( + "{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {pos}/{len}\n{msg}", + ) .unwrap() - .progress_chars("█▓▒░ ") + .progress_chars("█▓▒░ "), ); let file_info_results: Vec<_> = entries @@ -114,7 +116,7 @@ fn main() -> std::io::Result<()> { let p = pattern.unwrap(); return Some((filepath.to_path_buf(), (p, data::Operation::Delete))); } else if options_ref.enable_hash_matching { - (matched, pattern) = matcher_ref.match_remove_hash_with_progress(filepath.to_str().unwrap(), Some(&process_bar)); + (matched, pattern) = matcher_ref.match_remove_hash(filepath.to_str().unwrap()); if matched { let p = pattern.unwrap(); return Some((filepath.to_path_buf(), (p, data::Operation::Delete))); @@ -182,71 +184,88 @@ fn main() -> std::io::Result<()> { )); } - // 处理递归的空目录删除 - 优化算法 + // 处理递归的空目录删除 - 优化版本 if app_options.enable_deletion && app_options.enable_prune_empty_dir { - let mut to_delete: HashSet = file_info - .iter() - .filter(|(_, (_, op))| *op == data::Operation::Delete) - .map(|(path, _)| path.clone()) - .collect(); - - // 构建目录树结构 - let mut dir_children: HashMap> = HashMap::new(); - - for path in all_paths.iter() { - if path.is_dir() { - dir_children.insert(path.clone(), Vec::new()); + process_bar.set_message("空目录检测中...".to_string()); + + // 第一阶段:收集需要删除的目录 + let dirs_to_mark_delete = { + // 创建局部作用域,确保借用在此结束 + let paths_set: HashSet<&PathBuf> = all_paths.iter().collect(); + let mut to_delete: HashSet<&PathBuf> = file_info + .iter() + .filter(|(_, (_, op))| *op == data::Operation::Delete) + .map(|(path, _)| path) + .collect(); + + // 目录子项映射 + let mut dir_children: HashMap<&PathBuf, Vec<&PathBuf>> = HashMap::new(); + + let dirs: Vec<&PathBuf> = all_paths.iter() + .filter(|p| p.is_dir()) + .collect(); + + // 初始化目录映射 + for &dir in &dirs { + dir_children.insert(dir, Vec::new()); } - // 找到父目录并添加为子项 - if let Some(parent) = path.parent().map(|p| p.to_path_buf()) { - if all_paths.contains(&parent) && !to_delete.contains(path) { - dir_children - .entry(parent) - .or_insert_with(Vec::new) - .push(path.clone()); + // 构建父子关系 + for path in all_paths.iter() { + if let Some(parent) = path.parent().map(PathBuf::from) { + if let Some(actual_parent) = paths_set.get(&parent) { + if !to_delete.contains(path) { + dir_children.entry(actual_parent).or_default().push(path); + } + } } } - } - // 查找空目录 - 从不包含其他目录的目录开始 - // 使用 capacity 预分配容器大小 - let mut empty_dirs = Vec::with_capacity(dir_children.len() / 2); - let mut changed = true; + // 识别所有空目录 + let mut empty_dirs_result = Vec::new(); + let mut empty_dirs = Vec::with_capacity(dirs.len() / 2); + + for _ in 0..dirs.len() { + empty_dirs.clear(); - while changed { - changed = false; + for &dir in dirs.iter() { + if !to_delete.contains(&dir) && + dir_children.get(dir).map_or(true, |c| c.is_empty()) { + empty_dirs.push(dir); + } + } - for (dir, children) in &dir_children { - if !to_delete.contains(dir) && children.is_empty() { - empty_dirs.push(dir.clone()); - changed = true; + if empty_dirs.is_empty() { + break; } - } - // 将空目录标记为删除 - for dir in &empty_dirs { - file_info.insert( - dir.clone(), - ("".to_string(), data::Operation::Delete), - ); - to_delete.insert(dir.clone()); - - // 从父目录的子列表中移除 - if let Some(parent) = dir.parent().map(|p| p.to_path_buf()) { - if let Some(siblings) = dir_children.get_mut(&parent) { - siblings.retain(|p| p != dir); + for &dir in &empty_dirs { + empty_dirs_result.push(dir.clone()); + to_delete.insert(dir); + + // 更新父目录的子列表 + if let Some(parent_buf) = dir.parent().map(PathBuf::from) { + if let Some(parent) = paths_set.get(&parent_buf) { + if let Some(children) = dir_children.get_mut(parent) { + children.retain(|&p| p != dir); + } + } } } } - if !empty_dirs.is_empty() { - empty_dirs.clear(); - } else { - break; - } + empty_dirs_result + }; // to_delete 的生命周期在这里结束 + + // 第二阶段:更新 file_info + for dir in dirs_to_mark_delete { + file_info.insert( + dir, + ("".to_string(), data::Operation::Delete), + ); } } + // 完成进度条 process_bar.finish_with_message("文件处理完成"); diff --git a/src/pmatcher.rs b/src/pmatcher.rs index 838cc90..dcfdcb4 100644 --- a/src/pmatcher.rs +++ b/src/pmatcher.rs @@ -43,9 +43,19 @@ impl PatternMatcher { #[allow(dead_code)] pub fn match_remove_hash(&self, test_file: &str) -> (bool, Option) { - let filename = Path::new(test_file).file_name().unwrap().to_str().unwrap(); + let filepath = Path::new(test_file); + let filename = match filepath.file_name().and_then(|n| n.to_str()) { + Some(name) => name, + None => return (false, None), // 避免无效文件名 + }; for (re, hash_list) in &self.patterns_to_remove_with_hash { if re.is_match(filename).unwrap() { + // 跳过大文件检查 + if let Ok(metadata) = std::fs::metadata(filepath) { + if metadata.len() > 100 * 1024 * 1024 { + return (false, None); + } + } // 处理 Result 类型 if let Ok(hash) = calculate_md5(test_file) { if hash_list.contains(&hash) { @@ -58,12 +68,42 @@ impl PatternMatcher { } #[allow(dead_code)] - pub fn match_remove_hash_with_progress(&self, test_file: &str, progress: Option<&ProgressBar>) -> (bool, Option) { - let filename = Path::new(test_file).file_name().unwrap().to_str().unwrap(); + pub fn match_remove_hash_with_progress( + &self, + test_file: &str, + progress: Option<&ProgressBar>, + ) -> (bool, Option) { + let filepath = Path::new(test_file); + let filename = match filepath.file_name().and_then(|n| n.to_str()) { + Some(name) => name, + None => return (false, None), // 避免无效文件名 + }; + + // 避免频繁更新和过长消息 + let mut last_update = std::time::Instant::now(); + for (re, hash_list) in &self.patterns_to_remove_with_hash { if re.is_match(filename).unwrap() { + // 跳过大文件检查 + if let Ok(metadata) = std::fs::metadata(filepath) { + if metadata.len() > 100 * 1024 * 1024 { + return (false, None); + } + } + + // 限制频率更新消息,避免栈溢出 if let Some(pb) = progress { - pb.set_message(format!("计算MD5: {}", filename)); + let now = std::time::Instant::now(); + if now.duration_since(last_update).as_millis() > 100 { + // 截断文件名以避免过长 + let short_name = if filename.len() > 50 { + format!("{}...", &filename[0..47]) + } else { + filename.to_string() + }; + pb.set_message(format!("计算MD5: {}", short_name)); + last_update = now; + } } if let Ok(hash) = calculate_md5(test_file) { @@ -95,8 +135,10 @@ impl PatternMatcher { fn calculate_md5(filepath: &str) -> io::Result { let file = File::open(filepath)?; - let mut reader = BufReader::with_capacity(1024 * 1024, file); - let mut buffer = [0; 4096]; + let mut reader = BufReader::with_capacity(8 * 1024 * 1024, file); + + // 使用堆分配的 Vec 代替栈上的大数组 + let mut buffer = vec![0; 64 * 1024]; // 64KB 缓冲区,在堆上分配 let mut hasher = Md5::new(); loop {