From b16909848e932caa154a60237f90327470ad42fd Mon Sep 17 00:00:00 2001 From: Ken Chou Date: Sat, 17 May 2025 01:10:33 +0800 Subject: [PATCH 1/4] feat: Improve file system handling with single scan and smart deletions --- Cargo.toml | 6 +-- src/data.rs | 2 +- src/main.rs | 153 +++++++++++++++++++++++++--------------------------- 3 files changed, 77 insertions(+), 84 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9a0ad83..bea5b73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,15 +7,15 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -clap = { version = "4.5.13", features = ["cargo", "derive", "string"] } -colored = "2.1.0" +clap = { version = "4.5.38", features = ["cargo", "derive", "string"] } +colored = "2.2.0" dirs-next = "2.0.0" fancy-regex = "0.13.0" itertools = "0.13.0" md-5 = { version = "0.10.6", features = ["md5-asm"] } nary_tree = "0.4.3" quick-error = "2.0.1" -serde = { version = "1.0.204", features = ["derive"] } +serde = { version = "1.0.219", features = ["derive"] } serde_yaml = "0.9.34" walkdir = "2.5.0" diff --git a/src/data.rs b/src/data.rs index 568407d..521bb15 100644 --- a/src/data.rs +++ b/src/data.rs @@ -1,6 +1,6 @@ use std::path::PathBuf; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum Operation { None, Delete, diff --git a/src/main.rs b/src/main.rs index aec9e05..340a11a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +use std::collections::{HashMap, HashSet}; use std::fs::rename; use std::path::PathBuf; @@ -25,14 +26,20 @@ fn main() -> std::io::Result<()> { println!("{:#?}", pattern_matcher); } - let mut operation_list: Vec<(PathBuf, String, data::Operation)> = vec![]; // Path, Pattern, Operation + // 仅扫描一次文件系统,收集所有信息 + let mut file_info: HashMap = HashMap::new(); + let mut all_paths: Vec = Vec::new(); + + // 第一次扫描文件系统,收集所有文件和目录信息 for entry in WalkDir::new(&app_options.target_path) - // .contents_first(true) .sort_by(|a, b| { - a.file_type() - .is_dir() - .cmp(&b.file_type().is_dir()) - .reverse() + // 深度优先排序,从深到浅(先处理最深层级) + let depth_a = a.depth(); + let depth_b = b.depth(); + depth_b.cmp(&depth_a) + // 若深度相同,目录排在文件前面 + .then(a.file_type().is_dir().cmp(&b.file_type().is_dir()).reverse()) + // 若都是目录或都是文件,按名称排序 .then(a.file_name().cmp(b.file_name())) }) .into_iter() @@ -41,59 +48,59 @@ fn main() -> std::io::Result<()> { { let filepath = entry.path(); let filename = entry.file_name().to_str().unwrap(); + all_paths.push(filepath.to_path_buf()); + // 检查是否需要删除 if app_options.enable_deletion { let (mut matched, mut pattern) = pattern_matcher.match_remove_pattern(filename); if matched { let p = pattern.unwrap(); - operation_list.push((filepath.to_path_buf(), p, data::Operation::Delete)); + file_info.insert(filepath.to_path_buf(), (p, data::Operation::Delete)); continue; } else if app_options.enable_hash_matching { - // test filename and hash + // 只在必要时计算哈希 (matched, pattern) = pattern_matcher.match_remove_hash(filepath.to_str().unwrap()); if matched { let p = pattern.unwrap(); - operation_list.push((filepath.to_path_buf(), p, data::Operation::Delete)); + file_info.insert(filepath.to_path_buf(), (p, data::Operation::Delete)); continue; } } } + // 检查是否需要重命名 if app_options.enable_renaming { let new_filename = pattern_matcher.clean_filename(filename); if new_filename != filename { - operation_list.push(( - filepath.to_path_buf(), - new_filename, - data::Operation::Rename, - )); + file_info.insert(filepath.to_path_buf(), (new_filename, data::Operation::Rename)); continue; } } + // 检查是否为空目录 if app_options.enable_prune_empty_dir && filepath.is_dir() && filepath.read_dir()?.next().is_none() { - operation_list.push(( - filepath.to_path_buf(), - "".to_string(), - data::Operation::Delete, - )) + file_info.insert(filepath.to_path_buf(), ("".to_string(), data::Operation::Delete)); + continue; } - operation_list.push(( - filepath.to_path_buf(), - "".to_string(), - data::Operation::None, - )); + // 不需要操作的文件 + file_info.insert(filepath.to_path_buf(), ("".to_string(), data::Operation::None)); } + // 将 HashMap 转换为 Vec 用于打印目录树 + let operation_list: Vec<(PathBuf, String, data::Operation)> = file_info + .iter() + .map(|(path, (pattern, op))| (path.clone(), pattern.clone(), (*op).clone())) + .collect(); + if app_options.is_debug_mode() { println!("* operation_list: {:#?}", operation_list); } - // dir tree + // 打印目录树 if app_options.verbose >= 2 { tprint::print_tree(p2tree::path_list_to_tree( &operation_list, @@ -101,56 +108,33 @@ fn main() -> std::io::Result<()> { )); } - // Remove the entries that don't require operation. - operation_list.retain(|(_, _, op)| !matches!(op, data::Operation::None)); - - // 创建所有操作的列表 - let mut all_delete_operations: Vec<(PathBuf, String)> = Vec::new(); - - // 添加初始的删除操作 - for (file_path, pattern, op) in operation_list.iter() { - if *op == data::Operation::Delete { - all_delete_operations.push((file_path.clone(), pattern.clone())); - } - } - - // 如果启用了空目录清理,模拟删除过程找出所有会变空的目录 + // 处理递归的空目录删除 if app_options.enable_deletion && app_options.enable_prune_empty_dir { - // 创建当前文件系统状态的副本以进行模拟 - let mut remaining_paths = std::collections::HashSet::new(); - for entry in WalkDir::new(&app_options.target_path) - .into_iter() - .filter_entry(|e| !app_options.skip_parent_tmp || util::is_not_hidden(e)) - .filter_map(|e| e.ok()) - { - remaining_paths.insert(entry.path().to_path_buf()); - } + // 创建待删除路径集合 + let mut to_delete: HashSet = file_info + .iter() + .filter(|(_, (_, op))| *op == data::Operation::Delete) + .map(|(path, _)| path.clone()) + .collect(); - // 从集合中移除所有已标记为删除的文件和目录 - for (path, _) in &all_delete_operations { - remaining_paths.remove(path); - } + // 剩余路径集合 + let mut remaining_paths: HashSet = all_paths + .iter() + .filter(|path| !to_delete.contains(*path)) + .cloned() + .collect(); - // 反复检查并"删除"空目录,直到没有新的空目录 + // 递归查找和标记空目录 let mut found_empty_dirs = true; while found_empty_dirs { found_empty_dirs = false; let mut new_empty_dirs = Vec::new(); - // 按深度降序排列的路径(先处理最深的目录) - let mut sorted_paths: Vec = remaining_paths.iter().cloned().collect(); - sorted_paths.sort_by(|a, b| { - let depth_a = a.components().count(); - let depth_b = b.components().count(); - depth_b.cmp(&depth_a) - }); - - // 查找新的空目录 - for path in sorted_paths.iter() { + // 遍历所有剩余路径,查找空目录 + for path in remaining_paths.iter() { if path.is_dir() { let mut is_empty = true; - // 检查这个目录是否为空(没有子项) - for child in sorted_paths.iter() { + for child in remaining_paths.iter() { if child != path && child.starts_with(path) { is_empty = false; break; @@ -164,24 +148,31 @@ fn main() -> std::io::Result<()> { } } - // 将新发现的空目录添加到删除列表 + // 更新删除列表和剩余列表 for empty_dir in &new_empty_dirs { - all_delete_operations.push((empty_dir.clone(), "".to_string())); + file_info.insert(empty_dir.clone(), ("".to_string(), data::Operation::Delete)); + to_delete.insert(empty_dir.clone()); remaining_paths.remove(empty_dir); } } } + // 构建最终的删除和重命名操作列表 + let mut all_delete_operations: Vec<(PathBuf, String)> = file_info + .iter() + .filter(|(_, (_, op))| *op == data::Operation::Delete) + .map(|(path, (pattern, _))| (path.clone(), pattern.clone())) + .collect(); + + // 按深度优先排序删除操作 + all_delete_operations.sort_by(|(path_a, _), (path_b, _)| { + let depth_a = path_a.components().count(); + let depth_b = path_b.components().count(); + depth_b.cmp(&depth_a) + }); + // 执行删除操作 if app_options.enable_deletion { - // 按深度优先顺序排序删除操作(先删除最深的路径) - all_delete_operations.sort_by(|(path_a, _), (path_b, _)| { - let depth_a = path_a.components().count(); - let depth_b = path_b.components().count(); - depth_b.cmp(&depth_a) - }); - - // 输出并执行所有删除操作 for (file_path, pattern) in all_delete_operations { if app_options.verbose > 0 { println!("{} {:#?} <== {}", "[-]".red(), file_path, pattern); @@ -197,13 +188,16 @@ fn main() -> std::io::Result<()> { // 执行重命名操作 if app_options.enable_renaming { - for (file_path, new_file_name, _) in operation_list + let rename_operations: Vec<(PathBuf, String)> = file_info .iter() - .filter(|(_, _, op)| *op == data::Operation::Rename) - { + .filter(|(_, (_, op))| *op == data::Operation::Rename) + .map(|(path, (pattern, _))| (path.clone(), pattern.clone())) + .collect(); + + for (file_path, new_file_name) in rename_operations { println!("{} {:#?} ==> {}", "[*]".yellow(), file_path, new_file_name); let mut new_filepath = file_path.clone(); - new_filepath.set_file_name(new_file_name); + new_filepath.set_file_name(&new_file_name); if app_options.prune { println!("--> {}", new_filepath.display().to_string().cyan()); rename(file_path, new_filepath)?; @@ -213,4 +207,3 @@ fn main() -> std::io::Result<()> { Ok(()) } -//EOP From cb4e05acc74edc573bee0473d81830d802d7ba06 Mon Sep 17 00:00:00 2001 From: Ken Chou Date: Sat, 17 May 2025 01:23:04 +0800 Subject: [PATCH 2/4] feat: add rayon parallel processing --- Cargo.toml | 1 + src/main.rs | 177 ++++++++++++++++++++++++++++------------------------ 2 files changed, 97 insertions(+), 81 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bea5b73..7bc3503 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ itertools = "0.13.0" md-5 = { version = "0.10.6", features = ["md5-asm"] } nary_tree = "0.4.3" quick-error = "2.0.1" +rayon = "1.10.0" serde = { version = "1.0.219", features = ["derive"] } serde_yaml = "0.9.34" walkdir = "2.5.0" diff --git a/src/main.rs b/src/main.rs index 340a11a..26b2adb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,10 @@ use std::collections::{HashMap, HashSet}; use std::fs::rename; use std::path::PathBuf; +use std::sync::Arc; use colored::*; +use rayon::prelude::*; use walkdir::WalkDir; mod cli; @@ -21,79 +23,82 @@ fn main() -> std::io::Result<()> { println!("{:#?}", app_options); } - let pattern_matcher = pmatcher::PatternMatcher::from_config_file(&app_options.config_file); + let pattern_matcher = Arc::new(pmatcher::PatternMatcher::from_config_file(&app_options.config_file)); if app_options.is_debug_mode() { println!("{:#?}", pattern_matcher); } - // 仅扫描一次文件系统,收集所有信息 - let mut file_info: HashMap = HashMap::new(); - let mut all_paths: Vec = Vec::new(); - - // 第一次扫描文件系统,收集所有文件和目录信息 - for entry in WalkDir::new(&app_options.target_path) + // 仅扫描一次文件系统,收集所有路径 + let entries: Vec<_> = WalkDir::new(&app_options.target_path) .sort_by(|a, b| { - // 深度优先排序,从深到浅(先处理最深层级) let depth_a = a.depth(); let depth_b = b.depth(); depth_b.cmp(&depth_a) - // 若深度相同,目录排在文件前面 .then(a.file_type().is_dir().cmp(&b.file_type().is_dir()).reverse()) - // 若都是目录或都是文件,按名称排序 .then(a.file_name().cmp(b.file_name())) }) .into_iter() .filter_entry(|e| !app_options.skip_parent_tmp || util::is_not_hidden(e)) .filter_map(|e| e.ok()) - { - let filepath = entry.path(); - let filename = entry.file_name().to_str().unwrap(); - all_paths.push(filepath.to_path_buf()); - - // 检查是否需要删除 - if app_options.enable_deletion { - let (mut matched, mut pattern) = pattern_matcher.match_remove_pattern(filename); - if matched { - let p = pattern.unwrap(); - file_info.insert(filepath.to_path_buf(), (p, data::Operation::Delete)); - continue; - } else if app_options.enable_hash_matching { - // 只在必要时计算哈希 - (matched, pattern) = pattern_matcher.match_remove_hash(filepath.to_str().unwrap()); + .collect(); + + // 并行处理文件信息 + let options_ref = &app_options; + let matcher_ref = &pattern_matcher; + + let file_info_results: Vec<_> = entries.par_iter() + .map(|entry| { + let filepath = entry.path(); + let filename = entry.file_name().to_str().unwrap(); + + // 检查是否需要删除 + if options_ref.enable_deletion { + let (mut matched, mut pattern) = matcher_ref.match_remove_pattern(filename); if matched { let p = pattern.unwrap(); - file_info.insert(filepath.to_path_buf(), (p, data::Operation::Delete)); - continue; + return (filepath.to_path_buf(), (p, data::Operation::Delete)); + } else if options_ref.enable_hash_matching { + (matched, pattern) = matcher_ref.match_remove_hash(filepath.to_str().unwrap()); + if matched { + let p = pattern.unwrap(); + return (filepath.to_path_buf(), (p, data::Operation::Delete)); + } } } - } - // 检查是否需要重命名 - if app_options.enable_renaming { - let new_filename = pattern_matcher.clean_filename(filename); - if new_filename != filename { - file_info.insert(filepath.to_path_buf(), (new_filename, data::Operation::Rename)); - continue; + // 检查是否需要重命名 + if options_ref.enable_renaming { + let new_filename = matcher_ref.clean_filename(filename); + if new_filename != filename { + return (filepath.to_path_buf(), (new_filename, data::Operation::Rename)); + } } - } - // 检查是否为空目录 - if app_options.enable_prune_empty_dir - && filepath.is_dir() - && filepath.read_dir()?.next().is_none() - { - file_info.insert(filepath.to_path_buf(), ("".to_string(), data::Operation::Delete)); - continue; - } + // 检查是否为空目录 + if options_ref.enable_prune_empty_dir && filepath.is_dir() { + if filepath.read_dir().map(|mut d| d.next().is_none()).unwrap_or(false) { + return (filepath.to_path_buf(), ("".to_string(), data::Operation::Delete)); + } + } - // 不需要操作的文件 - file_info.insert(filepath.to_path_buf(), ("".to_string(), data::Operation::None)); + // 不需要操作的文件 + (filepath.to_path_buf(), ("".to_string(), data::Operation::None)) + }) + .collect(); + + // 构建文件信息映射 + let mut file_info: HashMap = HashMap::new(); + let mut all_paths: Vec = Vec::with_capacity(file_info_results.len()); + + for (path, info) in file_info_results { + all_paths.push(path.clone()); + file_info.insert(path, info); } - // 将 HashMap 转换为 Vec 用于打印目录树 + // 构建操作列表 let operation_list: Vec<(PathBuf, String, data::Operation)> = file_info .iter() - .map(|(path, (pattern, op))| (path.clone(), pattern.clone(), (*op).clone())) + .map(|(path, (pattern, op))| (path.clone(), pattern.clone(), op.clone())) .collect(); if app_options.is_debug_mode() { @@ -108,56 +113,66 @@ fn main() -> std::io::Result<()> { )); } - // 处理递归的空目录删除 + // 处理递归的空目录删除 - 优化算法 if app_options.enable_deletion && app_options.enable_prune_empty_dir { - // 创建待删除路径集合 let mut to_delete: HashSet = file_info .iter() .filter(|(_, (_, op))| *op == data::Operation::Delete) .map(|(path, _)| path.clone()) .collect(); - // 剩余路径集合 - let mut remaining_paths: HashSet = all_paths - .iter() - .filter(|path| !to_delete.contains(*path)) - .cloned() - .collect(); + // 构建目录树结构 + let mut dir_children: HashMap> = HashMap::new(); - // 递归查找和标记空目录 - let mut found_empty_dirs = true; - while found_empty_dirs { - found_empty_dirs = false; - let mut new_empty_dirs = Vec::new(); - - // 遍历所有剩余路径,查找空目录 - for path in remaining_paths.iter() { - if path.is_dir() { - let mut is_empty = true; - for child in remaining_paths.iter() { - if child != path && child.starts_with(path) { - is_empty = false; - break; - } - } + for path in all_paths.iter() { + if path.is_dir() { + dir_children.insert(path.clone(), Vec::new()); + } + + // 找到父目录并添加为子项 + if let Some(parent) = path.parent().map(|p| p.to_path_buf()) { + if all_paths.contains(&parent) && !to_delete.contains(path) { + dir_children.entry(parent).or_insert_with(Vec::new).push(path.clone()); + } + } + } + + // 查找空目录 - 从不包含其他目录的目录开始 + let mut empty_dirs = Vec::new(); + let mut changed = true; + + while changed { + changed = false; + + for (dir, children) in &dir_children { + if !to_delete.contains(dir) && children.is_empty() { + empty_dirs.push(dir.clone()); + changed = true; + } + } - if is_empty { - new_empty_dirs.push(path.clone()); - found_empty_dirs = true; + // 将空目录标记为删除 + for dir in &empty_dirs { + file_info.insert(dir.clone(), ("".to_string(), data::Operation::Delete)); + to_delete.insert(dir.clone()); + + // 从父目录的子列表中移除 + if let Some(parent) = dir.parent().map(|p| p.to_path_buf()) { + if let Some(siblings) = dir_children.get_mut(&parent) { + siblings.retain(|p| p != dir); } } } - // 更新删除列表和剩余列表 - for empty_dir in &new_empty_dirs { - file_info.insert(empty_dir.clone(), ("".to_string(), data::Operation::Delete)); - to_delete.insert(empty_dir.clone()); - remaining_paths.remove(empty_dir); + if !empty_dirs.is_empty() { + empty_dirs.clear(); + } else { + break; } } } - // 构建最终的删除和重命名操作列表 + // 构建最终的删除操作列表 let mut all_delete_operations: Vec<(PathBuf, String)> = file_info .iter() .filter(|(_, (_, op))| *op == data::Operation::Delete) @@ -165,7 +180,7 @@ fn main() -> std::io::Result<()> { .collect(); // 按深度优先排序删除操作 - all_delete_operations.sort_by(|(path_a, _), (path_b, _)| { + all_delete_operations.par_sort_by(|(path_a, _), (path_b, _)| { let depth_a = path_a.components().count(); let depth_b = path_b.components().count(); depth_b.cmp(&depth_a) From 2efc90dc990f62a520cd9b8206e6a7328466ddb4 Mon Sep 17 00:00:00 2001 From: Ken Chou Date: Sat, 17 May 2025 01:45:44 +0800 Subject: [PATCH 3/4] feat: improve file operation error handling and performance --- src/main.rs | 104 +++++++++++++++++++++++++++++++++++++----------- src/pmatcher.rs | 35 +++++++++++----- 2 files changed, 104 insertions(+), 35 deletions(-) diff --git a/src/main.rs b/src/main.rs index 26b2adb..51c88fc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,7 +23,9 @@ fn main() -> std::io::Result<()> { println!("{:#?}", app_options); } - let pattern_matcher = Arc::new(pmatcher::PatternMatcher::from_config_file(&app_options.config_file)); + let pattern_matcher = Arc::new(pmatcher::PatternMatcher::from_config_file( + &app_options.config_file, + )); if app_options.is_debug_mode() { println!("{:#?}", pattern_matcher); } @@ -33,8 +35,14 @@ fn main() -> std::io::Result<()> { .sort_by(|a, b| { let depth_a = a.depth(); let depth_b = b.depth(); - depth_b.cmp(&depth_a) - .then(a.file_type().is_dir().cmp(&b.file_type().is_dir()).reverse()) + depth_b + .cmp(&depth_a) + .then( + a.file_type() + .is_dir() + .cmp(&b.file_type().is_dir()) + .reverse(), + ) .then(a.file_name().cmp(b.file_name())) }) .into_iter() @@ -46,22 +54,30 @@ fn main() -> std::io::Result<()> { let options_ref = &app_options; let matcher_ref = &pattern_matcher; - let file_info_results: Vec<_> = entries.par_iter() - .map(|entry| { + let file_info_results: Vec<_> = entries + .par_iter() + .filter_map(|entry| { let filepath = entry.path(); - let filename = entry.file_name().to_str().unwrap(); + // 处理无效文件名:输出警告并跳过 + let filename = match entry.file_name().to_str() { + Some(name) => name, + None => { + eprintln!("{} 跳过无效文件名: {:?}", "[警告]".yellow(), filepath); + return None; // 跳过这个条目 + } + }; // 检查是否需要删除 if options_ref.enable_deletion { let (mut matched, mut pattern) = matcher_ref.match_remove_pattern(filename); if matched { let p = pattern.unwrap(); - return (filepath.to_path_buf(), (p, data::Operation::Delete)); + return Some((filepath.to_path_buf(), (p, data::Operation::Delete))); } else if options_ref.enable_hash_matching { (matched, pattern) = matcher_ref.match_remove_hash(filepath.to_str().unwrap()); if matched { let p = pattern.unwrap(); - return (filepath.to_path_buf(), (p, data::Operation::Delete)); + return Some((filepath.to_path_buf(), (p, data::Operation::Delete))); } } } @@ -70,19 +86,32 @@ fn main() -> std::io::Result<()> { if options_ref.enable_renaming { let new_filename = matcher_ref.clean_filename(filename); if new_filename != filename { - return (filepath.to_path_buf(), (new_filename, data::Operation::Rename)); + return Some(( + filepath.to_path_buf(), + (new_filename, data::Operation::Rename), + )); } } // 检查是否为空目录 if options_ref.enable_prune_empty_dir && filepath.is_dir() { - if filepath.read_dir().map(|mut d| d.next().is_none()).unwrap_or(false) { - return (filepath.to_path_buf(), ("".to_string(), data::Operation::Delete)); + if filepath + .read_dir() + .map(|mut d| d.next().is_none()) + .unwrap_or(false) + { + return Some(( + filepath.to_path_buf(), + ("".to_string(), data::Operation::Delete), + )); } } // 不需要操作的文件 - (filepath.to_path_buf(), ("".to_string(), data::Operation::None)) + Some(( + filepath.to_path_buf(), + ("".to_string(), data::Operation::None), + )) }) .collect(); @@ -132,13 +161,17 @@ fn main() -> std::io::Result<()> { // 找到父目录并添加为子项 if let Some(parent) = path.parent().map(|p| p.to_path_buf()) { if all_paths.contains(&parent) && !to_delete.contains(path) { - dir_children.entry(parent).or_insert_with(Vec::new).push(path.clone()); + dir_children + .entry(parent) + .or_insert_with(Vec::new) + .push(path.clone()); } } } // 查找空目录 - 从不包含其他目录的目录开始 - let mut empty_dirs = Vec::new(); + // 使用 capacity 预分配容器大小 + let mut empty_dirs = Vec::with_capacity(dir_children.len() / 2); let mut changed = true; while changed { @@ -153,7 +186,10 @@ fn main() -> std::io::Result<()> { // 将空目录标记为删除 for dir in &empty_dirs { - file_info.insert(dir.clone(), ("".to_string(), data::Operation::Delete)); + file_info.insert( + dir.clone(), + ("".to_string(), data::Operation::Delete), + ); to_delete.insert(dir.clone()); // 从父目录的子列表中移除 @@ -188,16 +224,36 @@ fn main() -> std::io::Result<()> { // 执行删除操作 if app_options.enable_deletion { - for (file_path, pattern) in all_delete_operations { - if app_options.verbose > 0 { - println!("{} {:#?} <== {}", "[-]".red(), file_path, pattern); - } else { - println!("{} {:#?}", "[-]".red(), file_path); - } + // 按深度分组 + let mut grouped_by_depth: HashMap> = HashMap::new(); + for (path, pattern) in all_delete_operations { + let depth = path.components().count(); + grouped_by_depth.entry(depth).or_default().push((path, pattern)); + } - if app_options.prune && file_path.exists() { - util::remove_path(file_path)?; - } + // 从最深层开始,逐层处理 + let mut depths: Vec<_> = grouped_by_depth.keys().collect(); + depths.sort_by(|a, b| b.cmp(a)); // 降序排列 + + for depth in depths { + let operations = &grouped_by_depth[depth]; + // 同一深度的可以并行处理 + operations.par_iter().for_each(|(file_path, pattern)| { + // 删除操作代码... + if app_options.verbose > 0 { + println!("{} {:#?} <== {}", "[-]".red(), file_path, pattern); + } else { + println!("{} {:#?}", "[-]".red(), file_path); + } + + if app_options.prune { + match util::remove_path(file_path.clone()) { + Ok(_) => (), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => (), + Err(e) => eprintln!("{} 删除文件失败 {:?}: {}", "[错误]".red(), file_path, e), + } + } + }); } } diff --git a/src/pmatcher.rs b/src/pmatcher.rs index f706ceb..5987b3d 100644 --- a/src/pmatcher.rs +++ b/src/pmatcher.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; use std::fs::File; -use std::io::Read; +use std::io::{self, BufReader, Read}; use std::path::{Path, PathBuf}; use fancy_regex::Regex; @@ -44,19 +44,15 @@ impl PatternMatcher { let filename = Path::new(test_file).file_name().unwrap().to_str().unwrap(); for (re, hash_list) in &self.patterns_to_remove_with_hash { if re.is_match(filename).unwrap() { - let mut file = File::open(test_file).unwrap(); - let mut buffer = Vec::new(); - file.read_to_end(&mut buffer).unwrap(); - let mut hash_calculator = Md5::new(); - hash_calculator.update(&buffer); - - let hash = format!("{:x}", hash_calculator.finalize()); - if hash_list.contains(&hash) { - return (true, Some(format!("{}:{}", re, hash))); + // 处理 Result 类型 + if let Ok(hash) = calculate_md5(test_file) { + if hash_list.contains(&hash) { + return (true, Some(format!("{}:{}", re, hash))); + } } } } - (false, None) // return + (false, None) } pub fn clean_filename(&self, filename: &str) -> String { @@ -76,6 +72,23 @@ impl PatternMatcher { } } +fn calculate_md5(filepath: &str) -> io::Result { + let file = File::open(filepath)?; + let mut reader = BufReader::with_capacity(1024 * 1024, file); + let mut buffer = [0; 4096]; + let mut hasher = Md5::new(); + + loop { + let bytes_read = reader.read(&mut buffer)?; + if bytes_read == 0 { + break; + } + hasher.update(&buffer[..bytes_read]); + } + + Ok(format!("{:x}", hasher.finalize())) +} + fn create_patterns_with_hash(patterns: HashMap>) -> Vec<(Regex, Vec)> { patterns .into_iter() From 18d43826508290bfe21ccf7cd5649f66a374d1e6 Mon Sep 17 00:00:00 2001 From: Ken Chou Date: Sat, 17 May 2025 02:27:59 +0800 Subject: [PATCH 4/4] refactor --- src/main.rs | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/src/main.rs b/src/main.rs index 51c88fc..e4835eb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -208,37 +208,12 @@ fn main() -> std::io::Result<()> { } } - // 构建最终的删除操作列表 - let mut all_delete_operations: Vec<(PathBuf, String)> = file_info - .iter() - .filter(|(_, (_, op))| *op == data::Operation::Delete) - .map(|(path, (pattern, _))| (path.clone(), pattern.clone())) - .collect(); - - // 按深度优先排序删除操作 - all_delete_operations.par_sort_by(|(path_a, _), (path_b, _)| { - let depth_a = path_a.components().count(); - let depth_b = path_b.components().count(); - depth_b.cmp(&depth_a) - }); - // 执行删除操作 if app_options.enable_deletion { - // 按深度分组 - let mut grouped_by_depth: HashMap> = HashMap::new(); - for (path, pattern) in all_delete_operations { - let depth = path.components().count(); - grouped_by_depth.entry(depth).or_default().push((path, pattern)); - } - - // 从最深层开始,逐层处理 - let mut depths: Vec<_> = grouped_by_depth.keys().collect(); - depths.sort_by(|a, b| b.cmp(a)); // 降序排列 - - for depth in depths { - let operations = &grouped_by_depth[depth]; - // 同一深度的可以并行处理 - operations.par_iter().for_each(|(file_path, pattern)| { + file_info + .iter() + .filter(|(_, (_, op))| *op == data::Operation::Delete) + .for_each(|(file_path, (pattern, _))| { // 删除操作代码... if app_options.verbose > 0 { println!("{} {:#?} <== {}", "[-]".red(), file_path, pattern); @@ -250,11 +225,12 @@ fn main() -> std::io::Result<()> { match util::remove_path(file_path.clone()) { Ok(_) => (), Err(e) if e.kind() == std::io::ErrorKind::NotFound => (), - Err(e) => eprintln!("{} 删除文件失败 {:?}: {}", "[错误]".red(), file_path, e), + Err(e) => { + eprintln!("{} 删除文件失败 {:?}: {}", "[错误]".red(), file_path, e) + } } } }); - } } // 执行重命名操作