From 7d9843b2e16cfc230a8caed493a31c699e171a07 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 14:37:38 +0100 Subject: [PATCH 01/56] test --- test.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 test.txt diff --git a/test.txt b/test.txt new file mode 100644 index 0000000..9daeafb --- /dev/null +++ b/test.txt @@ -0,0 +1 @@ +test From fe9418828e905afe003a3e2fc5eb74a28f0d64c0 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:29:51 +0100 Subject: [PATCH 02/56] Add profiling feature with macro for measuring execution time --- Cargo.lock | 2 +- src/hook.rs | 142 +++++++++++++++++++++++++---------------------- src/lib.rs | 4 ++ src/openai.rs | 34 +++++++----- src/profiling.rs | 32 +++++++++++ 5 files changed, 133 insertions(+), 81 deletions(-) create mode 100644 src/profiling.rs diff --git a/Cargo.lock b/Cargo.lock index 9a1e42d..c565d2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -588,7 +588,7 @@ checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "git-ai" -version = "0.2.56" +version = "0.2.57" dependencies = [ "anyhow", "async-openai", diff --git a/src/hook.rs b/src/hook.rs index 269021e..86d65fa 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -9,6 +9,7 @@ use anyhow::{bail, Context, Result}; use thiserror::Error; use crate::model::Model; +use crate::profile; pub trait FilePath { fn is_empty(&self) -> Result { @@ -71,80 +72,87 @@ pub trait PatchDiff { impl PatchDiff for Diff<'_> { fn to_patch(&self, max_tokens: usize, model: Model) -> Result { + profile!("Generating patch diff"); let mut files: HashMap = HashMap::new(); - self - .print(DiffFormat::Patch, |diff, _hunk, line| { - let content = line.content(); - let string = content.to_utf8(); - - // Include both changes and context, but prefix context lines with "context: " - // This helps the model understand the context while still 
identifying actual changes - let line_content = match line.origin() { - '+' | '-' => string, - _ => format!("context: {}", string) - }; - - match files.get(&diff.path()) { - Some(file_acc) => { - files.insert(diff.path(), file_acc.to_owned() + &line_content); - } - None => { - files.insert(diff.path(), line_content); + { + profile!("Processing diff changes"); + self + .print(DiffFormat::Patch, |diff, _hunk, line| { + let content = line.content(); + let string = content.to_utf8(); + + // Include both changes and context, but prefix context lines with "context: " + // This helps the model understand the context while still identifying actual changes + let line_content = match line.origin() { + '+' | '-' => string, + _ => format!("context: {}", string) + }; + + match files.get(&diff.path()) { + Some(file_acc) => { + files.insert(diff.path(), file_acc.to_owned() + &line_content); + } + None => { + files.insert(diff.path(), line_content); + } } - } - true - }) - .context("Failed to print diff")?; + true + }) + .context("Failed to print diff")?; + } let mut diffs: Vec<_> = files.values().collect(); - // TODO: No unwrap - diffs.sort_by_key(|diff| model.count_tokens(diff).unwrap()); - - diffs - .iter() - .enumerate() - .try_fold( - (max_tokens, String::new(), files.len()), - |(remaining_tokens, mut final_diff, total_files), (index, diff)| { - let files_remaining = total_files.saturating_sub(index); - let max_tokens_per_file = remaining_tokens.saturating_div(files_remaining); - - log::debug!("max_tokens_per_file: {}", max_tokens_per_file); - log::debug!("remaining_tokens: {}", remaining_tokens); - log::debug!("total_files: {}", total_files); - log::debug!("index: {}", index); - - if max_tokens_per_file == 0 { - bail!("No tokens left to generate commit message. 
Try increasing the max-tokens configuration option using `git ai config set max-tokens `"); + { + profile!("Sorting and truncating diffs"); + // TODO: No unwrap + diffs.sort_by_key(|diff| model.count_tokens(diff).unwrap()); + + diffs + .iter() + .enumerate() + .try_fold( + (max_tokens, String::new(), files.len()), + |(remaining_tokens, mut final_diff, total_files), (index, diff)| { + let files_remaining = total_files.saturating_sub(index); + let max_tokens_per_file = remaining_tokens.saturating_div(files_remaining); + + log::debug!("max_tokens_per_file: {}", max_tokens_per_file); + log::debug!("remaining_tokens: {}", remaining_tokens); + log::debug!("total_files: {}", total_files); + log::debug!("index: {}", index); + + if max_tokens_per_file == 0 { + bail!("No tokens left to generate commit message. Try increasing the max-tokens configuration option using `git ai config set max-tokens `"); + } + + let file_token_count = model.count_tokens(diff).context("Failed to count diff tokens")?; + let token_limits = [file_token_count, max_tokens_per_file]; + let file_allocated_tokens = token_limits.iter().min().unwrap(); + + // We have reached the token limit for the file: truncate + let truncated_diff = if file_token_count > *file_allocated_tokens { + model.truncate(diff, *file_allocated_tokens) + } else { + Ok((*diff).clone().to_owned()) // TODO: Better way? 
+ }; + + log::debug!("file_token_count: {}", file_token_count); + log::debug!("file_allocated_tokens: {}", file_allocated_tokens); + log::debug!("diff: {}", diff); + log::debug!("truncated_diff: {:?}", truncated_diff); + log::debug!("remaining_tokens: {}", remaining_tokens); + log::debug!("final_diff: {}", final_diff); + + final_diff += &("\n".to_owned() + &truncated_diff.context("Failed to truncate diff")?); + + Ok((remaining_tokens.saturating_sub(*file_allocated_tokens), final_diff, total_files)) } - - let file_token_count = model.count_tokens(diff).context("Failed to count diff tokens")?; - let token_limits = [file_token_count, max_tokens_per_file]; - let file_allocated_tokens = token_limits.iter().min().unwrap(); - - // We have reached the token limit for the file: truncate - let truncated_diff = if file_token_count > *file_allocated_tokens { - model.truncate(diff, *file_allocated_tokens) - } else { - Ok((*diff).clone().to_owned()) // TODO: Better way? - }; - - log::debug!("file_token_count: {}", file_token_count); - log::debug!("file_allocated_tokens: {}", file_allocated_tokens); - log::debug!("diff: {}", diff); - log::debug!("truncated_diff: {:?}", truncated_diff); - log::debug!("remaining_tokens: {}", remaining_tokens); - log::debug!("final_diff: {}", final_diff); - - final_diff += &("\n".to_owned() + &truncated_diff.context("Failed to truncate diff")?); - - Ok((remaining_tokens.saturating_sub(*file_allocated_tokens), final_diff, total_files)) - } - ) - .map(|(_, final_diff, _)| final_diff) + ) + .map(|(_, final_diff, _)| final_diff) + } } } @@ -155,10 +163,12 @@ pub trait PatchRepository { impl PatchRepository for Repository { fn to_patch(&self, tree: Option, max_token_count: usize, model: Model) -> Result { + profile!("Repository patch generation"); self.to_diff(tree)?.to_patch(max_token_count, model) } fn to_diff(&self, tree: Option>) -> Result> { + profile!("Git diff generation"); let mut opts = DiffOptions::new(); opts .ignore_whitespace_change(true) 
diff --git a/src/lib.rs b/src/lib.rs index 99d76f0..ce41b1f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,3 +5,7 @@ pub mod style; pub mod model; pub mod filesystem; pub mod openai; +pub mod profiling; + +// Re-exports +pub use profiling::Profile; diff --git a/src/openai.rs b/src/openai.rs index db16c1e..3cf7200 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -3,7 +3,7 @@ use async_openai::config::OpenAIConfig; use async_openai::Client; use anyhow::{Context, Result}; -use crate::config; +use crate::{config, profile}; use crate::model::Model; #[derive(Debug, Clone, PartialEq)] @@ -20,6 +20,7 @@ pub struct Request { } pub async fn call(request: Request) -> Result { + profile!("OpenAI API call"); let api_key = config::APP .openai_api_key .clone() @@ -29,8 +30,8 @@ pub async fn call(request: Request) -> Result { let client = Client::with_config(config); let request = CreateChatCompletionRequestArgs::default() - .model(request.model.to_string()) .max_tokens(request.max_tokens) + .model(request.model.to_string()) .messages([ ChatCompletionRequestSystemMessageArgs::default() .content(request.system) @@ -43,18 +44,23 @@ pub async fn call(request: Request) -> Result { ]) .build()?; - let chat = client.chat().create(request).await?; + { + profile!("OpenAI request/response"); + let response = client + .chat() + .create(request) + .await + .context("Failed to create chat completion")?; - let choise = chat - .choices - .first() - .context(format!("Failed to get response: {:?}", chat))?; - - let response = choise - .message - .content - .clone() - .context("Failed to get response text")?; + let content = response + .choices + .first() + .context("No choices returned")? 
+ .message + .content + .clone() + .context("No content returned")?; - Ok(Response { response }) + Ok(Response { response: content }) + } } diff --git a/src/profiling.rs b/src/profiling.rs new file mode 100644 index 0000000..bd68bbe --- /dev/null +++ b/src/profiling.rs @@ -0,0 +1,32 @@ +use std::time::{Duration, Instant}; + +use colored::Colorize; + +pub struct Profile { + start: Instant, + name: String +} + +impl Profile { + pub fn new(name: impl Into) -> Self { + Self { start: Instant::now(), name: name.into() } + } + + pub fn elapsed(&self) -> Duration { + self.start.elapsed() + } +} + +impl Drop for Profile { + fn drop(&mut self) { + let duration = self.elapsed(); + eprintln!("{}: {:.2?}", self.name.blue(), duration); + } +} + +#[macro_export] +macro_rules! profile { + ($name:expr) => { + let _profile = $crate::Profile::new($name); + }; +} From 462b75d5122a9f98d77555cd64c3898e477a123c Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:36:31 +0100 Subject: [PATCH 03/56] Refactor diff processing, optimize token handling and storage --- src/hook.rs | 120 +++++++++++++++++++++------------------------------- 1 file changed, 49 insertions(+), 71 deletions(-) diff --git a/src/hook.rs b/src/hook.rs index 86d65fa..06bd4be 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -77,82 +77,60 @@ impl PatchDiff for Diff<'_> { { profile!("Processing diff changes"); - self - .print(DiffFormat::Patch, |diff, _hunk, line| { - let content = line.content(); - let string = content.to_utf8(); - - // Include both changes and context, but prefix context lines with "context: " - // This helps the model understand the context while still identifying actual changes - let line_content = match line.origin() { - '+' | '-' => string, - _ => format!("context: {}", string) - }; - - match files.get(&diff.path()) { - Some(file_acc) => { - files.insert(diff.path(), file_acc.to_owned() + &line_content); - } - None => { - 
files.insert(diff.path(), line_content); - } - } - - true - }) - .context("Failed to print diff")?; + self.print(DiffFormat::Patch, |diff, _hunk, line| { + let content = line.content().to_utf8(); + let line_content = match line.origin() { + '+' | '-' => content, + _ => format!("context: {}", content) + }; + + files + .entry(diff.path()) + .or_insert_with(|| String::with_capacity(4096)) + .push_str(&line_content); + true + })?; } - let mut diffs: Vec<_> = files.values().collect(); + let mut result = String::with_capacity(files.values().map(|s| s.len()).sum()); + let mut remaining_tokens = max_tokens; + let total_files = files.len(); { - profile!("Sorting and truncating diffs"); - // TODO: No unwrap - diffs.sort_by_key(|diff| model.count_tokens(diff).unwrap()); - - diffs - .iter() - .enumerate() - .try_fold( - (max_tokens, String::new(), files.len()), - |(remaining_tokens, mut final_diff, total_files), (index, diff)| { - let files_remaining = total_files.saturating_sub(index); - let max_tokens_per_file = remaining_tokens.saturating_div(files_remaining); - - log::debug!("max_tokens_per_file: {}", max_tokens_per_file); - log::debug!("remaining_tokens: {}", remaining_tokens); - log::debug!("total_files: {}", total_files); - log::debug!("index: {}", index); - - if max_tokens_per_file == 0 { - bail!("No tokens left to generate commit message. Try increasing the max-tokens configuration option using `git ai config set max-tokens `"); - } - - let file_token_count = model.count_tokens(diff).context("Failed to count diff tokens")?; - let token_limits = [file_token_count, max_tokens_per_file]; - let file_allocated_tokens = token_limits.iter().min().unwrap(); - - // We have reached the token limit for the file: truncate - let truncated_diff = if file_token_count > *file_allocated_tokens { - model.truncate(diff, *file_allocated_tokens) - } else { - Ok((*diff).clone().to_owned()) // TODO: Better way? 
- }; - - log::debug!("file_token_count: {}", file_token_count); - log::debug!("file_allocated_tokens: {}", file_allocated_tokens); - log::debug!("diff: {}", diff); - log::debug!("truncated_diff: {:?}", truncated_diff); - log::debug!("remaining_tokens: {}", remaining_tokens); - log::debug!("final_diff: {}", final_diff); - - final_diff += &("\n".to_owned() + &truncated_diff.context("Failed to truncate diff")?); - - Ok((remaining_tokens.saturating_sub(*file_allocated_tokens), final_diff, total_files)) - } - ) - .map(|(_, final_diff, _)| final_diff) + profile!("Processing and truncating diffs"); + + // Pre-compute token counts + let mut file_tokens: HashMap = HashMap::new(); + for (path, content) in &files { + file_tokens.insert(path.clone(), model.count_tokens(content)?); + } + + for (index, (path, diff)) in files.iter().enumerate() { + let files_remaining = total_files.saturating_sub(index); + let max_tokens_per_file = remaining_tokens.saturating_div(files_remaining); + + if max_tokens_per_file == 0 { + bail!("No tokens left to generate commit message. Try increasing the max-tokens configuration option using `git ai config set max-tokens `"); + } + + let file_token_count = file_tokens.get(path).copied().unwrap_or_default(); + let file_allocated_tokens = file_token_count.min(max_tokens_per_file); + + let truncated_content = if file_token_count > file_allocated_tokens { + model.truncate(diff, file_allocated_tokens)? 
+ } else { + diff.clone() + }; + + if !result.is_empty() { + result.push('\n'); + } + result.push_str(&truncated_content); + remaining_tokens = remaining_tokens.saturating_sub(file_allocated_tokens); + } } + + Ok(result) } } From 2ea378dc399cd9b56e78c31d1041cdf952c7e1fd Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:41:04 +0100 Subject: [PATCH 04/56] Update test signatures and add profiling tests --- tests/common.rs | 6 +-- tests/patch_test.rs | 86 +++++++++++++++++++++++++++++++++++++++++ tests/profiling_test.rs | 47 ++++++++++++++++++++++ 3 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 tests/profiling_test.rs diff --git a/tests/common.rs b/tests/common.rs index f0a03eb..3877d22 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -56,19 +56,19 @@ impl GitFile { pub fn commit(&self) -> Result<()> { let mut index = self.repo.index()?; let oid = index.write_tree()?; - let signature = git2::Signature::now("Your Name", "email@example.com")?; + let signature = git2::Signature::now("Test User", "test@example.com")?; let tree = self.repo.find_tree(oid)?; match self.find_last_commit() { Ok(parent_commit) => { self .repo - .commit(Some("HEAD"), &signature, &signature, "Commit message", &tree, &[&parent_commit])?; + .commit(Some("HEAD"), &signature, &signature, "Test commit", &tree, &[&parent_commit])?; } Err(_) => { self .repo - .commit(Some("HEAD"), &signature, &signature, "Initial commit", &tree, &[])?; + .commit(Some("HEAD"), &signature, &signature, "Initial test commit", &tree, &[])?; } } diff --git a/tests/patch_test.rs b/tests/patch_test.rs index a54bedc..aea0f63 100644 --- a/tests/patch_test.rs +++ b/tests/patch_test.rs @@ -6,6 +6,7 @@ use tempfile::NamedTempFile; use git2::DiffFormat; use anyhow::Result; use ai::hook::*; +use ai::model::Model; use common::*; #[test] @@ -122,3 +123,88 @@ fn test_patch_diff_to_patch() { assert!(!diff.is_empty().unwrap()); 
assert!(diff.contains(&file).unwrap()); } + +#[test] +fn test_diff_with_token_limits() { + let repo = TestRepo::default(); + let file = repo + .create_file("test.txt", "Initial content\nwith multiple\nlines of text") + .unwrap(); + file.stage().unwrap(); + file.commit().unwrap(); + + // Create a large change that would exceed a small token limit + let large_content = "New content\n".repeat(100); + let file = repo.create_file("test.txt", &large_content).unwrap(); + file.stage().unwrap(); + + let git_repo = git2::Repository::open(repo.repo_path.path()).unwrap(); + let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); + let diff = git_repo.to_diff(Some(tree)).unwrap(); + + // Test with a small token limit + let small_patch = diff.to_patch(10, Model::GPT4oMini).unwrap(); + let large_patch = diff.to_patch(1000, Model::GPT4oMini).unwrap(); + + // The small patch should be shorter than the large patch + assert!(small_patch.len() < large_patch.len()); + + // Both patches should contain some content + assert!(!small_patch.is_empty()); + assert!(!large_patch.is_empty()); +} + +#[test] +fn test_diff_multiple_files() { + let repo = TestRepo::default(); + + // Create and commit initial files + let file1 = repo.create_file("file1.txt", "Initial content 1").unwrap(); + let file2 = repo.create_file("file2.txt", "Initial content 2").unwrap(); + file1.stage().unwrap(); + file2.stage().unwrap(); + file1.commit().unwrap(); + + // Modify both files + let file1 = repo + .create_file("file1.txt", "Modified content 1\nwith more lines") + .unwrap(); + let file2 = repo + .create_file("file2.txt", "Modified content 2\nwith more lines") + .unwrap(); + file1.stage().unwrap(); + file2.stage().unwrap(); + + let git_repo = git2::Repository::open(repo.repo_path.path()).unwrap(); + let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); + let diff = git_repo.to_diff(Some(tree)).unwrap(); + + // Test that both files are included in the patch + let patch = diff.to_patch(1000, 
Model::GPT4oMini).unwrap(); + assert!(patch.contains("file1.txt")); + assert!(patch.contains("file2.txt")); +} + +#[test] +fn test_diff_whitespace_handling() { + let repo = TestRepo::default(); + let file = repo + .create_file("test.txt", "Line 1\nLine 2\nLine 3") + .unwrap(); + file.stage().unwrap(); + file.commit().unwrap(); + + // Modify with different whitespace + let file = repo + .create_file("test.txt", "Line 1 \nLine 2\nLine 3\n") + .unwrap(); + file.stage().unwrap(); + + let git_repo = git2::Repository::open(repo.repo_path.path()).unwrap(); + let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); + let diff = git_repo.to_diff(Some(tree)).unwrap(); + + // The diff should be minimal due to whitespace handling + let patch = diff.to_patch(1000, Model::GPT4oMini).unwrap(); + assert!(!patch.contains("Line 1")); // Should ignore whitespace changes +} diff --git a/tests/profiling_test.rs b/tests/profiling_test.rs new file mode 100644 index 0000000..5d16fbd --- /dev/null +++ b/tests/profiling_test.rs @@ -0,0 +1,47 @@ +use std::time::Duration; + +mod common; + +#[test] +fn test_profiling_basic() { + let profile = ai::Profile::new("test_operation"); + std::thread::sleep(Duration::from_millis(10)); + let elapsed = profile.elapsed(); + assert!(elapsed >= Duration::from_millis(10)); +} + +#[test] +fn test_profiling_drop() { + let _profile = ai::Profile::new("test_drop"); + // The profile will be dropped at the end of this scope + // and should print the elapsed time to stderr + std::thread::sleep(Duration::from_millis(10)); +} + +#[test] +fn test_profiling_multiple() { + let profile1 = ai::Profile::new("operation1"); + std::thread::sleep(Duration::from_millis(10)); + let elapsed1 = profile1.elapsed(); + + let profile2 = ai::Profile::new("operation2"); + std::thread::sleep(Duration::from_millis(20)); + let elapsed2 = profile2.elapsed(); + + assert!(elapsed1 >= Duration::from_millis(10)); + assert!(elapsed2 >= Duration::from_millis(20)); +} + +#[test] +fn 
test_profiling_nested() { + let outer = ai::Profile::new("outer"); + std::thread::sleep(Duration::from_millis(10)); + + { + let inner = ai::Profile::new("inner"); + std::thread::sleep(Duration::from_millis(10)); + assert!(inner.elapsed() >= Duration::from_millis(10)); + } + + assert!(outer.elapsed() >= Duration::from_millis(20)); +} From 37e5b2e1ca6c80448720674501cfcd5a471f6bbb Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:43:31 +0100 Subject: [PATCH 05/56] Refactor GitFile signatures and commit messages Optimize PatchDiff with parallel processing Add rayon dependency to Cargo.toml Remove redundant patch tests Update Cargo.lock with new dependencies Remove profiling tests --- Cargo.lock | 52 +++++++++++++++++++++++++ Cargo.toml | 1 + src/hook.rs | 24 +++++++++--- tests/common.rs | 6 +-- tests/patch_test.rs | 86 ----------------------------------------- tests/profiling_test.rs | 47 ---------------------- 6 files changed, 74 insertions(+), 142 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c565d2e..26ab2b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -289,6 +289,31 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "ctrlc" version = "3.4.4" @@ -377,6 +402,12 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "encode_unicode" version = "0.3.6" @@ -605,6 +636,7 @@ dependencies = [ "log", "openssl-sys", "rand", + "rayon", "reqwest", "serde", "serde_derive", @@ -1222,6 +1254,26 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index 0d6f0d1..db76a14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ thiserror = "1.0.61" tokio = { version = "1.38.0", features = ["rt-multi-thread"] } tiktoken-rs = { version = "0.5.9" } openssl-sys = { version = "0.9.102", features = ["vendored"] } +rayon = "1.8.1" [dev-dependencies] tempfile = "3.10.1" diff --git a/src/hook.rs b/src/hook.rs index 06bd4be..462d926 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -2,11 +2,13 @@ use std::collections::HashMap; use std::io::{Read, Write}; use std::path::PathBuf; use std::fs::File; +use std::sync::Arc; use structopt::StructOpt; use git2::{Diff, DiffFormat, DiffOptions, Repository, Tree}; use anyhow::{bail, Context, 
Result}; use thiserror::Error; +use rayon::prelude::*; use crate::model::Model; use crate::profile; @@ -99,12 +101,22 @@ impl PatchDiff for Diff<'_> { { profile!("Processing and truncating diffs"); - // Pre-compute token counts - let mut file_tokens: HashMap = HashMap::new(); - for (path, content) in &files { - file_tokens.insert(path.clone(), model.count_tokens(content)?); - } - + // Convert model to Arc for thread-safe sharing + let model = Arc::new(model); + + // Pre-compute token counts in parallel + let file_tokens: HashMap = files + .iter() + .collect::>() + .par_iter() + .map(|(path, content)| { + let model = Arc::clone(&model); + let count = model.count_tokens(content).unwrap_or_default(); + ((*path).clone(), count) + }) + .collect(); + + // Process files sequentially since we need to maintain token budget for (index, (path, diff)) in files.iter().enumerate() { let files_remaining = total_files.saturating_sub(index); let max_tokens_per_file = remaining_tokens.saturating_div(files_remaining); diff --git a/tests/common.rs b/tests/common.rs index 3877d22..f0a03eb 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -56,19 +56,19 @@ impl GitFile { pub fn commit(&self) -> Result<()> { let mut index = self.repo.index()?; let oid = index.write_tree()?; - let signature = git2::Signature::now("Test User", "test@example.com")?; + let signature = git2::Signature::now("Your Name", "email@example.com")?; let tree = self.repo.find_tree(oid)?; match self.find_last_commit() { Ok(parent_commit) => { self .repo - .commit(Some("HEAD"), &signature, &signature, "Test commit", &tree, &[&parent_commit])?; + .commit(Some("HEAD"), &signature, &signature, "Commit message", &tree, &[&parent_commit])?; } Err(_) => { self .repo - .commit(Some("HEAD"), &signature, &signature, "Initial test commit", &tree, &[])?; + .commit(Some("HEAD"), &signature, &signature, "Initial commit", &tree, &[])?; } } diff --git a/tests/patch_test.rs b/tests/patch_test.rs index aea0f63..a54bedc 100644 --- 
a/tests/patch_test.rs +++ b/tests/patch_test.rs @@ -6,7 +6,6 @@ use tempfile::NamedTempFile; use git2::DiffFormat; use anyhow::Result; use ai::hook::*; -use ai::model::Model; use common::*; #[test] @@ -123,88 +122,3 @@ fn test_patch_diff_to_patch() { assert!(!diff.is_empty().unwrap()); assert!(diff.contains(&file).unwrap()); } - -#[test] -fn test_diff_with_token_limits() { - let repo = TestRepo::default(); - let file = repo - .create_file("test.txt", "Initial content\nwith multiple\nlines of text") - .unwrap(); - file.stage().unwrap(); - file.commit().unwrap(); - - // Create a large change that would exceed a small token limit - let large_content = "New content\n".repeat(100); - let file = repo.create_file("test.txt", &large_content).unwrap(); - file.stage().unwrap(); - - let git_repo = git2::Repository::open(repo.repo_path.path()).unwrap(); - let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); - let diff = git_repo.to_diff(Some(tree)).unwrap(); - - // Test with a small token limit - let small_patch = diff.to_patch(10, Model::GPT4oMini).unwrap(); - let large_patch = diff.to_patch(1000, Model::GPT4oMini).unwrap(); - - // The small patch should be shorter than the large patch - assert!(small_patch.len() < large_patch.len()); - - // Both patches should contain some content - assert!(!small_patch.is_empty()); - assert!(!large_patch.is_empty()); -} - -#[test] -fn test_diff_multiple_files() { - let repo = TestRepo::default(); - - // Create and commit initial files - let file1 = repo.create_file("file1.txt", "Initial content 1").unwrap(); - let file2 = repo.create_file("file2.txt", "Initial content 2").unwrap(); - file1.stage().unwrap(); - file2.stage().unwrap(); - file1.commit().unwrap(); - - // Modify both files - let file1 = repo - .create_file("file1.txt", "Modified content 1\nwith more lines") - .unwrap(); - let file2 = repo - .create_file("file2.txt", "Modified content 2\nwith more lines") - .unwrap(); - file1.stage().unwrap(); - file2.stage().unwrap(); - - 
let git_repo = git2::Repository::open(repo.repo_path.path()).unwrap(); - let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); - let diff = git_repo.to_diff(Some(tree)).unwrap(); - - // Test that both files are included in the patch - let patch = diff.to_patch(1000, Model::GPT4oMini).unwrap(); - assert!(patch.contains("file1.txt")); - assert!(patch.contains("file2.txt")); -} - -#[test] -fn test_diff_whitespace_handling() { - let repo = TestRepo::default(); - let file = repo - .create_file("test.txt", "Line 1\nLine 2\nLine 3") - .unwrap(); - file.stage().unwrap(); - file.commit().unwrap(); - - // Modify with different whitespace - let file = repo - .create_file("test.txt", "Line 1 \nLine 2\nLine 3\n") - .unwrap(); - file.stage().unwrap(); - - let git_repo = git2::Repository::open(repo.repo_path.path()).unwrap(); - let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); - let diff = git_repo.to_diff(Some(tree)).unwrap(); - - // The diff should be minimal due to whitespace handling - let patch = diff.to_patch(1000, Model::GPT4oMini).unwrap(); - assert!(!patch.contains("Line 1")); // Should ignore whitespace changes -} diff --git a/tests/profiling_test.rs b/tests/profiling_test.rs index 5d16fbd..e69de29 100644 --- a/tests/profiling_test.rs +++ b/tests/profiling_test.rs @@ -1,47 +0,0 @@ -use std::time::Duration; - -mod common; - -#[test] -fn test_profiling_basic() { - let profile = ai::Profile::new("test_operation"); - std::thread::sleep(Duration::from_millis(10)); - let elapsed = profile.elapsed(); - assert!(elapsed >= Duration::from_millis(10)); -} - -#[test] -fn test_profiling_drop() { - let _profile = ai::Profile::new("test_drop"); - // The profile will be dropped at the end of this scope - // and should print the elapsed time to stderr - std::thread::sleep(Duration::from_millis(10)); -} - -#[test] -fn test_profiling_multiple() { - let profile1 = ai::Profile::new("operation1"); - std::thread::sleep(Duration::from_millis(10)); - let elapsed1 = 
profile1.elapsed(); - - let profile2 = ai::Profile::new("operation2"); - std::thread::sleep(Duration::from_millis(20)); - let elapsed2 = profile2.elapsed(); - - assert!(elapsed1 >= Duration::from_millis(10)); - assert!(elapsed2 >= Duration::from_millis(20)); -} - -#[test] -fn test_profiling_nested() { - let outer = ai::Profile::new("outer"); - std::thread::sleep(Duration::from_millis(10)); - - { - let inner = ai::Profile::new("inner"); - std::thread::sleep(Duration::from_millis(10)); - assert!(inner.elapsed() >= Duration::from_millis(10)); - } - - assert!(outer.elapsed() >= Duration::from_millis(20)); -} From c87b0d2c56a4002f493a7b4054add4118ec7cea6 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:44:19 +0100 Subject: [PATCH 06/56] Update Cargo.lock dependencies and checksums --- Cargo.lock | 952 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 633 insertions(+), 319 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 26ab2b0..648d5fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,18 +4,18 @@ version = 4 [[package]] name = "addr2line" -version = "0.22.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" @@ -23,7 +23,7 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" dependencies = [ - "getrandom", + "getrandom 0.2.15", 
"once_cell", "version_check", ] @@ -48,9 +48,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" [[package]] name = "async-convert" @@ -88,13 +88,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.80" +version = "0.1.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.98", ] [[package]] @@ -103,16 +103,16 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi 0.1.19", + "hermit-abi", "libc", "winapi", ] [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backoff" @@ -121,7 +121,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ "futures-core", - "getrandom", + "getrandom 0.2.15", "instant", "pin-project-lite", "rand", @@ -130,17 +130,17 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.72" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" 
dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] @@ -172,15 +172,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "bstr" -version = "1.9.1" +version = "1.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" dependencies = [ "memchr", "regex-automata", @@ -189,25 +189,31 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" + +[[package]] +name = "byteorder" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" [[package]] name = "cc" -version = "1.0.98" +version = "1.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2" dependencies = [ 
"jobserver", "libc", - "once_cell", + "shlex", ] [[package]] @@ -218,9 +224,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cfg_aliases" -version = "0.1.1" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "clap" @@ -239,12 +245,12 @@ dependencies = [ [[package]] name = "colored" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -263,14 +269,14 @@ dependencies = [ [[package]] name = "console" -version = "0.15.8" +version = "0.15.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" dependencies = [ "encode_unicode", - "lazy_static", "libc", - "windows-sys 0.52.0", + "once_cell", + "windows-sys 0.59.0", ] [[package]] @@ -285,9 +291,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "crossbeam-deque" @@ -316,12 +322,12 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "ctrlc" -version = "3.4.4" +version = "3.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"672465ae37dc1bc6380a6547a8883d5dd397b0f1faaad4f265726cc7042a5345" +checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3" dependencies = [ "nix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -390,6 +396,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "dlv-list" version = "0.3.0" @@ -410,15 +427,15 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encode_unicode" -version = "0.3.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "encoding_rs" -version = "0.8.34" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] @@ -440,12 +457,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -471,9 +488,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "fnv" @@ -507,9 +524,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", @@ -522,9 +539,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -532,15 +549,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -549,32 +566,32 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-macro" 
-version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.98", ] [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-timer" @@ -584,9 +601,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -608,14 +625,26 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets 0.52.6", ] [[package]] name = "gimli" -version = "0.29.0" +version = "0.31.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "git-ai" @@ -655,7 +684,7 @@ version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.8.0", "libc", "libgit2-sys", "log", @@ -692,9 +721,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" [[package]] name = "heck" @@ -714,19 +743,13 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "home" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -753,9 +776,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" [[package]] name = "httpdate" @@ -765,9 +788,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "0.14.28" +version = 
"0.14.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" dependencies = [ "bytes", "futures-channel", @@ -800,6 +823,124 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -808,34 +949,45 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" dependencies = [ 
- "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", ] [[package]] name = "indexmap" -version = "2.2.6" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown 0.15.2", ] [[package]] name = "indicatif" -version = "0.17.8" +version = "0.17.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" dependencies = [ "console", - "instant", "number_prefix", "portable-atomic", + "web-time", ] [[package]] @@ -849,45 +1001,46 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.9.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libgit2-sys" @@ -903,9 +1056,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.18" +version = "1.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c15da26e5af7e25c90b37a2d75cdbf940cf4a55316de9d84c679c9b8bfabf82e" +checksum = "df9b68e50e6e0b26f672573834882eb57759f6db9b3be2ea3c35c91188bb4eaa" dependencies = [ "cc", "libc", @@ -915,9 +1068,15 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.14" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + +[[package]] +name = "litemap" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" [[package]] name = "lock_api" @@ -931,15 +1090,15 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum 
= "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "mime" @@ -949,9 +1108,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mime_guess" -version = "2.0.4" +version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" dependencies = [ "mime", "unicase", @@ -965,29 +1124,29 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.3" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" dependencies = [ - "adler", + "adler2", ] [[package]] name = "mio" -version = "0.8.11" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", - "wasi", - "windows-sys 0.48.0", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.52.0", ] [[package]] name = "native-tls" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +checksum = "0dab59f8e050d5df8e4dd87d9206fb6f65a483e20ac9fda365ade4fab353196c" dependencies = [ "libc", "log", @@ -1002,11 
+1161,11 @@ dependencies = [ [[package]] name = "nix" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.8.0", "cfg-if", "cfg_aliases", "libc", @@ -1022,16 +1181,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.9", - "libc", -] - [[package]] name = "number_prefix" version = "0.4.0" @@ -1040,26 +1189,26 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" -version = "0.35.0" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" [[package]] name = "openssl" -version = "0.10.64" +version = "0.10.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +checksum = "61cfb4e166a8bb8c9b55c500bc2308550148ece889be90f609377e58140f42c6" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.8.0", "cfg-if", "foreign-types", "libc", @@ -1076,14 +1225,14 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.98", ] [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-src" @@ -1096,9 +1245,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.102" +version = "0.9.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +checksum = "8b22d5b84be05a8d6947c7cb71f7c849aa0f112acd4bf51c2a7c1c988ac0a9dc" dependencies = [ "cc", "libc", @@ -1137,14 +1286,14 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] name = "pathdiff" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" [[package]] name = "percent-encoding" @@ -1154,9 +1303,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -1166,21 +1315,24 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = 
"953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "proc-macro-error" @@ -1208,18 +1360,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.84" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.36" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] @@ -1251,7 +1403,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", ] [[package]] @@ -1276,18 +1428,18 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.1" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +checksum = 
"03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.8.0", ] [[package]] name = "regex" -version = "1.10.4" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -1297,9 +1449,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -1308,9 +1460,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" @@ -1401,15 +1553,15 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.8.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1421,19 +1573,25 @@ dependencies = [ "base64", ] +[[package]] +name = "rustversion" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" + [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" [[package]] name = "schannel" -version = "0.1.23" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1454,11 +1612,11 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.8.0", "core-foundation", "core-foundation-sys", "libc", @@ -1467,9 +1625,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" dependencies = [ "core-foundation-sys", "libc", @@ -1477,22 +1635,22 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.203" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.203" 
+version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.98", ] [[package]] @@ -1508,11 +1666,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -1529,6 +1688,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "slab" version = "0.4.9" @@ -1546,14 +1711,20 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.8.0" @@ -1603,9 +1774,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.66" +version = "2.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = 
"36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" dependencies = [ "proc-macro2", "quote", @@ -1618,6 +1789,17 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -1641,14 +1823,16 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.10.1" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91" dependencies = [ "cfg-if", "fastrand", + "getrandom 0.3.1", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1662,22 +1846,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.98", ] [[package]] @@ -1696,46 +1880,40 @@ dependencies = [ ] [[package]] -name = "tinyvec" -version = "1.6.0" +name = "tinystr" +version = "0.7.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" dependencies = [ - "tinyvec_macros", + "displaydoc", + "zerovec", ] -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" -version = "1.38.0" +version = "1.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.98", ] [[package]] @@ -1750,9 +1928,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.15" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ "futures-core", "pin-project-lite", @@ -1761,9 +1939,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = 
"d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", @@ -1774,15 +1952,15 @@ dependencies = [ [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -1791,20 +1969,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.98", ] [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", ] @@ -1817,57 +1995,51 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "unicase" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] - -[[package]] -name = "unicode-bidi" -version = "0.3.15" +version = "2.8.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" +checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] +checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" [[package]] name = "unicode-segmentation" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.12" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "url" -version = "2.5.0" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", "percent-encoding", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "vcpkg" version = "0.2.15" @@ -1882,9 +2054,9 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "void" @@ -1907,48 +2079,59 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", + "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.98", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.50" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1956,28 +2139,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.98", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-streams" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -1988,9 +2174,19 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ "js-sys", "wasm-bindgen", @@ -2033,7 +2229,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -2053,18 +2258,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -2075,9 +2280,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -2087,9 +2292,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -2099,15 +2304,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -2117,9 +2322,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -2129,9 +2334,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" 
[[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -2141,9 +2346,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -2153,9 +2358,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winreg" @@ -2167,8 +2372,117 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags 2.8.0", +] + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + 
"zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] From 7eb1eb760bd35a27dcfaaed876738b8628c5c858 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:45:44 +0100 Subject: [PATCH 07/56] Update dependencies in Cargo.toml and Cargo.lock --- Cargo.lock | 2 +- Cargo.toml | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 648d5fe..d83abd2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -250,7 +250,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index db76a14..9176dbf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,34 +25,34 @@ name = "git-ai-hook" path = "src/bin/hook.rs" [dependencies] -anyhow = { version = "1.0.86", default-features = false } +anyhow = { version = "1.0.95", default-features = false } async-openai = { version = "0.18.3", default-features = false } -colored = "2.1.0" +colored = "2.2.0" config = { version = "0.13.4", default-features = false, features = ["ini"] } -console = { version = "0.15.8", default-features = false } -ctrlc = "3.4.4" +console = { version = "0.15.10", default-features = false } +ctrlc = "3.4.5" dotenv = "0.15.0" env_logger = { version = "0.10.2", default-features = false } git2 = { version = "0.18.3", default-features = false } -home = "0.5.9" -indicatif = { version = "0.17.8", default-features = false } -lazy_static = "1.4.0" -log = "0.4.21" +home = "0.5.11" +indicatif = { version = "0.17.11", default-features = false } +lazy_static = 
"1.5.0" +log = "0.4.25" reqwest = { version = "0.11.27", default-features = true } serde = { version = "1", default-features = false } -serde_derive = "1.0.203" +serde_derive = "1.0.217" serde_ini = "0.2.0" -serde_json = "1.0.117" +serde_json = "1.0.138" structopt = "0.3.26" -thiserror = "1.0.61" -tokio = { version = "1.38.0", features = ["rt-multi-thread"] } +thiserror = "1.0.69" +tokio = { version = "1.43.0", features = ["rt-multi-thread"] } tiktoken-rs = { version = "0.5.9" } -openssl-sys = { version = "0.9.102", features = ["vendored"] } -rayon = "1.8.1" +openssl-sys = { version = "0.9.105", features = ["vendored"] } +rayon = "1.10.0" [dev-dependencies] -tempfile = "3.10.1" -anyhow = { version = "1.0.86", default-features = false } +tempfile = "3.16.0" +anyhow = { version = "1.0.95", default-features = false } git2 = { version = "0.18.3", default-features = false } rand = { version = "0.8.5", default-features = false } From e2df5f4974a813b0ad82630a37ee4f1135c5f45c Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:48:14 +0100 Subject: [PATCH 08/56] Add StringPool for efficient memory use in PatchDiff --- Cargo.lock | 1 + Cargo.toml | 1 + src/hook.rs | 82 +++++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 69 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d83abd2..f140d2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -664,6 +664,7 @@ dependencies = [ "lazy_static", "log", "openssl-sys", + "parking_lot", "rand", "rayon", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index 9176dbf..8cbe738 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ tokio = { version = "1.43.0", features = ["rt-multi-thread"] } tiktoken-rs = { version = "0.5.9" } openssl-sys = { version = "0.9.105", features = ["vendored"] } rayon = "1.10.0" +parking_lot = "0.12.1" [dev-dependencies] tempfile = "3.16.0" diff --git a/src/hook.rs b/src/hook.rs index 462d926..06adb30 100644 --- 
a/src/hook.rs +++ b/src/hook.rs @@ -9,10 +9,38 @@ use git2::{Diff, DiffFormat, DiffOptions, Repository, Tree}; use anyhow::{bail, Context, Result}; use thiserror::Error; use rayon::prelude::*; +use parking_lot::Mutex; use crate::model::Model; use crate::profile; +// String pool for reusing allocations +struct StringPool { + strings: Vec, + capacity: usize +} + +impl StringPool { + fn new(capacity: usize) -> Self { + Self { strings: Vec::with_capacity(capacity), capacity } + } + + fn get(&mut self) -> String { + self + .strings + .pop() + .unwrap_or_else(|| String::with_capacity(self.capacity)) + } + + fn put(&mut self, mut string: String) { + string.clear(); + if self.strings.len() < 100 { + // Limit pool size + self.strings.push(string); + } + } +} + pub trait FilePath { fn is_empty(&self) -> Result { self.read().map(|s| s.is_empty()) @@ -58,13 +86,13 @@ pub trait Utf8String { impl Utf8String for Vec { fn to_utf8(&self) -> String { - String::from_utf8(self.to_vec()).unwrap_or_default() + String::from_utf8_lossy(self).into_owned() } } impl Utf8String for [u8] { fn to_utf8(&self) -> String { - String::from_utf8(self.to_vec()).unwrap_or_default() + String::from_utf8_lossy(self).into_owned() } } @@ -75,25 +103,37 @@ pub trait PatchDiff { impl PatchDiff for Diff<'_> { fn to_patch(&self, max_tokens: usize, model: Model) -> Result { profile!("Generating patch diff"); - let mut files: HashMap = HashMap::new(); + let pool = Arc::new(Mutex::new(StringPool::new(4096))); + let files = Arc::new(Mutex::new(HashMap::new())); { profile!("Processing diff changes"); self.print(DiffFormat::Patch, |diff, _hunk, line| { let content = line.content().to_utf8(); - let line_content = match line.origin() { - '+' | '-' => content, - _ => format!("context: {}", content) + let mut line_content = pool.lock().get(); + match line.origin() { + '+' | '-' => line_content.push_str(&content), + _ => { + line_content.push_str("context: "); + line_content.push_str(&content); + } }; - files + let mut 
files = files.lock(); + let entry = files .entry(diff.path()) - .or_insert_with(|| String::with_capacity(4096)) - .push_str(&line_content); + .or_insert_with(|| String::with_capacity(4096)); + entry.push_str(&line_content); + pool.lock().put(line_content); true })?; } + // Get the files out of Arc + let files = Arc::try_unwrap(files) + .expect("Arc still has multiple owners") + .into_inner(); + let mut result = String::with_capacity(files.values().map(|s| s.len()).sum()); let mut remaining_tokens = max_tokens; let total_files = files.len(); @@ -104,15 +144,27 @@ impl PatchDiff for Diff<'_> { // Convert model to Arc for thread-safe sharing let model = Arc::new(model); - // Pre-compute token counts in parallel - let file_tokens: HashMap = files + // Process files in chunks to maintain better memory usage + const CHUNK_SIZE: usize = 10; + let chunks: Vec<_> = files .iter() .collect::>() + .chunks(CHUNK_SIZE) + .map(|chunk| chunk.to_vec()) + .collect(); + + // Pre-compute token counts in parallel by chunks + let file_tokens: HashMap = chunks .par_iter() - .map(|(path, content)| { - let model = Arc::clone(&model); - let count = model.count_tokens(content).unwrap_or_default(); - ((*path).clone(), count) + .flat_map(|chunk| { + chunk + .par_iter() + .map(|(path, content)| { + let model = Arc::clone(&model); + let count = model.count_tokens(content).unwrap_or_default(); + ((*path).clone(), count) + }) + .collect::>() }) .collect(); From bf3110ffb540150b30f0d8dc15f2b5c4e12eaa68 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:49:38 +0100 Subject: [PATCH 09/56] Update dependencies in Cargo.toml and Cargo.lock --- Cargo.lock | 622 +++++++++++++++++++++++++++++++++++------------------ Cargo.toml | 22 +- 2 files changed, 420 insertions(+), 224 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f140d2e..9a7ff0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,17 +17,6 @@ version = "2.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" -[[package]] -name = "ahash" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" -dependencies = [ - "getrandom 0.2.15", - "once_cell", - "version_check", -] - [[package]] name = "aho-corasick" version = "1.1.3" @@ -52,34 +41,25 @@ version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" -[[package]] -name = "async-convert" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d416feee97712e43152cd42874de162b8f9b77295b1c85e5d92725cc8310bae" -dependencies = [ - "async-trait", -] - [[package]] name = "async-openai" -version = "0.18.3" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea5c9223f84965c603fd58c4c9ddcd1907efb2e54acf6fb47039358cd374df4" +checksum = "2d126927c78e1562d7e8473008ac8b082318c04d69e3a83e3495a563f8b84a66" dependencies = [ - "async-convert", "backoff", - "base64", + "base64 0.22.1", "bytes", "derive_builder", + "eventsource-stream", "futures", - "rand", + "rand 0.8.5", "reqwest", "reqwest-eventsource", "secrecy", "serde", "serde_json", - "thiserror", + "thiserror 2.0.11", "tokio", "tokio-stream", "tokio-util", @@ -87,15 +67,10 @@ dependencies = [ ] [[package]] -name = "async-trait" -version = "0.1.86" +name = "atomic-waker" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", -] +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "atty" @@ -124,7 +99,7 @@ dependencies = [ "getrandom 0.2.15", 
"instant", "pin-project-lite", - "rand", + "rand 0.8.5", "tokio", ] @@ -140,7 +115,7 @@ dependencies = [ "miniz_oxide", "object", "rustc-demangle", - "windows-targets 0.52.6", + "windows-targets", ] [[package]] @@ -149,6 +124,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bit-set" version = "0.5.3" @@ -245,26 +226,23 @@ dependencies = [ [[package]] name = "colored" -version = "2.2.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e" dependencies = [ - "lazy_static", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] name = "config" -version = "0.13.4" +version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23738e11972c7643e4ec947840fc463b6a571afcd3e735bdfce7d03c7a784aca" +checksum = "e26695492a475c4a091cfda61446d5ba01aac2e1dfbcd27a12fdd11aa2e32596" dependencies = [ - "async-trait", - "lazy_static", - "nom", "pathdiff", "rust-ini", "serde", + "winnow", ] [[package]] @@ -279,6 +257,26 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + 
"getrandom 0.2.15", + "once_cell", + "tiny-keccak", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -320,6 +318,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" + [[package]] name = "ctrlc" version = "3.4.5" @@ -332,9 +336,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.14.4" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" dependencies = [ "darling_core", "darling_macro", @@ -342,58 +346,58 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.4" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", - "strsim 0.10.0", - "syn 1.0.109", + "strsim 0.11.1", + "syn 2.0.98", ] [[package]] name = "darling_macro" -version = "0.14.4" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 1.0.109", + "syn 2.0.98", ] [[package]] name = "derive_builder" -version = "0.12.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" +checksum = 
"507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" dependencies = [ "derive_builder_macro", ] [[package]] name = "derive_builder_core" -version = "0.12.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ "darling", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.98", ] [[package]] name = "derive_builder_macro" -version = "0.12.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 1.0.109", + "syn 2.0.98", ] [[package]] @@ -409,9 +413,12 @@ dependencies = [ [[package]] name = "dlv-list" -version = "0.3.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" +checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", +] [[package]] name = "dotenv" @@ -440,12 +447,22 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", +] + [[package]] name = "env_logger" -version = "0.10.2" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" +checksum = "dcaee3d8e3cfc3fd92428d477bc97fc29ec8716d180c0d74c643bb26166660e0" dependencies = [ + "env_filter", "log", ] @@ -478,12 +495,13 @@ dependencies = [ [[package]] name = "fancy-regex" -version = "0.12.0" 
+version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" +checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" dependencies = [ "bit-set", - "regex", + "regex-automata", + "regex-syntax", ] [[package]] @@ -637,7 +655,7 @@ dependencies = [ "cfg-if", "libc", "wasi 0.13.3+wasi-0.2.2", - "windows-targets 0.52.6", + "windows-targets", ] [[package]] @@ -665,7 +683,7 @@ dependencies = [ "log", "openssl-sys", "parking_lot", - "rand", + "rand 0.9.0", "rayon", "reqwest", "serde", @@ -674,16 +692,16 @@ dependencies = [ "serde_json", "structopt", "tempfile", - "thiserror", + "thiserror 2.0.11", "tiktoken-rs", "tokio", ] [[package]] name = "git2" -version = "0.18.3" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" +checksum = "3fda788993cc341f69012feba8bf45c0ba4f3291fcc08e214b4d5a7332d88aff" dependencies = [ "bitflags 2.8.0", "libc", @@ -694,15 +712,15 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.26" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" dependencies = [ + "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", - "futures-util", "http", "indexmap", "slab", @@ -713,12 +731,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash", -] +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "hashbrown" @@ -755,9 +770,9 @@ dependencies = [ [[package]] name = "http" 
-version = "0.2.12" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -766,12 +781,24 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.6" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http", + "http-body", "pin-project-lite", ] @@ -781,47 +808,76 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - [[package]] name = "hyper" -version = "0.14.32" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" dependencies = [ "bytes", "futures-channel", - "futures-core", "futures-util", "h2", "http", "http-body", "httparse", - "httpdate", "itoa", "pin-project-lite", - "socket2", + "smallvec", "tokio", - "tower-service", - "tracing", "want", ] +[[package]] +name = "hyper-rustls" +version = "0.27.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" +dependencies = [ + "futures-util", + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + [[package]] name = "hyper-tls" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", + "http-body-util", "hyper", + "hyper-util", "native-tls", "tokio", "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", ] [[package]] @@ -1045,9 +1101,9 @@ checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libgit2-sys" -version = "0.16.2+1.7.2" +version = "0.18.0+1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" +checksum = "e1a117465e7e1597e8febea8bb0c410f1c7fb93b1e1cddf34363f8390367ffec" dependencies = [ "cc", "libc", @@ -1259,12 +1315,12 @@ dependencies = [ [[package]] name = "ordered-multimap" -version = "0.4.3" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" dependencies = [ "dlv-list", - "hashbrown 0.12.3", + "hashbrown 0.14.5", ] [[package]] @@ -1287,7 +1343,7 @@ dependencies = [ "libc", "redox_syscall", 
"smallvec", - "windows-targets 0.52.6", + "windows-targets", ] [[package]] @@ -1332,7 +1388,7 @@ version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -1385,7 +1441,17 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +dependencies = [ + "rand_core 0.9.0", + "zerocopy 0.8.17", ] [[package]] @@ -1395,7 +1461,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", ] [[package]] @@ -1407,6 +1473,15 @@ dependencies = [ "getrandom 0.2.15", ] +[[package]] +name = "rand_core" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff" +dependencies = [ + "zerocopy 0.8.17", +] + [[package]] name = "rayon" version = "1.10.0" @@ -1467,11 +1542,11 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.11.27" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "encoding_rs", "futures-core", @@ -1479,8 +1554,11 @@ dependencies = [ "h2", "http", "http-body", + "http-body-util", "hyper", + "hyper-rustls", "hyper-tls", + "hyper-util", 
"ipnet", "js-sys", "log", @@ -1499,20 +1577,21 @@ dependencies = [ "tokio", "tokio-native-tls", "tokio-util", + "tower", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", + "windows-registry", ] [[package]] name = "reqwest-eventsource" -version = "0.4.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f03f570355882dd8d15acc3a313841e6e90eddbc76a93c748fd82cc13ba9f51" +checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde" dependencies = [ "eventsource-stream", "futures-core", @@ -1521,7 +1600,7 @@ dependencies = [ "nom", "pin-project-lite", "reqwest", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1530,14 +1609,30 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "194d8e591e405d1eecf28819740abed6d719d1a2db87fc0bcdedee9a26d55560" +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.15", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rust-ini" -version = "0.18.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df" +checksum = "4e310ef0e1b6eeb79169a1171daf9abcb87a2e17c03bee2c4bb100b55c75409f" dependencies = [ "cfg-if", "ordered-multimap", + "trim-in-place", ] [[package]] @@ -1565,13 +1660,43 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "rustls" +version = "0.23.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" +dependencies = [ + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + [[package]] name = 
"rustls-pemfile" -version = "1.0.4" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" + +[[package]] +name = "rustls-webpki" +version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ - "base64", + "ring", + "rustls-pki-types", + "untrusted", ] [[package]] @@ -1603,9 +1728,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "secrecy" -version = "0.8.0" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bd1c54ea06cfd2f6b63219704de0b9b4f72dcc2b8fdef820be6cd799780e91e" +checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a" dependencies = [ "serde", "zeroize", @@ -1720,6 +1845,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -1734,9 +1865,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "structopt" @@ -1762,6 
+1893,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "1.0.109" @@ -1786,9 +1923,12 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "0.1.2" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] [[package]] name = "synstructure" @@ -1803,20 +1943,20 @@ dependencies = [ [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.8.0", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -1851,7 +1991,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +dependencies = [ + "thiserror-impl 2.0.11", ] 
[[package]] @@ -1865,21 +2014,42 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "thiserror-impl" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "tiktoken-rs" -version = "0.5.9" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234" +checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6" dependencies = [ "anyhow", - "base64", + "base64 0.21.7", "bstr", "fancy-regex", "lazy_static", "parking_lot", + "regex", "rustc-hash", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinystr" version = "0.7.6" @@ -1927,6 +2097,16 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" +dependencies = [ + "rustls", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.17" @@ -1951,6 +2131,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + 
[[package]] name = "tower-service" version = "0.3.3" @@ -1988,6 +2189,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "trim-in-place" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343e926fc669bc8cde4fa3129ab681c63671bae288b1f1081ceee6d9d37904fc" + [[package]] name = "try-lock" version = "0.2.5" @@ -2018,6 +2225,12 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.5.4" @@ -2216,45 +2429,51 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows-sys" -version = "0.48.0" +name = "windows-registry" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" dependencies = [ - "windows-targets 0.48.5", + "windows-result", + "windows-strings", + "windows-targets", ] [[package]] -name = "windows-sys" -version = "0.52.0" +name = "windows-result" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" dependencies = [ - "windows-targets 0.52.6", + "windows-targets", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + 
"windows-result", + "windows-targets", ] [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.6", + "windows-targets", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-targets", ] [[package]] @@ -2263,46 +2482,28 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -2315,48 +2516,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = 
"windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -2364,13 +2541,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "winreg" -version = "0.50.0" +name = "winnow" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +checksum = "86e376c75f4f43f44db463cf729e0d3acbf954d13e22c51e26e4c264b4ab545f" dependencies = [ - "cfg-if", - "windows-sys 0.48.0", + "memchr", ] [[package]] @@ -2425,7 +2601,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa91407dacce3a68c56de03abe2760159582b846c6a4acd2f456618087f12713" +dependencies = [ + "zerocopy-derive 0.8.17", ] [[package]] @@ -2439,6 +2624,17 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "zerocopy-derive" +version = "0.8.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06718a168365cad3d5ff0bb133aad346959a2074bd4a85c121255a11304a8626" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "zerofrom" version = "0.1.5" diff --git a/Cargo.toml b/Cargo.toml index 8cbe738..b883bea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ 
-26,36 +26,36 @@ path = "src/bin/hook.rs" [dependencies] anyhow = { version = "1.0.95", default-features = false } -async-openai = { version = "0.18.3", default-features = false } -colored = "2.2.0" -config = { version = "0.13.4", default-features = false, features = ["ini"] } +async-openai = { version = "0.27.2", default-features = false } +colored = "3.0.0" +config = { version = "0.15.7", default-features = false, features = ["ini"] } console = { version = "0.15.10", default-features = false } ctrlc = "3.4.5" dotenv = "0.15.0" -env_logger = { version = "0.10.2", default-features = false } -git2 = { version = "0.18.3", default-features = false } +env_logger = { version = "0.11.6", default-features = false } +git2 = { version = "0.20.0", default-features = false } home = "0.5.11" indicatif = { version = "0.17.11", default-features = false } lazy_static = "1.5.0" log = "0.4.25" -reqwest = { version = "0.11.27", default-features = true } +reqwest = { version = "0.12.12", default-features = true } serde = { version = "1", default-features = false } serde_derive = "1.0.217" serde_ini = "0.2.0" serde_json = "1.0.138" structopt = "0.3.26" -thiserror = "1.0.69" +thiserror = "2.0.11" tokio = { version = "1.43.0", features = ["rt-multi-thread"] } -tiktoken-rs = { version = "0.5.9" } +tiktoken-rs = { version = "0.6.0" } openssl-sys = { version = "0.9.105", features = ["vendored"] } rayon = "1.10.0" -parking_lot = "0.12.1" +parking_lot = "0.12.3" [dev-dependencies] tempfile = "3.16.0" anyhow = { version = "1.0.95", default-features = false } -git2 = { version = "0.18.3", default-features = false } -rand = { version = "0.8.5", default-features = false } +git2 = { version = "0.20.0", default-features = false } +rand = { version = "0.9.0", default-features = false } [profile.release] codegen-units = 1 From 63a498c968eb67dcab66296566078b78ed9dede3 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:51:48 +0100 Subject: 
[PATCH 10/56] Add `num_cpus` crate and parallelize file processing --- Cargo.lock | 19 +++++++++++++++- Cargo.toml | 1 + src/hook.rs | 63 ++++++++++++++++++++++++++++++++--------------------- 3 files changed, 57 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9a7ff0b..7e18f82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,7 +78,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -681,6 +681,7 @@ dependencies = [ "indicatif", "lazy_static", "log", + "num_cpus", "openssl-sys", "parking_lot", "rand 0.9.0", @@ -759,6 +760,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "home" version = "0.5.11" @@ -1238,6 +1245,16 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.9", + "libc", +] + [[package]] name = "number_prefix" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index b883bea..dfa41c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,7 @@ tiktoken-rs = { version = "0.6.0" } openssl-sys = { version = "0.9.105", features = ["vendored"] } rayon = "1.10.0" parking_lot = "0.12.3" +num_cpus = "1.16.0" [dev-dependencies] tempfile = "3.16.0" diff --git a/src/hook.rs b/src/hook.rs index 06adb30..495905f 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -10,6 +10,7 @@ use anyhow::{bail, Context, Result}; use thiserror::Error; use rayon::prelude::*; use parking_lot::Mutex; +use num_cpus; use crate::model::Model; use crate::profile; @@ 
-103,14 +104,22 @@ pub trait PatchDiff { impl PatchDiff for Diff<'_> { fn to_patch(&self, max_tokens: usize, model: Model) -> Result { profile!("Generating patch diff"); - let pool = Arc::new(Mutex::new(StringPool::new(4096))); + + // Create thread pool for parallel operations + let thread_pool = rayon::ThreadPoolBuilder::new() + .num_threads(num_cpus::get()) + .build() + .unwrap(); + + // Step 1: Collect all diff data into thread-safe structures + let string_pool = Arc::new(Mutex::new(StringPool::new(4096))); let files = Arc::new(Mutex::new(HashMap::new())); { profile!("Processing diff changes"); self.print(DiffFormat::Patch, |diff, _hunk, line| { let content = line.content().to_utf8(); - let mut line_content = pool.lock().get(); + let mut line_content = string_pool.lock().get(); match line.origin() { '+' | '-' => line_content.push_str(&content), _ => { @@ -124,12 +133,12 @@ impl PatchDiff for Diff<'_> { .entry(diff.path()) .or_insert_with(|| String::with_capacity(4096)); entry.push_str(&line_content); - pool.lock().put(line_content); + string_pool.lock().put(line_content); true })?; } - // Get the files out of Arc + // Step 2: Move data out of thread-safe containers let files = Arc::try_unwrap(files) .expect("Arc still has multiple owners") .into_inner(); @@ -138,13 +147,12 @@ impl PatchDiff for Diff<'_> { let mut remaining_tokens = max_tokens; let total_files = files.len(); + // Step 3: Parallel processing of file chunks { profile!("Processing and truncating diffs"); - - // Convert model to Arc for thread-safe sharing let model = Arc::new(model); - // Process files in chunks to maintain better memory usage + // Process files in parallel chunks const CHUNK_SIZE: usize = 10; let chunks: Vec<_> = files .iter() @@ -153,22 +161,24 @@ impl PatchDiff for Diff<'_> { .map(|chunk| chunk.to_vec()) .collect(); - // Pre-compute token counts in parallel by chunks - let file_tokens: HashMap = chunks - .par_iter() - .flat_map(|chunk| { - chunk - .par_iter() - .map(|(path, 
content)| { - let model = Arc::clone(&model); - let count = model.count_tokens(content).unwrap_or_default(); - ((*path).clone(), count) - }) - .collect::>() - }) - .collect(); - - // Process files sequentially since we need to maintain token budget + // Pre-compute token counts in parallel + let file_tokens: HashMap = thread_pool.install(|| { + chunks + .par_iter() + .flat_map(|chunk| { + chunk + .par_iter() + .map(|(path, content)| { + let model = Arc::clone(&model); + let count = model.count_tokens(content).unwrap_or_default(); + ((*path).clone(), count) + }) + .collect::>() + }) + .collect() + }); + + // Process files sequentially to maintain token budget for (index, (path, diff)) in files.iter().enumerate() { let files_remaining = total_files.saturating_sub(index); let max_tokens_per_file = remaining_tokens.saturating_div(files_remaining); @@ -180,8 +190,9 @@ impl PatchDiff for Diff<'_> { let file_token_count = file_tokens.get(path).copied().unwrap_or_default(); let file_allocated_tokens = file_token_count.min(max_tokens_per_file); + // Parallel truncation if needed let truncated_content = if file_token_count > file_allocated_tokens { - model.truncate(diff, file_allocated_tokens)? + thread_pool.install(|| model.truncate(diff, file_allocated_tokens))? 
} else { diff.clone() }; @@ -206,7 +217,9 @@ pub trait PatchRepository { impl PatchRepository for Repository { fn to_patch(&self, tree: Option, max_token_count: usize, model: Model) -> Result { profile!("Repository patch generation"); - self.to_diff(tree)?.to_patch(max_token_count, model) + // Generate diff and process it + let diff = self.to_diff(tree)?; + diff.to_patch(max_token_count, model) } fn to_diff(&self, tree: Option>) -> Result> { From 6a5051b346310bc8ad1d720d921a66d868f05bd7 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:53:51 +0100 Subject: [PATCH 11/56] Refactor file processing to use parallel chunks and atomic tokens --- src/hook.rs | 110 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 42 deletions(-) diff --git a/src/hook.rs b/src/hook.rs index 495905f..b533e75 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -3,6 +3,7 @@ use std::io::{Read, Write}; use std::path::PathBuf; use std::fs::File; use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; use structopt::StructOpt; use git2::{Diff, DiffFormat, DiffOptions, Repository, Tree}; @@ -143,11 +144,11 @@ impl PatchDiff for Diff<'_> { .expect("Arc still has multiple owners") .into_inner(); - let mut result = String::with_capacity(files.values().map(|s| s.len()).sum()); - let mut remaining_tokens = max_tokens; let total_files = files.len(); + let remaining_tokens = Arc::new(AtomicUsize::new(max_tokens)); + let result_chunks = Arc::new(Mutex::new(Vec::with_capacity(total_files))); - // Step 3: Parallel processing of file chunks + // Step 3: Parallel processing of files { profile!("Processing and truncating diffs"); let model = Arc::new(model); @@ -155,57 +156,82 @@ impl PatchDiff for Diff<'_> { // Process files in parallel chunks const CHUNK_SIZE: usize = 10; let chunks: Vec<_> = files - .iter() + .into_iter() // Convert to owned chunks .collect::>() .chunks(CHUNK_SIZE) .map(|chunk| 
chunk.to_vec()) .collect(); - // Pre-compute token counts in parallel - let file_tokens: HashMap = thread_pool.install(|| { - chunks - .par_iter() - .flat_map(|chunk| { - chunk - .par_iter() - .map(|(path, content)| { - let model = Arc::clone(&model); - let count = model.count_tokens(content).unwrap_or_default(); - ((*path).clone(), count) - }) - .collect::>() - }) - .collect() - }); - - // Process files sequentially to maintain token budget - for (index, (path, diff)) in files.iter().enumerate() { - let files_remaining = total_files.saturating_sub(index); - let max_tokens_per_file = remaining_tokens.saturating_div(files_remaining); + // Process chunks in parallel + let processing_result: Result<()> = thread_pool.install(|| { + chunks.par_iter().try_for_each(|chunk| { + // Pre-compute token counts for the chunk + let token_counts: Vec<_> = chunk + .par_iter() + .map(|(path, content)| { + let model = Arc::clone(&model); + let count = model.count_tokens(content).unwrap_or_default(); + (path.clone(), count) + }) + .collect(); + + // Process files in the chunk + let mut chunk_results = Vec::with_capacity(chunk.len()); + for (idx, ((path, content), (_, token_count))) in chunk.iter().zip(token_counts.iter()).enumerate() { + // Calculate token budget atomically + let total_remaining = remaining_tokens.load(Ordering::Relaxed); + let files_remaining = total_files.saturating_sub(idx); + let max_tokens_per_file = total_remaining.saturating_div(files_remaining); + + if max_tokens_per_file == 0 { + continue; // Skip this file if no tokens left + } + + let token_count = *token_count; + let allocated_tokens = token_count.min(max_tokens_per_file); + + // Try to claim tokens atomically + let old_remaining = remaining_tokens.fetch_sub(allocated_tokens, Ordering::Relaxed); + if old_remaining < allocated_tokens { + // Restore tokens if we couldn't claim them + remaining_tokens.fetch_add(allocated_tokens, Ordering::Relaxed); + continue; + } + + // Process the file with allocated tokens 
+ let processed_content = if token_count > allocated_tokens { + model.truncate(content, allocated_tokens)? + } else { + content.clone() + }; + + chunk_results.push((path.clone(), processed_content)); + } - if max_tokens_per_file == 0 { - bail!("No tokens left to generate commit message. Try increasing the max-tokens configuration option using `git ai config set max-tokens `"); - } + // Store results in order + if !chunk_results.is_empty() { + result_chunks.lock().extend(chunk_results); + } + Ok(()) + }) + }); - let file_token_count = file_tokens.get(path).copied().unwrap_or_default(); - let file_allocated_tokens = file_token_count.min(max_tokens_per_file); + // Handle any processing errors + processing_result?; + } - // Parallel truncation if needed - let truncated_content = if file_token_count > file_allocated_tokens { - thread_pool.install(|| model.truncate(diff, file_allocated_tokens))? - } else { - diff.clone() - }; + // Combine results in order + let results = result_chunks.lock(); + let mut final_result = String::with_capacity(results.iter().map(|(_, content)| content.len()).sum()); - if !result.is_empty() { - result.push('\n'); - } - result.push_str(&truncated_content); - remaining_tokens = remaining_tokens.saturating_sub(file_allocated_tokens); + for (_, content) in results.iter() { + if !final_result.is_empty() { + final_result.push('\n'); } + final_result.push_str(content); } - Ok(result) + Ok(final_result) } } From 7ed087bcd6128a6d927c574423c8be8cf485a42c Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:54:53 +0100 Subject: [PATCH 12/56] Remove redundant import of `bail` from anyhow --- src/hook.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hook.rs b/src/hook.rs index b533e75..b997a3e 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -7,7 +7,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use structopt::StructOpt; use git2::{Diff, DiffFormat, DiffOptions, 
Repository, Tree}; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use thiserror::Error; use rayon::prelude::*; use parking_lot::Mutex; From 51f96092c07e8564bf91e0a691591e85754bbe91 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:58:59 +0100 Subject: [PATCH 13/56] Sort files by token count in `PatchDiff` implementation. --- src/hook.rs | 85 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/src/hook.rs b/src/hook.rs index b997a3e..e8e7813 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -139,25 +139,36 @@ impl PatchDiff for Diff<'_> { })?; } - // Step 2: Move data out of thread-safe containers + // Step 2: Move data out of thread-safe containers and sort by token count let files = Arc::try_unwrap(files) .expect("Arc still has multiple owners") .into_inner(); - let total_files = files.len(); + // Pre-compute token counts and sort files by size + let model = Arc::new(model); + let mut files_with_tokens: Vec<_> = files + .into_iter() + .map(|(path, content)| { + let token_count = model.count_tokens(&content).unwrap_or_default(); + (path, content, token_count) + }) + .collect(); + + // Sort by token count (smaller diffs first) + files_with_tokens.sort_by_key(|(_, _, count)| *count); + + let total_files = files_with_tokens.len(); let remaining_tokens = Arc::new(AtomicUsize::new(max_tokens)); let result_chunks = Arc::new(Mutex::new(Vec::with_capacity(total_files))); + let processed_files = Arc::new(AtomicUsize::new(0)); // Step 3: Parallel processing of files { profile!("Processing and truncating diffs"); - let model = Arc::new(model); // Process files in parallel chunks const CHUNK_SIZE: usize = 10; - let chunks: Vec<_> = files - .into_iter() // Convert to owned chunks - .collect::>() + let chunks: Vec<_> = files_with_tokens .chunks(CHUNK_SIZE) .map(|chunk| chunk.to_vec()) .collect(); @@ -165,47 +176,47 @@ impl PatchDiff 
for Diff<'_> { // Process chunks in parallel let processing_result: Result<()> = thread_pool.install(|| { chunks.par_iter().try_for_each(|chunk| { - // Pre-compute token counts for the chunk - let token_counts: Vec<_> = chunk - .par_iter() - .map(|(path, content)| { - let model = Arc::clone(&model); - let count = model.count_tokens(content).unwrap_or_default(); - (path.clone(), count) - }) - .collect(); - - // Process files in the chunk let mut chunk_results = Vec::with_capacity(chunk.len()); - for (idx, ((path, content), (_, token_count))) in chunk.iter().zip(token_counts.iter()).enumerate() { - // Calculate token budget atomically - let total_remaining = remaining_tokens.load(Ordering::Relaxed); - let files_remaining = total_files.saturating_sub(idx); + + for (path, content, token_count) in chunk { + // Calculate global file position and remaining files atomically + let current_file_num = processed_files.fetch_add(1, Ordering::SeqCst); + let files_remaining = total_files.saturating_sub(current_file_num); + + if files_remaining == 0 { + continue; + } + + // Calculate token budget with proper synchronization + let total_remaining = remaining_tokens.load(Ordering::SeqCst); let max_tokens_per_file = total_remaining.saturating_div(files_remaining); if max_tokens_per_file == 0 { - continue; // Skip this file if no tokens left + continue; } let token_count = *token_count; let allocated_tokens = token_count.min(max_tokens_per_file); - // Try to claim tokens atomically - let old_remaining = remaining_tokens.fetch_sub(allocated_tokens, Ordering::Relaxed); - if old_remaining < allocated_tokens { - // Restore tokens if we couldn't claim them - remaining_tokens.fetch_add(allocated_tokens, Ordering::Relaxed); - continue; + // Try to claim tokens atomically with proper ordering + match remaining_tokens.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| { + if current >= allocated_tokens { + Some(current - allocated_tokens) + } else { + None + } + }) { + Ok(_) => { + // 
Process the file with allocated tokens + let processed_content = if token_count > allocated_tokens { + model.truncate(content, allocated_tokens)? + } else { + content.clone() + }; + chunk_results.push((path.clone(), processed_content)); + } + Err(_) => continue // Skip if we couldn't claim tokens } - - // Process the file with allocated tokens - let processed_content = if token_count > allocated_tokens { - model.truncate(content, allocated_tokens)? - } else { - content.clone() - }; - - chunk_results.push((path.clone(), processed_content)); } // Store results in order From 5e50a255673f2fc2157a83312d7522104079d228 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:00:07 +0100 Subject: [PATCH 14/56] Delete test.txt file --- test.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 test.txt diff --git a/test.txt b/test.txt deleted file mode 100644 index 9daeafb..0000000 --- a/test.txt +++ /dev/null @@ -1 +0,0 @@ -test From d49f5348b5926628188d804f4c214d8edd0b0af2 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:03:44 +0100 Subject: [PATCH 15/56] Improve error handling and path management in config and style modules --- src/config.rs | 16 +++++++++------- src/hook.rs | 2 +- src/style.rs | 6 ++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/config.rs b/src/config.rs index 80689ce..8f587c7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -25,7 +25,9 @@ impl App { } lazy_static! 
{ - pub static ref CONFIG_DIR: PathBuf = home::home_dir().unwrap().join(".config/git-ai"); + pub static ref CONFIG_DIR: PathBuf = home::home_dir() + .expect("Failed to determine home directory") + .join(".config/git-ai"); #[derive(Debug)] pub static ref APP: App = App::new().expect("Failed to load config"); pub static ref CONFIG_PATH: PathBuf = CONFIG_DIR.join("config.ini"); @@ -36,20 +38,20 @@ impl App { dotenv::dotenv().ok(); if !CONFIG_DIR.exists() { - std::fs::create_dir_all(CONFIG_DIR.to_str().unwrap()).context("Failed to create config directory")?; - File::create(CONFIG_PATH.to_str().unwrap()).context("Failed to create config file")?; + std::fs::create_dir_all(&*CONFIG_DIR).with_context(|| format!("Failed to create config directory at {:?}", *CONFIG_DIR))?; + File::create(&*CONFIG_PATH).with_context(|| format!("Failed to create config file at {:?}", *CONFIG_PATH))?; } else if !CONFIG_PATH.exists() { - File::create(CONFIG_PATH.to_str().unwrap()).context("Failed to create config file")?; + File::create(&*CONFIG_PATH).with_context(|| format!("Failed to create config file at {:?}", *CONFIG_PATH))?; } let config = Config::builder() .add_source(config::Environment::with_prefix("APP").try_parsing(true)) - .add_source(config::File::new(CONFIG_PATH.to_str().unwrap(), FileFormat::Ini)) + .add_source(config::File::new(CONFIG_PATH.to_string_lossy().as_ref(), FileFormat::Ini)) .set_default("language", "en")? .set_default("timeout", 30)? .set_default("max_commit_length", 72)? .set_default("max_tokens", 2024)? - .set_default("model", "gpt-4o")? + .set_default("model", "gpt-4")? .set_default("openai_api_key", "")? 
.build()?; @@ -60,7 +62,7 @@ impl App { pub fn save(&self) -> Result<()> { let contents = serde_ini::to_string(&self).context(format!("Failed to serialize config: {:?}", self))?; - let mut file = File::create(CONFIG_PATH.to_str().unwrap()).context("Failed to create config file")?; + let mut file = File::create(&*CONFIG_PATH).with_context(|| format!("Failed to create config file at {:?}", *CONFIG_PATH))?; file .write_all(contents.as_bytes()) .context("Failed to write config file") diff --git a/src/hook.rs b/src/hook.rs index e8e7813..e0a427e 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -110,7 +110,7 @@ impl PatchDiff for Diff<'_> { let thread_pool = rayon::ThreadPoolBuilder::new() .num_threads(num_cpus::get()) .build() - .unwrap(); + .context("Failed to create thread pool")?; // Step 1: Collect all diff data into thread-safe structures let string_pool = Arc::new(Mutex::new(StringPool::new(4096))); diff --git a/src/style.rs b/src/style.rs index d6e316b..f4eaacb 100644 --- a/src/style.rs +++ b/src/style.rs @@ -6,10 +6,8 @@ pub trait Styled { impl Styled for PathBuf { fn relative_path(&self) -> PathBuf { - let current_dir = env::current_dir().unwrap(); - let relative_path = self - .strip_prefix(&current_dir) - .unwrap_or(current_dir.as_path()); + let current_dir = env::current_dir().unwrap_or_else(|_| PathBuf::from(".")); + let relative_path = self.strip_prefix(&current_dir).unwrap_or(self.as_path()); relative_path.to_path_buf() } } From 600e5fd3469a96206ac85f7255a99f6b67bd4497 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:05:14 +0100 Subject: [PATCH 16/56] Add tests for StringPool functionality in hook.rs --- src/hook.rs | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/hook.rs b/src/hook.rs index e0a427e..d911439 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -314,3 +314,57 @@ pub enum HookError { #[error(transparent)] Anyhow(#[from] anyhow::Error) } + 
+#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_string_pool_new() { + let pool = StringPool::new(100); + assert_eq!(pool.strings.len(), 0); + assert_eq!(pool.capacity, 100); + } + + #[test] + fn test_string_pool_get() { + let mut pool = StringPool::new(10); + let s1 = pool.get(); + assert_eq!(s1.capacity(), 10); + assert_eq!(s1.len(), 0); + } + + #[test] + fn test_string_pool_put_and_get() { + let mut pool = StringPool::new(10); + + // Put a string in the pool + let mut s1 = String::with_capacity(10); + s1.push_str("test"); + pool.put(s1); + + // The pool should have one string + assert_eq!(pool.strings.len(), 1); + + // Get should return the pooled string + let s2 = pool.get(); + assert_eq!(s2.capacity(), 10); + assert_eq!(s2.len(), 0); // String should be cleared + + // Pool should be empty now + assert_eq!(pool.strings.len(), 0); + } + + #[test] + fn test_string_pool_limit() { + let mut pool = StringPool::new(10); + + // Add more than 100 strings + for _ in 0..150 { + pool.put(String::with_capacity(10)); + } + + // Pool should be limited to 100 strings + assert_eq!(pool.strings.len(), 100); + } +} From 450381de9853a6ca17a26a710f45e18202c2de6a Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:06:44 +0100 Subject: [PATCH 17/56] Update default model and add profiling to model and commit functions --- src/commit.rs | 19 +++++++++++++------ src/config.rs | 2 +- src/model.rs | 22 ++++++++++++++++------ 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/commit.rs b/src/commit.rs index 09f5135..bab523e 100644 --- a/src/commit.rs +++ b/src/commit.rs @@ -1,9 +1,10 @@ use anyhow::{bail, Result}; -use crate::{config, openai}; +use crate::{config, openai, profile}; use crate::model::Model; fn instruction() -> String { + profile!("Generate instruction template"); format!("You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs. 
Please adhere to the following guidelines: - Structure: Begin with a clear, present-tense summary. @@ -23,19 +24,25 @@ fn instruction() -> String { } pub fn token_used(model: &Model) -> Result { + profile!("Calculate instruction tokens"); model.count_tokens(&instruction()) } pub async fn generate(diff: String, max_tokens: usize, model: Model) -> Result { + profile!("Generate commit message"); + if max_tokens == 0 { bail!("Max can't be zero (2)") } - let request = openai::Request { - system: instruction(), - prompt: diff, - max_tokens: max_tokens.try_into().unwrap_or(u16::MAX), - model + let request = { + profile!("Prepare OpenAI request"); + openai::Request { + system: instruction(), + prompt: diff, + max_tokens: max_tokens.try_into().unwrap_or(u16::MAX), + model + } }; openai::call(request).await diff --git a/src/config.rs b/src/config.rs index 8f587c7..0efcc37 100644 --- a/src/config.rs +++ b/src/config.rs @@ -51,7 +51,7 @@ impl App { .set_default("timeout", 30)? .set_default("max_commit_length", 72)? .set_default("max_tokens", 2024)? - .set_default("model", "gpt-4")? + .set_default("model", "gpt-4o-mini")? .set_default("openai_api_key", "")? 
.build()?; diff --git a/src/model.rs b/src/model.rs index 71a8494..5111975 100644 --- a/src/model.rs +++ b/src/model.rs @@ -7,6 +7,8 @@ use serde::{Deserialize, Serialize}; use tiktoken_rs::get_completion_max_tokens; use tiktoken_rs::model::get_context_size; +use crate::profile; + const GPT4: &str = "gpt-4"; const GPT4O: &str = "gpt-4o"; const GPT4OMINI: &str = "gpt-4o-mini"; @@ -21,6 +23,7 @@ pub enum Model { impl Model { pub fn count_tokens(&self, text: &str) -> Result { + profile!("Count tokens"); Ok( self .context_size() @@ -29,27 +32,34 @@ impl Model { } pub fn context_size(&self) -> usize { + profile!("Get context size"); get_context_size(self.into()) } pub(crate) fn truncate(&self, diff: &str, max_tokens: usize) -> Result { + profile!("Truncate text"); self.walk_truncate(diff, max_tokens, usize::MAX) } pub(crate) fn walk_truncate(&self, diff: &str, max_tokens: usize, within: usize) -> Result { + profile!("Walk truncate iteration"); log::debug!("max_tokens: {}", max_tokens); log::debug!("diff: {}", diff); log::debug!("within: {}", within); - let str = diff - .split_whitespace() - .take(max_tokens) - .collect::>() - .join(" "); + let str = { + profile!("Split and join text"); + diff + .split_whitespace() + .take(max_tokens) + .collect::>() + .join(" ") + }; + let offset = self.count_tokens(&str)?.saturating_sub(max_tokens); if offset > within || offset == 0 { - Ok(str) // TODO: check if this is correct + Ok(str) } else { self.walk_truncate(diff, max_tokens + offset, within) } From 00faa02552834dda9a84fed0378c71f11b26d9e0 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:09:06 +0100 Subject: [PATCH 18/56] Add profiling to filesystem module functions --- src/filesystem.rs | 45 +++++++++++++++++++++++++++++++++-------- tests/profiling_test.rs | 0 2 files changed, 37 insertions(+), 8 deletions(-) delete mode 100644 tests/profiling_test.rs diff --git a/src/filesystem.rs b/src/filesystem.rs index 
9f86e1b..0536c64 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -5,6 +5,8 @@ use std::os::unix::fs::symlink as symlink_unix; use anyhow::{bail, Context, Result}; use git2::{Repository, RepositoryOpenFlags as Flags}; +use crate::profile; + #[derive(Debug, Clone)] pub struct Filesystem { git_ai_hook_bin_path: PathBuf, @@ -22,20 +24,24 @@ impl File { } pub fn exists(&self) -> bool { + profile!("Check file exists"); self.path.exists() } pub fn delete(&self) -> Result<()> { + profile!("Delete file"); log::debug!("Removing file at {}", self); fs::remove_file(&self.path).context(format!("Failed to remove file at {}", self)) } pub fn symlink(&self, target: File) -> Result<()> { + profile!("Create symlink"); log::debug!("Symlinking {} to {}", target, self); symlink_unix(&target.path, &self.path).context(format!("Failed to symlink {} to {}", target, self)) } pub fn relative_path(&self) -> Result { + profile!("Get relative file path"); Dir::new( self .path @@ -47,6 +53,7 @@ impl File { } pub fn parent(&self) -> Dir { + profile!("Get parent directory"); Dir::new(self.path.parent().unwrap_or(Path::new("")).to_path_buf()) } } @@ -98,15 +105,18 @@ impl Dir { } pub fn exists(&self) -> bool { + profile!("Check directory exists"); self.path.exists() } pub fn create_dir_all(&self) -> Result<()> { + profile!("Create directory recursively"); log::debug!("Creating directory at {}", self); fs::create_dir_all(&self.path).context(format!("Failed to create directory at {}", self)) } pub fn relative_path(&self) -> Result { + profile!("Get relative directory path"); Self::new( self .path @@ -120,23 +130,39 @@ impl Dir { impl Filesystem { pub fn new() -> Result { - let current_dir = env::current_dir().context("Failed to get current directory")?; - let git_ai_bin_path = env::current_exe().context("Failed to get current executable")?; + profile!("Initialize filesystem"); + + let current_dir = { + profile!("Get current directory"); + env::current_dir().context("Failed to get current 
directory")? + }; + + let git_ai_bin_path = { + profile!("Get executable path"); + env::current_exe().context("Failed to get current executable")? + }; - let repo = Repository::open_ext(current_dir.clone(), Flags::empty(), Vec::<&Path>::new()) - .context(format!("Failed to open repository at {}", current_dir.clone().display()))?; + let repo = { + profile!("Open git repository"); + Repository::open_ext(current_dir.clone(), Flags::empty(), Vec::<&Path>::new()) + .context(format!("Failed to open repository at {}", current_dir.clone().display()))? + }; let mut git_path = repo.path().to_path_buf(); // if relative, make it absolute if git_path.is_relative() { + profile!("Convert relative git path to absolute"); // make git_path absolute using the current folder as the base git_path = current_dir.join(git_path); } - let git_ai_hook_bin_path = git_ai_bin_path - .parent() - .context(format!("Failed to get parent directory of {}", git_ai_bin_path.display()))? - .join("git-ai-hook"); + let git_ai_hook_bin_path = { + profile!("Get hook binary path"); + git_ai_bin_path + .parent() + .context(format!("Failed to get parent directory of {}", git_ai_bin_path.display()))? 
+ .join("git-ai-hook") + }; if !git_ai_hook_bin_path.exists() { bail!("Hook binary not found at {}", git_ai_hook_bin_path.display()); @@ -150,14 +176,17 @@ impl Filesystem { } pub fn git_ai_hook_bin_path(&self) -> Result { + profile!("Get hook binary file"); File::new(self.git_ai_hook_bin_path.clone()).into() } pub fn git_hooks_path(&self) -> Dir { + profile!("Get hooks directory"); Dir::new(self.git_hooks_path.clone()) } pub fn prepare_commit_msg_path(&self) -> Result { + profile!("Get prepare-commit-msg hook path"); if !self.git_hooks_path.exists() { bail!("Hooks directory not found at {}", self.git_hooks_path.display()); } diff --git a/tests/profiling_test.rs b/tests/profiling_test.rs deleted file mode 100644 index e69de29..0000000 From 4a7d5d87d3f452bfdb92400c27edabc063e52836 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:10:22 +0100 Subject: [PATCH 19/56] Implement token counting and generation for commit messages --- src/commit.rs | 23 ++++++++++++++++++++++- src/model.rs | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/commit.rs b/src/commit.rs index bab523e..18f4a9f 100644 --- a/src/commit.rs +++ b/src/commit.rs @@ -3,6 +3,8 @@ use anyhow::{bail, Result}; use crate::{config, openai, profile}; use crate::model::Model; +/// Returns the instruction template for the AI model. +/// This template guides the model in generating appropriate commit messages. fn instruction() -> String { profile!("Generate instruction template"); format!("You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs. Please adhere to the following guidelines: @@ -23,16 +25,35 @@ fn instruction() -> String { INPUT:", config::APP.max_commit_length.unwrap_or(72)) } +/// Calculates the number of tokens used by the instruction template. 
+/// +/// # Arguments +/// * `model` - The AI model to use for token counting +/// +/// # Returns +/// * `Result` - The number of tokens used or an error pub fn token_used(model: &Model) -> Result { profile!("Calculate instruction tokens"); model.count_tokens(&instruction()) } +/// Generates a commit message using the AI model. +/// +/// # Arguments +/// * `diff` - The git diff to generate a commit message for +/// * `max_tokens` - Maximum number of tokens allowed for the response +/// * `model` - The AI model to use for generation +/// +/// # Returns +/// * `Result` - The generated commit message or an error +/// +/// # Errors +/// Returns an error if max_tokens is 0 or if the OpenAI API call fails pub async fn generate(diff: String, max_tokens: usize, model: Model) -> Result { profile!("Generate commit message"); if max_tokens == 0 { - bail!("Max can't be zero (2)") + bail!("Max tokens cannot be zero") } let request = { diff --git a/src/model.rs b/src/model.rs index 5111975..cba86a8 100644 --- a/src/model.rs +++ b/src/model.rs @@ -9,19 +9,33 @@ use tiktoken_rs::model::get_context_size; use crate::profile; +// Model identifiers const GPT4: &str = "gpt-4"; const GPT4O: &str = "gpt-4o"; const GPT4OMINI: &str = "gpt-4o-mini"; +/// Represents the available AI models for commit message generation. +/// Each model has different capabilities and token limits. #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone, Serialize, Deserialize, Default)] pub enum Model { + /// Standard GPT-4 model GPT4, + /// Optimized GPT-4 model GPT4o, + /// Default model - Mini version of optimized GPT-4 #[default] GPT4oMini } impl Model { + /// Counts the number of tokens in the given text for the current model. + /// This is used to ensure we stay within the model's token limits. 
+ /// + /// # Arguments + /// * `text` - The text to count tokens for + /// + /// # Returns + /// * `Result` - The number of tokens or an error pub fn count_tokens(&self, text: &str) -> Result { profile!("Count tokens"); Ok( @@ -31,16 +45,37 @@ impl Model { ) } + /// Gets the maximum context size for the current model. + /// + /// # Returns + /// * `usize` - The maximum number of tokens the model can process pub fn context_size(&self) -> usize { profile!("Get context size"); get_context_size(self.into()) } + /// Truncates the given text to fit within the specified token limit. + /// + /// # Arguments + /// * `diff` - The text to truncate + /// * `max_tokens` - The maximum number of tokens allowed + /// + /// # Returns + /// * `Result` - The truncated text or an error pub(crate) fn truncate(&self, diff: &str, max_tokens: usize) -> Result { profile!("Truncate text"); self.walk_truncate(diff, max_tokens, usize::MAX) } + /// Recursively truncates text to fit within token limits while maintaining coherence. 
+ /// + /// # Arguments + /// * `diff` - The text to truncate + /// * `max_tokens` - The maximum number of tokens allowed + /// * `within` - The maximum allowed deviation from target token count + /// + /// # Returns + /// * `Result` - The truncated text or an error pub(crate) fn walk_truncate(&self, diff: &str, max_tokens: usize, within: usize) -> Result { profile!("Walk truncate iteration"); log::debug!("max_tokens: {}", max_tokens); From a5833c807bd889dfff2e3b2d16c64487befa22e3 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:10:39 +0100 Subject: [PATCH 20/56] Add documentation for Filesystem, File, and Dir structs in filesystem.rs --- src/filesystem.rs | 80 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 8 deletions(-) diff --git a/src/filesystem.rs b/src/filesystem.rs index 0536c64..6d8924d 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -7,39 +7,66 @@ use git2::{Repository, RepositoryOpenFlags as Flags}; use crate::profile; +/// Represents the filesystem structure for git-ai. +/// Handles paths for hooks and binaries. #[derive(Debug, Clone)] pub struct Filesystem { git_ai_hook_bin_path: PathBuf, git_hooks_path: PathBuf } +/// Represents a file in the filesystem. +/// Provides operations for file manipulation. #[derive(Debug, Clone)] pub struct File { path: PathBuf } impl File { + /// Creates a new File instance. + /// + /// # Arguments + /// * `path` - The path to the file pub fn new(path: PathBuf) -> Self { Self { path } } + /// Checks if the file exists. + /// + /// # Returns + /// * `bool` - true if the file exists, false otherwise pub fn exists(&self) -> bool { profile!("Check file exists"); self.path.exists() } + /// Deletes the file from the filesystem. 
+ /// + /// # Returns + /// * `Result<()>` - Success or an error if deletion fails pub fn delete(&self) -> Result<()> { profile!("Delete file"); log::debug!("Removing file at {}", self); fs::remove_file(&self.path).context(format!("Failed to remove file at {}", self)) } + /// Creates a symbolic link to the target file. + /// + /// # Arguments + /// * `target` - The file to link to + /// + /// # Returns + /// * `Result<()>` - Success or an error if link creation fails pub fn symlink(&self, target: File) -> Result<()> { profile!("Create symlink"); log::debug!("Symlinking {} to {}", target, self); symlink_unix(&target.path, &self.path).context(format!("Failed to symlink {} to {}", target, self)) } + /// Gets the relative path from the current directory. + /// + /// # Returns + /// * `Result` - The relative path as a Dir or an error pub fn relative_path(&self) -> Result { profile!("Get relative file path"); Dir::new( @@ -52,6 +79,10 @@ impl File { .into() } + /// Gets the parent directory of the file. + /// + /// # Returns + /// * `Dir` - The parent directory pub fn parent(&self) -> Dir { profile!("Get parent directory"); Dir::new(self.path.parent().unwrap_or(Path::new("")).to_path_buf()) @@ -76,12 +107,8 @@ impl From for Result { } } -impl From for Result { - fn from(dir: Dir) -> Result { - Ok(dir) - } -} - +/// Represents a directory in the filesystem. +/// Provides operations for directory manipulation. #[derive(Debug, Clone)] pub struct Dir { path: PathBuf @@ -93,6 +120,12 @@ impl std::fmt::Display for Dir { } } +impl From for Result { + fn from(dir: Dir) -> Result { + Ok(dir) + } +} + impl From for Result { fn from(filesystem: Filesystem) -> Result { Ok(filesystem) @@ -100,21 +133,37 @@ impl From for Result { } impl Dir { + /// Creates a new Dir instance. + /// + /// # Arguments + /// * `path` - The path to the directory pub fn new(path: PathBuf) -> Self { Self { path } } + /// Checks if the directory exists. 
+ /// + /// # Returns + /// * `bool` - true if the directory exists, false otherwise pub fn exists(&self) -> bool { profile!("Check directory exists"); self.path.exists() } + /// Creates the directory and all parent directories if they don't exist. + /// + /// # Returns + /// * `Result<()>` - Success or an error if creation fails pub fn create_dir_all(&self) -> Result<()> { profile!("Create directory recursively"); log::debug!("Creating directory at {}", self); fs::create_dir_all(&self.path).context(format!("Failed to create directory at {}", self)) } + /// Gets the relative path from the current directory. + /// + /// # Returns + /// * `Result` - The relative path or an error pub fn relative_path(&self) -> Result { profile!("Get relative directory path"); Self::new( @@ -129,6 +178,11 @@ impl Dir { } impl Filesystem { + /// Creates a new Filesystem instance. + /// Initializes paths for git hooks and binaries. + /// + /// # Returns + /// * `Result` - The initialized filesystem or an error pub fn new() -> Result { profile!("Initialize filesystem"); @@ -149,10 +203,8 @@ impl Filesystem { }; let mut git_path = repo.path().to_path_buf(); - // if relative, make it absolute if git_path.is_relative() { profile!("Convert relative git path to absolute"); - // make git_path absolute using the current folder as the base git_path = current_dir.join(git_path); } @@ -175,16 +227,28 @@ impl Filesystem { .into() } + /// Gets the path to the git-ai hook binary. + /// + /// # Returns + /// * `Result` - The hook binary path or an error pub fn git_ai_hook_bin_path(&self) -> Result { profile!("Get hook binary file"); File::new(self.git_ai_hook_bin_path.clone()).into() } + /// Gets the path to the git hooks directory. + /// + /// # Returns + /// * `Dir` - The hooks directory path pub fn git_hooks_path(&self) -> Dir { profile!("Get hooks directory"); Dir::new(self.git_hooks_path.clone()) } + /// Gets the path to the prepare-commit-msg hook. 
+ /// + /// # Returns + /// * `Result` - The hook path or an error pub fn prepare_commit_msg_path(&self) -> Result { profile!("Get prepare-commit-msg hook path"); if !self.git_hooks_path.exists() { From 53846904241be1e185c26584b5573a44affc26d5 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:15:25 +0100 Subject: [PATCH 21/56] Refactor commit message generation methods and file handling logic --- src/bin/hook.rs | 4 +- src/commit.rs | 74 ++++++++++++++++------------ src/filesystem.rs | 122 ++++++++++++++++++++++++---------------------- src/install.rs | 20 ++------ src/model.rs | 60 ++++++++++++----------- src/reinstall.rs | 13 +++-- 6 files changed, 152 insertions(+), 141 deletions(-) diff --git a/src/bin/hook.rs b/src/bin/hook.rs index 1147b25..356607c 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -105,7 +105,7 @@ impl Args { bail!("No changes to commit"); } - let response = commit::generate(patch.to_string(), remaining_tokens, model).await?; + let response = commit::generate_commit_message(patch.to_string(), remaining_tokens, model).await?; std::fs::write(&self.commit_msg_file, response.response.trim())?; pb.finish_and_clear(); @@ -124,7 +124,7 @@ impl Args { .clone() .unwrap_or("gpt-4o".to_string()) .into(); - let used_tokens = commit::token_used(&model)?; + let used_tokens = commit::get_instruction_token_count(&model)?; let max_tokens = config::APP.max_tokens.unwrap_or(model.context_size()); let remaining_tokens = max_tokens.saturating_sub(used_tokens); diff --git a/src/commit.rs b/src/commit.rs index 18f4a9f..ab1141e 100644 --- a/src/commit.rs +++ b/src/commit.rs @@ -3,26 +3,28 @@ use anyhow::{bail, Result}; use crate::{config, openai, profile}; use crate::model::Model; -/// Returns the instruction template for the AI model. -/// This template guides the model in generating appropriate commit messages. 
-fn instruction() -> String { - profile!("Generate instruction template"); - format!("You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs. Please adhere to the following guidelines: +const INSTRUCTION_TEMPLATE: &str = r#"You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs. Please adhere to the following guidelines: - - Structure: Begin with a clear, present-tense summary. - - Content: Emphasize the changes and their rationale, excluding irrelevant details. - - Consistency: Maintain uniformity in tense, punctuation, and capitalization. - - Accuracy: Ensure the message accurately reflects the changes and their purpose. - - Present tense, imperative mood. (e.g., 'Add x to y' instead of 'Added x to y') - - Max {} chars in the output +- Structure: Begin with a clear, present-tense summary. +- Content: Emphasize the changes and their rationale, excluding irrelevant details. +- Consistency: Maintain uniformity in tense, punctuation, and capitalization. +- Accuracy: Ensure the message accurately reflects the changes and their purpose. +- Present tense, imperative mood. (e.g., 'Add x to y' instead of 'Added x to y') +- Max {} chars in the output - ## Output: +## Output: - Your output should be a commit message generated from the input diff and nothing else. +Your output should be a commit message generated from the input diff and nothing else. - ## Input: +## Input: - INPUT:", config::APP.max_commit_length.unwrap_or(72)) +INPUT:"#; + +/// Returns the instruction template for the AI model. +/// This template guides the model in generating appropriate commit messages. +fn get_instruction_template() -> String { + profile!("Generate instruction template"); + INSTRUCTION_TEMPLATE.replace("{}", &config::APP.max_commit_length.unwrap_or(72).to_string()) } /// Calculates the number of tokens used by the instruction template. 
@@ -32,9 +34,28 @@ fn instruction() -> String { /// /// # Returns /// * `Result` - The number of tokens used or an error -pub fn token_used(model: &Model) -> Result { +pub fn get_instruction_token_count(model: &Model) -> Result { profile!("Calculate instruction tokens"); - model.count_tokens(&instruction()) + model.count_tokens(&get_instruction_template()) +} + +/// Creates an OpenAI request for commit message generation. +/// +/// # Arguments +/// * `diff` - The git diff to generate a commit message for +/// * `max_tokens` - Maximum number of tokens allowed for the response +/// * `model` - The AI model to use for generation +/// +/// # Returns +/// * `openai::Request` - The prepared request +fn create_commit_request(diff: String, max_tokens: usize, model: Model) -> openai::Request { + profile!("Prepare OpenAI request"); + openai::Request { + system: get_instruction_template(), + prompt: diff, + max_tokens: max_tokens.try_into().unwrap_or(u16::MAX), + model + } } /// Generates a commit message using the AI model. 
@@ -48,23 +69,16 @@ pub fn token_used(model: &Model) -> Result { /// * `Result` - The generated commit message or an error /// /// # Errors -/// Returns an error if max_tokens is 0 or if the OpenAI API call fails -pub async fn generate(diff: String, max_tokens: usize, model: Model) -> Result { +/// Returns an error if: +/// - max_tokens is 0 +/// - OpenAI API call fails +pub async fn generate_commit_message(diff: String, max_tokens: usize, model: Model) -> Result { profile!("Generate commit message"); if max_tokens == 0 { - bail!("Max tokens cannot be zero") + bail!("Maximum token count must be greater than zero") } - let request = { - profile!("Prepare OpenAI request"); - openai::Request { - system: instruction(), - prompt: diff, - max_tokens: max_tokens.try_into().unwrap_or(u16::MAX), - model - } - }; - + let request = create_commit_request(diff, max_tokens, model); openai::call(request).await } diff --git a/src/filesystem.rs b/src/filesystem.rs index 6d8924d..8cb3acf 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -7,6 +7,9 @@ use git2::{Repository, RepositoryOpenFlags as Flags}; use crate::profile; +/// Error messages for filesystem operations +const ERR_CURRENT_DIR: &str = "Failed to get current directory"; + /// Represents the filesystem structure for git-ai. /// Handles paths for hooks and binaries. #[derive(Debug, Clone)] @@ -47,7 +50,7 @@ impl File { pub fn delete(&self) -> Result<()> { profile!("Delete file"); log::debug!("Removing file at {}", self); - fs::remove_file(&self.path).context(format!("Failed to remove file at {}", self)) + fs::remove_file(&self.path).with_context(|| format!("Failed to remove file at {}", self)) } /// Creates a symbolic link to the target file. 
@@ -57,10 +60,10 @@ impl File { /// /// # Returns /// * `Result<()>` - Success or an error if link creation fails - pub fn symlink(&self, target: File) -> Result<()> { + pub fn symlink(&self, target: &File) -> Result<()> { profile!("Create symlink"); log::debug!("Symlinking {} to {}", target, self); - symlink_unix(&target.path, &self.path).context(format!("Failed to symlink {} to {}", target, self)) + symlink_unix(&target.path, &self.path).with_context(|| format!("Failed to symlink {} to {}", target, self)) } /// Gets the relative path from the current directory. @@ -69,14 +72,13 @@ impl File { /// * `Result` - The relative path as a Dir or an error pub fn relative_path(&self) -> Result { profile!("Get relative file path"); - Dir::new( - self - .path - .strip_prefix(env::current_dir().context("Failed to get current directory")?) - .context(format!("Failed to strip prefix from {}", self.path.display()))? - .to_path_buf() - ) - .into() + let current_dir = env::current_dir().context(ERR_CURRENT_DIR)?; + let relative = self + .path + .strip_prefix(¤t_dir) + .with_context(|| format!("Failed to strip prefix from {}", self.path.display()))?; + + Ok(Dir::new(relative.to_path_buf())) } /// Gets the parent directory of the file. @@ -97,7 +99,8 @@ impl From<&File> for Dir { impl std::fmt::Display for File { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.relative_path().unwrap_or(self.into()).path.display()) + let path = self.relative_path().unwrap_or_else(|_| self.into()); + write!(f, "{}", path.path.display()) } } @@ -114,24 +117,6 @@ pub struct Dir { path: PathBuf } -impl std::fmt::Display for Dir { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.path.display()) - } -} - -impl From for Result { - fn from(dir: Dir) -> Result { - Ok(dir) - } -} - -impl From for Result { - fn from(filesystem: Filesystem) -> Result { - Ok(filesystem) - } -} - impl Dir { /// Creates a new Dir instance. 
/// @@ -157,7 +142,7 @@ impl Dir { pub fn create_dir_all(&self) -> Result<()> { profile!("Create directory recursively"); log::debug!("Creating directory at {}", self); - fs::create_dir_all(&self.path).context(format!("Failed to create directory at {}", self)) + fs::create_dir_all(&self.path).with_context(|| format!("Failed to create directory at {}", self)) } /// Gets the relative path from the current directory. @@ -166,14 +151,25 @@ impl Dir { /// * `Result` - The relative path or an error pub fn relative_path(&self) -> Result { profile!("Get relative directory path"); - Self::new( - self - .path - .strip_prefix(env::current_dir().context("Failed to get current directory")?) - .context(format!("Failed to strip prefix from {}", self.path.display()))? - .to_path_buf() - ) - .into() + let current_dir = env::current_dir().context(ERR_CURRENT_DIR)?; + let relative = self + .path + .strip_prefix(¤t_dir) + .with_context(|| format!("Failed to strip prefix from {}", self.path.display()))?; + + Ok(Self::new(relative.to_path_buf())) + } +} + +impl std::fmt::Display for Dir { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.path.display()) + } +} + +impl From for Result { + fn from(dir: Dir) -> Result { + Ok(dir) } } @@ -186,45 +182,53 @@ impl Filesystem { pub fn new() -> Result { profile!("Initialize filesystem"); + // Get current directory let current_dir = { profile!("Get current directory"); - env::current_dir().context("Failed to get current directory")? + env::current_dir().context(ERR_CURRENT_DIR)? }; + // Get executable path let git_ai_bin_path = { profile!("Get executable path"); env::current_exe().context("Failed to get current executable")? }; + // Open git repository let repo = { profile!("Open git repository"); - Repository::open_ext(current_dir.clone(), Flags::empty(), Vec::<&Path>::new()) - .context(format!("Failed to open repository at {}", current_dir.clone().display()))? 
+ Repository::open_ext(¤t_dir, Flags::empty(), Vec::<&Path>::new()) + .with_context(|| format!("Failed to open repository at {}", current_dir.display()))? }; - let mut git_path = repo.path().to_path_buf(); - if git_path.is_relative() { - profile!("Convert relative git path to absolute"); - git_path = current_dir.join(git_path); - } + // Get git path and ensure it's absolute + let git_path = { + let mut path = repo.path().to_path_buf(); + if path.is_relative() { + profile!("Convert relative git path to absolute"); + path = current_dir.join(path); + } + path + }; + // Get hook binary path let git_ai_hook_bin_path = { profile!("Get hook binary path"); - git_ai_bin_path + let hook_path = git_ai_bin_path .parent() - .context(format!("Failed to get parent directory of {}", git_ai_bin_path.display()))? - .join("git-ai-hook") - }; + .with_context(|| format!("Failed to get parent directory of {}", git_ai_bin_path.display()))? + .join("git-ai-hook"); - if !git_ai_hook_bin_path.exists() { - bail!("Hook binary not found at {}", git_ai_hook_bin_path.display()); - } + if !hook_path.exists() { + bail!("Hook binary not found at {}", hook_path.display()); + } + hook_path + }; - Self { + Ok(Self { git_ai_hook_bin_path, git_hooks_path: git_path.join("hooks") - } - .into() + }) } /// Gets the path to the git-ai hook binary. @@ -233,7 +237,7 @@ impl Filesystem { /// * `Result` - The hook binary path or an error pub fn git_ai_hook_bin_path(&self) -> Result { profile!("Get hook binary file"); - File::new(self.git_ai_hook_bin_path.clone()).into() + Ok(File::new(self.git_ai_hook_bin_path.clone())) } /// Gets the path to the git hooks directory. 
@@ -255,6 +259,6 @@ impl Filesystem { bail!("Hooks directory not found at {}", self.git_hooks_path.display()); } - File::new(self.git_hooks_path.join("prepare-commit-msg")).into() + Ok(File::new(self.git_hooks_path.join("prepare-commit-msg"))) } } diff --git a/src/install.rs b/src/install.rs index 77754d9..5152384 100644 --- a/src/install.rs +++ b/src/install.rs @@ -1,27 +1,17 @@ use anyhow::{bail, Result}; use ai::filesystem::Filesystem; -use colored::Colorize; -use console::Emoji; - -const EMOJI: Emoji<'_, '_> = Emoji("🔗", ""); pub fn run() -> Result<()> { - let filesystem = Filesystem::new()?; - - if !filesystem.git_hooks_path().exists() { - filesystem.git_hooks_path().create_dir_all()?; - } - - let hook_file = filesystem.prepare_commit_msg_path()?; - let hook_bin = filesystem.git_ai_hook_bin_path()?; + let fs = Filesystem::new()?; + let hook_bin = fs.git_ai_hook_bin_path()?; + let hook_file = fs.prepare_commit_msg_path()?; if hook_file.exists() { bail!("Hook already exists at {}, please run 'git ai hook reinstall'", hook_file); } - hook_file.symlink(hook_bin)?; - - println!("{EMOJI} Hook symlinked successfully to {}", hook_file.to_string().italic()); + hook_file.symlink(&hook_bin)?; + println!("🔗 Hook symlinked successfully to {}", hook_file); Ok(()) } diff --git a/src/model.rs b/src/model.rs index cba86a8..308f639 100644 --- a/src/model.rs +++ b/src/model.rs @@ -9,10 +9,10 @@ use tiktoken_rs::model::get_context_size; use crate::profile; -// Model identifiers -const GPT4: &str = "gpt-4"; -const GPT4O: &str = "gpt-4o"; -const GPT4OMINI: &str = "gpt-4o-mini"; +// Model identifiers - using screaming case for constants +const MODEL_GPT4: &str = "gpt-4"; +const MODEL_GPT4_OPTIMIZED: &str = "gpt-4o"; +const MODEL_GPT4_MINI: &str = "gpt-4o-mini"; /// Represents the available AI models for commit message generation. /// Each model has different capabilities and token limits. 
@@ -20,9 +20,9 @@ const GPT4OMINI: &str = "gpt-4o-mini"; pub enum Model { /// Standard GPT-4 model GPT4, - /// Optimized GPT-4 model + /// Optimized GPT-4 model for better performance GPT4o, - /// Default model - Mini version of optimized GPT-4 + /// Default model - Mini version of optimized GPT-4 for faster processing #[default] GPT4oMini } @@ -38,10 +38,11 @@ impl Model { /// * `Result` - The number of tokens or an error pub fn count_tokens(&self, text: &str) -> Result { profile!("Count tokens"); + let model_str: &str = self.into(); Ok( self .context_size() - .saturating_sub(get_completion_max_tokens(self.into(), text)?) + .saturating_sub(get_completion_max_tokens(model_str, text)?) ) } @@ -51,52 +52,54 @@ impl Model { /// * `usize` - The maximum number of tokens the model can process pub fn context_size(&self) -> usize { profile!("Get context size"); - get_context_size(self.into()) + let model_str: &str = self.into(); + get_context_size(model_str) } /// Truncates the given text to fit within the specified token limit. /// /// # Arguments - /// * `diff` - The text to truncate + /// * `text` - The text to truncate /// * `max_tokens` - The maximum number of tokens allowed /// /// # Returns /// * `Result` - The truncated text or an error - pub(crate) fn truncate(&self, diff: &str, max_tokens: usize) -> Result { + pub(crate) fn truncate(&self, text: &str, max_tokens: usize) -> Result { profile!("Truncate text"); - self.walk_truncate(diff, max_tokens, usize::MAX) + self.walk_truncate(text, max_tokens, usize::MAX) } /// Recursively truncates text to fit within token limits while maintaining coherence. + /// Uses a binary search-like approach to find the optimal truncation point. 
/// /// # Arguments - /// * `diff` - The text to truncate + /// * `text` - The text to truncate /// * `max_tokens` - The maximum number of tokens allowed /// * `within` - The maximum allowed deviation from target token count /// /// # Returns /// * `Result` - The truncated text or an error - pub(crate) fn walk_truncate(&self, diff: &str, max_tokens: usize, within: usize) -> Result { + pub(crate) fn walk_truncate(&self, text: &str, max_tokens: usize, within: usize) -> Result { profile!("Walk truncate iteration"); - log::debug!("max_tokens: {}", max_tokens); - log::debug!("diff: {}", diff); - log::debug!("within: {}", within); + log::debug!("max_tokens: {}, within: {}", max_tokens, within); - let str = { + let truncated = { profile!("Split and join text"); - diff + text .split_whitespace() .take(max_tokens) .collect::>() .join(" ") }; - let offset = self.count_tokens(&str)?.saturating_sub(max_tokens); + let token_count = self.count_tokens(&truncated)?; + let offset = token_count.saturating_sub(max_tokens); if offset > within || offset == 0 { - Ok(str) + Ok(truncated) } else { - self.walk_truncate(diff, max_tokens + offset, within) + // Recursively adjust token count to get closer to target + self.walk_truncate(text, max_tokens + offset, within) } } } @@ -104,9 +107,9 @@ impl Model { impl From<&Model> for &str { fn from(model: &Model) -> Self { match model { - Model::GPT4o => GPT4O, - Model::GPT4 => GPT4, - Model::GPT4oMini => GPT4OMINI + Model::GPT4o => MODEL_GPT4_OPTIMIZED, + Model::GPT4 => MODEL_GPT4, + Model::GPT4oMini => MODEL_GPT4_MINI } } } @@ -116,10 +119,10 @@ impl FromStr for Model { fn from_str(s: &str) -> Result { match s.trim().to_lowercase().as_str() { - GPT4O => Ok(Model::GPT4o), - GPT4 => Ok(Model::GPT4), - GPT4OMINI => Ok(Model::GPT4oMini), - model => bail!("Invalid model: {}", model) + MODEL_GPT4_OPTIMIZED => Ok(Model::GPT4o), + MODEL_GPT4 => Ok(Model::GPT4), + MODEL_GPT4_MINI => Ok(Model::GPT4oMini), + model => bail!("Invalid model name: {}", model) 
} } } @@ -130,6 +133,7 @@ impl Display for Model { } } +// Implement conversion from string types to Model with fallback to default impl From<&str> for Model { fn from(s: &str) -> Self { s.parse().unwrap_or_default() diff --git a/src/reinstall.rs b/src/reinstall.rs index 350df05..9ce9159 100644 --- a/src/reinstall.rs +++ b/src/reinstall.rs @@ -6,21 +6,20 @@ use colored::*; const EMOJI: Emoji<'_, '_> = Emoji("🔗", ""); pub fn run() -> Result<()> { - let filesystem = Filesystem::new()?; + let fs = Filesystem::new()?; + let hook_bin = fs.git_ai_hook_bin_path()?; + let hook_file = fs.prepare_commit_msg_path()?; - if !filesystem.git_hooks_path().exists() { - filesystem.git_hooks_path().create_dir_all()?; + if !fs.git_hooks_path().exists() { + fs.git_hooks_path().create_dir_all()?; } - let hook_file = filesystem.prepare_commit_msg_path()?; - let hook_bin = filesystem.git_ai_hook_bin_path()?; - if hook_file.exists() { log::debug!("Removing existing hook file: {}", hook_file); hook_file.delete()?; } - hook_file.symlink(hook_bin)?; + hook_file.symlink(&hook_bin)?; println!( "{EMOJI} Hook symlinked successfully to {}", From fa47b5b611aa3d7c48c44e887f7646f73c6e6bf6 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:18:33 +0100 Subject: [PATCH 22/56] Implement configuration file management and update functions in App --- src/config.rs | 116 +++++++++++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 43 deletions(-) diff --git a/src/config.rs b/src/config.rs index 0efcc37..042723d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -8,6 +8,13 @@ use anyhow::{Context, Result}; use lazy_static::lazy_static; use console::Emoji; +// Constants +const DEFAULT_TIMEOUT: i64 = 30; +const DEFAULT_MAX_COMMIT_LENGTH: i64 = 72; +const DEFAULT_MAX_TOKENS: i64 = 2024; +const DEFAULT_MODEL: &str = "gpt-4o-mini"; +const DEFAULT_API_KEY: &str = ""; + #[derive(Debug, Default, Deserialize, PartialEq, Eq, 
Serialize)] pub struct App { pub openai_api_key: Option, @@ -17,42 +24,51 @@ pub struct App { pub timeout: Option } -impl App { - #[allow(dead_code)] - pub fn duration(&self) -> std::time::Duration { - std::time::Duration::from_secs(self.timeout.unwrap_or(30) as u64) - } +#[derive(Debug)] +pub struct ConfigPaths { + pub dir: PathBuf, + pub file: PathBuf } lazy_static! { - pub static ref CONFIG_DIR: PathBuf = home::home_dir() - .expect("Failed to determine home directory") - .join(".config/git-ai"); - #[derive(Debug)] - pub static ref APP: App = App::new().expect("Failed to load config"); - pub static ref CONFIG_PATH: PathBuf = CONFIG_DIR.join("config.ini"); + static ref PATHS: ConfigPaths = ConfigPaths::new(); + pub static ref APP: App = App::new().expect("Failed to load config"); +} + +impl ConfigPaths { + fn new() -> Self { + let dir = home::home_dir() + .expect("Failed to determine home directory") + .join(".config/git-ai"); + let file = dir.join("config.ini"); + Self { dir, file } + } + + fn ensure_exists(&self) -> Result<()> { + if !self.dir.exists() { + std::fs::create_dir_all(&self.dir).with_context(|| format!("Failed to create config directory at {:?}", self.dir))?; + } + if !self.file.exists() { + File::create(&self.file).with_context(|| format!("Failed to create config file at {:?}", self.file))?; + } + Ok(()) + } } impl App { pub fn new() -> Result { dotenv::dotenv().ok(); - - if !CONFIG_DIR.exists() { - std::fs::create_dir_all(&*CONFIG_DIR).with_context(|| format!("Failed to create config directory at {:?}", *CONFIG_DIR))?; - File::create(&*CONFIG_PATH).with_context(|| format!("Failed to create config file at {:?}", *CONFIG_PATH))?; - } else if !CONFIG_PATH.exists() { - File::create(&*CONFIG_PATH).with_context(|| format!("Failed to create config file at {:?}", *CONFIG_PATH))?; - } + PATHS.ensure_exists()?; let config = Config::builder() .add_source(config::Environment::with_prefix("APP").try_parsing(true)) - 
.add_source(config::File::new(CONFIG_PATH.to_string_lossy().as_ref(), FileFormat::Ini)) + .add_source(config::File::new(PATHS.file.to_string_lossy().as_ref(), FileFormat::Ini)) .set_default("language", "en")? - .set_default("timeout", 30)? - .set_default("max_commit_length", 72)? - .set_default("max_tokens", 2024)? - .set_default("model", "gpt-4o-mini")? - .set_default("openai_api_key", "")? + .set_default("timeout", DEFAULT_TIMEOUT)? + .set_default("max_commit_length", DEFAULT_MAX_COMMIT_LENGTH)? + .set_default("max_tokens", DEFAULT_MAX_TOKENS)? + .set_default("model", DEFAULT_MODEL)? + .set_default("openai_api_key", DEFAULT_API_KEY)? .build()?; config @@ -62,48 +78,62 @@ impl App { pub fn save(&self) -> Result<()> { let contents = serde_ini::to_string(&self).context(format!("Failed to serialize config: {:?}", self))?; - let mut file = File::create(&*CONFIG_PATH).with_context(|| format!("Failed to create config file at {:?}", *CONFIG_PATH))?; + let mut file = File::create(&PATHS.file).with_context(|| format!("Failed to create config file at {:?}", PATHS.file))?; file .write_all(contents.as_bytes()) .context("Failed to write config file") } + + pub fn update_model(&mut self, value: String) -> Result<()> { + self.model = Some(value); + self.save_with_message("model") + } + + pub fn update_max_tokens(&mut self, value: usize) -> Result<()> { + self.max_tokens = Some(value); + self.save_with_message("max-tokens") + } + + pub fn update_max_commit_length(&mut self, value: usize) -> Result<()> { + self.max_commit_length = Some(value); + self.save_with_message("max-commit-length") + } + + pub fn update_openai_api_key(&mut self, value: String) -> Result<()> { + self.openai_api_key = Some(value); + self.save_with_message("openai-api-key") + } + + fn save_with_message(&self, option: &str) -> Result<()> { + println!("{} Configuration option {} updated!", Emoji("✨", ":-)"), option); + self.save() + } } +// Public interface functions pub fn run_model(value: String) -> Result<()> 
{ - let mut app = App::new()?; - app.model = value.into(); - println!("{} Configuration option model updated!", Emoji("✨", ":-)")); - app.save() + App::new()?.update_model(value) } pub fn run_max_tokens(max_tokens: usize) -> Result<()> { - let mut app = App::new()?; - app.max_tokens = max_tokens.into(); - println!("{} Configuration option max-tokens updated!", Emoji("✨", ":-)")); - app.save() + App::new()?.update_max_tokens(max_tokens) } pub fn run_max_commit_length(max_commit_length: usize) -> Result<()> { - let mut app = App::new()?; - app.max_commit_length = max_commit_length.into(); - println!("{} Configuration option max-commit-length updated!", Emoji("✨", ":-)")); - app.save() + App::new()?.update_max_commit_length(max_commit_length) } pub fn run_openai_api_key(value: String) -> Result<()> { - let mut app = App::new()?; - app.openai_api_key = Some(value); - println!("{} Configuration option openai-api-key updated!", Emoji("✨", ":-)")); - app.save() + App::new()?.update_openai_api_key(value) } pub fn run_reset() -> Result<()> { - if !CONFIG_PATH.exists() { + if !PATHS.file.exists() { eprintln!("{} Configuration file does not exist!", Emoji("🤷", ":-)")); return Ok(()); } - std::fs::remove_file(CONFIG_PATH.to_str().unwrap()).context("Failed to remove config file")?; + std::fs::remove_file(PATHS.file.to_str().unwrap()).context("Failed to remove config file")?; println!("{} Configuration reset!", Emoji("✨", ":-)")); Ok(()) } From c7778e649d9e9ba33f12a8abdbdafbf04c30f361 Mon Sep 17 00:00:00 2001 From: Linus Oleander <220827+oleander@users.noreply.github.com> Date: Fri, 7 Feb 2025 23:30:31 +0100 Subject: [PATCH 23/56] Implement parallel processing of diff data in PatchDiff trait --- src/hook.rs | 343 +++++++++++++++++++++++++++------------------------- 1 file changed, 176 insertions(+), 167 deletions(-) diff --git a/src/hook.rs b/src/hook.rs index d911439..3f9d033 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -16,7 +16,48 @@ use num_cpus; use crate::model::Model; 
use crate::profile; -// String pool for reusing allocations +// Constants +const MAX_POOL_SIZE: usize = 100; +const DEFAULT_STRING_CAPACITY: usize = 4096; +const PARALLEL_CHUNK_SIZE: usize = 10; + +// Types +type DiffData = Vec<(PathBuf, String, usize)>; + +// Error definitions +#[derive(Error, Debug)] +pub enum HookError { + #[error("Failed to open repository")] + OpenRepository, + + #[error("Failed to get patch")] + GetPatch, + + #[error("Empty diff output")] + EmptyDiffOutput, + + #[error("Failed to write commit message")] + WriteCommitMessage, + + #[error(transparent)] + Anyhow(#[from] anyhow::Error) +} + +// CLI Arguments +#[derive(StructOpt, Debug)] +#[structopt(name = "commit-msg-hook", about = "A tool for generating commit messages.")] +pub struct Args { + pub commit_msg_file: PathBuf, + + #[structopt(short = "t", long = "type")] + pub commit_type: Option, + + #[structopt(short = "s", long = "sha1")] + pub sha1: Option +} + +// Memory management +#[derive(Debug)] struct StringPool { strings: Vec, capacity: usize @@ -36,13 +77,13 @@ impl StringPool { fn put(&mut self, mut string: String) { string.clear(); - if self.strings.len() < 100 { - // Limit pool size + if self.strings.len() < MAX_POOL_SIZE { self.strings.push(string); } } } +// File operations traits pub trait FilePath { fn is_empty(&self) -> Result { self.read().map(|s| s.is_empty()) @@ -54,19 +95,19 @@ pub trait FilePath { impl FilePath for PathBuf { fn write(&self, msg: String) -> Result<()> { - let mut file = File::create(self)?; - file.write_all(msg.as_bytes())?; - Ok(()) + File::create(self)? 
+ .write_all(msg.as_bytes()) + .map_err(Into::into) } fn read(&self) -> Result { - let mut file = File::open(self)?; let mut contents = String::new(); - file.read_to_string(&mut contents)?; + File::open(self)?.read_to_string(&mut contents)?; Ok(contents) } } +// Git operations traits trait DiffDeltaPath { fn path(&self) -> PathBuf; } @@ -82,6 +123,7 @@ impl DiffDeltaPath for git2::DiffDelta<'_> { } } +// String conversion traits pub trait Utf8String { fn to_utf8(&self) -> String; } @@ -98,55 +140,21 @@ impl Utf8String for [u8] { } } +// Patch generation traits pub trait PatchDiff { fn to_patch(&self, max_token_count: usize, model: Model) -> Result; + fn collect_diff_data(&self) -> Result>; } impl PatchDiff for Diff<'_> { fn to_patch(&self, max_tokens: usize, model: Model) -> Result { profile!("Generating patch diff"); - // Create thread pool for parallel operations - let thread_pool = rayon::ThreadPoolBuilder::new() - .num_threads(num_cpus::get()) - .build() - .context("Failed to create thread pool")?; - - // Step 1: Collect all diff data into thread-safe structures - let string_pool = Arc::new(Mutex::new(StringPool::new(4096))); - let files = Arc::new(Mutex::new(HashMap::new())); + // Step 1: Collect diff data (non-parallel) + let files = self.collect_diff_data()?; - { - profile!("Processing diff changes"); - self.print(DiffFormat::Patch, |diff, _hunk, line| { - let content = line.content().to_utf8(); - let mut line_content = string_pool.lock().get(); - match line.origin() { - '+' | '-' => line_content.push_str(&content), - _ => { - line_content.push_str("context: "); - line_content.push_str(&content); - } - }; - - let mut files = files.lock(); - let entry = files - .entry(diff.path()) - .or_insert_with(|| String::with_capacity(4096)); - entry.push_str(&line_content); - string_pool.lock().put(line_content); - true - })?; - } - - // Step 2: Move data out of thread-safe containers and sort by token count - let files = Arc::try_unwrap(files) - .expect("Arc still has 
multiple owners") - .into_inner(); - - // Pre-compute token counts and sort files by size - let model = Arc::new(model); - let mut files_with_tokens: Vec<_> = files + // Step 2: Prepare files for processing + let mut files_with_tokens: DiffData = files .into_iter() .map(|(path, content)| { let token_count = model.count_tokens(&content).unwrap_or_default(); @@ -154,86 +162,40 @@ impl PatchDiff for Diff<'_> { }) .collect(); - // Sort by token count (smaller diffs first) files_with_tokens.sort_by_key(|(_, _, count)| *count); + // Step 3: Process files in parallel + let thread_pool = rayon::ThreadPoolBuilder::new() + .num_threads(num_cpus::get()) + .build() + .context("Failed to create thread pool")?; + let total_files = files_with_tokens.len(); let remaining_tokens = Arc::new(AtomicUsize::new(max_tokens)); let result_chunks = Arc::new(Mutex::new(Vec::with_capacity(total_files))); let processed_files = Arc::new(AtomicUsize::new(0)); - // Step 3: Parallel processing of files - { - profile!("Processing and truncating diffs"); - - // Process files in parallel chunks - const CHUNK_SIZE: usize = 10; - let chunks: Vec<_> = files_with_tokens - .chunks(CHUNK_SIZE) - .map(|chunk| chunk.to_vec()) - .collect(); - - // Process chunks in parallel - let processing_result: Result<()> = thread_pool.install(|| { - chunks.par_iter().try_for_each(|chunk| { - let mut chunk_results = Vec::with_capacity(chunk.len()); - - for (path, content, token_count) in chunk { - // Calculate global file position and remaining files atomically - let current_file_num = processed_files.fetch_add(1, Ordering::SeqCst); - let files_remaining = total_files.saturating_sub(current_file_num); - - if files_remaining == 0 { - continue; - } - - // Calculate token budget with proper synchronization - let total_remaining = remaining_tokens.load(Ordering::SeqCst); - let max_tokens_per_file = total_remaining.saturating_div(files_remaining); - - if max_tokens_per_file == 0 { - continue; - } - - let token_count = 
*token_count; - let allocated_tokens = token_count.min(max_tokens_per_file); - - // Try to claim tokens atomically with proper ordering - match remaining_tokens.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| { - if current >= allocated_tokens { - Some(current - allocated_tokens) - } else { - None - } - }) { - Ok(_) => { - // Process the file with allocated tokens - let processed_content = if token_count > allocated_tokens { - model.truncate(content, allocated_tokens)? - } else { - content.clone() - }; - chunk_results.push((path.clone(), processed_content)); - } - Err(_) => continue // Skip if we couldn't claim tokens - } - } - - // Store results in order - if !chunk_results.is_empty() { - result_chunks.lock().extend(chunk_results); - } - Ok(()) - }) - }); - - // Handle any processing errors - processing_result?; - } + let chunks: Vec<_> = files_with_tokens + .chunks(PARALLEL_CHUNK_SIZE) + .map(|chunk| chunk.to_vec()) + .collect(); + + let model = Arc::new(model); + + thread_pool.install(|| { + chunks + .par_iter() + .try_for_each(|chunk| process_chunk(chunk, &model, total_files, &processed_files, &remaining_tokens, &result_chunks)) + })?; - // Combine results in order + // Step 4: Combine results let results = result_chunks.lock(); - let mut final_result = String::with_capacity(results.iter().map(|(_, content)| content.len()).sum()); + let mut final_result = String::with_capacity( + results + .iter() + .map(|(_, content): &(PathBuf, String)| content.len()) + .sum() + ); for (_, content) in results.iter() { if !final_result.is_empty() { @@ -244,24 +206,114 @@ impl PatchDiff for Diff<'_> { Ok(final_result) } + + fn collect_diff_data(&self) -> Result> { + profile!("Processing diff changes"); + + let string_pool = Arc::new(Mutex::new(StringPool::new(DEFAULT_STRING_CAPACITY))); + let files = Arc::new(Mutex::new(HashMap::new())); + + self.print(DiffFormat::Patch, |diff, _hunk, line| { + let content = line.content().to_utf8(); + let mut line_content = 
string_pool.lock().get(); + + match line.origin() { + '+' | '-' => line_content.push_str(&content), + _ => { + line_content.push_str("context: "); + line_content.push_str(&content); + } + }; + + let mut files = files.lock(); + let entry = files + .entry(diff.path()) + .or_insert_with(|| String::with_capacity(DEFAULT_STRING_CAPACITY)); + entry.push_str(&line_content); + string_pool.lock().put(line_content); + true + })?; + + Ok( + Arc::try_unwrap(files) + .expect("Arc still has multiple owners") + .into_inner() + ) + } +} + +fn process_chunk( + chunk: &[(PathBuf, String, usize)], model: &Arc, total_files: usize, processed_files: &AtomicUsize, + remaining_tokens: &AtomicUsize, result_chunks: &Arc>> +) -> Result<()> { + let mut chunk_results = Vec::with_capacity(chunk.len()); + + for (path, content, token_count) in chunk { + let current_file_num = processed_files.fetch_add(1, Ordering::SeqCst); + let files_remaining = total_files.saturating_sub(current_file_num); + + if files_remaining == 0 { + continue; + } + + let total_remaining = remaining_tokens.load(Ordering::SeqCst); + let max_tokens_per_file = total_remaining.saturating_div(files_remaining); + + if max_tokens_per_file == 0 { + continue; + } + + let token_count = *token_count; + let allocated_tokens = token_count.min(max_tokens_per_file); + + if remaining_tokens + .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| { + if current >= allocated_tokens { + Some(current - allocated_tokens) + } else { + None + } + }) + .is_ok() + { + let processed_content = if token_count > allocated_tokens { + model.truncate(content, allocated_tokens)? 
+ } else { + content.clone() + }; + chunk_results.push((path.clone(), processed_content)); + } + } + + if !chunk_results.is_empty() { + result_chunks.lock().extend(chunk_results); + } + Ok(()) } pub trait PatchRepository { fn to_patch(&self, tree: Option>, max_token_count: usize, model: Model) -> Result; fn to_diff(&self, tree: Option>) -> Result>; + fn configure_diff_options(&self, opts: &mut DiffOptions); } impl PatchRepository for Repository { fn to_patch(&self, tree: Option, max_token_count: usize, model: Model) -> Result { profile!("Repository patch generation"); - // Generate diff and process it - let diff = self.to_diff(tree)?; - diff.to_patch(max_token_count, model) + self.to_diff(tree)?.to_patch(max_token_count, model) } fn to_diff(&self, tree: Option>) -> Result> { profile!("Git diff generation"); let mut opts = DiffOptions::new(); + self.configure_diff_options(&mut opts); + + self + .diff_tree_to_index(tree.as_ref(), None, Some(&mut opts)) + .context("Failed to get diff") + } + + fn configure_diff_options(&self, opts: &mut DiffOptions) { opts .ignore_whitespace_change(true) .recurse_untracked_dirs(false) @@ -277,44 +329,9 @@ impl PatchRepository for Repository { .context_lines(0) .patience(true) .minimal(true); - - self - .diff_tree_to_index(tree.as_ref(), None, Some(&mut opts)) - .context("Failed to get diff") } } -#[derive(StructOpt, Debug)] -#[structopt(name = "commit-msg-hook", about = "A tool for generating commit messages.")] -pub struct Args { - pub commit_msg_file: PathBuf, - - #[structopt(short = "t", long = "type")] - pub commit_type: Option, - - #[structopt(short = "s", long = "sha1")] - pub sha1: Option -} - -#[derive(Error, Debug)] -pub enum HookError { - #[error("Failed to open repository")] - OpenRepository, - - #[error("Failed to get patch")] - GetPatch, - - #[error("Empty diff output")] - EmptyDiffOutput, - - #[error("Failed to write commit message")] - WriteCommitMessage, - - // anyhow - #[error(transparent)] - Anyhow(#[from] 
anyhow::Error) -} - #[cfg(test)] mod tests { use super::*; @@ -337,21 +354,15 @@ mod tests { #[test] fn test_string_pool_put_and_get() { let mut pool = StringPool::new(10); - - // Put a string in the pool let mut s1 = String::with_capacity(10); s1.push_str("test"); pool.put(s1); - // The pool should have one string assert_eq!(pool.strings.len(), 1); - // Get should return the pooled string let s2 = pool.get(); assert_eq!(s2.capacity(), 10); - assert_eq!(s2.len(), 0); // String should be cleared - - // Pool should be empty now + assert_eq!(s2.len(), 0); assert_eq!(pool.strings.len(), 0); } @@ -359,12 +370,10 @@ mod tests { fn test_string_pool_limit() { let mut pool = StringPool::new(10); - // Add more than 100 strings for _ in 0..150 { pool.put(String::with_capacity(10)); } - // Pool should be limited to 100 strings - assert_eq!(pool.strings.len(), 100); + assert_eq!(pool.strings.len(), MAX_POOL_SIZE); } } From da74cd4130658fd2989a5dfbe2a2f6be74a18107 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:05:13 +0100 Subject: [PATCH 24/56] ``` feat: Add Finetune functionality and related files - Introduce finetune.rs to manage fine-tuning workflows with OpenAI. - Create finetune.md for documenting the finetuning process. - Update Cargo.toml and Cargo.lock with necessary dependencies for finetuning. - Add stats.json to track various parameters in the finetuning process. 
``` --- .gitignore | 4 +- CONTRIBUTING.md | 16 +- Cargo.lock | 596 +++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 59 +++-- README.md | 1 + finetune.md | 85 +++++++ resources/prompt.md | 28 ++- src/bin/hook.rs | 15 +- src/commit.rs | 32 +-- src/config.rs | 28 --- src/finetune.rs | 444 +++++++++++++++++++++++++++++++++ src/install.rs | 1 + src/main.rs | 144 ++++++++++- src/openai.rs | 183 +++++++++++++- src/profiling.rs | 6 +- src/reinstall.rs | 2 + src/uninstall.rs | 3 + stats.json | 9 + 18 files changed, 1535 insertions(+), 121 deletions(-) create mode 100644 finetune.md create mode 100644 src/finetune.rs create mode 100644 stats.json diff --git a/.gitignore b/.gitignore index acd9c9f..5defefc 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ http-cacache/* .secrets .env.local ${env:TMPDIR} -bin/ +tmp/ +finetune_verify.jsonl +finetune_train.jsonl diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1461d91..efceb6e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,7 +24,9 @@ For each new feature or bug fix, create a new branch based on the `main` branch. ```bash git checkout -b feature/my-new-feature ``` + or + ```bash git checkout -b fix/my-bug-fix ``` @@ -53,15 +55,15 @@ If you're adding a new feature or changing existing functionality, update the RE 1. Push your changes to your fork: - ```bash - git push origin feature/my-new-feature - ``` + ```bash + git push origin feature/my-new-feature + ``` - or + or - ```bash - git push origin fix/my-bug-fix - ``` + ```bash + git push origin fix/my-bug-fix + ``` 2. Go to your fork on GitHub and click the "Pull Request" button to submit your changes. 
diff --git a/Cargo.lock b/Cargo.lock index 7e18f82..859af05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -35,6 +35,56 @@ dependencies = [ "winapi", ] +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +dependencies = [ + "anstyle", + "once_cell", + "windows-sys 0.59.0", +] + [[package]] name = "anyhow" version = "1.0.95" @@ -130,6 +180,15 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -157,6 +216,31 @@ version = 
"2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +[[package]] +name = "bon" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe7acc34ff59877422326db7d6f2d845a582b16396b6b08194942bf34c6528ab" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4159dd617a7fbc9be6a692fe69dc2954f8e6bb6bb5e4d7578467441390d77fd0" +dependencies = [ + "darling", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.98", +] + [[package]] name = "bstr" version = "1.11.3" @@ -186,6 +270,15 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +[[package]] +name = "caseless" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6fd507454086c8edfd769ca6ada439193cdb209c7681712ef6275cccbfe5d8" +dependencies = [ + "unicode-normalization", +] + [[package]] name = "cc" version = "1.2.12" @@ -219,11 +312,58 @@ dependencies = [ "atty", "bitflags 1.3.2", "strsim 0.8.0", - "textwrap", + "textwrap 0.11.0", "unicode-width", "vec_map", ] +[[package]] +name = "clap" +version = "4.5.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e77c3243bd94243c03672cb5154667347c457ca271254724f9f393aee1c05ff" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim 0.11.1", + "terminal_size", +] + +[[package]] +name = "clap_derive" +version = "4.5.28" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "colored" version = "3.0.0" @@ -233,6 +373,25 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "comrak" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52602e10393cfaaf8accaf707f2da743dc22cbe700a343ff8dbc9e5e04bc6b74" +dependencies = [ + "bon", + "caseless", + "clap 4.5.28", + "entities", + "memchr", + "shell-words", + "slug", + "syntect", + "typed-arena", + "unicode_categories", + "xdg", +] + [[package]] name = "config" version = "0.15.7" @@ -293,6 +452,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -369,6 +537,15 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_builder" version = "0.20.2" @@ -400,6 +577,33 @@ 
dependencies = [ "syn 2.0.98", ] +[[package]] +name = "deunicode" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00" + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.59.0", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -447,6 +651,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "entities" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" + [[package]] name = "env_filter" version = "0.1.3" @@ -493,6 +703,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "fancy-regex" version = "0.13.0" @@ -510,6 +730,16 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "flate2" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -635,6 +865,15 @@ dependencies = [ "slab", ] +[[package]] +name = 
"getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -671,11 +910,14 @@ dependencies = [ "anyhow", "async-openai", "colored", + "comrak", "config", "console", "ctrlc", + "dirs", "dotenv", "env_logger", + "futures", "git2", "home", "indicatif", @@ -684,6 +926,7 @@ dependencies = [ "num_cpus", "openssl-sys", "parking_lot", + "pulldown-cmark", "rand 0.9.0", "rayon", "reqwest", @@ -692,7 +935,9 @@ dependencies = [ "serde_ini", "serde_json", "structopt", + "syntect", "tempfile", + "textwrap 0.16.1", "thiserror 2.0.11", "tiktoken-rs", "tokio", @@ -751,6 +996,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -1069,6 +1320,12 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itoa" version = "1.0.14" @@ -1118,6 +1375,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.8.0", + "libc", +] + [[package]] name = "libz-sys" version = "1.1.21" @@ -1130,6 +1397,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -1245,6 +1518,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num_cpus" version = "1.16.0" @@ -1276,6 +1555,28 @@ version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +[[package]] +name = "onig" +version = "6.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" +dependencies = [ + "bitflags 1.3.2", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "openssl" version = "0.10.70" @@ -1330,6 +1631,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-multimap" version = "0.7.3" @@ -1393,12 +1700,31 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "plist" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42cf17e9a1800f5f396bc67d193dc9411b59012a5876445ef450d449881e1016" +dependencies = [ + "base64 0.22.1", + "indexmap", + 
"quick-xml", + "serde", + "time", +] + [[package]] name = "portable-atomic" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.20" @@ -1408,6 +1734,16 @@ dependencies = [ "zerocopy 0.7.35", ] +[[package]] +name = "prettyplease" +version = "0.2.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" +dependencies = [ + "proc-macro2", + "syn 2.0.98", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -1441,6 +1777,34 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pulldown-cmark" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f86ba2052aebccc42cbbb3ed234b8b13ce76f75c3551a303cb2bcffcff12bb14" +dependencies = [ + "bitflags 2.8.0", + "getopts", + "memchr", + "pulldown-cmark-escape", + "unicase", +] + +[[package]] +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + +[[package]] +name = "quick-xml" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d3a6e5838b60e0e8fa7a43f22ade549a37d61f8bdbe636d0d7816191de969c2" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.38" @@ -1457,7 +1821,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", + "rand_chacha 0.3.1", "rand_core 0.6.4", ] @@ -1467,6 +1831,7 @@ 
version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ + "rand_chacha 0.9.0", "rand_core 0.9.0", "zerocopy 0.8.17", ] @@ -1481,6 +1846,16 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.0", +] + [[package]] name = "rand_core" version = "0.6.4" @@ -1496,6 +1871,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff" dependencies = [ + "getrandom 0.3.1", "zerocopy 0.8.17", ] @@ -1528,6 +1904,17 @@ dependencies = [ "bitflags 2.8.0", ] +[[package]] +name = "redox_users" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +dependencies = [ + "getrandom 0.2.15", + "libredox", + "thiserror 2.0.11", +] + [[package]] name = "regex" version = "1.11.1" @@ -1728,6 +2115,15 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.27" @@ -1831,12 +2227,27 @@ dependencies = [ "serde", ] +[[package]] +name = "shell-words" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" + [[package]] name = "shlex" version = 
"1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + [[package]] name = "slab" version = "0.4.9" @@ -1846,12 +2257,28 @@ dependencies = [ "autocfg", ] +[[package]] +name = "slug" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882a80f72ee45de3cc9a5afeb2da0331d58df69e4e7d8eeb5d3c7784ae67e724" +dependencies = [ + "deunicode", + "wasm-bindgen", +] + [[package]] name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + [[package]] name = "socket2" version = "0.5.8" @@ -1892,7 +2319,7 @@ version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" dependencies = [ - "clap", + "clap 2.34.0", "lazy_static", "structopt-derive", ] @@ -1903,7 +2330,7 @@ version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" dependencies = [ - "heck", + "heck 0.3.3", "proc-macro-error", "proc-macro2", "quote", @@ -1958,6 +2385,29 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "syntect" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874dcfa363995604333cf947ae9f751ca3af4522c60886774c4963943b4746b1" +dependencies = [ + "bincode", + 
"bitflags 1.3.2", + "fancy-regex 0.11.0", + "flate2", + "fnv", + "once_cell", + "onig", + "plist", + "regex-syntax", + "serde", + "serde_derive", + "serde_json", + "thiserror 1.0.69", + "walkdir", + "yaml-rust", +] + [[package]] name = "system-configuration" version = "0.6.1" @@ -1993,6 +2443,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "terminal_size" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5352447f921fda68cf61b4101566c0bdb5104eff6804d0678e5227580ab6a4e9" +dependencies = [ + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -2002,6 +2462,17 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "textwrap" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -2051,13 +2522,44 @@ dependencies = [ "anyhow", "base64 0.21.7", "bstr", - "fancy-regex", + "fancy-regex 0.13.0", "lazy_static", "parking_lot", "regex", "rustc-hash", ] +[[package]] +name = "time" +version = "0.3.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tiny-keccak" 
version = "2.0.2" @@ -2077,6 +2579,21 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinyvec" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.43.0" @@ -2087,7 +2604,9 @@ dependencies = [ "bytes", "libc", "mio", + "parking_lot", "pin-project-lite", + "signal-hook-registry", "socket2", "tokio-macros", "windows-sys 0.52.0", @@ -2218,6 +2737,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "unicase" version = "2.8.1" @@ -2230,6 +2755,21 @@ version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + +[[package]] +name = "unicode-normalization" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -2242,6 +2782,12 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "untrusted" version = "0.9.0" @@ -2271,6 +2817,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "vcpkg" version = "0.2.15" @@ -2295,6 +2847,16 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2439,6 +3001,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -2587,6 +3158,21 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "xdg" +version = "2.5.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546" + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "yoke" version = "0.7.5" diff --git a/Cargo.toml b/Cargo.toml index dfa41c3..867c8a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,38 +25,57 @@ name = "git-ai-hook" path = "src/bin/hook.rs" [dependencies] -anyhow = { version = "1.0.95", default-features = false } -async-openai = { version = "0.27.2", default-features = false } +# Core functionality +anyhow = "1.0.95" +thiserror = "2.0.11" +tokio = { version = "1.43", features = ["full"] } +futures = "0.3" +parking_lot = "0.12.3" + +# CLI and UI +structopt = "0.3.26" colored = "3.0.0" -config = { version = "0.15.7", default-features = false, features = ["ini"] } console = { version = "0.15.10", default-features = false } -ctrlc = "3.4.5" -dotenv = "0.15.0" -env_logger = { version = "0.11.6", default-features = false } -git2 = { version = "0.20.0", default-features = false } -home = "0.5.11" indicatif = { version = "0.17.11", default-features = false } -lazy_static = "1.5.0" log = "0.4.25" -reqwest = { version = "0.12.12", default-features = true } -serde = { version = "1", default-features = false } +env_logger = { version = "0.11.6", default-features = false } + +# Git integration +git2 = { version = "0.20.0", default-features = false } + +# Configuration +config = { version = "0.15.7", default-features = false, features = ["ini"] } +dotenv = "0.15.0" +serde = { version = "1.0", features = ["derive"] } serde_derive = "1.0.217" serde_ini = "0.2.0" -serde_json = "1.0.138" -structopt = "0.3.26" -thiserror = "2.0.11" -tokio = { version = "1.43.0", features = ["rt-multi-thread"] } -tiktoken-rs = { version = "0.6.0" } +serde_json 
= "1.0" + +# OpenAI integration +async-openai = { version = "0.27.2", default-features = false } +tiktoken-rs = "0.6.0" +reqwest = { version = "0.12.12", default-features = true } + +# System utilities openssl-sys = { version = "0.9.105", features = ["vendored"] } rayon = "1.10.0" -parking_lot = "0.12.3" num_cpus = "1.16.0" +rand = "0.9" +ctrlc = "3.4.5" +lazy_static = "1.5.0" +home = "0.5.11" +dirs = "6.0" + +# Syntax highlighting and markdown rendering +syntect = { version = "5.2", default-features = false, features = [ + "default-fancy", +] } +pulldown-cmark = "0.12" +comrak = "0.35" +textwrap = "0.16" [dev-dependencies] tempfile = "3.16.0" -anyhow = { version = "1.0.95", default-features = false } -git2 = { version = "0.20.0", default-features = false } -rand = { version = "0.9.0", default-features = false } [profile.release] codegen-units = 1 diff --git a/README.md b/README.md index 43b36f3..ee5cc50 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ cargo install --path . ### Advanced Configuration Customize Git AI's behavior with these commands: + - `git-ai config set max-commit-length ` (default: 72): Set the maximum length of commit messages. - `git-ai config set max-tokens ` (default: 512): Set the maximum number of tokens for the assistant. - `git-ai config set model ` (default: "gpt-3.5-turbo"): Set the OpenAI model to use. diff --git a/finetune.md b/finetune.md new file mode 100644 index 0000000..ca4be33 --- /dev/null +++ b/finetune.md @@ -0,0 +1,85 @@ +# Finetune.rs Workflow + +Here's a summary of the workflow in `finetune.rs`: + +- Uses GPT4o-mini model for OpenAI +- Generates training data in JSONL format for fine-tuning +- Splits data into training and verification sets + +1. **Initialize and Setup** + + - Creates empty train and verify files + - Sets up thread pool for parallel processing + - Initializes progress bars and counters + - Loads system prompt from `resources/prompt.md` + +2. 
**Collect Commit History** + + - Opens local git repository + - Walks through commit history + - Filters commits based on: + - Message length (20-500 chars) + - Non-merge commits only + - Diff size within limits (default 5000 chars) + - Collects valid commits up to 3x target number + - Shuffles commits for randomization + +3. **Process Commits in Parallel** + + - Spawns worker threads based on CPU count or user setting + - Each worker processes a subset of commits + - For each commit: + - Checks for duplicate messages + - Rates commit quality (0.0-1.0) + - Cleans up commit message + - Tracks approved commits with progress bar + - Stops when target number reached + +4. **Clean and Rate Commit Messages** + + - Cleanup process: + - Takes first line only + - Removes ticket references and tags + - Ensures proper capitalization + - Drops type prefixes + - Keeps messages short and meaningful + - Quality rating based on: + - Message format and clarity + - Diff alignment + - Present tense and active voice + - Description accuracy + +5. **Generate Training Data** + + - Creates JSONL entries with: + - System prompt + - Diff as user input + - Cleaned message as assistant output + - Splits data: + - 50% for training + - 50% for verification + - Prevents duplicate messages + - Validates cleaned messages + +6. 
**Track Progress and Results** + - Shows real-time progress: + - Commit collection progress + - Message cleaning progress + - Approval status + - Reports final statistics: + - Total commits processed + - Training examples count + - Verification examples count + - Distribution between files + +Key Features: + +- Parallel processing for better performance +- Double quality check (original and cleaned messages) +- Duplicate prevention at multiple stages +- Progress visualization with spinners and bars +- Verbose mode for detailed logging + +The key difference from optimize.rs is that finetune.rs focuses on generating high-quality training data for fine-tuning, while optimize.rs focuses on improving the system prompt itself. + +Note: Run sync, not async diff --git a/resources/prompt.md b/resources/prompt.md index 523b51d..c61a4a7 100644 --- a/resources/prompt.md +++ b/resources/prompt.md @@ -1,18 +1,22 @@ -You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs. Please adhere to the following guidelines: +You are an AI assistant that generates concise and precise git commit messages based solely on the provided diffs. Please adhere to the following enhanced guidelines: -- Structure: Begin with a clear, present-tense summary. -- Content: While you should use the surrounding context to understand the changes, your commit message should ONLY describe the lines marked with + or -. -- Understanding: Use the context (unmarked lines) to understand the purpose and impact of the changes, but do not mention unchanged code in the commit message. -- Changes: Only describe what was actually changed (added, removed, or modified). -- Consistency: Maintain uniformity in tense, punctuation, and capitalization. -- Accuracy: Ensure the message accurately reflects the changes and their purpose. -- Present tense, imperative mood. 
(e.g., "Add x to y" instead of "Added x to y") -- Max {{max_commit_length}} chars in the output +- **Structure**: Begin with a clear, present-tense summary of the change in the non-conventional commit format. Use a single-line summary for the change, followed by a blank line. As a best practice, consider including only one bullet point detailing context if essential, but refrain from excessive elaboration. -## Output: +- **Content**: Commit messages must strictly describe the lines marked with + or - in the diff. Avoid including surrounding context, unmarked lines, or irrelevant details. Explicitly refrain from mentioning implications, reasoning, motivations, or any external context not explicitly reflected in the diff. Make sure to avoid any interpretations or assumptions beyond what is clearly stated. -Your output should be a commit message generated from the input diff and nothing else. While you should use the surrounding context to understand the changes, your message should only describe what was actually modified (+ or - lines). +- **Changes**: Clearly articulate what was added, removed, or modified based solely on what is visible in the diff. Use phrases such as "Based only on the changes visible in the diff, this commit..." to emphasize an evidence-based approach while outlining changes directly. -## Input: +- **Consistency**: Ensure uniformity in tense, punctuation, and capitalization throughout the message. Use present tense and imperative form, such as "Add x to y" instead of "Added x to y". + +- **Clarity & Brevity**: Craft messages that are clear and easy to understand, succinctly capturing the essence of the changes. Limit the message to a maximum of 72 characters for the first line, while ensuring enough detail is provided on the primary action taken. Avoid jargon; provide plain definitions for any necessary technical terms. + +- **Accuracy & Hallucination Prevention**: Rigorously reflect only the changes visible in the diff. 
Avoid any speculation or inclusion of content not substantiated by the diff. Restate the necessity for messages to focus exclusively on aspects evident in the diff and to completely avoid extrapolation or assumptions about motivations or implications. + +- **Review Process**: Before finalizing each commit message: + 1. Verify that the message accurately reflects only the changes in the diff + 2. Confirm the commit type matches the actual changes + 3. Check that the message follows the structure and formatting guidelines + 4. Ensure no external context or assumptions are included + 5. Validate that the message is clear and understandable to other developers INPUT: diff --git a/src/bin/hook.rs b/src/bin/hook.rs index 356607c..4b25e0a 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -105,7 +105,7 @@ impl Args { bail!("No changes to commit"); } - let response = commit::generate_commit_message(patch.to_string(), remaining_tokens, model).await?; + let response = commit::generate(patch.to_string(), remaining_tokens, model).await?; std::fs::write(&self.commit_msg_file, response.response.trim())?; pb.finish_and_clear(); @@ -124,7 +124,7 @@ impl Args { .clone() .unwrap_or("gpt-4o".to_string()) .into(); - let used_tokens = commit::get_instruction_token_count(&model)?; + let used_tokens = commit::token_used(&model)?; let max_tokens = config::APP.max_tokens.unwrap_or(model.context_size()); let remaining_tokens = max_tokens.saturating_sub(used_tokens); @@ -153,16 +153,21 @@ impl Args { #[tokio::main] async fn main() -> Result<()> { - env_logger::init(); + if std::env::var("RUST_LOG").is_ok() { + env_logger::init(); + } let time = std::time::Instant::now(); let args = Args::from_args(); - log::debug!("Arguments: {:?}", args); + if log::log_enabled!(log::Level::Debug) { + log::debug!("Arguments: {:?}", args); + } + if let Err(err) = args.execute().await { eprintln!("{} ({:?})", err, time.elapsed()); exit(1); - } else { + } else if log::log_enabled!(log::Level::Debug) { 
log::debug!("Completed in {:?}", time.elapsed()); } diff --git a/src/commit.rs b/src/commit.rs index ab1141e..450a1e9 100644 --- a/src/commit.rs +++ b/src/commit.rs @@ -1,30 +1,16 @@ +use std::fs; + use anyhow::{bail, Result}; use crate::{config, openai, profile}; use crate::model::Model; -const INSTRUCTION_TEMPLATE: &str = r#"You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs. Please adhere to the following guidelines: - -- Structure: Begin with a clear, present-tense summary. -- Content: Emphasize the changes and their rationale, excluding irrelevant details. -- Consistency: Maintain uniformity in tense, punctuation, and capitalization. -- Accuracy: Ensure the message accurately reflects the changes and their purpose. -- Present tense, imperative mood. (e.g., 'Add x to y' instead of 'Added x to y') -- Max {} chars in the output - -## Output: - -Your output should be a commit message generated from the input diff and nothing else. - -## Input: - -INPUT:"#; - /// Returns the instruction template for the AI model. /// This template guides the model in generating appropriate commit messages. fn get_instruction_template() -> String { profile!("Generate instruction template"); - INSTRUCTION_TEMPLATE.replace("{}", &config::APP.max_commit_length.unwrap_or(72).to_string()) + let prompt = fs::read_to_string("resources/prompt.md").unwrap_or_else(|_| String::from("Failed to read prompt.md")); + prompt.replace("{{max_commit_length}}", &config::APP.max_commit_length.unwrap_or(72).to_string()) } /// Calculates the number of tokens used by the instruction template. 
@@ -72,13 +58,17 @@ fn create_commit_request(diff: String, max_tokens: usize, model: Model) -> opena /// Returns an error if: /// - max_tokens is 0 /// - OpenAI API call fails -pub async fn generate_commit_message(diff: String, max_tokens: usize, model: Model) -> Result { +pub async fn generate(patch: String, remaining_tokens: usize, model: Model) -> Result { profile!("Generate commit message"); - if max_tokens == 0 { + if remaining_tokens == 0 { bail!("Maximum token count must be greater than zero") } - let request = create_commit_request(diff, max_tokens, model); + let request = create_commit_request(patch, remaining_tokens, model); openai::call(request).await } + +pub fn token_used(model: &Model) -> Result { + get_instruction_token_count(model) +} diff --git a/src/config.rs b/src/config.rs index 042723d..7bcc230 100644 --- a/src/config.rs +++ b/src/config.rs @@ -109,31 +109,3 @@ impl App { self.save() } } - -// Public interface functions -pub fn run_model(value: String) -> Result<()> { - App::new()?.update_model(value) -} - -pub fn run_max_tokens(max_tokens: usize) -> Result<()> { - App::new()?.update_max_tokens(max_tokens) -} - -pub fn run_max_commit_length(max_commit_length: usize) -> Result<()> { - App::new()?.update_max_commit_length(max_commit_length) -} - -pub fn run_openai_api_key(value: String) -> Result<()> { - App::new()?.update_openai_api_key(value) -} - -pub fn run_reset() -> Result<()> { - if !PATHS.file.exists() { - eprintln!("{} Configuration file does not exist!", Emoji("🤷", ":-)")); - return Ok(()); - } - - std::fs::remove_file(PATHS.file.to_str().unwrap()).context("Failed to remove config file")?; - println!("{} Configuration reset!", Emoji("✨", ":-)")); - Ok(()) -} diff --git a/src/finetune.rs b/src/finetune.rs new file mode 100644 index 0000000..bd633bd --- /dev/null +++ b/src/finetune.rs @@ -0,0 +1,444 @@ +use std::fs; +use std::io::Write; +use std::sync::Arc; +use std::collections::HashSet; + +use anyhow::{Context, Result}; +use colored::*; 
+use git2::{DiffOptions, Repository}; +use rand::prelude::*; +use serde::{Deserialize, Serialize}; +use structopt::StructOpt; +use tokio::sync::{mpsc, Mutex}; +use tokio::task; +use indicatif::{ProgressBar, ProgressStyle}; +use ai::model::Model; +use ai::openai; + +/// Represents command-line arguments for fine-tuning +#[derive(Debug, Clone, Deserialize, Serialize, StructOpt)] +pub struct FinetuneArgs { + #[structopt(long, default_value = "resources/prompt.md")] + pub prompt_file: String, + + #[structopt(long, default_value = "finetune_train.jsonl")] + pub train_file: String, + + #[structopt(long, default_value = "finetune_verify.jsonl")] + pub verify_file: String, + + #[structopt(long, default_value = "50")] + pub num_commits: u32, + + #[structopt(long)] + pub parallel_requests: Option, + + #[structopt(long, default_value = "0.8")] + pub quality_threshold: f32, + + #[structopt(long)] + pub verbose: bool, + + #[structopt(long, default_value = "5000")] + pub max_diff_size: usize +} + +#[derive(Debug, Serialize, Deserialize)] +struct Message { + role: String, + content: String +} + +#[derive(Debug, Serialize, Deserialize)] +struct TrainingExample { + messages: Vec +} + +/// Track the types of changes in a commit +#[derive(Debug)] +struct CommitChangeTypes { + #[allow(dead_code)] + has_additions: bool, + #[allow(dead_code)] + has_deletions: bool, + #[allow(dead_code)] + has_modifications: bool, + #[allow(dead_code)] + has_renames: bool, + #[allow(dead_code)] + has_file_mode_changes: bool +} + +/// Simple container for commit info +#[derive(Debug)] +struct CommitInfo { + message: String, + diff: String, + #[allow(dead_code)] + change_types: CommitChangeTypes +} + +pub async fn run(args: FinetuneArgs) -> Result<()> { + println!("🔄 Starting fine-tuning data export..."); + + // Reset (truncate) the output files + fs::write(&args.train_file, "")?; + fs::write(&args.verify_file, "")?; + + // Track seen messages to prevent duplicates + let seen_messages = 
Arc::new(Mutex::new(HashSet::new())); + + // 1. Load system prompt + let prompt_content = + fs::read_to_string(&args.prompt_file).with_context(|| format!("Failed to read prompt file: {}", args.prompt_file))?; + + // 2. Open local repository and setup commit processing + println!("📚 Collecting commit history..."); + let repo = Repository::open(".")?; + let mut revwalk = repo.revwalk()?; + revwalk.push_head()?; + + let mut total_checked = 0; + let mut valid_commits = 0; + let mut commit_data = Vec::new(); + + let collect_pb = ProgressBar::new_spinner(); + collect_pb.set_style( + ProgressStyle::default_spinner() + .template("{spinner:.green} Processing commits: {pos} found ({msg})") + .unwrap() + ); + + // Process commits as we find them + for oid in revwalk { + total_checked += 1; + if let Ok(id) = oid { + if let Ok(commit) = repo.find_commit(id) { + let message = commit.message().unwrap_or(""); + if (20..500).contains(&message.len()) && commit.parent_count() == 1 { + let parent = commit.parent(0)?; + let parent_tree = parent.tree()?; + let commit_tree = commit.tree()?; + let mut diff_opts = DiffOptions::new(); + let diff = repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), Some(&mut diff_opts))?; + + let mut diff_text = String::new(); + let mut total_diff_size = 0; + let mut should_skip = false; + + diff.print(git2::DiffFormat::Patch, |_, _, line| { + if let Ok(content) = std::str::from_utf8(line.content()) { + total_diff_size += content.len(); + if total_diff_size <= args.max_diff_size { + diff_text.push(line.origin()); + diff_text.push_str(content); + } else { + should_skip = true; + } + } + true + })?; + + if !should_skip { + commit_data.push((message.to_string(), diff_text)); + valid_commits += 1; + collect_pb.set_position(valid_commits as u64); + collect_pb.set_message(format!("latest: {:.40}...", message)); + } + } + } + } + if valid_commits >= args.num_commits as usize * 3 { + break; + } + } + + if args.verbose { + println!(" Checked {} commits, 
found {} valid ones", total_checked, valid_commits); + } + collect_pb.finish_with_message(format!("Found {} commits to process", valid_commits)); + + // Shuffle the collected commits for randomization + let mut rng = rand::rngs::ThreadRng::default(); + commit_data.shuffle(&mut rng); + let commit_data = Arc::new(commit_data); + + // Setup processing channel + let num_workers = args.parallel_requests.unwrap_or_else(num_cpus::get); + let (tx, mut rx) = mpsc::channel(num_workers * 2); + let approved_commits = Arc::new(Mutex::new(0usize)); + let threshold = args.quality_threshold; + + // Create progress bar for approved commits + let process_pb = ProgressBar::new(args.num_commits as u64); + process_pb.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} approved ({eta})") + .unwrap() + .progress_chars("#>-") + ); + + // Spawn workers for quality checking + let mut workers = Vec::new(); + for worker_id in 0..num_workers { + let tx = tx.clone(); + let approved = Arc::clone(&approved_commits); + let seen = Arc::clone(&seen_messages); + let pb = process_pb.clone(); + let verbose = args.verbose; + let target_commits = args.num_commits; + let commit_data = Arc::clone(&commit_data); + let start_idx = worker_id * commit_data.len() / num_workers; + let end_idx = ((worker_id + 1) * commit_data.len() / num_workers).min(commit_data.len()); + + let worker = task::spawn(async move { + for (message, diff) in commit_data[start_idx..end_idx].iter() { + let current_approved = { + let count = approved.lock().await; + *count + }; + if current_approved >= target_commits as usize { + break; + } + let is_duplicate = { + let mut seen = seen.lock().await; + if seen.contains(message) { + true + } else { + seen.insert(message.clone()); + false + } + }; + if !is_duplicate { + if let Ok(score) = rate_commit_quality(&CommitInfo { + message: message.clone(), + diff: diff.clone(), + change_types: CommitChangeTypes { + has_additions: false, + 
has_deletions: false, + has_modifications: false, + has_renames: false, + has_file_mode_changes: false + } + }) + .await + { + if score >= threshold { + if let Ok(cleaned_message) = cleanup_commit_message(message).await { + let mut count = approved.lock().await; + *count += 1; + pb.set_position(*count as u64); + if verbose { + println!("✓ {} (score: {:.2})", cleaned_message.bright_green(), score); + } + if tx.send((message.clone(), diff.clone())).await.is_err() { + break; + } + } + } + } + } + } + }); + workers.push(worker); + } + drop(tx); + + // Process approved commits + let mut approved_count = 0; + let train_size = args.num_commits / 2; + let mut train_file = fs::OpenOptions::new() + .create(true) + .append(true) + .open(&args.train_file)?; + let mut verify_file = fs::OpenOptions::new() + .create(true) + .append(true) + .open(&args.verify_file)?; + + while let Some((message, diff)) = rx.recv().await { + if approved_count >= args.num_commits as usize { + break; + } + let cleaned_message = cleanup_commit_message(&message).await?; + if cleaned_message.trim().is_empty() { + continue; + } + let is_duplicate = { + let mut seen = seen_messages.lock().await; + if seen.contains(&cleaned_message) { + true + } else { + seen.insert(cleaned_message.clone()); + false + } + }; + if is_duplicate { + continue; + } + // Run scoring on the cleaned output + let cleaned_score = rate_cleaned_commit_message(&cleaned_message).await?; + if args.verbose { + println!("Cleaned: {} (score: {:.2})", cleaned_message, cleaned_score); + } + let example = TrainingExample { + messages: vec![ + Message { + role: "system".to_string(), + content: prompt_content.clone() + }, + Message { role: "user".to_string(), content: diff }, + Message { + role: "assistant".to_string(), + content: cleaned_message + }, + ] + }; + let json = serde_json::to_string(&example)?; + if approved_count < train_size as usize { + writeln!(train_file, "{}", json)?; + } else { + writeln!(verify_file, "{}", json)?; + } + 
approved_count += 1; + } + + for worker in workers { + worker.await?; + } + process_pb.finish(); + + println!("\n✨ Successfully exported {} training examples:", approved_count); + println!(" - {} training examples in {}", train_size, args.train_file); + println!(" - {} verification examples in {}", args.num_commits - train_size, args.verify_file); + + Ok(()) +} + +/// Cleanup commit message using GPT4oMini +async fn cleanup_commit_message(original_msg: &str) -> Result { + if original_msg.trim().is_empty() { + return Ok(String::new()); + } + let first_line = original_msg + .lines() + .next() + .unwrap_or("") + .trim() + .trim_start_matches("```") + .trim_end_matches("```") + .trim_start_matches("plaintext") + .trim_start_matches("git") + .trim(); + let system_prompt = "\ +You are an expert at cleaning up git commit messages. \ +Your task is to:\n\ +1. Remove any ticket references or extraneous tags\n\ +2. Keep it short, focusing on meaningful description\n\ +3. Do not end the message with a period\n\ +4. Always start with a capitalized verb (Add, Fix, Update, etc)\n\ +5. Drop the type prefix if it is present\n\ +6. 
Return ONLY the cleaned message without any formatting or backticks"; + let req = openai::Request { + system: system_prompt.to_string(), + prompt: first_line.to_string(), + max_tokens: 100, + model: Model::GPT4oMini + }; + let response = openai::call(req).await?; + let cleaned = response + .response + .trim() + .trim_start_matches("```") + .trim_end_matches("```") + .trim_start_matches("plaintext") + .trim_start_matches("git") + .trim() + .to_string(); + if cleaned.is_empty() + || cleaned.to_lowercase().contains("please") + || cleaned.to_lowercase().contains("provide") + || cleaned.to_lowercase().contains("didn't") + || cleaned.to_lowercase().contains("error") + || cleaned.to_lowercase().contains("missing") + || cleaned.to_lowercase().contains("sorry") + || cleaned.to_lowercase().contains("unable") + || cleaned.to_lowercase().contains("could not") + || cleaned.to_lowercase().contains("cannot") + || cleaned.to_lowercase().contains("failed") + || cleaned.len() > 100 + { + return Ok(String::new()); + } + let message = if cleaned.contains(": ") { + let parts: Vec<&str> = cleaned.splitn(2, ": ").collect(); + parts.get(1).unwrap_or(&cleaned.as_str()).trim().to_string() + } else { + cleaned + }; + let mut chars = message.chars(); + Ok(if let Some(first_char) = chars.next() { + if first_char.is_lowercase() { + first_char.to_uppercase().collect::() + chars.as_str() + } else { + message + } + } else { + message + }) +} + +/// Rate commit quality using GPT4oMini +async fn rate_commit_quality(commit_info: &CommitInfo) -> Result { + let system_prompt = "\ +You are an expert at evaluating git commit quality. Your task is to rate this commit from 0.0 to 1.0 based on: + +1. Commit Message Quality (50% of score): + - Is the first line concise (under 72 chars)? + - If present, is the body descriptive and separated by blank line? + - Is the message present tense? + - Is the message written in the active voice? + - Is the message clear and concise? + +2. 
Diff Alignment (50% of score): + - Does the message accurately describe the changes in the diff? + - Are all significant changes reflected in the message? + - Is the scope of changes consistent with the message? + +Scoring Guide: +- 0.0-0.3: Poor quality (wrong format, unclear or misleading, conventional commit format) +- 0.4-0.6: Mediocre quality (basic description) +- 0.7-0.8: Good quality (follows format, clear message, mostly aligned with changes) +- 0.9-1.0: Excellent (perfect format and description of changes) + +Return ONLY a number between 0.0 and 1.0"; + let prompt = format!( + "Evaluate this commit:\n\nCommit Message:\n{}\n\nCode Changes:\n{}\n\nScore (0.0-1.0):", + commit_info.message, commit_info.diff + ); + let req = openai::Request { + system: system_prompt.to_string(), + prompt, + max_tokens: 10, + model: Model::GPT4oMini + }; + let response = openai::call(req).await?; + let score = response.response.trim().parse::().unwrap_or(0.0); + Ok(score.clamp(0.0, 1.0)) +} + +/// Rate cleaned commit message quality using GPT4oMini +async fn rate_cleaned_commit_message(cleaned_message: &str) -> Result { + let system_prompt = "\ +You are an expert at evaluating cleaned git commit messages. Rate the quality of this commit message on a scale from 0.0 to 1.0, based solely on clarity, conciseness, and adherence to conventional commit style guidelines. 
Return ONLY a number between 0.0 and 1.0."; + let prompt = format!("Cleaned Commit Message:\n{}\nScore (0.0-1.0):", cleaned_message); + let req = openai::Request { + system: system_prompt.to_string(), + prompt, + max_tokens: 10, + model: Model::GPT4oMini + }; + let response = openai::call(req).await?; + let score = response.response.trim().parse::().unwrap_or(0.0); + Ok(score.clamp(0.0, 1.0)) +} diff --git a/src/install.rs b/src/install.rs index 5152384..dc32602 100644 --- a/src/install.rs +++ b/src/install.rs @@ -1,6 +1,7 @@ use anyhow::{bail, Result}; use ai::filesystem::Filesystem; +#[allow(dead_code)] pub fn run() -> Result<()> { let fs = Filesystem::new()?; let hook_bin = fs.git_ai_hook_bin_path()?; diff --git a/src/main.rs b/src/main.rs index 6cb3ff9..57bdb26 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,17 +3,26 @@ mod install; mod reinstall; mod config; +use std::path::PathBuf; + use structopt::StructOpt; use anyhow::Result; use dotenv::dotenv; +use crate::config::App; + +mod finetune; +use crate::finetune::FinetuneArgs; + #[derive(StructOpt)] #[structopt(name = "git-ai", about = "A git extension that uses OpenAI to generate commit messages")] enum Cli { #[structopt(about = "Installs the git-ai hook")] Hook(HookSubcommand), #[structopt(about = "Sets or gets configuration values")] - Config(ConfigSubcommand) + Config(ConfigSubcommand), + #[structopt(about = "Exports training data for fine-tuning")] + Finetune(FinetuneArgs) } #[derive(StructOpt)] @@ -65,6 +74,120 @@ struct Model { value: String } +#[derive(Debug, StructOpt)] +#[structopt(name = "git-ai")] +pub struct Args { + #[structopt(subcommand)] + #[allow(dead_code)] + cmd: Command +} + +#[derive(Debug, StructOpt)] +pub enum Command { + #[structopt(name = "optimize")] + Optimize { + #[structopt(long, default_value = "resources/prompt.md")] + prompt_file: String, + + #[structopt(long, default_value = "stats.json")] + stats_file: String, + + #[structopt(long, default_value = "tmp")] + temp_dir: String, 
+ + #[structopt(long, default_value = "100")] + iterations: u32, + + #[structopt(long, default_value = "0.8")] + threshold: f32, + + #[structopt(long, default_value = "ai")] + scoring_mode: String, + + #[structopt(long)] + verbose: bool + } // ... other commands ... +} + +// Hook installation functions +fn run_install() -> Result<()> { + let hook_path = PathBuf::from(".git/hooks/prepare-commit-msg"); + let current_exe = std::env::current_exe()?; + let hook_binary = current_exe.parent().unwrap().join("git-ai-hook"); + + if hook_path.exists() { + std::fs::remove_file(&hook_path)?; + } + + std::os::unix::fs::symlink(&hook_binary, &hook_path)?; + println!("🔗 Hook symlinked successfully to \x1B[3m{}\x1B[0m", hook_path.display()); + + Ok(()) +} + +fn run_uninstall() -> Result<()> { + let hook_path = PathBuf::from(".git/hooks/prepare-commit-msg"); + + if hook_path.exists() { + std::fs::remove_file(&hook_path)?; + println!("🗑️ Hook uninstalled successfully from \x1B[3m{}\x1B[0m", hook_path.display()); + } else { + println!("⚠️ No hook found at \x1B[3m{}\x1B[0m", hook_path.display()); + } + + Ok(()) +} + +fn run_reinstall() -> Result<()> { + run_uninstall()?; + run_install()?; + Ok(()) +} + +// Config management functions +fn run_config_reset() -> Result<()> { + let config_dir = dirs::config_dir() + .ok_or_else(|| anyhow::anyhow!("Could not find config directory"))? 
+ .join("git-ai"); + + if config_dir.exists() { + std::fs::remove_dir_all(&config_dir)?; + println!("🗑️ Configuration reset successfully"); + } else { + println!("⚠️ No configuration found to reset"); + } + + Ok(()) +} + +fn run_config_model(value: String) -> Result<()> { + let mut app = App::new()?; + app.update_model(value.clone())?; + println!("✅ Model set to: {}", value); + Ok(()) +} + +fn run_config_max_tokens(max_tokens: usize) -> Result<()> { + let mut app = App::new()?; + app.update_max_tokens(max_tokens)?; + println!("✅ Max tokens set to: {}", max_tokens); + Ok(()) +} + +fn run_config_max_commit_length(max_commit_length: usize) -> Result<()> { + let mut app = App::new()?; + app.update_max_commit_length(max_commit_length)?; + println!("✅ Max commit length set to: {}", max_commit_length); + Ok(()) +} + +fn run_config_openai_api_key(value: String) -> Result<()> { + let mut app = App::new()?; + app.update_openai_api_key(value)?; + println!("✅ OpenAI API key updated"); + Ok(()) +} + #[tokio::main(flavor = "multi_thread")] async fn main() -> Result<()> { dotenv().ok(); @@ -75,37 +198,40 @@ async fn main() -> Result<()> { Cli::Hook(sub) => match sub { HookSubcommand::Install => { - install::run()?; + run_install()?; } HookSubcommand::Uninstall => { - uninstall::run()?; + run_uninstall()?; } HookSubcommand::Reinstall => { - reinstall::run()?; + run_reinstall()?; } }, Cli::Config(config) => match config { ConfigSubcommand::Reset => { - config::run_reset()?; + run_config_reset()?; } ConfigSubcommand::Set(set) => match set { SetSubcommand::Model(model) => { - config::run_model(model.value)?; + run_config_model(model.value)?; } SetSubcommand::MaxTokens { max_tokens } => { - config::run_max_tokens(max_tokens)?; + run_config_max_tokens(max_tokens)?; } SetSubcommand::MaxCommitLength { max_commit_length } => { - config::run_max_commit_length(max_commit_length)?; + run_config_max_commit_length(max_commit_length)?; } SetSubcommand::OpenaiApiKey { value } => { - 
config::run_openai_api_key(value)?; + run_config_openai_api_key(value)?; } }, }, + Cli::Finetune(args) => { + finetune::run(args).await?; + } } Ok(()) diff --git a/src/openai.rs b/src/openai.rs index 3cf7200..7827ab4 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,11 +1,17 @@ use async_openai::types::{ChatCompletionRequestSystemMessageArgs, ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs}; use async_openai::config::OpenAIConfig; use async_openai::Client; -use anyhow::{Context, Result}; +use async_openai::error::OpenAIError; +use anyhow::{anyhow, Context, Result}; +use colored::*; use crate::{config, profile}; use crate::model::Model; +const MAX_CONTEXT_LENGTH: usize = 128000; +const BUFFER_TOKENS: usize = 30000; // Large buffer for safety +const MAX_ATTEMPTS: usize = 3; + #[derive(Debug, Clone, PartialEq)] pub struct Response { pub response: String @@ -19,16 +25,139 @@ pub struct Request { pub model: Model } +/// Generates an improved commit message using the provided prompt and diff +pub async fn generate_commit_message(diff: &str, prompt: &str, file_context: &str, author: &str, date: &str) -> Result { + profile!("Generate commit message"); + let system_prompt = format!( + "You are an expert at writing clear, concise git commit messages. 
\ + Your task is to generate a commit message for the following code changes.\n\n\ + {}\n\n\ + Consider:\n\ + - Author: {}\n\ + - Date: {}\n\ + - Files changed: {}\n", + prompt, author, date, file_context + ); + + let response = call(Request { + system: system_prompt, + prompt: format!("Generate a commit message for this diff:\n\n{}", diff), + max_tokens: 256, + model: Model::GPT4oMini + }) + .await?; + + Ok(response.response.trim().to_string()) +} + +/// Scores a commit message against the original using AI evaluation +pub async fn score_commit_message(message: &str, original: &str) -> Result { + profile!("Score commit message"); + let system_prompt = "You are an expert at evaluating git commit messages. Score the following commit message on these criteria: + - Accuracy (0-1): How well does it describe the actual changes? + - Clarity (0-1): How clear and understandable is the message? + - Brevity (0-1): Is it concise while being informative? + - Categorization (0-1): Does it properly categorize the type of change? 
+ + Return ONLY a JSON object containing these scores and brief feedback."; + + let response = call(Request { + system: system_prompt.to_string(), + prompt: format!("Original commit message:\n{}\n\nGenerated commit message:\n{}", original, message), + max_tokens: 512, + model: Model::GPT4oMini + }) + .await?; + + // Parse the JSON response to get the overall score + let parsed: serde_json::Value = serde_json::from_str(&response.response).context("Failed to parse scoring response as JSON")?; + + let accuracy = parsed["accuracy"].as_f64().unwrap_or(0.0) as f32; + let clarity = parsed["clarity"].as_f64().unwrap_or(0.0) as f32; + let brevity = parsed["brevity"].as_f64().unwrap_or(0.0) as f32; + let categorization = parsed["categorization"].as_f64().unwrap_or(0.0) as f32; + + Ok((accuracy + clarity + brevity + categorization) / 4.0) +} + +/// Optimizes a prompt based on performance metrics +pub async fn optimize_prompt(current_prompt: &str, performance_metrics: &str) -> Result { + profile!("Optimize prompt"); + let system_prompt = "You are an expert at optimizing prompts for AI systems. \ + Your task is to improve a prompt used for generating git commit messages \ + based on performance metrics. 
Return ONLY the improved prompt text."; + + let response = call(Request { + system: system_prompt.to_string(), + prompt: format!( + "Current prompt:\n{}\n\nPerformance metrics:\n{}\n\n\ + Suggest an improved version of this prompt that addresses any weaknesses \ + shown in the metrics while maintaining its strengths.", + current_prompt, performance_metrics + ), + max_tokens: 1024, + model: Model::GPT4oMini + }) + .await?; + + Ok(response.response.trim().to_string()) +} + +fn truncate_to_fit(text: &str, max_tokens: usize, model: &Model) -> Result { + let token_count = model.count_tokens(text)?; + if token_count <= max_tokens { + return Ok(text.to_string()); + } + + let lines: Vec<&str> = text.lines().collect(); + + // Try increasingly aggressive truncation until we fit + for attempt in 0..MAX_ATTEMPTS { + let portion_size = match attempt { + 0 => lines.len() / 8, // First try: Keep 25% (12.5% each end) + 1 => lines.len() / 12, // Second try: Keep ~16% (8% each end) + _ => lines.len() / 20 // Final try: Keep 10% (5% each end) + }; + + let mut truncated = Vec::new(); + truncated.extend(lines.iter().take(portion_size)); + truncated.push("... (truncated for length) ..."); + truncated.extend(lines.iter().rev().take(portion_size).rev()); + + let result = truncated.join("\n"); + let new_token_count = model.count_tokens(&result)?; + + if new_token_count <= max_tokens { + return Ok(result); + } + } + + // If all attempts failed, return a minimal version + let mut minimal = Vec::new(); + minimal.extend(lines.iter().take(lines.len() / 50)); + minimal.push("... 
(severely truncated for length) ..."); + minimal.extend(lines.iter().rev().take(lines.len() / 50).rev()); + Ok(minimal.join("\n")) +} + pub async fn call(request: Request) -> Result { profile!("OpenAI API call"); - let api_key = config::APP - .openai_api_key - .clone() - .context("Failed to get OpenAI API key, please run `git-ai config set openai-api")?; + let api_key = config::APP.openai_api_key.clone().context(format!( + "{} OpenAI API key not found.\n Run: {}", + "ERROR:".bold().bright_red(), + "git-ai config set openai-api-key ".yellow() + ))?; let config = OpenAIConfig::new().with_api_key(api_key); let client = Client::with_config(config); + // Calculate available tokens for content + let system_tokens = request.model.count_tokens(&request.system)?; + let available_tokens = MAX_CONTEXT_LENGTH.saturating_sub(system_tokens + BUFFER_TOKENS + request.max_tokens as usize); + + // Truncate prompt if needed + let truncated_prompt = truncate_to_fit(&request.prompt, available_tokens, &request.model)?; + let request = CreateChatCompletionRequestArgs::default() .max_tokens(request.max_tokens) .model(request.model.to_string()) @@ -38,7 +167,7 @@ pub async fn call(request: Request) -> Result { .build()? .into(), ChatCompletionRequestUserMessageArgs::default() - .content(request.prompt) + .content(truncated_prompt) .build()? .into() ]) @@ -46,11 +175,43 @@ pub async fn call(request: Request) -> Result { { profile!("OpenAI request/response"); - let response = client - .chat() - .create(request) - .await - .context("Failed to create chat completion")?; + let response = match client.chat().create(request).await { + Ok(response) => response, + Err(err) => { + let error_msg = match err { + OpenAIError::ApiError(e) => + format!( + "{} {}\n {}\n\nDetails:\n {}\n\nSuggested Actions:\n 1. {}\n 2. {}\n 3. 
{}", + "ERROR:".bold().bright_red(), + "OpenAI API error:".bright_white(), + e.message.dimmed(), + "Failed to create chat completion.".dimmed(), + "Ensure your OpenAI API key is valid".yellow(), + "Check your account credits".yellow(), + "Verify OpenAI service availability".yellow() + ), + OpenAIError::Reqwest(e) => + format!( + "{} {}\n {}\n\nDetails:\n {}\n\nSuggested Actions:\n 1. {}\n 2. {}", + "ERROR:".bold().bright_red(), + "Network error:".bright_white(), + e.to_string().dimmed(), + "Failed to connect to OpenAI service.".dimmed(), + "Check your internet connection".yellow(), + "Verify OpenAI service is not experiencing downtime".yellow() + ), + _ => + format!( + "{} {}\n {}\n\nDetails:\n {}", + "ERROR:".bold().bright_red(), + "Unexpected error:".bright_white(), + err.to_string().dimmed(), + "An unexpected error occurred while communicating with OpenAI.".dimmed() + ), + }; + return Err(anyhow!(error_msg)); + } + }; let content = response .choices diff --git a/src/profiling.rs b/src/profiling.rs index bd68bbe..37e4482 100644 --- a/src/profiling.rs +++ b/src/profiling.rs @@ -19,8 +19,10 @@ impl Profile { impl Drop for Profile { fn drop(&mut self) { - let duration = self.elapsed(); - eprintln!("{}: {:.2?}", self.name.blue(), duration); + if log::log_enabled!(log::Level::Debug) { + let duration = self.elapsed(); + eprintln!("{}: {:.2?}", self.name.blue(), duration); + } } } diff --git a/src/reinstall.rs b/src/reinstall.rs index 9ce9159..f572d3e 100644 --- a/src/reinstall.rs +++ b/src/reinstall.rs @@ -3,8 +3,10 @@ use anyhow::Result; use ai::filesystem::Filesystem; use colored::*; +#[allow(dead_code)] const EMOJI: Emoji<'_, '_> = Emoji("🔗", ""); +#[allow(dead_code)] pub fn run() -> Result<()> { let fs = Filesystem::new()?; let hook_bin = fs.git_ai_hook_bin_path()?; diff --git a/src/uninstall.rs b/src/uninstall.rs index cf599b3..09f6b3b 100644 --- a/src/uninstall.rs +++ b/src/uninstall.rs @@ -9,6 +9,7 @@ use git2::{Repository, RepositoryOpenFlags as Flags}; use 
thiserror::Error; #[derive(Error, Debug)] +#[allow(dead_code)] pub enum InstallError { #[error("Failed to get current directory")] CurrentDir, @@ -18,8 +19,10 @@ pub enum InstallError { HookExists(PathBuf) } +#[allow(dead_code)] const EMOJI: Emoji<'_, '_> = Emoji("🔗", ""); +#[allow(dead_code)] pub fn run() -> Result<()> { let current_dir = env::current_dir().context(InstallError::CurrentDir)?; let repo = Repository::open_ext(current_dir, Flags::empty(), Vec::<&Path>::new()).context(InstallError::OpenRepo)?; diff --git a/stats.json b/stats.json new file mode 100644 index 0000000..6b676ff --- /dev/null +++ b/stats.json @@ -0,0 +1,9 @@ +{ + "iterations": [], + "best_prompt": "", + "best_score": 0.0, + "prompt_scores": { + "You are an AI assistant that generates concise and precise git commit messages based solely on the provided diffs. Please adhere to the following enhanced guidelines:\n\n- **Structure**: Begin with a clear, present-tense summary of the change in the conventional commit format. Use a single-line summary for the change, followed by a blank line. As a best practice, consider including only one bullet point detailing context if essential, but refrain from excessive elaboration.\n\n- **Content**: Commit messages must strictly describe the lines marked with + or - in the diff. Avoid including surrounding context, unmarked lines, or irrelevant details. Explicitly refrain from mentioning implications, reasoning, motivations, or any external context not explicitly reflected in the diff. Make sure to avoid any interpretations or assumptions beyond what is clearly stated.\n\n- **Changes**: Clearly articulate what was added, removed, or modified based solely on what is visible in the diff. Use phrases such as \"Based only on the changes visible in the diff, this commit...\" to emphasize an evidence-based approach while outlining changes directly.\n\n- **Commit Types**: Categorize the change accurately using conventional commit message formatting. 
Use types such as `feat:`, `fix:`, `chore:`, etc., based strictly on the changes included in the diff. Reinforce that only one commit type should be reflected per message. If uncertain about the appropriate category, default to using `chore:` accompanied by a note stating 'clarification needed'. Include explicit examples of each type and their common scenarios.\n\n- **Consistency**: Ensure uniformity in tense, punctuation, and capitalization throughout the message. Use present tense and imperative form, such as \"Add x to y\" instead of \"Added x to y\". \n\n- **Clarity & Brevity**: Craft messages that are clear and easy to understand, succinctly capturing the essence of the changes. Limit the message to a maximum of {{max_commit_length}} characters while ensuring enough detail is provided on the primary action taken. Avoid jargon; provide plain definitions for any necessary technical terms.\n\n- **Accuracy & Hallucination Prevention**: Rigorously reflect only the changes visible in the diff. Avoid any speculation or inclusion of content not substantiated by the diff. Restate the necessity for messages to focus exclusively on aspects evident in the diff and to completely avoid extrapolation or assumptions about motivations or implications.\n\n- **Review Process**: After generating a commit message, compare it against the diff and the": 1.0, + "You are an AI assistant that generates concise and precise git commit messages based solely on the provided diffs. Please adhere to the following enhanced guidelines:\n\n- **Structure**: Begin with a clear, present-tense summary of the change in the conventional commit format. Use a single-line summary for the change, followed by a blank line. As a best practice, consider including only one bullet point detailing context if essential, but refrain from excessive elaboration.\n\n- **Content**: Commit messages must strictly describe the lines marked with + or - in the diff. 
Avoid including surrounding context, unmarked lines, or irrelevant details. Explicitly refrain from mentioning implications, reasoning, motivations, or any external context not explicitly reflected in the diff. Make sure to avoid any interpretations or assumptions beyond what is clearly stated.\n\n- **Changes**: Clearly articulate what was added, removed, or modified based solely on what is visible in the diff. Use phrases such as \"Based only on the changes visible in the diff, this commit...\" to emphasize an evidence-based approach while outlining changes directly.\n\n- **Commit Types**: Categorize the change accurately using conventional commit message formatting. Use types such as:\n - `feat:` for new features\n - `fix:` for bug fixes\n - `docs:` for documentation changes\n - `style:` for formatting changes\n - `refactor:` for code restructuring\n - `test:` for adding/modifying tests\n - `chore:` for maintenance tasks\n If uncertain about the category, default to `chore:` with a note stating 'clarification needed'.\n\n- **Consistency**: Ensure uniformity in tense, punctuation, and capitalization throughout the message. Use present tense and imperative form, such as \"Add x to y\" instead of \"Added x to y\".\n\n- **Clarity & Brevity**: Craft messages that are clear and easy to understand, succinctly capturing the essence of the changes. Limit the message to a maximum of 72 characters for the first line, while ensuring enough detail is provided on the primary action taken. Avoid jargon; provide plain definitions for any necessary technical terms.\n\n- **Accuracy & Hallucination Prevention**: Rigorously reflect only the changes visible in the diff. Avoid any speculation or inclusion of content not substantiated by the diff. 
Restate the necessity for messages to focus exclusively on aspects evident in the diff and to completely avoid extrapolation or assumptions about motivations or implications.\n\n- **Review Process**: Before finalizing each commit message:\n 1. Verify that the message accurately reflects only the changes in the diff\n 2. Confirm the commit type matches the actual changes\n 3. Check that the message follows the structure and formatting guidelines\n 4. Ensure no external context or assumptions are included\n 5. Validate that the message is clear and understandable to other developers\n": 1.0 + } +} From 1e56b0db07b11bfb9ee2b078538bef5be32fb7ee Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:13:46 +0100 Subject: [PATCH 25/56] ``` Add instruction template constant to commit.rs - Introduce `INSTRUCTION_TEMPLATE` constant for prompt file content. ``` --- src/commit.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/commit.rs b/src/commit.rs index 450a1e9..e78ab00 100644 --- a/src/commit.rs +++ b/src/commit.rs @@ -5,12 +5,14 @@ use anyhow::{bail, Result}; use crate::{config, openai, profile}; use crate::model::Model; +/// The instruction template included at compile time +const INSTRUCTION_TEMPLATE: &str = include_str!("../resources/prompt.md"); + /// Returns the instruction template for the AI model. /// This template guides the model in generating appropriate commit messages. fn get_instruction_template() -> String { profile!("Generate instruction template"); - let prompt = fs::read_to_string("resources/prompt.md").unwrap_or_else(|_| String::from("Failed to read prompt.md")); - prompt.replace("{{max_commit_length}}", &config::APP.max_commit_length.unwrap_or(72).to_string()) + INSTRUCTION_TEMPLATE.replace("{{max_commit_length}}", &config::APP.max_commit_length.unwrap_or(72).to_string()) } /// Calculates the number of tokens used by the instruction template. 
From 119875037ae98154ca3829f730756f7f93dd0e7b Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:14:12 +0100 Subject: [PATCH 26/56] Remove unused import of `std::fs` from `commit.rs` file. --- src/commit.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/commit.rs b/src/commit.rs index e78ab00..2e89412 100644 --- a/src/commit.rs +++ b/src/commit.rs @@ -1,5 +1,3 @@ -use std::fs; - use anyhow::{bail, Result}; use crate::{config, openai, profile}; From 140f2df52155db22ca82c56dc8f02a3dbe9e2cbf Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:15:38 +0100 Subject: [PATCH 27/56] Remove unused import and adjust available tokens calculation - Remove the import of `Model` and update the `available_tokens` calculations in the `call` function. --- src/openai.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/openai.rs b/src/openai.rs index 7827ab4..2a7e008 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -8,8 +8,6 @@ use colored::*; use crate::{config, profile}; use crate::model::Model; -const MAX_CONTEXT_LENGTH: usize = 128000; -const BUFFER_TOKENS: usize = 30000; // Large buffer for safety const MAX_ATTEMPTS: usize = 3; #[derive(Debug, Clone, PartialEq)] @@ -151,9 +149,10 @@ pub async fn call(request: Request) -> Result { let config = OpenAIConfig::new().with_api_key(api_key); let client = Client::with_config(config); - // Calculate available tokens for content + // Calculate available tokens using model's context size let system_tokens = request.model.count_tokens(&request.system)?; - let available_tokens = MAX_CONTEXT_LENGTH.saturating_sub(system_tokens + BUFFER_TOKENS + request.max_tokens as usize); + let model_context_size = request.model.context_size(); + let available_tokens = model_context_size.saturating_sub(system_tokens + request.max_tokens as usize); // Truncate prompt if needed let truncated_prompt = truncate_to_fit(&request.prompt, available_tokens, &request.model)?; From 
e1f49e4848ae32901130c65de2637e15a1a8c145 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:16:43 +0100 Subject: [PATCH 28/56] Update max commit length in prompt guidelines - Change maximum commit length from 72 to {{max_commit_length}} characters. --- resources/prompt.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/prompt.md b/resources/prompt.md index c61a4a7..821cba1 100644 --- a/resources/prompt.md +++ b/resources/prompt.md @@ -8,7 +8,7 @@ You are an AI assistant that generates concise and precise git commit messages b - **Consistency**: Ensure uniformity in tense, punctuation, and capitalization throughout the message. Use present tense and imperative form, such as "Add x to y" instead of "Added x to y". -- **Clarity & Brevity**: Craft messages that are clear and easy to understand, succinctly capturing the essence of the changes. Limit the message to a maximum of 72 characters for the first line, while ensuring enough detail is provided on the primary action taken. Avoid jargon; provide plain definitions for any necessary technical terms. +- **Clarity & Brevity**: Craft messages that are clear and easy to understand, succinctly capturing the essence of the changes. Limit the message to a maximum of {{max_commit_length}} characters for the first line, while ensuring enough detail is provided on the primary action taken. Avoid jargon; provide plain definitions for any necessary technical terms. - **Accuracy & Hallucination Prevention**: Rigorously reflect only the changes visible in the diff. Avoid any speculation or inclusion of content not substantiated by the diff. Restate the necessity for messages to focus exclusively on aspects evident in the diff and to completely avoid extrapolation or assumptions about motivations or implications. 
From 0ad807470f4d43471600bdb14e9cbf273bd8a9e2 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:22:22 +0100 Subject: [PATCH 29/56] ``` Modify imports and refactor filesystem profiling Based only on the changes visible in the diff, this commit: - Removes unnecessary profiling statements and imports in `filesystem.rs`. - Adds an import for `App` from `config` in `main.rs`. ``` --- src/filesystem.rs | 34 ++++------------------------------ src/main.rs | 29 +++++++++++++++-------------- 2 files changed, 19 insertions(+), 44 deletions(-) diff --git a/src/filesystem.rs b/src/filesystem.rs index 8cb3acf..797f6e8 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -5,8 +5,6 @@ use std::os::unix::fs::symlink as symlink_unix; use anyhow::{bail, Context, Result}; use git2::{Repository, RepositoryOpenFlags as Flags}; -use crate::profile; - /// Error messages for filesystem operations const ERR_CURRENT_DIR: &str = "Failed to get current directory"; @@ -39,7 +37,6 @@ impl File { /// # Returns /// * `bool` - true if the file exists, false otherwise pub fn exists(&self) -> bool { - profile!("Check file exists"); self.path.exists() } @@ -48,7 +45,6 @@ impl File { /// # Returns /// * `Result<()>` - Success or an error if deletion fails pub fn delete(&self) -> Result<()> { - profile!("Delete file"); log::debug!("Removing file at {}", self); fs::remove_file(&self.path).with_context(|| format!("Failed to remove file at {}", self)) } @@ -61,7 +57,6 @@ impl File { /// # Returns /// * `Result<()>` - Success or an error if link creation fails pub fn symlink(&self, target: &File) -> Result<()> { - profile!("Create symlink"); log::debug!("Symlinking {} to {}", target, self); symlink_unix(&target.path, &self.path).with_context(|| format!("Failed to symlink {} to {}", target, self)) } @@ -71,7 +66,6 @@ impl File { /// # Returns /// * `Result` - The relative path as a Dir or an error pub fn relative_path(&self) -> Result { - profile!("Get relative file path"); let 
current_dir = env::current_dir().context(ERR_CURRENT_DIR)?; let relative = self .path @@ -86,7 +80,6 @@ impl File { /// # Returns /// * `Dir` - The parent directory pub fn parent(&self) -> Dir { - profile!("Get parent directory"); Dir::new(self.path.parent().unwrap_or(Path::new("")).to_path_buf()) } } @@ -131,7 +124,6 @@ impl Dir { /// # Returns /// * `bool` - true if the directory exists, false otherwise pub fn exists(&self) -> bool { - profile!("Check directory exists"); self.path.exists() } @@ -140,7 +132,6 @@ impl Dir { /// # Returns /// * `Result<()>` - Success or an error if creation fails pub fn create_dir_all(&self) -> Result<()> { - profile!("Create directory recursively"); log::debug!("Creating directory at {}", self); fs::create_dir_all(&self.path).with_context(|| format!("Failed to create directory at {}", self)) } @@ -150,7 +141,6 @@ impl Dir { /// # Returns /// * `Result` - The relative path or an error pub fn relative_path(&self) -> Result { - profile!("Get relative directory path"); let current_dir = env::current_dir().context(ERR_CURRENT_DIR)?; let relative = self .path @@ -180,32 +170,20 @@ impl Filesystem { /// # Returns /// * `Result` - The initialized filesystem or an error pub fn new() -> Result { - profile!("Initialize filesystem"); - // Get current directory - let current_dir = { - profile!("Get current directory"); - env::current_dir().context(ERR_CURRENT_DIR)? - }; + let current_dir = env::current_dir().context(ERR_CURRENT_DIR)?; // Get executable path - let git_ai_bin_path = { - profile!("Get executable path"); - env::current_exe().context("Failed to get current executable")? - }; + let git_ai_bin_path = env::current_exe().context("Failed to get current executable")?; // Open git repository - let repo = { - profile!("Open git repository"); - Repository::open_ext(¤t_dir, Flags::empty(), Vec::<&Path>::new()) - .with_context(|| format!("Failed to open repository at {}", current_dir.display()))? 
- }; + let repo = Repository::open_ext(¤t_dir, Flags::empty(), Vec::<&Path>::new()) + .with_context(|| format!("Failed to open repository at {}", current_dir.display()))?; // Get git path and ensure it's absolute let git_path = { let mut path = repo.path().to_path_buf(); if path.is_relative() { - profile!("Convert relative git path to absolute"); path = current_dir.join(path); } path @@ -213,7 +191,6 @@ impl Filesystem { // Get hook binary path let git_ai_hook_bin_path = { - profile!("Get hook binary path"); let hook_path = git_ai_bin_path .parent() .with_context(|| format!("Failed to get parent directory of {}", git_ai_bin_path.display()))? @@ -236,7 +213,6 @@ impl Filesystem { /// # Returns /// * `Result` - The hook binary path or an error pub fn git_ai_hook_bin_path(&self) -> Result { - profile!("Get hook binary file"); Ok(File::new(self.git_ai_hook_bin_path.clone())) } @@ -245,7 +221,6 @@ impl Filesystem { /// # Returns /// * `Dir` - The hooks directory path pub fn git_hooks_path(&self) -> Dir { - profile!("Get hooks directory"); Dir::new(self.git_hooks_path.clone()) } @@ -254,7 +229,6 @@ impl Filesystem { /// # Returns /// * `Result` - The hook path or an error pub fn prepare_commit_msg_path(&self) -> Result { - profile!("Get prepare-commit-msg hook path"); if !self.git_hooks_path.exists() { bail!("Hooks directory not found at {}", self.git_hooks_path.display()); } diff --git a/src/main.rs b/src/main.rs index 57bdb26..f967070 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,14 +2,14 @@ mod uninstall; mod install; mod reinstall; mod config; - -use std::path::PathBuf; +mod filesystem; use structopt::StructOpt; use anyhow::Result; use dotenv::dotenv; use crate::config::App; +use crate::filesystem::Filesystem; mod finetune; use crate::finetune::FinetuneArgs; @@ -111,28 +111,29 @@ pub enum Command { // Hook installation functions fn run_install() -> Result<()> { - let hook_path = PathBuf::from(".git/hooks/prepare-commit-msg"); - let current_exe = 
std::env::current_exe()?; - let hook_binary = current_exe.parent().unwrap().join("git-ai-hook"); + let fs = Filesystem::new()?; + let hook_bin = fs.git_ai_hook_bin_path()?; + let hook_file = fs.prepare_commit_msg_path()?; - if hook_path.exists() { - std::fs::remove_file(&hook_path)?; + if hook_file.exists() { + hook_file.delete()?; } - std::os::unix::fs::symlink(&hook_binary, &hook_path)?; - println!("🔗 Hook symlinked successfully to \x1B[3m{}\x1B[0m", hook_path.display()); + hook_file.symlink(&hook_bin)?; + println!("🔗 Hook symlinked successfully to \x1B[3m{}\x1B[0m", hook_file); Ok(()) } fn run_uninstall() -> Result<()> { - let hook_path = PathBuf::from(".git/hooks/prepare-commit-msg"); + let fs = Filesystem::new()?; + let hook_file = fs.prepare_commit_msg_path()?; - if hook_path.exists() { - std::fs::remove_file(&hook_path)?; - println!("🗑️ Hook uninstalled successfully from \x1B[3m{}\x1B[0m", hook_path.display()); + if hook_file.exists() { + hook_file.delete()?; + println!("🗑️ Hook uninstalled successfully from \x1B[3m{}\x1B[0m", hook_file); } else { - println!("⚠️ No hook found at \x1B[3m{}\x1B[0m", hook_path.display()); + println!("⚠️ No hook found at \x1B[3m{}\x1B[0m", hook_file); } Ok(()) From 4f71559c47a1bcca6793379fd90848c08d011b43 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:23:22 +0100 Subject: [PATCH 30/56] Add directory creation for hooks if it does not exist - Implement logic to check for the existence of the hooks directory and create it if it's missing. 
--- src/install.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/install.rs b/src/install.rs index dc32602..a8f222a 100644 --- a/src/install.rs +++ b/src/install.rs @@ -7,6 +7,11 @@ pub fn run() -> Result<()> { let hook_bin = fs.git_ai_hook_bin_path()?; let hook_file = fs.prepare_commit_msg_path()?; + // Ensure hooks directory exists + if !fs.git_hooks_path().exists() { + fs.git_hooks_path().create_dir_all()?; + } + if hook_file.exists() { bail!("Hook already exists at {}, please run 'git ai hook reinstall'", hook_file); } From 96aedfa8b66d84b387acbdf5ed34258a2eae2ba2 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:23:57 +0100 Subject: [PATCH 31/56] Add dead code allowance in filesystem.rs Based only on the changes visible in the diff, this commit adds a line to allow dead code in the `filesystem.rs` file. --- src/filesystem.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/filesystem.rs b/src/filesystem.rs index 797f6e8..980bb67 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use std::path::{Path, PathBuf}; use std::{env, fs}; use std::os::unix::fs::symlink as symlink_unix; From aa4d0734b8a047fae469fc5c2c4841b2beee767d Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:27:22 +0100 Subject: [PATCH 32/56] Revert "```" This reverts commit 7b9aa2fc415051e2ce211559b333f58aa2a14150. --- finetune.md | 85 --------- src/finetune.rs | 444 ------------------------------------------------ 2 files changed, 529 deletions(-) delete mode 100644 finetune.md delete mode 100644 src/finetune.rs diff --git a/finetune.md b/finetune.md deleted file mode 100644 index ca4be33..0000000 --- a/finetune.md +++ /dev/null @@ -1,85 +0,0 @@ -# Finetune.rs Workflow - -Here's a summary of the workflow in `finetune.rs`: - -- Uses GPT4o-mini model for OpenAI -- Generates training data in JSONL format for fine-tuning -- Splits data into training and verification sets - -1. 
**Initialize and Setup** - - - Creates empty train and verify files - - Sets up thread pool for parallel processing - - Initializes progress bars and counters - - Loads system prompt from `resources/prompt.md` - -2. **Collect Commit History** - - - Opens local git repository - - Walks through commit history - - Filters commits based on: - - Message length (20-500 chars) - - Non-merge commits only - - Diff size within limits (default 5000 chars) - - Collects valid commits up to 3x target number - - Shuffles commits for randomization - -3. **Process Commits in Parallel** - - - Spawns worker threads based on CPU count or user setting - - Each worker processes a subset of commits - - For each commit: - - Checks for duplicate messages - - Rates commit quality (0.0-1.0) - - Cleans up commit message - - Tracks approved commits with progress bar - - Stops when target number reached - -4. **Clean and Rate Commit Messages** - - - Cleanup process: - - Takes first line only - - Removes ticket references and tags - - Ensures proper capitalization - - Drops type prefixes - - Keeps messages short and meaningful - - Quality rating based on: - - Message format and clarity - - Diff alignment - - Present tense and active voice - - Description accuracy - -5. **Generate Training Data** - - - Creates JSONL entries with: - - System prompt - - Diff as user input - - Cleaned message as assistant output - - Splits data: - - 50% for training - - 50% for verification - - Prevents duplicate messages - - Validates cleaned messages - -6. 
**Track Progress and Results** - - Shows real-time progress: - - Commit collection progress - - Message cleaning progress - - Approval status - - Reports final statistics: - - Total commits processed - - Training examples count - - Verification examples count - - Distribution between files - -Key Features: - -- Parallel processing for better performance -- Double quality check (original and cleaned messages) -- Duplicate prevention at multiple stages -- Progress visualization with spinners and bars -- Verbose mode for detailed logging - -The key difference from optimize.rs is that finetune.rs focuses on generating high-quality training data for fine-tuning, while optimize.rs focuses on improving the system prompt itself. - -Note: Run sync, not async diff --git a/src/finetune.rs b/src/finetune.rs deleted file mode 100644 index bd633bd..0000000 --- a/src/finetune.rs +++ /dev/null @@ -1,444 +0,0 @@ -use std::fs; -use std::io::Write; -use std::sync::Arc; -use std::collections::HashSet; - -use anyhow::{Context, Result}; -use colored::*; -use git2::{DiffOptions, Repository}; -use rand::prelude::*; -use serde::{Deserialize, Serialize}; -use structopt::StructOpt; -use tokio::sync::{mpsc, Mutex}; -use tokio::task; -use indicatif::{ProgressBar, ProgressStyle}; -use ai::model::Model; -use ai::openai; - -/// Represents command-line arguments for fine-tuning -#[derive(Debug, Clone, Deserialize, Serialize, StructOpt)] -pub struct FinetuneArgs { - #[structopt(long, default_value = "resources/prompt.md")] - pub prompt_file: String, - - #[structopt(long, default_value = "finetune_train.jsonl")] - pub train_file: String, - - #[structopt(long, default_value = "finetune_verify.jsonl")] - pub verify_file: String, - - #[structopt(long, default_value = "50")] - pub num_commits: u32, - - #[structopt(long)] - pub parallel_requests: Option, - - #[structopt(long, default_value = "0.8")] - pub quality_threshold: f32, - - #[structopt(long)] - pub verbose: bool, - - #[structopt(long, 
default_value = "5000")] - pub max_diff_size: usize -} - -#[derive(Debug, Serialize, Deserialize)] -struct Message { - role: String, - content: String -} - -#[derive(Debug, Serialize, Deserialize)] -struct TrainingExample { - messages: Vec -} - -/// Track the types of changes in a commit -#[derive(Debug)] -struct CommitChangeTypes { - #[allow(dead_code)] - has_additions: bool, - #[allow(dead_code)] - has_deletions: bool, - #[allow(dead_code)] - has_modifications: bool, - #[allow(dead_code)] - has_renames: bool, - #[allow(dead_code)] - has_file_mode_changes: bool -} - -/// Simple container for commit info -#[derive(Debug)] -struct CommitInfo { - message: String, - diff: String, - #[allow(dead_code)] - change_types: CommitChangeTypes -} - -pub async fn run(args: FinetuneArgs) -> Result<()> { - println!("🔄 Starting fine-tuning data export..."); - - // Reset (truncate) the output files - fs::write(&args.train_file, "")?; - fs::write(&args.verify_file, "")?; - - // Track seen messages to prevent duplicates - let seen_messages = Arc::new(Mutex::new(HashSet::new())); - - // 1. Load system prompt - let prompt_content = - fs::read_to_string(&args.prompt_file).with_context(|| format!("Failed to read prompt file: {}", args.prompt_file))?; - - // 2. 
Open local repository and setup commit processing - println!("📚 Collecting commit history..."); - let repo = Repository::open(".")?; - let mut revwalk = repo.revwalk()?; - revwalk.push_head()?; - - let mut total_checked = 0; - let mut valid_commits = 0; - let mut commit_data = Vec::new(); - - let collect_pb = ProgressBar::new_spinner(); - collect_pb.set_style( - ProgressStyle::default_spinner() - .template("{spinner:.green} Processing commits: {pos} found ({msg})") - .unwrap() - ); - - // Process commits as we find them - for oid in revwalk { - total_checked += 1; - if let Ok(id) = oid { - if let Ok(commit) = repo.find_commit(id) { - let message = commit.message().unwrap_or(""); - if (20..500).contains(&message.len()) && commit.parent_count() == 1 { - let parent = commit.parent(0)?; - let parent_tree = parent.tree()?; - let commit_tree = commit.tree()?; - let mut diff_opts = DiffOptions::new(); - let diff = repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), Some(&mut diff_opts))?; - - let mut diff_text = String::new(); - let mut total_diff_size = 0; - let mut should_skip = false; - - diff.print(git2::DiffFormat::Patch, |_, _, line| { - if let Ok(content) = std::str::from_utf8(line.content()) { - total_diff_size += content.len(); - if total_diff_size <= args.max_diff_size { - diff_text.push(line.origin()); - diff_text.push_str(content); - } else { - should_skip = true; - } - } - true - })?; - - if !should_skip { - commit_data.push((message.to_string(), diff_text)); - valid_commits += 1; - collect_pb.set_position(valid_commits as u64); - collect_pb.set_message(format!("latest: {:.40}...", message)); - } - } - } - } - if valid_commits >= args.num_commits as usize * 3 { - break; - } - } - - if args.verbose { - println!(" Checked {} commits, found {} valid ones", total_checked, valid_commits); - } - collect_pb.finish_with_message(format!("Found {} commits to process", valid_commits)); - - // Shuffle the collected commits for randomization - let mut rng = 
rand::rngs::ThreadRng::default(); - commit_data.shuffle(&mut rng); - let commit_data = Arc::new(commit_data); - - // Setup processing channel - let num_workers = args.parallel_requests.unwrap_or_else(num_cpus::get); - let (tx, mut rx) = mpsc::channel(num_workers * 2); - let approved_commits = Arc::new(Mutex::new(0usize)); - let threshold = args.quality_threshold; - - // Create progress bar for approved commits - let process_pb = ProgressBar::new(args.num_commits as u64); - process_pb.set_style( - ProgressStyle::default_bar() - .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} approved ({eta})") - .unwrap() - .progress_chars("#>-") - ); - - // Spawn workers for quality checking - let mut workers = Vec::new(); - for worker_id in 0..num_workers { - let tx = tx.clone(); - let approved = Arc::clone(&approved_commits); - let seen = Arc::clone(&seen_messages); - let pb = process_pb.clone(); - let verbose = args.verbose; - let target_commits = args.num_commits; - let commit_data = Arc::clone(&commit_data); - let start_idx = worker_id * commit_data.len() / num_workers; - let end_idx = ((worker_id + 1) * commit_data.len() / num_workers).min(commit_data.len()); - - let worker = task::spawn(async move { - for (message, diff) in commit_data[start_idx..end_idx].iter() { - let current_approved = { - let count = approved.lock().await; - *count - }; - if current_approved >= target_commits as usize { - break; - } - let is_duplicate = { - let mut seen = seen.lock().await; - if seen.contains(message) { - true - } else { - seen.insert(message.clone()); - false - } - }; - if !is_duplicate { - if let Ok(score) = rate_commit_quality(&CommitInfo { - message: message.clone(), - diff: diff.clone(), - change_types: CommitChangeTypes { - has_additions: false, - has_deletions: false, - has_modifications: false, - has_renames: false, - has_file_mode_changes: false - } - }) - .await - { - if score >= threshold { - if let Ok(cleaned_message) = cleanup_commit_message(message).await { - 
let mut count = approved.lock().await; - *count += 1; - pb.set_position(*count as u64); - if verbose { - println!("✓ {} (score: {:.2})", cleaned_message.bright_green(), score); - } - if tx.send((message.clone(), diff.clone())).await.is_err() { - break; - } - } - } - } - } - } - }); - workers.push(worker); - } - drop(tx); - - // Process approved commits - let mut approved_count = 0; - let train_size = args.num_commits / 2; - let mut train_file = fs::OpenOptions::new() - .create(true) - .append(true) - .open(&args.train_file)?; - let mut verify_file = fs::OpenOptions::new() - .create(true) - .append(true) - .open(&args.verify_file)?; - - while let Some((message, diff)) = rx.recv().await { - if approved_count >= args.num_commits as usize { - break; - } - let cleaned_message = cleanup_commit_message(&message).await?; - if cleaned_message.trim().is_empty() { - continue; - } - let is_duplicate = { - let mut seen = seen_messages.lock().await; - if seen.contains(&cleaned_message) { - true - } else { - seen.insert(cleaned_message.clone()); - false - } - }; - if is_duplicate { - continue; - } - // Run scoring on the cleaned output - let cleaned_score = rate_cleaned_commit_message(&cleaned_message).await?; - if args.verbose { - println!("Cleaned: {} (score: {:.2})", cleaned_message, cleaned_score); - } - let example = TrainingExample { - messages: vec![ - Message { - role: "system".to_string(), - content: prompt_content.clone() - }, - Message { role: "user".to_string(), content: diff }, - Message { - role: "assistant".to_string(), - content: cleaned_message - }, - ] - }; - let json = serde_json::to_string(&example)?; - if approved_count < train_size as usize { - writeln!(train_file, "{}", json)?; - } else { - writeln!(verify_file, "{}", json)?; - } - approved_count += 1; - } - - for worker in workers { - worker.await?; - } - process_pb.finish(); - - println!("\n✨ Successfully exported {} training examples:", approved_count); - println!(" - {} training examples in {}", 
train_size, args.train_file); - println!(" - {} verification examples in {}", args.num_commits - train_size, args.verify_file); - - Ok(()) -} - -/// Cleanup commit message using GPT4oMini -async fn cleanup_commit_message(original_msg: &str) -> Result { - if original_msg.trim().is_empty() { - return Ok(String::new()); - } - let first_line = original_msg - .lines() - .next() - .unwrap_or("") - .trim() - .trim_start_matches("```") - .trim_end_matches("```") - .trim_start_matches("plaintext") - .trim_start_matches("git") - .trim(); - let system_prompt = "\ -You are an expert at cleaning up git commit messages. \ -Your task is to:\n\ -1. Remove any ticket references or extraneous tags\n\ -2. Keep it short, focusing on meaningful description\n\ -3. Do not end the message with a period\n\ -4. Always start with a capitalized verb (Add, Fix, Update, etc)\n\ -5. Drop the type prefix if it is present\n\ -6. Return ONLY the cleaned message without any formatting or backticks"; - let req = openai::Request { - system: system_prompt.to_string(), - prompt: first_line.to_string(), - max_tokens: 100, - model: Model::GPT4oMini - }; - let response = openai::call(req).await?; - let cleaned = response - .response - .trim() - .trim_start_matches("```") - .trim_end_matches("```") - .trim_start_matches("plaintext") - .trim_start_matches("git") - .trim() - .to_string(); - if cleaned.is_empty() - || cleaned.to_lowercase().contains("please") - || cleaned.to_lowercase().contains("provide") - || cleaned.to_lowercase().contains("didn't") - || cleaned.to_lowercase().contains("error") - || cleaned.to_lowercase().contains("missing") - || cleaned.to_lowercase().contains("sorry") - || cleaned.to_lowercase().contains("unable") - || cleaned.to_lowercase().contains("could not") - || cleaned.to_lowercase().contains("cannot") - || cleaned.to_lowercase().contains("failed") - || cleaned.len() > 100 - { - return Ok(String::new()); - } - let message = if cleaned.contains(": ") { - let parts: Vec<&str> = 
cleaned.splitn(2, ": ").collect(); - parts.get(1).unwrap_or(&cleaned.as_str()).trim().to_string() - } else { - cleaned - }; - let mut chars = message.chars(); - Ok(if let Some(first_char) = chars.next() { - if first_char.is_lowercase() { - first_char.to_uppercase().collect::() + chars.as_str() - } else { - message - } - } else { - message - }) -} - -/// Rate commit quality using GPT4oMini -async fn rate_commit_quality(commit_info: &CommitInfo) -> Result { - let system_prompt = "\ -You are an expert at evaluating git commit quality. Your task is to rate this commit from 0.0 to 1.0 based on: - -1. Commit Message Quality (50% of score): - - Is the first line concise (under 72 chars)? - - If present, is the body descriptive and separated by blank line? - - Is the message present tense? - - Is the message written in the active voice? - - Is the message clear and concise? - -2. Diff Alignment (50% of score): - - Does the message accurately describe the changes in the diff? - - Are all significant changes reflected in the message? - - Is the scope of changes consistent with the message? 
- -Scoring Guide: -- 0.0-0.3: Poor quality (wrong format, unclear or misleading, conventional commit format) -- 0.4-0.6: Mediocre quality (basic description) -- 0.7-0.8: Good quality (follows format, clear message, mostly aligned with changes) -- 0.9-1.0: Excellent (perfect format and description of changes) - -Return ONLY a number between 0.0 and 1.0"; - let prompt = format!( - "Evaluate this commit:\n\nCommit Message:\n{}\n\nCode Changes:\n{}\n\nScore (0.0-1.0):", - commit_info.message, commit_info.diff - ); - let req = openai::Request { - system: system_prompt.to_string(), - prompt, - max_tokens: 10, - model: Model::GPT4oMini - }; - let response = openai::call(req).await?; - let score = response.response.trim().parse::().unwrap_or(0.0); - Ok(score.clamp(0.0, 1.0)) -} - -/// Rate cleaned commit message quality using GPT4oMini -async fn rate_cleaned_commit_message(cleaned_message: &str) -> Result { - let system_prompt = "\ -You are an expert at evaluating cleaned git commit messages. Rate the quality of this commit message on a scale from 0.0 to 1.0, based solely on clarity, conciseness, and adherence to conventional commit style guidelines. Return ONLY a number between 0.0 and 1.0."; - let prompt = format!("Cleaned Commit Message:\n{}\nScore (0.0-1.0):", cleaned_message); - let req = openai::Request { - system: system_prompt.to_string(), - prompt, - max_tokens: 10, - model: Model::GPT4oMini - }; - let response = openai::call(req).await?; - let score = response.response.trim().parse::().unwrap_or(0.0); - Ok(score.clamp(0.0, 1.0)) -} From 6fd6ab8e49ea594d8c97aa47aecd50f29c83786e Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:29:17 +0100 Subject: [PATCH 33/56] ``` Update Command enum definition Based only on the changes visible in the diff, this commit modifies the existing Command enum without altering its structure or commands. 
``` --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index f967070..b57218d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -106,7 +106,7 @@ pub enum Command { #[structopt(long)] verbose: bool - } // ... other commands ... + } } // Hook installation functions From 10192f64856e42536905b5617d0d3c9a6ea511f9 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:29:42 +0100 Subject: [PATCH 34/56] Delete stats.json file Based only on the changes visible in the diff, this commit removes the stats.json file. --- stats.json | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 stats.json diff --git a/stats.json b/stats.json deleted file mode 100644 index 6b676ff..0000000 --- a/stats.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "iterations": [], - "best_prompt": "", - "best_score": 0.0, - "prompt_scores": { - "You are an AI assistant that generates concise and precise git commit messages based solely on the provided diffs. Please adhere to the following enhanced guidelines:\n\n- **Structure**: Begin with a clear, present-tense summary of the change in the conventional commit format. Use a single-line summary for the change, followed by a blank line. As a best practice, consider including only one bullet point detailing context if essential, but refrain from excessive elaboration.\n\n- **Content**: Commit messages must strictly describe the lines marked with + or - in the diff. Avoid including surrounding context, unmarked lines, or irrelevant details. Explicitly refrain from mentioning implications, reasoning, motivations, or any external context not explicitly reflected in the diff. Make sure to avoid any interpretations or assumptions beyond what is clearly stated.\n\n- **Changes**: Clearly articulate what was added, removed, or modified based solely on what is visible in the diff. 
Use phrases such as \"Based only on the changes visible in the diff, this commit...\" to emphasize an evidence-based approach while outlining changes directly.\n\n- **Commit Types**: Categorize the change accurately using conventional commit message formatting. Use types such as `feat:`, `fix:`, `chore:`, etc., based strictly on the changes included in the diff. Reinforce that only one commit type should be reflected per message. If uncertain about the appropriate category, default to using `chore:` accompanied by a note stating 'clarification needed'. Include explicit examples of each type and their common scenarios.\n\n- **Consistency**: Ensure uniformity in tense, punctuation, and capitalization throughout the message. Use present tense and imperative form, such as \"Add x to y\" instead of \"Added x to y\". \n\n- **Clarity & Brevity**: Craft messages that are clear and easy to understand, succinctly capturing the essence of the changes. Limit the message to a maximum of {{max_commit_length}} characters while ensuring enough detail is provided on the primary action taken. Avoid jargon; provide plain definitions for any necessary technical terms.\n\n- **Accuracy & Hallucination Prevention**: Rigorously reflect only the changes visible in the diff. Avoid any speculation or inclusion of content not substantiated by the diff. Restate the necessity for messages to focus exclusively on aspects evident in the diff and to completely avoid extrapolation or assumptions about motivations or implications.\n\n- **Review Process**: After generating a commit message, compare it against the diff and the": 1.0, - "You are an AI assistant that generates concise and precise git commit messages based solely on the provided diffs. Please adhere to the following enhanced guidelines:\n\n- **Structure**: Begin with a clear, present-tense summary of the change in the conventional commit format. Use a single-line summary for the change, followed by a blank line. 
As a best practice, consider including only one bullet point detailing context if essential, but refrain from excessive elaboration.\n\n- **Content**: Commit messages must strictly describe the lines marked with + or - in the diff. Avoid including surrounding context, unmarked lines, or irrelevant details. Explicitly refrain from mentioning implications, reasoning, motivations, or any external context not explicitly reflected in the diff. Make sure to avoid any interpretations or assumptions beyond what is clearly stated.\n\n- **Changes**: Clearly articulate what was added, removed, or modified based solely on what is visible in the diff. Use phrases such as \"Based only on the changes visible in the diff, this commit...\" to emphasize an evidence-based approach while outlining changes directly.\n\n- **Commit Types**: Categorize the change accurately using conventional commit message formatting. Use types such as:\n - `feat:` for new features\n - `fix:` for bug fixes\n - `docs:` for documentation changes\n - `style:` for formatting changes\n - `refactor:` for code restructuring\n - `test:` for adding/modifying tests\n - `chore:` for maintenance tasks\n If uncertain about the category, default to `chore:` with a note stating 'clarification needed'.\n\n- **Consistency**: Ensure uniformity in tense, punctuation, and capitalization throughout the message. Use present tense and imperative form, such as \"Add x to y\" instead of \"Added x to y\".\n\n- **Clarity & Brevity**: Craft messages that are clear and easy to understand, succinctly capturing the essence of the changes. Limit the message to a maximum of 72 characters for the first line, while ensuring enough detail is provided on the primary action taken. Avoid jargon; provide plain definitions for any necessary technical terms.\n\n- **Accuracy & Hallucination Prevention**: Rigorously reflect only the changes visible in the diff. Avoid any speculation or inclusion of content not substantiated by the diff. 
Restate the necessity for messages to focus exclusively on aspects evident in the diff and to completely avoid extrapolation or assumptions about motivations or implications.\n\n- **Review Process**: Before finalizing each commit message:\n 1. Verify that the message accurately reflects only the changes in the diff\n 2. Confirm the commit type matches the actual changes\n 3. Check that the message follows the structure and formatting guidelines\n 4. Ensure no external context or assumptions are included\n 5. Validate that the message is clear and understandable to other developers\n": 1.0 - } -} From 1e231fbd52e52f104bbcd5bff14f4601bb8be03b Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:31:33 +0100 Subject: [PATCH 35/56] ``` Remove install, reinstall, and uninstall modules Based only on the changes visible in the diff, this commit deletes the files src/install.rs, src/reinstall.rs, and src/uninstall.rs. ``` --- src/install.rs | 23 ----------------------- src/main.rs | 13 +------------ src/reinstall.rs | 32 -------------------------------- src/uninstall.rs | 45 --------------------------------------------- 4 files changed, 1 insertion(+), 112 deletions(-) delete mode 100644 src/install.rs delete mode 100644 src/reinstall.rs delete mode 100644 src/uninstall.rs diff --git a/src/install.rs b/src/install.rs deleted file mode 100644 index a8f222a..0000000 --- a/src/install.rs +++ /dev/null @@ -1,23 +0,0 @@ -use anyhow::{bail, Result}; -use ai::filesystem::Filesystem; - -#[allow(dead_code)] -pub fn run() -> Result<()> { - let fs = Filesystem::new()?; - let hook_bin = fs.git_ai_hook_bin_path()?; - let hook_file = fs.prepare_commit_msg_path()?; - - // Ensure hooks directory exists - if !fs.git_hooks_path().exists() { - fs.git_hooks_path().create_dir_all()?; - } - - if hook_file.exists() { - bail!("Hook already exists at {}, please run 'git ai hook reinstall'", hook_file); - } - - hook_file.symlink(&hook_bin)?; - println!("🔗 Hook symlinked successfully 
to {}", hook_file); - - Ok(()) -} diff --git a/src/main.rs b/src/main.rs index b57218d..b0f978e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,3 @@ -mod uninstall; -mod install; -mod reinstall; mod config; mod filesystem; @@ -11,18 +8,13 @@ use dotenv::dotenv; use crate::config::App; use crate::filesystem::Filesystem; -mod finetune; -use crate::finetune::FinetuneArgs; - #[derive(StructOpt)] #[structopt(name = "git-ai", about = "A git extension that uses OpenAI to generate commit messages")] enum Cli { #[structopt(about = "Installs the git-ai hook")] Hook(HookSubcommand), #[structopt(about = "Sets or gets configuration values")] - Config(ConfigSubcommand), - #[structopt(about = "Exports training data for fine-tuning")] - Finetune(FinetuneArgs) + Config(ConfigSubcommand) } #[derive(StructOpt)] @@ -230,9 +222,6 @@ async fn main() -> Result<()> { } }, }, - Cli::Finetune(args) => { - finetune::run(args).await?; - } } Ok(()) diff --git a/src/reinstall.rs b/src/reinstall.rs deleted file mode 100644 index f572d3e..0000000 --- a/src/reinstall.rs +++ /dev/null @@ -1,32 +0,0 @@ -use console::Emoji; -use anyhow::Result; -use ai::filesystem::Filesystem; -use colored::*; - -#[allow(dead_code)] -const EMOJI: Emoji<'_, '_> = Emoji("🔗", ""); - -#[allow(dead_code)] -pub fn run() -> Result<()> { - let fs = Filesystem::new()?; - let hook_bin = fs.git_ai_hook_bin_path()?; - let hook_file = fs.prepare_commit_msg_path()?; - - if !fs.git_hooks_path().exists() { - fs.git_hooks_path().create_dir_all()?; - } - - if hook_file.exists() { - log::debug!("Removing existing hook file: {}", hook_file); - hook_file.delete()?; - } - - hook_file.symlink(&hook_bin)?; - - println!( - "{EMOJI} Hook symlinked successfully to {}", - hook_file.relative_path()?.to_string().italic() - ); - - Ok(()) -} diff --git a/src/uninstall.rs b/src/uninstall.rs deleted file mode 100644 index 09f6b3b..0000000 --- a/src/uninstall.rs +++ /dev/null @@ -1,45 +0,0 @@ -use std::path::{Path, PathBuf}; -use std::{env, fs}; 
- -use anyhow::{bail, Context, Result}; -use ai::style::Styled; -use colored::Colorize; -use console::Emoji; -use git2::{Repository, RepositoryOpenFlags as Flags}; -use thiserror::Error; - -#[derive(Error, Debug)] -#[allow(dead_code)] -pub enum InstallError { - #[error("Failed to get current directory")] - CurrentDir, - #[error("Failed to open repository")] - OpenRepo, - #[error("Hook already exists: {0:?}")] - HookExists(PathBuf) -} - -#[allow(dead_code)] -const EMOJI: Emoji<'_, '_> = Emoji("🔗", ""); - -#[allow(dead_code)] -pub fn run() -> Result<()> { - let current_dir = env::current_dir().context(InstallError::CurrentDir)?; - let repo = Repository::open_ext(current_dir, Flags::empty(), Vec::<&Path>::new()).context(InstallError::OpenRepo)?; - - let hook_dir = PathBuf::from(repo.path()).join("hooks"); - let hook_file = hook_dir.join("prepare-commit-msg"); - - if !hook_file.exists() { - bail!(InstallError::HookExists(hook_file)); - } - - fs::remove_file(&hook_file).context("Failed to remove hook file")?; - - println!( - "{EMOJI} Hook uninstall successfully from {}", - hook_file.relative_path().display().to_string().italic() - ); - - Ok(()) -} From 42f27e25da1da3f5bb8abe5bccd01eceed942d37 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:32:36 +0100 Subject: [PATCH 36/56] Build inline --- CONTRIBUTING.md | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index efceb6e..1461d91 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,9 +24,7 @@ For each new feature or bug fix, create a new branch based on the `main` branch. ```bash git checkout -b feature/my-new-feature ``` - or - ```bash git checkout -b fix/my-bug-fix ``` @@ -55,15 +53,15 @@ If you're adding a new feature or changing existing functionality, update the RE 1. 
Push your changes to your fork: - ```bash - git push origin feature/my-new-feature - ``` + ```bash + git push origin feature/my-new-feature + ``` - or + or - ```bash - git push origin fix/my-bug-fix - ``` + ```bash + git push origin fix/my-bug-fix + ``` 2. Go to your fork on GitHub and click the "Pull Request" button to submit your changes. From b75147bb88ce4cafb9ce52e96c77aa30c3833bea Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:35:31 +0100 Subject: [PATCH 37/56] Update default model name in Args implementation Based only on the changes visible in the diff, this commit modifies the default model name in the Args implementation from "gpt-4o" to "gpt-4o-mini". --- src/bin/hook.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bin/hook.rs b/src/bin/hook.rs index 4b25e0a..a4f855e 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -107,6 +107,7 @@ impl Args { let response = commit::generate(patch.to_string(), remaining_tokens, model).await?; std::fs::write(&self.commit_msg_file, response.response.trim())?; + pb.finish_and_clear(); Ok(()) @@ -122,7 +123,7 @@ impl Args { let model = config::APP .model .clone() - .unwrap_or("gpt-4o".to_string()) + .unwrap_or("gpt-4o-mini".to_string()) .into(); let used_tokens = commit::token_used(&model)?; let max_tokens = config::APP.max_tokens.unwrap_or(model.context_size()); From a546bbac671cf9f54c8227f92f9d01003b302a42 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:53:07 +0100 Subject: [PATCH 38/56] ``` Create hook stress test script Based only on the changes visible in the diff, this commit adds a new script for testing various operations in a Git hook context. 
``` --- scripts/hook-stress-test | 241 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100755 scripts/hook-stress-test diff --git a/scripts/hook-stress-test b/scripts/hook-stress-test new file mode 100755 index 0000000..a60a556 --- /dev/null +++ b/scripts/hook-stress-test @@ -0,0 +1,241 @@ +#!/usr/bin/env fish + +set -x fish_trace 1 +set -Ux OPENAI_API_KEY $OPENAI_API_KEY +set -x RUST_LOG debug + +if not test -n "$OPENAI_API_KEY" + echo "Please set the OPENAI_API_KEY environment variable." + exit 1 +end + +function on_exit --on-event fish_exit + if test -d $TEST_DIR + rm -rf $TEST_DIR + end +end + +function generate_random_content + set size $argv[1] + head -c $size /dev/urandom | base64 +end + +function fail + echo "Test failed: $argv" + exit 1 +end + +function test_step + set description $argv[1] + echo "=== Testing: $description ===" +end + +set TEST_DIR /tmp/git-ai-hook-test-(date +%s) + +# Install git-ai +cargo install --force --path . || fail "Failed to install git-ai" + +# Setup test repository +rm -rf $TEST_DIR +mkdir -p $TEST_DIR +cd $TEST_DIR + +git init || fail "Git init failed" +git config user.name "Test User" +git config user.email "test@example.com" +git config --global init.defaultBranch main + +# Install the hook +git-ai hook install || fail "Hook installation failed" + +# Test 1: File Creation Permutations +test_step "File Creation Permutations" + +# 1.1 Empty file +touch empty_file.txt +git add empty_file.txt +git commit -a --no-edit || fail "Empty file commit failed" + +# 1.2 Multiple empty files +touch empty1.txt empty2.txt empty3.txt +git add . +git commit -a --no-edit || fail "Multiple empty files commit failed" + +# 1.3 Files with different content types +echo "Normal text" > normal.txt +echo -e "Line 1\nLine 2\nLine 3" > multiline.txt +echo -n "No newline" > no_newline.txt +echo "Tab Space Space End" > whitespace.txt +git add . 
+git commit -a --no-edit || fail "Different content types commit failed" + +# Test 2: File Modification Permutations +test_step "File Modification Permutations" + +# 2.1 Modify start of file +echo "Modified start + Normal text" > normal.txt +git commit -a --no-edit || fail "Start modification commit failed" + +# 2.2 Modify end of file +echo -e "Line 1\nLine 2\nLine 3\nLine 4" > multiline.txt +git commit -a --no-edit || fail "End modification commit failed" + +# 2.3 Modify middle of file +echo -e "Line 1\nNew Line\nLine 3\nLine 4" > multiline.txt +git commit -a --no-edit || fail "Middle modification commit failed" + +# 2.4 Multiple file modifications +echo "Modified 1" > empty1.txt +echo "Modified 2" > empty2.txt +echo "Modified 3" > empty3.txt +git commit -a --no-edit || fail "Multiple modifications commit failed" + +# Test 3: File Deletion Permutations +test_step "File Deletion Permutations" + +# 3.1 Single file deletion +rm empty_file.txt +git add --all +git commit -a --no-edit || fail "Single deletion commit failed" + +# 3.2 Multiple file deletions +rm empty1.txt empty2.txt +git add --all +git commit -a --no-edit || fail "Multiple deletions commit failed" + +# Test 4: Mixed Operations +test_step "Mixed Operations" + +# 4.1 Add + Delete + Modify +touch new_file1.txt +rm empty3.txt +echo "Modified again" > normal.txt +git add --all +git commit -a --no-edit || fail "Mixed operations commit failed" + +# 4.2 Complex changes +mkdir -p dir1/dir2 +echo "Nested file" > dir1/dir2/nested.txt +rm multiline.txt +echo "Changed" > whitespace.txt +touch dir1/empty.txt +git add --all +git commit -a --no-edit || fail "Complex changes commit failed" + +# Test 5: File Renaming and Moving +test_step "File Renaming and Moving" + +# 5.1 Simple rename +git mv normal.txt renamed.txt +git commit -a --no-edit || fail "Simple rename commit failed" + +# 5.2 Move file to directory +git mv renamed.txt dir1/ +git commit -a --no-edit || fail "Move to directory commit failed" + +# 5.3 Move and 
rename +git mv dir1/renamed.txt dir1/dir2/final.txt +git commit -a --no-edit || fail "Move and rename commit failed" + +# Test 6: Permission Changes +test_step "Permission Changes" + +# 6.1 Make file executable +chmod +x dir1/dir2/final.txt +git add --all +git commit -a --no-edit || fail "Permission change commit failed" + +# Test 7: Symlink Operations +test_step "Symlink Operations" + +# 7.1 Add symlink +ln -s dir1/dir2/final.txt symlink.txt +git add symlink.txt +git commit -a --no-edit || fail "Symlink creation commit failed" + +# 7.2 Modify symlink target +rm symlink.txt +ln -s dir1/dir2/nested.txt symlink.txt +git add --all +git commit -a --no-edit || fail "Symlink modification commit failed" + +# Test 8: Special Content +test_step "Special Content" + +# 8.1 File with null bytes +printf "Before\0After" > null_file.txt +git add null_file.txt +git commit -a --no-edit || fail "Null byte commit failed" + +# 8.2 File with special characters +echo "Special chars: ¡™£¢∞§¶•ªº" > special_chars.txt +git add special_chars.txt +git commit -a --no-edit || fail "Special chars commit failed" + +# 8.3 File with escape sequences +echo -e "\033[31mRed\033[0m \033[32mGreen\033[0m" > ansi_colors.txt +git add ansi_colors.txt +git commit -a --no-edit || fail "ANSI escape sequences commit failed" + +# Test 9: Large Changes +test_step "Large Changes" + +# 9.1 Many files in one commit +for i in (seq 1 100) + echo "Content $i" > "file$i.txt" +end +git add . 
+git commit -a --no-edit || fail "Many files commit failed" + +# 9.2 Many changes to one file +for i in (seq 1 1000) + echo "Line $i" >> large_changes.txt +end +git add large_changes.txt +git commit -a --no-edit || fail "Many changes commit failed" + +# Test 10: Edge Cases +test_step "Edge Cases" + +# 10.1 File with only whitespace changes +echo "Line with tabs" > whitespace_changes.txt +git add whitespace_changes.txt +git commit -a --no-edit || fail "Initial whitespace commit failed" +echo "Line with spaces" > whitespace_changes.txt +git commit -a --no-edit || fail "Whitespace change commit failed" + +# 10.2 Rename with case change only +echo "Case sensitive" > case.txt +git add case.txt +git commit -a --no-edit || fail "Case file commit failed" +git mv case.txt CASE.txt +git commit -a --no-edit || fail "Case rename commit failed" + +# 10.3 Files with same content +echo "Duplicate content" > dup1.txt +echo "Duplicate content" > dup2.txt +git add dup1.txt dup2.txt +git commit -a --no-edit || fail "Duplicate content commit failed" + +# 10.4 Move directory with contents +mkdir -p src/nested/deep +echo "Moving file" > src/nested/deep/file.txt +git add src +git commit -a --no-edit || fail "Initial directory commit failed" +git mv src dst +git commit -a --no-edit || fail "Directory move commit failed" + +# 10.5 Replace file with directory +rm dst/nested/deep/file.txt +mkdir dst/nested/deep/file.txt +echo "Now a directory" > dst/nested/deep/file.txt/content.txt +git add --all +git commit -a --no-edit || fail "File to directory commit failed" + +# 10.6 Replace directory with file +rm -rf dst/nested/deep/file.txt +echo "Now a file again" > dst/nested/deep/file.txt +git add --all +git commit -a --no-edit || fail "Directory to file commit failed" + +echo "All permutation tests completed successfully!" 
From 596f662e48608ec1ebeb7be64afb33b1d84df6f7 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:53:51 +0100 Subject: [PATCH 39/56] ``` Add comprehensive tests script Based only on the changes visible in the diff, this commit adds a new Fish script `comprehensive-tests` that includes a series of tests for various Git operations. ``` --- scripts/comprehensive-tests | 272 ++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 scripts/comprehensive-tests diff --git a/scripts/comprehensive-tests b/scripts/comprehensive-tests new file mode 100644 index 0000000..b1eef4c --- /dev/null +++ b/scripts/comprehensive-tests @@ -0,0 +1,272 @@ +#!/usr/bin/env fish + +set -x fish_trace 1 +set -Ux OPENAI_API_KEY $OPENAI_API_KEY +set -x RUST_LOG debug + +if not test -n "$OPENAI_API_KEY" + echo "Please set the OPENAI_API_KEY environment variable." + exit 1 +end + +if not command -v cargo + echo "Cargo not found. Please install Rust." + exit 1 +end + +function on_exit --on-event fish_exit + if test -d $TEST_DIR + rm -rf $TEST_DIR + end +end + +function generate_random_content + set size $argv[1] + head -c $size /dev/urandom | base64 +end + +function last_commit + git log -1 --pretty=%B | tr -d '\n' +end + +function fail + echo "Test failed: $argv" + exit 1 +end + +function test_step + set description $argv[1] + echo "=== Testing: $description ===" +end + +set TEST_DIR /tmp/git-ai-test-(date +%s) + +# Install git-ai +cargo install --force --path . 
|| fail "Cargo installation failed" + +# Setup test repository +rm -rf $TEST_DIR +mkdir -p $TEST_DIR +cd $TEST_DIR + +git init || fail "Git init failed" +git config user.name "Test User" +git config user.email "test@example.com" +git config --global init.defaultBranch main +git branch -m main + +# Test 1: Hook Installation and Configuration +test_step "Hook Installation and Configuration" + +git-ai hook || echo "As expected" +git-ai hook install || fail "Hook installation failed" +git-ai hook uninstall || fail "Hook uninstallation failed" +git-ai hook install || fail "Hook reinstallation failed" +git-ai hook reinstall || fail "Hook reinstall failed" + +git-ai config reset +git-ai config || echo "As expected" +git-ai config set || echo "As expected" +git-ai config set model gpt-4 || fail "Setting model failed" +git-ai config set max-tokens 512 || fail "Setting max tokens failed" +git-ai config set max-commit-length 1024 || fail "Setting max commit length failed" +git-ai config set openai-api-key "$OPENAI_API_KEY" || fail "Setting OpenAI API key failed" + +# Test 2: Basic Git Operations +test_step "Basic Git Operations" + +# 2.1 Initial commit +echo "Hello World 0" > README.md +git add README.md +git commit --no-edit || fail "Initial commit failed" +git status --porcelain || fail "Git status failed after initial commit" + +# 2.2 Commit with message +echo "Hello World" > README.md +git add README.md +git commit -m "Initial commit" || fail "Commit with message failed" +last_commit | grep "Initial commit" || fail "Commit message 'Initial commit' not found" + +# 2.3 Commit with --no-edit +echo "Hello World 2" > README.md +git add README.md +git commit --no-edit || fail "Commit --no-edit failed" +git status --porcelain || fail "Git status failed after commit --no-edit" + +# Test 3: File Creation Permutations +test_step "File Creation Permutations" + +# 3.1 Empty file +touch empty_file.txt +git add empty_file.txt +git commit -a --no-edit || fail "Empty file commit failed" 
+ +# 3.2 Multiple empty files +touch empty1.txt empty2.txt empty3.txt +git add . +git commit -a --no-edit || fail "Multiple empty files commit failed" + +# 3.3 Files with different content types +echo "Normal text" > normal.txt +echo -e "Line 1\nLine 2\nLine 3" > multiline.txt +echo -n "No newline" > no_newline.txt +echo "Tab Space Space End" > whitespace.txt +git add . +git commit -a --no-edit || fail "Different content types commit failed" + +# Test 4: File Modification Permutations +test_step "File Modification Permutations" + +# 4.1 Modify start of file +echo "Modified start + Normal text" > normal.txt +git commit -a --no-edit || fail "Start modification commit failed" + +# 4.2 Modify end of file +echo -e "Line 1\nLine 2\nLine 3\nLine 4" > multiline.txt +git commit -a --no-edit || fail "End modification commit failed" + +# 4.3 Modify middle of file +echo -e "Line 1\nNew Line\nLine 3\nLine 4" > multiline.txt +git commit -a --no-edit || fail "Middle modification commit failed" + +# Test 5: Advanced Git Operations +test_step "Advanced Git Operations" + +# 5.1 Amend commit +set prev_commit (last_commit) +git commit --amend --no-edit || fail "Commit amend --no-edit failed" +git status --porcelain || fail "Git status failed after amend --no-edit" +last_commit | grep "$prev_commit" || fail "Amended commit message not found" + +# 5.2 Commit with template +echo "Commit from template" > template.txt +git add template.txt +git commit -t template.txt --no-edit || true + +# 5.3 Squash commits +echo "Squash test" > squash.txt +git add squash.txt +git commit -m "Pre-squash commit" || fail "Pre-squash commit failed" +git reset --soft HEAD~1 || fail "Reset failed" +git commit --squash HEAD~2 -m "Squashed commit" || fail "Squash commit failed" +last_commit | grep "Squashed commit" || fail "Squash commit message not found" + +# Test 6: Branch and Merge Operations +test_step "Branch and Merge Operations" + +# 6.1 Feature branch +git checkout -b feature-branch || fail "Checkout to 
feature-branch failed" +echo "Feature branch change" > feature.txt +git add feature.txt +git commit -m "Feature commit" || fail "Feature branch commit failed" +last_commit | grep "Feature commit" || fail "Feature branch commit message not found" + +# 6.2 Merge +git checkout main || fail "Checkout to main failed" +git merge --no-edit --no-ff feature-branch || fail "Merge feature-branch failed" +last_commit | grep "Merge branch 'feature-branch'" || fail "Merge commit message not found" + +# Test 7: File Operations +test_step "File Operations" + +# 7.1 File deletions +rm empty_file.txt +git add --all +git commit -a --no-edit || fail "Single deletion commit failed" + +rm empty1.txt empty2.txt +git add --all +git commit -a --no-edit || fail "Multiple deletions commit failed" + +# 7.2 Mixed operations +touch new_file1.txt +rm empty3.txt +echo "Modified again" > normal.txt +git add --all +git commit -a --no-edit || fail "Mixed operations commit failed" + +# Test 8: Special Content +test_step "Special Content" + +# 8.1 Binary and large files +generate_random_content 1048576 > large_file.bin +git add large_file.bin +git commit -a --no-edit || fail "Large file commit failed" + +# 8.2 Special characters +echo "Special chars: ¡™£¢∞§¶•ªº" > special_chars.txt +git add special_chars.txt +git commit -a --no-edit || fail "Special chars commit failed" + +# 8.3 Unicode content +echo "🚀 Unicode content 你好 привет" > unicode_file.txt +git add unicode_file.txt +git commit -a --no-edit || fail "Unicode commit failed" + +# Test 9: File System Operations +test_step "File System Operations" + +# 9.1 Directory operations +mkdir -p src/nested/deep +echo "Moving file" > src/nested/deep/file.txt +git add src +git commit -a --no-edit || fail "Initial directory commit failed" +git mv src dst +git commit -a --no-edit || fail "Directory move commit failed" + +# 9.2 Symlink operations +ln -s dst/nested/deep/file.txt symlink.txt +git add symlink.txt +git commit -a --no-edit || fail "Symlink creation 
commit failed" + +# 9.3 Permission changes +chmod +x dst/nested/deep/file.txt +git add --all +git commit -a --no-edit || fail "Permission change commit failed" + +# Test 10: Edge Cases +test_step "Edge Cases" + +# 10.1 Empty commit (should fail) +if git commit --allow-empty --no-edit + fail "Empty commit should have failed but succeeded" +end +echo "Empty commit failed as expected" + +# 10.2 Case sensitivity +echo "Case sensitive" > case.txt +git add case.txt +git commit -a --no-edit || fail "Case file commit failed" +git mv case.txt CASE.txt +git commit -a --no-edit || fail "Case rename commit failed" + +# 10.3 File/directory conversion +rm dst/nested/deep/file.txt +mkdir dst/nested/deep/file.txt +echo "Now a directory" > dst/nested/deep/file.txt/content.txt +git add --all +git commit -a --no-edit || fail "File to directory commit failed" + +rm -rf dst/nested/deep/file.txt +echo "Now a file again" > dst/nested/deep/file.txt +git add --all +git commit -a --no-edit || fail "Directory to file commit failed" + +# Test 11: Bulk Operations +test_step "Bulk Operations" + +# 11.1 Many files +for i in (seq 1 100) + echo "Content $i" > "file$i.txt" +end +git add . +git commit -a --no-edit || fail "Many files commit failed" + +# 11.2 Many changes +for i in (seq 1 1000) + echo "Line $i" >> large_changes.txt +end +git add large_changes.txt +git commit -a --no-edit || fail "Many changes commit failed" + +echo "All comprehensive tests completed successfully!" From 0f7af0c7812a69393292270aa65a9ef1395d3612 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:54:04 +0100 Subject: [PATCH 40/56] Change file permission of comprehensive-tests. - Update file mode from 644 to 755. 
--- scripts/comprehensive-tests | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/comprehensive-tests diff --git a/scripts/comprehensive-tests b/scripts/comprehensive-tests old mode 100644 new mode 100755 From 5d5ce13a9942de0e29f6d5a9ce356f472811aba9 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:56:14 +0100 Subject: [PATCH 41/56] Update `comprehensive-tests` script to load environment variables from `.env.local` This commit updates the `comprehensive-tests` script by adding logic to read and load environment variables from a --- scripts/comprehensive-tests | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/scripts/comprehensive-tests b/scripts/comprehensive-tests index b1eef4c..ad2a31a 100755 --- a/scripts/comprehensive-tests +++ b/scripts/comprehensive-tests @@ -1,6 +1,24 @@ #!/usr/bin/env fish set -x fish_trace 1 + +# Load environment variables from .env.local if it exists +if test -f .env.local + for line in (cat .env.local) + if not string match -q "#*" $line # Skip comments + and test -n "$line" # Skip empty lines + set -l key (string split -m 1 = $line)[1] + set -l value (string split -m 1 = $line)[2] + # Remove quotes if they exist + set value (string trim -c '"' $value) + set value (string trim -c "'" $value) + set -gx $key $value + end + end +else + echo "Warning: .env.local file not found. Make sure you have the required environment variables set." 
+end + set -Ux OPENAI_API_KEY $OPENAI_API_KEY set -x RUST_LOG debug From 62e75f0028918cf5880cd00dfc3a4a2950f3da79 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 06:58:00 +0100 Subject: [PATCH 42/56] Remove note about output being used as a git commit message from 'prompt.md' --- resources/prompt.md | 1 + 1 file changed, 1 insertion(+) diff --git a/resources/prompt.md b/resources/prompt.md index 821cba1..6bd2850 100644 --- a/resources/prompt.md +++ b/resources/prompt.md @@ -18,5 +18,6 @@ You are an AI assistant that generates concise and precise git commit messages b 3. Check that the message follows the structure and formatting guidelines 4. Ensure no external context or assumptions are included 5. Validate that the message is clear and understandable to other developers +- **Important**: The output will be used as a git commit message, so it must be a valid git commit message. INPUT: From 4e8bbc9dcac9b80272da57ccceea47fd8e5d73e9 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 07:05:07 +0100 Subject: [PATCH 43/56] Update comprehensive-tests script and prompt.md documentation Based only on --- resources/prompt.md | 17 ++++++++++++ scripts/comprehensive-tests | 54 ++++++++++++++++++------------------- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/resources/prompt.md b/resources/prompt.md index 6bd2850..93d875a 100644 --- a/resources/prompt.md +++ b/resources/prompt.md @@ -12,12 +12,29 @@ You are an AI assistant that generates concise and precise git commit messages b - **Accuracy & Hallucination Prevention**: Rigorously reflect only the changes visible in the diff. Avoid any speculation or inclusion of content not substantiated by the diff. Restate the necessity for messages to focus exclusively on aspects evident in the diff and to completely avoid extrapolation or assumptions about motivations or implications. 
+- **Binary Files & Special Cases**: When handling binary files or cases where diff content is not readable: + 1. NEVER output error messages or apologies in the commit message + 2. Use the format "Add/Update/Delete binary file " for binary files + 3. Include file size in parentheses if available + 4. If multiple binary files are changed, list them separated by commas + 5. For unreadable diffs, focus on the file operation (add/modify/delete) without speculating about content + +- **Error Prevention**: + 1. NEVER include phrases like "I'm sorry", "I apologize", or any error messages + 2. NEVER leave commit messages incomplete or truncated + 3. If unable to read diff content, default to describing the file operation + 4. Always ensure the message is a valid git commit message + 5. When in doubt about content, focus on the file operation type + - **Review Process**: Before finalizing each commit message: 1. Verify that the message accurately reflects only the changes in the diff 2. Confirm the commit type matches the actual changes 3. Check that the message follows the structure and formatting guidelines 4. Ensure no external context or assumptions are included 5. Validate that the message is clear and understandable to other developers + 6. Verify no error messages or apologies are included + 7. Confirm the message describes file operations even if content is unreadable + - **Important**: The output will be used as a git commit message, so it must be a valid git commit message. 
INPUT: diff --git a/scripts/comprehensive-tests b/scripts/comprehensive-tests index ad2a31a..f4d03d3 100755 --- a/scripts/comprehensive-tests +++ b/scripts/comprehensive-tests @@ -5,8 +5,8 @@ set -x fish_trace 1 # Load environment variables from .env.local if it exists if test -f .env.local for line in (cat .env.local) - if not string match -q "#*" $line # Skip comments - and test -n "$line" # Skip empty lines + if not string match -q "#*" $line # Skip comments + and test -n "$line" # Skip empty lines set -l key (string split -m 1 = $line)[1] set -l value (string split -m 1 = $line)[2] # Remove quotes if they exist @@ -34,7 +34,7 @@ end function on_exit --on-event fish_exit if test -d $TEST_DIR - rm -rf $TEST_DIR + # rm -rf $TEST_DIR end end @@ -94,19 +94,19 @@ git-ai config set openai-api-key "$OPENAI_API_KEY" || fail "Setting OpenAI API k test_step "Basic Git Operations" # 2.1 Initial commit -echo "Hello World 0" > README.md +echo "Hello World 0" >README.md git add README.md git commit --no-edit || fail "Initial commit failed" git status --porcelain || fail "Git status failed after initial commit" # 2.2 Commit with message -echo "Hello World" > README.md +echo "Hello World" >README.md git add README.md git commit -m "Initial commit" || fail "Commit with message failed" last_commit | grep "Initial commit" || fail "Commit message 'Initial commit' not found" # 2.3 Commit with --no-edit -echo "Hello World 2" > README.md +echo "Hello World 2" >README.md git add README.md git commit --no-edit || fail "Commit --no-edit failed" git status --porcelain || fail "Git status failed after commit --no-edit" @@ -125,10 +125,10 @@ git add . 
git commit -a --no-edit || fail "Multiple empty files commit failed" # 3.3 Files with different content types -echo "Normal text" > normal.txt -echo -e "Line 1\nLine 2\nLine 3" > multiline.txt -echo -n "No newline" > no_newline.txt -echo "Tab Space Space End" > whitespace.txt +echo "Normal text" >normal.txt +echo -e "Line 1\nLine 2\nLine 3" >multiline.txt +echo -n "No newline" >no_newline.txt +echo "Tab Space Space End" >whitespace.txt git add . git commit -a --no-edit || fail "Different content types commit failed" @@ -136,15 +136,15 @@ git commit -a --no-edit || fail "Different content types commit failed" test_step "File Modification Permutations" # 4.1 Modify start of file -echo "Modified start + Normal text" > normal.txt +echo "Modified start + Normal text" >normal.txt git commit -a --no-edit || fail "Start modification commit failed" # 4.2 Modify end of file -echo -e "Line 1\nLine 2\nLine 3\nLine 4" > multiline.txt +echo -e "Line 1\nLine 2\nLine 3\nLine 4" >multiline.txt git commit -a --no-edit || fail "End modification commit failed" # 4.3 Modify middle of file -echo -e "Line 1\nNew Line\nLine 3\nLine 4" > multiline.txt +echo -e "Line 1\nNew Line\nLine 3\nLine 4" >multiline.txt git commit -a --no-edit || fail "Middle modification commit failed" # Test 5: Advanced Git Operations @@ -157,12 +157,12 @@ git status --porcelain || fail "Git status failed after amend --no-edit" last_commit | grep "$prev_commit" || fail "Amended commit message not found" # 5.2 Commit with template -echo "Commit from template" > template.txt +echo "Commit from template" >template.txt git add template.txt git commit -t template.txt --no-edit || true # 5.3 Squash commits -echo "Squash test" > squash.txt +echo "Squash test" >squash.txt git add squash.txt git commit -m "Pre-squash commit" || fail "Pre-squash commit failed" git reset --soft HEAD~1 || fail "Reset failed" @@ -174,7 +174,7 @@ test_step "Branch and Merge Operations" # 6.1 Feature branch git checkout -b feature-branch || fail 
"Checkout to feature-branch failed" -echo "Feature branch change" > feature.txt +echo "Feature branch change" >feature.txt git add feature.txt git commit -m "Feature commit" || fail "Feature branch commit failed" last_commit | grep "Feature commit" || fail "Feature branch commit message not found" @@ -199,7 +199,7 @@ git commit -a --no-edit || fail "Multiple deletions commit failed" # 7.2 Mixed operations touch new_file1.txt rm empty3.txt -echo "Modified again" > normal.txt +echo "Modified again" >normal.txt git add --all git commit -a --no-edit || fail "Mixed operations commit failed" @@ -207,17 +207,17 @@ git commit -a --no-edit || fail "Mixed operations commit failed" test_step "Special Content" # 8.1 Binary and large files -generate_random_content 1048576 > large_file.bin +generate_random_content 1048576 >large_file.bin git add large_file.bin -git commit -a --no-edit || fail "Large file commit failed" +git commit -m "Add binary file large_file.bin (1MB)" || fail "Large file commit failed" # 8.2 Special characters -echo "Special chars: ¡™£¢∞§¶•ªº" > special_chars.txt +echo "Special chars: ¡™£¢∞§¶•ªº" >special_chars.txt git add special_chars.txt git commit -a --no-edit || fail "Special chars commit failed" # 8.3 Unicode content -echo "🚀 Unicode content 你好 привет" > unicode_file.txt +echo "🚀 Unicode content 你好 привет" >unicode_file.txt git add unicode_file.txt git commit -a --no-edit || fail "Unicode commit failed" @@ -226,7 +226,7 @@ test_step "File System Operations" # 9.1 Directory operations mkdir -p src/nested/deep -echo "Moving file" > src/nested/deep/file.txt +echo "Moving file" >src/nested/deep/file.txt git add src git commit -a --no-edit || fail "Initial directory commit failed" git mv src dst @@ -252,7 +252,7 @@ end echo "Empty commit failed as expected" # 10.2 Case sensitivity -echo "Case sensitive" > case.txt +echo "Case sensitive" >case.txt git add case.txt git commit -a --no-edit || fail "Case file commit failed" git mv case.txt CASE.txt @@ -261,12 
+261,12 @@ git commit -a --no-edit || fail "Case rename commit failed" # 10.3 File/directory conversion rm dst/nested/deep/file.txt mkdir dst/nested/deep/file.txt -echo "Now a directory" > dst/nested/deep/file.txt/content.txt +echo "Now a directory" >dst/nested/deep/file.txt/content.txt git add --all git commit -a --no-edit || fail "File to directory commit failed" rm -rf dst/nested/deep/file.txt -echo "Now a file again" > dst/nested/deep/file.txt +echo "Now a file again" >dst/nested/deep/file.txt git add --all git commit -a --no-edit || fail "Directory to file commit failed" @@ -275,14 +275,14 @@ test_step "Bulk Operations" # 11.1 Many files for i in (seq 1 100) - echo "Content $i" > "file$i.txt" + echo "Content $i" >"file$i.txt" end git add . git commit -a --no-edit || fail "Many files commit failed" # 11.2 Many changes for i in (seq 1 1000) - echo "Line $i" >> large_changes.txt + echo "Line $i" >>large_changes.txt end git add large_changes.txt git commit -a --no-edit || fail "Many changes commit failed" From a8871496cbeaabe61f1ef1c3457c391f3f152812 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 07:28:38 +0100 Subject: [PATCH 44/56] Update scripts and source code according to visible changes in the diff Based on the changes visible in the diff, this commit updates the comprehensive testing scripts, modifies several source files related to- 'hook.rs','patch_test.rs', and adds new methods and tests. Here is a summary of specific changes: - In `comprehensive-tests` script, add `--debug` flag to `cargo install` command - Adjust logic to check for ample remaining tokens in `hook.rs` - Modify Diff and PatchRepository implementations for `hook.rs` and `patch_test.rs`. - Modify test cases in the `patch_test.rs` script. Please note each individual file has significant changes that add, modify or enhance the functionality as per the needs reflected in the diffs. 
However, all changes stick to the theme of improving handling of patches, diffs, and commit-related functionalities. --- scripts/comprehensive-tests | 6 ++- src/bin/hook.rs | 32 +++++++++++---- src/hook.rs | 37 ++++++++++++++--- tests/patch_test.rs | 80 ++++++++++++++++++++++++------------- 4 files changed, 112 insertions(+), 43 deletions(-) diff --git a/scripts/comprehensive-tests b/scripts/comprehensive-tests index f4d03d3..52b34f7 100755 --- a/scripts/comprehensive-tests +++ b/scripts/comprehensive-tests @@ -60,7 +60,7 @@ end set TEST_DIR /tmp/git-ai-test-(date +%s) # Install git-ai -cargo install --force --path . || fail "Cargo installation failed" +cargo install --force --debug --path . || fail "Cargo installation failed" # Setup test repository rm -rf $TEST_DIR @@ -96,7 +96,9 @@ test_step "Basic Git Operations" # 2.1 Initial commit echo "Hello World 0" >README.md git add README.md -git commit --no-edit || fail "Initial commit failed" +git status --porcelain || fail "Git status failed before initial commit" +test -f README.md || fail "README.md was not created" +git commit -m "Initial commit: Add README.md" || fail "Initial commit failed" git status --porcelain || fail "Git status failed after initial commit" # 2.2 Commit with message diff --git a/src/bin/hook.rs b/src/bin/hook.rs index a4f855e..b91dc77 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -127,7 +127,21 @@ impl Args { .into(); let used_tokens = commit::token_used(&model)?; let max_tokens = config::APP.max_tokens.unwrap_or(model.context_size()); - let remaining_tokens = max_tokens.saturating_sub(used_tokens); + let remaining_tokens = max_tokens.saturating_sub(used_tokens).max(512); + + let tree = match self.sha1.as_deref() { + Some("HEAD") | None => repo.head().ok().and_then(|head| head.peel_to_tree().ok()), + Some(sha1) => + repo + .find_object(Oid::from_str(sha1)?, None) + .ok() + .and_then(|obj| obj.peel_to_tree().ok()), + }; + + let diff = repo.to_diff(tree.clone())?; + if diff.is_empty()? 
{ + bail!("No changes to commit"); + } let pb = ProgressBar::new_spinner(); let style = ProgressStyle::default_spinner() @@ -139,14 +153,16 @@ impl Args { pb.set_message("Generating commit message..."); pb.enable_steady_tick(Duration::from_millis(150)); - if !self.commit_msg_file.is_empty().unwrap_or_default() { - log::debug!("A commit message has already been provided"); - return Ok(()); - } + let patch = repo + .to_patch(tree, remaining_tokens, model) + .context("Failed to get patch")?; + + let response = commit::generate(patch.to_string(), remaining_tokens, model).await?; + std::fs::write(&self.commit_msg_file, response.response.trim())?; + + pb.finish_and_clear(); - self - .handle_commit(&repo, &pb, model, remaining_tokens) - .await + Ok(()) } } } diff --git a/src/hook.rs b/src/hook.rs index 3f9d033..0d2564b 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -144,6 +144,7 @@ impl Utf8String for [u8] { pub trait PatchDiff { fn to_patch(&self, max_token_count: usize, model: Model) -> Result; fn collect_diff_data(&self) -> Result>; + fn is_empty(&self) -> Result; } impl PatchDiff for Diff<'_> { @@ -240,6 +241,22 @@ impl PatchDiff for Diff<'_> { .into_inner() ) } + + fn is_empty(&self) -> Result { + let mut has_changes = false; + + self.foreach( + &mut |_file, _progress| { + has_changes = true; + true + }, + None, + None, + None + )?; + + Ok(!has_changes) + } } fn process_chunk( @@ -308,19 +325,29 @@ impl PatchRepository for Repository { let mut opts = DiffOptions::new(); self.configure_diff_options(&mut opts); - self - .diff_tree_to_index(tree.as_ref(), None, Some(&mut opts)) - .context("Failed to get diff") + match tree { + Some(tree) => { + // Get the diff between tree and working directory, including staged changes + self.diff_tree_to_workdir_with_index(Some(&tree), Some(&mut opts)) + } + None => { + // If there's no HEAD yet, compare against an empty tree + let empty_tree = self.find_tree(self.treebuilder(None)?.write()?)?; + // Get the diff between empty tree and 
working directory, including staged changes + self.diff_tree_to_workdir_with_index(Some(&empty_tree), Some(&mut opts)) + } + } + .context("Failed to get diff") } fn configure_diff_options(&self, opts: &mut DiffOptions) { opts .ignore_whitespace_change(true) - .recurse_untracked_dirs(false) + .recurse_untracked_dirs(true) .recurse_ignored_dirs(false) .ignore_whitespace_eol(true) .ignore_blank_lines(true) - .include_untracked(false) + .include_untracked(true) .ignore_whitespace(true) .indent_heuristic(false) .ignore_submodules(true) diff --git a/tests/patch_test.rs b/tests/patch_test.rs index a54bedc..d977b40 100644 --- a/tests/patch_test.rs +++ b/tests/patch_test.rs @@ -43,28 +43,29 @@ trait TestPatchDiff { impl TestPatchDiff for git2::Diff<'_> { fn is_empty(&self) -> Result { - let mut acc = Vec::new(); - let mut length = 0; - - #[rustfmt::skip] - self.print(DiffFormat::Patch, |_, _, line| { - let content = line.content(); - acc.extend_from_slice(content); - length += content.len(); - true - })?; - - Ok(length == 0) + let mut has_changes = false; + + self.foreach( + &mut |_file, _progress| { + has_changes = true; + true + }, + None, + None, + None + )?; + + Ok(!has_changes) } fn contains(&self, our_file: &GitFile) -> Result { let mut found = false; - let our_file_name = our_file.path.file_name().unwrap(); + let our_file_path = our_file.path.strip_prefix(&our_file.repo_path).unwrap(); self.foreach( &mut |file, _progress| { - let other_path: PathBuf = file.new_file().path().unwrap().to_path_buf(); - if other_path == our_file_name { + let other_path = file.new_file().path().unwrap(); + if other_path == our_file_path { found = true; } @@ -91,34 +92,57 @@ fn test_patch_diff_to_patch() { let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); - assert!(diff.is_empty().unwrap()); + assert!(TestPatchDiff::is_empty(&diff).unwrap()); // Add a new line to the file let file = repo.create_file("file", "Hello, 
world!\n").unwrap(); let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); - assert!(diff.is_empty().unwrap()); + assert!(!TestPatchDiff::is_empty(&diff).unwrap()); + assert!(TestPatchDiff::contains(&diff, &file).unwrap()); - // stage the file + // stage and commit the file file.stage().unwrap(); let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); - assert!(!diff.is_empty().unwrap()); - assert!(diff.contains(&file).unwrap()); - - // commit the file + assert!(!TestPatchDiff::is_empty(&diff).unwrap()); file.commit().unwrap(); let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); - assert!(diff.is_empty().unwrap()); - assert!(!diff.contains(&file).unwrap()); + assert!(TestPatchDiff::is_empty(&diff).unwrap()); // delete the file file.delete().unwrap(); let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); - assert!(diff.is_empty().unwrap()); + assert!(!TestPatchDiff::is_empty(&diff).unwrap()); + assert!(TestPatchDiff::contains(&diff, &file).unwrap()); - // stage the file + // stage and commit the deletion file.stage().unwrap(); let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); - assert!(!diff.is_empty().unwrap()); - assert!(diff.contains(&file).unwrap()); + assert!(!TestPatchDiff::is_empty(&diff).unwrap()); + assert!(TestPatchDiff::contains(&diff, &file).unwrap()); + + file.commit().unwrap(); + let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); + let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); + assert!(TestPatchDiff::is_empty(&diff).unwrap()); + + // test initial commit + let repo = TestRepo::default(); + let file = repo.create_file("test.txt", "Hello, world!").unwrap(); + let repo_path = repo.repo_path.path().to_path_buf(); + let git_repo = git2::Repository::open(repo_path).unwrap(); + let diff = git_repo.to_diff(None).unwrap(); + assert!(!TestPatchDiff::is_empty(&diff).unwrap()); + assert!(TestPatchDiff::contains(&diff, &file).unwrap()); + + // stage and commit 
the file + file.stage().unwrap(); + let diff = git_repo.to_diff(None).unwrap(); + assert!(!TestPatchDiff::is_empty(&diff).unwrap()); + assert!(TestPatchDiff::contains(&diff, &file).unwrap()); + + file.commit().unwrap(); + let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); + let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); + assert!(TestPatchDiff::is_empty(&diff).unwrap()); } From e852edd6ff4499615728ffa5eaa6cd56d2cea825 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 07:32:23 +0100 Subject: [PATCH 45/56] Refactor `hook.rs` and ensure a minimum of 512 tokens Based only on the changes visible in the diff, this commit: - Adds nine lines to check for an empty diff and handle amend operations in `src/bin/hook.rs` - Removes four lines related to the error message for no changes found to commit - Modifies a line to ensure a minimum of 512 remaining tokens - Appends a four-line snippet to handle amend operations when the source is a commit --- src/bin/hook.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/bin/hook.rs b/src/bin/hook.rs index b91dc77..4ed9378 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -97,14 +97,19 @@ impl Args { .and_then(|obj| obj.peel_to_tree().ok()), }; + let diff = repo.to_diff(tree.clone())?; + if diff.is_empty()? 
{ + if self.sha1.as_deref() == Some("HEAD") { + // For amend operations, we want to keep the existing message + return Ok(()); + } + bail!("No changes to commit"); + } + let patch = repo .to_patch(tree, remaining_tokens, model) .context("Failed to get patch")?; - if patch.is_empty() { - bail!("No changes to commit"); - } - let response = commit::generate(patch.to_string(), remaining_tokens, model).await?; std::fs::write(&self.commit_msg_file, response.response.trim())?; @@ -127,7 +132,7 @@ impl Args { .into(); let used_tokens = commit::token_used(&model)?; let max_tokens = config::APP.max_tokens.unwrap_or(model.context_size()); - let remaining_tokens = max_tokens.saturating_sub(used_tokens).max(512); + let remaining_tokens = max_tokens.saturating_sub(used_tokens).max(512); // Ensure minimum 512 tokens let tree = match self.sha1.as_deref() { Some("HEAD") | None => repo.head().ok().and_then(|head| head.peel_to_tree().ok()), @@ -140,6 +145,10 @@ impl Args { let diff = repo.to_diff(tree.clone())?; if diff.is_empty()? { + if self.source == Some(Commit) { + // For amend operations, we want to keep the existing message + return Ok(()); + } bail!("No changes to commit"); } From 825150c85efefa47dd39a97a5de397665e27e573 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 07:36:23 +0100 Subject: [PATCH 46/56] Update clean-up command in comprehensive-tests script Based only on the changes visible in the diff, this commit: - Replaces the commented clean-up command in comprehensive-tests script with an active one. 
--- scripts/comprehensive-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/comprehensive-tests b/scripts/comprehensive-tests index 52b34f7..3cce915 100755 --- a/scripts/comprehensive-tests +++ b/scripts/comprehensive-tests @@ -34,7 +34,7 @@ end function on_exit --on-event fish_exit if test -d $TEST_DIR - # rm -rf $TEST_DIR + rm -rf $TEST_DIR end end From 3c4c51ba02f8e11b54a89477fb38b087a8e38982 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 07:38:00 +0100 Subject: [PATCH 47/56] Add attribute to suppress dead code warnings in hook.rs --- src/hook.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hook.rs b/src/hook.rs index 0d2564b..b57dd7f 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -1,3 +1,4 @@ +#![allow(dead_code)] use std::collections::HashMap; use std::io::{Read, Write}; use std::path::PathBuf; From 8f4942ef61fb96504dfb1361d683d1d4bf15e7bd Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 07:38:34 +0100 Subject: [PATCH 48/56] Add initial boilerplate for hook.rs This commit introduces the initial boilerplate for the `hook.rs` file. The added codes allow for dead code, which is often used during the early stages of development. This single-line addition simply contributes to the initial setup of the file. 
--- src/bin/hook.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/bin/hook.rs b/src/bin/hook.rs index 4ed9378..71fe0e5 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + // .git/hooks/prepare-commit-msg // // git commit --amend --no-edit From b22a161181482d92f94012a31fa42a72768b9acc Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 07:40:37 +0100 Subject: [PATCH 49/56] Add debug message when a commit message already exists in hook.rs Based only on the changes visible in the diff, this commit: - Adds code for checking if a commit message already exists and is not empty in the file hook.rs, - If the message exists, a debug log message is displayed, and the function returns, clearing the progress bar. --- src/bin/hook.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/bin/hook.rs b/src/bin/hook.rs index 71fe0e5..691384c 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -164,6 +164,16 @@ impl Args { pb.set_message("Generating commit message..."); pb.enable_steady_tick(Duration::from_millis(150)); + // Check if a commit message already exists and is not empty + if !std::fs::read_to_string(&self.commit_msg_file)? 
+ .trim() + .is_empty() + { + log::debug!("A commit message has already been provided"); + pb.finish_and_clear(); + return Ok(()); + } + let patch = repo .to_patch(tree, remaining_tokens, model) .context("Failed to get patch")?; From 986fd02f344541d2f72ae503b9d1ca9aca7ed071 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 07:43:00 +0100 Subject: [PATCH 50/56] Add `to_commit_diff` and `configure_commit_diff_options` methods to `PatchRepository` trait Based only on the changes visible in the diff, this commit: - Adds the `to_commit_diff` method to the `PatchRepository` trait which returns a `Result>` - Implements `to_commit_diff` in the `PatchRepository` trait for `Repository` class, where it configures diff options and conducts the diff operation based on the provided tree option - Adds the `configure_commit_diff_options` method to the `PatchRepository` trait which doesn't return anything but changes the state of provided `DiffOptions` - Implements `configure_commit_diff_options` in the `PatchRepository` trait for `Repository` class, where it sets various options for a diff operation - Replaces the usage of `to_diff` method with `to_commit_diff` in the `PatchRepository` implementation for `Repository`. 
--- src/hook.rs | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/hook.rs b/src/hook.rs index b57dd7f..0da3e2f 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -312,13 +312,15 @@ fn process_chunk( pub trait PatchRepository { fn to_patch(&self, tree: Option>, max_token_count: usize, model: Model) -> Result; fn to_diff(&self, tree: Option>) -> Result>; + fn to_commit_diff(&self, tree: Option>) -> Result>; fn configure_diff_options(&self, opts: &mut DiffOptions); + fn configure_commit_diff_options(&self, opts: &mut DiffOptions); } impl PatchRepository for Repository { fn to_patch(&self, tree: Option, max_token_count: usize, model: Model) -> Result { profile!("Repository patch generation"); - self.to_diff(tree)?.to_patch(max_token_count, model) + self.to_commit_diff(tree)?.to_patch(max_token_count, model) } fn to_diff(&self, tree: Option>) -> Result> { @@ -341,6 +343,26 @@ impl PatchRepository for Repository { .context("Failed to get diff") } + fn to_commit_diff(&self, tree: Option>) -> Result> { + profile!("Git commit diff generation"); + let mut opts = DiffOptions::new(); + self.configure_commit_diff_options(&mut opts); + + match tree { + Some(tree) => { + // Get the diff between tree and index (staged changes only) + self.diff_tree_to_index(Some(&tree), None, Some(&mut opts)) + } + None => { + // If there's no HEAD yet, compare against an empty tree + let empty_tree = self.find_tree(self.treebuilder(None)?.write()?)?; + // Get the diff between empty tree and index (staged changes only) + self.diff_tree_to_index(Some(&empty_tree), None, Some(&mut opts)) + } + } + .context("Failed to get diff") + } + fn configure_diff_options(&self, opts: &mut DiffOptions) { opts .ignore_whitespace_change(true) @@ -358,6 +380,24 @@ impl PatchRepository for Repository { .patience(true) .minimal(true); } + + fn configure_commit_diff_options(&self, opts: &mut DiffOptions) { + opts + .ignore_whitespace_change(true) + 
.recurse_untracked_dirs(false) + .recurse_ignored_dirs(false) + .ignore_whitespace_eol(true) + .ignore_blank_lines(true) + .include_untracked(false) + .ignore_whitespace(true) + .indent_heuristic(false) + .ignore_submodules(true) + .include_ignored(false) + .interhunk_lines(0) + .context_lines(0) + .patience(true) + .minimal(true); + } } #[cfg(test)] From d6efbfc36dbf67bfcd41505a277d21ff3dcf9131 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 07:49:58 +0100 Subject: [PATCH 51/56] Optimize max_tokens_per_file calculation in hook.rs The max_tokens_per_file calculation within the `process_chunk` function of hook.rs now considers the case of zero remaining files. If no files are remaining, the total_remaining value is assigned to max_tokens_per_file directly. --- src/hook.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/hook.rs b/src/hook.rs index 0da3e2f..ac62bbf 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -270,12 +270,13 @@ fn process_chunk( let current_file_num = processed_files.fetch_add(1, Ordering::SeqCst); let files_remaining = total_files.saturating_sub(current_file_num); - if files_remaining == 0 { - continue; - } - + // Calculate max_tokens_per_file based on actual remaining files let total_remaining = remaining_tokens.load(Ordering::SeqCst); - let max_tokens_per_file = total_remaining.saturating_div(files_remaining); + let max_tokens_per_file = if files_remaining > 0 { + total_remaining.saturating_div(files_remaining) + } else { + total_remaining + }; if max_tokens_per_file == 0 { continue; From b7cce7070aca08a1cf417dae5f03c3885187fed7 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 15:57:05 +0100 Subject: [PATCH 52/56] Refactor method calls and condition checks in openai.rs and patch_test.rs --- src/openai.rs | 19 ++++++++++--------- tests/patch_test.rs | 22 +++++++++++----------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/openai.rs b/src/openai.rs index 
ce2299a..3dab00e 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -163,14 +163,15 @@ pub async fn call(request: Request) -> Result { } }; - Ok(Response { - response: response - .choices - .first() - .context("No response choices available")? - .message - .content - .clone() - }) + let content = response + .choices + .first() + .context("No response choices available")? + .message + .content + .clone() + .context("Response content is empty")?; + + Ok(Response { response: content }) } } diff --git a/tests/patch_test.rs b/tests/patch_test.rs index b465f00..54bf983 100644 --- a/tests/patch_test.rs +++ b/tests/patch_test.rs @@ -94,7 +94,7 @@ impl TestRepository for Repository { Some(tree) => { // For staged changes, compare tree to index let diff = self.diff_tree_to_index(Some(&tree), None, Some(&mut opts))?; - if !diff.is_empty()? { + if !TestPatchDiff::is_empty(&diff)? { return Ok(diff); } // If no staged changes, compare tree to workdir @@ -133,39 +133,39 @@ fn test_patch_diff_to_patch() { file.commit().unwrap(); let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); - let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); + let diff = TestRepository::to_diff(&git_repo, Some(tree.clone())).unwrap(); assert!(TestPatchDiff::is_empty(&diff).unwrap()); // Add a new line to the file let file = repo.create_file("file", "Hello, world!\n").unwrap(); - let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); + let diff = TestRepository::to_diff(&git_repo, Some(tree.clone())).unwrap(); assert!(!TestPatchDiff::is_empty(&diff).unwrap()); assert!(TestPatchDiff::contains(&diff, &file).unwrap()); // stage and commit the file file.stage().unwrap(); - let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); + let diff = TestRepository::to_diff(&git_repo, Some(tree.clone())).unwrap(); assert!(!TestPatchDiff::is_empty(&diff).unwrap()); file.commit().unwrap(); let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); - let diff = 
git_repo.to_diff(Some(tree.clone())).unwrap(); + let diff = TestRepository::to_diff(&git_repo, Some(tree.clone())).unwrap(); assert!(TestPatchDiff::is_empty(&diff).unwrap()); // delete the file file.delete().unwrap(); - let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); + let diff = TestRepository::to_diff(&git_repo, Some(tree.clone())).unwrap(); assert!(!TestPatchDiff::is_empty(&diff).unwrap()); assert!(TestPatchDiff::contains(&diff, &file).unwrap()); // stage and commit the deletion file.stage().unwrap(); - let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); + let diff = TestRepository::to_diff(&git_repo, Some(tree.clone())).unwrap(); assert!(!TestPatchDiff::is_empty(&diff).unwrap()); assert!(TestPatchDiff::contains(&diff, &file).unwrap()); file.commit().unwrap(); let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); - let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); + let diff = TestRepository::to_diff(&git_repo, Some(tree.clone())).unwrap(); assert!(TestPatchDiff::is_empty(&diff).unwrap()); // test initial commit @@ -173,18 +173,18 @@ fn test_patch_diff_to_patch() { let file = repo.create_file("test.txt", "Hello, world!").unwrap(); let repo_path = repo.repo_path.path().to_path_buf(); let git_repo = git2::Repository::open(repo_path).unwrap(); - let diff = git_repo.to_diff(None).unwrap(); + let diff = TestRepository::to_diff(&git_repo, None).unwrap(); assert!(!TestPatchDiff::is_empty(&diff).unwrap()); assert!(TestPatchDiff::contains(&diff, &file).unwrap()); // stage and commit the file file.stage().unwrap(); - let diff = git_repo.to_diff(None).unwrap(); + let diff = TestRepository::to_diff(&git_repo, None).unwrap(); assert!(!TestPatchDiff::is_empty(&diff).unwrap()); assert!(TestPatchDiff::contains(&diff, &file).unwrap()); file.commit().unwrap(); let tree = git_repo.head().unwrap().peel_to_tree().unwrap(); - let diff = git_repo.to_diff(Some(tree.clone())).unwrap(); + let diff = TestRepository::to_diff(&git_repo, 
Some(tree.clone())).unwrap(); assert!(TestPatchDiff::is_empty(&diff).unwrap()); } From 9e568a392ddca336875a572ac870677f87431d44 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 16:02:24 +0100 Subject: [PATCH 53/56] Refine instructions and guidelines for generating git commit messages --- resources/prompt.md | 47 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/resources/prompt.md b/resources/prompt.md index f16dcff..14b9685 100644 --- a/resources/prompt.md +++ b/resources/prompt.md @@ -1,4 +1,4 @@ -You are an AI assistant that generates concise and precise git commit messages based solely on the provided diffs. Please adhere to the following enhanced guidelines: +You are an AI assistant specialized in generating precise and concise git commit messages based on provided diffs. Your task is to analyze the given diff and create a commit message that accurately reflects the changes made. The character limit for the commit message is: @@ -6,40 +6,17 @@ The character limit for the commit message is: {{max_length}} -- **Structure**: Begin with a clear, present-tense summary of the change in the non-conventional commit format. Use a single-line summary for the change, followed by a blank line. As a best practice, consider including only one bullet point detailing context if essential, but refrain from excessive elaboration. - -- **Content**: Commit messages must strictly describe the lines marked with + or - in the diff. Avoid including surrounding context, unmarked lines, or irrelevant details. Explicitly refrain from mentioning implications, reasoning, motivations, or any external context not explicitly reflected in the diff. Make sure to avoid any interpretations or assumptions beyond what is clearly stated. - -- **Changes**: Clearly articulate what was added, removed, or modified based solely on what is visible in the diff. 
Use phrases such as "Based only on the changes visible in the diff, this commit..." to emphasize an evidence-based approach while outlining changes directly. - -- **Consistency**: Ensure uniformity in tense, punctuation, and capitalization throughout the message. Use present tense and imperative form, such as "Add x to y" instead of "Added x to y". - -- **Clarity & Brevity**: Craft messages that are clear and easy to understand, succinctly capturing the essence of the changes. Limit the message to a maximum of {{max_length}} characters for the first line, while ensuring enough detail is provided on the primary action taken. Avoid jargon; provide plain definitions for any necessary technical terms. - -- **Accuracy & Hallucination Prevention**: Rigorously reflect only the changes visible in the diff. Avoid any speculation or inclusion of content not substantiated by the diff. Restate the necessity for messages to focus exclusively on aspects evident in the diff and to completely avoid extrapolation or assumptions about motivations or implications. - -- **Binary Files & Special Cases**: When handling binary files or cases where diff content is not readable: - 1. NEVER output error messages or apologies in the commit message - 2. Use the format "Add/Update/Delete binary file " for binary files - 3. Include file size in parentheses if available - 4. If multiple binary files are changed, list them separated by commas - 5. For unreadable diffs, focus on the file operation (add/modify/delete) without speculating about content - -- **Error Prevention**: - 1. NEVER include phrases like "I'm sorry", "I apologize", or any error messages - 2. NEVER leave commit messages incomplete or truncated - 3. If unable to read diff content, default to describing the file operation - 4. Always ensure the message is a valid git commit message - 5. When in doubt about content, focus on the file operation type - -- **Review Process**: Before finalizing each commit message: - 1. 
Verify that the message accurately reflects only the changes in the diff - 2. Confirm the commit type matches the actual changes - 3. Check that the message follows the structure and formatting guidelines - 4. Ensure no external context or assumptions are included - 5. Validate that the message is clear and understandable to other developers - 6. Verify no error messages or apologies are included - 7. Confirm the message describes file operations even if content is unreadable +Please follow these guidelines when generating the commit message: + +1. Analyze the diff carefully, focusing on lines marked with + or -. +2. Identify the files changed and the nature of the changes (added, modified, or deleted). +3. Determine the most significant change if multiple changes are present. +4. Create a clear, present-tense summary of the change in the imperative mood. +5. Ensure the commit message is within the specified character limit. +6. For binary files or unreadable diffs: + - Use the format "Add/Update/Delete binary file " + - Include file size in parentheses if available + - For multiple binary files, list them separated by commas Before generating the final commit message, please analyze the diff and but keep your thought process to your self: From 25b56cb3ce8c85e3f2882931a6a4493cf98b37c4 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 16:16:44 +0100 Subject: [PATCH 54/56] Add error handling for raw SHA1 resolution in hook.rs --- src/bin/hook.rs | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/bin/hook.rs b/src/bin/hook.rs index 691384c..f482add 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -92,11 +92,18 @@ impl Args { async fn handle_commit(&self, repo: &Repository, pb: &ProgressBar, model: Model, remaining_tokens: usize) -> Result<()> { let tree = match self.sha1.as_deref() { Some("HEAD") | None => repo.head().ok().and_then(|head| head.peel_to_tree().ok()), - Some(sha1) => - repo - 
.find_object(Oid::from_str(sha1)?, None) - .ok() - .and_then(|obj| obj.peel_to_tree().ok()), + Some(sha1) => { + // First try to resolve as a reference (handles HEAD~1, HEAD^, etc) + if let Ok(obj) = repo.revparse_single(sha1) { + obj.peel_to_tree().ok() + } else { + // If not a reference, try as raw SHA1 + repo + .find_object(Oid::from_str(sha1)?, None) + .ok() + .and_then(|obj| obj.peel_to_tree().ok()) + } + } }; let diff = repo.to_diff(tree.clone())?; @@ -138,11 +145,18 @@ impl Args { let tree = match self.sha1.as_deref() { Some("HEAD") | None => repo.head().ok().and_then(|head| head.peel_to_tree().ok()), - Some(sha1) => - repo - .find_object(Oid::from_str(sha1)?, None) - .ok() - .and_then(|obj| obj.peel_to_tree().ok()), + Some(sha1) => { + // First try to resolve as a reference (handles HEAD~1, HEAD^, etc) + if let Ok(obj) = repo.revparse_single(sha1) { + obj.peel_to_tree().ok() + } else { + // If not a reference, try as raw SHA1 + repo + .find_object(Oid::from_str(sha1)?, None) + .ok() + .and_then(|obj| obj.peel_to_tree().ok()) + } + } }; let diff = repo.to_diff(tree.clone())?; From 2fa2562461965aece658a90fd1ff560eeb7fc649 Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 16:33:03 +0100 Subject: [PATCH 55/56] Refactor function calls in patch_test.rs and simplify conditional logic in hook.rs --- src/bin/hook.rs | 34 ++++++++++------------------------ tests/patch_test.rs | 2 +- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/src/bin/hook.rs b/src/bin/hook.rs index f482add..691384c 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -92,18 +92,11 @@ impl Args { async fn handle_commit(&self, repo: &Repository, pb: &ProgressBar, model: Model, remaining_tokens: usize) -> Result<()> { let tree = match self.sha1.as_deref() { Some("HEAD") | None => repo.head().ok().and_then(|head| head.peel_to_tree().ok()), - Some(sha1) => { - // First try to resolve as a reference (handles HEAD~1, HEAD^, etc) - if let Ok(obj) = 
repo.revparse_single(sha1) { - obj.peel_to_tree().ok() - } else { - // If not a reference, try as raw SHA1 - repo - .find_object(Oid::from_str(sha1)?, None) - .ok() - .and_then(|obj| obj.peel_to_tree().ok()) - } - } + Some(sha1) => + repo + .find_object(Oid::from_str(sha1)?, None) + .ok() + .and_then(|obj| obj.peel_to_tree().ok()), }; let diff = repo.to_diff(tree.clone())?; @@ -145,18 +138,11 @@ impl Args { let tree = match self.sha1.as_deref() { Some("HEAD") | None => repo.head().ok().and_then(|head| head.peel_to_tree().ok()), - Some(sha1) => { - // First try to resolve as a reference (handles HEAD~1, HEAD^, etc) - if let Ok(obj) = repo.revparse_single(sha1) { - obj.peel_to_tree().ok() - } else { - // If not a reference, try as raw SHA1 - repo - .find_object(Oid::from_str(sha1)?, None) - .ok() - .and_then(|obj| obj.peel_to_tree().ok()) - } - } + Some(sha1) => + repo + .find_object(Oid::from_str(sha1)?, None) + .ok() + .and_then(|obj| obj.peel_to_tree().ok()), }; let diff = repo.to_diff(tree.clone())?; diff --git a/tests/patch_test.rs b/tests/patch_test.rs index 06dee6e..3e08484 100644 --- a/tests/patch_test.rs +++ b/tests/patch_test.rs @@ -94,7 +94,7 @@ impl TestRepository for Repository { Some(tree) => { // For staged changes, compare tree to index let diff = self.diff_tree_to_index(Some(&tree), None, Some(&mut opts))?; - if !TestPatchDiff::is_empty(&diff)? { + if !diff.test_is_empty()? 
{ return Ok(diff); } // If no staged changes, compare tree to workdir From 1bfd788f20c486aabf777c4859f9b3e52399b99f Mon Sep 17 00:00:00 2001 From: Git AI Test Date: Sat, 8 Feb 2025 16:39:36 +0100 Subject: [PATCH 56/56] Refactor reference resolution in hook.rs --- src/bin/hook.rs | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/bin/hook.rs b/src/bin/hook.rs index 691384c..818e247 100644 --- a/src/bin/hook.rs +++ b/src/bin/hook.rs @@ -92,11 +92,18 @@ impl Args { async fn handle_commit(&self, repo: &Repository, pb: &ProgressBar, model: Model, remaining_tokens: usize) -> Result<()> { let tree = match self.sha1.as_deref() { Some("HEAD") | None => repo.head().ok().and_then(|head| head.peel_to_tree().ok()), - Some(sha1) => - repo - .find_object(Oid::from_str(sha1)?, None) - .ok() - .and_then(|obj| obj.peel_to_tree().ok()), + Some(sha1) => { + // Try to resolve the reference first + if let Ok(obj) = repo.revparse_single(sha1) { + obj.peel_to_tree().ok() + } else { + // If not a reference, try as direct OID + repo + .find_object(Oid::from_str(sha1)?, None) + .ok() + .and_then(|obj| obj.peel_to_tree().ok()) + } + } }; let diff = repo.to_diff(tree.clone())?; @@ -138,11 +145,18 @@ impl Args { let tree = match self.sha1.as_deref() { Some("HEAD") | None => repo.head().ok().and_then(|head| head.peel_to_tree().ok()), - Some(sha1) => - repo - .find_object(Oid::from_str(sha1)?, None) - .ok() - .and_then(|obj| obj.peel_to_tree().ok()), + Some(sha1) => { + // Try to resolve the reference first + if let Ok(obj) = repo.revparse_single(sha1) { + obj.peel_to_tree().ok() + } else { + // If not a reference, try as direct OID + repo + .find_object(Oid::from_str(sha1)?, None) + .ok() + .and_then(|obj| obj.peel_to_tree().ok()) + } + } }; let diff = repo.to_diff(tree.clone())?;