
Speed up hook diff processing #27

Merged · 58 commits · Feb 8, 2025
Commits
7d9843b
test
oleander Feb 7, 2025
fe94188
Add profiling feature with macro for measuring execution time
oleander Feb 7, 2025
462b75d
Refactor diff processing, optimize token handling and storage
oleander Feb 7, 2025
2ea378d
Update test signatures and add profiling tests
oleander Feb 7, 2025
37e5b2e
Refactor GitFile signatures and commit messages
oleander Feb 7, 2025
c87b0d2
Update Cargo.lock dependencies and checksums
oleander Feb 7, 2025
7eb1eb7
Update dependencies in Cargo.toml and Cargo.lock
oleander Feb 7, 2025
e2df5f4
Add StringPool for efficient memory use in PatchDiff
oleander Feb 7, 2025
bf3110f
Update dependencies in Cargo.toml and Cargo.lock
oleander Feb 7, 2025
63a498c
Add `num_cpus` crate and parallelize file processing
oleander Feb 7, 2025
6a5051b
Refactor file processing to use parallel chunks and atomic tokens
oleander Feb 7, 2025
7ed087b
Remove redundant import of `bail` from anyhow
oleander Feb 7, 2025
51f9609
Sort files by token count in `PatchDiff` implementation.
oleander Feb 7, 2025
5e50a25
Delete test.txt file
oleander Feb 7, 2025
d49f534
Improve error handling and path management in config and style modules
oleander Feb 7, 2025
600e5fd
Add tests for StringPool functionality in hook.rs
oleander Feb 7, 2025
450381d
Update default model and add profiling to model and commit functions
oleander Feb 7, 2025
00faa02
Add profiling to filesystem module functions
oleander Feb 7, 2025
4a7d5d8
Implement token counting and generation for commit messages
oleander Feb 7, 2025
a5833c8
Add documentation for Filesystem, File, and Dir structs in filesystem.rs
oleander Feb 7, 2025
5384690
Refactor commit message generation methods and file handling logic
oleander Feb 7, 2025
fa47b5b
Implement configuration file management and update functions in App
oleander Feb 7, 2025
c7778e6
Implement parallel processing of diff data in PatchDiff trait
oleander Feb 7, 2025
da74cd4
```
Feb 8, 2025
1e56b0d
```
Feb 8, 2025
1198750
Remove unused import of `std::fs` from `commit.rs` file.
Feb 8, 2025
140f2df
Remove unused import and adjust available tokens calculation
Feb 8, 2025
e1f49e4
Update max commit length in prompt guidelines
Feb 8, 2025
0ad8074
```
Feb 8, 2025
4f71559
Add directory creation for hooks if it does not exist
Feb 8, 2025
96aedfa
Add dead code allowance in filesystem.rs
Feb 8, 2025
aa4d073
Revert "```"
Feb 8, 2025
6fd6ab8
```
Feb 8, 2025
10192f6
Delete stats.json file
Feb 8, 2025
1e231fb
```
Feb 8, 2025
42f27e2
Build inline
Feb 8, 2025
b75147b
Update default model name in Args implementation
Feb 8, 2025
a546bba
```
Feb 8, 2025
596f662
```
Feb 8, 2025
0f7af0c
Change file permission of comprehensive-tests.
Feb 8, 2025
5d5ce13
Update `comprehensive-tests` script to load environment variables fro…
Feb 8, 2025
62e75f0
Remove note about output being used as a git commit message from 'pro…
Feb 8, 2025
4e8bbc9
Update comprehensive-tests script and prompt.md documentation
Feb 8, 2025
a887149
Update scripts and source code according to visible changes in the diff
Feb 8, 2025
e852edd
Refactor `hook.rs` and ensure a minimum of 512 tokens
Feb 8, 2025
825150c
Update clean-up command in comprehensive-tests script
Feb 8, 2025
3c4c51b
Add attribute to suppress dead code warnings in hook.rs
Feb 8, 2025
8f4942e
Add initial boilerplate for hook.rs
Feb 8, 2025
b22a161
Add debug message when a commit message already exists in hook.rs
Feb 8, 2025
986fd02
Add `to_commit_diff` and `configure_commit_diff_options` methods to `…
Feb 8, 2025
d6efbfc
Optimize max_tokens_per_file calculation in hook.rs
Feb 8, 2025
207f2c3
Merge main into feature/speed, preserving performance improvements
Feb 8, 2025
b7cce70
Refactor method calls and condition checks in openai.rs and patch_tes…
Feb 8, 2025
9e568a3
Refine instructions and guidelines for generating git commit messages
Feb 8, 2025
25b56cb
Add error handling for raw SHA1 resolution in hook.rs
Feb 8, 2025
944577b
Merge remote-tracking branch 'origin/main' into feature/speed
Feb 8, 2025
2fa2562
Refactor function calls in patch_test.rs and simplify conditional log…
Feb 8, 2025
1bfd788
Refactor reference resolution in hook.rs
Feb 8, 2025
2 changes: 2 additions & 0 deletions .gitignore
@@ -6,3 +6,5 @@ http-cacache/*
${env:TMPDIR}
bin/
tmp/
finetune_verify.jsonl
finetune_train.jsonl
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

6 changes: 5 additions & 1 deletion Cargo.toml
@@ -26,7 +26,7 @@ path = "src/bin/hook.rs"

[dependencies]
# Core functionality
anyhow = "1.0.95"
anyhow = { version = "1.0.95", features = ["backtrace"] }
thiserror = "2.0.11"
tokio = { version = "1.43", features = ["full"] }
futures = "0.3"
@@ -52,9 +52,11 @@ serde_derive = "1.0.217"
serde_ini = "0.2.0"
serde_json = "1.0"

# OpenAI integration
async-openai = { version = "0.27.2", default-features = false }
tiktoken-rs = "0.6.0"
reqwest = { version = "0.12.12", default-features = true }

# System utilities
openssl-sys = { version = "0.9.105", features = ["vendored"] }
rayon = "1.10.0"
@@ -64,6 +66,7 @@ ctrlc = "3.4.5"
lazy_static = "1.5.0"
home = "0.5.11"
dirs = "6.0"

# Syntax highlighting and markdown rendering
syntect = { version = "5.2", default-features = false, features = [
"default-fancy",
@@ -74,6 +77,7 @@ textwrap = "0.16"
structopt = "0.3.26"
mustache = "0.9.0"
maplit = "1.0.2"

[dev-dependencies]
tempfile = "3.16.0"

20 changes: 7 additions & 13 deletions src/commit.rs
@@ -1,5 +1,6 @@
use anyhow::{bail, Result};
use anyhow::{anyhow, bail, Result};
use maplit::hashmap;
use mustache;

use crate::{config, openai, profile};
use crate::model::Model;
@@ -13,11 +14,11 @@ fn get_instruction_template() -> Result<String> {
profile!("Generate instruction template");
let max_length = config::APP.max_commit_length.unwrap_or(72).to_string();
let template = mustache::compile_str(INSTRUCTION_TEMPLATE)
.map_err(|e| anyhow::anyhow!("Template compilation error: {}", e))?
.map_err(|e| anyhow!("Template compilation error: {}", e))?
.render_to_string(&hashmap! {
"max_length" => max_length
})
.map_err(|e| anyhow::anyhow!("Template rendering error: {}", e))?;
.map_err(|e| anyhow!("Template rendering error: {}", e))?;
Ok(template)
}

@@ -43,18 +44,11 @@ pub fn get_instruction_token_count(model: &Model) -> Result<usize> {
///
/// # Returns
/// * `Result<openai::Request>` - The prepared request
pub fn create_commit_request(diff: String, max_tokens: usize, model: Model) -> Result<openai::Request> {
fn create_commit_request(diff: String, max_tokens: usize, model: Model) -> Result<openai::Request> {
profile!("Prepare OpenAI request");
let max_length = config::APP.max_commit_length.unwrap_or(72).to_string();
let instruction_template = mustache::compile_str(INSTRUCTION_TEMPLATE)
.map_err(|e| anyhow::anyhow!("Template compilation error: {}", e))?
.render_to_string(&hashmap! {
"max_length" => max_length
})
.map_err(|e| anyhow::anyhow!("Template rendering error: {}", e))?;

let template = get_instruction_template()?;
Ok(openai::Request {
system: instruction_template,
system: template,
prompt: diff,
max_tokens: max_tokens.try_into().unwrap_or(u16::MAX),
model
189 changes: 17 additions & 172 deletions src/hook.rs
@@ -279,33 +279,28 @@ fn process_chunk(
};

if max_tokens_per_file == 0 {
// No tokens left to allocate, skip remaining files
break;
continue;
}

let token_count = *token_count;
let allocated_tokens = token_count.min(max_tokens_per_file);

// Attempt to atomically update remaining tokens
match remaining_tokens.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
if current >= allocated_tokens {
Some(current - allocated_tokens)
} else {
None
}
}) {
Ok(_) => {
let processed_content = if token_count > allocated_tokens {
model.truncate(content, allocated_tokens)?
if remaining_tokens
.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
if current >= allocated_tokens {
Some(current - allocated_tokens)
} else {
content.clone()
};
chunk_results.push((path.clone(), processed_content));
}
Err(_) => {
// Failed to allocate tokens, skip remaining files
break;
}
None
}
})
.is_ok()
{
let processed_content = if token_count > allocated_tokens {
model.truncate(content, allocated_tokens)?
} else {
content.clone()
};
chunk_results.push((path.clone(), processed_content));
}
}
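The hunk above replaces the earlier `match`-based allocation with a single `fetch_update` check. A minimal sketch of the idea, using a hypothetical `try_allocate` helper (not part of the PR), shows why concurrent workers can never overdraw the shared token budget: `fetch_update` retries the closure until the compare-and-swap succeeds, so the subtraction is atomic.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

// Hypothetical helper illustrating the lock-free budgeting pattern in
// `process_chunk`. Returns the granted token count, or None when the
// remaining budget cannot cover the request.
fn try_allocate(remaining: &AtomicUsize, want: usize) -> Option<usize> {
    remaining
        .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
            if current >= want {
                Some(current - want) // commit the new budget atomically
            } else {
                None // budget exhausted; closure aborts the update
            }
        })
        .ok()
        .map(|_| want)
}

fn main() {
    let budget = AtomicUsize::new(60);
    assert_eq!(try_allocate(&budget, 50), Some(50));
    assert_eq!(try_allocate(&budget, 20), None); // only 10 tokens left
    println!("remaining: {}", budget.load(Ordering::SeqCst));
}
```

Because a failed closure returns `None` (surfacing as `Err` from `fetch_update`), the caller can simply `continue` to the next file instead of breaking out of the loop, which matches the `break` → `continue` change earlier in this hunk.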

Expand Down Expand Up @@ -371,7 +366,7 @@ impl PatchRepository for Repository {

fn configure_diff_options(&self, opts: &mut DiffOptions) {
opts
.ignore_whitespace_change(false)
.ignore_whitespace_change(true)
.recurse_untracked_dirs(true)
.recurse_ignored_dirs(false)
.ignore_whitespace_eol(true)
@@ -408,8 +403,6 @@ impl PatchRepository for Repository {

#[cfg(test)]
mod tests {
use tempfile::TempDir;

use super::*;

#[test]
@@ -452,152 +445,4 @@ mod tests {

assert_eq!(pool.strings.len(), MAX_POOL_SIZE);
}

#[test]
fn test_process_chunk_token_allocation() {
let model = Arc::new(Model::default());
let total_files = 3;
let processed_files = Arc::new(AtomicUsize::new(0));
let remaining_tokens = Arc::new(AtomicUsize::new(60)); // Reduced to force allocation limits
let result_chunks = Arc::new(Mutex::new(Vec::new()));

let chunk = vec![
(PathBuf::from("file1.txt"), "content1".to_string(), 50),
(PathBuf::from("file2.txt"), "content2".to_string(), 40),
(PathBuf::from("file3.txt"), "content3".to_string(), 30),
];

process_chunk(&chunk, &model, total_files, &processed_files, &remaining_tokens, &result_chunks).unwrap();

let results = result_chunks.lock();
// With 60 total tokens and 3 files:
// First file gets 20 tokens (60/3)
// Second file gets 30 tokens (40/2)
// Third file gets 10 tokens (10/1)
assert_eq!(results.len(), 3);
assert_eq!(remaining_tokens.load(Ordering::SeqCst), 0);
assert_eq!(processed_files.load(Ordering::SeqCst), 3);
}

#[test]
fn test_process_chunk_concurrent_safety() {
use std::thread;

let model = Arc::new(Model::default());
let total_files = 6;
let processed_files = Arc::new(AtomicUsize::new(0));
let remaining_tokens = Arc::new(AtomicUsize::new(100));
let result_chunks = Arc::new(Mutex::new(Vec::new()));

let chunk1 = vec![
(PathBuf::from("file1.txt"), "content1".to_string(), 20),
(PathBuf::from("file2.txt"), "content2".to_string(), 20),
(PathBuf::from("file3.txt"), "content3".to_string(), 20),
];

let chunk2 = vec![
(PathBuf::from("file4.txt"), "content4".to_string(), 20),
(PathBuf::from("file5.txt"), "content5".to_string(), 20),
(PathBuf::from("file6.txt"), "content6".to_string(), 20),
];

// Clone values for thread 2
let model2 = model.clone();
let processed_files2 = processed_files.clone();
let remaining_tokens2 = remaining_tokens.clone();
let result_chunks2 = result_chunks.clone();

// Clone values for main thread access after threads complete
let processed_files_main = processed_files.clone();
let remaining_tokens_main = remaining_tokens.clone();
let result_chunks_main = result_chunks.clone();

let t1 = thread::spawn(move || {
process_chunk(&chunk1, &model, total_files, &processed_files, &remaining_tokens, &result_chunks).unwrap();
});

let t2 = thread::spawn(move || {
process_chunk(&chunk2, &model2, total_files, &processed_files2, &remaining_tokens2, &result_chunks2).unwrap();
});

t1.join().unwrap();
t2.join().unwrap();

let results = result_chunks_main.lock();
assert_eq!(results.len(), 6);
assert_eq!(remaining_tokens_main.load(Ordering::SeqCst), 0);
assert_eq!(processed_files_main.load(Ordering::SeqCst), 6);
}

#[test]
fn test_to_commit_diff_with_head() -> Result<()> {
let temp_dir = TempDir::new()?;
let repo = Repository::init(temp_dir.path())?;
let mut index = repo.index()?;

// Create a file and stage it
let file_path = temp_dir.path().join("test.txt");
std::fs::write(&file_path, "initial content")?;
index.add_path(file_path.strip_prefix(temp_dir.path())?)?;
index.write()?;

// Create initial commit
let tree_id = index.write_tree()?;
let tree = repo.find_tree(tree_id)?;
let signature = git2::Signature::now("test", "test@example.com")?;
repo.commit(Some("HEAD"), &signature, &signature, "Initial commit", &tree, &[])?;

// Modify and stage the file
std::fs::write(&file_path, "modified content")?;
index.add_path(file_path.strip_prefix(temp_dir.path())?)?;
index.write()?;

// Get HEAD tree
let head = repo.head()?.peel_to_tree()?;

// Get diff
let diff = repo.to_commit_diff(Some(head))?;

// Verify diff shows only staged changes
let mut diff_found = false;
diff.print(DiffFormat::Patch, |_delta, _hunk, line| {
let content = line.content().to_utf8();
if line.origin() == '+' && content.contains("modified content") {
diff_found = true;
}
true
})?;

assert!(diff_found, "Expected to find staged changes in diff");
Ok(())
}

#[test]
fn test_to_commit_diff_without_head() -> Result<()> {
let temp_dir = TempDir::new()?;
let repo = Repository::init(temp_dir.path())?;
let mut index = repo.index()?;

// Create and stage a new file
let file_path = temp_dir.path().join("test.txt");
std::fs::write(&file_path, "test content")?;
index.add_path(file_path.strip_prefix(temp_dir.path())?)?;
index.write()?;

// Get diff (no HEAD exists yet)
let diff = repo.to_commit_diff(None)?;

// Verify diff shows staged changes
let mut diff_found = false;
diff.print(DiffFormat::Patch, |_delta, _hunk, line| {
let content = line.content().to_utf8();
if line.origin() == '+' && content.contains("test content") {
diff_found = true;
}
true
})?;

assert!(diff_found, "Expected to find staged changes in diff");
Ok(())
}
}
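The retained test above asserts that the pool never grows past `MAX_POOL_SIZE`. The PR's actual `StringPool` internals are not shown in this diff, but a minimal sketch of a capped deduplicating pool — assuming a simple `HashSet`-backed design — illustrates the behavior the test checks:

```rust
use std::collections::HashSet;

// Assumed cap for illustration; the real constant lives in hook.rs.
const MAX_POOL_SIZE: usize = 4;

// Hypothetical sketch of a capped string pool: strings are retained for
// deduplication up to MAX_POOL_SIZE entries, after which new strings are
// returned without being stored.
struct StringPool {
    strings: HashSet<String>,
}

impl StringPool {
    fn new() -> Self {
        StringPool { strings: HashSet::new() }
    }

    fn intern(&mut self, s: &str) -> String {
        if self.strings.len() < MAX_POOL_SIZE || self.strings.contains(s) {
            self.strings.insert(s.to_string());
        }
        s.to_string()
    }
}

fn main() {
    let mut pool = StringPool::new();
    for i in 0..10 {
        pool.intern(&format!("s{}", i));
    }
    // The pool saturates at the cap no matter how many strings are interned.
    println!("pool size: {}", pool.strings.len());
}
```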
34 changes: 0 additions & 34 deletions src/install.rs

This file was deleted.

13 changes: 7 additions & 6 deletions src/openai.rs
@@ -145,17 +145,18 @@ pub async fn call(request: Request) -> Result<Response> {
"ERROR:".bold().bright_red(),
"Network error:".bright_white(),
e.to_string().dimmed(),
"Failed to connect to OpenAI service.".dimmed(),
"Failed to connect to OpenAI API.".dimmed(),
"Check your internet connection".yellow(),
"Verify OpenAI service is not experiencing downtime".yellow()
"Verify OpenAI service availability".yellow()
),
_ =>
format!(
"{} {}\n {}\n\nDetails:\n {}",
"{} {}\n {}\n\nDetails:\n {}\n\nSuggested Actions:\n 1. {}",
"ERROR:".bold().bright_red(),
"Unexpected error:".bright_white(),
err.to_string().dimmed(),
"An unexpected error occurred while communicating with OpenAI.".dimmed()
"An unexpected error occurred while calling OpenAI API.".dimmed(),
"Please report this issue on GitHub".yellow()
),
};
return Err(anyhow!(error_msg));
@@ -165,11 +166,11 @@ pub async fn call(request: Request) -> Result<Response> {
let content = response
.choices
.first()
.context("No choices returned")?
.context("No response choices available")?
.message
.content
.clone()
.context("No content returned")?;
.context("Response content is empty")?;

Ok(Response { response: content })
}
3 changes: 1 addition & 2 deletions src/profiling.rs
@@ -29,7 +29,6 @@ impl Drop for Profile {
#[macro_export]
macro_rules! profile {
($name:expr) => {
// Currently a no-op, but can be expanded for actual profiling
let _profile_span = $name;
let _profile = $crate::Profile::new($name);
};
}
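The change drops the no-op `_profile_span` binding so the macro now only creates a real `Profile` guard. A minimal sketch of this RAII timing pattern — assuming a design like the one implied by `Profile::new` and the `Drop` impl above — shows how one macro line times an entire scope:

```rust
use std::time::Instant;

// Hypothetical sketch of the timer behind `profile!`: the guard records the
// start time on construction, and Drop logs the elapsed time when the guard
// leaves scope.
struct Profile {
    name: &'static str,
    start: Instant,
}

impl Profile {
    fn new(name: &'static str) -> Self {
        Profile { name, start: Instant::now() }
    }
}

impl Drop for Profile {
    fn drop(&mut self) {
        eprintln!("{} took {:?}", self.name, self.start.elapsed());
    }
}

macro_rules! profile {
    ($name:expr) => {
        let _profile = Profile::new($name);
    };
}

fn main() {
    profile!("demo section");
    let sum: u64 = (0..1_000).sum();
    println!("sum = {}", sum);
} // `_profile` drops here and prints the elapsed time to stderr
```

Because the guard is bound to `_profile` rather than `_`, it lives until the end of the enclosing scope instead of being dropped immediately.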