Improve OpenAI Integration (#34)
* feat: OpenAI Integration changes

* Add `profile` module and tracing dependency

Based only on the changes visible in the diff, this commit:
- Adds a new `profile` module in `src/lib.rs`.
- Inserts a new macro in the `profile` module in `src/profile.rs`.
- Adds a new dependency, `tracing`, in `Cargo.toml`.
- Updates the version from `0.2.56` to `0.2.57` in `Cargo.lock`.
- Adds the new `tracing` dependency to the dependencies list in `Cargo.lock`.
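The body of the new `profile` module is not shown in this view. As a rough sketch of what a scope-timing macro of this kind often looks like (using `std::time::Instant` here; the commit's actual macro may emit `tracing` events instead, and `ProfileGuard` is an illustrative name, not one taken from the diff):

```rust
use std::time::Instant;

// Hypothetical sketch: `profile!` creates a guard that measures the enclosing
// scope and reports the elapsed time when the guard is dropped.
macro_rules! profile {
    ($name:expr) => {
        let _guard = ProfileGuard::new($name);
    };
}

struct ProfileGuard {
    name: &'static str,
    start: Instant,
}

impl ProfileGuard {
    fn new(name: &'static str) -> Self {
        ProfileGuard { name, start: Instant::now() }
    }
}

impl Drop for ProfileGuard {
    fn drop(&mut self) {
        // Runs when the scope ends, printing how long it took.
        println!("{} took {:?}", self.name, self.start.elapsed());
    }
}

fn main() {
    profile!("example scope");
    // ... work being timed ...
}
```

The drop-guard pattern keeps call sites to a single line, which matches how `profile!("...")` is invoked in the `commit.rs` diff further down.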

* Add Ollama client implementation for model interactions

- Introduce `OllamaClient` for handling requests to the Ollama API.
- Implement `OllamaClientTrait` with methods for generating responses and checking model availability.
- Create new `client.rs` module for request/response handling and to integrate with both Ollama and OpenAI models.
- Modify `Cargo.toml` to include the `parking_lot` dependency for better synchronization.
- Set up a structured way to format prompts and parse JSON responses from the Ollama API, ensuring robust error handling and logging.
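The trait itself is not visible in this view (and is removed again by a later sub-commit). A simplified, synchronous sketch of the shape such a client trait might take — the real implementation is async (via `async-trait`) and talks HTTP to the Ollama API; everything here except the `OllamaClientTrait` name is illustrative:

```rust
// Illustrative sketch only: the commit's real trait is async and backed by
// the `ollama-rs` / HTTP stack. This stdlib-only version shows the shape.
trait OllamaClientTrait {
    /// Generate a completion for the given prompt against a named model.
    fn generate(&self, model: &str, prompt: &str) -> Result<String, String>;
    /// Check whether a model is available locally.
    fn is_model_available(&self, model: &str) -> bool;
}

struct MockOllamaClient;

impl OllamaClientTrait for MockOllamaClient {
    fn generate(&self, model: &str, prompt: &str) -> Result<String, String> {
        if self.is_model_available(model) {
            Ok(format!("response to: {}", prompt))
        } else {
            Err(format!("model {} not available", model))
        }
    }

    fn is_model_available(&self, model: &str) -> bool {
        // A real client would query the Ollama API's model list here.
        model == "llama2"
    }
}

fn main() {
    let client = MockOllamaClient;
    assert!(client.is_model_available("llama2"));
    assert!(client.generate("missing-model", "hi").is_err());
}
```

Keeping the client behind a trait is what makes the later removal clean: `client.rs` can fall back to the OpenAI path without touching call sites.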

* Remove OllamaClient implementation and references from the codebase.

Delete the `ollama.rs` file, which contained the `OllamaClient` struct and its associated trait. Additionally, update `client.rs` to remove all dependencies and usages related to `OllamaClient`. Ensure the logic still supports OpenAI models, maintaining compatibility. This cleanup optimizes the code by eliminating unnecessary components and streamlining model availability checks.

* Refactor `truncate_to_fit` function for improved token handling

Enhance the `truncate_to_fit` function by adjusting the line retention strategies based on the attempt count. Implement logic to minimize output size progressively with retries, ensuring that the final result adheres to the specified maximum token limit. Return a minimal version of content if all truncation attempts fail, or a truncation message if content is too large.
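The function body is not shown in this view. A rough sketch of the progressive-retry idea described above, with word count standing in for real tokenization (the actual code uses `tiktoken-rs`; all names other than `truncate_to_fit` are illustrative):

```rust
// Sketch of progressive truncation: keep fewer lines on each retry until the
// result fits the token budget, falling back to a notice if nothing fits.
fn approx_tokens(s: &str) -> usize {
    // Stand-in for a real tokenizer such as tiktoken.
    s.split_whitespace().count()
}

fn truncate_to_fit(content: &str, max_tokens: usize, max_attempts: usize) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let mut keep = lines.len();
    for _ in 0..max_attempts {
        let candidate = lines[..keep].join("\n");
        if approx_tokens(&candidate) <= max_tokens {
            return candidate;
        }
        keep = (keep / 2).max(1); // retain half as many lines on each retry
    }
    // All attempts failed: return a truncation message instead of content.
    "[content truncated: diff too large]".to_string()
}

fn main() {
    let text = "a b c\nd e f\ng h i\nj k l";
    assert_eq!(truncate_to_fit(text, 100, 3), text); // already fits
    assert_eq!(truncate_to_fit(text, 3, 3), "a b c"); // halved twice
}
```

Tying the retention ratio to the attempt count is what lets the function converge quickly on large diffs instead of shaving one line at a time.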

* Fix redundant line in truncate_to_fit function by removing duplicate halving of current_size

* Add .cursorignore file to exclude target, tmp, .DS_Store, and .git from cursor operations.

* Rename function `get_instruction_token_count` to `create_commit_request` in `commit.rs`

  Remove the `prompt`, `file_context`, `author`, and `date` parameters from `generate_commit_message` in `openai.rs`

* Update prompt file by adding 30 new lines of content

- Based only on the changes visible in the diff, this commit adds a significant amount of new content to the prompt.md file.

* Remove 'assert_matches' feature flag from hook module

Based only on the changes visible in the diff, this commit removes the '#![feature(assert_matches)]' line from the prompt.md file.

* Remove lines 42-43 from prompt.md

---

    Add two lines at the end of prompt.md for additional context

---

    Add two lines at the end of rust.yml for unspecified purpose

* Remove unused 'mod common;' declaration from patch test file

* "Update Cargo.toml, commit.rs, Cargo.lock, and prompt.md files to incorporate 'mustache' dependency and enhance commit request creation"

* "Refactor commit.rs to use mustache for template rendering and error handling"

* Remove commit message scoring and prompt optimization functions in openai.rs

* Update the 'get_instruction_template' and 'create_commit_request' functions in commit.rs and modify prompt.md

* Remove prompt instruction lines from resources/prompt.md file

---------

Co-authored-by: Git AI Test <test@example.com>
oleander and Git AI Test committed Feb 8, 2025
1 parent 044baf0 commit be19fde
Showing 9 changed files with 349 additions and 115 deletions.
4 changes: 4 additions & 0 deletions .cursorignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
target/
tmp/
.DS_Store
.git
42 changes: 35 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default.

10 changes: 7 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ tokio = { version = "1.43", features = ["full"] }
futures = "0.3"
parking_lot = "0.12.3"
async-trait = "0.1"
tracing = "0.1"

# CLI and UI
structopt = "0.3.26"
Expand All @@ -56,6 +57,7 @@ async-openai = { version = "0.27.2", default-features = false }
ollama-rs = { version = "0.1", features = ["stream"] }
tiktoken-rs = "0.6.0"
reqwest = { version = "0.12.12", default-features = true }

# System utilities
openssl-sys = { version = "0.9.105", features = ["vendored"] }
rayon = "1.10.0"
Expand All @@ -65,13 +67,15 @@ ctrlc = "3.4.5"
lazy_static = "1.5.0"
home = "0.5.11"
dirs = "6.0"

# Syntax highlighting and markdown rendering
syntect = { version = "5.2", default-features = false, features = [
"default-fancy",
] }
syntect = { version = "5.2", default-features = false, features = ["default-fancy"] }
pulldown-cmark = "0.12"
comrak = "0.35"
textwrap = "0.16"
mustache = "0.9.0"
maplit = "1.0.2"

[dev-dependencies]
tempfile = "3.16.0"

Expand Down
44 changes: 31 additions & 13 deletions resources/prompt.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,36 @@
You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs. Please adhere to the following guidelines:
You are an AI assistant specialized in generating precise and concise git commit messages based on provided diffs. Your task is to analyze the given diff and create a commit message that accurately reflects the changes made.

- Structure: Begin with a clear, present-tense summary.
- Content: While you should use the surrounding context to understand the changes, your commit message should ONLY describe the lines marked with + or -.
- Understanding: Use the context (unmarked lines) to understand the purpose and impact of the changes, but do not mention unchanged code in the commit message.
- Changes: Only describe what was actually changed (added, removed, or modified).
- Consistency: Maintain uniformity in tense, punctuation, and capitalization.
- Accuracy: Ensure the message accurately reflects the changes and their purpose.
- Present tense, imperative mood. (e.g., "Add x to y" instead of "Added x to y")
- Max {{max_commit_length}} chars in the output
The character limit for the commit message is:

## Output:
<max_length>
{{max_length}}
</max_length>

Your output should be a commit message generated from the input diff and nothing else. While you should use the surrounding context to understand the changes, your message should only describe what was actually modified (+ or - lines).
Please follow these guidelines when generating the commit message:

## Input:
1. Analyze the diff carefully, focusing on lines marked with + or -.
2. Identify the files changed and the nature of the changes (added, modified, or deleted).
3. Determine the most significant change if multiple changes are present.
4. Create a clear, present-tense summary of the change in the imperative mood.
5. Ensure the commit message is within the specified character limit.
6. For binary files or unreadable diffs:
- Use the format "Add/Update/Delete binary file <filename>"
- Include file size in parentheses if available
- For multiple binary files, list them separated by commas

INPUT:
Before generating the final commit message, analyze the diff, but keep your thought process to yourself:

1. Count and list all files changed in the diff, noting whether they were added, modified, or deleted. Prepend each file with a number.
2. For each changed file, summarize the key changes in bullet points and quote specific relevant lines from the diff.
3. Identify any binary files or unreadable diffs separately.
4. Determine the most significant change if multiple changes are present.
5. Consider the impact of each change and its relevance to the overall commit message.
6. Brainstorm keywords that could be used in the commit message.
7. Propose three potential single-line summaries based on the breakdown.
8. Count the characters in each proposed summary, ensuring they meet the specified character limit.
9. Select the best summary that accurately reflects the most significant change and meets the character limit.
10. Prefixes such as `refactor:` or `fix:` should be removed

After your analysis, provide only the final commit message as output. Ensure it is clear, concise, and accurately reflects the content of the diff while adhering to the character limit. Do not include any additional text or explanations in your final output.

<DIFF>
35 changes: 26 additions & 9 deletions src/commit.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use anyhow::{bail, Result};
use maplit::hashmap;

use crate::{config, openai, profile};
use crate::model::Model;
Expand All @@ -8,9 +9,16 @@ const INSTRUCTION_TEMPLATE: &str = include_str!("../resources/prompt.md");

/// Returns the instruction template for the AI model.
/// This template guides the model in generating appropriate commit messages.
fn get_instruction_template() -> String {
fn get_instruction_template() -> Result<String> {
profile!("Generate instruction template");
INSTRUCTION_TEMPLATE.replace("{{max_commit_length}}", &config::APP.max_commit_length.unwrap_or(72).to_string())
let max_length = config::APP.max_commit_length.unwrap_or(72).to_string();
let template = mustache::compile_str(INSTRUCTION_TEMPLATE)
.map_err(|e| anyhow::anyhow!("Template compilation error: {}", e))?
.render_to_string(&hashmap! {
"max_length" => max_length
})
.map_err(|e| anyhow::anyhow!("Template rendering error: {}", e))?;
Ok(template)
}

/// Calculates the number of tokens used by the instruction template.
Expand All @@ -22,7 +30,8 @@ fn get_instruction_template() -> String {
/// * `Result<usize>` - The number of tokens used or an error
pub fn get_instruction_token_count(model: &Model) -> Result<usize> {
profile!("Calculate instruction tokens");
model.count_tokens(&get_instruction_template())
let template = get_instruction_template()?;
model.count_tokens(&template)
}

/// Creates an OpenAI request for commit message generation.
Expand All @@ -33,15 +42,23 @@ pub fn get_instruction_token_count(model: &Model) -> Result<usize> {
/// * `model` - The AI model to use for generation
///
/// # Returns
/// * `openai::Request` - The prepared request
fn create_commit_request(diff: String, max_tokens: usize, model: Model) -> openai::Request {
/// * `Result<openai::Request>` - The prepared request
pub fn create_commit_request(diff: String, max_tokens: usize, model: Model) -> Result<openai::Request> {
profile!("Prepare OpenAI request");
openai::Request {
system: get_instruction_template(),
let max_length = config::APP.max_commit_length.unwrap_or(72).to_string();
let instruction_template = mustache::compile_str(INSTRUCTION_TEMPLATE)
.map_err(|e| anyhow::anyhow!("Template compilation error: {}", e))?
.render_to_string(&hashmap! {
"max_length" => max_length
})
.map_err(|e| anyhow::anyhow!("Template rendering error: {}", e))?;

Ok(openai::Request {
system: instruction_template,
prompt: diff,
max_tokens: max_tokens.try_into().unwrap_or(u16::MAX),
model
}
})
}

/// Generates a commit message using the AI model.
Expand All @@ -65,7 +82,7 @@ pub async fn generate(patch: String, remaining_tokens: usize, model: Model) -> R
bail!("Maximum token count must be greater than zero")
}

let request = create_commit_request(patch, remaining_tokens, model);
let request = create_commit_request(patch, remaining_tokens, model)?;
openai::call(request).await
}

Expand Down
