Skip to content

Commit 5d931d8

Browse files
Authored by nollied
Merge pull request #89 from nollied/diff-parsing
Add diff_parser module and update git diff parsing in core/git/diff.py and unit tests
2 parents 11249c1 + 34f4c11 commit 5d931d8

File tree

4 files changed: +423 additions, -22 deletions (lines changed)

4 files changed

+423
-22
lines changed

mindflow/core/git/diff.py

Lines changed: 13 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,8 @@
1313
from mindflow.utils.prompt_builders import build_context_prompt
1414
from mindflow.utils.prompts import GIT_DIFF_PROMPT_PREFIX
1515

16+
from mindflow.utils.diff_parser import parse_git_diff, IGNORE_FILE_EXTENSIONS
17+
1618

1719
def run_diff(args: Tuple[str]) -> str:
1820
"""
@@ -25,12 +27,16 @@ def run_diff(args: Tuple[str]) -> str:
2527

2628
# Execute the git diff command and retrieve the output as a string
2729
diff_result = subprocess.check_output(command).decode("utf-8")
28-
2930
if diff_result.strip() == "":
3031
return "No staged changes."
3132

33+
diff_dict, excluded_filenames = parse_git_diff(diff_result)
34+
35+
if len(diff_dict) <= 0:
36+
return "No staged changes."
37+
3238
batched_parsed_diff_result = batch_git_diffs(
33-
parse_git_diff(diff_result), token_limit=completion_model.hard_token_limit
39+
diff_dict, token_limit=completion_model.hard_token_limit
3440
)
3541

3642
response: str = ""
@@ -58,37 +64,22 @@ def run_diff(args: Tuple[str]) -> str:
5864
for future in concurrent.futures.as_completed(futures):
5965
response += future.result()
6066

67+
if len(excluded_filenames) > 0:
68+
response += f"\n\nNOTE: The following files were excluded from the diff: {', '.join(excluded_filenames)}"
69+
6170
return response
6271

6372

6473
import re
6574

6675

67-
def parse_git_diff(diff_output: str) -> List[Tuple[str, str]]:
68-
file_diffs: List[Dict[str, List[str]]] = []
69-
current_diff: Optional[Dict[str, List[str]]] = None
70-
for line in diff_output.split("\n"):
71-
if line.startswith("diff --git"):
72-
if current_diff is not None:
73-
file_diffs.append(current_diff)
74-
current_diff = {"file_name": None, "content": []} # type: ignore
75-
match = re.match(r"^diff --git a/(.+?) b/.+?$", line)
76-
if match:
77-
current_diff["file_name"] = match.group(1) # type: ignore
78-
if current_diff is not None:
79-
current_diff["content"].append(line)
80-
if current_diff is not None:
81-
file_diffs.append(current_diff)
82-
return [(diff["file_name"], "\n".join(diff["content"])) for diff in file_diffs] # type: ignore
83-
84-
8576
def batch_git_diffs(
86-
file_diffs: List[Tuple[str, str]], token_limit: int
77+
file_diffs: Dict[str, str], token_limit: int
8778
) -> List[List[Tuple[str, str]]]:
8879
batches = []
8980
current_batch: List = []
9081
current_batch_size = 0
91-
for file_name, diff_content in file_diffs:
82+
for file_name, diff_content in file_diffs.items():
9283
if len(diff_content) > token_limit:
9384
chunks = [
9485
diff_content[i : i + token_limit]

0 commit comments

Comments (0)