
Commit db2a0c3

Merge pull request #95 from nollied/cleanup
Added new modules and methods, updated version number, and added new dependencies and imports.
2 parents 25d77e0 + aa1c3c3 commit db2a0c3

File tree

24 files changed: +315 additions, -111 deletions


README.md

Lines changed: 2 additions & 2 deletions
@@ -78,8 +78,8 @@ Make some changes to your git repo and stage them. Then, run `mf commit`! You sh
 1 file changed, 14 insertions(+)
 ```
 
-### Create PRs With GPT Titles And Body
-Make some changes to your branch and stage, and then commit them. Then, run `mf pr`! A PR should be created with a title and body generated by GPT, and a link to the PR should be printed to the console.
+### Create PRs/MRs With GPT Titles And Body
+Make some changes to your branch and stage, and then commit them. Then, run `mf pr` for GitHub or `mf mr` for GitLab! A pull request/merge request should be created with a title and body generated by GPT, and a link to the PR should be printed to the console.
 - To use this feature, you must first install and authenticate the [GitHub CLI](https://cli.github.com/).
 
 ## How does it work?

mindflow/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-__version__ = "0.3.12"
+__version__ = "0.3.13"

mindflow/core/chat.py

Lines changed: 18 additions & 2 deletions
@@ -1,19 +1,35 @@
+from typing import Optional
 from mindflow.settings import Settings
+from mindflow.utils.constants import MinimumReservedLength
+from mindflow.utils.prompts import CHAT_PROMPT_PREFIX
+from mindflow.utils.token import get_token_count
 
 
 def run_chat(prompt: str) -> str:
     """
     This function is used to generate a prompt and then use it as a prompt for GPT bot.
     """
     settings = Settings()
+    completion_model = settings.mindflow_models.query.model
+
+    if (
+        get_token_count(completion_model, CHAT_PROMPT_PREFIX + prompt)
+        > completion_model.hard_token_limit - MinimumReservedLength.CHAT.value
+    ):
+        print("The prompt is too long. Please try again with a shorter prompt.")
+        return ""
+
     # Prompt GPT through Mindflow API or locally
-    response: str = settings.mindflow_models.query.model(
+    response: Optional[str] = completion_model(
         [
             {
                 "role": "system",
-                "content": "You are a helpful virtual assistant responding to a users query using your general knowledge and the text provided below.",
+                "content": CHAT_PROMPT_PREFIX,
             },
             {"role": "user", "content": prompt},
         ]
     )
+
+    if response is None:
+        return "Unable to generate response. Please try again. If the problem persists, please raise an issue at: https://github.com/nollied/mindflow-cli/issues."
     return response
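
`MinimumReservedLength` and `get_token_count` come from modules added in this commit (`mindflow/utils/constants.py` and `mindflow/utils/token.py`) that are not shown on this page. As a rough sketch of how the new guard in `run_chat` fits together, assuming a simple enum of reserved completion budgets (the names follow the diff, but the values here are invented):

```python
from enum import Enum

from mindflow.utils.token import get_token_count  # module added in this commit


class MinimumReservedLength(Enum):
    # Hypothetical values: tokens held back for the model's reply so that
    # prompt + completion stays under the model's hard token limit.
    CHAT = 1024
    DIFF = 1024


def prompt_fits(completion_model, prompt: str) -> bool:
    # The same check run_chat() now performs before calling the model.
    return (
        get_token_count(completion_model, prompt)
        <= completion_model.hard_token_limit - MinimumReservedLength.CHAT.value
    )
```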

mindflow/core/git/commit.py

Lines changed: 5 additions & 2 deletions
@@ -1,8 +1,9 @@
 import subprocess
-from typing import Tuple, Optional
+from typing import Tuple, Optional, Union
 
 from mindflow.core.git.diff import run_diff
 from mindflow.settings import Settings
+from mindflow.utils.errors import ModelError
 from mindflow.utils.prompt_builders import build_context_prompt
 from mindflow.utils.prompts import COMMIT_PROMPT_PREFIX
 
@@ -20,9 +21,11 @@ def run_commit(args: Tuple[str], message_overwrite: Optional[str] = None) -> str
     if diff_output == "No staged changes.":
         return diff_output
 
-    response: str = settings.mindflow_models.query.model(
+    response: Union[ModelError, str] = settings.mindflow_models.query.model(
         build_context_prompt(COMMIT_PROMPT_PREFIX, diff_output)
     )
+    if isinstance(response, ModelError):
+        return response.commit_message
 
     # add co-authorship to commit message
     response += "\n\nCo-authored-by: MindFlow <mf@mindflo.ai>"
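
`ModelError` lives in `mindflow/utils/errors.py`, another file added by this commit but not displayed in this excerpt. Judging from the attributes used across the changed files (`commit_message`, `diff_message`, `diff_partial_message`, `pr_message`, `index_message`), it appears to wrap a failed model call and expose a user-facing message per command. A hedged sketch of that shape (the real implementation may differ):

```python
class ModelError:
    """Hypothetical reconstruction: wraps a failed model call so each command
    can surface a context-appropriate message instead of raising."""

    def __init__(self, message: str):
        self.message = message

    def _format(self, action: str) -> str:
        return f"Unable to generate {action}: {self.message}"

    @property
    def commit_message(self) -> str:
        return self._format("a commit message")

    @property
    def diff_message(self) -> str:
        return self._format("a diff summary")

    @property
    def diff_partial_message(self) -> str:
        return self._format("part of the diff summary")

    @property
    def pr_message(self) -> str:
        return self._format("a PR/MR title and body")

    @property
    def index_message(self) -> str:
        return self._format("an index summary")
```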

mindflow/core/git/diff.py

Lines changed: 57 additions & 24 deletions
@@ -3,17 +3,19 @@
 """
 import concurrent.futures
 import subprocess
-from typing import Dict
+from typing import Dict, Union
 from typing import List
-from typing import Optional
 from typing import Tuple
 
 from mindflow.db.objects.model import ConfiguredModel
 from mindflow.settings import Settings
+from mindflow.utils.constants import MinimumReservedLength
+from mindflow.utils.errors import ModelError
 from mindflow.utils.prompt_builders import build_context_prompt
 from mindflow.utils.prompts import GIT_DIFF_PROMPT_PREFIX
 
-from mindflow.utils.diff_parser import parse_git_diff, IGNORE_FILE_EXTENSIONS
+from mindflow.utils.diff_parser import parse_git_diff
+from mindflow.utils.token import get_token_count
 
 
 def run_diff(args: Tuple[str]) -> str:
@@ -35,18 +37,19 @@ def run_diff(args: Tuple[str]) -> str:
     if len(diff_dict) <= 0:
         return "No staged changes."
 
-    batched_parsed_diff_result = batch_git_diffs(
-        diff_dict, token_limit=completion_model.hard_token_limit
-    )
+    batched_parsed_diff_result = batch_git_diffs(diff_dict, completion_model)
 
-    response: str = ""
+    diff_summary: str = ""
     if len(batched_parsed_diff_result) == 1:
         content = ""
         for file_name, diff_content in batched_parsed_diff_result[0]:
             content += f"*{file_name}*\n DIFF CONTENT: {diff_content}\n\n"
-        response = completion_model(
+        diff_response: Union[ModelError, str] = completion_model(
            build_context_prompt(GIT_DIFF_PROMPT_PREFIX, content)
        )
+        if isinstance(diff_response, ModelError):
+            return diff_response.diff_message
+        diff_summary += diff_response
     else:
         with concurrent.futures.ThreadPoolExecutor() as executor:
             futures = []
@@ -62,43 +65,73 @@ def run_diff(args: Tuple[str]) -> str:
 
             # Process the results as they become available
             for future in concurrent.futures.as_completed(futures):
-                response += future.result()
+                diff_partial_response: Union[ModelError, str] = future.result()
+                if isinstance(diff_partial_response, ModelError):
+                    return diff_partial_response.diff_partial_message
+
+                diff_summary += diff_partial_response
 
     if len(excluded_filenames) > 0:
-        response += f"\n\nNOTE: The following files were excluded from the diff: {', '.join(excluded_filenames)}"
+        diff_summary += f"\n\nNOTE: The following files were excluded from the diff: {', '.join(excluded_filenames)}"
 
-    return response
+    return diff_summary
 
 
 import re
 
 
 def batch_git_diffs(
-    file_diffs: Dict[str, str], token_limit: int
+    file_diffs: Dict[str, str], model: ConfiguredModel
 ) -> List[List[Tuple[str, str]]]:
     batches = []
     current_batch: List = []
-    current_batch_size = 0
+    current_batch_text = ""
     for file_name, diff_content in file_diffs.items():
-        if len(diff_content) > token_limit:
-            chunks = [
-                diff_content[i : i + token_limit]
-                for i in range(0, len(diff_content), token_limit)
-            ]
+        if (
+            get_token_count(model, diff_content)
+            > model.hard_token_limit - MinimumReservedLength.DIFF.value
+        ):
+            ## Split the diff into chunks that are less than the token limit
+            chunks = [diff_content]
+            while True:
+                new_chunks = []
+                for chunk in chunks:
+                    if (
+                        get_token_count(model, chunk)
+                        > model.hard_token_limit - MinimumReservedLength.DIFF.value
+                    ):
+                        half_len = len(chunk) // 2
+                        left_half = chunk[:half_len]
+                        right_half = chunk[half_len:]
+                        new_chunks.extend([left_half, right_half])
+                    else:
+                        new_chunks.append(chunk)
+                if new_chunks == chunks:
+                    break
+                chunks = new_chunks
+
+            ## Add the chunks to the batch or multiple batches
             for chunk in chunks:
-                if current_batch_size + len(chunk) > token_limit * 2:
+                if (
+                    get_token_count(model, current_batch_text + chunk)
+                    > model.hard_token_limit - MinimumReservedLength.DIFF.value
+                ):
                     batches.append(current_batch)
                     current_batch = []
-                    current_batch_size = 0
+                    current_batch_text = ""
                 current_batch.append((file_name, chunk))
-                current_batch_size += len(chunk)
-        elif current_batch_size + len(diff_content) > token_limit * 2:
+                current_batch_text += chunk
+
+        elif (
+            get_token_count(model, current_batch_text + diff_content)
+            > model.hard_token_limit - MinimumReservedLength.DIFF.value
+        ):
             batches.append(current_batch)
             current_batch = [(file_name, diff_content)]
-            current_batch_size = len(diff_content)
+            current_batch_text = diff_content
         else:
             current_batch.append((file_name, diff_content))
-            current_batch_size += len(diff_content)
+            current_batch_text += diff_content
     if current_batch:
         batches.append(current_batch)
     return batches
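
The rewritten `batch_git_diffs` replaces fixed-size character slicing with a halving loop: any diff whose token count exceeds the budget is repeatedly split in half until every chunk fits, and chunks are then packed greedily into batches that are re-measured against the same token limit. A small self-contained illustration of that halving step, using a stand-in estimator of roughly four characters per token rather than mindflow's actual `get_token_count`:

```python
def estimate_tokens(text: str) -> int:
    # Stand-in estimator (~4 characters per token); not mindflow's tokenizer.
    return len(text) // 4


def split_until_fits(text: str, token_limit: int) -> list:
    """Repeatedly halve oversized chunks until each one fits the limit,
    mirroring the loop structure inside batch_git_diffs."""
    chunks = [text]
    while True:
        new_chunks = []
        for chunk in chunks:
            if estimate_tokens(chunk) > token_limit:
                half_len = len(chunk) // 2
                new_chunks.extend([chunk[:half_len], chunk[half_len:]])
            else:
                new_chunks.append(chunk)
        if new_chunks == chunks:
            break
        chunks = new_chunks
    return chunks


if __name__ == "__main__":
    big_diff = "x" * 10_000                      # ~2,500 estimated tokens
    parts = split_until_fits(big_diff, token_limit=1_000)
    print(len(parts), [len(p) for p in parts])   # 4 chunks of 2,500 characters
```

Compared with the old character-based slicing, the halving loop re-checks each chunk against the model's token count, so token-dense text can no longer overshoot the limit, and every chunk remains a contiguous slice of the original diff.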

mindflow/core/git/mr.py

Lines changed: 6 additions & 1 deletion
@@ -32,7 +32,12 @@ def run_mr(
         return
 
     if not title or not description:
-        title, description = create_title_and_body(base_branch, title, description)
+        tital_description_tuple = create_title_and_body(base_branch, title, description)
+
+        if not tital_description_tuple:
+            return
+
+        title, description = tital_description_tuple
 
     create_merge_request(args, title, description)
mindflow/core/git/pr.py

Lines changed: 23 additions & 7 deletions
@@ -1,10 +1,11 @@
 import concurrent.futures
 import subprocess
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, Union
 
 from mindflow.core.git.diff import run_diff
 from mindflow.settings import Settings
 from mindflow.utils.command_parse import get_flag_value
+from mindflow.utils.errors import ModelError
 from mindflow.utils.prompt_builders import build_context_prompt
 from mindflow.utils.prompts import PR_BODY_PREFIX
 from mindflow.utils.prompts import PR_TITLE_PREFIX
@@ -35,8 +36,12 @@ def run_pr(args: Tuple[str], title: Optional[str] = None, body: Optional[str] =
         return
 
     if not title or not body:
-        title, body = create_title_and_body(base_branch, title, body)
+        tital_body_tuple = create_title_and_body(base_branch, title, body)
 
+        if not tital_body_tuple:
+            return
+
+        title, body = tital_body_tuple
     create_pull_request(args, title, body)
 
 
@@ -58,11 +63,13 @@ def is_valid_pr(head_branch: str, base_branch: str) -> bool:
 
 def create_title_and_body(
     base_branch, title: Optional[str], body: Optional[str]
-) -> Tuple[str, str]:
+) -> Optional[Tuple[str, str]]:
     settings = Settings()
 
     diff_output = run_diff((base_branch,))
 
+    title_response: Union[ModelError, str]
+    body_response: Union[ModelError, str]
     if title is None and body is None:
         pr_title_prompt = build_context_prompt(PR_TITLE_PREFIX, diff_output)
         pr_body_prompt = build_context_prompt(PR_BODY_PREFIX, diff_output)
@@ -75,16 +82,25 @@
                 settings.mindflow_models.query.model, pr_body_prompt
             )
 
-            title = future_title.result()
-            body = future_body.result()
+            title_response = future_title.result()
+            body_response = future_body.result()
     else:
         if title is None:
             pr_title_prompt = build_context_prompt(PR_TITLE_PREFIX, diff_output)
-            title = settings.mindflow_models.query.model(pr_title_prompt)
+            title_response = settings.mindflow_models.query.model(pr_title_prompt)
         if body is None:
             pr_body_prompt = build_context_prompt(PR_BODY_PREFIX, diff_output)
-            body = settings.mindflow_models.query.model(pr_body_prompt)
+            body_response = settings.mindflow_models.query.model(pr_body_prompt)
+
+    if isinstance(title_response, ModelError):
+        print(title_response.pr_message)
+        return None
+    if isinstance(body_response, ModelError):
+        print(body_response.pr_message)
+        return None
 
+    title = title if title is not None else title_response
+    body = body if body is not None else body_response
     return title, body
 
mindflow/core/index.py

Lines changed: 22 additions & 16 deletions
@@ -1,10 +1,10 @@
 """
 `generate` command
 """
-from asyncio import Future
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
-from typing import List
+import logging
+from typing import List, Union
 from typing import Optional
 
 import numpy as np
@@ -18,8 +18,10 @@
 from mindflow.resolving.resolve import resolve_all
 from mindflow.resolving.resolve import return_if_indexable
 from mindflow.settings import Settings
+from mindflow.utils.errors import ModelError
 from mindflow.utils.prompt_builders import build_context_prompt
 from mindflow.utils.prompts import INDEX_PROMPT_PREFIX
+from mindflow.utils.token import get_batch_token_count, get_token_count
 
 
 def run_index(document_paths: List[str], refresh: bool, force: bool) -> None:
@@ -97,9 +99,14 @@ def __init__(
         self.start = start
         self.end = end
         if text:
-            self.summary = completion_model(
+            response: Union[str, ModelError] = completion_model(
                 build_context_prompt(INDEX_PROMPT_PREFIX, text)
             )
+            if isinstance(response, ModelError):
+                self.summary = ""
+                print(response.index_message)
+            else:
+                self.summary = response
 
     def set_leaves(self, leaves: List["Node"]) -> None:
         self.leaves = leaves
@@ -136,15 +143,6 @@ def iterative_to_dict(self) -> dict:
         return node_dict
 
 
-def count_tokens(text: str) -> int:
-    """
-    Counts/estimates the number of tokens this text will consume by GPT.
-    """
-    # tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
-    # count = len(tokenizer(text)['input_ids'])
-    return len(text) // 4  # Token Estimation for speed
-
-
 # This function is used to split a string into chunks of a specified token limit using binary search
 def binary_split_raw_text_to_nodes(
     completion_model: ConfiguredModel, text: str
@@ -156,7 +154,10 @@ def binary_split_raw_text_to_nodes(
     stack = [(0, len(text))]
     while stack:
         start, end = stack.pop()
-        if count_tokens(text[start:end]) < completion_model.soft_token_limit:
+        if (
+            get_token_count(completion_model, text[start:end])
+            < completion_model.soft_token_limit
+        ):
             nodes.append(Node(completion_model, start, end, text[start:end]))
         else:
             mid = ((end - start) // 2) + start
@@ -176,7 +177,9 @@ def binary_split_nodes_to_chunks(
     while stack:
         nodes, start, end = stack.pop()
         if (
-            sum(count_tokens(node.summary) for node in nodes[start:end])
+            get_batch_token_count(
+                completion_model, [node.summary for node in nodes[start:end]]
+            )
             < completion_model.soft_token_limit
         ):
             chunks.append(nodes[start:end])
@@ -195,7 +198,10 @@ def create_nodes(completion_model: ConfiguredModel, leaf_nodes: List[Node]) -> N
     while stack:
         leaf_nodes, start, end = stack.pop()
         if (
-            sum(count_tokens(leaf_node.summary) for leaf_node in leaf_nodes[start:end])
+            get_batch_token_count(
+                completion_model,
+                [leaf_node.summary for leaf_node in leaf_nodes[start:end]],
+            )
             > completion_model.soft_token_limit
         ):
             node_chunks: List[List[Node]] = binary_split_nodes_to_chunks(
@@ -222,7 +228,7 @@ def create_text_search_tree(completion_model: ConfiguredModel, text: str) -> dic
     """
     This function is used to create a tree of responses from the OpenAI API
     """
-    if count_tokens(text) < completion_model.soft_token_limit:
+    if get_token_count(completion_model, text) < completion_model.soft_token_limit:
         return Node(completion_model, 0, len(text), text).to_dict()
 
     return create_nodes(
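
`get_token_count` and `get_batch_token_count` replace the old `count_tokens` estimate and come from the new `mindflow/utils/token.py`, which is not part of this excerpt. A minimal sketch of what such helpers could look like, assuming a tiktoken-based count keyed off the configured model name with a character-based fallback (the actual implementation may differ):

```python
from typing import List

import tiktoken

from mindflow.db.objects.model import ConfiguredModel


def get_token_count(model: ConfiguredModel, text: str) -> int:
    """Count tokens for the configured model, falling back to a rough estimate."""
    try:
        # Assumes the configured model exposes its OpenAI model name as `model.id`.
        encoding = tiktoken.encoding_for_model(model.id)
        return len(encoding.encode(text))
    except (KeyError, ValueError):
        return len(text) // 4  # rough fallback: ~4 characters per token


def get_batch_token_count(model: ConfiguredModel, texts: List[str]) -> int:
    """Total token count across a batch of texts."""
    return sum(get_token_count(model, text) for text in texts)
```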
