Skip to content

Commit

Permalink
Refactor command functions to return strings and improve readability:…
Browse files Browse the repository at this point in the history
… Co-authored-by: MindFlow <mf@mindflo.ai>
  • Loading branch information
steegecs committed Jun 20, 2023
1 parent 23baa8e commit ec3f07c
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 33 deletions.
2 changes: 1 addition & 1 deletion mindflow/cli/commands/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@
@click.argument("document_paths", type=str, nargs=-1, required=True)
@click.option("--refresh", is_flag=True, default=False)
def index(document_paths: List[str], refresh: bool) -> None:
run_index(document_paths, refresh)
print(run_index(document_paths))
2 changes: 1 addition & 1 deletion mindflow/core/commands/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)


def run_chat(document_paths: List[str], user_query: str):
def run_chat(document_paths: List[str], user_query: str) -> str:
settings = Settings()
completion_model = settings.mindflow_models.query.model

Expand Down
29 changes: 13 additions & 16 deletions mindflow/core/commands/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,24 @@
from mindflow.core.resolving.resolve import resolve_paths_to_document_references


def run_delete(document_paths: List[str]):
def run_delete(document_paths: List[str]) -> str:
"""Delete documents from MindFlow index."""
document_references: List[DocumentReference] = resolve_paths_to_document_references(
document_paths
)
document_ids = [
document_id
for document_id in [
get_document_id(document_reference.path, document_reference.document_type)
for document_reference in document_references
]
if document_id is not None
]
if not (documents := Document.load_bulk_ignore_missing(document_ids)):
return "No documents to delete"
document_references: List[DocumentReference] = resolve_paths_to_document_references(document_paths)

document_ids = [document_id for document_id in [get_document_id(document_reference.path, document_reference.document_type) for document_reference in document_references] if document_id is not None]

if not document_ids:
return "No document IDs resolved. Nothing to delete."

documents = Document.load_bulk_ignore_missing(document_ids)
if not documents:
return "No documents found to delete."

document_chunk_ids = get_document_chunk_ids(documents)
if not DocumentChunk.load_bulk_ignore_missing(document_chunk_ids):
return "No documents to delete"
return "No document chunks found to delete."

Document.delete_bulk(document_ids)
DocumentChunk.delete_bulk(document_chunk_ids)

return "Documents deleted"
return "Documents and associated chunks deleted successfully."
2 changes: 1 addition & 1 deletion mindflow/core/commands/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from mindflow.core.token_counting import get_token_count_of_messages_for_model


def run_code_generation(output_path: str, prompt: str):
def run_code_generation(output_path: str, prompt: str) -> str:
settings = Settings()
completion_model = settings.mindflow_models.query.model

Expand Down
4 changes: 3 additions & 1 deletion mindflow/core/commands/git/commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,6 @@ def run_commit(
+ list(args)
)

return execute_command_and_print_without_trace(["git", "commit", "-m"] + [message_overwrite] + list(args))
return execute_command_and_print_without_trace(
["git", "commit", "-m"] + [message_overwrite] + list(args)
)
4 changes: 3 additions & 1 deletion mindflow/core/commands/git/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def run_diff(args: Tuple[str], detailed: bool = True) -> Optional[str]:
settings = Settings()
completion_model: ConfiguredModel = settings.mindflow_models.query.model

diff_result = execute_command_and_print_without_trace(["git", "diff"] + list(args)).strip()
diff_result = execute_command_and_print_without_trace(
["git", "diff"] + list(args)
).strip()
if not diff_result:
return None

Expand Down
14 changes: 7 additions & 7 deletions mindflow/core/commands/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from mindflow.core.token_counting import get_token_count_of_text_for_model


def run_index(document_paths: List[str], verbose: bool = True) -> None:
def run_index(document_paths: List[str]) -> str:
settings = Settings()
completion_model: ConfiguredModel = settings.mindflow_models.index.model
embedding_model: ConfiguredModel = settings.mindflow_models.embedding.model
Expand All @@ -40,15 +40,15 @@ def run_index(document_paths: List[str], verbose: bool = True) -> None:
document_references, completion_model
)
):
if verbose:
print("No documents to index")
return
return "No documents to index"

print_total_size_of_documents(indexable_documents)
print_total_tokens_and_ask_to_continue(indexable_documents, completion_model)

index_documents(indexable_documents, completion_model, embedding_model)

return "Successfully indexed documents"


def print_total_size_of_documents(documents: List[Document]):
print(
Expand Down Expand Up @@ -155,14 +155,14 @@ def get_indexable_document(
return None

document_text_bytes = document_text.encode("utf-8")
doc_hash = hashlib.sha256(document_text_bytes).hexdigest()
document_hash = hashlib.sha256(document_text_bytes).hexdigest()

if document and document.id == doc_hash:
if document and document.id == document_hash:
return None

return Document(
{
"id": doc_hash,
"id": document_hash,
"path": document_reference.path,
"document_type": document_reference.document_type,
"num_chunks": document.num_chunks if document else 0,
Expand Down
7 changes: 2 additions & 5 deletions mindflow/core/commands/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from mindflow.core.token_counting import get_token_count_of_text_for_model


def run_query(document_paths: List[str], query: str):
def run_query(document_paths: List[str], query: str) -> str:
"""Query files, folders, and websites."""
settings = Settings()
completion_model = settings.mindflow_models.query.model
Expand Down Expand Up @@ -52,10 +52,7 @@ def run_query(document_paths: List[str], query: str):
top_k=100,
)
):
print(
"No index for requested hashes. Please generate index for passed content."
)
sys.exit(1)
return "No index for requested hashes. Please generate index for passed content."

document_selection_batch: List[Tuple[str, DocumentChunk]] = [
(document_hash_to_path[document_chunk.id.split("_")[0]], document_chunk)
Expand Down
1 change: 1 addition & 0 deletions mindflow/core/file_processing/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import List
from typing import Union


def is_path_within_git_repo(path: Union[str, os.PathLike]) -> bool:
try:
output = subprocess.run(
Expand Down

0 comments on commit ec3f07c

Please sign in to comment.