diff --git a/lesa/cli/main.py b/lesa/cli/main.py
index d9e9e16..06010c5 100644
--- a/lesa/cli/main.py
+++ b/lesa/cli/main.py
@@ -1,6 +1,7 @@
 import typer
 import time
 import shutil
+from datetime import datetime
 from rich.text import Text
 from rich.panel import Panel
 from rich.table import Table
@@ -96,18 +97,41 @@ def embed():
         console.print(f"[red]Error: Ollama server is not running![/red]")
         console.print(f"Start the ollama server by running [cyan]lesa start[/cyan] command.")
         raise typer.Exit(1)
-    
-    with console.status("Creating configuration files...", spinner="earth") as status:
+
+    if not dm.check_configuration_folder():
+        console.print("[yellow]Configuration folder not found. Initializing...[/yellow]")
         dm.init()
-        status.update("Extracting text from files...")
-        for file in dm.files:
-            console.log(f"Extracting text from {file}...")
-            docs = dm.extract_file_text(filepath=file)
-            console.log(f"Embedding documents from {file}...")
-            dm.embed_documents(docs)
-        console.log("Saving vector embeddings...")
-        dm.save_vector_store()
-    console.print("[green]Initialized configuration for embedding files[/green]")
+
+    if not dm.retrieve_config_key("embeddings_initialized"):
+        with console.status("Extracting text from files...", spinner="earth"):
+            for file in dm.files:
+                console.log(f"Extracting text from {file}...")
+                docs = dm.extract_file_text(filepath=file)
+                console.log(f"Embedding documents from {file}...")
+                dm.embed_documents(docs)
+            console.log("Saving vector embeddings...")
+            dm.save_vector_store()
+        dm.update_config_key("embeddings_initialized", True)
+        dm.update_config_key('embeddings_init_time', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+        console.print("[green]Initialized configuration for embedding files[/green]")
+
+    else:
+        changes = dm.check_for_changes()
+        if not any(changes.get(key) for key in ('new_files', 'modified_files', 'deleted_files')):
+            console.print("[yellow]No changes detected in the directory[/yellow]")
+        else:
+            dm.scan_files()
+            console.print("[green]Files in the directory have been changed since last embedding. Embedding again...[/green]")
+            with console.status("Extracting text from files...", spinner="earth"):
+                for file in dm.files:
+                    console.log(f"Extracting text from {file}...")
+                    docs = dm.extract_file_text(filepath=file)
+                    console.log(f"Embedding documents from {file}...")
+                    dm.embed_documents(docs)
+                console.log("Saving vector embeddings...")
+                dm.save_vector_store()
+            dm.update_config_key('embeddings_init_time', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+            console.print("[green]Embeddings updated successfully![/green]")
 
 @app.command()
 def read(file_path: str = typer.Argument(..., help="Path of the file to read")):
diff --git a/lesa/core/conversations.py b/lesa/core/conversations.py
index eb89d1a..5fc1fa6 100644
--- a/lesa/core/conversations.py
+++ b/lesa/core/conversations.py
@@ -12,7 +12,6 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain.chains.retrieval import create_retrieval_chain
 
 
-from lesa.core.documents import DocumentManager
 from lesa.core.ollama import OllamaManager
 from lesa.core.directory_manager import DirectoryManager
 
@@ -36,7 +35,6 @@ def __init__(self,
         self.base_path = os.path.abspath(base_path)
         self.embedding_path = os.path.join(self.base_path, self.CONFIG_DIR, 'embeddings')
 
-        self.document_manager = DocumentManager()
         self.ollama_manager = OllamaManager()
         self.embeddings_manager = DirectoryManager(document_model=document_model)
 
@@ -134,7 +132,7 @@ def embed_single_document_and_chat(self,
         :return: List of embedded documents
         """
 
-        documents = self.document_manager.extract_document_text(file_path)
+        documents = self.embeddings_manager.extract_file_text(file_path)
         docs = self.embeddings_manager.text_splitter.split_documents(documents=documents)
 
         # Embed documents
diff --git a/lesa/core/directory_manager.py b/lesa/core/directory_manager.py
index 82a9b47..7131ea5 100644
--- a/lesa/core/directory_manager.py
+++ b/lesa/core/directory_manager.py
@@ -160,7 +160,9 @@ def init(self) -> None:
         file_metadata = self._scan_directory()
 
         config_data = {
-            'initialized_at': datetime.now().isoformat(),
+            'initialized_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            'embeddings_initialized': False,
+            'embeddings_init_time': None,
             'base_path': self.base_path,
             'files': file_metadata
         }
@@ -169,7 +171,6 @@ def init(self) -> None:
             json.dump(config_data, f, indent=4)
 
         self.scan_files()
-        # print(f"Lesa embeddings initialized in {self.base_path}")
 
     def _scan_directory(self, ignore_patterns: Optional[List[str]] = None) -> Dict[str, Dict]:
         """
@@ -233,6 +234,42 @@ def check_for_changes(self) -> Dict:
 
         return changes
 
+    def check_configuration_folder(self) -> bool:
+        """
+        Check if the configuration folder and its config file both exist.
+
+        :return: True if both paths exist, False otherwise
+        """
+        return (os.path.exists(self.config_path)
+                and os.path.exists(self.config_file_path))
+
+    def update_config_key(self, key: str, value: object) -> None:
+        """
+        Update a specific key in the configuration file.
+
+        :param key: Key to update
+        :param value: New value for the key (must be JSON-serializable)
+        """
+        with open(self.config_file_path, 'r') as f:
+            config_data = json.load(f)
+
+        config_data[key] = value
+
+        with open(self.config_file_path, 'w') as f:
+            json.dump(config_data, f, indent=4)
+
+    def retrieve_config_key(self, key: str) -> object:
+        """
+        Retrieve a specific key from the configuration file.
+
+        :param key: Key to retrieve
+        :return: Value of the key, or None if the key is absent
+        """
+        with open(self.config_file_path, 'r') as f:
+            config_data = json.load(f)
+
+        return config_data.get(key)
+
     def update_configuration(self) -> None:
         """
         Update the configuration after detecting changes.