From ab03ee6534dca0e86577e45a939fc21b93cb0a4c Mon Sep 17 00:00:00 2001 From: Milutin-S Date: Tue, 28 May 2024 17:38:03 +0200 Subject: [PATCH] Add docstring. --- database/utils.py | 7 +++---- database/vector_database.py | 3 ++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/database/utils.py b/database/utils.py index 1037631..b6f8e6d 100644 --- a/database/utils.py +++ b/database/utils.py @@ -26,6 +26,7 @@ def create_collection( vector_size: int = 1536, distance: Distance = Distance.COSINE, ) -> bool: + """Create a collection in Qdrant.""" logger.info(f'Creating collection: "{name}" with vector size: {vector_size}.') return client.recreate_collection( collection_name=name, @@ -53,6 +54,7 @@ def upsert( collection: str, points: List[PointStruct], ) -> UpdateResult: + """Upsert data points into a Qdrant collection.""" return client.upsert(collection_name=collection, points=points) @@ -88,10 +90,7 @@ def embed_text( client: OpenAI, text: Union[str, list], model: str ) -> CreateEmbeddingResponse: """ - - Default model (OpenAI): text-embedding-3-small - - Max input Tokens: 8191 - - TikToken model: cl100k_base - - Embedding size: 1536 + Create embeddings using OpenAI API. """ response = client.embeddings.create(input=text, model=model) return response diff --git a/database/vector_database.py b/database/vector_database.py index 759d15c..1d49a71 100644 --- a/database/vector_database.py +++ b/database/vector_database.py @@ -17,6 +17,7 @@ def main(args: argparse.Namespace) -> None: + """Main function to create embeddings and vector database.""" logger.info("Creating embeddings.") create_embeddings( scraped_dir=args.scraped_dir, @@ -34,7 +35,7 @@ def main(args: argparse.Namespace) -> None: for path in tqdm(data_paths, total=len(data_paths), desc="Creating collections"): # Check if this is necessary collection_name = path.stem.replace("-", "_") - collection_name = collection_name + "_TESTIC" + collection_name = collection_name points = load_and_process_embeddings(path=path) create_collection(client=qdrant_client, name=collection_name)