Fixes in HNSWLib.

souradipp76 · Apr 28, 2024 · 52c6b57
1 parent 77b842e
commit 52c6b57
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 16 deletions.
diff --git a/doc_generator/query/__init__.py b/doc_generator/query/__init__.py
@@ -16,6 +16,7 @@ def display_welcome_message(project_name):
     print(f"Welcome to the {project_name} chatbot.")
     print(f"Ask any questions related to the {project_name} codebase, and I'll try to help. Type 'exit' to quit.\n")
 
+
 def query(repo_config: AutodocRepoConfig, user_confg: AutodocUserConfig):
     data_path = os.path.join(repo_config.output, 'docs', 'data')
     embeddings = get_embeddings(repo_config.llms[0])

diff --git a/doc_generator/utils/HNSWLib.py b/doc_generator/utils/HNSWLib.py
@@ -6,7 +6,6 @@
 from abc import abstractmethod
 from typing import List, Optional
 from langchain_community.docstore.in_memory import InMemoryDocstore
-from langchain_core.embeddings import embeddings
 from langchain_core.embeddings.embeddings import Embeddings
 from langchain_core.vectorstores import VectorStore
 from langchain_core.documents import Document
@@ -77,7 +76,7 @@ def add_vectors(self, vectors: List[List[float]], documents: List[Document]):
         docstore_size = len(self.docstore._dict)
         for i, vector in enumerate(vectors):
             self._index.add_items(np.array(vector), np.array([docstore_size + i]))
-            self.docstore.add({docstore_size + i: documents[i]})
+            self.docstore.add({str(docstore_size + i): documents[i]})
 
     def add_documents(self, documents: List[Document]) -> List[str]:
         texts = [doc.page_content for doc in documents]
@@ -105,10 +104,11 @@ def similarity_search_by_vector(self, query: List[float], k: int = 4) -> List:
             print(f"k ({k}) is greater than the number of elements in the index ({total}), setting k to {total}")
             k = total
         labels, distances = self._index.knn_query(query, k)
-        return [(self.docstore.search(str(label)), distance) for label, distance in zip(labels, distances)]
+        return [(self.docstore._dict[str(label)], distance) for label, distance in zip(labels[0], distances[0])]
 
     def similarity_search(self, query: str, k: int = 4) -> List[Document]:
-        return self.similarity_search_by_vector(self._embeddings.embed_query(query), k)
+        results = self.similarity_search_by_vector(self._embeddings.embed_query(query), k)
+        return [result[0] for result in results]
 
     def save(self, directory: str):
         print(f"Saving in directory {directory}")
@@ -134,10 +134,9 @@ def load(directory: str, embeddings: Embeddings):
         with open(os.path.join(directory, 'docstore.json'), 'r') as f:
             doc_data = json.load(f)
         for id, value in doc_data:
-            args.docstore.add({id: Document(
+            args.docstore.add({str(id): Document(
                 page_content=value['page_content'], 
-                metadata=value['metadata'],
-                type=value['type']
+                metadata=value['metadata']
             )})
 
         args.index = index

diff --git a/doc_generator/utils/createChatChain.py b/doc_generator/utils/createChatChain.py
@@ -1,4 +1,4 @@
-from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
+from langchain.chains.conversational_retrieval.base import ChatVectorDBChain
 from langchain.chains.llm import LLMChain
 from langchain.chains.question_answering import load_qa_chain
 from langchain.prompts import PromptTemplate
@@ -48,12 +48,7 @@ def make_qa_prompt(project_name, repository_url, content_type, chat_prompt, targ
     Assume the reader does not know anything about how the project is structured or which folders/files do what and what functions are written in which files and what these functions do.
     If you don't know how to fill up the readme.md file in one of its sections, leave that part blank. Don't try to make up an answer.
     Do not include information that is not directly relevant to repository, even though the names of the functions might be common or is frequently used in several other places.
-    Now lets start describing how the readme.md file will be structured.
-    The first section will be Installation. Here provide a list of packages from the requirements.txt folder in the repository that needs to be installed. Mention what versions of those packages need to be installed. Also add the commands that need to be put in the terminal to install those packages. For instance, for installing a py-package, provide a command pip install py-package. If there is no requirements.txt or similar folder in the repository, then find out what frameworks and packages have been imported in all the files after going through the code provide their names and the required versions that need to be installed. Remind the user that it is usually best-practice in Python projects to install into a sandboxed virtual environment, This will be locked to a specific Python version and contain only the Python libraries that you install into it, so that your Python projects do not get affected.
-    The second section will be Usage. Here provide a list of commands that need to be run in the terminal to use various features of the project. For instance, if the project has a command called run, then provide a command to run that command. Go through various files and various modules and after reading each function, provide an example usage of that function. Write two lines about each function, what the functionality of that function is, what parameters it take as input, what is outputs, if it is dependent on the output of any other function in the whole repository, what other functions call/use this function, and finally provide a toy example of the usage of the function. Do this for every function in all files that exist in the repository. Structure them in the same way the repository has been structured. Finally provide some run commands on how to run the main function in the terminal.
-    The third section will be Development. Here provide a list of commands that need to be run in the terminal to develop. Comment here on how to make format, make lint, check types and generate tests for the code that has been written in the files of the repository. Try to create unit tests and write tests for functions that are called/ used by many other functions, to test that they work correctly. Write some commands on how to run those tests.
-    The fourth section will be Conclusion. Here provide a general idea of what the project does and how it works. Here you can also provide how the user can generate a github.io page for the repository documentation using the four sections that you generated above in the readme.md file. Tell how to create a workflow and yml file and github pages can be used to create a documentation for the project. Put the readme.md file in the repository.
-
+    
     {additional_instructions}
     Question: {{question}}
 
@@ -87,8 +82,8 @@ def make_chain(project_name, repository_url, content_type, chat_prompt, target_a
         prompt=qa_prompt
     )
 
-    return ConversationalRetrievalChain(
-        retriever=vectorstore.as_retriever(),
+    return ChatVectorDBChain(
+        vectorstore=vectorstore,
         combine_docs_chain=doc_chain,
         question_generator=question_generator
     )