Fixed linting errors.
souradipp76 committed Nov 8, 2024
1 parent 98a2dba commit 98b0973
Showing 16 changed files with 647 additions and 484 deletions.
2 changes: 1 addition & 1 deletion doc_generator/base.py
@@ -14,4 +14,4 @@
 """

 # example constant variable
-NAME = "doc_generator"
+NAME = "doc_generator"

(The deleted and added lines are textually identical; the change is consistent with adding a missing newline at the end of the file, a common lint fix.)
75 changes: 43 additions & 32 deletions doc_generator/index/convert_json_to_markdown.py
@@ -1,11 +1,17 @@
 """
 Convert Json to Markdown
 """
+
 import json
 from pathlib import Path

-from doc_generator.types import AutodocRepoConfig, FileSummary, \
-    FolderSummary, ProcessFileParams, TraverseFileSystemParams
+from doc_generator.types import (
+    AutodocRepoConfig,
+    FileSummary,
+    FolderSummary,
+    ProcessFileParams,
+    TraverseFileSystemParams,
+)
 from doc_generator.utils.traverse_file_system import traverse_file_system
 from doc_generator.utils.file_utils import get_file_name

@@ -29,60 +35,65 @@ def count_files(process_file_params: ProcessFileParams):
         files += 1
         return

-    traverse_file_system(TraverseFileSystemParams(
-        str(input_root),
-        project_name,
-        count_files,
-        None,
-        [],
-        file_prompt,
-        folder_prompt,
-        content_type,
-        target_audience,
-        link_hosted
-    ))
+    traverse_file_system(
+        TraverseFileSystemParams(
+            str(input_root),
+            project_name,
+            count_files,
+            None,
+            [],
+            file_prompt,
+            folder_prompt,
+            content_type,
+            target_audience,
+            link_hosted,
+        )
+    )

     # Process and create markdown files for each code file in the project
     def process_file(process_file_params: ProcessFileParams) -> None:
         file_path = Path(process_file_params.file_path)
         file_name = process_file_params.file_name
-        content = file_path.read_text(encoding='utf-8')
+        content = file_path.read_text(encoding="utf-8")

         if not content or len(content) == 0:
             return

         markdown_file_path = output_root.joinpath(
-            file_path.relative_to(input_root))
+            file_path.relative_to(input_root)
+        )

         # Create the output directory if it doesn't exist
         markdown_file_path.parent.mkdir(parents=True, exist_ok=True)

         # Parse JSON content based on the file name
         data = json.loads(content)
-        if file_name == 'summary.json':
+        if file_name == "summary.json":
             data = FolderSummary(**data)
         else:
             data = FileSummary(**data)

         # Only include the file if it has a summary
-        markdown = ''
+        markdown = ""
         if data.summary:
             markdown = f"[View code on GitHub]({data.url})\n\n{data.summary}\n"
             if data.questions:
                 markdown += f"## Questions: \n{data.questions}"

-        output_path = get_file_name(markdown_file_path, '.', '.md')
-        output_path.write_text(markdown, encoding='utf-8')
+        output_path = get_file_name(markdown_file_path, ".", ".md")
+        output_path.write_text(markdown, encoding="utf-8")

-    traverse_file_system(TraverseFileSystemParams(
-        str(input_root),
-        project_name,
-        process_file,
-        None,
-        [],
-        file_prompt,
-        folder_prompt,
-        content_type,
-        target_audience,
-        link_hosted
-    ))
+    traverse_file_system(
+        TraverseFileSystemParams(
+            str(input_root),
+            project_name,
+            process_file,
+            None,
+            [],
+            file_prompt,
+            folder_prompt,
+            content_type,
+            target_audience,
+            link_hosted,
+        )
+    )
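An aside on the `FolderSummary(**data)` / `FileSummary(**data)` pattern above: it unpacks the parsed JSON object straight into the corresponding dataclass, so the JSON keys must match the field names exactly. A minimal sketch of the idiom — the two-field `FileSummary` below is a simplified stand-in, not the real class from `doc_generator.types`:

import json
from dataclasses import dataclass


@dataclass
class FileSummary:
    """Simplified stand-in for doc_generator.types.FileSummary."""
    summary: str
    url: str


raw = '{"summary": "Parses repo files.", "url": "https://example.com/f.py"}'
data = json.loads(raw)              # dict whose keys match the field names
file_summary = FileSummary(**data)  # unpack the dict into dataclass fields
print(file_summary.summary)

If the JSON carries a key the dataclass does not declare, this construction raises a TypeError, which is why the code branches on the file name before choosing which class to build.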
42 changes: 26 additions & 16 deletions doc_generator/index/create_vector_store.py
@@ -1,6 +1,7 @@
 """
 Create Vector Store
 """
+
 import fnmatch
 import os
 from pathlib import Path

@@ -14,31 +15,34 @@


 def should_ignore(file_name: str, ignore: List[str]):
-    return any(fnmatch.fnmatch(file_name, pattern)
-               for pattern in ignore)
+    return any(fnmatch.fnmatch(file_name, pattern) for pattern in ignore)


 def process_file(file_path: str, ignore: List[str]):
     """
     Process File
     """
+
     def read_file(path):
-        with open(path, 'r', encoding='utf8') as file:
+        with open(path, "r", encoding="utf8") as file:
             return file.read()

     if should_ignore(file_path, ignore):
         return None

     try:
         file_contents = read_file(file_path)
-        metadata = {'source': file_path}
+        metadata = {"source": file_path}
         doc = Document(page_content=file_contents, metadata=metadata)
         return doc
     except Exception as e:
         print(f"Error reading file {file_path}: {str(e)}")
         return None


-def process_directory(directory_path: str, ignore: List[str]) -> List[Document]:
+def process_directory(
+    directory_path: str, ignore: List[str]
+) -> List[Document]:
     """
     Process Directory
     """
@@ -47,8 +51,10 @@ def process_directory(directory_path: str, ignore: List[str]) -> List[Document]:
         files = os.listdir(directory_path)
     except Exception as e:
         print(e)
-        raise FileNotFoundError(f"Could not read directory: {directory_path}. \
-Did you run `sh download.sh`?") from e
+        raise FileNotFoundError(
+            f"Could not read directory: {directory_path}. \
+Did you run `sh download.sh`?"
+        ) from e

     for file in files:
         if should_ignore(file, ignore):
@@ -68,6 +74,7 @@ class RepoLoader(BaseLoader):
     """
     RepoLoader
     """
+
     def __init__(self, file_path: str, ignore: List[str]):
         super().__init__()
         self.file_path = file_path
@@ -77,7 +84,13 @@ def load(self) -> List[Document]:
         return process_directory(self.file_path, self.ignore)


-def create_vector_store(root: str, output: str, ignore: List[str], llms: List[LLMModels], device: str) -> None:
+def create_vector_store(
+    root: str,
+    output: str,
+    ignore: List[str],
+    llms: List[LLMModels],
+    device: str,
+) -> None:
     """
     Create Vector Store
     """
@@ -88,19 +101,16 @@ def create_vector_store(root: str, output: str, ignore: List[str], llms: List[LLMModels], device: str) -> None:
     # Split the text into chunks
     print(f"Splitting text into chunks for {len(raw_docs)} docs")
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,
-        chunk_overlap=100
+        chunk_size=1000, chunk_overlap=100
     )
     docs = text_splitter.split_documents(raw_docs)
     # Create the vectorstore
-    print('Creating vector store....')
+    print("Creating vector store....")
     vector_store = HNSWLib.from_documents(
-        docs,
-        get_embeddings(llm.name, device),
-        docstore=InMemoryDocstore()
+        docs, get_embeddings(llm.name, device), docstore=InMemoryDocstore()
     )

-    print('Saving vector store output....')
+    print("Saving vector store output....")
     vector_store.save(output)

-    print('Done creating vector store....')
+    print("Done creating vector store....")
101 changes: 56 additions & 45 deletions doc_generator/index/index.py
@@ -1,6 +1,7 @@
 """
 Index
 """
+
 from pathlib import Path
 from doc_generator.types import AutodocRepoConfig

@@ -11,59 +12,69 @@

 def index(config: AutodocRepoConfig):
     """Index"""
-    json_path = Path(config.output) / 'docs' / 'json'
-    markdown_path = Path(config.output) / 'docs' / 'markdown'
-    data_path = Path(config.output) / 'docs' / 'data'
+    json_path = Path(config.output) / "docs" / "json"
+    markdown_path = Path(config.output) / "docs" / "markdown"
+    data_path = Path(config.output) / "docs" / "data"

     # Ensure directories exist
     json_path.mkdir(parents=True, exist_ok=True)
     markdown_path.mkdir(parents=True, exist_ok=True)
     data_path.mkdir(parents=True, exist_ok=True)

     # Process the repository to create JSON files
-    print('Processing repository...')
-    process_repository(AutodocRepoConfig(
-        name=config.name,
-        repository_url=config.repository_url,
-        root=config.root,
-        output=str(json_path),
-        llms=config.llms,
-        priority=config.priority,
-        max_concurrent_calls=config.max_concurrent_calls,
-        add_questions=config.add_questions,
-        ignore=config.ignore,
-        file_prompt=config.file_prompt,
-        folder_prompt=config.folder_prompt,
-        chat_prompt=config.chat_prompt,
-        content_type=config.content_type,
-        target_audience=config.target_audience,
-        link_hosted=config.link_hosted,
-        peft_model_path=config.peft_model_path,
-        device=config.device
-    ))
+    print("Processing repository...")
+    process_repository(
+        AutodocRepoConfig(
+            name=config.name,
+            repository_url=config.repository_url,
+            root=config.root,
+            output=str(json_path),
+            llms=config.llms,
+            priority=config.priority,
+            max_concurrent_calls=config.max_concurrent_calls,
+            add_questions=config.add_questions,
+            ignore=config.ignore,
+            file_prompt=config.file_prompt,
+            folder_prompt=config.folder_prompt,
+            chat_prompt=config.chat_prompt,
+            content_type=config.content_type,
+            target_audience=config.target_audience,
+            link_hosted=config.link_hosted,
+            peft_model_path=config.peft_model_path,
+            device=config.device,
+        )
+    )

     # Convert the JSON files to Markdown
-    print('Creating markdown files...')
-    convert_json_to_markdown(AutodocRepoConfig(
-        name=config.name,
-        repository_url=config.repository_url,
-        root=str(json_path),
-        output=str(markdown_path),
-        llms=config.llms,
-        priority=config.priority,
-        max_concurrent_calls=config.max_concurrent_calls,
-        add_questions=config.add_questions,
-        ignore=config.ignore,
-        file_prompt=config.file_prompt,
-        folder_prompt=config.folder_prompt,
-        chat_prompt=config.chat_prompt,
-        content_type=config.content_type,
-        target_audience=config.target_audience,
-        link_hosted=config.link_hosted,
-        peft_model_path=config.peft_model_path,
-        device=config.device
-    ))
+    print("Creating markdown files...")
+    convert_json_to_markdown(
+        AutodocRepoConfig(
+            name=config.name,
+            repository_url=config.repository_url,
+            root=str(json_path),
+            output=str(markdown_path),
+            llms=config.llms,
+            priority=config.priority,
+            max_concurrent_calls=config.max_concurrent_calls,
+            add_questions=config.add_questions,
+            ignore=config.ignore,
+            file_prompt=config.file_prompt,
+            folder_prompt=config.folder_prompt,
+            chat_prompt=config.chat_prompt,
+            content_type=config.content_type,
+            target_audience=config.target_audience,
+            link_hosted=config.link_hosted,
+            peft_model_path=config.peft_model_path,
+            device=config.device,
+        )
+    )

     # Create a vector store from the Markdown documents
-    print('Creating vector files...')
-    create_vector_store(str(config.root), str(data_path), config.ignore, config.llms, config.device)
+    print("Creating vector files...")
+    create_vector_store(
+        str(config.root),
+        str(data_path),
+        config.ignore,
+        config.llms,
+        config.device,
+    )
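For orientation, `index` drives the three stages above in order: `process_repository` writes JSON summaries, `convert_json_to_markdown` turns them into Markdown, and `create_vector_store` embeds the result. A hypothetical driver is sketched below, assuming `AutodocRepoConfig` and `LLMModels` are importable from `doc_generator.types` as the diffs suggest; every concrete value (URL, paths, prompts, the `GPT3` member name) is an illustrative assumption, not taken from the repository:

# Hypothetical usage sketch -- field names mirror the AutodocRepoConfig
# construction in index() above; the values are assumptions.
from doc_generator.index.index import index
from doc_generator.types import AutodocRepoConfig, LLMModels

config = AutodocRepoConfig(
    name="doc_generator",
    repository_url="https://github.com/souradipp76/doc_generator",  # assumed URL
    root="./doc_generator",
    output="./output",
    llms=[LLMModels.GPT3],  # assumed enum member
    priority=None,
    max_concurrent_calls=50,
    add_questions=False,
    ignore=["*.pyc", "__pycache__", ".git*"],
    file_prompt="Summarize this file.",
    folder_prompt="Summarize this folder.",
    chat_prompt="",
    content_type="code",
    target_audience="developers",
    link_hosted=False,
    peft_model_path=None,
    device="cpu",
)

index(config)  # JSON summaries -> Markdown docs -> vector store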