diff --git a/.github/workflows/deploy_mkdocs.yml b/.github/workflows/deploy_mkdocs.yml
index d8f9ce5..250093d 100644
--- a/.github/workflows/deploy_mkdocs.yml
+++ b/.github/workflows/deploy_mkdocs.yml
@@ -8,6 +8,12 @@ on:
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
 
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
 # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
 # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
 concurrency:
@@ -40,4 +46,4 @@ jobs:
 
     # Build the MkDocs site
     - name: Build and Publish MkDocs site
-      run: mkdocs gh-deploy
\ No newline at end of file
+      run: mkdocs gh-deploy --force
\ No newline at end of file
diff --git a/HISTORY.md b/HISTORY.md
index fb9a858..5b0e2e1 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -2,7 +2,7 @@ Changelog
 =========
 
-(unreleased)
+Release Version 1.1.3
 ------------
 - Updated Makefile. [Souradip Pal]
 - Fixed device settings. [Souradip Pal]
diff --git a/docs/index.md b/docs/index.md
index 546d19f..0044930 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -121,3 +121,21 @@ query.generate_readme(repo_config, user_config, readme_config)
 ```
 
 Run the sample script in the `examples/example.py` to see a typical code usage.
+
+## Contributing
+
+ReadmeReady is an open-source project that is supported by a community who will gratefully and humbly accept any contributions you might make to the project.
+
+If you are interested in contributing, read the [CONTRIBUTING.md](https://github.com/souradipp76/ReadMeReady/blob/main/CONTRIBUTING.md) file.
+
+- Submit a bug report or feature request on [GitHub Issues](https://github.com/souradipp76/ReadMeReady/issues).
+- Add to the documentation or help with our website.
+- Write unit or integration tests for our project under the `tests` directory.
+- Answer questions on our issues, mailing list, Stack Overflow, and elsewhere.
+- Write a blog post, tweet, or share our project with others.
+
+As you can see, there are lots of ways to get involved, and we would be very happy for you to join us!
+
+## License
+
+Read the [LICENSE](https://github.com/souradipp76/ReadMeReady/blob/main/LICENSE) file.
\ No newline at end of file
diff --git a/docs/reference.md b/docs/reference.md
index 07afb76..1321370 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -1,30 +1,36 @@
 # API Reference
 
-::: readme_ready.index.index
+- ::: readme_ready.index.index
     handler: python
     options:
-      members:
-      - index
      show_root_heading: true
       show_source: false
+      separate_signature: true
 
-::: readme_ready.index.create_vector_store
+- ::: readme_ready.index.convert_json_to_markdown
+    handler: python
+    options:
+      show_root_heading: true
+      show_source: false
+      separate_signature: true
+
+- ::: readme_ready.index.create_vector_store
     handler: python
     options:
-      members:
-      - create_vector_store
       show_root_heading: true
       show_source: false
+      separate_signature: true
 
-::: readme_ready.index.process_repository
+- ::: readme_ready.index.process_repository
     handler: python
     options:
       members:
       - process_repository
       show_root_heading: true
       show_source: false
+      separate_signature: true
 
-::: readme_ready.query.query
+- ::: readme_ready.query.query
     handler: python
     options:
       members:
@@ -32,12 +38,14 @@
       - generate_readme
       show_root_heading: true
       show_source: false
+      separate_signature: true
 
-::: readme_ready.query.create_chat_chain
+- ::: readme_ready.query.create_chat_chain
     handler: python
     options:
       members:
       - make_qa_chain
       - make_readme_chain
       show_root_heading: true
-      show_source: false
\ No newline at end of file
+      show_source: false
+      separate_signature: true
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index ab092c1..3c31a27 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -2,4 +2,5 @@ site_name: ReadmeReady
 theme: readthedocs
 plugins:
   - search
+  - autorefs
   - mkdocstrings
diff --git a/readme_ready/index/convert_json_to_markdown.py b/readme_ready/index/convert_json_to_markdown.py
index c90735b..1f87a69 100644
--- a/readme_ready/index/convert_json_to_markdown.py
+++ b/readme_ready/index/convert_json_to_markdown.py
@@ -1,5 +1,5 @@
 """
-Convert Json to Markdown
+Utility to Convert Summary JSON to Markdown
 """
 
 import json
@@ -17,7 +17,18 @@
 
 
 def convert_json_to_markdown(config: AutodocRepoConfig):
-    """Convert Json to Markdown"""
+    """
+    Converts JSON summaries to Markdown documents
+
+    Traverses the root directory, finds the summary JSON for each file
+    and directory, and converts them into Markdown format.
+
+    Args:
+        config: An AutodocRepoConfig instance containing configuration
+            settings for indexing, including output paths, repository
+            details, and processing options.
+
+    """
     project_name = config.name
     input_root = Path(config.root)
     output_root = Path(config.output)
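A minimal sketch of how the entry point documented above is driven, assuming a `repo_config` built the way `examples/example.py` builds one; the `AutodocRepoConfig` import path and the placeholder construction are assumptions, not taken from this diff:

```python
from readme_ready.index.convert_json_to_markdown import convert_json_to_markdown
from readme_ready.types import AutodocRepoConfig

# Placeholder: in real use, construct this as in examples/example.py with the
# repository name, root, output path, LLM selection, and prompt settings.
repo_config: AutodocRepoConfig = ...

# Walks repo_config.root, reads the summary JSON written for each file and
# directory, and emits the Markdown twin of each summary under
# repo_config.output.
convert_json_to_markdown(repo_config)
```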
diff --git a/readme_ready/index/create_vector_store.py b/readme_ready/index/create_vector_store.py
index e28f740..8f1f094 100644
--- a/readme_ready/index/create_vector_store.py
+++ b/readme_ready/index/create_vector_store.py
@@ -1,5 +1,5 @@
 """
-Create Vector Store
+Utilities to Create Vector Store
 """
 
 import fnmatch
@@ -19,9 +19,21 @@ def should_ignore(file_name: str, ignore: List[str]):
     return any(fnmatch.fnmatch(file_name, pattern) for pattern in ignore)
 
 
-def process_file(file_path: str, ignore: List[str]):
+def process_file(file_path: str, ignore: List[str]) -> Document | None:
     """
-    Process File
+    Processes a file
+
+    Processes a specified file and converts the content
+    of the file into Document format. Ignores any file matching
+    the patterns provided in the ignore list.
+
+    Args:
+        file_path: The file to be processed.
+        ignore: A list of file patterns to ignore during processing.
+
+    Returns:
+        doc: A Document with file contents and metadata
     """
 
     def read_file(path):
@@ -45,7 +57,20 @@ def process_directory(
     directory_path: str, ignore: List[str]
 ) -> List[Document]:
     """
-    Process Directory
+    Processes a directory
+
+    Processes a specified directory and converts all the content
+    of the files in the directory into Document format. Ignores
+    files matching the patterns provided in the ignore list.
+
+    Args:
+        directory_path: The root directory containing the files to be
+            processed.
+        ignore: A list of file patterns to ignore during processing.
+
+    Returns:
+        docs: List of Documents with file contents and metadata
+
     """
     docs = []
     try:
@@ -73,7 +98,16 @@ def process_directory(
 
 class RepoLoader(BaseLoader):
     """
-    RepoLoader
+    Class to load and process a repository
+
+    A loader class which loads and processes a repository given
+    the root directory path and a list of file patterns to ignore.
+
+    Typical usage example:
+
+        loader = RepoLoader(path, ignore)
+        docs = loader.load()
+
     """
 
     def __init__(self, file_path: str, ignore: List[str]):
@@ -93,27 +127,23 @@ def create_vector_store(
     device: str,
 ) -> None:
     """
-    Creates a vector store from Markdown documents.
-
-    Loads documents from the specified root directory, splits the text into chunks,
-    creates a vector store using the selected LLM model, and saves the vector store
-    to the output path. Ignores files matching the patterns provided in the ignore list.
+    Creates a vector store from Markdown documents
+
+    Loads documents from the specified root directory, splits the text into
+    chunks, creates a vector store using the selected LLM, and saves the
+    vector store to the output path. Ignores files matching the patterns
+    provided in the ignore list.
 
     Args:
         root: The root directory containing the documents to be processed.
         output: The directory where the vector store will be saved.
         ignore: A list of file patterns to ignore during document loading.
         llms: A list of LLMModels to use for generating embeddings.
-        device: The device to use for embedding generation (e.g., 'cpu' or 'cuda').
+        device: The device to use for embedding generation
+            (e.g., 'cpu' or 'auto').
 
-    Returns:
-        None.
-
-    Raises:
-        IOError: If an error occurs accessing the filesystem.
-        Exception: If an error occurs during document loading, splitting, or vector store creation.
     """
-
+
     llm = llms[1] if len(llms) > 1 else llms[0]
     loader = RepoLoader(root, ignore)
     raw_docs = loader.load()
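The `RepoLoader` docstring now carries a usage example; a slightly fuller sketch of the documented flow follows. The paths, the ignore patterns, and the `LLMModels.LLAMA2_7B_CHAT_GPTQ` member are illustrative assumptions:

```python
from readme_ready.index.create_vector_store import RepoLoader, create_vector_store
from readme_ready.types import LLMModels

ignore = ["*.png", "*.lock", ".git/**"]  # illustrative ignore patterns

# Load every non-ignored file under the repository as a LangChain Document.
loader = RepoLoader("./my_repo", ignore)
docs = loader.load()

# Split the documents into chunks, embed them, and persist the vector store.
create_vector_store(
    root="./my_repo",
    output="./output/my_repo/docs/data",
    ignore=ignore,
    llms=[LLMModels.LLAMA2_7B_CHAT_GPTQ],
    device="cpu",
)
```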
diff --git a/readme_ready/index/index.py b/readme_ready/index/index.py
index 0cd6793..88696c5 100644
--- a/readme_ready/index/index.py
+++ b/readme_ready/index/index.py
@@ -1,5 +1,5 @@
 """
-Index
+Utility to Index a Repository and Store It in a Vector Store
 """
 
 from pathlib import Path
@@ -13,18 +13,17 @@
 
 def index(config: AutodocRepoConfig) -> None:
     """
-    Indexes a repository to generate documentation and vector store files.
+    Indexes a repository to generate documentation and vector store files
 
-    Processes the repository specified in the config to create JSON files, converts them to Markdown format,
-    and builds a vector store from the Markdown documents. Creates the necessary directories for JSON,
+    Processes the repository specified in the config to create JSON files,
+    converts them to Markdown format, and builds a vector store from the
+    Markdown documents. Creates the necessary directories for JSON,
     Markdown, and data outputs as specified in the configuration.
 
     Args:
-        config: An AutodocRepoConfig instance containing configuration settings for indexing, including
-            output paths, repository details, and processing options.
-
-    Returns:
-        None.
+        config: An AutodocRepoConfig instance containing configuration
+            settings for indexing, including output paths, repository
+            details, and processing options.
 
     """
     json_path = Path(config.output) / "docs" / "json"
diff --git a/readme_ready/index/process_repository.py b/readme_ready/index/process_repository.py
index 186b93e..c10c531 100644
--- a/readme_ready/index/process_repository.py
+++ b/readme_ready/index/process_repository.py
@@ -1,5 +1,5 @@
 """
-Process Repository
+Utilities to Process a Repository and Summarize File Contents
 """
 
 import hashlib
@@ -34,29 +34,24 @@
 from .select_model import select_model
 
 
-def process_repository(config: AutodocRepoConfig, dry_run=False) -> None:
+def process_repository(
+    config: AutodocRepoConfig, dry_run: bool = False
+) -> None:
     """
-    Creates a vector store from Markdown documents.
+    Processes a repository to generate JSON summaries using LLMs
 
-    Loads documents from the specified root directory, splits the text into chunks,
-    creates a vector store using the selected LLM model, and saves the vector store
-    to the output path. Ignores files matching the patterns provided in the ignore list.
+    Traverses the repository, summarizes the contents of
+    each file and directory using an LLM via a summarization prompt, and
+    saves the summaries into JSON files.
 
     Args:
-        root: The root directory containing the documents to be processed.
-        output: The directory where the vector store will be saved.
-        ignore: A list of file patterns to ignore during document loading.
-        llms: A list of LLMModels to use for generating embeddings.
-        device: The device to use for embedding generation (e.g., 'cpu' or 'cuda').
-
-    Returns:
-        None.
-
-    Raises:
-        IOError: If an error occurs accessing the filesystem.
-        Exception: If an error occurs during document loading, splitting, or vector store creation.
-    """
+        config: An AutodocRepoConfig instance containing configuration
+            settings for indexing, including output paths, repository
+            details, and processing options.
+        dry_run: Flag to enable dry-run mode, where the process runs over
+            the directory without actually indexing the documents.
+    """
 
     def read_file(path):
        with open(path, "r", encoding="utf-8") as file:
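Taken together, the `process_repository` and `index` docstrings describe a two-step flow. A hedged sketch, reusing the `repo_config` placeholder from the earlier sketch:

```python
from readme_ready.index.index import index
from readme_ready.index.process_repository import process_repository

# Optional dry run: traverse the repository without actually writing the
# JSON summaries (per the new dry_run docstring).
process_repository(repo_config, dry_run=True)

# Full pass: creates the docs/json, docs/markdown, and docs/data directories
# under repo_config.output, summarizes every file and directory, converts the
# summaries to Markdown, and builds the vector store.
index(repo_config)
```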
diff --git a/readme_ready/query/create_chat_chain.py b/readme_ready/query/create_chat_chain.py
index 2a3121a..9784868 100644
--- a/readme_ready/query/create_chat_chain.py
+++ b/readme_ready/query/create_chat_chain.py
@@ -1,5 +1,5 @@
 """
-Create Chat Chain
+Creates Chains for QA Chat or README Generation
 """
 
 from typing import List
@@ -12,8 +12,10 @@
     create_stuff_documents_chain,
 )
 from langchain.prompts import PromptTemplate
+from langchain_core.runnables.base import Runnable
 
 from readme_ready.types import LLMModels
+from readme_ready.utils.HNSWLib import HNSWLib
 from readme_ready.utils.llm_utils import (
     get_gemma_chat_model,
     get_llama_chat_model,
@@ -139,41 +141,46 @@ def make_readme_prompt(
 
 
 def make_qa_chain(
-    project_name,
-    repository_url,
-    content_type,
-    chat_prompt,
-    target_audience,
-    vectorstore,
+    project_name: str,
+    repository_url: str,
+    content_type: str,
+    chat_prompt: str,
+    target_audience: str,
+    vector_store: HNSWLib,
     llms: List[LLMModels],
     device: str = "cpu",
-    on_token_stream=None,
-):
+    on_token_stream: bool = False,
+) -> Runnable:
     """
-    Creates a question-answering (QA) chain for the specified project.
+    Creates a question-answering (QA) chain for the specified project
 
-    Initializes and configures the QA chain using the provided repository and user configurations.
-    Selects the appropriate language model (LLM), sets up the retriever with a history-aware mechanism,
-    and combines document chains for processing queries. The chain facilitates interaction with the
-    vector store to retrieve and process relevant information based on user queries.
+    Initializes and configures the QA chain using the provided repository
+    and user configurations. Selects the appropriate language model (LLM),
+    sets up the retriever with a history-aware mechanism, and combines
+    document chains for processing queries. The chain facilitates interaction
+    with the vector store to retrieve and process relevant information
+    based on user queries.
 
     Args:
-        project_name: The name of the project for which the QA chain is being created.
+        project_name: The name of the project for which the QA chain is
+            being created.
         repository_url: The URL of the repository containing the project.
-        content_type: The type of content to be processed (e.g., 'code', 'documentation').
+        content_type: The type of content to be processed
+            (e.g., 'code', 'documentation').
         chat_prompt: The prompt template used for generating chat responses.
         target_audience: The intended audience for the QA responses.
-        vectorstore: An instance of HNSWLib representing the vector store containing document embeddings.
-        llms: A list of LLMModels to select from for generating embeddings and responses.
+        vector_store: An instance of HNSWLib representing the vector store
+            containing document embeddings.
+        llms: A list of LLMModels to select from for generating embeddings
+            and responses.
         device: The device to use for model inference (default is 'cpu').
-        on_token_stream: Optional callback for handling token streams during model inference.
+        on_token_stream: Flag to enable streaming of tokens during
+            model inference.
 
     Returns:
-        A retrieval chain configured for question-answering, combining the retriever and document processing chain.
+        A retrieval chain configured for question-answering, combining the
+        retriever and document processing chain.
 
-    Raises:
-        ValueError: If no suitable model is found in the provided LLMs.
-        RuntimeError: If there is an issue initializing the chat models or creating the chains.
     """
     llm = llms[1] if len(llms) > 1 else llms[0]
     llm_name = llm.value
@@ -206,7 +213,7 @@
     )
 
     question_generator = create_history_aware_retriever(
-        question_chat_model, vectorstore.as_retriever(), condense_qa_prompt
+        question_chat_model, vector_store.as_retriever(), condense_qa_prompt
     )
 
     model_kwargs = {"temperature": 0.2, "device": device}
@@ -254,43 +261,49 @@
 
 
 def make_readme_chain(
-    project_name,
-    repository_url,
-    content_type,
-    chat_prompt,
-    target_audience,
-    vectorstore,
+    project_name: str,
+    repository_url: str,
+    content_type: str,
+    chat_prompt: str,
+    target_audience: str,
+    vector_store: HNSWLib,
     llms: List[LLMModels],
-    peft_model=None,
+    peft_model: str | None = None,
     device: str = "cpu",
-    on_token_stream=None,
-):
+    on_token_stream: bool = False,
+) -> Runnable:
     """
-    Creates a README generation chain for the specified project.
+    Creates a README generation chain for the specified project
 
-    Initializes and configures the README generation chain using the provided repository, user, and README configurations.
-    Selects the appropriate language model (LLM), sets up the document processing chain with the specified prompts,
-    and integrates with the vector store to generate comprehensive README sections based on project data.
-    The chain facilitates automated generation of README files tailored to the project's specifications.
+    Initializes and configures the README generation chain using the provided
+    repository, user, and README configurations. Selects the appropriate
+    language model (LLM), sets up the document processing chain with the
+    specified prompts, and integrates with the vector store to generate
+    comprehensive README sections based on project data. The chain facilitates
+    automated generation of README files tailored to the project's
+    specifications.
 
     Args:
-        project_name: The name of the project for which the README is being generated.
+        project_name: The name of the project for which the README is
+            being generated.
         repository_url: The URL of the repository containing the project.
-        content_type: The type of content to be included in the README (e.g., 'overview', 'installation').
+        content_type: The type of content to be included in the README
+            (e.g., 'overview', 'installation').
         chat_prompt: The prompt template used for generating README content.
         target_audience: The intended audience for the README.
-        vectorstore: An instance of HNSWLib representing the vector store containing document embeddings.
+        vector_store: An instance of HNSWLib representing the vector store
+            containing document embeddings.
         llms: A list of LLMModels to select from for generating README content.
-        peft_model: An optional parameter specifying a PEFT (Parameter-Efficient Fine-Tuning) model for enhanced performance.
+        peft_model: An optional parameter specifying a PEFT
+            (Parameter-Efficient Fine-Tuning) model for enhanced performance.
        device: The device to use for model inference (default is 'cpu').
-        on_token_stream: Optional callback for handling token streams during model inference.
+        on_token_stream: Flag to enable streaming of tokens during
+            model inference.
 
     Returns:
-        A retrieval chain configured for README generation, combining the retriever and document processing chain.
+        A retrieval chain configured for README generation, combining the
+        retriever and document processing chain.
 
-    Raises:
-        ValueError: If no suitable model is found in the provided LLMs.
-        RuntimeError: If there is an issue initializing the chat models or creating the chains.
     """
     llm = llms[1] if len(llms) > 1 else llms[0]
     llm_name = llm.value
@@ -340,5 +353,5 @@
     )
 
     return create_retrieval_chain(
-        retriever=vectorstore.as_retriever(), combine_docs_chain=doc_chain
+        retriever=vector_store.as_retriever(), combine_docs_chain=doc_chain
    )
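A sketch of calling `make_qa_chain` under the new signature. The vector-store placeholder, the prompt strings, and the `input`/`chat_history` payload keys (standard for LangChain retrieval chains) are assumptions, not taken from this diff:

```python
from readme_ready.query.create_chat_chain import make_qa_chain
from readme_ready.types import LLMModels
from readme_ready.utils.HNSWLib import HNSWLib

# Placeholder: load the HNSWLib store built during indexing.
vector_store: HNSWLib = ...

qa_chain = make_qa_chain(
    project_name="ReadmeReady",
    repository_url="https://github.com/souradipp76/ReadMeReady",
    content_type="code",
    chat_prompt="",  # illustrative; callers pass their own prompt text
    target_audience="smart developer",
    vector_store=vector_store,
    llms=[LLMModels.LLAMA2_7B_CHAT_GPTQ],
    device="cpu",
)

# The returned Runnable follows LangChain's retrieval-chain convention.
result = qa_chain.invoke(
    {"input": "How is the vector store created?", "chat_history": []}
)
```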
diff --git a/readme_ready/query/query.py b/readme_ready/query/query.py
index b7b4b1a..a3bd9e2 100644
--- a/readme_ready/query/query.py
+++ b/readme_ready/query/query.py
@@ -1,5 +1,5 @@
 """
-Query
+Utility to Query a Code Repository or Generate README
 """
 
 import os
@@ -79,22 +79,19 @@ def init_readme_chain(
 
 def query(repo_config: AutodocRepoConfig, user_confg: AutodocUserConfig):
     """
     Queries the repository for information based on user input.
-
-    Initializes a question-answering chain, displays a welcome message, and enters a loop to
-    prompt the user for questions about the repository. Processes each question by invoking
-    the QA chain, updates the chat history, and displays the response in Markdown format. The
-    loop continues until the user types 'exit'.
 
-    Args:
-        repo_config: An AutodocRepoConfig instance containing configuration settings for the repository.
-        user_confg: An AutodocUserConfig instance containing user-specific configuration settings.
+    Initializes a question-answering chain, displays a welcome message,
+    and enters a loop to prompt the user for questions about the repository.
+    Processes each question by invoking the QA chain, updates the chat
+    history, and displays the response in Markdown format. The loop continues
+    until the user inputs 'exit'.
 
-    Returns:
-        None.
+    Args:
+        repo_config: An AutodocRepoConfig instance containing configuration
+            settings for the repository.
+        user_confg: An AutodocUserConfig instance containing user-specific
+            configuration settings.
 
-    Raises:
-        Exception: If an error occurs during the initialization of the QA chain, displaying the welcome message,
-        or during the invocation of the QA chain.
     """
     chain = init_qa_chain(repo_config, user_confg)
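The interactive loop documented above is reached through the module-level entry point. A one-line sketch, with `repo_config` and a `user_config` (an `AutodocUserConfig`) assumed to be built as in `examples/example.py`:

```python
from readme_ready.query.query import query

# Prompts for questions about the repository until the user types 'exit'.
query(repo_config, user_config)
```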
@@ -126,23 +123,21 @@
 ):
     """
     Generates a README file based on repository and user configurations.
-
-    Initializes a README generation chain, clears the terminal, and prepares the output file.
-    Iterates over the specified headings in the README configuration, generates content for each
-    section by invoking the chain, and writes the content in Markdown format to the README file.
-    Handles any RuntimeError that occurs during the process.
 
-    Args:
-        repo_config: An AutodocRepoConfig instance containing configuration settings for the repository.
-        user_config: An AutodocUserConfig instance containing user-specific configuration settings.
-        readme_config: An AutodocReadmeConfig instance containing configuration settings for README generation.
+    Initializes a README generation chain, clears the terminal, and prepares
+    the output file. Iterates over the specified headings in the README
+    configuration, generates content for each section by invoking the chain,
+    and writes the content in Markdown format to the README file. Handles any
+    RuntimeError that occurs during the process.
 
-    Returns:
-        None.
+    Args:
+        repo_config: An AutodocRepoConfig instance containing configuration
+            settings for the repository.
+        user_config: An AutodocUserConfig instance containing user-specific
+            configuration settings.
+        readme_config: An AutodocReadmeConfig instance containing
+            configuration settings for README generation.
 
-    Raises:
-        IOError: If an error occurs while accessing or writing to the filesystem.
-        Exception: If an error occurs during the initialization of the README chain or content generation.
     """
     chain = init_readme_chain(repo_config, user_config)
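Likewise for `generate_readme`; the `AutodocReadmeConfig(headings=...)` construction mirrors the usage example in the project README, but treat the exact field name as an assumption:

```python
from readme_ready.query.query import generate_readme
from readme_ready.types import AutodocReadmeConfig

# Comma-separated list of sections to generate, one chain invocation each.
readme_config = AutodocReadmeConfig(
    headings="Description,Requirements,Installation,Usage,Contributing,License"
)
generate_readme(repo_config, user_config, readme_config)
```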