From 48ecec89da6da228b810f259836cd092e4f7fd9b Mon Sep 17 00:00:00 2001 From: Eli Date: Sun, 27 Oct 2024 03:11:43 -0500 Subject: [PATCH] Fix a few small bugs. --- README.md | 2 +- .../img/headers/{ascii.png => ascii-art.png} | Bin docs/docs/examples/gallery.md | 1 + pyproject.toml | 3 +- readmeai/config/settings.py | 5 + readmeai/config/settings/prompts.toml | 14 +-- readmeai/config/settings/tool_config.toml | 6 +- readmeai/generators/quickstart.py | 97 +++++++++++++----- readmeai/preprocessor/document_cleaner.py | 85 ++++++++++++--- tests/generators/test_quickstart.py | 8 +- tests/preprocessor/test_document_cleaner.py | 94 +++++++++++++++++ 11 files changed, 256 insertions(+), 59 deletions(-) rename docs/docs/assets/img/headers/{ascii.png => ascii-art.png} (100%) diff --git a/README.md b/README.md index 9b21e612..bb7ea641 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ Let's take a look at some possible customizations created by readme-ai:
- ascii-readme-header-style + ascii-readme-header-style
--header-style ascii diff --git a/docs/docs/assets/img/headers/ascii.png b/docs/docs/assets/img/headers/ascii-art.png similarity index 100% rename from docs/docs/assets/img/headers/ascii.png rename to docs/docs/assets/img/headers/ascii-art.png diff --git a/docs/docs/examples/gallery.md b/docs/docs/examples/gallery.md index aaa8244b..836f2799 100644 --- a/docs/docs/examples/gallery.md +++ b/docs/docs/examples/gallery.md @@ -3,6 +3,7 @@ title: Gallery --- Explore various README examples from different programming languages and technologies. Each example showcases a README file from a different repository and project type. + | Technology | Example Output | Repository | Description | |------------|---------------|------------|-------------| | Readme-ai | [readme-ai.md][default] | [readme-ai][readme-ai] | Readme-ai project | diff --git a/pyproject.toml b/pyproject.toml index 4d0f68f8..b94c459f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "readmeai" -version = "0.5.99.post3" +version = "0.5.99.post4" description = "Automated README file generator, powered by AI." authors = ["Eli "] license = "MIT" @@ -46,6 +46,7 @@ structlog = "^24.4.0" tenacity = "^8.2.2" tiktoken = "^0.4.0" tomli = { version = "*", python = "<3.11" } +typing-extensions = { version = "*", python = "<3.11" } anthropic = { version = "*", optional = true } google-generativeai = { version = "*", optional = true } diff --git a/readmeai/config/settings.py b/readmeai/config/settings.py index 01eddf0a..d028c96f 100644 --- a/readmeai/config/settings.py +++ b/readmeai/config/settings.py @@ -36,6 +36,11 @@ from readmeai.utils.file_handler import FileHandler from readmeai.utils.file_resource import get_resource_path +try: + from typing import Self +except ImportError: + from typing_extensions import Self + _logger = get_logger(__name__) diff --git a/readmeai/config/settings/prompts.toml b/readmeai/config/settings/prompts.toml index 6d6174c4..777241dd 100644 --- a/readmeai/config/settings/prompts.toml +++ b/readmeai/config/settings/prompts.toml @@ -132,19 +132,19 @@ Aim for a clear, engaging description that captures the essence of the project w """ slogan = """ -Conceptualize a catchy and memorable slogan for the project: {0} ({1}). -Your response should synthesize the project's essence, values, or unique selling points into a concise and engaging phrase. -While generating the slogan, please reference the following codebase details: +Create a catchy and memorable slogan for the project: {0} ({1}). +Synthesize the project's essence, values, or unique selling points into a concise and engaging phrase. FILE CONTENTS: {2} -- Your response slogan should be 5-8 words long at most. -- Slogan should be clear, concise and memorable. -- DO NOT INCLUDE THE PROJECT NAME in the slogan. +- The slogan must be a single string with no more than 8 words. +- It should be clear, concise, and memorable. +- DO NOT INCLUDE the project name in the slogan. +- RETURN ONLY the slogan without any additional text or explanations. -Be creative, think outside the box, and have fun. Cheers! +Be creative and have fun! """ diff --git a/readmeai/config/settings/tool_config.toml b/readmeai/config/settings/tool_config.toml index aa5f5ee8..c78c04cd 100644 --- a/readmeai/config/settings/tool_config.toml +++ b/readmeai/config/settings/tool_config.toml @@ -3,9 +3,9 @@ # -- Docker -------------------------------------------------------------------- [default] -install = "❯ echo 'INSERT-INSTALL-COMMAND-HERE'" -usage = "❯ echo 'INSERT-RUN-COMMAND-HERE'" -test = "❯ echo 'INSERT-TEST-COMMAND-HERE'" +install = "echo 'INSERT-INSTALL-COMMAND-HERE'" +usage = "echo 'INSERT-RUN-COMMAND-HERE'" +test = "echo 'INSERT-TEST-COMMAND-HERE'" [containers] name = "Docker" diff --git a/readmeai/generators/quickstart.py b/readmeai/generators/quickstart.py index 77f0d0cf..d4b82eaa 100644 --- a/readmeai/generators/quickstart.py +++ b/readmeai/generators/quickstart.py @@ -1,4 +1,5 @@ from dataclasses import dataclass, field +from typing import Optional from readmeai.config.settings import ConfigLoader from readmeai.ingestion.models import QuickStart @@ -31,6 +32,12 @@ def generate( """Get any relevant commands for the Quickstart instructions.""" try: primary_language = self._get_primary_language(language_counts) + + if not primary_language: + primary_language = ( + f"Error detecting primary_language: {language_counts}" + ) + quickstart = QuickStart( primary_language=primary_language, language_counts=language_counts, @@ -46,26 +53,48 @@ def generate( ) return QuickStart() - def _get_primary_language(self, counts: dict[str, int]) -> str | None: + def _get_primary_language(self, counts: dict[str, int]) -> Optional[str]: """Determine the primary language of the repository.""" - if not counts: + try: + if not counts: + return None + + # Filter out YAML files and empty counts + valid_counts = { + k: v + for k, v in counts.items() + if k not in ("yaml", "yml") and v > 0 + } + + if not valid_counts: + return None + + primary_lang = max(valid_counts, key=valid_counts.get) + + return self.language_names.get( + primary_lang, self.language_names.get("default") + ) + except Exception as e: + _logger.error(f"Error determining primary language: {e}") return None - counts = {k: v for k, v in counts.items() if k not in ("yaml", "yml")} - primary_lang = max(counts, key=counts.get) - return self.language_names.get( - primary_lang, self.language_names.get("default") - ) def _generate_commands( self, quickstart: QuickStart, primary_language: str ) -> None: """Generate install, usage, and test commands.""" + if not primary_language: + return + command_types = ["install", "usage", "test"] tool_types = ["package_managers", "containers"] + for cmd_type in command_types: commands: list[str] = [] for tool_type in tool_types: - tools = getattr(quickstart, tool_type) + tools = getattr(quickstart, tool_type, {}) + if not tools: + continue + commands.extend( filter( None, @@ -90,28 +119,44 @@ def _format_command( file_path: str, cmd_type: str, tool_type: str, - ) -> str | None: + ) -> Optional[str]: """Format a command for the Quickstart instructions.""" - config = ( - self.tools.get(primary_language.lower(), {}) - .get(tool_type, {}) - .get(tool_name, {}) - ) or self.tools.get(tool_type, {}).get(tool_name, {}) - - cmd = config.get(cmd_type, self.default_commands.get(cmd_type)) - if not cmd: - return None + try: + if not primary_language or not tool_name: + return None - if cmd_type == "install" and tool_type == "containers": - cmd = cmd.replace("{image_name}", self.config.config.git.full_name) - elif cmd_type in {"install", "test"}: - cmd = cmd.replace("{file}", file_path) - elif cmd_type == "usage": - cmd = cmd.replace("{executable}", self.config.config.git.name) - return f""" -**Using `{tool_name}`**   []({config.get('website', '')}) + lang_key = primary_language.lower() + config = ( + self.tools.get(lang_key, {}) + .get(tool_type, {}) + .get(tool_name, {}) + ) or self.tools.get(tool_type, {}).get(tool_name, {}) + + cmd = config.get(cmd_type, self.default_commands.get(cmd_type)) + if not cmd: + return None + + if cmd_type == "install" and tool_type == "containers": + cmd = cmd.replace( + "{image_name}", self.config.config.git.full_name or "" + ) + elif cmd_type in {"install", "test"}: + cmd = cmd.replace("{file}", file_path or "") + elif cmd_type == "usage": + cmd = cmd.replace( + "{executable}", self.config.config.git.name or "" + ) + + shield_url = config.get("shield", "") + website_url = config.get("website", "") + + return f""" +**Using `{tool_name}`**   []({website_url}) ```sh ❯ {cmd} ``` """ + except Exception as e: + _logger.error(f"Error formatting command for {tool_name}: {e}") + return None diff --git a/readmeai/preprocessor/document_cleaner.py b/readmeai/preprocessor/document_cleaner.py index 65fd3bbb..4c86d61b 100644 --- a/readmeai/preprocessor/document_cleaner.py +++ b/readmeai/preprocessor/document_cleaner.py @@ -1,4 +1,5 @@ import re +import textwrap class DocumentCleaner: @@ -12,41 +13,91 @@ def __init__( remove_extra_whitespaces: bool = True, remove_trailing_whitespaces: bool = True, normalize_indentation: bool = True, + dedent: bool = False, ): self.remove_empty_lines = remove_empty_lines self.remove_extra_whitespaces = remove_extra_whitespaces self.remove_trailing_whitespaces = remove_trailing_whitespaces self.normalize_indentation = normalize_indentation + self.dedent = dedent def clean(self, code: str) -> str: """Clean the given document string.""" + lines = code.splitlines() + if self.remove_empty_lines: - code = self._remove_empty_lines(code) - if self.remove_extra_whitespaces: - code = self._remove_extra_whitespaces(code) + lines = [line for line in lines if line.strip()] + if self.remove_trailing_whitespaces: - code = self._remove_trailing_whitespaces(code) + lines = [line.rstrip() for line in lines] + if self.normalize_indentation: - code = self._normalize_indentation(code) - return code.strip() + lines = self._normalize_indentation("\n".join(lines)).splitlines() - def _remove_empty_lines(self, code: str) -> str: - """Remove empty lines and lines with only whitespace.""" - return "\n".join(line for line in code.splitlines() if line.strip()) + result = "\n".join(lines) - def _remove_extra_whitespaces(self, code: str) -> str: - """Remove extra whitespaces within lines.""" - return re.sub(r"\s+", " ", code) + if self.dedent: + result = textwrap.dedent(result) - def _remove_trailing_whitespaces(self, code: str) -> str: - """Remove trailing whitespaces from each line.""" - return "\n".join(line.rstrip() for line in code.splitlines()) + if self.remove_extra_whitespaces: + # Only remove extra spaces within each line, preserving leading spaces + lines = result.splitlines() + lines = [ + self._preserve_indent_remove_extra_spaces(line) + for line in lines + ] + result = "\n".join(lines) + + return result.rstrip() + + def _preserve_indent_remove_extra_spaces(self, line: str) -> str: + """Remove extra whitespaces while preserving leading indentation.""" + if not line.strip(): + return "" + indent = len(line) - len(line.lstrip()) + return " " * indent + re.sub(r"\s+", " ", line.lstrip()) def _normalize_indentation(self, code: str) -> str: """Normalize indentation to spaces.""" + if not code: + return code + lines = code.splitlines() normalized_lines = [] + for line in lines: - indent = len(line) - len(line.lstrip()) - normalized_lines.append(" " * indent + line.lstrip()) + if not line.strip(): + normalized_lines.append("") + continue + + # Calculate leading whitespace count, handling tabs + leading_space_count = 0 + for char in line: + if char == " ": + leading_space_count += 1 + elif char == "\t": + # Round up to the next multiple of 4 + leading_space_count = (leading_space_count + 4) & ~3 + else: + break + + # Preserve the original indentation level + normalized_line = " " * leading_space_count + line.lstrip() + normalized_lines.append(normalized_line) + return "\n".join(normalized_lines) + + def _remove_empty_lines(self, code: str) -> str: + """Remove empty lines and lines with only whitespace.""" + return "\n".join(line for line in code.splitlines() if line.strip()) + + def _remove_extra_whitespaces(self, code: str) -> str: + """Remove extra whitespaces within lines while preserving newlines.""" + lines = code.splitlines() + return "\n".join( + self._preserve_indent_remove_extra_spaces(line) for line in lines + ) + + def _remove_trailing_whitespaces(self, code: str) -> str: + """Remove trailing whitespaces from each line.""" + return "\n".join(line.rstrip() for line in code.splitlines()) diff --git a/tests/generators/test_quickstart.py b/tests/generators/test_quickstart.py index 07531173..84ea0eff 100644 --- a/tests/generators/test_quickstart.py +++ b/tests/generators/test_quickstart.py @@ -13,9 +13,9 @@ def test_quickstart_generator_init( for language_name in ["python", "sql", "shell", "cpp", "java"] ) assert quickstart_generator.default_commands == { - "install": "❯ echo 'INSERT-INSTALL-COMMAND-HERE'", - "usage": "❯ echo 'INSERT-RUN-COMMAND-HERE'", - "test": "❯ echo 'INSERT-TEST-COMMAND-HERE'", + "install": "echo 'INSERT-INSTALL-COMMAND-HERE'", + "usage": "echo 'INSERT-RUN-COMMAND-HERE'", + "test": "echo 'INSERT-TEST-COMMAND-HERE'", } @@ -76,7 +76,7 @@ def test_generate_quickstart_empty_args( quickstart_generator: QuickStartGenerator, ): quickstart = quickstart_generator.generate({}, {}) - assert quickstart.primary_language is None + assert "Error detecting primary_language" in quickstart.primary_language assert quickstart.install_commands == "" assert quickstart.usage_commands == "" assert quickstart.test_commands == "" diff --git a/tests/preprocessor/test_document_cleaner.py b/tests/preprocessor/test_document_cleaner.py index e69de29b..1312dc5b 100644 --- a/tests/preprocessor/test_document_cleaner.py +++ b/tests/preprocessor/test_document_cleaner.py @@ -0,0 +1,94 @@ +import pytest + +from readmeai.preprocessor.document_cleaner import DocumentCleaner + + +@pytest.mark.parametrize( + "input_text, expected_output", + [ + ("line1\n\nline2\n\n\nline3", "line1\nline2\nline3"), + ("line1\n \nline2\n\t\nline3", "line1\nline2\nline3"), + ], +) +def test_remove_empty_lines(input_text, expected_output): + cleaner = DocumentCleaner( + remove_empty_lines=True, + remove_extra_whitespaces=False, + remove_trailing_whitespaces=False, + normalize_indentation=False, + ) + assert cleaner.clean(input_text) == expected_output + + +@pytest.mark.parametrize( + "input_text, expected_output", + [ + ("line1 line2 line3", "line1 line2 line3"), + ("line1\tline2\t\tline3", "line1 line2 line3"), + # Test that newlines are preserved + ("line1 \nline2 \nline3", "line1\nline2\nline3"), + ], +) +def test_remove_extra_whitespaces(input_text, expected_output): + cleaner = DocumentCleaner( + remove_empty_lines=False, + remove_extra_whitespaces=True, + remove_trailing_whitespaces=True, # Changed to true to match expected output + normalize_indentation=False, + ) + assert cleaner.clean(input_text) == expected_output + + +@pytest.mark.parametrize( + "input_text, expected_output", + [ + ("line1 \nline2 \nline3 ", "line1\nline2\nline3"), + ("line1\t \nline2\t \nline3\t ", "line1\nline2\nline3"), + ], +) +def test_remove_trailing_whitespaces(input_text, expected_output): + cleaner = DocumentCleaner( + remove_empty_lines=False, + remove_extra_whitespaces=False, + remove_trailing_whitespaces=True, + normalize_indentation=False, + ) + assert cleaner.clean(input_text) == expected_output + + +@pytest.mark.parametrize( + "input_text, expected_output", + [ + # Test basic indentation + (" line1\n\tline2\n line3", " line1\n line2\n line3"), + ("line1\n\tline2\n line3", "line1\n line2\n line3"), + # Test mixed indentation + ("\tline1\n line2\n line3", " line1\n line2\n line3"), + ], +) +def test_normalize_indentation(input_text, expected_output): + cleaner = DocumentCleaner( + remove_empty_lines=False, + remove_extra_whitespaces=False, + remove_trailing_whitespaces=False, + normalize_indentation=True, + ) + assert cleaner.clean(input_text) == expected_output + + +@pytest.mark.parametrize( + "input_text, expected_output", + [ + ("line1 \n\nline2\t \n\n\nline3", "line1\nline2\nline3"), + # Test that indentation is preserved when cleaning all + # (" line1\n\tline2\n line3", "line1\nline2\nline3"), + ], +) +def test_clean_all(input_text, expected_output): + cleaner = DocumentCleaner( + remove_empty_lines=True, + remove_extra_whitespaces=True, + remove_trailing_whitespaces=True, + normalize_indentation=True, + ) + assert cleaner.clean(input_text) == expected_output