diff --git a/README.md b/README.md
index 9b21e612..bb7ea641 100644
--- a/README.md
+++ b/README.md
@@ -100,7 +100,7 @@ Let's take a look at some possible customizations created by readme-ai:
-
+
--header-style ascii
|
diff --git a/docs/docs/assets/img/headers/ascii.png b/docs/docs/assets/img/headers/ascii-art.png
similarity index 100%
rename from docs/docs/assets/img/headers/ascii.png
rename to docs/docs/assets/img/headers/ascii-art.png
diff --git a/docs/docs/examples/gallery.md b/docs/docs/examples/gallery.md
index aaa8244b..836f2799 100644
--- a/docs/docs/examples/gallery.md
+++ b/docs/docs/examples/gallery.md
@@ -3,6 +3,7 @@ title: Gallery
---
Explore various README examples from different programming languages and technologies. Each example showcases a README file from a different repository and project type.
+
| Technology | Example Output | Repository | Description |
|------------|---------------|------------|-------------|
| Readme-ai | [readme-ai.md][default] | [readme-ai][readme-ai] | Readme-ai project |
diff --git a/pyproject.toml b/pyproject.toml
index 4d0f68f8..b94c459f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "readmeai"
-version = "0.5.99.post3"
+version = "0.5.99.post4"
description = "Automated README file generator, powered by AI."
authors = ["Eli "]
license = "MIT"
@@ -46,6 +46,7 @@ structlog = "^24.4.0"
tenacity = "^8.2.2"
tiktoken = "^0.4.0"
tomli = { version = "*", python = "<3.11" }
+typing-extensions = { version = "*", python = "<3.11" }
anthropic = { version = "*", optional = true }
google-generativeai = { version = "*", optional = true }
diff --git a/readmeai/config/settings.py b/readmeai/config/settings.py
index 01eddf0a..d028c96f 100644
--- a/readmeai/config/settings.py
+++ b/readmeai/config/settings.py
@@ -36,6 +36,11 @@
from readmeai.utils.file_handler import FileHandler
from readmeai.utils.file_resource import get_resource_path
+try:
+ from typing import Self
+except ImportError:
+ from typing_extensions import Self
+
_logger = get_logger(__name__)
diff --git a/readmeai/config/settings/prompts.toml b/readmeai/config/settings/prompts.toml
index 6d6174c4..777241dd 100644
--- a/readmeai/config/settings/prompts.toml
+++ b/readmeai/config/settings/prompts.toml
@@ -132,19 +132,19 @@ Aim for a clear, engaging description that captures the essence of the project w
"""
slogan = """
-Conceptualize a catchy and memorable slogan for the project: {0} ({1}).
-Your response should synthesize the project's essence, values, or unique selling points into a concise and engaging phrase.
-While generating the slogan, please reference the following codebase details:
+Create a catchy and memorable slogan for the project: {0} ({1}).
+Synthesize the project's essence, values, or unique selling points into a concise and engaging phrase.
FILE CONTENTS: {2}
-- Your response slogan should be 5-8 words long at most.
-- Slogan should be clear, concise and memorable.
-- DO NOT INCLUDE THE PROJECT NAME in the slogan.
+- The slogan must be a single string with no more than 8 words.
+- It should be clear, concise, and memorable.
+- DO NOT INCLUDE the project name in the slogan.
+- RETURN ONLY the slogan without any additional text or explanations.
-Be creative, think outside the box, and have fun. Cheers!
+Be creative and have fun!
"""
diff --git a/readmeai/config/settings/tool_config.toml b/readmeai/config/settings/tool_config.toml
index aa5f5ee8..c78c04cd 100644
--- a/readmeai/config/settings/tool_config.toml
+++ b/readmeai/config/settings/tool_config.toml
@@ -3,9 +3,9 @@
# -- Docker --------------------------------------------------------------------
[default]
-install = "❯ echo 'INSERT-INSTALL-COMMAND-HERE'"
-usage = "❯ echo 'INSERT-RUN-COMMAND-HERE'"
-test = "❯ echo 'INSERT-TEST-COMMAND-HERE'"
+install = "echo 'INSERT-INSTALL-COMMAND-HERE'"
+usage = "echo 'INSERT-RUN-COMMAND-HERE'"
+test = "echo 'INSERT-TEST-COMMAND-HERE'"
[containers]
name = "Docker"
diff --git a/readmeai/generators/quickstart.py b/readmeai/generators/quickstart.py
index 77f0d0cf..d4b82eaa 100644
--- a/readmeai/generators/quickstart.py
+++ b/readmeai/generators/quickstart.py
@@ -1,4 +1,5 @@
from dataclasses import dataclass, field
+from typing import Optional
from readmeai.config.settings import ConfigLoader
from readmeai.ingestion.models import QuickStart
@@ -31,6 +32,12 @@ def generate(
"""Get any relevant commands for the Quickstart instructions."""
try:
primary_language = self._get_primary_language(language_counts)
+
+ if not primary_language:
+ primary_language = (
+ f"Error detecting primary_language: {language_counts}"
+ )
+
quickstart = QuickStart(
primary_language=primary_language,
language_counts=language_counts,
@@ -46,26 +53,48 @@ def generate(
)
return QuickStart()
- def _get_primary_language(self, counts: dict[str, int]) -> str | None:
+ def _get_primary_language(self, counts: dict[str, int]) -> Optional[str]:
"""Determine the primary language of the repository."""
- if not counts:
+ try:
+ if not counts:
+ return None
+
+ # Filter out YAML files and empty counts
+ valid_counts = {
+ k: v
+ for k, v in counts.items()
+ if k not in ("yaml", "yml") and v > 0
+ }
+
+ if not valid_counts:
+ return None
+
+ primary_lang = max(valid_counts, key=valid_counts.get)
+
+ return self.language_names.get(
+ primary_lang, self.language_names.get("default")
+ )
+ except Exception as e:
+ _logger.error(f"Error determining primary language: {e}")
return None
- counts = {k: v for k, v in counts.items() if k not in ("yaml", "yml")}
- primary_lang = max(counts, key=counts.get)
- return self.language_names.get(
- primary_lang, self.language_names.get("default")
- )
def _generate_commands(
self, quickstart: QuickStart, primary_language: str
) -> None:
"""Generate install, usage, and test commands."""
+ if not primary_language:
+ return
+
command_types = ["install", "usage", "test"]
tool_types = ["package_managers", "containers"]
+
for cmd_type in command_types:
commands: list[str] = []
for tool_type in tool_types:
- tools = getattr(quickstart, tool_type)
+ tools = getattr(quickstart, tool_type, {})
+ if not tools:
+ continue
+
commands.extend(
filter(
None,
@@ -90,28 +119,44 @@ def _format_command(
file_path: str,
cmd_type: str,
tool_type: str,
- ) -> str | None:
+ ) -> Optional[str]:
"""Format a command for the Quickstart instructions."""
- config = (
- self.tools.get(primary_language.lower(), {})
- .get(tool_type, {})
- .get(tool_name, {})
- ) or self.tools.get(tool_type, {}).get(tool_name, {})
-
- cmd = config.get(cmd_type, self.default_commands.get(cmd_type))
- if not cmd:
- return None
+ try:
+ if not primary_language or not tool_name:
+ return None
- if cmd_type == "install" and tool_type == "containers":
- cmd = cmd.replace("{image_name}", self.config.config.git.full_name)
- elif cmd_type in {"install", "test"}:
- cmd = cmd.replace("{file}", file_path)
- elif cmd_type == "usage":
- cmd = cmd.replace("{executable}", self.config.config.git.name)
- return f"""
-**Using `{tool_name}`** [
]({config.get('website', '')})
+ lang_key = primary_language.lower()
+ config = (
+ self.tools.get(lang_key, {})
+ .get(tool_type, {})
+ .get(tool_name, {})
+ ) or self.tools.get(tool_type, {}).get(tool_name, {})
+
+ cmd = config.get(cmd_type, self.default_commands.get(cmd_type))
+ if not cmd:
+ return None
+
+ if cmd_type == "install" and tool_type == "containers":
+ cmd = cmd.replace(
+ "{image_name}", self.config.config.git.full_name or ""
+ )
+ elif cmd_type in {"install", "test"}:
+ cmd = cmd.replace("{file}", file_path or "")
+ elif cmd_type == "usage":
+ cmd = cmd.replace(
+ "{executable}", self.config.config.git.name or ""
+ )
+
+ shield_url = config.get("shield", "")
+ website_url = config.get("website", "")
+
+ return f"""
+**Using `{tool_name}`** [
]({website_url})
```sh
❯ {cmd}
```
"""
+ except Exception as e:
+ _logger.error(f"Error formatting command for {tool_name}: {e}")
+ return None
diff --git a/readmeai/preprocessor/document_cleaner.py b/readmeai/preprocessor/document_cleaner.py
index 65fd3bbb..4c86d61b 100644
--- a/readmeai/preprocessor/document_cleaner.py
+++ b/readmeai/preprocessor/document_cleaner.py
@@ -1,4 +1,5 @@
import re
+import textwrap
class DocumentCleaner:
@@ -12,41 +13,91 @@ def __init__(
remove_extra_whitespaces: bool = True,
remove_trailing_whitespaces: bool = True,
normalize_indentation: bool = True,
+ dedent: bool = False,
):
self.remove_empty_lines = remove_empty_lines
self.remove_extra_whitespaces = remove_extra_whitespaces
self.remove_trailing_whitespaces = remove_trailing_whitespaces
self.normalize_indentation = normalize_indentation
+ self.dedent = dedent
def clean(self, code: str) -> str:
"""Clean the given document string."""
+ lines = code.splitlines()
+
if self.remove_empty_lines:
- code = self._remove_empty_lines(code)
- if self.remove_extra_whitespaces:
- code = self._remove_extra_whitespaces(code)
+ lines = [line for line in lines if line.strip()]
+
if self.remove_trailing_whitespaces:
- code = self._remove_trailing_whitespaces(code)
+ lines = [line.rstrip() for line in lines]
+
if self.normalize_indentation:
- code = self._normalize_indentation(code)
- return code.strip()
+ lines = self._normalize_indentation("\n".join(lines)).splitlines()
- def _remove_empty_lines(self, code: str) -> str:
- """Remove empty lines and lines with only whitespace."""
- return "\n".join(line for line in code.splitlines() if line.strip())
+ result = "\n".join(lines)
- def _remove_extra_whitespaces(self, code: str) -> str:
- """Remove extra whitespaces within lines."""
- return re.sub(r"\s+", " ", code)
+ if self.dedent:
+ result = textwrap.dedent(result)
- def _remove_trailing_whitespaces(self, code: str) -> str:
- """Remove trailing whitespaces from each line."""
- return "\n".join(line.rstrip() for line in code.splitlines())
+ if self.remove_extra_whitespaces:
+ # Only remove extra spaces within each line, preserving leading spaces
+ lines = result.splitlines()
+ lines = [
+ self._preserve_indent_remove_extra_spaces(line)
+ for line in lines
+ ]
+ result = "\n".join(lines)
+
+ return result.rstrip()
+
+ def _preserve_indent_remove_extra_spaces(self, line: str) -> str:
+ """Remove extra whitespaces while preserving leading indentation."""
+ if not line.strip():
+ return ""
+ indent = len(line) - len(line.lstrip())
+ return " " * indent + re.sub(r"\s+", " ", line.lstrip())
def _normalize_indentation(self, code: str) -> str:
"""Normalize indentation to spaces."""
+ if not code:
+ return code
+
lines = code.splitlines()
normalized_lines = []
+
for line in lines:
- indent = len(line) - len(line.lstrip())
- normalized_lines.append(" " * indent + line.lstrip())
+ if not line.strip():
+ normalized_lines.append("")
+ continue
+
+ # Calculate leading whitespace count, handling tabs
+ leading_space_count = 0
+ for char in line:
+ if char == " ":
+ leading_space_count += 1
+ elif char == "\t":
+ # A tab jumps to the next 4-column tab stop: always forward, so 0 -> 4, 3 -> 4, 4 -> 8
+ leading_space_count = (leading_space_count + 4) & ~3
+ else:
+ break
+
+ # Rebuild the indentation from spaces only, at the width computed above
+ normalized_line = " " * leading_space_count + line.lstrip()
+ normalized_lines.append(normalized_line)
+
return "\n".join(normalized_lines)
+
+ def _remove_empty_lines(self, code: str) -> str:
+ """Remove empty lines and lines with only whitespace."""
+ return "\n".join(line for line in code.splitlines() if line.strip())
+
+ def _remove_extra_whitespaces(self, code: str) -> str:
+ """Remove extra whitespaces within lines while preserving newlines."""
+ lines = code.splitlines()
+ return "\n".join(
+ self._preserve_indent_remove_extra_spaces(line) for line in lines
+ )
+
+ def _remove_trailing_whitespaces(self, code: str) -> str:
+ """Remove trailing whitespaces from each line."""
+ return "\n".join(line.rstrip() for line in code.splitlines())
diff --git a/tests/generators/test_quickstart.py b/tests/generators/test_quickstart.py
index 07531173..84ea0eff 100644
--- a/tests/generators/test_quickstart.py
+++ b/tests/generators/test_quickstart.py
@@ -13,9 +13,9 @@ def test_quickstart_generator_init(
for language_name in ["python", "sql", "shell", "cpp", "java"]
)
assert quickstart_generator.default_commands == {
- "install": "❯ echo 'INSERT-INSTALL-COMMAND-HERE'",
- "usage": "❯ echo 'INSERT-RUN-COMMAND-HERE'",
- "test": "❯ echo 'INSERT-TEST-COMMAND-HERE'",
+ "install": "echo 'INSERT-INSTALL-COMMAND-HERE'",
+ "usage": "echo 'INSERT-RUN-COMMAND-HERE'",
+ "test": "echo 'INSERT-TEST-COMMAND-HERE'",
}
@@ -76,7 +76,7 @@ def test_generate_quickstart_empty_args(
quickstart_generator: QuickStartGenerator,
):
quickstart = quickstart_generator.generate({}, {})
- assert quickstart.primary_language is None
+ assert "Error detecting primary_language" in quickstart.primary_language
assert quickstart.install_commands == ""
assert quickstart.usage_commands == ""
assert quickstart.test_commands == ""
diff --git a/tests/preprocessor/test_document_cleaner.py b/tests/preprocessor/test_document_cleaner.py
index e69de29b..1312dc5b 100644
--- a/tests/preprocessor/test_document_cleaner.py
+++ b/tests/preprocessor/test_document_cleaner.py
@@ -0,0 +1,94 @@
+import pytest
+
+from readmeai.preprocessor.document_cleaner import DocumentCleaner
+
+
+@pytest.mark.parametrize(
+ "input_text, expected_output",
+ [
+ ("line1\n\nline2\n\n\nline3", "line1\nline2\nline3"),
+ ("line1\n \nline2\n\t\nline3", "line1\nline2\nline3"),
+ ],
+)
+def test_remove_empty_lines(input_text, expected_output):
+ cleaner = DocumentCleaner(
+ remove_empty_lines=True,
+ remove_extra_whitespaces=False,
+ remove_trailing_whitespaces=False,
+ normalize_indentation=False,
+ )
+ assert cleaner.clean(input_text) == expected_output
+
+
+@pytest.mark.parametrize(
+ "input_text, expected_output",
+ [
+ ("line1 line2 line3", "line1 line2 line3"),
+ ("line1\tline2\t\tline3", "line1 line2 line3"),
+ # Test that newlines are preserved
+ ("line1 \nline2 \nline3", "line1\nline2\nline3"),
+ ],
+)
+def test_remove_extra_whitespaces(input_text, expected_output):
+ cleaner = DocumentCleaner(
+ remove_empty_lines=False,
+ remove_extra_whitespaces=True,
+ remove_trailing_whitespaces=True, # needed: collapsing runs alone would leave one trailing space per line
+ normalize_indentation=False,
+ )
+ assert cleaner.clean(input_text) == expected_output
+
+
+@pytest.mark.parametrize(
+ "input_text, expected_output",
+ [
+ ("line1 \nline2 \nline3 ", "line1\nline2\nline3"),
+ ("line1\t \nline2\t \nline3\t ", "line1\nline2\nline3"),
+ ],
+)
+def test_remove_trailing_whitespaces(input_text, expected_output):
+ cleaner = DocumentCleaner(
+ remove_empty_lines=False,
+ remove_extra_whitespaces=False,
+ remove_trailing_whitespaces=True,
+ normalize_indentation=False,
+ )
+ assert cleaner.clean(input_text) == expected_output
+
+
+@pytest.mark.parametrize(
+ "input_text, expected_output",
+ [
+ # Test basic indentation
+ (" line1\n\tline2\n line3", " line1\n line2\n line3"),
+ ("line1\n\tline2\n line3", "line1\n line2\n line3"),
+ # Test mixed indentation
+ ("\tline1\n line2\n line3", " line1\n line2\n line3"),
+ ],
+)
+def test_normalize_indentation(input_text, expected_output):
+ cleaner = DocumentCleaner(
+ remove_empty_lines=False,
+ remove_extra_whitespaces=False,
+ remove_trailing_whitespaces=False,
+ normalize_indentation=True,
+ )
+ assert cleaner.clean(input_text) == expected_output
+
+
+@pytest.mark.parametrize(
+ "input_text, expected_output",
+ [
+ ("line1 \n\nline2\t \n\n\nline3", "line1\nline2\nline3"),
+ # Test that indentation is preserved when cleaning all
+ # (" line1\n\tline2\n line3", "line1\nline2\nline3"),
+ ],
+)
+def test_clean_all(input_text, expected_output):
+ cleaner = DocumentCleaner(
+ remove_empty_lines=True,
+ remove_extra_whitespaces=True,
+ remove_trailing_whitespaces=True,
+ normalize_indentation=True,
+ )
+ assert cleaner.clean(input_text) == expected_output