Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions pipeline/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from pipeline.commands.build import build_command
from pipeline.commands.dev import dev_command
from pipeline.tools.code_linter import lint_markdown_file
from pipeline.tools.docusaurus_parser import convert_docusaurus_to_mintlify
from pipeline.tools.links import drop_suffix_from_links, move_file_with_link_updates
from pipeline.tools.notebook.convert import convert_notebook
Expand All @@ -35,6 +36,40 @@ def mv_command(args) -> None: # noqa: ANN001
move_file_with_link_updates(args.old_path, args.new_path, dry_run=args.dry_run)


def lint_code_snippets_command(args) -> None: # noqa: ANN001
"""Handle the lint-code-snippets command for linting code blocks in markdown."""
file_path = args.path

if not file_path.exists():
logger.error("File does not exist: %s", file_path)
sys.exit(1)

if not file_path.is_file():
logger.error("Path is not a file: %s", file_path)
sys.exit(1)

if file_path.suffix.lower() not in {'.md', '.mdx', '.markdown'}:
logger.warning("File does not appear to be a markdown file: %s", file_path)

logger.info("Linting code snippets in: %s", file_path)

updated_content, errors = lint_markdown_file(file_path, dry_run=args.dry_run)

if errors:
for error in errors:
logger.error("Linting error: %s", error)
sys.exit(1)

if args.dry_run:
if updated_content:
print("=== Linted content ===") # noqa: T201 (OK to use print)
print(updated_content) # noqa: T201 (OK to use print)
else:
logger.info("No changes would be made")
else:
logger.info("Code snippet linting completed successfully")


def _find_files_to_migrate(
input_path: Path, migration_type: str = "mkdocs"
) -> list[Path]:
Expand Down Expand Up @@ -240,6 +275,7 @@ def main() -> None:
mv: Move a file and update cross-references to maintain valid links.
migrate: Convert MkDocs markdown files to mintlify format.
migrate-docusaurus: Convert Docusaurus markdown files to mintlify format.
lint-code-snippets: Lint Python and JavaScript code snippets in markdown files.

Exits:
With code 1 if no command is specified or if the initial build fails.
Expand Down Expand Up @@ -337,6 +373,23 @@ def main() -> None:
func=migrate_command, migration_type="docusaurus"
)

# Lint code snippets command
lint_parser = subparsers.add_parser(
"lint-code-snippets",
help="Lint Python and JavaScript code snippets in markdown files",
)
lint_parser.add_argument(
"path",
type=Path,
help="Path to the markdown file to lint",
)
lint_parser.add_argument(
"--dry-run",
action="store_true",
help="Show what would be changed without modifying the file",
)
lint_parser.set_defaults(func=lint_code_snippets_command)

args = parser.parse_args()

if not hasattr(args, "func"):
Expand Down
215 changes: 215 additions & 0 deletions pipeline/tools/code_linter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
"""Code snippet linting for markdown/MDX files.

This module provides functionality to lint code snippets within markdown files,
specifically targeting Python and JavaScript code blocks.
"""

from __future__ import annotations

import re
import subprocess
import tempfile
from pathlib import Path
from typing import NamedTuple, Union


class CodeBlock(NamedTuple):
"""Represents a code block found in markdown."""

language: Union[str, None]
content: str
start_line: int
end_line: int


class LintResult(NamedTuple):
"""Result of linting a code block."""

original_content: str
linted_content: str
has_changes: bool
errors: list[str]


def extract_code_blocks(markdown_content: str) -> list[CodeBlock]:
"""Extract all code blocks from markdown content.

Args:
markdown_content: The markdown file content as a string

Returns:
List of CodeBlock objects containing the language, content, and line numbers
"""
code_blocks = []
lines = markdown_content.split('\n')
in_code_block = False
current_block_language = None
current_block_content = []
current_block_start = 0

for line_num, line in enumerate(lines, 1):
# Check for opening code fence
if line.strip().startswith('```'):
if not in_code_block:
# Starting a new code block
in_code_block = True
current_block_start = line_num
# Extract language from the opening fence
fence_content = line.strip()[3:].strip()
current_block_language = fence_content.split()[0] if fence_content else None
current_block_content = []
else:
# Ending current code block
in_code_block = False
code_blocks.append(CodeBlock(
language=current_block_language,
content='\n'.join(current_block_content),
start_line=current_block_start + 1, # Content starts after opening fence
end_line=line_num - 1 # Content ends before closing fence
))
current_block_language = None
current_block_content = []
elif in_code_block:
# We're inside a code block, collect the content
current_block_content.append(line)

return code_blocks


def lint_python_code(code: str) -> LintResult:
"""Lint Python code using ruff (placeholder implementation).

Args:
code: Python code to lint

Returns:
LintResult with linting results
"""
# For now, this is a no-op placeholder
# TODO: Implement actual ruff integration
return LintResult(
original_content=code,
linted_content=code, # No changes for now
has_changes=False,
errors=[]
)


def lint_javascript_code(code: str) -> LintResult:
"""Lint JavaScript code using appropriate linter (placeholder implementation).

Args:
code: JavaScript code to lint

Returns:
LintResult with linting results
"""
# For now, this is a no-op placeholder
# TODO: Implement actual JS linting integration
return LintResult(
original_content=code,
linted_content=code, # No changes for now
has_changes=False,
errors=[]
)


def lint_code_block(block: CodeBlock) -> LintResult:
"""Lint a single code block based on its language.

Args:
block: CodeBlock to lint

Returns:
LintResult with linting results
"""
if not block.language:
# No language specified, skip linting
return LintResult(
original_content=block.content,
linted_content=block.content,
has_changes=False,
errors=[]
)

language = block.language.lower()

if language == 'python' or language == 'py':
return lint_python_code(block.content)
elif language in ('javascript', 'js', 'jsx', 'typescript', 'ts', 'tsx'):
return lint_javascript_code(block.content)
else:
# Unsupported language, skip linting
return LintResult(
original_content=block.content,
linted_content=block.content,
has_changes=False,
errors=[]
)


def apply_lint_changes(markdown_content: str, code_blocks: list[CodeBlock], lint_results: list[LintResult]) -> str:
"""Apply linting changes back to the original markdown content.

Args:
markdown_content: Original markdown content
code_blocks: List of extracted code blocks
lint_results: List of corresponding lint results

Returns:
Updated markdown content with linting changes applied
"""
lines = markdown_content.split('\n')

# Apply changes in reverse order to maintain line numbers
for block, result in reversed(list(zip(code_blocks, lint_results))):
if result.has_changes:
# Replace the content lines of this code block
new_content_lines = result.linted_content.split('\n')
# Replace lines from start_line to end_line (inclusive, 1-based)
lines[block.start_line - 1:block.end_line] = new_content_lines

return '\n'.join(lines)


def lint_markdown_file(file_path: Path, *, dry_run: bool = False) -> tuple[str, list[str]]:
"""Lint all code snippets in a markdown file.

Args:
file_path: Path to the markdown file
dry_run: If True, return the changes without writing to file

Returns:
Tuple of (updated_content, list_of_errors)
"""
if not file_path.exists():
return "", [f"File not found: {file_path}"]

try:
original_content = file_path.read_text(encoding='utf-8')
except Exception as e:
return "", [f"Error reading file: {e}"]

# Extract all code blocks
code_blocks = extract_code_blocks(original_content)

# Lint each code block
lint_results = []
all_errors = []

for block in code_blocks:
result = lint_code_block(block)
lint_results.append(result)
all_errors.extend(result.errors)

# Apply changes
updated_content = apply_lint_changes(original_content, code_blocks, lint_results)

# Write back if not dry run and there are changes
if not dry_run and updated_content != original_content:
try:
file_path.write_text(updated_content, encoding='utf-8')
except Exception as e:
all_errors.append(f"Error writing file: {e}")

return updated_content, all_errors
Loading