From 786c719e16ffaae291d975112e12597db0765366 Mon Sep 17 00:00:00 2001 From: Andrei Gasparian Date: Thu, 19 Mar 2026 20:49:02 +0100 Subject: [PATCH 1/2] [DBA-282] Replace generic coding guidelines with Python-specific doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strip filler advice from docs/coding-guidelines.md and rewrite as docs/python-coding-guidelines.md — grounded in actual databao-cli patterns with code examples. Filename scoped to Python so future TypeScript guidelines slot in cleanly. Update all references. Co-Authored-By: Claude Opus 4.6 --- .claude/skills/review-architecture/SKILL.md | 2 +- .pre-commit-config.yaml | 2 +- CLAUDE.md | 2 +- docs/coding-guidelines.md | 91 -------- docs/python-coding-guidelines.md | 221 ++++++++++++++++++++ scripts/smoke-test-skills.sh | 2 +- 6 files changed, 225 insertions(+), 95 deletions(-) delete mode 100755 docs/coding-guidelines.md create mode 100644 docs/python-coding-guidelines.md diff --git a/.claude/skills/review-architecture/SKILL.md b/.claude/skills/review-architecture/SKILL.md index a8ed6cc9..b9320b9e 100755 --- a/.claude/skills/review-architecture/SKILL.md +++ b/.claude/skills/review-architecture/SKILL.md @@ -13,7 +13,7 @@ before or after significant changes. Review in this order: 1. `docs/architecture.md` -2. `docs/coding-guidelines.md` +2. `docs/python-coding-guidelines.md` 3. `docs/testing-strategy.md` 4. `CLAUDE.md` 5. `README.md` (CLI usage and user-facing workflows) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fa805ca8..bc2c7596 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,4 +39,4 @@ repos: entry: scripts/validate-agent-guidance.sh language: script pass_filenames: false - files: ^(\.claude/skills/|docs/(coding-guidelines|architecture|testing-strategy)\.md|scripts/validate-agent-guidance\.sh)$ \ No newline at end of file + files: ^(\.claude/skills/|docs/(python-coding-guidelines|architecture|testing-strategy)\.md|scripts/validate-agent-guidance\.sh)$ \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index b418843f..669784f3 100755 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -11,7 +11,7 @@ Claude Code entrypoint for agent instructions in this repository. ## References - `docs/architecture.md` -- `docs/coding-guidelines.md` +- `docs/python-coding-guidelines.md` - `docs/testing-strategy.md` ## Project Scope diff --git a/docs/coding-guidelines.md b/docs/coding-guidelines.md deleted file mode 100755 index 758daa70..00000000 --- a/docs/coding-guidelines.md +++ /dev/null @@ -1,91 +0,0 @@ -# Coding Guidelines - -Practical coding rules for contributors and coding agents. -Style and formatting are enforced by ruff and mypy — these guidelines cover -only what linters cannot catch. - -## Language and Tooling - -- Python version target: see `requires-python` in `pyproject.toml`. -- Dependency/runtime management: `uv` + `pyproject.toml`. -- Run project commands through `uv run`. -- Local install/update: `make setup` (or `uv sync --dev` for deps only) -- Do not introduce ad-hoc dependency files (`requirements.txt`, etc.) unless - explicitly requested. - -## Required Quality Commands - -Run the narrowest relevant command first, then broaden scope. - -- Pre-commit (ruff + mypy): `make check` -- Ruff lint: `uv run ruff check src/databao_cli` -- Ruff format: `uv run ruff format src/databao_cli` -- Ruff auto-fix: `uv run ruff check --fix src/databao_cli` -- Mypy: `uv run mypy src/databao_cli` -- Unit tests: `make test` -- E2E tests: `make e2e-test` -- Coverage check: `make test-cov-check` -- Single test file: `uv run pytest tests/test_foo.py -v` -- Single test function: `uv run pytest tests/test_foo.py::test_bar -v` - -Use `ruff check --fix` and `ruff format` to auto-fix style issues — do not -manually fix formatting or linter-enforceable style. - -## General Coding Style - -- Prefer readability over clever/compact tricks. -- Keep functions focused; extract helpers for repeated logic. -- Avoid large refactors when a targeted fix is sufficient. - -## Comments and Docstrings - -- Do not leave inline comments explaining obvious code behavior. -- Do write docstrings for modules, classes, and functions where it adds clarity. - -## Imports - -- Avoid import-time side effects. - -## Typing and Data Models - -- Strict mypy is enabled — add type hints to all public APIs and non-trivial helpers. -- Use Pydantic models or `@dataclass` for structured payloads. -- SQLAlchemy and Pydantic mypy plugins are active. - -## Error Handling and Validation - -- Validate config/arguments early, before expensive runtime work. -- Use specific exceptions: - - `FileNotFoundError` for missing files/directories - - `ValueError` for invalid values or argument combinations - - `RuntimeError` for runtime state failures - - `click.UsageError` for CLI input errors -- Make errors actionable; include the problematic value/path. - -## Logging and Output - -- Use `logging` for runtime behavior and diagnostics. -- LLM-specific error handling lives in `src/databao_cli/log/llm_errors.py`. -- Use `print` only for tiny scripts/utilities where logging is unnecessary. -- Click's `echo`/`secho` is preferred for CLI user output. - -## CLI Patterns - -- All commands use Click decorators (`@click.command`, `@click.group`). -- Global options (`-v`, `-p`) are on the top-level group in `__main__.py`. -- Register new commands in the Click group in `__main__.py`. -- Use Click's built-in validation (types, callbacks) over manual parsing. -- Keep CLI flags stable and intuitive (Click conventions). - -## Testing Guidance - -- Add/adjust tests when changing behavior, commands, or interfaces. -- Prefer small focused unit tests near changed behavior. -- For bug fixes, include at least one regression test when feasible. -- E2E tests use `pexpect`, `testcontainers`, and `allure-pytest`. - -## Anti-Patterns to Avoid - -- Large unrelated rewrites in focused change requests. -- Silent behavior changes without docs/tests updates. -- Broad catch-all exception handling that hides root causes. diff --git a/docs/python-coding-guidelines.md b/docs/python-coding-guidelines.md new file mode 100644 index 00000000..7202afb0 --- /dev/null +++ b/docs/python-coding-guidelines.md @@ -0,0 +1,221 @@ +# Python Coding Guidelines + +Rules for writing Python in `databao-cli`. Covers patterns the linter can't +enforce. For style/formatting, run `ruff check --fix && ruff format` and move on. + +## Tooling + +- Python target: see `requires-python` in `pyproject.toml`. +- Deps managed by `uv` + `pyproject.toml`. No stray `requirements.txt`. +- All commands run via `uv run`. Dev setup: `make setup`. + +## CLI Layer (Click) + +Commands live in `src/databao_cli/commands/`. Thin handlers delegate to `*_impl()`. + +```python +# __main__.py — wiring only +@cli.command() +@click.argument("question", required=False) +@click.option("--one-shot", is_flag=True) +@click.pass_context +def ask(ctx: click.Context, question: str | None, one_shot: bool) -> None: + """Chat with the Databao agent.""" + from databao_cli.commands.ask import ask_impl + ask_impl(ctx, question, one_shot) +``` + +- Keep `__main__.py` scannable — no logic, just registrations. +- Shared state travels in `ctx.obj` (dict), set on the root group. +- Use Click's type system (`click.Path`, `click.Choice`) over manual parsing. +- User-facing output: `click.echo` / `click.secho`, not `print`. +- Errors from bad input: `click.UsageError`. Errors from broken state: `sys.exit(1)` + after a human-readable `click.echo(..., err=True)`. + +## Error Handling + +Don't blanket `try/except`. When you do catch, catch narrow. + +```python +# bad — swallows everything, hides bugs +try: + result = build_context(project) +except Exception: + logger.error("build failed") + +# good — catch what you expect, let the rest crash +try: + result = build_context(project) +except ContextBuildError as e: + click.echo(f"Build failed: {e}", err=True) + sys.exit(1) +``` + +Define domain exceptions when the caller needs to distinguish failure modes: + +```python +class InitDatabaoProjectError(ValueError): + def __init__(self, message: str) -> None: + super().__init__(message) + self.message = message + +class DatabaoProjectDirAlreadyExistsError(InitDatabaoProjectError): ... +``` + +Rules: +- Validate early, before expensive work. +- Exception messages must be actionable — include the path, value, or state + that caused the problem. +- Never `except Exception: pass`. If you're tempted, you're missing a design + decision. +- `RuntimeError` for "should never happen" states. `ValueError` for bad input. + `FileNotFoundError` for missing paths. + +## Data Modelling + +Dataclasses for internal data. Pydantic for validation boundaries (API input, +MCP tool params). + +```python +# internal state — plain dataclass +@dataclass +class ChatMessage: + role: str + content: str + thinking: str | None = None + timestamp: datetime = field(default_factory=datetime.now) + +# immutable layout — frozen dataclass +@dataclass(frozen=True) +class ProjectLayout: + project_dir: Path + + @property + def databao_dir(self) -> Path: + return get_databao_project_dir(self.project_dir) + +# MCP tool params — Pydantic for schema generation +class Message(BaseModel): + role: str = Field(description="Message role: 'user' or 'assistant'") + content: str = Field(description="Message text content") +``` + +- Use `field(default_factory=...)` for mutable defaults. Always. +- Prefer `@property` for derived values on dataclasses. +- Serialization helpers (`to_dict` / `from_dict`) live on the class, not in + utility modules. + +## Type Hints + +Strict mypy is on. No shortcuts. + +- `X | None`, not `Optional[X]`. +- All public functions get full signatures. Internal helpers get at least + return types. +- Use `TYPE_CHECKING` to break circular imports: + +```python +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from databao.agent.core.thread import Thread +``` + +- Forward refs (`"Settings"`) for self-referencing classmethods. + +## Imports + +- No import-time side effects. Heavy imports go inside functions: + +```python +def ask_impl(...): + from databao_cli.executor_utils import build_llm_config # lazy + llm_config = build_llm_config(model) +``` + +- This keeps `__main__.py` fast and avoids pulling Streamlit / heavy deps + into CLI-only paths. + +## Enums for Finite States + +```python +class DatabaoProjectStatus(Enum): + VALID = "valid" + NO_DATASOURCES = "no_datasources" + NOT_INITIALIZED = "not_initialized" +``` + +Don't use string literals for states that have a fixed set of values. +Enums give you typo protection and exhaustiveness hints. + +## Concurrency + +This project uses threading, not asyncio. Streamlit doesn't play well with +event loops. + +- `ThreadPoolExecutor` for background tasks (builds). +- Custom `Thread` subclasses for query execution with result storage. +- Thread-safe IO: `threading.Lock` around shared buffers. +- Streamlit polling: `@st.fragment(run_every=...)`, not busy loops. + +## MCP Tools + +Tools live in `src/databao_cli/mcp/tools/`. Each tool module exports a +`register(mcp, context)` function. + +```python +def register(mcp: "FastMCP", context: "McpContext") -> None: + @mcp.tool() + def databao_ask( + messages: Annotated[list[Message], Field(description="...")], + ) -> str: + request_id = str(uuid6()) + try: + ... + return json.dumps(result, default=str) + except Exception as e: + logger.exception("[%s] Query failed", request_id) + return _error(f"Query failed: {e}", request_id=request_id) +``` + +- Tools return JSON strings. Errors are JSON too (`{"error": "...", "request_id": "..."}`). +- Tag logs with `request_id` for traceability. +- Validate project state before doing work (not initialized? no datasources? + no build output?). + +## Logging + +```python +logger = logging.getLogger(__name__) +``` + +- `logging` for runtime diagnostics. `click.echo` for user output. Don't mix. +- Log at `DEBUG` for internal flow, `INFO` for operations, `WARNING` for + recoverable issues, `ERROR`/`EXCEPTION` for failures. +- LLM provider errors go through `log/llm_errors.py` formatter chain. +- Never log secrets, API keys, or full response bodies at INFO+. + +## Project-Aware Code + +Almost every command starts by locating and validating the project: + +```python +project = ProjectLayout(project_path) +status = databao_project_status(project) +if status == DatabaoProjectStatus.NOT_INITIALIZED: + click.echo("No project found. Run 'databao init'.", err=True) + sys.exit(1) +``` + +This is the pattern. Don't skip validation. Don't invent a new way to find +the project directory. + +## Things to Avoid + +- `except Exception` / `except BaseException` without re-raise. +- Mutable default arguments (use `field(default_factory=...)`). +- `os.path` when `pathlib.Path` works (it always does). +- Raw `dict` for structured data that gets passed around — make a dataclass. +- `asyncio.run()` in Streamlit code paths — use threads. +- Putting logic in `__main__.py` — it's a wiring file. +- `requirements.txt`, `setup.py`, or any non-`pyproject.toml` dep files. diff --git a/scripts/smoke-test-skills.sh b/scripts/smoke-test-skills.sh index 32a1c032..7a97265e 100755 --- a/scripts/smoke-test-skills.sh +++ b/scripts/smoke-test-skills.sh @@ -36,7 +36,7 @@ run_test "setup-environment: make setup" "make setup" run_test "check-coverage: make test-cov-check" "make test-cov-check" # review-architecture: referenced doc files exist and are non-empty -for doc in docs/architecture.md docs/coding-guidelines.md docs/testing-strategy.md README.md; do +for doc in docs/architecture.md docs/python-coding-guidelines.md docs/testing-strategy.md README.md; do run_test "review-architecture: $doc exists and non-empty" "test -s $doc" done From 2d20a23c4f4d1f6d1ce67fcb71d550d6d0e0a4e6 Mon Sep 17 00:00:00 2001 From: Andrei Gasparian Date: Fri, 20 Mar 2026 11:48:39 +0100 Subject: [PATCH 2/2] [DBA-282] Address PR review comments on coding guidelines and smoke tests Co-Authored-By: Claude Opus 4.6 --- docs/python-coding-guidelines.md | 9 ++++++--- scripts/smoke-test-skills.sh | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/python-coding-guidelines.md b/docs/python-coding-guidelines.md index 7202afb0..2cc96d57 100644 --- a/docs/python-coding-guidelines.md +++ b/docs/python-coding-guidelines.md @@ -1,7 +1,7 @@ # Python Coding Guidelines Rules for writing Python in `databao-cli`. Covers patterns the linter can't -enforce. For style/formatting, run `ruff check --fix && ruff format` and move on. +enforce. For style/formatting, run `uv run ruff check --fix && uv run ruff format` and move on. ## Tooling @@ -29,8 +29,8 @@ def ask(ctx: click.Context, question: str | None, one_shot: bool) -> None: - Shared state travels in `ctx.obj` (dict), set on the root group. - Use Click's type system (`click.Path`, `click.Choice`) over manual parsing. - User-facing output: `click.echo` / `click.secho`, not `print`. -- Errors from bad input: `click.UsageError`. Errors from broken state: `sys.exit(1)` - after a human-readable `click.echo(..., err=True)`. +- For user-facing errors (bad input or broken state), emit a clear + `click.echo(..., err=True)` message and then `sys.exit(1)`. ## Error Handling @@ -173,6 +173,9 @@ def register(mcp: "FastMCP", context: "McpContext") -> None: try: ... return json.dumps(result, default=str) + # Broad catch is acceptable here at the MCP tool boundary so we can + # always return a structured error JSON instead of crashing the server. + # Inside the codebase, avoid blanket `except Exception` without re-raise. except Exception as e: logger.exception("[%s] Query failed", request_id) return _error(f"Query failed: {e}", request_id=request_id) diff --git a/scripts/smoke-test-skills.sh b/scripts/smoke-test-skills.sh index 7a97265e..0f2acd99 100755 --- a/scripts/smoke-test-skills.sh +++ b/scripts/smoke-test-skills.sh @@ -37,7 +37,7 @@ run_test "check-coverage: make test-cov-check" "make test-cov-check" # review-architecture: referenced doc files exist and are non-empty for doc in docs/architecture.md docs/python-coding-guidelines.md docs/testing-strategy.md README.md; do - run_test "review-architecture: $doc exists and non-empty" "test -s $doc" + run_test "review-architecture: $doc exists and non-empty" "test -s \"$doc\"" done echo ""