ckrough · ckrough · Dec 20, 2025 · Dec 20, 2025 · Dec 20, 2025 · Dec 20, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,110 @@
+# Continuous integration: runs the test, lint, type-check and security jobs
+# for pushes to main and for pull requests that target main.
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Least privilege: every job in this workflow only reads the repository contents.
+permissions:
+  contents: read
+
+env:
+  # Quoted so YAML keeps it a string; read by every setup-python step below.
+  PYTHON_VERSION: "3.13"
+
+jobs:
+  test:  # Installs the package with its dev extras, then runs pytest with coverage.
+    name: Test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+          cache: 'pip'
+
+      - name: Install dependencies  # the dev extra supplies pytest and pytest-cov
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+
+      - name: Download NLTK data  # raise_on_error: a failed download fails this step, not pytest later
+        run: |
+          python -c "import nltk; [nltk.download(pkg, raise_on_error=True) for pkg in ('averaged_perceptron_tagger_eng', 'punkt_tab')]"
+
+      - name: Run tests with coverage  # writes coverage.xml and prints uncovered lines
+        run: pytest --cov=src/drover --cov-report=xml --cov-report=term-missing
+
+      - name: Upload coverage report  # also runs after failing tests so the report is kept
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-report
+          path: coverage.xml
+          retention-days: 7
+
+  lint:  # Style gate: ruff formatting check, then ruff lint rules, over src/ and tests/.
+    runs-on: ubuntu-latest
+    name: Lint
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          cache: pip
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install ruff  # only the linter is installed; the project itself is not
+        run: pip install ruff
+
+      - name: Check formatting  # --check reports files that would change without rewriting them
+        run: ruff format --check src/ tests/
+
+      - name: Check linting
+        run: ruff check src/ tests/
+
+  type-check:  # Static typing gate: runs mypy over src/ with the project installed.
+    runs-on: ubuntu-latest
+    name: Type Check
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          cache: "pip"
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install dependencies  # editable install with the dev extra, which lists mypy
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+
+      - name: Run mypy
+        run: mypy src/
+
+  security:  # Static security scan of src/ with Bandit, configured from pyproject.toml.
+    name: Security Scan
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+          cache: 'pip'
+
+      - name: Install bandit  # toml extra: -c pyproject.toml must not depend on the Python version
+        run: pip install "bandit[toml]"
+
+      - name: Run security scan  # reports findings at medium or higher severity and confidence
+        run: |
+          bandit -r src/ -c pyproject.toml --severity-level medium --confidence-level medium
diff --git a/.gitignore b/.gitignore
@@ -25,6 +25,9 @@ wheels/
 venv/
 ENV/
 
+# Python version (pyenv/uv) - use pyproject.toml requires-python instead
+.python-version
+
 # IDE
 .idea/
 .vscode/settings.json.local

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -55,7 +55,7 @@ bandit -r src/ -f json --severity-level medium --confidence-level medium --quiet
 src/drover/
 ├── __init__.py         # Package init, version definition
 ├── __main__.py         # Entry point for python -m drover
-├── cli.py              # Click CLI commands (classify, tag)
+├── cli.py              # Click CLI commands (classify, tag, evaluate)
 ├── config.py           # Configuration management (Pydantic models)
 ├── loader.py           # DocumentLoader - text extraction from documents
 ├── classifier.py       # LLM-based DocumentClassifier
@@ -465,7 +465,7 @@ JSONL file with expected classifications:
 
 ```jsonl
 {"filename": "bank.pdf", "domain": "financial", "category": "banking", "doctype": "statement"}
-{"filename": "bill.pdf", "domain": "utilities", "category": "electric", "doctype": "bill"}
+{"filename": "bill.pdf", "domain": "financial", "category": "utilities", "doctype": "bill"}
 ```
 
 ### Running Evaluations

diff --git a/README.md b/README.md
@@ -16,6 +16,14 @@
   <a href="#documentation">Documentation</a>
 </p>
 
+<p align="center">
+  <a href="https://github.com/ckrough/drover/actions/workflows/ci.yml">
+    <img src="https://github.com/ckrough/drover/actions/workflows/ci.yml/badge.svg" alt="CI">
+  </a>
+  <img src="https://img.shields.io/badge/python-3.13+-blue.svg" alt="Python 3.13+">
+  <img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License: MIT">
+</p>
+
 ---
 
 Drover uses LLMs to analyze documents and suggest consistent, policy-compliant filesystem paths and filenames. Named after herding dogs that drove livestock, Drover herds your scattered files into an organized folder structure.
@@ -42,7 +50,7 @@ Drover uses LLMs to analyze documents and suggest consistent, policy-compliant f
 
 ```bash
 # Clone and install
-git clone https://github.com/your-org/drover.git
+git clone https://github.com/ckrough/drover.git
 cd drover
 pip install -e .
 

diff --git a/docs/adr/001-chain-of-thought-prompting.md b/docs/adr/001-chain-of-thought-prompting.md
@@ -57,4 +57,4 @@ Testing with household documents showed:
 ## Related
 - `prompts/classification.md` - Prompt template with CoT steps
 - `test_classifier_parse.py::test_parse_response_classification_analysis_tags` - Tests for CoT parsing
-- `classifier.py:400-404` - CoT tag extraction in `_parse_response()`
+- `classifier.py:_parse_response()` - CoT tag extraction (handles `<classification_analysis>` tags)
diff --git a/docs/adr/002-privacy-first-design.md b/docs/adr/002-privacy-first-design.md
@@ -55,7 +55,7 @@ ai:
 # Optional: cloud provider for better accuracy
 # ai:
 #   provider: anthropic
-#   model: claude-3-5-sonnet-latest
+#   model: claude-sonnet-4-20250514
 ```
 
 ### Environment Variables for Secrets

diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
 dev = [
     "pytest>=8.0",
     "pytest-asyncio>=0.24",
+    "pytest-cov>=4.0",
     "ruff>=0.8",
     "bandit>=1.7.5",
     "mypy>=1.8.0",

diff --git a/src/drover/actions/tag.py b/src/drover/actions/tag.py
@@ -6,6 +6,7 @@
 import sys
 from enum import StrEnum
 from pathlib import Path
+from types import ModuleType
 from typing import TYPE_CHECKING
 
 from drover.actions.base import ActionPlan, ActionResult
@@ -43,6 +44,8 @@ class TagManager:
     suffix indicates the color index (0 = no color, 1-7 = colors).
     """
 
+    _xattr: ModuleType  # xattr module, lazily imported on macOS only
+
     def __init__(self) -> None:
         """Initialize TagManager, checking platform compatibility."""
         if sys.platform != "darwin":

diff --git a/tests/test_actions_base.py b/tests/test_actions_base.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-from drover.actions.base import ActionPlan, ActionResult, FileAction
+from drover.actions.base import ActionPlan, ActionResult
 from drover.actions.runner import ActionRunner
 from drover.config import DroverConfig, ErrorMode
 from drover.models import ClassificationResult
@@ -148,9 +148,7 @@ async def test_dry_run_plans_without_executing(
         action = MockAction()
         runner = ActionRunner(config, action)
 
-        monkeypatch.setattr(
-            runner._service._classifier, "classify", _make_fake_classify(tmp_path)
-        )
+        monkeypatch.setattr(runner._service._classifier, "classify", _make_fake_classify(tmp_path))
 
         outputs: list[ActionPlan | ActionResult] = []
 
@@ -177,9 +175,7 @@ async def test_execute_mode_runs_action(
         action = MockAction()
         runner = ActionRunner(config, action)
 
-        monkeypatch.setattr(
-            runner._service._classifier, "classify", _make_fake_classify(tmp_path)
-        )
+        monkeypatch.setattr(runner._service._classifier, "classify", _make_fake_classify(tmp_path))
 
         outputs: list[ActionPlan | ActionResult] = []
 
@@ -207,19 +203,15 @@ async def test_action_failure_returns_partial_exit_code(
         action = MockAction(should_fail=True)
         runner = ActionRunner(config, action)
 
-        monkeypatch.setattr(
-            runner._service._classifier, "classify", _make_fake_classify(tmp_path)
-        )
+        monkeypatch.setattr(runner._service._classifier, "classify", _make_fake_classify(tmp_path))
 
         exit_code = await runner.run([doc_path], dry_run=False)
 
         # With one file that failed action, should be exit code 2 (all failed)
         assert exit_code == 2
 
     @pytest.mark.asyncio
-    async def test_classification_error_skips_action(
-        self, tmp_path: Path
-    ) -> None:
+    async def test_classification_error_skips_action(self, tmp_path: Path) -> None:
         """Classification errors don't trigger action planning."""
         missing_file = tmp_path / "missing.pdf"
 

diff --git a/tests/test_classifier_metrics.py b/tests/test_classifier_metrics.py
@@ -1,5 +1,4 @@
-"""Tests for DocumentClassifier metrics integration with LangChain callbacks.
-"""
+"""Tests for DocumentClassifier metrics integration with LangChain callbacks."""
 
 import pytest
 

diff --git a/tests/test_classifier_parse.py b/tests/test_classifier_parse.py
@@ -21,7 +21,10 @@ def _make_classifier() -> DocumentClassifier:
 
 def test_parse_response_direct_json() -> None:
     classifier = _make_classifier()
-    payload = '{"domain": "financial", "category": "banking", "doctype": "statement", "vendor": "Bank", "date": "20250101", "subject": "checking"}'
+    payload = (
+        '{"domain": "financial", "category": "banking", "doctype": "statement", '
+        '"vendor": "Bank", "date": "20250101", "subject": "checking"}'
+    )
 
     result = classifier._parse_response(payload)
 
@@ -31,11 +34,12 @@ def test_parse_response_direct_json() -> None:
 
 def test_parse_response_json_in_code_block() -> None:
     classifier = _make_classifier()
+    # Long line intentional - simulates realistic LLM output in code block
     payload = """Here is the answer:
 ```json
 {"domain": "financial", "category": "banking", "doctype": "statement", "vendor": "Bank", "date": "20250101", "subject": "checking"}
 ```
-"""
+"""  # noqa: E501
 
     result = classifier._parse_response(payload)
 
@@ -44,7 +48,11 @@ def test_parse_response_json_in_code_block() -> None:
 
 def test_parse_response_balanced_object_inside_text() -> None:
     classifier = _make_classifier()
-    payload = "Some explanation before {\n  \"domain\": \"financial\",\n  \"category\": \"banking\",\n  \"doctype\": \"statement\",\n  \"vendor\": \"Bank\",\n  \"date\": \"20250101\",\n  \"subject\": \"checking\"\n} and some trailing text."
+    payload = (
+        'Some explanation before {\n  "domain": "financial",\n  "category": "banking",'
+        '\n  "doctype": "statement",\n  "vendor": "Bank",\n  "date": "20250101",'
+        '\n  "subject": "checking"\n} and some trailing text.'
+    )
 
     result = classifier._parse_response(payload)
 
@@ -66,14 +74,14 @@ def test_parse_response_raises_on_invalid_json() -> None:
 def test_parse_response_double_brace_wrapper() -> None:
     """LLM sometimes mirrors `{{ ... }}` examples from the prompt template."""
     classifier = _make_classifier()
-    payload = '''{{
+    payload = """{{
   "domain": "financial",
   "category": "banking",
   "doctype": "statement",
   "vendor": "Bank",
   "date": "20250101",
   "subject": "checking"
-}}'''
+}}"""
 
     result = classifier._parse_response(payload)
 

diff --git a/tests/test_classifier_retry.py b/tests/test_classifier_retry.py
@@ -93,7 +93,10 @@ async def test_invoke_with_retry_success_no_retry(self) -> None:
 
         mock_llm = MagicMock()
         mock_response = MagicMock()
-        mock_response.content = '{"domain": "financial", "category": "banking", "doctype": "statement", "vendor": "Bank", "date": "20250101", "subject": "test"}'
+        mock_response.content = (
+            '{"domain": "financial", "category": "banking", "doctype": "statement", '
+            '"vendor": "Bank", "date": "20250101", "subject": "test"}'
+        )
         mock_llm.ainvoke = AsyncMock(return_value=mock_response)
 
         with patch.object(classifier, "_get_llm", return_value=mock_llm):
@@ -113,7 +116,10 @@ async def test_invoke_with_retry_retries_on_connection_error(self) -> None:
 
         mock_llm = MagicMock()
         mock_response = MagicMock()
-        mock_response.content = '{"domain": "financial", "category": "banking", "doctype": "statement", "vendor": "Bank", "date": "20250101", "subject": "test"}'
+        mock_response.content = (
+            '{"domain": "financial", "category": "banking", "doctype": "statement", '
+            '"vendor": "Bank", "date": "20250101", "subject": "test"}'
+        )
 
         # Fail twice, succeed third time
         mock_llm.ainvoke = AsyncMock(
@@ -141,7 +147,10 @@ async def test_invoke_with_retry_retries_on_timeout_error(self) -> None:
 
         mock_llm = MagicMock()
         mock_response = MagicMock()
-        mock_response.content = '{"domain": "medical", "category": "records", "doctype": "report", "vendor": "Hospital", "date": "20250101", "subject": "test"}'
+        mock_response.content = (
+            '{"domain": "medical", "category": "records", "doctype": "report", '
+            '"vendor": "Hospital", "date": "20250101", "subject": "test"}'
+        )
 
         # Fail once, succeed second time
         mock_llm.ainvoke = AsyncMock(
@@ -167,9 +176,7 @@ async def test_invoke_with_retry_exhausts_retries(self) -> None:
         classifier = _make_classifier(max_retries=2)
 
         mock_llm = MagicMock()
-        mock_llm.ainvoke = AsyncMock(
-            side_effect=ConnectionError("Persistent network error")
-        )
+        mock_llm.ainvoke = AsyncMock(side_effect=ConnectionError("Persistent network error"))
 
         with patch.object(classifier, "_get_llm", return_value=mock_llm):
             from langchain_core.messages import HumanMessage

diff --git a/tests/test_models.py b/tests/test_models.py
@@ -16,9 +16,7 @@ def test_classification_result_success():
     """
     result = ClassificationResult(
         original="receipt.pdf",
-        suggested_path=(
-            "pets/expenses/receipt/receipt-petsmart-food_supplies-20250601.pdf"
-        ),
+        suggested_path=("pets/expenses/receipt/receipt-petsmart-food_supplies-20250601.pdf"),
         suggested_filename="receipt-petsmart-food_supplies-20250601.pdf",
         domain="pets",
         category="expenses",

diff --git a/tests/test_tag_manager.py b/tests/test_tag_manager.py
@@ -7,7 +7,6 @@
 
 from drover.actions.tag import (
     TagAction,
-    TagError,
     TagManager,
     TagMode,
     compute_final_tags,
@@ -175,9 +174,7 @@ def test_empty_field_skipped(self) -> None:
     def test_all_fields(self) -> None:
         """All available fields work correctly."""
         result = self._make_result()
-        tags = tags_from_result(
-            result, ["domain", "category", "doctype", "vendor", "date"]
-        )
+        tags = tags_from_result(result, ["domain", "category", "doctype", "vendor", "date"])
 
         assert tags == ["financial", "banking", "statement", "chase", "2024"]
-Original file line number
+Diff line change
@@ Expand Up / @@ -25,6 +25,9 @@ wheels/ @@
     venv/
     ENV/
+    # Python version (pyenv/uv) - use pyproject.toml requires-python instead
+    .python-version
     # IDE
     .idea/
     .vscode/settings.json.local
@@ Expand Down @@