diff --git a/.github/SECURITY.md b/.github/SECURITY.md deleted file mode 100644 index f6e7a83..0000000 --- a/.github/SECURITY.md +++ /dev/null @@ -1,27 +0,0 @@ -# GitHub Security Policy - -This document outlines our security policy for the Importobot project on GitHub. - -## Reporting Security Issues - -Please refer to our main [SECURITY.md](../../SECURITY.md) file for our complete security policy, including: - -- How to report vulnerabilities -- Supported versions -- Response expectations -- Security best practices - -## GitHub Security Features - -We use GitHub's built-in security features: - -- **Dependabot Alerts**: Automatic monitoring for vulnerable dependencies -- **Code Scanning**: Static analysis to detect security issues -- **Secret Scanning**: Detection of accidentally committed secrets - -## Security Contacts - -For security-related issues, please contact: -- security@importobot.com - -Please do not use GitHub issues to report security vulnerabilities. \ No newline at end of file diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index a552fca..e618edf 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -47,7 +47,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: fetch-depth: 1 diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml index 2c14d1b..b68f20f 100644 --- a/.github/workflows/claude.yml +++ b/.github/workflows/claude.yml @@ -26,7 +26,7 @@ jobs: actions: read # Required for Claude to read CI results on PRs steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: fetch-depth: 1 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 69f1ce4..3e08b2d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -16,6 +16,7 @@ on: - "tests/**/*.py" - "scripts/**/*.py" - "examples/json/**/*.json" + - 
"config/**" - "pyproject.toml" - "Makefile" - ".github/workflows/lint.yml" @@ -27,6 +28,7 @@ on: - "tests/**/*.py" - "scripts/**/*.py" - "examples/json/**/*.json" + - "config/**" - "pyproject.toml" - "Makefile" - ".github/workflows/lint.yml" @@ -36,7 +38,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install uv uses: astral-sh/setup-uv@v7 @@ -60,15 +62,11 @@ jobs: - name: Run ruff format check run: uv run ruff format --check . - - - name: Run pycodestyle run: uv run pycodestyle src/ tests/ scripts/ - name: Run pydocstyle run: uv run pydocstyle src/ scripts/ - - - name: Minimize uv cache run: uv cache prune --ci diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..3023507 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,64 @@ +name: Pre-commit Checks + +permissions: + contents: read + actions: write + +concurrency: + group: pre-commit-${{ github.ref }} + cancel-in-progress: true + +on: + push: + branches: [ main ] + paths: + - "src/**/*.py" + - "tests/**/*.py" + - "config/**" + - "pyproject.toml" + - ".pre-commit-config.yaml" + - ".github/workflows/pre-commit.yml" + pull_request: + types: [opened, synchronize] + branches: [ main ] + paths: + - "src/**/*.py" + - "tests/**/*.py" + - "config/**" + - "pyproject.toml" + - ".pre-commit-config.yaml" + - ".github/workflows/pre-commit.yml" + +jobs: + pre-commit: + name: Pre-commit Hook Validation + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + version: "latest" + enable-cache: true + + - name: Install dependencies + run: uv sync --dev + + - name: Cache pre-commit + uses: actions/cache@v4 + with: + path: ~/.cache/pre-commit + key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }} + + - name: Run 
pre-commit on all files + run: uv run pre-commit run --all-files --show-diff-on-failure + + - name: Minimize uv cache + run: uv cache prune --ci diff --git a/.github/workflows/publish-packages.yml b/.github/workflows/publish-packages.yml index 78155eb..1c26382 100644 --- a/.github/workflows/publish-packages.yml +++ b/.github/workflows/publish-packages.yml @@ -14,7 +14,7 @@ jobs: publish: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index f05c11b..4fa4f68 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -1,5 +1,9 @@ name: Security Scanning +permissions: + contents: read + actions: write + concurrency: group: security-${{ github.ref }} cancel-in-progress: true @@ -11,6 +15,7 @@ on: - "src/**/*.py" - "tests/**/*.py" - "examples/json/**/*.json" + - "config/**" - "pyproject.toml" - "Makefile" - ".github/workflows/security.yml" @@ -21,6 +26,7 @@ on: - "src/**/*.py" - "tests/**/*.py" - "examples/json/**/*.json" + - "config/**" - "pyproject.toml" - "Makefile" - ".github/workflows/security.yml" @@ -29,27 +35,33 @@ jobs: security: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.12' + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + version: "latest" + enable-cache: true + - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install bandit safety + # Using uv for consistency + uv sync --dev - name: Run Bandit (Security Linter) run: | - bandit -r src/ -f json -o bandit-report.json --exit-zero - bandit -r src/ -ll --exit-zero + uv run bandit -r src/ -f json -o bandit-report.json --exit-zero + uv run bandit -r src/ -ll --exit-zero - name: Run Safety (Dependency Scanner) run: | - safety check --output json > 
safety-report.json || true - safety check + uv run safety check --output json > safety-report.json || true + uv run safety check - name: SQL Injection Pattern Check run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c7abdf7..e0612a8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,6 +16,7 @@ on: - "src/**/*.py" - "tests/**/*.py" - "examples/json/**/*.json" + - "config/**" - "pyproject.toml" - "Makefile" - ".github/workflows/test.yml" @@ -26,6 +27,7 @@ on: - "src/**/*.py" - "tests/**/*.py" - "examples/json/**/*.json" + - "config/**" - "pyproject.toml" - "Makefile" - ".github/workflows/test.yml" @@ -39,7 +41,7 @@ jobs: python-version: ["3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install uv uses: astral-sh/setup-uv@v7 @@ -104,7 +106,7 @@ jobs: needs: test steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install uv uses: astral-sh/setup-uv@v7 @@ -140,3 +142,66 @@ jobs: - name: Minimize uv cache run: uv cache prune --ci + + coverage-delta: + name: Modified File Coverage + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + version: "latest" + enable-cache: true + + - name: Set up Python 3.11 + uses: actions/setup-python@v6 + with: + python-version: "3.11" + + - name: Install dependencies + run: uv sync --dev + + - name: Get modified Python files + id: changed-files + env: + BASE_REF: ${{ github.base_ref }} + run: | + CHANGED=$(git diff --name-only "origin/${BASE_REF}...HEAD" -- 'src/**/*.py' | tr '\n' ',' | sed 's/,$//') + echo "files=$CHANGED" >> "$GITHUB_OUTPUT" + if [ -z "$CHANGED" ]; then + echo "No Python source files modified" + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "Modified files: $CHANGED" + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + + - name: Run coverage on 
modified files + if: steps.changed-files.outputs.skip != 'true' + env: + CHANGED_FILES: ${{ steps.changed-files.outputs.files }} + run: | + COV_ARGS="" + IFS=',' read -ra FILES <<< "$CHANGED_FILES" + for file in "${FILES[@]}"; do + module=$(echo "$file" | sed 's|src/||' | sed 's|\.py$||' | tr '/' '.') + COV_ARGS="$COV_ARGS --cov=${module%.}" + done + echo "Running: uv run pytest $COV_ARGS --cov-report=term-missing --cov-fail-under=60" + uv run pytest $COV_ARGS --cov-report=term-missing --cov-fail-under=60 || { + echo "::warning::Modified file coverage below 60%. Consider adding tests for new code." + exit 0 + } + + - name: Skip message + if: steps.changed-files.outputs.skip == 'true' + run: echo "No Python source files modified - skipping coverage check" + + - name: Minimize uv cache + run: uv cache prune --ci diff --git a/.github/workflows/typecheck.yml b/.github/workflows/typecheck.yml index 53ff5ed..c9e193d 100644 --- a/.github/workflows/typecheck.yml +++ b/.github/workflows/typecheck.yml @@ -15,6 +15,7 @@ on: - "src/**/*.py" - "tests/**/*.py" - "examples/json/**/*.json" + - "config/**" - "pyproject.toml" - "Makefile" - ".github/workflows/typecheck.yml" @@ -25,6 +26,7 @@ on: - "src/**/*.py" - "tests/**/*.py" - "examples/json/**/*.json" + - "config/**" - "pyproject.toml" - "Makefile" - ".github/workflows/typecheck.yml" @@ -34,7 +36,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install uv uses: astral-sh/setup-uv@v7 diff --git a/.github/workflows/update-benchmark-images.yml b/.github/workflows/update-benchmark-images.yml index 58624bc..12ba436 100644 --- a/.github/workflows/update-benchmark-images.yml +++ b/.github/workflows/update-benchmark-images.yml @@ -14,12 +14,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for ASV - name: Checkout wiki - uses: actions/checkout@v5 + uses: 
actions/checkout@v6 with: repository: ${{ github.repository }}.wiki path: wiki-repo diff --git a/.github/workflows/wiki-sync.yml b/.github/workflows/wiki-sync.yml index fc0145d..3601ec1 100644 --- a/.github/workflows/wiki-sync.yml +++ b/.github/workflows/wiki-sync.yml @@ -15,7 +15,7 @@ jobs: if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'push' }} steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: fetch-depth: 0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..49ab771 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,58 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-merge-conflict + - id: check-case-conflict + - id: check-docstring-first + - id: debug-statements + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.4 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.18.2 + hooks: + - id: mypy + additional_dependencies: [types-PyYAML, types-psutil, types-requests] + args: [--config-file=pyproject.toml] + files: ^src/ + + - repo: https://github.com/pycqa/bandit + rev: 1.8.6 + hooks: + - id: bandit + args: [-r, src/, -ll] + exclude: tests/ + + - repo: local + hooks: + - id: ty + name: ty + entry: uv run ty check + language: system + files: ^src/ + pass_filenames: true + + - id: pytest-check + name: pytest-check + entry: uv run pytest --collect-only -q + language: system + pass_filenames: false + always_run: true + + - id: no-ai-attribution + name: block-ai-co-authored-by + entry: | + bash -c 'if git log --format="%b" -1 HEAD 2>/dev/null | grep -iqE "Co-Authored-By:.*(Claude|Opus|Sonnet|Haiku|Anthropic|GPT|OpenAI|Copilot)"; then echo "ERROR: Commit contains AI Co-Authored-By line. 
Remove it."; exit 1; fi' + language: system + stages: [commit-msg] + always_run: true diff --git a/CHANGELOG.md b/CHANGELOG.md index 868d104..97ae4d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,61 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.1.4] - 2025-11-11 - -### Fixed -- **MongoDB Library Integration**: Replaced broken `robotframework-mongodblibrary` with modern `robot-mongodb-library` to resolve `ModuleNotFoundError: No module named 'mongo_connection_manager'` -- **Type Safety**: Fixed type checking errors in `base_generator.py` and `helpers.py` by properly converting `RobotFrameworkLibrary` enums to string values -- **Code Quality**: Fixed line length violation in `keywords_registry.py` by breaking long description string into multiple lines -- **Multi-Step Parsing**: Fixed 5 failing tests by updating filter patterns to include `SeleniumLibrary.*` prefixes, enabling proper parsing of library-prefixed commands -- **Unicode Compatibility**: Removed all non-ASCII characters from output messages and scripts, replacing Unicode symbols (✓, →, •, 🔬) with ASCII alternatives for maximum compatibility - -### Changed -- **Library Generation**: Updated codebase generation mechanism to use `RobotMongoDBLibrary` instead of legacy `MongoDBLibrary` across pattern matcher and keyword registry -- **Keywords Registry**: Updated MongoDB function mappings to reflect actual available functions in the new library (`InsertOne`, `FindOneByID`, `Find`, `Update`, `DeleteOne`, `DeleteOneByID`) -- **Project Configuration**: Added `BENCHMARKS_DIR` constant to `importobot.config` for clean path management, replacing hacky `Path.parent.parent.parent.parent` patterns -- **Documentation Standards**: Enhanced TestRail client documentation with comprehensive docstring explaining 
Basic authentication vs Bearer token patterns -- **Test Data Quality**: Converted code notes to actionable TODO comments with GitHub issue references for traceability - -### Added -- **Task Management**: Created GitHub issue #83 for implementing proper test data feeding system for P(E|¬H) learning pipeline -- **Cross-Reference Links**: Added clickable link to ADR-0006 in performance validation documentation -- **ASCII Output Standards**: Standardized all CLI output and script messages to use ASCII-only characters for cross-platform compatibility - ## [Unreleased] -### Added -- **Test Suite Quality**: Improved test architecture by introducing 55 named constants across 9 categories, eliminating magic numbers. -- **Modern Test Patterns**: Updated test patterns by replacing `tempfile` with `pytest.tmp_path`, adding type annotations to all test functions, and documenting integration tests with Arrange-Act-Assert. -- **Consistent Type Safety**: Enforced mypy type checking across the entire test suite by removing test overrides. - -### Changed -- **Client Module Refactoring**: Split `importobot.integrations.clients` into focused modules (base.py, jira_xray.py, testlink.py, testrail.py, zephyr.py) to enhance maintainability while retaining full backward compatibility. -- **Documentation Refinement**: Replaced subjective marketing language with factual, technical descriptions throughout the documentation. -- **API Client Modularity**: Implemented lazy loading for API clients, resulting in a 3x improvement in import speed while preserving all existing import paths. - -### Removed -- **Legacy Compatibility Code**: Eliminated backwards compatibility shims no longer needed (Python < 3.8 support, deprecated logging and cache APIs). -- **Redundant Functions**: Removed `setup_logger()` and `get_cache_stats()` aliases in favor of unified APIs. 
- -### Fixed -- **Test Infrastructure**: Fixed 24 syntax errors from incorrect type annotations and resolved environmental test failures using proper pytest fixtures. -- **Import Organization**: Corrected missing `Any` imports and standardized import patterns across test files. - -### Technical Details -- **Test Quality**: All 1541 tests passed (100% pass rate) with comprehensive type checking. -- **Performance**: No performance regression was detected after module refactoring; lazy loading improved import times. -- **Architecture**: ADR-0006 was added to document client module refactoring decisions and performance validation. - -## [Unreleased] +## [0.1.5] - 2026-02-18 ### Changed -- **Module Refactoring**: Split `importobot.integrations.clients` into focused modules for better maintainability +- **Client Module Refactoring**: Split `importobot.integrations.clients` into focused modules for better maintainability - `base.py` - Shared API client functionality (BaseAPIClient, APISource protocol) - `jira_xray.py` - JIRA/Xray platform client - `testlink.py` - TestLink platform client - `testrail.py` - TestRail platform client - `zephyr.py` - Zephyr platform client +- **API Client Modularity**: Implemented lazy loading for API clients, resulting in a 3x improvement in import speed while preserving all existing import paths. 
- **Test Quality Improvements**: - Added 55 named constants in `tests/test_constants.py` to eliminate magic numbers, organized into 9 logical categories with clear section markers - Replaced `tempfile` usage with pytest's `tmp_path` fixture (modern pattern) @@ -68,6 +25,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Documented growth strategy: single-file approach until 200 constants, then split into sub-modules - **Type Safety**: Removed mypy test override to enforce type checking across entire test suite - **Documentation Cleanup**: Removed subjective marketing terms ("enterprise", "professional") in favor of factual descriptions +- **CI/CD Modernization**: Upgraded all workflows from `actions/checkout@v5` to `@v6`, added `config/**` to trigger paths +- **Security Workflow**: Migrated from pip to uv with top-level permissions block +- **Lint Target**: `make lint` now runs `ruff format --check` and `pycodestyle` in addition to `ruff check` and `pydocstyle` +- **Typecheck Target**: Removed `pyright` from `make typecheck` (mypy and ty remain) +- **CHANGELOG**: Consolidated duplicate `[Unreleased]` sections into one + +### Added +- **Coverage Delta CI Job**: New `coverage-delta` workflow job measures test coverage on modified source files during PRs (60% minimum threshold) +- **Pre-commit**: Added `.pre-commit-config.yaml`, `pre-commit` dev dependency, and `.github/workflows/pre-commit.yml` CI workflow +- **Validate Target**: `make validate` now runs `pre-commit run --all-files` as a final check ### Removed - **Backwards Compatibility Code** (0.1.x has no external users): @@ -75,6 +42,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Removed `setup_logger()` function - use `get_logger()` instead - Removed `get_cache_stats()` alias - use `get_stats()` instead +### Fixed +- Fixed 24 syntax errors from incorrect type annotation replacements in test files +- Fixed missing `Any` import in 
`tests/unit/test_hash_file_example.py` +- Fixed environmental test failure in `test_resource_manager.py` by using pytest's `tmp_path` fixture instead of `/tmp` +- Corrected missing `Any` imports and standardized import patterns across test files + ### Breaking Changes #### API Client Module Structure @@ -154,19 +127,35 @@ stats = detection_cache.get_stats() **Migration:** Replace all `.get_cache_stats()` calls with `.get_stats()`. Return value structure is unchanged. -### Fixed -- Fixed 24 syntax errors from incorrect type annotation replacements in test files -- Fixed missing `Any` import in `tests/unit/test_hash_file_example.py` -- Fixed environmental test failure in `test_resource_manager.py` by using pytest's `tmp_path` fixture instead of `/tmp` - ### Technical Details - Blueprint storage classes moved to `blueprints/storage.py` (StepPattern, SuiteSettings, etc.) -- Test suite: **1541/1541 tests passing (100% pass rate)** +- Test suite: **1541/1541 tests passing (100% pass rate)** at time of this work - Mypy enforcement now applies to tests (removed `[[tool.mypy.overrides]]` for `tests.*`) - Architecture Decision Record: `wiki/architecture/ADR-0006-client-module-refactoring.md` - Performance validation: No regression detected, lazy loading provides 3x import speed improvement (see `wiki/architecture/performance-validation-module-split.md`) +## [0.1.4] - 2025-11-11 + +### Fixed +- **MongoDB Library Integration**: Replaced broken `robotframework-mongodblibrary` with modern `robot-mongodb-library` to resolve `ModuleNotFoundError: No module named 'mongo_connection_manager'` +- **Type Safety**: Fixed type checking errors in `base_generator.py` and `helpers.py` by properly converting `RobotFrameworkLibrary` enums to string values +- **Code Quality**: Fixed line length violation in `keywords_registry.py` by breaking long description string into multiple lines +- **Multi-Step Parsing**: Fixed 5 failing tests by updating filter patterns to include `SeleniumLibrary.*` 
prefixes, enabling proper parsing of library-prefixed commands +- **Unicode Compatibility**: Removed all non-ASCII characters from output messages and scripts, replacing Unicode symbols with ASCII alternatives for maximum compatibility + +### Changed +- **Library Generation**: Updated codebase generation mechanism to use `RobotMongoDBLibrary` instead of legacy `MongoDBLibrary` across pattern matcher and keyword registry +- **Keywords Registry**: Updated MongoDB function mappings to reflect actual available functions in the new library (`InsertOne`, `FindOneByID`, `Find`, `Update`, `DeleteOne`, `DeleteOneByID`) +- **Project Configuration**: Added `BENCHMARKS_DIR` constant to `importobot.config` for clean path management, replacing hacky `Path.parent.parent.parent.parent` patterns +- **Documentation Standards**: Enhanced TestRail client documentation with comprehensive docstring explaining Basic authentication vs Bearer token patterns +- **Test Data Quality**: Converted code notes to actionable TODO comments with GitHub issue references for traceability + +### Added +- **Task Management**: Created GitHub issue #83 for implementing proper test data feeding system for P(E|¬H) learning pipeline +- **Cross-Reference Links**: Added clickable link to ADR-0006 in performance validation documentation +- **ASCII Output Standards**: Standardized all CLI output and script messages to use ASCII-only characters for cross-platform compatibility + ## [0.1.3] - 2025-10-23 ### Added diff --git a/CLAUDE.md b/CLAUDE.md index 134baa1..d9d858c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -19,7 +19,7 @@ Key principles from experience: Write failing test first, make it pass, then refactor. The converter touches many layers (parsing, validation, rendering), so maintain both unit and integration coverage. Use the existing fixtures in `tests/fixtures/` instead of copying setup code across test files. ### CI/CD Standards -The full test suite (1,946 tests) runs on every change. 
Red builds block merges. Start with simple design, then refactor once tests protect the behavior. No module ownership—leave the code cleaner than you found it. +The full test suite (2,325 tests) runs on every change. Red builds block merges. PRs also run a coverage-delta job that measures coverage on modified source files (60% threshold). Start with simple design, then refactor once tests protect the behavior. No module ownership—leave the code cleaner than you found it. ### Error Handling Validate JSON immediately with `load_and_parse_json` and fail fast on missing fields. Reject bad CLI input before starting long conversions to avoid wasting time. Keep configuration validation and type hints in sync—this catches many issues during development instead of runtime. @@ -82,7 +82,10 @@ Added `scripts/interactive_demo.py` for customer demonstrations. It shares code **November 2025: MongoDB Library Integration** Replaced broken `robotframework-mongodblibrary` with modern `robot-mongodb-library`. Fixed import errors and updated generation mechanism across codebase to use `RobotMongoDBLibrary` instead of legacy `MongoDBLibrary`. Updated pattern matcher and keyword registry with correct function mappings. Created comprehensive TODO and GitHub issue (#82) for building proper Robot Framework-compatible MongoDB library. Fixed all linting issues including line length violations. -**Test status**: All 2,105+ tests pass with 0 skips. +**February 2026: CI/CD Modernization and Pre-commit (0.1.5)** +Upgraded all GitHub Actions workflows from `actions/checkout@v5` to `actions/checkout@v6` and added `config/**` to trigger paths. Added a `coverage-delta` CI job that measures test coverage on modified source files during PRs (60% minimum). Migrated `security.yml` from pip to uv with top-level permissions. Added `pre-commit` with `.pre-commit-config.yaml` and a dedicated CI workflow. 
The `make lint` target now runs `ruff format --check` and `pycodestyle` in addition to `ruff check` and `pydocstyle`. The `make validate` target runs `pre-commit run --all-files`. Removed `pyright` from `make typecheck` (mypy and ty remain). Consolidated duplicate `[Unreleased]` sections in CHANGELOG.md. + +**Test status**: All 2,325 tests pass with 0 skips. **Code quality**: Removed pylint from the project (now using ruff/mypy only) and improved test isolation. ## API Integration Enhancements @@ -92,7 +95,7 @@ The `ZephyrClient` adapts to different server configurations with automatic API ## CI/CD -Importobot works in CI/CD pipelines and supports headless environments with headless Chrome. +Importobot works in CI/CD pipelines and supports headless environments with headless Chrome. Pre-commit hooks run locally via `pre-commit run --all-files` and in CI via `.github/workflows/pre-commit.yml`. The `make validate` target includes pre-commit as a final check. ## MCP agent usage @@ -183,7 +186,7 @@ Following pandas model: ### Before Making Changes 1. Run existing tests: `make test` -2. Check code quality: `make lint` (runs ruff and mypy) +2. Check code quality: `make lint` (runs ruff, ruff format check, pycodestyle, and pydocstyle) 3. Understand existing architecture and patterns 4. Review public API impact if changing exposed functionality @@ -196,7 +199,7 @@ Following pandas model: ### After Changes 1. Run all tests: `make test` -2. Check code quality: `make lint` (runs ruff and mypy for linting and type checking) +2. Check code quality: `make lint` (runs ruff, ruff format check, pycodestyle, and pydocstyle) 3. Format code: `make format` 4. Clean artifacts: `make clean` or `make deep-clean` 5. Verify no regressions were introduced diff --git a/Makefile b/Makefile index a0a7363..8dee87a 100644 --- a/Makefile +++ b/Makefile @@ -100,6 +100,10 @@ lint: $(info $(NEWLINE)==================== Running linting ====================$(NEWLINE)) @echo "→ Running ruff (fast)..." 
@uv run ruff check . + @echo "→ Running ruff format check..." + @uv run ruff format --check . + @echo "→ Running pycodestyle..." + @uv run pycodestyle src/ tests/ scripts/ @echo "→ Running pydocstyle..." @uv run pydocstyle . @@ -114,7 +118,6 @@ format: typecheck: $(info $(NEWLINE)==================== Running type checking ====================$(NEWLINE)) uv run ty check . - uv run pyright uv run mypy -p importobot uv run mypy tests cd scripts && uv run mypy -p importobot_scripts @@ -141,6 +144,8 @@ validate: lint typecheck test @uv run bandit --version >/dev/null 2>&1 || { echo "WARNING: bandit unavailable. Run 'uv sync' to install dev dependencies"; exit 1; } @uv run bandit -r src/ -ll -f json -o bandit-report.json || { echo "WARNING: Security issues found! Check bandit-report.json"; exit 1; } @rm -f bandit-report.json + @echo "→ Running pre-commit hooks..." + @uv run pre-commit run --all-files --show-diff-on-failure || { echo "WARNING: Pre-commit checks failed"; exit 1; } $(info $(NEWLINE)All validation checks passed! 
Ready for PR review.$(NEWLINE)) # Cleanup diff --git a/README.md b/README.md index 52e8082..c7abe18 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,14 @@ Importobot is a Python package for converting test case exports from Zephyr, Tes ## What's new -**Recent Improvements:** -- **MongoDB Library Support**: Fixed MongoDB library integration by replacing broken `robotframework-mongodblibrary` with modern `robot-mongodb-library` -- **Type Safety**: Enhanced type checking and fixed enum conversion issues -- **Code Quality**: Improved linting compliance and code organization +**0.1.5:** +- **CI/CD**: Upgraded workflows to `actions/checkout@v6`, added PR coverage-delta job (60% threshold on modified files) +- **Pre-commit**: Added `.pre-commit-config.yaml` with linting and formatting hooks +- **Lint**: `make lint` now runs ruff, ruff format check, pycodestyle, and pydocstyle + +**0.1.4:** +- **MongoDB Library**: Replaced broken `robotframework-mongodblibrary` with `robot-mongodb-library` +- **Type Safety**: Fixed enum conversion issues in `base_generator.py` and `helpers.py` See the [changelog](CHANGELOG.md) for a full list of changes. 
diff --git a/config/intent_patterns.yaml b/config/intent_patterns.yaml new file mode 100644 index 0000000..de22afe --- /dev/null +++ b/config/intent_patterns.yaml @@ -0,0 +1,392 @@ +# Intent patterns for pattern matching engine +# Each pattern maps to an IntentType enum value with regex and priority + +patterns: + # Command execution (highest priority for specific commands) + - intent: FILE_STAT + pattern: '\bstat\b' + priority: 10 + + - intent: COMMAND_EXECUTION + pattern: '\b(?:initiate.*download|execute.*curl|run.*wget|curl|wget)\b' + priority: 10 + + - intent: COMMAND_EXECUTION + pattern: '\b(?:echo|hash|blake2bsum)\b' + priority: 9 + + - intent: COMMAND_EXECUTION + pattern: '\b(?:chmod|chown|stat|truncate|cp|rm|mkdir|rmdir|touch|ls|cat)\b' + priority: 9 + + # File operations (most specific patterns first) + - intent: FILE_EXISTS + pattern: '\b(?:verify|check|ensure).*file.*exists?\b' + priority: 8 + + - intent: FILE_REMOVE + pattern: '\b(?:remove|delete|clean).*file\b' + priority: 7 + + - intent: FILE_TRANSFER + pattern: '\b(?:get|retrieve|transfer).*file\b' + priority: 7 + + - intent: FILE_CREATION + pattern: '\b(?:create|write).*file\b' + priority: 7 + + - intent: FILE_TRANSFER + pattern: '\b(?:copy|move).*file\b' + priority: 6 + + - intent: FILE_EXISTS + pattern: '\b(?:file.*should.*exist|file.*exists)\b' + priority: 6 + + - intent: FILE_REMOVE + pattern: '\b(?:file.*should.*not.*exist|remove.*file)\b' + priority: 6 + + # Database operations (more specific patterns first) + - intent: DATABASE_CONNECT + pattern: '\b(?:connect|establish|open).*(?:database|db connection)\b' + priority: 8 + + - intent: DATABASE_EXECUTE + pattern: '\b(?:execute|run).*(?:sql|query)\b' + priority: 7 + + - intent: DATABASE_DISCONNECT + pattern: '\b(?:disconnect|close|terminate).*(?:database|db)\b' + priority: 6 + + - intent: DATABASE_MODIFY + pattern: '\b(?:insert|update|delete).*(?:record|row)\b' + priority: 6 + + - intent: DATABASE_ROW_COUNT + pattern: 
'\b(?:verify|check|validate).*(?:row|record).*count\b' + priority: 5 + + # SSH operations + - intent: SSH_CONNECT + pattern: '\b(?:open|establish|create|connect).*(?:ssh|connection|remote|server)\b' + priority: 7 + + - intent: SSH_CONNECT + pattern: '\bconnect.*to.*server\b' + priority: 7 + + - intent: SSH_CONNECT + pattern: '\bconnect.*to.*staging\b' + priority: 7 + + - intent: SSH_CONNECT + pattern: '\bconnect.*to.*production\b' + priority: 7 + + - intent: SSH_CONNECT + pattern: '\bconnect\b' + priority: 6 + + - intent: SSH_DISCONNECT + pattern: '\b(?:close|disconnect|terminate).*(?:connection|ssh|remote)\b' + priority: 6 + + - intent: SSH_EXECUTE + pattern: '\b(?:execute|run).*(?:command|ssh)\b' + priority: 7 + + - intent: SSH_EXECUTE + pattern: '\bstart.*extraction\b' + priority: 7 + + - intent: SSH_EXECUTE + pattern: '\bstart.*command\b' + priority: 7 + + - intent: SSH_LOGIN + pattern: '\blogin.*ssh\b' + priority: 7 + + - intent: SSH_LOGIN + pattern: '\bssh.*login\b' + priority: 7 + + - intent: SSH_LOGIN + pattern: '\blogin.*with.*key\b' + priority: 7 + + - intent: SSH_LOGIN + pattern: '\blogin.*with.*public.*key\b' + priority: 7 + + - intent: SSH_CONFIGURATION + pattern: '\bset.*ssh.*client.*configuration\b' + priority: 7 + + - intent: SSH_FILE_UPLOAD + pattern: '\bupload.*file\b' + priority: 7 + + - intent: SSH_FILE_UPLOAD + pattern: '\bput.*file\b' + priority: 7 + + - intent: SSH_FILE_DOWNLOAD + pattern: '\bdownload.*file\b' + priority: 7 + + - intent: SSH_FILE_DOWNLOAD + pattern: '\bget.*file\b' + priority: 7 + + - intent: SSH_DIRECTORY_CREATE + pattern: '\bcreate.*directory\b' + priority: 8 + + - intent: SSH_DIRECTORY_LIST + pattern: '\blist.*directory\b' + priority: 7 + + - intent: SSH_READ_UNTIL + pattern: '\bread.*until\b' + priority: 7 + + - intent: SSH_WRITE + pattern: '\bwrite\b' + priority: 7 + + - intent: SSH_ENABLE_LOGGING + pattern: '\benable.*logging\b' + priority: 7 + + - intent: SSH_SWITCH_CONNECTION + pattern: '\bswitch.*connection\b' + 
priority: 7 + + # More flexible SSH patterns that don't explicitly contain "ssh" + - intent: SSH_FILE_UPLOAD + pattern: '\bupload.*configuration.*file\b' + priority: 6 + + - intent: SSH_FILE_UPLOAD + pattern: '\bupload.*application.*archive\b' + priority: 6 + + - intent: SSH_READ_UNTIL + pattern: '\bwait.*for.*extraction\b' + priority: 6 + + - intent: SSH_READ_UNTIL + pattern: '\bwait.*for.*completion\b' + priority: 6 + + - intent: SSH_WRITE + pattern: '\bwrite.*deployment.*script\b' + priority: 6 + + - intent: SSH_READ_UNTIL + pattern: '\bread.*deployment.*output\b' + priority: 6 + + - intent: FILE_VERIFICATION + pattern: '\bverify.*file.*exists\b' + priority: 6 + + - intent: SSH_DIRECTORY_CREATE + pattern: '\blist.*deployment.*contents\b' + priority: 6 + + # Browser operations (higher priority than SSH patterns) + - intent: BROWSER_OPEN + pattern: '\b(?:open|navigate|visit|launch).*(?:browser|page|url|application)\b' + priority: 8 + + - intent: BROWSER_NAVIGATE + pattern: '\b(?:go to|navigate(?:\s+to)?)\b.*\b(?:url|page|site|screen|login|portal|dashboard|home)\b' + priority: 8 + + - intent: BROWSER_NAVIGATE + pattern: '\bnavigate(?:\s+to)?\s+(?:login|home|dashboard|portal|application|app)(?:\s+page|\s+screen)?\b' + priority: 6 + + - intent: INPUT_USERNAME + pattern: '\b(?:enter|input|type|fill).*(?:username|user\s*name|email|e-mail|email\s+address)\b' + priority: 5 + + - intent: INPUT_PASSWORD + pattern: '\b(?:enter|input|type|fill).*password\b' + priority: 5 + + - intent: CREDENTIAL_INPUT + pattern: '\b(?:enter|input|type|fill|provide).*(?:credentials?|login\s+(?:details|info))\b' + priority: 6 + + - intent: CLICK_ACTION + pattern: '\b(?:click|press|tap).*(?:button|element)\b' + priority: 4 + + - intent: CLICK_ACTION + pattern: '\bsubmit\b.*\b(?:form|button|login|request)\b' + priority: 5 + + - intent: CLICK_ACTION + pattern: '\b(?:click|press|tap)\b' + priority: 3 + + # Specific patterns for builtin assertions + - intent: VERIFY_CONTENT + pattern: 
'\bassert.*page.*contains?\b' + priority: 5 + + - intent: ASSERTION_CONTAINS + pattern: '\bassert.*contains?\b' + priority: 4 + + # Content verification + - intent: CONTENT_VERIFICATION + pattern: '\b(?:verify|check|ensure|assert|validate).*(?:content|contains|displays)\b' + priority: 3 + + - intent: CONTENT_VERIFICATION + pattern: '\b(?:validate|verify|check|ensure|assert)\b' + priority: 2 + + - intent: CONTENT_VERIFICATION + pattern: 'verify\s*:' + priority: 3 + + - intent: ELEMENT_VERIFICATION + pattern: 'element\s*:' + priority: 3 + + # API operations + - intent: API_REQUEST + pattern: '\b(?:make|send|perform).*(?:get|post|put|delete).*(?:request|api)\b' + priority: 5 + + - intent: API_SESSION + pattern: '\b(?:create|establish).*(?:session|api connection)\b' + priority: 4 + + - intent: API_RESPONSE + pattern: '\b(?:verify|check|validate).*(?:response|status)\b' + priority: 3 + + # Monitoring and performance + - intent: PERFORMANCE_MONITORING + pattern: '\b(?:monitor|measure|track).*(?:performance|metrics|load)\b' + priority: 3 + + - intent: PERFORMANCE_TESTING + pattern: '\b(?:test|execute).*(?:performance|load|stress)\b' + priority: 3 + + # Security operations + - intent: SECURITY_TESTING + pattern: '\b(?:security|authenticate|authorization|vulnerability)\b' + priority: 3 + + - intent: SECURITY_SCANNING + pattern: '\b(?:scan|penetration|security.*test)\b' + priority: 3 + + # BuiltIn conversion operations + - intent: CONVERT_TO_INTEGER + pattern: '\bconvert.*to.*integer\b' + priority: 4 + + - intent: CONVERT_TO_STRING + pattern: '\bconvert.*to.*string\b' + priority: 4 + + - intent: CONVERT_TO_BOOLEAN + pattern: '\bconvert.*to.*boolean\b' + priority: 4 + + - intent: CONVERT_TO_NUMBER + pattern: '\bconvert.*to.*number\b' + priority: 4 + + # BuiltIn variable operations + - intent: SET_VARIABLE + pattern: '\bset.*variable\b' + priority: 4 + + - intent: GET_VARIABLE + pattern: '\bget.*variable\b' + priority: 4 + + # BuiltIn collection operations + - intent: 
CREATE_LIST + pattern: '\bcreate.*list\b' + priority: 4 + + - intent: CREATE_DICTIONARY + pattern: '\bcreate.*dictionary\b' + priority: 4 + + - intent: GET_LENGTH + pattern: '\bget.*length\b' + priority: 4 + + - intent: LENGTH_SHOULD_BE + pattern: '\blength.*should.*be\b' + priority: 4 + + - intent: LENGTH_SHOULD_BE + pattern: '\bcheck.*length.*of.*collection\b' + priority: 4 + + - intent: SHOULD_START_WITH + pattern: '\bshould.*start.*with\b' + priority: 4 + + - intent: SHOULD_END_WITH + pattern: '\bshould.*end.*with\b' + priority: 4 + + - intent: SHOULD_MATCH + pattern: '\bshould.*match\b' + priority: 4 + + # BuiltIn evaluation and control flow + - intent: EVALUATE_EXPRESSION + pattern: '\bevaluate\b' + priority: 4 + + - intent: RUN_KEYWORD_IF + pattern: '\brun.*keyword.*if\b' + priority: 4 + + - intent: RUN_KEYWORD_IF + pattern: '\brun.*keyword.*conditionally\b' + priority: 4 + + - intent: REPEAT_KEYWORD + pattern: '\brepeat.*keyword\b' + priority: 4 + + - intent: FAIL_TEST + pattern: '\bfail\b' + priority: 4 + + - intent: GET_COUNT + pattern: '\bget.*count\b' + priority: 4 + + # BuiltIn logging + - intent: LOG_MESSAGE + pattern: '\blog.*message\b' + priority: 4 + + # BuiltIn string operations + - intent: SHOULD_START_WITH + pattern: '\bverify.*string.*starts.*with\b' + priority: 4 + + - intent: SHOULD_MATCH + pattern: '\bcheck.*string.*matches.*pattern\b' + priority: 4 diff --git a/pyproject.toml b/pyproject.toml index 2173dcc..aeb5c62 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ members = [".", "scripts"] [project] name = "importobot" -version = "0.1.4" +version = "0.1.5" description = "Automated test framework converter for migrating test cases from Zephyr, JIRA/Xray, and TestLink to Robot Framework format with bulk processing capabilities" readme = "README.md" requires-python = ">=3.10" @@ -120,6 +120,7 @@ dev = [ "matplotlib>=3.10.7", "detect-secrets>=1.5.0", "bandit>=1.7.0", + "pre-commit>=4.0.0", "hypothesis>=6.142.4", "mutmut>=2.4.4", 
"pyright>=1.1.407", diff --git a/src/importobot/__init__.py b/src/importobot/__init__.py index 82b0170..b6967e1 100644 --- a/src/importobot/__init__.py +++ b/src/importobot/__init__.py @@ -89,7 +89,7 @@ def convert_directory(input_dir: str, output_dir: str) -> dict[str, Any]: "exceptions", ] -__version__ = "0.1.4" +__version__ = "0.1.5" # Clean up namespace - remove internal imports from dir() del _config, _exceptions, _api diff --git a/src/importobot/core/converter.py b/src/importobot/core/converter.py index d11d869..32ca9c0 100644 --- a/src/importobot/core/converter.py +++ b/src/importobot/core/converter.py @@ -124,14 +124,6 @@ def apply_conversion_suggestions( return json_data, [] -def apply_conversion_suggestions_simple( - json_data: dict[str, Any] | list[Any], -) -> dict[str, Any] | list[Any]: - """Apply improvements to JSON test data, returning only the modified data.""" - improved_data, _ = apply_conversion_suggestions(json_data) - return improved_data - - def save_robot_file(content: str, file_path: str) -> None: """Save Robot Framework content to a specified file.""" validate_string_content(content) diff --git a/src/importobot/core/pattern_matcher.py b/src/importobot/core/pattern_matcher.py index 0334155..ac2fa3b 100644 --- a/src/importobot/core/pattern_matcher.py +++ b/src/importobot/core/pattern_matcher.py @@ -4,9 +4,12 @@ from dataclasses import dataclass from enum import Enum from functools import lru_cache +from pathlib import Path from re import Pattern from typing import Any, ClassVar +import yaml + from importobot.medallion.bronze.evidence_accumulator import EvidenceItem from importobot.medallion.bronze.format_models import EvidenceWeight from importobot.medallion.interfaces.enums import EvidenceSource @@ -59,18 +62,19 @@ def get_conflict_groups(cls) -> dict[str, set["RobotFrameworkLibrary"]]: } @classmethod - def get_conflict_prone_libraries(cls) -> set["RobotFrameworkLibrary"]: + @lru_cache(maxsize=1) + def get_conflict_prone_libraries(cls) -> 
frozenset["RobotFrameworkLibrary"]: """ Get libraries that commonly have keyword conflicts requiring prefixes. Returns: - Set of libraries that need explicit prefixes for disambiguation + Frozenset of libraries that need explicit prefixes for disambiguation """ - conflict_prone = set() + conflict_prone: set[RobotFrameworkLibrary] = set() for group in cls.get_conflict_groups().values(): if len(group) > 1: # Only groups with actual conflicts conflict_prone.update(group) - return conflict_prone + return frozenset(conflict_prone) @classmethod def from_string(cls, library_name: str) -> "RobotFrameworkLibrary": @@ -207,430 +211,32 @@ def __init__(self) -> None: self._intent_cache: dict[str, IntentType | None] = {} def _build_patterns(self) -> list[IntentPattern]: - """Build list of intent patterns.""" - return [ - # Command execution (highest priority for specific commands) - IntentPattern(IntentType.FILE_STAT, r"\bstat\b", priority=10), - IntentPattern( - IntentType.COMMAND_EXECUTION, - r"\b(?:initiate.*download|execute.*curl|run.*wget|curl|wget)\b", - priority=10, - ), - IntentPattern( - IntentType.COMMAND_EXECUTION, - r"\b(?:echo|hash|blake2bsum)\b", - priority=9, - ), - IntentPattern( - IntentType.COMMAND_EXECUTION, - r"\b(?:chmod|chown|stat|truncate|cp|rm|mkdir|rmdir|touch|ls|cat)\b", - priority=9, - ), - # File operations (most specific patterns first) - IntentPattern( - IntentType.FILE_EXISTS, - r"\b(?:verify|check|ensure).*file.*exists?\b", - priority=8, - ), - IntentPattern( - IntentType.FILE_REMOVE, r"\b(?:remove|delete|clean).*file\b", priority=7 - ), - IntentPattern( - IntentType.FILE_TRANSFER, - r"\b(?:get|retrieve|transfer).*file\b", - priority=7, - ), - IntentPattern( - IntentType.FILE_CREATION, - r"\b(?:create|write).*file\b", - priority=7, - ), - IntentPattern( - IntentType.FILE_TRANSFER, - r"\b(?:copy|move).*file\b", - priority=6, - ), - IntentPattern( - IntentType.FILE_EXISTS, - r"\b(?:file.*should.*exist|file.*exists)\b", - priority=6, - ), - 
IntentPattern( - IntentType.FILE_REMOVE, - r"\b(?:file.*should.*not.*exist|remove.*file)\b", - priority=6, - ), - # Database operations (more specific patterns first) - IntentPattern( - IntentType.DATABASE_CONNECT, - r"\b(?:connect|establish|open).*(?:database|db connection)\b", - priority=8, - ), - IntentPattern( - IntentType.DATABASE_EXECUTE, - r"\b(?:execute|run).*(?:sql|query)\b", - priority=7, - ), - IntentPattern( - IntentType.DATABASE_DISCONNECT, - r"\b(?:disconnect|close|terminate).*(?:database|db)\b", - priority=6, - ), - IntentPattern( - IntentType.DATABASE_MODIFY, - r"\b(?:insert|update|delete).*(?:record|row)\b", - priority=6, - ), - IntentPattern( - IntentType.DATABASE_ROW_COUNT, - r"\b(?:verify|check|validate).*(?:row|record).*count\b", - priority=5, - ), - # SSH operations - IntentPattern( - IntentType.SSH_CONNECT, - r"\b(?:open|establish|create|connect).*" - r"(?:ssh|connection|remote|server)\b", - priority=7, - ), - IntentPattern( - IntentType.SSH_CONNECT, r"\bconnect.*to.*server\b", priority=7 - ), - IntentPattern( - IntentType.SSH_CONNECT, r"\bconnect.*to.*staging\b", priority=7 - ), - IntentPattern( - IntentType.SSH_CONNECT, r"\bconnect.*to.*production\b", priority=7 - ), - IntentPattern(IntentType.SSH_CONNECT, r"\bconnect\b", priority=6), - IntentPattern( - IntentType.SSH_DISCONNECT, - r"\b(?:close|disconnect|terminate).*(?:connection|ssh|remote)\b", - priority=6, - ), - IntentPattern( - IntentType.SSH_EXECUTE, - r"\b(?:execute|run).*(?:command|ssh)\b", - priority=7, - ), - IntentPattern(IntentType.SSH_EXECUTE, r"\bstart.*extraction\b", priority=7), - IntentPattern(IntentType.SSH_EXECUTE, r"\bstart.*command\b", priority=7), - IntentPattern(IntentType.SSH_LOGIN, r"\blogin.*ssh\b", priority=7), - IntentPattern(IntentType.SSH_LOGIN, r"\bssh.*login\b", priority=7), - IntentPattern(IntentType.SSH_LOGIN, r"\blogin.*with.*key\b", priority=7), - IntentPattern( - IntentType.SSH_LOGIN, r"\blogin.*with.*public.*key\b", priority=7 - ), - IntentPattern( - 
IntentType.SSH_CONFIGURATION, - r"\bset.*ssh.*client.*configuration\b", - priority=7, - ), - IntentPattern(IntentType.SSH_FILE_UPLOAD, r"\bupload.*file\b", priority=7), - IntentPattern(IntentType.SSH_FILE_UPLOAD, r"\bput.*file\b", priority=7), - IntentPattern( - IntentType.SSH_FILE_DOWNLOAD, r"\bdownload.*file\b", priority=7 - ), - IntentPattern(IntentType.SSH_FILE_DOWNLOAD, r"\bget.*file\b", priority=7), - IntentPattern( - IntentType.SSH_DIRECTORY_CREATE, r"\bcreate.*directory\b", priority=8 - ), - IntentPattern( - IntentType.SSH_DIRECTORY_LIST, r"\blist.*directory\b", priority=7 - ), - IntentPattern(IntentType.SSH_READ_UNTIL, r"\bread.*until\b", priority=7), - IntentPattern(IntentType.SSH_WRITE, r"\bwrite\b", priority=7), - IntentPattern( - IntentType.SSH_ENABLE_LOGGING, r"\benable.*logging\b", priority=7 - ), - IntentPattern( - IntentType.SSH_SWITCH_CONNECTION, r"\bswitch.*connection\b", priority=7 - ), - # More flexible SSH patterns that don't explicitly contain "ssh" - IntentPattern( - IntentType.SSH_FILE_UPLOAD, - r"\bupload.*configuration.*file\b", - priority=6, - ), - IntentPattern( - IntentType.SSH_FILE_UPLOAD, - r"\bupload.*application.*archive\b", - priority=6, - ), - IntentPattern( - IntentType.SSH_READ_UNTIL, r"\bwait.*for.*extraction\b", priority=6 - ), - IntentPattern( - IntentType.SSH_READ_UNTIL, r"\bwait.*for.*completion\b", priority=6 - ), - IntentPattern( - IntentType.SSH_WRITE, r"\bwrite.*deployment.*script\b", priority=6 - ), - IntentPattern( - IntentType.SSH_READ_UNTIL, r"\bread.*deployment.*output\b", priority=6 - ), - IntentPattern( - IntentType.FILE_VERIFICATION, r"\bverify.*file.*exists\b", priority=6 - ), - IntentPattern( - IntentType.SSH_DIRECTORY_CREATE, - r"\blist.*deployment.*contents\b", - priority=6, - ), - # Browser operations (higher priority than SSH patterns) - IntentPattern( - IntentType.BROWSER_OPEN, - r"\b(?:open|navigate|visit|launch).*(?:browser|page|url|application)\b", - priority=8, - ), - IntentPattern( - 
IntentType.BROWSER_NAVIGATE, - ( - r"\b(?:go to|navigate(?:\s+to)?)\b.*\b(?:url|page|site|screen|" - r"login|portal|dashboard|home)\b" - ), - priority=8, - ), - IntentPattern( - IntentType.BROWSER_NAVIGATE, - ( - r"\bnavigate(?:\s+to)?\s+(?:login|home|dashboard|portal|" - r"application|app)(?:\s+page|\s+screen)?\b" - ), - priority=6, - ), - IntentPattern( - IntentType.INPUT_USERNAME, - ( - r"\b(?:enter|input|type|fill).*(?:username|user\s*name|email|" - r"e-mail|email\s+address)\b" - ), - priority=5, - ), - IntentPattern( - IntentType.INPUT_PASSWORD, - r"\b(?:enter|input|type|fill).*password\b", - priority=5, - ), - IntentPattern( - IntentType.CREDENTIAL_INPUT, - r"\b(?:enter|input|type|fill|provide).*" - r"(?:credentials?|login\s+(?:details|info))\b", - priority=6, # Higher priority than individual username/password - ), - IntentPattern( - IntentType.CLICK_ACTION, - r"\b(?:click|press|tap).*(?:button|element)\b", - priority=4, - ), - IntentPattern( - IntentType.CLICK_ACTION, - r"\bsubmit\b.*\b(?:form|button|login|request)\b", - priority=5, - ), - IntentPattern( - IntentType.CLICK_ACTION, - r"\b(?:click|press|tap)\b", - priority=3, - ), - # Specific patterns for builtin assertions - IntentPattern( - IntentType.VERIFY_CONTENT, - r"\bassert.*page.*contains?\b", - priority=5, - ), - IntentPattern( - IntentType.ASSERTION_CONTAINS, - r"\bassert.*contains?\b", - priority=4, - ), - # Content verification - IntentPattern( - IntentType.CONTENT_VERIFICATION, - ( - r"\b(?:verify|check|ensure|assert|validate)" - r".*(?:content|contains|displays)\b" - ), - priority=3, - ), - # General validation pattern (audit trails, compliance checks, etc.) 
- IntentPattern( - IntentType.CONTENT_VERIFICATION, - r"\b(?:validate|verify|check|ensure|assert)\b", - priority=2, - ), - # Specific verification format - IntentPattern( - IntentType.CONTENT_VERIFICATION, - r"verify\s*:", - priority=3, - ), - # Element verification format - IntentPattern( - IntentType.ELEMENT_VERIFICATION, - r"element\s*:", - priority=3, - ), - # API operations - IntentPattern( - IntentType.API_REQUEST, - r"\b(?:make|send|perform).*(?:get|post|put|delete).*(?:request|api)\b", - priority=5, - ), - IntentPattern( - IntentType.API_SESSION, - r"\b(?:create|establish).*(?:session|api connection)\b", - priority=4, - ), - IntentPattern( - IntentType.API_RESPONSE, - r"\b(?:verify|check|validate).*(?:response|status)\b", - priority=3, - ), - # Monitoring and performance - IntentPattern( - IntentType.PERFORMANCE_MONITORING, - r"\b(?:monitor|measure|track).*(?:performance|metrics|load)\b", - priority=3, - ), - IntentPattern( - IntentType.PERFORMANCE_TESTING, - r"\b(?:test|execute).*(?:performance|load|stress)\b", - priority=3, - ), - # Security operations - IntentPattern( - IntentType.SECURITY_TESTING, - r"\b(?:security|authenticate|authorization|vulnerability)\b", - priority=3, - ), - IntentPattern( - IntentType.SECURITY_SCANNING, - r"\b(?:scan|penetration|security.*test)\b", - priority=3, - ), - # BuiltIn conversion operations - IntentPattern( - IntentType.CONVERT_TO_INTEGER, - r"\bconvert.*to.*integer\b", - priority=4, - ), - IntentPattern( - IntentType.CONVERT_TO_STRING, - r"\bconvert.*to.*string\b", - priority=4, - ), - IntentPattern( - IntentType.CONVERT_TO_BOOLEAN, - r"\bconvert.*to.*boolean\b", - priority=4, - ), - IntentPattern( - IntentType.CONVERT_TO_NUMBER, - r"\bconvert.*to.*number\b", - priority=4, - ), - # BuiltIn variable operations - IntentPattern( - IntentType.SET_VARIABLE, - r"\bset.*variable\b", - priority=4, - ), - IntentPattern( - IntentType.GET_VARIABLE, - r"\bget.*variable\b", - priority=4, - ), - # BuiltIn collection operations - 
IntentPattern( - IntentType.CREATE_LIST, - r"\bcreate.*list\b", - priority=4, - ), - IntentPattern( - IntentType.CREATE_DICTIONARY, - r"\bcreate.*dictionary\b", - priority=4, - ), - IntentPattern( - IntentType.GET_LENGTH, - r"\bget.*length\b", - priority=4, - ), - IntentPattern( - IntentType.LENGTH_SHOULD_BE, - r"\blength.*should.*be\b", - priority=4, - ), - IntentPattern( - IntentType.LENGTH_SHOULD_BE, - r"\bcheck.*length.*of.*collection\b", - priority=4, - ), - IntentPattern( - IntentType.SHOULD_START_WITH, - r"\bshould.*start.*with\b", - priority=4, - ), - IntentPattern( - IntentType.SHOULD_END_WITH, - r"\bshould.*end.*with\b", - priority=4, - ), - IntentPattern( - IntentType.SHOULD_MATCH, - r"\bshould.*match\b", - priority=4, - ), - # BuiltIn evaluation and control flow - IntentPattern( - IntentType.EVALUATE_EXPRESSION, - r"\bevaluate\b", - priority=4, - ), - IntentPattern( - IntentType.RUN_KEYWORD_IF, - r"\brun.*keyword.*if\b", - priority=4, - ), - IntentPattern( - IntentType.RUN_KEYWORD_IF, - r"\brun.*keyword.*conditionally\b", - priority=4, - ), - IntentPattern( - IntentType.REPEAT_KEYWORD, - r"\brepeat.*keyword\b", - priority=4, - ), - IntentPattern( - IntentType.FAIL_TEST, - r"\bfail\b", - priority=4, - ), - IntentPattern( - IntentType.GET_COUNT, - r"\bget.*count\b", - priority=4, - ), - # BuiltIn logging - IntentPattern( - IntentType.LOG_MESSAGE, - r"\blog.*message\b", - priority=4, - ), - # BuiltIn string operations - IntentPattern( - IntentType.SHOULD_START_WITH, - r"\bverify.*string.*starts.*with\b", - priority=4, - ), - IntentPattern( - IntentType.SHOULD_MATCH, - r"\bcheck.*string.*matches.*pattern\b", - priority=4, - ), - ] + """Build list of intent patterns from YAML configuration.""" + return self._load_patterns_from_yaml() + + def _load_patterns_from_yaml(self) -> list[IntentPattern]: + """Load intent patterns from YAML configuration file.""" + config_path = ( + Path(__file__).parent.parent.parent.parent + / "config" + / "intent_patterns.yaml" + ) 
+ + with open(config_path, encoding="utf-8") as f: + config = yaml.safe_load(f) + + patterns = [] + for entry in config.get("patterns", []): + intent_name = entry["intent"] + intent_type = IntentType[intent_name] + patterns.append( + IntentPattern( + intent_type=intent_type, + pattern=entry["pattern"], + priority=entry.get("priority", 5), + ) + ) + return patterns def detect_intent(self, text: str) -> IntentType | None: """Detect the primary intent from text.""" diff --git a/src/importobot/medallion/bronze/independent_bayesian_scorer.py b/src/importobot/medallion/bronze/independent_bayesian_scorer.py index 95a8a9d..fc8c6ac 100644 --- a/src/importobot/medallion/bronze/independent_bayesian_scorer.py +++ b/src/importobot/medallion/bronze/independent_bayesian_scorer.py @@ -22,7 +22,6 @@ P_E_NOT_H_LEARNED, P_E_NOT_H_MODE, ) -from .test_case_complexity_analyzer import ComplexityMetrics logger = get_logger() @@ -557,54 +556,6 @@ def _calculate_evidence_strength(self, metrics: EvidenceMetrics | None) -> float # Ensure reasonable bounds return max(0.5, min(2.0, evidence_strength)) - def apply_complexity_amplification( - self, - likelihoods: dict[str, float], - complexity_metrics: dict[str, ComplexityMetrics | None], - ) -> dict[str, float]: - """Apply complexity-based amplification to likelihoods. - - This method enhances discriminative power for complex test cases - while maintaining mathematical soundness through controlled amplification. - - Mathematical Principle: - P_enhanced = P_base * (1 + alpha * complexity_score) - Where alpha is the complexity amplification factor. 
- - Args: - likelihoods: Base likelihoods for each format - complexity_metrics: Complexity metrics for each format - - Returns: - Enhanced likelihoods with complexity amplification applied - """ - enhanced_likelihoods = likelihoods.copy() - - for format_name, base_likelihood in likelihoods.items(): - complexity = complexity_metrics.get(format_name) - - if complexity: - # Calculate complexity amplification (1.0 to 1.3 max) - amplification = 1.0 + min(complexity.complexity_score * 0.3, 0.3) - - # Apply amplification - enhanced_likelihood = base_likelihood * amplification - - # Cap at maximum allowed likelihood - enhanced_likelihood = min(enhanced_likelihood, 0.95) - - enhanced_likelihoods[format_name] = enhanced_likelihood - - logger.debug( - "Applied %.3fx complexity amplification to %s: %.3f -> %.3f", - amplification, - format_name, - base_likelihood, - enhanced_likelihood, - ) - - return enhanced_likelihoods - def calculate_discriminative_score(self, metrics: EvidenceMetrics) -> float: """Calculate discriminative score emphasizing unique evidence. 
diff --git a/src/importobot/medallion/bronze_layer.py b/src/importobot/medallion/bronze_layer.py index 85c4cf8..8166b56 100644 --- a/src/importobot/medallion/bronze_layer.py +++ b/src/importobot/medallion/bronze_layer.py @@ -39,6 +39,15 @@ logger = get_logger() +def _resolve_positive_config(value: int | None, default: int, name: str) -> int: + """Resolve config with validation, falling back to default if invalid.""" + resolved = value if value is not None else default + if resolved < 1: + logger.warning("%s %d must be >= 1; using default %d", name, resolved, default) + return default + return resolved + + @dataclass(slots=True) class _FilterContext: """Container for record attributes used during filter evaluation.""" @@ -79,23 +88,15 @@ def __init__( """ super().__init__("bronze", storage_path) self.storage_backend = storage_backend - resolved_max = ( - max_in_memory_records - if max_in_memory_records is not None - else BRONZE_LAYER_MAX_IN_MEMORY_RECORDS + self._max_in_memory_records = _resolve_positive_config( + max_in_memory_records, + BRONZE_LAYER_MAX_IN_MEMORY_RECORDS, + "BronzeLayer max_in_memory_records", ) - if resolved_max < 1: - logger.warning( - "BronzeLayer max_in_memory_records %d must be >= 1; using default %d", - resolved_max, - BRONZE_LAYER_MAX_IN_MEMORY_RECORDS, - ) - resolved_max = BRONZE_LAYER_MAX_IN_MEMORY_RECORDS - self._max_in_memory_records = resolved_max - resolved_ttl = ( - in_memory_ttl_seconds - if in_memory_ttl_seconds is not None - else BRONZE_LAYER_IN_MEMORY_TTL_SECONDS + resolved_ttl = _resolve_positive_config( + in_memory_ttl_seconds, + BRONZE_LAYER_IN_MEMORY_TTL_SECONDS, + "BronzeLayer in_memory_ttl_seconds", ) self._in_memory_ttl_seconds: int | None = ( resolved_ttl if resolved_ttl > 0 else None diff --git a/tests/unit/medallion/bronze/test_resolve_positive_config.py b/tests/unit/medallion/bronze/test_resolve_positive_config.py new file mode 100644 index 0000000..5ea41f4 --- /dev/null +++ 
b/tests/unit/medallion/bronze/test_resolve_positive_config.py @@ -0,0 +1,89 @@ +"""Tests for the _resolve_positive_config helper in bronze_layer.py.""" + +import logging + +import pytest + +from importobot.medallion.bronze_layer import ( + _resolve_positive_config, # type: ignore[attr-defined] +) + + +class TestResolvePositiveConfig: + """Unit tests for the _resolve_positive_config module-level helper.""" + + def test_none_value_falls_back_to_default(self) -> None: + """When value is None the default must be returned unchanged.""" + result = _resolve_positive_config(None, 100, "param") + + assert result == 100 + + def test_valid_positive_value_is_returned_as_is(self) -> None: + """A positive integer value must be returned without modification.""" + result = _resolve_positive_config(42, 100, "param") + + assert result == 42 + + def test_value_of_exactly_one_is_accepted(self) -> None: + """Exactly 1 is the minimum valid positive integer; kept as-is.""" + result = _resolve_positive_config(1, 100, "param") + + assert result == 1 + + def test_value_less_than_one_returns_default(self) -> None: + """A value below 1 is invalid; the default must be returned.""" + result = _resolve_positive_config(0, 100, "param") + + assert result == 100 + + def test_negative_value_returns_default(self) -> None: + """A negative value is invalid; the default must be returned.""" + result = _resolve_positive_config(-5, 50, "param") + + assert result == 50 + + def test_value_less_than_one_triggers_warning_log( + self, caplog: pytest.LogCaptureFixture + ) -> None: + """A value below 1 must emit a WARNING-level log message.""" + with caplog.at_level(logging.WARNING): + _resolve_positive_config(0, 100, "max_records") + + assert len(caplog.records) >= 1 + warning_messages = [ + r.message for r in caplog.records if r.levelno == logging.WARNING + ] + assert any("max_records" in msg for msg in warning_messages) + + def test_warning_log_includes_invalid_value_and_default( + self, caplog: 
pytest.LogCaptureFixture + ) -> None: + """Warning log mentions the invalid value and the fallback.""" + with caplog.at_level(logging.WARNING): + _resolve_positive_config(-3, 200, "ttl_seconds") + + warning_text = " ".join( + r.message for r in caplog.records if r.levelno == logging.WARNING + ) + assert "-3" in warning_text + assert "200" in warning_text + + def test_valid_value_does_not_trigger_warning_log( + self, caplog: pytest.LogCaptureFixture + ) -> None: + """No warning emitted when a valid positive value is supplied.""" + with caplog.at_level(logging.WARNING): + _resolve_positive_config(10, 100, "param") + + warning_records = [r for r in caplog.records if r.levelno == logging.WARNING] + assert warning_records == [] + + def test_none_value_does_not_trigger_warning_log( + self, caplog: pytest.LogCaptureFixture + ) -> None: + """Falling back from None to default is silent (no warning).""" + with caplog.at_level(logging.WARNING): + _resolve_positive_config(None, 100, "param") + + warning_records = [r for r in caplog.records if r.levelno == logging.WARNING] + assert warning_records == [] diff --git a/tests/unit/test_pattern_matcher.py b/tests/unit/test_pattern_matcher.py index 0d16dbf..8f7a0d6 100644 --- a/tests/unit/test_pattern_matcher.py +++ b/tests/unit/test_pattern_matcher.py @@ -8,6 +8,7 @@ IntentType, LibraryDetector, PatternMatcher, + RobotFrameworkLibrary, ) @@ -418,3 +419,48 @@ def test_library_detector_integration_with_pattern_matcher(self) -> None: # Should detect appropriate libraries for the intent assert "SSHLibrary" in libraries # For SSH upload assert "OperatingSystem" in libraries # For file verification + + +class TestConflictProneLibrariesCache: + """Tests for RobotFrameworkLibrary.get_conflict_prone_libraries caching behavior.""" + + def test_returns_frozenset(self) -> None: + """get_conflict_prone_libraries should return a frozenset, not a plain set.""" + result = RobotFrameworkLibrary.get_conflict_prone_libraries() + + assert 
isinstance(result, frozenset) + + def test_contains_expected_conflict_prone_libraries(self) -> None: + """Libraries belonging to conflict groups with >1 member must be included.""" + result = RobotFrameworkLibrary.get_conflict_prone_libraries() + + # The 'web_automation' group contains SeleniumLibrary and AppiumLibrary. + assert RobotFrameworkLibrary.SELENIUM_LIBRARY in result + assert RobotFrameworkLibrary.APPIUM_LIBRARY in result + + def test_cache_returns_same_object_identity(self) -> None: + """Calling the method twice must return the exact same object (lru_cache).""" + first = RobotFrameworkLibrary.get_conflict_prone_libraries() + second = RobotFrameworkLibrary.get_conflict_prone_libraries() + + assert first is second + + def test_result_is_immutable(self) -> None: + """frozenset exposes no mutating methods, so .add raises AttributeError.""" + result = RobotFrameworkLibrary.get_conflict_prone_libraries() + + try: + result.add(RobotFrameworkLibrary.SSH_LIBRARY) # type: ignore[attr-defined] + raise AssertionError("Expected AttributeError was not raised") + except AttributeError: + pass + + def test_single_member_groups_are_excluded(self) -> None: + """Libraries in conflict groups with only one member must not be included.""" + conflict_groups = RobotFrameworkLibrary.get_conflict_groups() + result = RobotFrameworkLibrary.get_conflict_prone_libraries() + + for group in conflict_groups.values(): + if len(group) == 1: + lone_member = next(iter(group)) + assert lone_member not in result diff --git a/tests/unit/test_pattern_matcher_yaml_loading.py b/tests/unit/test_pattern_matcher_yaml_loading.py new file mode 100644 index 0000000..2432cf5 --- /dev/null +++ b/tests/unit/test_pattern_matcher_yaml_loading.py @@ -0,0 +1,312 @@ +"""Tests for YAML-based pattern loading in PatternMatcher.""" + +import re +from pathlib import Path +from unittest.mock import patch + +import pytest +import yaml + +from importobot.core.pattern_matcher import IntentType, PatternMatcher + +EXPECTED_PATTERN_COUNT
= 93 +YAML_CONFIG_PATH = ( + Path(__file__).parent.parent.parent + / "src" + / "importobot" + / "core" + / "pattern_matcher.py" +) +YAML_FILE_PATH = Path(__file__).parent.parent.parent / "config" / "intent_patterns.yaml" + + +class TestYamlFileExists: + """Verify the YAML configuration file exists and is loadable.""" + + def test_yaml_file_exists(self) -> None: + """config/intent_patterns.yaml must exist on disk.""" + assert YAML_FILE_PATH.exists(), f"Expected YAML config at {YAML_FILE_PATH}" + + def test_yaml_file_loads_without_error(self) -> None: + """Loading the YAML file must not raise any exception.""" + with open(YAML_FILE_PATH, encoding="utf-8") as f: + config = yaml.safe_load(f) + assert config is not None + + def test_yaml_file_has_patterns_key(self) -> None: + """Top-level 'patterns' key must be present in the YAML.""" + with open(YAML_FILE_PATH, encoding="utf-8") as f: + config = yaml.safe_load(f) + assert "patterns" in config, "YAML must have a top-level 'patterns' key" + assert isinstance(config["patterns"], list) + + +class TestPatternCount: + """Verify the expected number of patterns is present in the YAML.""" + + def test_pattern_count_is_93(self) -> None: + """YAML must contain exactly 93 pattern entries.""" + with open(YAML_FILE_PATH, encoding="utf-8") as f: + config = yaml.safe_load(f) + entries = config.get("patterns", []) + assert len(entries) == EXPECTED_PATTERN_COUNT, ( + f"Expected {EXPECTED_PATTERN_COUNT} patterns, got {len(entries)}" + ) + + def test_pattern_matcher_loads_all_patterns(self) -> None: + """PatternMatcher must hold the same count of patterns as the YAML.""" + matcher = PatternMatcher() + assert len(matcher.patterns) == EXPECTED_PATTERN_COUNT, ( + f"PatternMatcher loaded {len(matcher.patterns)} patterns, " + f"expected {EXPECTED_PATTERN_COUNT}" + ) + + +class TestRequiredFields: + """Verify every pattern entry contains the required fields.""" + + @pytest.fixture(scope="class") + def yaml_entries(self) -> list[dict]: + with 
open(YAML_FILE_PATH, encoding="utf-8") as f: + config = yaml.safe_load(f) + return config.get("patterns", []) + + def test_every_entry_has_intent_field(self, yaml_entries: list[dict]) -> None: + """Each entry must have an 'intent' field.""" + missing = [i for i, entry in enumerate(yaml_entries) if "intent" not in entry] + assert not missing, f"Entries at indices {missing} are missing 'intent'" + + def test_every_entry_has_pattern_field(self, yaml_entries: list[dict]) -> None: + """Each entry must have a 'pattern' field.""" + missing = [i for i, entry in enumerate(yaml_entries) if "pattern" not in entry] + assert not missing, f"Entries at indices {missing} are missing 'pattern'" + + def test_every_entry_has_priority_field(self, yaml_entries: list[dict]) -> None: + """Each entry must have a 'priority' field.""" + missing = [i for i, entry in enumerate(yaml_entries) if "priority" not in entry] + assert not missing, f"Entries at indices {missing} are missing 'priority'" + + def test_intent_fields_are_strings(self, yaml_entries: list[dict]) -> None: + """All 'intent' values must be non-empty strings.""" + bad = [ + i + for i, entry in enumerate(yaml_entries) + if not isinstance(entry.get("intent"), str) or not entry["intent"].strip() + ] + assert not bad, f"Entries at indices {bad} have invalid 'intent' values" + + def test_pattern_fields_are_strings(self, yaml_entries: list[dict]) -> None: + """All 'pattern' values must be non-empty strings.""" + bad = [ + i + for i, entry in enumerate(yaml_entries) + if not isinstance(entry.get("pattern"), str) or not entry["pattern"].strip() + ] + assert not bad, f"Entries at indices {bad} have invalid 'pattern' values" + + def test_priority_fields_are_integers(self, yaml_entries: list[dict]) -> None: + """All 'priority' values must be integers.""" + bad = [ + i + for i, entry in enumerate(yaml_entries) + if not isinstance(entry.get("priority"), int) + ] + assert not bad, f"Entries at indices {bad} have non-integer 'priority' values" 
+ + +class TestPatternIntegrity: + """Verify every pattern in the YAML compiles to a valid regex.""" + + @pytest.fixture(scope="class") + def yaml_entries(self) -> list[dict]: + with open(YAML_FILE_PATH, encoding="utf-8") as f: + config = yaml.safe_load(f) + return config.get("patterns", []) + + def test_all_patterns_compile_to_valid_regex( + self, yaml_entries: list[dict] + ) -> None: + """Every 'pattern' string must compile without error using re.IGNORECASE.""" + invalid = [] + for i, entry in enumerate(yaml_entries): + pattern_str = entry.get("pattern", "") + try: + re.compile(pattern_str, re.IGNORECASE) + except re.error as exc: + invalid.append((i, pattern_str, str(exc))) + + assert not invalid, "Invalid regex patterns found:\n" + "\n".join( + f" [{i}] {p!r}: {e}" for i, p, e in invalid + ) + + def test_all_intent_names_map_to_valid_enum_values( + self, yaml_entries: list[dict] + ) -> None: + """Every 'intent' value must correspond to an IntentType enum member.""" + unknown = [] + valid_names = {member.name for member in IntentType} + for i, entry in enumerate(yaml_entries): + intent_name = entry.get("intent", "") + if intent_name not in valid_names: + unknown.append((i, intent_name)) + + assert not unknown, "Unknown IntentType names found in YAML:\n" + "\n".join( + f" [{i}] {name!r}" for i, name in unknown + ) + + +class TestFallbackBehavior: + """Verify error behavior when the YAML file is missing.""" + + def test_missing_yaml_raises_file_not_found(self, tmp_path: Path) -> None: + """_load_patterns_from_yaml must raise FileNotFoundError.""" + missing = tmp_path / "nonexistent" / "intent_patterns.yaml" + + def patched(self): # type: ignore[no-untyped-def] + with open(missing, encoding="utf-8") as f: + yaml.safe_load(f) + return [] + + with ( + patch.object(PatternMatcher, "_load_patterns_from_yaml", patched), + pytest.raises(FileNotFoundError), + ): + PatternMatcher() + + def test_missing_yaml_error_message_is_informative(self, tmp_path: Path) -> None: + 
"""FileNotFoundError for a missing YAML should include the path.""" + nonexistent = tmp_path / "gone.yaml" + + def patched(self): # type: ignore[no-untyped-def] + with open(nonexistent, encoding="utf-8") as f: + yaml.safe_load(f) + return [] + + with ( + patch.object(PatternMatcher, "_load_patterns_from_yaml", patched), + pytest.raises(FileNotFoundError) as exc_info, + ): + PatternMatcher() + assert str(nonexistent) in str(exc_info.value) + + +class TestRoundTripConsistency: + """Patterns from YAML produce expected matching behavior.""" + + @pytest.fixture(scope="class") + def matcher(self) -> PatternMatcher: + return PatternMatcher() + + # SSH intents + @pytest.mark.parametrize( + ("text", "expected_intent"), + [ + ("open ssh connection to server", IntentType.SSH_CONNECT), + ("establish remote connection", IntentType.SSH_CONNECT), + ("connect to staging", IntentType.SSH_CONNECT), + ("connect to production", IntentType.SSH_CONNECT), + ("close ssh connection", IntentType.SSH_DISCONNECT), + ("disconnect from remote", IntentType.SSH_DISCONNECT), + ("upload file to remote", IntentType.SSH_FILE_UPLOAD), + ("download file from server", IntentType.SSH_FILE_DOWNLOAD), + ("create directory on server", IntentType.SSH_DIRECTORY_CREATE), + ("list directory contents", IntentType.SSH_DIRECTORY_LIST), + ("login ssh with key", IntentType.SSH_LOGIN), + ("enable logging on connection", IntentType.SSH_ENABLE_LOGGING), + ("switch connection to backup", IntentType.SSH_SWITCH_CONNECTION), + ("read until prompt appears", IntentType.SSH_READ_UNTIL), + ], + ) + def test_ssh_intent_detection( + self, matcher: PatternMatcher, text: str, expected_intent: IntentType + ) -> None: + """SSH-related texts must resolve to the correct SSH intent.""" + assert matcher.detect_intent(text) == expected_intent, ( + f"{text!r} should map to {expected_intent}, " + f"got {matcher.detect_intent(text)}" + ) + + # File operation intents + @pytest.mark.parametrize( + ("text", "expected_intent"), + [ + ("verify 
file exists on disk", IntentType.FILE_EXISTS), + ("check file exists", IntentType.FILE_EXISTS), + ("remove file from directory", IntentType.FILE_REMOVE), + ("delete file after test", IntentType.FILE_REMOVE), + ("get file from server", IntentType.FILE_TRANSFER), + ("create a new file here", IntentType.FILE_CREATION), + ], + ) + def test_file_operation_intent_detection( + self, matcher: PatternMatcher, text: str, expected_intent: IntentType + ) -> None: + """File operation texts must resolve to the correct file intent.""" + assert matcher.detect_intent(text) == expected_intent, ( + f"{text!r} should map to {expected_intent}, " + f"got {matcher.detect_intent(text)}" + ) + + # Database intents + @pytest.mark.parametrize( + ("text", "expected_intent"), + [ + ("connect to database", IntentType.DATABASE_CONNECT), + ("establish db connection", IntentType.DATABASE_CONNECT), + ("execute sql query", IntentType.DATABASE_EXECUTE), + ("run query against db", IntentType.DATABASE_EXECUTE), + ("disconnect from database", IntentType.DATABASE_DISCONNECT), + ("insert new record into table", IntentType.DATABASE_MODIFY), + ("update record in table", IntentType.DATABASE_MODIFY), + ], + ) + def test_database_intent_detection( + self, matcher: PatternMatcher, text: str, expected_intent: IntentType + ) -> None: + """Database texts must resolve to the correct database intent.""" + assert matcher.detect_intent(text) == expected_intent, ( + f"{text!r} should map to {expected_intent}, " + f"got {matcher.detect_intent(text)}" + ) + + # Command execution intents + @pytest.mark.parametrize( + ("text", "expected_intent"), + [ + ("execute curl command", IntentType.COMMAND_EXECUTION), + ("run wget download", IntentType.COMMAND_EXECUTION), + ("echo hello world", IntentType.COMMAND_EXECUTION), + ], + ) + def test_command_execution_intent_detection( + self, matcher: PatternMatcher, text: str, expected_intent: IntentType + ) -> None: + """Command texts must resolve to COMMAND_EXECUTION.""" + assert 
matcher.detect_intent(text) == expected_intent, ( + f"{text!r} should map to {expected_intent}, " + f"got {matcher.detect_intent(text)}" + ) + + def test_patterns_sorted_descending_by_priority( + self, matcher: PatternMatcher + ) -> None: + """Patterns list must be sorted highest priority first after YAML load.""" + priorities = [p.priority for p in matcher.patterns] + assert priorities == sorted(priorities, reverse=True) + + def test_no_duplicate_intent_type_on_repeated_instantiation(self) -> None: + """Two independently created PatternMatcher instances must agree on intents.""" + matcher_a = PatternMatcher() + matcher_b = PatternMatcher() + + test_cases = [ + "open ssh connection", + "connect to database", + "verify file exists", + "execute curl command", + "make get request to api", + ] + for text in test_cases: + assert matcher_a.detect_intent(text) == matcher_b.detect_intent(text), ( + f"Inconsistent intent for {text!r} between two PatternMatcher instances" + ) diff --git a/uv.lock b/uv.lock index 5cfcfa8..f2e1c80 100644 --- a/uv.lock +++ b/uv.lock @@ -341,6 +341,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, ] +[[package]] +name = "cfgv" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = 
"sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.4" @@ -919,6 +928,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3e/9f/8010f93e175ecd996f54df9019ee8c58025fc21ed47658b0a58dd25ebe8b/hypothesis-6.142.4-py3-none-any.whl", hash = "sha256:25eecc73fadecd8b491aed822204cfe4be9c98ff5c1e8e038d181136ffc54b5b", size = 533467, upload-time = "2025-10-25T16:19:00.443Z" }, ] +[[package]] +name = "identify" +version = "2.6.16" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5b/8d/e8b97e6bd3fb6fb271346f7981362f1e04d6a7463abd0de79e1fda17c067/identify-2.6.16.tar.gz", hash = "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980", size = 99360, upload-time = "2026-01-12T18:58:58.201Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/58/40fbbcefeda82364720eba5cf2270f98496bdfa19ea75b4cccae79c698e6/identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0", size = 99202, upload-time = "2026-01-12T18:58:56.627Z" }, +] + [[package]] name = "idna" version = "3.11" @@ -942,7 +960,7 @@ wheels = [ [[package]] name = "importobot" -version = "0.1.4" +version = "0.1.5" source = { editable = "." 
} dependencies = [ { name = "asv" }, @@ -999,6 +1017,7 @@ dev = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pandas" }, + { name = "pre-commit" }, { name = "pycodestyle" }, { name = "pydocstyle" }, { name = "pyright" }, @@ -1052,6 +1071,7 @@ dev = [ { name = "mypy", specifier = ">=1.18.2" }, { name = "numpy", specifier = ">=2.2.6" }, { name = "pandas", specifier = ">=2.3.3" }, + { name = "pre-commit", specifier = ">=4.0.0" }, { name = "pycodestyle", specifier = ">=2.11.0" }, { name = "pydocstyle", specifier = ">=6.3.0" }, { name = "pyright", specifier = ">=1.1.407" }, @@ -1914,6 +1934,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "pre-commit" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/f1/6d86a29246dfd2e9b6237f0b5823717f60cad94d47ddc26afa916d21f525/pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61", size = 198232, upload-time = "2025-12-16T21:14:33.552Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" }, +] + [[package]] name = 
"psutil" version = "7.1.3" diff --git a/wiki/Contributing.md b/wiki/Contributing.md index 4666c66..a6a261d 100644 --- a/wiki/Contributing.md +++ b/wiki/Contributing.md @@ -33,6 +33,20 @@ Importobot follows Test-Driven Development (TDD) and Extreme Programming (XP) pr 2. **Green**: Implement the minimum code to pass the test. 3. **Refactor**: Improve the code while keeping tests green. +### Pre-Commit Hooks + +The project uses `pre-commit` to run checks automatically before each commit. Install the hooks after cloning: + +```bash +uv run pre-commit install +``` + +To run all hooks manually: + +```bash +uv run pre-commit run --all-files +``` + ### Pre-Commit Checklist Before committing, ensure your changes meet the following criteria: @@ -40,6 +54,7 @@ Before committing, ensure your changes meet the following criteria: - All tests pass (`uv run pytest`). - Code coverage has not decreased. - All linting and formatting checks pass (`make lint`). +- Pre-commit hooks pass (`uv run pre-commit run --all-files`). - Relevant documentation has been updated if behavior was changed. ### Running Tests diff --git a/wiki/Testing.md b/wiki/Testing.md index 5ac855d..fe167e4 100644 --- a/wiki/Testing.md +++ b/wiki/Testing.md @@ -20,7 +20,7 @@ The Importobot test suite is structured to provide comprehensive coverage: For a detailed breakdown of test types and their locations, see the [Test Structure](How-to-Navigate-this-Codebase.md#test-structure) section in the codebase navigation guide. -Current status: 1,946 tests across modules with 0 skips. +Current status: 2,325 tests across modules with 0 skips. ## Invariant Tests