diff --git a/.github/prompts/commit.prompt.md b/.github/prompts/commit.prompt.md
new file mode 100644
index 0000000..e3be0bd
--- /dev/null
+++ b/.github/prompts/commit.prompt.md
@@ -0,0 +1,33 @@
+# Commit Workflow
+
+Follow this workflow when committing changes:
+
+1. **Identify changed files** - Determine all files edited in the current session
+2. **Lint and format** - Run `uv run ruff check --fix` and `uv run ruff format` on changed files
+3. **Stage selectively** - Use `git add <file>` for specific files, never `git add -A`
+4. **Commit with conventional format**:
+   - Types: `feat`, `fix`, `docs`, `refactor`, `test`, `chore`
+   - Subject: imperative mood, lowercase, no period
+   - Body: explain WHY, not just what changed
+5. **Fix pre-commit errors** - Iterate until the commit is clean
+6. **Push** - Push to remote
+
+## Conventional Commit Format
+
+```
+<type>: <subject>
+
+<body>
+```
+
+### Example
+
+```
+feat: add semantic search for physics domains
+
+Enables users to search across IDS entries using natural language
+queries. Uses sentence-transformers for embedding generation and
+FAISS for similarity search.
+
+Closes #42
+```
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 297f184..b1d9e1c 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -62,5 +62,6 @@
     }
   },
   "chat.mcp.access": "all",
-  "cursorpyright.analysis.typeCheckingMode": "basic"
+  "cursorpyright.analysis.typeCheckingMode": "basic",
+  "chat.customAgentInSubagent.enabled": true
 }
diff --git a/Dockerfile b/Dockerfile
index 4091cd4..6b4cd52 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -36,8 +36,7 @@ ENV PYTHONPATH="/app" \
     PYTHONUNBUFFERED=1 \
     PYTHONDONTWRITEBYTECODE=1 \
     HATCH_BUILD_NO_HOOKS=true \
-    OPENAI_BASE_URL=https://openrouter.ai/api/v1 \
-    IMAS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b
+    OPENAI_BASE_URL=https://openrouter.ai/api/v1

 # Labels for image provenance
 LABEL imas_mcp.git_sha=${GIT_SHA} \
@@ -177,9 +176,7 @@ ENV PYTHONPATH="/app" \
     DOCS_SERVER_URL=http://localhost:6280 \
     DOCS_MCP_TELEMETRY=false \
     DOCS_MCP_STORE_PATH=/app/data \
-    OPENAI_BASE_URL=https://openrouter.ai/api/v1 \
-    IMAS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b \
-    DOCS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b
+    OPENAI_BASE_URL=https://openrouter.ai/api/v1

 # Expose port (only needed for streamable-http transport)
 EXPOSE 8000
diff --git a/README.md b/README.md
index 04a3e78..d6621fc 100644
--- a/README.md
+++ b/README.md
@@ -97,10 +97,20 @@ The IMAS MCP server supports two modes for generating embeddings:
 2. **Local embeddings**: Uses sentence-transformers library
    - Install with `[transformers]` extra: `pip install imas-mcp[transformers]`
    - Runs models locally without API calls
-   - Example model: `all-MiniLM-L6-v2` (default)
+   - Example model: `all-MiniLM-L6-v2` (fallback default)

 **Configuration:**

+Embedding model defaults are configured in `pyproject.toml` under `[tool.imas-mcp]`:
+
+```toml
+[tool.imas-mcp]
+imas-embedding-model = "openai/text-embedding-3-large"  # For DD embeddings
+docs-embedding-model = "openai/text-embedding-3-small"  # For documentation
+```
+
+Environment variables override pyproject.toml settings:
+
 ```bash
 # API-based (requires API key)
 export OPENAI_API_KEY="your-api-key"
diff --git a/env.example b/env.example
index b537a55..5002155 100644
--- a/env.example
+++ b/env.example
@@ -2,10 +2,14 @@
 OPENAI_API_KEY=your_openrouter_api_key_here
 OPENAI_BASE_URL=https://openrouter.ai/api/v1

-# IMAS DD Embedding Model
-IMAS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b
+# IMAS DD Embedding Model (overrides pyproject.toml [tool.imas-mcp] default)
+# Default: openai/text-embedding-3-large (from pyproject.toml)
+# IMAS_MCP_EMBEDDING_MODEL=openai/text-embedding-3-large
+
+# Docs Embedding Model (overrides pyproject.toml [tool.imas-mcp] default)
+# Default: openai/text-embedding-3-small (from pyproject.toml)
+# DOCS_MCP_EMBEDDING_MODEL=openai/text-embedding-3-small
+
-DOCS_MCP_EMBEDDING_MODEL=openai/text-embedding-3-small
 DOCS_MCP_TELEMETRY=false
 DOCS_MCP_STORE_PATH=./docs-data
 DOCS_TIMEOUT=10
diff --git a/hatch_build_hooks.py b/hatch_build_hooks.py
index e0f9981..fa4dfb7 100644
--- a/hatch_build_hooks.py
+++ b/hatch_build_hooks.py
@@ -33,7 +33,7 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None:
         try:
             from imas_mcp.core.xml_parser import DataDictionaryTransformer

-            # from imas_mcp.structure.mermaid_generator import MermaidGraphGenerator
+            from scripts.build_migrations import build_migration_map
         finally:
             # Restore original sys.path
@@ -76,3 +76,30 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None:
             dd_accessor=dd_accessor, ids_set=ids_set, use_rich=True
         )
         json_transformer.build()
+
+        # Build path migration map for version upgrades
+        # This enables migration suggestions for deprecated paths
+        resolved_dd_version = dd_version or str(dd_accessor.get_version())
+        print(f"Building path migration map for version: {resolved_dd_version}")
+
+        from imas_mcp.resource_path_accessor import ResourcePathAccessor
+
+        path_accessor = ResourcePathAccessor(dd_version=resolved_dd_version)
+        migrations_dir = path_accessor.migrations_dir
+        migration_file = migrations_dir / "path_migrations.json"
+
+        import json
+
+        migration_data = build_migration_map(
+            target_version=resolved_dd_version,
+            ids_filter=ids_set,
+            verbose=True,
+        )
+
+        with open(migration_file, "w") as f:
+            json.dump(migration_data, f, indent=2)
+
+        print(
+            f"Built migration map with "
+            f"{migration_data['metadata']['total_migrations']} migrations"
+        )
diff --git a/imas_mcp/embeddings/config.py b/imas_mcp/embeddings/config.py
index d927fd2..fea8b7a 100644
--- a/imas_mcp/embeddings/config.py
+++ b/imas_mcp/embeddings/config.py
@@ -7,11 +7,13 @@
 # Load .env file for local development
 from dotenv import load_dotenv

+from imas_mcp.settings import get_imas_embedding_model
+
 load_dotenv()  # Load .env file values (does not override existing env vars)

-# Define constants
-IMAS_MCP_EMBEDDING_MODEL = os.getenv("IMAS_MCP_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
+# Define constants - uses pyproject.toml defaults with env var override
+IMAS_MCP_EMBEDDING_MODEL = get_imas_embedding_model()


 @dataclass
diff --git a/imas_mcp/migrations/__init__.py b/imas_mcp/migrations/__init__.py
new file mode 100644
index 0000000..4a5f737
--- /dev/null
+++ b/imas_mcp/migrations/__init__.py
@@ -0,0 +1,174 @@
+"""
+Path migration utilities for IMAS Data Dictionary version upgrades.
+
+This module provides access to the build-time generated migration map,
+enabling path migration suggestions and rename history lookups.
+"""
+
+import json
+import logging
+from dataclasses import dataclass
+from functools import lru_cache
+
+from imas_mcp import dd_version
+from imas_mcp.resource_path_accessor import ResourcePathAccessor
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class MigrationEntry:
+    """Information about a path migration from old to new version."""
+
+    new_path: str | None
+    deprecated_in: str
+    last_valid_version: str
+
+
+@dataclass(frozen=True)
+class RenameHistoryEntry:
+    """Information about a path that was renamed to the current path."""
+
+    old_path: str
+    deprecated_in: str
+
+
+class PathMigrationMap:
+    """
+    Provides access to path migration data for version upgrades.
+
+    Loads the build-time generated migration map and provides lookup methods
+    for both forward (old→new) and reverse (new→old) path mappings.
+    """
+
+    def __init__(
+        self,
+        dd_version: str = dd_version,
+        migration_data: dict | None = None,
+    ):
+        """
+        Initialize the migration map.
+
+        Args:
+            dd_version: The DD version to load migrations for.
+            migration_data: Optional pre-loaded migration data (for testing).
+        """
+        self._dd_version = dd_version
+        self._data: dict | None = migration_data
+        self._loaded = migration_data is not None
+
+    def _ensure_loaded(self) -> None:
+        """Load migration data from disk if not already loaded."""
+        if self._loaded:
+            return
+
+        path_accessor = ResourcePathAccessor(dd_version=self._dd_version)
+        migration_file = path_accessor.migrations_dir / "path_migrations.json"
+
+        if not migration_file.exists():
+            logger.warning(
+                f"Migration file not found: {migration_file}. "
+                "Run 'build-migrations' to generate it."
+            )
+            self._data = {"old_to_new": {}, "new_to_old": {}, "metadata": {}}
+            self._loaded = True
+            return
+
+        try:
+            with open(migration_file) as f:
+                self._data = json.load(f)
+            logger.debug(
+                f"Loaded migration map with "
+                f"{len(self._data.get('old_to_new', {}))} migrations"
+            )
+        except (json.JSONDecodeError, OSError) as e:
+            logger.error(f"Failed to load migration file: {e}")
+            self._data = {"old_to_new": {}, "new_to_old": {}, "metadata": {}}
+
+        self._loaded = True
+
+    def get_migration(self, old_path: str) -> MigrationEntry | None:
+        """
+        Get migration info for an old path.
+
+        Args:
+            old_path: The old path to look up (e.g., "equilibrium/time_slice/...").
+
+        Returns:
+            MigrationEntry with new_path, deprecated_in, and last_valid_version,
+            or None if no migration exists.
+        """
+        self._ensure_loaded()
+
+        if self._data is None:
+            return None
+
+        entry = self._data.get("old_to_new", {}).get(old_path)
+        if entry is None:
+            return None
+
+        return MigrationEntry(
+            new_path=entry.get("new_path"),
+            deprecated_in=entry.get("deprecated_in", ""),
+            last_valid_version=entry.get("last_valid_version", ""),
+        )
+
+    def get_rename_history(self, new_path: str) -> list[RenameHistoryEntry]:
+        """
+        Get rename history for a current path.
+
+        Args:
+            new_path: The current path to look up.
+
+        Returns:
+            List of RenameHistoryEntry objects for paths that were renamed
+            to this path, or empty list if no history.
+        """
+        self._ensure_loaded()
+
+        if self._data is None:
+            return []
+
+        entries = self._data.get("new_to_old", {}).get(new_path, [])
+        return [
+            RenameHistoryEntry(
+                old_path=entry.get("old_path", ""),
+                deprecated_in=entry.get("deprecated_in", ""),
+            )
+            for entry in entries
+        ]
+
+    @property
+    def metadata(self) -> dict:
+        """Get migration map metadata."""
+        self._ensure_loaded()
+        return self._data.get("metadata", {}) if self._data else {}
+
+    @property
+    def total_migrations(self) -> int:
+        """Get total number of migrations in the map."""
+        return self.metadata.get("total_migrations", 0)
+
+    @property
+    def target_version(self) -> str:
+        """Get the target DD version for migrations."""
+        return self.metadata.get("target_version", "")
+
+
+@lru_cache(maxsize=1)
+def get_migration_map() -> PathMigrationMap:
+    """
+    Get the singleton PathMigrationMap instance.
+
+    Returns:
+        PathMigrationMap for the current DD version.
+    """
+    return PathMigrationMap()
+
+
+__all__ = [
+    "MigrationEntry",
+    "RenameHistoryEntry",
+    "PathMigrationMap",
+    "get_migration_map",
+]
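A minimal usage sketch of the new module (the paths are the rename fixtures used by the tests later in this diff; output assumes the migration file has been built):

```python
from imas_mcp.migrations import get_migration_map

migration_map = get_migration_map()  # cached singleton for the current DD version

# Forward lookup: suggest the replacement for a deprecated path
migration = migration_map.get_migration(
    "equilibrium/time_slice/constraints/bpol_probe"
)
if migration is not None:
    print(f"Deprecated in {migration.deprecated_in}; use {migration.new_path}")

# Reverse lookup: list old names that were renamed to a current path
for entry in migration_map.get_rename_history(
    "equilibrium/time_slice/constraints/b_field_pol_probe"
):
    print(f"Renamed from {entry.old_path} (deprecated in {entry.deprecated_in})")
```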
diff --git a/imas_mcp/resource_path_accessor.py b/imas_mcp/resource_path_accessor.py
index 19bd20f..91942c0 100644
--- a/imas_mcp/resource_path_accessor.py
+++ b/imas_mcp/resource_path_accessor.py
@@ -144,6 +144,11 @@ def mermaid_dir(self) -> Path:
         """Get the mermaid graphs directory (imas_data_dictionary/{version}/mermaid/)."""
         return self._get_subdir_path("mermaid", create=True)

+    @cached_property
+    def migrations_dir(self) -> Path:
+        """Get the migrations directory (imas_data_dictionary/{version}/migrations/)."""
+        return self._get_subdir_path("migrations", create=True)
+
     @property
     def version(self) -> str:
         """Get the DD version string."""
diff --git a/imas_mcp/services/docs_server_manager.py b/imas_mcp/services/docs_server_manager.py
index ecb7b31..802caae 100644
--- a/imas_mcp/services/docs_server_manager.py
+++ b/imas_mcp/services/docs_server_manager.py
@@ -27,6 +27,7 @@
 from dotenv import load_dotenv

 from imas_mcp.exceptions import DocsServerError
+from imas_mcp.settings import get_docs_embedding_model

 # Load environment variables from .env file
 load_dotenv()
@@ -386,7 +387,7 @@ async def _start_docs_server_process(self) -> None:
         env.update(
             {
                 "DOCS_MCP_EMBEDDING_MODEL": env.get(
-                    "DOCS_MCP_EMBEDDING_MODEL", "openai/text-embedding-3-small"
+                    "DOCS_MCP_EMBEDDING_MODEL", get_docs_embedding_model()
                 ),
                 "DOCS_MCP_TELEMETRY": env.get("DOCS_MCP_TELEMETRY", "false"),
                 "DOCS_MCP_STORE_PATH": str(self.store_path),
diff --git a/imas_mcp/settings.py b/imas_mcp/settings.py
new file mode 100644
index 0000000..38d7466
--- /dev/null
+++ b/imas_mcp/settings.py
@@ -0,0 +1,102 @@
+"""Project settings loaded from pyproject.toml [tool.imas-mcp] section.
+
+This module provides centralized access to project configuration defaults,
+with environment variable overrides for runtime flexibility.
+"""
+
+import importlib.resources
+import os
+from functools import cache
+
+try:
+    import tomllib
+except ImportError:
+    import tomli as tomllib  # type: ignore[import-not-found]
+
+
+@cache
+def _load_pyproject_settings() -> dict[str, str]:
+    """Load settings from pyproject.toml [tool.imas-mcp] section.
+
+    Returns:
+        Dictionary of settings from pyproject.toml, empty dict if not found.
+    """
+    try:
+        # Try package resources first (installed package)
+        files = importlib.resources.files("imas_mcp")
+        pyproject_path = files.joinpath("..", "pyproject.toml")
+
+        # If package resource doesn't exist, try filesystem
+        if not pyproject_path.is_file():  # type: ignore[union-attr]
+            from pathlib import Path
+
+            # Walk up to find pyproject.toml (for development)
+            current = Path(__file__).resolve().parent
+            while current != current.parent:
+                candidate = current / "pyproject.toml"
+                if candidate.exists():
+                    pyproject_path = candidate
+                    break
+                current = current.parent
+            else:
+                return {}
+
+        # Read and parse the TOML file
+        if hasattr(pyproject_path, "read_text"):
+            content = pyproject_path.read_text()  # type: ignore[union-attr]
+        else:
+            from pathlib import Path
+
+            content = Path(pyproject_path).read_text()  # type: ignore[arg-type]
+
+        data = tomllib.loads(content)
+        return data.get("tool", {}).get("imas-mcp", {})
+    except Exception:
+        return {}
+
+
+def get_imas_embedding_model() -> str:
+    """Get the IMAS DD embedding model name.
+
+    Priority:
+        1. IMAS_MCP_EMBEDDING_MODEL environment variable
+        2. pyproject.toml [tool.imas-mcp] imas-embedding-model
+        3. Fallback default: all-MiniLM-L6-v2 (local model)
+
+    Returns:
+        Model name string.
+    """
+    if env_model := os.getenv("IMAS_MCP_EMBEDDING_MODEL"):
+        return env_model
+
+    settings = _load_pyproject_settings()
+    if model := settings.get("imas-embedding-model"):
+        return model
+
+    return "all-MiniLM-L6-v2"
+
+
+def get_docs_embedding_model() -> str:
+    """Get the docs server embedding model name.
+
+    Priority:
+        1. DOCS_MCP_EMBEDDING_MODEL environment variable
+        2. pyproject.toml [tool.imas-mcp] docs-embedding-model
+        3. Fallback default: openai/text-embedding-3-small
+
+    Returns:
+        Model name string.
+    """
+    if env_model := os.getenv("DOCS_MCP_EMBEDDING_MODEL"):
+        return env_model
+
+    settings = _load_pyproject_settings()
+    if model := settings.get("docs-embedding-model"):
+        return model
+
+    return "openai/text-embedding-3-small"
+
+
+# Computed defaults (for use in module-level constants)
+IMAS_MCP_EMBEDDING_MODEL = get_imas_embedding_model()
+DOCS_MCP_EMBEDDING_MODEL = get_docs_embedding_model()
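The resolution order is easiest to see interactively. A small sketch, assuming it runs from a checkout where pyproject.toml is discoverable:

```python
import os

from imas_mcp.settings import get_docs_embedding_model

# With no env override, the pyproject.toml [tool.imas-mcp] default wins
os.environ.pop("DOCS_MCP_EMBEDDING_MODEL", None)
print(get_docs_embedding_model())  # openai/text-embedding-3-small

# An environment variable takes priority over pyproject.toml
os.environ["DOCS_MCP_EMBEDDING_MODEL"] = "openai/text-embedding-3-large"
print(get_docs_embedding_model())  # openai/text-embedding-3-large
```

Note that `_load_pyproject_settings` is cached, but the environment variable is consulted on every call, so overrides still apply after the first lookup.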
+ """ + try: + # Try package resources first (installed package) + files = importlib.resources.files("imas_mcp") + pyproject_path = files.joinpath("..", "pyproject.toml") + + # If package resource doesn't exist, try filesystem + if not pyproject_path.is_file(): # type: ignore[union-attr] + from pathlib import Path + + # Walk up to find pyproject.toml (for development) + current = Path(__file__).resolve().parent + while current != current.parent: + candidate = current / "pyproject.toml" + if candidate.exists(): + pyproject_path = candidate + break + current = current.parent + else: + return {} + + # Read and parse the TOML file + if hasattr(pyproject_path, "read_text"): + content = pyproject_path.read_text() # type: ignore[union-attr] + else: + from pathlib import Path + + content = Path(pyproject_path).read_text() # type: ignore[arg-type] + + data = tomllib.loads(content) + return data.get("tool", {}).get("imas-mcp", {}) + except Exception: + return {} + + +def get_imas_embedding_model() -> str: + """Get the IMAS DD embedding model name. + + Priority: + 1. IMAS_MCP_EMBEDDING_MODEL environment variable + 2. pyproject.toml [tool.imas-mcp] imas-embedding-model + 3. Fallback default: all-MiniLM-L6-v2 (local model) + + Returns: + Model name string. + """ + if env_model := os.getenv("IMAS_MCP_EMBEDDING_MODEL"): + return env_model + + settings = _load_pyproject_settings() + if model := settings.get("imas-embedding-model"): + return model + + return "all-MiniLM-L6-v2" + + +def get_docs_embedding_model() -> str: + """Get the docs server embedding model name. + + Priority: + 1. DOCS_MCP_EMBEDDING_MODEL environment variable + 2. pyproject.toml [tool.imas-mcp] docs-embedding-model + 3. Fallback default: openai/text-embedding-3-small + + Returns: + Model name string. + """ + if env_model := os.getenv("DOCS_MCP_EMBEDDING_MODEL"): + return env_model + + settings = _load_pyproject_settings() + if model := settings.get("docs-embedding-model"): + return model + + return "openai/text-embedding-3-small" + + +# Computed defaults (for use in module-level constants) +IMAS_MCP_EMBEDDING_MODEL = get_imas_embedding_model() +DOCS_MCP_EMBEDDING_MODEL = get_docs_embedding_model() diff --git a/imas_mcp/tools/path_tool.py b/imas_mcp/tools/path_tool.py index 01fa33e..c1b967c 100644 --- a/imas_mcp/tools/path_tool.py +++ b/imas_mcp/tools/path_tool.py @@ -1,7 +1,8 @@ """ Path tool implementation. -Provides both fast validation and rich data retrieval for IMAS paths. +Provides both fast validation and rich data retrieval for IMAS paths, +with migration suggestions for deprecated paths and rename history. """ import logging @@ -9,9 +10,15 @@ from fastmcp import Context +from imas_mcp.migrations import PathMigrationMap, get_migration_map from imas_mcp.models.constants import SearchMode from imas_mcp.models.result_models import IdsPathResult -from imas_mcp.search.decorators import handle_errors, mcp_tool, measure_performance +from imas_mcp.search.decorators import ( + handle_errors, + mcp_tool, + measure_performance, +) +from imas_mcp.search.document_store import DocumentStore from .base import BaseTool @@ -21,6 +28,28 @@ class PathTool(BaseTool): """Tool for IMAS path validation and data retrieval.""" + def __init__( + self, + document_store: DocumentStore | None = None, + migration_map: PathMigrationMap | None = None, + ): + """ + Initialize PathTool. + + Args: + document_store: Optional DocumentStore instance. + migration_map: Optional PathMigrationMap for testing. Uses singleton if None. 
+ """ + super().__init__(document_store=document_store) + self._migration_map = migration_map + + @property + def migration_map(self) -> PathMigrationMap: + """Get the path migration map (lazy loaded).""" + if self._migration_map is None: + self._migration_map = get_migration_map() + return self._migration_map + @property def tool_name(self) -> str: """Return the name of this tool.""" @@ -43,7 +72,8 @@ async def check_imas_paths( Check if one or more exact IMAS paths exist in the data dictionary. Fast validation tool for batch path existence checking without search overhead. - Directly accesses the data dictionary for immediate results. + Directly accesses the data dictionary for immediate results. Returns migration + suggestions for deprecated paths and rename history for current paths. Args: paths: One or more IMAS paths to validate. Accepts either: @@ -63,25 +93,24 @@ async def check_imas_paths( - ids_name: IDS name if path exists - data_type: Data type if available (optional) - units: Physical units if available (optional) + - migration: Migration info if path is deprecated (optional): + - new_path: The current path to use + - deprecated_in: Version where path was deprecated + - last_valid_version: Last DD version where path was valid + - renamed_from: List of old paths that were renamed to this path (optional) - error: Error message if path format is invalid (optional) Examples: - Single path (string): - check_imas_paths("equilibrium/time_slice/boundary/psi") - → {"summary": {"total": 1, "found": 1, "not_found": 0, "invalid": 0}, - "results": [{"path": "equilibrium/time_slice/boundary/psi", "exists": true, "ids_name": "equilibrium"}]} - - Multiple paths with ids prefix (ensemble checking): - check_imas_paths("time_slice/boundary/psi time_slice/boundary/psi_norm time_slice/boundary/type", ids="equilibrium") - → {"summary": {"total": 3, "found": 3, "not_found": 0, "invalid": 0}, - "results": [ - {"path": "equilibrium/time_slice/boundary/psi", "exists": true, "ids_name": "equilibrium"}, - {"path": "equilibrium/time_slice/boundary/psi_norm", "exists": true, "ids_name": "equilibrium"}, - {"path": "equilibrium/time_slice/boundary/type", "exists": true, "ids_name": "equilibrium"} - ]} - - Multiple paths (list): - check_imas_paths(["time_slice/boundary/psi", "time_slice/boundary/psi_norm"], ids="equilibrium") + Path exists (current): + check_imas_paths("equilibrium/time_slice/constraints/b_field_pol_probe") + → {"results": [{"path": "...", "exists": true, "ids_name": "equilibrium", + "renamed_from": [{"old_path": "equilibrium/time_slice/constraints/bpol_probe", "deprecated_in": "4.0.0"}]}]} + + Path deprecated (has migration): + check_imas_paths("equilibrium/time_slice/constraints/bpol_probe") + → {"results": [{"path": "...", "exists": false, + "migration": {"new_path": "equilibrium/time_slice/constraints/b_field_pol_probe", + "deprecated_in": "4.0.0", "last_valid_version": "3.42.0"}}]} Note: This tool is optimized for exact path validation. 
diff --git a/pyproject.toml b/pyproject.toml
index b1003d6..be4a3d6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,7 @@ requires = [
     "PyYAML>=6.0,<7.0",
     "imas-data-dictionary>=4.1.0",
     "imas-data-dictionaries>=4.1.0",
+    "imas-python>=2.0.1",
 ]
 build-backend = "hatchling.build"

@@ -58,6 +59,7 @@ dependencies = [
     "anyio>=4.0.0,<5.0.0",
     "scikit-learn>=1.7.2",
     "psutil>=6.1.1",
+    "imas-python>=2.0.1",
     "requests>=2.25.0",
 ]

@@ -112,6 +114,7 @@ build-relationships = "scripts.build_relationships:build_relationships"
 build-embeddings = "scripts.build_embeddings:build_embeddings"
 build-database = "scripts.build_database:build_database"
 build-mermaid = "scripts.build_mermaid:build_mermaid"
+build-migrations = "scripts.build_migrations:build_migrations"

 [tool.hatch.version]
 source = "vcs"
@@ -127,6 +130,16 @@ verbose = true
 ids-filter = ""
 imas-dd-version = ""

+# Embedding model configuration
+# These defaults are used during build and at runtime (can be overridden via env vars)
+[tool.imas-mcp]
+# Model for IMAS Data Dictionary embeddings (used for semantic search of DD paths)
+# text-embedding-3-large: Higher quality, 3072 dimensions, better for the complex DD structure
+imas-embedding-model = "openai/text-embedding-3-large"
+# Model for documentation embeddings (used by docs-mcp-server)
+# text-embedding-3-small: Faster, 1536 dimensions, suitable for general documentation
+docs-embedding-model = "openai/text-embedding-3-small"
+
 [tool.hatch.envs.test]
 dependencies = [
     "pytest>=8.3.5,<9.0.0",
diff --git a/scripts/add_docs.py b/scripts/add_docs.py
index 71d4c8a..4bcd323 100644
--- a/scripts/add_docs.py
+++ b/scripts/add_docs.py
@@ -17,6 +17,7 @@
     build_docs_server_command,
     get_npx_executable,
 )
+from imas_mcp.settings import get_docs_embedding_model

 # Load environment variables from .env file
 load_dotenv(override=True)
@@ -38,10 +39,8 @@
 )
 @click.option(
     "--model",
-    default=lambda: os.getenv(
-        "DOCS_MCP_EMBEDDING_MODEL", "openai/text-embedding-3-small"
-    ),
-    help="Embedding model to use (defaults to DOCS_MCP_EMBEDDING_MODEL env var or openai/text-embedding-3-small)",
+    default=get_docs_embedding_model,
+    help="Embedding model to use (defaults to DOCS_MCP_EMBEDDING_MODEL env var or pyproject.toml setting)",
 )
 @click.option(
     "--ignore-errors/--no-ignore-errors",
"openai/text-embedding-3-small" - ), - help="Embedding model to use (defaults to DOCS_MCP_EMBEDDING_MODEL env var or openai/text-embedding-3-small)", + default=get_docs_embedding_model, + help="Embedding model to use (defaults to DOCS_MCP_EMBEDDING_MODEL env var or pyproject.toml setting)", ) @click.option( "--ignore-errors/--no-ignore-errors", diff --git a/scripts/build_migrations.py b/scripts/build_migrations.py new file mode 100644 index 0000000..1844536 --- /dev/null +++ b/scripts/build_migrations.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +""" +Build the path migration map for IMAS Data Dictionary version upgrades. + +This script creates a JSON file mapping old paths to new paths across DD versions, +enabling the MCP server to suggest path migrations for deprecated paths and +provide rename history for current paths. +""" + +import json +import logging +import sys +from collections import defaultdict +from datetime import UTC, datetime +from pathlib import Path + +import click +import imas + +from imas_mcp import dd_version +from imas_mcp.resource_path_accessor import ResourcePathAccessor + + +def build_migration_map( + target_version: str, + ids_filter: set[str] | None = None, + verbose: bool = False, +) -> dict: + """ + Build bidirectional path migration map from all older versions to target. + + Args: + target_version: The DD version to migrate paths to. + ids_filter: Optional set of IDS names to include. + verbose: Enable verbose logging. + + Returns: + Dictionary with metadata, old_to_new, and new_to_old mappings. + """ + logger = logging.getLogger(__name__) + + # Get all available DD versions + all_versions = imas.dd_zip.dd_xml_versions() + source_versions = [v for v in all_versions if v < target_version] + + if verbose: + logger.info(f"Building migration map to {target_version}") + logger.info(f"Source versions: {len(source_versions)} versions") + + target_factory = imas.IDSFactory(target_version) + target_ids_names = set(target_factory.ids_names()) + + # Apply IDS filter if provided + if ids_filter: + target_ids_names = target_ids_names & ids_filter + + # Track migrations with version info + old_to_new: dict[str, dict] = {} + new_to_old: dict[str, list[dict]] = defaultdict(list) + + # Track the earliest version where each path was deprecated + deprecation_versions: dict[str, str] = {} + # Track the latest version where each old path was valid + last_valid_versions: dict[str, str] = {} + + for source_version in sorted(source_versions): + if verbose: + logger.debug(f"Processing version {source_version}") + + source_factory = imas.IDSFactory(source_version) + + for ids_name in source_factory.ids_names(): + if ids_name not in target_ids_names: + continue + + try: + version_map, _ = imas.ids_convert.dd_version_map_from_factories( + ids_name, source_factory, target_factory + ) + except Exception as e: + logger.warning( + f"Failed to get version map for {ids_name} " + f"from {source_version}: {e}" + ) + continue + + for old_path, new_path in version_map.old_to_new.path.items(): + full_old = f"{ids_name}/{old_path}" + full_new = f"{ids_name}/{new_path}" if new_path else None + + # Skip if path unchanged + if full_old == full_new: + continue + + # Track last valid version for this old path + # (the version before it was deprecated) + if full_old not in last_valid_versions: + last_valid_versions[full_old] = source_version + + # Update old_to_new mapping + if full_old not in old_to_new: + old_to_new[full_old] = { + "new_path": full_new, + "deprecated_in": target_version, # Will refine 
below + "last_valid_version": source_version, + } + + # Track deprecation version (first version where path changed) + if full_old not in deprecation_versions: + deprecation_versions[full_old] = source_version + + # Build reverse mapping (new_to_old) + if full_new: + entry = { + "old_path": full_old, + "deprecated_in": deprecation_versions.get( + full_old, target_version + ), + } + # Avoid duplicates + existing_old_paths = [e["old_path"] for e in new_to_old[full_new]] + if full_old not in existing_old_paths: + new_to_old[full_new].append(entry) + + # Refine deprecation versions based on tracking + for old_path, info in old_to_new.items(): + if old_path in deprecation_versions: + # Find the next version after last_valid + last_valid = last_valid_versions.get(old_path, info["last_valid_version"]) + info["last_valid_version"] = last_valid + + # Deprecated in is the target version (current DD) + # since that's when it's no longer valid + if old_path in deprecation_versions: + info["deprecated_in"] = target_version + + # Build final structure + migration_data = { + "metadata": { + "target_version": target_version, + "source_versions": sorted(source_versions), + "generated_at": datetime.now(UTC).isoformat(), + "total_migrations": len(old_to_new), + "paths_with_history": len(new_to_old), + }, + "old_to_new": old_to_new, + "new_to_old": dict(new_to_old), + } + + return migration_data + + +@click.command() +@click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging output") +@click.option("--quiet", "-q", is_flag=True, help="Suppress all logging except errors") +@click.option( + "--force", + "-f", + is_flag=True, + help="Force rebuild even if migration file already exists", +) +@click.option( + "--ids-filter", + type=str, + help="Specific IDS names to include (space-separated)", +) +@click.option( + "--check-only", + is_flag=True, + help="Only check if migration file exists, don't build it", +) +@click.option( + "--output", + "-o", + type=click.Path(), + help="Override output path for migration file", +) +def build_migrations( + verbose: bool, + quiet: bool, + force: bool, + ids_filter: str, + check_only: bool, + output: str | None, +) -> int: + """Build the path migration map for IMAS DD version upgrades. + + This command creates a JSON file mapping old paths to new paths, + enabling migration suggestions for deprecated paths and rename + history for current paths. 
diff --git a/tests/test_path_tool.py b/tests/test_path_tool.py
index 0eb1e88..bc40627 100644
--- a/tests/test_path_tool.py
+++ b/tests/test_path_tool.py
@@ -2,15 +2,121 @@

 import pytest

+from imas_mcp.migrations import MigrationEntry, PathMigrationMap, RenameHistoryEntry
 from imas_mcp.search.document_store import DocumentStore
 from imas_mcp.tools import PathTool

+# ============================================================================
+# Mock PathMigrationMap for testing
+# ============================================================================
+
+
+class MockPathMigrationMap(PathMigrationMap):
+    """Mock PathMigrationMap with predefined test data."""
+
+    def __init__(self):
+        # Initialize with test data, bypassing file loading
+        migration_data = {
+            "metadata": {
+                "target_version": "4.0.1",
+                "total_migrations": 2,
+            },
+            "old_to_new": {
+                "equilibrium/time_slice/constraints/bpol_probe": {
+                    "new_path": "equilibrium/time_slice/constraints/b_field_pol_probe",
+                    "deprecated_in": "4.0.0",
+                    "last_valid_version": "3.42.0",
+                },
+                "equilibrium/time_slice/global_quantities/li": {
+                    "new_path": "equilibrium/time_slice/global_quantities/li_3",
+                    "deprecated_in": "4.0.0",
+                    "last_valid_version": "3.41.0",
+                },
+            },
+            "new_to_old": {
+                "equilibrium/time_slice/constraints/b_field_pol_probe": [
+                    {
+                        "old_path": "equilibrium/time_slice/constraints/bpol_probe",
+                        "deprecated_in": "4.0.0",
+                    }
+                ],
+                "equilibrium/time_slice/global_quantities/li_3": [
+                    {
+                        "old_path": "equilibrium/time_slice/global_quantities/li",
+                        "deprecated_in": "4.0.0",
+                    }
+                ],
+            },
+        }
+        super().__init__(dd_version="4.0.1", migration_data=migration_data)
+
+
+@pytest.fixture
+def mock_migration_map() -> MockPathMigrationMap:
+    """Create a mock PathMigrationMap for testing."""
+    return MockPathMigrationMap()
+

 @pytest.fixture
-def path_tool():
-    """Create a PathTool instance for testing."""
+def path_tool(mock_migration_map: MockPathMigrationMap) -> PathTool:
+    """Create a PathTool instance for testing with mocked migration map."""
     doc_store = DocumentStore()
-    return PathTool(doc_store)
+    return PathTool(doc_store, migration_map=mock_migration_map)
+
+
+# ============================================================================
+# Tests for PathMigrationMap
+# ============================================================================
+
+
+def test_migration_map_get_migration(mock_migration_map: MockPathMigrationMap):
+    """Test getting migration info for an old path."""
+    migration = mock_migration_map.get_migration(
+        "equilibrium/time_slice/constraints/bpol_probe"
+    )
+
+    assert migration is not None
+    assert migration.new_path == "equilibrium/time_slice/constraints/b_field_pol_probe"
+    assert migration.deprecated_in == "4.0.0"
+    assert migration.last_valid_version == "3.42.0"
+
+
+def test_migration_map_get_migration_not_found(
+    mock_migration_map: MockPathMigrationMap,
+):
+    """Test getting migration info for a path with no migration."""
+    migration = mock_migration_map.get_migration("fake/path/here")
+    assert migration is None
+
+
+def test_migration_map_get_rename_history(mock_migration_map: MockPathMigrationMap):
+    """Test getting rename history for a current path."""
+    history = mock_migration_map.get_rename_history(
+        "equilibrium/time_slice/constraints/b_field_pol_probe"
+    )
+
+    assert len(history) == 1
+    assert history[0].old_path == "equilibrium/time_slice/constraints/bpol_probe"
+    assert history[0].deprecated_in == "4.0.0"
+
+
+def test_migration_map_get_rename_history_not_found(
+    mock_migration_map: MockPathMigrationMap,
+):
+    """Test getting rename history for a path with no history."""
+    history = mock_migration_map.get_rename_history("fake/path/here")
+    assert history == []
+
+
+def test_migration_map_metadata(mock_migration_map: MockPathMigrationMap):
+    """Test migration map metadata access."""
+    assert mock_migration_map.target_version == "4.0.1"
+    assert mock_migration_map.total_migrations == 2
+
+
+# ============================================================================
+# Tests for check_imas_paths - Basic validation
+# ============================================================================


 @pytest.mark.asyncio
@@ -157,6 +263,11 @@ async def test_token_efficient_response(path_tool):
     assert "documentation" not in res


+# ============================================================================
+# Tests for check_imas_paths - IDS prefix handling
+# ============================================================================
+
+
 @pytest.mark.asyncio
 async def test_ids_prefix_single_path(path_tool):
     """Test ids parameter with single path."""
@@ -233,6 +344,45 @@ async def test_ids_prefix_mixed_paths(path_tool):
     assert result["results"][1]["path"] == "equilibrium/time_slice/boundary/psi_norm"


+# ============================================================================
+# Tests for check_imas_paths - Migration suggestions
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_deprecated_path_returns_migration(path_tool):
+    """Test that deprecated paths return migration suggestions."""
+    result = await path_tool.check_imas_paths(
+        "equilibrium/time_slice/constraints/bpol_probe"
+    )
+
+    assert result["summary"]["total"] == 1
+    assert result["summary"]["not_found"] == 1
+
+    res = result["results"][0]
+    assert res["exists"] is False
+    assert res["path"] == "equilibrium/time_slice/constraints/bpol_probe"
+
+    # Should have migration info
+    assert "migration" in res
+    assert (
+        res["migration"]["new_path"]
+        == "equilibrium/time_slice/constraints/b_field_pol_probe"
+    )
+    assert res["migration"]["deprecated_in"] == "4.0.0"
+    assert res["migration"]["last_valid_version"] == "3.42.0"
+
+
+@pytest.mark.asyncio
+async def test_nonexistent_path_no_migration(path_tool):
+    """Test that truly invalid paths don't have migration info."""
+    result = await path_tool.check_imas_paths("fake/nonexistent/path")
+
+    res = result["results"][0]
+    assert res["exists"] is False
+    assert "migration" not in res
+
+
 # ============================================================================
 # Tests for fetch_imas_paths - Rich data retrieval
 # ============================================================================
@@ -260,7 +410,6 @@ async def test_fetch_single_path(path_tool):
     assert node.path == "core_profiles/profiles_1d/electrons/temperature"
     assert node.documentation  # Should have documentation
     assert node.data_type  # Should have data_type
-    # Units might or might not be present depending on the path


 @pytest.mark.asyncio
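Assuming the project's usual uv-based workflow, the new migration tests can be run in isolation with something like:

```bash
uv run pytest tests/test_path_tool.py -k "migration" -v
```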
diff --git a/uv.lock b/uv.lock
index 0c313bc..5ffc59e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -735,6 +735,7 @@ dependencies = [
     { name = "click" },
     { name = "fastmcp" },
    { name = "imas-data-dictionaries" },
+    { name = "imas-python" },
     { name = "nest-asyncio" },
     { name = "networkx" },
     { name = "numpy" },
@@ -793,6 +794,7 @@ requires-dist = [
     { name = "coverage", marker = "extra == 'test'", specifier = ">=7.0.0" },
     { name = "fastmcp", specifier = ">=2.12.0" },
     { name = "imas-data-dictionaries", specifier = ">=4.1.0" },
+    { name = "imas-python", specifier = ">=2.0.1" },
     { name = "nest-asyncio", specifier = ">=1.5.0,<2.0.0" },
     { name = "networkx", specifier = ">=3.0,<4.0" },
     { name = "numpy", specifier = ">=2.3.1" },
@@ -832,6 +834,24 @@ dev = [
     { name = "tqdm-stubs", specifier = ">=0.2.1" },
 ]

+[[package]]
+name = "imas-python"
+version = "2.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+    { name = "imas-data-dictionaries" },
+    { name = "numpy" },
+    { name = "packaging" },
+    { name = "rich" },
+    { name = "scipy" },
+    { name = "xxhash" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2f/9d/519fcb1fffaed5482bc575cc45cad7753ef89d1c4f65c5ba490768b84f45/imas_python-2.0.1.tar.gz", hash = "sha256:8448e50390a6e7955dca2ee9b821408bbb0c7d6e901740463ca5d5a7cc018a54", size = 2738406, upload-time = "2025-06-18T12:10:34.679Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/be/a4/1cfcab270c7b35891a239219f8a3b122307ba4677def11540e5437753578/imas_python-2.0.1-py3-none-any.whl", hash = "sha256:27130247d4dd3385c69debeddfd4c2680d9bd16cedc2f81b1a045478e55f86ed", size = 2394289, upload-time = "2025-06-18T12:10:32.872Z" },
+]
+
 [[package]]
 name = "importlib-metadata"
 version = "8.7.0"
@@ -2622,6 +2642,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ca/51/5447876806d1088a0f8f71e16542bf350918128d0a69437df26047c8e46f/widgetsnbextension-4.0.14-py3-none-any.whl", hash = "sha256:4875a9eaf72fbf5079dc372a51a9f268fc38d46f767cbf85c43a36da5cb9b575", size = 2196503, upload-time = "2025-04-10T13:01:23.086Z" },
 ]

+[[package]]
+name = "xxhash"
+version = "3.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/07/d9412f3d7d462347e4511181dea65e47e0d0e16e26fbee2ea86a2aefb657/xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c", size = 32744, upload-time = "2025-10-02T14:34:34.622Z" },
+    { url = "https://files.pythonhosted.org/packages/79/35/0429ee11d035fc33abe32dca1b2b69e8c18d236547b9a9b72c1929189b9a/xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204", size = 30816, upload-time = "2025-10-02T14:34:36.043Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/f2/57eb99aa0f7d98624c0932c5b9a170e1806406cdbcdb510546634a1359e0/xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490", size = 194035, upload-time = "2025-10-02T14:34:37.354Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/ed/6224ba353690d73af7a3f1c7cdb1fc1b002e38f783cb991ae338e1eb3d79/xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2", size = 212914, upload-time = "2025-10-02T14:34:38.6Z" },
+    { url = "https://files.pythonhosted.org/packages/38/86/fb6b6130d8dd6b8942cc17ab4d90e223653a89aa32ad2776f8af7064ed13/xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa", size = 212163, upload-time = "2025-10-02T14:34:39.872Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/dc/e84875682b0593e884ad73b2d40767b5790d417bde603cceb6878901d647/xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0", size = 445411, upload-time = "2025-10-02T14:34:41.569Z" },
+    { url = "https://files.pythonhosted.org/packages/11/4f/426f91b96701ec2f37bb2b8cec664eff4f658a11f3fa9d94f0a887ea6d2b/xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2", size = 193883, upload-time = "2025-10-02T14:34:43.249Z" },
+    { url = "https://files.pythonhosted.org/packages/53/5a/ddbb83eee8e28b778eacfc5a85c969673e4023cdeedcfcef61f36731610b/xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9", size = 210392, upload-time = "2025-10-02T14:34:45.042Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/c2/ff69efd07c8c074ccdf0a4f36fcdd3d27363665bcdf4ba399abebe643465/xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e", size = 197898, upload-time = "2025-10-02T14:34:46.302Z" },
+    { url = "https://files.pythonhosted.org/packages/58/ca/faa05ac19b3b622c7c9317ac3e23954187516298a091eb02c976d0d3dd45/xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374", size = 210655, upload-time = "2025-10-02T14:34:47.571Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/7a/06aa7482345480cc0cb597f5c875b11a82c3953f534394f620b0be2f700c/xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d", size = 414001, upload-time = "2025-10-02T14:34:49.273Z" },
+    { url = "https://files.pythonhosted.org/packages/23/07/63ffb386cd47029aa2916b3d2f454e6cc5b9f5c5ada3790377d5430084e7/xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae", size = 191431, upload-time = "2025-10-02T14:34:50.798Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/93/14fde614cadb4ddf5e7cebf8918b7e8fac5ae7861c1875964f17e678205c/xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb", size = 30617, upload-time = "2025-10-02T14:34:51.954Z" },
+    { url = "https://files.pythonhosted.org/packages/13/5d/0d125536cbe7565a83d06e43783389ecae0c0f2ed037b48ede185de477c0/xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c", size = 31534, upload-time = "2025-10-02T14:34:53.276Z" },
+    { url = "https://files.pythonhosted.org/packages/54/85/6ec269b0952ec7e36ba019125982cf11d91256a778c7c3f98a4c5043d283/xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829", size = 27876, upload-time = "2025-10-02T14:34:54.371Z" },
+]
+
 [[package]]
 name = "yarl"
 version = "1.22.0"