Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/prompts/commit.prompt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Commit Workflow

Follow this workflow when committing changes:

1. **Identify changed files** - Determine all files edited in the current session
2. **Lint and format** - Run `uv run ruff check --fix` and `uv run ruff format` on changed files
3. **Stage selectively** - Use `git add <file>` for specific files, never `git add -A`
4. **Commit with conventional format**:
- Types: `feat`, `fix`, `docs`, `refactor`, `test`, `chore`
- Subject: imperative mood, lowercase, no period
- Body: explain WHY, not just what changed
5. **Fix pre-commit errors** - Iterate until the commit is clean
6. **Push** - Push to remote

## Conventional Commit Format

```
<type>: <subject>

<body>
```

### Example

```
feat: add semantic search for physics domains

Enables users to search across IDS entries using natural language
queries. Uses sentence-transformers for embedding generation and
FAISS for similarity search.

Closes #42
```
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,6 @@
}
},
"chat.mcp.access": "all",
"cursorpyright.analysis.typeCheckingMode": "basic"
"cursorpyright.analysis.typeCheckingMode": "basic",
"chat.customAgentInSubagent.enabled": true
}
7 changes: 2 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ ENV PYTHONPATH="/app" \
PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
HATCH_BUILD_NO_HOOKS=true \
OPENAI_BASE_URL=https://openrouter.ai/api/v1 \
IMAS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b
OPENAI_BASE_URL=https://openrouter.ai/api/v1

# Labels for image provenance
LABEL imas_mcp.git_sha=${GIT_SHA} \
Expand Down Expand Up @@ -177,9 +176,7 @@ ENV PYTHONPATH="/app" \
DOCS_SERVER_URL=http://localhost:6280 \
DOCS_MCP_TELEMETRY=false \
DOCS_MCP_STORE_PATH=/app/data \
OPENAI_BASE_URL=https://openrouter.ai/api/v1 \
IMAS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b \
DOCS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b
OPENAI_BASE_URL=https://openrouter.ai/api/v1

# Expose port (only needed for streamable-http transport)
EXPOSE 8000
Expand Down
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,20 @@ The IMAS MCP server supports two modes for generating embeddings:
2. **Local embeddings**: Uses sentence-transformers library
- Install with `[transformers]` extra: `pip install imas-mcp[transformers]`
- Runs models locally without API calls
- Example model: `all-MiniLM-L6-v2` (default)
- Example model: `all-MiniLM-L6-v2` (fallback default)

**Configuration:**

Embedding model defaults are configured in `pyproject.toml` under `[tool.imas-mcp]`:

```toml
[tool.imas-mcp]
imas-embedding-model = "openai/text-embedding-3-large" # For DD embeddings
docs-embedding-model = "openai/text-embedding-3-small" # For documentation
```

Environment variables override pyproject.toml settings:

```bash
# API-based (requires API key)
export OPENAI_API_KEY="your-api-key"
Expand Down
10 changes: 7 additions & 3 deletions env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
OPENAI_API_KEY=your_openrouter_api_key_here
OPENAI_BASE_URL=https://openrouter.ai/api/v1

# IMAS DD Embedding Model
IMAS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b
# IMAS DD Embedding Model (overrides pyproject.toml [tool.imas-mcp] default)
# Default: openai/text-embedding-3-large (from pyproject.toml)
# IMAS_MCP_EMBEDDING_MODEL=openai/text-embedding-3-large

# Docs Embedding Model (overrides pyproject.toml [tool.imas-mcp] default)
# Default: openai/text-embedding-3-small (from pyproject.toml)
# DOCS_MCP_EMBEDDING_MODEL=openai/text-embedding-3-small

DOCS_MCP_EMBEDDING_MODEL=openai/text-embedding-3-small
DOCS_MCP_TELEMETRY=false
DOCS_MCP_STORE_PATH=./docs-data
DOCS_TIMEOUT=10
Expand Down
29 changes: 28 additions & 1 deletion hatch_build_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None:

try:
from imas_mcp.core.xml_parser import DataDictionaryTransformer
# from imas_mcp.structure.mermaid_generator import MermaidGraphGenerator
from scripts.build_migrations import build_migration_map

finally:
# Restore original sys.path
Expand Down Expand Up @@ -76,3 +76,30 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None:
dd_accessor=dd_accessor, ids_set=ids_set, use_rich=True
)
json_transformer.build()

# Build path migration map for version upgrades
# This enables migration suggestions for deprecated paths
resolved_dd_version = dd_version or str(dd_accessor.get_version())
print(f"Building path migration map for version: {resolved_dd_version}")

from imas_mcp.resource_path_accessor import ResourcePathAccessor

path_accessor = ResourcePathAccessor(dd_version=resolved_dd_version)
migrations_dir = path_accessor.migrations_dir
migration_file = migrations_dir / "path_migrations.json"

import json

migration_data = build_migration_map(
target_version=resolved_dd_version,
ids_filter=ids_set,
verbose=True,
)

with open(migration_file, "w") as f:
json.dump(migration_data, f, indent=2)

print(
f"Built migration map with "
f"{migration_data['metadata']['total_migrations']} migrations"
)
6 changes: 4 additions & 2 deletions imas_mcp/embeddings/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
# Load .env file for local development
from dotenv import load_dotenv

from imas_mcp.settings import get_imas_embedding_model

load_dotenv() # Load .env file values (does not override existing env vars)


# Define constants
IMAS_MCP_EMBEDDING_MODEL = os.getenv("IMAS_MCP_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
# Define constants - uses pyproject.toml defaults with env var override
IMAS_MCP_EMBEDDING_MODEL = get_imas_embedding_model()


@dataclass
Expand Down
174 changes: 174 additions & 0 deletions imas_mcp/migrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
"""
Path migration utilities for IMAS Data Dictionary version upgrades.

This module provides access to the build-time generated migration map,
enabling path migration suggestions and rename history lookups.
"""

import json
import logging
from dataclasses import dataclass
from functools import lru_cache

from imas_mcp import dd_version
from imas_mcp.resource_path_accessor import ResourcePathAccessor

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class MigrationEntry:
    """Information about a path migration from old to new version."""

    # Replacement path in the target DD version, or None when no replacement
    # is recorded (per the str | None annotation).
    new_path: str | None
    # DD version string in which the old path was deprecated.
    deprecated_in: str
    # Last DD version string in which the old path was still valid.
    last_valid_version: str


@dataclass(frozen=True)
class RenameHistoryEntry:
    """Information about a path that was renamed to the current path."""

    # Former path that was renamed to the current path.
    old_path: str
    # DD version string in which the old path was deprecated.
    deprecated_in: str


class PathMigrationMap:
"""
Provides access to path migration data for version upgrades.

Loads the build-time generated migration map and provides lookup methods
for both forward (old→new) and reverse (new→old) path mappings.
"""

def __init__(
self,
dd_version: str = dd_version,
migration_data: dict | None = None,
):
"""
Initialize the migration map.

Args:
dd_version: The DD version to load migrations for.
migration_data: Optional pre-loaded migration data (for testing).
"""
self._dd_version = dd_version
self._data: dict | None = migration_data
self._loaded = migration_data is not None

def _ensure_loaded(self) -> None:
"""Load migration data from disk if not already loaded."""
if self._loaded:
return

path_accessor = ResourcePathAccessor(dd_version=self._dd_version)
migration_file = path_accessor.migrations_dir / "path_migrations.json"

if not migration_file.exists():
logger.warning(
f"Migration file not found: {migration_file}. "
"Run 'build-migrations' to generate it."
)
self._data = {"old_to_new": {}, "new_to_old": {}, "metadata": {}}
self._loaded = True
return

try:
with open(migration_file) as f:
self._data = json.load(f)
logger.debug(
f"Loaded migration map with "
f"{len(self._data.get('old_to_new', {}))} migrations"
)
except (json.JSONDecodeError, OSError) as e:
logger.error(f"Failed to load migration file: {e}")
self._data = {"old_to_new": {}, "new_to_old": {}, "metadata": {}}

self._loaded = True

def get_migration(self, old_path: str) -> MigrationEntry | None:
"""
Get migration info for an old path.

Args:
old_path: The old path to look up (e.g., "equilibrium/time_slice/...").

Returns:
MigrationEntry with new_path, deprecated_in, and last_valid_version,
or None if no migration exists.
"""
self._ensure_loaded()

if self._data is None:
return None

entry = self._data.get("old_to_new", {}).get(old_path)
if entry is None:
return None

return MigrationEntry(
new_path=entry.get("new_path"),
deprecated_in=entry.get("deprecated_in", ""),
last_valid_version=entry.get("last_valid_version", ""),
)

def get_rename_history(self, new_path: str) -> list[RenameHistoryEntry]:
"""
Get rename history for a current path.

Args:
new_path: The current path to look up.

Returns:
List of RenameHistoryEntry objects for paths that were renamed
to this path, or empty list if no history.
"""
self._ensure_loaded()

if self._data is None:
return []

entries = self._data.get("new_to_old", {}).get(new_path, [])
return [
RenameHistoryEntry(
old_path=entry.get("old_path", ""),
deprecated_in=entry.get("deprecated_in", ""),
)
for entry in entries
]

@property
def metadata(self) -> dict:
"""Get migration map metadata."""
self._ensure_loaded()
return self._data.get("metadata", {}) if self._data else {}

@property
def total_migrations(self) -> int:
"""Get total number of migrations in the map."""
return self.metadata.get("total_migrations", 0)

@property
def target_version(self) -> str:
"""Get the target DD version for migrations."""
return self.metadata.get("target_version", "")


@lru_cache(maxsize=1)
def get_migration_map() -> PathMigrationMap:
    """
    Get the singleton PathMigrationMap instance.

    The lru_cache(maxsize=1) decorator makes this a process-wide singleton;
    tests can reset it via ``get_migration_map.cache_clear()``.

    Returns:
        PathMigrationMap for the current DD version.
    """
    return PathMigrationMap()


# Public API of the migrations package.
__all__ = [
    "MigrationEntry",
    "RenameHistoryEntry",
    "PathMigrationMap",
    "get_migration_map",
]
5 changes: 5 additions & 0 deletions imas_mcp/resource_path_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ def mermaid_dir(self) -> Path:
"""Get the mermaid graphs directory (imas_data_dictionary/{version}/mermaid/)."""
return self._get_subdir_path("mermaid", create=True)

@cached_property
def migrations_dir(self) -> Path:
"""Get the migrations directory (imas_data_dictionary/{version}/migrations/)."""
return self._get_subdir_path("migrations", create=True)

@property
def version(self) -> str:
"""Get the DD version string."""
Expand Down
3 changes: 2 additions & 1 deletion imas_mcp/services/docs_server_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from dotenv import load_dotenv

from imas_mcp.exceptions import DocsServerError
from imas_mcp.settings import get_docs_embedding_model

# Load environment variables from .env file
load_dotenv()
Expand Down Expand Up @@ -386,7 +387,7 @@ async def _start_docs_server_process(self) -> None:
env.update(
{
"DOCS_MCP_EMBEDDING_MODEL": env.get(
"DOCS_MCP_EMBEDDING_MODEL", "openai/text-embedding-3-small"
"DOCS_MCP_EMBEDDING_MODEL", get_docs_embedding_model()
),
"DOCS_MCP_TELEMETRY": env.get("DOCS_MCP_TELEMETRY", "false"),
"DOCS_MCP_STORE_PATH": str(self.store_path),
Expand Down
Loading
Loading