From 3324008f0538d9f23b981863d04b2051afa39e22 Mon Sep 17 00:00:00 2001 From: RaphFrey Date: Mon, 24 Nov 2025 15:16:06 +0100 Subject: [PATCH 1/9] Add update_path method to PathTool for version migration --- imas_mcp/tools/__init__.py | 4 ++ imas_mcp/tools/path_tool.py | 116 ++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) diff --git a/imas_mcp/tools/__init__.py b/imas_mcp/tools/__init__.py index d3212ba..94c8593 100644 --- a/imas_mcp/tools/__init__.py +++ b/imas_mcp/tools/__init__.py @@ -114,6 +114,10 @@ async def export_physics_domain(self, *args, **kwargs): """Delegate to export tool.""" return await self.export_tool.export_physics_domain(*args, **kwargs) + async def update_path(self, *args, **kwargs): + """Delegate to path tool.""" + return await self.path_tool.update_path(*args, **kwargs) + __all__ = [ "BaseTool", diff --git a/imas_mcp/tools/path_tool.py b/imas_mcp/tools/path_tool.py index 01fa33e..7af8686 100644 --- a/imas_mcp/tools/path_tool.py +++ b/imas_mcp/tools/path_tool.py @@ -7,8 +7,11 @@ import logging from typing import Any +import imas from fastmcp import Context +from imas.exception import UnknownDDVersion +from imas_mcp import dd_version from imas_mcp.models.constants import SearchMode from imas_mcp.models.result_models import IdsPathResult from imas_mcp.search.decorators import handle_errors, mcp_tool, measure_performance @@ -300,3 +303,116 @@ async def fetch_imas_paths( f"Path retrieval completed: {found_count}/{len(paths_list)} retrieved" ) return result + + @mcp_tool( + "Find a new IMAS path version for a given path. " + "paths: space-delimited string or list of paths. " + "ids_name: optional IDS name(s) - single IDS for all paths or one per path. " + "version: optional target DD version (defaults to searching all versions)" + ) + async def update_imas_path( + self, + paths: str | list[str], + ids_name: str | list[str] | None = None, + source_dd_version: str | None = None, + ctx: Context | None = None, + ) -> list[str]: + """ + Find updated IMAS path versions across different data dictionary versions. + + Searches for equivalent paths in newer (or specified) DD versions, useful for + migration and version compatibility analysis. + + Args: + paths: One or more IMAS paths to convert. Accepts either: + - Space-delimited string: "time_slice/boundary/psi time_slice/boundary/psi_norm" + - List of paths: ["time_slice/boundary/psi", "profiles_1d/electrons/temperature"] + ids_name: Optional IDS name(s) to use with the paths. Can be: + - Single IDS name (string): Applied to all paths + - List of IDS names: One for each path (must match length of paths) + - Space-delimited string: Multiple IDS names corresponding to paths + - None: Searches across all IDS types (slower) + version: Optional target DD version to convert to. If None, searches all versions + from newest to oldest to find the first match. + ctx: FastMCP context for potential future enhancements + + Returns: + List of converted path strings, one for each input path. Paths that cannot be + found in any version return "PATH not found in any IMAS version". 
+ + Examples: + Single path with IDS: + update_path("time_slice/boundary/psi", ids_name="equilibrium") + → ["time_slice/boundary/psi"] or updated path if changed + + Multiple paths with single IDS: + update_path("time_slice/boundary/psi time_slice/boundary/psi_norm", ids_name="equilibrium") + → List of converted paths + + Multiple paths with multiple IDS: + update_path(["time_slice/boundary/psi", "profiles_1d/electrons/temperature"], + ids_name=["equilibrium", "core_profiles"]) + + Search without IDS (slow): + update_path("time_slice/boundary/psi") + → Searches all IDS types for matching path + + Note: + Specifying ids_name significantly improves performance by limiting the search scope. + """ + if isinstance(paths, str): + paths = paths.split(" ") + if isinstance(ids_name, str): + ids_name = ids_name.split(" ") + + version_list = ( + imas.dd_zip.dd_xml_versions()[::-1] + if source_dd_version is None + else [source_dd_version] + ) + new_paths = len(paths) * [None] + + for v in version_list: + try: + if ids_name is None: + for ids in imas.IDSFactory(v).ids_names(): + new_paths = [ + new_paths[idx] + if new_paths[idx] + else self._convert_path(p, ids, v) + for idx, p in enumerate(paths) + ] + elif len(paths) == len(ids_name): + new_paths = [ + new_paths[idx] + if new_paths[idx] + else self._convert_path(p, i, v) + for idx, (p, i) in enumerate(zip(paths, ids_name, strict=True)) + ] + elif len(ids_name) == 1: + new_paths = [ + new_paths[idx] + if new_paths[idx] + else self._convert_path(p, ids_name[0], v) + for idx, p in enumerate(paths) + ] + else: + raise ValueError( + "ids_name length must be 1 or equal to paths length" + ) + except UnknownDDVersion as e: + logger.info(e) + continue + + return [ + n if n else "PATH not found in any IMAS version" + for i, n in enumerate(new_paths) + ] + + def _convert_path(self, path: str, ids: str, version: str) -> str | None: + """Helper to prefix path with IDS name if provided.""" + version_map, _ = imas.ids_convert.dd_version_map_from_factories( + ids, imas.IDSFactory(dd_version), imas.IDSFactory(version) + ) + new_path = version_map.old_to_new.path.get(path) + return new_path From 438cec52aa47071080ef6be5468d6861e0687bf3 Mon Sep 17 00:00:00 2001 From: RaphFrey Date: Mon, 24 Nov 2025 15:29:37 +0100 Subject: [PATCH 2/9] Rename update_path method to update_imas_path for clarity in Tools class --- imas_mcp/tools/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imas_mcp/tools/__init__.py b/imas_mcp/tools/__init__.py index 6984750..bfcb14c 100644 --- a/imas_mcp/tools/__init__.py +++ b/imas_mcp/tools/__init__.py @@ -132,9 +132,9 @@ async def export_physics_domain(self, *args, **kwargs): """Delegate to export tool.""" return await self.export_tool.export_physics_domain(*args, **kwargs) - async def update_path(self, *args, **kwargs): + async def update_imas_path(self, *args, **kwargs): """Delegate to path tool.""" - return await self.path_tool.update_path(*args, **kwargs) + return await self.path_tool.update_imas_path(*args, **kwargs) # Documentation search delegation methods async def search_docs(self, *args, **kwargs): From 63ffeeb921c0892f3f36aacfeaf23a81859e8357 Mon Sep 17 00:00:00 2001 From: RaphFrey Date: Tue, 25 Nov 2025 12:14:42 +0100 Subject: [PATCH 3/9] Refactor update_imas_path method in PathTool for improved clarity and functionality; add tests for path updates and include imas-python dependency in project configuration. 
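
Narrow ids_name to a single optional IDS name applied to all paths and
convert toward the served dd_version instead of away from it. A usage
sketch mirroring the new tests (bpol_probe was renamed to
b_field_pol_probe in DD 4.0):

    result = await path_tool.update_imas_path(
        "time_slice/constraints/bpol_probe",
        ids_name="equilibrium",
        source_dd_version="3.39.0",
    )
    # result == ["time_slice/constraints/b_field_pol_probe"]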
--- imas_mcp/tools/path_tool.py | 80 ++++++++++++------------------------- pyproject.toml | 1 + tests/test_path_tool.py | 68 ++++++++++++++++++++++++++++++- uv.lock | 45 ++++++++++++++++++++- 4 files changed, 137 insertions(+), 57 deletions(-) diff --git a/imas_mcp/tools/path_tool.py b/imas_mcp/tools/path_tool.py index 7af8686..61b6a76 100644 --- a/imas_mcp/tools/path_tool.py +++ b/imas_mcp/tools/path_tool.py @@ -313,57 +313,23 @@ async def fetch_imas_paths( async def update_imas_path( self, paths: str | list[str], - ids_name: str | list[str] | None = None, + ids_name: str | None = None, source_dd_version: str | None = None, ctx: Context | None = None, ) -> list[str]: """ - Find updated IMAS path versions across different data dictionary versions. - - Searches for equivalent paths in newer (or specified) DD versions, useful for - migration and version compatibility analysis. - + update the IMAS path to the served dd_version. Args: - paths: One or more IMAS paths to convert. Accepts either: + paths: One or more IMAS paths to update. Accepts either: - Space-delimited string: "time_slice/boundary/psi time_slice/boundary/psi_norm" - List of paths: ["time_slice/boundary/psi", "profiles_1d/electrons/temperature"] - ids_name: Optional IDS name(s) to use with the paths. Can be: - - Single IDS name (string): Applied to all paths - - List of IDS names: One for each path (must match length of paths) - - Space-delimited string: Multiple IDS names corresponding to paths - - None: Searches across all IDS types (slower) - version: Optional target DD version to convert to. If None, searches all versions - from newest to oldest to find the first match. - ctx: FastMCP context for potential future enhancements - - Returns: - List of converted path strings, one for each input path. Paths that cannot be - found in any version return "PATH not found in any IMAS version". - - Examples: - Single path with IDS: - update_path("time_slice/boundary/psi", ids_name="equilibrium") - → ["time_slice/boundary/psi"] or updated path if changed - - Multiple paths with single IDS: - update_path("time_slice/boundary/psi time_slice/boundary/psi_norm", ids_name="equilibrium") - → List of converted paths - - Multiple paths with multiple IDS: - update_path(["time_slice/boundary/psi", "profiles_1d/electrons/temperature"], - ids_name=["equilibrium", "core_profiles"]) - - Search without IDS (slow): - update_path("time_slice/boundary/psi") - → Searches all IDS types for matching path - - Note: - Specifying ids_name significantly improves performance by limiting the search scope. + ids_name: Optional IDS name(s) corresponding to the paths. + Can be a single IDS name applied to all paths or a list matching the length of paths. + source_dd_version: Optional source DD version to start the search from. + If not provided, searches from the latest version downwards. 
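+        Returns:
+            List of updated path strings; a path with no mapping in any
+            searched version is returned as "PATH not found in any IMAS version".
+        Example (a sketch mirroring the tests; bpol_probe was renamed in DD 4.0):
+            update_imas_path("time_slice/constraints/bpol_probe",
+                ids_name="equilibrium", source_dd_version="3.39.0")
+            -> ["time_slice/constraints/b_field_pol_probe"]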
""" if isinstance(paths, str): paths = paths.split(" ") - if isinstance(ids_name, str): - ids_name = ids_name.split(" ") version_list = ( imas.dd_zip.dd_xml_versions()[::-1] @@ -372,6 +338,8 @@ async def update_imas_path( ) new_paths = len(paths) * [None] + logger.info(f"to version {dd_version}") + for v in version_list: try: if ids_name is None: @@ -382,27 +350,19 @@ async def update_imas_path( else self._convert_path(p, ids, v) for idx, p in enumerate(paths) ] - elif len(paths) == len(ids_name): - new_paths = [ - new_paths[idx] - if new_paths[idx] - else self._convert_path(p, i, v) - for idx, (p, i) in enumerate(zip(paths, ids_name, strict=True)) - ] - elif len(ids_name) == 1: + else: new_paths = [ new_paths[idx] if new_paths[idx] - else self._convert_path(p, ids_name[0], v) + else self._convert_path(p, ids_name, v) for idx, p in enumerate(paths) ] - else: - raise ValueError( - "ids_name length must be 1 or equal to paths length" - ) except UnknownDDVersion as e: logger.info(e) continue + except ValueError as e: + logger.info(e) + continue return [ n if n else "PATH not found in any IMAS version" @@ -411,8 +371,18 @@ async def update_imas_path( def _convert_path(self, path: str, ids: str, version: str) -> str | None: """Helper to prefix path with IDS name if provided.""" + if imas.dd_zip.parse_dd_version(version) > imas.dd_zip.parse_dd_version( + dd_version + ): + raise ValueError( + f"Source DD version {version} is older than served version {dd_version}" + ) + elif imas.dd_zip.parse_dd_version(version) == imas.dd_zip.parse_dd_version( + dd_version + ): + return path version_map, _ = imas.ids_convert.dd_version_map_from_factories( - ids, imas.IDSFactory(dd_version), imas.IDSFactory(version) + ids, imas.IDSFactory(version), imas.IDSFactory(dd_version) ) new_path = version_map.old_to_new.path.get(path) return new_path diff --git a/pyproject.toml b/pyproject.toml index 815fc03..896ec44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ dependencies = [ "anyio>=4.0.0,<5.0.0", "scikit-learn>=1.7.2", "psutil>=6.1.1", + "imas-python>=2.0.1", ] [project.optional-dependencies] diff --git a/tests/test_path_tool.py b/tests/test_path_tool.py index 0eb1e88..2cc69d2 100644 --- a/tests/test_path_tool.py +++ b/tests/test_path_tool.py @@ -6,13 +6,79 @@ from imas_mcp.tools import PathTool +def _full_path(ids: str, path: str) -> str: + return f"{ids}/{path}" + + @pytest.fixture -def path_tool(): +def path_tool() -> PathTool: """Create a PathTool instance for testing.""" doc_store = DocumentStore() return PathTool(doc_store) +@pytest.fixture +def outdated_paths_with_ids() -> tuple[str, list[str]]: + return "equilibrium", [ + "time_slice/constraints/bpol_probe", + "time_slice/constraints/bpol_probe", + ] + + +@pytest.fixture +def new_paths_with_ids() -> tuple[str, list[str]]: + return "equilibrium", [ + "time_slice/constraints/b_field_pol_probe", + "time_slice/constraints/b_field_pol_probe", + ] + + +@pytest.mark.asyncio +async def test_update_imas_path_tool_single( + path_tool: PathTool, + outdated_paths_with_ids: tuple[str, list[str]], + new_paths_with_ids: tuple[str, list[str]], +): + """Basic test to ensure PathTool is instantiated correctly.""" + _, outdated = outdated_paths_with_ids + ids, new = new_paths_with_ids + + for o, n in zip(outdated, new, strict=False): + result = path_tool._convert_path(o, ids=ids, version="3.39.0") + assert result == n + + +@pytest.mark.asyncio +async def test_update_imas_path_tool_list( + path_tool: PathTool, + outdated_paths_with_ids: tuple[str, 
list[str]], + new_paths_with_ids: tuple[str, list[str]], +): + _, outdated = outdated_paths_with_ids + ids, new = new_paths_with_ids + + result = await path_tool.update_imas_path( + outdated, ids_name=ids, source_dd_version="3.39.0" + ) + assert result == new + + +@pytest.mark.asyncio +async def test_update_imas_path_tool_spaced_list( + path_tool: PathTool, + outdated_paths_with_ids: tuple[str, list[str]], + new_paths_with_ids: tuple[str, list[str]], +): + """Test updating IMAS paths provided as space-separated string.""" + _, outdated = outdated_paths_with_ids + ids, new = new_paths_with_ids + + result = await path_tool.update_imas_path( + " ".join(outdated), ids_name=ids, source_dd_version="3.39.0" + ) + assert result == new + + @pytest.mark.asyncio async def test_single_valid_path(path_tool): """Test validation of a single existing path.""" diff --git a/uv.lock b/uv.lock index cd25334..ec5b509 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" [[package]] @@ -756,6 +756,7 @@ dependencies = [ { name = "fastmcp" }, { name = "huggingface-hub", extra = ["hf-xet"] }, { name = "imas-data-dictionaries" }, + { name = "imas-python" }, { name = "nest-asyncio" }, { name = "networkx" }, { name = "numpy" }, @@ -816,6 +817,7 @@ requires-dist = [ { name = "fastmcp", specifier = ">=2.12.0" }, { name = "huggingface-hub", extras = ["hf-xet"], specifier = ">=0.33.4" }, { name = "imas-data-dictionaries", specifier = ">=4.1.0" }, + { name = "imas-python", specifier = ">=2.0.1" }, { name = "nest-asyncio", specifier = ">=1.5.0,<2.0.0" }, { name = "networkx", specifier = ">=3.0,<4.0" }, { name = "numpy", specifier = ">=2.3.1" }, @@ -855,6 +857,24 @@ dev = [ { name = "tqdm-stubs", specifier = ">=0.2.1" }, ] +[[package]] +name = "imas-python" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "imas-data-dictionaries" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "rich" }, + { name = "scipy" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2f/9d/519fcb1fffaed5482bc575cc45cad7753ef89d1c4f65c5ba490768b84f45/imas_python-2.0.1.tar.gz", hash = "sha256:8448e50390a6e7955dca2ee9b821408bbb0c7d6e901740463ca5d5a7cc018a54", size = 2738406, upload-time = "2025-06-18T12:10:34.679Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/a4/1cfcab270c7b35891a239219f8a3b122307ba4677def11540e5437753578/imas_python-2.0.1-py3-none-any.whl", hash = "sha256:27130247d4dd3385c69debeddfd4c2680d9bd16cedc2f81b1a045478e55f86ed", size = 2394289, upload-time = "2025-06-18T12:10:32.872Z" }, +] + [[package]] name = "importlib-metadata" version = "8.7.0" @@ -2645,6 +2665,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ca/51/5447876806d1088a0f8f71e16542bf350918128d0a69437df26047c8e46f/widgetsnbextension-4.0.14-py3-none-any.whl", hash = "sha256:4875a9eaf72fbf5079dc372a51a9f268fc38d46f767cbf85c43a36da5cb9b575", size = 2196503, upload-time = "2025-04-10T13:01:23.086Z" }, ] +[[package]] +name = "xxhash" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9a/07/d9412f3d7d462347e4511181dea65e47e0d0e16e26fbee2ea86a2aefb657/xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c", size = 32744, upload-time = "2025-10-02T14:34:34.622Z" }, + { url = "https://files.pythonhosted.org/packages/79/35/0429ee11d035fc33abe32dca1b2b69e8c18d236547b9a9b72c1929189b9a/xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204", size = 30816, upload-time = "2025-10-02T14:34:36.043Z" }, + { url = "https://files.pythonhosted.org/packages/b7/f2/57eb99aa0f7d98624c0932c5b9a170e1806406cdbcdb510546634a1359e0/xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490", size = 194035, upload-time = "2025-10-02T14:34:37.354Z" }, + { url = "https://files.pythonhosted.org/packages/4c/ed/6224ba353690d73af7a3f1c7cdb1fc1b002e38f783cb991ae338e1eb3d79/xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2", size = 212914, upload-time = "2025-10-02T14:34:38.6Z" }, + { url = "https://files.pythonhosted.org/packages/38/86/fb6b6130d8dd6b8942cc17ab4d90e223653a89aa32ad2776f8af7064ed13/xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa", size = 212163, upload-time = "2025-10-02T14:34:39.872Z" }, + { url = "https://files.pythonhosted.org/packages/ee/dc/e84875682b0593e884ad73b2d40767b5790d417bde603cceb6878901d647/xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0", size = 445411, upload-time = "2025-10-02T14:34:41.569Z" }, + { url = "https://files.pythonhosted.org/packages/11/4f/426f91b96701ec2f37bb2b8cec664eff4f658a11f3fa9d94f0a887ea6d2b/xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2", size = 193883, upload-time = "2025-10-02T14:34:43.249Z" }, + { url = "https://files.pythonhosted.org/packages/53/5a/ddbb83eee8e28b778eacfc5a85c969673e4023cdeedcfcef61f36731610b/xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9", size = 210392, upload-time = "2025-10-02T14:34:45.042Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c2/ff69efd07c8c074ccdf0a4f36fcdd3d27363665bcdf4ba399abebe643465/xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e", size = 197898, upload-time = "2025-10-02T14:34:46.302Z" }, + { url = "https://files.pythonhosted.org/packages/58/ca/faa05ac19b3b622c7c9317ac3e23954187516298a091eb02c976d0d3dd45/xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374", size = 210655, upload-time = "2025-10-02T14:34:47.571Z" }, + { url = "https://files.pythonhosted.org/packages/d4/7a/06aa7482345480cc0cb597f5c875b11a82c3953f534394f620b0be2f700c/xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d", size = 414001, upload-time = "2025-10-02T14:34:49.273Z" }, + { url = "https://files.pythonhosted.org/packages/23/07/63ffb386cd47029aa2916b3d2f454e6cc5b9f5c5ada3790377d5430084e7/xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae", size = 191431, upload-time = "2025-10-02T14:34:50.798Z" }, + { url = "https://files.pythonhosted.org/packages/0f/93/14fde614cadb4ddf5e7cebf8918b7e8fac5ae7861c1875964f17e678205c/xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb", size = 30617, upload-time = "2025-10-02T14:34:51.954Z" }, + { url = "https://files.pythonhosted.org/packages/13/5d/0d125536cbe7565a83d06e43783389ecae0c0f2ed037b48ede185de477c0/xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c", size = 31534, upload-time = "2025-10-02T14:34:53.276Z" }, + { url = "https://files.pythonhosted.org/packages/54/85/6ec269b0952ec7e36ba019125982cf11d91256a778c7c3f98a4c5043d283/xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829", size = 27876, upload-time = "2025-10-02T14:34:54.371Z" }, +] + [[package]] name = "yarl" version = "1.22.0" From 6fe60cf54356cc07824adea2b42e32211df8b1a0 Mon Sep 17 00:00:00 2001 From: RaphFrey Date: Tue, 2 Dec 2025 14:37:59 +0100 Subject: [PATCH 4/9] added new caching and ensure current path tool --- imas_mcp/search/decorators/__init__.py | 3 +- imas_mcp/search/decorators/cache.py | 40 +++++++++ imas_mcp/tools/path_tool.py | 111 +++++++++++-------------- 3 files changed, 90 insertions(+), 64 deletions(-) diff --git a/imas_mcp/search/decorators/__init__.py b/imas_mcp/search/decorators/__init__.py index 0c9025c..3eef675 100644 --- a/imas_mcp/search/decorators/__init__.py +++ b/imas_mcp/search/decorators/__init__.py @@ -5,7 +5,7 @@ like caching, validation, sampling, performance monitoring, and error handling. """ -from .cache import cache_results, clear_cache, get_cache_stats +from .cache import cache_results, clear_cache, get_cache_stats, persistent_cache from .error_handling import ( SearchError, ServiceError, @@ -25,6 +25,7 @@ "cache_results", "clear_cache", "get_cache_stats", + "persistent_cache", # Validation decorators "validate_input", "create_validation_schema", diff --git a/imas_mcp/search/decorators/cache.py b/imas_mcp/search/decorators/cache.py index 8f669b0..832d134 100644 --- a/imas_mcp/search/decorators/cache.py +++ b/imas_mcp/search/decorators/cache.py @@ -7,11 +7,15 @@ import functools import hashlib import json +import logging +import os import time from collections import OrderedDict from collections.abc import Callable from typing import Any, TypeVar +from imas_mcp import dd_version + F = TypeVar("F", bound=Callable[..., Any]) @@ -203,3 +207,39 @@ def get_cache_stats() -> dict[str, Any]: "size": _cache.size(), "max_size": _cache.max_size, } + + +def persistent_cache(filename="migration_cache.json"): + def decorator(func): + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + cache = {} + if os.path.exists(filename): + try: + logging.info(f"Loading cache from {filename}") + with open(filename) as f: + cache = json.load(f) + except (json.JSONDecodeError, OSError): + logging.warning( + f"Failed to load cache from {filename}, starting with empty cache." 
+ ) + pass + + if dd_version in cache: + logging.info(f"Using cached migration map for version {dd_version}") + return cache[dd_version] + + result = func(self, *args, **kwargs) + + cache[dd_version] = result + try: + with open(filename, "w") as f: + json.dump(cache, f) + except OSError: + logging.warning(f"Failed to write cache to {filename}") + + return result + + return wrapper + + return decorator diff --git a/imas_mcp/tools/path_tool.py b/imas_mcp/tools/path_tool.py index 61b6a76..379b6ca 100644 --- a/imas_mcp/tools/path_tool.py +++ b/imas_mcp/tools/path_tool.py @@ -9,12 +9,17 @@ import imas from fastmcp import Context -from imas.exception import UnknownDDVersion from imas_mcp import dd_version from imas_mcp.models.constants import SearchMode from imas_mcp.models.result_models import IdsPathResult -from imas_mcp.search.decorators import handle_errors, mcp_tool, measure_performance +from imas_mcp.search.decorators import ( + handle_errors, + mcp_tool, + measure_performance, + persistent_cache, +) +from imas_mcp.search.document_store import DocumentStore from .base import BaseTool @@ -24,6 +29,12 @@ class PathTool(BaseTool): """Tool for IMAS path validation and data retrieval.""" + def __init__(self, document_store: DocumentStore | None = None): + super().__init__(document_store=document_store) + self.supported_versions = imas.dd_zip.dd_xml_versions() + self.path_map = self._build_migration_map() + logging.info(f"Upgrade map built for target version {dd_version}.") + @property def tool_name(self) -> str: """Return the name of this tool.""" @@ -304,19 +315,14 @@ async def fetch_imas_paths( ) return result - @mcp_tool( - "Find a new IMAS path version for a given path. " - "paths: space-delimited string or list of paths. " - "ids_name: optional IDS name(s) - single IDS for all paths or one per path. " - "version: optional target DD version (defaults to searching all versions)" - ) - async def update_imas_path( + @mcp_tool("Ensure paths are updated to the current dd_version.") + async def ensure_current_path( self, paths: str | list[str], ids_name: str | None = None, source_dd_version: str | None = None, ctx: Context | None = None, - ) -> list[str]: + ) -> list[str | None]: """ update the IMAS path to the served dd_version. Args: @@ -325,64 +331,43 @@ async def update_imas_path( - List of paths: ["time_slice/boundary/psi", "profiles_1d/electrons/temperature"] ids_name: Optional IDS name(s) corresponding to the paths. Can be a single IDS name applied to all paths or a list matching the length of paths. - source_dd_version: Optional source DD version to start the search from. - If not provided, searches from the latest version downwards. + Returns: + List of paths existing in the target dd_version, or None if no mapping exists. 
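+        Example (mirrors the unit tests; bpol_probe was renamed in DD 4.0):
+            await self.ensure_current_path(
+                "time_slice/constraints/bpol_probe", ids_name="equilibrium"
+            )
+            # -> ["equilibrium/time_slice/constraints/b_field_pol_probe"]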
+ """ if isinstance(paths, str): paths = paths.split(" ") - version_list = ( - imas.dd_zip.dd_xml_versions()[::-1] - if source_dd_version is None - else [source_dd_version] - ) - new_paths = len(paths) * [None] - - logger.info(f"to version {dd_version}") - - for v in version_list: - try: - if ids_name is None: - for ids in imas.IDSFactory(v).ids_names(): - new_paths = [ - new_paths[idx] - if new_paths[idx] - else self._convert_path(p, ids, v) - for idx, p in enumerate(paths) - ] - else: - new_paths = [ - new_paths[idx] - if new_paths[idx] - else self._convert_path(p, ids_name, v) - for idx, p in enumerate(paths) - ] - except UnknownDDVersion as e: - logger.info(e) - continue - except ValueError as e: - logger.info(e) + result = [] + for path in paths: + full_path = f"{ids_name}/{path}" if ids_name else path + document = self.document_store.get_document(full_path) + if not document: + result.append(self.path_map.get(full_path if ids_name else path, None)) continue + result.append(path) + return result - return [ - n if n else "PATH not found in any IMAS version" - for i, n in enumerate(new_paths) + @persistent_cache("../resources/imas_path_migration_cache.json") + def _build_migration_map(self) -> dict[str, str | None]: + """Build old_path -> new_path mapping for all IDS from all older versions to target.""" + version_factories: list[imas.IDSFactory] = [ + imas.IDSFactory(v) for v in self.supported_versions if v < dd_version ] + target_factory = imas.IDSFactory(dd_version) + path_map: dict[str, str | None] = {} - def _convert_path(self, path: str, ids: str, version: str) -> str | None: - """Helper to prefix path with IDS name if provided.""" - if imas.dd_zip.parse_dd_version(version) > imas.dd_zip.parse_dd_version( - dd_version - ): - raise ValueError( - f"Source DD version {version} is older than served version {dd_version}" - ) - elif imas.dd_zip.parse_dd_version(version) == imas.dd_zip.parse_dd_version( - dd_version - ): - return path - version_map, _ = imas.ids_convert.dd_version_map_from_factories( - ids, imas.IDSFactory(version), imas.IDSFactory(dd_version) - ) - new_path = version_map.old_to_new.path.get(path) - return new_path + for old_factory in version_factories: + for ids in old_factory.ids_names(): + if ids not in target_factory.ids_names(): + continue # IDS may not exist in new version -> cannot produce mapping (circumvent error handling for speedup) + + version_map, _ = imas.ids_convert.dd_version_map_from_factories( + ids, old_factory, target_factory + ) + + for old_path, new_path in version_map.old_to_new.path.items(): + full_old = f"{ids}/{old_path}" + full_new = f"{ids}/{new_path}" if new_path else None + path_map[full_old] = full_new + return path_map From 1165c4c47315a4f44d548fb29c3c822ef655f706 Mon Sep 17 00:00:00 2001 From: RaphFrey Date: Tue, 2 Dec 2025 15:42:32 +0100 Subject: [PATCH 5/9] Fixed tests --- imas_mcp/tools/path_tool.py | 4 ++-- tests/test_path_tool.py | 21 ++++++++++----------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/imas_mcp/tools/path_tool.py b/imas_mcp/tools/path_tool.py index 379b6ca..b0435b8 100644 --- a/imas_mcp/tools/path_tool.py +++ b/imas_mcp/tools/path_tool.py @@ -338,7 +338,7 @@ async def ensure_current_path( if isinstance(paths, str): paths = paths.split(" ") - result = [] + result: list[str | None] = [] for path in paths: full_path = f"{ids_name}/{path}" if ids_name else path document = self.document_store.get_document(full_path) @@ -348,7 +348,7 @@ async def ensure_current_path( result.append(path) return result - 
@persistent_cache("../resources/imas_path_migration_cache.json") + @persistent_cache("./imas_mcp/resources/imas_path_migration_cache.json") def _build_migration_map(self) -> dict[str, str | None]: """Build old_path -> new_path mapping for all IDS from all older versions to target.""" version_factories: list[imas.IDSFactory] = [ diff --git a/tests/test_path_tool.py b/tests/test_path_tool.py index 2cc69d2..88c86ed 100644 --- a/tests/test_path_tool.py +++ b/tests/test_path_tool.py @@ -34,7 +34,7 @@ def new_paths_with_ids() -> tuple[str, list[str]]: @pytest.mark.asyncio -async def test_update_imas_path_tool_single( +async def test_ensure_current_path_single( path_tool: PathTool, outdated_paths_with_ids: tuple[str, list[str]], new_paths_with_ids: tuple[str, list[str]], @@ -42,10 +42,11 @@ async def test_update_imas_path_tool_single( """Basic test to ensure PathTool is instantiated correctly.""" _, outdated = outdated_paths_with_ids ids, new = new_paths_with_ids + new = [f"{ids}/{n}" for n in new] for o, n in zip(outdated, new, strict=False): - result = path_tool._convert_path(o, ids=ids, version="3.39.0") - assert result == n + result = await path_tool.ensure_current_path(o, ids) + assert result == [n] @pytest.mark.asyncio @@ -54,12 +55,11 @@ async def test_update_imas_path_tool_list( outdated_paths_with_ids: tuple[str, list[str]], new_paths_with_ids: tuple[str, list[str]], ): - _, outdated = outdated_paths_with_ids + ids, outdated = outdated_paths_with_ids ids, new = new_paths_with_ids + new = [f"{ids}/{n}" for n in new] - result = await path_tool.update_imas_path( - outdated, ids_name=ids, source_dd_version="3.39.0" - ) + result = await path_tool.ensure_current_path(outdated, ids_name=ids) assert result == new @@ -70,12 +70,11 @@ async def test_update_imas_path_tool_spaced_list( new_paths_with_ids: tuple[str, list[str]], ): """Test updating IMAS paths provided as space-separated string.""" - _, outdated = outdated_paths_with_ids + ids, outdated = outdated_paths_with_ids ids, new = new_paths_with_ids + new = [f"{ids}/{n}" for n in new] - result = await path_tool.update_imas_path( - " ".join(outdated), ids_name=ids, source_dd_version="3.39.0" - ) + result = await path_tool.ensure_current_path(" ".join(outdated), ids_name=ids) assert result == new From e96a00bd5ac1c61c4e0ac2798301b54f68c9897a Mon Sep 17 00:00:00 2001 From: Simon McIntosh Date: Fri, 5 Dec 2025 10:47:44 +0100 Subject: [PATCH 6/9] feat: centralize embedding model configuration and allow environment variable overrides --- Dockerfile | 7 +- README.md | 12 ++- env.example | 10 ++- imas_mcp/embeddings/config.py | 6 +- imas_mcp/services/docs_server_manager.py | 3 +- imas_mcp/settings.py | 102 +++++++++++++++++++++++ pyproject.toml | 10 +++ scripts/add_docs.py | 7 +- 8 files changed, 141 insertions(+), 16 deletions(-) create mode 100644 imas_mcp/settings.py diff --git a/Dockerfile b/Dockerfile index 4091cd4..6b4cd52 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,8 +36,7 @@ ENV PYTHONPATH="/app" \ PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 \ HATCH_BUILD_NO_HOOKS=true \ - OPENAI_BASE_URL=https://openrouter.ai/api/v1 \ - IMAS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b + OPENAI_BASE_URL=https://openrouter.ai/api/v1 # Labels for image provenance LABEL imas_mcp.git_sha=${GIT_SHA} \ @@ -177,9 +176,7 @@ ENV PYTHONPATH="/app" \ DOCS_SERVER_URL=http://localhost:6280 \ DOCS_MCP_TELEMETRY=false \ DOCS_MCP_STORE_PATH=/app/data \ - OPENAI_BASE_URL=https://openrouter.ai/api/v1 \ - IMAS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b \ - 
DOCS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b + OPENAI_BASE_URL=https://openrouter.ai/api/v1 # Expose port (only needed for streamable-http transport) EXPOSE 8000 diff --git a/README.md b/README.md index 04a3e78..d6621fc 100644 --- a/README.md +++ b/README.md @@ -97,10 +97,20 @@ The IMAS MCP server supports two modes for generating embeddings: 2. **Local embeddings**: Uses sentence-transformers library - Install with `[transformers]` extra: `pip install imas-mcp[transformers]` - Runs models locally without API calls - - Example model: `all-MiniLM-L6-v2` (default) + - Example model: `all-MiniLM-L6-v2` (fallback default) **Configuration:** +Embedding model defaults are configured in `pyproject.toml` under `[tool.imas-mcp]`: + +```toml +[tool.imas-mcp] +imas-embedding-model = "openai/text-embedding-3-large" # For DD embeddings +docs-embedding-model = "openai/text-embedding-3-small" # For documentation +``` + +Environment variables override pyproject.toml settings: + ```bash # API-based (requires API key) export OPENAI_API_KEY="your-api-key" diff --git a/env.example b/env.example index b537a55..5002155 100644 --- a/env.example +++ b/env.example @@ -2,10 +2,14 @@ OPENAI_API_KEY=your_openrouter_api_key_here OPENAI_BASE_URL=https://openrouter.ai/api/v1 -# IMAS DD Embedding Model -IMAS_MCP_EMBEDDING_MODEL=qwen/qwen3-embedding-4b +# IMAS DD Embedding Model (overrides pyproject.toml [tool.imas-mcp] default) +# Default: openai/text-embedding-3-large (from pyproject.toml) +# IMAS_MCP_EMBEDDING_MODEL=openai/text-embedding-3-large + +# Docs Embedding Model (overrides pyproject.toml [tool.imas-mcp] default) +# Default: openai/text-embedding-3-small (from pyproject.toml) +# DOCS_MCP_EMBEDDING_MODEL=openai/text-embedding-3-small -DOCS_MCP_EMBEDDING_MODEL=openai/text-embedding-3-small DOCS_MCP_TELEMETRY=false DOCS_MCP_STORE_PATH=./docs-data DOCS_TIMEOUT=10 diff --git a/imas_mcp/embeddings/config.py b/imas_mcp/embeddings/config.py index d927fd2..fea8b7a 100644 --- a/imas_mcp/embeddings/config.py +++ b/imas_mcp/embeddings/config.py @@ -7,11 +7,13 @@ # Load .env file for local development from dotenv import load_dotenv +from imas_mcp.settings import get_imas_embedding_model + load_dotenv() # Load .env file values (does not override existing env vars) -# Define constants -IMAS_MCP_EMBEDDING_MODEL = os.getenv("IMAS_MCP_EMBEDDING_MODEL", "all-MiniLM-L6-v2") +# Define constants - uses pyproject.toml defaults with env var override +IMAS_MCP_EMBEDDING_MODEL = get_imas_embedding_model() @dataclass diff --git a/imas_mcp/services/docs_server_manager.py b/imas_mcp/services/docs_server_manager.py index ecb7b31..802caae 100644 --- a/imas_mcp/services/docs_server_manager.py +++ b/imas_mcp/services/docs_server_manager.py @@ -27,6 +27,7 @@ from dotenv import load_dotenv from imas_mcp.exceptions import DocsServerError +from imas_mcp.settings import get_docs_embedding_model # Load environment variables from .env file load_dotenv() @@ -386,7 +387,7 @@ async def _start_docs_server_process(self) -> None: env.update( { "DOCS_MCP_EMBEDDING_MODEL": env.get( - "DOCS_MCP_EMBEDDING_MODEL", "openai/text-embedding-3-small" + "DOCS_MCP_EMBEDDING_MODEL", get_docs_embedding_model() ), "DOCS_MCP_TELEMETRY": env.get("DOCS_MCP_TELEMETRY", "false"), "DOCS_MCP_STORE_PATH": str(self.store_path), diff --git a/imas_mcp/settings.py b/imas_mcp/settings.py new file mode 100644 index 0000000..38d7466 --- /dev/null +++ b/imas_mcp/settings.py @@ -0,0 +1,102 @@ +"""Project settings loaded from pyproject.toml [tool.imas-mcp] section. 
+ +This module provides centralized access to project configuration defaults, +with environment variable overrides for runtime flexibility. +""" + +import importlib.resources +import os +from functools import cache + +try: + import tomllib +except ImportError: + import tomli as tomllib # type: ignore[import-not-found] + + +@cache +def _load_pyproject_settings() -> dict[str, str]: + """Load settings from pyproject.toml [tool.imas-mcp] section. + + Returns: + Dictionary of settings from pyproject.toml, empty dict if not found. + """ + try: + # Try package resources first (installed package) + files = importlib.resources.files("imas_mcp") + pyproject_path = files.joinpath("..", "pyproject.toml") + + # If package resource doesn't exist, try filesystem + if not pyproject_path.is_file(): # type: ignore[union-attr] + from pathlib import Path + + # Walk up to find pyproject.toml (for development) + current = Path(__file__).resolve().parent + while current != current.parent: + candidate = current / "pyproject.toml" + if candidate.exists(): + pyproject_path = candidate + break + current = current.parent + else: + return {} + + # Read and parse the TOML file + if hasattr(pyproject_path, "read_text"): + content = pyproject_path.read_text() # type: ignore[union-attr] + else: + from pathlib import Path + + content = Path(pyproject_path).read_text() # type: ignore[arg-type] + + data = tomllib.loads(content) + return data.get("tool", {}).get("imas-mcp", {}) + except Exception: + return {} + + +def get_imas_embedding_model() -> str: + """Get the IMAS DD embedding model name. + + Priority: + 1. IMAS_MCP_EMBEDDING_MODEL environment variable + 2. pyproject.toml [tool.imas-mcp] imas-embedding-model + 3. Fallback default: all-MiniLM-L6-v2 (local model) + + Returns: + Model name string. + """ + if env_model := os.getenv("IMAS_MCP_EMBEDDING_MODEL"): + return env_model + + settings = _load_pyproject_settings() + if model := settings.get("imas-embedding-model"): + return model + + return "all-MiniLM-L6-v2" + + +def get_docs_embedding_model() -> str: + """Get the docs server embedding model name. + + Priority: + 1. DOCS_MCP_EMBEDDING_MODEL environment variable + 2. pyproject.toml [tool.imas-mcp] docs-embedding-model + 3. Fallback default: openai/text-embedding-3-small + + Returns: + Model name string. 
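+    Example (illustrative; "my/model" is a placeholder, not a real model):
+        >>> import os
+        >>> os.environ["DOCS_MCP_EMBEDDING_MODEL"] = "my/model"
+        >>> get_docs_embedding_model()
+        'my/model'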
+ """ + if env_model := os.getenv("DOCS_MCP_EMBEDDING_MODEL"): + return env_model + + settings = _load_pyproject_settings() + if model := settings.get("docs-embedding-model"): + return model + + return "openai/text-embedding-3-small" + + +# Computed defaults (for use in module-level constants) +IMAS_MCP_EMBEDDING_MODEL = get_imas_embedding_model() +DOCS_MCP_EMBEDDING_MODEL = get_docs_embedding_model() diff --git a/pyproject.toml b/pyproject.toml index b1003d6..6e73e14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,6 +127,16 @@ verbose = true ids-filter = "" imas-dd-version = "" +# Embedding model configuration +# These defaults are used during build and at runtime (can be overridden via env vars) +[tool.imas-mcp] +# Model for IMAS Data Dictionary embeddings (used for semantic search of DD paths) +# text-embedding-3-large: Higher quality, 3072 dimensions, better for the complex DD structure +imas-embedding-model = "openai/text-embedding-3-large" +# Model for documentation embeddings (used by docs-mcp-server) +# text-embedding-3-small: Faster, 1536 dimensions, suitable for general documentation +docs-embedding-model = "openai/text-embedding-3-small" + [tool.hatch.envs.test] dependencies = [ "pytest>=8.3.5,<9.0.0", diff --git a/scripts/add_docs.py b/scripts/add_docs.py index 71d4c8a..4bcd323 100644 --- a/scripts/add_docs.py +++ b/scripts/add_docs.py @@ -17,6 +17,7 @@ build_docs_server_command, get_npx_executable, ) +from imas_mcp.settings import get_docs_embedding_model # Load environment variables from .env file load_dotenv(override=True) @@ -38,10 +39,8 @@ ) @click.option( "--model", - default=lambda: os.getenv( - "DOCS_MCP_EMBEDDING_MODEL", "openai/text-embedding-3-small" - ), - help="Embedding model to use (defaults to DOCS_MCP_EMBEDDING_MODEL env var or openai/text-embedding-3-small)", + default=get_docs_embedding_model, + help="Embedding model to use (defaults to DOCS_MCP_EMBEDDING_MODEL env var or pyproject.toml setting)", ) @click.option( "--ignore-errors/--no-ignore-errors", From dff5ebbd3608b077865e2214a39847e3916381b4 Mon Sep 17 00:00:00 2001 From: Simon McIntosh Date: Fri, 5 Dec 2025 10:55:27 +0100 Subject: [PATCH 7/9] enable subagents --- .vscode/settings.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 297f184..b1d9e1c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -62,5 +62,6 @@ } }, "chat.mcp.access": "all", - "cursorpyright.analysis.typeCheckingMode": "basic" + "cursorpyright.analysis.typeCheckingMode": "basic", + "chat.customAgentInSubagent.enabled": true } From cf0d8e0598938198c386a729672f60e9e328eaed Mon Sep 17 00:00:00 2001 From: Simon McIntosh Date: Fri, 5 Dec 2025 11:06:21 +0100 Subject: [PATCH 8/9] add commit prompt --- .github/prompts/commit.prompt.md | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/prompts/commit.prompt.md diff --git a/.github/prompts/commit.prompt.md b/.github/prompts/commit.prompt.md new file mode 100644 index 0000000..e3be0bd --- /dev/null +++ b/.github/prompts/commit.prompt.md @@ -0,0 +1,33 @@ +# Commit Workflow + +Follow this workflow when committing changes: + +1. **Identify changed files** - Determine all files edited in the current session +2. **Lint and format** - Run `uv run ruff check --fix` and `uv run ruff format` on changed files +3. **Stage selectively** - Use `git add ` for specific files, never `git add -A` +4. 
**Commit with conventional format**: + - Types: `feat`, `fix`, `docs`, `refactor`, `test`, `chore` + - Subject: imperative mood, lowercase, no period + - Body: explain WHY, not just what changed +5. **Fix pre-commit errors** - Iterate until the commit is clean +6. **Push** - Push to remote + +## Conventional Commit Format + +``` +: + + +``` + +### Example + +``` +feat: add semantic search for physics domains + +Enables users to search across IDS entries using natural language +queries. Uses sentence-transformers for embedding generation and +FAISS for similarity search. + +Closes #42 +``` From b694f348edcc6663bab9f744ce9650b8b1ca312b Mon Sep 17 00:00:00 2001 From: Simon McIntosh Date: Fri, 5 Dec 2025 12:40:11 +0100 Subject: [PATCH 9/9] refactor(migrations): replace runtime caching with build-time path migration map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the runtime @persistent_cache decorator with build-time resource generation for path migrations. This aligns with the existing hatch build hook pattern used for schemas and other resources. Changes: - Add imas-python>=2.0.1 to build dependencies for dd_version_map_from_factories - Create scripts/build_migrations.py for CLI and build-time generation - Create imas_mcp/migrations/ module with PathMigrationMap, MigrationEntry, RenameHistoryEntry classes and get_migration_map() singleton - Integrate migration map generation into hatch_build_hooks.py - Add migrations_dir property to ResourcePathAccessor - Enhance check_imas_paths to return migration suggestions for deprecated paths and renamed_from history for current paths - Remove @persistent_cache decorator and runtime migration map building - Remove update_imas_path and ensure_current_path methods (functionality merged into enhanced check_imas_paths) - Update tests with MockPathMigrationMap for isolated testing The migration map JSON includes: - old_to_new: deprecated path → {new_path, deprecated_in, last_valid_version} - new_to_old: current path → list of {old_path, deprecated_in} - metadata: target_version, source_versions, generated_at, total_migrations --- hatch_build_hooks.py | 29 ++- imas_mcp/migrations/__init__.py | 174 +++++++++++++++ imas_mcp/resource_path_accessor.py | 5 + imas_mcp/search/decorators/__init__.py | 3 +- imas_mcp/search/decorators/cache.py | 40 ---- imas_mcp/tools/__init__.py | 4 - imas_mcp/tools/path_tool.py | 158 ++++++-------- pyproject.toml | 2 + scripts/build_migrations.py | 288 +++++++++++++++++++++++++ tests/test_path_tool.py | 188 +++++++++++----- 10 files changed, 702 insertions(+), 189 deletions(-) create mode 100644 imas_mcp/migrations/__init__.py create mode 100644 scripts/build_migrations.py diff --git a/hatch_build_hooks.py b/hatch_build_hooks.py index e0f9981..fa4dfb7 100644 --- a/hatch_build_hooks.py +++ b/hatch_build_hooks.py @@ -33,7 +33,7 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None: try: from imas_mcp.core.xml_parser import DataDictionaryTransformer - # from imas_mcp.structure.mermaid_generator import MermaidGraphGenerator + from scripts.build_migrations import build_migration_map finally: # Restore original sys.path @@ -76,3 +76,30 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None: dd_accessor=dd_accessor, ids_set=ids_set, use_rich=True ) json_transformer.build() + + # Build path migration map for version upgrades + # This enables migration suggestions for deprecated paths + resolved_dd_version = dd_version or 
str(dd_accessor.get_version()) + print(f"Building path migration map for version: {resolved_dd_version}") + + from imas_mcp.resource_path_accessor import ResourcePathAccessor + + path_accessor = ResourcePathAccessor(dd_version=resolved_dd_version) + migrations_dir = path_accessor.migrations_dir + migration_file = migrations_dir / "path_migrations.json" + + import json + + migration_data = build_migration_map( + target_version=resolved_dd_version, + ids_filter=ids_set, + verbose=True, + ) + + with open(migration_file, "w") as f: + json.dump(migration_data, f, indent=2) + + print( + f"Built migration map with " + f"{migration_data['metadata']['total_migrations']} migrations" + ) diff --git a/imas_mcp/migrations/__init__.py b/imas_mcp/migrations/__init__.py new file mode 100644 index 0000000..4a5f737 --- /dev/null +++ b/imas_mcp/migrations/__init__.py @@ -0,0 +1,174 @@ +""" +Path migration utilities for IMAS Data Dictionary version upgrades. + +This module provides access to the build-time generated migration map, +enabling path migration suggestions and rename history lookups. +""" + +import json +import logging +from dataclasses import dataclass +from functools import lru_cache + +from imas_mcp import dd_version +from imas_mcp.resource_path_accessor import ResourcePathAccessor + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class MigrationEntry: + """Information about a path migration from old to new version.""" + + new_path: str | None + deprecated_in: str + last_valid_version: str + + +@dataclass(frozen=True) +class RenameHistoryEntry: + """Information about a path that was renamed to the current path.""" + + old_path: str + deprecated_in: str + + +class PathMigrationMap: + """ + Provides access to path migration data for version upgrades. + + Loads the build-time generated migration map and provides lookup methods + for both forward (old→new) and reverse (new→old) path mappings. + """ + + def __init__( + self, + dd_version: str = dd_version, + migration_data: dict | None = None, + ): + """ + Initialize the migration map. + + Args: + dd_version: The DD version to load migrations for. + migration_data: Optional pre-loaded migration data (for testing). + """ + self._dd_version = dd_version + self._data: dict | None = migration_data + self._loaded = migration_data is not None + + def _ensure_loaded(self) -> None: + """Load migration data from disk if not already loaded.""" + if self._loaded: + return + + path_accessor = ResourcePathAccessor(dd_version=self._dd_version) + migration_file = path_accessor.migrations_dir / "path_migrations.json" + + if not migration_file.exists(): + logger.warning( + f"Migration file not found: {migration_file}. " + "Run 'build-migrations' to generate it." + ) + self._data = {"old_to_new": {}, "new_to_old": {}, "metadata": {}} + self._loaded = True + return + + try: + with open(migration_file) as f: + self._data = json.load(f) + logger.debug( + f"Loaded migration map with " + f"{len(self._data.get('old_to_new', {}))} migrations" + ) + except (json.JSONDecodeError, OSError) as e: + logger.error(f"Failed to load migration file: {e}") + self._data = {"old_to_new": {}, "new_to_old": {}, "metadata": {}} + + self._loaded = True + + def get_migration(self, old_path: str) -> MigrationEntry | None: + """ + Get migration info for an old path. + + Args: + old_path: The old path to look up (e.g., "equilibrium/time_slice/..."). + + Returns: + MigrationEntry with new_path, deprecated_in, and last_valid_version, + or None if no migration exists. 
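+
+        Example (rename shipped with the DD 4.0 upgrade):
+            get_migration("equilibrium/time_slice/constraints/bpol_probe")
+            -> MigrationEntry(
+                   new_path="equilibrium/time_slice/constraints/b_field_pol_probe",
+                   deprecated_in="4.0.0",
+                   last_valid_version="3.42.0",
+               )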
+ """ + self._ensure_loaded() + + if self._data is None: + return None + + entry = self._data.get("old_to_new", {}).get(old_path) + if entry is None: + return None + + return MigrationEntry( + new_path=entry.get("new_path"), + deprecated_in=entry.get("deprecated_in", ""), + last_valid_version=entry.get("last_valid_version", ""), + ) + + def get_rename_history(self, new_path: str) -> list[RenameHistoryEntry]: + """ + Get rename history for a current path. + + Args: + new_path: The current path to look up. + + Returns: + List of RenameHistoryEntry objects for paths that were renamed + to this path, or empty list if no history. + """ + self._ensure_loaded() + + if self._data is None: + return [] + + entries = self._data.get("new_to_old", {}).get(new_path, []) + return [ + RenameHistoryEntry( + old_path=entry.get("old_path", ""), + deprecated_in=entry.get("deprecated_in", ""), + ) + for entry in entries + ] + + @property + def metadata(self) -> dict: + """Get migration map metadata.""" + self._ensure_loaded() + return self._data.get("metadata", {}) if self._data else {} + + @property + def total_migrations(self) -> int: + """Get total number of migrations in the map.""" + return self.metadata.get("total_migrations", 0) + + @property + def target_version(self) -> str: + """Get the target DD version for migrations.""" + return self.metadata.get("target_version", "") + + +@lru_cache(maxsize=1) +def get_migration_map() -> PathMigrationMap: + """ + Get the singleton PathMigrationMap instance. + + Returns: + PathMigrationMap for the current DD version. + """ + return PathMigrationMap() + + +__all__ = [ + "MigrationEntry", + "RenameHistoryEntry", + "PathMigrationMap", + "get_migration_map", +] diff --git a/imas_mcp/resource_path_accessor.py b/imas_mcp/resource_path_accessor.py index 19bd20f..91942c0 100644 --- a/imas_mcp/resource_path_accessor.py +++ b/imas_mcp/resource_path_accessor.py @@ -144,6 +144,11 @@ def mermaid_dir(self) -> Path: """Get the mermaid graphs directory (imas_data_dictionary/{version}/mermaid/).""" return self._get_subdir_path("mermaid", create=True) + @cached_property + def migrations_dir(self) -> Path: + """Get the migrations directory (imas_data_dictionary/{version}/migrations/).""" + return self._get_subdir_path("migrations", create=True) + @property def version(self) -> str: """Get the DD version string.""" diff --git a/imas_mcp/search/decorators/__init__.py b/imas_mcp/search/decorators/__init__.py index 3eef675..0c9025c 100644 --- a/imas_mcp/search/decorators/__init__.py +++ b/imas_mcp/search/decorators/__init__.py @@ -5,7 +5,7 @@ like caching, validation, sampling, performance monitoring, and error handling. 
""" -from .cache import cache_results, clear_cache, get_cache_stats, persistent_cache +from .cache import cache_results, clear_cache, get_cache_stats from .error_handling import ( SearchError, ServiceError, @@ -25,7 +25,6 @@ "cache_results", "clear_cache", "get_cache_stats", - "persistent_cache", # Validation decorators "validate_input", "create_validation_schema", diff --git a/imas_mcp/search/decorators/cache.py b/imas_mcp/search/decorators/cache.py index 832d134..8f669b0 100644 --- a/imas_mcp/search/decorators/cache.py +++ b/imas_mcp/search/decorators/cache.py @@ -7,15 +7,11 @@ import functools import hashlib import json -import logging -import os import time from collections import OrderedDict from collections.abc import Callable from typing import Any, TypeVar -from imas_mcp import dd_version - F = TypeVar("F", bound=Callable[..., Any]) @@ -207,39 +203,3 @@ def get_cache_stats() -> dict[str, Any]: "size": _cache.size(), "max_size": _cache.max_size, } - - -def persistent_cache(filename="migration_cache.json"): - def decorator(func): - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - cache = {} - if os.path.exists(filename): - try: - logging.info(f"Loading cache from {filename}") - with open(filename) as f: - cache = json.load(f) - except (json.JSONDecodeError, OSError): - logging.warning( - f"Failed to load cache from {filename}, starting with empty cache." - ) - pass - - if dd_version in cache: - logging.info(f"Using cached migration map for version {dd_version}") - return cache[dd_version] - - result = func(self, *args, **kwargs) - - cache[dd_version] = result - try: - with open(filename, "w") as f: - json.dump(cache, f) - except OSError: - logging.warning(f"Failed to write cache to {filename}") - - return result - - return wrapper - - return decorator diff --git a/imas_mcp/tools/__init__.py b/imas_mcp/tools/__init__.py index bfcb14c..651c61d 100644 --- a/imas_mcp/tools/__init__.py +++ b/imas_mcp/tools/__init__.py @@ -132,10 +132,6 @@ async def export_physics_domain(self, *args, **kwargs): """Delegate to export tool.""" return await self.export_tool.export_physics_domain(*args, **kwargs) - async def update_imas_path(self, *args, **kwargs): - """Delegate to path tool.""" - return await self.path_tool.update_imas_path(*args, **kwargs) - # Documentation search delegation methods async def search_docs(self, *args, **kwargs): """Delegate to docs tool.""" diff --git a/imas_mcp/tools/path_tool.py b/imas_mcp/tools/path_tool.py index b0435b8..c1b967c 100644 --- a/imas_mcp/tools/path_tool.py +++ b/imas_mcp/tools/path_tool.py @@ -1,23 +1,22 @@ """ Path tool implementation. -Provides both fast validation and rich data retrieval for IMAS paths. +Provides both fast validation and rich data retrieval for IMAS paths, +with migration suggestions for deprecated paths and rename history. 
""" import logging from typing import Any -import imas from fastmcp import Context -from imas_mcp import dd_version +from imas_mcp.migrations import PathMigrationMap, get_migration_map from imas_mcp.models.constants import SearchMode from imas_mcp.models.result_models import IdsPathResult from imas_mcp.search.decorators import ( handle_errors, mcp_tool, measure_performance, - persistent_cache, ) from imas_mcp.search.document_store import DocumentStore @@ -29,11 +28,27 @@ class PathTool(BaseTool): """Tool for IMAS path validation and data retrieval.""" - def __init__(self, document_store: DocumentStore | None = None): + def __init__( + self, + document_store: DocumentStore | None = None, + migration_map: PathMigrationMap | None = None, + ): + """ + Initialize PathTool. + + Args: + document_store: Optional DocumentStore instance. + migration_map: Optional PathMigrationMap for testing. Uses singleton if None. + """ super().__init__(document_store=document_store) - self.supported_versions = imas.dd_zip.dd_xml_versions() - self.path_map = self._build_migration_map() - logging.info(f"Upgrade map built for target version {dd_version}.") + self._migration_map = migration_map + + @property + def migration_map(self) -> PathMigrationMap: + """Get the path migration map (lazy loaded).""" + if self._migration_map is None: + self._migration_map = get_migration_map() + return self._migration_map @property def tool_name(self) -> str: @@ -57,7 +72,8 @@ async def check_imas_paths( Check if one or more exact IMAS paths exist in the data dictionary. Fast validation tool for batch path existence checking without search overhead. - Directly accesses the data dictionary for immediate results. + Directly accesses the data dictionary for immediate results. Returns migration + suggestions for deprecated paths and rename history for current paths. Args: paths: One or more IMAS paths to validate. 
Accepts either: @@ -77,25 +93,24 @@ async def check_imas_paths( - ids_name: IDS name if path exists - data_type: Data type if available (optional) - units: Physical units if available (optional) + - migration: Migration info if path is deprecated (optional): + - new_path: The current path to use + - deprecated_in: Version where path was deprecated + - last_valid_version: Last DD version where path was valid + - renamed_from: List of old paths that were renamed to this path (optional) - error: Error message if path format is invalid (optional) Examples: - Single path (string): - check_imas_paths("equilibrium/time_slice/boundary/psi") - → {"summary": {"total": 1, "found": 1, "not_found": 0, "invalid": 0}, - "results": [{"path": "equilibrium/time_slice/boundary/psi", "exists": true, "ids_name": "equilibrium"}]} - - Multiple paths with ids prefix (ensemble checking): - check_imas_paths("time_slice/boundary/psi time_slice/boundary/psi_norm time_slice/boundary/type", ids="equilibrium") - → {"summary": {"total": 3, "found": 3, "not_found": 0, "invalid": 0}, - "results": [ - {"path": "equilibrium/time_slice/boundary/psi", "exists": true, "ids_name": "equilibrium"}, - {"path": "equilibrium/time_slice/boundary/psi_norm", "exists": true, "ids_name": "equilibrium"}, - {"path": "equilibrium/time_slice/boundary/type", "exists": true, "ids_name": "equilibrium"} - ]} - - Multiple paths (list): - check_imas_paths(["time_slice/boundary/psi", "time_slice/boundary/psi_norm"], ids="equilibrium") + Path exists (current): + check_imas_paths("equilibrium/time_slice/constraints/b_field_pol_probe") + → {"results": [{"path": "...", "exists": true, "ids_name": "equilibrium", + "renamed_from": [{"old_path": "equilibrium/time_slice/constraints/bpol_probe", "deprecated_in": "4.0.0"}]}]} + + Path deprecated (has migration): + check_imas_paths("equilibrium/time_slice/constraints/bpol_probe") + → {"results": [{"path": "...", "exists": false, + "migration": {"new_path": "equilibrium/time_slice/constraints/b_field_pol_probe", + "deprecated_in": "4.0.0", "last_valid_version": "3.42.0"}}]} Note: This tool is optimized for exact path validation. 
For discovering paths @@ -105,7 +120,7 @@ async def check_imas_paths( if isinstance(paths, str): paths_list = paths.split() else: - paths_list = paths + paths_list = list(paths) # Initialize counters and results results = [] @@ -143,7 +158,7 @@ async def check_imas_paths( if document and document.metadata: found_count += 1 metadata = document.metadata - result = { + result: dict[str, Any] = { "path": path, "exists": True, "ids_name": metadata.ids_name, @@ -155,16 +170,36 @@ async def check_imas_paths( if metadata.units: result["units"] = metadata.units + # Add rename history if available + rename_history = self.migration_map.get_rename_history(path) + if rename_history: + result["renamed_from"] = [ + { + "old_path": entry.old_path, + "deprecated_in": entry.deprecated_in, + } + for entry in rename_history + ] + results.append(result) logger.debug(f"Path validation: {path} - exists") else: not_found_count += 1 - results.append( - { - "path": path, - "exists": False, + result = { + "path": path, + "exists": False, + } + + # Check for migration suggestion + migration = self.migration_map.get_migration(path) + if migration: + result["migration"] = { + "new_path": migration.new_path, + "deprecated_in": migration.deprecated_in, + "last_valid_version": migration.last_valid_version, } - ) + + results.append(result) logger.debug(f"Path validation: {path} - not found") except Exception as e: @@ -244,7 +279,7 @@ async def fetch_imas_paths( if isinstance(paths, str): paths_list = paths.split() else: - paths_list = paths + paths_list = list(paths) # Initialize tracking nodes = [] @@ -314,60 +349,3 @@ async def fetch_imas_paths( f"Path retrieval completed: {found_count}/{len(paths_list)} retrieved" ) return result - - @mcp_tool("Ensure paths are updated to the current dd_version.") - async def ensure_current_path( - self, - paths: str | list[str], - ids_name: str | None = None, - source_dd_version: str | None = None, - ctx: Context | None = None, - ) -> list[str | None]: - """ - update the IMAS path to the served dd_version. - Args: - paths: One or more IMAS paths to update. Accepts either: - - Space-delimited string: "time_slice/boundary/psi time_slice/boundary/psi_norm" - - List of paths: ["time_slice/boundary/psi", "profiles_1d/electrons/temperature"] - ids_name: Optional IDS name(s) corresponding to the paths. - Can be a single IDS name applied to all paths or a list matching the length of paths. - Returns: - List of paths existing in the target dd_version, or None if no mapping exists. 
- - """ - if isinstance(paths, str): - paths = paths.split(" ") - - result: list[str | None] = [] - for path in paths: - full_path = f"{ids_name}/{path}" if ids_name else path - document = self.document_store.get_document(full_path) - if not document: - result.append(self.path_map.get(full_path if ids_name else path, None)) - continue - result.append(path) - return result - - @persistent_cache("./imas_mcp/resources/imas_path_migration_cache.json") - def _build_migration_map(self) -> dict[str, str | None]: - """Build old_path -> new_path mapping for all IDS from all older versions to target.""" - version_factories: list[imas.IDSFactory] = [ - imas.IDSFactory(v) for v in self.supported_versions if v < dd_version - ] - target_factory = imas.IDSFactory(dd_version) - path_map: dict[str, str | None] = {} - - for old_factory in version_factories: - for ids in old_factory.ids_names(): - if ids not in target_factory.ids_names(): - continue # IDS may not exist in new version -> cannot produce mapping (circumvent error handling for speedup) - - version_map, _ = imas.ids_convert.dd_version_map_from_factories( - ids, old_factory, target_factory - ) - - for old_path, new_path in version_map.old_to_new.path.items(): - full_old = f"{ids}/{old_path}" - full_new = f"{ids}/{new_path}" if new_path else None - path_map[full_old] = full_new - return path_map diff --git a/pyproject.toml b/pyproject.toml index 0818aef..be4a3d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ requires = [ "PyYAML>=6.0,<7.0", "imas-data-dictionary>=4.1.0", "imas-data-dictionaries>=4.1.0", + "imas-python>=2.0.1", ] build-backend = "hatchling.build" @@ -113,6 +114,7 @@ build-relationships = "scripts.build_relationships:build_relationships" build-embeddings = "scripts.build_embeddings:build_embeddings" build-database = "scripts.build_database:build_database" build-mermaid = "scripts.build_mermaid:build_mermaid" +build-migrations = "scripts.build_migrations:build_migrations" [tool.hatch.version] source = "vcs" diff --git a/scripts/build_migrations.py b/scripts/build_migrations.py new file mode 100644 index 0000000..1844536 --- /dev/null +++ b/scripts/build_migrations.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +""" +Build the path migration map for IMAS Data Dictionary version upgrades. + +This script creates a JSON file mapping old paths to new paths across DD versions, +enabling the MCP server to suggest path migrations for deprecated paths and +provide rename history for current paths. +""" + +import json +import logging +import sys +from collections import defaultdict +from datetime import UTC, datetime +from pathlib import Path + +import click +import imas + +from imas_mcp import dd_version +from imas_mcp.resource_path_accessor import ResourcePathAccessor + + +def build_migration_map( + target_version: str, + ids_filter: set[str] | None = None, + verbose: bool = False, +) -> dict: + """ + Build bidirectional path migration map from all older versions to target. + + Args: + target_version: The DD version to migrate paths to. + ids_filter: Optional set of IDS names to include. + verbose: Enable verbose logging. + + Returns: + Dictionary with metadata, old_to_new, and new_to_old mappings. 
+    """
+    logger = logging.getLogger(__name__)
+
+    # Get all available DD versions older than the target
+    all_versions = imas.dd_zip.dd_xml_versions()
+    source_versions = [v for v in all_versions if v < target_version]
+
+    if verbose:
+        logger.info(f"Building migration map to {target_version}")
+        logger.info(f"Source versions: {len(source_versions)} versions")
+
+    target_factory = imas.IDSFactory(target_version)
+    target_ids_names = set(target_factory.ids_names())
+
+    # Apply IDS filter if provided
+    if ids_filter:
+        target_ids_names = target_ids_names & ids_filter
+
+    # Track migrations with version info
+    old_to_new: dict[str, dict] = {}
+    new_to_old: dict[str, list[dict]] = defaultdict(list)
+
+    # Earliest source version in which each old path is seen to differ
+    deprecation_versions: dict[str, str] = {}
+    # Newest source version in which each old path was still present
+    last_valid_versions: dict[str, str] = {}
+
+    for source_version in sorted(source_versions):
+        if verbose:
+            logger.debug(f"Processing version {source_version}")
+
+        source_factory = imas.IDSFactory(source_version)
+
+        for ids_name in source_factory.ids_names():
+            if ids_name not in target_ids_names:
+                continue
+
+            try:
+                version_map, _ = imas.ids_convert.dd_version_map_from_factories(
+                    ids_name, source_factory, target_factory
+                )
+            except Exception as e:
+                logger.warning(
+                    f"Failed to get version map for {ids_name} "
+                    f"from {source_version}: {e}"
+                )
+                continue
+
+            for old_path, new_path in version_map.old_to_new.path.items():
+                full_old = f"{ids_name}/{old_path}"
+                full_new = f"{ids_name}/{new_path}" if new_path else None
+
+                # Skip if path unchanged
+                if full_old == full_new:
+                    continue
+
+                # Overwrite on every hit: source versions iterate oldest to
+                # newest, so the final value is the newest version in which
+                # the old path still existed.
+                last_valid_versions[full_old] = source_version
+
+                # Update old_to_new mapping
+                if full_old not in old_to_new:
+                    old_to_new[full_old] = {
+                        "new_path": full_new,
+                        "deprecated_in": target_version,
+                        "last_valid_version": source_version,  # Refined below
+                    }
+
+                # Track the first source version in which the path is seen
+                # to differ from the target
+                if full_old not in deprecation_versions:
+                    deprecation_versions[full_old] = source_version
+
+                # Build reverse mapping (new_to_old)
+                if full_new:
+                    entry = {
+                        "old_path": full_old,
+                        "deprecated_in": deprecation_versions.get(
+                            full_old, target_version
+                        ),
+                    }
+                    # Avoid duplicates
+                    existing_old_paths = [e["old_path"] for e in new_to_old[full_new]]
+                    if full_old not in existing_old_paths:
+                        new_to_old[full_new].append(entry)
+
+    # Refine the tracked versions: last_valid_version becomes the newest
+    # source version that still contained the old path; deprecated_in is
+    # the target version, since that is where the old path is no longer
+    # valid.
+    for old_path, info in old_to_new.items():
+        info["last_valid_version"] = last_valid_versions.get(
+            old_path, info["last_valid_version"]
+        )
+        info["deprecated_in"] = target_version
+
+    # Build final structure
+    migration_data = {
+        "metadata": {
+            "target_version": target_version,
+            "source_versions": sorted(source_versions),
+            "generated_at": datetime.now(UTC).isoformat(),
+            "total_migrations": len(old_to_new),
+            "paths_with_history": len(new_to_old),
+        },
+        "old_to_new": old_to_new,
+        "new_to_old": dict(new_to_old),
+    }
+
+    return migration_data
+
+
+@click.command()
+@click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging output")
help="Suppress all logging except errors") +@click.option( + "--force", + "-f", + is_flag=True, + help="Force rebuild even if migration file already exists", +) +@click.option( + "--ids-filter", + type=str, + help="Specific IDS names to include (space-separated)", +) +@click.option( + "--check-only", + is_flag=True, + help="Only check if migration file exists, don't build it", +) +@click.option( + "--output", + "-o", + type=click.Path(), + help="Override output path for migration file", +) +def build_migrations( + verbose: bool, + quiet: bool, + force: bool, + ids_filter: str, + check_only: bool, + output: str | None, +) -> int: + """Build the path migration map for IMAS DD version upgrades. + + This command creates a JSON file mapping old paths to new paths, + enabling migration suggestions for deprecated paths and rename + history for current paths. + + Examples: + build-migrations # Build with default settings + build-migrations -v # Build with verbose logging + build-migrations -f # Force rebuild + build-migrations --ids-filter "equilibrium core_profiles" + """ + # Set up logging + if quiet: + log_level = logging.ERROR + elif verbose: + log_level = logging.DEBUG + else: + log_level = logging.INFO + + logging.basicConfig( + level=log_level, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + logger = logging.getLogger(__name__) + + try: + # Determine output path + if output: + output_path = Path(output) + else: + path_accessor = ResourcePathAccessor(dd_version=dd_version) + output_path = path_accessor.migrations_dir / "path_migrations.json" + + # Check-only mode + if check_only: + if output_path.exists(): + with open(output_path) as f: + data = json.load(f) + metadata = data.get("metadata", {}) + click.echo(f"Migration file exists: {output_path}") + click.echo(f"Target version: {metadata.get('target_version')}") + click.echo(f"Total migrations: {metadata.get('total_migrations')}") + click.echo(f"Generated at: {metadata.get('generated_at')}") + return 0 + else: + click.echo("Migration file does not exist") + return 1 + + # Check if rebuild needed + if output_path.exists() and not force: + logger.info(f"Migration file already exists: {output_path}") + logger.info("Use --force to rebuild") + return 0 + + logger.info(f"Building migration map for DD version {dd_version}...") + + # Parse IDS filter + ids_set: set[str] | None = None + if ids_filter: + ids_set = set(ids_filter.split()) + logger.info(f"Filtering to IDS: {sorted(ids_set)}") + + # Build migration map + migration_data = build_migration_map( + target_version=dd_version, + ids_filter=ids_set, + verbose=verbose, + ) + + # Ensure output directory exists + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Write migration file + with open(output_path, "w") as f: + json.dump(migration_data, f, indent=2) + + # Report results + metadata = migration_data["metadata"] + logger.info("Migration map built successfully:") + logger.info(f" - Target version: {metadata['target_version']}") + logger.info(f" - Source versions: {len(metadata['source_versions'])}") + logger.info(f" - Total migrations: {metadata['total_migrations']}") + logger.info(f" - Paths with history: {metadata['paths_with_history']}") + logger.info(f" - Output: {output_path}") + + click.echo( + f"Built migration map with {metadata['total_migrations']} migrations" + ) + click.echo(f"Output: {output_path}") + + return 0 + + except Exception as e: + logger.error(f"Error building migration map: {e}") + if verbose: + logger.exception("Full traceback:") + 
click.echo(f"Error: {e}", err=True) + return 1 + + +if __name__ == "__main__": + sys.exit(build_migrations()) diff --git a/tests/test_path_tool.py b/tests/test_path_tool.py index 88c86ed..bc40627 100644 --- a/tests/test_path_tool.py +++ b/tests/test_path_tool.py @@ -2,80 +2,121 @@ import pytest +from imas_mcp.migrations import MigrationEntry, PathMigrationMap, RenameHistoryEntry from imas_mcp.search.document_store import DocumentStore from imas_mcp.tools import PathTool +# ============================================================================ +# Mock PathMigrationMap for testing +# ============================================================================ + -def _full_path(ids: str, path: str) -> str: - return f"{ids}/{path}" +class MockPathMigrationMap(PathMigrationMap): + """Mock PathMigrationMap with predefined test data.""" + + def __init__(self): + # Initialize with test data, bypassing file loading + migration_data = { + "metadata": { + "target_version": "4.0.1", + "total_migrations": 2, + }, + "old_to_new": { + "equilibrium/time_slice/constraints/bpol_probe": { + "new_path": "equilibrium/time_slice/constraints/b_field_pol_probe", + "deprecated_in": "4.0.0", + "last_valid_version": "3.42.0", + }, + "equilibrium/time_slice/global_quantities/li": { + "new_path": "equilibrium/time_slice/global_quantities/li_3", + "deprecated_in": "4.0.0", + "last_valid_version": "3.41.0", + }, + }, + "new_to_old": { + "equilibrium/time_slice/constraints/b_field_pol_probe": [ + { + "old_path": "equilibrium/time_slice/constraints/bpol_probe", + "deprecated_in": "4.0.0", + } + ], + "equilibrium/time_slice/global_quantities/li_3": [ + { + "old_path": "equilibrium/time_slice/global_quantities/li", + "deprecated_in": "4.0.0", + } + ], + }, + } + super().__init__(dd_version="4.0.1", migration_data=migration_data) @pytest.fixture -def path_tool() -> PathTool: - """Create a PathTool instance for testing.""" - doc_store = DocumentStore() - return PathTool(doc_store) +def mock_migration_map() -> MockPathMigrationMap: + """Create a mock PathMigrationMap for testing.""" + return MockPathMigrationMap() @pytest.fixture -def outdated_paths_with_ids() -> tuple[str, list[str]]: - return "equilibrium", [ - "time_slice/constraints/bpol_probe", - "time_slice/constraints/bpol_probe", - ] +def path_tool(mock_migration_map: MockPathMigrationMap) -> PathTool: + """Create a PathTool instance for testing with mocked migration map.""" + doc_store = DocumentStore() + return PathTool(doc_store, migration_map=mock_migration_map) -@pytest.fixture -def new_paths_with_ids() -> tuple[str, list[str]]: - return "equilibrium", [ - "time_slice/constraints/b_field_pol_probe", - "time_slice/constraints/b_field_pol_probe", - ] +# ============================================================================ +# Tests for PathMigrationMap +# ============================================================================ -@pytest.mark.asyncio -async def test_ensure_current_path_single( - path_tool: PathTool, - outdated_paths_with_ids: tuple[str, list[str]], - new_paths_with_ids: tuple[str, list[str]], -): - """Basic test to ensure PathTool is instantiated correctly.""" - _, outdated = outdated_paths_with_ids - ids, new = new_paths_with_ids - new = [f"{ids}/{n}" for n in new] +def test_migration_map_get_migration(mock_migration_map: MockPathMigrationMap): + """Test getting migration info for an old path.""" + migration = mock_migration_map.get_migration( + "equilibrium/time_slice/constraints/bpol_probe" + ) - for o, n in zip(outdated, new, 
strict=False): - result = await path_tool.ensure_current_path(o, ids) - assert result == [n] + assert migration is not None + assert migration.new_path == "equilibrium/time_slice/constraints/b_field_pol_probe" + assert migration.deprecated_in == "4.0.0" + assert migration.last_valid_version == "3.42.0" -@pytest.mark.asyncio -async def test_update_imas_path_tool_list( - path_tool: PathTool, - outdated_paths_with_ids: tuple[str, list[str]], - new_paths_with_ids: tuple[str, list[str]], +def test_migration_map_get_migration_not_found( + mock_migration_map: MockPathMigrationMap, ): - ids, outdated = outdated_paths_with_ids - ids, new = new_paths_with_ids - new = [f"{ids}/{n}" for n in new] + """Test getting migration info for a path with no migration.""" + migration = mock_migration_map.get_migration("fake/path/here") + assert migration is None - result = await path_tool.ensure_current_path(outdated, ids_name=ids) - assert result == new +def test_migration_map_get_rename_history(mock_migration_map: MockPathMigrationMap): + """Test getting rename history for a current path.""" + history = mock_migration_map.get_rename_history( + "equilibrium/time_slice/constraints/b_field_pol_probe" + ) -@pytest.mark.asyncio -async def test_update_imas_path_tool_spaced_list( - path_tool: PathTool, - outdated_paths_with_ids: tuple[str, list[str]], - new_paths_with_ids: tuple[str, list[str]], + assert len(history) == 1 + assert history[0].old_path == "equilibrium/time_slice/constraints/bpol_probe" + assert history[0].deprecated_in == "4.0.0" + + +def test_migration_map_get_rename_history_not_found( + mock_migration_map: MockPathMigrationMap, ): - """Test updating IMAS paths provided as space-separated string.""" - ids, outdated = outdated_paths_with_ids - ids, new = new_paths_with_ids - new = [f"{ids}/{n}" for n in new] + """Test getting rename history for a path with no history.""" + history = mock_migration_map.get_rename_history("fake/path/here") + assert history == [] + + +def test_migration_map_metadata(mock_migration_map: MockPathMigrationMap): + """Test migration map metadata access.""" + assert mock_migration_map.target_version == "4.0.1" + assert mock_migration_map.total_migrations == 2 - result = await path_tool.ensure_current_path(" ".join(outdated), ids_name=ids) - assert result == new + +# ============================================================================ +# Tests for check_imas_paths - Basic validation +# ============================================================================ @pytest.mark.asyncio @@ -222,6 +263,11 @@ async def test_token_efficient_response(path_tool): assert "documentation" not in res +# ============================================================================ +# Tests for check_imas_paths - IDS prefix handling +# ============================================================================ + + @pytest.mark.asyncio async def test_ids_prefix_single_path(path_tool): """Test ids parameter with single path.""" @@ -298,6 +344,45 @@ async def test_ids_prefix_mixed_paths(path_tool): assert result["results"][1]["path"] == "equilibrium/time_slice/boundary/psi_norm" +# ============================================================================ +# Tests for check_imas_paths - Migration suggestions +# ============================================================================ + + +@pytest.mark.asyncio +async def test_deprecated_path_returns_migration(path_tool): + """Test that deprecated paths return migration suggestions.""" + result = await path_tool.check_imas_paths( + 
"equilibrium/time_slice/constraints/bpol_probe" + ) + + assert result["summary"]["total"] == 1 + assert result["summary"]["not_found"] == 1 + + res = result["results"][0] + assert res["exists"] is False + assert res["path"] == "equilibrium/time_slice/constraints/bpol_probe" + + # Should have migration info + assert "migration" in res + assert ( + res["migration"]["new_path"] + == "equilibrium/time_slice/constraints/b_field_pol_probe" + ) + assert res["migration"]["deprecated_in"] == "4.0.0" + assert res["migration"]["last_valid_version"] == "3.42.0" + + +@pytest.mark.asyncio +async def test_nonexistent_path_no_migration(path_tool): + """Test that truly invalid paths don't have migration info.""" + result = await path_tool.check_imas_paths("fake/nonexistent/path") + + res = result["results"][0] + assert res["exists"] is False + assert "migration" not in res + + # ============================================================================ # Tests for fetch_imas_paths - Rich data retrieval # ============================================================================ @@ -325,7 +410,6 @@ async def test_fetch_single_path(path_tool): assert node.path == "core_profiles/profiles_1d/electrons/temperature" assert node.documentation # Should have documentation assert node.data_type # Should have data_type - # Units might or might not be present depending on the path @pytest.mark.asyncio