From a5acf1ef3a44d26481160f9f65ec9d5ee7469beb Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 9 Jul 2024 13:55:07 +0200 Subject: [PATCH 01/29] Implement `DBEntry.get_sample` in IMASPy --- imaspy/backends/db_entry_impl.py | 32 +- imaspy/backends/imas_core/al_context.py | 36 ++ imaspy/backends/imas_core/db_entry_al.py | 27 +- imaspy/backends/imas_core/imas_interface.py | 7 + imaspy/db_entry.py | 130 ++++++- imaspy/test/test_get_sample.py | 393 ++++++++++++++++++++ 6 files changed, 603 insertions(+), 22 deletions(-) create mode 100644 imaspy/test/test_get_sample.py diff --git a/imaspy/backends/db_entry_impl.py b/imaspy/backends/db_entry_impl.py index 7f86e622..dbbb1329 100644 --- a/imaspy/backends/db_entry_impl.py +++ b/imaspy/backends/db_entry_impl.py @@ -2,13 +2,34 @@ # You should have received the IMASPy LICENSE file with this project. from abc import ABC, abstractmethod -from typing import Any, List, Optional +from dataclasses import dataclass +from typing import Any, List, Optional, Union + +import numpy from imaspy.ids_convert import NBCPathMap from imaspy.ids_factory import IDSFactory from imaspy.ids_toplevel import IDSToplevel +@dataclass +class GetSliceParameters: + """Helper class to store parameters to get_slice.""" + + time_requested: float + interpolation_method: int + + +@dataclass +class GetSampleParameters: + """Helper class to store parameters to get_sample.""" + + tmin: float + tmax: float + dtime: Optional[numpy.ndarray] + interpolation_method: Optional[int] + + class DBEntryImpl(ABC): """Interface for DBEntry implementations.""" @@ -47,20 +68,17 @@ def get( self, ids_name: str, occurrence: int, - time_requested: Optional[float], - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: IDSToplevel, lazy: bool, nbc_map: Optional[NBCPathMap], ) -> None: - """Implement DBEntry.get()/get_slice(). Load data from the data source. + """Implement DBEntry.get/get_slice/get_sample. Load data from the data source. Args: ids_name: Name of the IDS to load. occurrence: Which occurence of the IDS to load. - time_requested: None for get(), requested time slice for get_slice(). - interpolation_method: Requested interpolation method (ignore when - time_requested is None). + parameters: Additional parameters for a get_slice/get_sample call. destination: IDS object to store data in. lazy: Use lazy loading. nbc_map: NBCPathMap to use for implicit conversion. 
When None, no implicit diff --git a/imaspy/backends/imas_core/al_context.py b/imaspy/backends/imas_core/al_context.py index 07f37dec..d14f6bfd 100644 --- a/imaspy/backends/imas_core/al_context.py +++ b/imaspy/backends/imas_core/al_context.py @@ -8,6 +8,8 @@ from contextlib import contextmanager from typing import TYPE_CHECKING, Any, Callable, Iterator, List, Optional, Tuple +import numpy + from imaspy.backends.imas_core.imas_interface import ll_interface from imaspy.exception import LowlevelError from imaspy.ids_defs import ( @@ -105,6 +107,21 @@ def slice_action( raise LowlevelError("slice_action", status) return ALContext(ctx) + def timerange_action( + self, + path: str, + rwmode: int, + tmin: float, + tmax: float, + dtime: Optional[numpy.ndarray], + interpolation_method: int, + ) -> "ALContext": + """Begin a new timerange action for use in a ``with`` context.""" + ctx = ll_interface.begin_timerange_action( + self.ctx, path, rwmode, tmin, tmax, dtime, interpolation_method + ) + return ALContext(ctx) + def arraystruct_action( self, path: str, timebase: str, size: int ) -> "ALArrayStructContext": @@ -317,6 +334,25 @@ def slice_action( (path, rwmode, time_requested, interpolation_method), ) + @contextmanager + def timerange_action( + self, + path: str, + rwmode: int, + tmin: float, + tmax: float, + dtime: Optional[numpy.ndarray], + interpolation_method: int, + ) -> Iterator["LazyALContext"]: + """Lazily start a lowlevel timerange action, see + :meth:`ALContext.timerange_action`. + """ + yield LazyALContext( + self, + ALContext.timerange_action, + (path, rwmode, tmin, tmax, dtime, interpolation_method), + ) + def arraystruct_action( self, path: str, timebase: str, size: int ) -> "LazyALArrayStructContext": diff --git a/imaspy/backends/imas_core/db_entry_al.py b/imaspy/backends/imas_core/db_entry_al.py index e126bf9b..89cf3625 100644 --- a/imaspy/backends/imas_core/db_entry_al.py +++ b/imaspy/backends/imas_core/db_entry_al.py @@ -5,9 +5,10 @@ import logging import os from collections import deque -from typing import Any, Deque, List, Optional +from typing import Any, Deque, List, Optional, Union from urllib.parse import urlparse +from imaspy.backends.db_entry_impl import GetSampleParameters, GetSliceParameters from imaspy.db_entry import DBEntryImpl from imaspy.exception import DataEntryException, LowlevelError from imaspy.ids_convert import NBCPathMap, dd_version_map_from_factories @@ -216,8 +217,7 @@ def get( self, ids_name: str, occurrence: int, - time_requested: Optional[float], - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: IDSToplevel, lazy: bool, nbc_map: Optional[NBCPathMap], @@ -245,13 +245,28 @@ def get( else: context = self._db_ctx # Now fill the IDSToplevel - if time_requested is None or destination.metadata.type is IDSType.CONSTANT: + if parameters is None or destination.metadata.type is IDSType.CONSTANT: # called from get(), or when the IDS is constant (see IMAS-3330) manager = context.global_action(ll_path, READ_OP) - else: # get_slice + elif isinstance(parameters, GetSliceParameters): manager = context.slice_action( - ll_path, READ_OP, time_requested, interpolation_method + ll_path, + READ_OP, + parameters.time_requested, + parameters.interpolation_method, ) + elif isinstance(parameters, GetSampleParameters): + manager = context.timerange_action( + ll_path, + READ_OP, + parameters.tmin, + parameters.tmax, + parameters.dtime, + parameters.interpolation_method, + ) + else: + raise TypeError(f"Incorrect type for 
parameters: {type(parameters)}.")
+
         with manager as read_ctx:
             if lazy:
                 destination._set_lazy_context(read_ctx)
diff --git a/imaspy/backends/imas_core/imas_interface.py b/imaspy/backends/imas_core/imas_interface.py
index 07f4783e..cca7d42f 100644
--- a/imaspy/backends/imas_core/imas_interface.py
+++ b/imaspy/backends/imas_core/imas_interface.py
@@ -215,6 +215,13 @@ def get_occurrences(self, ctx, ids_name):
     def get_al_version(self):
         return self._al_version_str
 
+    # New methods added in AL 5.3
+
+    def begin_timerange_action(
+        self, ctx, path, rwmode, tmin, tmax, dtime, interpolation_method
+    ):
+        raise self._minimal_version("5.3")
+
 
 # Dummy documentation for interface:
 for funcname in dir(LowlevelInterface):
diff --git a/imaspy/db_entry.py b/imaspy/db_entry.py
index 9ca826b7..ba5bcac6 100644
--- a/imaspy/db_entry.py
+++ b/imaspy/db_entry.py
@@ -5,10 +5,16 @@
 
 import logging
 import os
-from typing import Any, List, Optional, Tuple, Type, overload
+from typing import Any, List, Optional, Tuple, Type, Union, overload
+
+import numpy
 
 import imaspy
-from imaspy.backends.db_entry_impl import DBEntryImpl
+from imaspy.backends.db_entry_impl import (
+    DBEntryImpl,
+    GetSampleParameters,
+    GetSliceParameters,
+)
 from imaspy.dd_zip import dd_xml_versions
 from imaspy.exception import IDSNameError, UnknownDDVersion, ValidationError
 from imaspy.ids_base import IDSBase
@@ -347,7 +353,6 @@ def get(
             ids_name,
             occurrence,
             None,
-            0,
             destination,
             lazy,
             autoconvert,
@@ -416,8 +421,117 @@ def get_slice(
         return self._get(
             ids_name,
             occurrence,
-            time_requested,
-            interpolation_method,
+            GetSliceParameters(time_requested, interpolation_method),
             destination,
             lazy,
             autoconvert,
             ignore_unknown_dd_version,
         )
+
+    def get_sample(
+        self,
+        ids_name: str,
+        tmin: float,
+        tmax: float,
+        dtime: Optional[Union[float, numpy.ndarray]] = None,
+        interpolation_method: Optional[int] = None,
+        occurrence: int = 0,
+        *,
+        lazy: bool = False,
+        autoconvert: bool = True,
+        ignore_unknown_dd_version: bool = False,
+        destination: Optional[IDSToplevel] = None,
+    ) -> IDSToplevel:
+        """Read a range of time slices from an IDS in this Database Entry.
+
+        This method has three different modes, depending on the provided arguments:
+
+        1. No interpolation. This mode is selected when :param:`dtime` and
+           :param:`interpolation_method` are not provided.
+
+           This mode returns an IDS object with all constant/static data filled. The
+           dynamic data is retrieved for the provided time range [tmin, tmax].
+
+        2. Interpolate dynamic data on a uniform time base. This mode is selected
+           when :param:`dtime` and :param:`interpolation_method` are provided.
+           :param:`dtime` must be a number or a numpy array of size 1.
+
+           This mode will generate an IDS with a homogeneous time vector ``[tmin, tmin
+           + dtime, tmin + 2*dtime, ...]`` up to ``tmax``. The returned IDS always has
+           ``ids_properties.homogeneous_time = 1``.
+
+        3. Interpolate dynamic data on an explicit time base. This mode is selected
+           when :param:`dtime` and :param:`interpolation_method` are provided.
+           :param:`dtime` must be a numpy array of size greater than 1.
+
+           This mode will generate an IDS with a homogeneous time vector equal to
+           :param:`dtime`. :param:`tmin` and :param:`tmax` are ignored in this mode.
+           The returned IDS always has ``ids_properties.homogeneous_time = 1``.
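+
+        As a rough sketch, the mode is selected from the arguments as follows
+        (an illustration only; the actual implementation may differ in detail):
+
+        .. code-block:: python
+
+            if dtime is None and interpolation_method is None:
+                mode = 1  # no interpolation
+            elif dtime is not None and interpolation_method is not None:
+                dtime = numpy.atleast_1d(dtime)  # float or 0D array -> 1D array
+                mode = 2 if dtime.size == 1 else 3
+            else:
+                raise ValueError("dtime and interpolation_method go together")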
+ + Args: + ids_name: Name of the IDS to read from the backend + tmin: Lower bound of the requested time range + tmax: Upper bound of the requested time range, must be larger than or + equal to :param:`tmin` + dtime: Interval to use when interpolating, must be positive, or numpy array + containing an explicit time base to interpolate. + interpolation_method: Interpolation method to use. Available options: + + - :const:`~imaspy.ids_defs.CLOSEST_INTERP` + - :const:`~imaspy.ids_defs.PREVIOUS_INTERP` + - :const:`~imaspy.ids_defs.LINEAR_INTERP` + + occurrence: Which occurrence of the IDS to read. + + Keyword Args: + lazy: When set to ``True``, values in this IDS will be retrieved only when + needed (instead of getting the full IDS immediately). See :ref:`Lazy + loading` for more details. + autoconvert: Automatically convert IDSs. + + If enabled (default), a call to ``get_sample()`` will return + an IDS from the Data Dictionary version attached to this Data Entry. + Data is automatically converted between the on-disk version and the + in-memory version. + + When set to ``False``, the IDS will be returned in the DD version it was + stored in. + ignore_unknown_dd_version: When an IDS is stored with an unknown DD version, + do not attempt automatic conversion and fetch the data in the Data + Dictionary version attached to this Data Entry. + destination: Populate this IDSToplevel instead of creating an empty one. + + Returns: + The loaded IDS. + + Example: + .. code-block:: python + + import imaspy + import numpy + from imaspy import ids_defs + + imas_entry = imaspy.DBEntry( + "imas:mdsplus?user=public;pulse=131024;run=41;database=ITER", "r") + + # All time slices between t=200 and t=370 + core_profiles = imas_entry.get_sample("core_profiles", 200, 370) + + # Closest points to [0, 100, 200, ..., 1000] + core_profiles_interp = imas_entry.get_sample( + "core_profiles", 0, 1000, 100, ids_defs.CLOSEST_INTERP) + + # Linear interpolation for [10, 11, 12, 14, 16, 20, 30, 40, 50] + times = numpy.array([10, 11, 12, 14, 16, 20, 30, 40, 50]) + core_profiles_interp = imas_entry.get_sample( + "core_profiles", 0, 0, times, ids_defs.LINEAR_INTERP) + """ + if dtime is not None: + dtime = numpy.atleast_1d(dtime) # Convert floats and 0D arrays to 1D array + return self._get( + ids_name, + occurrence, + GetSampleParameters(tmin, tmax, dtime, interpolation_method), destination, lazy, autoconvert, @@ -428,8 +542,7 @@ def _get( self, ids_name: str, occurrence: int, - time_requested: Optional[float], - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: Optional[IDSToplevel], lazy: bool, autoconvert: bool, @@ -492,8 +605,7 @@ def _get( return self._dbe_impl.get( ids_name, occurrence, - time_requested, - interpolation_method, + parameters, destination, lazy, nbc_map, diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py new file mode 100644 index 00000000..7c3b210f --- /dev/null +++ b/imaspy/test/test_get_sample.py @@ -0,0 +1,393 @@ +import numpy as np +import pytest + +import imaspy +from imaspy.backends.imas_core.imas_interface import lowlevel +from imaspy.exception import DataEntryException +from imaspy.ids_defs import ( + CLOSEST_INTERP, + EMPTY_FLOAT, + HDF5_BACKEND, + IDS_TIME_MODE_HETEROGENEOUS, + IDS_TIME_MODE_HOMOGENEOUS, + LINEAR_INTERP, + MDSPLUS_BACKEND, + PREVIOUS_INTERP, +) + + +@pytest.fixture() +def test_db_uri(backend, worker_id, tmp_path_factory): + # Check if begin_timerange_action is available in imas_core + if not 
hasattr(lowlevel, "al_begin_timerange_action"):
+        pytest.skip("imas_core version doesn't support begin_timerange_action.")
+
+    if backend not in [HDF5_BACKEND, MDSPLUS_BACKEND]:
+        pytest.skip("Backend doesn't support time range operations.")
+
+    tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}")
+    backend_str = {HDF5_BACKEND: "hdf5", MDSPLUS_BACKEND: "mdsplus"}[backend]
+    uri = f"imas:{backend_str}?path={tmp_path}"
+    entry = imaspy.DBEntry(uri, "x")
+
+    # Homogeneous core profiles:
+    cp = entry.factory.core_profiles()
+    cp.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    N_time = 32
+    cp.time = np.linspace(0, 1, N_time)
+    cp.profiles_1d.resize(N_time)
+    for i in range(N_time):
+        # FLT_1D:
+        cp.profiles_1d[i].grid.rho_tor_norm = np.array([0.0, 1.0])
+        cp.profiles_1d[i].t_i_average = np.array([2.0, 1.0]) * (i + 1)
+        cp.profiles_1d[i].ion.resize(1)
+        # STR_0D:
+        cp.profiles_1d[i].ion[0].label = "D"
+        # FLT_0D
+        cp.profiles_1d[i].ion[0].z_ion = 1.0
+        cp.profiles_1d[i].ion[0].temperature = cp.profiles_1d[i].t_i_average
+        # INT_0D
+        cp.profiles_1d[i].ion[0].temperature_validity = 0
+    cp.global_quantities.ip = (2 - cp.time) ** 0.5
+    entry.put(cp)
+
+    # Inhomogeneous equilibrium
+    eq = entry.factory.equilibrium()
+    eq.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS
+    eq.time = np.linspace(0, 2, 512)
+    # GGD Grid with 1 time slice
+    eq.grids_ggd.resize(1)
+    eq.grids_ggd[0].time = 0.0
+    eq.grids_ggd[0].grid.resize(1)
+    eq.grids_ggd[0].grid[0].path = "wall:0/description_ggd(1)/grid_ggd"
+    # multiple time slices with data
+    N_time = 6
+    eq.time_slice.resize(N_time)
+    for i in range(N_time):
+        # FLT_0D
+        eq.time_slice[i].time = i / 5.0
+        eq.time_slice[i].profiles_2d.resize(1)
+        # FLT_1D
+        eq.time_slice[i].profiles_2d[0].grid.dim1 = np.array([0.0, 1.0])
+        eq.time_slice[i].profiles_2d[0].grid.dim2 = np.array([3.0, 4.0])
+        # STR_0D
+        eq.time_slice[i].profiles_2d[0].grid_type.name = f"test {i}"
+        eq.time_slice[i].profiles_2d[0].grid_type.description = "test description"
+        # INT_0D
+        eq.time_slice[i].profiles_2d[0].grid_type.index = -1
+        # FLT_2D
+        eq.time_slice[i].profiles_2d[0].r = np.array([[0.0, 0.0], [1.0, 1.0]])
+        eq.time_slice[i].profiles_2d[0].z = np.array([[3.0, 4.0], [3.0, 4.0]])
+        eq.time_slice[i].profiles_2d[0].psi = (
+            eq.time_slice[i].profiles_2d[0].r - eq.time_slice[i].profiles_2d[0].z
+        ) * (1 + eq.time_slice[i].time) ** 2
+    entry.put(eq)
+
+    # Equilibrium only has dynamic AOS and no other non-homogeneous time nodes
+    # Use magnetics to test that case:
+    mag = entry.factory.magnetics()
+    mag.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS
+    mag.time = np.array([0.0])
+    mag.flux_loop.resize(3)
+    for i in range(3):
+        mag.flux_loop[i].flux.time = np.linspace(0.0123, 1, 5 + i)
+        mag.flux_loop[i].flux.data = 2 + 2 * mag.flux_loop[i].flux.time
+        mag.flux_loop[i].voltage.time = np.linspace(0.0123, 1, 8 + i)
+        mag.flux_loop[i].voltage.data = 2 - 5 * mag.flux_loop[i].voltage.time
+    entry.put(mag)
+
+    entry.close()
+    return uri
+
+
+def test_invalid_arguments(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    with pytest.raises(ValueError):
+        entry.get_sample("core_profiles", 0.3, 0.2)  # tmin > tmax
+    with pytest.raises(DataEntryException):
+        entry.get_sample("core_profiles", 0.1, 0.2, occurrence="invalid")
+    with pytest.raises(ValueError):
+        entry.get_sample("core_profiles", 0.1, 0.2, 0.05)  # no interpolation method
+
+
+def test_get_sample_homogeneous(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    cp = 
entry.get_sample("core_profiles", 0.3, 14 / 31) + assert np.array_equal(cp.time, np.linspace(0, 1, 32)[10:15]) + + for i, p1d in enumerate(cp.profiles_1d): + assert np.array_equal(p1d.grid.rho_tor_norm, [0.0, 1.0]) + assert np.array_equal(p1d.t_i_average, np.array([2.0, 1.0]) * (i + 11)) + assert len(p1d.ion) == 1 + assert p1d.ion[0].label == "D" + assert p1d.ion[0].z_ion == 1 + assert np.array_equal(p1d.ion[0].temperature, p1d.t_i_average) + assert p1d.ion[0].temperature_validity == 0 + + assert np.array_equal(cp.global_quantities.ip, (2 - cp.time) ** 0.5) + + +def test_get_sample_heterogeneous(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + eq = entry.get_sample("equilibrium", -1.0, 0.2) + # Main time array + assert np.array_equal(eq.time, np.linspace(0, 2, 512)[:52]) + # grids_ggd AoS + assert len(eq.grids_ggd) == 1 + assert eq.grids_ggd[0].time == 0.0 + assert eq.grids_ggd[0].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" + # time_slice AoS + assert len(eq.time_slice) == 2 + assert eq.time_slice[0].time == 0.0 + assert eq.time_slice[1].time == 0.2 + + for i in range(2): + p2d = eq.time_slice[i].profiles_2d[0] + assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) + assert np.array_equal(p2d.grid.dim2, [3.0, 4.0]) + assert p2d.grid_type.name == f"test {i}" + assert p2d.grid_type.index == -1 + assert np.array_equal(p2d.r, [[0.0, 0.0], [1.0, 1.0]]) + assert np.array_equal(p2d.z, [[3.0, 4.0], [3.0, 4.0]]) + expected_psi = (p2d.r - p2d.z) * (1 + eq.time_slice[i].time) ** 2 + assert np.array_equal(p2d.psi, expected_psi) + + mag = entry.get_sample("magnetics", 0.25, 0.75) + assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS + assert len(mag.time) == 0 + assert len(mag.flux_loop) == 3 + for i in range(3): + fl = mag.flux_loop[i] + + flux_time = np.linspace(0.0123, 1, 5 + i) + flux_time = flux_time[0.25 <= flux_time] + flux_time = flux_time[flux_time <= 0.75] + assert np.array_equal(fl.flux.time, flux_time) + assert np.array_equal(fl.flux.data, 2 + 2 * flux_time) + + voltage_time = np.linspace(0.0123, 1, 8 + i) + voltage_time = voltage_time[0.25 <= voltage_time] + voltage_time = voltage_time[voltage_time <= 0.75] + assert np.array_equal(fl.voltage.time, voltage_time) + assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) + + +def test_get_sample_homogeneous_linear_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + # Note requesting 0.401 and not 0.4, since + # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 + cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, LINEAR_INTERP) + assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 6 + # Check some interpolated values + for i in range(6): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0])) + # Check t_i_average + expected = np.array([2.0, 1.0]) * (1 + 31 * cp.time[i]) + t_i_average = cp.profiles_1d[i].t_i_average + assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) + + +def test_get_sample_homogeneous_explicit_timebase(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + times = [0.1, 0.2345, 0.5, np.sqrt(2) / 2] + cp = entry.get_sample("core_profiles", 0, 0, times, LINEAR_INTERP) + assert np.allclose(cp.time, times, rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 4 + # Check some interpolated values + for i in range(4): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert 
np.array_equal(rho_tor_norm, np.array([0.0, 1.0])) + # Check t_i_average + expected = np.array([2.0, 1.0]) * (1 + 31 * cp.time[i]) + t_i_average = cp.profiles_1d[i].t_i_average + assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) + + +def test_get_sample_homogeneous_previous_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + # Note requesting 0.401 and not 0.4, since + # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 + cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, PREVIOUS_INTERP) + assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 6 + # Check some interpolated values + for i in range(6): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0])) + # Check t_i_average + expected = np.array([2.0, 1.0]) * [10, 10, 11, 12, 12, 13][i] + t_i_average = cp.profiles_1d[i].t_i_average + assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) + + +def test_get_sample_homogeneous_closest_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + # Note requesting 0.401 and not 0.4, since + # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 + cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, CLOSEST_INTERP) + assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 6 + # Check some interpolated values + for i in range(6): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0])) + # Check t_i_average + expected = np.array([2.0, 1.0]) * [10, 11, 12, 12, 13, 13][i] + t_i_average = cp.profiles_1d[i].t_i_average + assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) + + +def test_get_sample_heterogeneous_linear_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, LINEAR_INTERP) + N_samples = 7 + # IDS becomes homogeneous after resampling + assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples)) + assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + + # Check interpolated grids_ggd + assert len(eq.grids_ggd) == N_samples + for i in range(N_samples): + assert eq.grids_ggd[i].time == EMPTY_FLOAT + assert len(eq.grids_ggd[i].grid) == 1 + assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" + + # Check interpolated time_slice + assert len(eq.time_slice) == N_samples + for i in range(N_samples): + assert eq.time_slice[i].time == EMPTY_FLOAT + assert len(eq.time_slice[i].profiles_2d) == 1 + p2d = eq.time_slice[i].profiles_2d[0] + assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) + assert np.array_equal(p2d.grid.dim2, [3.0, 4.0]) + + # Determine the data as we have stored it in test_db_uri() + time = eq.time[i] + original_times = [0, 0.2, 0.4, 0.6, 0.8, 1.0] + index = np.searchsorted(original_times, time) + prevtime = original_times[index - 1] + nexttime = original_times[index] + prevpsi = (p2d.r - p2d.z) * (1 + prevtime) ** 2 + nextpsi = (p2d.r - p2d.z) * (1 + nexttime) ** 2 + # Linear interpolation + expected_psi = (nextpsi * (time - prevtime) + prevpsi * (nexttime - time)) / ( + nexttime - prevtime + ) + assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0) + + mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, LINEAR_INTERP) + assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + assert np.allclose(mag.time, np.linspace(0.2, 
0.5, N_samples)) + + assert len(mag.flux_loop) == 3 + for i in range(3): + fl = mag.flux_loop[i] + assert np.allclose(fl.flux.data, 2 + 2 * mag.time, rtol=1e-14, atol=0) + assert np.allclose(fl.voltage.data, 2 - 5 * mag.time, rtol=1e-14, atol=2e-16) + + +def test_get_sample_heterogeneous_previous_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, PREVIOUS_INTERP) + N_samples = 7 + # IDS becomes homogeneous after resampling + assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples)) + assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + + # Check interpolated grids_ggd + assert len(eq.grids_ggd) == N_samples + for i in range(N_samples): + assert eq.grids_ggd[i].time == EMPTY_FLOAT + assert len(eq.grids_ggd[i].grid) == 1 + assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" + + # Check interpolated time_slice + assert len(eq.time_slice) == N_samples + for i in range(N_samples): + assert eq.time_slice[i].time == EMPTY_FLOAT + assert len(eq.time_slice[i].profiles_2d) == 1 + p2d = eq.time_slice[i].profiles_2d[0] + assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) + assert np.array_equal(p2d.grid.dim2, [3.0, 4.0]) + + origtime = [0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.4][i] + expected_psi = (p2d.r - p2d.z) * (1 + origtime) ** 2 + assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0) + + mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, PREVIOUS_INTERP) + assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples)) + + assert len(mag.flux_loop) == 3 + for i in range(3): + fl = mag.flux_loop[i] + + flux_time = np.linspace(0.0123, 1, 5 + i) + flux_time = flux_time[np.searchsorted(flux_time, mag.time, side="right") - 1] + assert np.array_equal(fl.flux.data, 2 + 2 * flux_time) + + voltage_time = np.linspace(0.0123, 1, 8 + i) + voltage_time = voltage_time[ + np.searchsorted(voltage_time, mag.time, side="right") - 1 + ] + assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) + + +def test_get_sample_heterogeneous_closest_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, CLOSEST_INTERP) + N_samples = 7 + # IDS becomes homogeneous after resampling + assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples)) + assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + + # Check interpolated grids_ggd + assert len(eq.grids_ggd) == N_samples + for i in range(N_samples): + assert eq.grids_ggd[i].time == EMPTY_FLOAT + assert len(eq.grids_ggd[i].grid) == 1 + assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" + + # Check interpolated time_slice + assert len(eq.time_slice) == N_samples + for i in range(N_samples): + assert eq.time_slice[i].time == EMPTY_FLOAT + assert len(eq.time_slice[i].profiles_2d) == 1 + p2d = eq.time_slice[i].profiles_2d[0] + assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) + assert np.array_equal(p2d.grid.dim2, [3.0, 4.0]) + + # Note: CLOSEST appears to round up: 0.4 is closer to 0.3 than 0.2 + origtime = [0.2, 0.2, 0.4, 0.4, 0.4, 0.4, 0.6][i] + expected_psi = (p2d.r - p2d.z) * (1 + origtime) ** 2 + assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0) + + mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, CLOSEST_INTERP) + assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples)) + + 
assert len(mag.flux_loop) == 3 + for i in range(3): + fl = mag.flux_loop[i] + + flux_time = np.linspace(0.0123, 1, 5 + i) + flux_time = flux_time[ + np.argmin(np.abs(flux_time[None, :] - mag.time[:, None]), axis=1) + ] + assert np.array_equal(fl.flux.data, 2 + 2 * flux_time) + + voltage_time = np.linspace(0.0123, 1, 8 + i) + voltage_time = voltage_time[ + np.argmin(np.abs(voltage_time[None, :] - mag.time[:, None]), axis=1) + ] + assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) From fd49e02171ab03cb997303bb31eb9543db2ae251 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 9 Jul 2024 14:16:14 +0200 Subject: [PATCH 02/29] Add docstrings for GetSliceParameters / GetSampleParameters --- imaspy/backends/db_entry_impl.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/imaspy/backends/db_entry_impl.py b/imaspy/backends/db_entry_impl.py index dbbb1329..9fa42bd8 100644 --- a/imaspy/backends/db_entry_impl.py +++ b/imaspy/backends/db_entry_impl.py @@ -17,7 +17,9 @@ class GetSliceParameters: """Helper class to store parameters to get_slice.""" time_requested: float + """See :param:`imaspy.db_entry.DBEntry.get_slice.time_requested`.""" interpolation_method: int + """See :param:`imaspy.db_entry.DBEntry.get_slice.interpolation_method`.""" @dataclass @@ -25,9 +27,13 @@ class GetSampleParameters: """Helper class to store parameters to get_sample.""" tmin: float + """See :param:`imaspy.db_entry.DBEntry.get_sample.tmin`.""" tmax: float + """See :param:`imaspy.db_entry.DBEntry.get_sample.tmax`.""" dtime: Optional[numpy.ndarray] + """See :param:`imaspy.db_entry.DBEntry.get_sample.dtime`.""" interpolation_method: Optional[int] + """See :param:`imaspy.db_entry.DBEntry.get_sample.interpolation_method`.""" class DBEntryImpl(ABC): From 73268bb212c06c1dc23223a24e31537d8cffbee6 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 11 Nov 2024 10:11:44 +0100 Subject: [PATCH 03/29] Update tests for DD 4.0.0 --- imaspy/test/test_get_sample.py | 54 +++++++++++++++------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py index 7c3b210f..beffe52d 100644 --- a/imaspy/test/test_get_sample.py +++ b/imaspy/test/test_get_sample.py @@ -6,7 +6,6 @@ from imaspy.exception import DataEntryException from imaspy.ids_defs import ( CLOSEST_INTERP, - EMPTY_FLOAT, HDF5_BACKEND, IDS_TIME_MODE_HETEROGENEOUS, IDS_TIME_MODE_HOMOGENEOUS, @@ -28,7 +27,7 @@ def test_db_uri(backend, worker_id, tmp_path_factory): tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}") backend_str = {HDF5_BACKEND: "hdf5", MDSPLUS_BACKEND: "mdsplus"}[backend] uri = f"imas:{backend_str}?path={tmp_path}" - entry = imaspy.DBEntry(uri, "x") + entry = imaspy.DBEntry(uri, "x", dd_version="4.0.0") # Homogeneous core profiles: cp = entry.factory.core_profiles() @@ -42,7 +41,7 @@ def test_db_uri(backend, worker_id, tmp_path_factory): cp.profiles_1d[i].t_i_average = np.array([2.0, 1.0]) * (i + 1) cp.profiles_1d[i].ion.resize(1) # STR_0D: - cp.profiles_1d[i].ion[0].label = "D" + cp.profiles_1d[i].ion[0].name = "D" # FLT_0D cp.profiles_1d[i].ion[0].z_ion = 1.0 cp.profiles_1d[i].ion[0].temperature = cp.profiles_1d[i].t_i_average @@ -100,8 +99,12 @@ def test_db_uri(backend, worker_id, tmp_path_factory): return uri -def test_invalid_arguments(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +@pytest.fixture() +def entry(test_db_uri): + return imaspy.DBEntry(test_db_uri, "r", dd_version="4.0.0") + + +def test_invalid_arguments(entry): with 
pytest.raises(ValueError): entry.get_sample("core_profiles", 0.3, 0.2) # tmin > tmax with pytest.raises(DataEntryException): @@ -110,8 +113,7 @@ def test_invalid_arguments(test_db_uri): entry.get_sample("core_profiles", 0.1, 0.2, 0.05) # no interpolation method -def test_get_sample_homogeneous(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous(entry): cp = entry.get_sample("core_profiles", 0.3, 14 / 31) assert np.array_equal(cp.time, np.linspace(0, 1, 32)[10:15]) @@ -119,7 +121,7 @@ def test_get_sample_homogeneous(test_db_uri): assert np.array_equal(p1d.grid.rho_tor_norm, [0.0, 1.0]) assert np.array_equal(p1d.t_i_average, np.array([2.0, 1.0]) * (i + 11)) assert len(p1d.ion) == 1 - assert p1d.ion[0].label == "D" + assert p1d.ion[0].name == "D" assert p1d.ion[0].z_ion == 1 assert np.array_equal(p1d.ion[0].temperature, p1d.t_i_average) assert p1d.ion[0].temperature_validity == 0 @@ -127,8 +129,7 @@ def test_get_sample_homogeneous(test_db_uri): assert np.array_equal(cp.global_quantities.ip, (2 - cp.time) ** 0.5) -def test_get_sample_heterogeneous(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous(entry): eq = entry.get_sample("equilibrium", -1.0, 0.2) # Main time array assert np.array_equal(eq.time, np.linspace(0, 2, 512)[:52]) @@ -172,8 +173,7 @@ def test_get_sample_heterogeneous(test_db_uri): assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) -def test_get_sample_homogeneous_linear_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous_linear_interp(entry): # Note requesting 0.401 and not 0.4, since # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, LINEAR_INTERP) @@ -191,8 +191,7 @@ def test_get_sample_homogeneous_linear_interp(test_db_uri): assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) -def test_get_sample_homogeneous_explicit_timebase(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous_explicit_timebase(entry): times = [0.1, 0.2345, 0.5, np.sqrt(2) / 2] cp = entry.get_sample("core_profiles", 0, 0, times, LINEAR_INTERP) assert np.allclose(cp.time, times, rtol=1e-14, atol=0) @@ -209,8 +208,7 @@ def test_get_sample_homogeneous_explicit_timebase(test_db_uri): assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) -def test_get_sample_homogeneous_previous_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous_previous_interp(entry): # Note requesting 0.401 and not 0.4, since # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, PREVIOUS_INTERP) @@ -228,8 +226,7 @@ def test_get_sample_homogeneous_previous_interp(test_db_uri): assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) -def test_get_sample_homogeneous_closest_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous_closest_interp(entry): # Note requesting 0.401 and not 0.4, since # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, CLOSEST_INTERP) @@ -247,8 +244,7 @@ def test_get_sample_homogeneous_closest_interp(test_db_uri): assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) -def test_get_sample_heterogeneous_linear_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous_linear_interp(entry): eq = 
entry.get_sample("equilibrium", 0.2, 0.501, 0.05, LINEAR_INTERP) N_samples = 7 # IDS becomes homogeneous after resampling @@ -258,14 +254,14 @@ def test_get_sample_heterogeneous_linear_interp(test_db_uri): # Check interpolated grids_ggd assert len(eq.grids_ggd) == N_samples for i in range(N_samples): - assert eq.grids_ggd[i].time == EMPTY_FLOAT + # assert eq.grids_ggd[i].time == EMPTY_FLOAT assert len(eq.grids_ggd[i].grid) == 1 assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" # Check interpolated time_slice assert len(eq.time_slice) == N_samples for i in range(N_samples): - assert eq.time_slice[i].time == EMPTY_FLOAT + # assert eq.time_slice[i].time == EMPTY_FLOAT assert len(eq.time_slice[i].profiles_2d) == 1 p2d = eq.time_slice[i].profiles_2d[0] assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) @@ -296,8 +292,7 @@ def test_get_sample_heterogeneous_linear_interp(test_db_uri): assert np.allclose(fl.voltage.data, 2 - 5 * mag.time, rtol=1e-14, atol=2e-16) -def test_get_sample_heterogeneous_previous_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous_previous_interp(entry): eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, PREVIOUS_INTERP) N_samples = 7 # IDS becomes homogeneous after resampling @@ -307,14 +302,14 @@ def test_get_sample_heterogeneous_previous_interp(test_db_uri): # Check interpolated grids_ggd assert len(eq.grids_ggd) == N_samples for i in range(N_samples): - assert eq.grids_ggd[i].time == EMPTY_FLOAT + # assert eq.grids_ggd[i].time == EMPTY_FLOAT assert len(eq.grids_ggd[i].grid) == 1 assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" # Check interpolated time_slice assert len(eq.time_slice) == N_samples for i in range(N_samples): - assert eq.time_slice[i].time == EMPTY_FLOAT + # assert eq.time_slice[i].time == EMPTY_FLOAT assert len(eq.time_slice[i].profiles_2d) == 1 p2d = eq.time_slice[i].profiles_2d[0] assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) @@ -343,8 +338,7 @@ def test_get_sample_heterogeneous_previous_interp(test_db_uri): assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) -def test_get_sample_heterogeneous_closest_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous_closest_interp(entry): eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, CLOSEST_INTERP) N_samples = 7 # IDS becomes homogeneous after resampling @@ -354,14 +348,14 @@ def test_get_sample_heterogeneous_closest_interp(test_db_uri): # Check interpolated grids_ggd assert len(eq.grids_ggd) == N_samples for i in range(N_samples): - assert eq.grids_ggd[i].time == EMPTY_FLOAT + # assert eq.grids_ggd[i].time == EMPTY_FLOAT assert len(eq.grids_ggd[i].grid) == 1 assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" # Check interpolated time_slice assert len(eq.time_slice) == N_samples for i in range(N_samples): - assert eq.time_slice[i].time == EMPTY_FLOAT + # assert eq.time_slice[i].time == EMPTY_FLOAT assert len(eq.time_slice[i].profiles_2d) == 1 p2d = eq.time_slice[i].profiles_2d[0] assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) From bc9ea191da4626ec92905b68352d3e12d2a10c08 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 11 Nov 2024 10:47:46 +0100 Subject: [PATCH 04/29] Update NCDBEntryImpl for get_sample and raise NotImplementedError --- imaspy/backends/netcdf/db_entry_nc.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py 
b/imaspy/backends/netcdf/db_entry_nc.py index ba7334fc..f04630db 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -1,9 +1,13 @@ """DBEntry implementation using NetCDF as a backend.""" import logging -from typing import List +from typing import List, Optional, Union -from imaspy.backends.db_entry_impl import DBEntryImpl +from imaspy.backends.db_entry_impl import ( + DBEntryImpl, + GetSampleParameters, + GetSliceParameters, +) from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import nc2ids from imaspy.exception import DataEntryException @@ -74,15 +78,18 @@ def get( self, ids_name: str, occurrence: int, - time_requested: float | None, - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: IDSToplevel, lazy: bool, - nbc_map: NBCPathMap | None, + nbc_map: Optional[NBCPathMap], ) -> None: # Feature compatibility checks - if time_requested is not None: - raise NotImplementedError("`get_slice` is not available for netCDF files.") + if parameters is not None: + if isinstance(parameters, GetSliceParameters): + func = "get_slice" + else: + func = "get_sample" + raise NotImplementedError(f"`{func}` is not available for netCDF files.") if lazy: raise NotImplementedError( "Lazy loading is not implemented for netCDF files." From 674460bbece63ccbd51f03e1d644a7916d250a6f Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 11 Nov 2024 10:49:14 +0100 Subject: [PATCH 05/29] Set `begin_timerange_action` as available since AL core 5.4 --- imaspy/backends/imas_core/imas_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imaspy/backends/imas_core/imas_interface.py b/imaspy/backends/imas_core/imas_interface.py index cca7d42f..b92438b1 100644 --- a/imaspy/backends/imas_core/imas_interface.py +++ b/imaspy/backends/imas_core/imas_interface.py @@ -215,12 +215,12 @@ def get_occurrences(self, ctx, ids_name): def get_al_version(self): return self._al_version_str - # New methods added in AL 5.3 + # New methods added in AL 5.4 def begin_timerange_action( self, ctx, path, rwmode, tmin, tmax, dtime, interpolation_method ): - raise self._minimal_version("5.3") + raise self._minimal_version("5.4") # Dummy documentation for interface: From 0a47f94e97467d6e57c65e335590daab25f021b0 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 14 Nov 2024 10:17:49 +0100 Subject: [PATCH 06/29] Make the NC2IDS reader class-based In preparation for validating the NC data --- imaspy/backends/netcdf/nc2ids.py | 157 ++++++++++++++++++------------- 1 file changed, 89 insertions(+), 68 deletions(-) diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index d071a3ba..2877b297 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -54,10 +54,98 @@ def _tree_iter( yield from _tree_iter(node, paths, curindex + (i,)) +class NC2IDS: + """Class responsible for reading an IDS from a NetCDF group.""" + + def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: + """Initialize NC2IDS converter. + + Args: + group: NetCDF group that stores the IDS data. + ids: Corresponding IDS toplevel to store the data in. 
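+
+        A minimal usage sketch (the group layout shown here is hypothetical):
+
+        .. code-block:: python
+
+            with netCDF4.Dataset("entry.nc") as dataset:
+                ids = IDSFactory().core_profiles()
+                NC2IDS(dataset["core_profiles/0"], ids).run()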
+ """ + self.group = group + """NetCDF Group that the IDS is stored in.""" + self.ids = ids + """IDS to store the data in.""" + + self.ncmeta = NCMetadata(ids.metadata) + """NetCDF related metadata.""" + self.variables = list(group.variables) + """List of variable names stored in the netCDF group.""" + # TODO: validate ids_properties.homogeneous_time + self.homogeneous_time = ( + group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS + ) + """True iff the IDS time mode is homogeneous.""" + + # Don't use masked arrays: they're slow and we'll handle most of the unset + # values through the `:shape` arrays + self.group.set_auto_mask(False) + + def run(self) -> None: + # FIXME: ensure that var_names are sorted properly + # Current assumption is that creation-order is fine + for var_name in self.variables: + if var_name.endswith(":shape"): + continue # TODO: validate that this is used + + # FIXME: error handling: + metadata = self.ids.metadata[var_name] + + # TODO: validate metadata (data type, units, etc.) conforms to DD + + if metadata.data_type is IDSDataType.STRUCTURE: + continue # This only contains DD metadata we already know + + var = self.group[var_name] + if metadata.data_type is IDSDataType.STRUCT_ARRAY: + if "sparse" in var.ncattrs(): + shapes = self.group[var_name + ":shape"][()] + for index, node in tree_iter(self.ids, metadata): + node.resize(shapes[index][0]) + + else: + # FIXME: extract dimension name from nc file? + dim = self.ncmeta.get_dimensions( + metadata.path_string, self.homogeneous_time + )[-1] + size = self.group.dimensions[dim].size + for _, node in tree_iter(self.ids, metadata): + node.resize(size) + + continue + + # FIXME: this may be a gigantic array, not required for sparse data + var = self.group[var_name] + data = var[()] + + if "sparse" in var.ncattrs(): + if metadata.ndim: + shapes = self.group[var_name + ":shape"][()] + for index, node in tree_iter(self.ids, metadata): + shape = shapes[index] + if shape.all(): + node.value = data[index + tuple(map(slice, shapes[index]))] + else: + for index, node in tree_iter(self.ids, metadata): + value = data[index] + if value != getattr(var, "_FillValue", None): + node.value = data[index] + + elif metadata.path_string not in self.ncmeta.aos: + # Shortcut for assigning untensorized data + self.ids[metadata.path] = data + + else: + for index, node in tree_iter(self.ids, metadata): + node.value = data[index] + + def nc2ids(group: netCDF4.Group, ids: IDSToplevel): """Get data from the netCDF group and store it in the provided IDS.""" try: - _nc2ids(group, ids) + NC2IDS(group, ids).run() except Exception as exc: raise RuntimeError( "An error occurred while reading data from the netCDF file " @@ -66,70 +154,3 @@ def nc2ids(group: netCDF4.Group, ids: IDSToplevel): "may cause errors in IMASPy. A more robust mechanism to load IDS data from " "netCDF files will be included in the next release of IMASPy." 
) from exc - - -def _nc2ids(group: netCDF4.Group, ids: IDSToplevel): - var_names = list(group.variables) - # FIXME: ensure that var_names are sorted properly - # Current assumption is that creation-order is fine - homogeneous_time = ( - group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS - ) - ncmeta = NCMetadata(ids.metadata) - - # Never return masked arrays, they're slow and we'll handle most of the unset values - # through the `:shape` arrays - group.set_auto_mask(False) - - for var_name in var_names: - if var_name.endswith(":shape"): - continue # TODO: validate that this is used - - # FIXME: error handling: - metadata = ids.metadata[var_name] - - # TODO: validate metadata (data type, units, etc.) conforms to DD - - if metadata.data_type is IDSDataType.STRUCTURE: - continue # This only contains DD metadata we already know - - var = group[var_name] - if metadata.data_type is IDSDataType.STRUCT_ARRAY: - if "sparse" in var.ncattrs(): - shapes = group[var_name + ":shape"][()] - for index, node in tree_iter(ids, metadata): - node.resize(shapes[index][0]) - - else: - # FIXME: extract dimension name from nc file? - dim = ncmeta.get_dimensions(metadata.path_string, homogeneous_time)[-1] - size = group.dimensions[dim].size - for _, node in tree_iter(ids, metadata): - node.resize(size) - - continue - - # FIXME: this may be a gigantic array, not required for sparse data - var = group[var_name] - data = var[()] - - if "sparse" in var.ncattrs(): - if metadata.ndim: - shapes = group[var_name + ":shape"][()] - for index, node in tree_iter(ids, metadata): - shape = shapes[index] - if shape.all(): - node.value = data[index + tuple(map(slice, shapes[index]))] - else: - for index, node in tree_iter(ids, metadata): - value = data[index] - if value != getattr(var, "_FillValue", None): - node.value = data[index] - - elif metadata.path_string not in ncmeta.aos: - # Shortcut for assigning untensorized data - ids[metadata.path] = data - - else: - for index, node in tree_iter(ids, metadata): - node.value = data[index] From d59fcabc29d5691004649bcf48016255010d7fa7 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 14 Nov 2024 14:35:30 +0100 Subject: [PATCH 07/29] Add missing docstring --- imaspy/backends/netcdf/nc2ids.py | 1 + 1 file changed, 1 insertion(+) diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index 2877b297..cc3ebc25 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -84,6 +84,7 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: self.group.set_auto_mask(False) def run(self) -> None: + """Load the data from the netCDF group into the IDS.""" # FIXME: ensure that var_names are sorted properly # Current assumption is that creation-order is fine for var_name in self.variables: From ee385b736cb3f10a7378343f2de59e46815b26b7 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 15 Nov 2024 11:46:33 +0100 Subject: [PATCH 08/29] Disable MDSplus backend tests for get_sample Feature not yet implemented, see IMAS-5593 --- imaspy/test/test_get_sample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py index beffe52d..0f5fed3e 100644 --- a/imaspy/test/test_get_sample.py +++ b/imaspy/test/test_get_sample.py @@ -21,7 +21,8 @@ def test_db_uri(backend, worker_id, tmp_path_factory): if not hasattr(lowlevel, "al_begin_timerange_action"): pytest.skip("imas_core version doesn't support begin_timerange_action.") - if 
backend not in [HDF5_BACKEND, MDSPLUS_BACKEND]: + # TODO: add MDSPLUS_BACKEND once implemented, see IMAS-5593 + if backend not in [HDF5_BACKEND]: pytest.skip("Backend doesn't support time range operations.") tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}") From b007316a44e07a80a8ccad67f62b017a537b2332 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 13 Nov 2024 16:32:25 +0100 Subject: [PATCH 09/29] Add validation for the ids_properties.homogeneous time variable in netCDF IDSs. --- imaspy/backends/netcdf/ids2nc.py | 8 +++--- imaspy/backends/netcdf/nc2ids.py | 47 +++++++++++++++++++++++++++---- imaspy/exception.py | 4 +++ imaspy/test/test_nc_validation.py | 36 +++++++++++++++++++++++ 4 files changed, 85 insertions(+), 10 deletions(-) create mode 100644 imaspy/test/test_nc_validation.py diff --git a/imaspy/backends/netcdf/ids2nc.py b/imaspy/backends/netcdf/ids2nc.py index 9fad4044..34e63101 100644 --- a/imaspy/backends/netcdf/ids2nc.py +++ b/imaspy/backends/netcdf/ids2nc.py @@ -23,10 +23,10 @@ IDSDataType.CPX: netCDF4.default_fillvals["f8"] * (1 + 1j), } dtypes = { - IDSDataType.INT: numpy.int32, + IDSDataType.INT: numpy.dtype(numpy.int32), IDSDataType.STR: str, - IDSDataType.FLT: numpy.float64, - IDSDataType.CPX: numpy.complex128, + IDSDataType.FLT: numpy.dtype(numpy.float64), + IDSDataType.CPX: numpy.dtype(numpy.complex128), } SHAPE_DTYPE = numpy.int32 @@ -188,7 +188,7 @@ def create_variables(self) -> None: kwargs = {} if dtype is not str: # Enable compression: kwargs.update(compression="zlib", complevel=1) - if dtype is not numpy.complex128: # Set fillvalue + if dtype is not dtypes[IDSDataType.CPX]: # Set fillvalue kwargs.update(fill_value=default_fillvals[metadata.data_type]) # Create variable dimensions = get_dimensions(path, self.homogeneous_time) diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index cc3ebc25..24cbc7b3 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -2,7 +2,9 @@ import netCDF4 +from imaspy.backends.netcdf import ids2nc from imaspy.backends.netcdf.nc_metadata import NCMetadata +from imaspy.exception import InvalidNetCDFEntry from imaspy.ids_base import IDSBase from imaspy.ids_data_type import IDSDataType from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS @@ -73,16 +75,27 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: """NetCDF related metadata.""" self.variables = list(group.variables) """List of variable names stored in the netCDF group.""" - # TODO: validate ids_properties.homogeneous_time - self.homogeneous_time = ( - group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS - ) - """True iff the IDS time mode is homogeneous.""" - # Don't use masked arrays: they're slow and we'll handle most of the unset # values through the `:shape` arrays self.group.set_auto_mask(False) + # Validate and get value of ids_properties.homogeneous_time + self.homogeneous_time = True # Must be initialized for self._validate_variable + """True iff the IDS time mode is homogeneous.""" + + if "ids_properties.homogeneous_time" not in self.variables: + raise InvalidNetCDFEntry( + "Mandatory variable `ids_properties.homogeneous_time` does not exist." + ) + var = group["ids_properties.homogeneous_time"] + self._validate_variable(var, ids.ids_properties.homogeneous_time.metadata) + if var[()] not in [0, 1, 2]: + raise InvalidNetCDFEntry( + f"Invalid value for ids_properties.homogeneous_time: {var[()]}. " + "Was expecting: 0, 1 or 2." 
+ ) + self.homogeneous_time = var[()] == IDS_TIME_MODE_HOMOGENEOUS + def run(self) -> None: """Load the data from the netCDF group into the IDS.""" # FIXME: ensure that var_names are sorted properly @@ -142,6 +155,28 @@ def run(self) -> None: for index, node in tree_iter(self.ids, metadata): node.value = data[index] + def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None: + """Validate that the variable has correct metadata, raise an exception if not. + + Args: + var: NetCDF variable + metadata: IDSMetadata of the corresponding IDS object + """ + if var.dtype != ids2nc.dtypes[metadata.data_type]: + raise InvalidNetCDFEntry( + f"Variable {var.name} has incorrect data type: {var.dtype}. " + f"Was expecting: {ids2nc.dtypes[metadata.data_type]}." + ) + # Dimensions + expected_dims = self.ncmeta.get_dimensions( + metadata.path_string, self.homogeneous_time + ) + if var.dimensions != expected_dims: + raise InvalidNetCDFEntry( + f"Variable {var.name} has incorrect dimensions: {var.dimensions}. " + f"Was expecting: {expected_dims}." + ) + def nc2ids(group: netCDF4.Group, ids: IDSToplevel): """Get data from the netCDF group and store it in the provided IDS.""" diff --git a/imaspy/exception.py b/imaspy/exception.py index 8377d13b..550ce2ed 100644 --- a/imaspy/exception.py +++ b/imaspy/exception.py @@ -101,3 +101,7 @@ def __init__(self, node, dimension, expected_size, coor_path): super().__init__( f"Element `{node._path}` has incorrect shape {node.shape}: {details}" ) + + +class InvalidNetCDFEntry(Exception): + """Error raised when loading an IDS from a NetCDF file that fails validation.""" diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py new file mode 100644 index 00000000..dc5309f0 --- /dev/null +++ b/imaspy/test/test_nc_validation.py @@ -0,0 +1,36 @@ +import netCDF4 +import pytest + +from imaspy.backends.netcdf.nc2ids import NC2IDS +from imaspy.exception import InvalidNetCDFEntry +from imaspy.ids_factory import IDSFactory + + +@pytest.fixture() +def memfile(): + with netCDF4.Dataset("-", "w", diskless=True) as memfile: + yield memfile + + +def test_invalid_homogeneous_time(memfile): + empty_group = memfile.createGroup("empty_group") + # Invalid dtype + invalid_dtype = memfile.createGroup("invalid_dtype") + invalid_dtype.createVariable("ids_properties.homogeneous_time", float, ())[()] = 0 + # Invalid shape: 1D instead of 0D + invalid_shape = memfile.createGroup("invalid_shape") + invalid_shape.createDimension("dim") + invalid_shape.createVariable("ids_properties.homogeneous_time", "i4", ("dim",)) + # Invalid value: not 0, 1 or 2 + invalid_value = memfile.createGroup("invalid_value") + invalid_value.createVariable("ids_properties.homogeneous_time", "i4", ()) + + ids = IDSFactory().core_profiles() + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(empty_group, ids) # ids_properties.homogeneous_time does not exist + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(invalid_dtype, ids) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(invalid_shape, ids) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(invalid_value, ids) From f7be3845994c6510be29a35d47b58b7504fbaa7a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 15 Nov 2024 16:05:27 +0100 Subject: [PATCH 10/29] Additional validation checks and tests --- imaspy/backends/netcdf/nc2ids.py | 109 ++++++++++++++++++++++++++---- imaspy/test/test_nc_validation.py | 69 ++++++++++++++++++- 2 files changed, 164 insertions(+), 14 deletions(-) diff --git a/imaspy/backends/netcdf/nc2ids.py 
b/imaspy/backends/netcdf/nc2ids.py index 24cbc7b3..e2cf65b3 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -1,3 +1,4 @@ +import logging from typing import Iterator, List, Tuple import netCDF4 @@ -12,6 +13,15 @@ from imaspy.ids_structure import IDSStructure from imaspy.ids_toplevel import IDSToplevel +logger = logging.getLogger(__name__) + + +def variable_error(var, issue, value, expected=None) -> InvalidNetCDFEntry: + return InvalidNetCDFEntry( + f"Variable `{var.name}` has incorrect {issue}: `{value}`." + + (f" Was expecting `{expected}`." if expected is not None else "") + ) + def split_on_aos(metadata: IDSMetadata): paths = [] @@ -98,6 +108,7 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: def run(self) -> None: """Load the data from the netCDF group into the IDS.""" + self._validate_variables() # FIXME: ensure that var_names are sorted properly # Current assumption is that creation-order is fine for var_name in self.variables: @@ -155,6 +166,42 @@ def run(self) -> None: for index, node in tree_iter(self.ids, metadata): node.value = data[index] + def _validate_variables(self) -> None: + """Validate that all variables in the netCDF Group exist and match the DD.""" + self.variables.sort() + for var_name in self.variables: + if var_name.endswith(":shape"): + # Check that there is a corresponding variable + data_var = var_name.rpartition(":shape")[0] + if data_var not in self.variables: + raise InvalidNetCDFEntry( + f"Invalid netCDF variable: {var_name}. " + f"Shape information provided for non-existing {data_var}." + ) + # Corresponding variable must be sparse + if "sparse" not in self.group[data_var].ncattrs(): + raise InvalidNetCDFEntry( + f"Shape information provided for {data_var}, but this variable " + "is not sparse." + ) + # That's all for :shape arrays + continue + + # Check that the DD defines this variable, and validate its metadata + var = self.group[var_name] + try: + metadata = self.ids.metadata[var_name] + except KeyError: + raise InvalidNetCDFEntry( + f"Invalid variable {var_name}: no such variable exists in the " + f"{self.ids.metadata.name} IDS." + ) + self._validate_variable(var, metadata) + + # Validate sparsity metadata + if "sparse" in var.ncattrs(): + ... # TODO + def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None: """Validate that the variable has correct metadata, raise an exception if not. @@ -162,20 +209,58 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No var: NetCDF variable metadata: IDSMetadata of the corresponding IDS object """ - if var.dtype != ids2nc.dtypes[metadata.data_type]: - raise InvalidNetCDFEntry( - f"Variable {var.name} has incorrect data type: {var.dtype}. " - f"Was expecting: {ids2nc.dtypes[metadata.data_type]}." + attrs: dict = vars(var).copy() + attrs.pop("_FillValue", None) + if metadata.data_type not in [IDSDataType.STRUCTURE, IDSDataType.STRUCT_ARRAY]: + # Data type + expected_dtype = ids2nc.dtypes[metadata.data_type] + if var.dtype != expected_dtype: + raise variable_error(var, "data type", var.dtype, expected_dtype) + + # Dimensions + expected_dims = self.ncmeta.get_dimensions( + metadata.path_string, self.homogeneous_time ) - # Dimensions - expected_dims = self.ncmeta.get_dimensions( - metadata.path_string, self.homogeneous_time - ) - if var.dimensions != expected_dims: - raise InvalidNetCDFEntry( - f"Variable {var.name} has incorrect dimensions: {var.dimensions}. " - f"Was expecting: {expected_dims}." 
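+        # Expected dimension names are derived from the DD metadata and depend
+        # on the time mode, hence homogeneous_time is passed to get_dimensions.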
+ if var.dimensions != expected_dims: + raise variable_error(var, "dimensions", var.dimensions, expected_dims) + + # Coordinates + coordinates = str(attrs.pop("coordinates", "")) + expected_coordinates = self.ncmeta.get_coordinates( + metadata.path_string, self.homogeneous_time ) + if any(coord not in expected_coordinates for coord in coordinates.split()): + raise variable_error( + var, "coordinates", coordinates, " ".join(expected_coordinates) + ) + + # Ancillary variables + ancvar = attrs.pop("ancillary_variables", None) + if ancvar: + allowed_ancvar = [f"{var.name}_error_upper", f"{var.name}_error_lower"] + if any(var not in allowed_ancvar for var in ancvar.split()): + raise variable_error( + var, "ancillary_variables", ancvar, " ".join(allowed_ancvar) + ) + + # Units + units = attrs.pop("units", None) + if metadata.units and metadata.units != units: + raise variable_error(var, "units", units, metadata.units) + + # Sparse + sparse = attrs.pop("sparse", None) + if sparse is not None: + ... # TODO + + # Documentation + doc = attrs.pop("documentation", None) + if metadata.documentation != doc: + logger.warning("Documentation of variable %s differs from the DD", var.name) + + # Unknown attrs + if attrs: + raise variable_error(var, "attributes", list(attrs.keys())) def nc2ids(group: netCDF4.Group, ids: IDSToplevel): diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py index dc5309f0..d3bf8c09 100644 --- a/imaspy/test/test_nc_validation.py +++ b/imaspy/test/test_nc_validation.py @@ -1,8 +1,10 @@ import netCDF4 import pytest +from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import NC2IDS from imaspy.exception import InvalidNetCDFEntry +from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS from imaspy.ids_factory import IDSFactory @@ -12,7 +14,26 @@ def memfile(): yield memfile -def test_invalid_homogeneous_time(memfile): +@pytest.fixture() +def factory(): + return IDSFactory("4.0.0") + + +@pytest.fixture() +def memfile_with_ids(memfile, factory): + ids = factory.core_profiles() + ids.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + ids.time = [1.0, 2.0, 3.0] + ids.profiles_1d.resize(2) + for i in range(2): + ids.profiles_1d[i].grid.rho_tor_norm = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0] + IDS2NC(ids, memfile).run() + # This one is valid: + NC2IDS(memfile, factory.core_profiles()).run() + return memfile + + +def test_invalid_homogeneous_time(memfile, factory): empty_group = memfile.createGroup("empty_group") # Invalid dtype invalid_dtype = memfile.createGroup("invalid_dtype") @@ -25,7 +46,7 @@ def test_invalid_homogeneous_time(memfile): invalid_value = memfile.createGroup("invalid_value") invalid_value.createVariable("ids_properties.homogeneous_time", "i4", ()) - ids = IDSFactory().core_profiles() + ids = factory.core_profiles() with pytest.raises(InvalidNetCDFEntry): NC2IDS(empty_group, ids) # ids_properties.homogeneous_time does not exist with pytest.raises(InvalidNetCDFEntry): @@ -34,3 +55,47 @@ def test_invalid_homogeneous_time(memfile): NC2IDS(invalid_shape, ids) with pytest.raises(InvalidNetCDFEntry): NC2IDS(invalid_value, ids) + + +def test_invalid_units(memfile_with_ids, factory): + memfile_with_ids["time"].units = "hours" + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_invalid_documentation(memfile_with_ids, factory, caplog): + with caplog.at_level("WARNING"): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + assert not caplog.records + # 
Invalid docstring logs a warning + memfile_with_ids["time"].documentation = "https://en.wikipedia.org/wiki/Time" + with caplog.at_level("WARNING"): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + assert len(caplog.records) == 1 + + +def test_invalid_dimension_name(memfile_with_ids, factory): + memfile_with_ids.renameDimension("time", "T") + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_invalid_coordinates(memfile_with_ids, factory): + memfile_with_ids["profiles_1d.grid.rho_tor_norm"].coordinates = "xyz" + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_invalid_ancillary_variables(memfile_with_ids, factory): + memfile_with_ids["time"].ancillary_variables = "xyz" + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_extra_attributes(memfile_with_ids, factory): + memfile_with_ids["time"].new_attribute = [1, 2, 3] + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +# TODO: tests for sparsity information From e5246464d588069af3f0f25e5a0e00d41d7fd4ef Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 10:21:18 +0100 Subject: [PATCH 11/29] Fix a bug with lazy loading multiple IDSs from the same HDF5 DBEntry Ensure lazy contexts belonging to a different IDS are always closed. See IMAS-5603 for more details. --- imaspy/backends/imas_core/al_context.py | 4 ++++ imaspy/test/test_lazy_loading.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/imaspy/backends/imas_core/al_context.py b/imaspy/backends/imas_core/al_context.py index 07f37dec..10c0bf45 100644 --- a/imaspy/backends/imas_core/al_context.py +++ b/imaspy/backends/imas_core/al_context.py @@ -299,6 +299,10 @@ def get_context(self) -> ALContext: # from the cache else: + # Purge the cache to close open contexts from other IDSs (IMAS-5603) + cache = self.dbentry._lazy_ctx_cache + while cache: + cache.pop().close() return self.dbentry_ctx @contextmanager diff --git a/imaspy/test/test_lazy_loading.py b/imaspy/test/test_lazy_loading.py index 8c3b2fef..c0e54aad 100644 --- a/imaspy/test/test_lazy_loading.py +++ b/imaspy/test/test_lazy_loading.py @@ -163,3 +163,23 @@ def test_lazy_load_with_new_aos(requires_imas): assert len(lazy_et.model[0].ggd[0].electrons.particles.d_radial) == 0 dbentry.close() + + +def test_lazy_load_multiple_ids(backend, worker_id, tmp_path): + if backend == ASCII_BACKEND: + pytest.skip("Lazy loading is not supported by the ASCII backend.") + + with open_dbentry(backend, "w", worker_id, tmp_path) as dbentry: + cp = dbentry.factory.core_profiles() + cp.ids_properties.homogeneous_time = 1 + cp.time = [0.0, 1.0] + dbentry.put(cp) + eq = dbentry.factory.equilibrium() + eq.ids_properties.homogeneous_time = 1 + eq.time = [1.0, 2.0] + dbentry.put(eq) + + lazy_cp = dbentry.get("core_profiles", lazy=True) + lazy_eq = dbentry.get("equilibrium", lazy=True) + assert all(cp.time - eq.time == -1) + assert all(lazy_cp.time - lazy_eq.time == -1) From d47566baca31d9b53ca46ed0dfd06dde70030211 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 13:26:29 +0100 Subject: [PATCH 12/29] Validate netCDF sparsity metadata --- imaspy/backends/netcdf/nc2ids.py | 60 +++++++++++++++++++++++-------- imaspy/test/test_nc_validation.py | 51 ++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 18 deletions(-) diff --git 
a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index e2cf65b3..3666d49e 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -1,5 +1,5 @@ import logging -from typing import Iterator, List, Tuple +from typing import Iterator, List, Optional, Tuple import netCDF4 @@ -108,18 +108,13 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: def run(self) -> None: """Load the data from the netCDF group into the IDS.""" + self.variables.sort() self._validate_variables() - # FIXME: ensure that var_names are sorted properly - # Current assumption is that creation-order is fine for var_name in self.variables: if var_name.endswith(":shape"): - continue # TODO: validate that this is used - - # FIXME: error handling: + continue metadata = self.ids.metadata[var_name] - # TODO: validate metadata (data type, units, etc.) conforms to DD - if metadata.data_type is IDSDataType.STRUCTURE: continue # This only contains DD metadata we already know @@ -168,7 +163,6 @@ def run(self) -> None: def _validate_variables(self) -> None: """Validate that all variables in the netCDF Group exist and match the DD.""" - self.variables.sort() for var_name in self.variables: if var_name.endswith(":shape"): # Check that there is a corresponding variable @@ -184,7 +178,8 @@ def _validate_variables(self) -> None: f"Shape information provided for {data_var}, but this variable " "is not sparse." ) - # That's all for :shape arrays + # That's all for :shape arrays here, rest is checked in + # _validate_variable (which defers to _validate_sparsity) continue # Check that the DD defines this variable, and validate its metadata @@ -198,10 +193,6 @@ def _validate_variables(self) -> None: ) self._validate_variable(var, metadata) - # Validate sparsity metadata - if "sparse" in var.ncattrs(): - ... # TODO - def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None: """Validate that the variable has correct metadata, raise an exception if not. @@ -251,7 +242,9 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No # Sparse sparse = attrs.pop("sparse", None) if sparse is not None: - ... # TODO + shape_name = f"{var.name}:shape" + shape_var = self.group[shape_name] if shape_name in self.variables else None + self._validate_sparsity(var, shape_var, metadata) # Documentation doc = attrs.pop("documentation", None) @@ -262,6 +255,43 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No if attrs: raise variable_error(var, "attributes", list(attrs.keys())) + def _validate_sparsity( + self, + var: netCDF4.Variable, + shape_var: Optional[netCDF4.Variable], + metadata: IDSMetadata, + ) -> None: + """Validate that the variable has correct sparsity. 
+ + Args: + var: Variable with a "sparse" attribute + shape_var: Corresponding shape array (if it exists in the NC group) + metadata: IDSMetadata of the corresponding IDS object + """ + if metadata.ndim == 0: + return # Sparsity is stored with _Fillvalue, nothing to validate + + # Dimensions + aos_dimensions = self.ncmeta.get_dimensions( + self.ncmeta.aos.get(metadata.path_string), self.homogeneous_time + ) + shape_dimensions = shape_var.dimensions + if ( + len(shape_dimensions) != len(aos_dimensions) + 1 + or shape_dimensions[:-1] != aos_dimensions + or self.group.dimensions[shape_dimensions[-1]].size != metadata.ndim + ): + expected_dims = aos_dimensions + (f"{metadata.ndim}D",) + raise variable_error( + shape_var, "dimensions", shape_dimensions, expected_dims + ) + + # Data type + if shape_var.dtype.kind not in "ui": # should be (un)signed integer + raise variable_error( + shape_var, "dtype", shape_var.dtype, "any integer type" + ) + def nc2ids(group: netCDF4.Group, ids: IDSToplevel): """Get data from the netCDF group and store it in the provided IDS.""" diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py index d3bf8c09..f7cc029f 100644 --- a/imaspy/test/test_nc_validation.py +++ b/imaspy/test/test_nc_validation.py @@ -1,4 +1,5 @@ import netCDF4 +import numpy as np import pytest from imaspy.backends.netcdf.ids2nc import IDS2NC @@ -24,9 +25,10 @@ def memfile_with_ids(memfile, factory): ids = factory.core_profiles() ids.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS ids.time = [1.0, 2.0, 3.0] - ids.profiles_1d.resize(2) - for i in range(2): + ids.profiles_1d.resize(3) + for i in range(3): ids.profiles_1d[i].grid.rho_tor_norm = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0] + ids.profiles_1d[0].zeff = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] IDS2NC(ids, memfile).run() # This one is valid: NC2IDS(memfile, factory.core_profiles()).run() @@ -98,4 +100,47 @@ def test_extra_attributes(memfile_with_ids, factory): NC2IDS(memfile_with_ids, factory.core_profiles()).run() -# TODO: tests for sparsity information +def test_shape_array_without_data(memfile_with_ids, factory): + memfile_with_ids.createVariable("profiles_1d.t_i_average:shape", int, ()) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_shape_array_without_sparse_data(memfile_with_ids, factory): + memfile_with_ids.createVariable("profiles_1d.grid.rho_tor_norm:shape", int, ()) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_shape_array_with_invalid_dimensions(memfile_with_ids, factory): + cp = factory.core_profiles() + t_i_average_meta = cp.metadata["profiles_1d.t_i_average"] + t_i_average = memfile_with_ids.createVariable( + "profiles_1d.t_i_average", float, ("time", "profiles_1d.grid.rho_tor_norm:i") + ) + t_i_average.units = t_i_average_meta.units + t_i_average.documentation = t_i_average_meta.documentation + t_i_average.sparse = "Contents don't matter" + memfile_with_ids.createVariable( + "profiles_1d.t_i_average:shape", + np.int32, + ("time", "profiles_1d.grid.rho_tor_norm:i"), + ) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, cp).run() + + +def test_shape_array_with_invalid_dtype(memfile_with_ids, factory): + cp = factory.core_profiles() + t_i_average_meta = cp.metadata["profiles_1d.t_i_average"] + t_i_average = memfile_with_ids.createVariable( + "profiles_1d.t_i_average", float, ("time", "profiles_1d.grid.rho_tor_norm:i") + ) + t_i_average.units = 
t_i_average_meta.units + t_i_average.documentation = t_i_average_meta.documentation + t_i_average.sparse = "Contents don't matter" + memfile_with_ids.createVariable( + "profiles_1d.t_i_average:shape", float, ("time", "1D") + ) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, cp).run() From 54d78d6e08fd44343578bb0ae13c8404f7951de2 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 13:49:33 +0100 Subject: [PATCH 13/29] Add environment variable to disable netCDF file validation. --- docs/source/configuring.rst | 7 +++++++ imaspy/backends/netcdf/nc2ids.py | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/docs/source/configuring.rst b/docs/source/configuring.rst index 07073faf..dae11b6f 100644 --- a/docs/source/configuring.rst +++ b/docs/source/configuring.rst @@ -29,6 +29,13 @@ This page provides an overview of available variables. you can use :external:py:meth:`logging.getLogger("imaspy").setLevel(...) ` to change the log level programmatically. + +``IMASPY_DISABLE_NC_VALIDATE`` + Disables validation of netCDF files when loading an IDS from an IMAS netCDF file. + + .. caution:: + Disabling the validation may lead to errors when reading data from an IMAS netCDF file. + ``IMAS_VERSION`` Sets :ref:`The default Data Dictionary version` to use. diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index 3666d49e..0a69f964 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -1,4 +1,5 @@ import logging +import os from typing import Iterator, List, Optional, Tuple import netCDF4 @@ -163,6 +164,14 @@ def run(self) -> None: def _validate_variables(self) -> None: """Validate that all variables in the netCDF Group exist and match the DD.""" + disable_validate = os.environ.get("IMASPY_DISABLE_NC_VALIDATE") + if disable_validate and disable_validate != "0": + logger.info( + "NetCDF file validation disabled: " + "This may lead to errors when reading data!" + ) + return # validation checks are disabled + for var_name in self.variables: if var_name.endswith(":shape"): # Check that there is a corresponding variable From 5ccae5dec8e4994748a42f0e7d970049504cda95 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 15:50:32 +0100 Subject: [PATCH 14/29] Eliminate nc2ids function --- imaspy/backends/netcdf/db_entry_nc.py | 6 +++--- imaspy/backends/netcdf/nc2ids.py | 14 -------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index ba7334fc..9a0bf9c9 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -5,7 +5,7 @@ from imaspy.backends.db_entry_impl import DBEntryImpl from imaspy.backends.netcdf.ids2nc import IDS2NC -from imaspy.backends.netcdf.nc2ids import nc2ids +from imaspy.backends.netcdf.nc2ids import NC2IDS from imaspy.exception import DataEntryException from imaspy.ids_convert import NBCPathMap, convert_ids from imaspy.ids_factory import IDSFactory @@ -98,13 +98,13 @@ def get( # Load data into the destination IDS if self._ds_factory.dd_version == destination._dd_version: - nc2ids(group, destination) + NC2IDS(group, destination).run() else: # FIXME: implement automatic conversion using nbc_map # As a work-around: do an explicit conversion, but automatic conversion # will also be needed to implement lazy loading. 
ids = self._ds_factory.new(ids_name) - nc2ids(group, ids) + NC2IDS(group, ids).run() convert_ids(ids, None, target=destination) return destination diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index 0a69f964..b74b4676 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -300,17 +300,3 @@ def _validate_sparsity( raise variable_error( shape_var, "dtype", shape_var.dtype, "any integer type" ) - - -def nc2ids(group: netCDF4.Group, ids: IDSToplevel): - """Get data from the netCDF group and store it in the provided IDS.""" - try: - NC2IDS(group, ids).run() - except Exception as exc: - raise RuntimeError( - "An error occurred while reading data from the netCDF file " - f"'{group.filepath()}'. The netCDF functionality is currently in " - "preview status. Unexpected data in an otherwise valid netCDF file " - "may cause errors in IMASPy. A more robust mechanism to load IDS data from " - "netCDF files will be included in the next release of IMASPy." - ) from exc From afb8c292c6527f03354ac35fa43dfaebd8fb4ef3 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 20 Nov 2024 10:50:59 +0100 Subject: [PATCH 15/29] Update `get_sample` docstring to clarify that the interpolation mode has no effect on the `ids.time` vector. See also https://git.iter.org/projects/IMAS/repos/al-matlab/pull-requests/29/overview?commentId=48957 --- imaspy/db_entry.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/imaspy/db_entry.py b/imaspy/db_entry.py index cb948fea..3834655d 100644 --- a/imaspy/db_entry.py +++ b/imaspy/db_entry.py @@ -459,7 +459,9 @@ def get_sample( :param:`dtime` must be a number or a numpy array of size 1. This mode will generate an IDS with a homogeneous time vector ``[tmin, tmin - + dtime, tmin + 2*dtime, ...`` up to ``tmax``. The returned IDS always has + + dtime, tmin + 2*dtime, ...`` up to ``tmax``. The chosen interpolation + method will have no effect on the time vector, but may have an impact on the + other dynamic values. The returned IDS always has ``ids_properties.homogeneous_time = 1``. 3. Interpolate dynamic data on an explicit time base. This method is selected @@ -468,7 +470,9 @@ def get_sample( This mode will generate an IDS with a homogeneous time vector equal to :param:`dtime`. :param:`tmin` and :param:`tmax` are ignored in this mode. - The returned IDS always has ``ids_properties.homogeneous_time = 1``. + The chosen interpolation method will have no effect on the time vector, but + may have an impact on the other dynamic values. The returned IDS always has + ``ids_properties.homogeneous_time = 1``. Args: ids_name: Name of the IDS to read from the backend From 6cd1e44f2ca06d9746a6e0722cf216da2e6fed62 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 22 Nov 2024 15:05:37 +0100 Subject: [PATCH 16/29] Add `validate_nc` command to imaspy CLI Also fix a couple of bugs when opening invalid netCDF files and raise a proper exception. 
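
Example usage (a sketch: `core_profiles.nc` is an arbitrary file name; the
output lines match the messages added to `imaspy/command/cli.py` in this
patch):

    $ imaspy validate_nc core_profiles.nc
    File `core_profiles.nc` is a valid IMAS netCDF file.

For a file that fails validation, the command prints "File `<name>` does
not adhere to the IMAS conventions:" followed by the validation error, and
exits with status code 1.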
--- imaspy/backends/netcdf/db_entry_nc.py | 8 ++-- imaspy/backends/netcdf/nc2ids.py | 4 +- imaspy/backends/netcdf/nc_validate.py | 53 +++++++++++++++++++++++++++ imaspy/command/cli.py | 15 ++++++++ 4 files changed, 75 insertions(+), 5 deletions(-) create mode 100644 imaspy/backends/netcdf/nc_validate.py diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index 9a0bf9c9..3725c5a9 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -6,7 +6,7 @@ from imaspy.backends.db_entry_impl import DBEntryImpl from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import NC2IDS -from imaspy.exception import DataEntryException +from imaspy.exception import DataEntryException, InvalidNetCDFEntry from imaspy.ids_convert import NBCPathMap, convert_ids from imaspy.ids_factory import IDSFactory from imaspy.ids_toplevel import IDSToplevel @@ -45,14 +45,16 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: # Check if there is already data in this dataset: if self._dataset.dimensions or self._dataset.variables or self._dataset.groups: if "data_dictionary_version" not in self._dataset.ncattrs(): - raise RuntimeError( + raise InvalidNetCDFEntry( "Invalid netCDF file: `data_dictionary_version` missing" ) dataset_dd_version = self._dataset.data_dictionary_version if dataset_dd_version != factory.dd_version: self._ds_factory = IDSFactory(dataset_dd_version) - # TODO: [validate] that the data contained in this file adheres to the DD + elif mode not in ["w", "r+", "a"]: + # Reading an empty file... + raise InvalidNetCDFEntry(f"Invalid netCDF file: `{fname}` is empty.") else: # This is an empty netCDF dataset: set global attributes self._dataset.Conventions = "IMAS" diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index b74b4676..50905ba8 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -110,7 +110,7 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: def run(self) -> None: """Load the data from the netCDF group into the IDS.""" self.variables.sort() - self._validate_variables() + self.validate_variables() for var_name in self.variables: if var_name.endswith(":shape"): continue @@ -162,7 +162,7 @@ def run(self) -> None: for index, node in tree_iter(self.ids, metadata): node.value = data[index] - def _validate_variables(self) -> None: + def validate_variables(self) -> None: """Validate that all variables in the netCDF Group exist and match the DD.""" disable_validate = os.environ.get("IMASPY_DISABLE_NC_VALIDATE") if disable_validate and disable_validate != "0": diff --git a/imaspy/backends/netcdf/nc_validate.py b/imaspy/backends/netcdf/nc_validate.py new file mode 100644 index 00000000..7b6a1eac --- /dev/null +++ b/imaspy/backends/netcdf/nc_validate.py @@ -0,0 +1,53 @@ +from imaspy.backends.netcdf.db_entry_nc import NCDBEntryImpl +from imaspy.backends.netcdf.nc2ids import NC2IDS +from imaspy.db_entry import DBEntry +from imaspy.exception import InvalidNetCDFEntry + + +def validate_netcdf_file(filename: str) -> None: + """Validate if the provided netCDF file adheres to the IMAS conventions.""" + if not filename.endswith(".nc"): + raise InvalidNetCDFEntry( + f"Invalid filename `{filename}` provided: " + "an IMAS netCDF file should end with `.nc`" + ) + + entry = DBEntry(filename, "r") + entry_impl: NCDBEntryImpl = entry._dbe_impl + dataset = entry_impl._dataset + factory = entry_impl._ds_factory + + 
ids_names = factory.ids_names() + + # Check that groups in the dataset correspond to an IDS/occurrence and no additional + # variables are smuggled inside: + groups = [dataset] + [dataset[group] for group in dataset.groups] + for group in groups: + if group.variables or group.dimensions: + raise InvalidNetCDFEntry( + "NetCDF file should not have variables or dimensions in the " + f"{group.name} group." + ) + if group is dataset: + continue + if group.name not in ids_names: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}: there is no IDS with this name." + ) + for subgroup in group.groups: + try: + int(subgroup) + except ValueError: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}/{subgroup}: " + f"{subgroup} is not a valid occurrence number." + ) + + for ids_name in ids_names: + for occurrence in entry.list_all_occurrences(ids_name): + group = dataset[f"{ids_name}/{occurrence}"] + try: + NC2IDS(group, factory.new(ids_name)).validate_variables() + except InvalidNetCDFEntry as exc: + occ = f":{occurrence}" if occurrence else "" + raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") diff --git a/imaspy/command/cli.py b/imaspy/command/cli.py index 246922ce..f894f02d 100644 --- a/imaspy/command/cli.py +++ b/imaspy/command/cli.py @@ -218,5 +218,20 @@ def convert_ids( console.Console().print(timer.get_table("Time required per IDS")) +@cli.command("validate_nc", no_args_is_help=True) +@click.argument("filename", type=click.Path(exists=True, dir_okay=False)) +def validate_nc(filename): + """Validate if the provided netCDF file adheres to the IMAS conventions.""" + from imaspy.backends.netcdf.nc_validate import validate_netcdf_file + + try: + validate_netcdf_file(filename) + except Exception as exc: + click.echo(f"File `{filename}` does not adhere to the IMAS conventions:") + click.echo(exc) + sys.exit(1) + click.echo(f"File `{filename}` is a valid IMAS netCDF file.") + + if __name__ == "__main__": cli() From 6ec21c71dbda710bb9230525e2b2bb830b645d6b Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Nov 2024 09:50:07 +0100 Subject: [PATCH 17/29] Fix incorrect exception when using mode="x" for netCDF files --- imaspy/backends/netcdf/db_entry_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index 3725c5a9..da239745 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -52,7 +52,7 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: if dataset_dd_version != factory.dd_version: self._ds_factory = IDSFactory(dataset_dd_version) - elif mode not in ["w", "r+", "a"]: + elif mode not in ["w", "x", "r+", "a"]: # Reading an empty file... 
raise InvalidNetCDFEntry(f"Invalid netCDF file: `{fname}` is empty.") else: From 1f6c6fe9730dc063443408cca81fc5416d34a184 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Nov 2024 15:06:38 +0100 Subject: [PATCH 18/29] Close netCDF datasets when an exception is raised --- imaspy/backends/netcdf/db_entry_nc.py | 10 +++- imaspy/backends/netcdf/nc_validate.py | 68 +++++++++++++-------------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index da239745..a66154f1 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -39,9 +39,17 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: """NetCDF4 dataset.""" self._factory = factory """Factory (DD version) that the user wishes to use.""" - self._ds_factory = factory # Overwritten if data exists, see below + self._ds_factory = factory # Overwritten if data exists, see _init_dd_version """Factory (DD version) that the data is stored in.""" + try: + self._init_dd_version(fname, mode, factory) + except Exception: + self._dataset.close() + raise + + def _init_dd_version(self, fname: str, mode: str, factory: IDSFactory) -> None: + """Check or setup data dictionary version.""" # Check if there is already data in this dataset: if self._dataset.dimensions or self._dataset.variables or self._dataset.groups: if "data_dictionary_version" not in self._dataset.ncattrs(): diff --git a/imaspy/backends/netcdf/nc_validate.py b/imaspy/backends/netcdf/nc_validate.py index 7b6a1eac..49a14283 100644 --- a/imaspy/backends/netcdf/nc_validate.py +++ b/imaspy/backends/netcdf/nc_validate.py @@ -12,42 +12,42 @@ def validate_netcdf_file(filename: str) -> None: "an IMAS netCDF file should end with `.nc`" ) - entry = DBEntry(filename, "r") - entry_impl: NCDBEntryImpl = entry._dbe_impl - dataset = entry_impl._dataset - factory = entry_impl._ds_factory + with DBEntry(filename, "r") as entry: + entry_impl: NCDBEntryImpl = entry._dbe_impl + dataset = entry_impl._dataset + factory = entry_impl._ds_factory - ids_names = factory.ids_names() + ids_names = factory.ids_names() - # Check that groups in the dataset correspond to an IDS/occurrence and no additional - # variables are smuggled inside: - groups = [dataset] + [dataset[group] for group in dataset.groups] - for group in groups: - if group.variables or group.dimensions: - raise InvalidNetCDFEntry( - "NetCDF file should not have variables or dimensions in the " - f"{group.name} group." - ) - if group is dataset: - continue - if group.name not in ids_names: - raise InvalidNetCDFEntry( - f"Invalid group name {group.name}: there is no IDS with this name." - ) - for subgroup in group.groups: - try: - int(subgroup) - except ValueError: + # Check that groups in the dataset correspond to an IDS/occurrence and no + # additional variables are smuggled inside: + groups = [dataset] + [dataset[group] for group in dataset.groups] + for group in groups: + if group.variables or group.dimensions: raise InvalidNetCDFEntry( - f"Invalid group name {group.name}/{subgroup}: " - f"{subgroup} is not a valid occurrence number." + "NetCDF file should not have variables or dimensions in the " + f"{group.name} group." ) + if group is dataset: + continue + if group.name not in ids_names: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}: there is no IDS with this name." 
+ ) + for subgroup in group.groups: + try: + int(subgroup) + except ValueError: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}/{subgroup}: " + f"{subgroup} is not a valid occurrence number." + ) - for ids_name in ids_names: - for occurrence in entry.list_all_occurrences(ids_name): - group = dataset[f"{ids_name}/{occurrence}"] - try: - NC2IDS(group, factory.new(ids_name)).validate_variables() - except InvalidNetCDFEntry as exc: - occ = f":{occurrence}" if occurrence else "" - raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") + for ids_name in ids_names: + for occurrence in entry.list_all_occurrences(ids_name): + group = dataset[f"{ids_name}/{occurrence}"] + try: + NC2IDS(group, factory.new(ids_name)).validate_variables() + except InvalidNetCDFEntry as exc: + occ = f":{occurrence}" if occurrence else "" + raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") From b7d89635a270f623b0af2583c595b37c8a64420a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Nov 2024 15:06:54 +0100 Subject: [PATCH 19/29] Add unit tests for `nc_validate.py` --- imaspy/test/test_nc_validation.py | 53 ++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py index f7cc029f..efd25420 100644 --- a/imaspy/test/test_nc_validation.py +++ b/imaspy/test/test_nc_validation.py @@ -4,7 +4,8 @@ from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import NC2IDS -from imaspy.exception import InvalidNetCDFEntry +from imaspy.backends.netcdf.nc_validate import validate_netcdf_file +from imaspy.exception import InvalidNetCDFEntry, UnknownDDVersion from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS from imaspy.ids_factory import IDSFactory @@ -144,3 +145,53 @@ def test_shape_array_with_invalid_dtype(memfile_with_ids, factory): ) with pytest.raises(InvalidNetCDFEntry): NC2IDS(memfile_with_ids, cp).run() + + +def test_validate_nc(tmpdir): + fname = str(tmpdir / "test.nc") + + # Wrong extension + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file("test.h5") # invalid extension + + # Empty file + netCDF4.Dataset(fname, "w").close() + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Invalid DD version + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "invalid" + dataset.createGroup("core_profiles") + with pytest.raises(UnknownDDVersion): + validate_netcdf_file(fname) + + # Invalid group + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createGroup("X") + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Invalid occurrence + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createGroup("core_profiles/a") + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Invalid variable in root group + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createVariable("core_profiles", int, ()) + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Missing ids_properties.homogeneous_time + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createGroup("core_profiles/1") + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # All other validations are handled by NC2IDS and tested above From 
fc2cbf20b3cf1c549761214109d3d89b4ee34091 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 27 Nov 2024 16:31:59 +0100 Subject: [PATCH 20/29] Additional documentation for the `imaspy validate_nc` command line tool --- docs/source/netcdf.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/netcdf.rst b/docs/source/netcdf.rst index dd3bf431..7a7593e6 100644 --- a/docs/source/netcdf.rst +++ b/docs/source/netcdf.rst @@ -102,3 +102,11 @@ your directory. Let's open this file with ``xarray.load_dataset``: Attributes: Conventions: IMAS data_dictionary_version: 3.41.0 + + +Validating an IMAS netCDF file +------------------------------ + +IMAS netCDF files can be validated with IMASPy through the command line ``imaspy +validate_nc ``. See also :ref:`IMASPy Command Line tool` or type +``imaspy validate_nc --help`` in a command line. From 7c56b5f0713e7083fc3887884af3bf5dc2852f78 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 4 Dec 2024 15:35:31 +0100 Subject: [PATCH 21/29] Explicitly set `IDSDEF_PATH` when opening a DBEntry with the UDA backend --- imaspy/backends/imas_core/db_entry_al.py | 19 ++++++++ imaspy/backends/imas_core/uda_support.py | 56 ++++++++++++++++++++++++ imaspy/test/test_uda_support.py | 12 +++++ 3 files changed, 87 insertions(+) create mode 100644 imaspy/backends/imas_core/uda_support.py create mode 100644 imaspy/test/test_uda_support.py diff --git a/imaspy/backends/imas_core/db_entry_al.py b/imaspy/backends/imas_core/db_entry_al.py index a90e4d6a..34a3ab32 100644 --- a/imaspy/backends/imas_core/db_entry_al.py +++ b/imaspy/backends/imas_core/db_entry_al.py @@ -41,6 +41,7 @@ from .db_entry_helpers import delete_children, get_children, put_children from .imas_interface import LLInterfaceError, has_imas, ll_interface from .mdsplus_model import ensure_data_dir, mdsplus_model_dir +from .uda_support import extract_idsdef, get_dd_version_from_idsdef_xml _BACKEND_NAME = { ASCII_BACKEND: "ascii", @@ -186,6 +187,24 @@ def _setup_backend( pass # nothing to set up elif backend == "uda": + # Set IDSDEF_PATH to point the UDA backend to the selected DD version + idsdef_path = None + + if factory._xml_path is not None: + # Factory was constructed with an explicit XML path, point UDA to that: + idsdef_path = factory._xml_path + + elif "IMAS_PREFIX" in os.environ: + # Check if UDA can use the IDSDef.xml stored in $IMAS_PREFIX/include/ + idsdef_path = os.environ["IMAS_PREFIX"] + "/include/IDSDef.xml" + if get_dd_version_from_idsdef_xml(idsdef_path) != factory.version: + idsdef_path = None + + if idsdef_path is None: + # Extract XML from the DD zip and point UDA to it + idsdef_path = extract_idsdef(factory.version) + + os.environ["IDSDEF_PATH"] = idsdef_path logger.warning( "The UDA backend is not tested with IMASPy and may not work properly. " "Please raise any issues you find." diff --git a/imaspy/backends/imas_core/uda_support.py b/imaspy/backends/imas_core/uda_support.py new file mode 100644 index 00000000..8b599faa --- /dev/null +++ b/imaspy/backends/imas_core/uda_support.py @@ -0,0 +1,56 @@ +import logging +from pathlib import Path +from typing import Union +from xml.etree import ElementTree as ET + +from imaspy import dd_zip + +from .mdsplus_model import _get_xdg_cache_dir + +logger = logging.getLogger(__name__) + + +def get_dd_version_from_idsdef_xml(path: Union[str, Path]) -> str: + """Parse the IDSDef.xml up to the point where the Data Dictionary version is set. 
+ + Returns: + The Data Dictionary version for the provided file, or None if the file cannot be + parsed / contains no Data Dictionary version. + """ + try: + for _, elem in ET.iterparse(path): + if elem.tag == "version": + return elem.text + except OSError: + pass # File not found, etc. + except Exception: + logger.warning("Could not read DD version from file '%s'.", path, exc_info=True) + return None + + +def extract_idsdef(dd_version: str) -> str: + """Extract the IDSDef.xml for the given version and return its path. + + The IDSDef.xml is extracted to the imaspy cache folder: + + - If the file imaspy/uda/.xml already exists, we assume it is correct + """ + cache_dir_path = Path(_get_xdg_cache_dir()) / "imaspy" / "uda" + cache_dir_path.mkdir(parents=True, exist_ok=True) # ensure cache folder exists + idsdef_path = cache_dir_path / (dd_version + ".xml") + + if idsdef_path.exists(): + extract = False + # Check if the file is fine + if get_dd_version_from_idsdef_xml(idsdef_path) != dd_version: + # File is corrupt, I guess? We'll overwrite: + extract = True + else: + extract = True + + if extract: + # Extract XML from the dd_zip and store + data = dd_zip.get_dd_xml(dd_version) + idsdef_path.write_bytes(data) + + return str(idsdef_path) diff --git a/imaspy/test/test_uda_support.py b/imaspy/test/test_uda_support.py new file mode 100644 index 00000000..f623219a --- /dev/null +++ b/imaspy/test/test_uda_support.py @@ -0,0 +1,12 @@ +from pathlib import Path +from zlib import crc32 + +from imaspy import dd_zip +from imaspy.backends.imas_core.uda_support import extract_idsdef + + +def test_extract_idsdef(): + fname = extract_idsdef("4.0.0") + expected_crc = dd_zip.get_dd_xml_crc("4.0.0") + actual_crc = crc32(Path(fname).read_bytes()) + assert expected_crc == actual_crc From 74d2e3afe41a3fb907bc0a3729fcea29590aa4ca Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Mon, 9 Dec 2024 15:14:44 +0100 Subject: [PATCH 22/29] Make prepare_data_dictionaries compatible with DD>4.0.0 (change in schemas layout) --- imaspy/dd_helpers.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/imaspy/dd_helpers.py b/imaspy/dd_helpers.py index 0506482f..21a7775f 100644 --- a/imaspy/dd_helpers.py +++ b/imaspy/dd_helpers.py @@ -58,9 +58,14 @@ def prepare_data_dictionaries(): dd_zip.write(filename, arcname=arcname) # Include identifiers from latest tag in zip file repo.git.checkout(newest_version_and_tag[1], force=True) + # DD layout <= 4.0.0 for filename in Path("data-dictionary").glob("*/*identifier.xml"): arcname = Path("identifiers").joinpath(*filename.parts[1:]) dd_zip.write(filename, arcname=arcname) + # DD layout > 4.0.0 + for filename in Path("data-dictionary").glob("schemas/*/*identifier.xml"): + arcname = Path("identifiers").joinpath(*filename.parts[2:]) + dd_zip.write(filename, arcname=arcname) # pre 3.30.0 versions of the DD have the `saxon9he.jar` file path hardcoded From 77fb044a48b709ddaaef9091b1101484526338cd Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Wed, 11 Dec 2024 17:03:11 +0100 Subject: [PATCH 23/29] Updating the license and readme --- LICENSE.md | 46 --------------- LICENSE.txt | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 121 ++++++-------------------------------- 3 files changed, 182 insertions(+), 150 deletions(-) delete mode 100644 LICENSE.md create mode 100644 LICENSE.txt diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index ea4a5d46..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,46 +0,0 @@ -Copyright (c) 2020-2023 ITER Organization, Route de 
Vinon-sur-Verdon, CS 90 046, - 13067 St-Paul-lez-Durance Cedex, France - -Copyright (c) 2020-2023 Karel Lucas van de Plassche - -Copyright (c) 2020 Dutch Institute for Fundamental Energy Research - -Copyright (c) 2020-2022 Daan van Vugt - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Use and redistribution, for peaceful purposes only, are granted solely to the - ITER Members (the People's Republic of China, the European Atomic Energy - Community, the Republic of India, Japan, the Republic of Korea, the Russian - Federation, and the United States of America), with the right to sub-license - within their territory for the purpose of fusion research and development. - Organizations, bodies or individuals of non-ITER Members shall seek specific - written permission from the ITER Organization before use or redistribution of - this software. - -* All modifications/derivatives shall be made available to the ITER Organization. - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -* Neither the name of the ITER Organization nor the names of its contributors - may be used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE ITER ORGANIZATION OR ITS CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..33bb3680 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. 
+ + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. 
+ + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/README.md b/README.md index ce753f5a..23e83fde 100644 --- a/README.md +++ b/README.md @@ -1,99 +1,24 @@ # IMASPy IMASPy is a pure-python library to handle arbitrarily nested data structures. -IMASPy is designed for, but not necessarily bound to, interacting with -Interface Data Structures (IDSs) as defined by the -Integrated Modelling & Analysis Suite (IMAS) Data Model. +IMASPy is designed for, but not necessarily bound to, interacting with Interface +Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite (IMAS) +Data Model. -It provides: -* An easy-to-install and easy-to-get started package by - * Not requiring an IMAS installation - * Not strictly requiring matching a Data Dictionary (DD) version -* An pythonic alternative to the IMAS Python High Level Interface (HLI) -* Checking of correctness on assign time, instead of database write time -* Dynamically created in-memory pre-filled data trees from DD XML specifications +## Install -This package is developed on [ITER bitbucket](https://git.iter.org/projects/IMAS/repos/imaspy). -For user support, contact the IMAS team on the [IMAS user slack](https://imasusers.slack.com), -open a [JIRA issue](https://jira.iter.org/projects/IMAS), or email the -support team on . +Install steps are described in the documentation generated from `/docs/source/installing.rst`. -## Installation - -### On ITER system, EuroFusion gateway - -There is a `module` available on ITER and the Gateway, so you can run - -```bash -module load IMASPy -``` - -IMASPy can work with either Access Layer versions 4 or 5 (the used version is -automatically detected when importing the `imaspy` module). IMASPy still works (with -limited functionality) when no IMAS module is loaded. - -### Local - -We recommend using a `venv`: - -```bash -python3 -m venv ./venv -. venv/bin/activate -``` - -Then clone this repository, and run `pip install`: - -```bash -git clone ssh://git@git.iter.org/imas/imaspy.git -cd imaspy -pip install . -# Optional: also install `imas-core` with the HDF5 backend in the venv: -pip install .[imas-core] -``` - -If you get strange errors you might want to upgrade your `setuptools` and `pip`. -(you might want to add the `--user` flag to your pip installs when not in a `venv`) - -### Development installation - -For development an installation in editable mode may be more convenient, and -you will need some extra dependencies to run the test suite and build -documentation. - -```bash -pip install -e .[test,docs] -``` +Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) +and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html) -Test your installation by trying +The documentation can be manually generated by installing sphinx and running: ```bash -cd ~ -python -c "import imaspy; print(imaspy.__version__)" +make -C docs html ``` -which should return your just installed version number. - -### Installation without ITER access - -The installation script tries to access the [ITER IMAS Core Data Dictionary repository](https://git.iter.org/projects/IMAS/repos/data-dictionary/browse) -to fetch the latest versions. If you do not have git+ssh access there, you can -try to find this repository elsewhere, and do a `git fetch --tags`. - -Alternatively you could try to obtain an `IDSDef.zip` and place it in `~/.config/imaspy/`. 
- -Test your installation by trying - -```bash -python -c "import imaspy; factory = imaspy.IDSFactory()" -``` -If the following error is raised: -```bash -RuntimeError: Could not find any data dictionary definitions. -``` -it means that the Data Dictionary definitions weren't created during the install. -You can generate these definitions by executing `build_DD` in the command line. -Missing packages can include among others: [GitPython](https://github.com/gitpython-developers/GitPython), and Java. ## How to use @@ -106,32 +31,20 @@ print(equilibrium) equilibrium.ids_properties.homogeneous_time = imaspy.ids_defs.IDS_TIME_MODE_HETEROGENEOUS equilibrium.ids_properties.comment = "testing" -dbentry = imaspy.DBEntry(imaspy.ids_defs.HDF5_BACKEND, "ITER", 1, 1) -dbentry.create() -dbentry.put(equilibrium) - -# TODO: find an example with a significant change between versions (rename?) -older_dbentry = imaspy.DBEntry(imaspy.ids_defs.HDF5_BACKEND, "ITER", 1, 1, version="3.35.0") -equilibrium2 = older_root.get("equilibrium") -print(equilibrium2.ids_properties.comment) +with imaspy.DBEntry("imas:hdf5?path=./testdb","w") as dbentry: + dbentry.put(equilibrium) ``` -## Documentation - -Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) -and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html) +A quick 5 minutes introduction is available in the documentation generated from `/docs/sources/intro.rst`. -The documentation can be manually generated by installing sphinx and running: -```bash -make -C docs html -``` +## Legal -## Interacting with IMAS AL +IMASPy is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de +Plassche , Copyright 2020-2022 Daan van Vugt , +and Copyright 2020 Dutch Institute for Fundamental Energy Research . +It is licensed under [LGPL 3.0](LICENSE.txt). -Interaction with the IMAS AL is provided by a Cython interface to the Access Layer. -As Cython code, it needs to be compiled on your local system. -To find the headers, the Access Layer `include` folder needs to be in your `INCLUDE_PATH`. On most HPC systems, a `module load IMAS` is enough. ## Acknowledgments From d80778fe6cdadf50255a50905a00ed9e2b458c8c Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 12 Dec 2024 10:23:31 +0100 Subject: [PATCH 24/29] Replace references to LICENSE.md to LICENSE.txt --- docs/source/index.rst | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 19e3985b..c5a3f24c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -77,5 +77,5 @@ Manual LICENSE ------- -.. literalinclude:: ../../LICENSE.md +.. 
literalinclude:: ../../LICENSE.txt :language: text diff --git a/pyproject.toml b/pyproject.toml index 1c1ce2cc..dccd6912 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ authors = [ description = "Pythonic wrappers for the IMAS Access Layer" readme = {file = "README.md", content-type = "text/markdown"} requires-python = ">=3.7" -license = {file = "LICENSE.md"} +license = {file = "LICENSE.txt"} classifiers = [ "Development Status :: 3 - Alpha", "Environment :: Console", From cef46674cc1f032d9ae65dfe4507060493a43ddf Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 17 Dec 2024 13:57:11 +0100 Subject: [PATCH 25/29] Fix a bug with lazy loading Bug: IMASPy runs into an attribute error when lazy loading a child quantity that was added in a newer DD version than stored on disk. Example: 1. Equilibrium IDS stored in DD 3.33.0 2. Lazy loading IDS with DD 4.0.0 3. Try to access `eq.time_slice[0].boundary.dr_dz_zero_point.r` resulted in an AttributeError Root cause: IMASPy did not handle correctly that the `dr_dz_zero_point` was added between 3.33.0 and 4.0.0. This commit fixes the bug. --- imaspy/backends/imas_core/db_entry_helpers.py | 6 +++++- imaspy/test/test_lazy_loading.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/imaspy/backends/imas_core/db_entry_helpers.py b/imaspy/backends/imas_core/db_entry_helpers.py index de1d9323..f69eafd3 100644 --- a/imaspy/backends/imas_core/db_entry_helpers.py +++ b/imaspy/backends/imas_core/db_entry_helpers.py @@ -77,11 +77,15 @@ def get_children( getattr(structure, name)._IDSPrimitive__value = data -def _get_child(child: IDSBase, ctx: LazyALContext): +def _get_child(child: IDSBase, ctx: Optional[LazyALContext]): """Get a single child when required (lazy loading).""" # NOTE: changes in this method must be propagated to _get_children and vice versa # Performance: this method is specialized for the lazy get + # ctx can be None when the parent structure does not exist in the on-disk DD version + if ctx is None: + return # There is no data to be loaded + time_mode = ctx.time_mode if time_mode == IDS_TIME_MODE_INDEPENDENT and child.metadata.type.is_dynamic: return # skip dynamic (time-dependent) nodes diff --git a/imaspy/test/test_lazy_loading.py b/imaspy/test/test_lazy_loading.py index c0e54aad..1d34e2a1 100644 --- a/imaspy/test/test_lazy_loading.py +++ b/imaspy/test/test_lazy_loading.py @@ -165,6 +165,22 @@ def test_lazy_load_with_new_aos(requires_imas): dbentry.close() +def test_lazy_load_with_new_structure(requires_imas): + dbentry = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, dd_version="3.30.0") + dbentry.create() + + eq = dbentry.factory.equilibrium() + eq.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + eq.time = [0.0] + eq.time_slice.resize(1) + dbentry.put(eq) + + entry2 = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, data_version="3", dd_version="4.0.0") + entry2.open() + lazy_eq = entry2.get("equilibrium", lazy=True) + assert not lazy_eq.time_slice[0].boundary.dr_dz_zero_point.r.has_value + + def test_lazy_load_multiple_ids(backend, worker_id, tmp_path): if backend == ASCII_BACKEND: pytest.skip("Lazy loading is not supported by the ASCII backend.") From 4beab9fcbcf590356b0d92b7b65894f907157962 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Fri, 10 Jan 2025 18:03:27 +0100 Subject: [PATCH 26/29] Add contributing guidelines --- CODE_OF_CONDUCT.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 45 +++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create 
From 4beab9fcbcf590356b0d92b7b65894f907157962 Mon Sep 17 00:00:00 2001
From: Olivier Hoenen
Date: Fri, 10 Jan 2025 18:03:27 +0100
Subject: [PATCH 26/29] Add contributing guidelines

---
 CODE_OF_CONDUCT.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 CONTRIBUTING.md    | 45 +++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 CODE_OF_CONDUCT.md
 create mode 100644 CONTRIBUTING.md

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000..df8ba3bd
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,72 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to make participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies within all project spaces, and it also applies when
+an individual is representing the project or its community in public spaces.
+Examples of representing a project or community include using an official
+project e-mail address, posting via an official social media account, or acting
+as an appointed representative at an online or offline event. Representation of
+a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at . All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..ac28e400
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,45 @@
+# Contributing guidelines
+
+We welcome any kind of contribution to `imas-python`,
+from a simple comment or a question to a full-fledged pull request.
+Please first make sure you read and follow the
+[Code of Conduct](CODE_OF_CONDUCT.md).
+
+## You think you found a bug in the code, or have a question about its use
+
+1. use the [issue search](https://github.com/iterorganization/imas-python/issues)
+   to check if someone already created a similar issue;
+2. if not, make a **new issue** to describe your problem or question.
+   If you suspect a bug, please try to give all the relevant information
+   needed to reproduce the error or identify its root cause (version of
+   imas-python, OS and relevant dependencies, a snippet of code);
+3. apply relevant labels to the issue.
+
+## You want to make or propose some change to the code
+
+1. use the [issue search](https://github.com/iterorganization/imas-python/issues)
+   to check if someone already proposed a similar idea/change;
+2. if not, create a **new issue** to describe what change you would like to see
+   implemented and specify whether you intend to work on it yourself or if some
+   help will be needed;
+3. wait until some kind of consensus is reached about your idea being relevant,
+   at which time the issue will be assigned (to you or someone else who can
+   work on this topic);
+4. if you do the development yourself, fork the repository to your own GitHub
+   profile and create your own feature branch off of the latest develop commit.
+   Make sure to regularly sync your branch with the latest commits from
+   `develop` (find instructions
+   [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork));
+5. when your development is ready, create a pull request (find instructions
+   [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork)).
+
+While we will try to answer questions quickly and to address issues in a timely
+manner, it may sometimes take longer than expected. A friendly ping in the
+discussion or the issue thread can help draw attention if you find that it has
+stalled.

From 2eb385e77e953ffef5a46a274897da6f4fb52d87 Mon Sep 17 00:00:00 2001
From: gautambaabu
Date: Fri, 13 Dec 2024 23:10:14 +0530
Subject: [PATCH 27/29] fixed readme.md for imas

---
 README.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 23e83fde..03f00ce5 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-# IMASPy
+# IMAS
 
-IMASPy is a pure-python library to handle arbitrarily nested data structures.
-IMASPy is designed for, but not necessarily bound to, interacting with Interface
+IMAS is a pure-python library to handle arbitrarily nested data structures.
+IMAS is designed for, but not necessarily bound to, interacting with Interface
 Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite
 (IMAS) Data Model.
 
@@ -11,7 +11,7 @@
 Install steps are described in the documentation generated from
 `/docs/source/installing.rst`.
 Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/)
-and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html)
+and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMAS-doc/index.html)
 
 The documentation can be manually generated by installing sphinx and running:
 
 ```
 make -C docs html
 ```
 
@@ -23,15 +23,15 @@
 ## How to use
 
 ```python
-import imaspy
-factory = imaspy.IDSFactory()
+import imas
+factory = imas.IDSFactory()
 equilibrium = factory.equilibrium()
 print(equilibrium)
 
-equilibrium.ids_properties.homogeneous_time = imaspy.ids_defs.IDS_TIME_MODE_HETEROGENEOUS
+equilibrium.ids_properties.homogeneous_time = imas.ids_defs.IDS_TIME_MODE_HETEROGENEOUS
 equilibrium.ids_properties.comment = "testing"
 
-with imaspy.DBEntry("imas:hdf5?path=./testdb","w") as dbentry:
+with imas.DBEntry("imas:hdf5?path=./testdb","w") as dbentry:
     dbentry.put(equilibrium)
 ```
 
@@ -40,7 +40,7 @@ A quick 5 minutes introduction is available in the documentation generated from
 
 ## Legal
 
-IMASPy is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de
+IMAS is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de
 Plassche , Copyright 2020-2022 Daan van Vugt ,
 and Copyright 2020 Dutch Institute for Fundamental Energy Research .
 It is licensed under [LGPL 3.0](LICENSE.txt).

From 133f78c30803cdb4ad8e3afab9f10e8b652c0d58 Mon Sep 17 00:00:00 2001
From: Olivier Hoenen
Date: Tue, 17 Dec 2024 14:56:13 +0100
Subject: [PATCH 28/29] Apply suggestion on naming

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 03f00ce5..9fc27d68 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-# IMAS
+# IMAS-Python
 
-IMAS is a pure-python library to handle arbitrarily nested data structures.
-IMAS is designed for, but not necessarily bound to, interacting with Interface
+IMAS-Python is a pure-python library to handle arbitrarily nested data structures.
+IMAS-Python is designed for, but not necessarily bound to, interacting with Interface
 Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite
 (IMAS) Data Model.
 
@@ -40,7 +40,7 @@
 ## Legal
 
-IMAS is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de
+IMAS-Python is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de
 Plassche , Copyright 2020-2022 Daan van Vugt ,
 and Copyright 2020 Dutch Institute for Fundamental Energy Research .
 It is licensed under [LGPL 3.0](LICENSE.txt).

From 6f871f5b98f268b5329310fcd0e572c109cb6539 Mon Sep 17 00:00:00 2001
From: Gautam raj
Date: Tue, 17 Dec 2024 19:32:31 +0530
Subject: [PATCH 29/29] Update README.md

Co-authored-by: Simon Pinches
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9fc27d68..14d4b81e 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # IMAS-Python
 
 IMAS-Python is a pure-python library to handle arbitrarily nested data structures.
-IMAS-Python is designed for, but not necessarily bound to, interacting with Interface
+It is designed for, but not necessarily bound to, interacting with Interface
 Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite
 (IMAS) Data Model.
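The README's usage example ends after writing the equilibrium IDS. As a complementary sketch, not part of the patch series itself, the data can be read back with the same `DBEntry` API. This assumes the `./testdb` entry created by the README example and that `"r"` is accepted as the read-only mode string (the README only demonstrates `"w"`):

```python
# Read-back sketch complementing the README example above; the "r" mode
# string is an assumption, only "w" appears in the README itself.
import imas

# Re-open the HDF5 entry written by the README example.
with imas.DBEntry("imas:hdf5?path=./testdb", "r") as dbentry:
    equilibrium = dbentry.get("equilibrium")  # occurrence 0 by default
    print(equilibrium.ids_properties.comment)  # -> "testing"
```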