From a5acf1ef3a44d26481160f9f65ec9d5ee7469beb Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 9 Jul 2024 13:55:07 +0200 Subject: [PATCH 01/29] Implement `DBEntry.get_sample` in IMASPy --- imaspy/backends/db_entry_impl.py | 32 +- imaspy/backends/imas_core/al_context.py | 36 ++ imaspy/backends/imas_core/db_entry_al.py | 27 +- imaspy/backends/imas_core/imas_interface.py | 7 + imaspy/db_entry.py | 130 ++++++- imaspy/test/test_get_sample.py | 393 ++++++++++++++++++++ 6 files changed, 603 insertions(+), 22 deletions(-) create mode 100644 imaspy/test/test_get_sample.py diff --git a/imaspy/backends/db_entry_impl.py b/imaspy/backends/db_entry_impl.py index 7f86e622..dbbb1329 100644 --- a/imaspy/backends/db_entry_impl.py +++ b/imaspy/backends/db_entry_impl.py @@ -2,13 +2,34 @@ # You should have received the IMASPy LICENSE file with this project. from abc import ABC, abstractmethod -from typing import Any, List, Optional +from dataclasses import dataclass +from typing import Any, List, Optional, Union + +import numpy from imaspy.ids_convert import NBCPathMap from imaspy.ids_factory import IDSFactory from imaspy.ids_toplevel import IDSToplevel +@dataclass +class GetSliceParameters: + """Helper class to store parameters to get_slice.""" + + time_requested: float + interpolation_method: int + + +@dataclass +class GetSampleParameters: + """Helper class to store parameters to get_sample.""" + + tmin: float + tmax: float + dtime: Optional[numpy.ndarray] + interpolation_method: Optional[int] + + class DBEntryImpl(ABC): """Interface for DBEntry implementations.""" @@ -47,20 +68,17 @@ def get( self, ids_name: str, occurrence: int, - time_requested: Optional[float], - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: IDSToplevel, lazy: bool, nbc_map: Optional[NBCPathMap], ) -> None: - """Implement DBEntry.get()/get_slice(). Load data from the data source. + """Implement DBEntry.get/get_slice/get_sample. Load data from the data source. Args: ids_name: Name of the IDS to load. occurrence: Which occurence of the IDS to load. - time_requested: None for get(), requested time slice for get_slice(). - interpolation_method: Requested interpolation method (ignore when - time_requested is None). + parameters: Additional parameters for a get_slice/get_sample call. destination: IDS object to store data in. lazy: Use lazy loading. nbc_map: NBCPathMap to use for implicit conversion. 
When None, no implicit diff --git a/imaspy/backends/imas_core/al_context.py b/imaspy/backends/imas_core/al_context.py index 07f37dec..d14f6bfd 100644 --- a/imaspy/backends/imas_core/al_context.py +++ b/imaspy/backends/imas_core/al_context.py @@ -8,6 +8,8 @@ from contextlib import contextmanager from typing import TYPE_CHECKING, Any, Callable, Iterator, List, Optional, Tuple +import numpy + from imaspy.backends.imas_core.imas_interface import ll_interface from imaspy.exception import LowlevelError from imaspy.ids_defs import ( @@ -105,6 +107,21 @@ def slice_action( raise LowlevelError("slice_action", status) return ALContext(ctx) + def timerange_action( + self, + path: str, + rwmode: int, + tmin: float, + tmax: float, + dtime: Optional[numpy.ndarray], + interpolation_method: int, + ) -> "ALContext": + """Begin a new timerange action for use in a ``with`` context.""" + ctx = ll_interface.begin_timerange_action( + self.ctx, path, rwmode, tmin, tmax, dtime, interpolation_method + ) + return ALContext(ctx) + def arraystruct_action( self, path: str, timebase: str, size: int ) -> "ALArrayStructContext": @@ -317,6 +334,25 @@ def slice_action( (path, rwmode, time_requested, interpolation_method), ) + @contextmanager + def timerange_action( + self, + path: str, + rwmode: int, + tmin: float, + tmax: float, + dtime: Optional[numpy.ndarray], + interpolation_method: int, + ) -> Iterator["LazyALContext"]: + """Lazily start a lowlevel timerange action, see + :meth:`ALContext.timerange_action`. + """ + yield LazyALContext( + self, + ALContext.timerange_action, + (path, rwmode, tmin, tmax, dtime, interpolation_method), + ) + def arraystruct_action( self, path: str, timebase: str, size: int ) -> "LazyALArrayStructContext": diff --git a/imaspy/backends/imas_core/db_entry_al.py b/imaspy/backends/imas_core/db_entry_al.py index e126bf9b..89cf3625 100644 --- a/imaspy/backends/imas_core/db_entry_al.py +++ b/imaspy/backends/imas_core/db_entry_al.py @@ -5,9 +5,10 @@ import logging import os from collections import deque -from typing import Any, Deque, List, Optional +from typing import Any, Deque, List, Optional, Union from urllib.parse import urlparse +from imaspy.backends.db_entry_impl import GetSampleParameters, GetSliceParameters from imaspy.db_entry import DBEntryImpl from imaspy.exception import DataEntryException, LowlevelError from imaspy.ids_convert import NBCPathMap, dd_version_map_from_factories @@ -216,8 +217,7 @@ def get( self, ids_name: str, occurrence: int, - time_requested: Optional[float], - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: IDSToplevel, lazy: bool, nbc_map: Optional[NBCPathMap], @@ -245,13 +245,28 @@ def get( else: context = self._db_ctx # Now fill the IDSToplevel - if time_requested is None or destination.metadata.type is IDSType.CONSTANT: + if parameters is None or destination.metadata.type is IDSType.CONSTANT: # called from get(), or when the IDS is constant (see IMAS-3330) manager = context.global_action(ll_path, READ_OP) - else: # get_slice + elif isinstance(parameters, GetSliceParameters): manager = context.slice_action( - ll_path, READ_OP, time_requested, interpolation_method + ll_path, + READ_OP, + parameters.time_requested, + parameters.interpolation_method, ) + elif isinstance(parameters, GetSampleParameters): + manager = context.timerange_action( + ll_path, + READ_OP, + parameters.tmin, + parameters.tmax, + parameters.dtime, + parameters.interpolation_method, + ) + else: + raise TypeError(f"Incorrect type for 
parameters: {type(parameters)}.")
+
         with manager as read_ctx:
             if lazy:
                 destination._set_lazy_context(read_ctx)
diff --git a/imaspy/backends/imas_core/imas_interface.py b/imaspy/backends/imas_core/imas_interface.py
index 07f4783e..cca7d42f 100644
--- a/imaspy/backends/imas_core/imas_interface.py
+++ b/imaspy/backends/imas_core/imas_interface.py
@@ -215,6 +215,13 @@ def get_occurrences(self, ctx, ids_name):
     def get_al_version(self):
         return self._al_version_str
 
+    # New methods added in AL 5.3
+
+    def begin_timerange_action(
+        self, ctx, path, rwmode, tmin, tmax, dtime, interpolation_method
+    ):
+        raise self._minimal_version("5.3")
+
 
 # Dummy documentation for interface:
 for funcname in dir(LowlevelInterface):
diff --git a/imaspy/db_entry.py b/imaspy/db_entry.py
index 9ca826b7..ba5bcac6 100644
--- a/imaspy/db_entry.py
+++ b/imaspy/db_entry.py
@@ -5,10 +5,16 @@
 
 import logging
 import os
-from typing import Any, List, Optional, Tuple, Type, overload
+from typing import Any, List, Optional, Tuple, Type, Union, overload
+
+import numpy
 
 import imaspy
-from imaspy.backends.db_entry_impl import DBEntryImpl
+from imaspy.backends.db_entry_impl import (
+    DBEntryImpl,
+    GetSampleParameters,
+    GetSliceParameters,
+)
 from imaspy.dd_zip import dd_xml_versions
 from imaspy.exception import IDSNameError, UnknownDDVersion, ValidationError
 from imaspy.ids_base import IDSBase
@@ -347,7 +353,6 @@ def get(
             ids_name,
             occurrence,
             None,
-            0,
             destination,
             lazy,
             autoconvert,
@@ -416,8 +421,117 @@ def get_slice(
         return self._get(
             ids_name,
             occurrence,
-            time_requested,
-            interpolation_method,
+            GetSliceParameters(time_requested, interpolation_method),
             destination,
             lazy,
             autoconvert,
             ignore_unknown_dd_version,
         )
+
+    def get_sample(
+        self,
+        ids_name: str,
+        tmin: float,
+        tmax: float,
+        dtime: Optional[Union[float, numpy.ndarray]] = None,
+        interpolation_method: Optional[int] = None,
+        occurrence: int = 0,
+        *,
+        lazy: bool = False,
+        autoconvert: bool = True,
+        ignore_unknown_dd_version: bool = False,
+        destination: Optional[IDSToplevel] = None,
+    ) -> IDSToplevel:
+        """Read a range of time slices from an IDS in this Database Entry.
+
+        This method has three different modes, depending on the provided arguments:
+
+        1. No interpolation. This mode is selected when :param:`dtime` and
+           :param:`interpolation_method` are not provided.
+
+           This mode returns an IDS object with all constant/static data filled. The
+           dynamic data is retrieved for the provided time range [tmin, tmax].
+
+        2. Interpolate dynamic data on a uniform time base. This mode is selected
+           when :param:`dtime` and :param:`interpolation_method` are provided.
+           :param:`dtime` must be a number or a numpy array of size 1.
+
+           This mode will generate an IDS with a homogeneous time vector ``[tmin, tmin
+           + dtime, tmin + 2*dtime, ...]`` up to ``tmax``. The returned IDS always has
+           ``ids_properties.homogeneous_time = 1``.
+
+        3. Interpolate dynamic data on an explicit time base. This mode is selected
+           when :param:`dtime` and :param:`interpolation_method` are provided.
+           :param:`dtime` must be a numpy array of size greater than 1.
+
+           This mode will generate an IDS with a homogeneous time vector equal to
+           :param:`dtime`. :param:`tmin` and :param:`tmax` are ignored in this mode.
+           The returned IDS always has ``ids_properties.homogeneous_time = 1``.
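+
+        As a rough sketch, the mode is selected from the arguments as follows
+        (an illustration only; the actual implementation may differ in detail):
+
+        .. code-block:: python
+
+            if dtime is None and interpolation_method is None:
+                mode = 1  # no interpolation
+            elif dtime is not None and interpolation_method is not None:
+                dtime = numpy.atleast_1d(dtime)  # float or 0D array -> 1D array
+                mode = 2 if dtime.size == 1 else 3
+            else:
+                raise ValueError("dtime and interpolation_method go together")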
+ + Args: + ids_name: Name of the IDS to read from the backend + tmin: Lower bound of the requested time range + tmax: Upper bound of the requested time range, must be larger than or + equal to :param:`tmin` + dtime: Interval to use when interpolating, must be positive, or numpy array + containing an explicit time base to interpolate. + interpolation_method: Interpolation method to use. Available options: + + - :const:`~imaspy.ids_defs.CLOSEST_INTERP` + - :const:`~imaspy.ids_defs.PREVIOUS_INTERP` + - :const:`~imaspy.ids_defs.LINEAR_INTERP` + + occurrence: Which occurrence of the IDS to read. + + Keyword Args: + lazy: When set to ``True``, values in this IDS will be retrieved only when + needed (instead of getting the full IDS immediately). See :ref:`Lazy + loading` for more details. + autoconvert: Automatically convert IDSs. + + If enabled (default), a call to ``get_sample()`` will return + an IDS from the Data Dictionary version attached to this Data Entry. + Data is automatically converted between the on-disk version and the + in-memory version. + + When set to ``False``, the IDS will be returned in the DD version it was + stored in. + ignore_unknown_dd_version: When an IDS is stored with an unknown DD version, + do not attempt automatic conversion and fetch the data in the Data + Dictionary version attached to this Data Entry. + destination: Populate this IDSToplevel instead of creating an empty one. + + Returns: + The loaded IDS. + + Example: + .. code-block:: python + + import imaspy + import numpy + from imaspy import ids_defs + + imas_entry = imaspy.DBEntry( + "imas:mdsplus?user=public;pulse=131024;run=41;database=ITER", "r") + + # All time slices between t=200 and t=370 + core_profiles = imas_entry.get_sample("core_profiles", 200, 370) + + # Closest points to [0, 100, 200, ..., 1000] + core_profiles_interp = imas_entry.get_sample( + "core_profiles", 0, 1000, 100, ids_defs.CLOSEST_INTERP) + + # Linear interpolation for [10, 11, 12, 14, 16, 20, 30, 40, 50] + times = numpy.array([10, 11, 12, 14, 16, 20, 30, 40, 50]) + core_profiles_interp = imas_entry.get_sample( + "core_profiles", 0, 0, times, ids_defs.LINEAR_INTERP) + """ + if dtime is not None: + dtime = numpy.atleast_1d(dtime) # Convert floats and 0D arrays to 1D array + return self._get( + ids_name, + occurrence, + GetSampleParameters(tmin, tmax, dtime, interpolation_method), destination, lazy, autoconvert, @@ -428,8 +542,7 @@ def _get( self, ids_name: str, occurrence: int, - time_requested: Optional[float], - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: Optional[IDSToplevel], lazy: bool, autoconvert: bool, @@ -492,8 +605,7 @@ def _get( return self._dbe_impl.get( ids_name, occurrence, - time_requested, - interpolation_method, + parameters, destination, lazy, nbc_map, diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py new file mode 100644 index 00000000..7c3b210f --- /dev/null +++ b/imaspy/test/test_get_sample.py @@ -0,0 +1,393 @@ +import numpy as np +import pytest + +import imaspy +from imaspy.backends.imas_core.imas_interface import lowlevel +from imaspy.exception import DataEntryException +from imaspy.ids_defs import ( + CLOSEST_INTERP, + EMPTY_FLOAT, + HDF5_BACKEND, + IDS_TIME_MODE_HETEROGENEOUS, + IDS_TIME_MODE_HOMOGENEOUS, + LINEAR_INTERP, + MDSPLUS_BACKEND, + PREVIOUS_INTERP, +) + + +@pytest.fixture() +def test_db_uri(backend, worker_id, tmp_path_factory): + # Check if begin_timerange_action is available in imas_core + if not 
hasattr(lowlevel, "al_begin_timerange_action"):
+        pytest.skip("imas_core version doesn't support begin_timerange_action.")
+
+    if backend not in [HDF5_BACKEND, MDSPLUS_BACKEND]:
+        pytest.skip("Backend doesn't support time range operations.")
+
+    tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}")
+    backend_str = {HDF5_BACKEND: "hdf5", MDSPLUS_BACKEND: "mdsplus"}[backend]
+    uri = f"imas:{backend_str}?path={tmp_path}"
+    entry = imaspy.DBEntry(uri, "x")
+
+    # Homogeneous core profiles:
+    cp = entry.factory.core_profiles()
+    cp.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    N_time = 32
+    cp.time = np.linspace(0, 1, N_time)
+    cp.profiles_1d.resize(N_time)
+    for i in range(N_time):
+        # FLT_1D:
+        cp.profiles_1d[i].grid.rho_tor_norm = np.array([0.0, 1.0])
+        cp.profiles_1d[i].t_i_average = np.array([2.0, 1.0]) * (i + 1)
+        cp.profiles_1d[i].ion.resize(1)
+        # STR_0D:
+        cp.profiles_1d[i].ion[0].label = "D"
+        # FLT_0D
+        cp.profiles_1d[i].ion[0].z_ion = 1.0
+        cp.profiles_1d[i].ion[0].temperature = cp.profiles_1d[i].t_i_average
+        # INT_0D
+        cp.profiles_1d[i].ion[0].temperature_validity = 0
+    cp.global_quantities.ip = (2 - cp.time) ** 0.5
+    entry.put(cp)
+
+    # Inhomogeneous equilibrium
+    eq = entry.factory.equilibrium()
+    eq.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS
+    eq.time = np.linspace(0, 2, 512)
+    # GGD Grid with 1 time slice
+    eq.grids_ggd.resize(1)
+    eq.grids_ggd[0].time = 0.0
+    eq.grids_ggd[0].grid.resize(1)
+    eq.grids_ggd[0].grid[0].path = "wall:0/description_ggd(1)/grid_ggd"
+    # multiple time slices with data
+    N_time = 6
+    eq.time_slice.resize(N_time)
+    for i in range(N_time):
+        # FLT_0D
+        eq.time_slice[i].time = i / 5.0
+        eq.time_slice[i].profiles_2d.resize(1)
+        # FLT_1D
+        eq.time_slice[i].profiles_2d[0].grid.dim1 = np.array([0.0, 1.0])
+        eq.time_slice[i].profiles_2d[0].grid.dim2 = np.array([3.0, 4.0])
+        # STR_0D
+        eq.time_slice[i].profiles_2d[0].grid_type.name = f"test {i}"
+        eq.time_slice[i].profiles_2d[0].grid_type.description = "test description"
+        # INT_0D
+        eq.time_slice[i].profiles_2d[0].grid_type.index = -1
+        # FLT_2D
+        eq.time_slice[i].profiles_2d[0].r = np.array([[0.0, 0.0], [1.0, 1.0]])
+        eq.time_slice[i].profiles_2d[0].z = np.array([[3.0, 4.0], [3.0, 4.0]])
+        eq.time_slice[i].profiles_2d[0].psi = (
+            eq.time_slice[i].profiles_2d[0].r - eq.time_slice[i].profiles_2d[0].z
+        ) * (1 + eq.time_slice[i].time) ** 2
+    entry.put(eq)
+
+    # Equilibrium only has dynamic AOS and no other non-homogeneous time nodes
+    # Use magnetics to test that case:
+    mag = entry.factory.magnetics()
+    mag.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS
+    mag.time = np.array([0.0])
+    mag.flux_loop.resize(3)
+    for i in range(3):
+        mag.flux_loop[i].flux.time = np.linspace(0.0123, 1, 5 + i)
+        mag.flux_loop[i].flux.data = 2 + 2 * mag.flux_loop[i].flux.time
+        mag.flux_loop[i].voltage.time = np.linspace(0.0123, 1, 8 + i)
+        mag.flux_loop[i].voltage.data = 2 - 5 * mag.flux_loop[i].voltage.time
+    entry.put(mag)
+
+    entry.close()
+    return uri
+
+
+def test_invalid_arguments(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    with pytest.raises(ValueError):
+        entry.get_sample("core_profiles", 0.3, 0.2)  # tmin > tmax
+    with pytest.raises(DataEntryException):
+        entry.get_sample("core_profiles", 0.1, 0.2, occurrence="invalid")
+    with pytest.raises(ValueError):
+        entry.get_sample("core_profiles", 0.1, 0.2, 0.05)  # no interpolation method
+
+
+def test_get_sample_homogeneous(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    cp = 
entry.get_sample("core_profiles", 0.3, 14 / 31) + assert np.array_equal(cp.time, np.linspace(0, 1, 32)[10:15]) + + for i, p1d in enumerate(cp.profiles_1d): + assert np.array_equal(p1d.grid.rho_tor_norm, [0.0, 1.0]) + assert np.array_equal(p1d.t_i_average, np.array([2.0, 1.0]) * (i + 11)) + assert len(p1d.ion) == 1 + assert p1d.ion[0].label == "D" + assert p1d.ion[0].z_ion == 1 + assert np.array_equal(p1d.ion[0].temperature, p1d.t_i_average) + assert p1d.ion[0].temperature_validity == 0 + + assert np.array_equal(cp.global_quantities.ip, (2 - cp.time) ** 0.5) + + +def test_get_sample_heterogeneous(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + eq = entry.get_sample("equilibrium", -1.0, 0.2) + # Main time array + assert np.array_equal(eq.time, np.linspace(0, 2, 512)[:52]) + # grids_ggd AoS + assert len(eq.grids_ggd) == 1 + assert eq.grids_ggd[0].time == 0.0 + assert eq.grids_ggd[0].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" + # time_slice AoS + assert len(eq.time_slice) == 2 + assert eq.time_slice[0].time == 0.0 + assert eq.time_slice[1].time == 0.2 + + for i in range(2): + p2d = eq.time_slice[i].profiles_2d[0] + assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) + assert np.array_equal(p2d.grid.dim2, [3.0, 4.0]) + assert p2d.grid_type.name == f"test {i}" + assert p2d.grid_type.index == -1 + assert np.array_equal(p2d.r, [[0.0, 0.0], [1.0, 1.0]]) + assert np.array_equal(p2d.z, [[3.0, 4.0], [3.0, 4.0]]) + expected_psi = (p2d.r - p2d.z) * (1 + eq.time_slice[i].time) ** 2 + assert np.array_equal(p2d.psi, expected_psi) + + mag = entry.get_sample("magnetics", 0.25, 0.75) + assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS + assert len(mag.time) == 0 + assert len(mag.flux_loop) == 3 + for i in range(3): + fl = mag.flux_loop[i] + + flux_time = np.linspace(0.0123, 1, 5 + i) + flux_time = flux_time[0.25 <= flux_time] + flux_time = flux_time[flux_time <= 0.75] + assert np.array_equal(fl.flux.time, flux_time) + assert np.array_equal(fl.flux.data, 2 + 2 * flux_time) + + voltage_time = np.linspace(0.0123, 1, 8 + i) + voltage_time = voltage_time[0.25 <= voltage_time] + voltage_time = voltage_time[voltage_time <= 0.75] + assert np.array_equal(fl.voltage.time, voltage_time) + assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) + + +def test_get_sample_homogeneous_linear_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + # Note requesting 0.401 and not 0.4, since + # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 + cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, LINEAR_INTERP) + assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 6 + # Check some interpolated values + for i in range(6): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0])) + # Check t_i_average + expected = np.array([2.0, 1.0]) * (1 + 31 * cp.time[i]) + t_i_average = cp.profiles_1d[i].t_i_average + assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) + + +def test_get_sample_homogeneous_explicit_timebase(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + times = [0.1, 0.2345, 0.5, np.sqrt(2) / 2] + cp = entry.get_sample("core_profiles", 0, 0, times, LINEAR_INTERP) + assert np.allclose(cp.time, times, rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 4 + # Check some interpolated values + for i in range(4): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert 
np.array_equal(rho_tor_norm, np.array([0.0, 1.0])) + # Check t_i_average + expected = np.array([2.0, 1.0]) * (1 + 31 * cp.time[i]) + t_i_average = cp.profiles_1d[i].t_i_average + assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) + + +def test_get_sample_homogeneous_previous_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + # Note requesting 0.401 and not 0.4, since + # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 + cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, PREVIOUS_INTERP) + assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 6 + # Check some interpolated values + for i in range(6): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0])) + # Check t_i_average + expected = np.array([2.0, 1.0]) * [10, 10, 11, 12, 12, 13][i] + t_i_average = cp.profiles_1d[i].t_i_average + assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) + + +def test_get_sample_homogeneous_closest_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + # Note requesting 0.401 and not 0.4, since + # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 + cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, CLOSEST_INTERP) + assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 6 + # Check some interpolated values + for i in range(6): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0])) + # Check t_i_average + expected = np.array([2.0, 1.0]) * [10, 11, 12, 12, 13, 13][i] + t_i_average = cp.profiles_1d[i].t_i_average + assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) + + +def test_get_sample_heterogeneous_linear_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, LINEAR_INTERP) + N_samples = 7 + # IDS becomes homogeneous after resampling + assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples)) + assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + + # Check interpolated grids_ggd + assert len(eq.grids_ggd) == N_samples + for i in range(N_samples): + assert eq.grids_ggd[i].time == EMPTY_FLOAT + assert len(eq.grids_ggd[i].grid) == 1 + assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" + + # Check interpolated time_slice + assert len(eq.time_slice) == N_samples + for i in range(N_samples): + assert eq.time_slice[i].time == EMPTY_FLOAT + assert len(eq.time_slice[i].profiles_2d) == 1 + p2d = eq.time_slice[i].profiles_2d[0] + assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) + assert np.array_equal(p2d.grid.dim2, [3.0, 4.0]) + + # Determine the data as we have stored it in test_db_uri() + time = eq.time[i] + original_times = [0, 0.2, 0.4, 0.6, 0.8, 1.0] + index = np.searchsorted(original_times, time) + prevtime = original_times[index - 1] + nexttime = original_times[index] + prevpsi = (p2d.r - p2d.z) * (1 + prevtime) ** 2 + nextpsi = (p2d.r - p2d.z) * (1 + nexttime) ** 2 + # Linear interpolation + expected_psi = (nextpsi * (time - prevtime) + prevpsi * (nexttime - time)) / ( + nexttime - prevtime + ) + assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0) + + mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, LINEAR_INTERP) + assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + assert np.allclose(mag.time, np.linspace(0.2, 
0.5, N_samples)) + + assert len(mag.flux_loop) == 3 + for i in range(3): + fl = mag.flux_loop[i] + assert np.allclose(fl.flux.data, 2 + 2 * mag.time, rtol=1e-14, atol=0) + assert np.allclose(fl.voltage.data, 2 - 5 * mag.time, rtol=1e-14, atol=2e-16) + + +def test_get_sample_heterogeneous_previous_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, PREVIOUS_INTERP) + N_samples = 7 + # IDS becomes homogeneous after resampling + assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples)) + assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + + # Check interpolated grids_ggd + assert len(eq.grids_ggd) == N_samples + for i in range(N_samples): + assert eq.grids_ggd[i].time == EMPTY_FLOAT + assert len(eq.grids_ggd[i].grid) == 1 + assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" + + # Check interpolated time_slice + assert len(eq.time_slice) == N_samples + for i in range(N_samples): + assert eq.time_slice[i].time == EMPTY_FLOAT + assert len(eq.time_slice[i].profiles_2d) == 1 + p2d = eq.time_slice[i].profiles_2d[0] + assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) + assert np.array_equal(p2d.grid.dim2, [3.0, 4.0]) + + origtime = [0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.4][i] + expected_psi = (p2d.r - p2d.z) * (1 + origtime) ** 2 + assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0) + + mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, PREVIOUS_INTERP) + assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples)) + + assert len(mag.flux_loop) == 3 + for i in range(3): + fl = mag.flux_loop[i] + + flux_time = np.linspace(0.0123, 1, 5 + i) + flux_time = flux_time[np.searchsorted(flux_time, mag.time, side="right") - 1] + assert np.array_equal(fl.flux.data, 2 + 2 * flux_time) + + voltage_time = np.linspace(0.0123, 1, 8 + i) + voltage_time = voltage_time[ + np.searchsorted(voltage_time, mag.time, side="right") - 1 + ] + assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) + + +def test_get_sample_heterogeneous_closest_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, CLOSEST_INTERP) + N_samples = 7 + # IDS becomes homogeneous after resampling + assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples)) + assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + + # Check interpolated grids_ggd + assert len(eq.grids_ggd) == N_samples + for i in range(N_samples): + assert eq.grids_ggd[i].time == EMPTY_FLOAT + assert len(eq.grids_ggd[i].grid) == 1 + assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" + + # Check interpolated time_slice + assert len(eq.time_slice) == N_samples + for i in range(N_samples): + assert eq.time_slice[i].time == EMPTY_FLOAT + assert len(eq.time_slice[i].profiles_2d) == 1 + p2d = eq.time_slice[i].profiles_2d[0] + assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) + assert np.array_equal(p2d.grid.dim2, [3.0, 4.0]) + + # Note: CLOSEST appears to round up: 0.4 is closer to 0.3 than 0.2 + origtime = [0.2, 0.2, 0.4, 0.4, 0.4, 0.4, 0.6][i] + expected_psi = (p2d.r - p2d.z) * (1 + origtime) ** 2 + assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0) + + mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, CLOSEST_INTERP) + assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS + assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples)) + + 
assert len(mag.flux_loop) == 3 + for i in range(3): + fl = mag.flux_loop[i] + + flux_time = np.linspace(0.0123, 1, 5 + i) + flux_time = flux_time[ + np.argmin(np.abs(flux_time[None, :] - mag.time[:, None]), axis=1) + ] + assert np.array_equal(fl.flux.data, 2 + 2 * flux_time) + + voltage_time = np.linspace(0.0123, 1, 8 + i) + voltage_time = voltage_time[ + np.argmin(np.abs(voltage_time[None, :] - mag.time[:, None]), axis=1) + ] + assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) From fd49e02171ab03cb997303bb31eb9543db2ae251 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 9 Jul 2024 14:16:14 +0200 Subject: [PATCH 02/29] Add docstrings for GetSliceParameters / GetSampleParameters --- imaspy/backends/db_entry_impl.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/imaspy/backends/db_entry_impl.py b/imaspy/backends/db_entry_impl.py index dbbb1329..9fa42bd8 100644 --- a/imaspy/backends/db_entry_impl.py +++ b/imaspy/backends/db_entry_impl.py @@ -17,7 +17,9 @@ class GetSliceParameters: """Helper class to store parameters to get_slice.""" time_requested: float + """See :param:`imaspy.db_entry.DBEntry.get_slice.time_requested`.""" interpolation_method: int + """See :param:`imaspy.db_entry.DBEntry.get_slice.interpolation_method`.""" @dataclass @@ -25,9 +27,13 @@ class GetSampleParameters: """Helper class to store parameters to get_sample.""" tmin: float + """See :param:`imaspy.db_entry.DBEntry.get_sample.tmin`.""" tmax: float + """See :param:`imaspy.db_entry.DBEntry.get_sample.tmax`.""" dtime: Optional[numpy.ndarray] + """See :param:`imaspy.db_entry.DBEntry.get_sample.dtime`.""" interpolation_method: Optional[int] + """See :param:`imaspy.db_entry.DBEntry.get_sample.interpolation_method`.""" class DBEntryImpl(ABC): From 73268bb212c06c1dc23223a24e31537d8cffbee6 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 11 Nov 2024 10:11:44 +0100 Subject: [PATCH 03/29] Update tests for DD 4.0.0 --- imaspy/test/test_get_sample.py | 54 +++++++++++++++------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py index 7c3b210f..beffe52d 100644 --- a/imaspy/test/test_get_sample.py +++ b/imaspy/test/test_get_sample.py @@ -6,7 +6,6 @@ from imaspy.exception import DataEntryException from imaspy.ids_defs import ( CLOSEST_INTERP, - EMPTY_FLOAT, HDF5_BACKEND, IDS_TIME_MODE_HETEROGENEOUS, IDS_TIME_MODE_HOMOGENEOUS, @@ -28,7 +27,7 @@ def test_db_uri(backend, worker_id, tmp_path_factory): tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}") backend_str = {HDF5_BACKEND: "hdf5", MDSPLUS_BACKEND: "mdsplus"}[backend] uri = f"imas:{backend_str}?path={tmp_path}" - entry = imaspy.DBEntry(uri, "x") + entry = imaspy.DBEntry(uri, "x", dd_version="4.0.0") # Homogeneous core profiles: cp = entry.factory.core_profiles() @@ -42,7 +41,7 @@ def test_db_uri(backend, worker_id, tmp_path_factory): cp.profiles_1d[i].t_i_average = np.array([2.0, 1.0]) * (i + 1) cp.profiles_1d[i].ion.resize(1) # STR_0D: - cp.profiles_1d[i].ion[0].label = "D" + cp.profiles_1d[i].ion[0].name = "D" # FLT_0D cp.profiles_1d[i].ion[0].z_ion = 1.0 cp.profiles_1d[i].ion[0].temperature = cp.profiles_1d[i].t_i_average @@ -100,8 +99,12 @@ def test_db_uri(backend, worker_id, tmp_path_factory): return uri -def test_invalid_arguments(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +@pytest.fixture() +def entry(test_db_uri): + return imaspy.DBEntry(test_db_uri, "r", dd_version="4.0.0") + + +def test_invalid_arguments(entry): with 
pytest.raises(ValueError): entry.get_sample("core_profiles", 0.3, 0.2) # tmin > tmax with pytest.raises(DataEntryException): @@ -110,8 +113,7 @@ def test_invalid_arguments(test_db_uri): entry.get_sample("core_profiles", 0.1, 0.2, 0.05) # no interpolation method -def test_get_sample_homogeneous(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous(entry): cp = entry.get_sample("core_profiles", 0.3, 14 / 31) assert np.array_equal(cp.time, np.linspace(0, 1, 32)[10:15]) @@ -119,7 +121,7 @@ def test_get_sample_homogeneous(test_db_uri): assert np.array_equal(p1d.grid.rho_tor_norm, [0.0, 1.0]) assert np.array_equal(p1d.t_i_average, np.array([2.0, 1.0]) * (i + 11)) assert len(p1d.ion) == 1 - assert p1d.ion[0].label == "D" + assert p1d.ion[0].name == "D" assert p1d.ion[0].z_ion == 1 assert np.array_equal(p1d.ion[0].temperature, p1d.t_i_average) assert p1d.ion[0].temperature_validity == 0 @@ -127,8 +129,7 @@ def test_get_sample_homogeneous(test_db_uri): assert np.array_equal(cp.global_quantities.ip, (2 - cp.time) ** 0.5) -def test_get_sample_heterogeneous(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous(entry): eq = entry.get_sample("equilibrium", -1.0, 0.2) # Main time array assert np.array_equal(eq.time, np.linspace(0, 2, 512)[:52]) @@ -172,8 +173,7 @@ def test_get_sample_heterogeneous(test_db_uri): assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) -def test_get_sample_homogeneous_linear_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous_linear_interp(entry): # Note requesting 0.401 and not 0.4, since # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, LINEAR_INTERP) @@ -191,8 +191,7 @@ def test_get_sample_homogeneous_linear_interp(test_db_uri): assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) -def test_get_sample_homogeneous_explicit_timebase(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous_explicit_timebase(entry): times = [0.1, 0.2345, 0.5, np.sqrt(2) / 2] cp = entry.get_sample("core_profiles", 0, 0, times, LINEAR_INTERP) assert np.allclose(cp.time, times, rtol=1e-14, atol=0) @@ -209,8 +208,7 @@ def test_get_sample_homogeneous_explicit_timebase(test_db_uri): assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) -def test_get_sample_homogeneous_previous_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous_previous_interp(entry): # Note requesting 0.401 and not 0.4, since # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, PREVIOUS_INTERP) @@ -228,8 +226,7 @@ def test_get_sample_homogeneous_previous_interp(test_db_uri): assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) -def test_get_sample_homogeneous_closest_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_homogeneous_closest_interp(entry): # Note requesting 0.401 and not 0.4, since # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, CLOSEST_INTERP) @@ -247,8 +244,7 @@ def test_get_sample_homogeneous_closest_interp(test_db_uri): assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) -def test_get_sample_heterogeneous_linear_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous_linear_interp(entry): eq = 
entry.get_sample("equilibrium", 0.2, 0.501, 0.05, LINEAR_INTERP) N_samples = 7 # IDS becomes homogeneous after resampling @@ -258,14 +254,14 @@ def test_get_sample_heterogeneous_linear_interp(test_db_uri): # Check interpolated grids_ggd assert len(eq.grids_ggd) == N_samples for i in range(N_samples): - assert eq.grids_ggd[i].time == EMPTY_FLOAT + # assert eq.grids_ggd[i].time == EMPTY_FLOAT assert len(eq.grids_ggd[i].grid) == 1 assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" # Check interpolated time_slice assert len(eq.time_slice) == N_samples for i in range(N_samples): - assert eq.time_slice[i].time == EMPTY_FLOAT + # assert eq.time_slice[i].time == EMPTY_FLOAT assert len(eq.time_slice[i].profiles_2d) == 1 p2d = eq.time_slice[i].profiles_2d[0] assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) @@ -296,8 +292,7 @@ def test_get_sample_heterogeneous_linear_interp(test_db_uri): assert np.allclose(fl.voltage.data, 2 - 5 * mag.time, rtol=1e-14, atol=2e-16) -def test_get_sample_heterogeneous_previous_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous_previous_interp(entry): eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, PREVIOUS_INTERP) N_samples = 7 # IDS becomes homogeneous after resampling @@ -307,14 +302,14 @@ def test_get_sample_heterogeneous_previous_interp(test_db_uri): # Check interpolated grids_ggd assert len(eq.grids_ggd) == N_samples for i in range(N_samples): - assert eq.grids_ggd[i].time == EMPTY_FLOAT + # assert eq.grids_ggd[i].time == EMPTY_FLOAT assert len(eq.grids_ggd[i].grid) == 1 assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" # Check interpolated time_slice assert len(eq.time_slice) == N_samples for i in range(N_samples): - assert eq.time_slice[i].time == EMPTY_FLOAT + # assert eq.time_slice[i].time == EMPTY_FLOAT assert len(eq.time_slice[i].profiles_2d) == 1 p2d = eq.time_slice[i].profiles_2d[0] assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) @@ -343,8 +338,7 @@ def test_get_sample_heterogeneous_previous_interp(test_db_uri): assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) -def test_get_sample_heterogeneous_closest_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous_closest_interp(entry): eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, CLOSEST_INTERP) N_samples = 7 # IDS becomes homogeneous after resampling @@ -354,14 +348,14 @@ def test_get_sample_heterogeneous_closest_interp(test_db_uri): # Check interpolated grids_ggd assert len(eq.grids_ggd) == N_samples for i in range(N_samples): - assert eq.grids_ggd[i].time == EMPTY_FLOAT + # assert eq.grids_ggd[i].time == EMPTY_FLOAT assert len(eq.grids_ggd[i].grid) == 1 assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" # Check interpolated time_slice assert len(eq.time_slice) == N_samples for i in range(N_samples): - assert eq.time_slice[i].time == EMPTY_FLOAT + # assert eq.time_slice[i].time == EMPTY_FLOAT assert len(eq.time_slice[i].profiles_2d) == 1 p2d = eq.time_slice[i].profiles_2d[0] assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) From bc9ea191da4626ec92905b68352d3e12d2a10c08 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 11 Nov 2024 10:47:46 +0100 Subject: [PATCH 04/29] Update NCDBEntryImpl for get_sample and raise NotImplementedError --- imaspy/backends/netcdf/db_entry_nc.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py 
b/imaspy/backends/netcdf/db_entry_nc.py index ba7334fc..f04630db 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -1,9 +1,13 @@ """DBEntry implementation using NetCDF as a backend.""" import logging -from typing import List +from typing import List, Optional, Union -from imaspy.backends.db_entry_impl import DBEntryImpl +from imaspy.backends.db_entry_impl import ( + DBEntryImpl, + GetSampleParameters, + GetSliceParameters, +) from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import nc2ids from imaspy.exception import DataEntryException @@ -74,15 +78,18 @@ def get( self, ids_name: str, occurrence: int, - time_requested: float | None, - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: IDSToplevel, lazy: bool, - nbc_map: NBCPathMap | None, + nbc_map: Optional[NBCPathMap], ) -> None: # Feature compatibility checks - if time_requested is not None: - raise NotImplementedError("`get_slice` is not available for netCDF files.") + if parameters is not None: + if isinstance(parameters, GetSliceParameters): + func = "get_slice" + else: + func = "get_sample" + raise NotImplementedError(f"`{func}` is not available for netCDF files.") if lazy: raise NotImplementedError( "Lazy loading is not implemented for netCDF files." From 674460bbece63ccbd51f03e1d644a7916d250a6f Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 11 Nov 2024 10:49:14 +0100 Subject: [PATCH 05/29] Set `begin_timerange_action` as available since AL core 5.4 --- imaspy/backends/imas_core/imas_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imaspy/backends/imas_core/imas_interface.py b/imaspy/backends/imas_core/imas_interface.py index cca7d42f..b92438b1 100644 --- a/imaspy/backends/imas_core/imas_interface.py +++ b/imaspy/backends/imas_core/imas_interface.py @@ -215,12 +215,12 @@ def get_occurrences(self, ctx, ids_name): def get_al_version(self): return self._al_version_str - # New methods added in AL 5.3 + # New methods added in AL 5.4 def begin_timerange_action( self, ctx, path, rwmode, tmin, tmax, dtime, interpolation_method ): - raise self._minimal_version("5.3") + raise self._minimal_version("5.4") # Dummy documentation for interface: From 0a47f94e97467d6e57c65e335590daab25f021b0 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 14 Nov 2024 10:17:49 +0100 Subject: [PATCH 06/29] Make the NC2IDS reader class-based In preparation for validating the NC data --- imaspy/backends/netcdf/nc2ids.py | 157 ++++++++++++++++++------------- 1 file changed, 89 insertions(+), 68 deletions(-) diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index d071a3ba..2877b297 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -54,10 +54,98 @@ def _tree_iter( yield from _tree_iter(node, paths, curindex + (i,)) +class NC2IDS: + """Class responsible for reading an IDS from a NetCDF group.""" + + def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: + """Initialize NC2IDS converter. + + Args: + group: NetCDF group that stores the IDS data. + ids: Corresponding IDS toplevel to store the data in. 
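+
+        A minimal usage sketch (the group layout shown here is hypothetical):
+
+        .. code-block:: python
+
+            with netCDF4.Dataset("entry.nc") as dataset:
+                ids = IDSFactory().core_profiles()
+                NC2IDS(dataset["core_profiles/0"], ids).run()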
+ """ + self.group = group + """NetCDF Group that the IDS is stored in.""" + self.ids = ids + """IDS to store the data in.""" + + self.ncmeta = NCMetadata(ids.metadata) + """NetCDF related metadata.""" + self.variables = list(group.variables) + """List of variable names stored in the netCDF group.""" + # TODO: validate ids_properties.homogeneous_time + self.homogeneous_time = ( + group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS + ) + """True iff the IDS time mode is homogeneous.""" + + # Don't use masked arrays: they're slow and we'll handle most of the unset + # values through the `:shape` arrays + self.group.set_auto_mask(False) + + def run(self) -> None: + # FIXME: ensure that var_names are sorted properly + # Current assumption is that creation-order is fine + for var_name in self.variables: + if var_name.endswith(":shape"): + continue # TODO: validate that this is used + + # FIXME: error handling: + metadata = self.ids.metadata[var_name] + + # TODO: validate metadata (data type, units, etc.) conforms to DD + + if metadata.data_type is IDSDataType.STRUCTURE: + continue # This only contains DD metadata we already know + + var = self.group[var_name] + if metadata.data_type is IDSDataType.STRUCT_ARRAY: + if "sparse" in var.ncattrs(): + shapes = self.group[var_name + ":shape"][()] + for index, node in tree_iter(self.ids, metadata): + node.resize(shapes[index][0]) + + else: + # FIXME: extract dimension name from nc file? + dim = self.ncmeta.get_dimensions( + metadata.path_string, self.homogeneous_time + )[-1] + size = self.group.dimensions[dim].size + for _, node in tree_iter(self.ids, metadata): + node.resize(size) + + continue + + # FIXME: this may be a gigantic array, not required for sparse data + var = self.group[var_name] + data = var[()] + + if "sparse" in var.ncattrs(): + if metadata.ndim: + shapes = self.group[var_name + ":shape"][()] + for index, node in tree_iter(self.ids, metadata): + shape = shapes[index] + if shape.all(): + node.value = data[index + tuple(map(slice, shapes[index]))] + else: + for index, node in tree_iter(self.ids, metadata): + value = data[index] + if value != getattr(var, "_FillValue", None): + node.value = data[index] + + elif metadata.path_string not in self.ncmeta.aos: + # Shortcut for assigning untensorized data + self.ids[metadata.path] = data + + else: + for index, node in tree_iter(self.ids, metadata): + node.value = data[index] + + def nc2ids(group: netCDF4.Group, ids: IDSToplevel): """Get data from the netCDF group and store it in the provided IDS.""" try: - _nc2ids(group, ids) + NC2IDS(group, ids).run() except Exception as exc: raise RuntimeError( "An error occurred while reading data from the netCDF file " @@ -66,70 +154,3 @@ def nc2ids(group: netCDF4.Group, ids: IDSToplevel): "may cause errors in IMASPy. A more robust mechanism to load IDS data from " "netCDF files will be included in the next release of IMASPy." 
) from exc - - -def _nc2ids(group: netCDF4.Group, ids: IDSToplevel): - var_names = list(group.variables) - # FIXME: ensure that var_names are sorted properly - # Current assumption is that creation-order is fine - homogeneous_time = ( - group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS - ) - ncmeta = NCMetadata(ids.metadata) - - # Never return masked arrays, they're slow and we'll handle most of the unset values - # through the `:shape` arrays - group.set_auto_mask(False) - - for var_name in var_names: - if var_name.endswith(":shape"): - continue # TODO: validate that this is used - - # FIXME: error handling: - metadata = ids.metadata[var_name] - - # TODO: validate metadata (data type, units, etc.) conforms to DD - - if metadata.data_type is IDSDataType.STRUCTURE: - continue # This only contains DD metadata we already know - - var = group[var_name] - if metadata.data_type is IDSDataType.STRUCT_ARRAY: - if "sparse" in var.ncattrs(): - shapes = group[var_name + ":shape"][()] - for index, node in tree_iter(ids, metadata): - node.resize(shapes[index][0]) - - else: - # FIXME: extract dimension name from nc file? - dim = ncmeta.get_dimensions(metadata.path_string, homogeneous_time)[-1] - size = group.dimensions[dim].size - for _, node in tree_iter(ids, metadata): - node.resize(size) - - continue - - # FIXME: this may be a gigantic array, not required for sparse data - var = group[var_name] - data = var[()] - - if "sparse" in var.ncattrs(): - if metadata.ndim: - shapes = group[var_name + ":shape"][()] - for index, node in tree_iter(ids, metadata): - shape = shapes[index] - if shape.all(): - node.value = data[index + tuple(map(slice, shapes[index]))] - else: - for index, node in tree_iter(ids, metadata): - value = data[index] - if value != getattr(var, "_FillValue", None): - node.value = data[index] - - elif metadata.path_string not in ncmeta.aos: - # Shortcut for assigning untensorized data - ids[metadata.path] = data - - else: - for index, node in tree_iter(ids, metadata): - node.value = data[index] From d59fcabc29d5691004649bcf48016255010d7fa7 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 14 Nov 2024 14:35:30 +0100 Subject: [PATCH 07/29] Add missing docstring --- imaspy/backends/netcdf/nc2ids.py | 1 + 1 file changed, 1 insertion(+) diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index 2877b297..cc3ebc25 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -84,6 +84,7 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: self.group.set_auto_mask(False) def run(self) -> None: + """Load the data from the netCDF group into the IDS.""" # FIXME: ensure that var_names are sorted properly # Current assumption is that creation-order is fine for var_name in self.variables: From ee385b736cb3f10a7378343f2de59e46815b26b7 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 15 Nov 2024 11:46:33 +0100 Subject: [PATCH 08/29] Disable MDSplus backend tests for get_sample Feature not yet implemented, see IMAS-5593 --- imaspy/test/test_get_sample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py index beffe52d..0f5fed3e 100644 --- a/imaspy/test/test_get_sample.py +++ b/imaspy/test/test_get_sample.py @@ -21,7 +21,8 @@ def test_db_uri(backend, worker_id, tmp_path_factory): if not hasattr(lowlevel, "al_begin_timerange_action"): pytest.skip("imas_core version doesn't support begin_timerange_action.") - if 
backend not in [HDF5_BACKEND, MDSPLUS_BACKEND]: + # TODO: add MDSPLUS_BACKEND once implemented, see IMAS-5593 + if backend not in [HDF5_BACKEND]: pytest.skip("Backend doesn't support time range operations.") tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}") From b007316a44e07a80a8ccad67f62b017a537b2332 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 13 Nov 2024 16:32:25 +0100 Subject: [PATCH 09/29] Add validation for the ids_properties.homogeneous time variable in netCDF IDSs. --- imaspy/backends/netcdf/ids2nc.py | 8 +++--- imaspy/backends/netcdf/nc2ids.py | 47 +++++++++++++++++++++++++++---- imaspy/exception.py | 4 +++ imaspy/test/test_nc_validation.py | 36 +++++++++++++++++++++++ 4 files changed, 85 insertions(+), 10 deletions(-) create mode 100644 imaspy/test/test_nc_validation.py diff --git a/imaspy/backends/netcdf/ids2nc.py b/imaspy/backends/netcdf/ids2nc.py index 9fad4044..34e63101 100644 --- a/imaspy/backends/netcdf/ids2nc.py +++ b/imaspy/backends/netcdf/ids2nc.py @@ -23,10 +23,10 @@ IDSDataType.CPX: netCDF4.default_fillvals["f8"] * (1 + 1j), } dtypes = { - IDSDataType.INT: numpy.int32, + IDSDataType.INT: numpy.dtype(numpy.int32), IDSDataType.STR: str, - IDSDataType.FLT: numpy.float64, - IDSDataType.CPX: numpy.complex128, + IDSDataType.FLT: numpy.dtype(numpy.float64), + IDSDataType.CPX: numpy.dtype(numpy.complex128), } SHAPE_DTYPE = numpy.int32 @@ -188,7 +188,7 @@ def create_variables(self) -> None: kwargs = {} if dtype is not str: # Enable compression: kwargs.update(compression="zlib", complevel=1) - if dtype is not numpy.complex128: # Set fillvalue + if dtype is not dtypes[IDSDataType.CPX]: # Set fillvalue kwargs.update(fill_value=default_fillvals[metadata.data_type]) # Create variable dimensions = get_dimensions(path, self.homogeneous_time) diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index cc3ebc25..24cbc7b3 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -2,7 +2,9 @@ import netCDF4 +from imaspy.backends.netcdf import ids2nc from imaspy.backends.netcdf.nc_metadata import NCMetadata +from imaspy.exception import InvalidNetCDFEntry from imaspy.ids_base import IDSBase from imaspy.ids_data_type import IDSDataType from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS @@ -73,16 +75,27 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: """NetCDF related metadata.""" self.variables = list(group.variables) """List of variable names stored in the netCDF group.""" - # TODO: validate ids_properties.homogeneous_time - self.homogeneous_time = ( - group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS - ) - """True iff the IDS time mode is homogeneous.""" - # Don't use masked arrays: they're slow and we'll handle most of the unset # values through the `:shape` arrays self.group.set_auto_mask(False) + # Validate and get value of ids_properties.homogeneous_time + self.homogeneous_time = True # Must be initialized for self._validate_variable + """True iff the IDS time mode is homogeneous.""" + + if "ids_properties.homogeneous_time" not in self.variables: + raise InvalidNetCDFEntry( + "Mandatory variable `ids_properties.homogeneous_time` does not exist." + ) + var = group["ids_properties.homogeneous_time"] + self._validate_variable(var, ids.ids_properties.homogeneous_time.metadata) + if var[()] not in [0, 1, 2]: + raise InvalidNetCDFEntry( + f"Invalid value for ids_properties.homogeneous_time: {var[()]}. " + "Was expecting: 0, 1 or 2." 
+ ) + self.homogeneous_time = var[()] == IDS_TIME_MODE_HOMOGENEOUS + def run(self) -> None: """Load the data from the netCDF group into the IDS.""" # FIXME: ensure that var_names are sorted properly @@ -142,6 +155,28 @@ def run(self) -> None: for index, node in tree_iter(self.ids, metadata): node.value = data[index] + def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None: + """Validate that the variable has correct metadata, raise an exception if not. + + Args: + var: NetCDF variable + metadata: IDSMetadata of the corresponding IDS object + """ + if var.dtype != ids2nc.dtypes[metadata.data_type]: + raise InvalidNetCDFEntry( + f"Variable {var.name} has incorrect data type: {var.dtype}. " + f"Was expecting: {ids2nc.dtypes[metadata.data_type]}." + ) + # Dimensions + expected_dims = self.ncmeta.get_dimensions( + metadata.path_string, self.homogeneous_time + ) + if var.dimensions != expected_dims: + raise InvalidNetCDFEntry( + f"Variable {var.name} has incorrect dimensions: {var.dimensions}. " + f"Was expecting: {expected_dims}." + ) + def nc2ids(group: netCDF4.Group, ids: IDSToplevel): """Get data from the netCDF group and store it in the provided IDS.""" diff --git a/imaspy/exception.py b/imaspy/exception.py index 8377d13b..550ce2ed 100644 --- a/imaspy/exception.py +++ b/imaspy/exception.py @@ -101,3 +101,7 @@ def __init__(self, node, dimension, expected_size, coor_path): super().__init__( f"Element `{node._path}` has incorrect shape {node.shape}: {details}" ) + + +class InvalidNetCDFEntry(Exception): + """Error raised when loading an IDS from a NetCDF file that fails validation.""" diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py new file mode 100644 index 00000000..dc5309f0 --- /dev/null +++ b/imaspy/test/test_nc_validation.py @@ -0,0 +1,36 @@ +import netCDF4 +import pytest + +from imaspy.backends.netcdf.nc2ids import NC2IDS +from imaspy.exception import InvalidNetCDFEntry +from imaspy.ids_factory import IDSFactory + + +@pytest.fixture() +def memfile(): + with netCDF4.Dataset("-", "w", diskless=True) as memfile: + yield memfile + + +def test_invalid_homogeneous_time(memfile): + empty_group = memfile.createGroup("empty_group") + # Invalid dtype + invalid_dtype = memfile.createGroup("invalid_dtype") + invalid_dtype.createVariable("ids_properties.homogeneous_time", float, ())[()] = 0 + # Invalid shape: 1D instead of 0D + invalid_shape = memfile.createGroup("invalid_shape") + invalid_shape.createDimension("dim") + invalid_shape.createVariable("ids_properties.homogeneous_time", "i4", ("dim",)) + # Invalid value: not 0, 1 or 2 + invalid_value = memfile.createGroup("invalid_value") + invalid_value.createVariable("ids_properties.homogeneous_time", "i4", ()) + + ids = IDSFactory().core_profiles() + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(empty_group, ids) # ids_properties.homogeneous_time does not exist + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(invalid_dtype, ids) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(invalid_shape, ids) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(invalid_value, ids) From f7be3845994c6510be29a35d47b58b7504fbaa7a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 15 Nov 2024 16:05:27 +0100 Subject: [PATCH 10/29] Additional validation checks and tests --- imaspy/backends/netcdf/nc2ids.py | 109 ++++++++++++++++++++++++++---- imaspy/test/test_nc_validation.py | 69 ++++++++++++++++++- 2 files changed, 164 insertions(+), 14 deletions(-) diff --git a/imaspy/backends/netcdf/nc2ids.py 
b/imaspy/backends/netcdf/nc2ids.py index 24cbc7b3..e2cf65b3 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -1,3 +1,4 @@ +import logging from typing import Iterator, List, Tuple import netCDF4 @@ -12,6 +13,15 @@ from imaspy.ids_structure import IDSStructure from imaspy.ids_toplevel import IDSToplevel +logger = logging.getLogger(__name__) + + +def variable_error(var, issue, value, expected=None) -> InvalidNetCDFEntry: + return InvalidNetCDFEntry( + f"Variable `{var.name}` has incorrect {issue}: `{value}`." + + (f" Was expecting `{expected}`." if expected is not None else "") + ) + def split_on_aos(metadata: IDSMetadata): paths = [] @@ -98,6 +108,7 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: def run(self) -> None: """Load the data from the netCDF group into the IDS.""" + self._validate_variables() # FIXME: ensure that var_names are sorted properly # Current assumption is that creation-order is fine for var_name in self.variables: @@ -155,6 +166,42 @@ def run(self) -> None: for index, node in tree_iter(self.ids, metadata): node.value = data[index] + def _validate_variables(self) -> None: + """Validate that all variables in the netCDF Group exist and match the DD.""" + self.variables.sort() + for var_name in self.variables: + if var_name.endswith(":shape"): + # Check that there is a corresponding variable + data_var = var_name.rpartition(":shape")[0] + if data_var not in self.variables: + raise InvalidNetCDFEntry( + f"Invalid netCDF variable: {var_name}. " + f"Shape information provided for non-existing {data_var}." + ) + # Corresponding variable must be sparse + if "sparse" not in self.group[data_var].ncattrs(): + raise InvalidNetCDFEntry( + f"Shape information provided for {data_var}, but this variable " + "is not sparse." + ) + # That's all for :shape arrays + continue + + # Check that the DD defines this variable, and validate its metadata + var = self.group[var_name] + try: + metadata = self.ids.metadata[var_name] + except KeyError: + raise InvalidNetCDFEntry( + f"Invalid variable {var_name}: no such variable exists in the " + f"{self.ids.metadata.name} IDS." + ) + self._validate_variable(var, metadata) + + # Validate sparsity metadata + if "sparse" in var.ncattrs(): + ... # TODO + def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None: """Validate that the variable has correct metadata, raise an exception if not. @@ -162,20 +209,58 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No var: NetCDF variable metadata: IDSMetadata of the corresponding IDS object """ - if var.dtype != ids2nc.dtypes[metadata.data_type]: - raise InvalidNetCDFEntry( - f"Variable {var.name} has incorrect data type: {var.dtype}. " - f"Was expecting: {ids2nc.dtypes[metadata.data_type]}." + attrs: dict = vars(var).copy() + attrs.pop("_FillValue", None) + if metadata.data_type not in [IDSDataType.STRUCTURE, IDSDataType.STRUCT_ARRAY]: + # Data type + expected_dtype = ids2nc.dtypes[metadata.data_type] + if var.dtype != expected_dtype: + raise variable_error(var, "data type", var.dtype, expected_dtype) + + # Dimensions + expected_dims = self.ncmeta.get_dimensions( + metadata.path_string, self.homogeneous_time ) - # Dimensions - expected_dims = self.ncmeta.get_dimensions( - metadata.path_string, self.homogeneous_time - ) - if var.dimensions != expected_dims: - raise InvalidNetCDFEntry( - f"Variable {var.name} has incorrect dimensions: {var.dimensions}. " - f"Was expecting: {expected_dims}." 
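+        # Expected dimension names are derived from the DD metadata and depend
+        # on the time mode, hence homogeneous_time is passed to get_dimensions.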
+ if var.dimensions != expected_dims: + raise variable_error(var, "dimensions", var.dimensions, expected_dims) + + # Coordinates + coordinates = str(attrs.pop("coordinates", "")) + expected_coordinates = self.ncmeta.get_coordinates( + metadata.path_string, self.homogeneous_time ) + if any(coord not in expected_coordinates for coord in coordinates.split()): + raise variable_error( + var, "coordinates", coordinates, " ".join(expected_coordinates) + ) + + # Ancillary variables + ancvar = attrs.pop("ancillary_variables", None) + if ancvar: + allowed_ancvar = [f"{var.name}_error_upper", f"{var.name}_error_lower"] + if any(var not in allowed_ancvar for var in ancvar.split()): + raise variable_error( + var, "ancillary_variables", ancvar, " ".join(allowed_ancvar) + ) + + # Units + units = attrs.pop("units", None) + if metadata.units and metadata.units != units: + raise variable_error(var, "units", units, metadata.units) + + # Sparse + sparse = attrs.pop("sparse", None) + if sparse is not None: + ... # TODO + + # Documentation + doc = attrs.pop("documentation", None) + if metadata.documentation != doc: + logger.warning("Documentation of variable %s differs from the DD", var.name) + + # Unknown attrs + if attrs: + raise variable_error(var, "attributes", list(attrs.keys())) def nc2ids(group: netCDF4.Group, ids: IDSToplevel): diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py index dc5309f0..d3bf8c09 100644 --- a/imaspy/test/test_nc_validation.py +++ b/imaspy/test/test_nc_validation.py @@ -1,8 +1,10 @@ import netCDF4 import pytest +from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import NC2IDS from imaspy.exception import InvalidNetCDFEntry +from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS from imaspy.ids_factory import IDSFactory @@ -12,7 +14,26 @@ def memfile(): yield memfile -def test_invalid_homogeneous_time(memfile): +@pytest.fixture() +def factory(): + return IDSFactory("4.0.0") + + +@pytest.fixture() +def memfile_with_ids(memfile, factory): + ids = factory.core_profiles() + ids.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + ids.time = [1.0, 2.0, 3.0] + ids.profiles_1d.resize(2) + for i in range(2): + ids.profiles_1d[i].grid.rho_tor_norm = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0] + IDS2NC(ids, memfile).run() + # This one is valid: + NC2IDS(memfile, factory.core_profiles()).run() + return memfile + + +def test_invalid_homogeneous_time(memfile, factory): empty_group = memfile.createGroup("empty_group") # Invalid dtype invalid_dtype = memfile.createGroup("invalid_dtype") @@ -25,7 +46,7 @@ def test_invalid_homogeneous_time(memfile): invalid_value = memfile.createGroup("invalid_value") invalid_value.createVariable("ids_properties.homogeneous_time", "i4", ()) - ids = IDSFactory().core_profiles() + ids = factory.core_profiles() with pytest.raises(InvalidNetCDFEntry): NC2IDS(empty_group, ids) # ids_properties.homogeneous_time does not exist with pytest.raises(InvalidNetCDFEntry): @@ -34,3 +55,47 @@ def test_invalid_homogeneous_time(memfile): NC2IDS(invalid_shape, ids) with pytest.raises(InvalidNetCDFEntry): NC2IDS(invalid_value, ids) + + +def test_invalid_units(memfile_with_ids, factory): + memfile_with_ids["time"].units = "hours" + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_invalid_documentation(memfile_with_ids, factory, caplog): + with caplog.at_level("WARNING"): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + assert not caplog.records + # 
Invalid docstring logs a warning + memfile_with_ids["time"].documentation = "https://en.wikipedia.org/wiki/Time" + with caplog.at_level("WARNING"): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + assert len(caplog.records) == 1 + + +def test_invalid_dimension_name(memfile_with_ids, factory): + memfile_with_ids.renameDimension("time", "T") + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_invalid_coordinates(memfile_with_ids, factory): + memfile_with_ids["profiles_1d.grid.rho_tor_norm"].coordinates = "xyz" + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_invalid_ancillary_variables(memfile_with_ids, factory): + memfile_with_ids["time"].ancillary_variables = "xyz" + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_extra_attributes(memfile_with_ids, factory): + memfile_with_ids["time"].new_attribute = [1, 2, 3] + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +# TODO: tests for sparsity information From e5246464d588069af3f0f25e5a0e00d41d7fd4ef Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 10:21:18 +0100 Subject: [PATCH 11/29] Fix a bug with lazy loading multiple IDSs from the same HDF5 DBEntry Ensure lazy contexts belonging to a different IDS are always closed. See IMAS-5603 for more details. --- imaspy/backends/imas_core/al_context.py | 4 ++++ imaspy/test/test_lazy_loading.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/imaspy/backends/imas_core/al_context.py b/imaspy/backends/imas_core/al_context.py index 07f37dec..10c0bf45 100644 --- a/imaspy/backends/imas_core/al_context.py +++ b/imaspy/backends/imas_core/al_context.py @@ -299,6 +299,10 @@ def get_context(self) -> ALContext: # from the cache else: + # Purge the cache to close open contexts from other IDSs (IMAS-5603) + cache = self.dbentry._lazy_ctx_cache + while cache: + cache.pop().close() return self.dbentry_ctx @contextmanager diff --git a/imaspy/test/test_lazy_loading.py b/imaspy/test/test_lazy_loading.py index 8c3b2fef..c0e54aad 100644 --- a/imaspy/test/test_lazy_loading.py +++ b/imaspy/test/test_lazy_loading.py @@ -163,3 +163,23 @@ def test_lazy_load_with_new_aos(requires_imas): assert len(lazy_et.model[0].ggd[0].electrons.particles.d_radial) == 0 dbentry.close() + + +def test_lazy_load_multiple_ids(backend, worker_id, tmp_path): + if backend == ASCII_BACKEND: + pytest.skip("Lazy loading is not supported by the ASCII backend.") + + with open_dbentry(backend, "w", worker_id, tmp_path) as dbentry: + cp = dbentry.factory.core_profiles() + cp.ids_properties.homogeneous_time = 1 + cp.time = [0.0, 1.0] + dbentry.put(cp) + eq = dbentry.factory.equilibrium() + eq.ids_properties.homogeneous_time = 1 + eq.time = [1.0, 2.0] + dbentry.put(eq) + + lazy_cp = dbentry.get("core_profiles", lazy=True) + lazy_eq = dbentry.get("equilibrium", lazy=True) + assert all(cp.time - eq.time == -1) + assert all(lazy_cp.time - lazy_eq.time == -1) From d47566baca31d9b53ca46ed0dfd06dde70030211 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 13:26:29 +0100 Subject: [PATCH 12/29] Validate netCDF sparsity metadata --- imaspy/backends/netcdf/nc2ids.py | 60 +++++++++++++++++++++++-------- imaspy/test/test_nc_validation.py | 51 ++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 18 deletions(-) diff --git 
a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index e2cf65b3..3666d49e 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -1,5 +1,5 @@ import logging -from typing import Iterator, List, Tuple +from typing import Iterator, List, Optional, Tuple import netCDF4 @@ -108,18 +108,13 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: def run(self) -> None: """Load the data from the netCDF group into the IDS.""" + self.variables.sort() self._validate_variables() - # FIXME: ensure that var_names are sorted properly - # Current assumption is that creation-order is fine for var_name in self.variables: if var_name.endswith(":shape"): - continue # TODO: validate that this is used - - # FIXME: error handling: + continue metadata = self.ids.metadata[var_name] - # TODO: validate metadata (data type, units, etc.) conforms to DD - if metadata.data_type is IDSDataType.STRUCTURE: continue # This only contains DD metadata we already know @@ -168,7 +163,6 @@ def run(self) -> None: def _validate_variables(self) -> None: """Validate that all variables in the netCDF Group exist and match the DD.""" - self.variables.sort() for var_name in self.variables: if var_name.endswith(":shape"): # Check that there is a corresponding variable @@ -184,7 +178,8 @@ def _validate_variables(self) -> None: f"Shape information provided for {data_var}, but this variable " "is not sparse." ) - # That's all for :shape arrays + # That's all for :shape arrays here, rest is checked in + # _validate_variable (which defers to _validate_sparsity) continue # Check that the DD defines this variable, and validate its metadata @@ -198,10 +193,6 @@ def _validate_variables(self) -> None: ) self._validate_variable(var, metadata) - # Validate sparsity metadata - if "sparse" in var.ncattrs(): - ... # TODO - def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None: """Validate that the variable has correct metadata, raise an exception if not. @@ -251,7 +242,9 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No # Sparse sparse = attrs.pop("sparse", None) if sparse is not None: - ... # TODO + shape_name = f"{var.name}:shape" + shape_var = self.group[shape_name] if shape_name in self.variables else None + self._validate_sparsity(var, shape_var, metadata) # Documentation doc = attrs.pop("documentation", None) @@ -262,6 +255,43 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No if attrs: raise variable_error(var, "attributes", list(attrs.keys())) + def _validate_sparsity( + self, + var: netCDF4.Variable, + shape_var: Optional[netCDF4.Variable], + metadata: IDSMetadata, + ) -> None: + """Validate that the variable has correct sparsity. 
+ + Args: + var: Variable with a "sparse" attribute + shape_var: Corresponding shape array (if it exists in the NC group) + metadata: IDSMetadata of the corresponding IDS object + """ + if metadata.ndim == 0: + return # Sparsity is stored with _Fillvalue, nothing to validate + + # Dimensions + aos_dimensions = self.ncmeta.get_dimensions( + self.ncmeta.aos.get(metadata.path_string), self.homogeneous_time + ) + shape_dimensions = shape_var.dimensions + if ( + len(shape_dimensions) != len(aos_dimensions) + 1 + or shape_dimensions[:-1] != aos_dimensions + or self.group.dimensions[shape_dimensions[-1]].size != metadata.ndim + ): + expected_dims = aos_dimensions + (f"{metadata.ndim}D",) + raise variable_error( + shape_var, "dimensions", shape_dimensions, expected_dims + ) + + # Data type + if shape_var.dtype.kind not in "ui": # should be (un)signed integer + raise variable_error( + shape_var, "dtype", shape_var.dtype, "any integer type" + ) + def nc2ids(group: netCDF4.Group, ids: IDSToplevel): """Get data from the netCDF group and store it in the provided IDS.""" diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py index d3bf8c09..f7cc029f 100644 --- a/imaspy/test/test_nc_validation.py +++ b/imaspy/test/test_nc_validation.py @@ -1,4 +1,5 @@ import netCDF4 +import numpy as np import pytest from imaspy.backends.netcdf.ids2nc import IDS2NC @@ -24,9 +25,10 @@ def memfile_with_ids(memfile, factory): ids = factory.core_profiles() ids.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS ids.time = [1.0, 2.0, 3.0] - ids.profiles_1d.resize(2) - for i in range(2): + ids.profiles_1d.resize(3) + for i in range(3): ids.profiles_1d[i].grid.rho_tor_norm = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0] + ids.profiles_1d[0].zeff = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] IDS2NC(ids, memfile).run() # This one is valid: NC2IDS(memfile, factory.core_profiles()).run() @@ -98,4 +100,47 @@ def test_extra_attributes(memfile_with_ids, factory): NC2IDS(memfile_with_ids, factory.core_profiles()).run() -# TODO: tests for sparsity information +def test_shape_array_without_data(memfile_with_ids, factory): + memfile_with_ids.createVariable("profiles_1d.t_i_average:shape", int, ()) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_shape_array_without_sparse_data(memfile_with_ids, factory): + memfile_with_ids.createVariable("profiles_1d.grid.rho_tor_norm:shape", int, ()) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_shape_array_with_invalid_dimensions(memfile_with_ids, factory): + cp = factory.core_profiles() + t_i_average_meta = cp.metadata["profiles_1d.t_i_average"] + t_i_average = memfile_with_ids.createVariable( + "profiles_1d.t_i_average", float, ("time", "profiles_1d.grid.rho_tor_norm:i") + ) + t_i_average.units = t_i_average_meta.units + t_i_average.documentation = t_i_average_meta.documentation + t_i_average.sparse = "Contents don't matter" + memfile_with_ids.createVariable( + "profiles_1d.t_i_average:shape", + np.int32, + ("time", "profiles_1d.grid.rho_tor_norm:i"), + ) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, cp).run() + + +def test_shape_array_with_invalid_dtype(memfile_with_ids, factory): + cp = factory.core_profiles() + t_i_average_meta = cp.metadata["profiles_1d.t_i_average"] + t_i_average = memfile_with_ids.createVariable( + "profiles_1d.t_i_average", float, ("time", "profiles_1d.grid.rho_tor_norm:i") + ) + t_i_average.units = 
t_i_average_meta.units + t_i_average.documentation = t_i_average_meta.documentation + t_i_average.sparse = "Contents don't matter" + memfile_with_ids.createVariable( + "profiles_1d.t_i_average:shape", float, ("time", "1D") + ) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, cp).run() From 54d78d6e08fd44343578bb0ae13c8404f7951de2 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 13:49:33 +0100 Subject: [PATCH 13/29] Add environment variable to disable netCDF file validation. --- docs/source/configuring.rst | 7 +++++++ imaspy/backends/netcdf/nc2ids.py | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/docs/source/configuring.rst b/docs/source/configuring.rst index 07073faf..dae11b6f 100644 --- a/docs/source/configuring.rst +++ b/docs/source/configuring.rst @@ -29,6 +29,13 @@ This page provides an overview of available variables. you can use :external:py:meth:`logging.getLogger("imaspy").setLevel(...) ` to change the log level programmatically. + +``IMASPY_DISABLE_NC_VALIDATE`` + Disables validation of netCDF files when loading an IDS from an IMAS netCDF file. + + .. caution:: + Disabling the validation may lead to errors when reading data from an IMAS netCDF file. + ``IMAS_VERSION`` Sets :ref:`The default Data Dictionary version` to use. diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index 3666d49e..0a69f964 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -1,4 +1,5 @@ import logging +import os from typing import Iterator, List, Optional, Tuple import netCDF4 @@ -163,6 +164,14 @@ def run(self) -> None: def _validate_variables(self) -> None: """Validate that all variables in the netCDF Group exist and match the DD.""" + disable_validate = os.environ.get("IMASPY_DISABLE_NC_VALIDATE") + if disable_validate and disable_validate != "0": + logger.info( + "NetCDF file validation disabled: " + "This may lead to errors when reading data!" + ) + return # validation checks are disabled + for var_name in self.variables: if var_name.endswith(":shape"): # Check that there is a corresponding variable From 5ccae5dec8e4994748a42f0e7d970049504cda95 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 15:50:32 +0100 Subject: [PATCH 14/29] Eliminate nc2ids function --- imaspy/backends/netcdf/db_entry_nc.py | 6 +++--- imaspy/backends/netcdf/nc2ids.py | 14 -------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index ba7334fc..9a0bf9c9 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -5,7 +5,7 @@ from imaspy.backends.db_entry_impl import DBEntryImpl from imaspy.backends.netcdf.ids2nc import IDS2NC -from imaspy.backends.netcdf.nc2ids import nc2ids +from imaspy.backends.netcdf.nc2ids import NC2IDS from imaspy.exception import DataEntryException from imaspy.ids_convert import NBCPathMap, convert_ids from imaspy.ids_factory import IDSFactory @@ -98,13 +98,13 @@ def get( # Load data into the destination IDS if self._ds_factory.dd_version == destination._dd_version: - nc2ids(group, destination) + NC2IDS(group, destination).run() else: # FIXME: implement automatic conversion using nbc_map # As a work-around: do an explicit conversion, but automatic conversion # will also be needed to implement lazy loading. 
ids = self._ds_factory.new(ids_name) - nc2ids(group, ids) + NC2IDS(group, ids).run() convert_ids(ids, None, target=destination) return destination diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index 0a69f964..b74b4676 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -300,17 +300,3 @@ def _validate_sparsity( raise variable_error( shape_var, "dtype", shape_var.dtype, "any integer type" ) - - -def nc2ids(group: netCDF4.Group, ids: IDSToplevel): - """Get data from the netCDF group and store it in the provided IDS.""" - try: - NC2IDS(group, ids).run() - except Exception as exc: - raise RuntimeError( - "An error occurred while reading data from the netCDF file " - f"'{group.filepath()}'. The netCDF functionality is currently in " - "preview status. Unexpected data in an otherwise valid netCDF file " - "may cause errors in IMASPy. A more robust mechanism to load IDS data from " - "netCDF files will be included in the next release of IMASPy." - ) from exc From afb8c292c6527f03354ac35fa43dfaebd8fb4ef3 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 20 Nov 2024 10:50:59 +0100 Subject: [PATCH 15/29] Update `get_sample` docstring to clarify that the interpolation mode has no effect on the `ids.time` vector. See also https://git.iter.org/projects/IMAS/repos/al-matlab/pull-requests/29/overview?commentId=48957 --- imaspy/db_entry.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/imaspy/db_entry.py b/imaspy/db_entry.py index cb948fea..3834655d 100644 --- a/imaspy/db_entry.py +++ b/imaspy/db_entry.py @@ -459,7 +459,9 @@ def get_sample( :param:`dtime` must be a number or a numpy array of size 1. This mode will generate an IDS with a homogeneous time vector ``[tmin, tmin - + dtime, tmin + 2*dtime, ...`` up to ``tmax``. The returned IDS always has + + dtime, tmin + 2*dtime, ...`` up to ``tmax``. The chosen interpolation + method will have no effect on the time vector, but may have an impact on the + other dynamic values. The returned IDS always has ``ids_properties.homogeneous_time = 1``. 3. Interpolate dynamic data on an explicit time base. This method is selected @@ -468,7 +470,9 @@ def get_sample( This mode will generate an IDS with a homogeneous time vector equal to :param:`dtime`. :param:`tmin` and :param:`tmax` are ignored in this mode. - The returned IDS always has ``ids_properties.homogeneous_time = 1``. + The chosen interpolation method will have no effect on the time vector, but + may have an impact on the other dynamic values. The returned IDS always has + ``ids_properties.homogeneous_time = 1``. Args: ids_name: Name of the IDS to read from the backend From 6cd1e44f2ca06d9746a6e0722cf216da2e6fed62 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 22 Nov 2024 15:05:37 +0100 Subject: [PATCH 16/29] Add `validate_nc` command to imaspy CLI Also fix a couple of bugs when opening invalid netCDF files and raise a proper exception. 
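
Example usage (a sketch: `core_profiles.nc` is an arbitrary file name; the
output lines match the messages added to `imaspy/command/cli.py` in this
patch):

    $ imaspy validate_nc core_profiles.nc
    File `core_profiles.nc` is a valid IMAS netCDF file.

For a file that fails validation, the command prints "File `<name>` does
not adhere to the IMAS conventions:" followed by the validation error, and
exits with status code 1.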
--- imaspy/backends/netcdf/db_entry_nc.py | 8 ++-- imaspy/backends/netcdf/nc2ids.py | 4 +- imaspy/backends/netcdf/nc_validate.py | 53 +++++++++++++++++++++++++++ imaspy/command/cli.py | 15 ++++++++ 4 files changed, 75 insertions(+), 5 deletions(-) create mode 100644 imaspy/backends/netcdf/nc_validate.py diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index 9a0bf9c9..3725c5a9 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -6,7 +6,7 @@ from imaspy.backends.db_entry_impl import DBEntryImpl from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import NC2IDS -from imaspy.exception import DataEntryException +from imaspy.exception import DataEntryException, InvalidNetCDFEntry from imaspy.ids_convert import NBCPathMap, convert_ids from imaspy.ids_factory import IDSFactory from imaspy.ids_toplevel import IDSToplevel @@ -45,14 +45,16 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: # Check if there is already data in this dataset: if self._dataset.dimensions or self._dataset.variables or self._dataset.groups: if "data_dictionary_version" not in self._dataset.ncattrs(): - raise RuntimeError( + raise InvalidNetCDFEntry( "Invalid netCDF file: `data_dictionary_version` missing" ) dataset_dd_version = self._dataset.data_dictionary_version if dataset_dd_version != factory.dd_version: self._ds_factory = IDSFactory(dataset_dd_version) - # TODO: [validate] that the data contained in this file adheres to the DD + elif mode not in ["w", "r+", "a"]: + # Reading an empty file... + raise InvalidNetCDFEntry(f"Invalid netCDF file: `{fname}` is empty.") else: # This is an empty netCDF dataset: set global attributes self._dataset.Conventions = "IMAS" diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index b74b4676..50905ba8 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -110,7 +110,7 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: def run(self) -> None: """Load the data from the netCDF group into the IDS.""" self.variables.sort() - self._validate_variables() + self.validate_variables() for var_name in self.variables: if var_name.endswith(":shape"): continue @@ -162,7 +162,7 @@ def run(self) -> None: for index, node in tree_iter(self.ids, metadata): node.value = data[index] - def _validate_variables(self) -> None: + def validate_variables(self) -> None: """Validate that all variables in the netCDF Group exist and match the DD.""" disable_validate = os.environ.get("IMASPY_DISABLE_NC_VALIDATE") if disable_validate and disable_validate != "0": diff --git a/imaspy/backends/netcdf/nc_validate.py b/imaspy/backends/netcdf/nc_validate.py new file mode 100644 index 00000000..7b6a1eac --- /dev/null +++ b/imaspy/backends/netcdf/nc_validate.py @@ -0,0 +1,53 @@ +from imaspy.backends.netcdf.db_entry_nc import NCDBEntryImpl +from imaspy.backends.netcdf.nc2ids import NC2IDS +from imaspy.db_entry import DBEntry +from imaspy.exception import InvalidNetCDFEntry + + +def validate_netcdf_file(filename: str) -> None: + """Validate if the provided netCDF file adheres to the IMAS conventions.""" + if not filename.endswith(".nc"): + raise InvalidNetCDFEntry( + f"Invalid filename `{filename}` provided: " + "an IMAS netCDF file should end with `.nc`" + ) + + entry = DBEntry(filename, "r") + entry_impl: NCDBEntryImpl = entry._dbe_impl + dataset = entry_impl._dataset + factory = entry_impl._ds_factory + + 
ids_names = factory.ids_names() + + # Check that groups in the dataset correspond to an IDS/occurrence and no additional + # variables are smuggled inside: + groups = [dataset] + [dataset[group] for group in dataset.groups] + for group in groups: + if group.variables or group.dimensions: + raise InvalidNetCDFEntry( + "NetCDF file should not have variables or dimensions in the " + f"{group.name} group." + ) + if group is dataset: + continue + if group.name not in ids_names: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}: there is no IDS with this name." + ) + for subgroup in group.groups: + try: + int(subgroup) + except ValueError: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}/{subgroup}: " + f"{subgroup} is not a valid occurrence number." + ) + + for ids_name in ids_names: + for occurrence in entry.list_all_occurrences(ids_name): + group = dataset[f"{ids_name}/{occurrence}"] + try: + NC2IDS(group, factory.new(ids_name)).validate_variables() + except InvalidNetCDFEntry as exc: + occ = f":{occurrence}" if occurrence else "" + raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") diff --git a/imaspy/command/cli.py b/imaspy/command/cli.py index 246922ce..f894f02d 100644 --- a/imaspy/command/cli.py +++ b/imaspy/command/cli.py @@ -218,5 +218,20 @@ def convert_ids( console.Console().print(timer.get_table("Time required per IDS")) +@cli.command("validate_nc", no_args_is_help=True) +@click.argument("filename", type=click.Path(exists=True, dir_okay=False)) +def validate_nc(filename): + """Validate if the provided netCDF file adheres to the IMAS conventions.""" + from imaspy.backends.netcdf.nc_validate import validate_netcdf_file + + try: + validate_netcdf_file(filename) + except Exception as exc: + click.echo(f"File `{filename}` does not adhere to the IMAS conventions:") + click.echo(exc) + sys.exit(1) + click.echo(f"File `{filename}` is a valid IMAS netCDF file.") + + if __name__ == "__main__": cli() From 6ec21c71dbda710bb9230525e2b2bb830b645d6b Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Nov 2024 09:50:07 +0100 Subject: [PATCH 17/29] Fix incorrect exception when using mode="x" for netCDF files --- imaspy/backends/netcdf/db_entry_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index 3725c5a9..da239745 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -52,7 +52,7 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: if dataset_dd_version != factory.dd_version: self._ds_factory = IDSFactory(dataset_dd_version) - elif mode not in ["w", "r+", "a"]: + elif mode not in ["w", "x", "r+", "a"]: # Reading an empty file... 
raise InvalidNetCDFEntry(f"Invalid netCDF file: `{fname}` is empty.") else: From 1f6c6fe9730dc063443408cca81fc5416d34a184 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Nov 2024 15:06:38 +0100 Subject: [PATCH 18/29] Close netCDF datasets when an exception is raised --- imaspy/backends/netcdf/db_entry_nc.py | 10 +++- imaspy/backends/netcdf/nc_validate.py | 68 +++++++++++++-------------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index da239745..a66154f1 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -39,9 +39,17 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: """NetCDF4 dataset.""" self._factory = factory """Factory (DD version) that the user wishes to use.""" - self._ds_factory = factory # Overwritten if data exists, see below + self._ds_factory = factory # Overwritten if data exists, see _init_dd_version """Factory (DD version) that the data is stored in.""" + try: + self._init_dd_version(fname, mode, factory) + except Exception: + self._dataset.close() + raise + + def _init_dd_version(self, fname: str, mode: str, factory: IDSFactory) -> None: + """Check or setup data dictionary version.""" # Check if there is already data in this dataset: if self._dataset.dimensions or self._dataset.variables or self._dataset.groups: if "data_dictionary_version" not in self._dataset.ncattrs(): diff --git a/imaspy/backends/netcdf/nc_validate.py b/imaspy/backends/netcdf/nc_validate.py index 7b6a1eac..49a14283 100644 --- a/imaspy/backends/netcdf/nc_validate.py +++ b/imaspy/backends/netcdf/nc_validate.py @@ -12,42 +12,42 @@ def validate_netcdf_file(filename: str) -> None: "an IMAS netCDF file should end with `.nc`" ) - entry = DBEntry(filename, "r") - entry_impl: NCDBEntryImpl = entry._dbe_impl - dataset = entry_impl._dataset - factory = entry_impl._ds_factory + with DBEntry(filename, "r") as entry: + entry_impl: NCDBEntryImpl = entry._dbe_impl + dataset = entry_impl._dataset + factory = entry_impl._ds_factory - ids_names = factory.ids_names() + ids_names = factory.ids_names() - # Check that groups in the dataset correspond to an IDS/occurrence and no additional - # variables are smuggled inside: - groups = [dataset] + [dataset[group] for group in dataset.groups] - for group in groups: - if group.variables or group.dimensions: - raise InvalidNetCDFEntry( - "NetCDF file should not have variables or dimensions in the " - f"{group.name} group." - ) - if group is dataset: - continue - if group.name not in ids_names: - raise InvalidNetCDFEntry( - f"Invalid group name {group.name}: there is no IDS with this name." - ) - for subgroup in group.groups: - try: - int(subgroup) - except ValueError: + # Check that groups in the dataset correspond to an IDS/occurrence and no + # additional variables are smuggled inside: + groups = [dataset] + [dataset[group] for group in dataset.groups] + for group in groups: + if group.variables or group.dimensions: raise InvalidNetCDFEntry( - f"Invalid group name {group.name}/{subgroup}: " - f"{subgroup} is not a valid occurrence number." + "NetCDF file should not have variables or dimensions in the " + f"{group.name} group." ) + if group is dataset: + continue + if group.name not in ids_names: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}: there is no IDS with this name." 
+ ) + for subgroup in group.groups: + try: + int(subgroup) + except ValueError: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}/{subgroup}: " + f"{subgroup} is not a valid occurrence number." + ) - for ids_name in ids_names: - for occurrence in entry.list_all_occurrences(ids_name): - group = dataset[f"{ids_name}/{occurrence}"] - try: - NC2IDS(group, factory.new(ids_name)).validate_variables() - except InvalidNetCDFEntry as exc: - occ = f":{occurrence}" if occurrence else "" - raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") + for ids_name in ids_names: + for occurrence in entry.list_all_occurrences(ids_name): + group = dataset[f"{ids_name}/{occurrence}"] + try: + NC2IDS(group, factory.new(ids_name)).validate_variables() + except InvalidNetCDFEntry as exc: + occ = f":{occurrence}" if occurrence else "" + raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") From b7d89635a270f623b0af2583c595b37c8a64420a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Nov 2024 15:06:54 +0100 Subject: [PATCH 19/29] Add unit tests for `nc_validate.py` --- imaspy/test/test_nc_validation.py | 53 ++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py index f7cc029f..efd25420 100644 --- a/imaspy/test/test_nc_validation.py +++ b/imaspy/test/test_nc_validation.py @@ -4,7 +4,8 @@ from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import NC2IDS -from imaspy.exception import InvalidNetCDFEntry +from imaspy.backends.netcdf.nc_validate import validate_netcdf_file +from imaspy.exception import InvalidNetCDFEntry, UnknownDDVersion from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS from imaspy.ids_factory import IDSFactory @@ -144,3 +145,53 @@ def test_shape_array_with_invalid_dtype(memfile_with_ids, factory): ) with pytest.raises(InvalidNetCDFEntry): NC2IDS(memfile_with_ids, cp).run() + + +def test_validate_nc(tmpdir): + fname = str(tmpdir / "test.nc") + + # Wrong extension + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file("test.h5") # invalid extension + + # Empty file + netCDF4.Dataset(fname, "w").close() + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Invalid DD version + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "invalid" + dataset.createGroup("core_profiles") + with pytest.raises(UnknownDDVersion): + validate_netcdf_file(fname) + + # Invalid group + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createGroup("X") + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Invalid occurrence + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createGroup("core_profiles/a") + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Invalid variable in root group + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createVariable("core_profiles", int, ()) + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Missing ids_properties.homogeneous_time + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createGroup("core_profiles/1") + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # All other validations are handled by NC2IDS and tested above From 
fc2cbf20b3cf1c549761214109d3d89b4ee34091 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 27 Nov 2024 16:31:59 +0100 Subject: [PATCH 20/29] Additional documentation for the `imaspy validate_nc` command line tool --- docs/source/netcdf.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/netcdf.rst b/docs/source/netcdf.rst index dd3bf431..7a7593e6 100644 --- a/docs/source/netcdf.rst +++ b/docs/source/netcdf.rst @@ -102,3 +102,11 @@ your directory. Let's open this file with ``xarray.load_dataset``: Attributes: Conventions: IMAS data_dictionary_version: 3.41.0 + + +Validating an IMAS netCDF file +------------------------------ + +IMAS netCDF files can be validated with IMASPy through the command line ``imaspy +validate_nc ``. See also :ref:`IMASPy Command Line tool` or type +``imaspy validate_nc --help`` in a command line. From 7c56b5f0713e7083fc3887884af3bf5dc2852f78 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 4 Dec 2024 15:35:31 +0100 Subject: [PATCH 21/29] Explicitly set `IDSDEF_PATH` when opening a DBEntry with the UDA backend --- imaspy/backends/imas_core/db_entry_al.py | 19 ++++++++ imaspy/backends/imas_core/uda_support.py | 56 ++++++++++++++++++++++++ imaspy/test/test_uda_support.py | 12 +++++ 3 files changed, 87 insertions(+) create mode 100644 imaspy/backends/imas_core/uda_support.py create mode 100644 imaspy/test/test_uda_support.py diff --git a/imaspy/backends/imas_core/db_entry_al.py b/imaspy/backends/imas_core/db_entry_al.py index a90e4d6a..34a3ab32 100644 --- a/imaspy/backends/imas_core/db_entry_al.py +++ b/imaspy/backends/imas_core/db_entry_al.py @@ -41,6 +41,7 @@ from .db_entry_helpers import delete_children, get_children, put_children from .imas_interface import LLInterfaceError, has_imas, ll_interface from .mdsplus_model import ensure_data_dir, mdsplus_model_dir +from .uda_support import extract_idsdef, get_dd_version_from_idsdef_xml _BACKEND_NAME = { ASCII_BACKEND: "ascii", @@ -186,6 +187,24 @@ def _setup_backend( pass # nothing to set up elif backend == "uda": + # Set IDSDEF_PATH to point the UDA backend to the selected DD version + idsdef_path = None + + if factory._xml_path is not None: + # Factory was constructed with an explicit XML path, point UDA to that: + idsdef_path = factory._xml_path + + elif "IMAS_PREFIX" in os.environ: + # Check if UDA can use the IDSDef.xml stored in $IMAS_PREFIX/include/ + idsdef_path = os.environ["IMAS_PREFIX"] + "/include/IDSDef.xml" + if get_dd_version_from_idsdef_xml(idsdef_path) != factory.version: + idsdef_path = None + + if idsdef_path is None: + # Extract XML from the DD zip and point UDA to it + idsdef_path = extract_idsdef(factory.version) + + os.environ["IDSDEF_PATH"] = idsdef_path logger.warning( "The UDA backend is not tested with IMASPy and may not work properly. " "Please raise any issues you find." diff --git a/imaspy/backends/imas_core/uda_support.py b/imaspy/backends/imas_core/uda_support.py new file mode 100644 index 00000000..8b599faa --- /dev/null +++ b/imaspy/backends/imas_core/uda_support.py @@ -0,0 +1,56 @@ +import logging +from pathlib import Path +from typing import Union +from xml.etree import ElementTree as ET + +from imaspy import dd_zip + +from .mdsplus_model import _get_xdg_cache_dir + +logger = logging.getLogger(__name__) + + +def get_dd_version_from_idsdef_xml(path: Union[str, Path]) -> str: + """Parse the IDSDef.xml up to the point where the Data Dictionary version is set. 
+ + Returns: + The Data Dictionary version for the provided file, or None if the file cannot be + parsed / contains no Data Dictionary version. + """ + try: + for _, elem in ET.iterparse(path): + if elem.tag == "version": + return elem.text + except OSError: + pass # File not found, etc. + except Exception: + logger.warning("Could not read DD version from file '%s'.", path, exc_info=True) + return None + + +def extract_idsdef(dd_version: str) -> str: + """Extract the IDSDef.xml for the given version and return its path. + + The IDSDef.xml is extracted to the imaspy cache folder: + + - If the file imaspy/uda/.xml already exists, we assume it is correct + """ + cache_dir_path = Path(_get_xdg_cache_dir()) / "imaspy" / "uda" + cache_dir_path.mkdir(parents=True, exist_ok=True) # ensure cache folder exists + idsdef_path = cache_dir_path / (dd_version + ".xml") + + if idsdef_path.exists(): + extract = False + # Check if the file is fine + if get_dd_version_from_idsdef_xml(idsdef_path) != dd_version: + # File is corrupt, I guess? We'll overwrite: + extract = True + else: + extract = True + + if extract: + # Extract XML from the dd_zip and store + data = dd_zip.get_dd_xml(dd_version) + idsdef_path.write_bytes(data) + + return str(idsdef_path) diff --git a/imaspy/test/test_uda_support.py b/imaspy/test/test_uda_support.py new file mode 100644 index 00000000..f623219a --- /dev/null +++ b/imaspy/test/test_uda_support.py @@ -0,0 +1,12 @@ +from pathlib import Path +from zlib import crc32 + +from imaspy import dd_zip +from imaspy.backends.imas_core.uda_support import extract_idsdef + + +def test_extract_idsdef(): + fname = extract_idsdef("4.0.0") + expected_crc = dd_zip.get_dd_xml_crc("4.0.0") + actual_crc = crc32(Path(fname).read_bytes()) + assert expected_crc == actual_crc From 74d2e3afe41a3fb907bc0a3729fcea29590aa4ca Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Mon, 9 Dec 2024 15:14:44 +0100 Subject: [PATCH 22/29] Make prepare_data_dictionaries compatible with DD>4.0.0 (change in schemas layout) --- imaspy/dd_helpers.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/imaspy/dd_helpers.py b/imaspy/dd_helpers.py index 0506482f..21a7775f 100644 --- a/imaspy/dd_helpers.py +++ b/imaspy/dd_helpers.py @@ -58,9 +58,14 @@ def prepare_data_dictionaries(): dd_zip.write(filename, arcname=arcname) # Include identifiers from latest tag in zip file repo.git.checkout(newest_version_and_tag[1], force=True) + # DD layout <= 4.0.0 for filename in Path("data-dictionary").glob("*/*identifier.xml"): arcname = Path("identifiers").joinpath(*filename.parts[1:]) dd_zip.write(filename, arcname=arcname) + # DD layout > 4.0.0 + for filename in Path("data-dictionary").glob("schemas/*/*identifier.xml"): + arcname = Path("identifiers").joinpath(*filename.parts[2:]) + dd_zip.write(filename, arcname=arcname) # pre 3.30.0 versions of the DD have the `saxon9he.jar` file path hardcoded From 77fb044a48b709ddaaef9091b1101484526338cd Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Wed, 11 Dec 2024 17:03:11 +0100 Subject: [PATCH 23/29] Updating the license and readme --- LICENSE.md | 46 --------------- LICENSE.txt | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 121 ++++++-------------------------------- 3 files changed, 182 insertions(+), 150 deletions(-) delete mode 100644 LICENSE.md create mode 100644 LICENSE.txt diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index ea4a5d46..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,46 +0,0 @@ -Copyright (c) 2020-2023 ITER Organization, Route de 
Vinon-sur-Verdon, CS 90 046, - 13067 St-Paul-lez-Durance Cedex, France - -Copyright (c) 2020-2023 Karel Lucas van de Plassche - -Copyright (c) 2020 Dutch Institute for Fundamental Energy Research - -Copyright (c) 2020-2022 Daan van Vugt - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Use and redistribution, for peaceful purposes only, are granted solely to the - ITER Members (the People's Republic of China, the European Atomic Energy - Community, the Republic of India, Japan, the Republic of Korea, the Russian - Federation, and the United States of America), with the right to sub-license - within their territory for the purpose of fusion research and development. - Organizations, bodies or individuals of non-ITER Members shall seek specific - written permission from the ITER Organization before use or redistribution of - this software. - -* All modifications/derivatives shall be made available to the ITER Organization. - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -* Neither the name of the ITER Organization nor the names of its contributors - may be used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE ITER ORGANIZATION OR ITS CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..33bb3680 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. 
+ + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. 
+ + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/README.md b/README.md index ce753f5a..23e83fde 100644 --- a/README.md +++ b/README.md @@ -1,99 +1,24 @@ # IMASPy IMASPy is a pure-python library to handle arbitrarily nested data structures. -IMASPy is designed for, but not necessarily bound to, interacting with -Interface Data Structures (IDSs) as defined by the -Integrated Modelling & Analysis Suite (IMAS) Data Model. +IMASPy is designed for, but not necessarily bound to, interacting with Interface +Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite (IMAS) +Data Model. -It provides: -* An easy-to-install and easy-to-get started package by - * Not requiring an IMAS installation - * Not strictly requiring matching a Data Dictionary (DD) version -* An pythonic alternative to the IMAS Python High Level Interface (HLI) -* Checking of correctness on assign time, instead of database write time -* Dynamically created in-memory pre-filled data trees from DD XML specifications +## Install -This package is developed on [ITER bitbucket](https://git.iter.org/projects/IMAS/repos/imaspy). -For user support, contact the IMAS team on the [IMAS user slack](https://imasusers.slack.com), -open a [JIRA issue](https://jira.iter.org/projects/IMAS), or email the -support team on . +Install steps are described in the documentation generated from `/docs/source/installing.rst`. -## Installation - -### On ITER system, EuroFusion gateway - -There is a `module` available on ITER and the Gateway, so you can run - -```bash -module load IMASPy -``` - -IMASPy can work with either Access Layer versions 4 or 5 (the used version is -automatically detected when importing the `imaspy` module). IMASPy still works (with -limited functionality) when no IMAS module is loaded. - -### Local - -We recommend using a `venv`: - -```bash -python3 -m venv ./venv -. venv/bin/activate -``` - -Then clone this repository, and run `pip install`: - -```bash -git clone ssh://git@git.iter.org/imas/imaspy.git -cd imaspy -pip install . -# Optional: also install `imas-core` with the HDF5 backend in the venv: -pip install .[imas-core] -``` - -If you get strange errors you might want to upgrade your `setuptools` and `pip`. -(you might want to add the `--user` flag to your pip installs when not in a `venv`) - -### Development installation - -For development an installation in editable mode may be more convenient, and -you will need some extra dependencies to run the test suite and build -documentation. - -```bash -pip install -e .[test,docs] -``` +Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) +and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html) -Test your installation by trying +The documentation can be manually generated by installing sphinx and running: ```bash -cd ~ -python -c "import imaspy; print(imaspy.__version__)" +make -C docs html ``` -which should return your just installed version number. - -### Installation without ITER access - -The installation script tries to access the [ITER IMAS Core Data Dictionary repository](https://git.iter.org/projects/IMAS/repos/data-dictionary/browse) -to fetch the latest versions. If you do not have git+ssh access there, you can -try to find this repository elsewhere, and do a `git fetch --tags`. - -Alternatively you could try to obtain an `IDSDef.zip` and place it in `~/.config/imaspy/`. 
- -Test your installation by trying - -```bash -python -c "import imaspy; factory = imaspy.IDSFactory()" -``` -If the following error is raised: -```bash -RuntimeError: Could not find any data dictionary definitions. -``` -it means that the Data Dictionary definitions weren't created during the install. -You can generate these definitions by executing `build_DD` in the command line. -Missing packages can include among others: [GitPython](https://github.com/gitpython-developers/GitPython), and Java. ## How to use @@ -106,32 +31,20 @@ print(equilibrium) equilibrium.ids_properties.homogeneous_time = imaspy.ids_defs.IDS_TIME_MODE_HETEROGENEOUS equilibrium.ids_properties.comment = "testing" -dbentry = imaspy.DBEntry(imaspy.ids_defs.HDF5_BACKEND, "ITER", 1, 1) -dbentry.create() -dbentry.put(equilibrium) - -# TODO: find an example with a significant change between versions (rename?) -older_dbentry = imaspy.DBEntry(imaspy.ids_defs.HDF5_BACKEND, "ITER", 1, 1, version="3.35.0") -equilibrium2 = older_root.get("equilibrium") -print(equilibrium2.ids_properties.comment) +with imaspy.DBEntry("imas:hdf5?path=./testdb","w") as dbentry: + dbentry.put(equilibrium) ``` -## Documentation - -Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) -and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html) +A quick 5 minutes introduction is available in the documentation generated from `/docs/sources/intro.rst`. -The documentation can be manually generated by installing sphinx and running: -```bash -make -C docs html -``` +## Legal -## Interacting with IMAS AL +IMASPy is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de +Plassche , Copyright 2020-2022 Daan van Vugt , +and Copyright 2020 Dutch Institute for Fundamental Energy Research . +It is licensed under [LGPL 3.0](LICENSE.txt). -Interaction with the IMAS AL is provided by a Cython interface to the Access Layer. -As Cython code, it needs to be compiled on your local system. -To find the headers, the Access Layer `include` folder needs to be in your `INCLUDE_PATH`. On most HPC systems, a `module load IMAS` is enough. ## Acknowledgments From d80778fe6cdadf50255a50905a00ed9e2b458c8c Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 12 Dec 2024 10:23:31 +0100 Subject: [PATCH 24/29] Replace references to LICENSE.md to LICENSE.txt --- docs/source/index.rst | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 19e3985b..c5a3f24c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -77,5 +77,5 @@ Manual LICENSE ------- -.. literalinclude:: ../../LICENSE.md +.. 
literalinclude:: ../../LICENSE.txt :language: text diff --git a/pyproject.toml b/pyproject.toml index 1c1ce2cc..dccd6912 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ authors = [ description = "Pythonic wrappers for the IMAS Access Layer" readme = {file = "README.md", content-type = "text/markdown"} requires-python = ">=3.7" -license = {file = "LICENSE.md"} +license = {file = "LICENSE.txt"} classifiers = [ "Development Status :: 3 - Alpha", "Environment :: Console", From cef46674cc1f032d9ae65dfe4507060493a43ddf Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 17 Dec 2024 13:57:11 +0100 Subject: [PATCH 25/29] Fix a bug with lazy loading Bug: IMASPy runs into an attribute error when lazy loading a child quantity that was added in a newer DD version than stored on disk. Example: 1. Equilibrium IDS stored in DD 3.33.0 2. Lazy loading IDS with DD 4.0.0 3. Try to access `eq.time_slice[0].boundary.dr_dz_zero_point.r` resulted in an AttributeError Root cause: IMASPy did not handle correctly that the `dr_dz_zero_point` was added between 3.33.0 and 4.0.0. This commit fixes the bug. --- imaspy/backends/imas_core/db_entry_helpers.py | 6 +++++- imaspy/test/test_lazy_loading.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/imaspy/backends/imas_core/db_entry_helpers.py b/imaspy/backends/imas_core/db_entry_helpers.py index de1d9323..f69eafd3 100644 --- a/imaspy/backends/imas_core/db_entry_helpers.py +++ b/imaspy/backends/imas_core/db_entry_helpers.py @@ -77,11 +77,15 @@ def get_children( getattr(structure, name)._IDSPrimitive__value = data -def _get_child(child: IDSBase, ctx: LazyALContext): +def _get_child(child: IDSBase, ctx: Optional[LazyALContext]): """Get a single child when required (lazy loading).""" # NOTE: changes in this method must be propagated to _get_children and vice versa # Performance: this method is specialized for the lazy get + # ctx can be None when the parent structure does not exist in the on-disk DD version + if ctx is None: + return # There is no data to be loaded + time_mode = ctx.time_mode if time_mode == IDS_TIME_MODE_INDEPENDENT and child.metadata.type.is_dynamic: return # skip dynamic (time-dependent) nodes diff --git a/imaspy/test/test_lazy_loading.py b/imaspy/test/test_lazy_loading.py index c0e54aad..1d34e2a1 100644 --- a/imaspy/test/test_lazy_loading.py +++ b/imaspy/test/test_lazy_loading.py @@ -165,6 +165,22 @@ def test_lazy_load_with_new_aos(requires_imas): dbentry.close() +def test_lazy_load_with_new_structure(requires_imas): + dbentry = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, dd_version="3.30.0") + dbentry.create() + + eq = dbentry.factory.equilibrium() + eq.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + eq.time = [0.0] + eq.time_slice.resize(1) + dbentry.put(eq) + + entry2 = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, data_version="3", dd_version="4.0.0") + entry2.open() + lazy_eq = entry2.get("equilibrium", lazy=True) + assert not lazy_eq.time_slice[0].boundary.dr_dz_zero_point.r.has_value + + def test_lazy_load_multiple_ids(backend, worker_id, tmp_path): if backend == ASCII_BACKEND: pytest.skip("Lazy loading is not supported by the ASCII backend.") From 4beab9fcbcf590356b0d92b7b65894f907157962 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Fri, 10 Jan 2025 18:03:27 +0100 Subject: [PATCH 26/29] Add contributing guidelines --- CODE_OF_CONDUCT.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 45 +++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create 
From 4beab9fcbcf590356b0d92b7b65894f907157962 Mon Sep 17 00:00:00 2001
From: Olivier Hoenen
Date: Fri, 10 Jan 2025 18:03:27 +0100
Subject: [PATCH 26/29] Add contributing guidelines

---
 CODE_OF_CONDUCT.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 CONTRIBUTING.md    | 45 +++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 CODE_OF_CONDUCT.md
 create mode 100644 CONTRIBUTING.md

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000..df8ba3bd
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,72 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to make participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies within all project spaces, and it also applies when
+an individual is representing the project or its community in public spaces.
+Examples of representing a project or community include using an official
+project e-mail address, posting via an official social media account, or acting
+as an appointed representative at an online or offline event. Representation of
+a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at . All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..ac28e400
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,45 @@
+# Contributing guidelines
+
+We welcome any kind of contribution to `imas-python`,
+from a simple comment or a question to a full-fledged pull request.
+Please first make sure you read and follow the
+[Code of Conduct](CODE_OF_CONDUCT.md).
+
+## You think you found a bug in the code, or have a question about its use
+
+1. use the [issue search](https://github.com/iterorganization/imas-python/issues)
+   to check if someone already created a similar issue;
+2. if not, make a **new issue** to describe your problem or question.
+   If you suspect a bug, please try to give all the relevant information
+   needed to reproduce the error or identify its root cause (version of
+   imas-python, OS and relevant dependencies, a snippet of code);
+3. apply relevant labels to the issue.
+
+## You want to make or propose some change to the code
+
+1. use the [issue search](https://github.com/iterorganization/imas-python/issues)
+   to check if someone already proposed a similar idea/change;
+2. if not, create a **new issue** to describe what change you would like to see
+   implemented and specify whether you intend to work on it yourself or if some
+   help will be needed;
+3. wait until some kind of consensus is reached about your idea being relevant,
+   at which time the issue will be assigned (to you or someone else who can
+   work on this topic);
+4. if you do the development yourself, fork the repository to your own GitHub
+   profile and create your own feature branch off of the latest develop commit.
+   Make sure to regularly sync your branch with the latest commits from
+   `develop` (find instructions
+   [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork));
+5. when your development is ready, create a pull request (find instructions
+   [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork)).
+
+While we will try to answer questions quickly and to address issues in a timely
+manner, it may sometimes take longer than expected. A friendly ping in the
+discussion or the issue thread can help draw attention if you find that it has
+stalled.

From 2eb385e77e953ffef5a46a274897da6f4fb52d87 Mon Sep 17 00:00:00 2001
From: gautambaabu
Date: Fri, 13 Dec 2024 23:10:14 +0530
Subject: [PATCH 27/29] fixed readme.md for imas

---
 README.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 23e83fde..03f00ce5 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-# IMASPy
+# IMAS
 
-IMASPy is a pure-python library to handle arbitrarily nested data structures.
-IMASPy is designed for, but not necessarily bound to, interacting with Interface
+IMAS is a pure-python library to handle arbitrarily nested data structures.
+IMAS is designed for, but not necessarily bound to, interacting with Interface
 Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite
 (IMAS) Data Model.
 
@@ -11,7 +11,7 @@
 Install steps are described in the documentation generated from
 `/docs/source/installing.rst`.
 Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/)
-and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html)
+and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMAS-doc/index.html)
 
 The documentation can be manually generated by installing sphinx and running:
 
 ```
 make -C docs html
 ```
 
@@ -23,15 +23,15 @@
 ## How to use
 
 ```python
-import imaspy
-factory = imaspy.IDSFactory()
+import imas
+factory = imas.IDSFactory()
 equilibrium = factory.equilibrium()
 print(equilibrium)
 
-equilibrium.ids_properties.homogeneous_time = imaspy.ids_defs.IDS_TIME_MODE_HETEROGENEOUS
+equilibrium.ids_properties.homogeneous_time = imas.ids_defs.IDS_TIME_MODE_HETEROGENEOUS
 equilibrium.ids_properties.comment = "testing"
 
-with imaspy.DBEntry("imas:hdf5?path=./testdb","w") as dbentry:
+with imas.DBEntry("imas:hdf5?path=./testdb","w") as dbentry:
     dbentry.put(equilibrium)
 ```
 
@@ -40,7 +40,7 @@ A quick 5 minutes introduction is available in the documentation generated from
 
 ## Legal
 
-IMASPy is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de
+IMAS is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de
 Plassche , Copyright 2020-2022 Daan van Vugt ,
 and Copyright 2020 Dutch Institute for Fundamental Energy Research .
 It is licensed under [LGPL 3.0](LICENSE.txt).

From 133f78c30803cdb4ad8e3afab9f10e8b652c0d58 Mon Sep 17 00:00:00 2001
From: Olivier Hoenen
Date: Tue, 17 Dec 2024 14:56:13 +0100
Subject: [PATCH 28/29] Apply suggestion on naming

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 03f00ce5..9fc27d68 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-# IMAS
+# IMAS-Python
 
-IMAS is a pure-python library to handle arbitrarily nested data structures.
-IMAS is designed for, but not necessarily bound to, interacting with Interface
+IMAS-Python is a pure-python library to handle arbitrarily nested data structures.
+IMAS-Python is designed for, but not necessarily bound to, interacting with Interface
 Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite
 (IMAS) Data Model.
 
@@ -40,7 +40,7 @@
 ## Legal
 
-IMAS is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de
+IMAS-Python is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de
 Plassche , Copyright 2020-2022 Daan van Vugt ,
 and Copyright 2020 Dutch Institute for Fundamental Energy Research .
 It is licensed under [LGPL 3.0](LICENSE.txt).

From 6f871f5b98f268b5329310fcd0e572c109cb6539 Mon Sep 17 00:00:00 2001
From: Gautam raj
Date: Tue, 17 Dec 2024 19:32:31 +0530
Subject: [PATCH 29/29] Update README.md

Co-authored-by: Simon Pinches
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9fc27d68..14d4b81e 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # IMAS-Python
 
 IMAS-Python is a pure-python library to handle arbitrarily nested data structures.
-IMAS-Python is designed for, but not necessarily bound to, interacting with Interface
+It is designed for, but not necessarily bound to, interacting with Interface
 Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite
 (IMAS) Data Model.
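The README's usage example ends after writing the equilibrium IDS. As a complementary sketch, not part of the patch series itself, the data can be read back with the same `DBEntry` API. This assumes the `./testdb` entry created by the README example and that `"r"` is accepted as the read-only mode string (the README only demonstrates `"w"`):

```python
# Read-back sketch complementing the README example above; the "r" mode
# string is an assumption, only "w" appears in the README itself.
import imas

# Re-open the HDF5 entry written by the README example.
with imas.DBEntry("imas:hdf5?path=./testdb", "r") as dbentry:
    equilibrium = dbentry.get("equilibrium")  # occurrence 0 by default
    print(equilibrium.ids_properties.comment)  # -> "testing"
```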