Skip to content

Commit

Permalink
Merge pull request #66 from easyScience/dataset-to-datagroup
Browse files Browse the repository at this point in the history
Latest Scipp (Dataset -> DataGroup)
  • Loading branch information
andped10 authored Jan 11, 2024
2 parents eab0d31 + d89117e commit e759eff
Show file tree
Hide file tree
Showing 14 changed files with 132 additions and 116 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci_pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
python-version: ['3.9', '3.10', '3.11']
os: [ubuntu-latest, macos-latest, windows-latest]

runs-on: ${{ matrix.os }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
python-version: ['3.9', '3.10', '3.11']
if: "!contains(github.event.head_commit.message, '[ci skip]')"

steps:
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ Before you submit a pull request, check that it meets these guidelines:
2. If the pull request adds functionality, the docs should be updated. Put
your new functionality into a function with a docstring, and add the
feature to the list in README.rst.
3. The pull request should work for Python 3.8, 3.9, and 3.10, and for PyPy. Check
3. The pull request should work for Python 3.9, 3.10, and 3.11, and for PyPy. Check
https://travis-ci.com/easyScience/EasyReflectometryLib/pull_requests
and make sure that the tests pass for all supported Python versions.

Expand Down
60 changes: 34 additions & 26 deletions EasyReflectometry/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,60 +6,66 @@
from orsopy.fileio import orso, Header


def load(fname: Union[TextIO, str]) -> sc.Dataset:
def load(fname: Union[TextIO, str]) -> sc.DataGroup:
"""
Load data from an ORSO .ort file.
:param fname: The file to be read.
:return: A scipp Dataset for the loaded datasets.
:return: A scipp DataGroup for the loaded datasets.
"""
try:
return _load_orso(fname)
except (IndexError, ValueError):
return _load_txt(fname)


def _load_orso(fname: Union[TextIO, str]) -> sc.Dataset:
def _load_orso(fname: Union[TextIO, str]) -> sc.DataGroup:
"""
Load from an ORSO compatible file.
:param fname: The path for the file to be read.
:return: A populated scipp dataset.
:return: A populated scipp DataGroup.
"""
data = {}
coords = {}
attrs = {}
f_data = orso.load_orso(fname)
for i, o in enumerate(f_data):
name = i
if o.info.data_set is not None:
name = o.info.data_set
coords = {}
coords[f'Qz_{name}'] = sc.array(dims=[f'{o.info.columns[0].name}_{name}'],
values=o.data[:, 0],
variances=np.square(o.data[:, 3]),
unit=sc.Unit(o.info.columns[0].unit))
coords[f'Qz_{name}'] = sc.array(
dims=[f'{o.info.columns[0].name}_{name}'],
values=o.data[:, 0],
variances=np.square(o.data[:, 3]),
unit=sc.Unit(o.info.columns[0].unit)
)
try:
ordinate = sc.array(dims=[f'{o.info.columns[0].name}_{name}'],
values=o.data[:, 1],
variances=np.square(o.data[:, 2]),
unit=sc.Unit(o.info.columns[1].unit))
data[f'R_{name}'] = sc.array(
dims=[f'{o.info.columns[0].name}_{name}'],
values=o.data[:, 1],
variances=np.square(o.data[:, 2]),
unit=sc.Unit(o.info.columns[1].unit)
)
except TypeError:
ordinate = sc.array(dims=[f'{o.info.columns[0].name}_{name}'],
values=o.data[:, 1],
variances=np.square(o.data[:, 2]))
attrs = {'orso_header': sc.scalar(Header.asdict(o.info))}
data[f'R_{name}'] = sc.DataArray(data=ordinate, coords=coords, attrs=attrs)
return sc.Dataset(data=data)
data[f'R_{name}'] = sc.array(
dims=[f'{o.info.columns[0].name}_{name}'],
values=o.data[:, 1],
variances=np.square(o.data[:, 2])
)
attrs[f'R_{name}'] = {'orso_header': sc.scalar(Header.asdict(o.info))}
return sc.DataGroup(data=data, coords=coords, attrs=attrs)


def _load_txt(fname: Union[TextIO, str]) -> sc.Dataset:
def _load_txt(fname: Union[TextIO, str]) -> sc.DataGroup:
"""
Load data from a simple txt file.
:param fname: The path for the file to be read.
:return: A populated scipp dataset.
:return: A populated scipp DataGroup.
"""
f_data = np.loadtxt(fname)
data = {
Expand All @@ -68,9 +74,11 @@ def _load_txt(fname: Union[TextIO, str]) -> sc.Dataset:
}
coords = {
data['R_0'].dims[0]:
sc.array(dims=['Qz_0'],
values=f_data[:, 0],
variances=np.square(f_data[:, 3]),
unit=sc.Unit('1/angstrom'))
sc.array(
dims=['Qz_0'],
values=f_data[:, 0],
variances=np.square(f_data[:, 3]),
unit=sc.Unit('1/angstrom')
)
}
return sc.Dataset(data=data, coords=coords)
return sc.DataGroup(data=data, coords=coords)
42 changes: 24 additions & 18 deletions EasyReflectometry/fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Fitter:
def __init__(self, *args: Model):
"""
A convenience class for the :py:class:`easyCore.Fitting.Fitting`
which will populate the :py:class:`sc.Dataset` appropriately
which will populate the :py:class:`sc.DataGroup` appropriately
after the fitting is performed.
:param model: Reflectometry model
Expand All @@ -32,35 +32,41 @@ def wrapped(*args, **kwargs):
self._models = args
self.easy_f = easyFitter(args, self._fit_func)

def fit(self, data: sc.Dataset, method: str = 'least_squares', id=0):
def fit(self, data: sc.DataGroup, method: str = 'least_squares', id: int = 0) -> sc.DataGroup:
"""
Perform the fitting and populate the datasets with the result.
Perform the fitting and populate the DataGroups with the result.
:param data: Dataset to be fitted to and populated
:param data: DataGroup to be fitted to and populated
:param method: Optimisation method
"""
refl_nums = [k[3:] for k, v in data.coords.items() if 'Qz' == k[:2]]
x = [data.coords[f'Qz_{i}'].values for i in refl_nums]
y = [data[f'R_{i}'].data.values for i in refl_nums]
dy = [1 / np.sqrt(data[f'R_{i}'].data.variances) for i in refl_nums]
refl_nums = [k[3:] for k in data['coords'].keys() if 'Qz' == k[:2]]
x = [data['coords'][f'Qz_{i}'].values for i in refl_nums]
y = [data['data'][f'R_{i}'].values for i in refl_nums]
dy = [1 / np.sqrt(data['data'][f'R_{i}'].variances) for i in refl_nums]
result = self.easy_f.fit(x, y, weights=dy, method=method)
new_data = data.copy()
for i, _ in enumerate(result):
id = refl_nums[i]
new_data[f'R_{id}_model'] = sc.array(dims=[f'Qz_{id}'],
values=self._fit_func[i](
data.coords[f'Qz_{id}'].values))
new_data[f'R_{id}_model'] = sc.array(
dims=[f'Qz_{id}'],
values=self._fit_func[i](
data['coords'][f'Qz_{id}'].values)
)
sld_profile = self.easy_f._fit_objects[i].interface.sld_profile(
self._models[i].uid)
new_data[f'SLD_{id}'] = sc.array(dims=[f'z_{id}'],
values=sld_profile[1] * 1e-6,
unit=sc.Unit('1/angstrom')**2)
new_data[f'R_{id}_model'].attrs['model'] = sc.scalar(
self._models[i].as_dict())
new_data.coords[f'z_{id}'] = sc.array(
new_data[f'SLD_{id}'] = sc.array(
dims=[f'z_{id}'],
values=sld_profile[1] * 1e-6,
unit=sc.Unit('1/angstrom')**2
)
new_data['attrs'][f'R_{id}_model'] = {
'model' : sc.scalar(self._models[i].as_dict())
}
new_data['coords'][f'z_{id}'] = sc.array(
dims=[f'z_{id}'],
values=sld_profile[0],
unit=(1 / new_data.coords[f'Qz_{id}'].unit).unit)
unit=(1 / new_data['coords'][f'Qz_{id}'].unit).unit
)
return new_data


Expand Down
2 changes: 1 addition & 1 deletion EasyReflectometry/measurement/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
from orsopy.fileio import orso


def load(fname: Union[TextIO, str]) -> sc.Dataset:
def load(fname: Union[TextIO, str]) -> sc.DataGroup:
return orso.load_orso(fname)
41 changes: 25 additions & 16 deletions EasyReflectometry/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']


def plot(data: sc.Dataset):
def plot(data: sc.DataGroup) -> None:
"""
A general plotting function for EasyReflectometry.
:param data: the Dataset to be plotted.
:returns: The plot canvas.
:param data: the DataGroup to be plotted.
"""
if len([i for i in list(data.keys()) if 'SLD' in i]) == 0:
plot_sld = False
Expand All @@ -25,25 +23,33 @@ def plot(data: sc.Dataset):
gs = GridSpec(2, 1, figure=fig)
ax2 = fig.add_subplot(gs[1, 0])
ax1 = fig.add_subplot(gs[0, 0])
refl_nums = [k[3:] for k, v in data.coords.items() if 'Qz' == k[:2]]
refl_nums = [k[3:] for k in data['coords'].keys() if 'Qz' == k[:2]]
for i, refl_num in enumerate(refl_nums):
copy = data[f'R_{refl_num}'].copy()
copy.data *= sc.scalar(10.**i, unit=copy.unit)
copy.coords[f'Qz_{refl_num}'].variances = None
plot_data = sc.DataArray(
name=f'R_{refl_num}',
data=data['data'][f'R_{refl_num}'].copy(),
coords={f'Qz_{refl_num}': data['coords'][f'Qz_{refl_num}'].copy()}
)
plot_data.data *= sc.scalar(10.**i, unit=plot_data.unit)
plot_data.coords[f'Qz_{refl_num}'].variances = None
sc.plot(
copy,
plot_data,
ax=ax1,
norm='log',
linestyle='',
marker='.',
color=color_cycle[i]
)
try:
copy = data[f'R_{refl_num}_model'].copy()
copy.data *= sc.scalar(10.**float(i))
copy.coords[f'Qz_{refl_num}'].variances = None
plot_model_data = sc.DataArray(
name=f'R_{refl_num}_model',
data=data[f'R_{refl_num}_model'].copy(),
coords={f'Qz_{refl_num}': data['coords'][f'Qz_{refl_num}'].copy()}
)
plot_model_data.data *= sc.scalar(10.**i, unit=plot_model_data.unit)
plot_model_data.coords[f'Qz_{refl_num}'].variances = None
sc.plot(
copy,
plot_model_data,
ax=ax1,
norm='log',
linestyle='--',
Expand All @@ -58,10 +64,13 @@ def plot(data: sc.Dataset):

if plot_sld:
for i, refl_num in enumerate(refl_nums):
copy = data[f'SLD_{refl_num}'].copy()
copy.data += sc.scalar(10. * i, unit=copy.unit)
plot_sld_data = sc.DataArray(
name=f'SLD_{refl_num}',
data=data[f'SLD_{refl_num}'].copy(),
coords={f'z_{refl_num}': data['coords'][f'z_{refl_num}'].copy()}
)
sc.plot(
data[f'SLD_{refl_num}'],
plot_sld_data,
ax=ax2,
linestyle='-',
color=color_cycle[i],
Expand Down
4 changes: 2 additions & 2 deletions docs/monolayer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@
"outputs": [],
"source": [
"structure = Structure.from_pars(air_layer, dspc, d2o_layer)\n",
"model = Model.from_pars(structure, 1, data['R_0'].data.values.min(), 5)"
"model = Model.from_pars(structure, 1, data['data']['R_0'].values.min(), 5)"
]
},
{
Expand Down Expand Up @@ -451,7 +451,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.7"
}
},
"nbformat": 4,
Expand Down
8 changes: 4 additions & 4 deletions docs/multi_contrast.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,9 @@
"d13d2o_structure = Structure.from_pars(air_layer, d13d2o, d2o_layer)\n",
"d70d2o_structure = Structure.from_pars(air_layer, d70d2o, d2o_layer)\n",
"d83acmw_structure = Structure.from_pars(air_layer, d83acmw, acmw_layer)\n",
"d13d2o_model = Model.from_pars(d13d2o_structure, 0.1, data['R_d13DSPC-D2O'].data.values.min(), 5)\n",
"d70d2o_model = Model.from_pars(d70d2o_structure, 0.1, data['R_d70DSPC-D2O'].data.values.min(), 5)\n",
"d83acmw_model = Model.from_pars(d83acmw_structure, 0.1, data['R_d83DSPC-ACMW'].data.values.min(), 5)"
"d13d2o_model = Model.from_pars(d13d2o_structure, 0.1, data['data']['R_d13DSPC-D2O'].values.min(), 5)\n",
"d70d2o_model = Model.from_pars(d70d2o_structure, 0.1, data['data']['R_d70DSPC-D2O'].values.min(), 5)\n",
"d83acmw_model = Model.from_pars(d83acmw_structure, 0.1, data['data']['R_d83DSPC-ACMW'].values.min(), 5)"
]
},
{
Expand Down Expand Up @@ -448,7 +448,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.7"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/repeating.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
"version": "3.11.7"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/simple_fitting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
"version": "3.11.7"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/solvation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.7"
}
},
"nbformat": 4,
Expand Down
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ classifiers = [
"Topic :: Scientific/Engineering",
"Development Status :: 3 - Alpha"
]
requires-python = ">=3.8,<3.12"
requires-python = ">=3.9,<3.12"
dependencies = [
"easyScienceCore @ git+https://github.com/easyscience/easycore.git@failed_unit_check",
"refnx>=0.1.15",
"refl1d>=0.8.14",
"scipp==23.08.0",
"scipp>=23.12.0",
"orsopy>=0.0.4"
]

Expand Down Expand Up @@ -75,7 +75,6 @@ isolated_build = True
envlist = py{38,39,310,311}
[gh-actions]
python =
3.8: py38
3.9: py39
3.10: py310
3.11: py311
Expand Down
Loading

0 comments on commit e759eff

Please sign in to comment.