From 9890f1c7fbdcd8ce59d459802ef45748502bdede Mon Sep 17 00:00:00 2001 From: Philippe THOMY Date: Tue, 21 May 2024 11:28:37 +0200 Subject: [PATCH] add pre-commit --- .../-use--custom-issue-template.md | 2 - .github/workflows/ci.yml | 12 +- .pre-commit-config.yaml | 28 + README.md | 54 +- __init__.py | 2 +- docs/release.rst | 6 +- example/ex_ndarray.ntv | 2 +- example/interop.mmd | 2 +- example/struc.mmd | 2 +- ntv_numpy/__init__.py | 28 +- ntv_numpy/data_array.py | 110 +- ntv_numpy/ndarray.py | 374 +++--- ntv_numpy/ndtype.py | 28 +- ntv_numpy/ntv_numpy.ini | 34 +- ntv_numpy/numpy_ntv_connector.py | 135 ++- ntv_numpy/xarray_accessors.py | 10 +- ntv_numpy/xconnector.py | 478 ++++---- ntv_numpy/xdataset.py | 397 ++++--- ntv_numpy/xndarray.py | 168 +-- pyproject.toml | 23 + setup.cfg | 2 +- setup.py | 9 +- tests/tests_ntv_numpy.py | 1015 ++++++++++------- 23 files changed, 1691 insertions(+), 1230 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 pyproject.toml diff --git a/.github/ISSUE_TEMPLATE/-use--custom-issue-template.md b/.github/ISSUE_TEMPLATE/-use--custom-issue-template.md index 19b0950..030c4ea 100644 --- a/.github/ISSUE_TEMPLATE/-use--custom-issue-template.md +++ b/.github/ISSUE_TEMPLATE/-use--custom-issue-template.md @@ -6,5 +6,3 @@ labels: '' assignees: '' --- - - diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0950ced..dd5572b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,27 +20,27 @@ jobs: fail-fast: false matrix: python-version: ["3.10", "3.11"] - + steps: - uses: actions/checkout@v4 - + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - + - name: Install run: | python -m pip install --upgrade pip pip install -r ./ci/requirements.txt - + - name: Lint run: | - # stop if Python syntax errors or undefined names + # stop if Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # treats all errors as warnings flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - + - name: Test run: | python -m unittest discover -s ./tests -p 'tests_*.py' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..2818a1c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +exclude: '\\*.htm*' + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 # the release, git tag, or commit you want to use + hooks: + - id: check-toml + - id: check-yaml + - id: check-json + - id: check-docstring-first + - id: check-ast + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.4.4 + hooks: + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format + + #- repo: https://github.com/numpy/numpydoc + # rev: v1.6.0 + # hooks: + # - id: numpydoc-validation diff --git a/README.md b/README.md index 569f215..72b2a63 100644 --- a/README.md +++ b/README.md @@ -52,18 +52,18 @@ In [1]: example = { 'var1.variance': [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], 'var1.mask1': [[[True, False]], ['x']], 'var1.mask2': [[[2, 2], [True, False, False, True]]], - - 'var2': [['var2.ntv'], ['x', 'y']], - + + 'var2': [['var2.ntv'], ['x', 'y']], + 'x': [['string', ['23F0AE', '578B98']], {'test': 21}], 'y': [['date', ['2021-01-01', '2022-02-02']]], - + 'ranking': [['month', [2, 2], [1, 2, 3, 4]], ['var1']], 'z': [['float', [10, 20]], ['x']], 'z.uncertainty': [[[0.1, 0.2]]], - + 'z_bis': [[['z1_bis', 'z2_bis']]], - + 'info': {'path': 'https://github.com/loco-philippe/ntv-numpy/tree/main/example/', 'location': [['string', ['paris']]]} } @@ -136,20 +136,20 @@ Out[5]: True In [6]: x_dataframe = x_example.to_dataframe() print(x_example.to_dataframe(json_name=False)) print(x_xarray) -Out[6]: +Out[6]: ranking z z.uncertainty var1 var1.mask1 var1.mask2 \ -x y -23F0AE 2021-01-01 1 10.0 0.1 10.1 True True - 2022-02-02 2 10.0 0.1 0.4 True False -578B98 2021-01-01 3 20.0 0.2 3.4 False False - 2022-02-02 4 20.0 0.2 8.2 False True - - var1.variance location -x y -23F0AE 2021-01-01 0.1 paris - 2022-02-02 0.2 paris -578B98 2021-01-01 0.3 paris - 2022-02-02 0.4 paris +x y +23F0AE 2021-01-01 1 10.0 0.1 10.1 True True + 2022-02-02 2 10.0 0.1 0.4 True False +578B98 2021-01-01 3 20.0 0.2 3.4 False False + 2022-02-02 4 20.0 0.2 8.2 False True + + var1.variance location +x y +23F0AE 2021-01-01 0.1 paris + 2022-02-02 0.2 paris +578B98 2021-01-01 0.3 paris + 2022-02-02 0.4 paris ``` Reversibility: @@ -203,9 +203,9 @@ In [1]: example = { 'LATPOLE': -72.05457184279, 'WCSNAME': 'IDC_qbu1641sj', 'MJDREF': 0.0, 'RADESYS': 'ICRS'}, 'psf': [['float[erg/s]', [1,2,3,4]]] } - } + } n_example = Xdataset.read_json(example) - n_example.info + n_example.info Out[1]: {'name': 'example', 'xtype': 'group', 'data_arrays': ['data', 'psf'], @@ -240,18 +240,18 @@ In [1]: example = { 'var1.variance': [[[2, 2], 'var1_variance.ntv']], 'var1.mask1': [['var1_mask1.ntv'], ['x']], 'var1.mask2': [[[2, 2], 'var1_mask2.ntv']], - - 'var2': [['var2.ntv'], ['x', 'y']], - + + 'var2': [['var2.ntv'], ['x', 'y']], + 'x': [['x.ntv'], {'test': 21}], 'y': [['date', 'y.ntv']], - + 'ranking': [['month', [2, 2], 'ranking.ntv'], ['var1']], 'z': [['float', 'z.ntv'], ['x']], 'z.uncertainty': [['z_uncertainty.ntv']], - + 'z_bis': [['z_bis.ntv']], - + 'info': {'path': 'https://github.com/loco-philippe/ntv-numpy/tree/main/example/'} } } diff --git a/__init__.py b/__init__.py index f51b2a5..2a579b6 100644 --- a/__init__.py +++ b/__init__.py @@ -3,4 +3,4 @@ @author: philippe@loco-labs.io """ -name = 'ntv_numpy' +name = "ntv_numpy" diff --git a/docs/release.rst b/docs/release.rst index b3dde6f..60957df 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -11,13 +11,13 @@ Version x.y.z - add NTVtype extension (Ndtype class) - add 'uniques' xndarray - interface: - - pandas.DataFrame + - pandas.DataFrame 0.1.2 alpha 1 (2024-04-10) --------------------------- - First version - exchanging via JSON format -- lightweight format (structure: json, arrays: URI) +- lightweight format (structure: json, arrays: URI) - numpy.dtype supported: all except 'complex' and 'timedelta' - NTVtype supported: - datetime, date, time, dat @@ -33,4 +33,4 @@ Version x.y.z - astropy.NDData - scipp.Dataset - Xarray.Dataset, Xarray.Dataarray - - JSON \ No newline at end of file + - JSON diff --git a/example/ex_ndarray.ntv b/example/ex_ndarray.ntv index 8dfd508..3089e11 100644 --- a/example/ex_ndarray.ntv +++ b/example/ex_ndarray.ntv @@ -1 +1 @@ -{":ndarray": ["int64[kg]", [2, 2], [[1, 2, 3, 4], [0, 1, 2, 3]]]} \ No newline at end of file +{":ndarray": ["int64[kg]", [2, 2], [[1, 2, 3, 4], [0, 1, 2, 3]]]} diff --git a/example/interop.mmd b/example/interop.mmd index f72d8fd..989e6f3 100644 --- a/example/interop.mmd +++ b/example/interop.mmd @@ -6,4 +6,4 @@ flowchart LR D[Scipp] <--lossless--> B C[NDData] <--lossless--> B B <--lossless--> E[JSON] - B <--lossless--> F[DataFrame] \ No newline at end of file + B <--lossless--> F[DataFrame] diff --git a/example/struc.mmd b/example/struc.mmd index 35358f9..ea4e2a3 100644 --- a/example/struc.mmd +++ b/example/struc.mmd @@ -31,4 +31,4 @@ flowchart LR classDef xnda fill:#99d,stroke:#009,stroke-width:2px,color:#808 classDef nda fill:#77b,stroke:#00e,stroke-width:2px,color:#808 classDef da fill:#55b,stroke:#00e,stroke-width:2px,color:#fff - classDef dataset stroke:#007,fill:#bbf,color:#808 \ No newline at end of file + classDef dataset stroke:#007,fill:#bbf,color:#808 diff --git a/ntv_numpy/__init__.py b/ntv_numpy/__init__.py index 14bb4f1..c06dc09 100644 --- a/ntv_numpy/__init__.py +++ b/ntv_numpy/__init__.py @@ -49,18 +49,22 @@ - `ntv-numpy.ntv_numpy.ndtype.Ndtype` -""" -# from pathlib import Path -# , read_json, to_json -from ntv_numpy.numpy_ntv_connector import XndarrayConnec, NdarrayConnec -from ntv_numpy.data_array import Dfull, Dcomplete, Darray, Dutil -from ntv_numpy.ndarray import Ndarray, Nutil -from ntv_numpy.xndarray import Xndarray -from ntv_numpy.xdataset import Xdataset -import ntv_numpy.xarray_accessors -# import ntv_pandas.pandas_ntv_connector +- `ntv-numpy.ntv_numpy.xarray_accessors` : -# path = Path(ntv_numpy.numpy_ntv_connector.__file__).parent + - `ntv-numpy.ntv_numpy.xarray_accessors.NxrDatasetAccessor` +""" -# print('package :', __package__) +# from pathlib import Path +# , read_json, to_json +from ntv_numpy.numpy_ntv_connector import XndarrayConnec as XndarrayConnec +from ntv_numpy.numpy_ntv_connector import NdarrayConnec as NdarrayConnec +from ntv_numpy.data_array import Dfull as Dfull +from ntv_numpy.data_array import Dcomplete as Dcomplete +from ntv_numpy.data_array import Darray as Darray +from ntv_numpy.data_array import Dutil as Dutil +from ntv_numpy.ndarray import Nutil as Nutil +from ntv_numpy.ndarray import Ndarray as Ndarray +from ntv_numpy.xndarray import Xndarray as Xndarray +from ntv_numpy.xdataset import Xdataset as Xdataset +from ntv_numpy.xarray_accessors import NxrDatasetAccessor as NxrDatasetAccessor diff --git a/ntv_numpy/data_array.py b/ntv_numpy/data_array.py index 723623d..99ca928 100644 --- a/ntv_numpy/data_array.py +++ b/ntv_numpy/data_array.py @@ -12,6 +12,7 @@ [user guide](https://loco-philippe.github.io/ntv-numpy/docs/user_guide.html) or the [github repository](https://github.com/loco-philippe/ntv-numpy). """ + from abc import ABC, abstractmethod import json import numpy as np @@ -20,7 +21,7 @@ class Darray(ABC): - ''' The Darray class is an abstract class used by `Dfull`and `Dcomplete` classes. + """The Darray class is an abstract class used by `Dfull`and `Dcomplete` classes. *Attributes :* - **data** : np.ndarray - data after coding @@ -33,10 +34,10 @@ class Darray(ABC): *methods* - `read_json` (staticmethod) - `to_json` - ''' + """ def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False): - '''Darray constructor. + """Darray constructor. *Parameters* @@ -44,7 +45,7 @@ def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False): - **ref** : String or integer (default None) - name or index of another Darray - **coding**: List of integer (default None) - mapping between data and the list of values - **dtype**: string (default None) - numpy.dtype to apply - ''' + """ if isinstance(data, Darray): self.data = data.data self.ref = data.ref @@ -52,7 +53,7 @@ def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False): return data = data if isinstance(data, (list, np.ndarray)) else [data] if (len(data) > 0 and isinstance(data[0], (list, np.ndarray))) or unidim: - dtype = data.dtype if isinstance(data, np.ndarray) else 'object' + dtype = data.dtype if isinstance(data, np.ndarray) else "object" self.data = np.fromiter(data, dtype=dtype) else: self.data = np.array(data, dtype=dtype).reshape(-1) @@ -60,27 +61,27 @@ def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False): self.coding = np.array(coding) def __repr__(self): - '''return classname and number of value''' - return self.__class__.__name__ + '[' + str(len(self)) + ']' + """return classname and number of value""" + return self.__class__.__name__ + "[" + str(len(self)) + "]" def __str__(self): - '''return json string format''' + """return json string format""" return json.dumps(self.to_json()) def __eq__(self, other): - ''' equal if values are equal''' + """equal if values are equal""" return np.array_equal(self.values, other.values, equal_nan=False) def __len__(self): - ''' len of values''' + """len of values""" return self._len_val def __contains__(self, item): - ''' item of values''' + """item of values""" return item in self.values def __getitem__(self, ind): - ''' return value item''' + """return value item""" if isinstance(ind, tuple): return [self.values[i] for i in ind] # return [copy(self.values[i]) for i in ind] @@ -88,54 +89,58 @@ def __getitem__(self, ind): # return copy(self.values[ind]) def __copy__(self): - ''' Copy all the data ''' + """Copy all the data""" return self.__class__(self) @staticmethod def read_json(val, dtype=None, unidim=False): - ''' return a Darray entity from a list of data. + """return a Darray entity from a list of data. *Parameters* - **val**: list of data - **dtype** : string (default None) - numpy.dtype to apply - ''' + """ val = val if isinstance(val, list) else [val] if not val or not isinstance(val[0], list): return Dfull(val, dtype=dtype, unidim=unidim) match val: - case [data, ref, list(coding)] if (isinstance(ref, (int, str)) and - isinstance(coding[0], int) and - max(coding) < len(data)): + case [data, ref, list(coding)] if ( + isinstance(ref, (int, str)) + and isinstance(coding[0], int) + and max(coding) < len(data) + ): return None - case [data, ref] if (isinstance(data, list) and - isinstance(ref, (int, str))): + case [data, ref] if ( + isinstance(data, list) and isinstance(ref, (int, str)) + ): return None case [data, list(coef)] if len(coef) == 1: return None - case [data, list(coding)] if (isinstance(coding[0], int) and - max(coding) < len(data)): + case [data, list(coding)] if ( + isinstance(coding[0], int) and max(coding) < len(data) + ): return Dcomplete(data, None, coding, dtype=dtype, unidim=unidim) case _: return Dfull(val, dtype=dtype, unidim=unidim) @abstractmethod def to_json(self): - ''' return a JsonValue''' + """return a JsonValue""" @property @abstractmethod def values(self): - ''' return the list of values''' + """return the list of values""" @property @abstractmethod def _len_val(self): - '''return the length of the entity''' + """return the length of the entity""" class Dfull(Darray): - ''' Representation of a one dimensional Array with full representation + """Representation of a one dimensional Array with full representation *dynamic values (@property)* - `values` @@ -143,10 +148,10 @@ class Dfull(Darray): *methods* - `read_json` (staticmethod) - `to_json` - ''' + """ def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False): - '''Dfull constructor. + """Dfull constructor. *Parameters* @@ -154,26 +159,26 @@ def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False): - **ref** : unused - **coding**: unused - **dtype**: string (default None) - numpy.dtype to apply - ''' + """ super().__init__(data, dtype=dtype, unidim=unidim) def to_json(self): - ''' return a JsonValue of the Dfull entity.''' + """return a JsonValue of the Dfull entity.""" return Dutil.list_json(self.data) @property def values(self): - ''' return the list of values''' + """return the list of values""" return self.data @property def _len_val(self): - '''return the length of the Dfull entity''' + """return the length of the Dfull entity""" return len(self.data) if self.data.ndim > 0 else 0 class Dcomplete(Darray): - ''' Representation of a one dimensional Array with full representation + """Representation of a one dimensional Array with full representation *dynamic values (@property)* - `values` @@ -181,10 +186,10 @@ class Dcomplete(Darray): *methods* - `read_json` (staticmethod) - `to_json` - ''' + """ def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False): - '''Dcomplete constructor. + """Dcomplete constructor. *Parameters* @@ -192,33 +197,36 @@ def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False): - **ref** : unused - **coding**: List of integer (default None) - mapping between data and the list of values - **dtype**: string (default None) - numpy.dtype to apply - ''' + """ if coding is None: try: data, coding = np.unique(data, return_inverse=True) except (TypeError, ValueError): - dat, idx, coding = np.unique(np.frompyfunc(Ntv.from_obj, 1, 1)(data), - return_index=True, return_inverse=True) + dat, idx, coding = np.unique( + np.frompyfunc(Ntv.from_obj, 1, 1)(data), + return_index=True, + return_inverse=True, + ) data = data[idx] super().__init__(data, coding=coding, dtype=dtype, unidim=unidim) def to_json(self): - ''' return a JsonValue of the Dcomplete entity.''' + """return a JsonValue of the Dcomplete entity.""" return [Dutil.list_json(self.data), self.coding.tolist()] @property def values(self): - ''' return the list of values''' + """return the list of values""" return self.data[self.coding] @property def _len_val(self): - '''return the length of the Dcomplete entity''' + """return the length of the Dcomplete entity""" return len(self.coding) if self.coding.ndim > 0 else 0 class Dutil: - '''np.ndarray utilities. + """np.ndarray utilities. *static methods* - `convert` @@ -232,11 +240,11 @@ class Dutil: - `json_ntv` - `split_name` - `split_json_name` - ''' + """ @staticmethod def equals(nself, nother): - '''return True if all elements are equals and dtype are equal''' + """return True if all elements are equals and dtype are equal""" if not (isinstance(nself, np.ndarray) and isinstance(nother, np.ndarray)): return False if nself.dtype != nother.dtype or nself.shape != nother.shape: @@ -248,11 +256,13 @@ def equals(nself, nother): if len(nself) == 0: return True if isinstance(nself[0], (np.ndarray, pd.Series, pd.DataFrame)): - SeriesConnec = NtvConnector.connector().get('SeriesConnec') - DataFrameConnec = NtvConnector.connector().get('DataFrameConnec') - equal = {np.ndarray: Dutil.equals, - pd.Series: SeriesConnec.equals, - pd.DataFrame: DataFrameConnec.equals} + SeriesConnec = NtvConnector.connector().get("SeriesConnec") + DataFrameConnec = NtvConnector.connector().get("DataFrameConnec") + equal = { + np.ndarray: Dutil.equals, + pd.Series: SeriesConnec.equals, + pd.DataFrame: DataFrameConnec.equals, + } for nps, npo in zip(nself, nother): if not equal[type(nself[0])](nps, npo): return False @@ -261,7 +271,7 @@ def equals(nself, nother): @staticmethod def list_json(nda): - '''return a JSON representation of a unidimensional np.ndarray''' + """return a JSON representation of a unidimensional np.ndarray""" if len(nda) == 0: return [] if isinstance(nda[0], np.ndarray): diff --git a/ntv_numpy/ndarray.py b/ntv_numpy/ndarray.py index 4909686..d915158 100644 --- a/ntv_numpy/ndarray.py +++ b/ntv_numpy/ndarray.py @@ -26,16 +26,16 @@ class Ndarray: - ''' The Ndarray class is the JSON interface of numpy.ndarrays. + """The Ndarray class is the JSON interface of numpy.ndarrays. *static methods* - `read_json` - `to_json` - `set_shape` - ''' + """ def __init__(self, dar, ntv_type=None, shape=None, str_uri=True): - '''Ndarray constructor. + """Ndarray constructor. *Parameters* @@ -44,7 +44,7 @@ def __init__(self, dar, ntv_type=None, shape=None, str_uri=True): - **ntv_type**: string (default None) - NTVtype to apply - **str_uri**: boolean(default True) - if True and dar is a string, dar is an uri else a np.array - ''' + """ dar = [None] if isinstance(dar, list) and len(dar) == 0 else dar if isinstance(dar, Ndarray): self.uri = dar.uri @@ -74,20 +74,20 @@ def __init__(self, dar, ntv_type=None, shape=None, str_uri=True): self.darray = dar.astype(Nutil.dtype(str(self.ntvtype))) def __repr__(self): - '''return classname, the shape and the ntv_type''' - uri = self.uri if self.uri else '' - typ = self.ntv_type if self.ntv_type else '' - sha = str(self.shape) if self.shape else '' - u_t = ', ' if uri and typ + sha else '' - t_s = ', ' if typ and sha else '' - return self.__class__.__name__ + '(' + uri + u_t + typ + t_s + sha + ')' + """return classname, the shape and the ntv_type""" + uri = self.uri if self.uri else "" + typ = self.ntv_type if self.ntv_type else "" + sha = str(self.shape) if self.shape else "" + u_t = ", " if uri and typ + sha else "" + t_s = ", " if typ and sha else "" + return self.__class__.__name__ + "(" + uri + u_t + typ + t_s + sha + ")" def __str__(self): - '''return json string format''' + """return json string format""" return json.dumps(self.to_json()) def __eq__(self, other): - ''' equal if attributes are equal''' + """equal if attributes are equal""" if self.ntv_type != other.ntv_type: return False if self.uri != other.uri: @@ -101,15 +101,15 @@ def __eq__(self, other): return Dutil.equals(self.darray, other.darray) def __len__(self): - ''' len of ndarray''' + """len of ndarray""" return len(self.darray) if self.darray is not None else 0 def __contains__(self, item): - ''' item of darray values''' + """item of darray values""" return item in self.darray if self.darray is not None else None def __getitem__(self, ind): - ''' return darray value item''' + """return darray value item""" if self.darray is None: return None if isinstance(ind, tuple): @@ -117,43 +117,48 @@ def __getitem__(self, ind): return self.darray[ind] def __copy__(self): - ''' Copy all the data ''' + """Copy all the data""" return self.__class__(self) def __array__(self): - '''numpy array interface''' + """numpy array interface""" return self.ndarray @property def ntv_type(self): - ''' string representation of ntvtype''' + """string representation of ntvtype""" return str(self.ntvtype) if self.ntvtype else None @property def ndarray(self): - '''representation with a np.ndarray not flattened''' + """representation with a np.ndarray not flattened""" return self.darray.reshape(self.shape) if self.darray is not None else None def set_shape(self, shape): - '''update the shape''' + """update the shape""" if Ndarray.len_shape(shape) != len(self.darray): - raise NdarrayError( - "shape is not consistent with the ndarray length") + raise NdarrayError("shape is not consistent with the ndarray length") self.shape = list(shape) def update(self, nda, nda_uri=True): - '''update uri and darray and return the result (True, False) + """update uri and darray and return the result (True, False) *Parameters* - **nda** : string, list, np.ndarray, Ndarray - data to include - **nda_uri** : boolean (default True) - if True, existing shape and - ntv_type are not updated (but are created if not existing)''' - if not nda_uri and not (self.shape is None or nda.shape is None - ) and self.shape != nda.shape: + ntv_type are not updated (but are created if not existing)""" + if ( + not nda_uri + and not (self.shape is None or nda.shape is None) + and self.shape != nda.shape + ): return False - if not nda_uri and not (self.ntv_type is None or nda.ntv_type is None - ) and self.ntv_type != nda.ntv_type: + if ( + not nda_uri + and not (self.ntv_type is None or nda.ntv_type is None) + and self.ntv_type != nda.ntv_type + ): return False if nda_uri: len_s = self.len_shape(self.shape) @@ -164,24 +169,26 @@ def update(self, nda, nda_uri=True): else: self.ntvtype = nda.ntvtype if nda.ntv_type is not None else self.ntvtype self.shape = nda.shape if nda.shape is not None else self.shape - self.uri, self.darray = ( - nda.uri, None) if nda.uri else (None, nda.darray) + self.uri, self.darray = (nda.uri, None) if nda.uri else (None, nda.darray) return True def set_array(self, darray): - '''set a new darray and remove uri, return the result (True, False) + """set a new darray and remove uri, return the result (True, False) *Parameters* - - **darray** : list, np.ndarray, Ndarray - data to include''' + - **darray** : list, np.ndarray, Ndarray - data to include""" ndarray = Ndarray(darray) darray = ndarray.darray ntv_type = ndarray.ntv_type shape = ndarray.shape new_shape = shape if self.shape is None else self.shape new_ntv_type = ntv_type if self.ntv_type is None else self.ntv_type - if (len(darray) != Ndarray.len_shape(new_shape) or - new_ntv_type != ntv_type or new_shape != shape): + if ( + len(darray) != Ndarray.len_shape(new_shape) + or new_ntv_type != ntv_type + or new_shape != shape + ): return False self.uri = None self.darray = darray @@ -190,7 +197,7 @@ def set_array(self, darray): return True def set_uri(self, uri, no_ntv_type=False, no_shape=False): - '''set a new uri and remove ndarray and optionaly ntv_type and shape. + """set a new uri and remove ndarray and optionaly ntv_type and shape. Return the result (True, False) *Parameters* @@ -198,7 +205,7 @@ def set_uri(self, uri, no_ntv_type=False, no_shape=False): - **uri** : string - URI of the Ndarray - **no_ntv_type** : boolean (default False) - If True, ntv_type is None - **no_shape** : boolean (default False) - If True, shape is None - ''' + """ if not isinstance(uri, str) or not uri: return False self.uri = uri @@ -208,54 +215,60 @@ def set_uri(self, uri, no_ntv_type=False, no_shape=False): return True def to_ndarray(self): - '''representation with a np.ndarray not flattened''' + """representation with a np.ndarray not flattened""" return self.ndarray @property def mode(self): - '''representation mode of the darray/uri data (relative, absolute, - undefined, inconsistent)''' + """representation mode of the darray/uri data (relative, absolute, + undefined, inconsistent)""" match [self.darray, self.uri]: case [None, str()]: - return 'relative' + return "relative" case [None, None]: - return 'undefined' + return "undefined" case [_, None]: - return 'absolute' + return "absolute" case _: - return 'inconsistent' + return "inconsistent" @staticmethod def read_json(jsn, **kwargs): - ''' convert json ntv_value into a ndarray. + """convert json ntv_value into a ndarray. *Parameters* - **convert** : boolean (default True) - If True, convert json data with non Numpy ntv_type into data with python type - ''' - option = {'convert': True} | kwargs + """ + option = {"convert": True} | kwargs jso = json.loads(jsn) if isinstance(jsn, str) else jsn - ntv_value, = Ntv.decode_json(jso)[:1] + (ntv_value,) = Ntv.decode_json(jso)[:1] ntv_type = None shape = None match ntv_value[:-1]: - case []: ... - case [ntv_type, shape]: ... - case [str(ntv_type)]: ... - case [list(shape)]: ... + case []: + ... + case [ntv_type, shape]: + ... + case [str(ntv_type)]: + ... + case [list(shape)]: + ... unidim = shape is not None if isinstance(ntv_value[-1], str): return Ndarray(ntv_value[-1], shape=shape, ntv_type=ntv_type) - darray = Darray.read_json(ntv_value[-1], dtype=Nutil.dtype(ntv_type), - unidim=unidim) - darray.data = Nutil.convert(ntv_type, darray.data, tojson=False, - convert=option['convert']) + darray = Darray.read_json( + ntv_value[-1], dtype=Nutil.dtype(ntv_type), unidim=unidim + ) + darray.data = Nutil.convert( + ntv_type, darray.data, tojson=False, convert=option["convert"] + ) return Ndarray(darray.values, shape=shape, ntv_type=ntv_type) def to_json(self, **kwargs): - ''' convert a Ndarray into json-value + """convert a Ndarray into json-value *Parameters* @@ -265,41 +278,59 @@ def to_json(self, **kwargs): - **format** : string (default 'full') - representation format of the ndarray, - **encoded** : Boolean (default False) - json-value if False else json-text - **header** : Boolean (default True) - including ndarray type - ''' - option = {'format': 'full', 'header': True, 'encoded': False, - 'notype': False, 'noshape': True, 'novalue': False} | kwargs - if self.mode in ['undefined', 'inconsistent']: + """ + option = { + "format": "full", + "header": True, + "encoded": False, + "notype": False, + "noshape": True, + "novalue": False, + } | kwargs + if self.mode in ["undefined", "inconsistent"]: return None - if self.mode == 'absolute' and len(self.darray) == 0: + if self.mode == "absolute" and len(self.darray) == 0: return [[]] - shape = None if not self.shape or (len(self.shape) < 2 and - option['noshape']) else self.shape + shape = ( + None + if not self.shape or (len(self.shape) < 2 and option["noshape"]) + else self.shape + ) - if self.mode == 'relative': + if self.mode == "relative": js_val = self.uri else: - js_val = Nutil.ntv_val(self.ntv_type, self.darray, option['format'], - self.is_json) if not option['novalue'] else ['-'] - - lis = [self.ntv_type if not option['notype'] else None, shape, js_val] - return Nutil.json_ntv(None, 'ndarray', - [val for val in lis if val is not None], - header=option['header'], encoded=option['encoded']) + js_val = ( + Nutil.ntv_val( + self.ntv_type, self.darray, option["format"], self.is_json + ) + if not option["novalue"] + else ["-"] + ) + + lis = [self.ntv_type if not option["notype"] else None, shape, js_val] + return Nutil.json_ntv( + None, + "ndarray", + [val for val in lis if val is not None], + header=option["header"], + encoded=option["encoded"], + ) @property def info(self): - ''' infos of the Ndarray''' - inf = {'shape': self.shape} - inf['length'] = len(self) - inf['ntvtype'] = self.ntv_type - inf['shape'] = self.shape - inf['uri'] = self.uri + """infos of the Ndarray""" + inf = {"shape": self.shape} + inf["length"] = len(self) + inf["ntvtype"] = self.ntv_type + inf["shape"] = self.shape + inf["uri"] = self.uri return {key: val for key, val in inf.items() if val} @staticmethod def len_shape(shape): - '''return a length from a shape (product of dimensions)''' + """return a length from a shape (product of dimensions)""" if not shape: return 0 prod = 1 @@ -309,7 +340,7 @@ def len_shape(shape): class Nutil: - '''ntv-ndarray utilities. + """ntv-ndarray utilities. *static methods* - `convert` @@ -324,31 +355,44 @@ class Nutil: - `split_name` - `split_json_name` - ''' - CONNECTOR_DT = {'field': 'Series', 'tab': 'DataFrame'} - PYTHON_DT = {'array': 'list', 'time': 'datetime.time', - 'object': 'dict', 'null': 'NoneType', 'decimal64': 'Decimal', - 'ndarray': 'ndarray', 'narray': 'narray'} - LOCATION_DT = {'point': 'Point', - 'line': 'LineString', 'polygon': 'Polygon'} + """ + + CONNECTOR_DT = {"field": "Series", "tab": "DataFrame"} + PYTHON_DT = { + "array": "list", + "time": "datetime.time", + "object": "dict", + "null": "NoneType", + "decimal64": "Decimal", + "ndarray": "ndarray", + "narray": "narray", + } + LOCATION_DT = {"point": "Point", "line": "LineString", "polygon": "Polygon"} DT_CONNECTOR = {val: key for key, val in CONNECTOR_DT.items()} DT_PYTHON = {val: key for key, val in PYTHON_DT.items()} DT_LOCATION = {val: key for key, val in LOCATION_DT.items()} DT_NTVTYPE = DT_LOCATION | DT_CONNECTOR | DT_PYTHON - FORMAT_CLS = {'full': Dfull, 'complete': Dcomplete} - STRUCT_DT = {'Ntv': 'object', 'NtvSingle': 'object', 'NtvList': 'object'} - CONVERT_DT = {'object': 'object', 'array': 'object', 'json': 'object', - 'number': 'float', 'boolean': 'bool', 'null': 'object', - 'string': 'str', 'integer': 'int'} + FORMAT_CLS = {"full": Dfull, "complete": Dcomplete} + STRUCT_DT = {"Ntv": "object", "NtvSingle": "object", "NtvList": "object"} + CONVERT_DT = { + "object": "object", + "array": "object", + "json": "object", + "number": "float", + "boolean": "bool", + "null": "object", + "string": "str", + "integer": "int", + } @staticmethod def is_json(obj): - ''' check if obj is a json structure and return True if obj is a json-value + """check if obj is a json structure and return True if obj is a json-value *Parameters* - - **obj** : object to check''' + - **obj** : object to check""" if obj is None: return True is_js = NtvConnector.is_json @@ -370,7 +414,7 @@ def is_json(obj): @staticmethod def extend_array(arr, til, shap, order): - '''return a flattened np.ndarray extended in additional dimensions + """return a flattened np.ndarray extended in additional dimensions parameters: @@ -378,14 +422,14 @@ def extend_array(arr, til, shap, order): - til: integer - parameter to apply to np.tile function - shap: list of integer - shape of the array - order: list of integer - order of dimensions to apply - ''' + """ old_order = list(range(len(order))) arr_tab = np.tile(arr, til).reshape(shap) return np.moveaxis(arr_tab, old_order, order).flatten() @staticmethod def convert(ntv_type, nda, tojson=True, convert=True): - ''' convert np.ndarray with external NTVtype. + """convert np.ndarray with external NTVtype. *Parameters* @@ -394,19 +438,19 @@ def convert(ntv_type, nda, tojson=True, convert=True): - **tojson** : boolean (default True) - apply to json function - **convert** : boolean (default True) - If True, convert json data with non Numpy ntv_type into data with python type - ''' + """ dtype = Nutil.dtype(ntv_type) jtype = Nutil.dtype(ntv_type, convert=False) if tojson: match ntv_type: - case dat if Ndtype(dat).category == 'datation': + case dat if Ndtype(dat).category == "datation": return nda.astype(dtype).astype(jtype) - case 'base16': + case "base16": return nda.astype(dtype) - case 'time' | 'decimal64': + case "time" | "decimal64": return nda.astype(jtype) - case 'geojson': + case "geojson": return np.frompyfunc(ShapelyConnec.to_geojson, 1, 1)(nda) case _: return nda @@ -416,27 +460,30 @@ def convert(ntv_type, nda, tojson=True, convert=True): return nda case [_, False]: return nda.astype(jtype) - case ['time', _]: + case ["time", _]: return np.frompyfunc(datetime.time.fromisoformat, 1, 1)(nda) - case ['decimal64', _]: + case ["decimal64", _]: return np.frompyfunc(Decimal, 1, 1)(nda) - case ['narray', _]: + case ["narray", _]: nar = np.frompyfunc(Ndarray.read_json, 1, 1)(nda) return np.frompyfunc(Ndarray.to_ndarray, 1, 1)(nar) - case ['ndarray', _]: + case ["ndarray", _]: return np.frompyfunc(Ndarray.read_json, 1, 1)(nda) - case [('point' | 'line' | 'polygon' | 'geometry'), _]: + case [("point" | "line" | "polygon" | "geometry"), _]: return np.frompyfunc(ShapelyConnec.to_geometry, 1, 1)(nda) case [connec, _] if connec in Nutil.CONNECTOR_DT: - return np.fromiter([NtvConnector.uncast(nd, None, connec)[0] - for nd in nda], dtype='object') + return np.fromiter( + [NtvConnector.uncast(nd, None, connec)[0] for nd in nda], + dtype="object", + ) case _: return nda.astype(dtype) # float.fromhex(x.hex()) == x, bytes(bytearray.fromhex(x.hex())) == x + @staticmethod def ntv_val(ntv_type, nda, form, is_json=False): - ''' convert a np.ndarray into NTV json-value. + """convert a np.ndarray into NTV json-value. *Parameters* @@ -444,10 +491,11 @@ def ntv_val(ntv_type, nda, form, is_json=False): - **nda** : ndarray to be converted. - **form** : format of data ('full', 'complete', 'sparse', 'primary'). - **is_json** : boolean (defaut False) - True if nda data is Json data - ''' - if form == 'complete' and len(nda) < 2: + """ + if form == "complete" and len(nda) < 2: raise NdarrayError( - "complete format is not available with ndarray length < 2") + "complete format is not available with ndarray length < 2" + ) Format = Nutil.FORMAT_CLS[form] darray = Format(nda) ref = darray.ref @@ -455,16 +503,13 @@ def ntv_val(ntv_type, nda, form, is_json=False): if is_json: return Format(darray.data, ref=ref, coding=coding).to_json() match ntv_type: - case 'narray': - data = [Ndarray(nd).to_json(header=False) - for nd in darray.data] - case 'ndarray': - data = [Ndarray(nd).to_json(header=False) - for nd in darray.data] + case "narray": + data = [Ndarray(nd).to_json(header=False) for nd in darray.data] + case "ndarray": + data = [Ndarray(nd).to_json(header=False) for nd in darray.data] case connec if connec in Nutil.CONNECTOR_DT: - data = [NtvConnector.cast(nd, None, connec)[0] - for nd in darray.data] - case 'point' | 'line' | 'polygon' | 'geometry': + data = [NtvConnector.cast(nd, None, connec)[0] for nd in darray.data] + case "point" | "line" | "polygon" | "geometry": data = np.frompyfunc(ShapelyConnec.to_coord, 1, 1)(darray.data) case None: data = nda @@ -475,104 +520,103 @@ def ntv_val(ntv_type, nda, form, is_json=False): @staticmethod def add_ext(typ, ext): '''return extended type string: "typ[ext]"''' - ext = '[' + ext + ']' if ext else '' - return '' if not typ else typ + ext + ext = "[" + ext + "]" if ext else "" + return "" if not typ else typ + ext @staticmethod def split_type(typ): - '''return a tuple with typ and extension''' + """return a tuple with typ and extension""" if not isinstance(typ, str): return (None, None) - spl = typ.split('[', maxsplit=1) + spl = typ.split("[", maxsplit=1) return (spl[0], None) if len(spl) == 1 else (spl[0], spl[1][:-1]) @staticmethod def split_json_name(string, notnone=False): - '''return a tuple with name, ntv_type from string''' - null = '' if notnone else None - if not string or string == ':': + """return a tuple with name, ntv_type from string""" + null = "" if notnone else None + if not string or string == ":": return (null, null) - spl = string.rsplit(':', maxsplit=1) + spl = string.rsplit(":", maxsplit=1) if len(spl) == 1: return (string, null) - if spl[0] == '': + if spl[0] == "": return (null, spl[1]) - sp0 = spl[0][:-1] if spl[0][-1] == ':' else spl[0] - return (null if sp0 == '' else sp0, null if spl[1] == '' else spl[1]) + sp0 = spl[0][:-1] if spl[0][-1] == ":" else spl[0] + return (null if sp0 == "" else sp0, null if spl[1] == "" else spl[1]) @staticmethod def split_name(string): - '''return a list with name, add_name from string''' - if not string or string == '.': - return ['', ''] - spl = string.split('.', maxsplit=1) - spl = [spl[0], ''] if len(spl) < 2 else spl + """return a list with name, add_name from string""" + if not string or string == ".": + return ["", ""] + spl = string.split(".", maxsplit=1) + spl = [spl[0], ""] if len(spl) < 2 else spl return spl @staticmethod def ntv_type(dtype, ntv_type=None, ext=None): - ''' return ntv_type string from dtype, additional type and extension. + """return ntv_type string from dtype, additional type and extension. *Parameters* - **dtype** : string - dtype of the ndarray - **ntv_type** : string - additional type - **ext** : string - type extension - ''' - np_ntype = NP_NTYPE | Nutil.DT_NTVTYPE | { - 'int': 'int', 'object': 'object'} + """ + np_ntype = NP_NTYPE | Nutil.DT_NTVTYPE | {"int": "int", "object": "object"} if ntv_type: return Nutil.add_ext(ntv_type, ext) match dtype: - case string if string[:3] == 'str': - return Nutil.add_ext('string', ext) - case bytesxx if bytesxx[:5] == 'bytes': - return Nutil.add_ext('base16', ext) + case string if string[:3] == "str": + return Nutil.add_ext("string", ext) + case bytesxx if bytesxx[:5] == "bytes": + return Nutil.add_ext("base16", ext) case dtyp if dtyp in np_ntype: return Nutil.add_ext(np_ntype[dtyp], ext) - case date if date[:10] == 'datetime64': - return 'datetime' + date[10:] - case delta if delta[:11] == 'timedelta64': - return 'timedelta' + delta[11:] + case date if date[:10] == "datetime64": + return "datetime" + date[10:] + case delta if delta[:11] == "timedelta64": + return "timedelta" + delta[11:] case _: return Nutil.add_ext(dtype, ext) @staticmethod def nda_ntv_type(nda, ntv_type=None, ext=None): - '''return ntv_type string from an ndarray, additional type and extension. + """return ntv_type string from an ndarray, additional type and extension. *Parameters* - **nda** : ndarray - data used to calculate the ntv_type - **ntv_type** : string - additional type - **ext** : string - type extension - ''' + """ if ntv_type or nda is None: return ntv_type dtype = nda.dtype.name pytype = nda.flat[0].__class__.__name__ - dtype = pytype if dtype == 'object' and pytype not in Nutil.STRUCT_DT else dtype + dtype = pytype if dtype == "object" and pytype not in Nutil.STRUCT_DT else dtype return Nutil.ntv_type(dtype, ntv_type, ext) @staticmethod def dtype(ntv_type, convert=True): - ''' return dtype from ntv_type + """return dtype from ntv_type *parameters* - **convert** : boolean (default True) - if True, dtype if from converted data - ''' + """ if not ntv_type: return None if convert: - if ntv_type[:8] == 'datetime' and ntv_type[8:]: - return 'datetime64' + ntv_type[8:] + if ntv_type[:8] == "datetime" and ntv_type[8:]: + return "datetime64" + ntv_type[8:] return Ndtype(ntv_type).dtype return Nutil.CONVERT_DT[Ndtype(ntv_type).json_type] @staticmethod def json_ntv(ntv_name, ntv_type, ntv_value, **kwargs): - ''' return the JSON representation of a NTV entity + """return the JSON representation of a NTV entity *parameters* @@ -581,18 +625,18 @@ def json_ntv(ntv_name, ntv_type, ntv_value, **kwargs): - **ntv_value** : string - Json value of the NTV - **encoded** : boolean (default False) - if True return JsonText else JsonValue - **header** : boolean (default True) - if True include ntv_name + ntv_type - ''' - name = ntv_name if ntv_name else '' - option = {'encoded': False, 'header': True} | kwargs - if option['header'] or name: - typ = ':' + ntv_type if option['header'] and ntv_type else '' + """ + name = ntv_name if ntv_name else "" + option = {"encoded": False, "header": True} | kwargs + if option["header"] or name: + typ = ":" + ntv_type if option["header"] and ntv_type else "" jsn = {name + typ: ntv_value} if name + typ else ntv_value else: jsn = ntv_value - if option['encoded']: + if option["encoded"]: return json.dumps(jsn) return jsn class NdarrayError(Exception): - '''Multidimensional exception''' + """Multidimensional exception""" diff --git a/ntv_numpy/ndtype.py b/ntv_numpy/ndtype.py index b48a98c..4936623 100644 --- a/ntv_numpy/ndtype.py +++ b/ntv_numpy/ndtype.py @@ -11,6 +11,7 @@ [user guide](https://loco-philippe.github.io/ntv-numpy/docs/user_guide.html) or the [github repository](https://github.com/loco-philippe/ntv-numpy). """ + import configparser from pathlib import Path import json @@ -19,7 +20,7 @@ class Ndtype(Datatype): - ''' The Ndtype is a child class of Datatype with additional attributes. + """The Ndtype is a child class of Datatype with additional attributes. *Additional attributes :* - add_type: additional data added to the JSON ndarray @@ -35,19 +36,22 @@ class Ndtype(Datatype): The methods defined in this class are : - `read_ini` (static method) - ''' + """ + @staticmethod def read_ini(): - '''return a dict with config data read in ntv_numpy.ini''' + """return a dict with config data read in ntv_numpy.ini""" config = configparser.ConfigParser() - p_file = Path('ntv_numpy.ini') + p_file = Path("ntv_numpy.ini") config.read(Path(ntv_numpy.__file__).parent / p_file) - types = json.loads(config['data']['types']) - return {ntv_type: {'add_type': add_type, 'dtype': dtype} - for [ntv_type, add_type, dtype] in types} + types = json.loads(config["data"]["types"]) + return { + ntv_type: {"add_type": add_type, "dtype": dtype} + for [ntv_type, add_type, dtype] in types + } def __init__(self, full_name, module=False, force=False, validate=None): - '''NdType constructor. + """NdType constructor. *Parameters* @@ -55,12 +59,12 @@ def __init__(self, full_name, module=False, force=False, validate=None): - **module** : boolean (default False) - if True search data in the local .ini file, else in the distant repository - **force** : boolean (default False) - if True, no Namespace control - - **validate** : function (default None) - validate function to include''' + - **validate** : function (default None) - validate function to include""" super().__init__(full_name, module=module, force=force, validate=validate) np_type = NP_TYPES.get(self.base_name) - self.dtype = np_type['dtype'] if np_type else None - self.add_type = np_type['add_type'] if np_type else None + self.dtype = np_type["dtype"] if np_type else None + self.add_type = np_type["add_type"] if np_type else None NP_TYPES = Ndtype.read_ini() -NP_NTYPE = {val['dtype']: key for key, val in NP_TYPES.items()} +NP_NTYPE = {val["dtype"]: key for key, val in NP_TYPES.items()} diff --git a/ntv_numpy/ntv_numpy.ini b/ntv_numpy/ntv_numpy.ini index dd854fc..d750c46 100644 --- a/ntv_numpy/ntv_numpy.ini +++ b/ntv_numpy/ntv_numpy.ini @@ -2,35 +2,35 @@ # define the categories of data defined in 'type' column = [ - - # 'ntv_type' is the type of the data - "ntv_type", - + + # 'ntv_type' is the type of the data + "ntv_type", + # 'add_type' is the additional data added to the ndarray object - "add_type", - + "add_type", + # 'dtype' is used to define the dtype of the ndarray, if 'null' NumPy choose the dtype "dtype"] # define the types with a NumPy conversion types = [ - # without ntv_type - # dtype is : 'intxx', 'floatxx', 'boolean', + # without ntv_type + # dtype is : 'intxx', 'floatxx', 'boolean', # or 'object' (json-array, json-object) ["", "json", null ], - # with ntv_type only in json data (not numbers) - ["date", null, "datetime64[D]" ], - ["yearmonth", null, "datetime64[M]" ], + # with ntv_type only in json data (not numbers) + ["date", null, "datetime64[D]" ], + ["yearmonth", null, "datetime64[M]" ], ["year", null, "datetime64[Y]" ], ["string", null, "str" ], - # with ntv_type only in json data and with NTVtype extension (not numbers) + # with ntv_type only in json data and with NTVtype extension (not numbers) ["datetime", null, "datetime64[s]" ], ["timedelta", null, "timedelta64[s]" ], - # with ntv_type only in json data (numbers and bytes) + # with ntv_type only in json data (numbers and bytes) ["float16", null, "float16" ], ["float32", null, "float32" ], ["float64", null, "float64" ], @@ -77,8 +77,8 @@ types = [ ["week", "week", "int" ], ["hour", "hour", "int" ], ["minute", "minute", "int" ], - ["second", "second", "int" ], - + ["second", "second", "int" ], + # with ntv_type in add_type and in json data (not numbers) ["binary", "binary", "str" ], ["base32", "base32", "str" ], @@ -87,7 +87,7 @@ types = [ ["duration", "duration", "str" ], ["jpointer", "jpointer", "str" ], ["uri", "uri", "str" ], - ["uriref", "uriref", "str" ], + ["uriref", "uriref", "str" ], ["iri", "iri", "str" ], ["iriref", "iriref", "str" ], ["email", "email", "str" ], @@ -97,7 +97,7 @@ types = [ ["ipv6", "ipv6", "str" ], ["file", "file", "str" ], ["geojson", "geojson", "str" ], - + # with ntv_type converted in object dtype (python type) ["geometry", "geometry", "object" ], ["timearray", "timearray","object" ] diff --git a/ntv_numpy/numpy_ntv_connector.py b/ntv_numpy/numpy_ntv_connector.py index 1f9aa06..9f00ef1 100644 --- a/ntv_numpy/numpy_ntv_connector.py +++ b/ntv_numpy/numpy_ntv_connector.py @@ -18,6 +18,7 @@ - `XndarrayConnec`: 'xndarray' connector for Xndarray data - `XdatasetConnec`: 'xdataset' connector for Xdataset data """ + from json_ntv import NtvConnector from ntv_numpy.ndarray import Ndarray @@ -26,25 +27,24 @@ class NarrayConnec(NtvConnector): + """NTV connector for Numpy ndarray.""" - '''NTV connector for Numpy ndarray.''' - - clas_obj = 'ndarray' - clas_typ = 'narray' + clas_obj = "ndarray" + clas_typ = "narray" @staticmethod def to_obj_ntv(ntv_value, **kwargs): - ''' convert json ntv_value into a np.ndarray. + """convert json ntv_value into a np.ndarray. *Parameters* - **convert** : boolean (default True) - If True, convert json data with - non Numpy ntv_type into data with python type''' + non Numpy ntv_type into data with python type""" return Ndarray.read_json(ntv_value, **kwargs).darray @staticmethod def to_json_ntv(value, name=None, typ=None, **kwargs): - ''' convert a np.ndarray (value, name, type) into NTV json (json-value, name, ntv_type). + """convert a np.ndarray (value, name, type) into NTV json (json-value, name, ntv_type). *Parameters* @@ -57,35 +57,40 @@ def to_json_ntv(value, name=None, typ=None, **kwargs): - **format** : string (default 'full') - representation format of the ndarray, - **encoded** : Boolean (default False) - json-value if False else json-text - **header** : Boolean (default True) - including ndarray type - ''' - option = {'format': 'full', 'header': True, 'encoded': False, - 'notype': False, 'noshape': True, 'novalue': False} | kwargs - if not option['format'] in ['full', 'complete']: - option['noshape'] = False - option['header'] = False - return (Ndarray(value).to_json(**option), name, 'narray') + """ + option = { + "format": "full", + "header": True, + "encoded": False, + "notype": False, + "noshape": True, + "novalue": False, + } | kwargs + if option["format"] not in ["full", "complete"]: + option["noshape"] = False + option["header"] = False + return (Ndarray(value).to_json(**option), name, "narray") class NdarrayConnec(NtvConnector): + """NTV connector for Ndarray.""" - '''NTV connector for Ndarray.''' - - clas_obj = 'Ndarray' - clas_typ = 'ndarray' + clas_obj = "Ndarray" + clas_typ = "ndarray" @staticmethod def to_obj_ntv(ntv_value, **kwargs): - ''' convert json ntv_value into a Ndarray. + """convert json ntv_value into a Ndarray. *Parameters* - **convert** : boolean (default True) - If True, convert json data with - non-Numpy ntv_type into data with python type''' + non-Numpy ntv_type into data with python type""" return Ndarray.read_json(ntv_value, **kwargs) @staticmethod def to_json_ntv(value, name=None, typ=None, **kwargs): - ''' convert a Ndarray (value, name, type) into NTV json (json-value, name, ntv_type). + """convert a Ndarray (value, name, type) into NTV json (json-value, name, ntv_type). *Parameters* @@ -98,36 +103,41 @@ def to_json_ntv(value, name=None, typ=None, **kwargs): - **format** : string (default 'full') - representation format of the ndarray, - **encoded** : Boolean (default False) - json-value if False else json-text - **header** : Boolean (default True) - including ndarray type - ''' - option = {'format': 'full', 'header': True, 'encoded': False, - 'notype': False, 'noshape': True, 'novalue': False} | kwargs - if not option['format'] in ['full', 'complete']: - option['noshape'] = False - return (Ndarray(value).to_json(**option), name, 'ndarray') + """ + option = { + "format": "full", + "header": True, + "encoded": False, + "notype": False, + "noshape": True, + "novalue": False, + } | kwargs + if option["format"] not in ["full", "complete"]: + option["noshape"] = False + return (Ndarray(value).to_json(**option), name, "ndarray") class XndarrayConnec(NtvConnector): + """NTV connector for Xndarray.""" - '''NTV connector for Xndarray.''' - - clas_obj = 'Xndarray' - clas_typ = 'xndarray' + clas_obj = "Xndarray" + clas_typ = "xndarray" @staticmethod def to_obj_ntv(ntv_value, **kwargs): - ''' convert json ntv_value into a Xndarray. + """convert json ntv_value into a Xndarray. *Parameters* - **convert** : boolean (default True) - If True, convert json data with non-umpy ntv_type into Xndarray with python type - ''' + """ print(ntv_value) return Xndarray.read_json(ntv_value, **kwargs) @staticmethod def to_json_ntv(value, name=None, typ=None, **kwargs): - ''' convert a Xndarray (value) into NTV json (json-value, name, ntv_type). + """convert a Xndarray (value) into NTV json (json-value, name, ntv_type). *Parameters* @@ -141,36 +151,42 @@ def to_json_ntv(value, name=None, typ=None, **kwargs): - **noshape** : Boolean (default True) - if True, without shape if dim < 1 - **format** : string (default 'full') - representation format of the ndarray, - **extension** : string (default None) - type extension - ''' - option = {'notype': False, 'extension': None, 'format': 'full', - 'noshape': True, 'header': True, 'encoded': False, - 'novalue': False, 'noname': False} | kwargs - if not option['format'] in ['full', 'complete']: - option['noshape'] = False - return (value.to_json(**option), name, 'xndarray') + """ + option = { + "notype": False, + "extension": None, + "format": "full", + "noshape": True, + "header": True, + "encoded": False, + "novalue": False, + "noname": False, + } | kwargs + if option["format"] not in ["full", "complete"]: + option["noshape"] = False + return (value.to_json(**option), name, "xndarray") class XdatasetConnec(NtvConnector): + """NTV connector for Xdataset.""" - '''NTV connector for Xdataset.''' - - clas_obj = 'Xdataset' - clas_typ = 'xdataset' + clas_obj = "Xdataset" + clas_typ = "xdataset" @staticmethod def to_obj_ntv(ntv_value, **kwargs): # reindex=True, decode_str=False): - ''' convert json ntv_value into a Xdataset. + """convert json ntv_value into a Xdataset. *Parameters* - **convert** : boolean (default True) - If True, convert json data with non-Numpy ntv_type into Xdataset with python type - ''' + """ return Xdataset.read_json(ntv_value, **kwargs) @staticmethod def to_json_ntv(value, name=None, typ=None, **kwargs): - ''' convert a Xdataset (value) into NTV json (json-value, name, ntv_type). + """convert a Xdataset (value) into NTV json (json-value, name, ntv_type). *Parameters* @@ -184,11 +200,18 @@ def to_json_ntv(value, name=None, typ=None, **kwargs): - **noshape** : Boolean (default False) - if True, without shape if dim < 1 - **format** : list of string (default list of 'full') - representation format of the np.ndarray, - ''' - option = {'notype': False, 'extension': None, 'format': 'full', - 'noshape': True, 'header': True, 'encoded': False, - 'novalue': False, 'noname': False} | kwargs - if not option['format'] in ['full', 'complete']: - option['noshape'] = False - option['noname'] = True - return (value.to_json(**option), name, 'xdataset') + """ + option = { + "notype": False, + "extension": None, + "format": "full", + "noshape": True, + "header": True, + "encoded": False, + "novalue": False, + "noname": False, + } | kwargs + if option["format"] not in ["full", "complete"]: + option["noshape"] = False + option["noname"] = True + return (value.to_json(**option), name, "xdataset") diff --git a/ntv_numpy/xarray_accessors.py b/ntv_numpy/xarray_accessors.py index 7d693a0..c459faa 100644 --- a/ntv_numpy/xarray_accessors.py +++ b/ntv_numpy/xarray_accessors.py @@ -8,6 +8,7 @@ [user guide](https://loco-philippe.github.io/ntv-numpy/docs/user_guide.html) or the [github repository](https://github.com/loco-philippe/ntv-numpy). """ + import xarray as xr from ntv_numpy.xdataset import Xdataset @@ -18,12 +19,13 @@ except AttributeError: pass + @xr.register_dataset_accessor("nxr") class NxrDatasetAccessor: """Accessor class for methods invoked as `xr.Dataset.nxr.*`""" - + def __init__(self, xarray_obj): - '''initialisation of the class''' + """initialisation of the class""" self._obj = xarray_obj def to_dataframe(self, **kwargs): @@ -66,6 +68,6 @@ def to_json(self, **kwargs): - **noshape** : Boolean (default True) - if True, without shape if dim < 1 - **format** : list of string (default list of 'full') - representation format of the ndarray, - + """ - return Xdataset.from_xarray(self._obj, **kwargs).to_json(**kwargs) \ No newline at end of file + return Xdataset.from_xarray(self._obj, **kwargs).to_json(**kwargs) diff --git a/ntv_numpy/xconnector.py b/ntv_numpy/xconnector.py index a666c36..1a38b4a 100644 --- a/ntv_numpy/xconnector.py +++ b/ntv_numpy/xconnector.py @@ -17,7 +17,6 @@ or the [github repository](https://github.com/loco-philippe/ntv-numpy). """ - import xarray as xr import scipp as sc import pandas as pd @@ -31,64 +30,70 @@ class AstropyNDDataConnec: - ''' NDData interface with two static methods ximport and xexport''' + """NDData interface with two static methods ximport and xexport""" @staticmethod def xexport(xdt, **kwargs): - '''return a NDData from a Xdataset''' - data = xdt['data'].ndarray - mask = xdt['data.mask'].ndarray - unit = xdt['data'].nda.ntvtype.extension - uncert = xdt['data.uncertainty'].ndarray - typ_u = xdt['data.uncertainty'].nda.ntvtype.extension + """return a NDData from a Xdataset""" + data = xdt["data"].ndarray + mask = xdt["data.mask"].ndarray + unit = xdt["data"].nda.ntvtype.extension + uncert = xdt["data.uncertainty"].ndarray + typ_u = xdt["data.uncertainty"].nda.ntvtype.extension match typ_u: - case 'std': + case "std": uncertainty = StdDevUncertainty(uncert) - case 'var': + case "var": uncertainty = VarianceUncertainty(uncert) - case 'inv': + case "inv": uncertainty = InverseVariance(uncert) case _: uncertainty = uncert - meta = xdt['meta'].meta | {'name': xdt.name} - wcs_dic = xdt['wcs'].meta - psf = xdt['psf'].ndarray - return NDData(data, mask=mask, unit=unit, uncertainty=uncertainty, - meta=meta, wcs=wcs.WCS(wcs_dic), psf=psf) + meta = xdt["meta"].meta | {"name": xdt.name} + wcs_dic = xdt["wcs"].meta + psf = xdt["psf"].ndarray + return NDData( + data, + mask=mask, + unit=unit, + uncertainty=uncertainty, + meta=meta, + wcs=wcs.WCS(wcs_dic), + psf=psf, + ) @staticmethod def ximport(ndd, Xclass, **kwargs): - '''return a Xdataset from a astropy.NDData''' + """return a Xdataset from a astropy.NDData""" xnd = [] - name = 'no_name' + name = "no_name" unit = ndd.unit.to_string() if ndd.unit is not None else None ntv_type = Nutil.ntv_type(ndd.data.dtype.name, ext=unit) - xnd += [Xndarray('data', nda=Ndarray(ndd.data, ntv_type=ntv_type))] + xnd += [Xndarray("data", nda=Ndarray(ndd.data, ntv_type=ntv_type))] if ndd.meta: - meta = {key: val for key, val in ndd.meta.items() if key != 'name'} - name = ndd.meta.get('name', 'no_name') - xnd += [Xndarray('meta', meta=meta)] + meta = {key: val for key, val in ndd.meta.items() if key != "name"} + name = ndd.meta.get("name", "no_name") + xnd += [Xndarray("meta", meta=meta)] if ndd.wcs: - xnd += [Xndarray('wcs', meta=dict(ndd.wcs.to_header()))] + xnd += [Xndarray("wcs", meta=dict(ndd.wcs.to_header()))] if ndd.psf is not None: - xnd += [Xndarray('psf', nda=Ndarray(ndd.psf, ntv_type=ntv_type))] + xnd += [Xndarray("psf", nda=Ndarray(ndd.psf, ntv_type=ntv_type))] if ndd.mask is not None: - xnd += [Xndarray('data.mask', nda=ndd.mask)] + xnd += [Xndarray("data.mask", nda=ndd.mask)] if ndd.uncertainty is not None: typ_u = ndd.uncertainty.__class__.__name__[:3].lower() - ntv_type = Nutil.ntv_type( - ndd.uncertainty.array.dtype.name, ext=typ_u) + ntv_type = Nutil.ntv_type(ndd.uncertainty.array.dtype.name, ext=typ_u) nda = Ndarray(ndd.uncertainty.array, ntv_type=ntv_type) - xnd += [Xndarray('data.uncertainty', nda=nda)] + xnd += [Xndarray("data.uncertainty", nda=nda)] return Xclass(xnd, name).to_canonical() class PandasConnec: - ''' pandas.DataFrame interface with two static methods ximport and xexport''' + """pandas.DataFrame interface with two static methods ximport and xexport""" @staticmethod def xexport(xdt, **kwargs): - '''return a pd.DataFrame from a Xdataset + """return a pd.DataFrame from a Xdataset *Parameters* @@ -96,108 +101,135 @@ def xexport(xdt, **kwargs): - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs - **dims**: list of string (default None) - order of dimensions full_name to apply - **index**: Boolean (default True) - if True, dimensions are translated into indexes - ''' - opt = {'ntv_type': True, 'info': True, 'index': True, 'dims': None} | kwargs - dic_name = {name: xdt[name].json_name if opt['ntv_type'] else xdt[name].full_name - for name in xdt.names} - dims = xdt.dimensions if not opt['dims'] else tuple(opt['dims']) - fields = (xdt.group(dims) + xdt.group(xdt.coordinates) + - xdt.group(xdt.data_vars) + xdt.uniques) - fields += tuple(nam for nam in xdt.group(xdt.data_arrays) - if len(xdt[nam]) == xdt.length) + """ + opt = {"ntv_type": True, "info": True, "index": True, "dims": None} | kwargs + dic_name = { + name: xdt[name].json_name if opt["ntv_type"] else xdt[name].full_name + for name in xdt.names + } + dims = xdt.dimensions if not opt["dims"] else tuple(opt["dims"]) + fields = ( + xdt.group(dims) + + xdt.group(xdt.coordinates) + + xdt.group(xdt.data_vars) + + xdt.uniques + ) + fields += tuple( + nam for nam in xdt.group(xdt.data_arrays) if len(xdt[nam]) == xdt.length + ) fields_array = tuple(var for var in fields if not xdt[var].uri) - dic_series = {dic_name[name]: PandasConnec._to_np_series(xdt, name, dims) - for name in fields_array} + dic_series = { + dic_name[name]: PandasConnec._to_np_series(xdt, name, dims) + for name in fields_array + } dfr = pd.DataFrame(dic_series) index = [dic_name[name] for name in dims] - if index and opt['index']: + if index and opt["index"]: dfr = dfr.set_index(index) - if opt['info']: - dfr.attrs |= {'info': xdt.tab_info} - dfr.attrs |= {'metadata': { - name: xdt[name].meta for name in xdt.metadata}} + if opt["info"]: + dfr.attrs |= {"info": xdt.tab_info} + dfr.attrs |= {"metadata": {name: xdt[name].meta for name in xdt.metadata}} fields_uri = [var for var in fields if var not in fields_array] - fields_other = [nam for nam in xdt.group(xdt.data_arrays) - if len(xdt[nam]) != xdt.length] + fields_other = [ + nam for nam in xdt.group(xdt.data_arrays) if len(xdt[nam]) != xdt.length + ] if fields_uri: - dfr.attrs |= {'fields': {nam: xdt[nam].to_json(noname=True,) - for nam in fields_uri + fields_other}} + dfr.attrs |= { + "fields": { + nam: xdt[nam].to_json( + noname=True, + ) + for nam in fields_uri + fields_other + } + } if xdt.name: - dfr.attrs |= {'name': xdt.name} + dfr.attrs |= {"name": xdt.name} return dfr @staticmethod def ximport(df, Xclass, **kwargs): - '''return a Xdataset from a pd.DataFrame + """return a Xdataset from a pd.DataFrame *Parameters* - dims: list of string (default None) - order of dimensions to apply - ''' - opt = {'dims': None} | kwargs + """ + opt = {"dims": None} | kwargs xnd = [] dfr = df.reset_index() - if 'index' in dfr.columns and 'index' not in df.columns: - del dfr['index'] - df_names = {Nutil.split_json_name(j_name)[0]: j_name - for j_name in dfr.columns} - df_ntv_types = {Nutil.split_json_name(j_name)[0]: - Nutil.split_json_name(j_name)[1] for j_name in dfr.columns} + if "index" in dfr.columns and "index" not in df.columns: + del dfr["index"] + df_names = {Nutil.split_json_name(j_name)[0]: j_name for j_name in dfr.columns} + df_ntv_types = { + Nutil.split_json_name(j_name)[0]: Nutil.split_json_name(j_name)[1] + for j_name in dfr.columns + } dfr.columns = [Nutil.split_json_name(name)[0] for name in dfr.columns] - if dfr.attrs.get('metadata'): - for name, meta in dfr.attrs['metadata'].items(): + if dfr.attrs.get("metadata"): + for name, meta in dfr.attrs["metadata"].items(): xnd += [Xndarray.read_json({name: meta})] - if dfr.attrs.get('fields'): - for name, jsn in dfr.attrs['fields'].items(): + if dfr.attrs.get("fields"): + for name, jsn in dfr.attrs["fields"].items(): xnd += [Xndarray.read_json({name: jsn})] - if dfr.attrs.get('info'): - dimensions = dfr.attrs['info']['dimensions'] - data = dfr.attrs['info']['data'] + if dfr.attrs.get("info"): + dimensions = dfr.attrs["info"]["dimensions"] + data = dfr.attrs["info"]["data"] else: - dimensions, data = PandasConnec._ximport_analysis(dfr, opt['dims']) - shape_dfr = [data[dim]['shape'][0] - for dim in dimensions] if dimensions else len(dfr) + dimensions, data = PandasConnec._ximport_analysis(dfr, opt["dims"]) + shape_dfr = ( + [data[dim]["shape"][0] for dim in dimensions] if dimensions else len(dfr) + ) dfr = dfr.sort_values(dimensions) for name in df_names: - xnd += [PandasConnec._ximport_series(data, name, dfr, dimensions, - shape_dfr, df_ntv_types, **opt)] - return Xclass(xnd, dfr.attrs.get('name')).to_canonical() + xnd += [ + PandasConnec._ximport_series( + data, name, dfr, dimensions, shape_dfr, df_ntv_types, **opt + ) + ] + return Xclass(xnd, dfr.attrs.get("name")).to_canonical() @staticmethod def _ximport_analysis(dfr, opt_dims): - '''return data and dimensions from analysis module + """return data and dimensions from analysis module - opt_dims: partition to apply - - dfr: dataframe to analyse''' + - dfr: dataframe to analyse""" dfr_idx = list(dfr.index.names) opt_dims = dfr_idx if dfr_idx != [None] else opt_dims ana = dfr.npd.analysis(distr=True) - partition = ana.field_partition(partition=opt_dims, mode='id') + partition = ana.field_partition(partition=opt_dims, mode="id") part_rel = ana.relation_partition(partition=opt_dims, noroot=True) - part_dim = ana.relation_partition( - partition=opt_dims, noroot=True, primary=True) - dimensions = partition['primary'] + part_dim = ana.relation_partition(partition=opt_dims, noroot=True, primary=True) + dimensions = partition["primary"] len_fields = {fld.idfield: fld.lencodec for fld in ana.fields} - data = {fld.idfield: { - 'shape': [len_fields[dim] for dim in part_dim[fld.idfield]] if part_dim else [], - 'links': part_rel[fld.idfield] if part_rel else []} for fld in ana.fields} + data = { + fld.idfield: { + "shape": [len_fields[dim] for dim in part_dim[fld.idfield]] + if part_dim + else [], + "links": part_rel[fld.idfield] if part_rel else [], + } + for fld in ana.fields + } for json_name in data: - if not data[json_name]['shape']: + if not data[json_name]["shape"]: name = Nutil.split_name(Nutil.split_json_name(json_name)[0])[0] - p_name = [js_name for js_name in data - if Nutil.split_json_name(js_name)[0] == name][0] - data[json_name]['shape'] = data[p_name]['shape'] + p_name = [ + js_name + for js_name in data + if Nutil.split_json_name(js_name)[0] == name + ][0] + data[json_name]["shape"] = data[p_name]["shape"] return (dimensions, data) @staticmethod def _ximport_series(data, name, dfr, dimensions, shape_dfr, df_ntv_types, **opt): - '''return a Xndarray from a Series of a pd.DataFrame''' - if data[name].get('xtype') == 'meta': # or len(dfr[name].unique()) == 1: + """return a Xndarray from a Series of a pd.DataFrame""" + if data[name].get("xtype") == "meta": # or len(dfr[name].unique()) == 1: return Xndarray(name, meta=dfr[name].iloc[0]) - meta = data[name].get('meta') + meta = data[name].get("meta") ntv_type = df_ntv_types[name] if len(dfr[name].unique()) == 1: - nda = Ndarray(np.array(dfr[name].iloc[0]), - ntv_type=ntv_type, str_uri=False) + nda = Ndarray(np.array(dfr[name].iloc[0]), ntv_type=ntv_type, str_uri=False) nda.set_shape([1]) return Xndarray(name, nda=nda, meta=meta) if not dimensions: @@ -209,22 +241,23 @@ def _ximport_series(data, name, dfr, dimensions, shape_dfr, df_ntv_types, **opt) p_name, add_name = Nutil.split_name(name) if add_name: PandasConnec._get_dims(dims, p_name, data, dimensions) - np_array = PandasConnec._from_series(dfr, name, shape_dfr, - dimensions, dims, opt['dims']) - shape = data[name].get('shape', [len(dfr)]) + np_array = PandasConnec._from_series( + dfr, name, shape_dfr, dimensions, dims, opt["dims"] + ) + shape = data[name].get("shape", [len(dfr)]) nda = Ndarray(np_array, ntv_type, shape) - links = data[name].get('links') + links = data[name].get("links") return Xndarray(name, nda=nda, links=links if links else dims, meta=meta) @staticmethod def _to_np_series(xdt, name, dims): - '''return a np.ndarray from the Xndarray of xdt defined by his name + """return a np.ndarray from the Xndarray of xdt defined by his name *parameters* - **xdt**: Xdataset - data to convert in a pd.DataFrame - **name**: string - full_name of the Xndarray to convert - - **dims**: list of string - order of dimensions full_name to apply''' + - **dims**: list of string - order of dimensions full_name to apply""" if name in xdt.uniques: return np.array([xdt[name].darray[0]] * xdt.length) if xdt[name].shape == [xdt.length]: @@ -246,7 +279,7 @@ def _to_np_series(xdt, name, dims): @staticmethod def _from_series(dfr, name, shape, dims, links, new_dims=None): - '''return a flattened np.ndarray from the pd.Series of dfr defined by his name + """return a flattened np.ndarray from the pd.Series of dfr defined by his name *parameters* @@ -256,18 +289,18 @@ def _from_series(dfr, name, shape, dims, links, new_dims=None): - dims: list of string - list of name of dimensions - links: list of string - list of linked Series - new_dims: list of string (default None) - new order of dims - ''' + """ if not links: return np.array(dfr[name]) old_order = list(range(len(dims))) new_dims = new_dims if new_dims else dims - order = [dims.index(dim) - for dim in new_dims] if new_dims else old_order + order = [dims.index(dim) for dim in new_dims] if new_dims else old_order idx = [0] * len(dims) for nam in links: idx[new_dims.index(nam)] = slice(shape[dims.index(nam)]) - xar = np.moveaxis(np.array(dfr[name]).reshape(shape), - old_order, order)[tuple(idx)] + xar = np.moveaxis(np.array(dfr[name]).reshape(shape), old_order, order)[ + tuple(idx) + ] if not links: return xar.flatten() lnk = [nam for nam in new_dims if nam in links] @@ -279,24 +312,24 @@ def _from_series(dfr, name, shape, dims, links, new_dims=None): @staticmethod def _get_dims(dims, name, data, dimensions): - '''add names of dimensions into dims''' + """add names of dimensions into dims""" if not name: return if name in dimensions: dims += [name] else: - if 'links' not in data[name]: + if "links" not in data[name]: return - for nam in data[name]['links']: + for nam in data[name]["links"]: PandasConnec._get_dims(dims, nam, data, dimensions) class XarrayConnec: - ''' Xarray interface with two static methods ximport and xexport''' + """Xarray interface with two static methods ximport and xexport""" @staticmethod def xexport(xdt, **kwargs): - '''return a xr.DataArray or a xr.Dataset from a Xdataset + """return a xr.DataArray or a xr.Dataset from a Xdataset *Parameters* @@ -304,36 +337,40 @@ def xexport(xdt, **kwargs): return a xr.DataArray - **info** : Boolean (default True) - if True, add json representation of 'relative' Xndarrays and 'data_arrays' Xndarrays in attrs - ''' - option = {'dataset': True, 'info': True} | kwargs + """ + option = {"dataset": True, "info": True} | kwargs coords = XarrayConnec._to_xr_vars( - xdt, xdt.dimensions + xdt.coordinates + xdt.uniques) + xdt, xdt.dimensions + xdt.coordinates + xdt.uniques + ) coords |= XarrayConnec._to_xr_vars(xdt, xdt.additionals) attrs = XarrayConnec._to_xr_attrs(xdt, **option) - if len(xdt.data_vars) == 1 and not option['dataset']: + if len(xdt.data_vars) == 1 and not option["dataset"]: var_name = xdt.data_vars[0] data = xdt.to_ndarray(var_name) dims = xdt.dims(var_name) - attrs |= {'ntv_type': xdt[var_name].nda.ntv_type} + attrs |= {"ntv_type": xdt[var_name].nda.ntv_type} attrs |= xdt[var_name].meta if xdt[var_name].meta else {} - name = var_name if var_name != 'data' else None - xrd = xr.DataArray(data=data, coords=coords, dims=dims, attrs=attrs, - name=name) + name = var_name if var_name != "data" else None + xrd = xr.DataArray( + data=data, coords=coords, dims=dims, attrs=attrs, name=name + ) else: data_vars = XarrayConnec._to_xr_vars(xdt, xdt.data_vars) xrd = xr.Dataset(data_vars, coords=coords, attrs=attrs) for unic in xdt.uniques: - xrd[unic].attrs |= {'ntv_type': xdt[unic].ntv_type} | ( - xdt[unic].meta if xdt[unic].meta else {}) + xrd[unic].attrs |= {"ntv_type": xdt[unic].ntv_type} | ( + xdt[unic].meta if xdt[unic].meta else {} + ) return xrd @staticmethod def ximport(xar, Xclass, **kwargs): - '''return a Xdataset from a xr.DataArray or a xr.Dataset''' + """return a Xdataset from a xr.DataArray or a xr.Dataset""" xnd = [] if xar.attrs: - attrs = {k: v for k, v in xar.attrs.items() if k not in [ - 'name', 'ntv_type']} + attrs = { + k: v for k, v in xar.attrs.items() if k not in ["name", "ntv_type"] + } for name, meta in attrs.items(): if isinstance(meta, list): xnd += [Xndarray.read_json({name: meta})] @@ -341,58 +378,61 @@ def ximport(xar, Xclass, **kwargs): xnd += [Xndarray(name, meta=meta)] for coord in xar.coords: xnd += [XarrayConnec._var_xr_to_xnd(xar.coords[coord])] - if list(xar.coords[coord].dims) == list(xar.dims) and isinstance(xar, xr.Dataset): + if list(xar.coords[coord].dims) == list(xar.dims) and isinstance( + xar, xr.Dataset + ): xnd[-1].links = [list(xar.data_vars)[0]] if isinstance(xar, xr.DataArray): - var = XarrayConnec._var_xr_to_xnd( - xar, name='data', add_attrs=False) - xnd += [XarrayConnec._var_xr_to_xnd(xar, - name='data', add_attrs=False)] - xdt = Xclass(xnd, xar.attrs.get('name')) + var = XarrayConnec._var_xr_to_xnd(xar, name="data", add_attrs=False) + xnd += [XarrayConnec._var_xr_to_xnd(xar, name="data", add_attrs=False)] + xdt = Xclass(xnd, xar.attrs.get("name")) for var in xdt.data_vars: if var != xar.name and xar.name: xdt[var].links = [xar.name] return xdt.to_canonical() for var in xar.data_vars: xnd += [XarrayConnec._var_xr_to_xnd(xar.data_vars[var])] - return Xclass(xnd, xar.attrs.get('name')).to_canonical() + return Xclass(xnd, xar.attrs.get("name")).to_canonical() @staticmethod def _var_xr_to_xnd(var, name=None, add_attrs=True): - '''return a Xndarray from a Xarray variable + """return a Xndarray from a Xarray variable *Parameters* - **var** : Xarray variable to convert in Xndarray, - **name** : string (default None) - default name if var have no name, - **add_attrs** : boolean (default True) - if False, attrs are not converted - ''' + """ full_name = var.name if var.name else name name = Nutil.split_name(full_name)[0] dims = None if var.dims == (name,) or var.size == 1 else list(var.dims) - ntv_type = var.attrs.get('ntv_type') + ntv_type = var.attrs.get("ntv_type") nda = var.values nda = nda.reshape(1) if not nda.shape else nda - if nda.dtype.name == 'datetime64[ns]' and ntv_type: + if nda.dtype.name == "datetime64[ns]" and ntv_type: nda = Nutil.convert(ntv_type, nda, tojson=False) - attrs = {k: v for k, v in var.attrs.items() - if k not in ['ntv_type', 'name']} if add_attrs else {} + attrs = ( + {k: v for k, v in var.attrs.items() if k not in ["ntv_type", "name"]} + if add_attrs + else {} + ) return Xndarray(full_name, Ndarray(nda, ntv_type), dims, attrs) @staticmethod def _to_xr_attrs(xdt, **option): - '''return a dict with attributes from a Xdataset + """return a dict with attributes from a Xdataset *Parameters* - **info** : Boolean if True, add json representation of 'relative' Xndarrays and 'data_arrays' Xndarrays in attrs - ''' + """ attrs = {meta: xdt[meta].meta for meta in xdt.metadata} - attrs |= {'name': xdt.name} if xdt.name else {} - if option['info']: + attrs |= {"name": xdt.name} if xdt.name else {} + if option["info"]: for name in xdt.names: - if xdt[name].mode == 'relative': + if xdt[name].mode == "relative": attrs |= xdt[name].to_json(header=False) for name in xdt.data_arrays: attrs |= xdt[name].to_json(header=False) @@ -400,35 +440,35 @@ def _to_xr_attrs(xdt, **option): @staticmethod def _to_xr_coord(xdt, name): - '''return a dict with Xarray attributes from a Xndarray defined by his name''' + """return a dict with Xarray attributes from a Xndarray defined by his name""" data = xdt.to_ndarray(name) if name in xdt.uniques: return {name: data[0]} if name in xdt.additionals and not xdt[name].links: data = data.reshape(xdt.shape_dims(xdt[name].name)) dims = tuple(xdt.dims(name)) if xdt.dims(name) else (xdt[name].name) - meta = {'ntv_type': xdt[name].ntv_type} | ( - xdt[name].meta if xdt[name].meta else {}) + meta = {"ntv_type": xdt[name].ntv_type} | ( + xdt[name].meta if xdt[name].meta else {} + ) return {name: (dims, data, meta)} @staticmethod def _to_xr_vars(xdt, list_names): - '''return a dict with Xarray attributes from a list of Xndarray names''' + """return a dict with Xarray attributes from a list of Xndarray names""" arg_vars = {} - valid_names = [ - nam for nam in list_names if xdt[nam].mode == 'absolute'] + valid_names = [nam for nam in list_names if xdt[nam].mode == "absolute"] for xnd_name in valid_names: arg_vars |= XarrayConnec._to_xr_coord(xdt, xnd_name) for name in list_names: - if xdt[name].xtype == 'meta': + if xdt[name].xtype == "meta": arg_vars |= {name: xdt[name].meta} return arg_vars @staticmethod def _xr_add_type(xar): - '''add 'ntv_type' as attribute for a xr.DataArray''' - if isinstance(xar, xr.DataArray) and 'ntv_type' not in xar.attrs: - xar.attrs |= {'ntv_type': Nutil.ntv_type(xar.data.dtype.name)} + """add 'ntv_type' as attribute for a xr.DataArray""" + if isinstance(xar, xr.DataArray) and "ntv_type" not in xar.attrs: + xar.attrs |= {"ntv_type": Nutil.ntv_type(xar.data.dtype.name)} return for coord in xar.coords: XarrayConnec._xr_add_type(coord) @@ -438,13 +478,13 @@ def _xr_add_type(xar): class ScippConnec: - ''' Scipp interface with two static methods ximport and xexport''' + """Scipp interface with two static methods ximport and xexport""" - SCTYPE_DTYPE = {'string': 'str'} + SCTYPE_DTYPE = {"string": "str"} @staticmethod def xexport(xdt, **kwargs): - '''return a sc.DataArray or a sc.Dataset from a xdataset + """return a sc.DataArray or a sc.Dataset from a xdataset *Parameters* @@ -453,24 +493,33 @@ def xexport(xdt, **kwargs): - **info** : Boolean (default True) - if True return a DataGroup with metadata and data_arrays - **ntv_type** : Boolean (default True) - if True add ntv-type to the name - ''' - option = {'dataset': True, 'info': True, - 'ntv_type': True} | kwargs - coords = dict([ScippConnec._to_scipp_var(xdt, name, **option) - for name in xdt.coordinates + xdt.dimensions + xdt.uniques - if xdt[name].mode == 'absolute']) - scd = sc.Dataset(dict([ScippConnec._to_sc_dataarray(xdt, name, coords, **option) - for name in xdt.data_vars - if xdt[name].mode == 'absolute'])) - scd = scd if option['dataset'] else scd[list(scd)[0]] - if not option['info']: + """ + option = {"dataset": True, "info": True, "ntv_type": True} | kwargs + coords = dict( + [ + ScippConnec._to_scipp_var(xdt, name, **option) + for name in xdt.coordinates + xdt.dimensions + xdt.uniques + if xdt[name].mode == "absolute" + ] + ) + scd = sc.Dataset( + dict( + [ + ScippConnec._to_sc_dataarray(xdt, name, coords, **option) + for name in xdt.data_vars + if xdt[name].mode == "absolute" + ] + ) + ) + scd = scd if option["dataset"] else scd[list(scd)[0]] + if not option["info"]: return scd - sc_name = xdt.name if xdt.name else 'no_name' + sc_name = xdt.name if xdt.name else "no_name" return sc.DataGroup({sc_name: scd} | ScippConnec._to_scipp_grp(xdt, **option)) @staticmethod def ximport(sc_obj, Xclass, **kwargs): - '''return a xdataset from a scipp object DataArray, Dataset or DataGroup''' + """return a xdataset from a scipp object DataArray, Dataset or DataGroup""" xnd = [] scd = sc_obj xnd_name = None @@ -481,16 +530,16 @@ def ximport(sc_obj, Xclass, **kwargs): xnd_name = obj break if isinstance(scd, sc.DataArray): - scd = sc.Dataset({(scd.name if scd.name else 'no_name'): scd}) + scd = sc.Dataset({(scd.name if scd.name else "no_name"): scd}) if isinstance(scd, sc.Dataset): for coord in scd.coords: - xnd += ScippConnec._var_sc_to_xnd( - scd.coords[coord], scd, coord) + xnd += ScippConnec._var_sc_to_xnd(scd.coords[coord], scd, coord) for var in scd: for mask in scd[var].masks: m_var = Nutil.split_json_name(var)[0] xnd += ScippConnec._var_sc_to_xnd( - scd[var].masks[mask], scd, mask, m_var) + scd[var].masks[mask], scd, mask, m_var + ) xnd += ScippConnec._var_sc_to_xnd(scd[var].data, scd, var) if isinstance(sc_obj, sc.DataGroup): xnd = ScippConnec._grp_sc_to_xnd(sc_obj, xnd) @@ -498,7 +547,7 @@ def ximport(sc_obj, Xclass, **kwargs): @staticmethod def _grp_sc_to_xnd(sc_obj, xnd): - '''return a list of Xndarray from a scipp variable''' + """return a list of Xndarray from a scipp variable""" dic_xnd = {xar.name: xar for xar in xnd} for obj in sc_obj: name, add_name = Nutil.split_name(obj) @@ -506,8 +555,7 @@ def _grp_sc_to_xnd(sc_obj, xnd): case [name, None, list()]: xnd += [Xndarray.read_json({name: sc_obj[obj]})] case [name, add_name, sc.Variable()]: - xnd += ScippConnec._var_sc_to_xnd( - sc_obj[obj], None, add_name, name) + xnd += ScippConnec._var_sc_to_xnd(sc_obj[obj], None, add_name, name) case [name, _, dict() | str() | list()] if name in dic_xnd: if dic_xnd[name].meta: dic_xnd[name].meta |= sc_obj[obj] @@ -515,31 +563,34 @@ def _grp_sc_to_xnd(sc_obj, xnd): dic_xnd[name].meta = sc_obj[obj] case [name, _, dict() | str() | list()]: xnd += [Xndarray.read_json({name: sc_obj[obj]})] - case [_, _, _]: ... + case [_, _, _]: + ... return xnd @staticmethod - def _var_sc_to_xnd(scv, scd=None, sc_name='', var=None): - '''return a list of Xndarray from a scipp variable + def _var_sc_to_xnd(scv, scd=None, sc_name="", var=None): + """return a list of Xndarray from a scipp variable - scd : scipp dataset - scv : scipp variable - var : name - - sc_name : scipp name''' + - sc_name : scipp name""" l_xnda = [] - unit = scv.unit.name if scv.unit and scv.unit not in [ - 'dimensionless', 'ns'] else '' + unit = ( + scv.unit.name + if scv.unit and scv.unit not in ["dimensionless", "ns"] + else "" + ) ext_name, typ1 = Nutil.split_json_name(sc_name, True) var_name, typ2 = Nutil.split_json_name(var, True) - full_name = var_name + \ - ('.' if var_name and ext_name else '') + ext_name + full_name = var_name + ("." if var_name and ext_name else "") + ext_name ntv_type_base = typ1 + typ2 - ntv_type = ntv_type_base + ('[' + unit + ']' if unit else '') + ntv_type = ntv_type_base + ("[" + unit + "]" if unit else "") links = [Nutil.split_json_name(jsn)[0] for jsn in scv.dims] if scd is not None and sc_name in scd.coords and scv.dims == scd.dims: links = [Nutil.split_json_name(list(scd)[0])[0]] if scv.variances is not None: nda = Ndarray(scv.variances, ntv_type_base) - l_xnda.append(Xndarray(full_name + '.variance', nda, links)) + l_xnda.append(Xndarray(full_name + ".variance", nda, links)) nda = Ndarray(scv.values, ntv_type, str_uri=False) shape = scv.shape if scv.shape else (1,) nda.set_shape(shape) @@ -548,47 +599,58 @@ def _var_sc_to_xnd(scv, scd=None, sc_name='', var=None): @staticmethod def _to_sc_dataarray(xdt, name, coords, **option): - '''return a scipp.DataArray from a xdataset.global_var defined by his name''' + """return a scipp.DataArray from a xdataset.global_var defined by his name""" scipp_name, data = ScippConnec._to_scipp_var(xdt, name, **option) - masks = dict([ScippConnec._to_scipp_var(xdt, nam, **option) - for nam in set(xdt.group(name)) & set(xdt.masks)]) + masks = dict( + [ + ScippConnec._to_scipp_var(xdt, nam, **option) + for nam in set(xdt.group(name)) & set(xdt.masks) + ] + ) return (scipp_name, sc.DataArray(data, coords=coords, masks=masks)) @staticmethod def _to_scipp_grp(xdt, **option): - '''return a dict with metadata, data-array and data_add from a xdataset''' + """return a dict with metadata, data-array and data_add from a xdataset""" grp = {} - grp |= dict([ScippConnec._to_scipp_var(xdt, name, **option) - for name in xdt.data_add + xdt.data_arrays - if xdt[name].add_name != 'variance']) - opt_mask = option | {'grp_mask': True} - grp |= dict([ScippConnec._to_scipp_var(xdt, name, **opt_mask) - for name in xdt.masks - if xdt[name].name in xdt.names and xdt[name].name not in xdt.data_vars]) - grp |= {name + '.meta': xdt[name].meta for name in xdt.names - if xdt[name].meta} + grp |= dict( + [ + ScippConnec._to_scipp_var(xdt, name, **option) + for name in xdt.data_add + xdt.data_arrays + if xdt[name].add_name != "variance" + ] + ) + opt_mask = option | {"grp_mask": True} + grp |= dict( + [ + ScippConnec._to_scipp_var(xdt, name, **opt_mask) + for name in xdt.masks + if xdt[name].name in xdt.names and xdt[name].name not in xdt.data_vars + ] + ) + grp |= {name + ".meta": xdt[name].meta for name in xdt.names if xdt[name].meta} for name in xdt.names: - if xdt[name].mode == 'relative': + if xdt[name].mode == "relative": grp |= xdt[name].to_json(header=False) return grp @staticmethod def _to_scipp_var(xdt, name, **kwargs): - '''return a scipp.Variable from a Xndarray defined by his name''' - option = {'grp_mask': False, 'ntv_type': True} | kwargs + """return a scipp.Variable from a Xndarray defined by his name""" + option = {"grp_mask": False, "ntv_type": True} | kwargs simple_type, unit = Nutil.split_type(xdt[name].ntv_type) - unit = unit if unit else '' + unit = unit if unit else "" add_name = Nutil.split_name(name)[1] - new_n = add_name if name in xdt.masks and not option['grp_mask'] else name - opt_n = option['ntv_type'] - scipp_name = new_n + (':' + simple_type if opt_n else '') + new_n = add_name if name in xdt.masks and not option["grp_mask"] else name + opt_n = option["ntv_type"] + scipp_name = new_n + (":" + simple_type if opt_n else "") if name in xdt.uniques: return (scipp_name, sc.scalar(xdt[name].darray[0], unit=unit)) - vari_name = name + '.variance' + vari_name = name + ".variance" variances = xdt[vari_name].darray if vari_name in xdt.names else None - dims = xdt.dims(name, opt_n) if xdt.dims( - name, opt_n) else [xdt[name].name] - var = sc.array(dims=['flat'], values=xdt.to_darray( - name), variances=variances, unit=unit) - var = sc.fold(var, dim='flat', sizes=dict(zip(dims, xdt[name].shape))) + dims = xdt.dims(name, opt_n) if xdt.dims(name, opt_n) else [xdt[name].name] + var = sc.array( + dims=["flat"], values=xdt.to_darray(name), variances=variances, unit=unit + ) + var = sc.fold(var, dim="flat", sizes=dict(zip(dims, xdt[name].shape))) return (scipp_name, var) diff --git a/ntv_numpy/xdataset.py b/ntv_numpy/xdataset.py index 7c81361..b828bcc 100644 --- a/ntv_numpy/xdataset.py +++ b/ntv_numpy/xdataset.py @@ -9,6 +9,7 @@ [user guide](https://loco-philippe.github.io/ntv-numpy/docs/user_guide.html) or the [github repository](https://github.com/loco-philippe/ntv-numpy). """ + from abc import ABC, abstractmethod import json import pprint @@ -20,24 +21,29 @@ class XdatasetCategory(ABC): - ''' category of Xndarray (dynamic tuple of full_name) - see Xdataset docstring''' + """category of Xndarray (dynamic tuple of full_name) - see Xdataset docstring""" xnd: list = NotImplemented names: list = NotImplemented @abstractmethod def dims(self, var, json_name=False): - '''method defined in Xdataset class''' + """method defined in Xdataset class""" @property def data_arrays(self): - '''return a tuple of data_arrays Xndarray full_name''' - return tuple(sorted(nda for nda in self.namedarrays - if nda not in self.dimensions + self.uniques)) + """return a tuple of data_arrays Xndarray full_name""" + return tuple( + sorted( + nda + for nda in self.namedarrays + if nda not in self.dimensions + self.uniques + ) + ) @property def dimensions(self): - '''return a tuple of dimensions Xndarray full_name''' + """return a tuple of dimensions Xndarray full_name""" dimable = [] for var in self.variables: dimable += self.dims(var) @@ -45,113 +51,160 @@ def dimensions(self): @property def shape(self): - '''return an array with the length of dimensions''' + """return an array with the length of dimensions""" return [len(self[dim]) for dim in self.dimensions] @property def coordinates(self): - '''return a tuple of coordinates Xndarray full_name''' + """return a tuple of coordinates Xndarray full_name""" dims = set(self.dimensions) if not dims: return () - return tuple(sorted(set(xnda.name for xnda in self.xnd - if xnda.xtype == 'variable' and set(xnda.links) != dims))) + return tuple( + sorted( + set( + xnda.name + for xnda in self.xnd + if xnda.xtype == "variable" and set(xnda.links) != dims + ) + ) + ) @property def data_vars(self): - '''return a tuple of data_vars Xndarray full_name''' + """return a tuple of data_vars Xndarray full_name""" dims = set(self.dimensions) if not dims: return self.variables - return tuple(sorted(xnda.name for xnda in self.xnd - if xnda.xtype == 'variable' and set(xnda.links) == dims)) + return tuple( + sorted( + xnda.name + for xnda in self.xnd + if xnda.xtype == "variable" and set(xnda.links) == dims + ) + ) @property def namedarrays(self): - '''return a tuple of namedarray Xndarray full_name''' - return tuple(sorted(xnda.name for xnda in self.xnd if xnda.xtype == 'namedarray')) + """return a tuple of namedarray Xndarray full_name""" + return tuple( + sorted(xnda.name for xnda in self.xnd if xnda.xtype == "namedarray") + ) @property def variables(self): - '''return a tuple of variables Xndarray full_name''' - return tuple(sorted(xnda.name for xnda in self.xnd if xnda.xtype == 'variable')) + """return a tuple of variables Xndarray full_name""" + return tuple(sorted(xnda.name for xnda in self.xnd if xnda.xtype == "variable")) @property def undef_vars(self): - '''return a tuple of variables Xndarray full_name with inconsistent shape''' - return tuple(sorted(var for var in self.variables if self[var].shape != - [len(self[dim]) for dim in self.dims(var)])) + """return a tuple of variables Xndarray full_name with inconsistent shape""" + return tuple( + sorted( + var + for var in self.variables + if self[var].shape != [len(self[dim]) for dim in self.dims(var)] + ) + ) @property def undef_links(self): - '''return a tuple of variables Xndarray full_name with inconsistent links''' - return tuple(sorted(link for var in self.variables for link in self[var].links - if link not in self.names)) + """return a tuple of variables Xndarray full_name with inconsistent links""" + return tuple( + sorted( + link + for var in self.variables + for link in self[var].links + if link not in self.names + ) + ) @property def masks(self): - '''return a tuple of additional Xndarray full_name with boolean ntv_type''' - return tuple(sorted(xnda.full_name for xnda in self.xnd - if xnda.xtype == 'additional' and xnda.ntv_type == 'boolean')) + """return a tuple of additional Xndarray full_name with boolean ntv_type""" + return tuple( + sorted( + xnda.full_name + for xnda in self.xnd + if xnda.xtype == "additional" and xnda.ntv_type == "boolean" + ) + ) @property def data_add(self): - '''return a tuple of additional Xndarray full_name with not boolean ntv_type''' - return tuple(sorted(xnda.full_name for xnda in self.xnd - if xnda.xtype == 'additional' and xnda.ntv_type != 'boolean')) + """return a tuple of additional Xndarray full_name with not boolean ntv_type""" + return tuple( + sorted( + xnda.full_name + for xnda in self.xnd + if xnda.xtype == "additional" and xnda.ntv_type != "boolean" + ) + ) @property def metadata(self): - '''return a tuple of metadata Xndarray full_name''' - return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == 'meta')) + """return a tuple of metadata Xndarray full_name""" + return tuple( + sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == "meta") + ) @property def uniques(self): - '''return a tuple of unique Xndarray full_name''' - return tuple(full_name for full_name in self.namedarrays if len(self[full_name]) == 1) + """return a tuple of unique Xndarray full_name""" + return tuple( + full_name for full_name in self.namedarrays if len(self[full_name]) == 1 + ) @property def additionals(self): - '''return a tuple of additionals Xndarray full_name''' - return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == 'additional')) + """return a tuple of additionals Xndarray full_name""" + return tuple( + sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == "additional") + ) def group(self, grp): - '''return a tuple of Xndarray full_name with the same name''' + """return a tuple of Xndarray full_name with the same name""" if isinstance(grp, str): - return tuple(sorted(xnda.full_name for xnda in self.xnd - if grp in (xnda.name, xnda.full_name))) + return tuple( + sorted( + xnda.full_name + for xnda in self.xnd + if grp in (xnda.name, xnda.full_name) + ) + ) return tuple(sorted(nam for gr_nam in grp for nam in self.group(gr_nam))) def add_group(self, add_name): - '''return a tuple of Xndarray full_name with the same add_name''' - return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.add_name == add_name)) + """return a tuple of Xndarray full_name with the same add_name""" + return tuple( + sorted(xnda.full_name for xnda in self.xnd if xnda.add_name == add_name) + ) class XdatasetInterface(ABC): - ''' Xdataset interface - see Xdataset docstring''' + """Xdataset interface - see Xdataset docstring""" name: str = NotImplemented xnd: list = NotImplemented @staticmethod def read_json(jsn, **kwargs): - ''' convert json data into a Xdataset. + """convert json data into a Xdataset. *Parameters* - **convert** : boolean (default True) - If True, convert json data with non Numpy ntv_type into Xndarray with python type - ''' - option = {'convert': True} | kwargs + """ + option = {"convert": True} | kwargs jso = json.loads(jsn) if isinstance(jsn, str) else jsn value, name = Ntv.decode_json(jso)[:2] - xnd = [Xndarray.read_json({key: val}, **option) - for key, val in value.items()] + xnd = [Xndarray.read_json({key: val}, **option) for key, val in value.items()] return Xdataset(xnd, name) def to_json(self, **kwargs): - ''' convert a Xdataset into json-value. + """convert a Xdataset into json-value. *Parameters* @@ -162,22 +215,45 @@ def to_json(self, **kwargs): - **noshape** : Boolean (default True) - if True, without shape if dim < 1 - **format** : list of string (default list of 'full') - representation format of the ndarray, - ''' - notype = kwargs['notype'] if ('notype' in kwargs and isinstance(kwargs['notype'], list) and - len(kwargs['notype']) == len(self)) else [False] * len(self) - forma = kwargs['format'] if ('format' in kwargs and isinstance(kwargs['format'], list) and - len(kwargs['format']) == len(self)) else ['full'] * len(self) - noshape = kwargs.get('noshape', True) + """ + notype = ( + kwargs["notype"] + if ( + "notype" in kwargs + and isinstance(kwargs["notype"], list) + and len(kwargs["notype"]) == len(self) + ) + else [False] * len(self) + ) + forma = ( + kwargs["format"] + if ( + "format" in kwargs + and isinstance(kwargs["format"], list) + and len(kwargs["format"]) == len(self) + ) + else ["full"] * len(self) + ) + noshape = kwargs.get("noshape", True) dic_xnd = {} for xna, notyp, forma in zip(self.xnd, notype, forma): - dic_xnd |= xna.to_json(notype=notyp, novalue=kwargs.get('novalue', False), - noshape=noshape, format=forma, header=False) - return Nutil.json_ntv(self.name, 'xdataset', dic_xnd, - header=kwargs.get('header', True), - encoded=kwargs.get('encoded', False)) + dic_xnd |= xna.to_json( + notype=notyp, + novalue=kwargs.get("novalue", False), + noshape=noshape, + format=forma, + header=False, + ) + return Nutil.json_ntv( + self.name, + "xdataset", + dic_xnd, + header=kwargs.get("header", True), + encoded=kwargs.get("encoded", False), + ) def to_xarray(self, **kwargs): - '''return a xr.DataArray or a xr.Dataset from a Xdataset + """return a xr.DataArray or a xr.Dataset from a Xdataset *Parameters* @@ -185,16 +261,16 @@ def to_xarray(self, **kwargs): return a xr.DataArray - **info** : Boolean (default True) - if True, add json representation of 'relative' Xndarrays and 'data_arrays' Xndarrays in attrs - ''' + """ return XarrayConnec.xexport(self, **kwargs) @staticmethod def from_xarray(xar, **kwargs): - '''return a Xdataset from a DataArray or a Dataset''' + """return a Xdataset from a DataArray or a Dataset""" return XarrayConnec.ximport(xar, Xdataset, **kwargs) def to_scipp(self, **kwargs): - '''return a sc.DataArray or a sc.Dataset from a Xdataset + """return a sc.DataArray or a sc.Dataset from a Xdataset *Parameters* @@ -203,25 +279,25 @@ def to_scipp(self, **kwargs): - **info** : Boolean (default True) - if True return an additional DataGroup with metadata and data_arrays - **ntv_type** : Boolean (default True) - if True add ntv_type to the name - ''' + """ return ScippConnec.xexport(self, **kwargs) @staticmethod def from_scipp(sci, **kwargs): - '''return a Xdataset from a scipp object DataArray, Dataset or DataGroup''' + """return a Xdataset from a scipp object DataArray, Dataset or DataGroup""" return ScippConnec.ximport(sci, Xdataset, **kwargs) def to_nddata(self, **kwargs): - '''return a NDData from a Xdataset''' + """return a NDData from a Xdataset""" return AstropyNDDataConnec.xexport(self, **kwargs) @staticmethod def from_nddata(ndd, **kwargs): - '''return a Xdataset from a NDData''' + """return a Xdataset from a NDData""" return AstropyNDDataConnec.ximport(ndd, Xdataset, **kwargs) def to_dataframe(self, **kwargs): - '''return a pd.DataFrame from a Xdataset + """return a pd.DataFrame from a Xdataset *Parameters* @@ -229,22 +305,22 @@ def to_dataframe(self, **kwargs): - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs - **dims**: list of string (default None) - order of dimensions full_name to apply - **index**: Boolean (default True) - if True, dimensions are translated into indexes - ''' + """ return PandasConnec.xexport(self, **kwargs) @staticmethod def from_dataframe(dfr, **kwargs): - '''return a Xdataset from a pd.DataFrame + """return a Xdataset from a pd.DataFrame *Parameters* - dims: list of string (default None) - order of dimensions to apply - ''' + """ return PandasConnec.ximport(dfr, Xdataset, **kwargs) class Xdataset(XdatasetCategory, XdatasetInterface): - ''' Representation of a multidimensional Dataset + """Representation of a multidimensional Dataset *Attributes :* - **name** : String - name of the Xdataset @@ -296,16 +372,16 @@ class Xdataset(XdatasetCategory, XdatasetInterface): - `to_nddata` - `from_dataframe` (static) - `to_dataframe` - ''' + """ def __init__(self, xnd=None, name=None): - '''Xdataset constructor + """Xdataset constructor - *Parameters* + *Parameters* - - **xnd** : Xdataset/Xndarray/list of Xndarray (default None), - - **name** : String (default None) - name of the Xdataset - ''' + - **xnd** : Xdataset/Xndarray/list of Xndarray (default None), + - **name** : String (default None) - name of the Xdataset + """ self.name = name match xnd: case list(): @@ -319,16 +395,21 @@ def __init__(self, xnd=None, name=None): self.xnd = [] def __repr__(self): - '''return classname and number of value''' - return (self.__class__.__name__ + '[' + str(len(self)) + ']\n' + - pprint.pformat(self.to_json(novalue=True, header=False, noshape=False))) + """return classname and number of value""" + return ( + self.__class__.__name__ + + "[" + + str(len(self)) + + "]\n" + + pprint.pformat(self.to_json(novalue=True, header=False, noshape=False)) + ) def __str__(self): - '''return json string format''' + """return json string format""" return json.dumps(self.to_json()) def __eq__(self, other): - '''equal if xnd are equal''' + """equal if xnd are equal""" for xnda in self.xnd: if xnda not in other: return False @@ -338,20 +419,20 @@ def __eq__(self, other): return True def __len__(self): - '''number of Xndarray''' + """number of Xndarray""" return len(self.xnd) def __contains__(self, item): - ''' item of xnd''' + """item of xnd""" return item in self.xnd def __getitem__(self, selec): - ''' return Xndarray or tuple of Xndarray with selec: - - string : name of a xndarray, - - integer : index of a xndarray, - - index selector : index interval - - tuple : names or index ''' - if selec is None or selec == '' or selec in ([], ()): + """return Xndarray or tuple of Xndarray with selec: + - string : name of a xndarray, + - integer : index of a xndarray, + - index selector : index interval + - tuple : names or index""" + if selec is None or selec == "" or selec in ([], ()): return self if isinstance(selec, (list, tuple)) and len(selec) == 1: selec = selec[0] @@ -364,7 +445,7 @@ def __getitem__(self, selec): return self.xnd[selec] def __delitem__(self, ind): - '''remove a Xndarray (ind is index, name or tuple of names).''' + """remove a Xndarray (ind is index, name or tuple of names).""" if isinstance(ind, int): del self.xnd[ind] elif isinstance(ind, str): @@ -375,23 +456,23 @@ def __delitem__(self, ind): del self[i] def __copy__(self): - ''' Copy all the data ''' + """Copy all the data""" return self.__class__(self) def parent(self, var): - '''return the Xndarray parent (where the full_name is equal to the name)''' + """return the Xndarray parent (where the full_name is equal to the name)""" if var.name in self.names: return self[var.name] return var def dims(self, var, json_name=False): - '''return the list of parent namedarrays of the links of a Xndarray + """return the list of parent namedarrays of the links of a Xndarray *parameters* - **var**: string - full_name of the Xndarray - **json_name**: boolean (defaut False) - if True return json_name else full_name - ''' + """ if var not in self.names: return None if self[var].add_name and not self[var].links: @@ -402,123 +483,133 @@ def dims(self, var, json_name=False): return None list_dims = [] for link in self[var].links: - list_dims += self.dims(link, json_name) if self.dims(link, - json_name) else [link] + list_dims += ( + self.dims(link, json_name) if self.dims(link, json_name) else [link] + ) return list_dims def shape_dims(self, var): - '''return a shape with the dimensions associated to the var full_name''' - return [len(self[dim]) for dim in self.dims(var) - ] if set(self.dims(var)) <= set(self.names) else None + """return a shape with the dimensions associated to the var full_name""" + return ( + [len(self[dim]) for dim in self.dims(var)] + if set(self.dims(var)) <= set(self.names) + else None + ) @property def validity(self): - '''return the validity state: 'inconsistent', 'undifined' or 'valid' ''' + """return the validity state: 'inconsistent', 'undifined' or 'valid'""" for xnda in self: - if xnda.mode in ['relative', 'inconsistent']: - return 'undefined' + if xnda.mode in ["relative", "inconsistent"]: + return "undefined" if self.undef_links or self.undef_vars: - return 'inconsistent' - return 'valid' + return "inconsistent" + return "valid" @property def xtype(self): - '''return the Xdataset type: 'meta', 'group', 'mono', 'multi' ''' - if self.metadata and not (self.additionals or self.variables or - self.namedarrays): - return 'meta' - if self.validity != 'valid': - return 'group' + """return the Xdataset type: 'meta', 'group', 'mono', 'multi'""" + if self.metadata and not ( + self.additionals or self.variables or self.namedarrays + ): + return "meta" + if self.validity != "valid": + return "group" match len(self.data_vars): case 0: - return 'group' + return "group" case 1: - return 'mono' + return "mono" case _: - return 'multi' + return "multi" @property def dic_xnd(self): - '''return a dict of Xndarray where key is the full_name''' + """return a dict of Xndarray where key is the full_name""" return {xnda.full_name: xnda for xnda in self.xnd} @property def length(self): - '''return the max length of Xndarray''' + """return the max length of Xndarray""" return max(len(xnda) for xnda in self.xnd) @property def names(self): - '''return a tuple with the Xndarray full_name''' + """return a tuple with the Xndarray full_name""" return tuple(xnda.full_name for xnda in self.xnd) @property def partition(self): - '''return a dict of Xndarray grouped with category''' + """return a dict of Xndarray grouped with category""" dic = {} - dic |= {'data_vars': list(self.data_vars)} if self.data_vars else {} - dic |= {'data_arrays': list(self.data_arrays) - } if self.data_arrays else {} - dic |= {'dimensions': list(self.dimensions)} if self.dimensions else {} - dic |= {'coordinates': list(self.coordinates) - } if self.coordinates else {} - dic |= {'additionals': list(self.additionals) - } if self.additionals else {} - dic |= {'metadata': list(self.metadata)} if self.metadata else {} - dic |= {'uniques': list(self.uniques)} if self.uniques else {} + dic |= {"data_vars": list(self.data_vars)} if self.data_vars else {} + dic |= {"data_arrays": list(self.data_arrays)} if self.data_arrays else {} + dic |= {"dimensions": list(self.dimensions)} if self.dimensions else {} + dic |= {"coordinates": list(self.coordinates)} if self.coordinates else {} + dic |= {"additionals": list(self.additionals)} if self.additionals else {} + dic |= {"metadata": list(self.metadata)} if self.metadata else {} + dic |= {"uniques": list(self.uniques)} if self.uniques else {} return dic @property def info(self): - '''return a dict with Xdataset information ''' - inf = {'name': self.name, 'xtype': self.xtype} | self.partition - inf['validity'] = self.validity - inf['length'] = len(self[self.data_vars[0]]) if self.data_vars else 0 - inf['width'] = len(self) - data = {name: {key: val for key, val in self[name].info.items() if key != 'name'} - for name in self.names} - return {'structure': {key: val for key, val in inf.items() if val}, - 'data': {key: val for key, val in data.items() if val}} + """return a dict with Xdataset information""" + inf = {"name": self.name, "xtype": self.xtype} | self.partition + inf["validity"] = self.validity + inf["length"] = len(self[self.data_vars[0]]) if self.data_vars else 0 + inf["width"] = len(self) + data = { + name: {key: val for key, val in self[name].info.items() if key != "name"} + for name in self.names + } + return { + "structure": {key: val for key, val in inf.items() if val}, + "data": {key: val for key, val in data.items() if val}, + } @property def tab_info(self): - '''return a dict with Xdataset information for tabular interface''' + """return a dict with Xdataset information for tabular interface""" info = self.info - data = info['data'] + data = info["data"] t_info = {} - if 'dimensions' in info['structure']: - t_info['dimensions'] = info['structure']['dimensions'] - t_info['data'] = {name: {key: val for key, val in data[name].items() - if key in ['shape', 'xtype', 'meta', 'links']} - for name in data} + if "dimensions" in info["structure"]: + t_info["dimensions"] = info["structure"]["dimensions"] + t_info["data"] = { + name: { + key: val + for key, val in data[name].items() + if key in ["shape", "xtype", "meta", "links"] + } + for name in data + } return t_info def to_canonical(self): - '''remove optional links of the included Xndarray''' + """remove optional links of the included Xndarray""" for name in self.names: if self[name].links in ([self[name].name], [name]): self[name].links = None for add in self.additionals: - if self[add].links in [self[self[add].name].links, - [self[add].name]]: + if self[add].links in [self[self[add].name].links, [self[add].name]]: self[add].links = None for unic in self.uniques: self[unic].links = None return self def to_ndarray(self, full_name): - '''convert a Xndarray from a Xdataset in a np.ndarray''' + """convert a Xndarray from a Xdataset in a np.ndarray""" if self.shape_dims(full_name) is None: data = self[full_name].ndarray else: data = self[full_name].darray.reshape(self.shape_dims(full_name)) - if data.dtype.name[:8] == 'datetime': - data = data.astype('datetime64[ns]') + if data.dtype.name[:8] == "datetime": + data = data.astype("datetime64[ns]") return data def to_darray(self, full_name): - '''convert a Xndarray from a Xdataset in a flattened np.ndarray''' + """convert a Xndarray from a Xdataset in a flattened np.ndarray""" data = self[full_name].darray - if data.dtype.name[:8] == 'datetime': - data = data.astype('datetime64[ns]') + if data.dtype.name[:8] == "datetime": + data = data.astype("datetime64[ns]") return data diff --git a/ntv_numpy/xndarray.py b/ntv_numpy/xndarray.py index 4365209..25f7c4f 100644 --- a/ntv_numpy/xndarray.py +++ b/ntv_numpy/xndarray.py @@ -18,7 +18,7 @@ class Xndarray: - ''' Representation of a labelled multidimensional Array + """Representation of a labelled multidimensional Array *Attributes :* - **name** : string - name of the Xndarray @@ -43,11 +43,10 @@ class Xndarray: - `to_json` - `read_json (static method)` - `set_ndarray` - ''' + """ - def __init__(self, full_name, nda=None, links=None, - meta=None): - '''Xndarray constructor. + def __init__(self, full_name, nda=None, links=None, meta=None): + """Xndarray constructor. *Parameters* @@ -56,7 +55,7 @@ def __init__(self, full_name, nda=None, links=None, - **links**: List of string (default None) - dims or other names of associated Xndarray - **ntv_type**: string (default None) - ntv_type to apply to data - **meta**: dict (default None) - information - ''' + """ if isinstance(full_name, Xndarray): self.name = full_name.name self.add_name = full_name.add_name @@ -69,18 +68,18 @@ def __init__(self, full_name, nda=None, links=None, self.links = links if links else None self.meta = meta if meta else None if self.meta is None and self.nda is None: - raise NdarrayError('A Xndarray has to have metadata or Ndarray') + raise NdarrayError("A Xndarray has to have metadata or Ndarray") def __repr__(self): - '''return classname and number of value''' - return self.__class__.__name__ + '[' + self.full_name + ']' + """return classname and number of value""" + return self.__class__.__name__ + "[" + self.full_name + "]" def __str__(self): - '''return json string format''' + """return json string format""" return json.dumps(self.to_json()) def __eq__(self, other): - ''' equal if attributes are equal''' + """equal if attributes are equal""" if self.name != other.name or self.add_name != other.add_name: return False if self.links != other.links or self.meta != other.meta: @@ -92,15 +91,15 @@ def __eq__(self, other): return self.nda == other.nda def __len__(self): - ''' len of ndarray''' + """len of ndarray""" return len(self.nda) if self.nda is not None else 0 def __contains__(self, item): - ''' item of ndarray values''' + """item of ndarray values""" return item in self.nda if self.nda is not None else None def __getitem__(self, ind): - ''' return ndarray value item''' + """return ndarray value item""" if self.nda is None: return None if isinstance(ind, tuple): @@ -108,118 +107,124 @@ def __getitem__(self, ind): return self.nda[ind] def __copy__(self): - ''' Copy all the data ''' + """Copy all the data""" return self.__class__(self) @property def darray(self): - '''return the darray of the ndarray''' + """return the darray of the ndarray""" return self.nda.darray if self.nda is not None else None @property def ndarray(self): - '''return the darray of the ndarray''' + """return the darray of the ndarray""" return self.nda.ndarray if self.nda is not None else None @property def uri(self): - '''return the uri of the ndarray''' + """return the uri of the ndarray""" return self.nda.uri if self.nda is not None else None @property def shape(self): - '''return the shape of the ndarray''' + """return the shape of the ndarray""" return self.nda.shape if self.nda is not None else None @property def ntv_type(self): - '''return the ntv_type of the ndarray''' + """return the ntv_type of the ndarray""" return self.nda.ntv_type if self.nda is not None else None @property def mode(self): - '''return the mode of the ndarray''' - return self.nda.mode if self.nda is not None else 'undefined' + """return the mode of the ndarray""" + return self.nda.mode if self.nda is not None else "undefined" @property def info(self): - ''' infos of the Xndarray''' - inf = {'name': self.full_name} - inf['length'] = len(self) + """infos of the Xndarray""" + inf = {"name": self.full_name} + inf["length"] = len(self) if self.nda: - inf['mode'] = self.mode - inf['ntvtype'] = self.ntv_type - inf['shape'] = self.shape - inf['uri'] = self.uri - inf['meta'] = self.meta - inf['xtype'] = self.xtype - inf['links'] = self.links + inf["mode"] = self.mode + inf["ntvtype"] = self.ntv_type + inf["shape"] = self.shape + inf["uri"] = self.uri + inf["meta"] = self.meta + inf["xtype"] = self.xtype + inf["links"] = self.links return {key: val for key, val in inf.items() if val} @property def xtype(self): - '''nature of the Xndarray (undefined, namedarray, variable, additional, - meta, inconsistent)''' + """nature of the Xndarray (undefined, namedarray, variable, additional, + meta, inconsistent)""" match [self.links, self.add_name, self.mode]: - case [_, _, 'inconsistent']: - return 'inconsistent' - case [_, _, 'undefined']: - return 'meta' - case [None, '', _]: - return 'namedarray' - case [_, '', _]: - return 'variable' + case [_, _, "inconsistent"]: + return "inconsistent" + case [_, _, "undefined"]: + return "meta" + case [None, "", _]: + return "namedarray" + case [_, "", _]: + return "variable" case [_, str(), _]: - return 'additional' + return "additional" case _: - return 'inconsistent' + return "inconsistent" @property def full_name(self): - '''concatenation of name and additional name''' - add_name = '.' + self.add_name if self.add_name else '' + """concatenation of name and additional name""" + add_name = "." + self.add_name if self.add_name else "" return self.name + add_name @property def json_name(self): - '''concatenation of full_name and ntv_type''' - add_ntv_type = ':' + self.ntv_type if self.ntv_type else '' + """concatenation of full_name and ntv_type""" + add_ntv_type = ":" + self.ntv_type if self.ntv_type else "" return self.full_name + add_ntv_type @staticmethod def read_json(jsn, **kwargs): - ''' convert json data into a Xndarray. + """convert json data into a Xndarray. *Parameters* - **convert** : boolean (default True) - If True, convert json data with non Numpy ntv_type into data with python type - ''' - option = {'convert': True} | kwargs + """ + option = {"convert": True} | kwargs jso = json.loads(jsn) if isinstance(jsn, str) else jsn value, full_name = Ntv.decode_json(jso)[:2] meta = links = nda = None match value: - case str(meta) | dict(meta): ... - case [list(nda)]: ... - case [list(nda), list(links)]: ... - case [list(nda), dict(meta)] | [list(nda), str(meta)]: ... - case [list(nda), list(links), dict(meta)]: ... - case [list(nda), list(links), str(meta)]: ... + case str(meta) | dict(meta): + ... + case [list(nda)]: + ... + case [list(nda), list(links)]: + ... + case [list(nda), dict(meta)] | [list(nda), str(meta)]: + ... + case [list(nda), list(links), dict(meta)]: + ... + case [list(nda), list(links), str(meta)]: + ... case _: return None nda = Ndarray.read_json(nda, **option) if nda else None return Xndarray(full_name, links=links, meta=meta, nda=nda) def set_ndarray(self, ndarray, nda_uri=True): - '''set a new ndarray (nda) and return the result (True, False) + """set a new ndarray (nda) and return the result (True, False) *Parameters* - **ndarray** : string, list, np.ndarray, Ndarray - data to include - **nda_uri** : boolean (default True) - if True, existing shape and - ntv_type are not updated (but are created if not existing)''' + ntv_type are not updated (but are created if not existing)""" ndarray = Ndarray(ndarray) if self.nda is not None: return self.nda.update(ndarray, nda_uri=nda_uri) @@ -227,7 +232,7 @@ def set_ndarray(self, ndarray, nda_uri=True): return True def to_json(self, **kwargs): - ''' convert a Xndarray into json-value. + """convert a Xndarray into json-value. *Parameters* @@ -239,22 +244,37 @@ def to_json(self, **kwargs): - **noshape** : Boolean (default True) - if True, without shape if dim < 1 - **format** : string (default 'full') - representation format of the ndarray, - **extension** : string (default None) - type extension - ''' - option = {'notype': False, 'format': 'full', - 'noshape': True, 'header': True, 'encoded': False, - 'novalue': False, 'noname': False} | kwargs - if not option['format'] in ['full', 'complete']: - option['noshape'] = False - opt_nda = option | {'header': False} + """ + option = { + "notype": False, + "format": "full", + "noshape": True, + "header": True, + "encoded": False, + "novalue": False, + "noname": False, + } | kwargs + if option["format"] not in ["full", "complete"]: + option["noshape"] = False + opt_nda = option | {"header": False} nda_str = self.nda.to_json(**opt_nda) if self.nda is not None else None lis = [nda_str, self.links, self.meta] lis = [val for val in lis if val is not None] - return Nutil.json_ntv(None if option['noname'] else self.full_name, - None if option['noname'] else 'xndarray', - lis[0] if lis == [self.meta] else lis, - header=option['header'], encoded=option['encoded']) + return Nutil.json_ntv( + None if option["noname"] else self.full_name, + None if option["noname"] else "xndarray", + lis[0] if lis == [self.meta] else lis, + header=option["header"], + encoded=option["encoded"], + ) def _to_json(self): - '''return dict of attributes''' - return {'name': self.name, 'ntv_type': self.ntv_type, 'uri': self.uri, - 'nda': self.nda, 'meta': self.meta, 'links': self.links} + """return dict of attributes""" + return { + "name": self.name, + "ntv_type": self.ntv_type, + "uri": self.uri, + "nda": self.nda, + "meta": self.meta, + "links": self.links, + } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..36633dc --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[tool.numpydoc_validation] +checks = [ + "all", + "EX01", + "SA01", + "ES01", +] + +[tool.ruff] +# E402: module level import not at top of file +extend-exclude = [ + "doc", +] +target-version = "py310" + +[tool.ruff.lint] +# E402: module level import not at top of file +extend-safe-fixes = [ + "TID252", # absolute imports +] +ignore = [ + "E402", +] diff --git a/setup.cfg b/setup.cfg index ee6a564..dadae66 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,4 +1,4 @@ [metadata] # This includes the license file(s) in the wheel. # https://wheel.readthedocs.io/en/stable/user_guide.html#including-license-files-in-the-generated-wheel-file -license_files = LICENSE.txt \ No newline at end of file +license_files = LICENSE.txt diff --git a/setup.py b/setup.py index f75fc84..5738c15 100644 --- a/setup.py +++ b/setup.py @@ -23,10 +23,11 @@ "Intended Audience :: Developers", "Topic :: Software Development :: Build Tools", "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3"], + "Programming Language :: Python :: 3", + ], keywords="numpy, JSON-NTV, semantic JSON, development, environmental data, multidimensional", - packages=find_packages(include=['ntv_numpy', 'ntv_numpy.*']), - package_data={'ntv_numpy': ['*.ini']}, + packages=find_packages(include=["ntv_numpy", "ntv_numpy.*"]), + package_data={"ntv_numpy": ["*.ini"]}, python_requires=">=3.10, <4", - install_requires=['json_ntv', 'numpy', 'shapely'] + install_requires=["json_ntv", "numpy", "shapely"], ) diff --git a/tests/tests_ntv_numpy.py b/tests/tests_ntv_numpy.py index f5d2904..3d338e3 100644 --- a/tests/tests_ntv_numpy.py +++ b/tests/tests_ntv_numpy.py @@ -5,6 +5,7 @@ The `test_ntv_numpy` module contains the unit tests (class unittest) for the `Darray`, `Ndarray` and `Xndarray` classes. """ + import unittest from decimal import Decimal from datetime import date, time @@ -18,22 +19,21 @@ from ntv_numpy import Darray, Dfull, Ndarray, Xndarray, Xdataset, Dutil from ntv_numpy.xconnector import PandasConnec -import ntv_pandas as npd # activation of pandas ntv connector nd_equals = Dutil.equals -FILE = 'https://raw.githubusercontent.com/loco-philippe/ntv-numpy/master/example/ex_ndarray.ntv' +FILE = "https://raw.githubusercontent.com/loco-philippe/ntv-numpy/master/example/ex_ndarray.ntv" class TestDarray(unittest.TestCase): - '''test Darray class''' + """test Darray class""" def test_darray_simple(self): - '''test Darray''' + """test Darray""" example = [ - ([1, 2], 'Dfull'), - ([[1, 2], [0, 1]], 'Dcomplete'), - ([[10, 20], [1, 2]], 'Dfull'), - ([[[10, 20], [1, 2]], [0, 1]], 'Dcomplete') + ([1, 2], "Dfull"), + ([[1, 2], [0, 1]], "Dcomplete"), + ([[10, 20], [1, 2]], "Dfull"), + ([[[10, 20], [1, 2]], [0, 1]], "Dcomplete"), ] for index, ex in enumerate(example): @@ -41,27 +41,32 @@ def test_darray_simple(self): self.assertEqual(da.__class__.__name__, ex[1]) self.assertEqual(len(da), len(ex[0])) match ex[1]: - case 'Dfull': + case "Dfull": self.assertIsNone(da.ref) self.assertTrue(nd_equals(np.array(None), da.coding)) self.assertTrue(nd_equals(da.data, da.values)) - case 'Dcomplete': - da_full = Darray.read_json(example[index-1][0]) + case "Dcomplete": + da_full = Darray.read_json(example[index - 1][0]) self.assertIsNone(da.ref) self.assertFalse(nd_equals(np.array(None), da.coding)) self.assertTrue(nd_equals(da_full.values, da.values)) def test_darray_dtype(self): - '''test Darray''' - self.assertEqual(Darray.read_json([1, 'two'], dtype='object').to_json(), - [1, 'two']) + """test Darray""" + self.assertEqual( + Darray.read_json([1, "two"], dtype="object").to_json(), [1, "two"] + ) def test_darray_nested(self): - '''test Darray''' + """test Darray""" example = [ - np.array([np.array([1, 2], dtype='int64'), - np.array(['test1', 'test2'], dtype='str_')], - dtype='object') + np.array( + [ + np.array([1, 2], dtype="int64"), + np.array(["test1", "test2"], dtype="str_"), + ], + dtype="object", + ) ] for ex in example: @@ -74,29 +79,29 @@ def test_darray_nested(self): class TestNdarray(unittest.TestCase): - '''test Ndarray class''' + """test Ndarray class""" def test_update(self): - '''test Ndarray''' + """test Ndarray""" nda = Ndarray.read_json([[2, 2], [1, 2, 3, 4]]) nda1 = Ndarray.read_json([[2, 2], [1, 2, 3, 4]]) - self.assertTrue(nda.update(Ndarray.read_json(['ex_nda2']))) + self.assertTrue(nda.update(Ndarray.read_json(["ex_nda2"]))) self.assertTrue(nda.update(Ndarray.read_json([[2, 2], [1, 2, 3, 4]]))) self.assertEqual(nda, nda1) def test_set_array_uri(self): - '''test Ndarray''' - ndas = Ndarray.read_json([[2, 2], 'uri']) + """test Ndarray""" + ndas = Ndarray.read_json([[2, 2], "uri"]) self.assertFalse(ndas.set_array([1, 2, 3])) self.assertTrue(ndas.set_array([[1, 2], [4, 3]])) self.assertFalse(ndas.set_array([[1.1, 2], [4, 3]])) self.assertTrue(ndas.set_array([[10, 20], [40, 30]])) self.assertFalse(ndas.set_array([[10, 20], [40, 30], [40, 30]])) - self.assertTrue(ndas.set_uri('uri', no_ntv_type=True)) - self.assertEqual(ndas, Ndarray.read_json([[2, 2], 'uri'])) + self.assertTrue(ndas.set_uri("uri", no_ntv_type=True)) + self.assertEqual(ndas, Ndarray.read_json([[2, 2], "uri"])) def test_ndarray_null(self): - '''test Ndarray''' + """test Ndarray""" example = [[[], None]] for ex in example: @@ -109,36 +114,42 @@ def test_ndarray_null(self): self.assertEqual(js, ex_rt.to_json()) ex_rt = Ndarray.read_json(js, convert=False) # print(js, ex_rt.to_json(format=format)) - self.assertEqual(js[':ndarray'][1], ex_rt.to_json()[':ndarray'][1]) + self.assertEqual(js[":ndarray"][1], ex_rt.to_json()[":ndarray"][1]) # print(np.array_equal(ex_rt, arr), ex_rt, ex_rt.dtype) def test_ndarray_simple2(self): - '''test Ndarray''' - example = [[[1, 2], 'int64'], - [[1, 2], None], - [[True, False], 'boolean'], - # [['1+2j', 1], 'complex'], - [['test1', 'test2'], 'string'], - [['2022-01-01T10:05:21.0002', '2023-01-01T10:05:21.0002'], 'datetime'], - [['2022-01-01', '2023-01-01'], 'date'], - [['2022-01', '2023-01'], 'yearmonth'], - [['2022', '2023'], 'year'], - # [[1,2], 'timedelta[D]'], - [[b'abc\x09', b'abc'], 'base16'], - [[time(10, 2, 3), time(20, 2, 3)], 'time'], - [[{'one': 1}, {'two': 2}], 'object'], - [[None, None], 'null'], - [[Decimal('10.5'), Decimal('20.5')], 'decimal64'], - [[Point([1, 2]), Point([3, 4])], 'point'], - # [[Ntv.obj({':point':[1,2]}), NtvSingle(12, 'noon', 'hour')], 'ntv'], - [[LineString([[0, 0], [0, 1], [1, 1], [0, 0]]), - LineString([[0, 0], [0, 10], [10, 10], [0, 0]])], 'line'] - ] + """test Ndarray""" + example = [ + [[1, 2], "int64"], + [[1, 2], None], + [[True, False], "boolean"], + # [['1+2j', 1], 'complex'], + [["test1", "test2"], "string"], + [["2022-01-01T10:05:21.0002", "2023-01-01T10:05:21.0002"], "datetime"], + [["2022-01-01", "2023-01-01"], "date"], + [["2022-01", "2023-01"], "yearmonth"], + [["2022", "2023"], "year"], + # [[1,2], 'timedelta[D]'], + [[b"abc\x09", b"abc"], "base16"], + [[time(10, 2, 3), time(20, 2, 3)], "time"], + [[{"one": 1}, {"two": 2}], "object"], + [[None, None], "null"], + [[Decimal("10.5"), Decimal("20.5")], "decimal64"], + [[Point([1, 2]), Point([3, 4])], "point"], + # [[Ntv.obj({':point':[1,2]}), NtvSingle(12, 'noon', 'hour')], 'ntv'], + [ + [ + LineString([[0, 0], [0, 1], [1, 1], [0, 0]]), + LineString([[0, 0], [0, 10], [10, 10], [0, 0]]), + ], + "line", + ], + ] for ex in example: # print(ex[0], ex[1]) arr = Ndarray(ex[0], ntv_type=ex[1]) - for forma in ['full', 'complete']: + for forma in ["full", "complete"]: js = arr.to_json(format=forma) # print(js) ex_rt = Ndarray.read_json(js) @@ -147,12 +158,13 @@ def test_ndarray_simple2(self): self.assertEqual(js, ex_rt.to_json(format=forma)) ex_rt = Ndarray.read_json(js, convert=False) # print(js, ex_rt.to_json(format=format)) - self.assertEqual(js[':ndarray'][1], ex_rt.to_json( - format=forma)[':ndarray'][1]) + self.assertEqual( + js[":ndarray"][1], ex_rt.to_json(format=forma)[":ndarray"][1] + ) # print(np.array_equal(ex_rt, arr), ex_rt, ex_rt.dtype) if len(ex[0]) == 2: arr = Ndarray(ex[0], ntv_type=ex[1], shape=[2, 1]) - for forma in ['full', 'complete']: + for forma in ["full", "complete"]: # print(ex, format) js = arr.to_json(format=forma) # print(js) @@ -160,19 +172,38 @@ def test_ndarray_simple2(self): self.assertEqual(ex_rt, arr) def test_ndarray_nested2(self): - '''test Ndarray''' - example = [[[[1, 2], [3, 4]], 'array'], - [[np.array([1, 2], dtype='int64'), np.array( - ['test1', 'test2'], dtype='str_')], 'narray'], - [[pd.Series([1, 2, 3]), pd.Series([4, 5, 6])], 'field'], - [[pd.DataFrame({'::date': pd.Series([date(1964, 1, 1), date(1985, 2, 5)]), - 'names': ['john', 'eric']}), - pd.DataFrame({'::date': pd.Series([date(1984, 1, 1), date(1995, 2, 5)]), - 'names': ['anna', 'erich']})], 'tab'] - ] + """test Ndarray""" + example = [ + [[[1, 2], [3, 4]], "array"], + [ + [ + np.array([1, 2], dtype="int64"), + np.array(["test1", "test2"], dtype="str_"), + ], + "narray", + ], + [[pd.Series([1, 2, 3]), pd.Series([4, 5, 6])], "field"], + [ + [ + pd.DataFrame( + { + "::date": pd.Series([date(1964, 1, 1), date(1985, 2, 5)]), + "names": ["john", "eric"], + } + ), + pd.DataFrame( + { + "::date": pd.Series([date(1984, 1, 1), date(1995, 2, 5)]), + "names": ["anna", "erich"], + } + ), + ], + "tab", + ], + ] for ex in example: arr = Ndarray(ex[0], shape=[2], ntv_type=ex[1]) - for forma in ['full', 'complete']: + for forma in ["full", "complete"]: js = arr.to_json(format=forma) # print(js) ex_rt = Ndarray.read_json(js) @@ -180,25 +211,25 @@ def test_ndarray_nested2(self): # print(nd_equals(ex_rt, arr), ex_rt, ex_rt.dtype) def test_ndarray_ntvtype2(self): - '''test Ndarray''' - example = [['int64[kg]', [[1, 2], [3, 4]]], - ['int', [[1, 2], [3, 4]]], - ['json', [1, 'two']], - ['month', [1, 2]], - ['base16', ['1F23', '236A5E']], - ['duration', ['P3Y6M4DT12H30M5S', 'P3Y6M4DT12H30M']], - ['uri', ['geo:13.4125,103.86673', 'geo:13.41,103.86']], - ['email', ['John Doe ', - 'Anna Doe ']], - # ['$org.propertyID', ['NO2', 'NH3']] - ['ipv4', ['192.168.1.1', '192.168.2.5']], - [None, ['a', 's']], - # [None, 'uri'], - ['float', 'uri'], - ] + """test Ndarray""" + example = [ + ["int64[kg]", [[1, 2], [3, 4]]], + ["int", [[1, 2], [3, 4]]], + ["json", [1, "two"]], + ["month", [1, 2]], + ["base16", ["1F23", "236A5E"]], + ["duration", ["P3Y6M4DT12H30M5S", "P3Y6M4DT12H30M"]], + ["uri", ["geo:13.4125,103.86673", "geo:13.41,103.86"]], + ["email", ["John Doe ", "Anna Doe "]], + # ['$org.propertyID', ['NO2', 'NH3']] + ["ipv4", ["192.168.1.1", "192.168.2.5"]], + [None, ["a", "s"]], + # [None, 'uri'], + ["float", "uri"], + ] for ex in example: arr = Ndarray(ex[1], ntv_type=ex[0]) - for forma in ['full', 'complete']: + for forma in ["full", "complete"]: js = arr.to_json(format=forma) # print(js) ex_rt = Ndarray.read_json(js) @@ -206,44 +237,44 @@ def test_ndarray_ntvtype2(self): self.assertEqual(ex_rt, arr) def test_ndarray_uri2(self): - '''test Ndarray''' + """test Ndarray""" jsn = requests.get(FILE, allow_redirects=True, timeout=30).content.decode() # print(type(jsn), jsn) nda = Ndarray.read_json(jsn) # print(nda) - self.assertEqual(nda, Ndarray.read_json( - {':ndarray': ['int64[kg]', [2, 2], [1, 2, 3, 4]]})) - example = [['uri', 'int32', None], - ['uri', None, None], - ['uri', 'int32', [2, 2]], - ['uri', None, [2, 2]], - ] + self.assertEqual( + nda, Ndarray.read_json({":ndarray": ["int64[kg]", [2, 2], [1, 2, 3, 4]]}) + ) + example = [ + ["uri", "int32", None], + ["uri", None, None], + ["uri", "int32", [2, 2]], + ["uri", None, [2, 2]], + ] for ex in example: nda = Ndarray(ex[0], ex[1], ex[2]) self.assertEqual(Ndarray.read_json(nda.to_json()), nda) class TestXndarray(unittest.TestCase): - '''test Xndarray class''' + """test Xndarray class""" def test_xndarray_simple(self): - '''test Xndarray''' + """test Xndarray""" example = [ - {'y': [['string', ['y1', 'y2']]]}, - + {"y": [["string", ["y1", "y2"]]]}, ] for ex in example: self.assertEqual(ex, Xndarray.read_json(ex).to_json(header=False)) example = [ - {':xndarray': [['int64[kg]', [10, 20]]]}, - {':xndarray': [['month', [1, 2]]]}, - {':xndarray': [['ipv4', ['192.168.1.1', '192.168.2.5']]]}, - {':xndarray': [['json', [1, 'two', {'three': 3}]]]}, - {':xndarray': [['base16', [b'1F23', b'236A5E']]]}, - {':xndarray': [ - ['uri', ['geo:13.4125,103.86673', 'geo:13.41,103.86']]]}, - {':xndarray': [['object', FILE]]} + {":xndarray": [["int64[kg]", [10, 20]]]}, + {":xndarray": [["month", [1, 2]]]}, + {":xndarray": [["ipv4", ["192.168.1.1", "192.168.2.5"]]]}, + {":xndarray": [["json", [1, "two", {"three": 3}]]]}, + {":xndarray": [["base16", [b"1F23", b"236A5E"]]]}, + {":xndarray": [["uri", ["geo:13.4125,103.86673", "geo:13.41,103.86"]]]}, + {":xndarray": [["object", FILE]]}, ] for ex in example: # print(ex) @@ -252,28 +283,29 @@ def test_xndarray_simple(self): self.assertEqual(xnd, Xndarray.read_json(xnd.to_json())) def test_xndarray_dataset(self): - '''test Xndarray''' - example = [[{'var1': [['object', FILE], ['x', 'y']]}, 'relative', 'variable'], - [{'var1': [[FILE], ['x', 'y']]}, 'relative', 'variable'], - [{'var2': [['float[kg]', [2, 2], [10.1, 0.4, 3.4, 8.2]], - ['x', 'y']]}, 'absolute', 'variable'], - - [{'ranking': [['int', [2, 2], [1, 2, 3, 4]], ['var1']]}, - 'absolute', 'variable'], - [{'x': [['string', ['x1', 'x2']], {'test': 21}]}, - 'absolute', 'namedarray'], - [{'y': [['string', ['y1', 'y2']]]}, 'absolute', 'namedarray'], - [{'z': [['string', ['z1', 'z2']], ['x']]}, - 'absolute', 'variable'], - [{'x.mask': [['boolean', [True, False]]]}, - 'absolute', 'additional'], - [{'x.variance': [['float64', [0.1, 0.2]]]}, - 'absolute', 'additional'], - [{'z.variance': [['float64', [0.1, 0.2]]]}, - 'absolute', 'additional'], - [{'unit': 'kg'}, 'undefined', 'meta'], - [{'info': {'example': 'everything'}}, 'undefined', 'meta'], - ] + """test Xndarray""" + example = [ + [{"var1": [["object", FILE], ["x", "y"]]}, "relative", "variable"], + [{"var1": [[FILE], ["x", "y"]]}, "relative", "variable"], + [ + {"var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]]}, + "absolute", + "variable", + ], + [ + {"ranking": [["int", [2, 2], [1, 2, 3, 4]], ["var1"]]}, + "absolute", + "variable", + ], + [{"x": [["string", ["x1", "x2"]], {"test": 21}]}, "absolute", "namedarray"], + [{"y": [["string", ["y1", "y2"]]]}, "absolute", "namedarray"], + [{"z": [["string", ["z1", "z2"]], ["x"]]}, "absolute", "variable"], + [{"x.mask": [["boolean", [True, False]]]}, "absolute", "additional"], + [{"x.variance": [["float64", [0.1, 0.2]]]}, "absolute", "additional"], + [{"z.variance": [["float64", [0.1, 0.2]]]}, "absolute", "additional"], + [{"unit": "kg"}, "undefined", "meta"], + [{"info": {"example": "everything"}}, "undefined", "meta"], + ] for ex, mode, xtype in example: # print(ex) @@ -281,173 +313,230 @@ def test_xndarray_dataset(self): self.assertEqual(mode, Xndarray.read_json(ex).mode) self.assertEqual(xtype, Xndarray.read_json(ex).xtype) xa = Xndarray.read_json(ex) - for format in ['full', 'complete']: + for format in ["full", "complete"]: # print(xa.to_json(format=format)) # print(Xndarray.read_json(xa.to_json(format=format))) - self.assertEqual(xa, Xndarray.read_json( - xa.to_json(format=format))) - - example2 = [{'var1': [['object', FILE], ['x', 'y']]}, - {'var1': [[FILE], ['x', 'y']]}, - {'var2': [['float[kg]', [2, 2], [ - 10.1, 0.4, 3.4, 8.2]], ['x', 'y']]}, - {'ranking': [[[2, 2], [1, 2, 3, 4]], ['var1']]}, - {'x': [[['x1', 'x2']], {'test': 21}]}, - {'y': [[['y1', 'y2']]]}, - {'z': [[['z1', 'z2']], ['x']]}, - {'x.mask': [[[True, False]]]}, - {'x.variance': [[[0.1, 0.2]]]}, - {'z.variance': [[[0.1, 0.2]]]}, - ] + self.assertEqual(xa, Xndarray.read_json(xa.to_json(format=format))) + + example2 = [ + {"var1": [["object", FILE], ["x", "y"]]}, + {"var1": [[FILE], ["x", "y"]]}, + {"var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]]}, + {"ranking": [[[2, 2], [1, 2, 3, 4]], ["var1"]]}, + {"x": [[["x1", "x2"]], {"test": 21}]}, + {"y": [[["y1", "y2"]]]}, + {"z": [[["z1", "z2"]], ["x"]]}, + {"x.mask": [[[True, False]]]}, + {"x.variance": [[[0.1, 0.2]]]}, + {"z.variance": [[[0.1, 0.2]]]}, + ] for (ex, mode, xtype), ex2 in zip(example, example2): # print(ex, ex2) self.assertEqual(Xndarray.read_json(ex2).to_json(header=False), ex) class TestXdataset(unittest.TestCase): - '''test Xdataset class''' + """test Xdataset class""" def test_xdataset_full(self): - '''test Xdataset''' - example = {'test': { - 'var1': [[FILE], ['x', 'y']], - 'var2': [['float[kg]', [2, 2], [10.1, 0.4, 3.4, 8.2]], ['x', 'y']], - 'ranking': [[[2, 2], [1, 2, 3, 4]], ['var1']], - 'x': [[['x1', 'x2']], {'test': 21}], - 'y': [[['y1', 'y2']]], - 'z': [[['z1', 'z2']], ['x']], - 'x.mask1': [[[True, False]]], - 'x.variance': [[[0.1, 0.2]]], - 'z.variance': [[[0.1, 0.2]]], - 'unit': [[['kg']]], - 'info': {'example': 'everything'}}} - - notype = [True, False, True, True, True, - True, True, True, True, True, True] + """test Xdataset""" + example = { + "test": { + "var1": [[FILE], ["x", "y"]], + "var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]], + "ranking": [[[2, 2], [1, 2, 3, 4]], ["var1"]], + "x": [[["x1", "x2"]], {"test": 21}], + "y": [[["y1", "y2"]]], + "z": [[["z1", "z2"]], ["x"]], + "x.mask1": [[[True, False]]], + "x.variance": [[[0.1, 0.2]]], + "z.variance": [[[0.1, 0.2]]], + "unit": [[["kg"]]], + "info": {"example": "everything"}, + } + } + + notype = [True, False, True, True, True, True, True, True, True, True, True] xds = Xdataset.read_json(example) - self.assertEqual(xds.to_json( - notype=notype, noshape=True, header=False), example) - self.assertEqual(xds.dimensions, ('x', 'y')) - self.assertEqual(xds.partition, { - 'coordinates': ['ranking', 'z'], - 'data_vars': ['var1', 'var2'], 'uniques': ['unit'], 'metadata': ['info'], - 'additionals': ['x.mask1', 'x.variance', 'z.variance'], 'dimensions': ['x', 'y']}) + self.assertEqual( + xds.to_json(notype=notype, noshape=True, header=False), example + ) + self.assertEqual(xds.dimensions, ("x", "y")) + self.assertEqual( + xds.partition, + { + "coordinates": ["ranking", "z"], + "data_vars": ["var1", "var2"], + "uniques": ["unit"], + "metadata": ["info"], + "additionals": ["x.mask1", "x.variance", "z.variance"], + "dimensions": ["x", "y"], + }, + ) xdim = Xdataset(xds[xds.dimensions]) - self.assertEqual(xdim.to_json(novalue=True, noshape=True), {':xdataset': { - 'x': [['string', ['-']], {'test': 21}], - 'y': [['string', ['-']]]}}) + self.assertEqual( + xdim.to_json(novalue=True, noshape=True), + { + ":xdataset": { + "x": [["string", ["-"]], {"test": 21}], + "y": [["string", ["-"]]], + } + }, + ) def test_xdataset_info(self): - '''test Xdataset''' - xd = Xdataset([Xndarray('example', np.array(['x1', 'x2']))], 'test') - self.assertEqual(xd.info, { - 'structure': {'name': 'test', 'xtype': 'group', - 'validity': 'valid', 'data_arrays': ['example'], 'width': 1}, - 'data': {'example': {'length': 2, 'mode': 'absolute', - 'ntvtype': 'string', 'shape': [2], 'xtype': 'namedarray'}}}) - - example = {'test': { - 'var1': [[FILE], ['x', 'y']], - 'var2': [['float[kg]', [2, 2], [10.1, 0.4, 3.4, 8.2]], ['x', 'y']], - 'ranking': [[[2, 2], [1, 2, 3, 4]], ['var2']], - 'x': [[['x1', 'x2']], {'test': 21}], - 'y': [[['y1', 'y2']]], - 'z': [[['z1', 'z2']], ['x']], - 'z_bis': [[['z1_bis', 'z2_bis']]], - 'x.mask1': [[[True, False]], ['x']], - 'x.variance': [[[0.1, 0.2]], ['x']], - 'z.variance': [[[0.1, 0.2]], ['x']], - 'unit': [[['kg']]], - 'info': {'example': 'everything'}}} + """test Xdataset""" + xd = Xdataset([Xndarray("example", np.array(["x1", "x2"]))], "test") + self.assertEqual( + xd.info, + { + "structure": { + "name": "test", + "xtype": "group", + "validity": "valid", + "data_arrays": ["example"], + "width": 1, + }, + "data": { + "example": { + "length": 2, + "mode": "absolute", + "ntvtype": "string", + "shape": [2], + "xtype": "namedarray", + } + }, + }, + ) + + example = { + "test": { + "var1": [[FILE], ["x", "y"]], + "var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]], + "ranking": [[[2, 2], [1, 2, 3, 4]], ["var2"]], + "x": [[["x1", "x2"]], {"test": 21}], + "y": [[["y1", "y2"]]], + "z": [[["z1", "z2"]], ["x"]], + "z_bis": [[["z1_bis", "z2_bis"]]], + "x.mask1": [[[True, False]], ["x"]], + "x.variance": [[[0.1, 0.2]], ["x"]], + "z.variance": [[[0.1, 0.2]], ["x"]], + "unit": [[["kg"]]], + "info": {"example": "everything"}, + } + } xd = Xdataset.read_json(example) - self.assertEqual(xd.info['structure'], { - 'name': 'test', 'xtype': 'group', - 'data_vars': ['var1', 'var2'], - 'data_arrays': ['z_bis'], - 'dimensions': ['x', 'y'], - 'coordinates': ['ranking', 'z'], - 'additionals': ['x.mask1', 'x.variance', 'z.variance'], - 'metadata': ['info'], - 'uniques': ['unit'], - 'validity': 'undefined', 'width': 12}) - - del xd[('var1', 'z_bis')] - self.assertEqual(xd.info['structure'], { - 'name': 'test', 'xtype': 'mono', - 'data_vars': ['var2'], - 'dimensions': ['x', 'y'], - 'length': 4, - 'coordinates': ['ranking', 'z'], - 'additionals': ['x.mask1', 'x.variance', 'z.variance'], - 'metadata': ['info'], - 'uniques': ['unit'], - 'validity': 'valid', 'width': 10}) - - example = {'test': { - 'var1': [['float[m3]', [2, 2], 'path/var1.ntv'], ['x', 'y']], - 'var2': [['float[kg]', [2, 2], 'path/var2.ntv'], ['x', 'y']], - 'ranking': [[[2, 2], 'path/ranking.ntv'], ['var2']], - 'x': [['path/x.ntv'], {'test': 21}], - 'y': [['path/y.ntv']], - 'z': [['path/z.ntv'], ['x']], - 'z_bis': [['path/z_bis.ntv']], - 'x.mask1': [['path/x.mask1.ntv'], ['x']], - 'x.variance': [['path/x.variance.ntv'], ['x']], - 'z.variance': [['path/z.variance.ntv'], ['x']], - 'info': {'path': 'https://github.com/loco-philippe/ntv-numpy/tree/main/example/'}}} + self.assertEqual( + xd.info["structure"], + { + "name": "test", + "xtype": "group", + "data_vars": ["var1", "var2"], + "data_arrays": ["z_bis"], + "dimensions": ["x", "y"], + "coordinates": ["ranking", "z"], + "additionals": ["x.mask1", "x.variance", "z.variance"], + "metadata": ["info"], + "uniques": ["unit"], + "validity": "undefined", + "width": 12, + }, + ) + + del xd[("var1", "z_bis")] + self.assertEqual( + xd.info["structure"], + { + "name": "test", + "xtype": "mono", + "data_vars": ["var2"], + "dimensions": ["x", "y"], + "length": 4, + "coordinates": ["ranking", "z"], + "additionals": ["x.mask1", "x.variance", "z.variance"], + "metadata": ["info"], + "uniques": ["unit"], + "validity": "valid", + "width": 10, + }, + ) + + example = { + "test": { + "var1": [["float[m3]", [2, 2], "path/var1.ntv"], ["x", "y"]], + "var2": [["float[kg]", [2, 2], "path/var2.ntv"], ["x", "y"]], + "ranking": [[[2, 2], "path/ranking.ntv"], ["var2"]], + "x": [["path/x.ntv"], {"test": 21}], + "y": [["path/y.ntv"]], + "z": [["path/z.ntv"], ["x"]], + "z_bis": [["path/z_bis.ntv"]], + "x.mask1": [["path/x.mask1.ntv"], ["x"]], + "x.variance": [["path/x.variance.ntv"], ["x"]], + "z.variance": [["path/z.variance.ntv"], ["x"]], + "info": { + "path": "https://github.com/loco-philippe/ntv-numpy/tree/main/example/" + }, + } + } xd = Xdataset.read_json(example) - self.assertEqual(xd.info['structure'], { - 'name': 'test', 'xtype': 'group', - 'data_vars': ['var1', 'var2'], - 'data_arrays': ['z_bis'], - 'dimensions': ['x', 'y'], - 'coordinates': ['ranking', 'z'], - 'additionals': ['x.mask1', 'x.variance', 'z.variance'], - 'metadata': ['info'], - 'validity': 'undefined', 'width': 11}) + self.assertEqual( + xd.info["structure"], + { + "name": "test", + "xtype": "group", + "data_vars": ["var1", "var2"], + "data_arrays": ["z_bis"], + "dimensions": ["x", "y"], + "coordinates": ["ranking", "z"], + "additionals": ["x.mask1", "x.variance", "z.variance"], + "metadata": ["info"], + "validity": "undefined", + "width": 11, + }, + ) class TestXdatasetXarrayScipp(unittest.TestCase): - '''test Scipp interface''' + """test Scipp interface""" def test_xdataset_dataarray(self): - '''test Scipp''' + """test Scipp""" examples = [ - {'test': { - 'var2': [['float[kg]', [2, 2], [10.1, 0.4, 3.4, 8.2]], ['x', 'y']], - 'unit': 'kg', 'info': {'example': 'everything'}, - 'ranking': [[[2, 2], [1, 2, 3, 4]], ['var2']], # !!! - 'x': [[['x1', 'x2']], {'test': 21}], - 'y': [['date', ['2021-01-01', '2022-02-02']]], - 'z': [[['z1', 'z2']], ['x']], - # 'z_bis': [[['z1_bis', 'z2_bis']]], - 'x.mask1': [[[True, False]]], - 'x.variance': [[[0.1, 0.2]]], - 'z.variance': [[[0.1, 0.2]]] - }}, - {'test': { - 'var2': [['float[kg]', [2, 2], [10.1, 0.4, 3.4, 8.2]], ['x', 'y']], - 'var2.variance': [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], - 'var2.mask1': [[[True, False]], ['x']], - 'var2.mask2': [[[2, 2], [True, False, False, True]]], - - 'ranking': [['month', [2, 2], [1, 2, 3, 4]], ['var2']], # !!! - - - 'x': [['string', ['23F0AE', '578B98']], {'test': 21}], - 'x.mask1': [[[True, False]]], - - 'y': [['date', ['2021-01-01', '2022-02-02']]], - - 'z': [['float', [10, 20]], ['x']], - # 'z_bis': [[['z1_bis', 'z2_bis']]], - 'z.variance': [[[0.1, 0.2]]], - - 'unit': [[['kg']]], - 'info': {'example': 'everything'} - }}] + { + "test": { + "var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]], + "unit": "kg", + "info": {"example": "everything"}, + "ranking": [[[2, 2], [1, 2, 3, 4]], ["var2"]], # !!! + "x": [[["x1", "x2"]], {"test": 21}], + "y": [["date", ["2021-01-01", "2022-02-02"]]], + "z": [[["z1", "z2"]], ["x"]], + # 'z_bis': [[['z1_bis', 'z2_bis']]], + "x.mask1": [[[True, False]]], + "x.variance": [[[0.1, 0.2]]], + "z.variance": [[[0.1, 0.2]]], + } + }, + { + "test": { + "var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]], + "var2.variance": [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], + "var2.mask1": [[[True, False]], ["x"]], + "var2.mask2": [[[2, 2], [True, False, False, True]]], + "ranking": [["month", [2, 2], [1, 2, 3, 4]], ["var2"]], # !!! + "x": [["string", ["23F0AE", "578B98"]], {"test": 21}], + "x.mask1": [[[True, False]]], + "y": [["date", ["2021-01-01", "2022-02-02"]]], + "z": [["float", [10, 20]], ["x"]], + # 'z_bis': [[['z1_bis', 'z2_bis']]], + "z.variance": [[[0.1, 0.2]]], + "unit": [[["kg"]]], + "info": {"example": "everything"}, + } + }, + ] for example in examples: xd = Xdataset.read_json(example) xd2 = Xdataset.from_xarray(xd.to_xarray(dataset=False)) @@ -455,23 +544,40 @@ def test_xdataset_dataarray(self): xd2 = Xdataset.from_xarray(xd.to_xarray()) self.assertEqual(xd, xd2) - examples = [xr.DataArray(np.array([1, 2, 3, 4]).reshape([2, 2])), - xr.Dataset({'var': (['date', 'y'], np.array([1, 2, 3, 4]).reshape([2, 2]))}, - coords={'date': np.array(['2021-02-04', '2022-02-04'], - dtype='datetime64[ns]'), - 'y': np.array([10, 20])}), - xr.Dataset({'var': (['date', 'y'], np.array([1, 2, 3, 4]).reshape([2, 2]))}, - coords={'date': - (['date'], np.array(['2021-02-04', '2022-02-04'], - dtype='datetime64[ns]'), - {'ntv_type': 'date'}), - 'y': np.array([10, 20])}), - xr.Dataset({'var': (['date', 'y'], np.array([1, 2, 3, 4]).reshape([2, 2]))}, - coords={'date': (['date'], - np.array(['2021-02-04', '2022-02-04'], - dtype='datetime64[ns]'), - {'ntv_type': 'date'}), - 'y': np.array([Point([1, 2]), Point([3, 4])])})] + examples = [ + xr.DataArray(np.array([1, 2, 3, 4]).reshape([2, 2])), + xr.Dataset( + {"var": (["date", "y"], np.array([1, 2, 3, 4]).reshape([2, 2]))}, + coords={ + "date": np.array( + ["2021-02-04", "2022-02-04"], dtype="datetime64[ns]" + ), + "y": np.array([10, 20]), + }, + ), + xr.Dataset( + {"var": (["date", "y"], np.array([1, 2, 3, 4]).reshape([2, 2]))}, + coords={ + "date": ( + ["date"], + np.array(["2021-02-04", "2022-02-04"], dtype="datetime64[ns]"), + {"ntv_type": "date"}, + ), + "y": np.array([10, 20]), + }, + ), + xr.Dataset( + {"var": (["date", "y"], np.array([1, 2, 3, 4]).reshape([2, 2]))}, + coords={ + "date": ( + ["date"], + np.array(["2021-02-04", "2022-02-04"], dtype="datetime64[ns]"), + {"ntv_type": "date"}, + ), + "y": np.array([Point([1, 2]), Point([3, 4])]), + }, + ), + ] for xar in examples: xd = Xdataset.from_xarray(xar) @@ -484,42 +590,42 @@ def test_xdataset_dataarray(self): self.assertEqual(xd.to_json(), xd2.to_json()) def test_xdataset_scipp(self): - '''test Scipp''' - examples = [{ # 'test': { - 'var2': [['float[kg]', [2, 2], [10.1, 0.4, 3.4, 8.2]], ['x', 'y']], - 'var2.variance': [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], - 'var2.mask1': [[[True, False]], ['x']], - 'var2.mask2': [[[2, 2], [True, False, False, True]]], - - 'ranking': [['month', [2, 2], [1, 2, 3, 4]], ['var2']], - - - 'x': [['string', ['23F0AE', '578B98']]], # , {'test': 21}], - # 'x.mask1': [[[True, False]]], - - 'y': [['date', ['2021-01-01', '2022-02-02']]], - - 'z': [['float', [10, 20]], ['x']], - # 'z_bis': [[['z1_bis', 'z2_bis']]], - 'z.variance': [['float', [0.1, 0.2]]], - - # 'unit': 'kg', - # 'info': {'example': 'everything'} - }, # }, - {'x': [['int32', [10, 20]]], - 'y': [['string', ['a', 'b', 'c']]], - 'z': [['int32', [1, 2, 3]]], - 'year': [['int32', [2020, 2021]]], - 'point': [['string', [3, 2], ['pt1', 'pt2', 'pt3', 'pt4', 'pt5', 'pt6']], - ['y', 'x']], - 'along_x': [['float64', [-1.18, -0.74]], ['x']], - 'foo': [['float64', [2, 3, 3, 2], list(range(36))], - ['x', 'y', 'z', 'year']]} + """test Scipp""" + examples = [ + { # 'test': { + "var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]], + "var2.variance": [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], + "var2.mask1": [[[True, False]], ["x"]], + "var2.mask2": [[[2, 2], [True, False, False, True]]], + "ranking": [["month", [2, 2], [1, 2, 3, 4]], ["var2"]], + "x": [["string", ["23F0AE", "578B98"]]], # , {'test': 21}], + # 'x.mask1': [[[True, False]]], + "y": [["date", ["2021-01-01", "2022-02-02"]]], + "z": [["float", [10, 20]], ["x"]], + # 'z_bis': [[['z1_bis', 'z2_bis']]], + "z.variance": [["float", [0.1, 0.2]]], + # 'unit': 'kg', + # 'info': {'example': 'everything'} + }, # }, + { + "x": [["int32", [10, 20]]], + "y": [["string", ["a", "b", "c"]]], + "z": [["int32", [1, 2, 3]]], + "year": [["int32", [2020, 2021]]], + "point": [ + ["string", [3, 2], ["pt1", "pt2", "pt3", "pt4", "pt5", "pt6"]], + ["y", "x"], + ], + "along_x": [["float64", [-1.18, -0.74]], ["x"]], + "foo": [ + ["float64", [2, 3, 3, 2], list(range(36))], + ["x", "y", "z", "year"], + ], + }, ] for example in examples: xd = Xdataset.read_json(example) - xd2 = Xdataset.from_scipp( - xd.to_scipp(dataset=False, info=False)) + xd2 = Xdataset.from_scipp(xd.to_scipp(dataset=False, info=False)) self.assertEqual(xd, xd2) xd2 = Xdataset.from_scipp(xd.to_scipp(dataset=False)) self.assertEqual(xd, xd2) @@ -529,30 +635,27 @@ def test_xdataset_scipp(self): self.assertEqual(xd, xd2) def test_xdataset_mixte(self): - '''test Scipp''' - examples = [{'test:xdataset': { - 'var1': [[FILE], ['x', 'y']], - 'var2': [['float[kg]', [2, 2], [10.1, 0.4, 3.4, 8.2]], ['x', 'y']], - 'var2.variance': [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], - 'var2.mask1': [[[True, False]], ['x']], - 'var2.mask2': [[[2, 2], [True, False, False, True]]], - - 'ranking': [['month', [2, 2], [1, 2, 3, 4]], ['var2']], # !!! - - - 'x': [['string', ['23F0AE', '578B98']]], # , {'test': 21}], - 'x.mask1': [[[True, False]]], - - 'y': [['date', ['2021-01-01', '2022-02-02']]], - - 'z': [['float', [10, 20]], ['x']], - 'z_bis': [[['z1_bis', 'z2_bis']]], - 'z.uncertainty': [[[0.1, 0.2]]], - 'z.variance': [['float', [0.1, 0.2]]], - - 'info': {'example': 'everything'}, - 'location': [[['paris']]] - }} + """test Scipp""" + examples = [ + { + "test:xdataset": { + "var1": [[FILE], ["x", "y"]], + "var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]], + "var2.variance": [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], + "var2.mask1": [[[True, False]], ["x"]], + "var2.mask2": [[[2, 2], [True, False, False, True]]], + "ranking": [["month", [2, 2], [1, 2, 3, 4]], ["var2"]], # !!! + "x": [["string", ["23F0AE", "578B98"]]], # , {'test': 21}], + "x.mask1": [[[True, False]]], + "y": [["date", ["2021-01-01", "2022-02-02"]]], + "z": [["float", [10, 20]], ["x"]], + "z_bis": [[["z1_bis", "z2_bis"]]], + "z.uncertainty": [[[0.1, 0.2]]], + "z.variance": [["float", [0.1, 0.2]]], + "info": {"example": "everything"}, + "location": [[["paris"]]], + } + } ] for example in examples: xd = Xdataset.read_json(example) @@ -565,19 +668,28 @@ def test_xdataset_mixte(self): class TestXdatasetPandas(unittest.TestCase): - '''test pandas interface''' + """test pandas interface""" def test_xdataset_dataframe(self): - '''test pandas interface''' - ds = xr.Dataset({"foo": (("x", "y", "z", "year"), np.random.randn(2, 3, 3, 2))}, - coords={ - "x": [10, 20], "y": ["a", "b", "c"], "z": [1, 2, 3], "year": [2020, 2021], - "point": (("x", "y"), - np.array(["pt1", "pt2", "pt3", "pt4", "pt5", "pt6"]).reshape(2, 3)), - "along_x": ("x", np.random.randn(2)), "scalar": 123}) + """test pandas interface""" + ds = xr.Dataset( + {"foo": (("x", "y", "z", "year"), np.random.randn(2, 3, 3, 2))}, + coords={ + "x": [10, 20], + "y": ["a", "b", "c"], + "z": [1, 2, 3], + "year": [2020, 2021], + "point": ( + ("x", "y"), + np.array(["pt1", "pt2", "pt3", "pt4", "pt5", "pt6"]).reshape(2, 3), + ), + "along_x": ("x", np.random.randn(2)), + "scalar": 123, + }, + ) xdt = Xdataset.from_xarray(ds) df = ds.to_dataframe().reset_index() - dimensions = ['x', 'y', 'z', 'year'] + dimensions = ["x", "y", "z", "year"] for name in xdt.names[:]: # tab = xdt.to_tab_array(name, dimensions) tab = PandasConnec._to_np_series(xdt, name, dimensions) @@ -589,55 +701,47 @@ def test_xdataset_dataframe(self): self.assertEqual(xds, xdt) def test_xdataset_multidim(self): - '''test pandas interface''' - example = {'test:xdataset': { - 'var1': [[FILE], ['x', 'y']], - 'var2': [['float[kg]', [2, 2], [10.1, 0.4, 3.4, 8.2]], ['x', 'y']], - 'var2.variance': [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], - 'var2.mask1': [[[True, False]], ['x']], - 'var2.mask2': [[[2, 2], [True, False, False, True]]], - - 'ranking': [['month', [2, 2], [1, 2, 3, 4]], ['var2']], - - - 'x': [['string', ['23F0AE', '578B98']]], # , {'test': 21}], - 'x.mask1': [[[True, False]]], - - 'y': [['date', ['2021-01-01', '2022-02-02']]], - - 'z': [['float', [10, 20]], ['x']], - 'z_bis': [[['z1_bis', 'z2_bis']]], - 'z.uncertainty': [[[0.1, 0.2]]], - 'z.variance': [['float', [0.1, 0.2]]], - - 'info': {'example': 'everything'}, - 'location': 'paris' - }} + """test pandas interface""" + example = { + "test:xdataset": { + "var1": [[FILE], ["x", "y"]], + "var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]], + "var2.variance": [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], + "var2.mask1": [[[True, False]], ["x"]], + "var2.mask2": [[[2, 2], [True, False, False, True]]], + "ranking": [["month", [2, 2], [1, 2, 3, 4]], ["var2"]], + "x": [["string", ["23F0AE", "578B98"]]], # , {'test': 21}], + "x.mask1": [[[True, False]]], + "y": [["date", ["2021-01-01", "2022-02-02"]]], + "z": [["float", [10, 20]], ["x"]], + "z_bis": [[["z1_bis", "z2_bis"]]], + "z.uncertainty": [[[0.1, 0.2]]], + "z.variance": [["float", [0.1, 0.2]]], + "info": {"example": "everything"}, + "location": "paris", + } + } xd = Xdataset.read_json(example) df = xd.to_dataframe() xd2 = Xdataset.from_dataframe(df) self.assertEqual(xd, xd2) - example = {':xdataset': { - 'var2': [['float[kg]', [2, 2], [10.1, 0.4, 3.4, 8.2]], ['x', 'y']], - 'var2.variance': [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], - 'var2.mask1': [[[True, False]], ['x']], - 'var2.mask2': [[[2, 2], [True, False, False, True]]], - - 'ranking': [['month', [2, 2], [1, 2, 3, 4]], ['x', 'y']], - - - 'x': [['string', ['23F0AE', '578B98']]], # , {'test': 21}], - 'x.mask1': [[[True, False]]], - - 'y': [['date', ['2021-01-01', '2022-02-02']]], - - 'z': [['float', [10, 20]], ['x']], - 'z.uncertainty': [[[0.1, 0.2]]], - 'z.variance': [['float', [0.1, 0.2]]], - - 'location': [['string', ['paris']]] - }} + example = { + ":xdataset": { + "var2": [["float[kg]", [2, 2], [10.1, 0.4, 3.4, 8.2]], ["x", "y"]], + "var2.variance": [[[2, 2], [0.1, 0.2, 0.3, 0.4]]], + "var2.mask1": [[[True, False]], ["x"]], + "var2.mask2": [[[2, 2], [True, False, False, True]]], + "ranking": [["month", [2, 2], [1, 2, 3, 4]], ["x", "y"]], + "x": [["string", ["23F0AE", "578B98"]]], # , {'test': 21}], + "x.mask1": [[[True, False]]], + "y": [["date", ["2021-01-01", "2022-02-02"]]], + "z": [["float", [10, 20]], ["x"]], + "z.uncertainty": [[[0.1, 0.2]]], + "z.variance": [["float", [0.1, 0.2]]], + "location": [["string", ["paris"]]], + } + } xd = Xdataset.read_json(example) df = xd.to_dataframe() xd2 = Xdataset.from_dataframe(df) @@ -647,58 +751,105 @@ def test_xdataset_multidim(self): self.assertEqual(xd2, xd3) def test_xdataset_multipart(self): - '''test pandas interface''' - fruits = {'plants': ['fruit', 'fruit', 'fruit', 'fruit', 'vegetable', - 'vegetable', 'vegetable', 'vegetable'], - 'plts': ['fr', 'fr', 'fr', 'fr', 've', 've', 've', 've'], - 'quantity': ['1 kg', '10 kg', '1 kg', '10 kg', '1 kg', - '10 kg', '1 kg', '10 kg'], - 'product': ['apple', 'apple', 'orange', 'orange', 'peppers', - 'peppers', 'carrot', 'carrot'], - 'price': [1, 10, 2, 20, 1.5, 15, 1.5, 20], - 'price level': ['low', 'low', 'high', 'high', 'low', 'low', - 'high', 'high'], - 'group': ['fruit 1', 'fruit 10', 'fruit 1', 'veget', - 'veget', 'veget', 'veget', 'veget'], - 'id': [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008], - 'supplier': ["sup1", "sup1", "sup1", "sup2", "sup2", - "sup2", "sup2", "sup1"], - 'location': ["fr", "gb", "es", "ch", "gb", "fr", "es", "ch"], - 'valid': ["ok", "ok", "ok", "ok", "ok", "ok", "ok", "ok"]} + """test pandas interface""" + fruits = { + "plants": [ + "fruit", + "fruit", + "fruit", + "fruit", + "vegetable", + "vegetable", + "vegetable", + "vegetable", + ], + "plts": ["fr", "fr", "fr", "fr", "ve", "ve", "ve", "ve"], + "quantity": [ + "1 kg", + "10 kg", + "1 kg", + "10 kg", + "1 kg", + "10 kg", + "1 kg", + "10 kg", + ], + "product": [ + "apple", + "apple", + "orange", + "orange", + "peppers", + "peppers", + "carrot", + "carrot", + ], + "price": [1, 10, 2, 20, 1.5, 15, 1.5, 20], + "price level": ["low", "low", "high", "high", "low", "low", "high", "high"], + "group": [ + "fruit 1", + "fruit 10", + "fruit 1", + "veget", + "veget", + "veget", + "veget", + "veget", + ], + "id": [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008], + "supplier": [ + "sup1", + "sup1", + "sup1", + "sup2", + "sup2", + "sup2", + "sup2", + "sup1", + ], + "location": ["fr", "gb", "es", "ch", "gb", "fr", "es", "ch"], + "valid": ["ok", "ok", "ok", "ok", "ok", "ok", "ok", "ok"], + } df1 = pd.DataFrame(fruits) a_df = df1.npd.analysis(distr=True) xdt = Xdataset.from_dataframe(df1) df3 = xdt.to_dataframe(ntv_type=False).reset_index() - df2 = df1.sort_values(a_df.partitions(mode='id') - [0]).reset_index(drop=True) - df4 = df3.sort_values(a_df.partitions(mode='id')[ - 0]).reset_index(drop=True)[df2.columns] + df2 = df1.sort_values(a_df.partitions(mode="id")[0]).reset_index(drop=True) + df4 = df3.sort_values(a_df.partitions(mode="id")[0]).reset_index(drop=True)[ + df2.columns + ] self.assertTrue(df4.equals(df2)) def test_xdataset_unidim(self): - '''test pandas interface''' - simple = {'a': [1, 2, 3, 4, 4], - 'b': [10, 20, 30, 40, 40], - # 'b2': [10,20,30,40,40], - 'c': [1, 1, 3, 4, 4], - 'd': [1, 1, 1, 4, 4], - 'e': [1, 1, 1, 1, 1]} + """test pandas interface""" + simple = { + "a": [1, 2, 3, 4, 4], + "b": [10, 20, 30, 40, 40], + # 'b2': [10,20,30,40,40], + "c": [1, 1, 3, 4, 4], + "d": [1, 1, 1, 4, 4], + "e": [1, 1, 1, 1, 1], + } df1 = pd.DataFrame(simple) - df3 = Xdataset.from_dataframe(df1).to_dataframe( - ntv_type=False)[df1.columns] + df3 = Xdataset.from_dataframe(df1).to_dataframe(ntv_type=False)[df1.columns] self.assertTrue(df3.equals(df1)) - simple = {'a': [1, 2, 3, 4, 5], - 'b': [10, 20, 30, 40, 50], - 'b2': [10, 20, 30, 40, 40], - 'c': [1, 1, 3, 4, 4], - 'd': [1, 1, 1, 4, 4], - 'e': [1, 1, 1, 1, 1]} + simple = { + "a": [1, 2, 3, 4, 5], + "b": [10, 20, 30, 40, 50], + "b2": [10, 20, 30, 40, 40], + "c": [1, 1, 3, 4, 4], + "d": [1, 1, 1, 4, 4], + "e": [1, 1, 1, 1, 1], + } df1 = pd.DataFrame(simple) - df3 = Xdataset.from_dataframe(df1).to_dataframe( - ntv_type=False).reset_index()[df1.columns] + df3 = ( + Xdataset.from_dataframe(df1) + .to_dataframe(ntv_type=False) + .reset_index()[df1.columns] + ) self.assertTrue(df3.equals(df1)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main(verbosity=2)