From d2a093c8e3cd715fa0783adcbed79716bca93151 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 4 Dec 2023 15:28:09 +0000 Subject: [PATCH 1/9] Add install config for optional dependencies --- Makefile | 3 +- docs/development.rst | 14 +++- docs/install.rst | 54 +++++++++++++- earthkit/data/readers/bufr/bufr.py | 12 +++- earthkit/data/readers/bufr/pandas.py | 4 +- earthkit/data/readers/geojson.py | 3 +- earthkit/data/readers/odb.py | 8 +-- earthkit/data/sources/ads.py | 5 +- earthkit/data/sources/cds.py | 5 +- earthkit/data/sources/ecmwf_open_data.py | 8 ++- earthkit/data/sources/fdb.py | 9 ++- earthkit/data/sources/mars.py | 6 +- earthkit/data/sources/polytope.py | 9 +-- earthkit/data/sources/wekeo.py | 6 +- earthkit/data/sources/wekeocds.py | 6 +- earthkit/data/testing.py | 31 ++++---- earthkit/data/translators/pandas.py | 3 +- earthkit/data/utils/importer.py | 80 +++++++++++++++++++++ earthkit/data/utils/projections/__init__.py | 25 +++---- setup.cfg | 34 ++++++--- tests/translators/test_translators.py | 5 +- tests/utils/test_projections.py | 9 +-- 22 files changed, 254 insertions(+), 85 deletions(-) create mode 100644 earthkit/data/utils/importer.py diff --git a/Makefile b/Makefile index db046484..60026f43 100644 --- a/Makefile +++ b/Makefile @@ -12,10 +12,9 @@ qa: pre-commit run --all-files unit-tests: - python -m pytest -vv -m 'not notebook and not no_cache_init and not plugin' --cov=. --cov-report=$(COV_REPORT) + python -m pytest -vv -m 'not notebook and not no_cache_init' --cov=. --cov-report=$(COV_REPORT) python -m pytest -v -m "notebook" python -m pytest --forked -vv -m 'no_cache_init' - python -m pytest -v -m "plugin" # type-check: # python -m mypy . diff --git a/docs/development.rst b/docs/development.rst index 38ab61d0..eaca6ecf 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -36,7 +36,19 @@ To run the test suite, you can use the following command: .. 
code-block:: shell - pytest + make unit-tests + +Please note this will not run any of the tests based on remote services e.g. :ref:`data-sources-mars`. These are disabled by default because they can take a very long time to complete or just hang. To enable all these tests you need to run: + +.. code-block:: shell + + pytest -E long -v + +If you just want to run e.g. the :ref:`data-sources-cds` tests you can use: + +.. code-block:: shell + + pytest -E long -v -k cds Build documentation diff --git a/docs/install.rst b/docs/install.rst index f8d6ae48..262c41e4 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -1,8 +1,11 @@ Installation ============ -Installing earthkit-data ----------------------------- +Installing from PyPI +------------------------------------ + +Minimal installation ++++++++++++++++++++++++++ Install **earthkit-data** with python3 (>= 3.8) and ``pip`` as follows: @@ -10,7 +13,52 @@ Install **earthkit-data** with python3 (>= 3.8) and ``pip`` as follows: python3 -m pip install earthkit-data -Alternatively, install via ``conda`` with: +The package installed like this is **minimal** supporting only GRIB and NetCDF data and cannot access remote services other than URLs. If you want to use more data types or remote services you need to install the optional Python packages. + +Installing all the optional packages +++++++++++++++++++++++++++++++++++++++++ + +You can install **earthkit-data** with all the optional packages in one go by using: + +.. code-block:: bash + + python3 -m pip install .earthkit-data[all] + +Please note in **zsh** you need to use quotes around the square brackets: + +.. 
code-block:: bash + + python3 -m pip install ".earthkit-data[all]" + + +Installing individual optional packages ++++++++++++++++++++++++++++++++++++++++++ + +Alternatively, you can install the following components individually: + + - mars: provides access to the :ref:`data-sources-mars` source + - cds: provides access to the :ref:`data-sources-cds` and :ref:`data-sources-ads` sources + - ecmwf-opendata: provides access to the :ref:`data-sources-eod` + - wekeo: provides access to the :ref:`data-sources-wekeo` and :ref:`data-sources-wekeocds` sources + - fdb: provides access to the :ref:`data-sources-fdb` source + - polytope: provides access to the :ref:`data-sources-polytope` source + - bufr: provides full support for the :ref:`bufr` data type + - odb: provides full support for the :ref:`odb` data type + - geojson: adds geojson support + - projection: adds projection support + +E.g. to add :ref:`data-sources-mars` support you can use: + +.. code-block:: bash + + python3 -m pip install .earthkit-data[mars] + + + +Installing with conda +--------------------------------------- + +Install **earthkit-data** via ``conda`` with: .. code-block:: bash diff --git a/earthkit/data/readers/bufr/bufr.py b/earthkit/data/readers/bufr/bufr.py index 87ae8b88..69de7252 100644 --- a/earthkit/data/readers/bufr/bufr.py +++ b/earthkit/data/readers/bufr/bufr.py @@ -11,10 +11,10 @@ from abc import abstractmethod import eccodes -from pdbufr.high_level_bufr.bufr import bufr_code_is_coord from earthkit.data.core import Base from earthkit.data.core.index import Index, MaskIndex, MultiIndex +from earthkit.data.utils.importer import IMPORTER from earthkit.data.utils.message import ( CodesHandle, CodesMessagePositionIndex, @@ -27,6 +27,16 @@ from .. 
import Reader from .pandas import PandasMixIn +# from earthkit.data.utils.importer import IMPORTER + +# pdbufr = IMPORTER.import_module("pdbufr") + +# from pdbufr.high_level_bufr.bufr import bufr_code_is_coord # noqa + + +pdbufr = IMPORTER.import_module("pdbufr") +from pdbufr.high_level_bufr.bufr import bufr_code_is_coord # noqa + BUFR_LS_KEYS = { "edition": "edition", "type": "dataCategory", diff --git a/earthkit/data/readers/bufr/pandas.py b/earthkit/data/readers/bufr/pandas.py index f91f6120..06db0055 100644 --- a/earthkit/data/readers/bufr/pandas.py +++ b/earthkit/data/readers/bufr/pandas.py @@ -39,7 +39,9 @@ def to_pandas(self, columns=COLUMNS, filters=None, **kwargs): - :ref:`/examples/bufr_synop.ipynb` """ - import pdbufr + from earthkit.data.utils.importer import IMPORTER + + pdbufr = IMPORTER.import_module("pdbufr") filters = {} if filters is None else filters diff --git a/earthkit/data/readers/geojson.py b/earthkit/data/readers/geojson.py index 60f3f377..b53fb5e4 100644 --- a/earthkit/data/readers/geojson.py +++ b/earthkit/data/readers/geojson.py @@ -91,8 +91,9 @@ def to_xarray(self, **kwargs): @classmethod def to_pandas_from_multi_paths(cls, paths, **kwargs): - import geopandas as gpd + from earthkit.data.utils.importer import IMPORTER + gpd = IMPORTER.import_module("geopandas") geo_df = gpd.pd.concat([gpd.read_file(path, **kwargs) for path in paths]) return geo_df.set_index(np.arange(len(geo_df))) diff --git a/earthkit/data/readers/odb.py b/earthkit/data/readers/odb.py index 3fd07e14..3bac4333 100644 --- a/earthkit/data/readers/odb.py +++ b/earthkit/data/readers/odb.py @@ -16,13 +16,9 @@ class ODBReader(Reader): def to_pandas(self, **kwargs): - try: - import codc as odc - except Exception: - import pyodc as odc - - LOG.debug("Using pure Python odc decoder.") + from earthkit.data.utils.importer import IMPORTER + odc = IMPORTER.import_module(["codc", "pyodc"]) odc_read_odb_kwargs = kwargs.get("odc_read_odb_kwargs", {}) return odc.read_odb(self.path, 
single=True, **odc_read_odb_kwargs) diff --git a/earthkit/data/sources/ads.py b/earthkit/data/sources/ads.py index e298e44b..ab639aff 100644 --- a/earthkit/data/sources/ads.py +++ b/earthkit/data/sources/ads.py @@ -9,7 +9,6 @@ import os -import cdsapi import yaml from .cds import CdsRetriever @@ -54,6 +53,10 @@ def client(): with open(path) as f: rc = yaml.safe_load(f.read()) + from earthkit.data.utils.importer import IMPORTER + + cdsapi = IMPORTER.import_module("cdsapi") + return cdsapi.Client(**rc) diff --git a/earthkit/data/sources/cds.py b/earthkit/data/sources/cds.py index 6f086e14..6a420bea 100644 --- a/earthkit/data/sources/cds.py +++ b/earthkit/data/sources/cds.py @@ -11,7 +11,6 @@ import sys from functools import cached_property -import cdsapi import yaml from earthkit.data.core.thread import SoftThreadPool @@ -70,6 +69,10 @@ def client(): prompt = CDSAPIKeyPrompt() prompt.check() + from earthkit.data.utils.importer import IMPORTER + + cdsapi = IMPORTER.import_module("cdsapi") + try: return cdsapi.Client() except Exception as e: diff --git a/earthkit/data/sources/ecmwf_open_data.py b/earthkit/data/sources/ecmwf_open_data.py index 3b5ba42c..ed632858 100644 --- a/earthkit/data/sources/ecmwf_open_data.py +++ b/earthkit/data/sources/ecmwf_open_data.py @@ -7,8 +7,6 @@ # nor does it submit to any jurisdiction. 
# -import ecmwf.opendata - from .file import FileSource @@ -19,6 +17,10 @@ class EODRetriever(FileSource): def __init__(self, source="ecmwf", *args, **kwargs): super().__init__() + from earthkit.data.utils.importer import IMPORTER + + opendata = IMPORTER.import_module("ecmwf.opendata") + if len(args): assert len(args) == 1 assert isinstance(args[0], dict) @@ -27,7 +29,7 @@ def __init__(self, source="ecmwf", *args, **kwargs): self.source_kwargs = self.request(**kwargs) - self.client = ecmwf.opendata.Client(source=source, preserve_request_order=True) + self.client = opendata.Client(source=source, preserve_request_order=True) self.path = self._retrieve(self.source_kwargs) diff --git a/earthkit/data/sources/fdb.py b/earthkit/data/sources/fdb.py index f69b8a43..ccbb8955 100644 --- a/earthkit/data/sources/fdb.py +++ b/earthkit/data/sources/fdb.py @@ -11,8 +11,6 @@ import os import shutil -import pyfdb - from earthkit.data.sources.file import FileSource from earthkit.data.sources.stream import StreamSource @@ -48,6 +46,9 @@ def __init__(self, *args, stream=True, **kwargs): def mutate(self): if self.stream: + from earthkit.data.utils.importer import IMPORTER + + pyfdb = IMPORTER.import_module("pyfdb") stream = pyfdb.retrieve(self.request) return StreamSource(stream, **self._stream_kwargs) else: @@ -60,6 +61,10 @@ def __init__(self, request): self.path = self._retrieve(request) def _retrieve(self, request): + from earthkit.data.utils.importer import IMPORTER + + pyfdb = IMPORTER.import_module("pyfdb") + def retrieve(target, request): with open(target, "wb") as o, pyfdb.retrieve(request) as i: shutil.copyfileobj(i, o) diff --git a/earthkit/data/sources/mars.py b/earthkit/data/sources/mars.py index 1688bc35..5882d8b2 100644 --- a/earthkit/data/sources/mars.py +++ b/earthkit/data/sources/mars.py @@ -11,8 +11,6 @@ import os import subprocess -import ecmwfapi - from earthkit.data.core.settings import SETTINGS from earthkit.data.core.temporary import temp_file @@ -48,6 +46,10 @@ 
def execute(self, request, target): class MarsRetriever(ECMWFApi): def service(self): + from earthkit.data.utils.importer import IMPORTER + + ecmwfapi = IMPORTER.import_module("ecmwfapi") + if SETTINGS.get("use-standalone-mars-client-when-available"): if os.path.exists(StandaloneMarsClient.EXE): return StandaloneMarsClient() diff --git a/earthkit/data/sources/polytope.py b/earthkit/data/sources/polytope.py index 2a03fc11..90b30dd8 100644 --- a/earthkit/data/sources/polytope.py +++ b/earthkit/data/sources/polytope.py @@ -56,12 +56,9 @@ class Polytope(Source): """ def __init__(self, dataset, request) -> None: - try: - import polytope - except ImportError: - raise ImportError( - "Polytope Web Client must be installed with 'pip install polytope-client'" - ) + from earthkit.data.utils.importer import IMPORTER + + polytope = IMPORTER.import_module("polytope") super().__init__() assert isinstance(dataset, str) diff --git a/earthkit/data/sources/wekeo.py b/earthkit/data/sources/wekeo.py index b4bddcf5..2f64ccbe 100644 --- a/earthkit/data/sources/wekeo.py +++ b/earthkit/data/sources/wekeo.py @@ -9,16 +9,18 @@ import os -import hda import yaml -from hda.api import DataOrderRequest from earthkit.data.core.thread import SoftThreadPool from earthkit.data.utils import tqdm +from earthkit.data.utils.importer import IMPORTER from .file import FileSource from .prompt import APIKeyPrompt +hda = IMPORTER.import_module("hda") +from hda.api import DataOrderRequest # noqa + class HDAAPIKeyPrompt(APIKeyPrompt): register_or_sign_in_url = "https://www.wekeo.eu" diff --git a/earthkit/data/sources/wekeocds.py b/earthkit/data/sources/wekeocds.py index d86b2caa..10c404a2 100644 --- a/earthkit/data/sources/wekeocds.py +++ b/earthkit/data/sources/wekeocds.py @@ -9,17 +9,19 @@ import os -from hda.api import DataOrderRequest - from earthkit.data.core.thread import SoftThreadPool from earthkit.data.decorators import normalize from earthkit.data.utils import tqdm +from earthkit.data.utils.importer 
import IMPORTER from .file import FileSource from .wekeo import EXTENSIONS from .wekeo import ApiClient as WekeoClient from .wekeo import HDAAPIKeyPrompt +hda = IMPORTER.import_module("hda") +from hda.api import DataOrderRequest # noqa + class ApiClient(WekeoClient): name = "wekeocds" diff --git a/earthkit/data/testing.py b/earthkit/data/testing.py index 013f3ff9..7d781a19 100644 --- a/earthkit/data/testing.py +++ b/earthkit/data/testing.py @@ -78,30 +78,23 @@ def modules_installed(*modules): return True +def MISSING(*modules): + return not modules_installed(*modules) + + NO_MARS = not os.path.exists(os.path.expanduser("~/.ecmwfapirc")) NO_CDS = not os.path.exists(os.path.expanduser("~/.cdsapirc")) +NO_ADS = not os.path.exists(os.path.expanduser("~/.adsapirc")) NO_HDA = not os.path.exists(os.path.expanduser("~/.hdarc")) -IN_GITHUB = os.environ.get("GITHUB_WORKFLOW") is not None -try: - import ecmwf.opendata # noqa - - NO_EOD = False -except Exception: - NO_EOD = True - -try: - import pyfdb # noqa - - fdb_home = os.environ.get("FDB_HOME", None) - NO_FDB = fdb_home is None -except Exception: - NO_FDB = True - +NO_EOD = MISSING("ecmwf.opendata") +NO_FDB = not ( + os.environ.get("FDB_HOME", None) is not None + or os.environ.get("FDB5_CONFIG", None) is not None +) NO_POLYTOPE = not os.path.exists(os.path.expanduser("~/.polytopeapirc")) +NO_CARTOPY = MISSING("cartopy.ccrs") - -def MISSING(*modules): - return not modules_installed(*modules) +IN_GITHUB = os.environ.get("GITHUB_WORKFLOW") is not None UNSAFE_SAMPLES_URL = "https://github.com/jwilk/traversal-archives/releases/download/0" diff --git a/earthkit/data/translators/pandas.py b/earthkit/data/translators/pandas.py index 9998f8d3..eec66530 100644 --- a/earthkit/data/translators/pandas.py +++ b/earthkit/data/translators/pandas.py @@ -47,8 +47,9 @@ def __call__(self): """ GeoDataFrame requested, if normal pandas convert to geopandas. 
""" - import geopandas as gpd + from earthkit.data.utils.importer import IMPORTER + gpd = IMPORTER.import_module("geopandas") if isinstance(self.data, pd.DataFrame): return gpd.GeoDataFrame(self.data) diff --git a/earthkit/data/utils/importer.py b/earthkit/data/utils/importer.py new file mode 100644 index 00000000..6c344938 --- /dev/null +++ b/earthkit/data/utils/importer.py @@ -0,0 +1,80 @@ +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +""" +Import and check availability of optional dependencies +""" + +from importlib import import_module + + +class ImporterItem: + def __init__(self, module, message): + self._status = None + self._module = module + self._message = message + + if not isinstance(self._module, tuple): + self._module = (self._module,) + + def import_module(self): + if self._status is not None and not self._status: + raise ModuleNotFoundError(self._message) + + try: + r = self._import() + self._status = True + except Exception as e: + self._status = False + raise type(e)(self._message + " ") from e + + return r + + def _import(self): + if len(self._module) == 1: + return import_module(self._module[0]) + else: + for i, m in enumerate(self._module): + if i < len(self._module) - 1: + try: + return import_module(m) + except Exception: + pass + else: + return import_module(m) + + +class Importer: + def __init__(self): + self._conf = {} + for k, v in _conf.items(): + self._conf[k] = ImporterItem(k, v) + + def import_module(self, module): + k = module + if isinstance(k, list): + k = tuple(k) + + return self._conf[k].import_module() + + +_conf = { + "cartopy.crs": "this feature requires 'cartopy' to be installed", + 
"cdsapi": "the 'cds' and 'ads' sources require 'cdsapi' to be installed", + ("codc", "pyodc"): "ODB data handling requires 'codc' or 'pyodc' to be installed!", + "ecmwfapi": "the 'mars' source requires 'ecmwf-api-client' to be installed", + "ecmwf.opendata": "the 'ecmwf-opendata' source requires 'ecmwf-opendata' to be installed", + "geopandas": "this feature requires 'geopandas' to be installed!", + "hda": "the 'wekeo' and 'wekeo-cds` sources require 'hda' to be installed", + "polytope": "the 'polytope' source requires 'polytope-client' to be installed", + "pdbufr": "BUFR data handling requires 'pdbufr' to be installed", + "pyfdb": "the 'fdb' source requires 'pyfdb' to be installed", +} + +IMPORTER = Importer() diff --git a/earthkit/data/utils/projections/__init__.py b/earthkit/data/utils/projections/__init__.py index b1956efb..c9b0ec39 100644 --- a/earthkit/data/utils/projections/__init__.py +++ b/earthkit/data/utils/projections/__init__.py @@ -8,17 +8,6 @@ from . import cf, proj -try: - import cartopy.crs as ccrs - - NO_CARTOPY = False -except ImportError: - NO_CARTOPY = True -CARTOPY_WARNING = ( - "no cartopy installation found; cartopy must be installed to use this feature" -) - - DEFAULT_LATLON_PROJ_STRING = ( "+proj=eqc +ellps=WGS84 +a=6378137.0 +lon_0=0.0 +to_meter=111319.4907932736 " "+no_defs +type=crs" @@ -70,7 +59,9 @@ def __init__(self, proj_string=None, **kwargs): self._proj_string = proj_string def __repr__(self): - if not NO_CARTOPY: + from earthkit.data.utils.importer import IMPORTER + + if IMPORTER.status("cartopy.crs"): return self.to_cartopy_crs().__repr__() else: return self.__str__() @@ -84,13 +75,15 @@ def to_proj_string(self): return self._proj_string def to_cartopy_globe(self): - if NO_CARTOPY: - raise ImportError(CARTOPY_WARNING) + from earthkit.data.utils.importer import IMPORTER + + ccrs = IMPORTER.import_module("cartopy.crs") return ccrs.Globe(**self.globe) def to_cartopy_crs(self): - if NO_CARTOPY: - raise ImportError(CARTOPY_WARNING) + 
from earthkit.data.utils.importer import IMPORTER + + ccrs = IMPORTER.import_module("cartopy.crs") return getattr(ccrs, self.CARTOPY_CRS)( globe=self.to_cartopy_globe(), **self.parameters, diff --git a/setup.cfg b/setup.cfg index 9f69f4e0..f394c79c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -21,23 +21,16 @@ test_suite = tests [options] packages = find_namespace: install_requires = - cdsapi cfgrib>=0.9.10.1 eccodes>=1.5.0 - ecmwf-api-client>=1.6.1 - ecmwf-opendata>=0.1.2 - polytope-client>=0.7.1 dask entrypoints filelock - hda jinja2 markdown multiurl netcdf4 - pdbufr>=0.11.0 - pyfdb - pyodc + pandas pyyaml scipy tqdm @@ -48,11 +41,36 @@ install_requires = include = earthkit.* [options.extras_require] +mars = ecmwf-api-client>=1.6.1 +cds = cdsapi +fdb = pyfdb +polytope = polytope-client>=0.7.1 +wekeo = hda +ecmwf-opendata = ecmwf-opendata>=0.1.2 +odb = pyodc +bufr = + eccodes>=1.5.0 + pdbufr>=0.11.0 +projection = cartopy +geopandas = geopandas +all = + earthkit-data[mars] + earthkit-data[cds] + earthkit-data[ecmwf-opendata] + earthkit-data[fdb] + earthkit-data[polytope] + earthkit-data[wekeo] + earthkit-data[odb] + earthkit-data[bufr] + earthkit-data[projection] + earthkit-data[geopandas] test = + earthkit-data[all] pytest pytest-cov pytest-forked pytest-timeout + nbformat [flake8] max-line-length = 110 diff --git a/tests/translators/test_translators.py b/tests/translators/test_translators.py index f4a92501..02479c8b 100644 --- a/tests/translators/test_translators.py +++ b/tests/translators/test_translators.py @@ -12,7 +12,6 @@ import logging -import geopandas as gpd import numpy as np import pandas as pd import xarray as xr @@ -117,6 +116,10 @@ def test_pd_dataframe_translator(): def test_gpd_dataframe_translator(): + from earthkit.data.utils.importer import IMPORTER + + gpd = IMPORTER.import_module("geopandas") + # Check that an xr.Dataset translator can be created _pdwrapper = wrappers.get_wrapper(gpd.GeoDataFrame()) _trans = pdtranslator.translator(_pdwrapper, 
gpd.GeoDataFrame) diff --git a/tests/utils/test_projections.py b/tests/utils/test_projections.py index cdbc4b6c..884d9036 100644 --- a/tests/utils/test_projections.py +++ b/tests/utils/test_projections.py @@ -11,14 +11,9 @@ import pytest +from earthkit.data.testing import NO_CARTOPY from earthkit.data.utils import projections -NO_CARTOPY = False -try: - import cartopy.crs as ccrs -except ImportError: - NO_CARTOPY = True - def test_from_proj_string_laea(): proj_string = "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs" @@ -52,6 +47,8 @@ def test_from_cf_grid_mapping_aea(): @pytest.mark.skipif(NO_CARTOPY, reason="cartopy is not installed") def test_to_cartopy_crs_laea(): + import cartopy.ccrs as ccrs + proj_string = "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs" projection = projections.Projection.from_proj_string(proj_string) assert projection.to_cartopy_crs() == ccrs.LambertAzimuthalEqualArea( From 950572cda6d2a41083436c6b0fa812e3015cf9f1 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 4 Dec 2023 15:58:11 +0000 Subject: [PATCH 2/9] Add install config for optional dependencies --- docs/install.rst | 2 +- earthkit/data/utils/importer.py | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/docs/install.rst b/docs/install.rst index 262c41e4..85e1434e 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -44,7 +44,7 @@ Alternatively, you can install the following components individually: - polytope: provides access to the :ref:`data-sources-polytope` source - bufr: provides full support for the :ref:`bufr` data type - odb: provides full support for the :ref:`odb` data type - - geojson: adds geojson support + - geopandas: adds geojson/geopandas support - projection: adds projection support E.g. 
to add :ref:`data-sources-mars` support you can use: diff --git a/earthkit/data/utils/importer.py b/earthkit/data/utils/importer.py index 6c344938..f4a03629 100644 --- a/earthkit/data/utils/importer.py +++ b/earthkit/data/utils/importer.py @@ -23,6 +23,12 @@ def __init__(self, module, message): if not isinstance(self._module, tuple): self._module = (self._module,) + @property + def status(self): + if self._status is None: + self.import_module() + return self._status + def import_module(self): if self._status is not None and not self._status: raise ModuleNotFoundError(self._message) @@ -56,12 +62,18 @@ def __init__(self): for k, v in _conf.items(): self._conf[k] = ImporterItem(k, v) - def import_module(self, module): + def _item(self, module): k = module if isinstance(k, list): k = tuple(k) - return self._conf[k].import_module() + return self._conf[k] + + def import_module(self, module): + return self._item(module).import_module() + + def status(self, module): + return self._item(module).status _conf = { From 918ad71697d086c4eb8eaa788d4e31960464000b Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 4 Dec 2023 16:17:08 +0000 Subject: [PATCH 3/9] Add install config for optional dependencies --- earthkit/data/utils/importer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/earthkit/data/utils/importer.py b/earthkit/data/utils/importer.py index f4a03629..573342a5 100644 --- a/earthkit/data/utils/importer.py +++ b/earthkit/data/utils/importer.py @@ -26,10 +26,10 @@ def __init__(self, module, message): @property def status(self): if self._status is None: - self.import_module() + self.import_module(reraise=False) return self._status - def import_module(self): + def import_module(self, reraise=True): if self._status is not None and not self._status: raise ModuleNotFoundError(self._message) @@ -38,7 +38,8 @@ def import_module(self): self._status = True except Exception as e: self._status = False - raise type(e)(self._message + " ") from e + 
if reraise: + raise type(e)(self._message + " ") from e return r From 13e1986c5e6f1eac18535355e141b7433538f297 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 4 Dec 2023 16:27:38 +0000 Subject: [PATCH 4/9] Add install config for optional dependencies --- docs/install.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/install.rst b/docs/install.rst index 85e1434e..8d7deb95 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -22,13 +22,13 @@ You can install **earthkit-data** with all the optional packages in one go by us .. code-block:: bash - python3 -m pip install .earthkit-data[all] + python3 -m pip install earthkit-data[all] Please note in **zsh** you need to use quotes around the square brackets: .. code-block:: bash - python3 -m pip install ".earthkit-data[all]" + python3 -m pip install "earthkit-data[all]" Installing individual optional packages @@ -51,7 +51,7 @@ E.g. to add :ref:`data-sources-mars` support you can use: .. code-block:: bash - python3 -m pip install .earthkit-data[mars] + python3 -m pip install earthkit-data[mars] From dc5e4071e1f916e27966368d6556cd74d5c52b27 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 14 Mar 2024 13:24:15 +0000 Subject: [PATCH 5/9] Make dependencies optional --- Untitled.ipynb | 33 -------- earthkit/data/readers/bufr/bufr.py | 12 +-- earthkit/data/readers/bufr/pandas.py | 4 +- earthkit/data/readers/geojson.py | 3 +- earthkit/data/readers/odb.py | 8 +- earthkit/data/sources/cds.py | 5 +- earthkit/data/sources/fdb.py | 9 +- earthkit/data/sources/mars.py | 6 +- earthkit/data/sources/polytope.py | 9 +- earthkit/data/sources/wekeo.py | 6 +- earthkit/data/sources/wekeocds.py | 6 +- earthkit/data/translators/pandas.py | 3 +- earthkit/data/utils/importer.py | 93 --------------------- earthkit/data/utils/projections/__init__.py | 25 ++++-- tests/translators/test_translators.py | 5 +- tests/utils/test_projections.py | 9 +- 16 files changed, 48 insertions(+), 188 deletions(-) delete 
mode 100644 Untitled.ipynb delete mode 100644 earthkit/data/utils/importer.py diff --git a/Untitled.ipynb b/Untitled.ipynb deleted file mode 100644 index 8c538c3f..00000000 --- a/Untitled.ipynb +++ /dev/null @@ -1,33 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "a922d141-d705-453e-9816-0899283d9cbb", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dev_ecc", - "language": "python", - "name": "dev_ecc" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/earthkit/data/readers/bufr/bufr.py b/earthkit/data/readers/bufr/bufr.py index e7bf7171..57ff0863 100644 --- a/earthkit/data/readers/bufr/bufr.py +++ b/earthkit/data/readers/bufr/bufr.py @@ -11,10 +11,10 @@ from abc import abstractmethod import eccodes +from pdbufr.high_level_bufr.bufr import bufr_code_is_coord from earthkit.data.core import Base from earthkit.data.core.index import Index, MaskIndex, MultiIndex -from earthkit.data.utils.importer import IMPORTER from earthkit.data.utils.message import ( CodesHandle, CodesMessagePositionIndex, @@ -27,16 +27,6 @@ from .. 
import Reader from .pandas import PandasMixIn -# from earthkit.data.utils.importer import IMPORTER - -# pdbufr = IMPORTER.import_module("pdbufr") - -# from pdbufr.high_level_bufr.bufr import bufr_code_is_coord # noqa - - -pdbufr = IMPORTER.import_module("pdbufr") -from pdbufr.high_level_bufr.bufr import bufr_code_is_coord # noqa - BUFR_LS_KEYS = { "edition": "edition", "type": "dataCategory", diff --git a/earthkit/data/readers/bufr/pandas.py b/earthkit/data/readers/bufr/pandas.py index 06db0055..f91f6120 100644 --- a/earthkit/data/readers/bufr/pandas.py +++ b/earthkit/data/readers/bufr/pandas.py @@ -39,9 +39,7 @@ def to_pandas(self, columns=COLUMNS, filters=None, **kwargs): - :ref:`/examples/bufr_synop.ipynb` """ - from earthkit.data.utils.importer import IMPORTER - - pdbufr = IMPORTER.import_module("pdbufr") + import pdbufr filters = {} if filters is None else filters diff --git a/earthkit/data/readers/geojson.py b/earthkit/data/readers/geojson.py index 5e7b6ba6..fabe2e59 100644 --- a/earthkit/data/readers/geojson.py +++ b/earthkit/data/readers/geojson.py @@ -91,9 +91,8 @@ def to_xarray(self, **kwargs): @classmethod def to_pandas_from_multi_paths(cls, paths, **kwargs): - from earthkit.data.utils.importer import IMPORTER + import geopandas as gpd - gpd = IMPORTER.import_module("geopandas") geo_df = gpd.pd.concat([gpd.read_file(path, **kwargs) for path in paths]) return geo_df.set_index(np.arange(len(geo_df))) diff --git a/earthkit/data/readers/odb.py b/earthkit/data/readers/odb.py index 25ddda75..60fdb33b 100644 --- a/earthkit/data/readers/odb.py +++ b/earthkit/data/readers/odb.py @@ -16,9 +16,13 @@ class ODBReader(Reader): def to_pandas(self, **kwargs): - from earthkit.data.utils.importer import IMPORTER + try: + import codc as odc + except Exception: + import pyodc as odc + + LOG.debug("Using pure Python odc decoder.") - odc = IMPORTER.import_module(["codc", "pyodc"]) odc_read_odb_kwargs = kwargs.get("odc_read_odb_kwargs", {}) return odc.read_odb(self.path, 
single=True, **odc_read_odb_kwargs) diff --git a/earthkit/data/sources/cds.py b/earthkit/data/sources/cds.py index eab73881..58842db1 100644 --- a/earthkit/data/sources/cds.py +++ b/earthkit/data/sources/cds.py @@ -12,6 +12,7 @@ import sys from functools import cached_property +import cdsapi import yaml from earthkit.data.core.thread import SoftThreadPool @@ -73,10 +74,6 @@ def client(use_prompt): prompt = CDSAPIKeyPrompt() prompt.check() - from earthkit.data.utils.importer import IMPORTER - - cdsapi = IMPORTER.import_module("cdsapi") - try: return cdsapi.Client() except Exception as e: diff --git a/earthkit/data/sources/fdb.py b/earthkit/data/sources/fdb.py index ccbb8955..f69b8a43 100644 --- a/earthkit/data/sources/fdb.py +++ b/earthkit/data/sources/fdb.py @@ -11,6 +11,8 @@ import os import shutil +import pyfdb + from earthkit.data.sources.file import FileSource from earthkit.data.sources.stream import StreamSource @@ -46,9 +48,6 @@ def __init__(self, *args, stream=True, **kwargs): def mutate(self): if self.stream: - from earthkit.data.utils.importer import IMPORTER - - pyfdb = IMPORTER.import_module("pyfdb") stream = pyfdb.retrieve(self.request) return StreamSource(stream, **self._stream_kwargs) else: @@ -61,10 +60,6 @@ def __init__(self, request): self.path = self._retrieve(request) def _retrieve(self, request): - from earthkit.data.utils.importer import IMPORTER - - pyfdb = IMPORTER.import_module("pyfdb") - def retrieve(target, request): with open(target, "wb") as o, pyfdb.retrieve(request) as i: shutil.copyfileobj(i, o) diff --git a/earthkit/data/sources/mars.py b/earthkit/data/sources/mars.py index 023d6e35..3124e3c9 100644 --- a/earthkit/data/sources/mars.py +++ b/earthkit/data/sources/mars.py @@ -11,6 +11,8 @@ import os import subprocess +import ecmwfapi + from earthkit.data.core.settings import SETTINGS from earthkit.data.core.temporary import temp_file @@ -46,10 +48,6 @@ def execute(self, request, target): class MarsRetriever(ECMWFApi): def 
service(self): - from earthkit.data.utils.importer import IMPORTER - - ecmwfapi = IMPORTER.import_module("ecmwfapi") - if SETTINGS.get("use-standalone-mars-client-when-available"): if os.path.exists(StandaloneMarsClient.EXE): return StandaloneMarsClient() diff --git a/earthkit/data/sources/polytope.py b/earthkit/data/sources/polytope.py index 5400fd79..f2fe281a 100644 --- a/earthkit/data/sources/polytope.py +++ b/earthkit/data/sources/polytope.py @@ -55,9 +55,12 @@ class Polytope(Source): """ def __init__(self, dataset, request, address=None, stream=True, **kwargs) -> None: - from earthkit.data.utils.importer import IMPORTER - - polytope = IMPORTER.import_module("polytope") + try: + import polytope + except ImportError: + raise ImportError( + "Polytope Web Client must be installed with 'pip install polytope-client'" + ) super().__init__() assert isinstance(dataset, str) diff --git a/earthkit/data/sources/wekeo.py b/earthkit/data/sources/wekeo.py index c07999bc..3be0745c 100644 --- a/earthkit/data/sources/wekeo.py +++ b/earthkit/data/sources/wekeo.py @@ -10,18 +10,16 @@ import logging import os +import hda import yaml +from hda.api import DataOrderRequest from earthkit.data.core.thread import SoftThreadPool from earthkit.data.utils import tqdm -from earthkit.data.utils.importer import IMPORTER from .file import FileSource from .prompt import APIKeyPrompt -hda = IMPORTER.import_module("hda") -from hda.api import DataOrderRequest # noqa - LOG = logging.getLogger(__name__) diff --git a/earthkit/data/sources/wekeocds.py b/earthkit/data/sources/wekeocds.py index 8ac88956..3c50e1c4 100644 --- a/earthkit/data/sources/wekeocds.py +++ b/earthkit/data/sources/wekeocds.py @@ -10,19 +10,17 @@ import logging import os +from hda.api import DataOrderRequest + from earthkit.data.core.thread import SoftThreadPool from earthkit.data.decorators import normalize from earthkit.data.utils import tqdm -from earthkit.data.utils.importer import IMPORTER from .file import FileSource from 
.wekeo import EXTENSIONS from .wekeo import ApiClient as WekeoClient from .wekeo import HDAAPIKeyPrompt -hda = IMPORTER.import_module("hda") -from hda.api import DataOrderRequest # noqa - LOG = logging.getLogger(__name__) diff --git a/earthkit/data/translators/pandas.py b/earthkit/data/translators/pandas.py index eec66530..9998f8d3 100644 --- a/earthkit/data/translators/pandas.py +++ b/earthkit/data/translators/pandas.py @@ -47,9 +47,8 @@ def __call__(self): """ GeoDataFrame requested, if normal pandas convert to geopandas. """ - from earthkit.data.utils.importer import IMPORTER + import geopandas as gpd - gpd = IMPORTER.import_module("geopandas") if isinstance(self.data, pd.DataFrame): return gpd.GeoDataFrame(self.data) diff --git a/earthkit/data/utils/importer.py b/earthkit/data/utils/importer.py deleted file mode 100644 index 573342a5..00000000 --- a/earthkit/data/utils/importer.py +++ /dev/null @@ -1,93 +0,0 @@ -# (C) Copyright 2020 ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
-# - -""" -Import and check availability of optional dependencies -""" - -from importlib import import_module - - -class ImporterItem: - def __init__(self, module, message): - self._status = None - self._module = module - self._message = message - - if not isinstance(self._module, tuple): - self._module = (self._module,) - - @property - def status(self): - if self._status is None: - self.import_module(reraise=False) - return self._status - - def import_module(self, reraise=True): - if self._status is not None and not self._status: - raise ModuleNotFoundError(self._message) - - try: - r = self._import() - self._status = True - except Exception as e: - self._status = False - if reraise: - raise type(e)(self._message + " ") from e - - return r - - def _import(self): - if len(self._module) == 1: - return import_module(self._module[0]) - else: - for i, m in enumerate(self._module): - if i < len(self._module) - 1: - try: - return import_module(m) - except Exception: - pass - else: - return import_module(m) - - -class Importer: - def __init__(self): - self._conf = {} - for k, v in _conf.items(): - self._conf[k] = ImporterItem(k, v) - - def _item(self, module): - k = module - if isinstance(k, list): - k = tuple(k) - - return self._conf[k] - - def import_module(self, module): - return self._item(module).import_module() - - def status(self, module): - return self._item(module).status - - -_conf = { - "cartopy.crs": "this feature requires 'cartopy' to be installed", - "cdsapi": "the 'cds' and 'ads' sources require 'cdsapi' to be installed", - ("codc", "pyodc"): "ODB data handling requires 'codc' or 'pyodc' to be installed!", - "ecmwfapi": "the 'mars' source requires 'ecmwf-api-client' to be installed", - "ecmwf.opendata": "the 'ecmwf-opendata' source requires 'ecmwf-opendata' to be installed", - "geopandas": "this feature requires 'geopandas' to be installed!", - "hda": "the 'wekeo' and 'wekeo-cds` sources require 'hda' to be installed", - "polytope": "the 'polytope' source 
requires 'polytope-client' to be installed", - "pdbufr": "BUFR data handling requires 'pdbufr' to be installed", - "pyfdb": "the 'fdb' source requires 'pyfdb' to be installed", -} - -IMPORTER = Importer() diff --git a/earthkit/data/utils/projections/__init__.py b/earthkit/data/utils/projections/__init__.py index 51386f53..1675cffc 100644 --- a/earthkit/data/utils/projections/__init__.py +++ b/earthkit/data/utils/projections/__init__.py @@ -8,6 +8,17 @@ from . import cf, proj +try: + import cartopy.crs as ccrs + + NO_CARTOPY = False +except ImportError: + NO_CARTOPY = True +CARTOPY_WARNING = ( + "no cartopy installation found; cartopy must be installed to use this feature" +) + + DEFAULT_LATLON_PROJ_STRING = ( "+proj=eqc +ellps=WGS84 +a=6378137.0 +lon_0=0.0 +to_meter=111319.4907932736 " "+no_defs +type=crs" @@ -59,9 +70,7 @@ def __init__(self, proj_string=None, **kwargs): self._proj_string = proj_string def __repr__(self): - from earthkit.data.utils.importer import IMPORTER - - if IMPORTER.status("cartopy.crs"): + if not NO_CARTOPY: return self.to_cartopy_crs().__repr__() else: return self.__str__() @@ -75,15 +84,13 @@ def to_proj_string(self): return self._proj_string def to_cartopy_globe(self): - from earthkit.data.utils.importer import IMPORTER - - ccrs = IMPORTER.import_module("cartopy.crs") + if NO_CARTOPY: + raise ImportError(CARTOPY_WARNING) return ccrs.Globe(**self.globe) def to_cartopy_crs(self): - from earthkit.data.utils.importer import IMPORTER - - ccrs = IMPORTER.import_module("cartopy.crs") + if NO_CARTOPY: + raise ImportError(CARTOPY_WARNING) return getattr(ccrs, self.CARTOPY_CRS)( globe=self.to_cartopy_globe(), **self.parameters, diff --git a/tests/translators/test_translators.py b/tests/translators/test_translators.py index 720f558d..26d31481 100644 --- a/tests/translators/test_translators.py +++ b/tests/translators/test_translators.py @@ -12,6 +12,7 @@ import logging +import geopandas as gpd import numpy as np import pandas as pd import xarray as 
xr @@ -117,10 +118,6 @@ def test_pd_dataframe_translator(): def test_gpd_dataframe_translator(): - from earthkit.data.utils.importer import IMPORTER - - gpd = IMPORTER.import_module("geopandas") - # Check that an xr.Dataset translator can be created _pdwrapper = wrappers.get_wrapper(gpd.GeoDataFrame()) _trans = pdtranslator.translator(_pdwrapper, gpd.GeoDataFrame) diff --git a/tests/utils/test_projections.py b/tests/utils/test_projections.py index 8ee34c6e..80a35652 100644 --- a/tests/utils/test_projections.py +++ b/tests/utils/test_projections.py @@ -11,9 +11,14 @@ import pytest -from earthkit.data.testing import NO_CARTOPY from earthkit.data.utils import projections +NO_CARTOPY = False +try: + import cartopy.crs as ccrs +except ImportError: + NO_CARTOPY = True + def test_from_proj_string_laea(): proj_string = "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs" @@ -57,8 +62,6 @@ def test_from_cf_grid_mapping_aea(): @pytest.mark.skipif(NO_CARTOPY, reason="cartopy is not installed") def test_to_cartopy_crs_laea(): - import cartopy.ccrs as ccrs - proj_string = "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs" projection = projections.Projection.from_proj_string(proj_string) assert projection.to_cartopy_crs() == ccrs.LambertAzimuthalEqualArea( From eb1c2319042d29f69b953b3c28316f4ca8a11637 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 14 Mar 2024 13:36:41 +0000 Subject: [PATCH 6/9] Make dependencies optional --- docs/install.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/install.rst b/docs/install.rst index 8d7deb95..265cbb1f 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -34,7 +34,7 @@ Please note in **zsh** you need to use quotes around the square brackets: Installing individual optional packages +++++++++++++++++++++++++++++++++++++++++ -Alternatively, you can install the following components individually: +Alternatively, you can 
install the following components: - mars: provides access to the :ref:`data-sources-mars` source - cds: provides access to the :ref:`data-sources-cds` and :ref:`data-sources-ads` sources @@ -53,6 +53,11 @@ E.g. to add :ref:`data-sources-mars` support you can use: python3 -m pip install earthkit-data[mars] +List of optional dependencies can also be specified : + +.. code-block:: bash + + python3 -m pip install earthkit-data[cds,mars] Installing with conda From a0e1f28638d3ee67444ff46e7e61f3756c987ad3 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 14 Mar 2024 16:33:22 +0000 Subject: [PATCH 7/9] Make dependencies optional --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 32666e21..31fe15f9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,7 +52,6 @@ wekeo = hda ecmwf-opendata = ecmwf-opendata>=0.3.3 odb = pyodc bufr = - eccodes>=1.5.0 pdbufr>=0.11.0 projection = cartopy geopandas = geopandas From a1666e4f7d6adf7b15fa36325e1f5f9f435a98e3 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Fri, 15 Mar 2024 13:59:10 +0000 Subject: [PATCH 8/9] Make eccovjson an optional dependency --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 31fe15f9..e9d6268b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,8 +36,6 @@ install_requires = tqdm xarray>=0.19.0 earthkit-meteo>=0.0.1 - aws-requests-auth - eccovjson>=0.0.5 include_package_data = True [options.packages.find] @@ -55,6 +53,7 @@ bufr = pdbufr>=0.11.0 projection = cartopy geopandas = geopandas +eccovjson = eccovjson>=0.0.5 all = earthkit-data[mars] earthkit-data[cds] @@ -66,6 +65,7 @@ all = earthkit-data[bufr] earthkit-data[projection] earthkit-data[geopandas] + earthkit-data[eccovjson] test = earthkit-data[all] pytest From 3e3dbe6197f911f1f96e925fe8e0369525a9eab3 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Fri, 15 Mar 2024 14:19:49 +0000 Subject: [PATCH 9/9] Update docs --- docs/install.rst | 1 + 1 file changed, 1 
insertion(+) diff --git a/docs/install.rst b/docs/install.rst index 265cbb1f..248a704c 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -46,6 +46,7 @@ Alternatively, you can install the following components: - odb: provides full support for the :ref:`odb` data type - geopandas: adds geojson/geopandas support - projection: adds projection support + - eccovjson: provides access to coverage json data served by the :ref:`data-sources-polytope` source E.g. to add :ref:`data-sources-mars` support you can use: