From 4748c49980ada6e1a5b82845867ebe2ff9153a0f Mon Sep 17 00:00:00 2001 From: nicolasK Date: Thu, 2 Nov 2023 14:46:03 +0100 Subject: [PATCH] update 0.0.1-rc5 setup.py update tests and data --- CHANGELOG.md | 4 +- earthdaily/__init__.py | 2 +- earthdaily/datasets/__init__.py | 29 ++++++++ earthdaily/datasets/data/__init__.py | 0 .../datasets/data}/pivot.geojson | 0 .../earthdatastore/cube_utils/__init__.py | 5 +- .../cube_utils/asset_mapper/__init__.py | 35 ++++----- examples/compare_scale_s2.py | 4 +- examples/county_steel.geojson | 8 --- examples/field_evolution.py | 4 +- examples/first_steps_create_datacube.py | 4 +- examples/multisensors_cube.py | 4 +- setup.py | 8 +-- tests/test_assetmapper.py | 8 +-- tests/test_simple_datacube.py | 71 +++++++++++++------ tests/test_zonalstats.py | 29 +++++--- 16 files changed, 141 insertions(+), 74 deletions(-) create mode 100644 earthdaily/datasets/__init__.py create mode 100644 earthdaily/datasets/data/__init__.py rename {examples => earthdaily/datasets/data}/pivot.geojson (100%) delete mode 100644 examples/county_steel.geojson diff --git a/CHANGELOG.md b/CHANGELOG.md index 14029b1d..24d4509c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.0.1-rc5] 2023-11-02 +## [0.0.1-rc5] ### Added - `common_band_names` default set to True in datacube creation. It uses the new Assets Mapper in order to define to best suitable bands according to user needs. - `clear_cover` in the datacube method when using the `earthdatastore.Auth` method. +- `datasets.load_pivot()` to load a GeoDataFrame of a pivot in Nebraska (the field alternates between corn and soy from year to year). +- Several tests to check and validate the code. 
### Changed diff --git a/earthdaily/__init__.py b/earthdaily/__init__.py index c1da8f75..cfe59f3e 100644 --- a/earthdaily/__init__.py +++ b/earthdaily/__init__.py @@ -1,3 +1,3 @@ -from earthdaily import earthdatastore +from . import earthdatastore, datasets __version__ = "0.0.1-rc5" diff --git a/earthdaily/datasets/__init__.py b/earthdaily/datasets/__init__.py new file mode 100644 index 00000000..73ab4a07 --- /dev/null +++ b/earthdaily/datasets/__init__.py @@ -0,0 +1,29 @@ +import os +import geopandas as gpd + +__pathFile = os.path.dirname(os.path.realpath(__file__)) + + +def load_pivot(to_wkt: bool = False, to_geojson: bool = False): + """ + A pivot located in Nebraska. + + Parameters + ---------- + to_wkt : BOOL, optional + Returns the pivot as a wkt. The default is False. + to_geojson : BOOL, optional + Returns the pivot as a geojson. The default is False. + + Returns + ------- + pivot : str, GeoDataFrame + DESCRIPTION. + + """ + pivot = gpd.read_file(os.path.join(__pathFile, f"data{os.path.sep}pivot.geojson")) + if to_wkt: + pivot = pivot.to_wkt()["geometry"].iloc[0] + if to_geojson: + pivot = pivot.to_json() + return pivot diff --git a/earthdaily/datasets/data/__init__.py b/earthdaily/datasets/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/examples/pivot.geojson b/earthdaily/datasets/data/pivot.geojson similarity index 100% rename from examples/pivot.geojson rename to earthdaily/datasets/data/pivot.geojson diff --git a/earthdaily/earthdatastore/cube_utils/__init__.py b/earthdaily/earthdatastore/cube_utils/__init__.py index d517bcf7..a124e1c4 100644 --- a/earthdaily/earthdatastore/cube_utils/__init__.py +++ b/earthdaily/earthdatastore/cube_utils/__init__.py @@ -43,7 +43,7 @@ def _cube_odc(items_collection, assets=None, times=None, **kwargs): if "resampling" in kwargs: if isinstance(kwargs["resampling"], int): kwargs["resampling"] = Resampling(kwargs["resampling"]).name - chunks = kwargs.get("chunks", dict(x="256", y="256", 
time="auto")) + chunks = kwargs.get("chunks", dict(x="auto", y="auto", time="auto")) kwargs.pop("chunks", None) ds = stac.load( @@ -55,6 +55,7 @@ def _cube_odc(items_collection, assets=None, times=None, **kwargs): groupby=None, **kwargs, ) + ds = ds.chunk(dict(x=256, y=256)) return ds @@ -110,7 +111,7 @@ def datacube( ) if common_band_names and not isinstance(assets, dict): aM = AssetMapper() - assets = aM.map_collection_bands(items_collection[0].collection_id, assets) + assets = aM.map_collection_assets(items_collection[0].collection_id, assets) if isinstance(assets, dict): assets_keys = list(assets.keys()) diff --git a/earthdaily/earthdatastore/cube_utils/asset_mapper/__init__.py b/earthdaily/earthdatastore/cube_utils/asset_mapper/__init__.py index 5bab2e79..b84ae6da 100644 --- a/earthdaily/earthdatastore/cube_utils/asset_mapper/__init__.py +++ b/earthdaily/earthdatastore/cube_utils/asset_mapper/__init__.py @@ -19,31 +19,34 @@ def _collection_exists(self, collection, raise_warning=False): ) return exists - def map_collection_bands(self, collection, bands): - if isinstance(bands, (dict | None)): - return bands + def collection_spectral_assets(self, collection): + return self.collection_mapping(collection) + + def map_collection_assets(self, collection, assets): + if isinstance(assets, (dict | None)): + return assets if not self._collection_exists(collection): - return bands + return assets # HANDLE LIST TO DICT CONVERSION - if isinstance(bands, list): - bands = {band: band for band in bands} + if isinstance(assets, list): + assets = {asset: asset for asset in assets} - output_bands = {} + output_assets = {} config = self.collection_mapping(collection) - # Try to map each band - for band in bands: - if band in config[0]: - output_bands[config[0][band]] = band - # No band found with specified key (common band name) + # Try to map each asset + for asset in assets: + if asset in config[0]: + output_assets[config[0][asset]] = asset + # No asset found with specified key 
(common asset name) else: - # Looking for band matching the specified value (asset name) + # Looking for asset matching the specified value (asset name) matching_assets = [ - key for key, value in config[0].items() if value == band + key for key, value in config[0].items() if value == asset ] if matching_assets: - output_bands[band] = band - return output_bands + output_assets[asset] = asset + return output_assets diff --git a/examples/compare_scale_s2.py b/examples/compare_scale_s2.py index 2b2250bc..08308a68 100644 --- a/examples/compare_scale_s2.py +++ b/examples/compare_scale_s2.py @@ -8,7 +8,7 @@ # Import librairies # ------------------------------------------- -from earthdaily import earthdatastore +from earthdaily import earthdatastore, datasets import geopandas as gpd from matplotlib import pyplot as plt @@ -17,7 +17,7 @@ # ------------------------------------------- # load geojson -pivot = gpd.read_file("pivot.geojson") +pivot = datasets.load_pivot() ############################################################################## # Init earthdatastore with env params diff --git a/examples/county_steel.geojson b/examples/county_steel.geojson deleted file mode 100644 index f2cd7aad..00000000 --- a/examples/county_steel.geojson +++ /dev/null @@ -1,8 +0,0 @@ -{ -"type": "FeatureCollection", -"name": "county_steel", -"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, -"features": [ -{ "type": "Feature", "properties": { "fid": 3039, "amu_id": 2121386, "amu_name": "Steele" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -93.406642, 43.84812 ], [ -93.406565, 44.196377 ], [ -93.04591, 44.196716 ], [ -93.045966, 43.848467 ], [ -93.049524, 43.848443 ], [ -93.406642, 43.84812 ] ] ] } } -] -} diff --git a/examples/field_evolution.py b/examples/field_evolution.py index df592fe0..50f5eadc 100644 --- a/examples/field_evolution.py +++ b/examples/field_evolution.py @@ -8,7 +8,7 @@ # Import librairies # 
------------------------------------------- -from earthdaily import earthdatastore +from earthdaily import earthdatastore, datasets import geopandas as gpd from matplotlib import pyplot as plt @@ -17,7 +17,7 @@ # ------------------------------------------- # load geojson -pivot = gpd.read_file("pivot.geojson") +pivot = datasets.load_pivot() ############################################################################## # Init earthdatastore with env params diff --git a/examples/first_steps_create_datacube.py b/examples/first_steps_create_datacube.py index fb2d9353..849550fd 100644 --- a/examples/first_steps_create_datacube.py +++ b/examples/first_steps_create_datacube.py @@ -13,14 +13,14 @@ # Import librairies # ------------------------------------------- -from earthdaily import earthdatastore +from earthdaily import earthdatastore, datasets import geopandas as gpd from matplotlib import pyplot as plt ########################## # Loading geometry -geometry = gpd.read_file("pivot.geojson") +geometry = datasets.load_pivot() ########################## # Init earthdaily and check available assets diff --git a/examples/multisensors_cube.py b/examples/multisensors_cube.py index 10ca5adf..0df9fad4 100644 --- a/examples/multisensors_cube.py +++ b/examples/multisensors_cube.py @@ -13,14 +13,14 @@ from matplotlib import pyplot as plt from rasterio.enums import Resampling -from earthdaily import earthdatastore +from earthdaily import earthdatastore, datasets ############################################################################## # Import librairies # ------------------------------------------- eds = earthdatastore.Auth() -polygon = gpd.read_file("pivot.geojson") +polygon = datasets.load_pivot() # 500x500m polygon.geometry = ( polygon.geometry.to_crs(epsg=3857).centroid.buffer(500).to_crs(epsg=4326) diff --git a/setup.py b/setup.py index 9134e37c..91a44d8b 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setup( name="earthdaily", - packages=find_packages(), + 
packages=['earthdaily'], version=version, description="earthdaily: easy authentication, search and retrieval of Earth Data Store collections data", author="EarthDaily Agro", @@ -23,24 +23,24 @@ "matplotlib", "joblib", "psutil", - "xarray", "pandas", "geopandas", "rasterio", "pystac-client", + "pystac", "requests", "xarray", "rioxarray", "h5netcdf ", "netcdf4", - "pystac", "stackstac", "odc-stac", "tqdm", "python-dotenv", "rich", - "python-dotenv", ], + include_package_data=True, + package_data={"":['*.geojson']}, license="MIT", zip_safe=False, keywords=["Earth Data Store", "earthdaily", "earthdailyagro", "stac"], diff --git a/tests/test_assetmapper.py b/tests/test_assetmapper.py index 5a541174..e1da4f48 100644 --- a/tests/test_assetmapper.py +++ b/tests/test_assetmapper.py @@ -10,21 +10,21 @@ def setUp(self): def test_unknow_collection(self): collection = "earthdaily-unknow-collection" assets = ["blue", "green", "red", "lambda"] - self.assertEqual(self.aM.map_collection_bands(collection, assets), assets) + self.assertEqual(self.aM.map_collection_assets(collection, assets), assets) with self.assertRaises(NotImplementedError): self.aM._collection_exists(collection, raise_warning=True) def test_return_same_dict(self): collection = "sentinel-2-l2a" assets = {"key": "value", "source": "target", "sensorasset": "myoutputband"} - self.assertEqual(self.aM.map_collection_bands(collection, assets), assets) + self.assertEqual(self.aM.map_collection_assets(collection, assets), assets) def test_sentinel2(self): collection = "sentinel-2-l2a" assets = ["blue", "green", "red", "rededge74", "missing_band"] assets_s2 = ["blue", "green", "red", "rededge2"] self.assertEqual( - list(self.aM.map_collection_bands(collection, assets).keys()), assets_s2 + list(self.aM.map_collection_assets(collection, assets).keys()), assets_s2 ) def test_venus_rededge(self): @@ -36,7 +36,7 @@ def test_venus_rededge(self): } self.assertEqual( - list(self.aM.map_collection_bands(collection, 
rededges.keys()).keys()), + list(self.aM.map_collection_assets(collection, rededges.keys()).keys()), list(rededges.values()), ) diff --git a/tests/test_simple_datacube.py b/tests/test_simple_datacube.py index cf797432..4c14c70e 100644 --- a/tests/test_simple_datacube.py +++ b/tests/test_simple_datacube.py @@ -6,8 +6,9 @@ class TestEarthDataStore(unittest.TestCase): def setUp(self): self.eds = earthdaily.earthdatastore.Auth() + self.pivot = earthdaily.datasets.load_pivot() - def test_venus(self): + def test_rescale_on_venus(self): collection = "venus-l2a" theia_location = "MEAD" max_cloud_cover = 20 @@ -16,27 +17,57 @@ def test_venus(self): "theia:location": {"eq": theia_location}, "eo:cloud_cover": {"lt": max_cloud_cover}, } - - items = self.eds.search(collection, query=query, max_items=1) - crs = items[0].properties['proj:epsg'] - gsd = items[0].properties['gsd'] - - - datacube = self.eds.datacube(collection, assets=['image_file_SRE_B3'], search_kwargs=dict(query=query, max_items=1),resolution=gsd,crs=crs) - - self.assertEqual(datacube.rio.width,9374) - self.assertEqual(datacube.rio.height,10161) - self.assertEqual(datacube.time.size,1) - blue = datacube['image_file_SRE_B3'].isel(x=5000,y=5000,time=0).data.compute() - self.assertEqual(blue,0.028999999999999998) - - + + items = self.eds.search(collection, query=query, max_items=1) + crs = items[0].properties["proj:epsg"] + gsd = items[0].properties["gsd"] + + bands_info = ( + items[0].assets["image_file_SRE_B3"].extra_fields["raster:bands"][0] + ) + scale, offset = bands_info["scale"], bands_info["offset"] + for rescale in True, False: + datacube = self.eds.datacube( + collection, + assets=["image_file_SRE_B3"], + rescale=rescale, + search_kwargs=dict(query=query, max_items=1), + resolution=gsd, + crs=crs, + ) + + # self.assertEqual(datacube.rio.width,9374) + # self.assertEqual(datacube.rio.height,10161) + self.assertEqual(datacube.time.size, 1) + blue = ( + datacube["image_file_SRE_B3"] + .isel(x=4000, y=4000, 
time=0) + .data.compute() + ) + if rescale is False: + blue = blue * scale + offset + self.assertEqual(blue, 0.136) + def test_sentinel1(self): - # TODO : implement s1 collection = "sentinel-1-rtc" - - # datacube = self.eds.datacube(collection, bbox=bbox) + datacube = self.eds.datacube( + collection, + assets=["vh", "vv"], + intersects=self.pivot, + datetime="2022-01", + ) + self.assertEqual(list(datacube.data_vars.keys()), ["vh", "vv"]) + + def test_sentinel2(self): + collection = "sentinel-2-l2a" + datacube = self.eds.datacube( + collection, + assets=["blue", "green", "red"], + intersects=self.pivot, + datetime="2023-07-01", + ) + self.assertEqual(list(datacube.data_vars.keys()), ["blue", "green", "red"]) + - if __name__ == "__main__": unittest.main() diff --git a/tests/test_zonalstats.py b/tests/test_zonalstats.py index ced04591..97761660 100644 --- a/tests/test_zonalstats.py +++ b/tests/test_zonalstats.py @@ -43,24 +43,33 @@ def test_numpy(self): self.datacube, self.gdf, all_touched=True, - operations=dict(mean=np.nanmean, max=np.nanmax, min=np.nanmin, mode=np.mode), + operations=dict( + mean=np.nanmean, max=np.nanmax, min=np.nanmin, mode=np.mode + ), ) - for operation in ['min','max', 'mode']: - self._check_results(zonalstats["first_var"].sel(stats=operation).values, operation=operation) + for operation in ["min", "max", "mode"]: + self._check_results( + zonalstats["first_var"].sel(stats=operation).values, operation=operation + ) def test_basic(self): zonalstats = earthdaily.earthdatastore.cube_utils.zonal_stats( self.datacube, self.gdf, all_touched=True, operations=["min", "max", "mode"] ) - for operation in ['min','max','mode']: - self._check_results(zonalstats["first_var"].sel(stats=operation).values, operation=operation) - - def _check_results(self, stats_values,operation='min'): - results = {"min":np.asarray([[0, self.constant], [9, self.constant]]), - "max":np.asarray([[8, self.constant], [23, self.constant]]), - "mode":np.asarray([[0, self.constant], 
[9, self.constant]])} + for operation in ["min", "max", "mode"]: + self._check_results( + zonalstats["first_var"].sel(stats=operation).values, operation=operation + ) + + def _check_results(self, stats_values, operation="min"): + results = { + "min": np.asarray([[0, self.constant], [9, self.constant]]), + "max": np.asarray([[8, self.constant], [23, self.constant]]), + "mode": np.asarray([[0, self.constant], [9, self.constant]]), + } self.assertTrue(np.all(stats_values == results[operation])) + if __name__ == "__main__": unittest.main()