From b5817f2d8993c5f3059a09dfe3d3c3454df23888 Mon Sep 17 00:00:00 2001
From: nicolasK
Date: Tue, 5 Nov 2024 12:30:03 +0100
Subject: [PATCH] fix(cloudmask): when several products share the same
 cloudmask

---
 CHANGELOG.md                          | 6 ++++++
 earthdaily/__init__.py                | 2 +-
 earthdaily/earthdatastore/__init__.py | 37 +++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 89db3e1d..df9b7add 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.2.13] - 2024-11-05
+
+### Fixed
+
+- Manage when several products have the same cloudmask
+
 ## [0.2.12] - 2024-10-31
 
 ### Added
diff --git a/earthdaily/__init__.py b/earthdaily/__init__.py
index 1304908a..e35caac5 100644
--- a/earthdaily/__init__.py
+++ b/earthdaily/__init__.py
@@ -7,7 +7,7 @@
 # to hide warnings from rioxarray or nano seconds conversion
 # warnings.filterwarnings("ignore")
 
-__version__ = "0.2.12"
+__version__ = "0.2.13"
 
 
 def EarthDataStore(
diff --git a/earthdaily/earthdatastore/__init__.py b/earthdaily/earthdatastore/__init__.py
index c1c2b726..053d5c3c 100644
--- a/earthdaily/earthdatastore/__init__.py
+++ b/earthdaily/earthdatastore/__init__.py
@@ -70,6 +70,41 @@ def post_query_items(items, query):
         items = ItemCollection(items_)
     return items
+def _select_last_common_occurrences(first, second):
+    """
+    For each date in the second dataset, select the last N occurrences of that date from the first dataset,
+    where N is the count of that date in the second dataset.
+
+    Parameters:
+    first (xarray.Dataset): Source dataset
+    second (xarray.Dataset): Dataset containing the dates to match and their counts
+
+    Returns:
+    xarray.Dataset: Subset of first dataset with selected time indices
+    """
+    # Convert times to datetime64[ns] if they aren't already
+    first_times = first.time.astype("datetime64[ns]")
+    second_times = second.time.astype("datetime64[ns]")
+
+    # Get unique dates and their counts from second dataset
+    unique_dates, counts = np.unique(second_times.values, return_counts=True)
+
+    # Initialize list to store selected indices
+    selected_indices = []
+
+    # For each unique date in second
+    for date, count in zip(unique_dates, counts):
+        # Find all indices where this date appears in first
+        date_indices = np.where(first_times == date)[0]
+        # Take the last 'count' number of indices
+        selected_indices.extend(date_indices[-count:])
+
+    # Sort the selected indices in descending (reverse temporal) order
+    selected_indices = sorted(selected_indices, reverse=True)
+
+    # Select these indices from the first dataset
+    return first.isel(time=selected_indices)
+
 
 
 def _cloud_path_to_http(cloud_path):
     """Convert a cloud path to HTTP URL.
@@ -963,6 +998,8 @@ def datacube(
                 **kwargs,
             )
             xr_datacube["time"] = xr_datacube.time.astype("M8[ns]")
+            if xr_datacube.time.size != acm_datacube.time.size:
+                xr_datacube = _select_last_common_occurrences(xr_datacube, acm_datacube)
             acm_datacube["time"] = xr_datacube["time"].time
             acm_datacube = cube_utils._match_xy_dims(acm_datacube, xr_datacube)
             xr_datacube = xr.merge((xr_datacube, acm_datacube), compat="override")