fix(cloudmask): handle several products sharing the same cloudmask
nicolasK committed Nov 5, 2024
1 parent 1baa737 commit b5817f2
Showing 3 changed files with 44 additions and 1 deletion.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.2.13] - 2024-11-05

### Fixed

- Handle the case where several products share the same cloudmask

## [0.2.12] - 2024-10-31

### Added
2 changes: 1 addition & 1 deletion earthdaily/__init__.py
@@ -7,7 +7,7 @@
# to hide warnings from rioxarray or nano seconds conversion
# warnings.filterwarnings("ignore")

__version__ = "0.2.12"
__version__ = "0.2.13"


def EarthDataStore(
37 changes: 37 additions & 0 deletions earthdaily/earthdatastore/__init__.py
@@ -70,6 +70,41 @@ def post_query_items(items, query):
    items = ItemCollection(items_)
    return items

def _select_last_common_occurrences(first, second):
    """
    For each date in the second dataset, select the last N occurrences of
    that date from the first dataset, where N is the number of times the
    date appears in the second dataset.

    Parameters:
        first (xarray.Dataset): Source dataset.
        second (xarray.Dataset): Dataset containing the dates to match and their counts.

    Returns:
        xarray.Dataset: Subset of the first dataset with the selected time indices.
    """
    # Convert times to datetime64[ns] if they aren't already
    first_times = first.time.astype("datetime64[ns]")
    second_times = second.time.astype("datetime64[ns]")

    # Get unique dates and their counts from the second dataset
    unique_dates, counts = np.unique(second_times.values, return_counts=True)

    # Initialize list to store selected indices
    selected_indices = []

    # For each unique date in second
    for date, count in zip(unique_dates, counts):
        # Find all indices where this date appears in first
        date_indices = np.where(first_times == date)[0]
        # Take the last 'count' of those indices
        selected_indices.extend(date_indices[-count:])

    # Sort indices in descending order, so the selection comes out newest-first
    selected_indices = sorted(selected_indices, reverse=True)

    # Select these indices from the first dataset
    return first.isel(time=selected_indices)
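
As a usage illustration (not part of the commit), the following hypothetical snippet shows the trimming behavior on two toy xarray datasets; every name and value in it is invented for the example:

import numpy as np
import xarray as xr

times_first = np.array(
    ["2024-11-01", "2024-11-01", "2024-11-01", "2024-11-02"], dtype="datetime64[ns]"
)
times_second = np.array(
    ["2024-11-01", "2024-11-01", "2024-11-02"], dtype="datetime64[ns]"
)

first = xr.Dataset({"band": ("time", [10, 11, 12, 20])}, coords={"time": times_first})
second = xr.Dataset({"mask": ("time", [0, 1, 0])}, coords={"time": times_second})

# 2024-11-01 appears twice in `second`, so only the last two of its three
# occurrences in `first` are kept; 2024-11-02 appears once and keeps its
# single occurrence.
trimmed = _select_last_common_occurrences(first, second)
print(trimmed.band.values)  # [20 12 11] -- newest-first due to the reverse sort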


def _cloud_path_to_http(cloud_path):
    """Convert a cloud path to HTTP URL.
@@ -963,6 +998,8 @@ def datacube(
    **kwargs,
)
xr_datacube["time"] = xr_datacube.time.astype("M8[ns]")
if xr_datacube.time.size != acm_datacube.time.size:
    xr_datacube = _select_last_common_occurrences(xr_datacube, acm_datacube)
acm_datacube["time"] = xr_datacube["time"].time
acm_datacube = cube_utils._match_xy_dims(acm_datacube, xr_datacube)
xr_datacube = xr.merge((xr_datacube, acm_datacube), compat="override")
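For orientation, below is a rough, self-contained sketch of the merge step the two added lines land in; the wrapper name _merge_with_cloudmask is an assumption made for illustration, the spatial alignment via cube_utils._match_xy_dims is omitted for brevity, and the body otherwise mirrors the diff above:

import xarray as xr

def _merge_with_cloudmask(xr_datacube, acm_datacube):
    # When several products share one cloudmask item, the sensor cube carries
    # more time steps than the cloudmask cube, so trim it first (this is the
    # guard added by the commit).
    if xr_datacube.time.size != acm_datacube.time.size:
        xr_datacube = _select_last_common_occurrences(xr_datacube, acm_datacube)
    # Align the cloudmask cube on the (possibly trimmed) sensor time axis,
    # then merge; compat="override" lets the first dataset win on conflicts.
    acm_datacube["time"] = xr_datacube["time"].time
    return xr.merge((xr_datacube, acm_datacube), compat="override")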
