fix(cloudmask): handle several products sharing the same cloudmask
nicolasK committed Nov 5, 2024
1 parent 1baa737 commit b5817f2
Showing 3 changed files with 44 additions and 1 deletion.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.2.13] - 2024-11-05

### Fixed

- Handle the case where several products share the same cloudmask

## [0.2.12] - 2024-10-31

### Added
2 changes: 1 addition & 1 deletion earthdaily/__init__.py
@@ -7,7 +7,7 @@
# to hide warnings from rioxarray or nano seconds conversion
# warnings.filterwarnings("ignore")

__version__ = "0.2.12"
__version__ = "0.2.13"


def EarthDataStore(
37 changes: 37 additions & 0 deletions earthdaily/earthdatastore/__init__.py
@@ -70,6 +70,41 @@ def post_query_items(items, query):
    items = ItemCollection(items_)
    return items

def _select_last_common_occurrences(first, second):
    """
    For each date in the second dataset, select the last N occurrences of
    that date from the first dataset, where N is the number of times the
    date appears in the second dataset.

    Parameters:
        first (xarray.Dataset): Source dataset.
        second (xarray.Dataset): Dataset containing the dates to match and their counts.

    Returns:
        xarray.Dataset: Subset of the first dataset with the selected time indices.
    """
    # Convert times to datetime64[ns] if they aren't already
    first_times = first.time.astype("datetime64[ns]")
    second_times = second.time.astype("datetime64[ns]")

    # Get unique dates and their counts from the second dataset
    unique_dates, counts = np.unique(second_times.values, return_counts=True)

    # Initialize list to store selected indices
    selected_indices = []

    # For each unique date in second
    for date, count in zip(unique_dates, counts):
        # Find all indices where this date appears in first
        date_indices = np.where(first_times == date)[0]
        # Take the last 'count' of those indices
        selected_indices.extend(date_indices[-count:])

    # Sort indices in descending order, so the selection comes out newest-first
    selected_indices = sorted(selected_indices, reverse=True)

    # Select these indices from the first dataset
    return first.isel(time=selected_indices)
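
As a usage illustration (not part of the commit), the following hypothetical snippet shows the trimming behavior on two toy xarray datasets; every name and value in it is invented for the example:

import numpy as np
import xarray as xr

times_first = np.array(
    ["2024-11-01", "2024-11-01", "2024-11-01", "2024-11-02"], dtype="datetime64[ns]"
)
times_second = np.array(
    ["2024-11-01", "2024-11-01", "2024-11-02"], dtype="datetime64[ns]"
)

first = xr.Dataset({"band": ("time", [10, 11, 12, 20])}, coords={"time": times_first})
second = xr.Dataset({"mask": ("time", [0, 1, 0])}, coords={"time": times_second})

# 2024-11-01 appears twice in `second`, so only the last two of its three
# occurrences in `first` are kept; 2024-11-02 appears once and keeps its
# single occurrence.
trimmed = _select_last_common_occurrences(first, second)
print(trimmed.band.values)  # [20 12 11] -- newest-first due to the reverse sort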


def _cloud_path_to_http(cloud_path):
    """Convert a cloud path to HTTP URL.
@@ -963,6 +998,8 @@ def datacube(
    **kwargs,
)
xr_datacube["time"] = xr_datacube.time.astype("M8[ns]")
if xr_datacube.time.size != acm_datacube.time.size:
    xr_datacube = _select_last_common_occurrences(xr_datacube, acm_datacube)
acm_datacube["time"] = xr_datacube["time"].time
acm_datacube = cube_utils._match_xy_dims(acm_datacube, xr_datacube)
xr_datacube = xr.merge((xr_datacube, acm_datacube), compat="override")
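For orientation, below is a rough, self-contained sketch of the merge step the two added lines land in; the wrapper name _merge_with_cloudmask is an assumption made for illustration, the spatial alignment via cube_utils._match_xy_dims is omitted for brevity, and the body otherwise mirrors the diff above:

import xarray as xr

def _merge_with_cloudmask(xr_datacube, acm_datacube):
    # When several products share one cloudmask item, the sensor cube carries
    # more time steps than the cloudmask cube, so trim it first (this is the
    # guard added by the commit).
    if xr_datacube.time.size != acm_datacube.time.size:
        xr_datacube = _select_last_common_occurrences(xr_datacube, acm_datacube)
    # Align the cloudmask cube on the (possibly trimmed) sensor time axis,
    # then merge; compat="override" lets the first dataset win on conflicts.
    acm_datacube["time"] = xr_datacube["time"].time
    return xr.merge((xr_datacube, acm_datacube), compat="override")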
