Skip to content
This repository has been archived by the owner on Jan 10, 2025. It is now read-only.

Commit

Permalink
simplify config
Browse files Browse the repository at this point in the history
  • Loading branch information
floriankrb committed Mar 12, 2024
1 parent 6300d00 commit 09bd6c7
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 6 deletions.
2 changes: 2 additions & 0 deletions ecml_tools/create/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ def __init__(self, config, *args, **kwargs):
if "group_by" in self.build:
self.dates["group_by"] = self.build.group_by

###########

self.reading_chunks = self.get("reading_chunks")
assert "flatten_values" not in self.output
assert "flatten_grid" in self.output, self.output
Expand Down
40 changes: 34 additions & 6 deletions ecml_tools/create/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,28 @@
VERSION = "0.20"


def _to_datetime(value):
    """Return *value* as a ``datetime.datetime``.

    ``datetime.datetime`` instances are passed through untouched. Anything
    else is assumed to be a numpy ``datetime64`` scalar and is first cast to
    second precision, because coarser (``[D]``) or finer (``[ns]``) units do
    not convert to ``datetime.datetime`` directly.
    """
    if isinstance(value, datetime.datetime):
        return value
    return value.astype("datetime64[s]").astype(datetime.datetime)


def _years_earlier(moment, years):
    """Return *moment* moved back by *years* calendar years (second precision)."""
    year = moment.year - years
    try:
        return datetime.datetime(year, moment.month, moment.day, moment.hour, moment.minute, moment.second)
    except ValueError:
        if (moment.month, moment.day) != (2, 29):
            raise
        # 29 February has no counterpart in a non-leap year: use 28 February.
        return datetime.datetime(year, 2, 28, moment.hour, moment.minute, moment.second)


def _same_type_as(template, moment):
    """Return *moment* converted to the type of *template*.

    Keeps the returned ``end`` homogeneous with the elements of ``dates`` so
    that callers can treat both members of the result the same way.
    """
    if isinstance(template, datetime.datetime):
        return moment
    # e.g. numpy.datetime64(datetime.datetime(...))
    return type(template)(moment)


def default_statistics_dates(dates):
    """Choose the default period over which statistics are computed.

    The tail of the dataset is left out of the statistics:

    * 20 years of data or more: the last 2 years are left out;
    * 10 years of data or more: the last 1 year is left out;
    * otherwise: the last 20% of the dates are left out.

    Args:
        dates: Non-empty, chronologically ordered sequence of dates. Elements
            may be ``datetime.datetime`` objects or numpy ``datetime64``
            scalars (e.g. a numpy array of dates).

    Returns:
        A ``(start, end)`` tuple. ``start`` is ``dates[0]``. ``end`` is
        inclusive and has the same type as the elements of ``dates``. In the
        two "years" cases it is computed from the last date and is not
        necessarily an element of ``dates``.

    Raises:
        ValueError: If ``dates`` is empty.
    """
    if len(dates) == 0:
        raise ValueError("Cannot choose default statistics dates from an empty list of dates")

    first = _to_datetime(dates[0])
    last = _to_datetime(dates[-1])
    n_years = (last - first).days // 365

    if n_years >= 20:
        end = _same_type_as(dates[-1], _years_earlier(last, 2))
        print(f"Number of years {n_years} >= 20, leaving out 2 years. {end=}")
        return dates[0], end

    if n_years >= 10:  # leave out 1 year
        end = _same_type_as(dates[-1], _years_earlier(last, 1))
        print(f"Number of years {n_years} >= 10, leaving out 1 years. {end=}")
        return dates[0], end

    # Leave out 20% of the data. ``end`` is inclusive, so the last kept sample
    # is at index k - 1 (index k would keep one sample too many).
    k = int(len(dates) * 0.8)
    end = dates[max(k - 1, 0)]
    print(f"Number of years {n_years} < 10, leaving out 20%. {end=}")
    return dates[0], end


class Loader:
def __init__(self, *, path, print=print, **kwargs):
# Catch all floating point errors, including overflow, sqrt(<0), etc
Expand Down Expand Up @@ -89,12 +111,18 @@ def build_input(self):

def build_statistics_dates(self, start, end):
    """Return the statistics period as a pair of ISO 8601 strings.

    Args:
        start: First date of the statistics period, or ``None``.
        end: Last date of the statistics period, or ``None``.

    Returns:
        A ``(start, end)`` tuple of strings produced by ``isoformat()``.

    When both ``start`` and ``end`` are ``None`` the period is chosen by
    ``default_statistics_dates``. Otherwise the requested interval is mapped
    onto the dataset with ``dates_interval_to_indices`` and the first and last
    matching dates are used.
    """
    ds = open_dataset(self.path)
    dates = ds.dates

    if end is None and start is None:
        start, end = default_statistics_dates(dates)
    else:
        subset = ds.dates_interval_to_indices(start, end)
        start = dates[subset[0]]
        end = dates[subset[-1]]

    # Dataset dates are presumably numpy datetime64 values (they expose
    # ``astype``) -- TODO confirm. ``default_statistics_dates`` may hand back
    # a plain ``datetime.datetime``, which has no ``astype``, so only convert
    # what still needs converting.
    if not isinstance(start, datetime.datetime):
        start = start.astype(datetime.datetime)
    if not isinstance(end, datetime.datetime):
        end = end.astype(datetime.datetime)
    return (start.isoformat(), end.isoformat())

def read_dataset_metadata(self):
ds = open_dataset(self.path)
Expand Down

0 comments on commit 09bd6c7

Please sign in to comment.