diff --git a/ecml_tools/commands/scan.py b/ecml_tools/commands/scan.py index e5e22a2..211325e 100644 --- a/ecml_tools/commands/scan.py +++ b/ecml_tools/commands/scan.py @@ -1,3 +1,4 @@ +import fnmatch import os import sys from collections import defaultdict @@ -16,39 +17,21 @@ class Scan(Command): timestamp = True def add_arguments(self, command_parser): - command_parser.add_argument("--extension", default=".grib", help="Extension of the files to scan") command_parser.add_argument( - "--magic", - help="File 'magic' to use to identify the file type. Overrides --extension", + "--match", + help="Give a glob pattern to match files (default: *.grib)", + default="*.grib", + ) + command_parser.add_argument( + "--what", + default="grib", ) command_parser.add_argument("paths", nargs="+", help="Paths to scan") def run(self, args): - EXTENSIONS = { - ".grib": "grib", - ".grib1": "grib", - ".grib2": "grib", - ".grb": "grib", - ".nc": "netcdf", - ".nc4": "netcdf", - } - - MAGICS = { - "GRIB": "grib", - } - - if args.magic: - what = MAGICS[args.magic] - args.magic = args.magic.encode() - else: - what = EXTENSIONS[args.extension] def match(path): - if args.magic: - with open(path, "rb") as f: - return args.magic == f.read(len(args.magic)) - else: - return path.endswith(args.extension) + return fnmatch.fnmatch(path, args.match) paths = [] for path in args.paths: @@ -63,6 +46,7 @@ def match(path): dates = set() gribs = defaultdict(set) unique = defaultdict(lambda: defaultdict(set)) + what = args.what for path in tqdm.tqdm(paths, leave=False): if not match(path): diff --git a/ecml_tools/create/functions/actions/grib.py b/ecml_tools/create/functions/actions/grib.py index d20841f..7c5ff36 100644 --- a/ecml_tools/create/functions/actions/grib.py +++ b/ecml_tools/create/functions/actions/grib.py @@ -13,6 +13,7 @@ def check(ds, paths, **kwargs): + count = 1 for k, v in kwargs.items(): if isinstance(v, (tuple, list)): @@ -41,6 +42,7 @@ def execute(context, dates, path, *args, **kwargs): s = s.sel(valid_datetime=dates, **kwargs) ds = ds + s - check(ds, given_paths, valid_datetime=dates, **kwargs) + if kwargs: + check(ds, given_paths, valid_datetime=dates, **kwargs) return ds diff --git a/ecml_tools/create/functions/steps/unrotate_winds.py b/ecml_tools/create/functions/steps/unrotate_winds.py index c5fcde0..e10ac85 100644 --- a/ecml_tools/create/functions/steps/unrotate_winds.py +++ b/ecml_tools/create/functions/steps/unrotate_winds.py @@ -38,6 +38,7 @@ def rotate_winds( south_pole_longitude, south_pole_rotation_angle=0, ): + # Code from MIR assert south_pole_rotation_angle == 0 C = np.deg2rad(90 - south_pole_latitude) diff --git a/ecml_tools/data.py b/ecml_tools/data.py index 7e17e32..de869df 100644 --- a/ecml_tools/data.py +++ b/ecml_tools/data.py @@ -37,6 +37,9 @@ DEPTH = 0 +# TODO: make numpy arrays read-only +# a.flags.writeable = False + def _debug_indexing(method): @wraps(method) @@ -110,6 +113,7 @@ def __repr__(self): class Dataset: + arguments = {} @cached_property @@ -150,6 +154,11 @@ def _subset(self, **kwargs): statistics = kwargs.pop("statistics") return Statistics(self, statistics)._subset(**kwargs) + if "thinning" in kwargs: + thinning = kwargs.pop("thinning") + method = kwargs.pop("method", None) + return Thinning(self, thinning, method)._subset(**kwargs) + raise NotImplementedError("Unsupported arguments: " + ", ".join(kwargs)) def _frequency_to_indices(self, frequency): @@ -854,6 +863,69 @@ def __getitem__(self, n): return np.concatenate([d[n] for d in self.datasets], axis=self.axis - 1) +class Masked(Forwards): + + def __init__(self, forward, mask): + super().__init__(forward) + assert len(forward.shape) == 4, "Grids must be 1D for now" + self.mask = mask + + @cached_property + def shape(self): + return self.forward.shape[:-1] + (np.count_nonzero(self.mask),) + + @cached_property + def latitudes(self): + return self.forward.latitudes[self.mask] + + @cached_property + def longitudes(self): + return self.forward.longitudes[self.mask] + + def __getitem__(self, index): + if isinstance(index, (int, slice)): + index = (index, slice(None), slice(None), slice(None)) + return self._get_tuple(index) + + @debug_indexing + @expand_list_indexing + def _get_tuple(self, index): + assert self.axis >= len(index) or index[self.axis] == slice( + None + ), f"No support for selecting a subset of the 1D values {index}" + index, changes = index_to_slices(index, self.shape) + + # In case index_to_slices has changed the last slice + index, _ = update_tuple(index, self.axis, slice(None)) + + result = self.forwards[index] + result = result[:, :, :, self.mask] + + return apply_index_to_slices_changes(result, changes) + + +class Thinning(Masked): + + def __init__(self, forward, thinning, method): + self.thinning = thinning + self.method = method + + assert method is None, f"Thinning method not supported: {method}" + latitudes = sorted(set(forward.latitudes)) + longitudes = sorted(set(forward.longitudes)) + + latitudes = set(latitudes[::thinning]) + longitudes = set(longitudes[::thinning]) + + mask = [lat in latitudes and lon in longitudes for lat, lon in zip(forward.latitudes, forward.longitudes)] + mask = np.array(mask, dtype=bool) + + super().__init__(forward, mask) + + def tree(self): + return Node(self, [self.forward.tree()], thinning=self.thinning, method=self.method) + + class Ensemble(GivenAxis): def tree(self): diff --git a/setup.py b/setup.py index 56dc52a..79cb9cf 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ def read(fname): "earthkit-meteo", "pyproj", "semantic-version", + "ecmwflibs>=0.6.3", ]