From 8475e929998234c0b45a3c8744ba2851bce10d54 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Sun, 6 Jun 2021 13:41:23 +0200 Subject: [PATCH 1/3] dev commit --- mapchete/_processing.py | 29 ++++++++++++++++------------- mapchete/formats/default/gtiff.py | 18 +++++++++++------- test/test_formats_geotiff.py | 10 ++++++++++ 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/mapchete/_processing.py b/mapchete/_processing.py index 26ac2bd8..dd51d967 100644 --- a/mapchete/_processing.py +++ b/mapchete/_processing.py @@ -359,15 +359,16 @@ def __init__( self.start_method = start_method self.max_workers = max_workers or os.cpu_count() self.multiprocessing_module = multiprocessing_module - logger.debug( - "init %s Executor with start_method %s and %s workers", - self.multiprocessing_module, - self.start_method, - self.max_workers, - ) - self._pool = self.multiprocessing_module.get_context(self.start_method).Pool( - self.max_workers - ) + if self.max_workers != 1: + logger.debug( + "init %s Executor with start_method %s and %s workers", + self.multiprocessing_module, + self.start_method, + self.max_workers, + ) + self._pool = self.multiprocessing_module.get_context( + self.start_method + ).Pool(self.max_workers) def as_completed( self, func=None, iterable=None, fargs=None, fkwargs=None, chunksize=1 @@ -393,14 +394,16 @@ def as_completed( def __enter__(self): """Enter context manager.""" - self._pool.__enter__() + if self.max_workers != 1: + self._pool.__enter__() return self def __exit__(self, *args): """Exit context manager.""" - logger.debug("closing %s and workers", self._pool) - self._pool.__exit__(*args) - logger.debug("%s closed", self._pool) + if self.max_workers != 1: + logger.debug("closing %s and workers", self._pool) + self._pool.__exit__(*args) + logger.debug("%s closed", self._pool) class FinishedTask: diff --git a/mapchete/formats/default/gtiff.py b/mapchete/formats/default/gtiff.py index b497f217..65789216 100644 --- a/mapchete/formats/default/gtiff.py +++ b/mapchete/formats/default/gtiff.py @@ -336,7 +336,7 @@ def profile(self, tile=None): k: v for k, v in self.output_params.items() if k not in _OUTPUT_PARAMETERS - } + }, ) dst_metadata.pop("transform", None) if tile is not None: @@ -480,9 +480,9 @@ def prepare(self, process_area=None, **kwargs): k: self.output_params.get(k, GTIFF_DEFAULT_PROFILE[k]) for k in GTIFF_DEFAULT_PROFILE.keys() }, - **creation_options + **creation_options, ), - bigtiff=self.output_params.get("bigtiff", "NO") + bigtiff=self.output_params.get("bigtiff", "NO"), ) logger.debug("single GTiff profile: %s", self._profile) self.in_memory = ( @@ -506,11 +506,13 @@ def prepare(self, process_area=None, **kwargs): # (1) use memfile if output is remote or COG if self.cog or path_is_remote(self.path): if self.in_memory: + logger.debug("create MemoryFile") self._memfile = self._ctx.enter_context(MemoryFile()) self.dst = self._ctx.enter_context(self._memfile.open(**self._profile)) else: # in case output raster is too big, use tempfile on disk self._tempfile = self._ctx.enter_context(NamedTemporaryFile()) + logger.debug(f"create tempfile in {self._tempfile.name}") self.dst = self._ctx.enter_context( rasterio.open(self._tempfile.name, "w+", **self._profile) ) @@ -601,7 +603,7 @@ def write(self, process_tile, data): *out_tile.bounds, transform=self.dst.transform, height=self.dst.height, - width=self.dst.width + width=self.dst.width, ) .round_lengths(pixel_precision=0) .round_offsets(pixel_precision=0) @@ -646,7 +648,9 @@ def close(self, exc_type=None, exc_value=None, exc_traceback=None): self.overviews_levels, Resampling[self.overviews_resampling] ) self.dst.update_tags( - ns="rio_overview", resampling=self.overviews_resampling + OVR_RESAMPLING_ALG=Resampling[ + self.overviews_resampling + ].name.upper() ) # write if self.cog: @@ -663,7 +667,7 @@ def close(self, exc_type=None, exc_value=None, exc_traceback=None): self.dst, tmp_dst.name, copy_src_overviews=True, - **self._profile + **self._profile, ) self._bucket_resource.upload_file( Filename=tmp_dst.name, @@ -676,7 +680,7 @@ def close(self, exc_type=None, exc_value=None, exc_traceback=None): self.dst, self.path, copy_src_overviews=True, - **self._profile + **self._profile, ) else: if path_is_remote(self.path): diff --git a/test/test_formats_geotiff.py b/test/test_formats_geotiff.py index af0e1565..18b59ace 100644 --- a/test/test_formats_geotiff.py +++ b/test/test_formats_geotiff.py @@ -315,6 +315,16 @@ def test_output_single_gtiff_overviews(output_single_gtiff): with rasterio.open(mp.config.output.path) as src: assert src.overviews(1) assert src.tags(ns="rio_overview").get("resampling") == "bilinear" + for o in [1, 2, 4, 8]: + a = src.read( + masked=True, out_shape=(1, int(src.height / o), int(src.width / o)) + ) + masked = a.mask.sum() + _all = a.shape[0] * a.shape[1] * a.shape[2] + perc_masked = masked / _all + print(perc_masked) + assert not a.mask.all() + 1 / 0 @pytest.mark.remote From 21cb32654eafdf1b8c640eaa102c03f461a9fd8e Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 23 Jun 2021 14:25:00 +0200 Subject: [PATCH 2/3] use rasterio function to determine overview levels so that no overview block can be smaller than the defined GTiff block size --- mapchete/formats/default/gtiff.py | 34 ++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/mapchete/formats/default/gtiff.py b/mapchete/formats/default/gtiff.py index 65789216..b1a5c2f3 100644 --- a/mapchete/formats/default/gtiff.py +++ b/mapchete/formats/default/gtiff.py @@ -41,6 +41,7 @@ from rasterio.enums import Resampling from rasterio.io import MemoryFile from rasterio.profiles import Profile +from rasterio.rio.overview import get_maximum_overview_level from rasterio.shutil import copy from rasterio.windows import from_bounds from shapely.geometry import box @@ -422,16 +423,6 @@ def __init__(self, output_params, **kwargs): raise ValueError("single file output only works with one zoom level") self.zoom = output_params["delimiters"]["zoom"][0] self.cog = output_params.get("cog", False) - if self.cog or "overviews" in output_params: - self.overviews = True - self.overviews_resampling = output_params.get( - "overviews_resampling", "nearest" - ) - self.overviews_levels = output_params.get( - "overviews_levels", [2 ** i for i in range(1, self.zoom + 1)] - ) - else: - self.overviews = False self.in_memory = output_params.get("in_memory", True) _bucket = self.path.split("/")[2] if self.path.startswith("s3://") else None self._bucket_resource = get_boto3_bucket(_bucket) if _bucket else None @@ -485,11 +476,34 @@ def prepare(self, process_area=None, **kwargs): bigtiff=self.output_params.get("bigtiff", "NO"), ) logger.debug("single GTiff profile: %s", self._profile) + + if self.cog or "overviews" in self.output_params: + self.overviews = True + self.overviews_resampling = self.output_params.get( + "overviews_resampling", "nearest" + ) + self.overviews_levels = self.output_params.get( + "overviews_levels", + [ + 2 ** i + for i in range( + 1, + get_maximum_overview_level( + width, height, minsize=self._profile["blockxsize"] + ), + ) + ], + ) + logger.debug(self.overviews_levels) + else: + self.overviews = False + self.in_memory = ( self.in_memory if self.in_memory is False else height * width < IN_MEMORY_THRESHOLD ) + # set up rasterio if path_exists(self.path): if self.output_params["mode"] != "overwrite": From b1da80863acea2cd087dfdb242d5f36ab2d864cc Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 23 Jun 2021 16:36:05 +0200 Subject: [PATCH 3/3] fix overview tests --- mapchete/formats/default/gtiff.py | 6 +++++- test/test_cli.py | 2 +- test/test_formats_geotiff.py | 7 +------ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/mapchete/formats/default/gtiff.py b/mapchete/formats/default/gtiff.py index b1a5c2f3..e3764c72 100644 --- a/mapchete/formats/default/gtiff.py +++ b/mapchete/formats/default/gtiff.py @@ -477,6 +477,11 @@ def prepare(self, process_area=None, **kwargs): ) logger.debug("single GTiff profile: %s", self._profile) + logger.debug( + get_maximum_overview_level( + width, height, minsize=self._profile["blockxsize"] + ) + ) if self.cog or "overviews" in self.output_params: self.overviews = True self.overviews_resampling = self.output_params.get( @@ -494,7 +499,6 @@ def prepare(self, process_area=None, **kwargs): ) ], ) - logger.debug(self.overviews_levels) else: self.overviews = False diff --git a/test/test_cli.py b/test/test_cli.py index 836fbb18..91f84f94 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -399,7 +399,7 @@ def test_convert_single_gtiff_overviews(cleantopo_br_tif, mp_tmpdir): "--output-pyramid", "geodetic", "-z", - "3", + "7", "--overviews", "--overviews-resampling-method", "bilinear", diff --git a/test/test_formats_geotiff.py b/test/test_formats_geotiff.py index 18b59ace..79048112 100644 --- a/test/test_formats_geotiff.py +++ b/test/test_formats_geotiff.py @@ -314,17 +314,12 @@ def test_output_single_gtiff_overviews(output_single_gtiff): with rasterio.open(mp.config.output.path) as src: assert src.overviews(1) - assert src.tags(ns="rio_overview").get("resampling") == "bilinear" + assert src.tags().get("OVR_RESAMPLING_ALG").lower() == "bilinear" for o in [1, 2, 4, 8]: a = src.read( masked=True, out_shape=(1, int(src.height / o), int(src.width / o)) ) - masked = a.mask.sum() - _all = a.shape[0] * a.shape[1] * a.shape[2] - perc_masked = masked / _all - print(perc_masked) assert not a.mask.all() - 1 / 0 @pytest.mark.remote