From b4b8ab54defc29d747f71d7060c4276c5cbe77d4 Mon Sep 17 00:00:00 2001 From: sujan Date: Wed, 10 Jul 2024 17:05:21 +0545 Subject: [PATCH 1/5] feat: added data extracts to avoid creating tasks with no features --- fmtm_splitter/splitter.py | 62 +++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/fmtm_splitter/splitter.py b/fmtm_splitter/splitter.py index 90372a5..3ebbe84 100755 --- a/fmtm_splitter/splitter.py +++ b/fmtm_splitter/splitter.py @@ -32,6 +32,7 @@ from osm_rawdata.postgres import PostgresClient from psycopg2.extensions import connection from shapely.geometry import Polygon, shape +from shapely.geometry.geo import mapping from shapely.ops import unary_union from fmtm_splitter.db import ( @@ -149,10 +150,11 @@ def geojson_to_shapely_polygon( raise ValueError(msg) return shape(features[0].get("geometry")) - + def splitBySquare( # noqa: N802 self, meters: int, + extract_geojson: Optional[Union[dict, FeatureCollection]] = None, ) -> FeatureCollection: """Split the polygon into squares. 
@@ -173,8 +175,13 @@ def splitBySquare( # noqa: N802 cols = list(np.arange(xmin, xmax + width, width)) rows = list(np.arange(ymin, ymax + length, length)) + polygons=[] + if extract_geojson: + features = extract_geojson.get('features', extract_geojson) if isinstance(extract_geojson, dict) else extract_geojson.features + extract_geoms = [shape(feature['geometry']) for feature in features] + else: + extract_geoms = [] - polygons = [] for x in cols[:-1]: for y in rows[:-1]: grid_polygon = Polygon( @@ -182,11 +189,10 @@ def splitBySquare( # noqa: N802 ) clipped_polygon = grid_polygon.intersection(self.aoi) if not clipped_polygon.is_empty: - polygons.append(clipped_polygon) + if any(geom.within(clipped_polygon) for geom in extract_geoms): + polygons.append(clipped_polygon) - self.split_features = FeatureCollection( - [Feature(geometry=poly) for poly in polygons] - ) + self.split_features = FeatureCollection([Feature(geometry=mapping(poly)) for poly in polygons]) return self.split_features def splitBySQL( # noqa: N802 @@ -382,6 +388,7 @@ def outputGeojson( # noqa: N802 def split_by_square( aoi: Union[str, FeatureCollection], meters: int = 100, + osm_extract: Union[str, FeatureCollection] = None, outfile: Optional[str] = None, ) -> FeatureCollection: """Split an AOI by square, dividing into an even grid. 
@@ -400,6 +407,43 @@ def split_by_square( parsed_aoi = FMTMSplitter.input_to_geojson(aoi) aoi_featcol = FMTMSplitter.geojson_to_featcol(parsed_aoi) + if not osm_extract: + config_data = dedent( + """ + query: + select: + from: + - nodes + - ways_poly + - ways_line + where: + tags: + highway: not null + building: not null + waterway: not null + railway: not null + aeroway: not null + """ + ) + # Must be a BytesIO JSON object + config_bytes = BytesIO(config_data.encode()) + + pg = PostgresClient( + "underpass", + config_bytes, + ) + # The total FeatureCollection area merged by osm-rawdata automatically + extract_geojson = pg.execQuery( + aoi_featcol, + extra_params={"fileName": "fmtm_splitter", "useStWithin": False}, + ) + + else: + extract_geojson = FMTMSplitter.input_to_geojson(osm_extract) + if not extract_geojson: + err = "A valid data extract must be provided." + log.error(err) + raise ValueError(err) # Handle multiple geometries passed if len(feat_array := aoi_featcol.get("features", [])) > 1: features = [] @@ -407,16 +451,16 @@ def split_by_square( featcol = split_by_square( FeatureCollection(features=[feat]), meters, + None, f"{Path(outfile).stem}_{index}.geojson)" if outfile else None, ) - feats = featcol.get("features", []) - if feats: + if feats := featcol.get("features", []): features += feats # Parse FeatCols into single FeatCol split_features = FeatureCollection(features) else: splitter = FMTMSplitter(aoi_featcol) - split_features = splitter.splitBySquare(meters) + split_features = splitter.splitBySquare(meters, extract_geojson) if not split_features: msg = "Failed to generate split features." 
log.error(msg) From 6331940493132722e03e03343332b80faf3753d8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Jul 2024 04:06:12 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- fmtm_splitter/splitter.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fmtm_splitter/splitter.py b/fmtm_splitter/splitter.py index 3ebbe84..9ec4727 100755 --- a/fmtm_splitter/splitter.py +++ b/fmtm_splitter/splitter.py @@ -150,7 +150,7 @@ def geojson_to_shapely_polygon( raise ValueError(msg) return shape(features[0].get("geometry")) - + def splitBySquare( # noqa: N802 self, meters: int, @@ -175,10 +175,14 @@ def splitBySquare( # noqa: N802 cols = list(np.arange(xmin, xmax + width, width)) rows = list(np.arange(ymin, ymax + length, length)) - polygons=[] + polygons = [] if extract_geojson: - features = extract_geojson.get('features', extract_geojson) if isinstance(extract_geojson, dict) else extract_geojson.features - extract_geoms = [shape(feature['geometry']) for feature in features] + features = ( + extract_geojson.get("features", extract_geojson) + if isinstance(extract_geojson, dict) + else extract_geojson.features + ) + extract_geoms = [shape(feature["geometry"]) for feature in features] else: extract_geoms = [] @@ -192,7 +196,9 @@ def splitBySquare( # noqa: N802 if any(geom.within(clipped_polygon) for geom in extract_geoms): polygons.append(clipped_polygon) - self.split_features = FeatureCollection([Feature(geometry=mapping(poly)) for poly in polygons]) + self.split_features = FeatureCollection( + [Feature(geometry=mapping(poly)) for poly in polygons] + ) return self.split_features def splitBySQL( # noqa: N802 @@ -424,7 +430,7 @@ def split_by_square( railway: not null aeroway: not null """ - ) + ) # Must be a BytesIO JSON object config_bytes = BytesIO(config_data.encode()) From 
f5834c8067662af3c47acba3558347a7137b4cb8 Mon Sep 17 00:00:00 2001 From: sujan Date: Fri, 12 Jul 2024 10:25:38 +0545 Subject: [PATCH 3/5] update test cases of split_by_square with data_extracts --- tests/test_splitter.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/tests/test_splitter.py b/tests/test_splitter.py index ab9452c..ad743a8 100644 --- a/tests/test_splitter.py +++ b/tests/test_splitter.py @@ -61,38 +61,43 @@ def test_init_splitter_types(aoi_json): assert str(error.value) == "The input AOI cannot contain multiple geometries." -def test_split_by_square_with_dict(aoi_json): +def test_split_by_square_with_dict(aoi_json, extract_json): """Test divide by square from geojson dict types.""" features = split_by_square( aoi_json.get("features")[0], meters=50, + osm_extract=extract_json ) - assert len(features.get("features")) == 54 + assert len(features.get("features")) == 50 features = split_by_square( aoi_json.get("features")[0].get("geometry"), meters=50, + osm_extract=extract_json ) - assert len(features.get("features")) == 54 + assert len(features.get("features")) == 50 -def test_split_by_square_with_str(aoi_json): +def test_split_by_square_with_str(aoi_json, extract_json): """Test divide by square from geojson str and file.""" # GeoJSON Dumps features = split_by_square( geojson.dumps(aoi_json.get("features")[0]), meters=50, + osm_extract=extract_json ) - assert len(features.get("features")) == 54 + assert len(features.get("features")) == 50 # JSON Dumps features = split_by_square( json.dumps(aoi_json.get("features")[0].get("geometry")), meters=50, + osm_extract=extract_json ) - assert len(features.get("features")) == 54 + assert len(features.get("features")) == 50 # File features = split_by_square( "tests/testdata/kathmandu.geojson", meters=100, + osm_extract="tests/testdata/kathmandu_extract.geojson" ) assert len(features.get("features")) == 15 @@ -105,26 +110,28 @@ def 
test_split_by_square_with_file_output(): outfile = Path(__file__).parent.parent / f"{uuid4()}.geojson" features = split_by_square( "tests/testdata/kathmandu.geojson", + osm_extract="tests/testdata/kathmandu_extract.geojson", meters=50, outfile=str(outfile), ) - assert len(features.get("features")) == 54 + assert len(features.get("features")) == 50 # Also check output file with open(outfile, "r") as jsonfile: output_geojson = geojson.load(jsonfile) - assert len(output_geojson.get("features")) == 54 + assert len(output_geojson.get("features")) == 50 -def test_split_by_square_with_multigeom_input(aoi_multi_json): +def test_split_by_square_with_multigeom_input(aoi_multi_json,extract_json): """Test divide by square from geojson dict types.""" file_name = uuid4() outfile = Path(__file__).parent.parent / f"{file_name}.geojson" features = split_by_square( aoi_multi_json, meters=50, + osm_extract=extract_json, outfile=str(outfile), ) - assert len(features.get("features", [])) == 60 + assert len(features.get("features", [])) == 50 for index in [0, 1, 2, 3]: assert Path(f"{Path(outfile).stem}_{index}.geojson)").exists() @@ -208,10 +215,11 @@ def test_cli_help(capsys): def test_split_by_square_cli(): """Test split by square works via CLI.""" infile = Path(__file__).parent / "testdata" / "kathmandu.geojson" + extract_geojson = Path(__file__).parent / "testdata" / "kathmandu_extract.geojson" outfile = Path(__file__).parent.parent / f"{uuid4()}.geojson" try: - main(["--boundary", str(infile), "--meters", "100", "--outfile", str(outfile)]) + main(["--boundary", str(infile), "--meters", "100", "--extract", str(extract_geojson), "--outfile", str(outfile)]) except SystemExit: pass @@ -226,6 +234,7 @@ def test_split_by_features_cli(): infile = Path(__file__).parent / "testdata" / "kathmandu.geojson" outfile = Path(__file__).parent.parent / f"{uuid4()}.geojson" split_geojson = Path(__file__).parent / "testdata" / "kathmandu_split.geojson" + extract_geojson = Path(__file__).parent / 
"testdata" / "kathmandu_extract.geojson" try: main( @@ -234,6 +243,8 @@ def test_split_by_features_cli(): str(infile), "--source", str(split_geojson), + "--extract", + str(extract_geojson), "--outfile", str(outfile), ] From bb72dd85a06d6aa6159cf60c9ce6679445637bf4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 12 Jul 2024 04:43:36 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_splitter.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tests/test_splitter.py b/tests/test_splitter.py index ad743a8..589d87d 100644 --- a/tests/test_splitter.py +++ b/tests/test_splitter.py @@ -64,15 +64,11 @@ def test_init_splitter_types(aoi_json): def test_split_by_square_with_dict(aoi_json, extract_json): """Test divide by square from geojson dict types.""" features = split_by_square( - aoi_json.get("features")[0], - meters=50, - osm_extract=extract_json + aoi_json.get("features")[0], meters=50, osm_extract=extract_json ) assert len(features.get("features")) == 50 features = split_by_square( - aoi_json.get("features")[0].get("geometry"), - meters=50, - osm_extract=extract_json + aoi_json.get("features")[0].get("geometry"), meters=50, osm_extract=extract_json ) assert len(features.get("features")) == 50 @@ -81,23 +77,21 @@ def test_split_by_square_with_str(aoi_json, extract_json): """Test divide by square from geojson str and file.""" # GeoJSON Dumps features = split_by_square( - geojson.dumps(aoi_json.get("features")[0]), - meters=50, - osm_extract=extract_json + geojson.dumps(aoi_json.get("features")[0]), meters=50, osm_extract=extract_json ) assert len(features.get("features")) == 50 # JSON Dumps features = split_by_square( json.dumps(aoi_json.get("features")[0].get("geometry")), meters=50, - osm_extract=extract_json + osm_extract=extract_json, ) assert 
len(features.get("features")) == 50 # File features = split_by_square( "tests/testdata/kathmandu.geojson", meters=100, - osm_extract="tests/testdata/kathmandu_extract.geojson" + osm_extract="tests/testdata/kathmandu_extract.geojson", ) assert len(features.get("features")) == 15 @@ -121,7 +115,7 @@ def test_split_by_square_with_file_output(): assert len(output_geojson.get("features")) == 50 -def test_split_by_square_with_multigeom_input(aoi_multi_json,extract_json): +def test_split_by_square_with_multigeom_input(aoi_multi_json, extract_json): """Test divide by square from geojson dict types.""" file_name = uuid4() outfile = Path(__file__).parent.parent / f"{file_name}.geojson" @@ -219,7 +213,18 @@ def test_split_by_square_cli(): outfile = Path(__file__).parent.parent / f"{uuid4()}.geojson" try: - main(["--boundary", str(infile), "--meters", "100", "--extract", str(extract_geojson), "--outfile", str(outfile)]) + main( + [ + "--boundary", + str(infile), + "--meters", + "100", + "--extract", + str(extract_geojson), + "--outfile", + str(outfile), + ] + ) except SystemExit: pass From 98f1c268eb406b4988c4c1207880f64556135e4e Mon Sep 17 00:00:00 2001 From: sujan Date: Fri, 12 Jul 2024 10:39:31 +0545 Subject: [PATCH 5/5] fix: precommit --- fmtm_splitter/splitter.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fmtm_splitter/splitter.py b/fmtm_splitter/splitter.py index 9ec4727..bd58e61 100755 --- a/fmtm_splitter/splitter.py +++ b/fmtm_splitter/splitter.py @@ -160,6 +160,8 @@ def splitBySquare( # noqa: N802 Args: meters (int): The size of each task square in meters. + extract_geojson (dict, FeatureCollection): an OSM extract geojson, + containing building polygons, or linestrings. Returns: data (FeatureCollection): A multipolygon of all the task boundaries. @@ -404,6 +406,11 @@ def split_by_square( GeoJSON string, or FeatureCollection object. meters(str, optional): Specify the square size for the grid. Defaults to 100m grid. 
+ osm_extract (str, FeatureCollection): an OSM extract geojson, + containing building polygons or linestrings. + Optional param; if not included, an extract is generated for you. + It is recommended to leave this param as default, unless you know + what you are doing. outfile(str): Output to a GeoJSON file on disk. Returns: