Skip to content

Commit

Permalink
Merge pull request #51 from fieldsoftheworld/formats-cfg
Browse files Browse the repository at this point in the history
Further improvements for polygonize command
  • Loading branch information
m-mohr authored Oct 30, 2024
2 parents 3aa6cff + ae0d9bf commit 6ce1a91
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 44 deletions.
36 changes: 36 additions & 0 deletions src/ftw_cli/cfg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Settings used by the inference download command: the Microsoft Planetary
# Computer STAC API URL, the collection id and the bands of interest.
MSPC_URL = "https://planetarycomputer.microsoft.com/api/stac/v1"
COLLECTION_ID = "sentinel-2-l2a"
BANDS_OF_INTEREST = ["B04", "B03", "B02", "B08"]

# Human-readable list of the file formats supported by the inference polygon
# command. Split across lines for readability only; adjacent literals are
# concatenated into a single string.
SUPPORTED_POLY_FORMATS_TXT = (
    "Available file extensions: "
    ".fgb (FlatGeoBuf), "
    ".gpkg (GeoPackage), "
    ".parquet (GeoParquet, see GDAL requirements), "
    ".geojson and .json (GeoJSON)"
)

# All available countries. The order is kept as-is because the list is joined
# into the `--countries` help text of the CLI.
ALL_COUNTRIES = [
    "belgium",
    "cambodia",
    "croatia",
    "estonia",
    "portugal",
    "slovakia",
    "south_africa",
    "sweden",
    "austria",
    "brazil",
    "corsica",
    "denmark",
    "france",
    "india",
    "latvia",
    "luxembourg",
    "finland",
    "germany",
    "kenya",
    "lithuania",
    "netherlands",
    "rwanda",
    "slovenia",
    "spain",
    "vietnam",
]
15 changes: 9 additions & 6 deletions src/ftw_cli/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import click

from .cfg import ALL_COUNTRIES, SUPPORTED_POLY_FORMATS_TXT

# Imports are in the functions below to speed up CLI startup time
# Some of the ML-related imports (presumably torch) are very slow
# See https://github.com/fieldsoftheworld/ftw-baselines/issues/40
Expand All @@ -19,7 +21,7 @@ def data():
@data.command("download", help="Download and unpack the FTW dataset.")
@click.option('--out', '-o', type=str, default="./data", help="Folder where the files will be downloaded to. Defaults to './data'.")
@click.option('--clean_download', '-f', is_flag=True, help="If set, the script will delete the root folder before downloading.")
@click.option('--countries', type=str, default="all", help="Comma-separated list of countries to download. If 'all' (default) is passed, downloads all available countries.")
@click.option('--countries', type=str, default="all", help="Comma-separated list of countries to download. If 'all' (default) is passed, downloads all available countries. Available countries: " + ", ".join(ALL_COUNTRIES))
@click.option('--no-unpack', is_flag=True, help="If set, the script will NOT unpack the downloaded files.")
def data_download(out, clean_download, countries, no_unpack):
from ftw_cli.download import download
Expand Down Expand Up @@ -106,13 +108,14 @@ def inference_run(input, model, out, resize_factor, gpu, patch_size, batch_size,

@inference.command("polygonize", help="Polygonize the output from inference for the raster image given via INPUT.")
@click.argument('input', type=click.Path(exists=True), required=True)
@click.option('--out', '-o', type=str, required=True, help="Output filename for the polygonized data.")
@click.option('--simplify', type=float, default=None, help="Simplification factor to use when polygonizing.")
@click.option('--min_size', type=float, default=500, help="Minimum area size in square meters to include in the output.")
@click.option('--out', '-o', type=str, required=True, help="Output filename for the polygonized data. " + SUPPORTED_POLY_FORMATS_TXT)
@click.option('--simplify', type=float, default=15, show_default=True, help="Simplification factor to use when polygonizing in the unit of the CRS, e.g. meters for Sentinel-2 imagery in UTM. Set to 0 to disable simplification.")
@click.option('--min_size', type=float, default=500, show_default=True, help="Minimum area size in square meters to include in the output.")
@click.option('--overwrite', '-f', is_flag=True, help="Overwrite outputs if they exist.")
def inference_polygonize(input, out, simplify, min_size, overwrite):
@click.option('--close_interiors', is_flag=True, help="Remove the interiors holes in the polygons.")
def inference_polygonize(input, out, simplify, min_size, overwrite, close_interiors):
from ftw_cli.inference import polygonize
polygonize(input, out, simplify, min_size, overwrite)
polygonize(input, out, simplify, min_size, overwrite, close_interiors)

inference.add_command(inference_download)
inference.add_command(inference_polygonize)
Expand Down
30 changes: 2 additions & 28 deletions src/ftw_cli/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,8 @@
import wget
from tqdm import tqdm

# List of all available countries
ALL_COUNTRIES = [
"belgium",
"cambodia",
"croatia",
"estonia",
"portugal",
"slovakia",
"south_africa",
"sweden",
"austria",
"brazil",
"corsica",
"denmark",
"france",
"india",
"latvia",
"luxembourg",
"finland",
"germany",
"kenya",
"lithuania",
"netherlands",
"rwanda",
"slovenia",
"spain",
"vietnam"
]
from .cfg import ALL_COUNTRIES


def load_checksums(local_md5_file_path):
"""
Expand Down
32 changes: 22 additions & 10 deletions src/ftw_cli/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
from ftw.datasets import SingleRasterDataset
from ftw.trainers import CustomSemanticSegmentationTask

MSPC_URL = "https://planetarycomputer.microsoft.com/api/stac/v1"
COLLECTION_ID = "sentinel-2-l2a"
from .cfg import BANDS_OF_INTEREST, COLLECTION_ID, MSPC_URL, SUPPORTED_POLY_FORMATS_TXT


def get_item(id):
if "/" not in id:
Expand All @@ -53,8 +53,6 @@ def create_input(win_a, win_b, out, overwrite):
# Ensure that the base directory exists
os.makedirs(os.path.dirname(out), exist_ok=True)

BANDS_OF_INTEREST = ["B04", "B03", "B02", "B08"]

item_win_a = get_item(win_a)
item_win_b = get_item(win_b)

Expand Down Expand Up @@ -218,7 +216,7 @@ def run(input, model, out, resize_factor, gpu, patch_size, batch_size, padding,
print(f"Finished inference and saved output to {out} in {time.time() - tic:.2f}s")


def polygonize(input, out, simplify, min_size, overwrite):
def polygonize(input, out, simplify, min_size, overwrite, close_interiors):
"""Polygonize the output from inference."""

print(f"Polygonizing input file: {input}")
Expand All @@ -235,8 +233,8 @@ def polygonize(input, out, simplify, min_size, overwrite):

tic = time.time()
rows = []
schema = {'geometry': 'Polygon', 'properties': {'idx': 'int', 'area': 'float'}}
i = 0
schema = {'geometry': 'Polygon', 'properties': {'id': 'str', 'area': 'float'}}
i = 1
# read the input file as a mask
with rasterio.open(input) as src:
input_height, input_width = src.shape
Expand All @@ -250,8 +248,19 @@ def polygonize(input, out, simplify, min_size, overwrite):
# Define the equal-area projection using EPSG:6933
equal_area_crs = CRS.from_epsg(6933)

if out.endswith(".gpkg"):
format = "GPKG"
elif out.endswith(".parquet"):
format = "Parquet"
elif out.endswith(".fgb"):
format = "FlatGeobuf"
elif out.endswith(".geojson") or out.endswith(".json"):
format = "GeoJSON"
else:
raise ValueError("Output format not supported. " + SUPPORTED_POLY_FORMATS_TXT)

with (
fiona.open(out, 'w', 'GPKG', schema=schema, crs=original_crs) as dst,
fiona.open(out, 'w', format, schema=schema, crs=original_crs) as dst,
tqdm(total=total_iterations, desc="Processing mask windows") as pbar
):
for y in range(0, input_height, polygonization_stride):
Expand All @@ -264,7 +273,10 @@ def polygonize(input, out, simplify, min_size, overwrite):
continue

geom = shapely.geometry.shape(geom_geojson)
if simplify is not None:

if close_interiors:
geom = shapely.geometry.Polygon(geom.exterior)
if simplify > 0:
geom = geom.simplify(simplify)

# Calculate the area of the reprojected geometry
Expand All @@ -288,7 +300,7 @@ def polygonize(input, out, simplify, min_size, overwrite):
rows.append({
"geometry": geom,
"properties": {
"idx": i,
"id": str(i),
"area": area # Add the area in m² to the properties
}
})
Expand Down

0 comments on commit 6ce1a91

Please sign in to comment.