Skip to content

Commit

Permalink
feat: osm-rawdata for generating data extracts (#1183)
Browse files Browse the repository at this point in the history
* refactor: remove old unused DefineMapArea component

* refactor: rename /generate endpoint --> /generate_project_data

* fix(frontend): correctly send data_extract_type to project create

* fix(frontend): add data_extract_url param, refactor DataExtract for clarity

* build: add migration for projects.data_extract_url field

* fix: add slash to osm-data-extract url for correct method

* fix(frontend): pass through data extract url & type during proj creation

* fix: set data extract url and type on generate endpoint

* fix(backend): use osm-rawdata for data extract generation + filtering

* fix(frontend): add form_category for osm data extract post params

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* build: update osm-rawdata --> v0.2.0

* build: update osm-rawdata --> v0.2.1

* feat: correctly set data extract url if custom upload or osm

* fix: load fgb data extract and get geojson geoms

* fix: accept xls forms in xls and xlsx format

* fix: optional data extracts for tasking splitting algo

* build: update fmtm-splitter --> v1.1.1

* refactor: simplify project file generation during creation

* fix(frontend): passing params for tasking splitting algo

* refactor: remove data extract type if osm generated

* build: update minio browser to use env var

* refactor: do not include minio port bind by default

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
spwoodcock and pre-commit-ci[bot] authored Feb 12, 2024
1 parent 5d34fbd commit 90fa0d5
Show file tree
Hide file tree
Showing 23 changed files with 724 additions and 876 deletions.
8 changes: 4 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,13 @@ services:
MINIO_ROOT_USER: ${S3_ACCESS_KEY:-fmtm}
MINIO_ROOT_PASSWORD: ${S3_SECRET_KEY:-somelongpassword}
MINIO_VOLUMES: "/mnt/data"
MINIO_BROWSER: "off"
# MINIO_CONSOLE_ADDRESS: ":9090"
MINIO_BROWSER: ${MINIO_BROWSER:-off}
MINIO_CONSOLE_ADDRESS: ":9090"
volumes:
- fmtm_data:/mnt/data
# ports:
# - 9000:9000
# - 9090:9090
# - 9000:9000
# - 9090:9090
networks:
- fmtm-net
command: minio server
Expand Down
1 change: 1 addition & 0 deletions src/backend/app/db/db_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,7 @@ def tasks_bad(self):
data_extract_type = cast(
str, Column(String)
) # Type of data extract (Polygon or Centroid)
data_extract_url = cast(str, Column(String))
task_split_type = cast(
TaskSplitType, Column(Enum(TaskSplitType), nullable=True)
) # Options: divide on square, manual upload, task splitting algo
Expand Down
207 changes: 197 additions & 10 deletions src/backend/app/db/postgis_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,23 @@
"""PostGIS and geometry handling helper funcs."""

import datetime
import logging
from json import dumps as json_dumps
from typing import Optional, Union

from fastapi import HTTPException
from geoalchemy2 import WKBElement
from geoalchemy2.shape import to_shape
from geojson import FeatureCollection
from geojson import loads as geojson_loads
from geojson_pydantic import Feature
from shapely.geometry import mapping
from sqlalchemy import text
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.orm import Session

log = logging.getLogger(__name__)


def timestamp():
"""Get the current time.
Expand Down Expand Up @@ -83,38 +90,218 @@ async def geojson_to_flatgeobuf(
Returns:
flatgeobuf (bytes): a Python bytes representation of a flatgeobuf file.
"""
sql = f"""
# FIXME make this with with properties / tags
# FIXME this is important
# sql = """
# DROP TABLE IF EXISTS public.temp_features CASCADE;

# CREATE TABLE IF NOT EXISTS public.temp_features(
# geom geometry,
# osm_id integer,
# changeset integer,
# timestamp timestamp
# );

# WITH data AS (SELECT CAST(:geojson AS json) AS fc)
# INSERT INTO public.temp_features (geom, osm_id, changeset, timestamp)
# SELECT
# ST_SetSRID(ST_GeomFromGeoJSON(feat->>'geometry'), 4326) AS geom,
# COALESCE((feat->'properties'->>'osm_id')::integer, -1) AS osm_id,
# COALESCE((feat->'properties'->>'changeset')::integer, -1) AS changeset,
# CASE
# WHEN feat->'properties'->>'timestamp' IS NOT NULL
# THEN (feat->'properties'->>'timestamp')::timestamp
# ELSE NULL
# END AS timestamp
# FROM (
# SELECT json_array_elements(fc->'features') AS feat
# FROM data
# ) AS f;

# -- Second param = generate with spatial index
# SELECT ST_AsFlatGeobuf(geoms, true)
# FROM (SELECT * FROM public.temp_features) AS geoms;
# """
sql = """
DROP TABLE IF EXISTS public.temp_features CASCADE;
CREATE TABLE IF NOT EXISTS public.temp_features(
id serial PRIMARY KEY,
geom geometry
);
WITH data AS (SELECT '{geojson}'::json AS fc)
WITH data AS (SELECT CAST(:geojson AS json) AS fc)
INSERT INTO public.temp_features (geom)
SELECT
ST_AsText(ST_GeomFromGeoJSON(feat->>'geometry')) AS geom
ST_SetSRID(ST_GeomFromGeoJSON(feat->>'geometry'), 4326) AS geom
FROM (
SELECT json_array_elements(fc->'features') AS feat
FROM data
) AS f;
WITH thegeom AS
(SELECT * FROM public.temp_features)
SELECT ST_AsFlatGeobuf(thegeom.*)
FROM thegeom;
SELECT ST_AsFlatGeobuf(fgb_data)
FROM (SELECT * FROM public.temp_features as geoms) AS fgb_data;
"""
# Run the SQL
result = db.execute(text(sql))
result = db.execute(text(sql), {"geojson": json_dumps(geojson)})
# Get a memoryview object, then extract to Bytes
flatgeobuf = result.first()

# Cleanup table
db.execute(text("DROP TABLE IF EXISTS public.temp_features CASCADE;"))
# db.execute(text("DROP TABLE IF EXISTS public.temp_features CASCADE;"))

if flatgeobuf:
return flatgeobuf[0].tobytes()

# Nothing returned (either no features passed, or failed)
return None


async def flatgeobuf_to_geojson(
db: Session, flatgeobuf: bytes
) -> Optional[FeatureCollection]:
"""Converts FlatGeobuf data to GeoJSON.
Args:
db (Session): SQLAlchemy db session.
flatgeobuf (bytes): FlatGeobuf data in bytes format.
Returns:
FeatureCollection: A GeoJSON FeatureCollection object.
"""
sql = text(
"""
DROP TABLE IF EXISTS public.temp_fgb CASCADE;
SELECT ST_FromFlatGeobufToTable('public', 'temp_fgb', :fgb_bytes);
SELECT jsonb_build_object(
'type', 'FeatureCollection',
'features', jsonb_agg(feature)
) AS feature_collection
FROM (
SELECT jsonb_build_object(
'type', 'Feature',
'geometry', ST_AsGeoJSON(fgb_data.geom)::jsonb,
'properties', fgb_data.properties::jsonb
) AS feature
FROM (
SELECT *,
NULL as properties
FROM ST_FromFlatGeobuf(null::temp_fgb, :fgb_bytes)
) AS fgb_data
) AS features;
"""
)

try:
result = db.execute(sql, {"fgb_bytes": flatgeobuf})
feature_collection = result.first()
except ProgrammingError as e:
log.error(e)
log.error(
"Attempted flatgeobuf --> geojson conversion, but duplicate column found"
)
return None

if feature_collection:
return geojson_loads(json_dumps(feature_collection[0]))

return None


async def parse_and_filter_geojson(geojson_str: str) -> Optional[FeatureCollection]:
"""Parse geojson string and filter out incomaptible geometries."""
log.debug("Parsing geojson file")
geojson_parsed = geojson_loads(geojson_str)
if isinstance(geojson_parsed, FeatureCollection):
log.debug("Already in FeatureCollection format, skipping reparse")
featcol = geojson_parsed
elif isinstance(geojson_parsed, Feature):
log.debug("Converting Feature to FeatureCollection")
featcol = FeatureCollection(geojson_parsed)
else:
log.debug("Converting geometry to FeatureCollection")
featcol = FeatureCollection[Feature(geometry=geojson_parsed)]

# Validating Coordinate Reference System
check_crs(featcol)

geom_type = await get_featcol_main_geom_type(featcol)

# Filter out geoms not matching main type
features_filtered = [
feature
for feature in featcol.get("features", [])
if feature.get("geometry", {}).get("type", "") == geom_type
]

return FeatureCollection(features_filtered)


async def get_featcol_main_geom_type(featcol: FeatureCollection) -> str:
"""Get the predominant geometry type in a FeatureCollection."""
geometry_counts = {"Polygon": 0, "Point": 0, "Polyline": 0}

for feature in featcol.get("features", []):
geometry_type = feature.get("geometry", {}).get("type", "")
if geometry_type in geometry_counts:
geometry_counts[geometry_type] += 1

return max(geometry_counts, key=geometry_counts.get)


async def check_crs(input_geojson: Union[dict, FeatureCollection]):
"""Validate CRS is valid for a geojson."""
log.debug("validating coordinate reference system")

def is_valid_crs(crs_name):
valid_crs_list = [
"urn:ogc:def:crs:OGC:1.3:CRS84",
"urn:ogc:def:crs:EPSG::4326",
"WGS 84",
]
return crs_name in valid_crs_list

def is_valid_coordinate(coord):
if coord is None:
return False
return -180 <= coord[0] <= 180 and -90 <= coord[1] <= 90

error_message = (
"ERROR: Unsupported coordinate system, it is recommended to use a "
"GeoJSON file in WGS84(EPSG 4326) standard."
)
if "crs" in input_geojson:
crs = input_geojson.get("crs", {}).get("properties", {}).get("name")
if not is_valid_crs(crs):
log.error(error_message)
raise HTTPException(status_code=400, detail=error_message)
return

if input_geojson_type := input_geojson.get("type") == "FeatureCollection":
features = input_geojson.get("features", [])
coordinates = (
features[-1].get("geometry", {}).get("coordinates", []) if features else []
)
elif input_geojson_type := input_geojson.get("type") == "Feature":
coordinates = input_geojson.get("geometry", {}).get("coordinates", [])

geometry_type = (
features[0].get("geometry", {}).get("type")
if input_geojson_type == "FeatureCollection" and features
else input_geojson.get("geometry", {}).get("type", "")
)
if geometry_type == "MultiPolygon":
first_coordinate = coordinates[0][0] if coordinates and coordinates[0] else None
elif geometry_type == "Point":
first_coordinate = coordinates if coordinates else None

elif geometry_type == "LineString":
first_coordinate = coordinates[0] if coordinates else None

else:
first_coordinate = coordinates[0][0] if coordinates else None

if not is_valid_coordinate(first_coordinate):
log.error(error_message)
raise HTTPException(status_code=400, detail=error_message)
Loading

0 comments on commit 90fa0d5

Please sign in to comment.