From 90fa0d5078e27a34c721813878ed24967d13e5f7 Mon Sep 17 00:00:00 2001 From: Sam <78538841+spwoodcock@users.noreply.github.com> Date: Mon, 12 Feb 2024 07:42:47 +0000 Subject: [PATCH] feat: osm-rawdata for generating data extracts (#1183) * refactor: remove old unused DefineMapArea component * refactor: rename /generate endpoint --> /generate_project_data * fix(frontend): correctly send data_extract_type to project create * fix(frontend): add data_extract_url param, refactor DataExtract for clarity * build: add migration for projects.data_extract_url field * fix: add slash to osm-data-extract url for correct method * fix(frontend): pass through data extract url & type during proj creation * fix: set data extract url and type on generate endpoint * fix(backend): use osm-rawdata for data extract generation + filtering * fix(frontend): add form_category for osm data extract post params * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * build: update osm-rawdata --> v0.2.0 * build: update osm-rawdata --> v0.2.1 * feat: correctly set data extract url if custom upload or osm * fix: load fgb data extract and get geojson geoms * fix: accept xls forms in xls and xlsx format * fix: optional data extracts for tasking splitting algo * build: update fmtm-splitter --> v1.1.1 * refactor: simplify project file generation during creation * fix(frontend): passing params for tasking splitting algo * refactor: remove data extract type if osm generated * build: update minio browser to use env var * refactor: do not include minio port bind by default --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docker-compose.yml | 8 +- src/backend/app/db/db_models.py | 1 + src/backend/app/db/postgis_utils.py | 207 ++++- src/backend/app/projects/project_crud.py | 706 +++++++----------- src/backend/app/projects/project_routes.py | 153 ++-- src/backend/migrations/005-remove-qrcode.sql | 2 +- .../migrations/007-add-extract-url.sql | 12 + .../migrations/init/fmtm_base_schema.sql | 1 + .../migrations/revert/005-remove-qrcode.sql | 2 +- .../migrations/revert/007-add-extract-url.sql | 9 + src/backend/pdm.lock | 15 +- src/backend/pyproject.toml | 4 +- src/backend/tests/test_projects_routes.py | 2 - src/frontend/src/api/CreateProjectService.ts | 107 +-- .../createnewproject/DataExtract.tsx | 172 +++-- .../createnewproject/SelectForm.tsx | 6 +- .../createnewproject/SplitTasks.tsx | 11 +- .../createnewproject/UploadArea.tsx | 3 - .../validation/DataExtractValidation.tsx | 7 +- .../validation/UploadAreaValidation.tsx | 2 - .../src/store/types/ICreateProject.ts | 2 +- src/frontend/src/views/CreateProject.tsx | 7 - src/frontend/src/views/DefineAreaMap.tsx | 161 ---- 23 files changed, 724 insertions(+), 876 deletions(-) create mode 100644 src/backend/migrations/007-add-extract-url.sql create mode 100644 src/backend/migrations/revert/007-add-extract-url.sql delete mode 100644 src/frontend/src/views/DefineAreaMap.tsx diff --git a/docker-compose.yml b/docker-compose.yml index c1cdef9cd8..6db8f404f5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -188,13 +188,13 @@ services: MINIO_ROOT_USER: ${S3_ACCESS_KEY:-fmtm} MINIO_ROOT_PASSWORD: ${S3_SECRET_KEY:-somelongpassword} MINIO_VOLUMES: "/mnt/data" - MINIO_BROWSER: "off" - # MINIO_CONSOLE_ADDRESS: ":9090" + MINIO_BROWSER: ${MINIO_BROWSER:-off} + MINIO_CONSOLE_ADDRESS: ":9090" volumes: - fmtm_data:/mnt/data # ports: - # - 9000:9000 - # - 9090:9090 + # - 9000:9000 + # - 9090:9090 
networks: - fmtm-net command: minio server diff --git a/src/backend/app/db/db_models.py b/src/backend/app/db/db_models.py index bf5a9bab34..d6346be80b 100644 --- a/src/backend/app/db/db_models.py +++ b/src/backend/app/db/db_models.py @@ -601,6 +601,7 @@ def tasks_bad(self): data_extract_type = cast( str, Column(String) ) # Type of data extract (Polygon or Centroid) + data_extract_url = cast(str, Column(String)) task_split_type = cast( TaskSplitType, Column(Enum(TaskSplitType), nullable=True) ) # Options: divide on square, manual upload, task splitting algo diff --git a/src/backend/app/db/postgis_utils.py b/src/backend/app/db/postgis_utils.py index b5c6f72f81..4fafaacaa6 100644 --- a/src/backend/app/db/postgis_utils.py +++ b/src/backend/app/db/postgis_utils.py @@ -18,16 +18,23 @@ """PostGIS and geometry handling helper funcs.""" import datetime +import logging +from json import dumps as json_dumps from typing import Optional, Union +from fastapi import HTTPException from geoalchemy2 import WKBElement from geoalchemy2.shape import to_shape from geojson import FeatureCollection +from geojson import loads as geojson_loads from geojson_pydantic import Feature from shapely.geometry import mapping from sqlalchemy import text +from sqlalchemy.exc import ProgrammingError from sqlalchemy.orm import Session +log = logging.getLogger(__name__) + def timestamp(): """Get the current time. @@ -83,38 +90,218 @@ async def geojson_to_flatgeobuf( Returns: flatgeobuf (bytes): a Python bytes representation of a flatgeobuf file. """ - sql = f""" + # FIXME make this work with properties / tags + # FIXME this is important + # sql = """ + # DROP TABLE IF EXISTS public.temp_features CASCADE; + + # CREATE TABLE IF NOT EXISTS public.temp_features( + # geom geometry, + # osm_id integer, + # changeset integer, + # timestamp timestamp + # ); + + # WITH data AS (SELECT CAST(:geojson AS json) AS fc) + # INSERT INTO public.temp_features (geom, osm_id, changeset, timestamp) + # SELECT + # ST_SetSRID(ST_GeomFromGeoJSON(feat->>'geometry'), 4326) AS geom, + # COALESCE((feat->'properties'->>'osm_id')::integer, -1) AS osm_id, + # COALESCE((feat->'properties'->>'changeset')::integer, -1) AS changeset, + # CASE + # WHEN feat->'properties'->>'timestamp' IS NOT NULL + # THEN (feat->'properties'->>'timestamp')::timestamp + # ELSE NULL + # END AS timestamp + # FROM ( + # SELECT json_array_elements(fc->'features') AS feat + # FROM data + # ) AS f; + + # -- Second param = generate with spatial index + # SELECT ST_AsFlatGeobuf(geoms, true) + # FROM (SELECT * FROM public.temp_features) AS geoms; + # """ sql = """ DROP TABLE IF EXISTS public.temp_features CASCADE; CREATE TABLE IF NOT EXISTS public.temp_features( - id serial PRIMARY KEY, geom geometry ); - WITH data AS (SELECT '{geojson}'::json AS fc) + WITH data AS (SELECT CAST(:geojson AS json) AS fc) INSERT INTO public.temp_features (geom) SELECT - ST_AsText(ST_GeomFromGeoJSON(feat->>'geometry')) AS geom + ST_SetSRID(ST_GeomFromGeoJSON(feat->>'geometry'), 4326) AS geom FROM ( SELECT json_array_elements(fc->'features') AS feat FROM data ) AS f; - WITH thegeom AS - (SELECT * FROM public.temp_features) - SELECT ST_AsFlatGeobuf(thegeom.*) - FROM thegeom; + SELECT ST_AsFlatGeobuf(fgb_data) + FROM (SELECT * FROM public.temp_features as geoms) AS fgb_data; """ # Run the SQL - result = db.execute(text(sql)) + result = db.execute(text(sql), {"geojson": json_dumps(geojson)}) # Get a memoryview object, then extract to Bytes flatgeobuf = result.first() # Cleanup table - db.execute(text("DROP TABLE 
IF EXISTS public.temp_features CASCADE;")) + # db.execute(text("DROP TABLE IF EXISTS public.temp_features CASCADE;")) if flatgeobuf: return flatgeobuf[0].tobytes() # Nothing returned (either no features passed, or failed) return None + + +async def flatgeobuf_to_geojson( + db: Session, flatgeobuf: bytes +) -> Optional[FeatureCollection]: + """Converts FlatGeobuf data to GeoJSON. + + Args: + db (Session): SQLAlchemy db session. + flatgeobuf (bytes): FlatGeobuf data in bytes format. + + Returns: + FeatureCollection: A GeoJSON FeatureCollection object. + """ + sql = text( + """ + DROP TABLE IF EXISTS public.temp_fgb CASCADE; + + SELECT ST_FromFlatGeobufToTable('public', 'temp_fgb', :fgb_bytes); + + SELECT jsonb_build_object( + 'type', 'FeatureCollection', + 'features', jsonb_agg(feature) + ) AS feature_collection + FROM ( + SELECT jsonb_build_object( + 'type', 'Feature', + 'geometry', ST_AsGeoJSON(fgb_data.geom)::jsonb, + 'properties', fgb_data.properties::jsonb + ) AS feature + FROM ( + SELECT *, + NULL as properties + FROM ST_FromFlatGeobuf(null::temp_fgb, :fgb_bytes) + ) AS fgb_data + ) AS features; + """ + ) + + try: + result = db.execute(sql, {"fgb_bytes": flatgeobuf}) + feature_collection = result.first() + except ProgrammingError as e: + log.error(e) + log.error( + "Attempted flatgeobuf --> geojson conversion, but duplicate column found" + ) + return None + + if feature_collection: + return geojson_loads(json_dumps(feature_collection[0])) + + return None + + +async def parse_and_filter_geojson(geojson_str: str) -> Optional[FeatureCollection]: + """Parse geojson string and filter out incomaptible geometries.""" + log.debug("Parsing geojson file") + geojson_parsed = geojson_loads(geojson_str) + if isinstance(geojson_parsed, FeatureCollection): + log.debug("Already in FeatureCollection format, skipping reparse") + featcol = geojson_parsed + elif isinstance(geojson_parsed, Feature): + log.debug("Converting Feature to FeatureCollection") + featcol = FeatureCollection(geojson_parsed) + else: + log.debug("Converting geometry to FeatureCollection") + featcol = FeatureCollection[Feature(geometry=geojson_parsed)] + + # Validating Coordinate Reference System + check_crs(featcol) + + geom_type = await get_featcol_main_geom_type(featcol) + + # Filter out geoms not matching main type + features_filtered = [ + feature + for feature in featcol.get("features", []) + if feature.get("geometry", {}).get("type", "") == geom_type + ] + + return FeatureCollection(features_filtered) + + +async def get_featcol_main_geom_type(featcol: FeatureCollection) -> str: + """Get the predominant geometry type in a FeatureCollection.""" + geometry_counts = {"Polygon": 0, "Point": 0, "Polyline": 0} + + for feature in featcol.get("features", []): + geometry_type = feature.get("geometry", {}).get("type", "") + if geometry_type in geometry_counts: + geometry_counts[geometry_type] += 1 + + return max(geometry_counts, key=geometry_counts.get) + + +async def check_crs(input_geojson: Union[dict, FeatureCollection]): + """Validate CRS is valid for a geojson.""" + log.debug("validating coordinate reference system") + + def is_valid_crs(crs_name): + valid_crs_list = [ + "urn:ogc:def:crs:OGC:1.3:CRS84", + "urn:ogc:def:crs:EPSG::4326", + "WGS 84", + ] + return crs_name in valid_crs_list + + def is_valid_coordinate(coord): + if coord is None: + return False + return -180 <= coord[0] <= 180 and -90 <= coord[1] <= 90 + + error_message = ( + "ERROR: Unsupported coordinate system, it is recommended to use a " + "GeoJSON file in WGS84(EPSG 
4326) standard." ) if "crs" in input_geojson: crs = input_geojson.get("crs", {}).get("properties", {}).get("name") if not is_valid_crs(crs): log.error(error_message) raise HTTPException(status_code=400, detail=error_message) return + if (input_geojson_type := input_geojson.get("type")) == "FeatureCollection": features = input_geojson.get("features", []) coordinates = ( features[-1].get("geometry", {}).get("coordinates", []) if features else [] ) elif (input_geojson_type := input_geojson.get("type")) == "Feature": coordinates = input_geojson.get("geometry", {}).get("coordinates", []) + geometry_type = ( features[0].get("geometry", {}).get("type") if input_geojson_type == "FeatureCollection" and features else input_geojson.get("geometry", {}).get("type", "") ) if geometry_type == "MultiPolygon": first_coordinate = coordinates[0][0] if coordinates and coordinates[0] else None elif geometry_type == "Point": first_coordinate = coordinates if coordinates else None + elif geometry_type == "LineString": first_coordinate = coordinates[0] if coordinates else None + else: first_coordinate = coordinates[0][0] if coordinates else None + if not is_valid_coordinate(first_coordinate): log.error(error_message) raise HTTPException(status_code=400, detail=error_message) diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index 35f22c0306..670bd6ea35 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -19,7 +19,6 @@ import json import os -import time import uuid from asyncio import gather from concurrent.futures import ThreadPoolExecutor, wait @@ -41,18 +40,15 @@ from geojson.feature import Feature, FeatureCollection from loguru import logger as log from osm_fieldwork.basemapper import create_basemap_file -from osm_fieldwork.data_models import data_models_path -from osm_fieldwork.filter_data import FilterData from osm_fieldwork.json2osm import json2osm from osm_fieldwork.OdkCentral import OdkAppUser from osm_fieldwork.xlsforms import xlsforms_path from osm_rawdata.postgres import PostgresClient -from shapely import to_geojson, wkt +from shapely import wkt from shapely.geometry import ( Polygon, shape, ) -from shapely.ops import unary_union from sqlalchemy import and_, column, func, inspect, select, table, text from sqlalchemy.dialects.postgresql import insert from sqlalchemy.orm import Session @@ -61,7 +57,14 @@ from app.config import encrypt_value, settings from app.db import db_models from app.db.database import get_db -from app.db.postgis_utils import geojson_to_flatgeobuf, geometry_to_geojson +from app.db.postgis_utils import ( + check_crs, + flatgeobuf_to_geojson, + geojson_to_flatgeobuf, + geometry_to_geojson, + get_featcol_main_geom_type, + parse_and_filter_geojson, +) from app.models.enums import HTTPStatus, ProjectRole from app.projects import project_schemas from app.s3 import add_obj_to_bucket, get_obj_from_bucket @@ -476,152 +479,54 @@ def remove_z_dimension(coord): ) -async def get_data_extract_from_osm_rawdata( - aoi: UploadFile, - category: str, -): - """Get data extract using OSM RawData module. - - Filters by a specific category. 
- """ - try: - # read entire file - aoi_content = await aoi.read() - boundary = json.loads(aoi_content) - - # Validatiing Coordinate Reference System - check_crs(boundary) - - # Get pre-configured filter for category - config_path = f"{data_models_path}/{category}.yaml" - - if boundary["type"] == "FeatureCollection": - # Convert each feature into a Shapely geometry - geometries = [ - shape(feature["geometry"]) for feature in boundary["features"] - ] - updated_geometry = unary_union(geometries) - else: - updated_geometry = shape(boundary["geometry"]) - - # Convert the merged MultiPolygon to a single Polygon using convex hull - merged_polygon = updated_geometry.convex_hull - - # Convert the merged polygon back to a GeoJSON-like dictionary - boundary = { - "type": "Feature", - "geometry": to_geojson(merged_polygon), - "properties": {}, - } - - # # OSM Extracts using raw data api - pg = PostgresClient("underpass", config_path) - data_extract = pg.execQuery(boundary) - return data_extract - except Exception as e: - log.error(e) - raise HTTPException(status_code=400, detail=str(e)) from e - - -async def get_data_extract_url( - db: Session, +async def generate_data_extract( aoi: Union[FeatureCollection, Feature, dict], - project_id: Optional[int] = None, + extract_config: Optional[BytesIO] = None, ) -> str: - """Request an extract from raw-data-api and extract the file contents. + """Request a new data extract in flatgeobuf format. + + Args: + db (Session): + Database session. + aoi (Union[FeatureCollection, Feature, dict]): + Area of interest for data extraction. + extract_config (Optional[BytesIO], optional): + Configuration for data extraction. Defaults to None. - - The query is posted to raw-data-api and job initiated for fetching the extract. - - The status of the job is polled every few seconds, until 'SUCCESS' is returned. - - The resulting flatgeobuf file is streamed in the frontend. + Raises: + HTTPException: + When necessary parameters are missing or data extraction fails. Returns: - str: the URL for the flatgeobuf data extract. + str: + URL for the flatgeobuf data extract. 
""" - if project_id: - db_project = await get_project_by_id(db, project_id) - if not db_project: - log.error(f"Project {project_id} doesn't exist!") - return False - - # TODO update db field data_extract_type --> data_extract_url - fgb_url = db_project.data_extract_type - - # If extract already exists, return url to it - if fgb_url: - return fgb_url - - # FIXME replace below with get_data_extract_from_osm_rawdata - - # Data extract does not exist, continue to create - # Filters for osm extracts - query = { - "filters": { - "tags": { - "all_geometry": { - "join_or": {"building": [], "highway": [], "waterway": []} - } - } - } - } + if not extract_config: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="To generate a new data extract a form_category must be specified.", + ) - if (geom_type := aoi.get("type")) == "FeatureCollection": - # Convert each feature into a Shapely geometry - geometries = [ - shape(feature.get("geometry")) for feature in aoi.get("features", []) - ] - merged_geom = unary_union(geometries) - elif geom_type == "Feature": - merged_geom = shape(aoi.get("geometry")) - else: - merged_geom = shape(aoi) - # Convert the merged geoms to a single Polygon GeoJSON using convex hull - query["geometry"] = json.loads(to_geojson(merged_geom.convex_hull)) - - # Filename to generate - # query["fileName"] = f"fmtm-project-{project_id}-extract" - query["fileName"] = "fmtm-extract" - # Output to flatgeobuf format - query["outputType"] = "fgb" - # Generate without zipping - query["bind_zip"] = False - # Optional authentication - # headers["access-token"] = settings.OSM_SVC_ACCOUNT_TOKEN - - log.debug(f"Query for raw data api: {query}") - base_url = settings.UNDERPASS_API_URL - query_url = f"{base_url}/snapshot/" - headers = {"accept": "application/json", "Content-Type": "application/json"} - - # Send the request to raw data api - try: - result = requests.post(query_url, data=json.dumps(query), headers=headers) - result.raise_for_status() - except requests.exceptions.HTTPError: - error_dict = result.json() - error_dict["status_code"] = result.status_code - log.error(f"Failed to get extract from raw data api: {error_dict}") - return error_dict - - task_id = result.json().get("task_id") - - # Check status of task (PENDING, or SUCCESS) - task_url = f"{base_url}/tasks/status/{task_id}" - while True: - result = requests.get(task_url, headers=headers) - if result.json().get("status") == "PENDING": - # Wait 2 seconds before polling again - time.sleep(2) - elif result.json().get("status") == "SUCCESS": - break - - fgb_url = result.json().get("result", {}).get("download_url", None) + pg = PostgresClient( + "underpass", + extract_config, + # auth_token=settings.OSM_SVC_ACCOUNT_TOKEN, + ) + fgb_url = pg.execQuery( + aoi, + extra_params={ + "fileName": "fmtm_extract", + "outputType": "fgb", + "bind_zip": False, + }, + ) if not fgb_url: - log.error("Could not get download URL for data extract. Did the API change?") - log.error(f"To debug: {task_url}") + msg = "Could not get download URL for data extract. Did the API change?" + log.error(msg) raise HTTPException( status_code=HTTPStatus.UNPROCESSABLE_ENTITY, - detail="Could not get download URL for data extract. 
Did the API change?", + detail=msg, ) return fgb_url @@ -630,8 +535,8 @@ async def get_data_extract_url( async def split_geojson_into_tasks( db: Session, project_geojson: Union[dict, FeatureCollection], - extract_geojson: Union[dict, FeatureCollection], no_of_buildings: int, + extract_geojson: Optional[Union[dict, FeatureCollection]] = None, ): """Splits a project into tasks. @@ -641,6 +546,9 @@ async def split_geojson_into_tasks( boundary. - extract_geojson (Union[dict, FeatureCollection]): A GeoJSON of the project - boundary osm data extract (features). + extract_geojson (Optional[Union[dict, FeatureCollection]]): A GeoJSON of + the project boundary osm data extract (features). + If not included, an extract is generated automatically. no_of_buildings (int): The number of buildings to include in each task. Returns: @@ -1006,6 +914,65 @@ async def get_odk_id_for_project(db: Session, project_id: int): return project_info.odkid +async def get_or_set_data_extract_url( + db: Session, + project_id: int, + url: Optional[str], + extract_type: Optional[str], +) -> str: + """Get or set the data extract URL for a project. + + Args: + db (Session): SQLAlchemy database session. + project_id (int): The ID of the project. + url (Optional[str]): URL to the streamable flatgeobuf data extract. + If not passed, the existing extract URL for the project is returned. + extract_type (str): The type of data extract, required if setting the URL + in the database. + + Returns: + str: URL to fgb file in S3. + """ + db_project = await get_project_by_id(db, project_id) + if not db_project: + msg = f"Project ({project_id}) not found" + log.error(msg) + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=msg) + + # If url, get extract + # If not url, get new extract / set in db + if not url: + existing_url = db_project.data_extract_url + + if not existing_url: + msg = ( + f"No data extract exists for project ({project_id}). " + "To generate one, call 'projects/generate-data-extract/'" + ) + log.error(msg) + raise HTTPException(status_code=HTTPStatus.UNPROCESSABLE_ENTITY, detail=msg) + return existing_url + + if not extract_type: + msg = "The extract_type param is required if URL is set."
+ log.error(msg) + raise HTTPException(status_code=HTTPStatus.UNPROCESSABLE_ENTITY, detail=msg) + + await update_data_extract_url_in_db(db, db_project, url, extract_type) + + return url + + +async def update_data_extract_url_in_db( + db: Session, project: db_models.DbProject, url: str, extract_type: str +): + """Update the data extract params in the database for a project.""" + log.debug(f"Setting data extract URL for project ({project.id}): {url}") + project.data_extract_url = url + project.data_extract_type = extract_type + db.commit() + + async def upload_custom_data_extract( db: Session, project_id: int, @@ -1027,46 +994,28 @@ async def upload_custom_data_extract( if not project: raise HTTPException(status_code=404, detail="Project not found") - log.debug("Parsing geojson file") - geojson_parsed = geojson.loads(geojson_str) - if isinstance(geojson_parsed, FeatureCollection): - log.debug("Already in FeatureCollection format, skipping reparse") - featcol = geojson_parsed - elif isinstance(geojson_parsed, Feature): - log.debug("Converting Feature to FeatureCollection") - featcol = FeatureCollection(geojson_parsed) - else: - log.debug("Converting geometry to FeatureCollection") - featcol = FeatureCollection[Feature(geometry=geojson_parsed)] - - # Validating Coordinate Reference System - check_crs(featcol) - - # FIXME use osm-fieldwork filter/clean data - # cleaned = FilterData() - # models = xlsforms_path.replace("xlsforms", "data_models") - # xlsfile = f"{category}.xls" # FIXME: for custom form - # file = f"{xlsforms_path}/{xlsfile}" - # if os.path.exists(file): - # title, extract = cleaned.parse(file) - # elif os.path.exists(f"{file}x"): - # title, extract = cleaned.parse(f"{file}x") - # # Remove anything in the data extract not in the choices sheet. - # cleaned_data = cleaned.cleanData(features_data) - feature_type = featcol.get("features", [])[-1].get("geometry", {}).get("type") - if feature_type not in ["Polygon", "Polyline"]: + featcol_filtered = await parse_and_filter_geojson(geojson_str) + if not featcol_filtered: + raise HTTPException( + status_code=HTTPStatus.UNPROCESSABLE_ENTITY, + detail="Could not process geojson input", + ) + + # Get geom type from data extract + geom_type = await get_featcol_main_geom_type(featcol_filtered) + if geom_type not in ["Polygon", "LineString", "Point"]: msg = ( - "Extract does not contain valid geometry types, from 'Polygon' " - "and 'Polyline'" + "Extract does not contain valid geometry types; must be one of " + "'Polygon', 'LineString' or 'Point'." 
) log.error(msg) - raise HTTPException(status_code=404, detail=msg) - features_filtered = [ - feature - for feature in featcol.get("features", []) - if feature.get("geometry", {}).get("type", "") == feature_type - ] - featcol_filtered = FeatureCollection(features_filtered) + raise HTTPException(status_code=HTTPStatus.UNPROCESSABLE_ENTITY, detail=msg) + geom_name_map = { + "Polygon": "polygon", + "Point": "centroid", + "LineString": "line", + } + data_extract_type = geom_name_map.get(geom_type, "polygon") log.debug( "Generating fgb object from geojson with " @@ -1083,13 +1032,14 @@ async def upload_custom_data_extract( content_type="application/octet-stream", ) - # Add url to database - s3_fgb_url = f"{settings.S3_DOWNLOAD_ROOT}/{settings.S3_BUCKET_NAME}{s3_fgb_path}" - log.debug(f"Commiting extract S3 path to database: {s3_fgb_url}") - project.data_extract_type = s3_fgb_url - db.commit() + # Add url and type to database + s3_fgb_full_url = ( + f"{settings.S3_DOWNLOAD_ROOT}/{settings.S3_BUCKET_NAME}{s3_fgb_path}" + ) - return s3_fgb_url + await update_data_extract_url_in_db(db, project, s3_fgb_full_url, data_extract_type) + + return s3_fgb_full_url def flatten_dict(d, parent_key="", sep="_"): @@ -1251,11 +1201,9 @@ def generate_task_files( def generate_appuser_files( db: Session, project_id: int, - extract_polygon: bool, - custom_xls_form: str, - extracts_contents: str, - category: str, - form_type: str, + custom_form: Optional[BytesIO], + form_category: str, + form_format: str, background_task_id: Optional[uuid.UUID] = None, ): """Generate the files for a project. @@ -1265,11 +1213,9 @@ def generate_appuser_files( Parameters: - db: the database session - project_id: Project ID - - extract_polygon: boolean to determine if we should extract the polygon - - custom_xls_form: the xls file to upload if we have a custom form - - extracts_contents: the custom data extract - - category: the category of the project - - form_type: weather the form is xls, xlsx or xml + - custom_form: the xls file to upload if we have a custom form + - form_category: the category for the custom XLS form + - form_format: whether the form is xls, xlsx or xml - background_task_id: the task_id of the background task running this function. """ try: 
- odk_credentials = { - "odk_central_url": one.odk_central_url, - "odk_central_user": one.odk_central_user, - "odk_central_password": one.odk_central_password, - } - - odk_credentials = project_schemas.ODKCentralDecrypted(**odk_credentials) + get_project_sync = async_to_sync(get_project) + project = get_project_sync(db, project_id) + if not project: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=f"Project with id {project_id} does not exist", + ) - xform_title = one.xform_title if one.xform_title else None + # Get odk credentials from project. + odk_credentials = { + "odk_central_url": project.odk_central_url, + "odk_central_user": project.odk_central_user, + "odk_central_password": project.odk_central_password, + } - category = xform_title - if custom_xls_form: - xlsform = f"/tmp/{category}.{form_type}" - contents = custom_xls_form - with open(xlsform, "wb") as f: - f.write(contents) - else: - xlsform = f"{xlsforms_path}/{xform_title}.xls" + odk_credentials = project_schemas.ODKCentralDecrypted(**odk_credentials) - # Data Extracts - if extracts_contents is not None: - project_log.info("Uploading data extracts") - upload_extract_sync = async_to_sync(upload_custom_data_extract) - upload_extract_sync(db, project_id, extracts_contents) + if custom_form: + # TODO uncomment after refactor to use BytesIO + # xlsform = custom_form - else: - project = ( - db.query(db_models.DbProject) - .filter(db_models.DbProject.id == project_id) - .first() - ) - config_file_contents = project.form_config_file - - project_log.info("Extracting Data from OSM") - - config_path = "/tmp/config.yaml" - if config_file_contents: - with open(config_path, "w", encoding="utf-8") as config_file_handle: - config_file_handle.write(config_file_contents.decode("utf-8")) - else: - config_path = f"{data_models_path}/{category}.yaml" - - # # OSM Extracts for whole project - pg = PostgresClient("underpass", config_path) - outline = json.loads(one.outline) - boundary = {"type": "Feature", "properties": {}, "geometry": outline} - data_extract = pg.execQuery(boundary) - filter = FilterData(xlsform) - - updated_data_extract = {"type": "FeatureCollection", "features": []} - filtered_data_extract = ( - filter.cleanData(data_extract) - if data_extract - else updated_data_extract + xlsform = f"/tmp/{form_category}.{form_format}" + with open(xlsform, "wb") as f: + f.write(custom_form.getvalue()) + else: + # TODO uncomment after refactor to use BytesIO + # xlsform_path = f"{xlsforms_path}/{form_category}.xls" + # with open(xlsform_path, "rb") as f: + # xlsform = BytesIO(f.read()) + + xlsform = f"{xlsforms_path}/{form_category}.xls" + + # filter = FilterData(xlsform) + # updated_data_extract = {"type": "FeatureCollection", "features": []} + # filtered_data_extract = ( + # filter.cleanData(data_extract) + # if data_extract + # else updated_data_extract + # ) + + # FIXME do we need these geoms in the db? + # FIXME can we remove this section? + get_extract_geojson_sync = async_to_sync(get_project_features_geojson) + data_extract_geojson = get_extract_geojson_sync(db, project_id) + # Collect feature mappings for bulk insert + feature_mappings = [] + for feature in data_extract_geojson["features"]: + # If the osm extracts contents do not have a title, + # provide an empty text for that. 
+ properties = feature.get("properties", {}) + properties["title"] = "" + + feature_shape = shape(feature["geometry"]) + + wkb_element = from_shape(feature_shape, srid=4326) + feature_mapping = { + "project_id": project_id, + "category_title": form_category, + "geometry": wkb_element, + "properties": properties, + } + feature_mappings.append(feature_mapping) + # Bulk insert the osm extracts into the db. + db.bulk_insert_mappings(db_models.DbFeatures, feature_mappings) + + # Generating QR Code, XForm and uploading OSM Extracts to the form. + # Creating app users and updating the role of that user. + get_task_id_list_sync = async_to_sync(tasks_crud.get_task_id_list) + task_list = get_task_id_list_sync(db, project_id) + + # Run with expensive task via threadpool + def wrap_generate_task_files(task): + """Func to wrap and return errors from thread. + + Also passes its own database session for thread safety. + If we pass a single db session to multiple threads, + there may be inconsistencies or errors. 
- """ - try: - generate_task_files( - next(get_db()), - project_id, - task, - xlsform, - form_type, - odk_credentials, - ) - except Exception as e: - log.exception(str(e)) - - # Use a ThreadPoolExecutor to run the synchronous code in threads - with ThreadPoolExecutor() as executor: - # Submit tasks to the thread pool - futures = [ - executor.submit(wrap_generate_task_files, task) - for task in task_list - ] - # Wait for all tasks to complete - wait(futures) + except Exception as e: + log.exception(str(e)) + + # Use a ThreadPoolExecutor to run the synchronous code in threads + with ThreadPoolExecutor() as executor: + # Submit tasks to the thread pool + futures = [ + executor.submit(wrap_generate_task_files, task) for task in task_list + ] + # Wait for all tasks to complete + wait(futures) if background_task_id: # Update background task status to COMPLETED @@ -1504,43 +1393,48 @@ async def get_task_geometry(db: Session, project_id: int): return json.dumps(feature_collection) -async def get_project_features_geojson(db: Session, project_id: int): +async def get_project_features_geojson( + db: Session, project: Union[db_models.DbProject, int] +) -> FeatureCollection: """Get a geojson of all features for a task.""" - db_features = ( - db.query(db_models.DbFeatures) - .filter(db_models.DbFeatures.project_id == project_id) - .all() - ) + if isinstance(project, int): + db_project = await get_project(db, project) + else: + db_project = project + project_id = db_project.id - query = text( - f"""SELECT jsonb_build_object( - 'type', 'FeatureCollection', - 'features', jsonb_agg(feature) - ) - FROM ( - SELECT jsonb_build_object( - 'type', 'Feature', - 'id', id, - 'geometry', ST_AsGeoJSON(geometry)::jsonb, - 'properties', properties - ) AS feature - FROM features - WHERE project_id={project_id} - ) features; - """ - ) + data_extract_url = db_project.data_extract_url - result = db.execute(query) - features = result.fetchone()[0] + if not data_extract_url: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=f"No data extract exists for project ({project_id})", + ) - # Create mapping feat_id:task_id - task_feature_mapping = {feat.id: feat.task_id for feat in db_features} + # If local debug URL, replace with Docker service name + data_extract_url = data_extract_url.replace( + settings.S3_DOWNLOAD_ROOT, + settings.S3_ENDPOINT, + ) - for feature in features["features"]: - if (feat_id := feature["id"]) in task_feature_mapping: - feature["properties"]["task_id"] = task_feature_mapping[feat_id] + with requests.get(data_extract_url) as response: + if not response.ok: + raise HTTPException( + status_code=HTTPStatus.UNPROCESSABLE_ENTITY, + detail=f"Download failed for data extract, project ({project_id})", + ) + data_extract_geojson = await flatgeobuf_to_geojson(db, response.content) - return features + if not data_extract_geojson: + raise HTTPException( + status_code=HTTPStatus.UNPROCESSABLE_ENTITY, + detail=( + "Failed to convert flatgeobuf --> geojson for " + f"project ({project_id})" + ), + ) + + return data_extract_geojson async def get_json_from_zip(zip, filename: str, error_detail: str): @@ -1564,7 +1458,7 @@ async def get_outline_from_geojson_file_in_zip( with zip.open(filename) as file: data = file.read() json_dump = json.loads(data) - check_crs(json_dump) # Validatiing Coordinate Reference System + await check_crs(json_dump) # Validatiing Coordinate Reference System feature_collection = FeatureCollection(json_dump) feature = feature_collection["features"][feature_index] geom = 
feature["geometry"] @@ -1857,6 +1751,9 @@ async def update_project_form( f"/tmp/{project_title}_{category}.geojson" # This file will store osm extracts ) + # FIXME test this works + # FIXME PostgresClient.getFeatures does not exist... + # FIXME getFeatures is part of the DataExtract osm-fieldwork class extract_polygon = True if project.data_extract_type == "polygon" else False project = table("projects", column("outline")) @@ -2188,63 +2085,6 @@ async def update_project_location_info( db_project.location_str = address if address is not None else "" -def check_crs(input_geojson: Union[dict, FeatureCollection]): - """Validate CRS is valid for a geojson.""" - log.debug("validating coordinate reference system") - - def is_valid_crs(crs_name): - valid_crs_list = [ - "urn:ogc:def:crs:OGC:1.3:CRS84", - "urn:ogc:def:crs:EPSG::4326", - "WGS 84", - ] - return crs_name in valid_crs_list - - def is_valid_coordinate(coord): - if coord is None: - return False - return -180 <= coord[0] <= 180 and -90 <= coord[1] <= 90 - - error_message = ( - "ERROR: Unsupported coordinate system, it is recommended to use a " - "GeoJSON file in WGS84(EPSG 4326) standard." - ) - if "crs" in input_geojson: - crs = input_geojson.get("crs", {}).get("properties", {}).get("name") - if not is_valid_crs(crs): - log.error(error_message) - raise HTTPException(status_code=400, detail=error_message) - return - - if input_geojson_type := input_geojson.get("type") == "FeatureCollection": - features = input_geojson.get("features", []) - coordinates = ( - features[-1].get("geometry", {}).get("coordinates", []) if features else [] - ) - elif input_geojson_type := input_geojson.get("type") == "Feature": - coordinates = input_geojson.get("geometry", {}).get("coordinates", []) - - geometry_type = ( - features[0].get("geometry", {}).get("type") - if input_geojson_type == "FeatureCollection" and features - else input_geojson.get("geometry", {}).get("type", "") - ) - if geometry_type == "MultiPolygon": - first_coordinate = coordinates[0][0] if coordinates and coordinates[0] else None - elif geometry_type == "Point": - first_coordinate = coordinates if coordinates else None - - elif geometry_type == "LineString": - first_coordinate = coordinates[0] if coordinates else None - - else: - first_coordinate = coordinates[0][0] if coordinates else None - - if not is_valid_coordinate(first_coordinate): - log.error(error_message) - raise HTTPException(status_code=400, detail=error_message) - - async def get_tasks_count(db: Session, project_id: int): """Get number of tasks for a project.""" db_task = ( diff --git a/src/backend/app/projects/project_routes.py b/src/backend/app/projects/project_routes.py index 5a21a96f41..45796b548d 100644 --- a/src/backend/app/projects/project_routes.py +++ b/src/backend/app/projects/project_routes.py @@ -20,6 +20,7 @@ import json import os import uuid +from io import BytesIO from pathlib import Path from typing import Optional @@ -37,6 +38,7 @@ ) from fastapi.responses import FileResponse, JSONResponse from loguru import logger as log +from osm_fieldwork.data_models import data_models_path from osm_fieldwork.make_data_extract import getChoices from osm_fieldwork.xlsforms import xlsforms_path from sqlalchemy.orm import Session @@ -46,10 +48,10 @@ from app.auth.roles import mapper, org_admin, project_admin, super_admin from app.central import central_crud from app.db import database, db_models +from app.db.postgis_utils import check_crs from app.models.enums import TILES_FORMATS, TILES_SOURCE, HTTPStatus from 
app.organisations import organisation_deps from app.projects import project_crud, project_deps, project_schemas -from app.projects.project_crud import check_crs from app.static import data_path from app.submissions import submission_crud from app.tasks import tasks_crud @@ -410,7 +412,7 @@ async def upload_custom_task_boundaries( boundary = json.loads(content) # Validatiing Coordinate Reference System - check_crs(boundary) + await check_crs(boundary) log.debug("Creating tasks for each polygon in project") result = await project_crud.update_multi_polygon_project_boundary( @@ -432,10 +434,10 @@ async def upload_custom_task_boundaries( } -@router.post("/task_split") +@router.post("/task-split") async def task_split( project_geojson: UploadFile = File(...), - extract_geojson: UploadFile = File(...), + extract_geojson: Optional[UploadFile] = File(None), no_of_buildings: int = Form(50), db: Session = Depends(database.get_db), ): @@ -444,8 +446,9 @@ async def task_split( Args: project_geojson (UploadFile): The geojson to split. Should be a FeatureCollection. - extract_geojson (UploadFile): Data extract geojson containing osm features. - Should be a FeatureCollection. + extract_geojson (UploadFile, optional): Custom data extract geojson + containing osm features (should be a FeatureCollection). + If not included, an extract is generated automatically. no_of_buildings (int, optional): The number of buildings per subtask. Defaults to 50. db (Session, optional): The database session. Injected by FastAPI. @@ -457,18 +460,19 @@ async def task_split( # read project boundary parsed_boundary = geojson.loads(await project_geojson.read()) # Validatiing Coordinate Reference Systems - check_crs(parsed_boundary) + await check_crs(parsed_boundary) # read data extract - parsed_extract = geojson.loads(await extract_geojson.read()) - - check_crs(parsed_extract) + parsed_extract = None + if extract_geojson: + parsed_extract = geojson.loads(await extract_geojson.read()) + await check_crs(parsed_extract) return await project_crud.split_geojson_into_tasks( db, parsed_boundary, - parsed_extract, no_of_buildings, + parsed_extract, ) @@ -504,7 +508,7 @@ async def upload_project_boundary( boundary = json.loads(content) # Validatiing Coordinate Reference System - check_crs(boundary) + await check_crs(boundary) # update project boundary and dimension result = await project_crud.update_project_boundary( @@ -546,7 +550,7 @@ async def edit_project_boundary( boundary = json.loads(content) # Validatiing Coordinate Reference System - check_crs(boundary) + await check_crs(boundary) result = await project_crud.update_project_boundary( db, project_id, boundary, dimension @@ -584,14 +588,11 @@ async def validate_form(form: UploadFile): return await central_crud.test_form_validity(contents, file_ext[1:]) -@router.post("/{project_id}/generate") +@router.post("/{project_id}/generate-project-data") async def generate_files( background_tasks: BackgroundTasks, project_id: int, - extract_polygon: bool = Form(False), xls_form_upload: Optional[UploadFile] = File(None), - xls_form_config_file: Optional[UploadFile] = File(None), - data_extracts: Optional[UploadFile] = File(None), db: Session = Depends(database.get_db), org_user_dict: db_models.DbUser = Depends(org_admin), ): @@ -610,12 +611,8 @@ async def generate_files( Args: background_tasks (BackgroundTasks): FastAPI bg tasks, provided automatically. project_id (int): The ID of the project for which files are being generated. 
- extract_polygon (bool): A boolean flag indicating whether the polygon - is extracted or not. xls_form_upload (UploadFile, optional): A custom XLSForm to use in the project. A file should be provided if user wants to upload a custom xls form. - xls_form_config_file (UploadFile, optional): The config YAML for the XLS form. - data_extracts (UploadFile, optional): Custom data extract GeoJSON. db (Session): Database session, provided automatically. org_user_dict (AuthUser): Current logged in user. Must be org admin. @@ -623,8 +620,6 @@ async def generate_files( json (JSONResponse): A success message containing the project ID. """ log.debug(f"Generating media files tasks for project: {project_id}") - custom_xls_form = None - xform_title = None project = await project_crud.get_project(db, project_id) if not project: @@ -632,49 +627,27 @@ async def generate_files( status_code=428, detail=f"Project with id {project_id} does not exist" ) - project.data_extract_type = "polygon" if extract_polygon else "centroid" - db.commit() - + form_category = project.xform_title + custom_xls_form = None if xls_form_upload: log.debug("Validating uploaded XLS form") - # Validating for .XLS File. - file_name = os.path.splitext(xls_form_upload.filename) - file_ext = file_name[1] - allowed_extensions = [".xls", ".xlsx", ".xml"] + + file_path = Path(xls_form_upload.filename) + file_ext = file_path.suffix.lower() + allowed_extensions = {".xls", ".xlsx", ".xml"} if file_ext not in allowed_extensions: - raise HTTPException(status_code=400, detail="Provide a valid .xls file") - xform_title = file_name[0] + raise HTTPException( + status_code=HTTPStatus.UNPROCESSABLE_ENTITY, + detail=f"Invalid file extension, must be {allowed_extensions}", + ) + + form_category = file_path.stem custom_xls_form = await xls_form_upload.read() + # Write XLS form content to db project.form_xls = custom_xls_form - - if xls_form_config_file: - config_file_name = os.path.splitext(xls_form_config_file.filename) - config_file_ext = config_file_name[1] - if not config_file_ext == ".yaml": - raise HTTPException( - status_code=400, detail="Provide a valid .yaml config file" - ) - config_file_contents = await xls_form_config_file.read() - project.form_config_file = config_file_contents - db.commit() - if data_extracts: - log.debug("Validating uploaded geojson file") - # Validating for .geojson File. 
- data_extracts_file_name = os.path.splitext(data_extracts.filename) - extracts_file_ext = data_extracts_file_name[1] - if extracts_file_ext != ".geojson": - raise HTTPException(status_code=400, detail="Provide a valid geojson file") - try: - extracts_contents = await data_extracts.read() - json.loads(extracts_contents) - except json.JSONDecodeError as e: - raise HTTPException( - status_code=400, detail="Provide a valid geojson file" - ) from e - # Create task in db and return uuid log.debug(f"Creating export background task for project ID: {project_id}") background_task_id = await project_crud.insert_background_task_into_database( @@ -686,11 +659,9 @@ async def generate_files( project_crud.generate_appuser_files, db, project_id, - extract_polygon, - custom_xls_form, - extracts_contents if data_extracts else None, - xform_title, - file_ext[1:] if xls_form_upload else "xls", + BytesIO(custom_xls_form) if custom_xls_form else None, + form_category, + file_ext if xls_form_upload else "xls", background_task_id, ) @@ -841,42 +812,69 @@ async def preview_split_by_square( boundary = geojson.loads(content) # Validatiing Coordinate Reference System - check_crs(boundary) + await check_crs(boundary) result = await project_crud.preview_split_by_square(boundary, dimension) return result -@router.post("/get_data_extract/") +@router.post("/generate-data-extract/") async def get_data_extract( geojson_file: UploadFile = File(...), - project_id: int = Query(None, description="Project ID"), - db: Session = Depends(database.get_db), + form_category: Optional[str] = Form(None), + # config_file: Optional[str] = Form(None), current_user: AuthUser = Depends(login_required), ): - """Get the data extract for a given project AOI. + """Get a new data extract for a given project AOI. - Use for both generating a new data extract and for getting - and existing extract. + TODO allow config file (YAML/JSON) upload for data extract generation + TODO alternatively, direct to raw-data-api to generate first, then upload """ boundary_geojson = json.loads(await geojson_file.read()) - fgb_url = await project_crud.get_data_extract_url( - db, + # Get extract config file from existing data_models + if form_category: + data_model = f"{data_models_path}/{form_category}.yaml" + with open(data_model, "rb") as data_model_yaml: + extract_config = BytesIO(data_model_yaml.read()) + else: + extract_config = None + + fgb_url = await project_crud.generate_data_extract( boundary_geojson, + extract_config, + ) + + return JSONResponse(status_code=200, content={"url": fgb_url}) + + +@router.post("/data-extract-url/") +async def get_or_set_data_extract( + url: Optional[str] = None, + extract_type: Optional[str] = None, + project_id: int = Query(..., description="Project ID"), + db: Session = Depends(database.get_db), + org_user_dict: db_models.DbUser = Depends(project_admin), +): + """Get or set the data extract URL for a project.""" + fgb_url = await project_crud.get_or_set_data_extract_url( + db, project_id, + url, + extract_type, ) + return JSONResponse(status_code=200, content={"url": fgb_url}) -@router.post("/upload_custom_extract/") +@router.post("/upload-custom-extract/") async def upload_custom_extract( custom_extract_file: UploadFile = File(...), project_id: int = Query(..., description="Project ID"), db: Session = Depends(database.get_db), - org_user_dict: db_models.DbUser = Depends(org_admin), + org_user_dict: db_models.DbUser = Depends(project_admin), ): - """Upload a custom data extract for a project as fgb in S3. 
+ """Upload a custom data extract geojson for a project. Request Body - 'custom_extract_file' (file): Geojson files with the features. Required. @@ -959,6 +957,7 @@ async def update_project_category( if file_ext not in allowed_extensions: raise HTTPException(status_code=400, detail="Provide a valid .xls file") + # FIXME project.form_xls = contents db.commit() @@ -1057,14 +1056,14 @@ async def download_features( Returns: Response: The HTTP response object containing the downloaded file. """ - out = await project_crud.get_project_features_geojson(db, project_id) + feature_collection = await project_crud.get_project_features_geojson(db, project_id) headers = { "Content-Disposition": "attachment; filename=project_features.geojson", "Content-Type": "application/media", } - return Response(content=json.dumps(out), headers=headers) + return Response(content=json.dumps(feature_collection), headers=headers) @router.get("/tiles/{project_id}") diff --git a/src/backend/migrations/005-remove-qrcode.sql b/src/backend/migrations/005-remove-qrcode.sql index 8951ba94af..ae50373311 100644 --- a/src/backend/migrations/005-remove-qrcode.sql +++ b/src/backend/migrations/005-remove-qrcode.sql @@ -9,7 +9,7 @@ BEGIN; -- Drop qr_code table DROP TABLE IF EXISTS public.qr_code CASCADE; --- Update field in projects table +-- Update field in tasks table ALTER TABLE IF EXISTS public.tasks DROP COLUMN IF EXISTS qr_code_id, ADD COLUMN IF NOT EXISTS odk_token VARCHAR; diff --git a/src/backend/migrations/007-add-extract-url.sql b/src/backend/migrations/007-add-extract-url.sql new file mode 100644 index 0000000000..f758b9bcaf --- /dev/null +++ b/src/backend/migrations/007-add-extract-url.sql @@ -0,0 +1,12 @@ +-- ## Migration to: +-- * Add public.projects.data_extract_url field. + +-- Start a transaction +BEGIN; + +-- Update field in projects table +ALTER TABLE IF EXISTS public.projects + ADD COLUMN IF NOT EXISTS data_extract_url VARCHAR; + +-- Commit the transaction +COMMIT; diff --git a/src/backend/migrations/init/fmtm_base_schema.sql b/src/backend/migrations/init/fmtm_base_schema.sql index 2879166613..c94ded36af 100644 --- a/src/backend/migrations/init/fmtm_base_schema.sql +++ b/src/backend/migrations/init/fmtm_base_schema.sql @@ -371,6 +371,7 @@ CREATE TABLE public.projects ( form_xls bytea, form_config_file bytea, data_extract_type character varying, + data_extract_url character varying, task_split_type character varying, hashtags character varying[] ); diff --git a/src/backend/migrations/revert/005-remove-qrcode.sql b/src/backend/migrations/revert/005-remove-qrcode.sql index 7421b0c764..ca555f0eb1 100644 --- a/src/backend/migrations/revert/005-remove-qrcode.sql +++ b/src/backend/migrations/revert/005-remove-qrcode.sql @@ -21,7 +21,7 @@ ALTER TABLE ONLY public.qr_code ALTER COLUMN id SET DEFAULT nextval('public.qr_c ALTER TABLE ONLY public.qr_code ADD CONSTRAINT qr_code_pkey PRIMARY KEY (id); --- Update field in projects table +-- Update field in tasks table ALTER TABLE IF EXISTS public.tasks DROP COLUMN IF EXISTS odk_token, ADD COLUMN IF NOT EXISTS qr_code_id integer; diff --git a/src/backend/migrations/revert/007-add-extract-url.sql b/src/backend/migrations/revert/007-add-extract-url.sql new file mode 100644 index 0000000000..c80ecf0a8f --- /dev/null +++ b/src/backend/migrations/revert/007-add-extract-url.sql @@ -0,0 +1,9 @@ +-- Start a transaction +BEGIN; + +-- Update field in projects table +ALTER TABLE IF EXISTS public.projects + DROP COLUMN IF EXISTS data_extract_url; + +-- Commit the transaction +COMMIT; diff 
--git a/src/backend/pdm.lock b/src/backend/pdm.lock index e872692d74..8c7fc1647e 100644 --- a/src/backend/pdm.lock +++ b/src/backend/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "debug", "dev", "docs", "test"] strategy = ["cross_platform"] lock_version = "4.4.1" -content_hash = "sha256:73bb79db4e82351bb07d7b0faf51b90fc523e0d3f316dca54e14dffee0bc077e" +content_hash = "sha256:074c775ec2ac53324b4b050880d4da3a80b5efd8ba7474868b7bd65a9f70f8a8" [[package]] name = "annotated-types" @@ -611,19 +611,20 @@ files = [ [[package]] name = "fmtm-splitter" -version = "1.0.0" +version = "1.1.1" requires_python = ">=3.10" summary = "A utility for splitting an AOI into multiple tasks." dependencies = [ "geojson>=2.5.0", "geopandas>=0.11.0", "numpy>=1.21.0", + "osm-rawdata>=0.2.1", "psycopg2>=2.9.1", "shapely>=1.8.1", ] files = [ - {file = "fmtm-splitter-1.0.0.tar.gz", hash = "sha256:e6c823b9341f0f58413ee892c2ebb7b91377cddcafb4e6a9edbb4382aee1dd2b"}, - {file = "fmtm_splitter-1.0.0-py3-none-any.whl", hash = "sha256:cb6b391b32caddcca489aa24bdd1e2bb9c4245f345c0b3d42fdd517694ac9bfc"}, + {file = "fmtm-splitter-1.1.1.tar.gz", hash = "sha256:29893d9ae017a9c0fc74a5ecb84ec6f0a072870be06d2979cd165f35001bcb80"}, + {file = "fmtm_splitter-1.1.1-py3-none-any.whl", hash = "sha256:101120bfce3b5c55bdb8cd9a9a56dbe150dc8ba6620c80ab69a016288158ab77"}, ] [[package]] @@ -1386,7 +1387,7 @@ files = [ [[package]] name = "osm-rawdata" -version = "0.1.7" +version = "0.2.1" requires_python = ">=3.10" summary = "Make data extracts from OSM data." dependencies = [ "epdb>=0.15.1", "flatdict>=4.0.1", "geojson>=2.5.0", "geopandas>=0.11.2", "psycopg2>=2.9.1", "pyyaml>=6.0.0", "requests>=2.26.0", "shapely>=1.8.5", "sqlalchemy>=2.0.0", ] files = [ - {file = "osm-rawdata-0.1.7.tar.gz", hash = "sha256:b012a20e15cca925ed4d0494cd65ebf3fd97759323ed64fb94dc8cf46ce67b6f"}, - {file = "osm_rawdata-0.1.7-py3-none-any.whl", hash = "sha256:9de18ac8ddc5d25058b79506aa940ab688fc9bf096e09c641bc76266678611a8"}, + {file = "osm-rawdata-0.2.1.tar.gz", hash = "sha256:7cfdee658b57634c2d7ac7aaefcd6a1c5c8d4268b5f2883720283037e7f113d1"}, + {file = "osm_rawdata-0.2.1-py3-none-any.whl", hash = "sha256:f4aa4e9fb4984cab89bec9eb4bab3f39710b562075290560a4154a0ecf50eb7b"}, ] [[package]] diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml index 5bf1107bfb..28d0bd46c0 100644 --- a/src/backend/pyproject.toml +++ b/src/backend/pyproject.toml @@ -47,8 +47,8 @@ dependencies = [ "cryptography>=42.0.1", "osm-login-python==1.0.1", "osm-fieldwork==0.4.2", - "osm-rawdata==0.1.7", - "fmtm-splitter==1.0.0", + "osm-rawdata==0.2.1", + "fmtm-splitter==1.1.1", ] requires-python = ">=3.10" readme = "../../README.md" diff --git a/src/backend/tests/test_projects_routes.py b/src/backend/tests/test_projects_routes.py index 7ee99099e7..084d903131 100644 --- a/src/backend/tests/test_projects_routes.py +++ b/src/backend/tests/test_projects_routes.py @@ -226,9 +226,7 @@ async def test_generate_appuser_files(db, project): lambda: project_crud.generate_appuser_files( db, project_id, - extract_polygon=True, - custom_xls_form=xlsform_file, - extracts_contents=data_extracts, - category="buildings", - form_type="example_form_type", + custom_form=xlsform_file, + form_category="buildings", + form_format="example_form_type", background_task_id=uuid.uuid4(), diff --git a/src/frontend/src/api/CreateProjectService.ts b/src/frontend/src/api/CreateProjectService.ts index 75825e53db..484a48ee53 100755 --- a/src/frontend/src/api/CreateProjectService.ts +++ b/src/frontend/src/api/CreateProjectService.ts @@ -16,7 +16,6 @@ const CreateProjectService: Function = ( fileUpload: any, formUpload: any, dataExtractFile: any, - lineExtractFile: any, ) => { return async (dispatch) => { 
dispatch(CreateProjectActions.CreateProjectLoading(true)); dispatch(CommonActions.SetLoading(true)); const postCreateProjectDetails = async (url, payload, fileUpload, formUpload) => { try { const postNewProjectDetails = await axios.post(url, payload); const resp: ProjectDetailsModel = postNewProjectDetails.data; await dispatch(CreateProjectActions.PostProjectDetails(resp)); if (payload.splitting_algorithm === 'Choose Area as Tasks') { await dispatch( UploadAreaService(`${import.meta.env.VITE_API_URL}/projects/${resp.id}/custom_task_boundaries`, fileUpload), ); } else if (payload.splitting_algorithm === 'Use natural Boundary') { + // TODO this is no longer valid, remove? await dispatch( - UploadAreaService(`${import.meta.env.VITE_API_URL}/projects/task_split/${resp.id}/`, fileUpload), + UploadAreaService(`${import.meta.env.VITE_API_URL}/projects/task-split/${resp.id}/`, fileUpload), ); } else { await dispatch( GetDividedTaskFromGeojson(`${import.meta.env.VITE_API_URL}/projects/${resp.id}/upload`, fileUpload), ); } dispatch( CommonActions.SetSnackBar({ open: true, message: 'Project Successfully Created Now Generating QR For Project', variant: 'success', duration: 2000, }), ); - if (dataExtractFile) { + + if (payload.dataExtractWays === 'osm_data_extract') { + // Upload data extract generated from raw-data-api + // (url & extract_type are query params on this endpoint) + await axios.post( + `${import.meta.env.VITE_API_URL}/projects/data-extract-url/?project_id=${resp.id}`, + null, + { + params: { + url: payload.data_extract_url, + extract_type: payload.data_extract_type, + }, + }, + ); + } else if (dataExtractFile) { + // Upload custom data extract from user const dataExtractFormData = new FormData(); dataExtractFormData.append('custom_extract_file', dataExtractFile); - await axios.post( - `${import.meta.env.VITE_API_URL}/projects/upload_custom_extract/?project_id=${resp.id}`, + await axios.post( + `${import.meta.env.VITE_API_URL}/projects/upload-custom-extract/?project_id=${resp.id}`, dataExtractFormData, ); } - if (lineExtractFile) { - const lineExtractFormData = new FormData(); - lineExtractFormData.append('custom_extract_file', lineExtractFile); - await axios.post( - `${import.meta.env.VITE_API_URL}/projects/upload_custom_extract/?project_id=${resp.id}`, - lineExtractFormData, - ); - } + + // Generate QR codes await dispatch( GenerateProjectQRService( - `${import.meta.env.VITE_API_URL}/projects/${resp.id}/generate`, + `${import.meta.env.VITE_API_URL}/projects/${resp.id}/generate-project-data`, payload, formUpload, - dataExtractFile, ), ); dispatch(CommonActions.SetLoading(false)); dispatch(CreateProjectActions.CreateProjectLoading(false)); } catch (error: any) { dispatch(CommonActions.SetLoading(false)); dispatch(CreateProjectActions.CreateProjectLoading(false)); dispatch( CommonActions.SetSnackBar({ open: true, message: JSON.stringify(error?.response?.data?.detail) || 'Something went wrong.', variant: 'error', duration: 2000, }), ); dispatch(CreateProjectActions.CreateProjectLoading(false)); } }; await postCreateProjectDetails(url, payload, fileUpload, formUpload); }; }; const FormCategoryService: Function = (url: string) => { return async (dispatch) => { dispatch(CreateProjectActions.GetFormCategoryLoading(true)); const getFormCategoryList = async (url) => { try { const getFormCategoryListResponse = await axios.get(url); const resp: FormCategoryListModel = getFormCategoryListResponse.data; dispatch(CreateProjectActions.GetFormCategoryList(resp)); } catch (error) { dispatch(CreateProjectActions.GetFormCategoryListLoading(false)); } finally { dispatch(CreateProjectActions.GetFormCategoryLoading(false)); } }; await getFormCategoryList(url); }; }; const UploadAreaService: Function = (url: string, filePayload: any, payload: any) => { return async (dispatch) => { dispatch(CreateProjectActions.UploadAreaLoading(true)); const postUploadArea = async (url, filePayload, payload) => { try { const areaFormData = new FormData(); areaFormData.append('upload', filePayload); if (payload?.dimension) { areaFormData.append('dimension', payload?.dimension); } const postNewProjectDetails = await axios.post(url, areaFormData, { headers: { 'Content-Type': 'multipart/form-data', }, }); const resp: UploadAreaDetailsModel = postNewProjectDetails.data; await dispatch(CreateProjectActions.UploadAreaLoading(false)); await dispatch(CreateProjectActions.PostUploadAreaSuccess(resp)); } catch (error: any) { console.log(error, 'error'); dispatch( CommonActions.SetSnackBar({ open: true, message: JSON.stringify(error?.response?.data?.detail) || 'Something went wrong.', variant: 'error', duration: 2000, }), ); dispatch(CreateProjectActions.UploadAreaLoading(false)); } }; await postUploadArea(url, filePayload, payload); }; }; const GenerateProjectQRService: Function = (url: string, payload: any, formUpload: any) => { return async (dispatch) => { dispatch(CreateProjectActions.GenerateProjectQRLoading(true)); dispatch(CommonActions.SetLoading(true)); const postUploadArea = async (url, payload: any, formUpload) => { try { let postNewProjectDetails; if (payload.form_ways === 'custom_form') { // TODO move form upload to a separate service / endpoint? 
+ const generateApiFormData = new FormData(); + generateApiFormData.append('xls_form_upload', formUpload); + postNewProjectDetails = await axios.post(url, generateApiFormData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + }); } else { - generateApiFormData.append('extract_polygon', isPolygon.toString()); - if (dataExtractFile) { - generateApiFormData.append('data_extracts', dataExtractFile); - } + postNewProjectDetails = await axios.post(url, {}); } - const postNewProjectDetails = await axios.post(url, generateApiFormData, { - headers: { - 'Content-Type': 'multipart/form-data', - }, - }); + const resp: string = postNewProjectDetails.data; await dispatch(CreateProjectActions.GenerateProjectQRLoading(false)); dispatch(CommonActions.SetLoading(false)); await dispatch(CreateProjectActions.GenerateProjectQRSuccess(resp)); } catch (error: any) { + console.log(error); dispatch(CommonActions.SetLoading(false)); dispatch( CommonActions.SetSnackBar({ @@ -221,38 +219,6 @@ const OrganisationService: Function = (url: string) => { }; }; -const UploadCustomXLSFormService: Function = (url: string, payload: any) => { - return async (dispatch) => { - dispatch(CreateProjectActions.UploadCustomXLSFormLoading(true)); - - const postUploadCustomXLSForm = async (url, payload) => { - try { - const customXLSFormData = new FormData(); - customXLSFormData.append('upload', payload[0]); - const postCustomXLSForm = await axios.post(url, customXLSFormData, { - headers: { - 'Content-Type': 'multipart/form-data', - }, - }); - await dispatch(CreateProjectActions.UploadCustomXLSFormLoading(false)); - await dispatch(CreateProjectActions.UploadCustomXLSFormSuccess(postCustomXLSForm.data)); - } catch (error: any) { - dispatch( - CommonActions.SetSnackBar({ - open: true, - message: JSON.stringify(error.response.data.detail) || 'Something Went Wrong', - variant: 'error', - duration: 2000, - }), - ); - dispatch(CreateProjectActions.UploadCustomXLSFormLoading(false)); - } - }; - - await postUploadCustomXLSForm(url, payload); - }; -}; - const GenerateProjectLog: Function = (url: string, params: any) => { return async (dispatch) => { dispatch(CreateProjectActions.GenerateProjectLogLoading(true)); @@ -329,8 +295,8 @@ const GetIndividualProjectDetails: Function = (url: string, payload: any) => { const TaskSplittingPreviewService: Function = ( url: string, fileUpload: any, - dataExtractFile: any, no_of_buildings: string, + dataExtractFile: any, ) => { return async (dispatch) => { dispatch(CreateProjectActions.GetTaskSplittingPreviewLoading(true)); @@ -339,8 +305,10 @@ const TaskSplittingPreviewService: Function = ( try { const taskSplittingFileFormData = new FormData(); taskSplittingFileFormData.append('project_geojson', fileUpload); - taskSplittingFileFormData.append('extract_geojson', dataExtractFile); taskSplittingFileFormData.append('no_of_buildings', no_of_buildings); + if (dataExtractFile) { + taskSplittingFileFormData.append('extract_geojson', dataExtractFile); + } const getTaskSplittingResponse = await axios.post(url, taskSplittingFileFormData); const resp: OrganisationListModel = getTaskSplittingResponse.data; @@ -566,7 +534,6 @@ export { FormCategoryService, GenerateProjectQRService, OrganisationService, - UploadCustomXLSFormService, GenerateProjectLog, GetDividedTaskFromGeojson, TaskSplittingPreviewService, diff --git a/src/frontend/src/components/createnewproject/DataExtract.tsx b/src/frontend/src/components/createnewproject/DataExtract.tsx index 561c11eb07..4b0a3b8bf3 100644 --- 
a/src/frontend/src/components/createnewproject/DataExtract.tsx +++ b/src/frontend/src/components/createnewproject/DataExtract.tsx @@ -20,13 +20,13 @@ const dataExtractOptions = [ ]; const osmFeatureTypeOptions = [ - { name: 'osm_feature_type', value: 'point_centroid', label: 'Point/Centroid' }, + { name: 'osm_feature_type', value: 'centroid', label: 'Point/Centroid' }, { name: 'osm_feature_type', value: 'line', label: 'Line' }, { name: 'osm_feature_type', value: 'polygon', label: 'Polygon' }, ]; enum FeatureTypeName { - point_centroid = 'Point/Centroid', + centroid = 'Point/Centroid', line = 'Line', polygon = 'Polygon', } @@ -38,7 +38,7 @@ const DataExtract = ({ flag, customLineUpload, setCustomLineUpload, customPolygo const [extractWays, setExtractWays] = useState(''); const [featureType, setFeatureType] = useState(''); const projectDetails: any = useAppSelector((state) => state.createproject.projectDetails); - const drawnGeojson = useAppSelector((state) => state.createproject.drawnGeojson); + const projectAoiGeojson = useAppSelector((state) => state.createproject.drawnGeojson); const dataExtractGeojson = useAppSelector((state) => state.createproject.dataExtractGeojson); const isFgbFetching = useAppSelector((state) => state.createproject.isFgbFetching); @@ -73,61 +73,69 @@ const DataExtract = ({ flag, customLineUpload, setCustomLineUpload, customPolygo errors, }: any = useForm(projectDetails, submission, DataExtractValidation); + const getFileFromGeojson = (geojson) => { + // Create a File object from the geojson Blob + const geojsonBlob = new Blob([JSON.stringify(geojson)], { type: 'application/json' }); + return new File([geojsonBlob], 'data.geojson', { type: 'application/json' }); + }; + // Generate OSM data extract const generateDataExtract = async () => { - // Get OSM data extract if required - if (extractWays === 'osm_data_extract') { - // Remove current data extract - dispatch(CreateProjectActions.setDataExtractGeojson(null)); + if (extractWays !== 'osm_data_extract') { + return; + } + + // Remove current data extract + dispatch(CreateProjectActions.setDataExtractGeojson(null)); - // Create a file object from the project area Blob - const projectAreaBlob = new Blob([JSON.stringify(drawnGeojson)], { type: 'application/json' }); - const drawnGeojsonFile = new File([projectAreaBlob], 'outline.json', { type: 'application/json' }); + const dataExtractRequestFormData = new FormData(); + const projectAoiGeojsonFile = getFileFromGeojson(projectAoiGeojson); + dataExtractRequestFormData.append('geojson_file', projectAoiGeojsonFile); + dataExtractRequestFormData.append('form_category', projectDetails.formCategorySelection); - dispatch(CreateProjectActions.SetFgbFetchingStatus(true)); - // Create form and POST endpoint - const dataExtractRequestFormData = new FormData(); - dataExtractRequestFormData.append('geojson_file', drawnGeojsonFile); - try { - const response = await axios.post( - `${import.meta.env.VITE_API_URL}/projects/get_data_extract/`, - dataExtractRequestFormData, - ); + // Set flatgeobuf as loading + dispatch(CreateProjectActions.SetFgbFetchingStatus(true)); - const fgbUrl = response.data.url; - // Append url to project data & remove custom files - dispatch( - CreateProjectActions.SetIndividualProjectDetailsData({ - ...formValues, - data_extract_type: fgbUrl, - dataExtractWays: extractWays, - dataExtractFeatureType: featureType, - customLineUpload: null, - customPolygonUpload: null, - }), - ); + try { + const response = await axios.post( + 
`${import.meta.env.VITE_API_URL}/projects/generate-data-extract/`, + dataExtractRequestFormData, + ); - // Extract fgb and set geojson to map - const fgbFile = await fetch(fgbUrl); - const binaryData = await fgbFile.arrayBuffer(); - const uint8ArrayData = new Uint8Array(binaryData); - // Deserialize the binary data - const geojsonExtract = await fgbGeojson.deserialize(uint8ArrayData); - dispatch(CreateProjectActions.SetFgbFetchingStatus(false)); - await dispatch(CreateProjectActions.setDataExtractGeojson(geojsonExtract)); - } catch (error) { - dispatch( - CommonActions.SetSnackBar({ - open: true, - message: 'Error to generate FGB file.', - variant: 'error', - duration: 2000, - }), - ); - dispatch(CreateProjectActions.SetFgbFetchingStatus(false)); - // TODO add error message for user - console.error('Error getting data extract:', error); - } + const fgbUrl = response.data.url; + // Append url to project data & remove custom files + dispatch( + CreateProjectActions.SetIndividualProjectDetailsData({ + ...formValues, + data_extract_type: featureType, + data_extract_url: fgbUrl, + dataExtractWays: extractWays, + dataExtractFeatureType: featureType, + customLineUpload: null, + customPolygonUpload: null, + }), + ); + + // Extract fgb and set geojson to map + const fgbFile = await fetch(fgbUrl); + const binaryData = await fgbFile.arrayBuffer(); + const uint8ArrayData = new Uint8Array(binaryData); + // Deserialize the binary data + const geojsonExtract = await fgbGeojson.deserialize(uint8ArrayData); + dispatch(CreateProjectActions.SetFgbFetchingStatus(false)); + await dispatch(CreateProjectActions.setDataExtractGeojson(geojsonExtract)); + } catch (error) { + dispatch( + CommonActions.SetSnackBar({ + open: true, + message: 'Error generating data extract.', + variant: 'error', + duration: 2000, + }), + ); + dispatch(CreateProjectActions.SetFgbFetchingStatus(false)); + // TODO add error message for user + console.error('Error getting data extract:', error); } }; @@ -248,20 +256,6 @@ const DataExtract = ({ flag, customLineUpload, setCustomLineUpload, customPolygo errorMsg={errors.dataExtractWays} /> {extractWays === 'osm_data_extract' && ( -
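The fetch-and-deserialize flow in `generateDataExtract` above can be read as a small standalone helper; a sketch assuming the `flatgeobuf` package's geojson module, which this component already uses as `fgbGeojson`:

```typescript
import { geojson as fgbGeojson } from 'flatgeobuf';

// Sketch: download a FlatGeobuf extract and deserialize it to GeoJSON
// for display on the map, mirroring the flow above.
async function fgbUrlToGeojson(fgbUrl: string) {
  const response = await fetch(fgbUrl);
  const binaryData = await response.arrayBuffer();
  // deserialize accepts a Uint8Array and returns a GeoJSON FeatureCollection
  return fgbGeojson.deserialize(new Uint8Array(binaryData));
}
```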