From 910cfb3f3043fa2d3ddb11ecb8f60d450feab9ec Mon Sep 17 00:00:00 2001 From: Josh Cunningham Date: Thu, 5 Sep 2024 13:29:37 -0500 Subject: [PATCH] fix geopandas without fiona --- modules/data_processing/gpkg_utils.py | 38 ++++++++++++++++++---- modules/map_app/views.py | 47 ++++++++------------------- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/modules/data_processing/gpkg_utils.py b/modules/data_processing/gpkg_utils.py index 56c2632..5231c7d 100644 --- a/modules/data_processing/gpkg_utils.py +++ b/modules/data_processing/gpkg_utils.py @@ -8,6 +8,8 @@ import struct import geopandas as gpd from pathlib import Path +import pyproj +from shapely.ops import transform logger = logging.getLogger(__name__) @@ -38,7 +40,6 @@ def verify_indices(gpkg: str = file_paths.conus_hydrofabric()) -> None: con.commit() con.close() - def create_empty_gpkg(gpkg: str) -> None: """ Create an empty geopackage with the necessary tables and indices. @@ -121,6 +122,19 @@ def blob_to_centre_point(blob: bytes) -> Point: return Point(x, y) +def convert_to_5070(shapely_geometry): + # convert to web mercator + if shapely_geometry.is_empty: + return shapely_geometry + source_crs = pyproj.CRS("EPSG:4326") + target_crs = pyproj.CRS("EPSG:5070") + project = pyproj.Transformer.from_crs(source_crs, target_crs, always_xy=True).transform + new_geometry = transform(project, shapely_geometry) + logger.debug(f" new geometry: {new_geometry}") + logger.debug(f"old geometry: {shapely_geometry}") + return new_geometry + + def get_catid_from_point(coords): """ Retrieves the watershed boundary ID (catid) of the watershed that contains the given point. @@ -138,12 +152,24 @@ def get_catid_from_point(coords): """ logger.info(f"Getting catid for {coords}") q = file_paths.conus_hydrofabric() - d = {"col1": ["point"], "geometry": [Point(coords["lng"], coords["lat"])]} - point = gpd.GeoDataFrame(d, crs="EPSG:4326") - df = gpd.read_file(q, format="GPKG", layer="divides", mask=point) - if df.empty: + point = Point(coords["lng"], coords["lat"]) + point = convert_to_5070(point) + with sqlite3.connect(q) as con: + sql = f"""SELECT DISTINCT d.divide_id, d.geom + FROM divides d + JOIN rtree_divides_geom r ON d.fid = r.id + WHERE r.minx <= {point.x} AND r.maxx >= {point.x} + AND r.miny <= {point.y} AND r.maxy >= {point.y}""" + results = con.execute(sql).fetchall() + if len(results) == 0: raise IndexError(f"No watershed boundary found for {coords}") - return df["id"].values[0] + if len(results) > 1: + # check the geometries to see which one contains the point + for result in results: + geom = blob_to_geometry(result[1]) + if geom.contains(point): + return result[0] + return results[0][0] def create_rTree_table(table: str, con: sqlite3.Connection) -> None: diff --git a/modules/map_app/views.py b/modules/map_app/views.py index 1af4811..cb5e573 100644 --- a/modules/map_app/views.py +++ b/modules/map_app/views.py @@ -39,19 +39,6 @@ def index(): return render_template("index.html") -def get_catid_from_point(coords): - # inpute coords are EPSG:4326 - logger.info(coords) - # takes a point and returns the catid of the watershed it is in - # create a geometry mask for the point - # load the watershed boundaries - q = Path(__file__).parent.parent / "data_sources" / "conus.gpkg" - d = {"col1": ["point"], "geometry": [Point(coords["lng"], coords["lat"])]} - point = gpd.GeoDataFrame(d, crs="EPSG:4326") - df = gpd.read_file(q, format="GPKG", layer="divides", mask=point) - return df["divide_id"].values[0] - - @main.route("/handle_map_interaction", methods=["POST"]) def handle_map_interaction(): data = request.get_json() @@ -99,28 +86,20 @@ def get_map_data(): def catids_to_geojson(cat_dict): + # if the cat id is added to the dict by clicking on the map, it will have coodinates + # if it was added using the select by cat_id box, the coordinates are 0 0 + # if we just use the name this doesn't matter for k, v in cat_dict.items(): - if v[0] != 0 and v[1] != 0: - # assuming this was called by clicking on the map - cat_dict[k] = Point(v[1], v[0]) - else: - # this was called without knowing the coordinates - # use sql to get the geometry - conn = sqlite3.connect(file_paths.conus_hydrofabric()) - c = conn.cursor() - c.execute(f"SELECT geom FROM divides WHERE id = '{k}'") - result = c.fetchone() - if result is not None: - cat_dict[k] = convert_to_4326(blob_to_geometry(result[0])).buffer(-0.0001) - - d = {"col1": cat_dict.keys(), "geometry": cat_dict.values()} - points = gpd.GeoDataFrame(d, crs="EPSG:4326") - logger.debug(points) - q = Path(__file__).parent.parent / "data_sources" / "conus.gpkg" - df = gpd.read_file(q, format="GPKG", layer="divides", mask=points) - # convert crs to 4326 - df = df.to_crs(epsg=4326) - return df.to_json() + # use sql to get the geometry + conn = sqlite3.connect(file_paths.conus_hydrofabric()) + c = conn.cursor() + c.execute(f"SELECT geom FROM divides WHERE divide_id = '{k}'") + result = c.fetchone() + if result is not None: + cat_dict[k] = convert_to_4326(blob_to_geometry(result[0])).buffer(-0.0001) + df = {"col1": cat_dict.keys(), "geometry": cat_dict.values()} + gdf = gpd.GeoDataFrame(df, crs="EPSG:4326") + return gdf.to_json() @main.route("/get_geojson_from_catids", methods=["POST"])