Skip to content

Commit

Permalink
as Python CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
floriscalkoen committed Feb 20, 2024
1 parent 4949d1a commit 9d23a95
Showing 1 changed file with 56 additions and 47 deletions.
103 changes: 56 additions & 47 deletions open_buildings/overture/extract_admin_bounds.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,62 @@
import argparse
import pathlib

import duckdb
import geopandas as gpd
from shapely import wkb

con = duckdb.connect(database=":memory:", read_only=False)
con.execute("LOAD spatial;")

admin_level = 2

OVERTURE_DIR = pathlib.Path("~/data/src/overture/2024-02-15-alpha.0").expanduser()
OUT_DIR = pathlib.Path("~/data/prc/overture/2024-02-15").expanduser()
if not OUT_DIR.exists():
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Adjusted query to perform a join and retrieve polygons
query = f"""
WITH admins_view AS (
SELECT * FROM read_parquet('{str(OVERTURE_DIR)}/theme=admins/type=*/*')
)
SELECT
admins.id,
admins.isoCountryCodeAlpha2,
admins.names,
admins.isoSubCountryCode,
areas.areaGeometry as geometry
FROM admins_view AS admins
INNER JOIN (
SELECT
id as areaId,
localityId,
geometry AS areaGeometry
FROM admins_view
) AS areas ON areas.localityId = admins.id
WHERE admins.adminLevel = {admin_level};
"""

# Execute the query and fetch the result
admins = con.execute(query).fetchdf()

# Convert the 'geometry' column from WKB to Shapely geometries and create a GeoDataFrame
admins = gpd.GeoDataFrame(
admins,
geometry=admins["geometry"].apply(lambda b: wkb.loads(bytes(b))),
crs="EPSG:4326",
)

admins[f"admin_level_{admin_level}_name"] = admins.names.map(lambda r: r["primary"])
admins = admins.drop(columns=["names"])

outpath = OUT_DIR/ f"admin_bounds_level_{admin_level}.parquet"
admins.to_parquet(outpath)

def main(admin_level, overture_dir, out_dir):
con = duckdb.connect(database=":memory:", read_only=False)
con.execute("LOAD spatial;")

OVERTURE_DIR = pathlib.Path(overture_dir).expanduser()
OUT_DIR = pathlib.Path(out_dir).expanduser()
if not OUT_DIR.exists():
OUT_DIR.mkdir(parents=True, exist_ok=True)

query = f"""
WITH admins_view AS (
SELECT * FROM read_parquet('{str(OVERTURE_DIR)}/theme=admins/type=*/*')
)
SELECT
admins.id,
admins.isoCountryCodeAlpha2,
admins.names,
admins.isoSubCountryCode,
areas.areaGeometry as geometry
FROM admins_view AS admins
INNER JOIN (
SELECT
id as areaId,
localityId,
geometry AS areaGeometry
FROM admins_view
) AS areas ON areas.localityId = admins.id
WHERE admins.adminLevel = {admin_level};
"""

admins = con.execute(query).fetchdf()

admins = gpd.GeoDataFrame(
admins,
geometry=admins["geometry"].apply(lambda b: wkb.loads(bytes(b))),
crs="EPSG:4326",
)

admins[f"admin_level_{admin_level}_name"] = admins.names.map(lambda r: r["primary"])
admins = admins.drop(columns=["names"])

outpath = OUT_DIR / f"admin_bounds_level_{admin_level}.parquet"
print(f"Writing admin boundaries level {admin_level} to: {outpath}")
admins.to_parquet(outpath)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Process Overture admin level data.')
parser.add_argument('admin_level', type=int, help='The admin level to process')
parser.add_argument('-s', '--source', default='~/data/src/overture/2024-02-15-alpha.0', help='The source Overture directory')
parser.add_argument('-o', '--output', default='~/data/prc/overture/2024-02-15', help='The output directory')

args = parser.parse_args()

main(args.admin_level, args.source, args.output)

0 comments on commit 9d23a95

Please sign in to comment.