Skip to content

Commit

Permalink
from s3 by default and by admin level
Browse files Browse the repository at this point in the history
  • Loading branch information
floriscalkoen committed Feb 21, 2024
1 parent 9d23a95 commit a181338
Showing 1 changed file with 43 additions and 13 deletions.
56 changes: 43 additions & 13 deletions open_buildings/overture/extract_admin_bounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,29 @@ def main(admin_level, overture_dir, out_dir):
con = duckdb.connect(database=":memory:", read_only=False)
con.execute("LOAD spatial;")

OVERTURE_DIR = pathlib.Path(overture_dir).expanduser()
OUT_DIR = pathlib.Path(out_dir).expanduser()
if not OUT_DIR.exists():
OUT_DIR.mkdir(parents=True, exist_ok=True)
out_dir = pathlib.Path(out_dir).expanduser()
if not out_dir.exists():
out_dir.mkdir(parents=True, exist_ok=True)

query = f"""
# Common part of the query
common_query = f"""
WITH admins_view AS (
SELECT * FROM read_parquet('{str(OVERTURE_DIR)}/theme=admins/type=*/*')
SELECT * FROM read_parquet('{str(overture_dir)}/theme=admins/type=*/*')
)
SELECT
admins.id,
admins.isoCountryCodeAlpha2,
admins.names,
admins.isoSubCountryCode,
"""

# Conditional part of the query based on admin_level
if admin_level == 2:
admin_level_query = "admins.isoSubCountryCode,"
else:
admin_level_query = ""

# Final part of the query
final_query = f"""
areas.areaGeometry as geometry
FROM admins_view AS admins
INNER JOIN (
Expand All @@ -36,26 +45,47 @@ def main(admin_level, overture_dir, out_dir):
WHERE admins.adminLevel = {admin_level};
"""

# Combine all parts to form the final query
query = common_query + admin_level_query + final_query

# Execute the query
admins = con.execute(query).fetchdf()

# Convert to GeoDataFrame and process geometry
admins = gpd.GeoDataFrame(
admins,
geometry=admins["geometry"].apply(lambda b: wkb.loads(bytes(b))),
crs="EPSG:4326",
)

admins[f"admin_level_{admin_level}_name"] = admins.names.map(lambda r: r["primary"])
# Process names and drop the original column
admins["primary_name"] = admins.names.map(lambda r: r["primary"])
admins = admins.drop(columns=["names"])

outpath = OUT_DIR / f"admin_bounds_level_{admin_level}.parquet"
# Write the output
outpath = out_dir / f"admin_bounds_level_{admin_level}.parquet"
print(f"Writing admin boundaries level {admin_level} to: {outpath}")
admins.to_parquet(outpath)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Process Overture admin level data.')
parser.add_argument('admin_level', type=int, help='The admin level to process')
parser.add_argument('-s', '--source', default='~/data/src/overture/2024-02-15-alpha.0', help='The source Overture directory')
parser.add_argument('-o', '--output', default='~/data/prc/overture/2024-02-15', help='The output directory')

# NOTE: open question - should the default source be a remote href (S3) or a local directory?
# OVERTURE_HREF = str(pathlib.Path("~/data/src/overture/2024-02-15-alpha.0").expanduser())
OVERTURE_HREF = "s3://overturemaps-us-west-2/release/2024-02-15-alpha.0"
OUT_DIR = pathlib.Path("~/data/prc/overture/2024-02-15").expanduser()

parser = argparse.ArgumentParser(description="Process Overture admin level data.")
parser.add_argument("admin_level", type=int, help="The admin level to process")
parser.add_argument(
"-s",
"--source",
default=f"{OVERTURE_HREF}",
help="The source Overture directory",
)
parser.add_argument(
"-o", "--output", default=f"{OUT_DIR}", help="The output directory"
)

args = parser.parse_args()

Expand Down

0 comments on commit a181338

Please sign in to comment.