From 1cbbe793451271d1cbe772bbf18e2a2590d3c873 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe
Date: Wed, 3 Dec 2025 21:05:04 -0800
Subject: [PATCH 1/3] update the projects endpoints to return `bbox` in the payload

---
Review notes for this revision (free-form text between the three-dash line
and the diffstat is not part of the commit message):

* geo.py: a failed S3 read is no longer memoized. The cached reader raises,
  the public loader returns {} for that call and retries once
  BBOX_RETRY_INTERVAL_SECONDS have elapsed.
* geo.py: bbox records with a missing, null or non-finite coordinate are
  skipped, because the BBox model declares all four fields as `float`.
* geo.py: rows are read with zip() over the two columns instead of iterrows().
* The blob id on the geo.py `index` line and the commit ids on the `From`
  lines are carried over from the original series and no longer match the
  revised content; regenerate them with `git format-patch` after committing.
* NOTE(review): the first bbox lookup performs a synchronous S3 read inside
  the async route handlers -- confirm whether it should be preloaded at
  startup or moved to a worker thread.

 offsets_db_api/geo.py              | 149 ++++++++++++++++++++++++++++++
 offsets_db_api/models.py           |  12 +++
 offsets_db_api/routers/projects.py |   6 ++
 3 files changed, 167 insertions(+)
 create mode 100644 offsets_db_api/geo.py

diff --git a/offsets_db_api/geo.py b/offsets_db_api/geo.py
new file mode 100644
index 0000000..d2ea101
--- /dev/null
+++ b/offsets_db_api/geo.py
@@ -0,0 +1,149 @@
+"""
+Helper functions for loading geographic data from parquet files.
+"""
+
+import functools
+import math
+import time
+
+import pandas as pd
+
+from offsets_db_api.log import get_logger
+
+logger = get_logger()
+
+# S3 URL for project boundaries geoparquet
+PROJECT_BOUNDARIES_URL = 's3://carbonplan-offsets-db/miscellaneous/project-boundaries.parquet'
+
+# Coordinate keys every usable bbox must provide
+BBOX_KEYS = ('xmin', 'ymin', 'xmax', 'ymax')
+
+# Seconds to wait after a failed load before trying S3 again
+BBOX_RETRY_INTERVAL_SECONDS = 60.0
+
+# time.monotonic() of the most recent failed load; None when no failure is pending
+_last_load_failure: float | None = None
+
+
+def _normalize_bbox(bbox) -> dict[str, float] | None:
+    """
+    Convert a raw bbox record into a dict of four finite floats.
+
+    Returns None when the record is missing, lacks a coordinate, or holds a
+    null/NaN/infinite value, so callers never receive a partially filled bbox.
+    """
+    if bbox is None:
+        return None
+    try:
+        coords = {key: float(bbox[key]) for key in BBOX_KEYS}
+    except (KeyError, TypeError, ValueError):
+        return None
+    if not all(math.isfinite(value) for value in coords.values()):
+        return None
+    return coords
+
+
+@functools.lru_cache(maxsize=1)
+def _read_project_bboxes() -> dict[str, dict[str, float]]:
+    """
+    Read the geoparquet file and build the project_id -> bbox lookup.
+
+    Exceptions propagate to the caller; lru_cache only stores return values,
+    so a failed read is never cached.
+    """
+    logger.info(f'Loading project bboxes from {PROJECT_BOUNDARIES_URL}')
+    df = pd.read_parquet(
+        PROJECT_BOUNDARIES_URL,
+        columns=['project_id', 'bbox'],
+        storage_options={'anon': True},
+    )
+
+    # Convert to dict mapping project_id -> bbox, skipping unusable records
+    bbox_lookup = {}
+    for project_id, raw_bbox in zip(df['project_id'], df['bbox']):
+        bbox = _normalize_bbox(raw_bbox)
+        if bbox is not None:
+            bbox_lookup[project_id] = bbox
+
+    logger.info(f'Loaded {len(bbox_lookup)} project bboxes')
+    return bbox_lookup
+
+
+def load_project_bboxes() -> dict[str, dict[str, float]]:
+    """
+    Load project bounding boxes from the geoparquet file.
+
+    Returns a dictionary mapping project_id to bbox dict with keys:
+    xmin, ymin, xmax, ymax
+
+    A successful load is cached to avoid repeated S3 reads. A failed load
+    returns an empty dict, is not cached, and is retried once
+    BBOX_RETRY_INTERVAL_SECONDS have elapsed.
+    """
+    global _last_load_failure
+
+    if (
+        _last_load_failure is not None
+        and time.monotonic() - _last_load_failure < BBOX_RETRY_INTERVAL_SECONDS
+    ):
+        return {}
+
+    try:
+        bbox_lookup = _read_project_bboxes()
+    except Exception as e:
+        # Best effort: bbox is optional in the payload, so degrade to "none found"
+        logger.error(f'Failed to load project bboxes: {e}')
+        _last_load_failure = time.monotonic()
+        return {}
+
+    _last_load_failure = None
+    return bbox_lookup
+
+
+def get_bbox_for_project(project_id: str) -> dict[str, float] | None:
+    """
+    Get the bounding box for a specific project.
+
+    Parameters
+    ----------
+    project_id : str
+        The project ID to look up
+
+    Returns
+    -------
+    dict or None
+        Bbox dict with xmin, ymin, xmax, ymax keys, or None if not found
+    """
+    bbox_lookup = load_project_bboxes()
+    return bbox_lookup.get(project_id)
+
+
+def get_bboxes_for_projects(project_ids: list[str]) -> dict[str, dict[str, float]]:
+    """
+    Get bounding boxes for multiple projects.
+
+    Parameters
+    ----------
+    project_ids : list of str
+        List of project IDs to look up
+
+    Returns
+    -------
+    dict
+        Dictionary mapping project_id to bbox dict
+    """
+    bbox_lookup = load_project_bboxes()
+    return {pid: bbox_lookup[pid] for pid in project_ids if pid in bbox_lookup}
+
+
+def clear_bbox_cache():
+    """Clear the cached bbox data and any failure cooldown to force a reload."""
+    global _last_load_failure
+
+    _read_project_bboxes.cache_clear()
+    _last_load_failure = None
+    logger.info('Cleared project bbox cache')
+
+
+# Keep the lru_cache-style reset hook reachable from the public loader
+load_project_bboxes.cache_clear = clear_bbox_cache
diff --git a/offsets_db_api/models.py b/offsets_db_api/models.py
index f79de81..f6c3af6 100644
--- a/offsets_db_api/models.py
+++ b/offsets_db_api/models.py
@@ -8,6 +8,15 @@
 from offsets_db_api.schemas import FileCategory, FileStatus, Pagination
 
 
+class BBox(pydantic.BaseModel):
+    """Bounding box for a project's geographic extent."""
+
+    xmin: float
+    ymin: float
+    xmax: float
+    ymax: float
+
+
 class File(SQLModel, table=True):
     id: int = Field(default=None, primary_key=True, index=True)
     url: str
@@ -123,6 +132,9 @@ class ProjectWithClips(ProjectBase):
     clips: list[Clip] | None = Field(
         default=None, description='List of clips associated with project'
     )
+    bbox: BBox | None = Field(
+        default=None, description='Bounding box for the project geographic extent'
+    )
 
 
 class CreditBase(SQLModel):
diff --git a/offsets_db_api/routers/projects.py b/offsets_db_api/routers/projects.py
index d505b32..b4e618b 100644
--- a/offsets_db_api/routers/projects.py
+++ b/offsets_db_api/routers/projects.py
@@ -9,6 +9,7 @@
 from offsets_db_api.cache import CACHE_NAMESPACE
 from offsets_db_api.common import build_filters
 from offsets_db_api.database import get_session
+from offsets_db_api.geo import get_bbox_for_project, get_bboxes_for_projects
 from offsets_db_api.log import get_logger
 from offsets_db_api.models import (
     Clip,
@@ -148,6 +149,9 @@ async def get_projects(
     for project_id, clip in clip_results:
         project_to_clips[project_id].append(clip)
 
+    # Get bboxes for all project IDs
+    project_bboxes = get_bboxes_for_projects(project_ids)
+
     # Transform the dictionary into a list of projects with clips and project_type
     projects_with_clips = []
     for project in results:
@@ -155,6 +159,7 @@ async def get_projects(
         project_data['clips'] = [
             clip.model_dump() for clip in project_to_clips.get(project.project_id, [])
         ]
+        project_data['bbox'] = project_bboxes.get(project.project_id)
         projects_with_clips.append(project_data)
 
     return PaginatedProjects(
@@ -205,4 +210,5 @@ async def get_project(
 
     project_data = project.model_dump()
     project_data['clips'] = [clip.model_dump() for clip in clip_projects_subquery]
+    project_data['bbox'] = get_bbox_for_project(project_id)
     return project_data

From 0c0122179919e0f388c36f11d9629cf624b36ae9 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe
Date: Wed, 3 Dec 2025 21:09:16 -0800
Subject: [PATCH 2/3] Retrigger CI


From c1171a84ae8e65be219fd579e6efaf3e52abf67e Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
Date: Thu, 4 Dec 2025 11:18:07 -0800
Subject: [PATCH 3/3] Update offsets_db_api/routers/projects.py

Co-authored-by: Kata Martin
---
 offsets_db_api/routers/projects.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offsets_db_api/routers/projects.py b/offsets_db_api/routers/projects.py
index b4e618b..287c718 100644
--- a/offsets_db_api/routers/projects.py
+++ b/offsets_db_api/routers/projects.py
@@ -152,7 +152,7 @@ async def get_projects(
     # Get bboxes for all project IDs
     project_bboxes = get_bboxes_for_projects(project_ids)
 
-    # Transform the dictionary into a list of projects with clips and project_type
+    # Transform the dictionary into a list of projects with clips, project_type, and bbox
     projects_with_clips = []
     for project in results:
         project_data = project.model_dump()