carbonplan · andersy005 · Dec 12, 2025 · Dec 4, 2025 · Dec 4, 2025 · Dec 4, 2025
diff --git a/offsets_db_api/geo.py b/offsets_db_api/geo.py
@@ -0,0 +1,95 @@
+"""
+Helper functions for loading geographic data from parquet files.
+"""
+
+import functools
+
+import pandas as pd
+
+from offsets_db_api.log import get_logger
+
+logger = get_logger()
+
+# Geoparquet of project boundaries on S3; read anonymously for the project_id and bbox columns
+PROJECT_BOUNDARIES_URL = 's3://carbonplan-offsets-db/miscellaneous/project-boundaries.parquet'
+
+
+@functools.lru_cache(maxsize=1)
+def _read_project_bboxes() -> dict[str, dict[str, float]]:
+    """Read the geoparquet once; raises on failure so errors are never memoized."""
+    logger.info(f'Loading project bboxes from {PROJECT_BOUNDARIES_URL}')
+    df = pd.read_parquet(
+        PROJECT_BOUNDARIES_URL,
+        columns=['project_id', 'bbox'],
+        storage_options={'anon': True},
+    )
+    bbox_lookup = {}
+    for project_id, bbox in zip(df['project_id'], df['bbox']):
+        if bbox is None:
+            continue
+        coords = {key: bbox.get(key) for key in ('xmin', 'ymin', 'xmax', 'ymax')}
+        # Skip partial boxes: the API's BBox model requires all four values as floats.
+        if None not in coords.values():
+            bbox_lookup[project_id] = coords
+    logger.info(f'Loaded {len(bbox_lookup)} project bboxes')
+    return bbox_lookup
+
+
+def load_project_bboxes() -> dict[str, dict[str, float]]:
+    """
+    Return a dictionary mapping project_id to bbox dict (xmin, ymin, xmax, ymax).
+
+    Successful loads are cached to avoid repeated S3 reads; on failure the error
+    is logged, an empty dict is returned, and the next call retries the read.
+    """
+    try:
+        return _read_project_bboxes()
+    except Exception as e:
+        logger.error(f'Failed to load project bboxes: {e}')
+        return {}
+
+
+# Keep load_project_bboxes.cache_clear() working for callers such as clear_bbox_cache().
+load_project_bboxes.cache_clear = _read_project_bboxes.cache_clear  # type: ignore[attr-defined]
+
+
+def get_bbox_for_project(project_id: str) -> dict[str, float] | None:
+    """
+    Look up the bounding box of a single project.
+
+    Parameters
+    ----------
+    project_id : str
+        Identifier of the project whose bbox is wanted
+
+    Returns
+    -------
+    dict or None
+        The project's bbox dict (keys xmin, ymin, xmax, ymax), or None when the
+        loaded lookup has no entry for ``project_id``
+    """
+    return load_project_bboxes().get(project_id)
+
+
+def get_bboxes_for_projects(project_ids: list[str]) -> dict[str, dict[str, float]]:
+    """
+    Look up bounding boxes for several projects in one pass.
+
+    Parameters
+    ----------
+    project_ids : list of str
+        Project IDs to resolve; IDs without a known bbox are left out of the result
+
+    Returns
+    -------
+    dict
+        Mapping of project_id to its bbox dict, restricted to the IDs that were found
+    """
+    cached = load_project_bboxes()
+    return {pid: cached[pid] for pid in filter(cached.__contains__, project_ids)}
+
+
+def clear_bbox_cache() -> None:
+    """Drop the cached bbox lookup so the next load_project_bboxes() call re-reads S3."""
+    load_project_bboxes.cache_clear()
+    logger.info('Cleared project bbox cache')
diff --git a/offsets_db_api/models.py b/offsets_db_api/models.py
@@ -8,6 +8,15 @@
 from offsets_db_api.schemas import FileCategory, FileStatus, Pagination
 
 
+class BBox(pydantic.BaseModel):
+    """Axis-aligned bounding box (xmin, ymin, xmax, ymax) of a project's geographic extent."""
+
+    xmin: float  # lower x bound (presumably longitude; TODO confirm CRS/units)
+    ymin: float  # lower y bound (presumably latitude; TODO confirm CRS/units)
+    xmax: float  # upper x bound
+    ymax: float  # upper y bound
+
+
 class File(SQLModel, table=True):
     id: int = Field(default=None, primary_key=True, index=True)
     url: str
@@ -123,6 +132,9 @@ class ProjectWithClips(ProjectBase):
     clips: list[Clip] | None = Field(
         default=None, description='List of clips associated with project'
     )
+    bbox: BBox | None = Field(
+        default=None, description='Bounding box for the project geographic extent'
+    )
 
 
 class CreditBase(SQLModel):

diff --git a/offsets_db_api/routers/projects.py b/offsets_db_api/routers/projects.py
@@ -9,6 +9,7 @@
 from offsets_db_api.cache import CACHE_NAMESPACE
 from offsets_db_api.common import build_filters
 from offsets_db_api.database import get_session
+from offsets_db_api.geo import get_bbox_for_project, get_bboxes_for_projects
 from offsets_db_api.log import get_logger
 from offsets_db_api.models import (
     Clip,
@@ -148,13 +149,17 @@ async def get_projects(
     for project_id, clip in clip_results:
         project_to_clips[project_id].append(clip)
 
-    # Transform the dictionary into a list of projects with clips and project_type
+    # Get bboxes for all project IDs
+    project_bboxes = get_bboxes_for_projects(project_ids)
+
+    # Transform the dictionary into a list of projects with clips, project_type, and bbox
     projects_with_clips = []
     for project in results:
         project_data = project.model_dump()
         project_data['clips'] = [
             clip.model_dump() for clip in project_to_clips.get(project.project_id, [])
         ]
+        project_data['bbox'] = project_bboxes.get(project.project_id)
         projects_with_clips.append(project_data)
 
     return PaginatedProjects(
@@ -205,4 +210,5 @@ async def get_project(
     project_data = project.model_dump()
 
     project_data['clips'] = [clip.model_dump() for clip in clip_projects_subquery]
+    project_data['bbox'] = get_bbox_for_project(project_id)
     return project_data