Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions offsets_db_api/geo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""
Helper functions for loading geographic data from parquet files.
"""

import functools

import pandas as pd

from offsets_db_api.log import get_logger

# Module-level logger shared by every helper in this file.
logger = get_logger()

# S3 URL for project boundaries geoparquet.
# load_project_bboxes() reads the `project_id` and `bbox` columns from this file.
PROJECT_BOUNDARIES_URL = 's3://carbonplan-offsets-db/miscellaneous/project-boundaries.parquet'


def _bbox_lookup_from_frame(df: pd.DataFrame) -> dict[str, dict[str, float]]:
    """Build the project_id -> bbox mapping from the boundaries dataframe."""
    coordinate_keys = ('xmin', 'ymin', 'xmax', 'ymax')
    bbox_lookup = {}
    # Zipping the two columns avoids the per-row Series that iterrows() builds.
    for project_id, bbox in zip(df['project_id'], df['bbox']):
        if bbox is None:
            continue
        coordinates = {key: bbox.get(key) for key in coordinate_keys}
        # A bbox with a missing coordinate cannot satisfy the declared
        # dict[str, float] contract, so it is treated as "no bbox".
        if any(value is None for value in coordinates.values()):
            continue
        bbox_lookup[project_id] = coordinates
    return bbox_lookup


@functools.lru_cache(maxsize=1)
def _load_project_bboxes_cached() -> dict[str, dict[str, float]]:
    """
    Read the geoparquet file and return the bbox lookup.

    Errors propagate on purpose: lru_cache only stores values that are
    returned, so a failed read is never cached and the next call retries.
    """
    logger.info(f'Loading project bboxes from {PROJECT_BOUNDARIES_URL}')
    df = pd.read_parquet(
        PROJECT_BOUNDARIES_URL,
        columns=['project_id', 'bbox'],
        storage_options={'anon': True},
    )
    bbox_lookup = _bbox_lookup_from_frame(df)
    logger.info(f'Loaded {len(bbox_lookup)} project bboxes')
    return bbox_lookup


def load_project_bboxes() -> dict[str, dict[str, float]]:
    """
    Load project bounding boxes from the geoparquet file.

    A successful load is cached to avoid repeated S3 reads. A failed load is
    logged and reported as an empty dictionary, but it is NOT cached, so a
    transient failure does not leave the lookup empty until the cache is
    cleared.

    Returns
    -------
    dict
        Dictionary mapping project_id to a bbox dict with keys
        xmin, ymin, xmax, ymax. Empty if the file could not be loaded.
    """
    try:
        return _load_project_bboxes_cached()
    except Exception as e:
        # Best-effort boundary: callers treat a missing bbox as optional data.
        logger.error(f'Failed to load project bboxes: {e}')
        return {}


# Keep the lru_cache management API on the public name for existing callers
# such as clear_bbox_cache().
load_project_bboxes.cache_clear = _load_project_bboxes_cached.cache_clear
load_project_bboxes.cache_info = _load_project_bboxes_cached.cache_info


def get_bbox_for_project(project_id: str) -> dict[str, float] | None:
    """
    Look up the bounding box of a single project.

    Parameters
    ----------
    project_id : str
        Identifier of the project whose bbox is wanted

    Returns
    -------
    dict or None
        The bbox dict (xmin, ymin, xmax, ymax) for the project, or None when
        the loaded lookup has no entry for it
    """
    return load_project_bboxes().get(project_id)


def get_bboxes_for_projects(project_ids: list[str]) -> dict[str, dict[str, float]]:
    """
    Look up bounding boxes for several projects at once.

    Parameters
    ----------
    project_ids : list of str
        Project IDs to resolve

    Returns
    -------
    dict
        Mapping of project_id to bbox dict; IDs without a known bbox are
        left out of the result
    """
    known_bboxes = load_project_bboxes()
    found = {}
    for project_id in project_ids:
        if project_id not in known_bboxes:
            continue
        found[project_id] = known_bboxes[project_id]
    return found


def clear_bbox_cache():
    """Discard the cached bbox lookup so that the next load reads the data again."""
    reset_cache = load_project_bboxes.cache_clear
    reset_cache()
    logger.info('Cleared project bbox cache')
12 changes: 12 additions & 0 deletions offsets_db_api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
from offsets_db_api.schemas import FileCategory, FileStatus, Pagination


class BBox(pydantic.BaseModel):
    """
    Geographic extent of a project, expressed as an axis-aligned rectangle.

    All four edges are required floats.
    NOTE(review): coordinate reference system and units are not stated
    here — confirm against the source geoparquet.
    """

    # Lower and upper bounds along x and y.
    xmin: float
    ymin: float
    xmax: float
    ymax: float


class File(SQLModel, table=True):
id: int = Field(default=None, primary_key=True, index=True)
url: str
Expand Down Expand Up @@ -123,6 +132,9 @@ class ProjectWithClips(ProjectBase):
clips: list[Clip] | None = Field(
default=None, description='List of clips associated with project'
)
bbox: BBox | None = Field(
default=None, description='Bounding box for the project geographic extent'
)


class CreditBase(SQLModel):
Expand Down
8 changes: 7 additions & 1 deletion offsets_db_api/routers/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from offsets_db_api.cache import CACHE_NAMESPACE
from offsets_db_api.common import build_filters
from offsets_db_api.database import get_session
from offsets_db_api.geo import get_bbox_for_project, get_bboxes_for_projects
from offsets_db_api.log import get_logger
from offsets_db_api.models import (
Clip,
Expand Down Expand Up @@ -148,13 +149,17 @@ async def get_projects(
for project_id, clip in clip_results:
project_to_clips[project_id].append(clip)

# Transform the dictionary into a list of projects with clips and project_type
# Get bboxes for all project IDs
project_bboxes = get_bboxes_for_projects(project_ids)

# Transform the dictionary into a list of projects with clips, project_type, and bbox
projects_with_clips = []
for project in results:
project_data = project.model_dump()
project_data['clips'] = [
clip.model_dump() for clip in project_to_clips.get(project.project_id, [])
]
project_data['bbox'] = project_bboxes.get(project.project_id)
projects_with_clips.append(project_data)

return PaginatedProjects(
Expand Down Expand Up @@ -205,4 +210,5 @@ async def get_project(
project_data = project.model_dump()

project_data['clips'] = [clip.model_dump() for clip in clip_projects_subquery]
project_data['bbox'] = get_bbox_for_project(project_id)
return project_data
Loading