Skip to content

Commit

Permalink
Merge pull request #92 from ukaea/feat/T-#77-jsonld-mapping
Browse files Browse the repository at this point in the history
JSON-LD Metadata for JSON API
  • Loading branch information
samueljackson92 authored Feb 3, 2025
2 parents 802eaf6 + db2eda4 commit 032e1c4
Show file tree
Hide file tree
Showing 7 changed files with 215 additions and 72 deletions.
8 changes: 7 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
data
s3data
dev/docker/data
dev/docker/data
tests/mock_data
.conda
.venv
.pytest_cache
docs
src/api/static
31 changes: 20 additions & 11 deletions src/api/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,7 @@
import pyarrow.parquet as pq
from psycopg2.extras import Json
from sqlalchemy import MetaData, create_engine, text
from sqlalchemy_utils.functions import (
create_database,
database_exists,
drop_database,
)
from sqlalchemy_utils.functions import create_database, database_exists, drop_database
from sqlmodel import SQLModel
from tqdm import tqdm

Expand Down Expand Up @@ -130,15 +126,16 @@ def create_user(self):
def create_cpf_summary(self, data_path: Path):
"""Create the CPF summary table"""
paths = data_path.glob("cpf/*_cpf_columns.parquet")
for path in paths:
df = pd.read_parquet(path)
# replacing col name row values with cpf alias value in shotmodel
df["name"] = df["name"].apply(
dfs = [pd.read_parquet(path) for path in paths]
df = pd.concat(dfs).reset_index(drop=True)
df["context"] = [Json(base_context)] * len(df)
df = df.drop_duplicates(subset=["name"])
df["name"] = df["name"].apply(
lambda x: models.ShotModel.__fields__.get("cpf_" + x.lower()).alias
if models.ShotModel.__fields__.get("cpf_" + x.lower())
else x
)
df.to_sql("cpf_summary", self.uri, if_exists="replace")
df.to_sql("cpf_summary", self.uri, if_exists="append")

def create_scenarios(self, data_path: Path):
"""Create the scenarios metadata table"""
Expand All @@ -149,6 +146,7 @@ def create_scenarios(self, data_path: Path):

data = pd.DataFrame(dict(id=ids, name=scenarios)).set_index("id")
data = data.dropna()
data["context"] = [Json(base_context)] * len(data)
data.to_sql("scenarios", self.uri, if_exists="append")

def create_shots(self, data_path: Path):
Expand All @@ -167,6 +165,7 @@ def create_shots(self, data_path: Path):
shot_metadata["scenario"] = shot_metadata["scenario_id"]
shot_metadata["facility"] = "MAST"
shot_metadata = shot_metadata.drop(["scenario_id", "reference_id"], axis=1)
shot_metadata["context"] = [Json(base_context)] * len(shot_metadata)
shot_metadata["uuid"] = shot_metadata.index.map(get_dataset_uuid)
shot_metadata["url"] = (
"s3://mast/level1/shots/" + shot_metadata.index.astype(str) + ".zarr"
Expand Down Expand Up @@ -213,6 +212,7 @@ def create_signals(self, data_path: Path):
df = signals_metadata
df = df[df.shot_id <= LAST_MAST_SHOT]
df = df.drop_duplicates(subset="uuid")
df["context"] = [Json(base_context)] * len(df)
df["shape"] = df["shape"].map(lambda x: x.tolist())
df["dimensions"] = df["dimensions"].map(lambda x: x.tolist())
df["url"] = (
Expand All @@ -233,13 +233,22 @@ def create_sources(self, data_path: Path):
source_metadata = pd.read_parquet(data_path / "sources.parquet")
source_metadata = source_metadata.drop_duplicates("uuid")
source_metadata = source_metadata.loc[source_metadata.shot_id <= LAST_MAST_SHOT]
source_metadata["context"] = [Json(base_context)] * len(source_metadata)
source_metadata["url"] = (
"s3://mast/level1/shots/"
+ source_metadata["shot_id"].map(str)
+ ".zarr/"
+ source_metadata["name"]
)
column_names = ["uuid", "shot_id", "name", "description", "quality", "url"]
column_names = [
"uuid",
"shot_id",
"name",
"description",
"quality",
"url",
"context",
]
source_metadata = source_metadata[column_names]
source_metadata.to_sql("sources", self.uri, if_exists="append", index=False)

Expand Down
2 changes: 1 addition & 1 deletion src/api/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def execute_query_all(db: Session, query: Query):

def execute_query_one(db: Session, query: Query):
item = db.execute(query).one()[0]
item = item.dict(exclude_none=True)
item = item.dict(exclude_none=True, by_alias=True)
return item


Expand Down
54 changes: 33 additions & 21 deletions src/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,10 +290,10 @@ def query_aggregate(
@app.get(
"/json/shots",
description="Get information about experimental shots",
response_model=CursorPage[models.ShotModel],
response_class=CustomJSONResponse,
)
def get_shots(
db: Session = Depends(get_db), params: QueryParams = Depends()
) -> CursorPage[models.ShotModel]:
def get_shots(db: Session = Depends(get_db), params: QueryParams = Depends()):
if params.sort is None:
params.sort = "shot_id"

Expand All @@ -317,8 +317,10 @@ def get_shots_aggregate(
@app.get(
"/json/shots/{shot_id}",
description="Get information about a single experimental shot",
response_model=models.ShotModel,
response_class=CustomJSONResponse,
)
def get_shot(db: Session = Depends(get_db), shot_id: int = None) -> models.ShotModel:
def get_shot(db: Session = Depends(get_db), shot_id: int = None):
shot = crud.get_shot(shot_id)
shot = crud.execute_query_one(db, shot)
return shot
Expand All @@ -337,12 +339,14 @@ def get_dataservice(db: Session = Depends(get_db)):
@app.get(
"/json/shots/{shot_id}/signals",
description="Get information all signals for a single experimental shot",
response_model=CursorPage[models.SignalModel],
response_class=CustomJSONResponse,
)
def get_signals_for_shot(
db: Session = Depends(get_db),
shot_id: int = None,
params: QueryParams = Depends(),
) -> CursorPage[models.SignalModel]:
):
if params.sort is None:
params.sort = "uuid"
# Get shot
Expand All @@ -360,10 +364,10 @@ def get_signals_for_shot(
@app.get(
"/json/signals",
description="Get information about specific signals.",
response_model=CursorPage[models.SignalModel],
response_class=CustomJSONResponse,
)
def get_signals(
db: Session = Depends(get_db), params: QueryParams = Depends()
) -> CursorPage[models.SignalModel]:
def get_signals(db: Session = Depends(get_db), params: QueryParams = Depends()):
if params.sort is None:
params.sort = "uuid"
query = crud.select_query(
Expand All @@ -388,10 +392,10 @@ def get_signals_aggregate(
"/json/signals/{uuid_}",
description="Get information about a single signal",
response_model_exclude_unset=True,
response_model=models.SignalModel,
response_class=CustomJSONResponse,
)
def get_signal(
db: Session = Depends(get_db), uuid_: uuid.UUID = None
) -> models.SignalModel:
def get_signal(db: Session = Depends(get_db), uuid_: uuid.UUID = None):
signal = crud.get_signal(uuid_)
signal = crud.execute_query_one(db, signal)

Expand All @@ -402,6 +406,8 @@ def get_signal(
"/json/signals/{uuid_}/shot",
description="Get information about the shot for a single signal",
response_model_exclude_unset=True,
response_model=models.ShotModel,
response_class=CustomJSONResponse,
)
def get_shot_for_signal(
db: Session = Depends(get_db), uuid_: uuid.UUID = None
Expand All @@ -416,10 +422,10 @@ def get_shot_for_signal(
@app.get(
"/json/cpf_summary",
description="Get descriptions of CPF summary variables.",
response_model=CursorPage[models.CPFSummaryModel],
response_class=CustomJSONResponse,
)
def get_cpf_summary(
db: Session = Depends(get_db), params: QueryParams = Depends()
) -> CursorPage[models.CPFSummaryModel]:
def get_cpf_summary(db: Session = Depends(get_db), params: QueryParams = Depends()):
if params.sort is None:
params.sort = "index"

Expand All @@ -432,6 +438,8 @@ def get_cpf_summary(
@app.get(
"/json/scenarios",
description="Get information on different scenarios.",
response_model=CursorPage[models.ScenarioModel],
response_class=CustomJSONResponse,
)
def get_scenarios(
db: Session = Depends(get_db), params: QueryParams = Depends()
Expand All @@ -448,10 +456,10 @@ def get_scenarios(
@app.get(
"/json/sources",
description="Get information on different sources.",
response_model=CursorPage[models.SourceModel],
response_class=CustomJSONResponse,
)
def get_sources(
db: Session = Depends(get_db), params: QueryParams = Depends()
) -> CursorPage[models.SourceModel]:
def get_sources(db: Session = Depends(get_db), params: QueryParams = Depends()):
if params.sort is None:
params.sort = "name"

Expand All @@ -461,7 +469,11 @@ def get_sources(
return paginate(db, query)


@app.get("/json/sources/aggregate")
@app.get(
"/json/sources/aggregate",
response_model=models.SourceModel,
response_class=CustomJSONResponse,
)
def get_sources_aggregate(
request: Request,
response: Response,
Expand All @@ -475,10 +487,10 @@ def get_sources_aggregate(
@app.get(
"/json/sources/{name}",
description="Get information about a single signal",
response_model=models.SourceModel,
response_class=CustomJSONResponse,
)
def get_single_source(
db: Session = Depends(get_db), name: str = None
) -> models.SourceModel:
def get_single_source(db: Session = Depends(get_db), name: str = None):
source = crud.get_source(db, name)
source = db.execute(source).one()[0]
return source
Expand Down
Loading

0 comments on commit 032e1c4

Please sign in to comment.