Skip to content

Commit

Permalink
db_schema: system_info.{dataset stuff} -> {dataset stuff} (#439)
Browse files Browse the repository at this point in the history
* db_schema: system_info.{dataset stuff} -> {dataset stuff}

* null check for system.dataset

* fix creator and preferred_name
  • Loading branch information
lyuyangh authored Oct 24, 2022
1 parent e993e86 commit 61c3d3d
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 79 deletions.
100 changes: 55 additions & 45 deletions backend/src/impl/db_utils/system_db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,6 @@ def system_from_dict(
document["system_id"] = str(document.pop("_id"))
if document.get("is_private") is None:
document["is_private"] = True
if document.get("dataset_metadata_id") and document.get("dataset") is None:
dataset = DatasetDBUtils.find_dataset_by_id(document["dataset_metadata_id"])
if dataset:
split = document.get("dataset_split")
# this check only valid for create
if split and split not in dataset.split:
abort_with_error_message(
400, f"{split} is not a valid split for {dataset.dataset_name}"
)
document["dataset"] = dataset.to_dict()
document.pop("dataset_metadata_id")

# Parse the shared users
shared_users = document.get("shared_users", None)
Expand Down Expand Up @@ -216,15 +205,15 @@ def find_systems(
if task:
search_conditions.append({"task": task})
if dataset_name:
search_conditions.append({"system_info.dataset_name": dataset_name})
search_conditions.append({"dataset.dataset_name": dataset_name})
if subdataset_name:
search_conditions.append({"system_info.sub_dataset_name": subdataset_name})
search_conditions.append({"dataset.sub_dataset": subdataset_name})
if source_language:
search_conditions.append({"source_language": source_language})
if target_language:
search_conditions.append({"target_language": target_language})
if split:
search_conditions.append({"system_info.dataset_split": split})
search_conditions.append({"dataset.split": split})
if creator:
search_conditions.append({"creator": creator})
if shared_users:
Expand All @@ -233,9 +222,9 @@ def find_systems(
if dataset_list:
dataset_dicts = [
{
"system_info.dataset_name": ds[0],
"system_info.sub_dataset_name": ds[1],
"system_info.dataset_split": ds[2],
"dataset.dataset_name": ds[0],
"dataset.sub_dataset": ds[1],
"dataset.split": ds[2],
}
for ds in dataset_list
]
Expand Down Expand Up @@ -274,7 +263,7 @@ def _load_sys_output(
dataset_file_type=FileType(custom_dataset.file_type),
output_file_type=FileType(system_output.file_type),
).load()
else:
elif system.dataset:
return (
get_loader_class(task=metadata.task)
.from_datalab(
Expand All @@ -290,6 +279,7 @@ def _load_sys_output(
)
.load()
)
raise ValueError("neither dataset or custom_dataset is available")

@staticmethod
def _process(
Expand Down Expand Up @@ -358,33 +348,57 @@ def create_system(
"""
Create a system given metadata and outputs, etc.
"""
document = metadata.to_dict()

# -- parse the system details if getting a string from the frontend
SystemDBUtils._parse_system_details_in_doc(document, metadata)
def _validate_and_create_system():
system = {}

# -- set the creator
user = get_user()
document["creator"] = user.id

# -- set the preferred_username to conform with the return schema
document["preferred_username"] = user.preferred_username

# -- create the object defined in openapi.yaml from only uploaded metadata
system: System = SystemDBUtils.system_from_dict(document)
user = get_user()
system["creator"] = user.id
system["preferred_username"] = user.preferred_username

# -- validate the dataset metadata
if metadata.dataset_metadata_id:
if not system.dataset:
abort_with_error_message(
400, f"dataset: {metadata.dataset_metadata_id} does not exist"
)
if metadata.task not in system.dataset.tasks:
abort_with_error_message(
400,
f"dataset {system.dataset.dataset_name} cannot be used for "
f"{metadata.task} tasks",
if metadata.dataset_metadata_id:
if not metadata.dataset_split:
abort_with_error_message(
400, "dataset split is required if a dataset is chosen"
)
if custom_dataset:
abort_with_error_message(
400,
"both datalab dataset and custom dataset are "
"provided. please only select one.",
)
dataset = DatasetDBUtils.find_dataset_by_id(
metadata.dataset_metadata_id
)
if dataset:
if metadata.dataset_split not in dataset.split:
abort_with_error_message(
400,
f"{metadata.dataset_split} is not a valid split "
f"for {dataset.dataset_name}",
)
if metadata.task not in dataset.tasks:
abort_with_error_message(
400,
f"dataset {dataset.dataset_name} cannot be used for "
f"{metadata.task} tasks",
)
system["dataset"] = {
"dataset_id": dataset.dataset_id,
"split": metadata.dataset_split,
"dataset_name": dataset.dataset_name,
"sub_dataset": dataset.sub_dataset,
}
else:
abort_with_error_message(
400, f"dataset: {metadata.dataset_metadata_id} cannot be found"
)
system.update(metadata.to_dict())
# -- parse the system details if getting a string from the frontend
SystemDBUtils._parse_system_details_in_doc(system, metadata)
return SystemDBUtils.system_from_dict(system)

system = _validate_and_create_system()

try:
# -- find the dataset and grab custom features if they exist
Expand Down Expand Up @@ -428,10 +442,6 @@ def db_operations(session: ClientSession) -> str:
document = general_to_dict(system)
document.pop("system_id")
document.pop("preferred_username")
document["dataset_metadata_id"] = (
system.dataset.dataset_id if system.dataset else None
)
document.pop("dataset")
system_id = DBUtils.insert_one(
DBUtils.DEV_SYSTEM_METADATA, document, session=session
)
Expand Down
45 changes: 18 additions & 27 deletions backend/src/impl/default_controllers_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,30 +297,21 @@ def systems_post(body: SystemCreateProps) -> System:
aborts with error if fails
TODO: error handling
"""
if body.metadata.dataset_metadata_id:
if not body.metadata.dataset_split:
abort_with_error_message(
400, "dataset split is required if a dataset is chosen"
)
if body.custom_dataset:
abort_with_error_message(
400,
"both datalab dataset and custom dataset are "
"provided. please only select one.",
)

try:
body.system_output.data = decode_base64(body.system_output.data)
if body.custom_dataset and body.custom_dataset.data:
if body.custom_dataset:
if not body.custom_dataset.data:
abort_with_error_message(400, "custom_dataset.data cannot be empty")
body.custom_dataset.data = decode_base64(body.custom_dataset.data)
system = SystemDBUtils.create_system(
body.metadata, body.system_output, body.custom_dataset
)
return system
except binascii.Error as e:
abort_with_error_message(
400, f"file should be sent in plain text base64. ({e})"
)
else:
system = SystemDBUtils.create_system(
body.metadata, body.system_output, body.custom_dataset
)
return system


def systems_update_by_id(body: SystemUpdateProps, system_id: str):
Expand All @@ -343,15 +334,15 @@ def system_outputs_get_by_id(
system = SystemDBUtils.find_system_by_id(system_id)
if not _has_read_access(system):
abort_with_error_message(403, "system access denied", 40302)
if is_private_dataset(
if system.dataset and is_private_dataset(
DatalabLoaderOption(
system.system_info.dataset_name,
system.system_info.sub_dataset_name,
system.system_info.dataset_split,
system.dataset.dataset_name,
system.dataset.sub_dataset,
system.dataset.split,
)
):
abort_with_error_message(
403, f"{system.system_info.dataset_name} is a private dataset", 40301
403, f"{system.dataset.dataset_name} is a private dataset", 40301
)

return SystemDBUtils.find_system_outputs(system_id, output_ids)
Expand All @@ -368,15 +359,15 @@ def system_cases_get_by_id(
system = SystemDBUtils.find_system_by_id(system_id)
if not _has_read_access(system):
abort_with_error_message(403, "system access denied", 40302)
if is_private_dataset(
if system.dataset and is_private_dataset(
DatalabLoaderOption(
system.system_info.dataset_name,
system.system_info.sub_dataset_name,
system.system_info.dataset_split,
system.dataset.dataset_name,
system.dataset.sub_dataset,
system.dataset.split,
)
):
abort_with_error_message(
403, f"{system.system_info.dataset_name} is a private dataset", 40301
403, f"{system.dataset.dataset_name} is a private dataset", 40301
)

return SystemDBUtils.find_analysis_cases(system_id, level=level, case_ids=case_ids)
Expand Down
3 changes: 1 addition & 2 deletions frontend/src/components/SystemsTable/SystemTableContent.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,11 @@ export function SystemTableContent({
),
},
{
dataIndex: ["system_info", "split"],
dataIndex: ["dataset", "split"],
width: 110,
title: "Dataset Split",
fixed: "left",
align: "center",
render: (_, record) => record.system_info.dataset_split || "unspecified",
},
{
dataIndex: "source_language",
Expand Down
9 changes: 4 additions & 5 deletions openapi/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -991,19 +991,18 @@ components:
dataset:
description: if not specified, it is undefined
type: object
nullable: true
properties:
dataset_id:
type: string
split:
type: string
dataset_name:
type: string
tasks:
type: array
items:
type: string
sub_dataset:
type: string
nullable: true
required: [dataset_id, dataset_name, tasks]
required: [dataset_id, dataset_name, split]
system_details:
type: object
description: |
Expand Down

0 comments on commit 61c3d3d

Please sign in to comment.