Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

db_schema: system_info.{dataset stuff} -> {dataset stuff} #439

Merged
merged 4 commits into from
Oct 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 55 additions & 45 deletions backend/src/impl/db_utils/system_db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,6 @@ def system_from_dict(
document["system_id"] = str(document.pop("_id"))
if document.get("is_private") is None:
document["is_private"] = True
if document.get("dataset_metadata_id") and document.get("dataset") is None:
dataset = DatasetDBUtils.find_dataset_by_id(document["dataset_metadata_id"])
if dataset:
split = document.get("dataset_split")
# this check only valid for create
if split and split not in dataset.split:
abort_with_error_message(
400, f"{split} is not a valid split for {dataset.dataset_name}"
)
document["dataset"] = dataset.to_dict()
document.pop("dataset_metadata_id")

# Parse the shared users
shared_users = document.get("shared_users", None)
Expand Down Expand Up @@ -216,15 +205,15 @@ def find_systems(
if task:
search_conditions.append({"task": task})
if dataset_name:
search_conditions.append({"system_info.dataset_name": dataset_name})
search_conditions.append({"dataset.dataset_name": dataset_name})
if subdataset_name:
search_conditions.append({"system_info.sub_dataset_name": subdataset_name})
search_conditions.append({"dataset.sub_dataset": subdataset_name})
if source_language:
search_conditions.append({"source_language": source_language})
if target_language:
search_conditions.append({"target_language": target_language})
if split:
search_conditions.append({"system_info.dataset_split": split})
search_conditions.append({"dataset.split": split})
if creator:
search_conditions.append({"creator": creator})
if shared_users:
Expand All @@ -233,9 +222,9 @@ def find_systems(
if dataset_list:
dataset_dicts = [
{
"system_info.dataset_name": ds[0],
"system_info.sub_dataset_name": ds[1],
"system_info.dataset_split": ds[2],
"dataset.dataset_name": ds[0],
"dataset.sub_dataset": ds[1],
"dataset.split": ds[2],
}
for ds in dataset_list
]
Expand Down Expand Up @@ -274,7 +263,7 @@ def _load_sys_output(
dataset_file_type=FileType(custom_dataset.file_type),
output_file_type=FileType(system_output.file_type),
).load()
else:
elif system.dataset:
return (
get_loader_class(task=metadata.task)
.from_datalab(
Expand All @@ -290,6 +279,7 @@ def _load_sys_output(
)
.load()
)
raise ValueError("neither dataset or custom_dataset is available")

@staticmethod
def _process(
Expand Down Expand Up @@ -358,33 +348,57 @@ def create_system(
"""
Create a system given metadata and outputs, etc.
"""
document = metadata.to_dict()

# -- parse the system details if getting a string from the frontend
SystemDBUtils._parse_system_details_in_doc(document, metadata)
def _validate_and_create_system():
system = {}

# -- set the creator
user = get_user()
document["creator"] = user.id

# -- set the preferred_username to conform with the return schema
document["preferred_username"] = user.preferred_username

# -- create the object defined in openapi.yaml from only uploaded metadata
system: System = SystemDBUtils.system_from_dict(document)
user = get_user()
system["creator"] = user.id
system["preferred_username"] = user.preferred_username

# -- validate the dataset metadata
if metadata.dataset_metadata_id:
if not system.dataset:
abort_with_error_message(
400, f"dataset: {metadata.dataset_metadata_id} does not exist"
)
if metadata.task not in system.dataset.tasks:
abort_with_error_message(
400,
f"dataset {system.dataset.dataset_name} cannot be used for "
f"{metadata.task} tasks",
if metadata.dataset_metadata_id:
if not metadata.dataset_split:
abort_with_error_message(
400, "dataset split is required if a dataset is chosen"
)
if custom_dataset:
abort_with_error_message(
400,
"both datalab dataset and custom dataset are "
"provided. please only select one.",
)
dataset = DatasetDBUtils.find_dataset_by_id(
metadata.dataset_metadata_id
)
if dataset:
if metadata.dataset_split not in dataset.split:
abort_with_error_message(
400,
f"{metadata.dataset_split} is not a valid split "
f"for {dataset.dataset_name}",
)
if metadata.task not in dataset.tasks:
abort_with_error_message(
400,
f"dataset {dataset.dataset_name} cannot be used for "
f"{metadata.task} tasks",
)
system["dataset"] = {
"dataset_id": dataset.dataset_id,
"split": metadata.dataset_split,
"dataset_name": dataset.dataset_name,
"sub_dataset": dataset.sub_dataset,
}
else:
abort_with_error_message(
400, f"dataset: {metadata.dataset_metadata_id} cannot be found"
)
system.update(metadata.to_dict())
# -- parse the system details if getting a string from the frontend
SystemDBUtils._parse_system_details_in_doc(system, metadata)
return SystemDBUtils.system_from_dict(system)

system = _validate_and_create_system()

try:
# -- find the dataset and grab custom features if they exist
Expand Down Expand Up @@ -428,10 +442,6 @@ def db_operations(session: ClientSession) -> str:
document = general_to_dict(system)
document.pop("system_id")
document.pop("preferred_username")
document["dataset_metadata_id"] = (
system.dataset.dataset_id if system.dataset else None
)
document.pop("dataset")
system_id = DBUtils.insert_one(
DBUtils.DEV_SYSTEM_METADATA, document, session=session
)
Expand Down
45 changes: 18 additions & 27 deletions backend/src/impl/default_controllers_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,30 +297,21 @@ def systems_post(body: SystemCreateProps) -> System:
aborts with error if fails
TODO: error handling
"""
if body.metadata.dataset_metadata_id:
if not body.metadata.dataset_split:
abort_with_error_message(
400, "dataset split is required if a dataset is chosen"
)
if body.custom_dataset:
abort_with_error_message(
400,
"both datalab dataset and custom dataset are "
"provided. please only select one.",
)

try:
body.system_output.data = decode_base64(body.system_output.data)
if body.custom_dataset and body.custom_dataset.data:
if body.custom_dataset:
if not body.custom_dataset.data:
abort_with_error_message(400, "custom_dataset.data cannot be empty")
body.custom_dataset.data = decode_base64(body.custom_dataset.data)
system = SystemDBUtils.create_system(
body.metadata, body.system_output, body.custom_dataset
)
return system
except binascii.Error as e:
abort_with_error_message(
400, f"file should be sent in plain text base64. ({e})"
)
else:
system = SystemDBUtils.create_system(
body.metadata, body.system_output, body.custom_dataset
)
return system


def systems_update_by_id(body: SystemUpdateProps, system_id: str):
Expand All @@ -343,15 +334,15 @@ def system_outputs_get_by_id(
system = SystemDBUtils.find_system_by_id(system_id)
if not _has_read_access(system):
abort_with_error_message(403, "system access denied", 40302)
if is_private_dataset(
if system.dataset and is_private_dataset(
DatalabLoaderOption(
system.system_info.dataset_name,
system.system_info.sub_dataset_name,
system.system_info.dataset_split,
system.dataset.dataset_name,
system.dataset.sub_dataset,
system.dataset.split,
)
):
abort_with_error_message(
403, f"{system.system_info.dataset_name} is a private dataset", 40301
403, f"{system.dataset.dataset_name} is a private dataset", 40301
)

return SystemDBUtils.find_system_outputs(system_id, output_ids)
Expand All @@ -368,15 +359,15 @@ def system_cases_get_by_id(
system = SystemDBUtils.find_system_by_id(system_id)
if not _has_read_access(system):
abort_with_error_message(403, "system access denied", 40302)
if is_private_dataset(
if system.dataset and is_private_dataset(
DatalabLoaderOption(
system.system_info.dataset_name,
system.system_info.sub_dataset_name,
system.system_info.dataset_split,
system.dataset.dataset_name,
system.dataset.sub_dataset,
system.dataset.split,
)
):
abort_with_error_message(
403, f"{system.system_info.dataset_name} is a private dataset", 40301
403, f"{system.dataset.dataset_name} is a private dataset", 40301
)

return SystemDBUtils.find_analysis_cases(system_id, level=level, case_ids=case_ids)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,11 @@ export function SystemTableContent({
),
},
{
dataIndex: ["system_info", "split"],
dataIndex: ["dataset", "split"],
width: 110,
title: "Dataset Split",
fixed: "left",
align: "center",
render: (_, record) => record.system_info.dataset_split || "unspecified",
},
{
dataIndex: "source_language",
Expand Down
9 changes: 4 additions & 5 deletions openapi/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -991,19 +991,18 @@ components:
dataset:
description: if not specified, it is undefined
type: object
nullable: true
properties:
dataset_id:
type: string
split:
type: string
dataset_name:
type: string
tasks:
type: array
items:
type: string
sub_dataset:
type: string
nullable: true
required: [dataset_id, dataset_name, tasks]
required: [dataset_id, dataset_name, split]
system_details:
type: object
description: |
Expand Down