Skip to content

Commit

Permalink
Allow null values in PFBs (#2462)
Browse files Browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Aug 3, 2023
1 parent 49114ed commit 72fe0f5
Show file tree
Hide file tree
Showing 4 changed files with 389 additions and 280 deletions.
47 changes: 9 additions & 38 deletions src/azul/service/avro_pfb.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@
value_and_unit,
)
from azul.types import (
AnyJSON,
AnyMutableJSON,
JSON,
MutableJSON,
)
Expand Down Expand Up @@ -182,7 +180,6 @@ def from_json(cls,
entities by comparing their IDs.
"""
cls._add_missing_fields(name, object_, schema)
object_ = cls._replace_null_with_empty_string(object_)
ids = object_['document_id']
# document_id is an array unless the inner entity type matches the
# outer entity type
Expand All @@ -195,9 +192,7 @@ def from_json(cls,
def _add_missing_fields(cls, name: str, object_: MutableJSON, schema):
"""
Compare entities against the schema and add any fields that are missing.
None is the default value, but because of https://github.com/DataBiosphere/azul/issues/2370
this isn't currently reflected in the schema.
None is the default value.
"""
if schema['type'] == 'record':
object_schema = one(f for f in schema['fields'] if f['name'] == 'object')
Expand All @@ -210,18 +205,14 @@ def _add_missing_fields(cls, name: str, object_: MutableJSON, schema):
field_name, field_type = field['name'], field['type']
if field_name not in object_:
if isinstance(field_type, list):
# FIXME: Change 'string' to 'null'
# https://github.com/DataBiosphere/azul/issues/2462
assert 'string' in field_type or 'null' in field_type, field
assert 'null' in field_type, field
default_value = None
elif field_type['type'] == 'array':
if isinstance(field_type['items'], dict):
assert field_type['items']['type'] in ('record', 'array'), field
default_value = []
else:
# FIXME: Change 'string' to 'null'
# https://github.com/DataBiosphere/azul/issues/2462
assert 'string' in field_type['items'], field
assert 'null' in field_type['items'], field
default_value = [None]
else:
assert False, field
Expand All @@ -237,24 +228,6 @@ def _add_missing_fields(cls, name: str, object_: MutableJSON, schema):
object_=sub_object,
schema=field)

@classmethod
def _replace_null_with_empty_string(cls, object_json: AnyJSON) -> AnyMutableJSON:
# FIXME: remove with https://github.com/DataBiosphere/azul/issues/2462
if object_json is None:
return ''
elif isinstance(object_json, dict):
return {
k: cls._replace_null_with_empty_string(v)
for k, v in object_json.items()
}
elif isinstance(object_json, list):
return [
cls._replace_null_with_empty_string(item)
for item in object_json
]
else:
return object_json

def to_json(self, relations: Iterable['PFBRelation']) -> JSON:
return {
'id': self.id,
Expand Down Expand Up @@ -502,11 +475,11 @@ def _inject_reference_handover_values(entity: MutableJSON, doc: JSON):
# https://github.com/DataBiosphere/azul/issues/4094

_nullable_to_pfb_types = {
null_bool: ['string', 'boolean'],
null_float: ['string', 'double'],
null_int: ['string', 'long'],
null_str: ['string'],
null_datetime: ['string'],
null_bool: ['null', 'boolean'],
null_float: ['null', 'double'],
null_int: ['null', 'long'],
null_str: ['null', 'string'],
null_datetime: ['null', 'string'],
}


Expand Down Expand Up @@ -606,9 +579,7 @@ def _entity_schema_recursive(field_types: FieldTypes,
'namespace': namespace,
'type': 'array',
'items': [
# FIXME: Change 'string' to 'null'
# https://github.com/DataBiosphere/azul/issues/2462
'string',
'null',
{
# FIXME: Why do we need to repeat `name` and `namespace`
# with the same values at three different depths?
Expand Down
Loading

0 comments on commit 72fe0f5

Please sign in to comment.