Skip to content

Commit

Permalink
update schemas, clean to_numeric, fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
patrick-troy committed Nov 13, 2023
1 parent d1a0d5a commit d2b5bd2
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 29 deletions.
12 changes: 9 additions & 3 deletions liiatools/annex_a_pipeline/spec/Annex_A_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,9 @@ column_map:
canbeblank: true
Age of Child (Years):
&age
numeric: "integer"
numeric:
type: "integer"
min_value: 0
header_regex:
- /.*age.*/i
canbeblank: true
Expand Down Expand Up @@ -345,7 +347,9 @@ column_map:
canbeblank: false
Number of Referrals in Last 12 Months:
&integer-not-blank
numeric: "integer"
numeric:
type: "integer"
min_value: 0
canbeblank: false
Allocated Team: *not-blank
Allocated Worker: *not-blank
Expand Down Expand Up @@ -1434,7 +1438,9 @@ column_map:
Date adopter matched with child(ren): *date-blank
Date child/children placed with adopter(s): *date-blank
No. of children placed:
numeric: "integer"
numeric:
type: "integer"
min_value: 0
canbeblank: true
Date of Adoption Order: *date-blank
Date of leaving adoption process: *date-blank
Expand Down
4 changes: 2 additions & 2 deletions liiatools/common/spec/__data_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ class Numeric(BaseModel):
"""
model_config = ConfigDict(extra="forbid")

type: str
type: Literal["integer", "float"]
min_value: int = None
max_value: int = None
decimal_places: int = 0
decimal_places: int = None

def __init__(self, **data):
super().__init__(**data)
Expand Down
35 changes: 25 additions & 10 deletions liiatools/datasets/shared_functions/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def _match_postcode(value):
def to_postcode(value):
"""
Checks that the postcodes are in the right format
:param postcode: A string with a UK-style post code
:param value: A string with a UK-style post code
:return: the correctly formatted postcode
:raises: ValueError if the postcode is not in the right format
"""
Expand All @@ -61,21 +61,29 @@ def to_short_postcode(value):
"""
Remove whitespace from the beginning and end of postcodes and the last two digits for anonymity
return blank if not in the right format
:param postcode: A string with a UK-style post code
:param value: A string with a UK-style post code
:return: a shortened post code with the area, district, and sector. The unit is removed
"""
match = _match_postcode(value)
return f"{match.group(1)} {match.group(2)}"


def _check_range(value, min_value=None, max_value=None):
"""
Check if a given value is within the min_value and max_value.
:param value: Some value to convert to a number
:param min_value: Minimum value allowed
:param max_value: Maximum value allowed
:return: Value if it is within range or ValueError if not
"""
if not (min_value is None or value >= min_value) and (max_value is None or value <= max_value):
raise ValueError(f"Value: {value} not in acceptable range: {min_value}-{max_value}")
raise ValueError
return value


@allow_blank
def to_numeric(value, _type, min_value=None, max_value=None, decimal_places=0):
def to_numeric(value, _type, min_value=None, max_value=None, decimal_places=None):
"""
Convert any strings that should be numeric values based on the config into numeric values.
Expand All @@ -88,20 +96,26 @@ def to_numeric(value, _type, min_value=None, max_value=None, decimal_places=0):
"""
try:
value = float(value)
_check_range(value, min_value, max_value)
if _type == "float":
return round(value, decimal_places)
value = round(value, decimal_places) if decimal_places else value
elif _type == "integer":
return int(value)
value = int(value)
except Exception as e:
raise ValueError(f"Invalid numeric: {value}") from e
raise ValueError(f"Invalid number: {value}") from e

try:
_check_range(value, min_value, max_value)
except Exception as e:
raise ValueError(f"Value: {value} not in acceptable range: {min_value}-{max_value}") from e

return value


@allow_blank
def to_date(value, dateformat="%d/%m/%Y"):
"""
Convert a string to a date based on the dateformat %d/%m/%Y and convert a datetime to a date
:param datevalue: A value to test and make sure it's a datetime object
:param value: A value to test and make sure it's a datetime object
:param dateformat: A format for the date to be read correctly, default to %d/%m/%Y
:return: Either the specified date, converted to a datetime, or an empty string
"""
Expand All @@ -128,7 +142,8 @@ def to_month_only_dob(*args, **kwargs):
def to_nth_of_month(value: date, n: int = 1):
"""
Converts dates to the nth day of the month. n defaults to first of the month
:param dob: A date datetime object
:param value: A date datetime object
:param n: Number of the day of the month to convert to
:return: A date of birth datetime object with the month rounded to the nth day
"""
try:
Expand Down
8 changes: 6 additions & 2 deletions liiatools/ssda903_pipeline/spec/SSDA903_schema_2017.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ column_map:
dictionary: {"False" : "0", "True" : "1", False: "0", True : "1", "FALSE" : "0", "TRUE" : "1"}
canbeblank: no
NB_ADOPTR:
numeric: "integer"
numeric:
type: "integer"
canbeblank: no
SEX_ADOPTR:
category:
Expand Down Expand Up @@ -288,7 +289,10 @@ column_map:
date: "%d/%m/%Y"
canbeblank: no
SDQ_SCORE:
numeric: "integer"
numeric:
type: "integer"
min_value: 0
max_value: 40
canbeblank: yes
SDQ_REASON:
category:
Expand Down
2 changes: 1 addition & 1 deletion tests/annex_a/test_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def test_end_to_end(liiatools_dir, build_dir):
"-c",
"BAD",
"--input",
str(liiatools_dir / "spec/annex_a/samples/Annex_A.xlsx"),
incoming_dir.as_posix(),
"--output",
pipeline_dir.as_posix(),
],
Expand Down
28 changes: 19 additions & 9 deletions tests/common/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from liiatools.datasets.shared_functions.converters import (
allow_blank,
to_date,
to_integer,
to_numeric,
to_month_only_dob,
to_short_postcode,
)
Expand Down Expand Up @@ -85,16 +85,26 @@ def test_to_date():
assert to_date("15/03/2017") == datetime.datetime(2017, 3, 15).date()


def test_to_integer():
assert to_integer("3000") == 3000
assert to_integer(123) == 123
assert to_integer("") == ""
assert to_integer(None) == ""
assert to_integer("1.0") == 1
assert to_integer(0) == 0
def test_to_numeric():
    """Check to_numeric conversion, blank handling, rounding and range validation."""
    # Integer conversion, including blanks passed through by allow_blank
    assert to_numeric("3000", "integer") == 3000
    assert to_numeric(123, "integer") == 123
    assert to_numeric("", "integer") == ""
    assert to_numeric(None, "integer") == ""
    assert to_numeric("1.0", "integer") == 1
    assert to_numeric(0, "integer") == 0

    # Float conversion, optional bounds and rounding
    assert to_numeric(1.23, "float") == 1.23
    assert to_numeric("1.23", "float") == 1.23
    assert to_numeric("", "float") == ""
    assert to_numeric(None, "float") == ""
    assert to_numeric(0.5, "float", min_value=0, max_value=1) == 0.5
    assert to_numeric(0.2, "float", min_value=0) == 0.2
    assert to_numeric(0.1234, "float", decimal_places=3) == 0.123

    # Each failing call needs its own pytest.raises context: the block exits
    # at the first exception, so any further calls in it would never execute.
    with pytest.raises(ValueError):
        to_numeric("date", "integer")

    with pytest.raises(ValueError):
        to_numeric(1.5, "float", min_value=0, max_value=1)

    with pytest.raises(ValueError):
        to_numeric(1.5, "float", min_value=2)


def test_check_year():
Expand Down
4 changes: 2 additions & 2 deletions tests/common/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def test_collect_row():
spec_int = Column(numeric="integer")
spec_int = Column(numeric={"type": "integer"})
spec_str = Column(string="alphanumeric")
stream = [
events.StartContainer(),
Expand Down Expand Up @@ -268,7 +268,7 @@ def test_clean_categories():


def test_clean_integers():
integer_spec = Column(numeric="integer")
integer_spec = Column(numeric={"type": "integer"})
event = events.Cell(cell=123, column_spec=integer_spec)
cleaned_event = list(stream_filters.conform_cell_types(event))[0]
assert cleaned_event.cell == 123
Expand Down

0 comments on commit d2b5bd2

Please sign in to comment.