Skip to content

Commit

Permalink
Add docs, test
Browse files Browse the repository at this point in the history
  • Loading branch information
pipliggins committed Oct 3, 2024
1 parent fcbb63f commit c08289f
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 14 deletions.
13 changes: 7 additions & 6 deletions adtl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,19 +104,20 @@ def get_value_unhashed(row: StrDict, rule: Rule, ctx: Context = None) -> Any:
params.append(rule["apply"]["params"][i])

try:
warnings.simplefilter("error", AdtlTransformationWarning)
if params:
value = getattr(tf, transformation)(value, *params)
else:
value = getattr(tf, transformation)(value)
with warnings.catch_warnings():
warnings.simplefilter("error", category=AdtlTransformationWarning)
if params:
value = getattr(tf, transformation)(value, *params)
else:
value = getattr(tf, transformation)(value)
except AttributeError:
raise AttributeError(
f"Error using a data transformation: Function {transformation} "
"has not been defined."
)
except AdtlTransformationWarning as e:
if ctx and ctx.get("returnUnmatched"):
return e
return e.args[0]
else:
warnings.warn(str(e), AdtlTransformationWarning)
return None
Expand Down
40 changes: 32 additions & 8 deletions adtl/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ def wordSubstituteSet(value: str, *params) -> List[str]:
if re.search(r"\b" + match + r"\b", value, re.IGNORECASE):
out.append(subst)
if not out and (value not in [None, ""]):
warnings.warn(f"No matches found for {value}")
warnings.warn(
f"No matches found for: '{value}'",
AdtlTransformationWarning,
stacklevel=2,
)
return sorted(set(out)) if out else None


Expand Down Expand Up @@ -157,7 +161,11 @@ def yearsElapsed(
days = cd - bd
return pint.Quantity(days.days, "days").to("years").m
except ValueError:
warnings.warn(f"Failed calculation yearsElapsed: {birthdate}, {currentdate}")
warnings.warn(
f"Failed calculation yearsElapsed: {birthdate}, {currentdate}",
AdtlTransformationWarning,
stacklevel=2,
)


def durationDays(startdate: str, currentdate: str) -> int:
Expand Down Expand Up @@ -222,14 +230,18 @@ def makeDate(year: str, month: str, day: str) -> str:
year, month, day = int(year), int(month), int(day)
except ValueError:
warnings.warn(
f"Could not construct date from: year={year}, month={month}, day={day}"
f"Could not construct date from: year={year}, month={month}, day={day}",
AdtlTransformationWarning,
stacklevel=2,
)
return None
try:
return date(year, month, day).isoformat()
except ValueError:
warnings.warn(
f"Could not construct date from: year={year}, month={month}, day={day}"
f"Could not construct date from: year={year}, month={month}, day={day}",
AdtlTransformationWarning,
stacklevel=2,
)
return None

Expand Down Expand Up @@ -257,7 +269,9 @@ def makeDateTimeFromSeconds(
)
except ValueError:
warnings.warn(
f"Could not convert date {date!r} from date format {date_format!r}"
f"Could not convert date {date!r} from date format {date_format!r}",
AdtlTransformationWarning,
stacklevel=2,
)
return None
if time_seconds == "":
Expand Down Expand Up @@ -291,7 +305,9 @@ def makeDateTime(
)
except ValueError:
warnings.warn(
f"Could not convert date {date!r} from date format {date_format!r}"
f"Could not convert date {date!r} from date format {date_format!r}",
AdtlTransformationWarning,
stacklevel=2,
)
return None

Expand Down Expand Up @@ -326,7 +342,11 @@ def splitDate(
elif option == "day":
return sd.day
else:
warnings.warn(f"Invalid option {option!r} for splitDate")
warnings.warn(
f"Invalid option {option!r} for splitDate",
AdtlTransformationWarning,
stacklevel=2,
)


def startYear(
Expand Down Expand Up @@ -454,7 +474,11 @@ def correctOldDate(date: str, epoch: float, format: str, return_datetime: bool =
try:
cd = datetime.strptime(date, format)
except ValueError:
warnings.warn(f"Could not convert date {date!r} from date format {format!r}")
warnings.warn(
f"Could not convert date {date!r} from date format {format!r}",
AdtlTransformationWarning,
stacklevel=2,
)
return None

if cd.year >= epoch and "y" in format:
Expand Down
2 changes: 2 additions & 0 deletions docs/specification.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ if not present in a datafile, following the same syntax as `fieldPattern` key.
* **defaultDateFormat**: Default source date format, applied to all fields
with either "date_" / "_date" in the field name or that have format date
set in the JSON schema
* **returnUnmatched**: If true, values that cannot be converted according to
the provided rules and formats are kept in the output instead of being dropped.
For fields with [value mappings](#field-with-value-mapping), this is equivalent
to using `ignoreMissingKeys`. Fields using
[data transformation functions](#data-transformations-apply) return the
warning message raised by the failed transformation, which describes the
failure and the input values (for example, `No matches found for: 'no diabetes'`).

## Validation

Expand Down
7 changes: 7 additions & 0 deletions tests/__snapshots__/test_parser.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@

'''
# ---
# name: test_return_unmapped
'''
age,date_death,date_of_birth,diabetes_type,has_smoking,pregnancy_birth_weight_kg,subject_id
fifteen,"Could not construct date from: year=2023, month=11, day=80",2023-11-20,No matches found for: 'no diabetes',today,eight,1

'''
# ---
# name: test_show_report
'''

Expand Down
46 changes: 46 additions & 0 deletions tests/parsers/return-unmapped.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Parser specification used by the `test_return_unmapped` test.
#
# It is paired with tests/sources/return-unmapped.csv, whose single row holds
# a value that each rule below (other than subject_id) cannot convert. With
# returnUnmatched enabled, the expected snapshot contains either the raw
# source value or the transformation's warning message for those fields.
[adtl]
name = "test-return-unmapped"
description = "Example using returnUnmatched to return unmapped fields"
# Feature under test: keep unconvertible values in the output.
returnUnmatched = true

# One output table, "subject", grouped per subject_id.
[adtl.tables.subject]
kind = "groupBy"
groupBy = "subject_id"
aggregation = "lastNotNull"

[subject]

# Plain passthrough; the sample row's value ("1") appears unchanged in the
# snapshot.
[subject.subject_id]
field = "subjid"
description = "Subject ID"

# The sample value is "2023-11-20", which does not fit the "%m" source format;
# the snapshot shows the value returned as-is.
[subject.date_of_birth]
field = "first_admit"
source_date = "%m"

# The sample value is the word "fifteen", not a number; the snapshot shows it
# returned as-is.
[subject.age]
field = "age"
apply = {function = "getFloat"}

# The sample value is the word "eight", so no lbs -> kg conversion is possible;
# the snapshot shows it returned as-is.
[subject.pregnancy_birth_weight_kg]
field = "weight"
unit = "kg"
source_unit = "lbs"

# The sample value "today" is not a key of the mapping below; the snapshot
# shows it returned as-is.
[subject.has_smoking]
field = "smoking"
values = { 1 = "current", 2 = "never", 3 = "former" }

# The sample value "no diabetes" matches neither pattern below; the snapshot
# shows the "No matches found for: ..." warning text in this field.
[subject.diabetes_type]
field = "diabetes_type"

apply.function = "wordSubstituteSet"
apply.params = [
["type[\\s\\-]?1", "type-1"],
["type[\\s\\-]?2", "type-2"]
]

# The sample row gives year=2023, month=11, day=80 (an invalid day); the
# snapshot shows the "Could not construct date from: ..." warning text.
# NOTE(review): "$"-prefixed params are presumably resolved from the named
# source columns -- confirm against the parser implementation.
[subject.date_death]
field = "death_year"
apply.function = "makeDate"
apply.params = ["$death_month", "$death_day"]
2 changes: 2 additions & 0 deletions tests/sources/return-unmapped.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
subjid,redcap,first_admit,age,weight,smoking,diabetes_type,death_year,death_month,death_day
1,admit,2023-11-20,fifteen,eight,today,no diabetes,2023,11,80
9 changes: 9 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1360,3 +1360,12 @@ def test_no_overwriting():
.read_table("visit")
)
assert overwriting_output == OVERWRITE_OUTPUT


def test_return_unmapped(snapshot):
    """Check the "subject" table produced with ``returnUnmatched`` enabled.

    Parses ``return-unmapped.csv`` with the ``return-unmapped.toml``
    specification, writes the "subject" table with ``write_csv`` and compares
    the result against the stored snapshot.
    """
    spec_file = TEST_PARSERS_PATH / "return-unmapped.toml"
    source_file = TEST_SOURCES_PATH / "return-unmapped.csv"

    parsed = parser.Parser(spec_file).parse(source_file)
    subject_csv = parsed.write_csv("subject")

    assert subject_csv == snapshot

0 comments on commit c08289f

Please sign in to comment.