Skip to content

Commit

Permalink
transformations: add wordSubstituteSet (#80)
Browse files Browse the repository at this point in the history
  • Loading branch information
abhidg authored Aug 7, 2023
1 parent 77e69c9 commit 30884c2
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 0 deletions.
24 changes: 24 additions & 0 deletions adtl/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,30 @@ def textIfNotNull(field, return_val):
return return_val if field not in [None, ""] else None


def wordSubstituteSet(value, params):
    r"""
    For a value that can have multiple words, use substitutions from params.

    params is a list of 2-tuples (or 2-item lists), in the form
        [(w1, s1), (w2, s2), ... (w_n, s_n)]
    where a match of w1 yields s1, a match of w2 yields s2, and so on.

    Word matches are regular expressions, delimited by the `\b` word boundary
    delimiter so can have arbitrary regular expressions to match. Each
    expression is wrapped in a non-capturing group so that the word
    boundaries apply to the whole expression, including alternations such as
    `cat|dog`. Any match of regex w_n will use substitute s_n. Case is
    ignored when matching.

    Returns a sorted list of the unique substitutes whose expression matched
    somewhere in value, or None if nothing matched or if value is None or
    the empty string.

    Raises ValueError if any item of params is not a tuple or list.
    """
    for item in params:
        if not isinstance(item, (tuple, list)):
            raise ValueError("wordSubstituteSet: params item not a tuple or list")
    # dict() keeps the last substitute when the same expression is repeated
    # and rejects items that are not exactly two elements long.
    sub_map = dict(params)
    if value is None or value == "":
        # Nothing to search in; avoid passing None to re.search.
        return None
    matched = {
        subst
        for match, subst in sub_map.items()
        # (?:...) keeps both \b anchors attached to the complete expression.
        if re.search(r"\b(?:" + match + r")\b", value, re.IGNORECASE)
    }
    return sorted(matched) if matched else None


def getFloat(value, set_decimal=None, separator=None):
"""
In cases where the decimal seperators is not a . you can
Expand Down
33 changes: 33 additions & 0 deletions tests/test_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,39 @@ def test_isNotNull(test_input, expected):
assert transform.isNotNull(test_input) == expected


@pytest.mark.parametrize(
    "test_input,expected",
    [
        # Every listed word is present: all three substitutes come back,
        # sorted alphabetically rather than in params order.
        (
            (
                "Metilprednisolona - Dexametasona - Fluticasona",
                [
                    ("Metilprednisolona", "Methylprednisolone"),
                    ("Fluticasona", "Fluticasone"),
                    ("Dexametasona", "Dexamethasone"),
                ],
            ),
            ["Dexamethasone", "Fluticasone", "Methylprednisolone"],
        ),
        # Two words, two substitutions.
        (
            (
                "Hidrocortisona - Fluticasona",
                [
                    ("Hidrocortisona", "Hydrocortisone"),
                    ("Fluticasona", "Fluticasone"),
                ],
            ),
            ["Fluticasone", "Hydrocortisone"],
        ),
        # "Cortisona" only occurs inside "Hidrocortisona", never as a whole
        # word, so nothing matches and the result is None.
        (
            (
                "Hidrocortisona - Fluticasona",
                [("Cortisona", "Cortisone")],
            ),
            None,
        ),
    ],
)
def test_wordSubstituteSet(test_input, expected):
    """Check wordSubstituteSet against each (value, params) pair."""
    value, params = test_input
    result = transform.wordSubstituteSet(value, params)
    assert result == expected


def test_wordSubstituteSet_error():
    """Items of params that are neither tuples nor lists raise ValueError."""
    invalid_params = [20, 30]
    with pytest.raises(ValueError):
        transform.wordSubstituteSet("value", invalid_params)


@pytest.mark.parametrize(
"test_date_birth, test_date_current, expected",
[
Expand Down

0 comments on commit 30884c2

Please sign in to comment.