def colFromRegex(referenceList, regex, silent=False):
    """
    Return a list created by mapping a regular expression to another list.
    If a capture group is included, contents matching the first capture
    group will be used to derive a new value. Otherwise the entire match
    is used.

    Parameters
    ----------
    referenceList : iterable
        Values to search. Elements that are not `str` are converted with
        `str()` before searching; a warning is printed once when any such
        element is present (this warning is printed regardless of
        `silent`).
    regex : str
        Regular expression pattern, compiled with `re.compile` and applied
        to each value with `search` (first match anywhere in the value).
    silent : bool
        If True, do not print the per-value "does not match regex."
        message for values the pattern fails to match.

    Returns
    -------
    list
        The list resulting from mapping `regex` to `referenceList`. Has one
        entry per input value: the text of capture group 1 (or of the whole
        match when `regex` has no capture groups), or None when the value
        does not match. An entry is also None when the pattern matches but
        capture group 1 did not participate in the match.
    """
    p = re.compile(regex)
    # Materialize once: the input is traversed twice (type check, then
    # search), which would otherwise exhaust a one-shot iterator before
    # the search loop ever sees a value.
    values = list(referenceList)
    if not all(isinstance(v, str) for v in values):
        print("Warning: Attempting to convert values to str.")
        print("May cause unexpected results.")
    # The pattern is fixed, so decide once which group supplies the value:
    # group 1 when capture groups exist, otherwise group 0 (whole match).
    groupIndex = 1 if p.groups else 0
    newCol = []
    for v in values:
        text = str(v)
        m = p.search(text)
        if m is None:
            if not silent:
                print("{} does not match regex.".format(text))
            newCol.append(None)
        else:
            newCol.append(m.group(groupIndex))
    return newCol
def test_colFromRegex_no_capture_group(self, values):
    """A pattern without a capture group yields the whole matched text."""
    # `values` is supplied from outside this method (presumably a pytest
    # fixture -- confirm against the enclosing test class).
    expected = [None, None, 'red', 'ree', None]
    matches = annotator.utils.colFromRegex(values, r"\we\w")
    assert matches == expected