Skip to content

Commit 3219e81

Browse files
Merge pull request #2221 from laws-africa/subtype-fix
Fix text matching when the XPath is not available
2 parents 2b3e6fa + 85b9901 commit 3219e81

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

indigo/analysis/refs/base.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -146,20 +146,21 @@ def setup_subtypes(self):
146146

147147
# sort, longest first
148148
subtypes = sorted(subtype_names + subtype_abbreviations, key=len, reverse=True)
149-
self.subtypes_string = '|'.join(re.escape(s) for s in subtypes)
150149

151-
# build the xpath; if there are no subtypes, use "false" to not match anything
152-
xpath_contains = " or ".join([
153-
f"contains(translate(., '{subtype.upper()}', '{subtype.lower()}'), '{subtype.lower()}')"
154-
for subtype in subtypes
155-
]) or "false"
156-
self.candidate_xpath = self.candidate_xpath.replace('PATTERNS', xpath_contains)
150+
if self.candidate_xpath:
151+
# build the xpath; if there are no subtypes, use "false" to not match anything
152+
xpath_contains = " or ".join([
153+
f"contains(translate(., '{subtype.upper()}', '{subtype.lower()}'), '{subtype.lower()}')"
154+
for subtype in subtypes
155+
]) or "false"
156+
self.candidate_xpath = self.candidate_xpath.replace('PATTERNS', xpath_contains)
157157

158158
# TODO: disregard e.g. "6 May" in "GN 34 of 6 May 2020", but catch reference
159+
subtypes_string = '|'.join(re.escape(s) for s in subtypes)
159160
self.pattern_re = re.compile(
160161
fr'''
161162
(?P<ref>
162-
(?P<subtype>{self.subtypes_string})\s*
163+
(?P<subtype>{subtypes_string})\s*
163164
(No\.?\s*)?
164165
(?P<num>[a-z0-9-]+)
165166
(\s+of\s+|/)

0 commit comments

Comments
 (0)