@@ -146,20 +146,21 @@ def setup_subtypes(self):
146
146
147
147
# sort, longest first
148
148
subtypes = sorted (subtype_names + subtype_abbreviations , key = len , reverse = True )
149
- self .subtypes_string = '|' .join (re .escape (s ) for s in subtypes )
150
149
151
- # build the xpath; if there are no subtypes, use "false" to not match anything
152
- xpath_contains = " or " .join ([
153
- f"contains(translate(., '{ subtype .upper ()} ', '{ subtype .lower ()} '), '{ subtype .lower ()} ')"
154
- for subtype in subtypes
155
- ]) or "false"
156
- self .candidate_xpath = self .candidate_xpath .replace ('PATTERNS' , xpath_contains )
150
+ if self .candidate_xpath :
151
+ # build the xpath; if there are no subtypes, use "false" to not match anything
152
+ xpath_contains = " or " .join ([
153
+ f"contains(translate(., '{ subtype .upper ()} ', '{ subtype .lower ()} '), '{ subtype .lower ()} ')"
154
+ for subtype in subtypes
155
+ ]) or "false"
156
+ self .candidate_xpath = self .candidate_xpath .replace ('PATTERNS' , xpath_contains )
157
157
158
158
# TODO: disregard e.g. "6 May" in "GN 34 of 6 May 2020", but catch reference
159
+ subtypes_string = '|' .join (re .escape (s ) for s in subtypes )
159
160
self .pattern_re = re .compile (
160
161
fr'''
161
162
(?P<ref>
162
- (?P<subtype>{ self . subtypes_string } )\s*
163
+ (?P<subtype>{ subtypes_string } )\s*
163
164
(No\.?\s*)?
164
165
(?P<num>[a-z0-9-]+)
165
166
(\s+of\s+|/)
0 commit comments