diff --git a/canonicalwebteam/discourse/parsers/base_parser.py b/canonicalwebteam/discourse/parsers/base_parser.py index fbc538b..570a5fa 100644 --- a/canonicalwebteam/discourse/parsers/base_parser.py +++ b/canonicalwebteam/discourse/parsers/base_parser.py @@ -35,7 +35,6 @@ def __init__(self, error): super().__init__(error) flask.current_app.extensions["sentry"].captureMessage(error) - pass class BaseParser: @@ -214,7 +213,7 @@ def _parse_redirect_map(self, index_soup): if not ( location.startswith(self.url_prefix) - or validators.url(location, public=True) + or validators.url(location) ): warnings.append( f"Redirect map location {location} is invalid" @@ -479,9 +478,12 @@ def _get_section(self, soup, title_text):
Content
""" - heading = soup.find(HEADER_REGEX, string=title_text) - - if not heading: + for heading in soup(HEADER_REGEX): + if heading.string is None and heading.a.next == title_text: + break + elif heading.string == title_text: + break + else: return None heading_tag = heading.name @@ -501,9 +503,12 @@ def _get_preamble(self, soup, break_on_title): the heading defined in `break_on_title`, and return it as a BeautifulSoup object """ - heading = soup.find(HEADER_REGEX, string=break_on_title) - - if not heading: + for heading in soup(HEADER_REGEX): + if heading.string is None and heading.a.next == break_on_title: + break + elif heading.string == break_on_title: + break + else: return soup # get all the previous contents, reversing order on insert preamble_soup = BeautifulSoup() diff --git a/setup.py b/setup.py index 4183094..1f27fe3 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ setup( name="canonicalwebteam.discourse", - version="5.4.2", + version="5.4.3", author="Canonical webteam", author_email="webteam@canonical.com", url="https://github.com/canonical/canonicalwebteam.discourse",