From 7ce41769807e032935fe91cbca8355eedab91304 Mon Sep 17 00:00:00 2001 From: Maks Verver Date: Thu, 29 Aug 2024 23:30:10 +0200 Subject: [PATCH] Set bozo bit on non-fatal errors Previously these errors were ignored, since an exception is raised only on fatal errors. With this change, when a non-fatal error occurs, the bozo bit is still set, but the feed is not reparsed with the loose parser. Background and discussion here: https://github.com/lemon24/reader/issues/350 --- feedparser/api.py | 6 +++++- tests/illformed/always_strip_doctype.xml | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/feedparser/api.py b/feedparser/api.py index 1c9cf15da..04476a3e1 100644 --- a/feedparser/api.py +++ b/feedparser/api.py @@ -327,8 +327,12 @@ def _parse_file_inplace( saxparser.parse(source) except xml.sax.SAXException as e: result["bozo"] = 1 - result["bozo_exception"] = feed_parser.exc or e + result["bozo_exception"] = e use_strict_parser = False + else: + if feed_parser.bozo: + result["bozo"] = 1 + result["bozo_exception"] = feed_parser.exc # The loose XML parser will be tried if the strict XML parser was not used # (or if it failed to parse the feed). diff --git a/tests/illformed/always_strip_doctype.xml b/tests/illformed/always_strip_doctype.xml index eb73620ca..59b4f29e1 100644 --- a/tests/illformed/always_strip_doctype.xml +++ b/tests/illformed/always_strip_doctype.xml @@ -1,7 +1,11 @@