Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/character-encoding.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ sets the ``bozo`` bit to ``1`` and sets ``bozo_exception`` to
``feedparser.CharacterEncodingUnknown``. In this case, parsed values will be
strings, not Unicode strings.

.. note::

   Exception objects cannot always be safely serialized (pickled) and sent
   between processes. When feedparser runs inside a child process started by
   ``multiprocessing``, ``bozo_exception`` therefore contains the string
   representation of the exception instead of the exception object itself.


Handling Incorrectly-Declared Media Types
-----------------------------------------
Expand Down
8 changes: 8 additions & 0 deletions feedparser/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
# POSSIBILITY OF SUCH DAMAGE.

import io
import multiprocessing
import urllib.error
import urllib.parse
import xml.sax
Expand Down Expand Up @@ -374,3 +375,10 @@ def _parse_file_inplace(
result["namespaces"] = {}
else:
result["namespaces"] = feed_parser.namespaces_in_use

if "bozo_exception" in result and _is_multiprocessing():
result["bozo_exception"] = str(result["bozo_exception"])


def _is_multiprocessing():
    """Report whether the current process was started by ``multiprocessing``.

    ``multiprocessing.parent_process()`` yields ``None`` for the main
    process and a process object for a child, so any non-``None`` value
    means this code is running inside a child process.
    """
    parent = multiprocessing.parent_process()
    return parent is not None
72 changes: 72 additions & 0 deletions tests/test_multiprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from concurrent.futures import ProcessPoolExecutor

import pytest

import feedparser

# Minimal well-formed RSS 2.0 document (one channel, one item) used as the
# baseline input for the tests below. Assembled from fragments so each XML
# element stays on its own source line.
base_feed_str = b"".join(
    (
        b"<?xml version='1.0' encoding='UTF-8'?>\n",
        b'<rss version="2.0">\n',
        b"<channel>\n",
        b"<title>Foo</title>\n",
        b"<link>https://foo.com/</link>\n",
        # The single entry is emitted on one physical line of the feed.
        b"<item>",
        b"<title>Title 1</title>",
        b"<link>https://foo.com/1</link>",
        b"<pubDate>Thu, 05 Jun 2025 18:27:58 -0000</pubDate>",
        b"</item>\n",
        b"</channel>\n",
        b"</rss>\n",
    )
)


def _parse_and_return_full(raw_feed: bytes):
    """Parse *raw_feed* with feedparser and return the whole result object."""
    parsed = feedparser.parse(raw_feed)
    return parsed


@pytest.mark.parametrize(
    "feed, expected_title, expected_bozo_exception, expected_items",
    [
        pytest.param(
            base_feed_str,
            "Foo",
            None,
            [
                {
                    "title": "Title 1",
                    "link": "https://foo.com/1",
                    "published": "Thu, 05 Jun 2025 18:27:58 -0000",
                }
            ],
            id="correct_feed",
        ),
        pytest.param(
            b"\n" + base_feed_str,
            "Foo",
            "XML or text declaration not at start of entity",
            [
                {
                    "title": "Title 1",
                    "link": "https://foo.com/1",
                    "published": "Thu, 05 Jun 2025 18:27:58 -0000",
                }
            ],
            id="leading_newline_feed",
        ),
    ],
)
def test_multiprocessing_parse(
    feed, expected_title, expected_bozo_exception, expected_items
):
    """Parse a feed in a worker process and check the result sent back.

    The feed is parsed inside a single-worker ``ProcessPoolExecutor``; the
    assertions then run in the parent on the returned result: the feed
    title, the ``bozo_exception`` text (or its absence), and the
    ``published``/``link``/``title`` values of every entry.
    """
    with ProcessPoolExecutor(1) as pool:
        result = pool.submit(_parse_and_return_full, feed).result()

    assert result["feed"]["title"] == expected_title

    bozo_exception = result.get("bozo_exception")
    if not expected_bozo_exception:
        assert bozo_exception is None
    else:
        assert expected_bozo_exception in bozo_exception

    # strict=True makes a mismatch in the number of entries fail loudly.
    for observed, expected in zip(result["entries"], expected_items, strict=True):
        for field in ("published", "link", "title"):
            assert observed[field] == expected[field]