diff --git a/changelog.d/20240422_124749_bbolli_jsonfeed_content.rst b/changelog.d/20240422_124749_bbolli_jsonfeed_content.rst new file mode 100644 index 000000000..eb9e011bb --- /dev/null +++ b/changelog.d/20240422_124749_bbolli_jsonfeed_content.rst @@ -0,0 +1,5 @@ +Fixed +----- + +* JSONfeed item content is now a list that prefers HTML content. +* A JSONfeed item without content sets the bozo flag. diff --git a/feedparser/parsers/json.py b/feedparser/parsers/json.py index 36f714a5c..f90f29ab6 100644 --- a/feedparser/parsers/json.py +++ b/feedparser/parsers/json.py @@ -85,16 +85,23 @@ def parse_entry(self, e): if src in e: entry[dst] = e[src] - if "content_text" in e: - entry["content"] = c = FeedParserDict() - c["value"] = e["content_text"] - c["type"] = "text" - elif "content_html" in e: - entry["content"] = c = FeedParserDict() + entry["content"] = content = [] + if "content_html" in e: + c = FeedParserDict() c["value"] = sanitize_html( e["content_html"], self.encoding, "application/json" ) c["type"] = "html" + content.append(c) + if "content_text" in e: + c = FeedParserDict() + c["value"] = e["content_text"] + c["type"] = "text" + content.append(c) + if not content: + raise ValueError( + f"item {entry['id']=} has neither 'content_text' nor 'content_html'" + ) if "date_published" in e: entry["published"] = e["date_published"] diff --git a/tests/json/html_first.json b/tests/json/html_first.json new file mode 100644 index 000000000..b7339cd37 --- /dev/null +++ b/tests/json/html_first.json @@ -0,0 +1,18 @@ +{ + "__TEST__": "Description: basic JSON tests Expect: not bozo and items[0].content[0].type == 'html' -->", + "version": "https://jsonfeed.org/version/1", + "title": "html_preferred", + "home_page_url": "https://example.org/", + "feed_url": "https://example.org/feed.json", + "icon": "https://example.org/feed.png", + "author": { "name": "me" }, + "items": [ + { + "id": "1", + "author": { "name": "you", "url": "http://example.net/~you" }, + "content_text": "Hello, world!\n", + "content_html": "
Hello, world!
\n\n", + "url": "https://example.org/initial-post" + } + ] +} diff --git a/tests/json/no_content.json b/tests/json/no_content.json new file mode 100644 index 000000000..296a84ac6 --- /dev/null +++ b/tests/json/no_content.json @@ -0,0 +1,17 @@ +{ + "__TEST__": "Description: basic JSON tests Expect: bozo and 'neither' in str(bozo_exception) -->", + "version": "https://jsonfeed.org/version/1", + "title": "no content", + "home_page_url": "https://example.org/", + "feed_url": "https://example.org/feed.json", + "icon": "https://example.org/feed.png", + "author": { "name": "me" }, + "items": [ + { + "id": "1", + "author": { "name": "you", "url": "http://example.net/~you" }, + "summary": "Hello, world!\n", + "url": "https://example.org/initial-post" + } + ] +}