From 78b55b0681af8a32b046498f6492c92b22d37c0d Mon Sep 17 00:00:00 2001
From: Beat Bolli <dev@drbeat.li>
Date: Mon, 22 Apr 2024 11:46:15 +0200
Subject: [PATCH 1/4] JSONFeed: change item.content into a list

The spec allows an item to have both text and HTML content elements, and
the feedparser content key supports different content types.

Adjust the code to look at both source elements and add all that are
present.

Signed-off-by: Beat Bolli <dev@drbeat.li>
---
 feedparser/parsers/json.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/feedparser/parsers/json.py b/feedparser/parsers/json.py
index 36f714a5..fb66b7e8 100644
--- a/feedparser/parsers/json.py
+++ b/feedparser/parsers/json.py
@@ -85,16 +85,21 @@ def parse_entry(self, e):
             if src in e:
                 entry[dst] = e[src]
 
+        content = []
         if "content_text" in e:
-            entry["content"] = c = FeedParserDict()
+            c = FeedParserDict()
             c["value"] = e["content_text"]
             c["type"] = "text"
-        elif "content_html" in e:
-            entry["content"] = c = FeedParserDict()
+            content.append(c)
+        if "content_html" in e:
+            c = FeedParserDict()
             c["value"] = sanitize_html(
                 e["content_html"], self.encoding, "application/json"
             )
             c["type"] = "html"
+            content.append(c)
+        if content:
+            entry["content"] = content
 
         if "date_published" in e:
             entry["published"] = e["date_published"]

From 97c648c2bac42ea11c8f2cbf6ddd78ca52ab975c Mon Sep 17 00:00:00 2001
From: Beat Bolli <dev@drbeat.li>
Date: Mon, 22 Apr 2024 12:24:10 +0200
Subject: [PATCH 2/4] JSONfeed: prefer HTML over text content

HTML generally carries more useful information than text, so put it
first in the content list. Users who want text can still iterate over
the list and pick the entry whose type is text.

Signed-off-by: Beat Bolli <dev@drbeat.li>
---
 feedparser/parsers/json.py | 10 +++++-----
 tests/json/html_first.json | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+), 5 deletions(-)
 create mode 100644 tests/json/html_first.json

diff --git a/feedparser/parsers/json.py b/feedparser/parsers/json.py
index fb66b7e8..ef6633e8 100644
--- a/feedparser/parsers/json.py
+++ b/feedparser/parsers/json.py
@@ -86,11 +86,6 @@ def parse_entry(self, e):
                 entry[dst] = e[src]
 
         content = []
-        if "content_text" in e:
-            c = FeedParserDict()
-            c["value"] = e["content_text"]
-            c["type"] = "text"
-            content.append(c)
         if "content_html" in e:
             c = FeedParserDict()
             c["value"] = sanitize_html(
@@ -98,6 +93,11 @@ def parse_entry(self, e):
             )
             c["type"] = "html"
             content.append(c)
+        if "content_text" in e:
+            c = FeedParserDict()
+            c["value"] = e["content_text"]
+            c["type"] = "text"
+            content.append(c)
         if content:
             entry["content"] = content
 
diff --git a/tests/json/html_first.json b/tests/json/html_first.json
new file mode 100644
index 00000000..b7339cd3
--- /dev/null
+++ b/tests/json/html_first.json
@@ -0,0 +1,18 @@
+{
+    "__TEST__": "Description: basic JSON tests Expect: not bozo and items[0].content[0].type == 'html' -->",
+    "version": "https://jsonfeed.org/version/1",
+    "title": "html_preferred",
+    "home_page_url": "https://example.org/",
+    "feed_url": "https://example.org/feed.json",
+    "icon": "https://example.org/feed.png",
+    "author": { "name": "me" },
+    "items": [
+        {
+            "id": "1",
+            "author": { "name": "you", "url": "http://example.net/~you" },
+            "content_text": "Hello, world!\n",
+            "content_html": "<p>Hello, world!</p>\n\n<script>alert();</script>",
+            "url": "https://example.org/initial-post"
+        }
+    ]
+}

From 6a68b8dca380fd3f8ece5746f59e3313d49cf7f9 Mon Sep 17 00:00:00 2001
From: Beat Bolli <dev@drbeat.li>
Date: Mon, 22 Apr 2024 12:28:10 +0200
Subject: [PATCH 3/4] JSONfeed: validate the presence of content_html or
 content_text

The spec says that at least one of the two elements is mandatory. Verify
this and raise a ValueError if both are missing.

Signed-off-by: Beat Bolli <dev@drbeat.li>
---
 feedparser/parsers/json.py |  8 +++++---
 tests/json/no_content.json | 17 +++++++++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)
 create mode 100644 tests/json/no_content.json

diff --git a/feedparser/parsers/json.py b/feedparser/parsers/json.py
index ef6633e8..f90f29ab 100644
--- a/feedparser/parsers/json.py
+++ b/feedparser/parsers/json.py
@@ -85,7 +85,7 @@ def parse_entry(self, e):
             if src in e:
                 entry[dst] = e[src]
 
-        content = []
+        entry["content"] = content = []
         if "content_html" in e:
             c = FeedParserDict()
             c["value"] = sanitize_html(
@@ -98,8 +98,10 @@ def parse_entry(self, e):
             c["value"] = e["content_text"]
             c["type"] = "text"
             content.append(c)
-        if content:
-            entry["content"] = content
+        if not content:
+            raise ValueError(
+                f"item {entry['id']=} has neither 'content_text' nor 'content_html'"
+            )
 
         if "date_published" in e:
             entry["published"] = e["date_published"]
diff --git a/tests/json/no_content.json b/tests/json/no_content.json
new file mode 100644
index 00000000..296a84ac
--- /dev/null
+++ b/tests/json/no_content.json
@@ -0,0 +1,17 @@
+{
+    "__TEST__": "Description: basic JSON tests Expect: bozo and 'neither' in str(bozo_exception) -->",
+    "version": "https://jsonfeed.org/version/1",
+    "title": "no content",
+    "home_page_url": "https://example.org/",
+    "feed_url": "https://example.org/feed.json",
+    "icon": "https://example.org/feed.png",
+    "author": { "name": "me" },
+    "items": [
+        {
+            "id": "1",
+            "author": { "name": "you", "url": "http://example.net/~you" },
+            "summary": "Hello, world!\n",
+            "url": "https://example.org/initial-post"
+        }
+    ]
+}

From 52209c95894aaa7bee976966fef70d00e6e052a5 Mon Sep 17 00:00:00 2001
From: Beat Bolli <dev@drbeat.li>
Date: Mon, 22 Apr 2024 12:59:34 +0200
Subject: [PATCH 4/4] Add a changelog.d fragment for the three preceding
 commits

Signed-off-by: Beat Bolli <dev@drbeat.li>
---
 changelog.d/20240422_124749_bbolli_jsonfeed_content.rst | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 changelog.d/20240422_124749_bbolli_jsonfeed_content.rst

diff --git a/changelog.d/20240422_124749_bbolli_jsonfeed_content.rst b/changelog.d/20240422_124749_bbolli_jsonfeed_content.rst
new file mode 100644
index 00000000..eb9e011b
--- /dev/null
+++ b/changelog.d/20240422_124749_bbolli_jsonfeed_content.rst
@@ -0,0 +1,5 @@
+Fixed
+-----
+
+*   JSONfeed item content is now a list with HTML content first, followed by text content.
+*   A JSONfeed item with neither ``content_html`` nor ``content_text`` now sets the bozo flag.