From ae7297fd1a84c8b82886e5997f231c8b06749646 Mon Sep 17 00:00:00 2001
From: Daniel Schwarz <schwarz.dan@gmail.com>
Date: Sat, 27 May 2023 21:51:41 -0400
Subject: [PATCH] URLEncode hrefs; fixes crash bug with href urls in foreign
 scripts

---
 toot/tui/richtext.py   |  3 +++
 toot/tui/timeline.py   |  3 ++-
 toot/utils/__init__.py | 12 ++++++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
index 0cf326d9..175de59e 100644
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -10,6 +10,7 @@
 from bs4.element import NavigableString, Tag
 from urwidgets import TextEmbed, Hyperlink, parse_text
 from urwid.util import decompose_tagmarkup
+from toot.utils import urlencode_url
 
 
 class ContentParser:
@@ -232,6 +233,8 @@ def _a(self, tag) -> Tuple:
         if not attrib_list:
             attrib_list = [tag]
         if href:
+            # urlencode the path and query portions of the URL
+            href = urlencode_url(href)
             # use ASCII ETX (end of record) as a
             # delimiter between the title and the HREF
             title += f"\x03{href}"
diff --git a/toot/tui/timeline.py b/toot/tui/timeline.py
index 70192005..d8e20194 100644
--- a/toot/tui/timeline.py
+++ b/toot/tui/timeline.py
@@ -14,6 +14,7 @@
 from toot.tui import app
 from toot.tui.utils import time_ago
 from toot.utils.language import language_name
+from toot.utils import urlencode_url
 from urwidgets import Hyperlink, TextEmbed, parse_text
 
 logger = logging.getLogger("toot")
@@ -324,7 +325,7 @@ def linkify_content(self, text) -> urwid.Widget:
         TRANSFORM = {
             # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget
             re.compile(r'(https?://[^\s]+)'):
-                lambda g: (len(g[1]), urwid.Filler(Hyperlink(g[1], "link"))),
+                lambda g: (len(g[1]), urwid.Filler(Hyperlink(urlencode_url(g[1]), "link", g[1]))),
         }
         markup_list = []
 
diff --git a/toot/utils/__init__.py b/toot/utils/__init__.py
index e8103acf..425cb382 100644
--- a/toot/utils/__init__.py
+++ b/toot/utils/__init__.py
@@ -5,6 +5,7 @@
 import tempfile
 import unicodedata
 import warnings
+from urllib.parse import urlparse, quote, unquote, urlencode
 
 from bs4 import BeautifulSoup
 from typing import Dict
@@ -81,6 +82,17 @@ def assert_domain_exists(domain):
         raise ConsoleError("Domain {} not found".format(domain))
 
 
+def urlencode_url(url):
+    """Percent-encode the path and query of url without double-encoding existing escapes."""
+    parsed_url = urlparse(url)
+    # unencode before encoding, to prevent double-urlencoding of the path
+    encoded_path = quote(unquote(parsed_url.path), safe=":/")
+    # The query is in .query (.params is the ";..." suffix of the path, a plain string);
+    # "%" and the RFC 3986 query delimiters stay literal so key=value pairs survive.
+    encoded_query = quote(parsed_url.query, safe="%/?:@!$&'()*+,;=")
+    return parsed_url._replace(path=encoded_path, query=encoded_query).geturl()
+
+
 EOF_KEY = "Ctrl-Z" if os.name == 'nt' else "Ctrl-D"