From ae7297fd1a84c8b82886e5997f231c8b06749646 Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Sat, 27 May 2023 21:51:41 -0400 Subject: [PATCH] URLEncode hrefs; fixes crash bug with href urls in foreign scripts --- toot/tui/richtext.py | 3 +++ toot/tui/timeline.py | 3 ++- toot/utils/__init__.py | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py index 0cf326d9..175de59e 100644 --- a/toot/tui/richtext.py +++ b/toot/tui/richtext.py @@ -10,6 +10,7 @@ from bs4.element import NavigableString, Tag from urwidgets import TextEmbed, Hyperlink, parse_text from urwid.util import decompose_tagmarkup +from toot.utils import urlencode_url class ContentParser: @@ -232,6 +233,8 @@ def _a(self, tag) -> Tuple: if not attrib_list: attrib_list = [tag] if href: + # urlencode the path and query portions of the URL + href = urlencode_url(href) # use ASCII ETX (end of record) as a # delimiter between the title and the HREF title += f"\x03{href}" diff --git a/toot/tui/timeline.py b/toot/tui/timeline.py index 70192005..d8e20194 100644 --- a/toot/tui/timeline.py +++ b/toot/tui/timeline.py @@ -14,6 +14,7 @@ from toot.tui import app from toot.tui.utils import time_ago from toot.utils.language import language_name +from toot.utils import urlencode_url from urwidgets import Hyperlink, TextEmbed, parse_text logger = logging.getLogger("toot") @@ -324,7 +325,7 @@ def linkify_content(self, text) -> urwid.Widget: TRANSFORM = { # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget re.compile(r'(https?://[^\s]+)'): - lambda g: (len(g[1]), urwid.Filler(Hyperlink(g[1], "link"))), + lambda g: (len(g[1]), urwid.Filler(Hyperlink(urlencode_url(g[1]), "link", g[1]))), } markup_list = [] diff --git a/toot/utils/__init__.py b/toot/utils/__init__.py index e8103acf..425cb382 100644 --- a/toot/utils/__init__.py +++ b/toot/utils/__init__.py @@ -5,6 +5,7 @@ import tempfile import unicodedata import warnings +from 
urllib.parse import urlparse, quote, unquote, urlencode, parse_qsl
 
 from bs4 import BeautifulSoup
 from typing import Dict
@@ -81,6 +82,17 @@ def assert_domain_exists(domain):
         raise ConsoleError("Domain {} not found".format(domain))
 
 
+def urlencode_url(url):
+    """Return url with its path and query percent-encoded (re-apply safe)."""
+    parsed_url = urlparse(url)
+    # decode before encoding so already-escaped input is not double-encoded
+    encoded_path = quote(unquote(parsed_url.path), safe=":/")
+    # the query string lives in .query; .params is only the ";..." path suffix
+    query_pairs = parse_qsl(parsed_url.query, keep_blank_values=True)
+    encoded_query = urlencode(query_pairs, safe=":/")
+    return parsed_url._replace(path=encoded_path, query=encoded_query).geturl()
+
+
 EOF_KEY = "Ctrl-Z" if os.name == 'nt' else "Ctrl-D"
 
 