Skip to content

Commit

Permalink
URLEncode hrefs; fixes crash bug with href urls in foreign scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
danschwarz committed May 28, 2023
1 parent 2936b04 commit 46d7e85
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 1 deletion.
3 changes: 3 additions & 0 deletions toot/tui/richtext.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from bs4.element import NavigableString, Tag
from urwidgets import TextEmbed, Hyperlink, parse_text
from urwid.util import decompose_tagmarkup
from toot.utils import urlencode_url


class ContentParser:
Expand Down Expand Up @@ -232,6 +233,8 @@ def _a(self, tag) -> Tuple:
if not attrib_list:
attrib_list = [tag]
if href:
# urlencode the path and query portions of the URL
href = urlencode_url(href)
# use ASCII ETX (end of text) as a
# delimiter between the title and the HREF
title += f"\x03{href}"
Expand Down
3 changes: 2 additions & 1 deletion toot/tui/timeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from toot.tui import app
from toot.tui.utils import time_ago
from toot.utils.language import language_name
from toot.utils import urlencode_url
from urwidgets import Hyperlink, TextEmbed, parse_text

logger = logging.getLogger("toot")
Expand Down Expand Up @@ -324,7 +325,7 @@ def linkify_content(self, text) -> urwid.Widget:
TRANSFORM = {
# convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget
re.compile(r'(https?://[^\s]+)'):
lambda g: (len(g[1]), urwid.Filler(Hyperlink(g[1], "link"))),
lambda g: (len(g[1]), urwid.Filler(Hyperlink(urlencode_url(g[1]), "link", g[1]))),
}
markup_list = []

Expand Down
12 changes: 12 additions & 0 deletions toot/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import tempfile
import unicodedata
import warnings
from urllib.parse import urlparse, quote, unquote, urlencode

from bs4 import BeautifulSoup
from typing import Dict
Expand Down Expand Up @@ -81,6 +82,17 @@ def assert_domain_exists(domain):
raise ConsoleError("Domain {} not found".format(domain))


def urlencode_url(url):
    """Return *url* with its path and query string percent-encoded.

    Both components are decoded before being encoded again, so a URL that
    is already percent-encoded is returned without being double-encoded.
    The scheme, network location, ``;params`` and fragment components are
    passed through unchanged.

    Args:
        url: The URL to encode, as a string.

    Returns:
        The re-assembled URL string.

    >>> urlencode_url("https://example.com/a b?q=a%20b")
    'https://example.com/a%20b?q=a%20b'
    """
    # Function-scope import: parse_qsl is not part of the module-level
    # ``from urllib.parse import ...`` line.
    from urllib.parse import parse_qsl

    parsed_url = urlparse(url)

    # Decode before encoding, to prevent double percent-encoding.
    encoded_path = quote(unquote(parsed_url.path), safe="-._~()'!*:@,;+&=/")

    # The query string lives in ``.query``; ``.params`` is the rarely used
    # ";params" suffix of the last path segment and is a plain string, not
    # a sequence of pairs. parse_qsl() decodes every key and value (so
    # encoding them again cannot double-encode) and returns a list of
    # pairs, which keeps repeated keys and their order.
    query_pairs = parse_qsl(parsed_url.query, keep_blank_values=True)
    encoded_query = urlencode(query_pairs, safe="-._~()'!*:@,;?/", quote_via=quote)

    return parsed_url._replace(path=encoded_path, query=encoded_query).geturl()


EOF_KEY = "Ctrl-Z" if os.name == 'nt' else "Ctrl-D"


Expand Down

0 comments on commit 46d7e85

Please sign in to comment.