diff --git a/Makefile b/Makefile index cafe213b..c413d73a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ publish : twine upload dist/*.tar.gz dist/*.whl test: - pytest -v + pytest tests/*.py -v flake8 vermin --target=3.6 --no-tips --violations --exclude-regex venv/.* . diff --git a/requirements.txt b/requirements.txt index 3616ac32..8fec2ffe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ requests>=2.13,<3.0 beautifulsoup4>=4.5.0,<5.0 wcwidth>=0.1.7 urwid>=2.0.0,<3.0 - +urwidgets>=0.1,<0.2 +html2text>=2020.1.16 diff --git a/setup.py b/setup.py index a7b5c511..dfe90ad1 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,8 @@ "beautifulsoup4>=4.5.0,<5.0", "wcwidth>=0.1.7", "urwid>=2.0.0,<3.0", + "urwidgets>=0.1,<0.2", + "html2text>=2020.1.16" ], entry_points={ 'console_scripts': [ diff --git a/tests/test_console.py b/tests/test_console.py index ffe1d12c..2e458729 100644 --- a/tests/test_console.py +++ b/tests/test_console.py @@ -151,6 +151,74 @@ def test_timeline(mock_get, monkeypatch, capsys): assert err == "" +@mock.patch('toot.http.get') +def test_timeline_html_content(mock_get, monkeypatch, capsys): + mock_get.return_value = MockResponse([{ + 'id': '111111111111111111', + 'account': { + 'display_name': 'Frank Zappa 🎸', + 'acct': 'fz' + }, + 'created_at': '2017-04-12T15:53:18.174Z', + 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", + 'reblog': None, + 'in_reply_to_id': None, + 'media_attachments': [], + }]) + + console.run_command(app, user, 'timeline', ['--once']) + + mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) + + out, err = capsys.readouterr() + lines = out.split("\n") + reference = [ + "────────────────────────────────────────────────────────────────────────────────────────────────────", + "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", + "", + "## HTML Render Test", + "", + " _emphasized_ ", + " _underlined_ ", + " **bold** ", + " ** _bold and italic_** ", + " ~~strikethrough~~ ", + "regular text", + "", + "Code block:", + "", + " ", + " 10 PRINT \"HELLO WORLD\" ", + " 20 GOTO 10 ", + " ", + "> Something blockquoted here. The indentation is maintained as the text line wraps.", + " 1. List item", + " • Nested item", + " • Another nested ", + " 2. Another list item. ", + " 1. Something else nested", + " 2. And a last nested", + "", + "> Blockquote", + "> 1. List in BQ", + "> 2. List item 2 in BQ", + ">", + "", + "#hashtag #test ", + "https://a.com text after link", + "", + "ID 111111111111111111 ", + "────────────────────────────────────────────────────────────────────────────────────────────────────", + "", + ] + + assert len(lines) == len(reference) + for index, line in enumerate(lines): + assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" + + assert err == "" + + @mock.patch('toot.http.get') def test_timeline_with_re(mock_get, monkeypatch, capsys): mock_get.return_value = MockResponse([{ @@ -585,8 +653,6 @@ def test_notifications(mock_get, capsys): "────────────────────────────────────────────────────────────────────────────────────────────────────", "", ]) - - @mock.patch('toot.http.get') def test_notifications_empty(mock_get, capsys): mock_get.return_value = MockResponse([]) diff --git a/toot/output.py b/toot/output.py index 3414cdb5..36601fa8 100644 --- a/toot/output.py +++ b/toot/output.py @@ -2,6 +2,7 @@ import re import sys import textwrap +import html2text from typing import List from wcwidth import wcswidth @@ -272,6 +273,20 @@ def print_search_results(results): def print_status(status, width): reblog = status['reblog'] content = reblog['content'] if reblog else status['content'] + + h2t = html2text.HTML2Text() + + h2t.body_width = width + h2t.single_line_break = True + h2t.ignore_links = True + h2t.wrap_links = True + h2t.wrap_list_items = True + h2t.wrap_tables = True + h2t.unicode_snob = True + h2t.ul_item_mark = "\N{bullet}" + + text_status = h2t.handle(content).strip() + media_attachments = reblog['media_attachments'] if reblog else status['media_attachments'] in_reply_to = status['in_reply_to_id'] poll = reblog.get('poll') if reblog else status.get('poll') @@ -294,7 +309,7 @@ def print_status(status, width): ) print_out("") - print_html(content, width) + print_out(highlight_hashtags(text_status)) if media_attachments: print_out("\nMedia:") diff --git a/toot/tui/app.py b/toot/tui/app.py index 73e5b358..6b073fe5 100644 --- a/toot/tui/app.py +++ b/toot/tui/app.py @@ -1,5 +1,6 @@ import logging import urwid +import html2text from concurrent.futures import ThreadPoolExecutor @@ -7,6 +8,7 @@ from toot.console import get_default_visibility from toot.exceptions import ApiError + from .compose import StatusComposer from .constants import PALETTE from .entities import Status @@ -14,7 +16,7 @@ from .overlays import StatusDeleteConfirmation, Account from .poll import Poll from .timeline import Timeline -from .utils import parse_content_links, show_media, copy_to_clipboard +from .utils import parse_content_links, show_media, copy_to_clipboard, parse_datetime logger = logging.getLogger(__name__) @@ -119,7 +121,6 @@ def __init__(self, app, user, args): def run(self): self.loop.set_alarm_in(0, lambda *args: self.async_load_instance()) self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_accounts()) - self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_tags()) self.loop.set_alarm_in(0, lambda *args: self.async_load_timeline( is_initial=True, timeline_name="home")) self.loop.run() @@ -315,22 +316,6 @@ def _done_accounts(accounts): self.run_in_thread(_load_accounts, done_callback=_done_accounts) - def async_load_followed_tags(self): - def _load_tag_list(): - try: - return api.followed_tags(self.app, self.user) - except ApiError: - # not supported by all Mastodon servers so fail silently if necessary - return [] - - def _done_tag_list(tags): - if len(tags) > 0: - self.followed_tags = [t["name"] for t in tags] - else: - self.followed_tags = [] - - self.run_in_thread(_load_tag_list, done_callback=_done_tag_list) - def refresh_footer(self, timeline): """Show status details in footer.""" status, index, count = timeline.get_focused_status_with_counts() @@ -646,9 +631,26 @@ def _done(loop): return self.run_in_thread(_delete, done_callback=_done) def copy_status(self, status): - # TODO: copy a better version of status content - # including URLs - copy_to_clipboard(self.screen, status.original.data["content"]) + h2t = html2text.HTML2Text() + h2t.body_width = 0 # nowrap + h2t.single_line_break = True + h2t.ignore_links = True + h2t.unicode_snob = True + h2t.ul_item_mark = "\N{bullet}" + + time = parse_datetime(status.original.data['created_at']) + time = time.strftime('%Y-%m-%d %H:%M %Z') + + text_status = (f"{status.original.data['url']}\n\n" + + (status.original.author.display_name or "") + + "\n" + + (status.original.author.account or "") + + "\n\n" + + h2t.handle(status.original.data["content"]).strip() + + "\n\n" + + f"Created at: {time}") + + copy_to_clipboard(self.screen, text_status) self.footer.set_message(f"Status {status.original.id} copied") # --- Overlay handling ----------------------------------------------------- diff --git a/toot/tui/constants.py b/toot/tui/constants.py index e866e34a..0039de87 100644 --- a/toot/tui/constants.py +++ b/toot/tui/constants.py @@ -37,7 +37,29 @@ ('yellow_bold', 'yellow,bold', ''), ('red', 'dark red', ''), ('warning', 'light red', ''), - ('white_bold', 'white,bold', '') + ('white_bold', 'white,bold', ''), + + # HTML tag styling + ('a', ',italics', ''), + # em tag is mapped to i + ('i', ',italics', ''), + # strong tag is mapped to b + ('b', ',bold', ''), + # special case for bold + italic nested tags + ('bi', ',bold,italics', ''), + ('u', ',underline', ''), + ('del', ',strikethrough', ''), + ('code', 'light gray, standout', ''), + ('pre', 'light gray, standout', ''), + ('blockquote', 'light gray', ''), + ('h1', ',bold', ''), + ('h2', ',bold', ''), + ('h3', ',bold', ''), + ('h4', ',bold', ''), + ('h5', ',bold', ''), + ('h6', ',bold', ''), + ('class_mention_hashtag', 'light cyan,bold', ''), + ('class_hashtag', 'light cyan,bold', ''), ] VISIBILITY_OPTIONS = [ diff --git a/toot/tui/overlays.py b/toot/tui/overlays.py index 9582b920..1be7c8c0 100644 --- a/toot/tui/overlays.py +++ b/toot/tui/overlays.py @@ -4,10 +4,10 @@ import webbrowser from toot import __version__ -from toot.utils import format_content -from .utils import highlight_hashtags, highlight_keys -from .widgets import Button, EditBox, SelectableText from toot import api +from .utils import highlight_keys +from .widgets import Button, EditBox, SelectableText +from .richtext import ContentParser class StatusSource(urwid.Padding): @@ -255,6 +255,8 @@ def setup_listbox(self): super().__init__(walker) def generate_contents(self, account, relationship=None, last_action=None): + parser = ContentParser() + if self.last_action and not self.last_action.startswith("Confirm"): yield Button(f"Confirm {self.last_action}", on_press=take_action, user_data=self) yield Button("Cancel", on_press=cancel_action, user_data=self) @@ -279,8 +281,10 @@ def generate_contents(self, account, relationship=None, last_action=None): if account["note"]: yield urwid.Divider() - for line in format_content(account["note"]): - yield urwid.Text(highlight_hashtags(line, followed_tags=set())) + + widgetlist = parser.html_to_widgets(account["note"]) + for line in widgetlist: + yield (line) yield urwid.Divider() yield urwid.Text(["ID: ", ("green", f"{account['id']}")]) @@ -312,8 +316,11 @@ def generate_contents(self, account, relationship=None, last_action=None): name = field["name"].title() yield urwid.Divider() yield urwid.Text([("yellow", f"{name.rstrip(':')}"), ":"]) - for line in format_content(field["value"]): - yield urwid.Text(highlight_hashtags(line, followed_tags=set())) + + widgetlist = parser.html_to_widgets(field["value"]) + for line in widgetlist: + yield (line) + if field["verified_at"]: yield urwid.Text(("green", "✓ Verified")) diff --git a/toot/tui/poll.py b/toot/tui/poll.py index 81756af2..8ad649c1 100644 --- a/toot/tui/poll.py +++ b/toot/tui/poll.py @@ -2,9 +2,9 @@ from toot import api from toot.exceptions import ApiError -from toot.utils import format_content -from .utils import highlight_hashtags, parse_datetime +from .utils import parse_datetime from .widgets import Button, CheckBox, RadioButton +from .richtext import ContentParser class Poll(urwid.ListBox): @@ -85,8 +85,12 @@ def generate_poll_detail(self): def generate_contents(self, status): yield urwid.Divider() - for line in format_content(status.data["content"]): - yield urwid.Text(highlight_hashtags(line, set())) + + parser = ContentParser() + widgetlist = parser.html_to_widgets(status.data["content"]) + + for line in widgetlist: + yield (line) yield urwid.Divider() yield self.build_linebox(self.generate_poll_detail()) diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py new file mode 100644 index 00000000..175de59e --- /dev/null +++ b/toot/tui/richtext.py @@ -0,0 +1,453 @@ +""" +richtext +""" +from typing import List, Tuple +import re +import urwid +import unicodedata +from .constants import PALETTE +from bs4 import BeautifulSoup +from bs4.element import NavigableString, Tag +from urwidgets import TextEmbed, Hyperlink, parse_text +from urwid.util import decompose_tagmarkup +from toot.utils import urlencode_url + + +class ContentParser: + def __init__(self): + self.palette_names = [] + for p in PALETTE: + self.palette_names.append(p[0]) + + """Parse a limited subset of HTML and create urwid widgets.""" + + def html_to_widgets(self, html, recovery_attempt=False) -> List[urwid.Widget]: + """Convert html to urwid widgets""" + widgets: List[urwid.Widget] = [] + html = unicodedata.normalize("NFKC", html) + soup = BeautifulSoup(html.replace("'", "'"), "html.parser") + first_tag = True + for e in soup.body or soup: + if isinstance(e, NavigableString): + if first_tag and not recovery_attempt: + # if our first "tag" is a navigable string + # the HTML is out of spec, doesn't start with a tag, + # we see this in content from Pixelfed servers. + # attempt a fix by wrapping the HTML with

+ return self.html_to_widgets(f"

{html}

", recovery_attempt=True) + else: + continue + else: + name = e.name + # if our HTML starts with a tag, but not a block tag + # the HTML is out of spec. Attempt a fix by wrapping the + # HTML with

+ if ( + first_tag + and not recovery_attempt + and name + not in ( + "p", + "pre", + "li", + "blockquote", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + ) # NOTE: update this list if Mastodon starts supporting more block tags + ): + return self.html_to_widgets(f"

{html}

", recovery_attempt=True) + + # First, look for a custom tag handler method in this class + # If that fails, fall back to inline_tag_to_text handler + method = getattr(self, "_" + name, self.inline_tag_to_text) + markup = method(e) # either returns a Widget, or plain text + first_tag = False + + if not isinstance(markup, urwid.Widget): + # plaintext, so create a padded text widget + txt = self.text_to_widget("", markup) + markup = urwid.Padding( + txt, + align="left", + width=("relative", 100), + min_width=None, + ) + widgets.append(markup) + # separate top level widgets with a blank line + widgets.append(urwid.Divider(" ")) + return widgets[:-1] # but suppress the last blank line + + def inline_tag_to_text(self, tag) -> Tuple: + """Convert html tag to plain text with tag as attributes recursively""" + markups = self.process_inline_tag_children(tag) + if not markups: + return (tag.name, "") + return (tag.name, markups) + + def process_inline_tag_children(self, tag) -> List: + """Recursively retrieve all children + and convert to a list of markup text""" + markups = [] + for child in tag.children: + if isinstance(child, Tag): + method = getattr(self, "_" + child.name, self.inline_tag_to_text) + markup = method(child) + markups.append(markup) + else: + markups.append(child) + return markups + + def text_to_widget(self, attr, markup) -> TextEmbed: + TRANSFORM = { + # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget + re.compile(r"(^.+)\x03(.+$)"): lambda g: ( + len(g[1]), + urwid.Filler(Hyperlink(g[2], anchor_attr, g[1])), + ), + } + markup_list = [] + + for run in markup: + if isinstance(run, tuple): + txt, attr_list = decompose_tagmarkup(run) + # find anchor titles with an ETX separator followed by href + m = re.match(r"(^.+)\x03(.+$)", txt) + if m: + anchor_attr = self.get_best_anchor_attr(attr_list) + markup_list.append( + parse_text( + txt, + TRANSFORM, + lambda pattern, groups, span: TRANSFORM[pattern](groups), + ) + ) + else: + markup_list.append(run) + else: + markup_list.append(run) + + return TextEmbed(markup_list) + + def process_block_tag_children(self, tag) -> List[urwid.Widget]: + """Recursively retrieve all children + and convert to a list of widgets + any inline tags containing text will be + converted to Text widgets""" + + pre_widget_markups = [] + post_widget_markups = [] + child_widgets = [] + found_nested_widget = False + + for child in tag.children: + if isinstance(child, Tag): + # child is a nested tag; process using custom method + # or default to inline_tag_to_text + method = getattr(self, "_" + child.name, self.inline_tag_to_text) + result = method(child) + if isinstance(result, urwid.Widget): + found_nested_widget = True + child_widgets.append(result) + else: + if not found_nested_widget: + pre_widget_markups.append(result) + else: + post_widget_markups.append(result) + else: + # child is text; append to the appropriate markup list + if not found_nested_widget: + pre_widget_markups.append(child) + else: + post_widget_markups.append(child) + + widget_list = [] + if len(pre_widget_markups): + widget_list.append(self.text_to_widget(tag.name, pre_widget_markups)) + + if len(child_widgets): + widget_list += child_widgets + + if len(post_widget_markups): + widget_list.append(self.text_to_widget(tag.name, post_widget_markups)) + + return widget_list + + def get_urwid_attr_name(self, tag) -> str: + """Get the class name and translate to a + name suitable for use as an urwid + text attribute name""" + + if "class" in tag.attrs: + clss = tag.attrs["class"] + if len(clss) > 0: + style_name = "class_" + "_".join(clss) + # return the class name, only if we + # find it as a defined palette name + if style_name in self.palette_names: + return style_name + + # fallback to returning the tag name + return tag.name + + # Tag handlers start here. + # Tags not explicitly listed are "supported" by + # rendering as text. + # Inline tags return a list of marked up text for urwid.Text + # Block tags return urwid.Widget + + def basic_block_tag_handler(self, tag) -> urwid.Widget: + """default for block tags that need no special treatment""" + return urwid.Pile(self.process_block_tag_children(tag)) + + def get_best_anchor_attr(self, attrib_list) -> str: + if not attrib_list: + return "" + flat_al = list(flatten(attrib_list)) + + for a in flat_al[0]: + # ref: https://docs.joinmastodon.org/spec/activitypub/ + # these are the class names (translated to attrib names) + # that we can support for display + + try: + if a[0] in ["class_hashtag", "class_mention_hashtag", "class_mention"]: + return a[0] + except KeyError: + continue + + return "a" + + def _a(self, tag) -> Tuple: + """anchor tag handler""" + + markups = self.process_inline_tag_children(tag) + if not markups: + return (tag.name, "") + + href = tag.attrs["href"] + title, attrib_list = decompose_tagmarkup(markups) + if not attrib_list: + attrib_list = [tag] + if href: + # urlencode the path and query portions of the URL + href = urlencode_url(href) + # use ASCII ETX (end of record) as a + # delimiter between the title and the HREF + title += f"\x03{href}" + + attr = self.get_best_anchor_attr(attrib_list) + + if attr == "a": + # didn't find an attribute to use + # in the child markup, so let's + # try the anchor tag's own attributes + + attr = self.get_urwid_attr_name(tag) + + # hashtag anchors have a class of "mention hashtag" + # or "hashtag" + # we'll return style "class_mention_hashtag" + # or "class_hashtag" + # in that case; see corresponding palette entry + # in constants.py controlling hashtag highlighting + + return (attr, title) + + def _blockquote(self, tag) -> urwid.Widget: + widget_list = self.process_block_tag_children(tag) + blockquote_widget = urwid.LineBox( + urwid.Padding( + urwid.Pile(widget_list), + align="left", + width=("relative", 100), + min_width=None, + left=1, + right=1, + ), + tlcorner="", + tline="", + lline="│", + trcorner="", + blcorner="", + rline="", + bline="", + brcorner="", + ) + return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")]) + + def _br(self, tag) -> Tuple: + return ("br", "\n") + + def _em(self, tag) -> Tuple: + # to simplify the number of palette entries + # translate EM to I (italic) + markups = self.process_inline_tag_children(tag) + if not markups: + return ("i", "") + + # special case processing for bold and italic + for parent in tag.parents: + if parent.name == "b" or parent.name == "strong": + return ("bi", markups) + + return ("i", markups) + + def _ol(self, tag) -> urwid.Widget: + """ordered list tag handler""" + + widgets = [] + list_item_num = 1 + increment = -1 if tag.has_attr("reversed") else 1 + + # get ol start= attribute if present + if tag.has_attr("start") and len(tag.attrs["start"]) > 0: + try: + list_item_num = int(tag.attrs["start"]) + except ValueError: + pass + + for li in tag.find_all("li", recursive=False): + method = getattr(self, "_li", self.inline_tag_to_text) + markup = method(li) + + # li value= attribute will change the item number + # it also overrides any ol start= attribute + + if li.has_attr("value") and len(li.attrs["value"]) > 0: + try: + list_item_num = int(li.attrs["value"]) + except ValueError: + pass + + if not isinstance(markup, urwid.Widget): + txt = self.text_to_widget("li", [str(list_item_num), ". ", markup]) + # 1. foo, 2. bar, etc. + widgets.append(txt) + else: + txt = self.text_to_widget("li", [str(list_item_num), ". "]) + columns = urwid.Columns( + [txt, ("weight", 9999, markup)], dividechars=1, min_width=3 + ) + widgets.append(columns) + + list_item_num += increment + + return urwid.Pile(widgets) + + def _pre(self, tag) -> urwid.Widget: + #
 tag spec says that text should not wrap,
+        # but horizontal screen space is at a premium
+        # and we have no horizontal scroll bar, so allow
+        # wrapping.
+
+        widget_list = [urwid.Divider(" ")]
+        widget_list += self.process_block_tag_children(tag)
+
+        pre_widget = urwid.Padding(
+            urwid.Pile(widget_list),
+            align="left",
+            width=("relative", 100),
+            min_width=None,
+            left=1,
+            right=1,
+        )
+        return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
+
+    def _span(self, tag) -> Tuple:
+        markups = self.process_inline_tag_children(tag)
+
+        if not markups:
+            return (tag.name, "")
+
+        # span inherits its parent's class definition
+        # unless it has a specific class definition
+        # of its own
+
+        if "class" in tag.attrs:
+            # uncomment the following code to hide all HTML marked
+            # invisible (generally, the http:// prefix of URLs)
+            # could be a user preference, it's only advisable if
+            # the terminal supports OCS 8 hyperlinks (and that's not
+            # automatically detectable)
+
+            # if "invisible" in tag.attrs["class"]:
+            #     return (tag.name, "")
+
+            style_name = self.get_urwid_attr_name(tag)
+
+            if style_name != "span":
+                # unique class name matches an entry in our palette
+                return (style_name, markups)
+
+        if tag.parent:
+            return (self.get_urwid_attr_name(tag.parent), markups)
+        else:
+            # fallback
+            return ("span", markups)
+
+    def _strong(self, tag) -> Tuple:
+        # to simplify the number of palette entries
+        # translate STRONG to B (bold)
+        markups = self.process_inline_tag_children(tag)
+        if not markups:
+            return ("b", "")
+
+        # special case processing for bold and italic
+        for parent in tag.parents:
+            if parent.name == "i" or parent.name == "em":
+                return ("bi", markups)
+
+        return ("b", markups)
+
+    def _ul(self, tag) -> urwid.Widget:
+        """unordered list tag handler"""
+
+        widgets = []
+
+        for li in tag.find_all("li", recursive=False):
+            method = getattr(self, "_li", self.inline_tag_to_text)
+            markup = method(li)
+
+            if not isinstance(markup, urwid.Widget):
+                txt = self.text_to_widget("li", ["\N{bullet} ", markup])
+                # * foo, * bar, etc.
+                widgets.append(txt)
+            else:
+                txt = self.text_to_widget("li", ["\N{bullet} "])
+                columns = urwid.Columns(
+                    [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
+                )
+                widgets.append(columns)
+
+        return urwid.Pile(widgets)
+
+    # These tags are handled identically to others
+    # the only difference being the tag name used for
+    # urwid attribute mapping
+
+    _b = _strong
+
+    _div = basic_block_tag_handler
+
+    _i = _em
+
+    _li = basic_block_tag_handler
+
+    # Glitch-soc and Pleroma allow 

...

in content + # Mastodon (PR #23913) does not; header tags are converted to

+ + _h1 = _h2 = _h3 = _h4 = _h5 = _h6 = basic_block_tag_handler + + _p = basic_block_tag_handler + + +def flatten(data): + if isinstance(data, tuple): + for x in data: + yield from flatten(x) + else: + yield data diff --git a/toot/tui/timeline.py b/toot/tui/timeline.py index fb255c87..d8e20194 100644 --- a/toot/tui/timeline.py +++ b/toot/tui/timeline.py @@ -1,4 +1,5 @@ import logging +import re import sys import urwid import webbrowser @@ -7,12 +8,14 @@ from .entities import Status from .scroll import Scrollable, ScrollBar -from .utils import highlight_hashtags, parse_datetime, highlight_keys +from .utils import parse_datetime, highlight_keys from .widgets import SelectableText, SelectableColumns +from .richtext import ContentParser from toot.tui import app from toot.tui.utils import time_ago -from toot.utils import format_content from toot.utils.language import language_name +from toot.utils import urlencode_url +from urwidgets import Hyperlink, TextEmbed, parse_text logger = logging.getLogger("toot") @@ -311,7 +314,6 @@ def remove_status(self, status): class StatusDetails(urwid.Pile): def __init__(self, timeline: Timeline, status: Optional[Status]): self.status = status - self.followed_tags = timeline.tui.followed_tags self.followed_accounts = timeline.tui.followed_accounts reblogged_by = status.author if status and status.reblog else None @@ -319,6 +321,18 @@ def __init__(self, timeline: Timeline, status: Optional[Status]): if status else ()) return super().__init__(widget_list) + def linkify_content(self, text) -> urwid.Widget: + TRANSFORM = { + # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget + re.compile(r'(https?://[^\s]+)'): + lambda g: (len(g[1]), urwid.Filler(Hyperlink(urlencode_url(g[1]), "link", g[1]))), + } + markup_list = [] + + markup_list.append(parse_text(text, TRANSFORM, + lambda pattern, groups, span: TRANSFORM[pattern](groups))) + return TextEmbed(markup_list, align='left') + def content_generator(self, status, reblogged_by): if reblogged_by: text = "♺ {} boosted".format(reblogged_by.display_name or reblogged_by.username) @@ -341,8 +355,12 @@ def content_generator(self, status, reblogged_by): yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view."))) else: content = status.original.translation if status.original.show_translation else status.data["content"] - for line in format_content(content): - yield ("pack", urwid.Text(highlight_hashtags(line, self.followed_tags))) + + parser = ContentParser() + widgetlist = parser.html_to_widgets(content) + + for line in widgetlist: + yield (line) media = status.data["media_attachments"] if media: @@ -351,7 +369,7 @@ def content_generator(self, status, reblogged_by): yield ("pack", urwid.Text([("bold", "Media attachment"), " (", m["type"], ")"])) if m["description"]: yield ("pack", urwid.Text(m["description"])) - yield ("pack", urwid.Text(("link", m["url"]))) + yield ("pack", self.linkify_content(m["url"])) poll = status.original.data.get("poll") if poll: @@ -411,7 +429,7 @@ def card_generator(self, card): if card["description"]: yield urwid.Text(card["description"].strip()) yield urwid.Text("") - yield urwid.Text(("link", card["url"])) + yield self.linkify_content(card["url"]) def poll_generator(self, poll): for idx, option in enumerate(poll["options"]): diff --git a/toot/tui/utils.py b/toot/tui/utils.py index 2f49362d..84cb7da6 100644 --- a/toot/tui/utils.py +++ b/toot/tui/utils.py @@ -81,21 +81,6 @@ def _gen(): return list(_gen()) -def highlight_hashtags(line, followed_tags, attr="hashtag", followed_attr="followed_hashtag"): - hline = [] - - for p in re.split(HASHTAG_PATTERN, line): - if p.startswith("#"): - if p[1:].lower() in (t.lower() for t in followed_tags): - hline.append((followed_attr, p)) - else: - hline.append((attr, p)) - else: - hline.append(p) - - return hline - - def show_media(paths): """ Attempt to open an image viewer to show given media files. diff --git a/toot/utils/__init__.py b/toot/utils/__init__.py index e8103acf..026c861f 100644 --- a/toot/utils/__init__.py +++ b/toot/utils/__init__.py @@ -5,6 +5,7 @@ import tempfile import unicodedata import warnings +from urllib.parse import urlparse, quote, unquote, urlencode from bs4 import BeautifulSoup from typing import Dict @@ -81,6 +82,17 @@ def assert_domain_exists(domain): raise ConsoleError("Domain {} not found".format(domain)) +def urlencode_url(url): + parsed_url = urlparse(url) + + # unencode before encoding, to prevent double-urlencoding + encoded_path = quote(unquote(parsed_url.path), safe="-._~()'!*:@,;+&=/") + encoded_query = urlencode({k: quote(unquote(v), safe="-._~()'!*:@,;?/") for k, v in parsed_url.params}) + encoded_url = parsed_url._replace(path=encoded_path, params=encoded_query).geturl() + + return encoded_url + + EOF_KEY = "Ctrl-Z" if os.name == 'nt' else "Ctrl-D"