From f54cd172b920d9411315ec0067d36a85d5d5da76 Mon Sep 17 00:00:00 2001 From: Craig Maloney Date: Tue, 21 Nov 2017 18:08:25 -0500 Subject: [PATCH 01/19] Updating Changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2f37c0..5a261f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +### Release +### [0.3.1] - 2017-11-21 + +#### Fixed +- Compatibility with Mastodon 1.1.2 fix + ### Release ### [0.3.0] - 2017-11-17 ### Dedicated to the memory of Natalie Nguyen (aka Tipsy Tentacle). May she live on in our hearts and our changelog. From 90b2958d2b2829fbf8eaabcccac06f9d5cb45a2b Mon Sep 17 00:00:00 2001 From: Craig Maloney Date: Wed, 22 Nov 2017 19:53:33 -0500 Subject: [PATCH 02/19] Change toots to only reply to everyone other than the logged-in user --- src/tootstream/toot.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/tootstream/toot.py b/src/tootstream/toot.py index 437f55b..f3c90d5 100644 --- a/src/tootstream/toot.py +++ b/src/tootstream/toot.py @@ -758,13 +758,19 @@ def rep(mastodon, rest): fg('red')) return - # handle mentions - # TODO: reorder so parent author is first? - mentions = [i['acct'] for i in parent_toot['mentions']] - mentions.append(parent_toot['account']['acct']) - - # Remove duplicates - mentions = ["@%s" % i for i in list(set(mentions))] + # Handle mentions text at the beginning: + mentions_set = set() + for i in parent_toot['mentions']: + mentions_set.add(i['acct']) + mentions_set.add(parent_toot['account']['acct']) + + # Remove our account + # TODO: Better way to get this information? 
+ my_user = mastodon.account_verify_credentials() + mentions_set.discard(my_user['username']) + + # Format each using @username@host and add a space + mentions = ["@%s" % i for i in list(mentions_set)] mentions = ' '.join(mentions) # if user didn't set cw/spoiler, set it here From 64a5c0391738ec6ac9c85da27ef15cc5b19188b4 Mon Sep 17 00:00:00 2001 From: Lain Date: Sat, 25 Nov 2017 15:59:16 -0800 Subject: [PATCH 03/19] Add link styles, emoji conversion, and link shortening to parser Adds new features to toot_parser. Styles for links, mentions and hashtags are supported. Standard emoji short codes can be converted into unicode emoji. Links can be shortened and a list of urls contained in the post can be retrieved. --- requirements.txt | 1 + src/tootstream/toot.py | 4 +- src/tootstream/toot_parser.py | 218 +++++++++++++++++++++++++++++++++- 3 files changed, 216 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index 98703e5..0e1bd56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ click>=6.7 Mastodon.py==1.1.2 colored>=1.3.5 humanize>=0.5.1 +emoji>=0.4.5 diff --git a/src/tootstream/toot.py b/src/tootstream/toot.py index f3c90d5..e7eef70 100644 --- a/src/tootstream/toot.py +++ b/src/tootstream/toot.py @@ -89,9 +89,7 @@ def on_update(self, status): ##################################### def get_content(toot): html = toot['content'] - toot_parser.reset() - toot_parser.feed(html) - toot_parser.close() + toot_parser.parse(html) return toot_parser.get_text() diff --git a/src/tootstream/toot_parser.py b/src/tootstream/toot_parser.py index 27636c8..92446f9 100644 --- a/src/tootstream/toot_parser.py +++ b/src/tootstream/toot_parser.py @@ -1,10 +1,96 @@ +import emoji +from colored import attr from html.parser import HTMLParser from textwrap import TextWrapper + +def convert_emoji_shortcodes(text): + """Convert standard emoji short codes to unicode emoji in + the provided text. + + text - The text to parse. 
+ Returns the modified text. + """ + return emoji.emojize(text, use_aliases = True) + + +def find_attr(name, attrs): + """Find an attribute in an HTML tag by name. + + name - The attribute name to search for. + attrs - The list of attributes to search. + Returns the matching attribute or None. + """ + for attr, values in attrs: + if attr == name: + return values + return None + + +def has_class(value, attrs): + """Return whether the HTML attributes contain a specific class name. + + value - The class type to search for. + attrs - The list of attributes to search. + Returns true if the specified class type was found. + """ + values = find_attr('class', attrs) + if values is None: + return False + + return values.find(value) >= 0 + + class TootParser(HTMLParser): + """ + TootParser is used to parse HTML based toots and convert them into + plain text versions. By default the returned text is equivalent to the + source toot text with paragraph and br tags converted to line breaks. + + The text can optionally be indented by passing a string to the indent + field which is prepended to every line in the source text. + + The text can also have text wrapping enabled by passing in a max width to + the width parameter. Note that the text wrapping is not perfect right + now and doesn't work well with terminal colors and a lot of unicode text + on one line. + + Link shortening can be enabled by setting the shorten_links parameter. + This shortens links by using the link shortening helper HTML embedded in + the source toot. This means links embedded from sources other than + mastodon may not be shortened. The shortened urls will look like + example.org/areallylongur... + + Emoji short codes can optionally be converted into unicode based emoji by + enabling the convert_emoji parameter. This parses standard emoji short + code names and does not support custom emoji short codes. + + Styles can also optionally be applied to links found in the source text. 
+ Pass in the desired colored style to the link_style, mention_style, and + hashtag_style parameters. + + To parse a toot, pass the toot source HTML to the parse() command. The + source text can then be retrieved with the get_text() command. Parsed + link urls can also be retrieved by calling the get_links() command. + + indent - A string to prepend to all lines in the output text. + width - The maximum number of characters to allow in a line of text. + shorten_links - Whether or not to shorten links. + convert_emoji - Whether or not to convert emoji short codes to unicode. + link_style - The colored style to apply to generic links. + mention_style - The colored style to apply to mentions. + hashtag_style - The colored style to apply to hashtags. + + """ + def __init__(self, indent = '', - width = 0): + width = 0, + convert_emoji = False, + shorten_links = False, + link_style = None, + mention_style = None, + hashtag_style = None): super().__init__() self.reset() @@ -12,6 +98,11 @@ def __init__(self, self.convert_charrefs = True self.indent = indent + self.convert_emoji = convert_emoji + self.shorten_links = shorten_links + self.link_style = link_style + self.mention_style = mention_style + self.hashtag_style = hashtag_style if width > 0: self.wrap = TextWrapper() @@ -21,38 +112,157 @@ def __init__(self, else: self.wrap = None + def reset(self): + """Resets the parser so a new toot can be parsed.""" super().reset() self.fed = [] self.lines = [] + self.links = [] self.cur_type = None + self.hide = False + self.ellipsis = False + def pop_line(self): + """Take the current text scratchpad and return it as a + line of text and reset the scratchpad.""" line = ''.join(self.fed) self.fed = [] return line + def handle_data(self, data): + """Processes plain text data. + data - The text to process + """ + if self.hide: + return + + if self.convert_emoji: + data = convert_emoji_shortcodes(data) + self.fed.append(data) + + def parse_link(self, attrs): + """Processes a link tag. 
+ attrs - A list of attributes contained in the link tag. + """ + + # Save the link url + self.links.append(find_attr('href', attrs)) + + if has_class('hashtag', attrs): + self.cur_type = 'hashtag' + if self.hashtag_style != None: + self.fed.append(self.hashtag_style) + elif has_class('mention', attrs): + self.cur_type = 'mention' + if self.mention_style != None: + self.fed.append(self.mention_style) + else: + self.cur_type = 'link' + if self.link_style != None: + self.fed.append(self.link_style) + + + def parse_span(self, attrs): + """Processes a span tag. + attrs - A list of attributes contained in the span tag. + """ + + # Right now we only support spans used to shorten links. + # Mastodon links contain