# From a1f69a53d70277d654f1f9a474876f3496efd034 Mon Sep 17 00:00:00 2001
# From: Maxime “pep” Buquet
# Date: Fri, 14 Aug 2020 01:05:25 +0200
# Subject: plugins/untrackme: new plugin, based on remove_get_trackers
#
# Thanks Perdu for the original work!
#
# Signed-off-by: Maxime “pep” Buquet
#
# plugins/untrackme.py (new file)
"""
    UntrackMe wannabe plugin

    Rewrites URLs found in chat messages: strips well-known tracking query
    parameters and redirects links to some hosts (see SERVICES) towards
    alternative front-ends configured in the plugin's 'services' section.
"""

import logging
import re
from typing import Callable, Dict, List, Tuple, Union
from urllib.parse import quote as urlquote

from slixmpp import Message

from poezio import tabs
from poezio.plugin import BasePlugin


log = logging.getLogger(__name__)

# Tab types accepted by the message handlers of this plugin.
ChatTabs = Union[
    tabs.MucTab,
    tabs.DynamicConversationTab,
    tabs.StaticConversationTab,
    tabs.PrivateTab,
]

# Matches an http(s) URL up to the next space.
#   host: everything between '://' and the first '/'
#   rest: the remainder of the URL (path, query, fragment); may be empty
# Both group names are required: Plugin.map_services reads them by name.
RE_URL: re.Pattern = re.compile(r'https?://(?P<host>[^/]+)(?P<rest>[^ ]*)')

# host: (service, proxy)
#   service: key looked up in the plugin's 'services' config section
#   proxy:   when True the whole original URL is percent-encoded and appended
#            to the service base URL; when False only the host part is swapped
SERVICES: Dict[str, Tuple[str, bool]] = {
    'm.youtube.com': ('invidious', False),
    'www.youtube.com': ('invidious', False),
    'youtube.com': ('invidious', False),
    'youtu.be': ('invidious', False),
    'youtube-nocookie.com': ('invidious', False),
    'mobile.twitter.com': ('nitter', False),
    'www.twitter.com': ('nitter', False),
    'twitter.com': ('nitter', False),
    'pic.twitter.com': ('nitter_img', True),
    'pbs.twimg.com': ('nitter_img', True),
    'instagram.com': ('bibliogram', False),
    'www.instagram.com': ('bibliogram', False),
    'm.instagram.com': ('bibliogram', False),
}


def proxy(service: str) -> Callable[[str], str]:
    """Some services require the original url.

    Return a function that maps an original URL to ``service`` followed by
    the percent-encoded original URL.

    :param service: base URL of the proxying service, used as a prefix as-is.
    :returns: a callable taking the original URL and returning the proxied one.
    """
    def inner(origin: str) -> str:
        # urlquote keeps '/' unescaped by default; everything else that is
        # not URL-safe (including ':' and ',') is percent-encoded.
        return service + urlquote(origin)
    return inner
class Plugin(BasePlugin):
    """UntrackMe

    Rewrites URLs in message bodies according to the 'default' config
    section:

    - cleanup: strip known tracking query parameters (see cleanup_url)
    - redirect: send links to hosts listed in SERVICES to the front-ends
      configured in the 'services' section (see redirect_url)
    - display_corrections: show an information message when a body changed
    """

    # Values are nested per-section dicts holding booleans (feature toggles)
    # and strings (service base URLs).
    default_config: Dict[str, Dict[str, Union[str, bool]]] = {
        'default': {
            'cleanup': True,
            'redirect': True,
            'display_corrections': True,
        },
        'services': {
            'invidious': 'https://invidio.us',
            'nitter': 'https://nitter.net',
            'bibliogram': 'https://bibliogram.art',
        },
    }

    def init(self):
        """Derive the nitter image endpoint and register the handlers."""
        # The image proxy lives under '/pic/' of the configured nitter
        # instance; store it as its own service so SERVICES can refer to it.
        nitter_img = self.config.get('nitter', section='services') + '/pic/'
        self.config.set('nitter_img', nitter_img, section='services')

        # The same handler serves every event; judging by their names the
        # '*_say' events are outgoing messages and the '*_msg' events are
        # incoming ones -- TODO confirm against the poezio plugin API.
        self.api.add_event_handler('muc_say', self.handle_msg)
        self.api.add_event_handler('conversation_say', self.handle_msg)
        self.api.add_event_handler('private_say', self.handle_msg)

        self.api.add_event_handler('muc_msg', self.handle_msg)
        self.api.add_event_handler('conversation_msg', self.handle_msg)
        self.api.add_event_handler('private_msg', self.handle_msg)

    def map_services(self, match: re.Match) -> str:
        """Return the replacement for one URL matched by RE_URL.

        :param match: a match exposing the named groups 'host' and 'rest'.
        :returns: the matched text unchanged when the host is not listed in
            SERVICES, otherwise the URL rewritten for the configured service.
        """
        host = match.group('host')

        dest = SERVICES.get(host)
        if dest is None:
            return match.group(0)

        # Named 'use_proxy' so the module-level proxy() is not shadowed.
        destname, use_proxy = dest
        replaced = self.config.get(destname, section='services')
        result = replaced + match.group('rest')

        if use_proxy:
            # Proxying services take the whole original URL, percent-encoded.
            url = urlquote(match.group(0))
            result = replaced + url

        # TODO: count parenthesis?
        # Removes comma at the end of a link: RE_URL runs up to the next
        # space, so a comma typed right after a link is captured and gets
        # percent-encoded above. Compare the whole 3-character suffix
        # (a single character can never equal '%2C').
        if result.endswith('%2C'):
            result = result[:-3] + ','

        return result

    def handle_msg(self, msg: Message, tab: ChatTabs) -> None:
        """Rewrite the body of ``msg`` in place and report what changed.

        :param msg: the message whose 'body' is read and possibly replaced.
        :param tab: the tab the message belongs to; only its name is used,
            for the debug log and the information message.
        """
        orig = msg['body']

        if self.config.get('cleanup', section='default'):
            msg['body'] = self.cleanup_url(msg['body'])
        if self.config.get('redirect', section='default'):
            msg['body'] = self.redirect_url(msg['body'])

        # Logged for every handled message, whether or not it was modified.
        log.debug(
            'UntrackMe in tab \'%s\':\nOriginal: %s\nModified: %s',
            tab.name, orig, msg['body'],
        )

        if self.config.get('display_corrections', section='default') and \
                msg['body'] != orig:
            self.api.information(
                'UntrackMe in tab \'{}\':\nOriginal: {}\nModified: {}'.format(
                    tab.name, orig, msg['body']
                ),
                'Info',
            )

    def cleanup_url(self, txt: str) -> str:
        """Strip known tracking query parameters from URLs in ``txt``.

        :param txt: arbitrary text that may contain http(s) URLs.
        :returns: the text with the matched tracking parameters removed.
        """
        # fbclid: used globally (Facebook)
        # utm_*: used globally https://en.wikipedia.org/wiki/UTM_parameters
        # ncid: DoubleClick (Google)
        # dclid: also stripped; presumably DoubleClick's click id
        #        -- TODO confirm
        # ref_src, ref_url: twitter
        # Others exist but are excluded because they are not common.
        #
        # NOTE(review): '[^ ]+' is greedy, so each URL loses only its LAST
        # listed parameter per call, and the '?' or '&' that preceded it is
        # kept as part of group 1.
        return re.sub(
            '(https?://[^ ]+)&?'
            '(fbclid|dclid|ncid|utm_source|utm_medium|utm_campaign|utm_term|utm_content|ref_src|ref_url)'
            '=[^ &#]*',
            r'\1',
            txt)

    def redirect_url(self, txt: str) -> str:
        """Rewrite every URL in ``txt`` through map_services."""
        return RE_URL.sub(self.map_services, txt)