1 files changed, 140 insertions, 0 deletions
diff --git a/plugins/untrackme.py b/plugins/untrackme.py
new file mode 100644
index 00000000..5256edad
--- /dev/null
+++ b/plugins/untrackme.py
@@ -0,0 +1,140 @@
+"""
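+    UntrackMe wannabe plugin.
+
+    Strips common tracking parameters from the links found in chat
+    messages and redirects links to a few well-known hosts towards
+    alternative front-ends.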
+"""
+
+from typing import Callable, Dict, List, Tuple, Union
+
+import re
+import logging
+from slixmpp import Message
+from poezio import tabs
+from poezio.plugin import BasePlugin
+from urllib.parse import quote as urlquote
+
+
+# Module-level logger, named after this module.
+log = logging.getLogger(__name__)
+
+# The tab classes accepted as the ``tab`` argument of the plugin's
+# message handler.
+ChatTabs = Union[
+    tabs.MucTab, tabs.DynamicConversationTab,
+    tabs.StaticConversationTab, tabs.PrivateTab,
+]
+
+# Matches an http(s) link.  ``host`` is the authority part, which ends at
+# the first '/', '?', '#' or whitespace character; ``rest`` is whatever
+# follows, up to the next whitespace.  Keeping whitespace out of ``host``
+# stops a bare link such as "https://youtu.be now" from swallowing the
+# words that come after it, which made the host lookup fail.
+RE_URL: re.Pattern = re.compile(
+    r'https?://(?P<host>[^/?#\s]+)(?P<rest>\S*)'
+)
+
+# Hosts whose links get rewritten, as host: (service, proxy).  ``service``
+# names an entry of the plugin's ``services`` config section; ``proxy`` is
+# True when the whole original URL is url-encoded and appended to that
+# service instead of merely swapping the domain.
+SERVICES: Dict[str, Tuple[str, bool]] = {
+    **dict.fromkeys(
+        (
+            'm.youtube.com',
+            'www.youtube.com',
+            'youtube.com',
+            'youtu.be',
+            'youtube-nocookie.com',
+        ),
+        ('invidious', False),
+    ),
+    **dict.fromkeys(
+        ('mobile.twitter.com', 'www.twitter.com', 'twitter.com'),
+        ('nitter', False),
+    ),
+    **dict.fromkeys(
+        ('pic.twitter.com', 'pbs.twimg.com'),
+        ('nitter_img', True),
+    ),
+    **dict.fromkeys(
+        ('instagram.com', 'www.instagram.com', 'm.instagram.com'),
+        ('bibliogram', False),
+    ),
+}
+
+def proxy(service: str) -> Callable[[str], str]:
+    """
+        Build a rewriter for services that need the original url.
+
+        The returned function percent-encodes the url it is given and
+        appends it to ``service``.
+    """
+    def append_quoted(origin: str) -> str:
+        encoded = urlquote(origin)
+        return service + encoded
+
+    return append_quoted
+
+
+class Plugin(BasePlugin):
+    """
+        UntrackMe: rewrite the links found in chat messages.
+
+        Two independent passes run over the message body, each one enabled
+        by a boolean of the ``default`` config section:
+
+        * ``cleanup``: strip well-known tracking parameters from links.
+        * ``redirect``: point links to the hosts listed in ``SERVICES`` at
+          the alternatives configured in the ``services`` section.
+
+        When ``display_corrections`` is set, every rewrite is also reported
+        through ``self.api.information``.
+    """
+
+    default_config: Dict[str, Dict[str, Union[str, bool]]] = {
+        'default': {
+            'cleanup': True,
+            'redirect': True,
+            'display_corrections': True,
+        },
+        'services': {
+            'invidious': 'https://invidio.us',
+            'nitter': 'https://nitter.net',
+            'bibliogram': 'https://bibliogram.art',
+        },
+    }
+
+    # Parameters removed by cleanup_url():
+    #   fbclid: used globally (Facebook)
+    #   utm_*: used globally https://en.wikipedia.org/wiki/UTM_parameters
+    #   dclid, ncid: DoubleClick (Google)
+    #   ref_src, ref_url: twitter
+    # Others exist but are excluded because they are not common.
+    TRACKING_PARAMS: Tuple[str, ...] = (
+        'fbclid', 'dclid', 'ncid',
+        'utm_source', 'utm_medium', 'utm_campaign', 'utm_term',
+        'utm_content',
+        'ref_src', 'ref_url',
+    )
+
+    # For the cleanup pass a link runs from its scheme to the next
+    # whitespace character.
+    _RE_LINK: re.Pattern = re.compile(r'https?://\S+')
+
+    def init(self):
+        """
+            Derive the ``nitter_img`` service and register the handlers.
+
+            ``nitter_img`` is the configured ``nitter`` address followed by
+            ``/pic/``; it is stored in the ``services`` section so that
+            map_services() can look it up like any other service.
+        """
+        nitter_img = self.config.get('nitter', section='services') + '/pic/'
+        self.config.set('nitter_img', nitter_img, section='services')
+
+        # Presumably the *_say events carry outgoing messages and the
+        # *_msg events incoming ones -- confirm against the poezio docs.
+        events = (
+            'muc_say', 'conversation_say', 'private_say',
+            'muc_msg', 'conversation_msg', 'private_msg',
+        )
+        for event in events:
+            self.api.add_event_handler(event, self.handle_msg)
+
+    def map_services(self, match: re.Match) -> str:
+        """
+            If it matches a host that we know about, change the domain for
+            the alternative service. Some hosts need to be proxied instead
+            (such as twitter pictures), so they're url encoded and appended
+            to the proxy service.
+
+            ``match`` must come from ``RE_URL`` (it needs the ``host`` and
+            ``rest`` groups). Returns the replacement text for the link,
+            which is the link itself when the host is unknown.
+        """
+
+        # Host names are case-insensitive, the SERVICES keys are lowercase.
+        dest = SERVICES.get(match.group('host').lower())
+        if dest is None:
+            return match.group(0)
+
+        destname, needs_proxy = dest
+        replaced = self.config.get(destname, section='services')
+        if not needs_proxy:
+            return replaced + match.group('rest')
+
+        url = match.group(0)
+
+        # TODO: count parenthesis?
+        # A comma at the end of a link is punctuation of the surrounding
+        # sentence: keep it out of the encoded url and put it back as-is.
+        trailing = ''
+        if url.endswith(','):
+            url, trailing = url[:-1], ','
+
+        return replaced + urlquote(url) + trailing
+
+    def handle_msg(self, msg: Message, tab: ChatTabs) -> None:
+        """
+            Apply the enabled passes to ``msg['body']``, in place.
+
+            The original and rewritten bodies are always logged at debug
+            level; they are also shown with ``self.api.information`` when
+            ``display_corrections`` is set and the body actually changed.
+        """
+        orig = msg['body']
+
+        if self.config.get('cleanup', section='default'):
+            msg['body'] = self.cleanup_url(msg['body'])
+        if self.config.get('redirect', section='default'):
+            msg['body'] = self.redirect_url(msg['body'])
+
+        log.debug(
+            'UntrackMe in tab \'%s\':\nOriginal: %s\nModified: %s',
+            tab.name, orig, msg['body'],
+        )
+
+        if self.config.get('display_corrections', section='default') and \
+           msg['body'] != orig:
+            self.api.information(
+                'UntrackMe in tab \'{}\':\nOriginal: {}\nModified: {}'.format(
+                    tab.name, orig, msg['body']
+                ),
+                'Info',
+            )
+
+    @classmethod
+    def _drop_trackers(cls, params: str) -> str:
+        """
+            Return the '&'-separated ``params`` without the ``name=value``
+            entries whose name is exactly one of ``TRACKING_PARAMS``.
+        """
+        kept = []
+        for param in params.split('&'):
+            name, equals, _ = param.partition('=')
+            if equals and name in cls.TRACKING_PARAMS:
+                continue
+            kept.append(param)
+        return '&'.join(kept)
+
+    def _clean_link(self, match: re.Match) -> str:
+        """
+            Return the matched link with the tracking parameters removed
+            from its query and fragment. A link that carries none is
+            returned untouched.
+        """
+        url = match.group(0)
+        head, hash_sep, fragment = url.partition('#')
+        base, query_sep, query = head.partition('?')
+
+        clean_query = self._drop_trackers(query)
+        clean_fragment = self._drop_trackers(fragment)
+        if clean_query == query and clean_fragment == fragment:
+            return url
+
+        # A '?' or '#' is dropped only when the cleanup emptied what
+        # followed it, so nothing is left dangling.
+        result = base
+        if clean_query or clean_query == query:
+            result += query_sep + clean_query
+        if clean_fragment or clean_fragment == fragment:
+            result += hash_sep + clean_fragment
+        return result
+
+    def cleanup_url(self, txt: str) -> str:
+        """
+            Remove every ``TRACKING_PARAMS`` parameter from the http(s)
+            links of ``txt`` and return the result. Text outside links and
+            parameters whose name merely ends like a tracker are left
+            alone.
+        """
+        return self._RE_LINK.sub(self._clean_link, txt)
+
+    def redirect_url(self, txt: str) -> str:
+        """Return ``txt`` with every ``RE_URL`` link run through
+        map_services()."""
+        return RE_URL.sub(self.map_services, txt)