summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormathieui <mathieui@mathieui.net>2014-10-16 18:49:32 +0200
committermathieui <mathieui@mathieui.net>2014-10-16 18:56:12 +0200
commita9f642f7438fe4489cdb9cc5ac59c929054656c8 (patch)
tree5ce6cba2105f178f92235fa325725941a5c9924c
parentd4590949f7b691e3e1d6eff8fa339e62a44bae51 (diff)
downloadpoezio-a9f642f7438fe4489cdb9cc5ac59c929054656c8.tar.gz
poezio-a9f642f7438fe4489cdb9cc5ac59c929054656c8.tar.bz2
poezio-a9f642f7438fe4489cdb9cc5ac59c929054656c8.tar.xz
poezio-a9f642f7438fe4489cdb9cc5ac59c929054656c8.zip
Extract XHTML-IM inline images by default
- Add two new options: tmp_image_dir and extract_inline_images - tmp_image_dir is $XDG_CACHE_HOME(usually ~/.cache)/poezio/images if unset - Name the images from a SHA-1 of their data and their mimetype - Output file:// links inside the message
-rw-r--r--data/default_config.cfg8
-rw-r--r--doc/source/configuration.rst19
-rw-r--r--src/config.py21
-rw-r--r--src/core/handlers.py29
-rw-r--r--src/poezio.py1
-rw-r--r--src/xhtml.py40
6 files changed, 104 insertions, 14 deletions
diff --git a/data/default_config.cfg b/data/default_config.cfg
index c1f766b0..35bc498b 100644
--- a/data/default_config.cfg
+++ b/data/default_config.cfg
@@ -379,6 +379,14 @@ ack_message_receipts = true
# Ask for message delivery receipts (XEP-0184)
request_message_receipts = true
+# Extract base64 images received in XHTML-IM messages
+# if true.
+extract_inline_images = true
+
+# The directory where the images will be saved; if unset,
+# defaults to $XDG_CACHE_HOME/poezio/images.
+tmp_image_dir =
+
# Receive the tune notifications or not (in order to display information
# in the roster).
# If this is set to false, then the display_tune_notifications
diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst
index 32d82f7a..44fd8e11 100644
--- a/doc/source/configuration.rst
+++ b/doc/source/configuration.rst
@@ -851,6 +851,25 @@ Other
The lang some automated entities will use when replying to you.
+ extract_inline_images
+
+ **Default value:** ``true``
+
+ Some clients send inline images in base64 inside some messages, which results in
+ a useless wall of text. If this option is ``true``, then that base64 text will
+ be replaced with a :file:`file://` link to the image file extracted in
+ :term:`tmp_image_dir` or :file:`$XDG_CACHE_HOME/poezio/images` by default, which
+ is usually :file:`~/.cache/poezio/images`.
+
+ tmp_image_dir
+
+ **Default value:** ``[empty]``
+
+ The directory where poezio will save the images received, if
+ :term:`extract_inline_images` is set to true. If unset, poezio
+ will default to :file:`$XDG_CACHE_HOME/poezio/images` which is
+ usually :file:`~/.cache/poezio/images`.
+
muc_history_length
**Default value:** ``50``
diff --git a/src/config.py b/src/config.py
index 354c3447..5bd1ac17 100644
--- a/src/config.py
+++ b/src/config.py
@@ -361,7 +361,6 @@ def file_ok(filepath):
def check_create_config_dir():
"""
create the configuration directory if it doesn't exist
- and copy the default config in it
"""
CONFIG_HOME = environ.get("XDG_CONFIG_HOME")
if not CONFIG_HOME:
@@ -374,6 +373,23 @@ def check_create_config_dir():
pass
return CONFIG_PATH
+def check_create_cache_dir():
+ """
+ create the cache directory if it doesn't exist
+ also create the subdirectories
+ """
+ global CACHE_DIR
+ CACHE_HOME = environ.get("XDG_CACHE_HOME")
+ if not CACHE_HOME:
+ CACHE_HOME = path.join(environ.get('HOME'), '.cache')
+ CACHE_DIR = path.join(CACHE_HOME, 'poezio')
+
+ try:
+ makedirs(CACHE_DIR)
+ makedirs(path.join(CACHE_DIR, 'images'))
+ except OSError:
+ pass
+
def run_cmdline_args(CONFIG_PATH):
"Parse the command line arguments"
global options
@@ -495,3 +511,6 @@ safeJID = None
# the global log dir
LOG_DIR = ''
+
+# the global cache dir
+CACHE_DIR = ''
diff --git a/src/core/handlers.py b/src/core/handlers.py
index 75c372bb..87aaecd5 100644
--- a/src/core/handlers.py
+++ b/src/core/handlers.py
@@ -10,6 +10,7 @@ import ssl
import time
from hashlib import sha1, sha512
from gettext import gettext as _
+from os import path
from sleekxmpp import InvalidJID
from sleekxmpp.stanza import Message
@@ -24,7 +25,7 @@ import windows
import xhtml
import multiuserchat as muc
from common import safeJID
-from config import config
+from config import config, CACHE_DIR
from contact import Resource
from logger import logger
from roster import roster
@@ -178,7 +179,11 @@ def on_normal_message(self, message):
return self.information('%s says: %s' % (message['from'], message['body']), 'Headline')
use_xhtml = config.get('enable_xhtml_im', True)
- body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
+ tmp_dir = config.get('tmp_image_dir', '') or path.join(CACHE_DIR, 'images')
+ extract_images = config.get('extract_inline_images', True)
+ body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
+ tmp_dir=tmp_dir,
+ extract_images=extract_images)
if not body:
return
@@ -223,7 +228,9 @@ def on_normal_message(self, message):
self.events.trigger('conversation_msg', message, conversation)
if not message['body']:
return
- body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
+ body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
+ tmp_dir=tmp_dir,
+ extract_images=extract_images)
delayed, date = common.find_delayed_tag(message)
def try_modify():
@@ -441,7 +448,11 @@ def on_groupchat_message(self, message):
self.events.trigger('muc_msg', message, tab)
use_xhtml = config.get('enable_xhtml_im', True)
- body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
+ tmp_dir = config.get('tmp_image_dir', '') or path.join(CACHE_DIR, 'images')
+ extract_images = config.get('extract_inline_images', True)
+ body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
+ tmp_dir=tmp_dir,
+ extract_images=extract_images)
if not body:
return
@@ -498,7 +509,11 @@ def on_groupchat_private_message(self, message):
room_from = jid.bare
use_xhtml = config.get('enable_xhtml_im', True)
- body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
+ tmp_dir = config.get('tmp_image_dir', '') or path.join(CACHE_DIR, 'images')
+ extract_images = config.get('extract_inline_images', True)
+ body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
+ tmp_dir=tmp_dir,
+ extract_images=extract_images)
tab = self.get_tab_by_name(jid.full, tabs.PrivateTab) # get the tab with the private conversation
ignore = config.get_by_tabname('ignore_private', False, room_from)
if not tab: # It's the first message we receive: create the tab
@@ -511,7 +526,9 @@ def on_groupchat_private_message(self, message):
self.xmpp.send_message(mto=jid.full, mbody=msg, mtype='chat')
return
self.events.trigger('private_msg', message, tab)
- body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
+ body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
+ tmp_dir=tmp_dir,
+ extract_images=extract_images)
if not body or not tab:
return
replaced_id = message['replace']['id']
diff --git a/src/poezio.py b/src/poezio.py
index 1baf10eb..f82f103f 100644
--- a/src/poezio.py
+++ b/src/poezio.py
@@ -30,6 +30,7 @@ def main():
config.run_cmdline_args(config_path)
config.create_global_config()
config.check_create_log_dir()
+ config.check_create_cache_dir()
config.setup_logging()
config.post_logging_setup()
diff --git a/src/xhtml.py b/src/xhtml.py
index 48664311..69519f8d 100644
--- a/src/xhtml.py
+++ b/src/xhtml.py
@@ -12,9 +12,13 @@ xhtml code to shell colors,
poezio colors to xhtml code
"""
-import re
+import base64
import curses
+import hashlib
+import re
+from os import path
from sleekxmpp.xmlstream import ET
+from urllib.parse import unquote
from io import BytesIO
from xml import sax
@@ -178,10 +182,12 @@ colors = {
whitespace_re = re.compile(r'\s+')
xhtml_attr_re = re.compile(r'\x19-?\d[^}]*}|\x19[buaio]')
+xhtml_data_re = re.compile(r'data:image/([a-z]+);base64,(.+)')
xhtml_simple_attr_re = re.compile(r'\x19\d')
-def get_body_from_message_stanza(message, use_xhtml=False):
+def get_body_from_message_stanza(message, use_xhtml=False,
+ tmp_dir=None, extract_images=False):
"""
Returns a string with xhtml markups converted to
poezio colors if there's an xhtml_im element, or
@@ -191,7 +197,8 @@ def get_body_from_message_stanza(message, use_xhtml=False):
xhtml = message['html'].xml
xhtml_body = xhtml.find('{http://www.w3.org/1999/xhtml}body')
if xhtml_body:
- content = xhtml_to_poezio_colors(xhtml_body)
+ content = xhtml_to_poezio_colors(xhtml_body, tmp_dir=tmp_dir,
+ extract_images=extract_images)
content = content if content else message['body']
return content or " "
return message['body']
@@ -281,7 +288,7 @@ def trim(string):
return re.sub(whitespace_re, ' ', string)
class XHTMLHandler(sax.ContentHandler):
- def __init__(self, force_ns=False):
+ def __init__(self, force_ns=False, tmp_dir=None, extract_images=False):
self.builder = []
self.formatting = []
self.attrs = []
@@ -291,6 +298,9 @@ class XHTMLHandler(sax.ContentHandler):
# do not care about xhtml-in namespace
self.force_ns = force_ns
+ self.tmp_dir = tmp_dir
+ self.extract_images = extract_images
+
@property
def result(self):
return ''.join(self.builder).strip()
@@ -331,7 +341,22 @@ class XHTMLHandler(sax.ContentHandler):
elif name == 'em':
self.append_formatting('\x19i')
elif name == 'img':
- builder.append(trim(attrs['src']))
+ if re.match(xhtml_data_re, attrs['src']) and self.extract_images:
+ type_, data = [i for i in re.split(xhtml_data_re, attrs['src']) if i]
+ bin_data = base64.b64decode(unquote(data))
+ filename = hashlib.sha1(bin_data).hexdigest() + '.' + type_
+ filepath = path.join(self.tmp_dir, filename)
+ if not path.exists(filepath):
+ try:
+ with open(filepath, 'wb') as fd:
+ fd.write(bin_data)
+ builder.append('file://%s' % filepath)
+ except Exception as e:
+ builder.append('[Error while saving image: %s]' % e)
+ else:
+ builder.append('file://%s' % filepath)
+ else:
+ builder.append(trim(attrs['src']))
if 'alt' in attrs:
builder.append(' (%s)' % trim(attrs['alt']))
elif name == 'ul':
@@ -389,13 +414,14 @@ class XHTMLHandler(sax.ContentHandler):
if 'title' in attrs:
builder.append(' [' + attrs['title'] + ']')
-def xhtml_to_poezio_colors(xml, force=False):
+def xhtml_to_poezio_colors(xml, force=False, tmp_dir=None, extract_images=None):
if isinstance(xml, str):
xml = xml.encode('utf8')
elif not isinstance(xml, bytes):
xml = ET.tostring(xml)
- handler = XHTMLHandler(force_ns=force)
+ handler = XHTMLHandler(force_ns=force, tmp_dir=tmp_dir,
+ extract_images=extract_images)
parser = sax.make_parser()
parser.setFeature(sax.handler.feature_namespaces, True)
parser.setContentHandler(handler)