summaryrefslogtreecommitdiff
path: root/src/xhtml.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/xhtml.py')
-rw-r--r--src/xhtml.py543
1 files changed, 0 insertions, 543 deletions
diff --git a/src/xhtml.py b/src/xhtml.py
deleted file mode 100644
index b84ce943..00000000
--- a/src/xhtml.py
+++ /dev/null
@@ -1,543 +0,0 @@
-# Copyright 2010-2011 Florent Le Coz <louiz@louiz.org>
-#
-# This file is part of Poezio.
-#
-# Poezio is free software: you can redistribute it and/or modify
-# it under the terms of the zlib license. See the COPYING file.
-
-"""
-Various methods to convert
-shell colors to poezio colors,
-xhtml code to shell colors,
-poezio colors to xhtml code
-"""
-
-import base64
-import curses
-import hashlib
-import re
-from os import path
-from slixmpp.xmlstream import ET
-from urllib.parse import unquote
-
-from io import BytesIO
-from xml import sax
-from xml.sax import saxutils
-
-digits = '0123456789' # never trust the modules
-
-XHTML_NS = 'http://www.w3.org/1999/xhtml'
-
-# HTML named colors
-colors = {
- 'aliceblue': 231,
- 'antiquewhite': 231,
- 'aqua': 51,
- 'aquamarine': 122,
- 'azure': 231,
- 'beige': 231,
- 'bisque': 230,
- 'black': 232,
- 'blanchedalmond': 230,
- 'blue': 21,
- 'blueviolet': 135,
- 'brown': 124,
- 'burlywood': 223,
- 'cadetblue': 109,
- 'chartreuse': 118,
- 'chocolate': 172,
- 'coral': 209,
- 'cornflowerblue': 111,
- 'cornsilk': 231,
- 'crimson': 197,
- 'cyan': 51,
- 'darkblue': 19,
- 'darkcyan': 37,
- 'darkgoldenrod': 178,
- 'darkgray': 247,
- 'darkgreen': 28,
- 'darkgrey': 247,
- 'darkkhaki': 186,
- 'darkmagenta': 127,
- 'darkolivegreen': 65,
- 'darkorange': 214,
- 'darkorchid': 134,
- 'darkred': 124,
- 'darksalmon': 216,
- 'darkseagreen': 151,
- 'darkslateblue': 61,
- 'darkslategray': 59,
- 'darkslategrey': 59,
- 'darkturquoise': 44,
- 'darkviolet': 128,
- 'deeppink': 199,
- 'deepskyblue': 45,
- 'dimgray': 241,
- 'dimgrey': 241,
- 'dodgerblue': 39,
- 'firebrick': 160,
- 'floralwhite': 231,
- 'forestgreen': 34,
- 'fuchsia': 201,
- 'gainsboro': 252,
- 'ghostwhite': 231,
- 'gold': 226,
- 'goldenrod': 214,
- 'gray': 244,
- 'green': 34,
- 'greenyellow': 191,
- 'grey': 244,
- 'honeydew': 231,
- 'hotpink': 212,
- 'indianred': 174,
- 'indigo': 55,
- 'ivory': 231,
- 'khaki': 229,
- 'lavender': 231,
- 'lavenderblush': 231,
- 'lawngreen': 118,
- 'lemonchiffon': 230,
- 'lightblue': 195,
- 'lightcoral': 217,
- 'lightcyan': 231,
- 'lightgoldenrodyellow': 230,
- 'lightgray': 251,
- 'lightgreen': 157,
- 'lightgrey': 251,
- 'lightpink': 224,
- 'lightsalmon': 216,
- 'lightseagreen': 43,
- 'lightskyblue': 153,
- 'lightslategray': 109,
- 'lightslategrey': 109,
- 'lightsteelblue': 189,
- 'lightyellow': 231,
- 'lime': 46,
- 'limegreen': 77,
- 'linen': 231,
- 'magenta': 201,
- 'maroon': 124,
- 'mediumaquamarine': 115,
- 'mediumblue': 20,
- 'mediumorchid': 170,
- 'mediumpurple': 141,
- 'mediumseagreen': 78,
- 'mediumslateblue': 105,
- 'mediumspringgreen': 49,
- 'mediumturquoise': 80,
- 'mediumvioletred': 163,
- 'midnightblue': 18,
- 'mintcream': 231,
- 'mistyrose': 231,
- 'moccasin': 230,
- 'navajowhite': 230,
- 'navy': 19,
- 'oldlace': 231,
- 'olive': 142,
- 'olivedrab': 106,
- 'orange': 214,
- 'orangered': 202,
- 'orchid': 213,
- 'palegoldenrod': 229,
- 'palegreen': 157,
- 'paleturquoise': 195,
- 'palevioletred': 211,
- 'papayawhip': 231,
- 'peachpuff': 230,
- 'peru': 179,
- 'pink': 224,
- 'plum': 219,
- 'powderblue': 195,
- 'purple': 127,
- 'red': 196,
- 'rosybrown': 181,
- 'royalblue': 69,
- 'saddlebrown': 130,
- 'salmon': 216,
- 'sandybrown': 216,
- 'seagreen': 72,
- 'seashell': 231,
- 'sienna': 131,
- 'silver': 250,
- 'skyblue': 153,
- 'slateblue': 104,
- 'slategray': 109,
- 'slategrey': 109,
- 'snow': 231,
- 'springgreen': 48,
- 'steelblue': 74,
- 'tan': 187,
- 'teal': 37,
- 'thistle': 225,
- 'tomato': 209,
- 'turquoise': 86,
- 'violet': 219,
- 'wheat': 230,
- 'white': 255,
- 'whitesmoke': 255,
- 'yellow': 226,
- 'yellowgreen': 149
-}
-
-whitespace_re = re.compile(r'\s+')
-
-xhtml_attr_re = re.compile(r'\x19-?\d[^}]*}|\x19[buaio]')
-xhtml_data_re = re.compile(r'data:image/([a-z]+);base64,(.+)')
-poezio_color_double = re.compile(r'(?:\x19\d+}|\x19\d)+(\x19\d|\x19\d+})')
-poezio_format_trim = re.compile(r'(\x19\d+}|\x19\d|\x19[buaio]|\x19o)+\x19o')
-
-xhtml_simple_attr_re = re.compile(r'\x19\d')
-
-def get_body_from_message_stanza(message, use_xhtml=False,
- tmp_dir=None, extract_images=False):
- """
- Returns a string with xhtml markups converted to
- poezio colors if there's an xhtml_im element, or
- the body (without any color) otherwise
- """
- if use_xhtml:
- xhtml = message['html'].xml
- xhtml_body = xhtml.find('{http://www.w3.org/1999/xhtml}body')
- if xhtml_body:
- content = xhtml_to_poezio_colors(xhtml_body, tmp_dir=tmp_dir,
- extract_images=extract_images)
- content = content if content else message['body']
- return content or " "
- return message['body']
-
-def ncurses_color_to_html(color):
- """
- Takes an int between 0 and 256 and returns
- a string of the form #XXXXXX representing an
- html color.
- """
- if color <= 15:
- try:
- (r, g, b) = curses.color_content(color)
- except: # fallback in faulty terminals (e.g. xterm)
- (r, g, b) = curses.color_content(color%8)
- r = r / 1000 * 6 - 0.01
- g = g / 1000 * 6 - 0.01
- b = b / 1000 * 6 - 0.01
- elif color <= 231:
- color = color - 16
- r = color % 6
- color = color / 6
- g = color % 6
- color = color / 6
- b = color % 6
- else:
- color -= 232
- r = g = b = color / 24 * 6
- return '#%02X%02X%02X' % (r*256/6, g*256/6, b*256/6)
-
-def parse_css(css):
- def get_color(value):
- if value[0] == '#':
- value = value[1:]
- length = len(value)
- if length != 3 and length != 6:
- return -1
- value = int(value, 16)
- if length == 6:
- r = int(value >> 16)
- g = int((value >> 8) & 0xff)
- b = int(value & 0xff)
- if r == g == b:
- return 232 + int(r/10.6251)
- div = 42.51
- else:
- r = int(value >> 8)
- g = int((value >> 4) & 0xf)
- b = int(value & 0xf)
- if r == g == b:
- return 232 + int(1.54*r)
- div = 2.51
- return 6*6*int(r/div) + 6*int(g/div) + int(b/div) + 16
- if value in colors:
- return colors[value]
- return -1
- shell = ''
- rules = css.split(';')
- for rule in rules:
- if ':' not in rule:
- continue
- key, value = rule.split(':', 1)
- key = key.strip()
- value = value.strip()
- if key == 'background-color':
- pass#shell += '\x191'
- elif key == 'color':
- color = get_color(value)
- if color != -1:
- shell += '\x19%d}' % color
- elif key == 'font-style':
- shell += '\x19i'
- elif key == 'font-weight':
- shell += '\x19b'
- elif key == 'margin-left':
- shell += ' '
- elif key == 'text-align':
- pass
- elif key == 'text-decoration':
- if value == 'underline':
- shell += '\x19u'
- elif value == 'blink':
- shell += '\x19a'
- return shell
-
-def trim(string):
- return re.sub(whitespace_re, ' ', string)
-
-class XHTMLHandler(sax.ContentHandler):
- def __init__(self, force_ns=False, tmp_dir=None, extract_images=False):
- self.builder = []
- self.formatting = []
- self.attrs = []
- self.list_state = []
- self.is_pre = False
- self.a_start = 0
- # do not care about xhtml-in namespace
- self.force_ns = force_ns
-
- self.tmp_dir = tmp_dir
- self.extract_images = extract_images
-
- @property
- def result(self):
- sanitized = re.sub(poezio_color_double, r'\1', ''.join(self.builder).strip())
- return re.sub(poezio_format_trim, '\x19o', sanitized)
-
- def append_formatting(self, formatting):
- self.formatting.append(formatting)
- self.builder.append(formatting)
-
- def pop_formatting(self):
- self.formatting.pop()
- self.builder.append('\x19o' + ''.join(self.formatting))
-
- def characters(self, characters):
- self.builder.append(characters if self.is_pre else trim(characters))
-
- def startElementNS(self, name, _, attrs):
- if name[0] != XHTML_NS and not self.force_ns:
- return
-
- builder = self.builder
- attrs = {name: value for ((ns, name), value) in attrs.items() if ns is None}
- self.attrs.append(attrs)
-
- if 'style' in attrs:
- style = parse_css(attrs['style'])
- self.append_formatting(style)
-
- name = name[1]
- if name == 'a':
- self.append_formatting('\x19u')
- self.a_start = len(self.builder)
- elif name == 'blockquote':
- builder.append('ā€œ')
- elif name == 'br':
- builder.append('\n')
- elif name == 'cite':
- self.append_formatting('\x19u')
- elif name == 'em':
- self.append_formatting('\x19i')
- elif name == 'img':
- if re.match(xhtml_data_re, attrs['src']) and self.extract_images:
- type_, data = [i for i in re.split(xhtml_data_re, attrs['src']) if i]
- bin_data = base64.b64decode(unquote(data))
- filename = hashlib.sha1(bin_data).hexdigest() + '.' + type_
- filepath = path.join(self.tmp_dir, filename)
- if not path.exists(filepath):
- try:
- with open(filepath, 'wb') as fd:
- fd.write(bin_data)
- builder.append('file://%s' % filepath)
- except Exception as e:
- builder.append('[Error while saving image: %s]' % e)
- else:
- builder.append('file://%s' % filepath)
- else:
- builder.append(trim(attrs['src']))
- if 'alt' in attrs:
- builder.append(' (%s)' % trim(attrs['alt']))
- elif name == 'ul':
- self.list_state.append('ul')
- elif name == 'ol':
- self.list_state.append(1)
- elif name == 'li':
- try:
- state = self.list_state[-1]
- except IndexError:
- state = 'ul'
- if state == 'ul':
- builder.append('\nā€¢ ')
- else:
- builder.append('\n%d) ' % state)
- state += 1
- self.list_state[-1] = state
- elif name == 'p':
- builder.append('\n')
- elif name == 'pre':
- builder.append('\n')
- self.is_pre = True
- elif name == 'strong':
- self.append_formatting('\x19b')
-
- def endElementNS(self, name, _):
- if name[0] != XHTML_NS and not self.force_ns:
- return
-
- builder = self.builder
- attrs = self.attrs.pop()
- name = name[1]
-
- if name == 'a':
- self.pop_formatting()
- # do not display the link twice
- text_elements = filter(lambda x: not x.startswith('\x19'),
- self.builder[self.a_start:])
- link_text = ''.join(text_elements).strip()
- if 'href' in attrs and attrs['href'] != link_text:
- builder.append(' (%s)' % trim(attrs['href']))
- elif name == 'blockquote':
- builder.append('ā€')
- elif name in ('cite', 'em', 'strong'):
- self.pop_formatting()
- elif name in ('ol', 'p', 'ul'):
- builder.append('\n')
- elif name == 'pre':
- builder.append('\n')
- self.is_pre = False
-
- if 'style' in attrs:
- self.pop_formatting()
-
- if 'title' in attrs:
- builder.append(' [' + attrs['title'] + ']')
-
-def xhtml_to_poezio_colors(xml, force=False, tmp_dir=None, extract_images=None):
- if isinstance(xml, str):
- xml = xml.encode('utf8')
- elif not isinstance(xml, bytes):
- xml = ET.tostring(xml)
-
- handler = XHTMLHandler(force_ns=force, tmp_dir=tmp_dir,
- extract_images=extract_images)
- parser = sax.make_parser()
- parser.setFeature(sax.handler.feature_namespaces, True)
- parser.setContentHandler(handler)
- parser.parse(BytesIO(xml))
- return handler.result
-
-def clean_text(s):
- """
- Remove all xhtml-im attributes (\x19etc) from the string with the
- complete color format, i.e \x19xxx}
- """
- s = re.sub(xhtml_attr_re, "", s)
- return s
-
-def clean_text_simple(string):
- """
- Remove all \x19 from the string formatted with simple colors:
- \x198
- """
- pos = string.find('\x19')
- while pos != -1:
- string = string[:pos] + string[pos+2:]
- pos = string.find('\x19')
- return string
-
-def convert_simple_to_full_colors(text):
- """
- takes a \x19n formatted string and returns
- a \x19n} formatted one.
- """
- # TODO, have a single list of this. This is some sort of
- # dusplicate from windows.format_chars
- mapping = str.maketrans({'\x0E': '\x19b', '\x0F': '\x19o', '\x10': '\x19u',
- '\x11': '\x191', '\x12': '\x192', '\x13': '\x193',
- '\x14': '\x194', '\x15': '\x195', '\x16': '\x196',
- '\x17': '\x197', '\x18': '\x198', '\x19': '\x199'})
- text = text.translate(mapping)
- def add_curly_bracket(match):
- return match.group(0) + '}'
- return re.sub(xhtml_simple_attr_re, add_curly_bracket, text)
-
-number_to_color_names = {
- 1: 'red',
- 2: 'green',
- 3: 'yellow',
- 4: 'blue',
- 5: 'violet',
- 6: 'turquoise',
- 7: 'white'
-}
-
-def format_inline_css(_dict):
- return ''.join(('%s: %s;' % (key, value) for key, value in _dict.items()))
-
-def poezio_colors_to_html(string):
- """
- Convert poezio colors to html
- (e.g. \x191}: <span style='color: red'>)
- """
- # Maintain a list of the current css attributes used
- # And check if a tag is open (by design, we only open
- # spans tag, and they cannot be nested.
- current_attrs = {}
- tag_open = False
- next_attr_char = string.find('\x19')
- build = ["<body xmlns='http://www.w3.org/1999/xhtml'><p>"]
-
- def check_property(key, value):
- nonlocal tag_open
- if current_attrs.get(key, None) == value:
- return
- current_attrs[key] = value
- if tag_open:
- tag_open = False
- build.append('</span>')
-
- while next_attr_char != -1:
- attr_char = string[next_attr_char+1].lower()
-
- if next_attr_char != 0 and string[:next_attr_char]:
- if current_attrs and not tag_open:
- build.append('<span style="%s">' % format_inline_css(current_attrs))
- tag_open = True
- build.append(saxutils.escape(string[:next_attr_char]))
-
- if attr_char == 'o':
- if tag_open:
- build.append('</span>')
- tag_open = False
- current_attrs = {}
- elif attr_char == 'b':
- check_property('font-weight', 'bold')
- elif attr_char == 'u':
- check_property('text-decoration', 'underline')
-
- if attr_char in digits:
- number_str = string[next_attr_char+1:string.find('}', next_attr_char)]
- number = int(number_str)
- if number in number_to_color_names:
- check_property('color', number_to_color_names.get(number, 'black'))
- else:
- check_property('color', ncurses_color_to_html(number))
- string = string[next_attr_char+len(number_str)+2:]
- else:
- string = string[next_attr_char+2:]
- next_attr_char = string.find('\x19')
-
- if current_attrs and not tag_open and string:
- build.append('<span style="%s">' % format_inline_css(current_attrs))
- tag_open = True
- build.append(saxutils.escape(string))
- if tag_open:
- build.append('</span>')
- build.append("</p></body>")
- text = ''.join(build)
- return text.replace('\n', '<br />')