diff options
author | mathieui <mathieui@mathieui.net> | 2013-04-18 19:54:13 +0200 |
---|---|---|
committer | mathieui <mathieui@mathieui.net> | 2013-04-18 19:54:13 +0200 |
commit | 93cdac01ae103d544dcf8dc8924a647531b37ffc (patch) | |
tree | 60b4cc45b92d642c1e623bfab6c19b0a7b9ae56b /src | |
parent | 9fd15cc2a25b31dad3e70422fc16335f8b143525 (diff) | |
download | poezio-93cdac01ae103d544dcf8dc8924a647531b37ffc.tar.gz poezio-93cdac01ae103d544dcf8dc8924a647531b37ffc.tar.bz2 poezio-93cdac01ae103d544dcf8dc8924a647531b37ffc.tar.xz poezio-93cdac01ae103d544dcf8dc8924a647531b37ffc.zip |
Use ''.join(str) instead of str += …
Should improve the speed of xhtml parsing, but the gain will be noticeable only
with *heavy* (e.g. 1000 chars with the rainbow plugin) xhtml stanzas.
Diffstat (limited to 'src')
-rw-r--r-- | src/xhtml.py | 44 |
1 files changed, 23 insertions, 21 deletions
diff --git a/src/xhtml.py b/src/xhtml.py index cd68b14a..23de8a32 100644 --- a/src/xhtml.py +++ b/src/xhtml.py @@ -286,32 +286,34 @@ def xhtml_to_poezio_colors(xml): def trim(string): return re.sub(whitespace_re, ' ', string) - message = '' + builder = [] + if version_info[1] == 2: elems = xml.iter() else: elems = xml.getiterator() + for elem in elems: if elem.tag == '{http://www.w3.org/1999/xhtml}a': if 'href' in elem.attrib and elem.attrib['href'] != elem.text: - message += '\x19u%s\x19o (%s)' % (trim(elem.attrib['href']), trim(elem.text if elem.text else "")) + builder.append('\x19u%s\x19o (%s)' % (trim(elem.attrib['href']), trim(elem.text if elem.text else ""))) else: - message += '\x19u' + (elem.text if elem.text else "") + '\x19o' + builder.append('\x19u' + (elem.text if elem.text else "") + '\x19o') elif elem.tag == '{http://www.w3.org/1999/xhtml}blockquote': - message += '“' + builder.append('“') elif elem.tag == '{http://www.w3.org/1999/xhtml}body': pass elif elem.tag == '{http://www.w3.org/1999/xhtml}br': - message += '\n' + builder.append('\n') elif elem.tag == '{http://www.w3.org/1999/xhtml}cite': - message += '\x19u' + builder.append('\x19u') elif elem.tag == '{http://www.w3.org/1999/xhtml}em': - message += '\x19i' + builder.append('\x19i') elif elem.tag == '{http://www.w3.org/1999/xhtml}img' and 'src' in elem.attrib: if 'alt' in elem.attrib: - message += '%s (%s)' % (trim(elem.attrib['src']), trim(elem.attrib['alt'])) + builder.append('%s (%s)' % (trim(elem.attrib['src']), trim(elem.attrib['alt']))) else: - message += elem.attrib['src'] + builder.append(elem.attrib['src']) elif elem.tag == '{http://www.w3.org/1999/xhtml}li': pass elif elem.tag == '{http://www.w3.org/1999/xhtml}ol': @@ -321,42 +323,42 @@ def xhtml_to_poezio_colors(xml): elif elem.tag == '{http://www.w3.org/1999/xhtml}span': pass elif elem.tag == '{http://www.w3.org/1999/xhtml}strong': - message += '\x19b' + builder.append('\x19b') elif elem.tag == '{http://www.w3.org/1999/xhtml}ul': 
pass if ('style' in elem.attrib and elem.tag != '{http://www.w3.org/1999/xhtml}br' and elem.tag != '{http://www.w3.org/1999/xhtml}em' and elem.tag != '{http://www.w3.org/1999/xhtml}strong'): - message += parse_css(elem.attrib['style']) + builder.append(parse_css(elem.attrib['style'])) if (elem.text and elem.tag != '{http://www.w3.org/1999/xhtml}a' and elem.tag != '{http://www.w3.org/1999/xhtml}br' and elem.tag != '{http://www.w3.org/1999/xhtml}img'): - message += trim(elem.text) + builder.append(trim(elem.text)) if ('style' in elem.attrib and elem.tag != '{http://www.w3.org/1999/xhtml}br' and elem.tag != '{http://www.w3.org/1999/xhtml}em' and elem.tag != '{http://www.w3.org/1999/xhtml}strong'): - message += '\x19o' + builder.append('\x19o') if elem.tag == '{http://www.w3.org/1999/xhtml}blockquote': - message += '”' + builder.append('”') elif elem.tag == '{http://www.w3.org/1999/xhtml}cite': - message += '\x19o' + builder.append('\x19o') elif elem.tag == '{http://www.w3.org/1999/xhtml}em': - message += '\x19o' + builder.append('\x19o') elif elem.tag == '{http://www.w3.org/1999/xhtml}strong' or elem.tag == '{http://www.w3.org/1999/xhtml}b': - message += '\x19o' + builder.append('\x19o') elif elem.tag == '{http://www.w3.org/1999/xhtml}u': - message += '\x19o' + builder.append('\x19o') if 'title' in elem.attrib: - message += ' [' + elem.attrib['title'] + ']' + builder.append(' [' + elem.attrib['title'] + ']') if elem.tail: - message += trim(elem.tail) - return message + builder.append(trim(elem.tail)) + return ''.join(builder) def clean_text(s): """ |