diff options
Diffstat (limited to 'sleekxmpp/xmlstream/tostring')
-rw-r--r-- | sleekxmpp/xmlstream/tostring/__init__.py | 75 | ||||
-rw-r--r-- | sleekxmpp/xmlstream/tostring/tostring.py | 95 | ||||
-rw-r--r-- | sleekxmpp/xmlstream/tostring/tostring26.py | 104 |
3 files changed, 216 insertions, 58 deletions
diff --git a/sleekxmpp/xmlstream/tostring/__init__.py b/sleekxmpp/xmlstream/tostring/__init__.py index 6603cbb8..5852cba2 100644 --- a/sleekxmpp/xmlstream/tostring/__init__.py +++ b/sleekxmpp/xmlstream/tostring/__init__.py @@ -1,60 +1,19 @@ +""" + SleekXMPP: The Sleek XMPP Library + Copyright (C) 2010 Nathanael C. Fritz + This file is part of SleekXMPP. -class ToString(object): - def __str__(self, xml=None, xmlns='', stringbuffer=''): - if xml is None: - xml = self.xml - newoutput = [stringbuffer] - #TODO respect ET mapped namespaces - itag = xml.tag.split('}', 1)[-1] - if '}' in xml.tag: - ixmlns = xml.tag.split('}', 1)[0][1:] - else: - ixmlns = '' - nsbuffer = '' - if xmlns != ixmlns and ixmlns != '' and ixmlns != self.namespace: - if self.stream is not None and ixmlns in self.stream.namespace_map: - if self.stream.namespace_map[ixmlns] != '': - itag = "%s:%s" % (self.stream.namespace_map[ixmlns], itag) - else: - nsbuffer = """ xmlns="%s\"""" % ixmlns - if ixmlns not in ('', xmlns, self.namespace): - nsbuffer = """ xmlns="%s\"""" % ixmlns - newoutput.append("<%s" % itag) - newoutput.append(nsbuffer) - for attrib in xml.attrib: - if '{' not in attrib: - newoutput.append(""" %s="%s\"""" % (attrib, self.xmlesc(xml.attrib[attrib]))) - if len(xml) or xml.text or xml.tail: - newoutput.append(">") - if xml.text: - newoutput.append(self.xmlesc(xml.text)) - if len(xml): - for child in xml.getchildren(): - newoutput.append(self.__str__(child, ixmlns)) - newoutput.append("</%s>" % (itag, )) - if xml.tail: - newoutput.append(self.xmlesc(xml.tail)) - elif xml.text: - newoutput.append(">%s</%s>" % (self.xmlesc(xml.text), itag)) - else: - newoutput.append(" />") - return ''.join(newoutput) + See the file LICENSE for copying permission. +""" - def xmlesc(self, text): - text = list(text) - cc = 0 - matches = ('&', '<', '"', '>', "'") - for c in text: - if c in matches: - if c == '&': - text[cc] = '&' - elif c == '<': - text[cc] = '<' - elif c == '>': - text[cc] = '>' - elif c == "'": - text[cc] = ''' - else: - text[cc] = '"' - cc += 1 - return ''.join(text) +import sys + +# Import the correct tostring and xml_escape functions based on the Python +# version in order to properly handle Unicode. + +if sys.version_info < (3, 0): + from sleekxmpp.xmlstream.tostring.tostring26 import tostring, xml_escape +else: + from sleekxmpp.xmlstream.tostring.tostring import tostring, xml_escape + +__all__ = ['tostring', 'xml_escape'] diff --git a/sleekxmpp/xmlstream/tostring/tostring.py b/sleekxmpp/xmlstream/tostring/tostring.py new file mode 100644 index 00000000..c2696321 --- /dev/null +++ b/sleekxmpp/xmlstream/tostring/tostring.py @@ -0,0 +1,95 @@ +""" + SleekXMPP: The Sleek XMPP Library + Copyright (C) 2010 Nathanael C. Fritz + This file is part of SleekXMPP. + + See the file LICENSE for copying permission. +""" + + +def tostring(xml=None, xmlns='', stanza_ns='', stream=None, outbuffer=''): + """ + Serialize an XML object to a Unicode string. + + If namespaces are provided using xmlns or stanza_ns, then elements + that use those namespaces will not include the xmlns attribute in + the output. + + Arguments: + xml -- The XML object to serialize. If the value is None, + then the XML object contained in this stanza + object will be used. + xmlns -- Optional namespace of an element wrapping the XML + object. + stanza_ns -- The namespace of the stanza object that contains + the XML object. + stream -- The XML stream that generated the XML object. + outbuffer -- Optional buffer for storing serializations during + recursive calls. + """ + # Add previous results to the start of the output. + output = [outbuffer] + + # Extract the element's tag name. + tag_name = xml.tag.split('}', 1)[-1] + + # Extract the element's namespace if it is defined. + if '}' in xml.tag: + tag_xmlns = xml.tag.split('}', 1)[0][1:] + else: + tag_xmlns = '' + + # Output the tag name and derived namespace of the element. + namespace = '' + if tag_xmlns not in ['', xmlns, stanza_ns]: + namespace = ' xmlns="%s"' % tag_xmlns + if stream and tag_xmlns in stream.namespace_map: + mapped_namespace = stream.namespace_map[tag_xmlns] + if mapped_namespace: + tag = "%s:%s" % (mapped_namespace, tag_name) + output.append("<%s" % tag_name) + output.append(namespace) + + # Output escaped attribute values. + for attrib, value in xml.attrib.items(): + if '{' not in attrib: + value = xml_escape(value) + output.append(' %s="%s"' % (attrib, value)) + + if len(xml) or xml.text: + # If there are additional child elements to serialize. + output.append(">") + if xml.text: + output.append(xml_escape(xml.text)) + if len(xml): + for child in xml.getchildren(): + output.append(tostring(child, tag_xmlns, stanza_ns, stream)) + output.append("</%s>" % tag_name) + elif xml.text: + # If we only have text content. + output.append(">%s</%s>" % (xml_escape(xml.text), tag_name)) + else: + # Empty element. + output.append(" />") + if xml.tail: + # If there is additional text after the element. + output.append(xml_escape(xml.tail)) + return ''.join(output) + + +def xml_escape(text): + """ + Convert special characters in XML to escape sequences. + + Arguments: + text -- The XML text to convert. + """ + text = list(text) + escapes = {'&': '&', + '<': '<', + '>': '>', + "'": ''', + '"': '"'} + for i, c in enumerate(text): + text[i] = escapes.get(c, c) + return ''.join(text) diff --git a/sleekxmpp/xmlstream/tostring/tostring26.py b/sleekxmpp/xmlstream/tostring/tostring26.py new file mode 100644 index 00000000..7a376374 --- /dev/null +++ b/sleekxmpp/xmlstream/tostring/tostring26.py @@ -0,0 +1,104 @@ +""" + SleekXMPP: The Sleek XMPP Library + Copyright (C) 2010 Nathanael C. Fritz + This file is part of SleekXMPP. + + See the file LICENSE for copying permission. +""" + +from __future__ import unicode_literals +import types + + +def tostring(xml=None, xmlns='', stanza_ns='', stream=None, outbuffer=''): + """ + Serialize an XML object to a Unicode string. + + If namespaces are provided using xmlns or stanza_ns, then elements + that use those namespaces will not include the xmlns attribute in + the output. + + Arguments: + xml -- The XML object to serialize. If the value is None, + then the XML object contained in this stanza + object will be used. + xmlns -- Optional namespace of an element wrapping the XML + object. + stanza_ns -- The namespace of the stanza object that contains + the XML object. + stream -- The XML stream that generated the XML object. + outbuffer -- Optional buffer for storing serializations during + recursive calls. + """ + # Add previous results to the start of the output. + output = [outbuffer] + + # Extract the element's tag name. + tag_name = xml.tag.split('}', 1)[-1] + + # Extract the element's namespace if it is defined. + if '}' in xml.tag: + tag_xmlns = xml.tag.split('}', 1)[0][1:] + else: + tag_xmlns = u'' + + # Output the tag name and derived namespace of the element. + namespace = u'' + if tag_xmlns not in ['', xmlns, stanza_ns]: + namespace = u' xmlns="%s"' % tag_xmlns + if stream and tag_xmlns in stream.namespace_map: + mapped_namespace = stream.namespace_map[tag_xmlns] + if mapped_namespace: + tag = u"%s:%s" % (mapped_namespace, tag_name) + output.append(u"<%s" % tag_name) + output.append(namespace) + + # Output escaped attribute values. + for attrib, value in xml.attrib.items(): + if '{' not in attrib: + value = xml_escape(value) + output.append(u' %s="%s"' % (attrib, value)) + + if len(xml) or xml.text: + # If there are additional child elements to serialize. + output.append(u">") + if xml.text: + output.append(xml_escape(xml.text)) + if len(xml): + for child in xml.getchildren(): + output.append(tostring(child, tag_xmlns, stanza_ns, stream)) + output.append(u"</%s>" % tag_name) + if xml.tail: + # If there is additional text after the element. + output.append(xml_escape(xml.tail)) + elif xml.text: + # If we only have text content. + output.append(u">%s</%s>" % (xml_escape(xml.text), tag_name)) + else: + # Empty element. + output.append(u" />") + if xml.tail: + # If there is additional text after the element. + output.append(xml_escape(xml.tail)) + return u''.join(output) + + +def xml_escape(text): + """ + Convert special characters in XML to escape sequences. + + Arguments: + text -- The XML text to convert. + """ + if type(text) != types.UnicodeType: + text = list(unicode(text, 'utf-8', 'ignore')) + else: + text = list(text) + escapes = {u'&': u'&', + u'<': u'<', + u'>': u'>', + u"'": u''', + u'"': u'"'} + for i, c in enumerate(text): + text[i] = escapes.get(c, c) + return u''.join(text) |