diff options
Diffstat (limited to 'sleekxmpp/xmlstream/tostring.py')
-rw-r--r-- | sleekxmpp/xmlstream/tostring.py | 83 |
1 files changed, 56 insertions, 27 deletions
diff --git a/sleekxmpp/xmlstream/tostring.py b/sleekxmpp/xmlstream/tostring.py index 2480f9b2..c49abd3e 100644 --- a/sleekxmpp/xmlstream/tostring.py +++ b/sleekxmpp/xmlstream/tostring.py @@ -24,25 +24,25 @@ if sys.version_info < (3, 0): XML_NS = 'http://www.w3.org/XML/1998/namespace' -def tostring(xml=None, xmlns='', stanza_ns='', stream=None, - outbuffer='', top_level=False, open_only=False): +def tostring(xml=None, xmlns='', stream=None, outbuffer='', + top_level=False, open_only=False, namespaces=None): """Serialize an XML object to a Unicode string. - If namespaces are provided using ``xmlns`` or ``stanza_ns``, then - elements that use those namespaces will not include the xmlns attribute - in the output. + If an outer xmlns is provided using ``xmlns``, then the current element's + namespace will not be included if it matches the outer namespace. An + exception is made for elements that have an attached stream, and appear + at the stream root. :param XML xml: The XML object to serialize. :param string xmlns: Optional namespace of an element wrapping the XML object. - :param string stanza_ns: The namespace of the stanza object that contains - the XML object. :param stream: The XML stream that generated the XML object. :param string outbuffer: Optional buffer for storing serializations during recursive calls. :param bool top_level: Indicates that the element is the outermost element. - + :param set namespaces: Track which namespaces are in active use so + that new ones can be declared when needed. :type xml: :py:class:`~xml.etree.ElementTree.Element` :type stream: :class:`~sleekxmpp.xmlstream.xmlstream.XMLStream` @@ -63,15 +63,19 @@ def tostring(xml=None, xmlns='', stanza_ns='', stream=None, default_ns = '' stream_ns = '' + use_cdata = False + if stream: default_ns = stream.default_ns stream_ns = stream.stream_ns + use_cdata = stream.use_cdata # Output the tag name and derived namespace of the element. 
namespace = '' - if top_level and tag_xmlns not in ['', default_ns, stream_ns] or \ - tag_xmlns not in ['', xmlns, stanza_ns, stream_ns]: - namespace = ' xmlns="%s"' % tag_xmlns + if tag_xmlns: + if top_level and tag_xmlns not in [default_ns, xmlns, stream_ns] \ + or not top_level and tag_xmlns != xmlns: + namespace = ' xmlns="%s"' % tag_xmlns if stream and tag_xmlns in stream.namespace_map: mapped_namespace = stream.namespace_map[tag_xmlns] if mapped_namespace: @@ -80,21 +84,28 @@ def tostring(xml=None, xmlns='', stanza_ns='', stream=None, output.append(namespace) # Output escaped attribute values. + new_namespaces = set() for attrib, value in xml.attrib.items(): - value = xml_escape(value) + value = escape(value, use_cdata) if '}' not in attrib: output.append(' %s="%s"' % (attrib, value)) else: attrib_ns = attrib.split('}')[0][1:] attrib = attrib.split('}')[1] - if stream and attrib_ns in stream.namespace_map: + if attrib_ns == XML_NS: + output.append(' xml:%s="%s"' % (attrib, value)) + elif stream and attrib_ns in stream.namespace_map: mapped_ns = stream.namespace_map[attrib_ns] if mapped_ns: - output.append(' %s:%s="%s"' % (mapped_ns, - attrib, - value)) - elif attrib_ns == XML_NS: - output.append(' xml:%s="%s"' % (attrib, value)) + if namespaces is None: + namespaces = set() + if attrib_ns not in namespaces: + namespaces.add(attrib_ns) + new_namespaces.add(attrib_ns) + output.append(' xmlns:%s="%s"' % ( + mapped_ns, attrib_ns)) + output.append(' %s:%s="%s"' % ( + mapped_ns, attrib, value)) if open_only: # Only output the opening tag, regardless of content. @@ -105,24 +116,30 @@ def tostring(xml=None, xmlns='', stanza_ns='', stream=None, # If there are additional child elements to serialize. 
output.append(">") if xml.text: - output.append(xml_escape(xml.text)) + output.append(escape(xml.text, use_cdata)) if len(xml): for child in xml: - output.append(tostring(child, tag_xmlns, stanza_ns, stream)) + output.append(tostring(child, tag_xmlns, stream, + namespaces=namespaces)) output.append("</%s>" % tag_name) elif xml.text: # If we only have text content. - output.append(">%s</%s>" % (xml_escape(xml.text), tag_name)) + output.append(">%s</%s>" % (escape(xml.text, use_cdata), tag_name)) else: # Empty element. output.append(" />") if xml.tail: # If there is additional text after the element. - output.append(xml_escape(xml.tail)) + output.append(escape(xml.tail, use_cdata)) + for ns in new_namespaces: + # Remove namespaces introduced in this context. This is necessary + # because the namespaces object continues to be shared with other + # contexts. + namespaces.remove(ns) return ''.join(output) -def xml_escape(text): +def escape(text, use_cdata=False): """Convert special characters in XML to escape sequences. :param string text: The XML text to convert. @@ -132,12 +149,24 @@ def xml_escape(text): if type(text) != types.UnicodeType: text = unicode(text, 'utf-8', 'ignore') - text = list(text) escapes = {'&': '&amp;', '<': '&lt;', '>': '&gt;', "'": '&apos;', '"': '&quot;'} - for i, c in enumerate(text): - text[i] = escapes.get(c, c) - return ''.join(text) + + if not use_cdata: + text = list(text) + for i, c in enumerate(text): + text[i] = escapes.get(c, c) + return ''.join(text) + else: + escape_needed = False + for c in text: + if c in escapes: + escape_needed = True + break + if escape_needed: + escaped = map(lambda x : "<![CDATA[%s]]>" % x, text.split("]]>")) + return "<![CDATA[]]]><![CDATA[]>]]>".join(escaped) + return text |