6 files changed, 327 insertions, 108 deletions
diff --git a/sleekxmpp/xmlstream/matcher/base.py b/sleekxmpp/xmlstream/matcher/base.py
index 51da0942..701ab32f 100644
--- a/sleekxmpp/xmlstream/matcher/base.py
+++ b/sleekxmpp/xmlstream/matcher/base.py
@@ -5,10 +5,30 @@
 
     See the file LICENSE for copying permission.
 """
+
+
 class MatcherBase(object):
 
-	def __init__(self, criteria):
-		self._criteria = criteria
-	
-	def match(self, xml):
-		return False
+    """
+    Base class for stanza matchers. Stanza matchers are used to pick
+    stanzas out of the XML stream and pass them to the appropriate
+    stream handlers.
+    """
+
+    def __init__(self, criteria):
+        """
+        Create a new stanza matcher.
+
+        Arguments:
+            criteria -- Object to compare some aspect of a stanza
+                        against.
+        """
+        self._criteria = criteria
+
+    def match(self, xml):
+        """
+        Check if a stanza matches the stored criteria.
+
+        Meant to be overridden.
+        """
+        return False
diff --git a/sleekxmpp/xmlstream/matcher/id.py b/sleekxmpp/xmlstream/matcher/id.py
index 43972c23..0c8ce2d8 100644
--- a/sleekxmpp/xmlstream/matcher/id.py
+++ b/sleekxmpp/xmlstream/matcher/id.py
@@ -5,9 +5,28 @@
 
     See the file LICENSE for copying permission.
 """
-from . import base
 
-class MatcherId(base.MatcherBase):
-	
-	def match(self, xml):
-		return xml['id'] == self._criteria
+from sleekxmpp.xmlstream.matcher.base import MatcherBase
+
+
+class MatcherId(MatcherBase):
+
+    """
+    The ID matcher selects stanzas that have the same stanza 'id'
+    interface value as the desired ID.
+
+    Methods:
+        match -- Overrides MatcherBase.match.
+    """
+
+    def match(self, xml):
+        """
+        Compare the given stanza's 'id' attribute to the stored
+        id value.
+
+        Overrides MatcherBase.match.
+
+        Arguments:
+            xml -- The stanza to compare against.
+        """
+        return xml['id'] == self._criteria
diff --git a/sleekxmpp/xmlstream/matcher/many.py b/sleekxmpp/xmlstream/matcher/many.py
index ff0c4e4d..f470ec9c 100644
--- a/sleekxmpp/xmlstream/matcher/many.py
+++ b/sleekxmpp/xmlstream/matcher/many.py
@@ -5,13 +5,36 @@
 
     See the file LICENSE for copying permission.
 """
-from . import base
-from xml.etree import cElementTree
 
-class MatchMany(base.MatcherBase):
+from sleekxmpp.xmlstream.matcher.base import MatcherBase
 
-	def match(self, xml):
-		for m in self._criteria:
-			if m.match(xml):
-				return True
-		return False
+
+class MatchMany(MatcherBase):
+
+    """
+    The MatchMany matcher may compare a stanza against multiple
+    criteria. It is essentially an OR relation combining multiple
+    matchers.
+
+    Each of the criteria must implement a match() method.
+
+    Methods:
+        match -- Overrides MatcherBase.match.
+    """
+
+    def match(self, xml):
+        """
+        Match a stanza against multiple criteria. The match is successful
+        if one of the criteria matches.
+
+        Each of the criteria must implement a match() method.
+
+        Overrides MatcherBase.match.
+
+        Arguments:
+            xml -- The stanza object to compare against.
+        """
+        for m in self._criteria:
+            if m.match(xml):
+                return True
+        return False
diff --git a/sleekxmpp/xmlstream/matcher/stanzapath.py b/sleekxmpp/xmlstream/matcher/stanzapath.py
index e315445d..f8ff283d 100644
--- a/sleekxmpp/xmlstream/matcher/stanzapath.py
+++ b/sleekxmpp/xmlstream/matcher/stanzapath.py
@@ -5,10 +5,34 @@
 
     See the file LICENSE for copying permission.
 """
-from . import base
-from xml.etree import cElementTree
 
-class StanzaPath(base.MatcherBase):
+from sleekxmpp.xmlstream.matcher.base import MatcherBase
 
-	def match(self, stanza):
-		return stanza.match(self._criteria)
+
+class StanzaPath(MatcherBase):
+
+    """
+    The StanzaPath matcher selects stanzas that match a given "stanza path",
+    which is similar to a normal XPath except that it uses the interfaces and
+    plugins of the stanza instead of the actual, underlying XML.
+
+    In most cases, the stanza path and XPath should be identical, but be
+    aware that differences may occur.
+
+    Methods:
+        match -- Overrides MatcherBase.match.
+    """
+
+    def match(self, stanza):
+        """
+        Compare a stanza against a "stanza path". A stanza path is similar to
+        an XPath expression, but uses the stanza's interfaces and plugins
+        instead of the underlying XML. For most cases, the stanza path and
+        XPath should be identical, but be aware that differences may occur.
+
+        Overrides MatcherBase.match.
+
+        Arguments:
+            stanza -- The stanza object to compare against.
+        """
+        return stanza.match(self._criteria)
diff --git a/sleekxmpp/xmlstream/matcher/xmlmask.py b/sleekxmpp/xmlstream/matcher/xmlmask.py
index 89fd6422..2967a2af 100644
--- a/sleekxmpp/xmlstream/matcher/xmlmask.py
+++ b/sleekxmpp/xmlstream/matcher/xmlmask.py
@@ -5,63 +5,151 @@
 
     See the file LICENSE for copying permission.
 """
-from . import base
-from xml.etree import cElementTree
+
 from xml.parsers.expat import ExpatError
 
-ignore_ns = False
-
-class MatchXMLMask(base.MatcherBase):
-
-	def __init__(self, criteria):
-		base.MatcherBase.__init__(self, criteria)
-		if type(criteria) == type(''):
-			self._criteria = cElementTree.fromstring(self._criteria)
-		self.default_ns = 'jabber:client'
-	
-	def setDefaultNS(self, ns):
-		self.default_ns = ns
-
-	def match(self, xml):
-		if hasattr(xml, 'xml'):
-			xml = xml.xml
-		return self.maskcmp(xml, self._criteria, True)
-	
-	def maskcmp(self, source, maskobj, use_ns=False, default_ns='__no_ns__'):
-		"""maskcmp(xmlobj, maskobj):
-		Compare etree xml object to etree xml object mask"""
-		use_ns = not ignore_ns
-		#TODO require namespaces
-		if source == None: #if element not found (happens during recursive check below)
-			return False
-		if not hasattr(maskobj, 'attrib'): #if the mask is a string, make it an xml obj
-			try:
-				maskobj = cElementTree.fromstring(maskobj)
-			except ExpatError:
-				logging.log(logging.WARNING, "Expat error: %s\nIn parsing: %s" % ('', maskobj))
-		if not use_ns and source.tag.split('}', 1)[-1] != maskobj.tag.split('}', 1)[-1]: # strip off ns and compare
-			return False
-		if use_ns and (source.tag != maskobj.tag and "{%s}%s" % (self.default_ns, maskobj.tag) != source.tag ):
-			return False
-		if maskobj.text and source.text != maskobj.text:
-			return False
-		for attr_name in maskobj.attrib: #compare attributes
-			if source.attrib.get(attr_name, "__None__") != maskobj.attrib[attr_name]:
-				return False
-		#for subelement in maskobj.getiterator()[1:]: #recursively compare subelements
-		for subelement in maskobj: #recursively compare subelements
-			if use_ns:
-				if not self.maskcmp(source.find(subelement.tag), subelement, use_ns):
-					return False
-			else:
-				if not self.maskcmp(self.getChildIgnoreNS(source, subelement.tag), subelement, use_ns):
-					return False
-		return True
-	
-	def getChildIgnoreNS(self, xml, tag):
-		tag = tag.split('}')[-1]
-		try:
-			idx = [c.tag.split('}')[-1] for c in xml.getchildren()].index(tag)
-		except ValueError:
-			return None
-		return xml.getchildren()[idx]
+from sleekxmpp.xmlstream.stanzabase import ET
+from sleekxmpp.xmlstream.matcher.base import MatcherBase
+
+
+# Flag indicating if mask comparisons should respect namespaces (False)
+# or ignore them and compare only the local tag names (True).
+# Changing this will affect ALL XMLMask matchers.
+IGNORE_NS = False
+
+
+class MatchXMLMask(MatcherBase):
+
+    """
+    The XMLMask matcher selects stanzas whose XML matches a given
+    XML pattern, or mask. For example, message stanzas with body elements
+    could be matched using the mask:
+
+        <message xmlns="jabber:client"><body /></message>
+
+    Use of XMLMask is discouraged, and XPath or StanzaPath should be used
+    instead.
+
+    The use of namespaces in the mask comparison is controlled by
+    IGNORE_NS. Setting IGNORE_NS to True will disable namespace based matching
+    for ALL XMLMask matchers.
+
+    Methods:
+        match        -- Overrides MatcherBase.match.
+        setDefaultNS -- Set the default namespace for the mask.
+    """
+
+    def __init__(self, criteria):
+        """
+        Create a new XMLMask matcher.
+
+        Arguments:
+            criteria -- Either an XML object or XML string to use as a mask.
+        """
+        MatcherBase.__init__(self, criteria)
+        if isinstance(criteria, str):
+            self._criteria = ET.fromstring(self._criteria)
+        self.default_ns = 'jabber:client'
+
+    def setDefaultNS(self, ns):
+        """
+        Set the default namespace to use during comparisons.
+
+        Arguments:
+            ns -- The new namespace to use as the default.
+        """
+        self.default_ns = ns
+
+    def match(self, xml):
+        """
+        Compare a stanza object or XML object against the stored XML mask.
+
+        Overrides MatcherBase.match.
+
+        Arguments:
+            xml -- The stanza object or XML object to compare against.
+        """
+        if hasattr(xml, 'xml'):
+            xml = xml.xml
+        return self._mask_cmp(xml, self._criteria, True)
+
+    def _mask_cmp(self, source, mask, use_ns=False, default_ns='__no_ns__'):
+        """
+        Compare an XML object against an XML mask.
+
+        Arguments:
+            source     -- The XML object to compare against the mask.
+            mask       -- The XML object serving as the mask.
+            use_ns     -- Currently ignored; the value is always replaced
+                          by "not IGNORE_NS" before any comparison.
+            default_ns -- Currently unused; self.default_ns is what is
+                          used to qualify the mask's tag.
+                          Defaults to "__no_ns__".
+        """
+        use_ns = not IGNORE_NS
+
+        if source is None:
+            # The element was not found. May happen during recursive calls.
+            return False
+
+        # Convert the mask to an XML object if it is a string.
+        if not hasattr(mask, 'attrib'):
+            try:
+                mask = ET.fromstring(mask)
+            except ExpatError:
+                logging.log(logging.WARNING,
+                            "Expat error: %s\nIn parsing: %s" % ('', mask))
+
+        if not use_ns:
+            # Compare the element without using namespaces.
+            source_tag = source.tag.split('}', 1)[-1]
+            mask_tag = mask.tag.split('}', 1)[-1]
+            if source_tag != mask_tag:
+                return False
+        else:
+            # Compare the element using namespaces
+            mask_ns_tag = "{%s}%s" % (self.default_ns, mask.tag)
+            if source.tag not in [mask.tag, mask_ns_tag]:
+                return False
+
+        # If the mask includes text, compare it.
+        if mask.text and source.text != mask.text:
+            return False
+
+        # Compare attributes. The stanza must include the attributes
+        # defined by the mask, but may include others.
+        for name, value in mask.attrib.items():
+            if source.attrib.get(name, "__None__") != value:
+                return False
+
+        # Recursively check subelements.
+        for subelement in mask:
+            if use_ns:
+                if not self._mask_cmp(source.find(subelement.tag),
+                                      subelement, use_ns):
+                    return False
+            else:
+                if not self._mask_cmp(self._get_child(source, subelement.tag),
+                                      subelement, use_ns):
+                    return False
+
+        # Everything matches.
+        return True
+
+    def _get_child(self, xml, tag):
+        """
+        Return a child element given its tag, ignoring namespace values.
+
+        Returns None if the child was not found.
+
+        Arguments:
+            xml -- The XML object to search for the given child tag.
+            tag -- The name of the subelement to find.
+        """
+        tag = tag.split('}')[-1]
+        try:
+            children = [c.tag.split('}')[-1] for c in xml.getchildren()]
+            index = children.index(tag)
+        except ValueError:
+            return None
+        return xml.getchildren()[index]
diff --git a/sleekxmpp/xmlstream/matcher/xpath.py b/sleekxmpp/xmlstream/matcher/xpath.py
index 7f3d20be..669c9f16 100644
--- a/sleekxmpp/xmlstream/matcher/xpath.py
+++ b/sleekxmpp/xmlstream/matcher/xpath.py
@@ -5,30 +5,75 @@
 
     See the file LICENSE for copying permission.
 """
-from . import base
-from xml.etree import cElementTree
-
-ignore_ns = False
-
-class MatchXPath(base.MatcherBase):
-
-	def match(self, xml):
-		if hasattr(xml, 'xml'):
-			xml = xml.xml
-		x = cElementTree.Element('x')
-		x.append(xml)
-		if not ignore_ns:
-			if x.find(self._criteria) is not None:
-				return True
-			return False
-		else:
-			criteria = [c.split('}')[-1] for c in self._criteria.split('/')]
-			xml = x
-			for tag in criteria:
-				children = [c.tag.split('}')[-1] for c in xml.getchildren()]
-				try:
-					idx = children.index(tag)
-				except ValueError:
-					return False
-				xml = xml.getchildren()[idx]
-			return True
+
+from sleekxmpp.xmlstream.stanzabase import ET
+from sleekxmpp.xmlstream.matcher.base import MatcherBase
+
+
+# Flag indicating if the builtin XPath matcher should be used, which
+# uses namespaces, or a custom matcher that ignores namespaces.
+# Changing this will affect ALL XPath matchers.
+IGNORE_NS = False
+
+
+class MatchXPath(MatcherBase):
+
+    """
+    The XPath matcher selects stanzas whose XML contents match a given
+    XPath expression.
+
+    Note that using this matcher may not produce expected behavior when using
+    attribute selectors. For Python 2.6 and 3.1, the ElementTree find method
+    does not support the use of attribute selectors. If you need to support
+    Python 2.6 or 3.1, it might be more useful to use a StanzaPath matcher.
+
+    If the value of IGNORE_NS is set to true, then XPath expressions will
+    be matched without using namespaces.
+
+    Methods:
+        match -- Overrides MatcherBase.match.
+    """
+
+    def match(self, xml):
+        """
+        Compare a stanza's XML contents to an XPath expression.
+
+        If the value of IGNORE_NS is set to true, then XPath expressions
+        will be matched without using namespaces.
+
+        Note that in Python 2.6 and 3.1 the ElementTree find method does
+        not support attribute selectors in the XPath expression.
+
+        Arguments:
+            xml -- The stanza object to compare against.
+        """
+        if hasattr(xml, 'xml'):
+            xml = xml.xml
+        x = ET.Element('x')
+        x.append(xml)
+
+        if not IGNORE_NS:
+            # Use builtin, namespace respecting, XPath matcher.
+            if x.find(self._criteria) is not None:
+                return True
+            return False
+        else:
+            # Remove namespaces from the XPath expression.
+            criteria = []
+            for ns_block in self._criteria.split('{'):
+                criteria.extend(ns_block.split('}')[-1].split('/'))
+
+            # Walk the XPath expression.
+            xml = x
+            for tag in criteria:
+                if not tag:
+                    # Skip empty tag name artifacts from the cleanup phase.
+                    continue
+
+                children = [c.tag.split('}')[-1] for c in xml.getchildren()]
+                try:
+                    index = children.index(tag)
+                except ValueError:
+                    return False
+                xml = xml.getchildren()[index]
+            return True