diff options
author | Lance Stout <lancestout@gmail.com> | 2011-08-03 18:35:01 -0700 |
---|---|---|
committer | Lance Stout <lancestout@gmail.com> | 2011-08-03 18:35:01 -0700 |
commit | 9591cd3a7e94a663675d97b1db93c2c585d948dc (patch) | |
tree | b9e067447d7a269651b0b4e60fc7255b127aed79 /sleekxmpp/thirdparty/suelta/saslprep.py | |
parent | db92fa23303f1115ef8bf938efb6d686d9c3fa0a (diff) | |
parent | afeb8a679a9895726eea5669b73c83d57bb03dff (diff) | |
download | slixmpp-9591cd3a7e94a663675d97b1db93c2c585d948dc.tar.gz slixmpp-9591cd3a7e94a663675d97b1db93c2c585d948dc.tar.bz2 slixmpp-9591cd3a7e94a663675d97b1db93c2c585d948dc.tar.xz slixmpp-9591cd3a7e94a663675d97b1db93c2c585d948dc.zip |
Merge branch 'stream_features' into develop
Diffstat (limited to 'sleekxmpp/thirdparty/suelta/saslprep.py')
-rw-r--r-- | sleekxmpp/thirdparty/suelta/saslprep.py | 78 |
1 files changed, 78 insertions, 0 deletions
# sleekxmpp/thirdparty/suelta/saslprep.py
from __future__ import unicode_literals

import sys
import stringprep
import unicodedata


# Prohibited-output tables checked by saslprep(), paired with the error
# message raised when a character is found in that table.
_PROHIBITED_TABLES = (
    (stringprep.in_table_c12, 'In table C.1.2'),
    (stringprep.in_table_c21, 'In table C.2.1'),
    (stringprep.in_table_c22, 'In table C.2.2'),
    (stringprep.in_table_c3, 'In table C.3'),
    (stringprep.in_table_c4, 'In table C.4'),
    (stringprep.in_table_c5, 'In table C.5'),
    (stringprep.in_table_c6, 'In table C.6'),
    (stringprep.in_table_c7, 'In table C.7'),
    (stringprep.in_table_c8, 'In table C.8'),
    (stringprep.in_table_c9, 'In table C.9'),
)


def saslprep(text, strict=True):
    """
    Return a processed version of the given string, using the SASLPrep
    profile of stringprep.

    The string is mapped (non-ASCII spaces become U+0020, 'commonly
    mapped to nothing' characters are dropped), normalized with NFKC,
    and then checked for prohibited characters and for the
    bidirectional-text restrictions.

    :param text: The string to process. On Python 2 a byte string is
                 accepted and decoded as US-ASCII first.
    :param strict: If ``True``, prevent the use of unassigned code points.
    :returns: The prepared (mapped and normalized) string.
    :raises UnicodeError: If the string contains a prohibited or (when
                          ``strict``) unassigned code point, or breaks
                          the bidirectional rules.
    """

    # With unicode_literals in effect, ``str`` on Python 2 is still the
    # byte-string type, so this only converts Python 2 byte strings.
    if sys.version_info < (3, 0) and isinstance(text, str):
        text = text.decode('us-ascii')

    # Mapping:
    #
    #  - non-ASCII space characters [StringPrep, C.1.2] that can be
    #    mapped to SPACE (U+0020), and
    #
    #  - the 'commonly mapped to nothing' characters [StringPrep, B.1]
    #    that can be mapped to nothing.
    #
    # The C.1.2 test comes first so a character in both tables becomes
    # a space rather than being dropped.
    mapped = []
    for char in text:
        if stringprep.in_table_c12(char):
            mapped.append(' ')
        elif not stringprep.in_table_b1(char):
            mapped.append(char)

    # Normalization using form KC
    text = unicodedata.normalize('NFKC', ''.join(mapped))

    # Check for unassigned and prohibited characters
    for char in text:
        if strict and stringprep.in_table_a1(char):
            raise UnicodeError('Unassigned Codepoint')
        for in_table, message in _PROHIBITED_TABLES:
            if in_table(char):
                raise UnicodeError(message)

    # Check for bidirectional string [StringPrep, section 6]: if any
    # RandALCat character (table D.1) is present, no LCat character
    # (table D.2) may appear, and both the first and the last character
    # must be RandALCat. Every position is examined, including the last.
    is_randal = [stringprep.in_table_d1(char) for char in text]
    if any(is_randal):
        if any(stringprep.in_table_d2(char) for char in text):
            raise UnicodeError('Section 6.2')
        if not is_randal[0]:
            raise UnicodeError('Section 6.3')
        if not is_randal[-1]:
            raise UnicodeError('Section 6.3 [end]')

    return text