diff options
author | Lance Stout <lancestout@gmail.com> | 2011-08-04 11:52:17 -0700 |
---|---|---|
committer | Lance Stout <lancestout@gmail.com> | 2011-08-04 11:52:17 -0700 |
commit | 89cffd43f48cfc835b70e137776eb8c2e73a0b67 (patch) | |
tree | 088035aeb688edc6a0f5675de31eedf169f87fd3 /sleekxmpp/thirdparty/suelta/saslprep.py | |
parent | ad978700fc891602c826dea03615c396f39364a0 (diff) | |
parent | b9764cc120c48576be1fe6cadb11813d12f91f4c (diff) | |
download | slixmpp-89cffd43f48cfc835b70e137776eb8c2e73a0b67.tar.gz slixmpp-89cffd43f48cfc835b70e137776eb8c2e73a0b67.tar.bz2 slixmpp-89cffd43f48cfc835b70e137776eb8c2e73a0b67.tar.xz slixmpp-89cffd43f48cfc835b70e137776eb8c2e73a0b67.zip |
Merge branch 'develop' into roster
Conflicts:
setup.py
Diffstat (limited to 'sleekxmpp/thirdparty/suelta/saslprep.py')
-rw-r--r-- | sleekxmpp/thirdparty/suelta/saslprep.py | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/sleekxmpp/thirdparty/suelta/saslprep.py b/sleekxmpp/thirdparty/suelta/saslprep.py new file mode 100644 index 00000000..fe58d58b --- /dev/null +++ b/sleekxmpp/thirdparty/suelta/saslprep.py @@ -0,0 +1,78 @@ +from __future__ import unicode_literals + +import sys +import stringprep +import unicodedata + + +def saslprep(text, strict=True): + """ + Return a processed version of the given string, using the SASLPrep + profile of stringprep. + + :param text: The string to process, in UTF-8. + :param strict: If ``True``, prevent the use of unassigned code points. + """ + + if sys.version_info < (3, 0): + if type(text) == str: + text = text.decode('us-ascii') + + # Mapping: + # + # - non-ASCII space characters [StringPrep, C.1.2] that can be + # mapped to SPACE (U+0020), and + # + # - the 'commonly mapped to nothing' characters [StringPrep, B.1] + # that can be mapped to nothing. + buffer = '' + for char in text: + if stringprep.in_table_c12(char): + buffer += ' ' + elif not stringprep.in_table_b1(char): + buffer += char + + # Normalization using form KC + text = unicodedata.normalize('NFKC', buffer) + + # Check for bidirectional string + buffer = '' + first_is_randal = False + if text: + first_is_randal = stringprep.in_table_d1(text[0]) + if first_is_randal and not stringprep.in_table_d1(text[-1]): + raise UnicodeError('Section 6.3 [end]') + + # Check for prohibited characters + for x in range(len(text)): + if strict and stringprep.in_table_a1(text[x]): + raise UnicodeError('Unassigned Codepoint') + if stringprep.in_table_c12(text[x]): + raise UnicodeError('In table C.1.2') + if stringprep.in_table_c21(text[x]): + raise UnicodeError('In table C.2.1') + if stringprep.in_table_c22(text[x]): + raise UnicodeError('In table C.2.2') + if stringprep.in_table_c3(text[x]): + raise UnicodeError('In table C.3') + if stringprep.in_table_c4(text[x]): + raise UnicodeError('In table C.4') + if stringprep.in_table_c5(text[x]): + raise UnicodeError('In table C.5') + if stringprep.in_table_c6(text[x]): + raise UnicodeError('In table C.6') + if stringprep.in_table_c7(text[x]): + raise UnicodeError('In table C.7') + if stringprep.in_table_c8(text[x]): + raise UnicodeError('In table C.8') + if stringprep.in_table_c9(text[x]): + raise UnicodeError('In table C.9') + if x: + if first_is_randal and stringprep.in_table_d2(text[x]): + raise UnicodeError('Section 6.2') + if not first_is_randal and \ + x != len(text) - 1 and \ + stringprep.in_table_d1(text[x]): + raise UnicodeError('Section 6.3') + + return text |