diff options
author | Nathan Fritz <nathan@andyet.net> | 2011-08-10 13:37:49 -0700 |
---|---|---|
committer | Nathan Fritz <nathan@andyet.net> | 2011-08-10 13:37:49 -0700 |
commit | a189cb8333d5f59caa9015f0ded222696987d957 (patch) | |
tree | 467f202bc7f85a4cde85a5add3c372515f18adc3 /sleekxmpp/thirdparty/suelta/saslprep.py | |
parent | 0d4825d3ea0562f305939e653e3d414e70e4aaa8 (diff) | |
parent | 156b3200e3b5ad1b2e64eecc48cdc792f7b2ffd9 (diff) | |
download | slixmpp-a189cb8333d5f59caa9015f0ded222696987d957.tar.gz slixmpp-a189cb8333d5f59caa9015f0ded222696987d957.tar.bz2 slixmpp-a189cb8333d5f59caa9015f0ded222696987d957.tar.xz slixmpp-a189cb8333d5f59caa9015f0ded222696987d957.zip |
Merge branch 'develop' of github.com:fritzy/SleekXMPP into develop
Diffstat (limited to 'sleekxmpp/thirdparty/suelta/saslprep.py')
-rw-r--r-- | sleekxmpp/thirdparty/suelta/saslprep.py | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/sleekxmpp/thirdparty/suelta/saslprep.py b/sleekxmpp/thirdparty/suelta/saslprep.py new file mode 100644 index 00000000..fe58d58b --- /dev/null +++ b/sleekxmpp/thirdparty/suelta/saslprep.py @@ -0,0 +1,78 @@ +from __future__ import unicode_literals + +import sys +import stringprep +import unicodedata + + +def saslprep(text, strict=True): + """ + Return a processed version of the given string, using the SASLPrep + profile of stringprep. + + :param text: The string to process, in UTF-8. + :param strict: If ``True``, prevent the use of unassigned code points. + """ + + if sys.version_info < (3, 0): + if type(text) == str: + text = text.decode('us-ascii') + + # Mapping: + # + # - non-ASCII space characters [StringPrep, C.1.2] that can be + # mapped to SPACE (U+0020), and + # + # - the 'commonly mapped to nothing' characters [StringPrep, B.1] + # that can be mapped to nothing. + buffer = '' + for char in text: + if stringprep.in_table_c12(char): + buffer += ' ' + elif not stringprep.in_table_b1(char): + buffer += char + + # Normalization using form KC + text = unicodedata.normalize('NFKC', buffer) + + # Check for bidirectional string + buffer = '' + first_is_randal = False + if text: + first_is_randal = stringprep.in_table_d1(text[0]) + if first_is_randal and not stringprep.in_table_d1(text[-1]): + raise UnicodeError('Section 6.3 [end]') + + # Check for prohibited characters + for x in range(len(text)): + if strict and stringprep.in_table_a1(text[x]): + raise UnicodeError('Unassigned Codepoint') + if stringprep.in_table_c12(text[x]): + raise UnicodeError('In table C.1.2') + if stringprep.in_table_c21(text[x]): + raise UnicodeError('In table C.2.1') + if stringprep.in_table_c22(text[x]): + raise UnicodeError('In table C.2.2') + if stringprep.in_table_c3(text[x]): + raise UnicodeError('In table C.3') + if stringprep.in_table_c4(text[x]): + raise UnicodeError('In table C.4') + if stringprep.in_table_c5(text[x]): + raise UnicodeError('In table C.5') + if stringprep.in_table_c6(text[x]): + raise UnicodeError('In table C.6') + if stringprep.in_table_c7(text[x]): + raise UnicodeError('In table C.7') + if stringprep.in_table_c8(text[x]): + raise UnicodeError('In table C.8') + if stringprep.in_table_c9(text[x]): + raise UnicodeError('In table C.9') + if x: + if first_is_randal and stringprep.in_table_d2(text[x]): + raise UnicodeError('Section 6.2') + if not first_is_randal and \ + x != len(text) - 1 and \ + stringprep.in_table_d1(text[x]): + raise UnicodeError('Section 6.3') + + return text |