blob: fe58d58b5a90f38500138b8f1028e5a19de4d496 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
from __future__ import unicode_literals
import sys
import stringprep
import unicodedata
# Prohibited-output tables of the SASLprep profile, each paired with the
# message raised when a character is found in that table.  The order is
# significant: it decides which message is reported for a character that
# is listed in more than one table.
_PROHIBITED_TABLES = (
    (stringprep.in_table_c12, 'In table C.1.2'),
    (stringprep.in_table_c21, 'In table C.2.1'),
    (stringprep.in_table_c22, 'In table C.2.2'),
    (stringprep.in_table_c3, 'In table C.3'),
    (stringprep.in_table_c4, 'In table C.4'),
    (stringprep.in_table_c5, 'In table C.5'),
    (stringprep.in_table_c6, 'In table C.6'),
    (stringprep.in_table_c7, 'In table C.7'),
    (stringprep.in_table_c8, 'In table C.8'),
    (stringprep.in_table_c9, 'In table C.9'),
)


def saslprep(text, strict=True):
    """
    Return a processed version of the given string, using the SASLPrep
    profile of stringprep.

    :param text: The string to process. On Python 2 a byte string is
                 also accepted, provided it only contains ASCII.
    :param strict: If ``True``, prevent the use of unassigned code points.
    :returns: The mapped and NFKC-normalized string.
    :raises UnicodeError: If the processed string contains a prohibited
                          code point, an unassigned code point while
                          ``strict`` is set, or breaks the bidirectional
                          rules of [StringPrep, 6].
    """
    # Python 2 only: promote byte strings to text before processing.
    if sys.version_info < (3, 0) and isinstance(text, str):
        text = text.decode('us-ascii')

    # Mapping:
    #
    #  - non-ASCII space characters [StringPrep, C.1.2] that can be
    #    mapped to SPACE (U+0020), and
    #
    #  - the 'commonly mapped to nothing' characters [StringPrep, B.1]
    #    that can be mapped to nothing.
    mapped = []
    for char in text:
        if stringprep.in_table_c12(char):
            mapped.append(' ')
        elif not stringprep.in_table_b1(char):
            mapped.append(char)

    # Normalization using form KC
    text = unicodedata.normalize('NFKC', ''.join(mapped))

    # Check for bidirectional string: a string that starts with a
    # RandALCat character must also end with one.
    first_is_randal = bool(text) and stringprep.in_table_d1(text[0])
    if first_is_randal and not stringprep.in_table_d1(text[-1]):
        raise UnicodeError('Section 6.3 [end]')

    for char in text:
        # Check for unassigned and prohibited characters
        if strict and stringprep.in_table_a1(char):
            raise UnicodeError('Unassigned Codepoint')
        for in_table, message in _PROHIBITED_TABLES:
            if in_table(char):
                raise UnicodeError(message)

        if first_is_randal:
            # A RandALCat string must not contain any LCat character.
            if stringprep.in_table_d2(char):
                raise UnicodeError('Section 6.2')
        elif stringprep.in_table_d1(char):
            # Any RandALCat character requires the string to start (and
            # end) with one, so it is rejected at every position here,
            # including the last one.
            raise UnicodeError('Section 6.3')

    return text
|