# sleekxmpp/util/stringprep_profiles.py
#
# Building blocks for assembling stringprep profiles: character mapping,
# optional NFKC normalization, prohibited-output checks, bidirectional
# text checks and (for queries) unassigned code point checks.

from __future__ import unicode_literals

import sys
import stringprep
import unicodedata


class StringPrepError(UnicodeError):
    """Raised when input cannot be prepared by a stringprep profile."""


def to_unicode(data):
    """
    Convert data to the text type of the running interpreter
    (unicode on Python 2, str on Python 3).
    """
    # NOTE(review): on Python 3, str() of a bytes object yields its repr
    # ("b'...'"), so callers are presumably expected to pass text.
    if sys.version_info < (3, 0):
        return unicode(data)
    else:
        return str(data)


def b1_mapping(char):
    """
    Map characters from table B.1 (commonly mapped to nothing) to the
    empty string. Returns None when the character is not in the table.
    """
    return '' if stringprep.in_table_b1(char) else None


def c12_mapping(char):
    """
    Map characters from table C.1.2 (non-ASCII space characters) to a
    plain space. Returns None when the character is not in the table.
    """
    return ' ' if stringprep.in_table_c12(char) else None


def map_input(data, tables=None):
    """
    Each character in the input stream MUST be checked against
    a mapping table.

    Arguments:
        data   -- The text to map.
        tables -- Iterable of mapping functions. Each one takes a
                  character and returns its replacement string, or
                  None if it does not apply. The first non-None
                  result wins. None means no mappings are applied.

    Returns the mapped text.
    """
    if tables is None:
        tables = ()

    result = []
    for char in data:
        replacement = None

        for mapping in tables:
            replacement = mapping(char)
            if replacement is not None:
                break

        # Characters that no table claims are passed through untouched.
        if replacement is None:
            replacement = char
        result.append(replacement)
    return ''.join(result)


def normalize(data, nfkc=True):
    """
    A profile can specify one of two options for Unicode normalization:
        - no normalization
        - Unicode normalization with form KC

    Arguments:
        data -- The text to normalize.
        nfkc -- If true, apply NFKC normalization; otherwise the
                text is returned unchanged.
    """
    if nfkc:
        data = unicodedata.normalize('NFKC', data)
    return data


def prohibit_output(data, tables=None):
    """
    Before the text can be emitted, it MUST be checked for prohibited
    code points.

    Arguments:
        data   -- The text to check.
        tables -- Iterable of predicate functions taking a character
                  and returning a true value if it is prohibited.
                  None means nothing is prohibited.

    Raises StringPrepError on the first prohibited character found.
    """
    if tables is None:
        tables = ()

    for char in data:
        for check in tables:
            if check(char):
                raise StringPrepError("Prohibited code point: %s" % char)


def check_bidi(data):
    """
    1) The characters in section 5.8 MUST be prohibited.

    2) If a string contains any RandALCat character, the string MUST NOT
       contain any LCat character.

    3) If a string contains any RandALCat character, a RandALCat
       character MUST be the first character of the string, and a
       RandALCat character MUST be the last character of the string.

    Raises StringPrepError if any of the rules above is violated.
    """
    # An empty string trivially satisfies every rule, and must not
    # reach the data[0] / data[-1] lookups below.
    if not data:
        return

    has_lcat = False
    has_randal = False

    for c in data:
        if stringprep.in_table_c8(c):
            raise StringPrepError("BIDI violation: section 6 (1)")
        if stringprep.in_table_d1(c):
            has_randal = True
        elif stringprep.in_table_d2(c):
            has_lcat = True

    if has_randal and has_lcat:
        raise StringPrepError("BIDI violation: section 6 (2)")

    first_randal = stringprep.in_table_d1(data[0])
    last_randal = stringprep.in_table_d1(data[-1])
    if has_randal and not (first_randal and last_randal):
        raise StringPrepError("BIDI violation: section 6 (3)")


def check_unassigned(data, unassigned):
    """
    Check that the text does not use unassigned code points.

    Arguments:
        data       -- The text to check.
        unassigned -- Iterable of predicate functions taking a
                      character and returning a true value if it is
                      an unassigned code point.

    Raises StringPrepError on the first unassigned character found.
    """
    for char in data:
        for check in unassigned:
            if check(char):
                raise StringPrepError("Unassigned code point: %s" % char)


def create(nfkc=True, bidi=True, mappings=None,
           prohibited=None, unassigned=None):
    """
    Build a stringprep profile function.

    Arguments:
        nfkc       -- Apply NFKC normalization after mapping.
        bidi       -- Enforce the bidirectional text rules.
        mappings   -- Mapping functions passed to map_input.
        prohibited -- Predicates passed to prohibit_output.
        unassigned -- Predicates passed to check_unassigned; only
                      consulted when the profile is called with
                      query=True.

    Returns a function profile(data, query=False) that returns the
    prepared text or raises StringPrepError.
    """
    def profile(data, query=False):
        try:
            data = to_unicode(data)
        except UnicodeError:
            raise StringPrepError

        data = map_input(data, mappings)
        data = normalize(data, nfkc)
        prohibit_output(data, prohibited)
        if bidi:
            check_bidi(data)
        if query and unassigned:
            check_unassigned(data, unassigned)
        return data
    return profile
--- sleekxmpp/util/stringprep_profiles.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'sleekxmpp/util') diff --git a/sleekxmpp/util/stringprep_profiles.py b/sleekxmpp/util/stringprep_profiles.py index a75bb9dd..6844c9ac 100644 --- a/sleekxmpp/util/stringprep_profiles.py +++ b/sleekxmpp/util/stringprep_profiles.py @@ -77,6 +77,9 @@ def check_bidi(data): character MUST be the first character of the string, and a RandALCat character MUST be the last character of the string. """ + if not data: + return data + has_lcat = False has_randal = False -- cgit v1.2.3 From 3e43b36a9d70801d90a6b09046f93879f2e29b89 Mon Sep 17 00:00:00 2001 From: Lance Stout Date: Tue, 24 Jul 2012 02:39:54 -0700 Subject: Standardize importing of queue class. This will make it easier to enable gevent support. --- sleekxmpp/util/__init__.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'sleekxmpp/util') diff --git a/sleekxmpp/util/__init__.py b/sleekxmpp/util/__init__.py index e69de29b..86a87222 100644 --- a/sleekxmpp/util/__init__.py +++ b/sleekxmpp/util/__init__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +""" + sleekxmpp.util + ~~~~~~~~~~~~~~ + + Part of SleekXMPP: The Sleek XMPP Library + + :copyright: (c) 2012 Nathanael C. Fritz, Lance J.T. Stout + :license: MIT, see LICENSE for more details +""" + + +# ===================================================================== +# Standardize import of Queue class: + +try: + import queue +except ImportError: + import Queue as queue + + +Queue = queue.Queue +QueueEmpty = queue.Empty -- cgit v1.2.3