From 0ec82c104ded01a44ed36d20e25220fa41887fd0 Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Fri, 27 Feb 2015 12:18:34 +0100
Subject: Add louloulibs as a submodule

---
 louloulibs | 1 +
 1 file changed, 1 insertion(+)
 create mode 160000 louloulibs

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
new file mode 160000
index 0000000..b6af145
--- /dev/null
+++ b/louloulibs
@@ -0,0 +1 @@
+Subproject commit b6af145bfb9561a1bb1ecb940f50163c5ce4dbbb
-- 
cgit v1.2.3


From e6569a1090be063f34624474f0d4578f37a169ae Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Fri, 27 Feb 2015 12:40:50 +0100
Subject: Only use include_directory() if the directory path is defined

---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index b6af145..d6a3724 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit b6af145bfb9561a1bb1ecb940f50163c5ce4dbbb
+Subproject commit d6a3724c6a0127a49a9e7adb1090bb7438c8d0f2
-- 
cgit v1.2.3


From e4c696861d86b62305ca0ec8136e79f147837b94 Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Mon, 2 Mar 2015 11:06:40 +0100
Subject: Update louloulibs to last revision

---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index d6a3724..5f3a1bb 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit d6a3724c6a0127a49a9e7adb1090bb7438c8d0f2
+Subproject commit 5f3a1bb54df4de5f332282bbdf791bdce07c71c4
-- 
cgit v1.2.3


From d88ec5fdf10ecb168355bc38dc81d83ff59a0234 Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Mon, 2 Mar 2015 11:32:18 +0100
Subject: Update to latest louloulibs revision

---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index 5f3a1bb..d0b8695 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit 5f3a1bb54df4de5f332282bbdf791bdce07c71c4
+Subproject commit d0b8695ceb13e0c6d72821fe605de36e494afcdf
-- 
cgit v1.2.3


From c243fea660723eba00b65e639b76d0783cb59064 Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Wed, 4 Mar 2015 05:56:44 +0100
Subject: Update to latest louloulibs revision

---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index d0b8695..99757a4 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit d0b8695ceb13e0c6d72821fe605de36e494afcdf
+Subproject commit 99757a44b49619ff59cae9e6d983a3b7c20c56bf
-- 
cgit v1.2.3


From ad0465b32051e224f6a234f3ed36494905e59cbf Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Mon, 20 Apr 2015 20:33:02 +0200
Subject: Decode incoming JIDs local part according to xep 0106

This lets users send messages to nicks such as Q@CServe.quakenet.org

fix #3047
---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index 99757a4..88d2b13 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit 99757a44b49619ff59cae9e6d983a3b7c20c56bf
+Subproject commit 88d2b136e5f133f0d0dc01f59449284f663d53ea
-- 
cgit v1.2.3


From 0d706741c6b3a8bdf6b4f8ca0b1ac00cb27bd8b8 Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Mon, 20 Apr 2015 20:35:32 +0200
Subject: Update louloulibs submodule to the correct revision

---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index 88d2b13..b53ae92 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit 88d2b136e5f133f0d0dc01f59449284f663d53ea
+Subproject commit b53ae922f48f1465a7fa61136f65ec39e38a452e
-- 
cgit v1.2.3


From a8225dc54c019788722bda3bda8d55151c1ccdef Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Tue, 21 Apr 2015 15:35:10 +0200
Subject: Properly check for connecting or connected status before reconnecting

Note, in our context, is_connecting() includes the resolving part as well as
the actual connection (if we are using c-ares)

fix #3048
---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index b53ae92..6c812cd 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit b53ae922f48f1465a7fa61136f65ec39e38a452e
+Subproject commit 6c812cd86e31569db61cac4e30f77e296d207191
-- 
cgit v1.2.3


From 71fec776c4d7b99b76a44deae6f333d9cffa1496 Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Thu, 7 May 2015 17:42:37 +0200
Subject: Update to latest louloulibs

fix #3042
---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index 6c812cd..eaa4fbb 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit 6c812cd86e31569db61cac4e30f77e296d207191
+Subproject commit eaa4fbba814b56b4fe7ffb62984fddfbb9280291
-- 
cgit v1.2.3


From fbeb5af364db54c8a82f5ea30b83df441988ea4b Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Wed, 13 May 2015 20:17:43 +0200
Subject: Update to latest louloulibs revision, and add test for hostname
 validity

fix #2694
---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index eaa4fbb..89398b5 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit eaa4fbba814b56b4fe7ffb62984fddfbb9280291
+Subproject commit 89398b5d886744c3812b65195308cae57eca2b53
-- 
cgit v1.2.3


From 897b281e67dc82700db9fd9c2dedc5e01e5871ee Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Wed, 27 May 2015 23:44:23 +0200
Subject: Avoid some potential race conditions by blocking the signals we
 manage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

They are atomically unblocked in the ppoll/epoll_pwait calls, avoiding any
race condition on the check of the “stop” or “reload” booleans.
---
 louloulibs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
index 89398b5..0f3c118 160000
--- a/louloulibs
+++ b/louloulibs
@@ -1 +1 @@
-Subproject commit 89398b5d886744c3812b65195308cae57eca2b53
+Subproject commit 0f3c1183e2bf0941ae2bffd3f31577bce4f3001c
-- 
cgit v1.2.3


From e1a7114c8daa10589c830ce972cf461c3540111b Mon Sep 17 00:00:00 2001
From: Florent Le Coz <louiz@louiz.org>
Date: Thu, 28 May 2015 23:42:52 +0200
Subject: louloulibs is directly included, instead of being a submodule

Because this is a nightmare to manage
---
 louloulibs                    |   1 -
 louloulibs/utils/encoding.cpp | 254 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 254 insertions(+), 1 deletion(-)
 delete mode 160000 louloulibs
 create mode 100644 louloulibs/utils/encoding.cpp

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs b/louloulibs
deleted file mode 160000
index 0f3c118..0000000
--- a/louloulibs
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 0f3c1183e2bf0941ae2bffd3f31577bce4f3001c
diff --git a/louloulibs/utils/encoding.cpp b/louloulibs/utils/encoding.cpp
new file mode 100644
index 0000000..f738ce2
--- /dev/null
+++ b/louloulibs/utils/encoding.cpp
@@ -0,0 +1,254 @@
+#include <utils/encoding.hpp>
+
+#include <utils/scopeguard.hpp>
+
+#include <stdexcept>
+
+#include <assert.h>
+#include <string.h>
+#include <iconv.h>
+
+#include <map>
+#include <bitset>
+
+/**
+ * The UTF-8-encoded character used as a placeholder when a character conversion fails.
+ * This is U+FFFD � "replacement character"; its UTF-8 form is the 3 bytes below.
+ */
+static const char* invalid_char = "\xef\xbf\xbd";
+static const size_t invalid_char_len = 3;
+
+namespace utils
+{
+  /**
+   * Based on http://en.wikipedia.org/wiki/UTF-8#Description
+   */
+  bool is_valid_utf8(const char* s)
+  {
+    if (!s)
+      return false; // no string at all is reported as invalid
+    // Scan up to the NUL: each lead byte must be followed by the right count of 10xxxxxx bytes.
+    const unsigned char* str = reinterpret_cast<const unsigned char*>(s);
+
+    while (*str)
+      {
+        // 4 bytes:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        if ((str[0] & 0b11111000) == 0b11110000)
+          {
+            if (!str[1] || !str[2] || !str[3]
+                || ((str[1] & 0b11000000) != 0b10000000)
+                || ((str[2] & 0b11000000) != 0b10000000)
+                || ((str[3] & 0b11000000) != 0b10000000))
+              return false;
+            str += 4;
+          }
+        // 3 bytes:  1110xxx 10xxxxxx 10xxxxxx
+        else if ((str[0] & 0b11110000) == 0b11100000)
+          {
+            if (!str[1] || !str[2]
+                || ((str[1] & 0b11000000) != 0b10000000)
+                || ((str[2] & 0b11000000) != 0b10000000))
+              return false;
+            str += 3;
+          }
+        // 2 bytes:  110xxxxx 10xxxxxx
+        else if (((str[0]) & 0b11100000) == 0b11000000)
+          {
+            if (!str[1] ||
+                ((str[1] & 0b11000000) != 0b10000000))
+              return false;
+            str += 2;
+          }
+        // 1 byte:  0xxxxxxx
+        else if ((str[0] & 0b10000000) != 0)
+          return false;
+        else
+          str++;
+      }
+    return true;
+  }
+
+  /**
+   * Return a copy of `original` without the characters forbidden by XML 1.0.
+   *
+   * Kept: U+0009, U+000A, U+000D, U+0020-U+D7FF, U+E000-U+FFFD and
+   * U+10000-U+10FFFF. Everything else is silently dropped.
+   *
+   * The given string MUST be a valid utf-8 string. Processing stops at the
+   * first NUL byte; std::runtime_error is thrown when a byte cannot start a
+   * sequence or when a sequence lacks one of its continuation bytes.
+   */
+  std::string remove_invalid_xml_chars(const std::string& original)
+  {
+    std::string res;
+    res.reserve(original.size());
+
+    const unsigned char* str = reinterpret_cast<const unsigned char*>(original.c_str());
+
+    while (*str)
+      {
+        // Length of the sequence, and value decoded so far. A plain integer
+        // is needed: a 4 bytes sequence carries 21 bits of payload.
+        size_t len;
+        unsigned long codepoint;
+
+        // 1 byte:  0xxxxxxx
+        if ((str[0] & 0b10000000) == 0)
+          {
+            len = 1;
+            codepoint = str[0];
+          }
+        // 2 bytes:  110xxxxx 10xxxxxx
+        else if ((str[0] & 0b11100000) == 0b11000000)
+          {
+            len = 2;
+            codepoint = str[0] & 0b00011111;
+          }
+        // 3 bytes:  1110xxxx 10xxxxxx 10xxxxxx
+        else if ((str[0] & 0b11110000) == 0b11100000)
+          {
+            len = 3;
+            codepoint = str[0] & 0b00001111;
+          }
+        // 4 bytes:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        else if ((str[0] & 0b11111000) == 0b11110000)
+          {
+            len = 4;
+            codepoint = str[0] & 0b00000111;
+          }
+        else
+          throw std::runtime_error("Invalid UTF-8 passed to remove_invalid_xml_chars");
+
+        // Accumulate the continuation bytes. The check also stops us at
+        // the terminating NUL of a truncated sequence, instead of reading
+        // past the end of the string.
+        for (size_t i = 1; i < len; ++i)
+          {
+            if ((str[i] & 0b11000000) != 0b10000000)
+              throw std::runtime_error("Invalid UTF-8 passed to remove_invalid_xml_chars");
+            codepoint = (codepoint << 6) | (str[i] & 0b00111111);
+          }
+
+        if (codepoint == 0x09 || codepoint == 0x0A || codepoint == 0x0D
+            || (codepoint >= 0x20 && codepoint <= 0xD7FF)
+            || (codepoint >= 0xE000 && codepoint <= 0xFFFD)
+            || (codepoint >= 0x10000 && codepoint <= 0x10FFFF))
+          res.append(reinterpret_cast<const char*>(str), len);
+        str += len;
+      }
+    return res;
+  }
+
+  /**
+   * Convert `str`, encoded in `charset`, into UTF-8 using iconv.
+   *
+   * Bytes that cannot be converted are replaced by U+FFFD (invalid_char).
+   * An unterminated multibyte sequence at the end of the input is replaced
+   * by a single U+FFFD as well, and the conversion stops there.
+   * Throws std::runtime_error if iconv has no converter from `charset`.
+   */
+  std::string convert_to_utf8(const std::string& str, const char* charset)
+  {
+    const iconv_t cd = iconv_open("UTF-8", charset);
+    if (cd == (iconv_t)-1)
+      throw std::runtime_error("Cannot convert into UTF-8");
+
+    // Make sure cd is always closed when we leave this function
+    ScopeGuard sg([&]{ iconv_close(cd); });
+
+    size_t inbytesleft = str.size();
+
+    // iconv will not attempt to modify this buffer, but some platforms
+    // require a char** anyway
+#ifdef ICONV_SECOND_ARGUMENT_IS_CONST
+    const char* inbuf_ptr = str.c_str();
+#else
+    char* inbuf_ptr = const_cast<char*>(str.c_str());
+#endif
+
+    // The result string doubles as the output buffer (4 bytes per input
+    // byte): nothing to free by hand, no room needed for a final NUL.
+    std::string res(str.size() * 4, '\0');
+    char* outbuf_ptr = &res[0];
+    size_t outbytesleft = res.size();
+
+    // Write the U+FFFD placeholder, keeping outbytesleft in sync. Returns
+    // false (and writes nothing) if the buffer is too full to hold it.
+    const auto put_placeholder = [&]() -> bool
+      {
+        if (outbytesleft < invalid_char_len)
+          return false;
+        memcpy(outbuf_ptr, invalid_char, invalid_char_len);
+        outbuf_ptr += invalid_char_len;
+        outbytesleft -= invalid_char_len;
+        return true;
+      };
+
+    bool done = false;
+    while (!done)
+      {
+        const size_t error = iconv(cd, &inbuf_ptr, &inbytesleft, &outbuf_ptr, &outbytesleft);
+        if (error != static_cast<size_t>(-1))
+          break; // The conversion finished without any error
+        switch (errno)
+          {
+          case EILSEQ:
+            // Invalid byte found. Insert a placeholder instead of the
+            // converted character, jump one byte and continue
+            done = !put_placeholder();
+            inbytesleft--;
+            inbuf_ptr++;
+            break;
+          case EINVAL:
+            // A multibyte sequence is not terminated and we have no more
+            // data to provide: mark it with a placeholder and stop
+            put_placeholder();
+            done = true;
+            break;
+          default:
+            // E2BIG or anything else: this should never happen, give up
+            done = true;
+            break;
+          }
+      }
+    // Only keep the bytes that were actually produced
+    res.resize(static_cast<size_t>(outbuf_ptr - &res[0]));
+    return res;
+  }
+
+}
+
+namespace xep0106
+{
+  // Characters handled here, per XEP-0106 (JID Escaping), and their "\xx" sequences.
+  static const std::map<const char, const std::string> encode_map = {
+    {' ', "\\20"}, {'"', "\\22"}, {'&', "\\26"}, {'\'', "\\27"}, {'/', "\\2f"},
+    {':', "\\3a"}, {'<', "\\3c"}, {'>', "\\3e"}, {'@', "\\40"},
+  };
+
+  // Replace, in place, each escape sequence found in `s` by its character.
+  void decode(std::string& s)
+  {
+    for (const auto& entry: encode_map)
+      {
+        const std::string& escaped = entry.second;
+        // A decoded character never starts a sequence: resume right after it.
+        for (auto at = s.find(escaped); at != std::string::npos;
+             at = s.find(escaped, at + 1))
+          s.replace(at, escaped.size(), 1, entry.first);
+      }
+  }
+
+  // Replace, in place, each character of the table found in `s` by its sequence.
+  void encode(std::string& s)
+  {
+    static const char* const to_escape = " \"&'/:<>@";
+    for (auto at = s.find_first_of(to_escape); at != std::string::npos;
+         at = s.find_first_of(to_escape, at))
+      {
+        const auto found = encode_map.find(s[at]);
+        assert(found != encode_map.end());
+        s.replace(at, 1, found->second);
+      }
+  }
+}
-- 
cgit v1.2.3


From 6235fb2d0326b18a9e013ae13dfb1fd0577ffd9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?louiz=E2=80=99?= <louiz@louiz.org>
Date: Wed, 15 Jun 2016 00:38:43 +0200
Subject: Add get_next_codepoint_size

---
 louloulibs/utils/encoding.cpp | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

(limited to 'louloulibs/utils/encoding.cpp')

diff --git a/louloulibs/utils/encoding.cpp b/louloulibs/utils/encoding.cpp
index f738ce2..507f38a 100644
--- a/louloulibs/utils/encoding.cpp
+++ b/louloulibs/utils/encoding.cpp
@@ -23,6 +23,17 @@ namespace utils
   /**
    * Based on http://en.wikipedia.org/wiki/UTF-8#Description
    */
+  std::size_t get_next_codepoint_size(const unsigned char c)
+  {
+    if ((c >> 5) == 0b110)       // 2 bytes:  110xxxxx 10xxxxxx
+      return 2;
+    if ((c >> 4) == 0b1110)      // 3 bytes:  1110xxxx 10xxxxxx 10xxxxxx
+      return 3;
+    if ((c >> 3) == 0b11110)     // 4 bytes:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+      return 4;
+    return 1;                    // 1 byte:   0xxxxxxx, and any byte that is not a lead byte
+  }
+
   bool is_valid_utf8(const char* s)
   {
     if (!s)
@@ -32,38 +43,31 @@ namespace utils
 
     while (*str)
       {
-        // 4 bytes:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-        if ((str[0] & 0b11111000) == 0b11110000)
+        const auto codepoint_size = get_next_codepoint_size(str[0]);
+        if (codepoint_size == 4)
           {
             if (!str[1] || !str[2] || !str[3]
                 || ((str[1] & 0b11000000) != 0b10000000)
                 || ((str[2] & 0b11000000) != 0b10000000)
                 || ((str[3] & 0b11000000) != 0b10000000))
               return false;
-            str += 4;
           }
-        // 3 bytes:  1110xxx 10xxxxxx 10xxxxxx
-        else if ((str[0] & 0b11110000) == 0b11100000)
+        else if (codepoint_size == 3)
           {
             if (!str[1] || !str[2]
                 || ((str[1] & 0b11000000) != 0b10000000)
                 || ((str[2] & 0b11000000) != 0b10000000))
               return false;
-            str += 3;
           }
-        // 2 bytes:  110xxxxx 10xxxxxx
-        else if (((str[0]) & 0b11100000) == 0b11000000)
+        else if (codepoint_size == 2)
           {
             if (!str[1] ||
                 ((str[1] & 0b11000000) != 0b10000000))
               return false;
-            str += 2;
           }
-        // 1 byte:  0xxxxxxx
         else if ((str[0] & 0b10000000) != 0)
           return false;
-        else
-          str++;
+        str += codepoint_size;
       }
     return true;
   }
-- 
cgit v1.2.3