From 4b1c580bb9bc03d656e59d702c72c3e793a1bbe0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?louiz=E2=80=99?= <louiz@louiz.org>
Date: Wed, 15 Jun 2016 12:19:19 +0200
Subject: cut messages at 512 bytes, taking into account the UTF-8 codepoints

ref #3067
---
 louloulibs/utils/string.cpp | 14 ++++++++++++--
 src/irc/irc_client.cpp      |  5 +++--
 tests/utils.cpp             |  8 +++++++-
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/louloulibs/utils/string.cpp b/louloulibs/utils/string.cpp
index 7ed1aa3..2447f84 100644
--- a/louloulibs/utils/string.cpp
+++ b/louloulibs/utils/string.cpp
@@ -1,4 +1,5 @@
 #include <utils/string.hpp>
+#include <utils/encoding.hpp>
 
 bool to_bool(const std::string& val)
 {
@@ -11,8 +12,17 @@ std::vector<std::string> cut(const std::string& val, const std::size_t size)
   std::string::size_type pos = 0;
   while (pos < val.size())
     {
-      res.emplace_back(val.substr(pos, size));
-      pos += size;
+      // Compute how many bytes, at most `size`, can be taken from pos
+      // without splitting a UTF-8 codepoint across two parts.
+      std::size_t s = 0;
+      auto codepoint_size = utils::get_next_codepoint_size(val[pos + s]);
+      while (s + codepoint_size <= size)
+        {
+          s += codepoint_size;
+          codepoint_size = utils::get_next_codepoint_size(val[pos + s]);
+        }
+      res.emplace_back(val.substr(pos, s));
+      pos += s;
     }
   return res;
 }
diff --git a/src/irc/irc_client.cpp b/src/irc/irc_client.cpp
index 2cf0840..1d56361 100644
--- a/src/irc/irc_client.cpp
+++ b/src/irc/irc_client.cpp
@@ -388,6 +388,8 @@ void IrcClient::send_message(IrcMessage&& message)
       res += " " + arg;
     }
   res += "\r\n";
+  log_debug("Effective size: ", res.size());
+  log_debug(res);
   this->send_data(std::move(res));
 }
 
@@ -458,8 +460,7 @@ bool IrcClient::send_channel_message(const std::string& chan_name, const std::st
     }
   // Cut the message body into 512-bytes parts, because the whole command
   // must fit into 512 bytes.
-  // Count the ':' at the start of the text, and two spaces
-  const auto line_size = 512 - ::strlen("PRIVMSG") - chan_name.length() - 3;
+  const auto line_size = 500 - ::strlen("PRIVMSG ") - chan_name.length() - ::strlen(" :\r\n");
   const auto lines = cut(body, line_size);
   for (const auto& line: lines)
     this->send_message(IrcMessage("PRIVMSG", {chan_name, line}));
diff --git a/tests/utils.cpp b/tests/utils.cpp
index 54e743f..01d070e 100644
--- a/tests/utils.cpp
+++ b/tests/utils.cpp
@@ -93,4 +93,10 @@ TEST_CASE("string cut")
 {
   CHECK(cut("coucou", 2).size() == 3);
   CHECK(cut("bonjour les copains", 6).size() == 4);
-}
\ No newline at end of file
+  CHECK(cut("««««", 2).size() == 4);
+  CHECK(cut("a««««", 2).size() == 5);
+  const auto res = cut("rhello, ♥", 10);
+  CHECK(res.size() == 2);
+  CHECK(res[0] == "rhello, ");
+  CHECK(res[1] == "♥");
+}
-- 
cgit v1.2.3