From 4b1c580bb9bc03d656e59d702c72c3e793a1bbe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?louiz=E2=80=99?= Date: Wed, 15 Jun 2016 12:19:19 +0200 Subject: cut messages at 512 bytes, taking into account the UTF-8 codepoints ref #3067 --- louloulibs/utils/string.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'louloulibs/utils') diff --git a/louloulibs/utils/string.cpp b/louloulibs/utils/string.cpp index 7ed1aa3..2447f84 100644 --- a/louloulibs/utils/string.cpp +++ b/louloulibs/utils/string.cpp @@ -1,4 +1,5 @@ #include +#include bool to_bool(const std::string& val) { @@ -11,8 +12,17 @@ std::vector cut(const std::string& val, const std::size_t size) std::string::size_type pos = 0; while (pos < val.size()) { - res.emplace_back(val.substr(pos, size)); - pos += size; + // Get the number of chars, <= size, that contain only whole + // UTF-8 codepoints. + std::size_t s = 0; + auto codepoint_size = utils::get_next_codepoint_size(val[pos + s]); + while (s + codepoint_size <= size) + { + s += codepoint_size; + codepoint_size = utils::get_next_codepoint_size(val[pos + s]); + } + res.emplace_back(val.substr(pos, s)); + pos += s; } return res; } -- cgit v1.2.3