summaryrefslogtreecommitdiff
path: root/louloulibs/utils/encoding.hpp
blob: 586edd804c4492b75adb3032000bc86f13d98042 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#pragma once


#include <string>

namespace utils
{
  /**
   * Compute, from the given char, how many bytes the next UTF-8
   * codepoint occupies.
   */
  std::size_t get_next_codepoint_size(unsigned char c);

  /**
   * Tell whether the given null-terminated string is valid UTF-8.
   *
   * The check follows the encoding description found at
   * http://en.wikipedia.org/wiki/UTF-8#Description
   */
  bool is_valid_utf8(const char* s);

  /**
   * Build a copy of the given UTF-8 encoded string from which every
   * invalid codepoint has been stripped. The argument itself is left
   * untouched; only the returned copy lacks the removed chars.
   *
   * The list of characters that are valid in XML is given at
   * http://www.w3.org/TR/xml/#charsets
   */
  std::string remove_invalid_xml_chars(const std::string& original);

  /**
   * Convert the given string (encoded in "encoding") into valid UTF-8.
   * Wherever decoding fails, a UTF-8 placeholder character is inserted
   * instead.
   */
  std::string convert_to_utf8(const std::string& str, const char* encoding);
}

namespace xep0106
{
  /**
   * Decode the given string in place: the argument itself is modified.
   *
   * NOTE(review): going by the namespace name, this is presumably the
   * XEP-0106 (JID Escaping) transformation -- confirm against the
   * implementation.
   */
  void decode(std::string& str);

  /**
   * Encode the given string in place: the argument itself is modified.
   * Counterpart of decode().
   */
  void encode(std::string& str);
}