summaryrefslogtreecommitdiff
path: root/src/utils/encoding.hpp
blob: a3bccfc4fc0c19322d4658938994371eaa2b777b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#ifndef ENCODING_INCLUDED
# define ENCODING_INCLUDED

#include <string>

namespace utils
{
  /**
   * Tell whether the given null-terminated string is valid UTF-8.
   *
   * Based on http://en.wikipedia.org/wiki/UTF-8#Description
   *
   * @param s The null-terminated byte string to check.
   *          NOTE(review): null-pointer handling is not visible from this
   *          header; presumably s must be non-null -- confirm against the
   *          implementation.
   * @return true if the string is valid UTF-8, false otherwise.
   */
  bool is_valid_utf8(const char* s);
  /**
   * Remove all invalid codepoints from the given UTF-8-encoded string.
   *
   * See http://www.w3.org/TR/xml/#charsets for the list of valid characters
   * in XML.
   *
   * @param original The UTF-8-encoded input string; it is taken by const
   *                 reference and is not modified.
   * @return A copy of the string, without the removed characters.
   */
  std::string remove_invalid_xml_chars(const std::string& original);
  /**
   * Convert the given string (encoded in "encoding") into valid UTF-8.
   * If some decoding fails, a UTF-8 placeholder character is inserted
   * instead.
   *
   * @param str      The input string, expressed in the source encoding.
   * @param encoding The name of the encoding "str" is expressed in.
   *                 NOTE(review): the accepted encoding names are not
   *                 visible from this header -- confirm against the
   *                 implementation.
   * @return The converted string.
   *         NOTE(review): which placeholder character is used is defined
   *         by the implementation, not by this header.
   */
  std::string convert_to_utf8(const std::string& str, const char* encoding);
}

#endif // ENCODING_INCLUDED