blob: 586edd804c4492b75adb3032000bc86f13d98042 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
#pragma once
#include <string>
namespace utils
{
/**
* Return the size, in bytes, of the next UTF-8 codepoint, based on
* the given char.
*
* @param c the byte used to determine the size; presumably the first
*          (lead) byte of the encoded codepoint -- confirm against the
*          implementation.
* @return the number of bytes occupied by the codepoint.
*
* NOTE(review): the value returned for a byte that cannot start a
* codepoint is not visible from this declaration.
*/
std::size_t get_next_codepoint_size(const unsigned char c);
/**
* Returns true if the given null-terminated string is valid utf-8.
*
* Based on http://en.wikipedia.org/wiki/UTF-8#Description
*
* @param s the null-terminated string to check.
* @return true if s is valid UTF-8, false otherwise.
*
* NOTE(review): whether a null pointer is accepted cannot be told from
* this declaration; assume s must point to a valid string.
*/
bool is_valid_utf8(const char* s);
/**
* Remove all invalid codepoints from the given utf-8-encoded string.
* The value returned is a copy of the string, without the removed chars;
* the argument is taken by const reference and is left untouched.
*
* See http://www.w3.org/TR/xml/#charsets for the list of valid characters
* in XML.
*
* @param original the UTF-8 encoded string to filter.
* @return a new string holding the content of original minus the
*         removed characters.
*/
std::string remove_invalid_xml_chars(const std::string& original);
/**
* Convert the given string (encoded in "encoding") into valid UTF-8.
* If some decoding fails, insert a UTF-8 placeholder character instead.
*
* @param str the string to convert.
* @param encoding the name of the encoding str is currently in.
* @return the converted string.
*
* NOTE(review): which placeholder character is inserted, and which
* encoding names are accepted, are not specified here -- see the
* implementation.
*/
std::string convert_to_utf8(const std::string& str, const char* encoding);
}
namespace xep0106
{
/**
* Decode and encode in place: both functions take the string by non-const
* reference and return nothing, so the result is written back into the
* argument itself.
*
* NOTE(review): judging by the namespace name, these presumably implement
* the escaping scheme of XEP-0106 (JID Escaping) -- confirm against the
* implementation.
*/
/** Decode the given string in place. */
void decode(std::string&);
/** Encode the given string in place. */
void encode(std::string&);
}
|