// blob: b5192ffbdb8de4905ebe8655a7fe066ba3539e4d (plain)
#include "catch.hpp"
#include <utils/encoding.hpp>
// Exercises utils::is_valid_utf8 on well-formed and malformed byte sequences.
TEST_CASE("UTF-8 validation")
{
  // Text saturated with combining diacritical marks: long runs of multi-byte
  // sequences that are nevertheless well-formed UTF-8.
  const char* combining_heavy = "C̡͔͕̩͙̽ͫ̈́ͥ̿̆ͧ̚r̸̩̘͍̻͖̆͆͛͊̉̕͡o͇͈̳̤̱̊̈͢q̻͍̦̮͕ͥͬͬ̽ͭ͌̾ͅǔ͉͕͇͚̙͉̭͉̇̽ȇ͈̮̼͍͔ͣ͊͞͝ͅ ͫ̾ͪ̓ͥ̆̋̔҉̢̦̠͈͔̖̲̯̦ụ̶̯͐̃̋ͮ͆͝n̬̱̭͇̻̱̰̖̤̏͛̏̿̑͟ë́͐҉̸̥̪͕̹̻̙͉̰ ̹̼̱̦̥ͩ͑̈́͑͝ͅt͍̥͈̹̝ͣ̃̔̈̔ͧ̕͝ḙ̸̖̟̙͙ͪ͢ų̯̞̼̲͓̻̞͛̃̀́b̮̰̗̩̰̊̆͗̾̎̆ͯ͌͝.̗̙͎̦ͫ̈́ͥ͌̈̓ͬ";
  // Mixed ASCII / box-drawing / symbol text, also well-formed.
  const std::string table_flip{"Biboumi ╯°□°)╯︵ ┻━┻"};

  CHECK(utils::is_valid_utf8(combining_heavy));

  // 0xF0 opens a four-byte sequence, but 0x0F is not a continuation byte.
  CHECK_FALSE(utils::is_valid_utf8("\xF0\x0F"));
  // 0xFE and 0xFF are never legal bytes in UTF-8.
  CHECK_FALSE(utils::is_valid_utf8("\xFE\xFE\xFF\xFF"));

  CHECK(utils::is_valid_utf8(table_flip.data()));
}
// Exercises utils::convert_to_utf8 with a matching charset, a latin-1 source,
// and a deliberately wrong charset.
TEST_CASE("UTF-8 conversion")
{
  const std::string table_flip{"Biboumi ╯°□°)╯︵ ┻━┻"};
  // Precondition for the sections below: the fixture itself must be valid.
  REQUIRE(utils::is_valid_utf8(table_flip.data()));

  SECTION("Converting UTF-8 to UTF-8 should return the same string")
  {
    const std::string round_trip = utils::convert_to_utf8(table_flip, "UTF-8");
    CHECK(utils::is_valid_utf8(round_trip.c_str()) == true);
    CHECK(round_trip == table_flip);
  }

  SECTION("Testing latin-1 conversion")
  {
    // The same text twice: once as UTF-8, once as raw ISO-8859-1 bytes
    // (0xA5 is the yen sign in latin-1).
    const std::string expected_utf8 = "couc¥ou";
    const std::string latin1_bytes = "couc\xa5ou";

    SECTION("Convert proper latin-1 to UTF-8")
    {
      const std::string decoded =
          utils::convert_to_utf8(latin1_bytes.c_str(), "ISO-8859-1");
      CHECK(decoded == expected_utf8);
    }

    SECTION("Check the behaviour when the decoding fails (here because we provide a wrong charset)")
    {
      // 0xA5 is not a US-ASCII byte; the expected output carries U+FFFD
      // (REPLACEMENT CHARACTER) in its place.
      const std::string decoded =
          utils::convert_to_utf8(latin1_bytes, "US-ASCII");
      CHECK(decoded == "couc�ou");
    }
  }
}
// Exercises utils::remove_invalid_xml_chars: clean input must come back
// unchanged, and the listed control/non-character code points must be dropped.
TEST_CASE("Remove invalid XML chars")
{
  // Clean fixtures: one string mixing 4-, 3-, 2- and 1-byte UTF-8 sequences,
  // and one mixing ASCII with box-drawing characters and symbols.
  const std::string mixed_width_clean = "𤭢€¢$";
  const std::string table_flip = "Biboumi ╯°□°)╯︵ ┻━┻";

  CHECK(utils::remove_invalid_xml_chars(mixed_width_clean) == mixed_width_clean);
  CHECK(utils::remove_invalid_xml_chars(table_flip) == table_flip);

  // BEL (\a), BACKSPACE (U+0008), U+FFFE and U+FFFF are expected to vanish,
  // while "\r\n" and the heart symbol are expected to survive.
  CHECK(utils::remove_invalid_xml_chars("\acouco\u0008u\uFFFEt\uFFFFe\r\n♥")
        == "coucoute\r\n♥");
}
|