diff options
author | Vasudev Kamath <vasudev@copyninja.info> | 2016-10-23 21:09:40 +0530 |
---|---|---|
committer | Vasudev Kamath <vasudev@copyninja.info> | 2016-10-23 21:09:40 +0530 |
commit | eda4b75b1cff83336e87da90efca9fd6b4ced2c7 (patch) | |
tree | 491317ce50b5d19bc434ccc4b448d1bc70520177 /tests/encoding.cpp | |
parent | 716c40e4ec45f8d538695225f4f06d541d959084 (diff) | |
parent | 0f14fe83ef53b08bd8fa09670c82f4996c329bdc (diff) | |
download | biboumi-eda4b75b1cff83336e87da90efca9fd6b4ced2c7.tar.gz biboumi-eda4b75b1cff83336e87da90efca9fd6b4ced2c7.tar.bz2 biboumi-eda4b75b1cff83336e87da90efca9fd6b4ced2c7.tar.xz biboumi-eda4b75b1cff83336e87da90efca9fd6b4ced2c7.zip |
New upstream version 3.0upstream/3.0
Diffstat (limited to 'tests/encoding.cpp')
-rw-r--r-- | tests/encoding.cpp | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/tests/encoding.cpp b/tests/encoding.cpp new file mode 100644 index 0000000..389cf23 --- /dev/null +++ b/tests/encoding.cpp @@ -0,0 +1,56 @@ +#include "catch.hpp" + +#include <utils/encoding.hpp> + + +TEST_CASE("UTF-8 validation") +{ + const char* valid = "C̡͔͕̩͙̽ͫ̈́ͥ̿̆ͧ̚r̸̩̘͍̻͖̆͆͛͊̉̕͡o͇͈̳̤̱̊̈͢q̻͍̦̮͕ͥͬͬ̽ͭ͌̾ͅǔ͉͕͇͚̙͉̭͉̇̽ȇ͈̮̼͍͔ͣ͊͞͝ͅ ͫ̾ͪ̓ͥ̆̋̔҉̢̦̠͈͔̖̲̯̦ụ̶̯͐̃̋ͮ͆͝n̬̱̭͇̻̱̰̖̤̏͛̏̿̑͟ë́͐҉̸̥̪͕̹̻̙͉̰ ̹̼̱̦̥ͩ͑̈́͑͝ͅt͍̥͈̹̝ͣ̃̔̈̔ͧ̕͝ḙ̸̖̟̙͙ͪ͢ų̯̞̼̲͓̻̞͛̃̀́b̮̰̗̩̰̊̆͗̾̎̆ͯ͌͝.̗̙͎̦ͫ̈́ͥ͌̈̓ͬ"; + CHECK(utils::is_valid_utf8(valid)); + CHECK_FALSE(utils::is_valid_utf8("\xF0\x0F")); + CHECK_FALSE(utils::is_valid_utf8("\xFE\xFE\xFF\xFF")); + + std::string in = "Biboumi ╯°□°)╯︵ ┻━┻"; + INFO(in); + CHECK(utils::is_valid_utf8(in.data())); +} + +TEST_CASE("UTF-8 conversion") +{ + std::string in = "Biboumi ╯°□°)╯︵ ┻━┻"; + REQUIRE(utils::is_valid_utf8(in.data())); + + SECTION("Converting UTF-8 to UTF-8 should return the same string") + { + std::string res = utils::convert_to_utf8(in, "UTF-8"); + CHECK(utils::is_valid_utf8(res.c_str()) == true); + CHECK(res == in); + } + + SECTION("Testing latin-1 conversion") + { + std::string original_utf8("couc¥ou"); + std::string original_latin1("couc\xa5ou"); + + SECTION("Convert proper latin-1 to UTF-8") + { + std::string from_latin1 = utils::convert_to_utf8(original_latin1.c_str(), "ISO-8859-1"); + CHECK(from_latin1 == original_utf8); + } + SECTION("Check the behaviour when the decoding fails (here because we provide a wrong charset)") + { + std::string from_ascii = utils::convert_to_utf8(original_latin1, "US-ASCII"); + CHECK(from_ascii == "couc�ou"); + } + } +} + +TEST_CASE("Remove invalid XML chars") +{ + std::string without_ctrl_char("𤭢€¢$"); + std::string in = "Biboumi ╯°□°)╯︵ ┻━┻"; + INFO(in); + CHECK(utils::remove_invalid_xml_chars(without_ctrl_char) == without_ctrl_char); + CHECK(utils::remove_invalid_xml_chars(in) == in); + CHECK(utils::remove_invalid_xml_chars("\acouco\u0008u\uFFFEt\uFFFFe\r\n♥") == "coucoute\r\n♥"); +} |