From ccebe901d7d76dfddc082d994efa54ef2aefee57 Mon Sep 17 00:00:00 2001
From: Florent Le Coz
Date: Sat, 9 Nov 2013 06:01:47 +0100
Subject: Check UTF-8 encoding, and convert strings to UTF-8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Handle conversion errors properly by inserting � instead.

Add a binary header to provide a portable way to write binary literals (I like
them)

Also add a test file.

ref #2404
---
NOTE(review): the operands of the five #include directives in the hunk below
were lost when this patch was extracted (the angle-bracketed header names were
stripped). Restore them from the repository before applying; they are left
untouched here rather than guessed.

 src/test.cpp | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 src/test.cpp

(limited to 'src/test.cpp')

diff --git a/src/test.cpp b/src/test.cpp
new file mode 100644
index 0000000..e3bfa55
--- /dev/null
+++ b/src/test.cpp
@@ -0,0 +1,43 @@
+/**
+ * A very simple hand-written test suite: every check is a plain assert()
+ */
+
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+int main()
+{
+  /**
+   * Encoding: UTF-8 validation first, then conversion from other charsets
+   */
+  const char* valid = "C̡͔͕̩͙̽ͫ̈́ͥ̿̆ͧ̚r̸̩̘͍̻͖̆͆͛͊̉̕͡o͇͈̳̤̱̊̈͢q̻͍̦̮͕ͥͬͬ̽ͭ͌̾ͅǔ͉͕͇͚̙͉̭͉̇̽ȇ͈̮̼͍͔ͣ͊͞͝ͅ ͫ̾ͪ̓ͥ̆̋̔҉̢̦̠͈͔̖̲̯̦ụ̶̯͐̃̋ͮ͆͝n̬̱̭͇̻̱̰̖̤̏͛̏̿̑͟ë́͐҉̸̥̪͕̹̻̙͉̰ ̹̼̱̦̥ͩ͑̈́͑͝ͅt͍̥͈̹̝ͣ̃̔̈̔ͧ̕͝ḙ̸̖̟̙͙ͪ͢ų̯̞̼̲͓̻̞͛̃̀́b̮̰̗̩̰̊̆͗̾̎̆ͯ͌͝.̗̙͎̦ͫ̈́ͥ͌̈̓ͬ";
+  assert(utils::is_valid_utf8(valid) == true);
+  const char* invalid = "\xF0\x0F";  // 4-byte lead byte followed by a non-continuation byte
+  assert(utils::is_valid_utf8(invalid) == false);
+  const char* invalid2 = "\xFE\xFE\xFF\xFF";  // 0xFE and 0xFF never occur in UTF-8
+  assert(utils::is_valid_utf8(invalid2) == false);
+
+  std::string in = "coucou les copains ♥ ";
+  assert(utils::is_valid_utf8(in.c_str()) == true);
+  std::string res = utils::convert_to_utf8(in, "UTF-8");  // UTF-8 to UTF-8 must be a no-op
+  assert(utils::is_valid_utf8(res.c_str()) == true && res == in);
+
+  std::string original_utf8("couc¥ou");
+  std::string original_latin1("couc\xa5ou");  // same text, with '¥' as the single byte 0xA5
+
+  // Decoding the Latin-1 bytes as ISO-8859-1 must give back the UTF-8 text
+  std::string from_latin1 = utils::convert_to_utf8(original_latin1.c_str(), "ISO-8859-1");
+  assert(from_latin1 == original_utf8);
+
+  // Check the behaviour when the decoding fails (here because we provide a
+  // wrong charset): byte 0xA5 is not US-ASCII, so � must be inserted instead
+  std::string from_ascii = utils::convert_to_utf8(original_latin1, "US-ASCII");
+  assert(from_ascii == "couc�ou");
+  return 0;
+}
-- 
cgit v1.2.3