diff options
author | Florent Le Coz <louiz@louiz.org> | 2013-11-09 06:01:47 +0100 |
---|---|---|
committer | Florent Le Coz <louiz@louiz.org> | 2013-11-09 06:01:47 +0100 |
commit | ccebe901d7d76dfddc082d994efa54ef2aefee57 (patch) | |
tree | 97a542cce7c3c3185553859f679dc074f8f0286f /src/test.cpp | |
parent | a418b6ed5d70f0e61e71bb1adce2a693ade89e30 (diff) | |
download | biboumi-ccebe901d7d76dfddc082d994efa54ef2aefee57.tar.gz biboumi-ccebe901d7d76dfddc082d994efa54ef2aefee57.tar.bz2 biboumi-ccebe901d7d76dfddc082d994efa54ef2aefee57.tar.xz biboumi-ccebe901d7d76dfddc082d994efa54ef2aefee57.zip |
Check UTF-8 encoding, and convert strings to UTF-8
Handle conversion errors properly by inserting � instead. Add a binary
header to provide a portable way to write binary literals (I like them). Also
add a test file. ref #2404
Diffstat (limited to 'src/test.cpp')
-rw-r--r-- | src/test.cpp | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/src/test.cpp b/src/test.cpp new file mode 100644 index 0000000..e3bfa55 --- /dev/null +++ b/src/test.cpp @@ -0,0 +1,43 @@ +/** + * Just a very simple test suite, by hand, using assert() + */ + +#include <assert.h> + +#include <iostream> + +#include <utils/encoding.hpp> +#include <string.h> + +#include <fstream> + +int main() +{ + /** + * Encoding + */ + const char* valid = "C̡͔͕̩͙̽ͫ̈́ͥ̿̆ͧ̚r̸̩̘͍̻͖̆͆͛͊̉̕͡o͇͈̳̤̱̊̈͢q̻͍̦̮͕ͥͬͬ̽ͭ͌̾ͅǔ͉͕͇͚̙͉̭͉̇̽ȇ͈̮̼͍͔ͣ͊͞͝ͅ ͫ̾ͪ̓ͥ̆̋̔҉̢̦̠͈͔̖̲̯̦ụ̶̯͐̃̋ͮ͆͝n̬̱̭͇̻̱̰̖̤̏͛̏̿̑͟ë́͐҉̸̥̪͕̹̻̙͉̰ ̹̼̱̦̥ͩ͑̈́͑͝ͅt͍̥͈̹̝ͣ̃̔̈̔ͧ̕͝ḙ̸̖̟̙͙ͪ͢ų̯̞̼̲͓̻̞͛̃̀́b̮̰̗̩̰̊̆͗̾̎̆ͯ͌͝.̗̙͎̦ͫ̈́ͥ͌̈̓ͬ"; + assert(utils::is_valid_utf8(valid) == true); + const char* invalid = "\xF0\x0F"; + assert(utils::is_valid_utf8(invalid) == false); + const char* invalid2 = "\xFE\xFE\xFF\xFF"; + assert(utils::is_valid_utf8(invalid2) == false); + + std::string in = "coucou les copains ♥ "; + assert(utils::is_valid_utf8(in.c_str()) == true); + std::string res = utils::convert_to_utf8(in, "UTF-8"); + assert(utils::is_valid_utf8(res.c_str()) == true && res == in); + + std::string original_utf8("couc¥ou"); + std::string original_latin1("couc\xa5ou"); + + // When converting back to utf-8 + std::string from_latin1 = utils::convert_to_utf8(original_latin1.c_str(), "ISO-8859-1"); + assert(from_latin1 == original_utf8); + + // Check the behaviour when the decoding fails (here because we provide a + // wrong charset) + std::string from_ascii = utils::convert_to_utf8(original_latin1, "US-ASCII"); + assert(from_ascii == "couc�ou"); + return 0; +} |