summaryrefslogtreecommitdiff
path: root/src/test.cpp
diff options
context:
space:
mode:
authorFlorent Le Coz <louiz@louiz.org>2013-11-09 06:01:47 +0100
committerFlorent Le Coz <louiz@louiz.org>2013-11-09 06:01:47 +0100
commitccebe901d7d76dfddc082d994efa54ef2aefee57 (patch)
tree97a542cce7c3c3185553859f679dc074f8f0286f /src/test.cpp
parenta418b6ed5d70f0e61e71bb1adce2a693ade89e30 (diff)
downloadbiboumi-ccebe901d7d76dfddc082d994efa54ef2aefee57.tar.gz
biboumi-ccebe901d7d76dfddc082d994efa54ef2aefee57.tar.bz2
biboumi-ccebe901d7d76dfddc082d994efa54ef2aefee57.tar.xz
biboumi-ccebe901d7d76dfddc082d994efa54ef2aefee57.zip
Check UTF-8 encoding, and convert strings to UTF-8
Handle conversion errors properly by inserting � instead. Add a binary header to provide a portable way to write binary literals (I like them). Also add a test file. ref #2404
Diffstat (limited to 'src/test.cpp')
-rw-r--r--src/test.cpp43
1 files changed, 43 insertions, 0 deletions
diff --git a/src/test.cpp b/src/test.cpp
new file mode 100644
index 0000000..e3bfa55
--- /dev/null
+++ b/src/test.cpp
@@ -0,0 +1,43 @@
+/**
+ * Just a very simple test suite, by hand, using assert()
+ */
+
+#include <assert.h>
+
+#include <iostream>
+
+#include <utils/encoding.hpp>
+#include <string.h>
+
+#include <fstream>
+
+int main() // Hand-written checks for utils/encoding.hpp; returns 0 when every assert() holds.
+{
+ /**
+ * Encoding: is_valid_utf8() must accept well-formed text (below, letters carrying many combining marks) and reject malformed byte sequences
+ */
+ const char* valid = "C̡͔͕̩͙̽ͫ̈́ͥ̿̆ͧ̚r̸̩̘͍̻͖̆͆͛͊̉̕͡o͇͈̳̤̱̊̈͢q̻͍̦̮͕ͥͬͬ̽ͭ͌̾ͅǔ͉͕͇͚̙͉̭͉̇̽ȇ͈̮̼͍͔ͣ͊͞͝ͅ ͫ̾ͪ̓ͥ̆̋̔҉̢̦̠͈͔̖̲̯̦ụ̶̯͐̃̋ͮ͆͝n̬̱̭͇̻̱̰̖̤̏͛̏̿̑͟ë́͐҉̸̥̪͕̹̻̙͉̰ ̹̼̱̦̥ͩ͑̈́͑͝ͅt͍̥͈̹̝ͣ̃̔̈̔ͧ̕͝ḙ̸̖̟̙͙ͪ͢ų̯̞̼̲͓̻̞͛̃̀́b̮̰̗̩̰̊̆͗̾̎̆ͯ͌͝.̗̙͎̦ͫ̈́ͥ͌̈̓ͬ";
+ assert(utils::is_valid_utf8(valid) == true);
+ const char* invalid = "\xF0\x0F"; // 0xF0 opens a 4-byte sequence, but 0x0F is not a continuation byte
+ assert(utils::is_valid_utf8(invalid) == false);
+ const char* invalid2 = "\xFE\xFE\xFF\xFF"; // 0xFE and 0xFF never occur in well-formed UTF-8
+ assert(utils::is_valid_utf8(invalid2) == false);
+
+ std::string in = "coucou les copains ♥ ";
+ assert(utils::is_valid_utf8(in.c_str()) == true);
+ std::string res = utils::convert_to_utf8(in, "UTF-8"); // UTF-8 to UTF-8: the next assert expects the text back unchanged
+ assert(utils::is_valid_utf8(res.c_str()) == true && res == in);
+
+ std::string original_utf8("couc¥ou"); // U+00A5 YEN SIGN; assumes this source file is saved as UTF-8
+ std::string original_latin1("couc\xa5ou"); // same text in ISO-8859-1, where U+00A5 is the single byte 0xA5
+
+ // Converting the ISO-8859-1 bytes to UTF-8 must give back the UTF-8 original
+ std::string from_latin1 = utils::convert_to_utf8(original_latin1.c_str(), "ISO-8859-1");
+ assert(from_latin1 == original_utf8);
+
+ // Check the behaviour when the decoding fails (here because we provide a
+ // wrong charset: 0xA5 is not US-ASCII); the expected result has U+FFFD in its place
+ std::string from_ascii = utils::convert_to_utf8(original_latin1, "US-ASCII");
+ assert(from_ascii == "couc�ou");
+ return 0;
+}