From 3960e4d5afa09c299f595b411ee8522db30580fd Mon Sep 17 00:00:00 2001
From: Florent Le Coz
Date: Wed, 11 Dec 2013 21:07:39 +0100
Subject: Functions to provide xml-valid strings

By removing invalid chars, see http://www.w3.org/TR/xml/#charsets
---
 src/utils/encoding.hpp | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/utils/encoding.hpp')

diff --git a/src/utils/encoding.hpp b/src/utils/encoding.hpp
index 362f1df..a3bccfc 100644
--- a/src/utils/encoding.hpp
+++ b/src/utils/encoding.hpp
@@ -11,6 +11,14 @@ namespace utils
    * Based on http://en.wikipedia.org/wiki/UTF-8#Description
    */
   bool is_valid_utf8(const char* s);
+  /**
+   * Remove all invalid codepoints from the given utf-8-encoded string.
+   * The value returned is a copy of the string, without the removed chars.
+   *
+   * See http://www.w3.org/TR/xml/#charsets for the list of valid characters
+   * in XML.
+   */
+  std::string remove_invalid_xml_chars(const std::string& original);
   /**
    * Convert the given string (encoded is "encoding") into valid utf-8.
    * If some decoding fails, insert an utf-8 placeholder character instead.
-- 
cgit v1.2.3