From e6f20d3c0fd4ba8696a4410a366741c9b9f3562d Mon Sep 17 00:00:00 2001 From: Florent Le Coz Date: Fri, 22 Nov 2013 21:00:32 +0100 Subject: Implement IRC format to xhtml-im conversion The generated XML is very verbose because each IRC formatting tag makes us close a element and reopen it with the new style applied. However, this works quite well and is easy to implement. --- src/bridge/bridge.cpp | 26 ++++---- src/bridge/bridge.hpp | 3 +- src/bridge/colors.cpp | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/bridge/colors.hpp | 56 +++++++++++++---- 4 files changed, 220 insertions(+), 27 deletions(-) (limited to 'src/bridge') diff --git a/src/bridge/bridge.cpp b/src/bridge/bridge.cpp index e08e2a4..7e6f801 100644 --- a/src/bridge/bridge.cpp +++ b/src/bridge/bridge.cpp @@ -21,14 +21,14 @@ Bridge::~Bridge() { } -std::string Bridge::sanitize_for_xmpp(const std::string& str) +Xmpp::body Bridge::make_xmpp_body(const std::string& str) { std::string res; if (utils::is_valid_utf8(str.c_str())) res = str; else res = utils::convert_to_utf8(str, "ISO-8859-1"); - return res; + return irc_format_to_xhtmlim(res); } IrcClient* Bridge::get_irc_client(const std::string& hostname, const std::string& username) @@ -102,7 +102,8 @@ void Bridge::send_channel_message(const Iid& iid, const std::string& body) irc->send_channel_message(iid.chan, *it); // We do not need to convert body to utf-8: it comes from our XMPP server, // so it's ok to send it back - this->xmpp->send_muc_message(iid.chan + "%" + iid.server, irc->get_own_nick(), body, this->user_jid); + this->xmpp->send_muc_message(iid.chan + "%" + iid.server, irc->get_own_nick(), + this->make_xmpp_body(body), this->user_jid); } void Bridge::send_private_message(const Iid& iid, const std::string& body) @@ -137,22 +138,17 @@ void Bridge::send_irc_kick(const Iid& iid, const std::string& target, const std: void Bridge::send_message(const Iid& iid, const std::string& nick, const std::string& body, const bool muc) { - std::string utf8_body = this->sanitize_for_xmpp(body); - if (utf8_body.substr(0, action_prefix_len) == action_prefix) - { // Special case for ACTION (/me) messages: - // "\01ACTION goes out\01" == "/me goes out" - utf8_body = std::string("/me ") + - utf8_body.substr(action_prefix_len, utf8_body.size() - action_prefix_len - 1); - } if (muc) - this->xmpp->send_muc_message(iid.chan + "%" + iid.server, nick, utf8_body, this->user_jid); + this->xmpp->send_muc_message(iid.chan + "%" + iid.server, nick, + this->make_xmpp_body(body), this->user_jid); else - this->xmpp->send_message(iid.chan + "%" + iid.server, utf8_body, this->user_jid); + this->xmpp->send_message(iid.chan + "%" + iid.server, + this->make_xmpp_body(body), this->user_jid); } void Bridge::send_muc_leave(Iid&& iid, std::string&& nick, const std::string& message, const bool self) { - this->xmpp->send_muc_leave(std::move(iid.chan) + "%" + std::move(iid.server), std::move(nick), this->sanitize_for_xmpp(message), this->user_jid, self); + this->xmpp->send_muc_leave(std::move(iid.chan) + "%" + std::move(iid.server), std::move(nick), this->make_xmpp_body(message), this->user_jid, self); } void Bridge::send_nick_change(Iid&& iid, const std::string& old_nick, const std::string& new_nick, const bool self) @@ -168,7 +164,7 @@ void Bridge::send_xmpp_message(const std::string& from, const std::string& autho body = std::string("[") + author + std::string("] ") + msg; else body = msg; - this->xmpp->send_message(from, this->sanitize_for_xmpp(body), this->user_jid); + this->xmpp->send_message(from, this->make_xmpp_body(body), this->user_jid); } void Bridge::send_user_join(const std::string& hostname, const std::string& chan_name, const std::string nick) @@ -183,7 +179,7 @@ void Bridge::send_self_join(const std::string& hostname, const std::string& chan void Bridge::send_topic(const std::string& hostname, const std::string& chan_name, const std::string topic) { - this->xmpp->send_topic(chan_name + "%" + hostname, this->sanitize_for_xmpp(topic), this->user_jid); + this->xmpp->send_topic(chan_name + "%" + hostname, this->make_xmpp_body(topic), this->user_jid); } std::string Bridge::get_own_nick(const Iid& iid) diff --git a/src/bridge/bridge.hpp b/src/bridge/bridge.hpp index b2124bd..1443191 100644 --- a/src/bridge/bridge.hpp +++ b/src/bridge/bridge.hpp @@ -2,6 +2,7 @@ # define BRIDGE_INCLUDED #include +#include #include #include @@ -23,7 +24,7 @@ public: explicit Bridge(const std::string& user_jid, XmppComponent* xmpp, Poller* poller); ~Bridge(); - static std::string sanitize_for_xmpp(const std::string& str); + static Xmpp::body make_xmpp_body(const std::string& str); /*** ** ** From XMPP to IRC. diff --git a/src/bridge/colors.cpp b/src/bridge/colors.cpp index 2f30354..024121b 100644 --- a/src/bridge/colors.cpp +++ b/src/bridge/colors.cpp @@ -1,7 +1,169 @@ #include +#include +#include + #include #include +#include + +static const char IRC_NUM_COLORS = 16; + +static const char* irc_colors_to_css[IRC_NUM_COLORS] = { + "white", + "black", + "blue", + "green", + "indianred", + "red", + "magenta", + "brown", + "yellow", + "lightgreen", + "cyan", + "lightcyan", + "lightblue", + "lightmagenta", + "gray", + "white", +}; + +#define XHTML_NS "http://www.w3.org/1999/xhtml" + +struct styles_t { + bool strong; + bool underline; + bool italic; + int fg; + int bg; +}; + +/** We keep the currently-applied CSS styles in a structure. Each time a tag + * is found, update this style list, then close the current span XML element + * (if it is open), then reopen it with all the new styles in it. This is + * done this way because IRC formatting does not map well with XML + * (hierarchical tags), it’s a lot easier and cleaner to remove all styles + * and reapply them for each tag, instead of trying to keep a consistent + * hierarchy of span, strong, em etc tags. The generated XML is one-level + * deep only. +*/ +Xmpp::body irc_format_to_xhtmlim(const std::string& s) +{ + if (s.find_first_of(irc_format_char) == std::string::npos) + // there is no special formatting at all + return std::make_tuple(s, nullptr); + + std::string cleaned; + + styles_t styles = {false, false, false, -1, -1}; + + std::unique_ptr result = std::make_unique("body"); + (*result)["xmlns"] = XHTML_NS; + + XmlNode* current_node = result.get(); + std::string::size_type pos_start = 0; + std::string::size_type pos_end; + + while ((pos_end = s.find_first_of(irc_format_char, pos_start)) != std::string::npos) + { + const std::string txt = s.substr(pos_start, pos_end-pos_start); + cleaned += txt; + if (current_node->has_children()) + current_node->get_last_child()->set_tail(txt); + else + current_node->set_inner(txt); + + if (s[pos_end] == IRC_FORMAT_BOLD_CHAR) + styles.strong = !styles.strong; + else if (s[pos_end] == IRC_FORMAT_UNDERLINE_CHAR) + styles.underline = !styles.underline; + else if (s[pos_end] == IRC_FORMAT_ITALIC_CHAR) + styles.italic = !styles.italic; + else if (s[pos_end] == IRC_FORMAT_RESET_CHAR) + styles = {false, false, false, -1, -1}; + else if (s[pos_end] == IRC_FORMAT_REVERSE_CHAR) + { } // TODO + else if (s[pos_end] == IRC_FORMAT_REVERSE2_CHAR) + { } // TODO + else if (s[pos_end] == IRC_FORMAT_FIXED_CHAR) + { } // TODO + else if (s[pos_end] == IRC_FORMAT_COLOR_CHAR) + { + size_t pos = pos_end + 1; + styles.fg = -1; + styles.bg = -1; + // get the first number following the format char + if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9') + { // first digit + styles.fg = s[pos++] - '0'; + if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9') + // second digit + styles.fg = styles.fg * 10 + s[pos++] - '0'; + } + if (pos < s.size() && s[pos] == ',') + { // get bg color after the comma + pos++; + if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9') + { // first digit + styles.bg = s[pos++] - '0'; + if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9') + // second digit + styles.bg = styles.bg * 10 + s[pos++] - '0'; + } + } + pos_end = pos - 1; + } + + // close opened span, if any + if (current_node != result.get()) + { + current_node->close(); + result->add_child(current_node); + current_node = result.get(); + } + // Take all currently-applied style and create a new span with it + std::string styles_str; + if (styles.strong) + styles_str += "font-weight:bold;"; + if (styles.underline) + styles_str += "text-decoration:underline;"; + if (styles.italic) + styles_str += "font-style:italic;"; + if (styles.fg != -1) + styles_str += std::string("color:") + + irc_colors_to_css[styles.fg % IRC_NUM_COLORS] + ";"; + if (styles.bg != -1) + styles_str += std::string("background-color:") + + irc_colors_to_css[styles.bg % IRC_NUM_COLORS] + ";"; + if (!styles_str.empty()) + { + current_node = new XmlNode("span"); + (*current_node)["style"] = styles_str; + } + + pos_start = pos_end + 1; + } + + // If some text remains, without any format char, just append that text at + // the end of the current node + const std::string txt = s.substr(pos_start, pos_end-pos_start); + cleaned += txt; + if (current_node->has_children()) + current_node->get_last_child()->set_tail(txt); + else + current_node->set_inner(txt); + + if (current_node != result.get()) + { + current_node->close(); + result->add_child(current_node); + current_node = result.get(); + } + + + result->close(); + Xmpp::body body_res = std::make_tuple(cleaned, std::move(result)); + return body_res; } diff --git a/src/bridge/colors.hpp b/src/bridge/colors.hpp index da4498c..82e6faf 100644 --- a/src/bridge/colors.hpp +++ b/src/bridge/colors.hpp @@ -1,20 +1,54 @@ #ifndef COLORS_INCLUDED # define COLORS_INCLUDED -#include - /** - * A module handling the conversion between IRC colors and XHTML-IM, and vice versa. + * A module handling the conversion between IRC colors and XHTML-IM, and + * vice versa. */ -#define IRC_COLOR_BOLD_CHAR '\x02' -#define IRC_COLOR_COLOR_CHAR '\x03' -#define IRC_COLOR_RESET_CHAR '\x0F' -#define IRC_COLOR_FIXED_CHAR '\x11' -#define IRC_COLOR_REVERSE_CHAR '\x12' -#define IRC_COLOR_REVERSE2_CHAR '\x16' -#define IRC_COLOR_ITALIC_CHAR '\x1D' -#define IRC_COLOR_UNDERLINE_CHAR '\x1F' +#include +#include +#include + +class XmlNode; + +namespace Xmpp +{ +// Contains: +// - an XMPP-valid UTF-8 body +// - an XML node representing the XHTML-IM body, or null + typedef std::tuple> body; +} +#define IRC_FORMAT_BOLD_CHAR '\x02' // done +#define IRC_FORMAT_COLOR_CHAR '\x03' // done +#define IRC_FORMAT_RESET_CHAR '\x0F' // done +#define IRC_FORMAT_FIXED_CHAR '\x11' // ?? +#define IRC_FORMAT_REVERSE_CHAR '\x12' // maybe one day +#define IRC_FORMAT_REVERSE2_CHAR '\x16' // wat +#define IRC_FORMAT_ITALIC_CHAR '\x1D' // done +#define IRC_FORMAT_UNDERLINE_CHAR '\x1F' // done + +static const char irc_format_char[] = { + IRC_FORMAT_BOLD_CHAR, + IRC_FORMAT_COLOR_CHAR, + IRC_FORMAT_RESET_CHAR, + IRC_FORMAT_FIXED_CHAR, + IRC_FORMAT_REVERSE_CHAR, + IRC_FORMAT_REVERSE2_CHAR, + IRC_FORMAT_ITALIC_CHAR, + IRC_FORMAT_UNDERLINE_CHAR, + '\x00' +}; + +/** + * Convert the passed string into an XML tree representing the XHTML version + * of the message, converting the IRC colors symbols into xhtml-im + * formatting. + * + * Returns the body cleaned from any IRC formatting (but without any xhtml), + * and the body as XHTML-IM + */ +Xmpp::body irc_format_to_xhtmlim(const std::string& str); #endif // COLORS_INCLUDED -- cgit v1.2.3