From e6f20d3c0fd4ba8696a4410a366741c9b9f3562d Mon Sep 17 00:00:00 2001 From: Florent Le Coz Date: Fri, 22 Nov 2013 21:00:32 +0100 Subject: Implement IRC format to xhtml-im conversion The generated XML is very verbose because each IRC formatting tag makes us close a span element and reopen it with the new style applied. However, this works quite well and is easy to implement. --- src/bridge/bridge.cpp | 26 +++---- src/bridge/bridge.hpp | 3 +- src/bridge/colors.cpp | 162 ++++++++++++++++++++++++++++++++++++++++++++ src/bridge/colors.hpp | 56 ++++++++++++--- src/test.cpp | 25 +++++++ src/xmpp/xmpp_component.cpp | 32 ++++++--- src/xmpp/xmpp_component.hpp | 8 +-- src/xmpp/xmpp_stanza.hpp | 10 +-- 8 files changed, 275 insertions(+), 47 deletions(-) diff --git a/src/bridge/bridge.cpp b/src/bridge/bridge.cpp index e08e2a4..7e6f801 100644 --- a/src/bridge/bridge.cpp +++ b/src/bridge/bridge.cpp @@ -21,14 +21,14 @@ Bridge::~Bridge() { } -std::string Bridge::sanitize_for_xmpp(const std::string& str) +Xmpp::body Bridge::make_xmpp_body(const std::string& str) { std::string res; if (utils::is_valid_utf8(str.c_str())) res = str; else res = utils::convert_to_utf8(str, "ISO-8859-1"); - return res; + return irc_format_to_xhtmlim(res); } IrcClient* Bridge::get_irc_client(const std::string& hostname, const std::string& username) @@ -102,7 +102,8 @@ void Bridge::send_channel_message(const Iid& iid, const std::string& body) irc->send_channel_message(iid.chan, *it); // We do not need to convert body to utf-8: it comes from our XMPP server, // so it's ok to send it back - this->xmpp->send_muc_message(iid.chan + "%" + iid.server, irc->get_own_nick(), body, this->user_jid); + this->xmpp->send_muc_message(iid.chan + "%" + iid.server, irc->get_own_nick(), + this->make_xmpp_body(body), this->user_jid); } void Bridge::send_private_message(const Iid& iid, const std::string& body) @@ -137,22 +138,17 @@ void Bridge::send_irc_kick(const Iid& iid, const std::string& target, const std: void 
Bridge::send_message(const Iid& iid, const std::string& nick, const std::string& body, const bool muc) { - std::string utf8_body = this->sanitize_for_xmpp(body); - if (utf8_body.substr(0, action_prefix_len) == action_prefix) - { // Special case for ACTION (/me) messages: - // "\01ACTION goes out\01" == "/me goes out" - utf8_body = std::string("/me ") + - utf8_body.substr(action_prefix_len, utf8_body.size() - action_prefix_len - 1); - } if (muc) - this->xmpp->send_muc_message(iid.chan + "%" + iid.server, nick, utf8_body, this->user_jid); + this->xmpp->send_muc_message(iid.chan + "%" + iid.server, nick, + this->make_xmpp_body(body), this->user_jid); else - this->xmpp->send_message(iid.chan + "%" + iid.server, utf8_body, this->user_jid); + this->xmpp->send_message(iid.chan + "%" + iid.server, + this->make_xmpp_body(body), this->user_jid); } void Bridge::send_muc_leave(Iid&& iid, std::string&& nick, const std::string& message, const bool self) { - this->xmpp->send_muc_leave(std::move(iid.chan) + "%" + std::move(iid.server), std::move(nick), this->sanitize_for_xmpp(message), this->user_jid, self); + this->xmpp->send_muc_leave(std::move(iid.chan) + "%" + std::move(iid.server), std::move(nick), this->make_xmpp_body(message), this->user_jid, self); } void Bridge::send_nick_change(Iid&& iid, const std::string& old_nick, const std::string& new_nick, const bool self) @@ -168,7 +164,7 @@ void Bridge::send_xmpp_message(const std::string& from, const std::string& autho body = std::string("[") + author + std::string("] ") + msg; else body = msg; - this->xmpp->send_message(from, this->sanitize_for_xmpp(body), this->user_jid); + this->xmpp->send_message(from, this->make_xmpp_body(body), this->user_jid); } void Bridge::send_user_join(const std::string& hostname, const std::string& chan_name, const std::string nick) @@ -183,7 +179,7 @@ void Bridge::send_self_join(const std::string& hostname, const std::string& chan void Bridge::send_topic(const std::string& hostname, const 
std::string& chan_name, const std::string topic) { - this->xmpp->send_topic(chan_name + "%" + hostname, this->sanitize_for_xmpp(topic), this->user_jid); + this->xmpp->send_topic(chan_name + "%" + hostname, this->make_xmpp_body(topic), this->user_jid); } std::string Bridge::get_own_nick(const Iid& iid) diff --git a/src/bridge/bridge.hpp b/src/bridge/bridge.hpp index b2124bd..1443191 100644 --- a/src/bridge/bridge.hpp +++ b/src/bridge/bridge.hpp @@ -2,6 +2,7 @@ # define BRIDGE_INCLUDED #include +#include #include #include @@ -23,7 +24,7 @@ public: explicit Bridge(const std::string& user_jid, XmppComponent* xmpp, Poller* poller); ~Bridge(); - static std::string sanitize_for_xmpp(const std::string& str); + static Xmpp::body make_xmpp_body(const std::string& str); /*** ** ** From XMPP to IRC. diff --git a/src/bridge/colors.cpp b/src/bridge/colors.cpp index 2f30354..024121b 100644 --- a/src/bridge/colors.cpp +++ b/src/bridge/colors.cpp @@ -1,7 +1,169 @@ #include +#include +#include + #include #include +#include + +static const char IRC_NUM_COLORS = 16; + +static const char* irc_colors_to_css[IRC_NUM_COLORS] = { + "white", + "black", + "blue", + "green", + "indianred", + "red", + "magenta", + "brown", + "yellow", + "lightgreen", + "cyan", + "lightcyan", + "lightblue", + "lightmagenta", + "gray", + "white", +}; + +#define XHTML_NS "http://www.w3.org/1999/xhtml" + +struct styles_t { + bool strong; + bool underline; + bool italic; + int fg; + int bg; +}; + +/** We keep the currently-applied CSS styles in a structure. Each time a tag + * is found, update this style list, then close the current span XML element + * (if it is open), then reopen it with all the new styles in it. This is + * done this way because IRC formatting does not map well with XML + * (hierarchical tags), it’s a lot easier and cleaner to remove all styles + * and reapply them for each tag, instead of trying to keep a consistent + * hierarchy of span, strong, em etc tags. 
The generated XML is one-level + * deep only. +*/ +Xmpp::body irc_format_to_xhtmlim(const std::string& s) +{ + if (s.find_first_of(irc_format_char) == std::string::npos) + // there is no special formatting at all + return std::make_tuple(s, nullptr); + + std::string cleaned; + + styles_t styles = {false, false, false, -1, -1}; + + std::unique_ptr result = std::make_unique("body"); + (*result)["xmlns"] = XHTML_NS; + + XmlNode* current_node = result.get(); + std::string::size_type pos_start = 0; + std::string::size_type pos_end; + + while ((pos_end = s.find_first_of(irc_format_char, pos_start)) != std::string::npos) + { + const std::string txt = s.substr(pos_start, pos_end-pos_start); + cleaned += txt; + if (current_node->has_children()) + current_node->get_last_child()->set_tail(txt); + else + current_node->set_inner(txt); + + if (s[pos_end] == IRC_FORMAT_BOLD_CHAR) + styles.strong = !styles.strong; + else if (s[pos_end] == IRC_FORMAT_UNDERLINE_CHAR) + styles.underline = !styles.underline; + else if (s[pos_end] == IRC_FORMAT_ITALIC_CHAR) + styles.italic = !styles.italic; + else if (s[pos_end] == IRC_FORMAT_RESET_CHAR) + styles = {false, false, false, -1, -1}; + else if (s[pos_end] == IRC_FORMAT_REVERSE_CHAR) + { } // TODO + else if (s[pos_end] == IRC_FORMAT_REVERSE2_CHAR) + { } // TODO + else if (s[pos_end] == IRC_FORMAT_FIXED_CHAR) + { } // TODO + else if (s[pos_end] == IRC_FORMAT_COLOR_CHAR) + { + size_t pos = pos_end + 1; + styles.fg = -1; + styles.bg = -1; + // get the first number following the format char + if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9') + { // first digit + styles.fg = s[pos++] - '0'; + if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9') + // second digit + styles.fg = styles.fg * 10 + s[pos++] - '0'; + } + if (pos < s.size() && s[pos] == ',') + { // get bg color after the comma + pos++; + if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9') + { // first digit + styles.bg = s[pos++] - '0'; + if (pos < s.size() && s[pos] >= '0' && 
s[pos] <= '9') + // second digit + styles.bg = styles.bg * 10 + s[pos++] - '0'; + } + } + pos_end = pos - 1; + } + + // close opened span, if any + if (current_node != result.get()) + { + current_node->close(); + result->add_child(current_node); + current_node = result.get(); + } + // Take all currently-applied style and create a new span with it + std::string styles_str; + if (styles.strong) + styles_str += "font-weight:bold;"; + if (styles.underline) + styles_str += "text-decoration:underline;"; + if (styles.italic) + styles_str += "font-style:italic;"; + if (styles.fg != -1) + styles_str += std::string("color:") + + irc_colors_to_css[styles.fg % IRC_NUM_COLORS] + ";"; + if (styles.bg != -1) + styles_str += std::string("background-color:") + + irc_colors_to_css[styles.bg % IRC_NUM_COLORS] + ";"; + if (!styles_str.empty()) + { + current_node = new XmlNode("span"); + (*current_node)["style"] = styles_str; + } + + pos_start = pos_end + 1; + } + + // If some text remains, without any format char, just append that text at + // the end of the current node + const std::string txt = s.substr(pos_start, pos_end-pos_start); + cleaned += txt; + if (current_node->has_children()) + current_node->get_last_child()->set_tail(txt); + else + current_node->set_inner(txt); + + if (current_node != result.get()) + { + current_node->close(); + result->add_child(current_node); + current_node = result.get(); + } + + + result->close(); + Xmpp::body body_res = std::make_tuple(cleaned, std::move(result)); + return body_res; } diff --git a/src/bridge/colors.hpp b/src/bridge/colors.hpp index da4498c..82e6faf 100644 --- a/src/bridge/colors.hpp +++ b/src/bridge/colors.hpp @@ -1,20 +1,54 @@ #ifndef COLORS_INCLUDED # define COLORS_INCLUDED -#include - /** - * A module handling the conversion between IRC colors and XHTML-IM, and vice versa. + * A module handling the conversion between IRC colors and XHTML-IM, and + * vice versa. 
*/ -#define IRC_COLOR_BOLD_CHAR '\x02' -#define IRC_COLOR_COLOR_CHAR '\x03' -#define IRC_COLOR_RESET_CHAR '\x0F' -#define IRC_COLOR_FIXED_CHAR '\x11' -#define IRC_COLOR_REVERSE_CHAR '\x12' -#define IRC_COLOR_REVERSE2_CHAR '\x16' -#define IRC_COLOR_ITALIC_CHAR '\x1D' -#define IRC_COLOR_UNDERLINE_CHAR '\x1F' +#include +#include +#include + +class XmlNode; + +namespace Xmpp +{ +// Contains: +// - an XMPP-valid UTF-8 body +// - an XML node representing the XHTML-IM body, or null + typedef std::tuple> body; +} +#define IRC_FORMAT_BOLD_CHAR '\x02' // done +#define IRC_FORMAT_COLOR_CHAR '\x03' // done +#define IRC_FORMAT_RESET_CHAR '\x0F' // done +#define IRC_FORMAT_FIXED_CHAR '\x11' // ?? +#define IRC_FORMAT_REVERSE_CHAR '\x12' // maybe one day +#define IRC_FORMAT_REVERSE2_CHAR '\x16' // wat +#define IRC_FORMAT_ITALIC_CHAR '\x1D' // done +#define IRC_FORMAT_UNDERLINE_CHAR '\x1F' // done + +static const char irc_format_char[] = { + IRC_FORMAT_BOLD_CHAR, + IRC_FORMAT_COLOR_CHAR, + IRC_FORMAT_RESET_CHAR, + IRC_FORMAT_FIXED_CHAR, + IRC_FORMAT_REVERSE_CHAR, + IRC_FORMAT_REVERSE2_CHAR, + IRC_FORMAT_ITALIC_CHAR, + IRC_FORMAT_UNDERLINE_CHAR, + '\x00' +}; + +/** + * Convert the passed string into an XML tree representing the XHTML version + * of the message, converting the IRC colors symbols into xhtml-im + * formatting. 
+ * + * Returns the body cleaned from any IRC formatting (but without any xhtml), + * and the body as XHTML-IM + */ +Xmpp::body irc_format_to_xhtmlim(const std::string& str); #endif // COLORS_INCLUDED diff --git a/src/test.cpp b/src/test.cpp index 1f2d185..b33ff1d 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -84,6 +84,31 @@ int main() assert(xml_escape(unescaped) == "'coucou'<cc>/&"gaga""); assert(xml_unescape(xml_escape(unescaped)) == unescaped); + /** + * Colors conversion + */ + std::unique_ptr xhtml; + std::string cleaned_up; + + std::tie(cleaned_up, xhtml) = + irc_format_to_xhtmlim("normalboldunder-and-boldbold normal" + "5red,5default-on-red10,2cyan-on-blue"); + assert(xhtml); + assert(xhtml->to_string() == "normalboldunder-and-boldbold normalreddefault-on-redcyan-on-blue"); + assert(cleaned_up == "normalboldunder-and-boldbold normalreddefault-on-redcyan-on-blue"); + + std::tie(cleaned_up, xhtml) = irc_format_to_xhtmlim("normal"); + assert(!xhtml && cleaned_up == "normal"); + + std::tie(cleaned_up, xhtml) = irc_format_to_xhtmlim(""); + assert(xhtml && !xhtml->has_children() && cleaned_up.empty()); + + std::tie(cleaned_up, xhtml) = irc_format_to_xhtmlim(",a"); + assert(xhtml && !xhtml->has_children() && cleaned_up == "a"); + + std::tie(cleaned_up, xhtml) = irc_format_to_xhtmlim(","); + assert(xhtml && !xhtml->has_children() && cleaned_up.empty()); + /** * JID parsing */ diff --git a/src/xmpp/xmpp_component.cpp b/src/xmpp/xmpp_component.cpp index 2d891bc..9245fde 100644 --- a/src/xmpp/xmpp_component.cpp +++ b/src/xmpp/xmpp_component.cpp @@ -18,7 +18,7 @@ #define DISCO_NS "http://jabber.org/protocol/disco" #define DISCO_ITEMS_NS DISCO_NS"#items" #define DISCO_INFO_NS DISCO_NS"#info" - +#define XHTMLIM_NS "http://jabber.org/protocol/xhtml-im" XmppComponent::XmppComponent(const std::string& hostname, const std::string& secret): served_hostname(hostname), @@ -257,13 +257,13 @@ Bridge* XmppComponent::get_user_bridge(const std::string& user_jid) } } -void 
XmppComponent::send_message(const std::string& from, const std::string& body, const std::string& to) +void XmppComponent::send_message(const std::string& from, Xmpp::body&& body, const std::string& to) { XmlNode node("message"); node["to"] = to; node["from"] = from + "@" + this->served_hostname; XmlNode body_node("body"); - body_node.set_inner(body); + body_node.set_inner(std::get<0>(body)); body_node.close(); node.add_child(std::move(body_node)); node.close(); @@ -319,21 +319,21 @@ void XmppComponent::send_self_join(const std::string& from, const std::string& n this->send_stanza(node); } -void XmppComponent::send_topic(const std::string& from, const std::string& topic, const std::string& to) +void XmppComponent::send_topic(const std::string& from, Xmpp::body&& topic, const std::string& to) { XmlNode message("message"); message["to"] = to; message["from"] = from + "@" + this->served_hostname; message["type"] = "groupchat"; XmlNode subject("subject"); - subject.set_inner(topic); + subject.set_inner(std::get<0>(topic)); subject.close(); message.add_child(std::move(subject)); message.close(); this->send_stanza(message); } -void XmppComponent::send_muc_message(const std::string& muc_name, const std::string& nick, const std::string body_str, const std::string& jid_to) +void XmppComponent::send_muc_message(const std::string& muc_name, const std::string& nick, Xmpp::body&& xmpp_body, const std::string& jid_to) { Stanza message("message"); message["to"] = jid_to; @@ -343,24 +343,34 @@ void XmppComponent::send_muc_message(const std::string& muc_name, const std::str message["from"] = muc_name + "@" + this->served_hostname; message["type"] = "groupchat"; XmlNode body("body"); - body.set_inner(body_str); + body.set_inner(std::get<0>(xmpp_body)); body.close(); message.add_child(std::move(body)); + if (std::get<1>(xmpp_body)) + { + XmlNode html("html"); + html["xmlns"] = XHTMLIM_NS; + // Pass the ownership of the pointer to this xmlnode + 
html.add_child(std::get<1>(xmpp_body).release()); + html.close(); + message.add_child(std::move(html)); + } message.close(); this->send_stanza(message); } -void XmppComponent::send_muc_leave(std::string&& muc_name, std::string&& nick, std::string&& message, const std::string& jid_to, const bool self) +void XmppComponent::send_muc_leave(std::string&& muc_name, std::string&& nick, Xmpp::body&& message, const std::string& jid_to, const bool self) { Stanza presence("presence"); presence["to"] = jid_to; presence["from"] = muc_name + "@" + this->served_hostname + "/" + nick; presence["type"] = "unavailable"; - if (!message.empty() || self) + const std::string message_str = std::get<0>(message); + if (!message_str.empty() || self) { XmlNode status("status"); - if (!message.empty()) - status.set_inner(std::move(message)); + if (!message_str.empty()) + status.set_inner(message_str); if (self) status["code"] = "110"; status.close(); diff --git a/src/xmpp/xmpp_component.hpp b/src/xmpp/xmpp_component.hpp index e45c64b..bf85536 100644 --- a/src/xmpp/xmpp_component.hpp +++ b/src/xmpp/xmpp_component.hpp @@ -58,7 +58,7 @@ public: /** * Send a message from from@served_hostname, with the given body */ - void send_message(const std::string& from, const std::string& body, const std::string& to); + void send_message(const std::string& from, Xmpp::body&& body, const std::string& to); /** * Send a join from a new participant */ @@ -70,15 +70,15 @@ public: /** * Send the MUC topic to the user */ - void send_topic(const std::string& from, const std::string& topic, const std::string& to); + void send_topic(const std::string& from, Xmpp::body&& xmpp_topic, const std::string& to); /** * Send a (non-private) message to the MUC */ - void send_muc_message(const std::string& muc_name, const std::string& nick, const std::string body_str, const std::string& jid_to); + void send_muc_message(const std::string& muc_name, const std::string& nick, Xmpp::body&& body, const std::string& jid_to); /** * Send 
an unavailable presence for this nick */ - void send_muc_leave(std::string&& muc_name, std::string&& nick, std::string&& message, const std::string& jid_to, const bool self); + void send_muc_leave(std::string&& muc_name, std::string&& nick, Xmpp::body&& message, const std::string& jid_to, const bool self); /** * Indicate that a participant changed his nick */ diff --git a/src/xmpp/xmpp_stanza.hpp b/src/xmpp/xmpp_stanza.hpp index ca21ab4..87a80e9 100644 --- a/src/xmpp/xmpp_stanza.hpp +++ b/src/xmpp/xmpp_stanza.hpp @@ -21,9 +21,9 @@ class AttributeNotFound: public std::exception nullptr) * - zero, one or more children XML nodes * - A name - * - attributes - * - inner data (inside the node) - * - tail data (just after the node) + * - A map of attributes + * - inner data (text inside the node) + * - tail data (text just after the node) */ class XmlNode { @@ -32,8 +32,8 @@ public: explicit XmlNode(const std::string& name); XmlNode(XmlNode&& node): name(std::move(node.name)), - parent(std::move(node.parent)), - closed(std::move(node.closed)), + parent(node.parent), + closed(node.closed), attributes(std::move(node.attributes)), children(std::move(node.children)), inner(std::move(node.inner)), -- cgit v1.2.3