看了看,jsoncpp里面的代码,发现其实并不难,代码很有规矩、易懂,一下就能定位到关键的位置:
json_tool.h中的codePointToUTF8函数。
贴上这个函数的原型来看看就能明白了:
/// Converts a unicode code-point to UTF-8.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
JSONCPP_STRING result;
// based on description from http://en.wikipedia.org/wiki/UTF-8
if (cp <= 0x7f) {
result.resize(1);
result[0] = static_cast<char>(cp);
} else if (cp <= 0x7FF) {
result.resize(2);
result[1] = static_cast<char>(0x80 | (0x3f & cp));
result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
} else if (cp <= 0xFFFF) {
result.resize(3);
result[2] = static_cast<char>(0x80 | (0x3f & cp));
result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
} else if (cp <= 0x10FFFF) {
result.resize(4);
result[3] = static_cast<char>(0x80 | (0x3f & cp));
result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
}
return result;
}
稍微改改就可以在json解析的时候直接支持unicode编码的字符串了:
/// Converts a unicode code-point to UTF-8.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
JSONCPP_STRING result;
// based on description from http://en.wikipedia.org/wiki/UTF-8
if (cp <= 0x7f) {
result.resize(1);
result[0] = static_cast<char>(cp);
} else if (cp <= 0x7FF) {
result.resize(2);
result[1] = static_cast<char>(0x80 | (0x3f & cp));
result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
} else if (cp <= 0xFFFF) {
if (cp >= 0x4E00 && cp <= 0x9FA5 || (cp >= 0xF900 && cp <= 0xFA2D))
{
wchar_t src[2] = { 0 };
char dest[5] = { 0 };
src[0] = static_cast<wchar_t>(cp);
std::string curLocale = setlocale(LC_ALL, NULL);
setlocale(LC_ALL, "chs");
wcstombs_s(NULL, dest, 5, src, 2);
result = dest;
setlocale(LC_ALL, curLocale.c_str());
} else {
result.resize(3);
result[2] = static_cast<char>(0x80 | (0x3f & cp));
result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
}
} else if (cp <= 0x10FFFF) {
result.resize(4);
result[3] = static_cast<char>(0x80 | (0x3f & cp));
result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
}
return result;
}
换上去之后,编译,运行,一切正常,让人头疼的编码问题终于告一段落了。
本文参考至:http://blog.youkuaiyun.com/harrycris/article/details/7733386