// Converts a NUL-terminated UTF-8 byte string into a CString.
//
// utf8Str: UTF-8 encoded text read as raw bytes (char* or BYTE*), i.e. the
//          bytes that look garbled when displayed without decoding.
//          May be null, in which case an empty string is returned.
//
// The byte length is measured with strlen() and the work is delegated to the
// (pointer, length) overload.
CString UTF8toUnicode(const char* utf8Str)
{
    // strlen() on a null pointer is undefined behavior, so reject null here
    // instead of relying on the check inside the two-argument overload.
    if (!utf8Str)
        return CString();

    const UINT theLength = static_cast<UINT>(strlen(utf8Str));
    return UTF8toUnicode(utf8Str, theLength);
}
// Decodes `length` bytes of UTF-8 starting at `utf8Str` and returns the
// result as a CString, appending one WCHAR code unit at a time.
//
// Parameters:
//   utf8Str - UTF-8 encoded bytes; only the first `length` bytes are read,
//             so the buffer does not have to be NUL-terminated. May be null.
//   length  - number of bytes available at `utf8Str`.
//
// Returns:
//   The decoded text. A null pointer or a zero length yields an empty string.
//   Decoding stops, and everything decoded so far is returned, at the first
//   byte that is not a valid UTF-8 lead byte, at a sequence that would run
//   past `length`, or at a 4-byte sequence outside U+10000..U+10FFFF.
//
// Notes:
//   - Continuation bytes are masked but not validated (no check for the
//     10xxxxxx prefix and no overlong-encoding check on 2/3-byte forms).
//   - 4-byte sequences are emitted as a UTF-16 surrogate pair; this assumes
//     WCHAR is a 16-bit UTF-16 code unit, as on Windows -- TODO confirm for
//     any other target.
CString UTF8toUnicode(const char* utf8Str,UINT length)
{
    CString unicodeStr;
    if (!utf8Str || length == 0)
        return unicodeStr;

    // Work on unsigned bytes so the bit tests do not depend on whether plain
    // char is signed.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(utf8Str);

    for (UINT i = 0; i < length;)
    {
        const unsigned char lead = bytes[i];
        const UINT remaining = length - i; // bytes left, including the lead byte

        if ((lead & 0x80) == 0)
        {
            // 0xxxxxxx: single-byte (ASCII) character.
            unicodeStr.AppendChar(static_cast<WCHAR>(lead));
            i += 1;
        }
        else if ((lead & 0xE0) == 0xC0)
        {
            // 110xxxxx 10xxxxxx: two-byte sequence.
            if (remaining < 2)
                break; // truncated sequence: do not read past the buffer
            const WCHAR chr = static_cast<WCHAR>(
                ((lead & 0x1F) << 6) |
                (bytes[i + 1] & 0x3F));
            unicodeStr.AppendChar(chr);
            i += 2;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            // 1110xxxx 10xxxxxx 10xxxxxx: three-byte sequence
            // (covers most CJK characters).
            if (remaining < 3)
                break; // truncated sequence: do not read past the buffer
            const WCHAR chr = static_cast<WCHAR>(
                ((lead & 0x0F) << 12) |
                ((bytes[i + 1] & 0x3F) << 6) |
                (bytes[i + 2] & 0x3F));
            unicodeStr.AppendChar(chr);
            i += 3;
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four-byte sequence for
            // code points above U+FFFF, which need a surrogate pair.
            if (remaining < 4)
                break; // truncated sequence: do not read past the buffer
            unsigned long codePoint =
                (static_cast<unsigned long>(lead & 0x07) << 18) |
                (static_cast<unsigned long>(bytes[i + 1] & 0x3F) << 12) |
                (static_cast<unsigned long>(bytes[i + 2] & 0x3F) << 6) |
                static_cast<unsigned long>(bytes[i + 3] & 0x3F);
            if (codePoint < 0x10000UL || codePoint > 0x10FFFFUL)
                break; // not representable as a surrogate pair
            codePoint -= 0x10000UL;
            unicodeStr.AppendChar(static_cast<WCHAR>(0xD800 + (codePoint >> 10)));
            unicodeStr.AppendChar(static_cast<WCHAR>(0xDC00 + (codePoint & 0x3FF)));
            i += 4;
        }
        else
        {
            // Stray continuation byte or unsupported lead byte: stop and
            // return what has been decoded so far.
            break;
        }
    }
    return unicodeStr;
}
// For a detailed explanation, see the UTF-8 entry on Baidu Baike (worth studying carefully).