// Converts a NUL-terminated string in the system ANSI code page (CP_ACP, which
// this function's name assumes to be GB2312) to UTF-8.
//
// gb2312: NUL-terminated input text.
// utf8:   caller-supplied output buffer. Its capacity is not known here, so
//         the caller must make it large enough for the converted text plus
//         the terminating NUL.
// Returns the number of bytes written to utf8, terminating NUL included, or 0
// if any conversion step fails (utf8 is left untouched when the failure
// happens before the final conversion).
static int GB2312ToUtf8(const char* gb2312, char* utf8)
{
    // Sizing pass: number of wide characters needed, terminator included.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;

    // std::wstring owns the intermediate buffer and zero-fills every element.
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, gb2312, -1, &wide[0], wideLen) <= 0)
        return 0;

    // Sizing pass for the UTF-8 output, then the real conversion.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
        return 0;
    return WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, utf8, utf8Len, NULL, NULL);
}
// Heuristically decides whether a byte buffer looks like UTF-8 text.
//
// str:    pointer to the bytes to inspect (need not be NUL-terminated).
// length: number of bytes to inspect.
//
// Returns true only if every multi-byte sequence is structurally well formed
// (a lead byte followed by the right number of 10xxxxxx continuation bytes),
// the buffer does not end in the middle of a sequence, and at least one
// non-ASCII byte is present. A pure-ASCII (or empty) buffer returns false,
// because it gives no evidence of being UTF-8 rather than any other encoding.
//
// The legacy 5- and 6-byte lead bytes (0xF8-0xFD) are still accepted, but
// 0xFE and 0xFF, which can never start a UTF-8 sequence, are rejected.
bool IsTextUTF8(const char* str, long length)
{
    int pending = 0;          // continuation bytes still expected for the current sequence
    bool sawNonAscii = false; // set once any byte with the high bit is seen

    // The index has the same type as length so large buffers cannot overflow it.
    for (long i = 0; i < length; ++i)
    {
        const unsigned char chr = static_cast<unsigned char>(str[i]);

        if ((chr & 0x80) != 0)
            sawNonAscii = true;

        if (pending == 0)
        {
            if (chr < 0x80)
                continue;          // plain ASCII byte (0xxxxxxx)

            // Lead byte: the value range tells how many continuation bytes follow.
            if (chr >= 0xFE)
                return false;      // 0xFE / 0xFF are never valid lead bytes
            else if (chr >= 0xFC)
                pending = 5;       // 1111110x: 6-byte sequence
            else if (chr >= 0xF8)
                pending = 4;       // 111110xx: 5-byte sequence
            else if (chr >= 0xF0)
                pending = 3;       // 11110xxx: 4-byte sequence
            else if (chr >= 0xE0)
                pending = 2;       // 1110xxxx: 3-byte sequence
            else if (chr >= 0xC0)
                pending = 1;       // 110xxxxx: 2-byte sequence
            else
                return false;      // stray continuation byte (10xxxxxx)
        }
        else
        {
            // Inside a sequence every byte must look like 10xxxxxx.
            if ((chr & 0xC0) != 0x80)
                return false;
            --pending;
        }
    }

    // A truncated trailing sequence is a format error; all-ASCII is "not UTF-8".
    return pending == 0 && sawNonAscii;
}
// Converts a NUL-terminated UTF-8 string to the system ANSI code page (CP_ACP,
// which this function's name assumes to be GB2312).
//
// utf8:   NUL-terminated input text.
// gb2312: caller-supplied output buffer. Its capacity is not known here, so
//         the caller must make it large enough for the converted text plus
//         the terminating NUL.
// Returns the number of bytes written to gb2312, terminating NUL included, or
// 0 if any conversion step fails (gb2312 is left untouched when the failure
// happens before the final conversion).
static int Utf8ToGB2312(const char* utf8, char* gb2312)
{
    // Sizing pass: number of wide characters needed, terminator included.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;

    // std::wstring owns the intermediate buffer and zero-fills every element.
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0)
        return 0;

    // Sizing pass for the ANSI output, then the real conversion.
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
        return 0;
    return WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, gb2312, ansiLen, NULL, NULL);
}
//GB2312到Unicode的转换
static int GB2312ToUnicode(const char* gb2312, char* unicode)
{
UINT nCodePage = 936; //GB2312
int len = MultiByteToWideChar(nCodePage, 0, gb2312, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(nCodePage, 0, gb2312, -1, wstr, len);
len = len*sizeof(wchar_t);
memcpy(unicode, wstr, len);
if(wstr) delete[] wstr;
return len;
}
//Unicode到GB2312的转换
static int UnicodeToGB2312(const char* unicode, int size, char*gb2312)
{
UINT nCodePage = 936; //GB2312
wchar_t* wstr = new wchar_t[size/2+1];
memcpy(wstr, unicode, size);
int len = WideCharToMultiByte(nCodePage, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(nCodePage, 0, wstr, -1, gb2312, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}
// Converts a NUL-terminated UTF-8 string to wide characters and copies the raw
// wchar_t bytes into a caller-supplied byte buffer.
//
// utf8:    NUL-terminated input text.
// unicode: caller-supplied output buffer. Its capacity is not known here, so
//          the caller must make it large enough for the converted text plus
//          the wide terminating NUL.
// Returns the number of BYTES copied to unicode (wide-character count times
// sizeof(wchar_t), terminator included), or 0 if the conversion fails, in
// which case unicode is left untouched.
//
// NOTE(review): the earlier version copied only `count` bytes (truncating the
// text) and returned the wide-character count. The byte count is returned now
// so the result matches GB2312ToUnicode and can be passed as `size` to the
// Unicode-to-X helpers; confirm no caller relied on the old value.
static int Utf8ToUnicode(const char* utf8, char*unicode)
{
    // Sizing pass: number of wide characters needed, terminator included.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;

    // std::wstring owns the intermediate buffer and zero-fills every element.
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0)
        return 0;

    // Copy whole wchar_t elements, not just wideLen bytes.
    const int byteLen = wideLen * static_cast<int>(sizeof(wchar_t));
    memcpy(unicode, wide.c_str(), byteLen);
    return byteLen;
}
// Converts wide-character text, passed as a raw byte buffer, to UTF-8.
//
// unicode: bytes of the wchar_t text; it does not have to include a
//          terminating NUL, one is always appended internally.
// size:    number of BYTES available in unicode.
// utf8:    caller-supplied output buffer. Its capacity is not known here, so
//          the caller must make it large enough for the converted text plus
//          the terminating NUL.
// Returns the number of bytes written to utf8, terminating NUL included, or 0
// if size is negative or the conversion fails. Conversion stops at the first
// wide NUL found in the input.
static int UnicodeToUtf8(const char* unicode, int size, char* utf8)
{
    if (size < 0)
        return 0;

    // One element more than the input holds, zero-filled, so the text is
    // always NUL-terminated before it is converted with length -1.
    std::wstring wide(static_cast<std::wstring::size_type>(size) / sizeof(wchar_t) + 1, L'\0');
    if (size > 0)
        memcpy(&wide[0], unicode, size);

    // Sizing pass, then the real conversion.
    const int len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (len <= 0)
        return 0;
    return WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, utf8, len, NULL, NULL);
}
// Converts a UTF-8 std::string to a wide string.
//
// str: UTF-8 text. It is passed to the API as a NUL-terminated C string, so
//      conversion stops at the first embedded NUL, if any.
// Returns the converted text without a trailing NUL, or an empty string if
// the conversion fails.
std::wstring UTF8ToUnicode(const std::string & str)
{
    // Sizing pass: number of wide characters needed, terminator included.
    const int unicodeLen = ::MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, NULL, 0);
    if (unicodeLen <= 0)
        return std::wstring();

    // The result string doubles as the conversion buffer, so there is no raw
    // allocation to release (the earlier code freed a new[] array with
    // scalar delete).
    std::wstring wide(static_cast<std::wstring::size_type>(unicodeLen), L'\0');
    const int written = ::MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, &wide[0], unicodeLen);
    if (written <= 0)
        return std::wstring();

    // The API counted and wrote the terminating NUL; drop it from the result.
    wide.resize(static_cast<std::wstring::size_type>(written - 1));
    return wide;
}
// Converts a std::string in the system ANSI code page (CP_ACP, which this
// function's name assumes to be GBK) to a wide string.
//
// str: input text. It is passed to the API as a NUL-terminated C string, so
//      conversion stops at the first embedded NUL, if any.
// Returns the converted text without a trailing NUL, or an empty string if
// the conversion fails.
std::wstring GBKToUnicode(const std::string& str)
{
    // Sizing pass: number of wide characters needed, terminator included.
    const int unicodeLen = ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, NULL, 0);
    if (unicodeLen <= 0)
        return std::wstring();

    // The result string doubles as the conversion buffer, so there is no raw
    // allocation to release (the earlier code freed a new[] array with
    // scalar delete).
    std::wstring wide(static_cast<std::wstring::size_type>(unicodeLen), L'\0');
    const int written = ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, &wide[0], unicodeLen);
    if (written <= 0)
        return std::wstring();

    // The API counted and wrote the terminating NUL; drop it from the result.
    wide.resize(static_cast<std::wstring::size_type>(written - 1));
    return wide;
}
// Converts a wide string to UTF-8.
//
// str: wide text. It is passed to the API as a NUL-terminated C string, so
//      conversion stops at the first embedded NUL, if any.
// Returns the UTF-8 bytes up to (not including) the first NUL; the result is
// empty when the API reports a required size of 0.
std::string UnicodeToUTF8(const std::wstring& str)
{
    // Sizing pass: bytes required for the UTF-8 form, terminator included.
    const int byteCount = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), -1, NULL, 0, NULL, NULL);

    // Zero-filled scratch area with one spare byte, so it is NUL-terminated
    // even if the conversion writes nothing.
    std::string scratch(static_cast<std::string::size_type>(byteCount + 1), '\0');
    ::WideCharToMultiByte(CP_UTF8, 0, str.c_str(), -1, &scratch[0], byteCount, NULL, NULL);

    // Keep only the text before the first NUL.
    return std::string(scratch.c_str());
}
// Converts a wide string to the system ANSI code page (CP_ACP, which this
// function's name assumes to be GBK).
//
// str: wide text. It is passed to the API as a NUL-terminated C string, so
//      conversion stops at the first embedded NUL, if any.
// Returns the converted bytes up to (not including) the first NUL; the result
// is empty when the API reports a required size of 0.
std::string UnicodeToGbk(const std::wstring& str)
{
    // Sizing pass: bytes required for the ANSI form, terminator included.
    const int byteCount = WideCharToMultiByte(CP_ACP, 0, str.c_str(), -1, NULL, 0, NULL, NULL);

    // Zero-filled scratch area with one spare byte, so it is NUL-terminated
    // even if the conversion writes nothing.
    std::string scratch(static_cast<std::string::size_type>(byteCount + 1), '\0');
    ::WideCharToMultiByte(CP_ACP, 0, str.c_str(), -1, &scratch[0], byteCount, NULL, NULL);

    // Keep only the text before the first NUL.
    return std::string(scratch.c_str());
}
// Converts a std::string in the system ANSI code page (CP_ACP, which this
// function's name assumes to be GBK) to UTF-8, going through an intermediate
// wide-character form.
//
// strGbk: input text. It is passed to the API as a NUL-terminated C string,
//         so conversion stops at the first embedded NUL, if any.
// Returns the UTF-8 text without a trailing NUL, or an empty string if either
// conversion step fails.
std::string GBKToUTF8(const std::string &strGbk)
{
    // Step 1: ANSI -> wide. Sizing pass first (count includes the terminator).
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, NULL, 0);
    if (wideLen <= 0)
        return std::string();
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, &wide[0], wideLen) <= 0)
        return std::string();

    // Step 2: wide -> UTF-8, again sizing first.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
        return std::string();
    std::string utf8(static_cast<std::string::size_type>(utf8Len), '\0');
    const int written = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, NULL, NULL);
    if (written <= 0)
        return std::string();

    // The API counted and wrote the terminating NUL; drop it from the result.
    utf8.resize(static_cast<std::string::size_type>(written - 1));
    return utf8;
}
// Converts a UTF-8 std::string to the system ANSI code page (CP_ACP, which
// this function's name assumes to be GBK), going through an intermediate
// wide-character form.
//
// strUtf: UTF-8 text. It is passed to the API as a NUL-terminated C string,
//         so conversion stops at the first embedded NUL, if any.
// Returns the converted text without a trailing NUL, or an empty string if
// either conversion step fails.
std::string UTF8ToGBK(const std::string &strUtf)
{
    // Step 1: UTF-8 -> wide. Sizing pass first (count includes the terminator).
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, strUtf.c_str(), -1, NULL, 0);
    if (wideLen <= 0)
        return std::string();
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, strUtf.c_str(), -1, &wide[0], wideLen) <= 0)
        return std::string();

    // Step 2: wide -> ANSI, again sizing first.
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
        return std::string();
    std::string ansi(static_cast<std::string::size_type>(ansiLen), '\0');
    const int written = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &ansi[0], ansiLen, NULL, NULL);
    if (written <= 0)
        return std::string();

    // The API counted and wrote the terminating NUL; drop it from the result.
    ansi.resize(static_cast<std::string::size_type>(written - 1));
    return ansi;
}
std::string QT_UTF8ToGBK(const std::string& strUtf)
{
QTextCodec* utf8 = QTextCodec::codecForName("UTF-8");
QTextCodec* gbk = QTextCodec::codecForName("GBK");
QString strUnicode = utf8->toUnicode(strUtf.c_str());
QByteArray gbkStr = gbk->fromUnicode(strUnicode);
return gbkStr.toStdString();
}
std::string QT_Local8BitTo(const QString& qsSrt)
{
std::string strTmp;
QByteArray qBytTmp = qsSrt.toLocal8Bit();
int iSzie = qBytTmp.size();
if (iSzie <= 0)
{
return strTmp;
}
strTmp.resize(iSzie);
memcpy(&strTmp[0], qBytTmp.data(), iSzie);
return strTmp;
}
// Reposted from http://blog.youkuaiyun.com/seven407/article/details/7712823