/*
 * Converts a NUL-terminated multibyte string to a newly allocated
 * NUL-terminated wide-character string.
 *
 * NOTE(review): despite the name, the conversion uses CP_ACP (the active ANSI
 * code page), so the input is only interpreted as GB2312/GBK when that is the
 * system's ANSI code page.
 *
 * Returns NULL when lpszInBuf is NULL or when the conversion fails. The caller
 * owns the returned buffer and must release it with delete[].
 */
LPWSTR GB2312ToUnicode(LPCSTR lpszInBuf)
{
    if (NULL == lpszInBuf)
    {
        return NULL;
    }

    // With a zero-sized output buffer the call only measures: it yields the
    // required size in WCHARs (terminator included), or 0 on failure.
    const int nLen = MultiByteToWideChar(CP_ACP, 0, lpszInBuf, -1, NULL, 0);
    if (nLen <= 0)
    {
        // Never hand back a zero-length, unterminated buffer.
        return NULL;
    }

    WCHAR* pBuf = new WCHAR[nLen]();
    if (0 == MultiByteToWideChar(CP_ACP, 0, lpszInBuf, -1, pBuf, nLen))
    {
        delete[] pBuf;
        return NULL;
    }
    return pBuf;
}
/*
 * Converts a NUL-terminated wide-character string to a newly allocated
 * NUL-terminated multibyte string.
 *
 * NOTE(review): despite the name, the conversion targets CP_ACP (the active
 * ANSI code page), so the output is only GB2312/GBK when that is the system's
 * ANSI code page.
 *
 * Returns NULL when lpszInBuf is NULL or when the conversion fails. The caller
 * owns the returned buffer and must release it with delete[].
 */
LPSTR UnicodeToGB2312(LPCWSTR lpszInBuf)
{
    if (NULL == lpszInBuf)
    {
        return NULL;
    }

    // With a zero-sized output buffer the call only measures: it yields the
    // required size in bytes (terminator included), or 0 on failure.
    const int nLen = WideCharToMultiByte(CP_ACP, 0, lpszInBuf, -1, NULL, 0, NULL, NULL);
    if (nLen <= 0)
    {
        // Never hand back a zero-length, unterminated buffer.
        return NULL;
    }

    CHAR* pBuf = new CHAR[nLen]();
    if (0 == WideCharToMultiByte(CP_ACP, 0, lpszInBuf, -1, pBuf, nLen, NULL, NULL))
    {
        delete[] pBuf;
        return NULL;
    }
    return pBuf;
}
/*
 * Converts a NUL-terminated UTF-8 string to a newly allocated NUL-terminated
 * wide-character string.
 *
 * Returns NULL when lpszInBuf is NULL or when the conversion fails. The caller
 * owns the returned buffer and must release it with delete[].
 */
LPWSTR UTF8ToUnicode(LPCSTR lpszInBuf)
{
    if (NULL == lpszInBuf)
    {
        return NULL;
    }

    // With a zero-sized output buffer the call only measures: it yields the
    // required size in WCHARs (terminator included), or 0 on failure.
    const int nLen = MultiByteToWideChar(CP_UTF8, 0, lpszInBuf, -1, NULL, 0);
    if (nLen <= 0)
    {
        // Never hand back a zero-length, unterminated buffer.
        return NULL;
    }

    WCHAR* pBuf = new WCHAR[nLen]();
    if (0 == MultiByteToWideChar(CP_UTF8, 0, lpszInBuf, -1, pBuf, nLen))
    {
        delete[] pBuf;
        return NULL;
    }
    return pBuf;
}
/*
 * Converts a NUL-terminated wide-character string to a newly allocated
 * NUL-terminated UTF-8 string.
 *
 * Returns NULL when lpszInBuf is NULL or when the conversion fails. The caller
 * owns the returned buffer and must release it with delete[].
 */
LPSTR UnicodeToUTF8(LPCWSTR lpszInBuf)
{
    if (NULL == lpszInBuf)
    {
        return NULL;
    }

    // With a zero-sized output buffer the call only measures: it yields the
    // required size in bytes (terminator included), or 0 on failure.
    const int nLen = WideCharToMultiByte(CP_UTF8, 0, lpszInBuf, -1, NULL, 0, NULL, NULL);
    if (nLen <= 0)
    {
        // Never hand back a zero-length, unterminated buffer.
        return NULL;
    }

    CHAR* pBuf = new CHAR[nLen]();
    if (0 == WideCharToMultiByte(CP_UTF8, 0, lpszInBuf, -1, pBuf, nLen, NULL, NULL))
    {
        delete[] pBuf;
        return NULL;
    }
    return pBuf;
}
/*
 * Re-encodes a NUL-terminated multibyte string as UTF-8 by round-tripping
 * through a temporary wide string (GB2312ToUnicode, then UnicodeToUTF8).
 *
 * Returns NULL for a NULL input; otherwise returns whatever UnicodeToUTF8
 * produces. The result is allocated with new[] by that helper, so the caller
 * releases it with delete[].
 */
LPSTR GB2312ToUTF8(LPCSTR lpszInBuf)
{
    LPSTR pszUtf8 = NULL;

    if (lpszInBuf != NULL)
    {
        LPWSTR pwszWide = GB2312ToUnicode(lpszInBuf);
        pszUtf8 = UnicodeToUTF8(pwszWide);
        // The wide string is only an intermediate; drop it right away.
        delete[] pwszWide;
    }

    return pszUtf8;
}
/*
 * Re-encodes a NUL-terminated UTF-8 string as a multibyte string by
 * round-tripping through a temporary wide string (UTF8ToUnicode, then
 * UnicodeToGB2312).
 *
 * Returns NULL for a NULL input; otherwise returns whatever UnicodeToGB2312
 * produces. The result is allocated with new[] by that helper, so the caller
 * releases it with delete[].
 */
LPSTR UTF8ToGB2312(LPCSTR lpszInBuf)
{
    LPSTR pszAnsi = NULL;

    if (lpszInBuf != NULL)
    {
        LPWSTR pwszWide = UTF8ToUnicode(lpszInBuf);
        pszAnsi = UnicodeToGB2312(pwszWide);
        // The wide string is only an intermediate; drop it right away.
        delete[] pwszWide;
    }

    return pszAnsi;
}
/*
 * Percent-encodes the bytes of a NUL-terminated string: every byte with the
 * high bit set (>= 0x80) becomes "%XX" (uppercase hex); ASCII bytes are
 * copied through unchanged.
 *
 * The decision is made per byte and no longer consults IsDBCSLeadByte, so the
 * result does not depend on the system code page and no non-ASCII byte is
 * ever emitted raw.
 *
 * NOTE(review): ASCII characters that are reserved in URLs (space, '%', '&',
 * ...) are still passed through, as before - confirm whether callers need
 * them escaped too.
 *
 * Returns NULL when lpszInBuf is NULL. The caller owns the returned buffer
 * and must release it with delete[].
 */
LPSTR GB2312Encoding(LPCSTR lpszInBuf)
{
    if (NULL == lpszInBuf)
    {
        return NULL;
    }

    static const CHAR szHex[] = "0123456789ABCDEF";

    const size_t cchIn = strlen(lpszInBuf);
    // Worst case every input byte expands to "%XX"; +1 for the terminator.
    CHAR* pBuf = new CHAR[cchIn * 3 + 1];
    CHAR* pOut = pBuf;

    for (size_t i = 0; i < cchIn; ++i)
    {
        // Work on the unsigned value so bytes >= 0x80 are not sign-extended.
        const unsigned char ch = (unsigned char)lpszInBuf[i];
        if (ch >= 0x80)
        {
            *pOut++ = '%';
            *pOut++ = szHex[ch >> 4];
            *pOut++ = szHex[ch & 0x0F];
        }
        else
        {
            *pOut++ = (CHAR)ch;
        }
    }
    *pOut = '\0';
    return pBuf;
}
/*
 * Converts the input to UTF-8 with GB2312ToUTF8 and then percent-encodes the
 * UTF-8 bytes: every byte with the high bit set (>= 0x80) becomes "%XX"
 * (uppercase hex); ASCII bytes are copied through unchanged.
 *
 * The escape decision is made per byte. IsDBCSLeadByte is not used: it
 * describes the system's ANSI code page, not UTF-8, and would leave some
 * UTF-8 bytes (for example the continuation byte 0x80) unescaped.
 *
 * NOTE(review): ASCII characters that are reserved in URLs (space, '%', '&',
 * ...) are still passed through, as before - confirm whether callers need
 * them escaped too.
 *
 * Returns NULL when lpszInBuf is NULL or when the UTF-8 conversion yields
 * NULL. The caller owns the returned buffer and must release it with delete[].
 */
LPSTR UTF8Encoding(LPCSTR lpszInBuf)
{
    if (NULL == lpszInBuf)
    {
        return NULL;
    }

    CHAR* pUtf8 = GB2312ToUTF8(lpszInBuf);
    if (NULL == pUtf8)
    {
        // Check before strlen(); the original only tested this after use.
        return NULL;
    }

    static const CHAR szHex[] = "0123456789ABCDEF";

    const size_t cchUtf8 = strlen(pUtf8);
    // Worst case every byte expands to "%XX"; +1 for the terminator.
    CHAR* pBuf = new CHAR[cchUtf8 * 3 + 1];
    CHAR* pOut = pBuf;

    for (size_t i = 0; i < cchUtf8; ++i)
    {
        // Work on the unsigned value so bytes >= 0x80 are not sign-extended.
        const unsigned char ch = (unsigned char)pUtf8[i];
        if (ch >= 0x80)
        {
            *pOut++ = '%';
            *pOut++ = szHex[ch >> 4];
            *pOut++ = szHex[ch & 0x0F];
        }
        else
        {
            *pOut++ = (CHAR)ch;
        }
    }
    *pOut = '\0';

    delete[] pUtf8;
    return pBuf;
}
/*
 * Demo entry point: runs a sample string through UTF8Encoding and shows the
 * percent-encoded result in a message box.
 */
int main()
{
    CHAR szText[] = "百度Hi";
    CHAR* pBuf = UTF8Encoding(szText);
    MessageBoxA(NULL, pBuf, NULL, 0);
    // UTF8Encoding returns a new[]-allocated buffer owned by the caller.
    delete[] pBuf;
    pBuf = NULL;
    return 0;
}
// Article title: Converting between string encodings (GB2312 <-> UTF8 <-> Unicode) and URLEncoding
最新推荐文章于 2018-09-19 21:14:30 发布
本文介绍了在不同字符编码(GB2312、UTF-8 和 Unicode,即 Windows 上的 UTF-16 宽字符)之间相互转换的方法,并给出了具体的 C/C++ 代码示例。通过这些函数可以完成字符串编码的转换,例如从 GB2312 转为 UTF-8,或者反过来。此外还展示了如何对字符串进行 URL 编码,即把非 ASCII 字节转换为“%XX”形式的百分号转义序列。
7228

被折叠的 条评论
为什么被折叠?



