MFC：GB2312、UTF-8、Unicode 之间的转换

原创已于 2025-05-27 17:40:14 修改 · 2.6k 阅读

7 ·

CC 4.0 BY-SA版权

文章标签：

#MFC #UTF8 #GB2312 #Unicode #转换

于 2017-09-26 10:49:57 首次发布

MFC 专栏收录该内容

10 篇文章

订阅专栏

本文详细介绍了几种常见字符编码之间的转换方法，包括GB2312到UTF-8、UTF-8到GB2312、GB2312到Unicode、Unicode到GB2312、UTF-8到Unicode及Unicode到UTF-8等。此外，还提供了一个用于判断字符串是否为UTF-8编码的函数。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

// Converts a NUL-terminated GB2312 string to UTF-8.
//
// gb2312: NUL-terminated input, decoded with CP_ACP.
// utf8:   caller-supplied output buffer that must be large enough for the
//         result including its terminating NUL. May be NULL, in which case
//         nothing is written and only the required size is returned.
// Returns the size of the UTF-8 result in bytes, terminating NUL included,
// or 0 when the input is NULL or a conversion step fails.
//
// NOTE(review): the input is decoded with CP_ACP, which is only GB2312/GBK
// when the system ANSI code page is 936 — confirm this is what callers expect.
static int GB2312ToUtf8(const char* gb2312, char* utf8)
{
	if (gb2312 == NULL)
		return 0;

	// First pass: number of wide characters required, terminator included.
	const int wideLen = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
	if (wideLen <= 0)
		return 0;

	// std::wstring owns the scratch buffer: fully zero-initialised and
	// released on every return path.
	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	if (MultiByteToWideChar(CP_ACP, 0, gb2312, -1, &wide[0], wideLen) <= 0)
		return 0;

	const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (utf8Len <= 0)
		return 0;
	if (utf8 == NULL)
		return utf8Len; // size query only

	if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, utf8, utf8Len, NULL, NULL) <= 0)
		return 0;
	return utf8Len;
}

// Heuristically checks whether a byte buffer is non-ASCII UTF-8 text.
//
// str:    buffer to inspect (does not need to be NUL-terminated).
// length: number of bytes of str to inspect.
// Returns true only if every multi-byte sequence is well formed (a lead byte
// followed by the right number of 10xxxxxx continuation bytes) AND at least
// one non-ASCII byte is present. A pure-ASCII, empty or NULL buffer yields
// false, so "true" means "this needs to be treated as UTF-8".
//
// The legacy 5- and 6-byte lead bytes (0xF8-0xFD) are still accepted, as they
// were before; 0xFE and 0xFF, which never occur in UTF-8, are rejected.
bool IsTextUTF8(const char* str, long length)
{
	if (str == NULL || length <= 0)
		return false;

	int nBytes = 0;        // continuation bytes still expected for the current character
	bool bAllAscii = true; // cleared as soon as a byte with the high bit set is seen

	// Index type matches `length` so large buffers cannot overflow the counter.
	for (long i = 0; i < length; i++)
	{
		const unsigned char chr = static_cast<unsigned char>(str[i]);

		if ((chr & 0x80) != 0) // ASCII is 0xxxxxxx; anything else is a multi-byte candidate
		{
			bAllAscii = false;
		}

		if (nBytes == 0) // at a character boundary: classify the lead byte
		{
			if (chr >= 0x80)
			{
				if (chr >= 0xFE)
					return false; // 0xFE / 0xFF are never valid in UTF-8
				else if (chr >= 0xFC)
					nBytes = 6;
				else if (chr >= 0xF8)
					nBytes = 5;
				else if (chr >= 0xF0)
					nBytes = 4;
				else if (chr >= 0xE0)
					nBytes = 3;
				else if (chr >= 0xC0)
					nBytes = 2;
				else
					return false; // stray continuation byte (10xxxxxx) where a lead byte is required

				nBytes--; // the lead byte itself has been consumed
			}
		}
		else // inside a multi-byte character: every following byte must be 10xxxxxx
		{
			if ((chr & 0xC0) != 0x80)
			{
				return false;
			}
			nBytes--;
		}
	}

	if (nBytes > 0) // buffer ended in the middle of a multi-byte character
	{
		return false;
	}

	// All-ASCII input is deliberately reported as "not UTF-8".
	return !bAllAscii;
}


// Converts a NUL-terminated UTF-8 string to GB2312.
//
// utf8:   NUL-terminated UTF-8 input.
// gb2312: caller-supplied output buffer that must be large enough for the
//         result including its terminating NUL. May be NULL, in which case
//         nothing is written and only the required size is returned.
// Returns the size of the encoded result in bytes, terminating NUL included,
// or 0 when the input is NULL or a conversion step fails.
//
// NOTE(review): the output is encoded with CP_ACP, which is only GB2312/GBK
// when the system ANSI code page is 936 — confirm this is what callers expect.
static int Utf8ToGB2312(const char* utf8, char* gb2312)
{
	if (utf8 == NULL)
		return 0;

	// First pass: number of wide characters required, terminator included.
	const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
	if (wideLen <= 0)
		return 0;

	// std::wstring owns the scratch buffer: fully zero-initialised and
	// released on every return path.
	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0)
		return 0;

	const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (ansiLen <= 0)
		return 0;
	if (gb2312 == NULL)
		return ansiLen; // size query only

	if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, gb2312, ansiLen, NULL, NULL) <= 0)
		return 0;
	return ansiLen;
}


//GB2312到Unicode的转换
static int GB2312ToUnicode(const char* gb2312, char* unicode)
{
	UINT nCodePage = 936; //GB2312
	int len = MultiByteToWideChar(nCodePage, 0, gb2312, -1, NULL, 0);
	wchar_t* wstr = new wchar_t[len+1];
	memset(wstr, 0, len+1);
	MultiByteToWideChar(nCodePage, 0, gb2312, -1, wstr, len);
	len = len*sizeof(wchar_t);
	memcpy(unicode, wstr, len);
	if(wstr) delete[] wstr;
		return len;
}


//Unicode到GB2312的转换
static int UnicodeToGB2312(const char* unicode, int size, char*gb2312)
{
	UINT nCodePage = 936; //GB2312
	wchar_t* wstr = new wchar_t[size/2+1];
	memcpy(wstr, unicode, size);
	int len = WideCharToMultiByte(nCodePage, 0, wstr, -1, NULL, 0, NULL, NULL);
	WideCharToMultiByte(nCodePage, 0, wstr, -1, gb2312, len, NULL, NULL);
	if(wstr) delete[] wstr;
		return len;
}


// Converts a NUL-terminated UTF-8 string to UTF-16 wide characters and stores
// them as raw bytes.
//
// utf8:    NUL-terminated UTF-8 input.
// unicode: caller-supplied output buffer receiving the wchar_t data as bytes;
//          must hold the full result including the terminating wide NUL.
//          May be NULL, in which case only the required size is returned.
// Returns the size of the result in BYTES (wide NUL included), or 0 when the
// input is NULL or the conversion fails.
static int Utf8ToUnicode(const char* utf8, char*unicode)
{
	if (utf8 == NULL)
		return 0;

	// First pass: number of wide characters required, terminator included.
	const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
	if (wideLen <= 0)
		return 0;

	// Convert into an owned, correctly aligned wchar_t buffer first; the
	// caller's char* buffer carries no alignment guarantee.
	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0)
		return 0;

	// wideLen counts wchar_t units; the copy and the return value are in bytes.
	const int byteLen = wideLen * static_cast<int>(sizeof(wchar_t));
	if (unicode != NULL)
		memcpy(unicode, wide.c_str(), static_cast<size_t>(byteLen));
	return byteLen;
}


// Converts UTF-16 wide characters, supplied as raw bytes, to a UTF-8 string.
//
// unicode: input bytes holding wchar_t data; need not be NUL-terminated.
// size:    size of the input in BYTES. A trailing partial wchar_t is ignored.
// utf8:    caller-supplied output buffer that must be large enough for the
//          result including its terminating NUL. May be NULL, in which case
//          only the required size is returned.
// Returns the size of the UTF-8 result in bytes, terminating NUL included,
// or 0 when the input is NULL, size is negative, or the conversion fails.
// Conversion stops at the first wide NUL found in the input, if any.
static int UnicodeToUtf8(const char* unicode, int size, char* utf8)
{
	if (unicode == NULL || size < 0)
		return 0;

	// Copy into an owned wchar_t buffer. std::wstring guarantees a terminating
	// NUL after the copied characters, so the -1 ("NUL-terminated") conversion
	// below can never read past the buffer.
	const std::wstring::size_type units = static_cast<std::wstring::size_type>(size) / sizeof(wchar_t);
	std::wstring wide(units, L'\0');
	if (units > 0)
		memcpy(&wide[0], unicode, units * sizeof(wchar_t));

	const int len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (len <= 0)
		return 0;
	if (utf8 == NULL)
		return len; // size query only

	if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, utf8, len, NULL, NULL) <= 0)
		return 0;
	return len;
}



// Converts a UTF-8 std::string to a UTF-16 std::wstring.
//
// The input is handed to the API as a NUL-terminated C string, so conversion
// stops at the first embedded NUL. Returns an empty string if the conversion
// fails.
std::wstring UTF8ToUnicode(const std::string & str)
{
	// First pass: number of wide characters required, terminator included.
	const int wideLen = ::MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, NULL, 0);
	if (wideLen <= 0)
		return std::wstring();

	// Convert straight into the result's own storage; no manual new[]/delete[].
	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	const int written = ::MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, &wide[0], wideLen);
	if (written <= 0)
		return std::wstring();

	// The API counts the terminating NUL; std::wstring keeps its own.
	wide.resize(static_cast<std::wstring::size_type>(written - 1));
	return wide;
}

// Converts a std::string in the system ANSI code page (CP_ACP) to a UTF-16
// std::wstring.
//
// The input is handed to the API as a NUL-terminated C string, so conversion
// stops at the first embedded NUL. Returns an empty string if the conversion
// fails.
//
// NOTE(review): CP_ACP is only GBK when the system ANSI code page is 936 —
// confirm this is what callers expect.
std::wstring GBKToUnicode(const std::string& str)
{
	// First pass: number of wide characters required, terminator included.
	const int wideLen = ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, NULL, 0);
	if (wideLen <= 0)
		return std::wstring();

	// Convert straight into the result's own storage; no manual new[]/delete[].
	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	const int written = ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, &wide[0], wideLen);
	if (written <= 0)
		return std::wstring();

	// The API counts the terminating NUL; std::wstring keeps its own.
	wide.resize(static_cast<std::wstring::size_type>(written - 1));
	return wide;
}

// Converts a UTF-16 std::wstring to a UTF-8 std::string.
//
// The input is handed to the API as a NUL-terminated wide string, so
// conversion stops at the first embedded NUL. Returns an empty string if the
// conversion fails.
std::string UnicodeToUTF8(const std::wstring& str)
{
	// First pass: number of bytes required, terminator included.
	const int byteLen = ::WideCharToMultiByte(CP_UTF8, 0, str.c_str(), -1, NULL, 0, NULL, NULL);
	if (byteLen <= 0)
		return std::string();

	// Convert straight into the result's own storage, so nothing can leak if
	// an allocation throws.
	std::string utf8(static_cast<std::string::size_type>(byteLen), '\0');
	const int written = ::WideCharToMultiByte(CP_UTF8, 0, str.c_str(), -1, &utf8[0], byteLen, NULL, NULL);
	if (written <= 0)
		return std::string();

	// The API counts the terminating NUL; std::string keeps its own.
	utf8.resize(static_cast<std::string::size_type>(written - 1));
	return utf8;
}

// Converts a UTF-16 std::wstring to a std::string in the system ANSI code
// page (CP_ACP).
//
// The input is handed to the API as a NUL-terminated wide string, so
// conversion stops at the first embedded NUL. Returns an empty string if the
// conversion fails.
//
// NOTE(review): CP_ACP is only GBK when the system ANSI code page is 936 —
// confirm this is what callers expect.
std::string UnicodeToGbk(const std::wstring& str)
{
	// First pass: number of bytes required, terminator included.
	const int byteLen = ::WideCharToMultiByte(CP_ACP, 0, str.c_str(), -1, NULL, 0, NULL, NULL);
	if (byteLen <= 0)
		return std::string();

	// Convert straight into the result's own storage, so nothing can leak if
	// an allocation throws.
	std::string ansi(static_cast<std::string::size_type>(byteLen), '\0');
	const int written = ::WideCharToMultiByte(CP_ACP, 0, str.c_str(), -1, &ansi[0], byteLen, NULL, NULL);
	if (written <= 0)
		return std::string();

	// The API counts the terminating NUL; std::string keeps its own.
	ansi.resize(static_cast<std::string::size_type>(written - 1));
	return ansi;
}

// Converts a std::string in the system ANSI code page (CP_ACP) to UTF-8,
// going through UTF-16 as the intermediate form.
//
// The input is handed to the API as a NUL-terminated C string, so conversion
// stops at the first embedded NUL. Returns an empty string if either
// conversion step fails.
//
// NOTE(review): CP_ACP is only GBK when the system ANSI code page is 936 —
// confirm this is what callers expect.
std::string GBKToUTF8(const std::string &strGbk)
{
	// Step 1: ANSI -> UTF-16.
	const int wideLen = MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, NULL, 0);
	if (wideLen <= 0)
		return std::string();

	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	if (MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, &wide[0], wideLen) <= 0)
		return std::string();

	// Step 2: UTF-16 -> UTF-8.
	const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (utf8Len <= 0)
		return std::string();

	std::string utf8(static_cast<std::string::size_type>(utf8Len), '\0');
	const int written = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, NULL, NULL);
	if (written <= 0)
		return std::string();

	// The API counts the terminating NUL; std::string keeps its own.
	utf8.resize(static_cast<std::string::size_type>(written - 1));
	return utf8;
}

// Converts a UTF-8 std::string to the system ANSI code page (CP_ACP), going
// through UTF-16 as the intermediate form.
//
// The input is handed to the API as a NUL-terminated C string, so conversion
// stops at the first embedded NUL. Returns an empty string if either
// conversion step fails.
//
// NOTE(review): CP_ACP is only GBK when the system ANSI code page is 936 —
// confirm this is what callers expect.
std::string UTF8ToGBK(const std::string &strUtf)
{
	// Step 1: UTF-8 -> UTF-16.
	const int wideLen = MultiByteToWideChar(CP_UTF8, 0, strUtf.c_str(), -1, NULL, 0);
	if (wideLen <= 0)
		return std::string();

	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	if (MultiByteToWideChar(CP_UTF8, 0, strUtf.c_str(), -1, &wide[0], wideLen) <= 0)
		return std::string();

	// Step 2: UTF-16 -> ANSI.
	const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (ansiLen <= 0)
		return std::string();

	std::string ansi(static_cast<std::string::size_type>(ansiLen), '\0');
	const int written = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &ansi[0], ansiLen, NULL, NULL);
	if (written <= 0)
		return std::string();

	// The API counts the terminating NUL; std::string keeps its own.
	ansi.resize(static_cast<std::string::size_type>(written - 1));
	return ansi;
}


std::string QT_UTF8ToGBK(const std::string& strUtf)
{
	QTextCodec* utf8 = QTextCodec::codecForName("UTF-8");
	QTextCodec* gbk = QTextCodec::codecForName("GBK");
	QString strUnicode = utf8->toUnicode(strUtf.c_str());
	QByteArray gbkStr = gbk->fromUnicode(strUnicode);
	return gbkStr.toStdString();
}

std::string QT_Local8BitTo(const QString& qsSrt)
{
	std::string strTmp;
	QByteArray qBytTmp = qsSrt.toLocal8Bit();
	int iSzie = qBytTmp.size();
	if (iSzie <= 0)
	{
		return strTmp;
	}
	strTmp.resize(iSzie);
	memcpy(&strTmp[0], qBytTmp.data(), iSzie);

	return strTmp;
}

转自http://blog.youkuaiyun.com/seven407/article/details/7712823