首先转码方法网上很多,如下面:
// Converts the NUL-terminated string strIn from sourceCodepage to
// targetCodepage, going through UTF-16 as the intermediate form, and writes
// the NUL-terminated result to strOut.
//
// strIn          - NUL-terminated input encoded in sourceCodepage.
// strOut         - caller-supplied output buffer. This function has no way to
//                  know its capacity and performs no bounds check, so the
//                  caller must size it for the converted text plus the NUL.
// sourceCodepage - code page of strIn (e.g. CP_ACP).
// targetCodepage - code page wanted in strOut (e.g. CP_UTF8).
//
// If either conversion step fails, strOut is left as an empty string.
__inline void Convert(const char* strIn, char* strOut, int sourceCodepage, int targetCodepage)
{
    if (strOut == NULL)
        return;
    strOut[0] = '\0';   // defined result on every failure path below
    if (strIn == NULL)
        return;

    // Passing -1 as the input length makes the API process the terminating
    // NUL as well, so the returned count already includes it.
    int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, strIn, -1, NULL, 0);
    if (unicodeLen <= 0)
        return;

    wchar_t* pUnicode = new wchar_t[unicodeLen + 1];
    memset(pUnicode, 0, (unicodeLen + 1) * sizeof(wchar_t));

    if (MultiByteToWideChar(sourceCodepage, 0, strIn, -1, pUnicode, unicodeLen) > 0)
    {
        // First call only queries the required size (in bytes, NUL included).
        int targetLen = WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, NULL, 0, NULL, NULL);
        if (targetLen > 0)
        {
            char* pTargetData = new char[targetLen + 1];
            memset(pTargetData, 0, targetLen + 1);

            if (WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, pTargetData, targetLen, NULL, NULL) > 0)
            {
                // targetLen counts the terminating NUL, so this copies a
                // complete C string.
                memcpy(strOut, pTargetData, targetLen);
            }

            // Allocated with new[], so it must be released with delete[].
            delete[] pTargetData;
        }
    }

    delete[] pUnicode;
}
GBK转成UTF-8:
// Example call site: converts strConfKind from the ANSI code page (CP_ACP)
// to UTF-8 through Convert().
CString strConfKind="天安门";
int nLength = strConfKind.GetLength();
// NOTE(review): the output buffer is sized from the source string's length.
// UTF-8 output can be longer than the source text, so this buffer can be too
// small for what Convert() writes into it.
char *strOut = new char[nLength+1];
Convert(strConfKind.GetBuffer(0),strOut, CP_ACP, CP_UTF8);
...
// Release the array allocated with new[] above.
delete []strOut;
strOut = NULL;
但上面的代码存在两个问题:
1、char *strOut = new char[nLength+1]; 申请的长度是不够的。如上面“天安门”是3个汉字,GBK编码下每个汉字占2个字节,多字节(MBCS)工程中 GetLength() 返回的是字节数,即 nLength 为6;但UTF-8格式一个汉字占三个字节,所以至少要申请10个字节(3*3+1)。2、奇数个汉字转码后,再由UTF-8转成GBK时,最后一个字符一直显示为“?”。因为一个汉字转成UTF-8需要3个字节,3个汉字就成了9个字节,而它会2个字节2个字节地转换成字符,当字节数是奇数时最后1个字节转字符就会计算错误,然后直接把最后这个字符赋为“?”,这样改变了数据,影响后面的解码。
解决方案:
// Sizes the UTF-8 output buffer from the content of the source string:
// single-byte characters are budgeted 1 byte each and double-byte
// characters 3 bytes each, plus 1 byte for the terminating NUL.
CString strConfKind="天安门";
int Lenth = strConfKind.GetLength();
int nELenth = 0; // number of single-byte (English) characters
int nCLenth = 0; // number of double-byte (Chinese) characters
for(int i=0;i<Lenth;i++)
{
    char c = strConfKind.GetAt(i);
    // A byte with the high bit set (1XXX XXXX) starts a double-byte
    // character. Testing the bit works whether char is signed or unsigned.
    if((c & 0x80) != 0)
    {
        i++; // skip the second byte of the double-byte character
        continue;
    }
    // otherwise it is a single-byte character
    nELenth++;
}
// Every remaining pair of bytes is one double-byte character; round up so
// that a dangling odd byte still gets space reserved.
nCLenth = (Lenth-nELenth+1)/2;
// The caller owns strOut and must release it with delete[] when done.
char *strOut = new char[nELenth+nCLenth*3+1];
Convert(strConfKind.GetBuffer(0),strOut, CP_ACP, CP_UTF8);
GBK转成UTF-8:一个汉字需要三个字节,一个英文需要一个字节。这样申请nELenth+nCLenth*3+1个字节。
实际的使用场景是与服务端交互、上传XML文件,服务器返回的内容里有乱码;但直接用下面的代码测试时并没有出现这个问题,了解原因的朋友请提示一下。
测试代码:
void test_convert()
{
CString str11 = "天安门";
int Lenth = str11.GetLength();
int nELenth = 0; //英文字符数
int nCLenth = 0; //中文字符数
int nTotalLenth = 0;//总共字符数
for(int i=0;i<Lenth;i++)
{
char c = str11.GetAt(i);
//是中文字符 中文字符编码 1XXX XXXX 1XXX XXXX
if(c<0||c>255)
{
i++; //跳过汉字的第二个字节
continue;
}
//是英文字符
else
nELenth ++;
}
//计算中文字符数,每个中文字符占两个字节
nCLenth = (Lenth-nELenth)/2;
char *strOut = new char[nELenth+nCLenth*3+1];
Convert(str11.GetBuffer(0),strOut, CP_ACP, CP_UTF8);//yangzenghua_2010071316:55
cout << strOut << endl;
CString str = "";
for (int i=0; i<strlen(strOut); i++)
{
cout << (int)strOut[i] << " ";
str.Format("%s,%d", str, (int)strOut[i]);
}
CFile sourceFile;
CFileException ex;
if (!sourceFile.Open("C:\\1.txt",
CFile::modeWrite |CFile::shareExclusive | CFile::modeCreate, &ex))
{
// complain if an error happened
// no need to delete the ex object
TCHAR szError[1024];
ex.GetErrorMessage(szError, 1024);
cout << "Couldn't open source file: ";
cout << szError;
return ;
}
sourceFile.Write(strOut, nELenth+nCLenth*3+1);
delete strOut;
strOut = NULL;
sourceFile.Close();
if (!sourceFile.Open("C:\\1.txt",
CFile::modeRead | CFile::shareDenyWrite, &ex))
{
// complain if an error happened
// no need to delete the ex object
TCHAR szError[1024];
ex.GetErrorMessage(szError, 1024);
cout << "Couldn't open source file: ";
cout << szError;
return ;
}
char *strOut2 = new char[nELenth+nCLenth*3+1];
sourceFile.Read(strOut2, nELenth+nCLenth*3+1);
m_log.Add("%s,%s", str11, str);
strOut = new char[nELenth+nCLenth*3+1];
Convert(strOut2, strOut, CP_UTF8, CP_ACP);
cout << endl;
cout << strOut << endl;
delete strOut;
strOut = NULL;
delete strOut2;
strOut2 = NULL;
}
本文探讨了在C++中将GBK编码转换为UTF-8时遇到的两个问题:内存分配不足和奇数个汉字解码导致的乱码。提出了解决方案,即增加内存申请长度,并详细解释了问题原因。测试代码表明,该问题可能在与服务器交互时出现。
2390





