sContent 是存放网页源文件内容的 string 字符串;转换用到头文件 CodeConverter.h(非标准库文件,由他人编写);转换完成后,将转换得到的内容重新赋值给 sContent。
//转换所需内存申请、赋初值
size_t nFileSize = sContent.size();
char *pcTemp = new char[nFileSize+1];
memcpy(pcTemp, sContent.c_str(), nFileSize);
pcTemp[nFileSize] = 0;
size_t nNewCodeSize = 3*sizeof(char)*nFileSize;
char *pcBuffer = new char[nNewCodeSize];
memset(pcBuffer, 0, nNewCodeSize);
//编码转换
CCodeConverter *pclCodeIconv;
if (sCharset=="utf-8" || sCharset=="UTF-8")
{
pclCodeIconv = new CCodeConverter("UTF-8", "GB2312");
nNewCodeSize = pclCodeIconv->convert(pcTemp, nFileSize, pcBuffer, nNewCodeSize, 1); //源码字符类型转换
sContent = pcBuffer;
delete pclCodeIconv;
pclCodeIconv = NULL;
}
else if (sCharset=="big5" || sCharset=="BIG5")
{
pclCodeIconv = new CCodeConverter("BIG5", "GBK");
nNewCodeSize = pclCodeIconv->convert(pcTemp, nFileSize, pcBuffer, nNewCodeSize, 1);
sContent = pcBuffer;
delete pclCodeIconv;
pclCodeIconv = NULL;
}
else
{
}
delete [] pcTemp;
delete [] pcBuffer;
本文介绍了一种将网页源文件从不同编码(如UTF-8或Big5)转换为GB2312或GBK编码的方法。通过使用CCodeConverter类,文章详细展示了如何分配内存、进行编码转换,并最终将转换后的字符串重新赋值给原始变量。
655

被折叠的 条评论
为什么被折叠?



