场景:
1. 有些字符串的编码是 Unicode,但第三方库需要的是 UTF-8 字符串,这时就需要一个 Unicode 转 UTF-8 的接口。
// Unicode to UTF-8 conversion
#include <cstdio>
#include <cstdlib>
#include <cwchar>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
#define MAX_LENGTH 1024
/// @brief Encodes a single wide character as a UTF-8 byte sequence.
///
/// @param UniChar       The character value to encode.
/// @param OutUTFString  Destination for the encoded bytes, or a null pointer to
///                      only query the encoded length. When non-null it must
///                      have room for up to 4 bytes. No terminating '\0' is
///                      written.
/// @return The number of bytes in the encoding (1-4), or 0 when the value
///         cannot be encoded (greater than U+10FFFF, or negative on platforms
///         where wchar_t is signed); nothing is written in that case.
///
/// @note Values in the surrogate range U+D800-U+DFFF are encoded individually
///       as three bytes. A caller holding UTF-16 data must combine surrogate
///       pairs into one code point before calling if strict UTF-8 is required.
int UniCharToUTF8(wchar_t UniChar, char* OutUTFString)
{
    // Widen through an unsigned type so that a negative wchar_t lands above the
    // valid range instead of matching the single-byte branch.
    const unsigned long codePoint = static_cast<unsigned long>(UniChar);

    char encoded[4];
    int UTF8CharLength = 0;

    if (codePoint < 0x80UL)
    {
        encoded[UTF8CharLength++] = static_cast<char>(codePoint);
    }
    else if (codePoint < 0x800UL)
    {
        encoded[UTF8CharLength++] = static_cast<char>(0xC0UL | (codePoint >> 6));
        encoded[UTF8CharLength++] = static_cast<char>(0x80UL | (codePoint & 0x3FUL));
    }
    else if (codePoint < 0x10000UL)
    {
        encoded[UTF8CharLength++] = static_cast<char>(0xE0UL | (codePoint >> 12));
        encoded[UTF8CharLength++] = static_cast<char>(0x80UL | ((codePoint >> 6) & 0x3FUL));
        encoded[UTF8CharLength++] = static_cast<char>(0x80UL | (codePoint & 0x3FUL));
    }
    else if (codePoint <= 0x10FFFFUL)
    {
        // U+10FFFF is the last Unicode code point; anything above it has no
        // valid UTF-8 form.
        encoded[UTF8CharLength++] = static_cast<char>(0xF0UL | (codePoint >> 18));
        encoded[UTF8CharLength++] = static_cast<char>(0x80UL | ((codePoint >> 12) & 0x3FUL));
        encoded[UTF8CharLength++] = static_cast<char>(0x80UL | ((codePoint >> 6) & 0x3FUL));
        encoded[UTF8CharLength++] = static_cast<char>(0x80UL | (codePoint & 0x3FUL));
    }
    else
    {
        return 0;
    }

    if (OutUTFString)
    {
        for (int i = 0; i < UTF8CharLength; ++i)
        {
            OutUTFString[i] = encoded[i];
        }
    }
    return UTF8CharLength;
}
/// Demo driver: reads wide characters from a fixed input file, converts each
/// one to UTF-8 with UniCharToUTF8, echoes the encoded bytes to stdout, and
/// then waits for a key press before exiting.
///
/// @return 0 on success, 1 when the input file cannot be opened.
int main()
{
    FILE* pFile = fopen("E:/project/11111/Debug/11.txt", "r");
    if (!pFile)
    {
        perror("fopen");
        return 1;
    }

    // NOTE(review): presumably skips a 2-byte UTF-16 byte-order mark at the
    // start of the file - confirm against the actual input encoding and the
    // stream mode fgetwc expects on the target platform.
    fseek(pFile, 2, SEEK_SET);

    // Accumulates the whole converted text; std::string grows on demand, so no
    // manual capacity tracking or reallocation is needed.
    std::string utf8;
    char encoded[4]; // UniCharToUTF8 emits at most 4 bytes per character.

    // Test the value returned by fgetwc rather than feof(): WEOF is returned on
    // read/encoding errors as well as at end of file, so this loop always ends.
    for (wint_t wc = fgetwc(pFile); wc != WEOF; wc = fgetwc(pFile))
    {
        const int encodedLength = UniCharToUTF8(static_cast<wchar_t>(wc), encoded);
        utf8.append(encoded, static_cast<std::string::size_type>(encodedLength));
        // The encoded bytes are not NUL-terminated, so write an exact count.
        fwrite(encoded, 1, static_cast<size_t>(encodedLength), stdout);
    }

    fclose(pFile);
    getchar(); // Keep the console window open until a key is pressed.
    return 0;
}