根据对 UTF-8 编码格式的研究,给出如下简单的实现代码,欢迎大家批评指正并继续完善:
/**
 * Encodes a wide-character string as UTF-8.
 *
 * @param source     Wide characters to encode (UTF-16 or UTF-32 code units,
 *                   depending on the platform's wchar_t). May be null, in
 *                   which case nothing is produced.
 * @param sourceLen  Number of wchar_t elements in source. Values <= 0 yield
 *                   an empty result.
 * @param dest       Output buffer, or null to only compute the required size.
 *                   The function has no capacity parameter, so the caller must
 *                   make sure the buffer is large enough (call once with a
 *                   null dest to learn the size). No terminator is appended.
 * @param destLen    Receives the number of UTF-8 bytes produced (or required,
 *                   when dest is null). Its incoming value is ignored.
 * @return           The same value that is stored in destLen.
 *
 * A high surrogate followed by a low surrogate is combined into a single
 * code point and written as one 4-byte sequence. Unpaired surrogates and
 * values outside U+0000..U+10FFFF are replaced by U+FFFD.
 */
int WideCharToUTF8(wchar_t* source, int sourceLen, unsigned char* dest, int& destLen)
{
    destLen = 0;
    if (!source || sourceLen <= 0)
    {
        return destLen;
    }

    const unsigned long kReplacement = 0xFFFDUL;

    for (int i = 0; i < sourceLen; ++i)
    {
        // Widen to an unsigned type so a signed wchar_t holding a negative
        // value becomes an out-of-range code point instead of passing the
        // "<= 0x7F" test.
        unsigned long cp = static_cast<unsigned long>(source[i]);

        if (cp >= 0xD800UL && cp <= 0xDBFFUL)
        {
            // High surrogate: only valid when a low surrogate follows.
            const unsigned long low = (i + 1 < sourceLen)
                ? static_cast<unsigned long>(source[i + 1])
                : 0UL;
            if (low >= 0xDC00UL && low <= 0xDFFFUL)
            {
                cp = 0x10000UL + ((cp - 0xD800UL) << 10) + (low - 0xDC00UL);
                ++i; // the low surrogate has been consumed as well
            }
            else
            {
                cp = kReplacement;
            }
        }
        else if ((cp >= 0xDC00UL && cp <= 0xDFFFUL) || cp > 0x10FFFFUL)
        {
            // Unpaired low surrogate or a value that is not a Unicode code point.
            cp = kReplacement;
        }

        if (cp <= 0x7FUL)
        {
            // 0xxxxxxx
            if (dest)
            {
                *dest++ = static_cast<unsigned char>(cp);
            }
            destLen += 1;
        }
        else if (cp <= 0x7FFUL)
        {
            // 110xxxxx 10xxxxxx
            if (dest)
            {
                *dest++ = static_cast<unsigned char>(0xC0UL | (cp >> 6));
                *dest++ = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
            }
            destLen += 2;
        }
        else if (cp <= 0xFFFFUL)
        {
            // 1110xxxx 10xxxxxx 10xxxxxx
            if (dest)
            {
                *dest++ = static_cast<unsigned char>(0xE0UL | (cp >> 12));
                *dest++ = static_cast<unsigned char>(0x80UL | ((cp >> 6) & 0x3FUL));
                *dest++ = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
            }
            destLen += 3;
        }
        else
        {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            if (dest)
            {
                *dest++ = static_cast<unsigned char>(0xF0UL | (cp >> 18));
                *dest++ = static_cast<unsigned char>(0x80UL | ((cp >> 12) & 0x3FUL));
                *dest++ = static_cast<unsigned char>(0x80UL | ((cp >> 6) & 0x3FUL));
                *dest++ = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
            }
            destLen += 4;
        }
    }
    return destLen;
}

/**
 * Decodes a UTF-8 byte sequence into wide characters.
 *
 * @param source     UTF-8 bytes to decode. May be null, in which case nothing
 *                   is produced.
 * @param sourceLen  Number of bytes in source. Values <= 0 yield an empty
 *                   result.
 * @param dest       Output buffer, or null to only compute the required size.
 *                   The function has no capacity parameter, so the caller must
 *                   make sure the buffer is large enough (call once with a
 *                   null dest to learn the size). No terminator is appended.
 * @param destLen    Receives the number of wchar_t elements produced (or
 *                   required, when dest is null). Its incoming value is
 *                   ignored.
 * @return           The same value that is stored in destLen.
 *
 * One- to four-byte sequences are decoded. Code points above U+FFFF are
 * written as a surrogate pair when wchar_t is narrower than 32 bits and as a
 * single element otherwise. A byte that does not start a well-formed
 * sequence (invalid lead byte, stray continuation byte, truncated sequence,
 * bad continuation byte, overlong form, or a value above U+10FFFF) produces
 * one U+FFFD and decoding resumes at the next byte. Surrogate values encoded
 * as three-byte sequences are passed through unchanged rather than rejected.
 */
int UTF8ToWideChar(unsigned char* source, int sourceLen, wchar_t* dest, int& destLen)
{
    destLen = 0;
    if (!source || sourceLen <= 0)
    {
        return destLen;
    }

    const unsigned long kReplacement = 0xFFFDUL;

    int i = 0;
    while (i < sourceLen)
    {
        const unsigned char lead = source[i];
        unsigned long cp = 0;
        unsigned long minCp = 0; // smallest value that legitimately needs this length
        int extra = -1;          // continuation bytes expected; -1 marks an invalid lead

        if (lead < 0x80)
        {
            cp = lead;
            extra = 0;
        }
        else if ((lead & 0xE0) == 0xC0)
        {
            cp = lead & 0x1FU;
            minCp = 0x80UL;
            extra = 1;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            cp = lead & 0x0FU;
            minCp = 0x800UL;
            extra = 2;
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            cp = lead & 0x07U;
            minCp = 0x10000UL;
            extra = 3;
        }

        // The whole sequence must lie inside the input; written as a
        // subtraction so the comparison cannot overflow.
        bool valid = (extra >= 0) && (extra < sourceLen - i);
        for (int k = 1; valid && k <= extra; ++k)
        {
            const unsigned char cont = source[i + k];
            if ((cont & 0xC0) != 0x80)
            {
                valid = false;
            }
            else
            {
                cp = (cp << 6) | (cont & 0x3FU);
            }
        }
        if (valid && (cp < minCp || cp > 0x10FFFFUL))
        {
            valid = false; // overlong encoding or beyond the Unicode range
        }

        if (valid)
        {
            i += extra + 1;
        }
        else
        {
            cp = kReplacement;
            i += 1; // resynchronise on the very next byte
        }

        if (cp > 0xFFFFUL && sizeof(wchar_t) < 4)
        {
            // wchar_t cannot hold the code point directly: emit a surrogate pair.
            const unsigned long offset = cp - 0x10000UL;
            if (dest)
            {
                *dest++ = static_cast<wchar_t>(0xD800UL + (offset >> 10));
                *dest++ = static_cast<wchar_t>(0xDC00UL + (offset & 0x3FFUL));
            }
            destLen += 2;
        }
        else
        {
            if (dest)
            {
                *dest++ = static_cast<wchar_t>(cp);
            }
            destLen += 1;
        }
    }
    return destLen;
}