/**
 * Heuristically decide whether a byte buffer is multi-byte UTF-8 text.
 *
 * The buffer is scanned once. Every byte with the high bit set must be
 * either a lead byte announcing a 2..6 byte sequence or one of the
 * continuation bytes (10xxxxxx) that such a lead byte requires.
 *
 * Note: only the structural lead/continuation pattern is checked. The
 * legacy 5- and 6-byte forms are accepted, and overlong encodings or
 * surrogate code points are not rejected.
 *
 * @param str    Bytes to inspect; need not be NUL-terminated. May be NULL,
 *               in which case 0 is returned.
 * @param length Number of bytes of str to inspect.
 * @return 1 if the buffer contains at least one non-ASCII byte and every
 *         multi-byte sequence is structurally well formed and complete;
 *         0 otherwise. A buffer made only of ASCII bytes (including an
 *         empty one) deliberately yields 0, because pure ASCII gives no
 *         evidence that the text is UTF-8 rather than another encoding.
 */
int utf8_check(const char* str,size_t length)
{
    int pending = 0;        /* continuation bytes still owed by the current sequence */
    int saw_non_ascii = 0;  /* set once any byte with the high bit is seen */

    if (str == NULL) {
        return 0;
    }

    for (size_t i = 0; i < length; i++) {
        /* Work on an unsigned byte: plain char may be signed. */
        unsigned char ch = (unsigned char)str[i];

        if (pending > 0) {
            /* Inside a sequence: the byte must look like 10xxxxxx. */
            if ((ch & 0xC0) != 0x80) {
                return 0;
            }
            pending--;
            continue;
        }

        if ((ch & 0x80) == 0) {
            continue;       /* plain ASCII byte, nothing to track */
        }

        saw_non_ascii = 1;

        /* The number of leading 1 bits in a lead byte is the sequence length. */
        int lead_ones = 0;
        while ((ch & 0x80) != 0) {
            ch = (unsigned char)(ch << 1);
            lead_ones++;
        }

        /* One leading 1 is a stray continuation byte; more than six is invalid. */
        if (lead_ones < 2 || lead_ones > 6) {
            return 0;
        }
        pending = lead_ones - 1;
    }

    if (!saw_non_ascii) {
        return 0;           /* all ASCII: not treated as UTF-8 */
    }

    /* A non-zero count here means the buffer ended mid-sequence. */
    return pending == 0;
}
/* 判断char*是否为utf8编码 (Check whether a char* buffer is UTF-8 encoded) */
最新推荐文章于 2024-12-24 16:25:25 发布