/*代码在安全性验证处理上不是很好,见谅*/
/*
UTF编码格式转为16位Unicode编码格式
⑴ 普通ASCII码(单字节)
例如a为0x61,即0110 0001;1为0x31,即0011 0001;首位为0,即0*******,表示普通ASCII码。将单字节的ASCII编码转为十六进制Unicode时,只需将其填充为两个字节,即前面填充为0,例如将a表示为0x0061。
⑵ 多字节编码
110***** 10******表示占用两个字节的UTF编码,1110**** 10****** 10******表示占用三个字节的UTF编码。例如“韩杉你好”的UTF编码为E99FA9 E69D89 E4BDA0 E5A5BD,十六进制Unicode编码为97E9 6749 4F60 597D。“韩”为E99FA9,展开二进制位为1110 1001 1001 1111 1010 1001,有效的Unicode位是去掉各字节前缀(1110、10、10)后剩余的部分。将这些位提取出来为1001 0111 1110 1001,转换为十六进制为97E9。“杉”为E69D89,展开二进制位为1110 0110 1001 1101 1000 1001,同样去掉各字节前缀后,提取出来为0110 0111 0100 1001,转换为十六进制为6749。
*/
/*********************************************************************
2010-4-29
Author: HanShan
function: translate utf to unicode16(chinese)
EMail: hanshan.mail@gmail.com
*********************************************************************/
/**
 * Convert a NUL-terminated UTF-8 string into a hex text string of 16-bit
 * Unicode code units: four uppercase, zero-padded hex digits per character
 * (e.g. "a" -> "0061", E9 9F A9 -> "97E9").
 *
 * Only 1-, 2- and 3-byte UTF-8 sequences (U+0000..U+FFFF) are supported.
 * Conversion stops at the first unsupported or malformed sequence; whatever
 * was converted up to that point is left in res. Diagnostic lines are
 * printed to stdout.
 *
 * @param str  NUL-terminated UTF-8 input (not modified).
 * @param res  Destination buffer. No size is passed in, so the caller must
 *             provide at least 4 * strlen(str) + 1 bytes.
 * @return 0 when the whole input was converted, -1 when str or res is NULL
 *         or conversion stopped early.
 */
int translateUtfToUnicode(char *str,char *res)
{
    static const char hex_digits[] = "0123456789ABCDEF";
    /* Work on unsigned bytes: plain char may be signed, which breaks shifts and masks. */
    const unsigned char *in = (const unsigned char *)str;
    size_t len;
    size_t i = 0;
    size_t out = 0;
    int rc = 0;

    if (str == NULL || res == NULL)
        return -1;

    res[0] = '\0';
    len = strlen(str);
    printf("the str len %zu!\n", len);

    while (i < len) {
        unsigned int cp;
        unsigned char lead = in[i];

        if (lead < 0x80) {                      /* 0xxxxxxx: ASCII */
            cp = lead;
            i += 1;
        } else if ((lead & 0xE0) == 0xC0) {     /* 110xxxxx 10xxxxxx */
            /* A truncated sequence hits the NUL here, so this test also
               keeps every read inside the string. */
            if ((in[i + 1] & 0xC0) != 0x80) {
                printf("the code is invalid!\n");
                rc = -1;
                break;
            }
            cp = ((unsigned int)(lead & 0x1F) << 6) | (in[i + 1] & 0x3Fu);
            i += 2;
        } else if ((lead & 0xF0) == 0xE0) {     /* 1110xxxx 10xxxxxx 10xxxxxx */
            /* Short-circuit order matters: in[i + 2] is only read once
               in[i + 1] is known not to be the terminator. */
            if ((in[i + 1] & 0xC0) != 0x80 || (in[i + 2] & 0xC0) != 0x80) {
                printf("the code is invalid!\n");
                rc = -1;
                break;
            }
            cp = ((unsigned int)(lead & 0x0F) << 12)
               | ((unsigned int)(in[i + 1] & 0x3F) << 6)
               | (in[i + 2] & 0x3Fu);
            i += 3;
        } else if (lead >= 0xF0) {              /* 4+ byte sequence: not representable in 16 bits */
            printf("the code over 3!\n");
            rc = -1;
            break;
        } else {                                /* stray continuation byte 10xxxxxx */
            printf("the code is invalid!\n");
            rc = -1;
            break;
        }

        res[out++] = hex_digits[(cp >> 12) & 0x0F];
        res[out++] = hex_digits[(cp >> 8) & 0x0F];
        res[out++] = hex_digits[(cp >> 4) & 0x0F];
        res[out++] = hex_digits[cp & 0x0F];
    }
    res[out] = '\0';

    printf("the mes: %zu %s\n", len, str);
    printf("the res: %zu %s\n", out / 2, res);
    return rc;
}
/*
16位Unicode编码格式转为UTF编码格式
例如短信内容为“韩杉你好a123p”,其Unicode编码为 97E9 6749 4F60 597D 0061 0031 0032 0033 0070
⑴ 普通ASCII码(单字节)
例如“a”的ASCII码为97,换为16进制即为61,我们只取Unicode编码0061的低8位即可。
⑵ 多字节编码
16位的Unicode编码为00000XXX XXXXXXXX(高5位为0)时,占用两个UTF编码字节。其他的16位Unicode编码则占用三个UTF编码字节。“韩”的Unicode编码为97E9,对应二进制码为1001 0111 1110 1001,根据前面UTF编码转Unicode编码的逆过程,将其转换为对应的UTF编码为1110 1001 1001 1111 1010 1001,即为 E9 9F A9。
*/
/*********************************************************************
2010-6-2
Author: HanShan
function: translate unicode16(chinese) to utf
EMail: hanshan.mail@gmail.com
*********************************************************************/
/**
 * Convert a buffer of 16-bit Unicode code units into a NUL-terminated
 * UTF-8 string.
 *
 * Each code unit is stored as two bytes, high byte first. Code units are
 * encoded independently as 1, 2 or 3 UTF-8 bytes; surrogate pairs are not
 * combined. A 0x0000 code unit produces an embedded NUL byte in des.
 *
 * @param src   Input bytes (high byte, low byte, ...). Not modified.
 * @param des   Destination buffer. No size is passed in, so the caller must
 *              provide at least (size / 2) * 3 + 1 bytes.
 * @param size  Number of bytes in src. A trailing odd byte is ignored;
 *              a non-positive size yields an empty string.
 */
void translateUnicodeToUtf(char *src,char *des,int size)
{
    /* Work on unsigned bytes: plain char may be signed, and right-shifting
       a negative value would smear sign bits into the result. */
    const unsigned char *in = (const unsigned char *)src;
    int i;
    int j = 0;

    if (des == NULL)
        return;
    if (src == NULL)
        size = 0;

    /* i + 1 < size: only consume complete (high, low) byte pairs. */
    for (i = 0; i + 1 < size; i += 2) {
        unsigned int cp = ((unsigned int)in[i] << 8) | in[i + 1];

        if (cp < 0x80) {                /* U+0000..U+007F: single byte */
            des[j++] = (char)cp;
        } else if (cp < 0x800) {        /* U+0080..U+07FF: 110xxxxx 10xxxxxx */
            des[j++] = (char)(0xC0 | (cp >> 6));
            des[j++] = (char)(0x80 | (cp & 0x3F));
        } else {                        /* U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx */
            des[j++] = (char)(0xE0 | (cp >> 12));
            des[j++] = (char)(0x80 | ((cp >> 6) & 0x3F));
            des[j++] = (char)(0x80 | (cp & 0x3F));
        }
    }
    des[j] = '\0';
}