#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Short aliases for the byte and 16-bit unsigned types used in this file. */
#define u8 unsigned char
#define u16 unsigned short
/*
 * Returns the number of consecutive 1 bits found at the top of firstCh,
 * scanning from bit 7 downward, with a result of 0 reported as 1.
 *
 * For a UTF-8 lead byte this is the length in bytes of the encoded
 * character: 0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, and so on.
 * A byte of the form 10xxxxxx also yields 1, and 0xFF yields 8; no
 * validation of the byte is performed.
 */
static int getUtf8ByteNumForWord(u8 firstCh)
{
    int leadingOnes = 0;
    int mask;

    /* The mask eventually shifts down to 0, which always ends the scan. */
    for (mask = 0x80; (firstCh & mask) != 0; mask >>= 1) {
        leadingOnes++;
    }

    return (leadingOnes == 0) ? 1 : leadingOnes;
}
/*
 * Decodes the first UTF-8 encoded character at utf8 into a 16-bit code
 * point and stores it in *unicodeKey.
 *
 * utf8       - points at the lead byte of the character; for a 2- or 3-byte
 *              sequence the following 1 or 2 bytes are read as well, so the
 *              caller must make sure they are accessible.
 * unicodeKey - receives the decoded value in host byte order.
 *
 * Sequences of 1, 2 and 3 bytes are decoded. Anything longer (per the lead
 * byte) does not fit in 16 bits and stores 0. Continuation bytes are not
 * validated. The function always returns 0.
 */
int utf8ToUnicode(u8 *utf8, u16 *unicodeKey)
{
    u16 codePoint = 0;

    switch (getUtf8ByteNumForWord(utf8[0])) {
    case 1:
        /* Single byte: the value is the byte itself. */
        codePoint = utf8[0];
        break;
    case 2:
        /*
         * 110xxxxx 10yyyyyy -> xxxxxyyyyyy. The continuation byte must be
         * masked with 0x3f; otherwise its "10" marker bits leak into the
         * result (e.g. C4 80 would decode to 0x0180 instead of 0x0100).
         */
        codePoint = (u16)(((utf8[0] & 0x1f) << 6) | (utf8[1] & 0x3f));
        break;
    case 3:
        /* 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyyyyzzzzzz. */
        codePoint = (u16)(((utf8[0] & 0x0f) << 12) |
                          ((utf8[1] & 0x3f) << 6) |
                          (utf8[2] & 0x3f));
        break;
    default:
        /* 4 or more bytes: not representable in 16 bits, leave 0. */
        break;
    }

    /*
     * Store the value arithmetically rather than copying a low/high byte
     * pair, so the result does not depend on the host's byte order.
     */
    *unicodeKey = codePoint;
    return 0;
}
int main()
{
int i, a , b ,c;
u8 word_utf8[10][3];
u8 print_word[3];
char *stdin_st = malloc(31);
u16 unicode = 0;
while(1) {
a = b = c = 0;
fgets(stdin_st, 31, stdin);
while(c < 31) {
if (stdin_st[c] == '\n')
break;
word_utf8[a][b++] = stdin_st[c++];
if (b == 3) {
a++;
b = 0;
}
}
for (i = 0; i < a; i++) {
utf8ToUnicode(word_utf8[i], &unicode);
memcpy(print_word, word_utf8[i], 3);
printf("%s=%d\n", print_word, unicode);
}
}
return 0;
}
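/*
 * 汉字utf-8转unicode (Chinese characters: UTF-8 to Unicode conversion)
 * 最新推荐文章于 2017-12-15 14:51:55 发布 (page footer: latest recommended article published 2017-12-15 14:51:55)
 */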