/*
 * Background: to work out how much fixed-width display space a UTF-8 string
 * needs, the single-byte (ANSI/ASCII) characters and the multi-byte UTF-8
 * characters (e.g. Chinese) it contains have to be counted separately.
 * The function below performs that classification.
 */

/**
 * str_utf8_summary: count the characters of a string, classified as
 * single-byte (ANSI) or multi-byte (UTF-8).
 *
 * str    : string to scan; may be NULL, in which case nothing is scanned.
 * limit  : maximum number of bytes to examine. Scanning stops earlier at
 *          the first NUL byte.
 * ansi   : if non-NULL, receives the number of bytes whose top two bits are
 *          neither 10 nor 11 (i.e. 0x00-0x7F).
 * utf8   : if non-NULL, receives the number of lead bytes (top bits 11),
 *          one per multi-byte character. A lead byte is counted even when
 *          its continuation bytes are missing, and 0xF8-0xFF are not
 *          rejected.
 * unknow : if non-NULL, receives the number of continuation bytes (top bits
 *          10) that do not belong to a preceding lead byte.
 *
 * All non-NULL counters are always written, including when str is NULL.
 *
 * return : number of bytes scanned.
 */
size_t str_utf8_summary(const char *str, size_t limit,
                        size_t *ansi, size_t *utf8, size_t *unknow)
{
    size_t n_ansi = 0;
    size_t n_utf8 = 0;
    size_t n_unknown = 0;
    size_t i = 0;
    /*
     * Remaining-length tracker: holds the current lead byte, shifted left
     * once per accepted continuation byte. While its top two bits are 11
     * another continuation byte is still expected.
     */
    unsigned char lead = 0;

    if (str != NULL) {
        /* Check the bound before reading so no byte past limit is touched. */
        for (; i < limit && str[i] != '\0'; i++) {
            /* Work on unsigned bytes: plain char may be signed. */
            unsigned char c = (unsigned char)str[i];

            switch (c & 0xC0) {
            case 0x80: /* continuation byte */
                if ((lead & 0xC0) == 0xC0) {
                    lead = (unsigned char)(lead << 1);
                } else {
                    n_unknown++;
                }
                break;
            case 0xC0: /* lead byte of a multi-byte character */
                lead = c;
                n_utf8++;
                break;
            default: /* single-byte character */
                n_ansi++;
                lead = 0;
                break;
            }
        }
    }

    if (ansi != NULL) {
        *ansi = n_ansi;
    }
    if (utf8 != NULL) {
        *utf8 = n_utf8;
    }
    if (unknow != NULL) {
        *unknow = n_unknown;
    }
    return i;
}