/*
 * Purpose: help determine a text file's encoding.
 *
 * The first three bytes of a file can reveal its encoding type:
 *   ANSI:               no signature (file content starts at the first byte)
 *   Unicode:            first two bytes are FF FE
 *   Unicode big endian: first two bytes are FE FF
 *   UTF-8:              first two bytes are EF BB, third byte is BF
 *
 * Note: a UTF-8 file saved without a byte-order mark has no signature
 * either, so this check alone cannot tell it apart from ANSI.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read up to the first n bytes of a file and print each byte's value in
 * hexadecimal on one line, prefixed by the file name.
 *
 * fileName: path of the file to inspect.
 * n:        number of leading bytes to read; values <= 0 print no bytes.
 *
 * If the file cannot be opened, a failure message is printed instead.
 * If the file holds fewer than n bytes, only the bytes actually read are
 * printed.
 */
void readNBytes(const char *fileName, int n)
{
    /* Binary mode: we want the raw bytes, with no newline translation. */
    FILE *fp = fopen(fileName, "rb");
    if (fp == NULL) {
        printf("open file [%s] failed.\n", fileName);
        return;
    }

    /* Guard against a negative count turning into a huge size_t. */
    size_t wanted = (n > 0) ? (size_t)n : 0;
    unsigned char *buf = NULL;
    size_t got = 0;

    if (wanted > 0) {
        buf = malloc(wanted);
        if (buf == NULL) {
            fprintf(stderr, "out of memory reading [%s].\n", fileName);
            fclose(fp);
            return;
        }
        /* fread may return fewer items than requested (short file). */
        got = fread(buf, sizeof *buf, wanted, fp);
    }
    fclose(fp);

    printf("%s:\t", fileName);
    for (size_t i = 0; i < got; i++) {
        printf("%x\t", (unsigned int)buf[i]);
    }
    printf("\n");

    free(buf);
}

int main(void)
{
    char fileName[][50] = {
        "ansi.txt",
        "unicode.txt",
        "ubigendian.txt",
        "utf8.txt"
    };
    size_t count = sizeof fileName / sizeof fileName[0];

    for (size_t i = 0; i < count; i++) {
        /* Every test file contains the same text: 你what123456 */
        readNBytes(fileName[i], 3);
    }
    return 0;
}
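/*
 * Every test file contains the same text: 你what123456
 * Output of a sample run (first three bytes of each file, in hex):
 *
 *   ansi.txt:        c4 e3 77
 *   unicode.txt:     ff fe 60
 *   ubigendian.txt:  fe ff 4f
 *   utf8.txt:        ef bb bf
 *
 * The ANSI file has no signature, so its first bytes are already content
 * (c4 e3 is consistent with the GBK encoding of 你, and 77 is 'w').
 */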