/*Talk is cheap , show me the Code .*/
import java.io.*;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
/**
 * Converts text files to UTF-8.
 *
 * <p>{@link #main} processes the {@code .txt}, {@code .html} and {@code .php} files located
 * directly inside {@link #SRC_FILE_DIR} (or the single file it names) and writes each result,
 * under the same file name, to {@link #DES_FILE_DIR}.
 *
 * <p>The source encoding is guessed by {@link #getCode}. Files without a byte-order mark are
 * checked for UTF-8 validity before falling back to GBK, so running the tool on its own output
 * leaves that output unchanged.
 */
public class ChangeEncode {
    /** Source directory (or single source file) read by {@link #main}. */
    public static String SRC_FILE_DIR = "D:\\TestData\\";

    /** Destination prefix; the converted file's name is appended to it verbatim. */
    public static String DES_FILE_DIR = "D:\\TestData2\\";

    /** Size of the byte/char buffers used while copying. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Converts every matching file in {@link #SRC_FILE_DIR}, or the file itself when the path
     * is not a directory.
     *
     * @param args unused
     * @throws Exception if the directory cannot be listed or a file cannot be converted
     */
    public static void main(String[] args) throws Exception {
        File source = new File(SRC_FILE_DIR);
        if (!source.isDirectory()) {
            changeFileEncode(SRC_FILE_DIR);
            return;
        }

        String[] names = source.list(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                // Only regular files with one of the supported extensions are converted.
                boolean wanted = name.endsWith(".txt")
                        || name.endsWith(".html")
                        || name.endsWith(".php");
                return wanted && new File(dir, name).isFile();
            }
        });
        // File.list returns null (not an empty array) when the directory cannot be read.
        if (names == null) {
            throw new IOException("Cannot list directory: " + SRC_FILE_DIR);
        }
        for (String name : names) {
            // File(parent, child) inserts the separator if SRC_FILE_DIR lacks a trailing one.
            changeFileEncode(new File(source, name).getPath());
        }
    }

    /**
     * Guesses the character encoding of a file.
     *
     * <p>A leading byte-order mark decides first: {@code EF BB BF} gives {@code "UTF-8"},
     * {@code FF FE} gives {@code "Unicode"} and {@code FE FF} gives {@code "UTF-16BE"}.
     * Without a BOM the whole file is strictly decoded as UTF-8; if that succeeds the result is
     * {@code "UTF-8"}, otherwise {@code "GBK"} is assumed. This is a heuristic: legacy-encoded
     * content that happens to form valid UTF-8 is reported as UTF-8.
     *
     * @param fileName path of the file to inspect
     * @return one of {@code "UTF-8"}, {@code "Unicode"}, {@code "UTF-16BE"}, {@code "GBK"}
     * @throws Exception if the file cannot be opened or read
     */
    public static String getCode(String fileName) throws Exception {
        int b0;
        int b1;
        int b2;
        try (InputStream in = new FileInputStream(fileName)) {
            // read() keeps returning -1 after end of file, so short files are safe here.
            b0 = in.read();
            b1 = in.read();
            b2 = in.read();
        }

        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            return "UTF-8";
        }
        if (b0 == 0xFF && b1 == 0xFE) {
            return "Unicode";
        }
        if (b0 == 0xFE && b1 == 0xFF) {
            return "UTF-16BE";
        }
        return isValidUtf8(fileName) ? "UTF-8" : "GBK";
    }

    /**
     * Converts one file to UTF-8 and writes it to {@link #DES_FILE_DIR} under the same name.
     *
     * <p>Files detected as UTF-8 are copied byte for byte. Other files are decoded with the
     * detected charset and re-encoded as UTF-8. The detected charset name is printed to
     * standard output.
     *
     * @param fileName full path of the source file
     * @throws Exception if the source cannot be read or the destination cannot be written
     */
    public static void changeFileEncode(String fileName) throws Exception {
        // Detect the source encoding first.
        String code = getCode(fileName);
        System.out.println(code);

        String target = DES_FILE_DIR + baseName(fileName);
        try (InputStream in = new BufferedInputStream(new FileInputStream(fileName));
                OutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            if ("UTF-8".equals(code)) {
                // Already UTF-8: no conversion needed, copy the raw bytes.
                byte[] bytes = new byte[BUFFER_SIZE];
                int len;
                while ((len = in.read(bytes)) != -1) {
                    out.write(bytes, 0, len);
                }
            } else {
                // One decoder for the whole stream, so a character (or the BOM-derived byte
                // order) that spans two buffer reads is still decoded correctly.
                Reader reader = new InputStreamReader(in, code);
                Writer writer = new OutputStreamWriter(out, "UTF-8");
                char[] chars = new char[BUFFER_SIZE];
                int len;
                while ((len = reader.read(chars)) != -1) {
                    writer.write(chars, 0, len);
                }
                // Push encoded bytes into 'out' before try-with-resources closes it.
                writer.flush();
            }
        }
    }

    /** Returns the part of {@code fileName} after its last {@code '\'} or {@code '/'}. */
    private static String baseName(String fileName) {
        int cut = Math.max(fileName.lastIndexOf('\\'), fileName.lastIndexOf('/'));
        return fileName.substring(cut + 1);
    }

    /** Returns whether the entire file decodes as UTF-8 with no malformed byte sequence. */
    private static boolean isValidUtf8(String fileName) throws IOException {
        CharsetDecoder strict = Charset.forName("UTF-8").newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        try (Reader reader = new InputStreamReader(
                new BufferedInputStream(new FileInputStream(fileName)), strict)) {
            char[] sink = new char[BUFFER_SIZE];
            while (reader.read(sink) != -1) {
                // Decoded text is discarded; only whether decoding succeeds matters.
            }
            return true;
        } catch (CharacterCodingException notUtf8) {
            // A strict decoder reports malformed input by throwing; that is the "no" answer.
            return false;
        }
    }
}
// 一上午搞java编码问题,写了一个将批量文件编码转为UTF-8的小程序(有BUG-.-!)
// 最新推荐文章于 2024-06-23 14:23:28 发布