import info.monitorenter.cpdetector.io.*;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
/**
 * Helpers for guessing the character encoding of a file using the cpdetector library.
 */
public class FileCharsetCheckUtils {

    /**
     * Demo entry point: copies a hard-coded input file to a temporary file, detects the
     * temporary file's charset with {@link #getFileCharset(File)}, and prints its first line
     * decoded with that charset.
     *
     * <p>Any failure is reported on standard error via {@code printStackTrace()}; the
     * temporary file is removed in every case once it has been created.
     *
     * @param args unused
     * @throws IOException kept for signature compatibility
     */
    public static void main(String[] args) throws IOException {
        File file = null;
        try {
            // If a File object is already available, pass it straight to getFileCharset.
            // When only an InputStream is available, spool it to a temporary file first.
            try (InputStream is = new FileInputStream(new File("C:\\Users\\Admin\\Desktop\\ageing.data"))) {
                file = File.createTempFile("temp", ".txt"); // create the temporary file
                Files.copy(is, file.toPath(), StandardCopyOption.REPLACE_EXISTING);
            }
            String filetype = getFileCharset(file);
            // The source stream was consumed by the copy above, so read from the temp file.
            // The FileInputStream is declared separately so it is still closed if
            // InputStreamReader rejects the charset name.
            try (InputStream in = new FileInputStream(file);
                 BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in, filetype))) {
                System.out.println(bufferedReader.readLine());
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // Clean up regardless of how far the try block got; fall back to
            // delete-on-exit only when the immediate delete fails.
            if (file != null && !file.delete()) {
                file.deleteOnExit();
            }
        }
    }

    /**
     * Detects the charset of {@code file} and maps it to the charset name used for decoding.
     *
     * <p>Mapping of the detected charset:
     * <ul>
     *   <li>{@code US-ASCII} is reported as {@code "ISO_8859_1"};</li>
     *   <li>any charset whose name starts with {@code "UTF"} is reported under its own name
     *       (for example {@code UTF-8}, {@code UTF-16BE});</li>
     *   <li>everything else, including a {@code null} detection result, is reported as
     *       {@code "GBK"}.</li>
     * </ul>
     *
     * @param file the file to inspect
     * @return the charset name to decode the file with
     * @throws Exception if building the file URL or running the detection fails
     */
    public static String getFileCharset(File file) throws Exception {
        CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
        // NOTE(review): getInstance() looks like a shared singleton, so repeated calls may
        // keep registering detectors on the same proxy - confirm against cpdetector's
        // CodepageDetectorProxy.add semantics.
        detector.add(new ParsingDetector(false));
        detector.add(JChardetFacade.getInstance());
        detector.add(ASCIIDetector.getInstance());
        detector.add(UnicodeDetector.getInstance());

        // Failures propagate to the caller unchanged; they are not logged here so the
        // caller's handler does not report the same stack trace twice.
        Charset charset = detector.detectCodepage(file.toURI().toURL());

        String charsetName = "GBK";
        if (charset != null) {
            String detected = charset.name();
            if ("US-ASCII".equals(detected)) {
                charsetName = "ISO_8859_1";
            } else if (detected.startsWith("UTF")) {
                charsetName = detected; // e.g. UTF-8, UTF-16BE
            }
        }
        return charsetName;
    }
}
cpdetector所需Jar包
CSDN下载地址:https://download.csdn.net/download/xll_csdn/12923009
百度网盘下载地址:https://pan.baidu.com/s/1MIyOg3wjVcT-iICvPL6owg 提取码:fu6o
如需通过 Maven 引入,请将压缩包直接解压到本地 Maven 仓库根目录(原文此处附有示意图,本文未包含)。
pom.xml引入
<!--
  Dependencies for cpdetector: four artifacts (cpdetector, antlr, chardet, jargs), all
  declared under groupId "cpdetector" at version 1.0. Per the instructions above, these are
  expected to be resolved from the local Maven repository after unpacking the downloaded
  archive there. NOTE(review): presumably not available from a public repository under
  these coordinates - confirm.
-->
<dependency>
<groupId>cpdetector</groupId>
<artifactId>cpdetector</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>cpdetector</groupId>
<artifactId>antlr</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>cpdetector</groupId>
<artifactId>chardet</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>cpdetector</groupId>
<artifactId>jargs</artifactId>
<version>1.0</version>
</dependency>