第一步:引入 Maven 依赖
<!-- ICU4J:通过 CharsetDetector 检测文件原本的编码方式 -->
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>59.1</version>
</dependency>
第二步:编写检测代码
package com.hzt.controller.test;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import org.apache.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Utility for guessing the character encoding of a file with ICU4J's
 * {@link CharsetDetector}.
 */
public class JudgeFileCharset {
    private static final Logger logger = Logger.getLogger(JudgeFileCharset.class);

    /**
     * Kept public so existing callers that instantiate this class keep compiling;
     * the only functionality offered is the static method below.
     */
    public JudgeFileCharset() {
    }

    /**
     * Detects the most likely charset of the given file's content.
     *
     * <p>The entire file is read into memory and handed to the ICU4J detector.
     *
     * @param file the file to inspect; may be {@code null}
     * @return the name of the best-matching charset, or {@code null} when
     *         {@code file} is {@code null}, the file cannot be read, or the
     *         detector reports no match
     */
    public static String getFileCharsetByICU4J(File file) {
        // "Unknown" is already signalled with null, so treat a missing file the same way
        // instead of failing with a NullPointerException.
        if (file == null) {
            return null;
        }
        try {
            byte[] data = Files.readAllBytes(file.toPath());
            CharsetDetector detector = new CharsetDetector();
            detector.setText(data);
            CharsetMatch match = detector.detect();
            return match == null ? null : match.getName();
        } catch (IOException e) {
            // Hand the exception itself to the logger so the stack trace and cause are
            // recorded, not just the (possibly null) message.
            logger.error("Failed to read file for charset detection: " + file, e);
            return null;
        }
    }
}
第三步:注意事项
检测到的编码方式有可能为空(未匹配到编码或读取文件出错时返回 null),这时需要手动设置一个默认的编码方式,例如:
String charset = JudgeFileCharset.getFileCharsetByICU4J(file);
if(StringUtils.isNullOrEmpty(charset)){
charset="UTF-8";
}