java判断文本文件编码格式juniversalchardet使用
今天项目经理让写一个判断文本文件编码格式的接口,废话不多说,直接上代码。
juniversalchardet官网地址: http://code.google.com/p/juniversalchardet/
github地址 https://github.com/thkoch2001/juniversalchardet
支持的编码格式
- Chinese
  - ISO-2022-CN
  - BIG-5
  - EUC-TW
  - GB18030
  - HZ-GB-2312
- Cyrillic
  - ISO-8859-5
  - KOI8-R
  - WINDOWS-1251
  - MACCYRILLIC
  - IBM866
  - IBM855
- Greek
  - ISO-8859-7
  - WINDOWS-1253
- Hebrew
  - ISO-8859-8
  - WINDOWS-1255
- Japanese
  - ISO-2022-JP
  - Shift_JIS
  - EUC-JP
- Korean
  - ISO-2022-KR
  - EUC-KR
- Unicode
  - UTF-8
  - UTF-16BE / UTF-16LE
  - UTF-32BE / UTF-32LE / X-ISO-10646-UCS-4-3412 / X-ISO-10646-UCS-4-2143
- Others
  - WINDOWS-1252
实践
pom.xml文件
<dependency>
<groupId>com.googlecode.juniversalchardet</groupId>
<artifactId>juniversalchardet</artifactId>
<version>1.0.3</version>
</dependency>
java代码
/**
 * Detects the character encoding of an uploaded text-like file.
 *
 * <p>Only uploads whose content type contains {@code text}, {@code sql},
 * {@code excel} or {@code openxmlformats-officedocument.spreadsheetml.sheet}
 * are inspected; anything else (including a missing content type) is rejected
 * with an {@code error} entry.
 *
 * @param file the uploaded file, bound from the {@code file} request parameter
 * @return a map that may contain:
 *         {@code encoding} (the detected charset name),
 *         {@code error} (unsupported file type, or no charset detected),
 *         {@code code} ({@code "0"} when detection ran, {@code "-1"} on exception) and
 *         {@code msg} (the message matching {@code code})
 */
@RequestMapping("/resultCode")
public Map<String,String> resultCode(@RequestParam("file") MultipartFile file){
    Map<String,String> map = new HashMap<>();

    // Content type reported for the upload; it may be null when the client
    // did not supply one, so it is checked explicitly instead of relying on
    // a caught NullPointerException.
    String fileType = file.getContentType();
    boolean supported = fileType != null
            && (fileType.contains("text")
            || fileType.contains("sql")
            || fileType.contains("excel")
            || fileType.contains("openxmlformats-officedocument.spreadsheetml.sheet"));

    if (!supported) {
        // Only text-like uploads (.txt/.sql/.xls/.xlsx etc.) are accepted.
        map.put("error","请上传SQL/TEXT/xls/xlsx格式文件!");
        return map;
    }

    // try-with-resources guarantees the upload stream is closed on every path.
    try (InputStream fis = file.getInputStream()) {
        byte[] buf = new byte[4096];

        // (1) Create the detector; no listener is registered.
        UniversalDetector detector = new UniversalDetector(null);

        // (2) Feed data until the stream is exhausted or the detector is done.
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }

        // (3) Tell the detector that no more data will follow.
        detector.dataEnd();

        // (4) Read the result; null means no charset could be determined.
        String encoding = detector.getDetectedCharset();
        if (encoding != null) {
            map.put("encoding",encoding);
        } else {
            map.put("error","No encoding detected.");
        }

        // (5) Reset the detector after use.
        detector.reset();

        map.put("code","0");
        map.put("msg","成功!");
    } catch (Exception e) {
        // NOTE(review): the exception is dropped here because no logger is
        // visible in this snippet; log `e` once one is available.
        map.put("code","-1");
        map.put("msg","失败!");
    }
    return map;
}
然后就可以直接调用接口测试。