获取文件的字符编码
package com.file;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Reads a text file after guessing its character encoding from the first three bytes.
 *
 * <p>The guess is a simple signature check (byte-order marks plus one hard-coded
 * UTF-8 byte sequence); anything unrecognised is treated as GBK.
 */
public class CharsetCodeTest {

    /** The byte-order mark (U+FEFF) as it appears after decoding. */
    private static final char BYTE_ORDER_MARK = '\uFEFF';

    /**
     * Prints the content of a text file.
     *
     * @param args optional; {@code args[0]} is the file to read, defaulting to {@code D:\1.txt}
     */
    public static void main(String[] args) throws Exception {
        String filePath = (args != null && args.length > 0) ? args[0] : "D:\\1.txt";
        String content = readTxt(filePath);
        System.out.println(content);
    }

    /**
     * Reads the whole file as text, using the encoding reported by
     * {@link #getFileCharsetName(String)}.
     *
     * <p>Lines are joined with {@link System#lineSeparator()}; no separator is added after
     * the last line. A leading byte-order mark is dropped from the result. Failures are
     * reported on stderr and whatever was read so far (possibly the empty string) is returned.
     *
     * @param path path of the file to read
     * @return the decoded file content
     */
    public static String readTxt(String path) {
        StringBuilder content = new StringBuilder();
        try {
            String fileCharsetName = getFileCharsetName(path);
            System.out.println("文件的编码格式为:" + fileCharsetName);
            // The stream is declared on its own so it is closed even when the
            // reader cannot be constructed (e.g. unsupported charset name).
            try (InputStream is = new FileInputStream(path);
                    BufferedReader br = new BufferedReader(new InputStreamReader(is, fileCharsetName))) {
                String line;
                boolean isFirst = true;
                while ((line = br.readLine()) != null) {
                    if (isFirst) {
                        isFirst = false;
                        // The UTF-8 decoder hands the BOM through as a character; it is
                        // not part of the text, so drop it.
                        if (!line.isEmpty() && line.charAt(0) == BYTE_ORDER_MARK) {
                            line = line.substring(1);
                        }
                    } else {
                        content.append(System.lineSeparator());
                    }
                    content.append(line);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("读取文件:" + path + "失败!");
        }
        return content.toString();
    }

    /**
     * Guesses the charset name of a file from its first three bytes and prints it to stdout.
     *
     * <ul>
     *   <li>{@code FF FE} gives {@code "UTF-16"}</li>
     *   <li>{@code FE FF} gives {@code "Unicode"}</li>
     *   <li>{@code EF BB BF} (UTF-8 BOM) gives {@code "UTF-8"}</li>
     *   <li>{@code E5 9B 9E} gives {@code "UTF-8"}; these bytes are the UTF-8 encoding of
     *       U+56DE, so this only recognises BOM-less UTF-8 files that start with that
     *       character</li>
     *   <li>anything else, including files shorter than the signature, gives {@code "GBK"}</li>
     * </ul>
     *
     * @param fileName path of the file to inspect
     * @return the guessed charset name
     * @throws IOException if the file cannot be opened or read
     */
    public static String getFileCharsetName(String fileName) throws IOException {
        // Bytes that are never filled stay 0, which matches none of the signatures below.
        byte[] head = new byte[3];
        try (InputStream inputStream = new FileInputStream(fileName)) {
            int filled = 0;
            // A single read() may return fewer bytes than requested; keep going until
            // the header is full or the file ends.
            while (filled < head.length) {
                int count = inputStream.read(head, filled, head.length - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
        }

        String charsetName = "GBK"; // or GB2312, i.e. what is commonly labelled "ANSI"
        if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
            charsetName = "UTF-16";
        } else if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
            // Original note: covers both UCS2 big-endian and UCS2 little-endian.
            // NOTE(review): "Unicode" is kept as the returned name for compatibility;
            // presumably the JDK resolves it as an alias of UTF-16 - confirm on the target JRE.
            charsetName = "Unicode";
        } else if (head[0] == (byte) 0xE5 && head[1] == (byte) 0x9B && head[2] == (byte) 0x9E) {
            charsetName = "UTF-8"; // UTF-8 without BOM (heuristic, see Javadoc)
        } else if (head[0] == (byte) 0xEF && head[1] == (byte) 0xBB && head[2] == (byte) 0xBF) {
            charsetName = "UTF-8"; // UTF-8 with BOM
        }
        System.out.println(charsetName);
        return charsetName;
    }
}
获取zip文件编码格式(解决读取乱码)
package com.file;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import info.monitorenter.cpdetector.io.ASCIIDetector;
import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
import info.monitorenter.cpdetector.io.JChardetFacade;
import info.monitorenter.cpdetector.io.ParsingDetector;
import info.monitorenter.cpdetector.io.UnicodeDetector;
/**
 * Detects the character encoding of a stream with the third-party cpdetector library.
 *
 * <ol>
 *   <li>cpdetector ships several common detector implementations (ParsingDetector,
 *       JChardetFacade, ASCIIDetector, UnicodeDetector); instances are registered through
 *       {@code add}.</li>
 *   <li>The detector follows a "first non-null result wins" rule.</li>
 *   <li>cpdetector is statistics-based, so its answer is not guaranteed to be correct.</li>
 * </ol>
 */
public class FileCharsetDetector {

    /**
     * Determines the encoding of the given stream using cpdetector and prints it to stdout.
     *
     * <p>The stream is consumed and closed by this method; callers that want to read the
     * content afterwards must open a fresh stream.
     *
     * @param is the stream to inspect
     * @return the detected charset name; {@code "ISO_8859_1"} when the detector reports
     *     US-ASCII; {@code "GBK"} when nothing was detected or detection failed
     */
    public static String getFileEncode(InputStream is) {
        // Per the original author, the buffered wrapper is the key to handling zip entry
        // streams. NOTE(review): presumably because the detector needs mark/reset support -
        // confirm against the cpdetector documentation.
        BufferedInputStream bis =
                (is instanceof BufferedInputStream) ? (BufferedInputStream) is : new BufferedInputStream(is);

        CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
        // NOTE(review): the detectors are registered on the shared proxy instance on every
        // call - check whether the proxy de-duplicates them.
        detector.add(new ParsingDetector(false));
        detector.add(UnicodeDetector.getInstance());
        detector.add(JChardetFacade.getInstance()); // uses classes from chardet.jar internally
        detector.add(ASCIIDetector.getInstance());

        Charset charset = null;
        try {
            // Original note: this call is the key to detecting zip content.
            charset = detector.detectCodepage(bis, Integer.MAX_VALUE);
        } catch (Exception e) {
            // Best effort: report the problem and fall back to the default below.
            System.err.println("Charset detection failed, falling back to GBK: " + e);
        } finally {
            try {
                bis.close();
            } catch (IOException e) {
                System.err.println("Failed to close stream after charset detection: " + e);
            }
        }

        // Default to GBK when nothing was detected.
        String charsetName = "GBK";
        if (charset != null) {
            charsetName = "US-ASCII".equals(charset.name()) ? "ISO_8859_1" : charset.name();
        }
        System.out.println(charsetName);
        return charsetName;
    }

    /**
     * Prints every file entry of a zip archive line by line, decoding each entry with the
     * encoding detected by {@link #getFileEncode(InputStream)}.
     *
     * @param args optional; {@code args[0]} is the archive to read, defaulting to
     *     {@code D:\20210827092009_erqi.zip}
     * @throws IOException if the archive or one of its entries cannot be read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : "D:\\20210827092009_erqi.zip";
        File file = new File(path);
        // One ZipFile for the whole run; it and the entry iterator are closed on exit.
        try (ZipFile zipFile = new ZipFile(file);
                ZipInputStream zipInput = new ZipInputStream(new FileInputStream(file))) {
            ZipEntry zipEntry;
            while ((zipEntry = zipInput.getNextEntry()) != null) {
                if (zipEntry.isDirectory()) {
                    continue; // directories carry no text to decode
                }
                // Detection closes the stream it is given, so the entry is opened a
                // second time for the actual read.
                String encode = getFileEncode(zipFile.getInputStream(zipEntry));
                try (InputStream entryStream = zipFile.getInputStream(zipEntry);
                        BufferedReader bufferRead = new BufferedReader(new InputStreamReader(entryStream, encode))) {
                    String strInsert;
                    while ((strInsert = bufferRead.readLine()) != null) {
                        System.out.println(strInsert);
                    }
                }
            }
        }
    }
}
jar包下载地址: https://download.youkuaiyun.com/download/m0_37987151/21615417