代码
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads a text file into memory line by line, choosing the decoder from the
 * byte-order mark (BOM) found at the start of the file.
 *
 * <p>Originally dated Nov 26, 2018.
 *
 * @author Leemeea
 */
public class ReadTxt {

    /** Number of leading bytes inspected when looking for a byte-order mark. */
    private static final int BOM_PROBE_LENGTH = 3;

    /** The character that a UTF-8 encoded byte-order mark decodes to. */
    private static final int BOM_CHAR = 0xFEFF;

    /**
     * Prints every line of a text file to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the file to print.
     *             When no argument is given the sample path below is used.
     */
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0)
                ? args[0]
                : "C:\\Users\\Administrator\\Desktop\\新建文本文档 (2).txt";
        for (String line : readFileByLines(filePath)) {
            System.out.println(line);
        }
    }

    /**
     * Reads all lines of a text file.
     *
     * <p>The decoder is selected from the first bytes of the file:
     * <ul>
     *   <li>{@code FF FE} - decoded with the charset name "Unicode"</li>
     *   <li>{@code FE FF} - decoded with "UTF-16"</li>
     *   <li>{@code EF BB BF} - decoded with "UTF-8"; the mark itself is dropped</li>
     *   <li>anything else - decoded with "gb2312"</li>
     * </ul>
     * Because only the BOM is inspected, a UTF-8 file saved without a BOM is
     * decoded as gb2312, which garbles any non-ASCII text in it.
     *
     * @param fileName path of the file to read
     * @return the lines of the file without line terminators; an empty list when
     *         the file is empty or cannot be read (the I/O error is printed to
     *         standard error)
     */
    public static List<String> readFileByLines(String fileName) {
        List<String> lines = new ArrayList<String>();
        File file = new File(fileName);
        try {
            byte[] head = new byte[BOM_PROBE_LENGTH];
            int headLength = readHead(file, head);
            boolean utf8Bom = hasUtf8Bom(head, headLength);
            String charset = charsetFor(head, headLength);

            // The raw stream is its own resource so it is closed even if the
            // reader cannot be constructed (e.g. unsupported charset name).
            try (InputStream in = new FileInputStream(file);
                 BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
                if (utf8Bom) {
                    // Java's UTF-8 decoder passes the BOM through as U+FEFF;
                    // drop it so it does not end up glued to the first line.
                    reader.mark(1);
                    if (reader.read() != BOM_CHAR) {
                        reader.reset();
                    }
                }
                String current;
                while ((current = reader.readLine()) != null) {
                    lines.add(current);
                }
            }
        } catch (IOException e) {
            // Best effort: report the failure and return what was read so far.
            e.printStackTrace();
        }
        return lines;
    }

    /**
     * Fills {@code head} with the leading bytes of {@code file}.
     *
     * @return how many bytes were actually read (less than {@code head.length}
     *         for very short files)
     */
    private static int readHead(File file, byte[] head) throws IOException {
        try (InputStream in = new FileInputStream(file)) {
            int filled = 0;
            // A single read() may return fewer bytes than requested, so loop.
            while (filled < head.length) {
                int count = in.read(head, filled, head.length - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
            return filled;
        }
    }

    /** Tells whether the probed bytes start with the UTF-8 byte-order mark EF BB BF. */
    private static boolean hasUtf8Bom(byte[] head, int length) {
        return length >= 3
                && head[0] == (byte) 0xEF
                && head[1] == (byte) 0xBB
                && head[2] == (byte) 0xBF;
    }

    /** Maps the probed leading bytes to the charset name used for decoding. */
    private static String charsetFor(byte[] head, int length) {
        if (length >= 2 && head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
            // Little-endian UTF-16 BOM; the decoder consumes the mark itself.
            return "Unicode";
        }
        if (length >= 2 && head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
            // Big-endian UTF-16 BOM; the decoder consumes the mark itself.
            return "UTF-16";
        }
        if (hasUtf8Bom(head, length)) {
            return "UTF-8";
        }
        // No BOM: treat as ANSI text in the Chinese legacy code page.
        return "gb2312";
    }
}
2022-04-25 补充:上面的代码只根据文件开头的 BOM 判断编码,无法识别不带 BOM 的 UTF-8 文件。下面是改进后的字符编码检测方法(需放在 ReadTxt 类内部使用):
/**
 * Guesses the character encoding of a text file.
 *
 * <p>A byte-order mark at the start of the file decides the result directly:
 * {@code FF FE} gives "UTF-16LE", {@code FE FF} gives "UTF-16BE" and
 * {@code EF BB BF} gives "UTF-8". Without a mark the bytes are scanned from
 * the beginning and the first non-ASCII byte sequence decides:
 * <ul>
 *   <li>a well-formed three-byte UTF-8 sequence gives "UTF-8";</li>
 *   <li>a well-formed two-byte UTF-8 sequence is inconclusive (such pairs also
 *       occur in GB encodings), so scanning continues;</li>
 *   <li>anything else stops the scan and leaves the default.</li>
 * </ul>
 * This is a heuristic and can misclassify a file.
 *
 * @param sourceFile the file to inspect
 * @return "UTF-16LE", "UTF-16BE", "UTF-8", or the default "GBK" when nothing
 *         else was recognised, the file is empty, {@code sourceFile} is
 *         {@code null}, or the file cannot be read (the I/O error is printed
 *         to standard error)
 */
private static String getFilecharset(File sourceFile) {
    String charset = "GBK";
    if (sourceFile == null) {
        return charset;
    }
    // try-with-resources closes the stream on every path, including the
    // early returns and any I/O failure.
    try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(sourceFile))) {
        byte[] first3Bytes = new byte[3];
        // The read limit must cover the bytes consumed before reset().
        bis.mark(first3Bytes.length);
        int read = bis.read(first3Bytes, 0, first3Bytes.length);
        if (read == -1) {
            // Empty file: treat as ANSI.
            return charset;
        }
        if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {
            // Unicode (little endian) byte-order mark.
            return "UTF-16LE";
        }
        if (first3Bytes[0] == (byte) 0xFE && first3Bytes[1] == (byte) 0xFF) {
            // Unicode big endian byte-order mark.
            return "UTF-16BE";
        }
        if (first3Bytes[0] == (byte) 0xEF
                && first3Bytes[1] == (byte) 0xBB
                && first3Bytes[2] == (byte) 0xBF) {
            // UTF-8 byte-order mark.
            return "UTF-8";
        }

        // No byte-order mark: rewind and examine the content itself.
        bis.reset();
        while ((read = bis.read()) != -1) {
            if (read < 0x80) {
                // Plain ASCII says nothing about the encoding; keep scanning.
                continue;
            }
            if (read <= 0xBF || read >= 0xF0) {
                // A lone continuation byte (80-BF) or a byte of F0 and above
                // is taken as GBK.
                break;
            }
            if (read <= 0xDF) {
                // C0-DF followed by 80-BF is a two-byte UTF-8 sequence, but
                // the same pair may also appear in GB encodings: inconclusive.
                if (isUtf8Continuation(bis.read())) {
                    continue;
                }
                break;
            }
            // E0-EF followed by two continuation bytes is a three-byte UTF-8
            // sequence. A false positive is possible but unlikely.
            if (isUtf8Continuation(bis.read()) && isUtf8Continuation(bis.read())) {
                charset = "UTF-8";
            }
            break;
        }
    } catch (IOException e) {
        // Best effort: report the failure and fall back to the default.
        e.printStackTrace();
    }
    return charset;
}

/** Tells whether {@code value} is a UTF-8 continuation byte (80-BF); -1 (end of stream) is not. */
private static boolean isUtf8Continuation(int value) {
    return 0x80 <= value && value <= 0xBF;
}