/**
 * Parses an uploaded CSV file into a list of rows, where each row is the list of its
 * cell values as strings.
 *
 * <p>Cells are separated by commas. A cell may be wrapped in double quotes, in which case
 * it may contain commas, and a doubled quote ({@code ""}) inside it stands for one literal
 * quote. Cell values are not trimmed. The file is read line by line, so a quoted cell
 * cannot span several lines. An empty line yields an empty row; a line ending in a comma
 * yields a trailing empty cell.
 *
 * <p>The stream is decoded with the JVM's default charset.
 *
 * <p>This method does not throw: if {@code file} is {@code null} an empty list is returned,
 * and if reading fails the stack trace is printed and the rows parsed so far are returned.
 *
 * @param file the uploaded CSV file; may be {@code null}
 * @return the parsed rows in file order; never {@code null}
 */
public static List<List<String>> importCsv(MultipartFile file) {
    List<List<String>> dataList = new ArrayList<>();
    if (file == null) {
        return dataList;
    }
    try (InputStreamReader inReader = new InputStreamReader(file.getInputStream());
         BufferedReader brReader = new BufferedReader(inReader)) {
        String rec; // one line of the file
        while ((rec = brReader.readLine()) != null) {
            dataList.add(parseCsvLine(rec));
        }
    } catch (IOException | RuntimeException e) {
        // Callers rely on this method never throwing, so keep returning what was read,
        // but do not hide the failure.
        e.printStackTrace();
    }
    return dataList;
}

/**
 * Splits one CSV line into its cells.
 *
 * <p>A double quote opens a quoted section only when it is the first character of a cell;
 * anywhere else outside a quoted section it is kept as a literal character. Inside a quoted
 * section {@code ""} produces one quote and a single quote ends the section. If the line
 * ends inside a quoted section, the text collected so far becomes the last cell.
 *
 * @param line a single line without its line terminator; must not be {@code null}
 * @return the cells of the line; empty if the line is empty
 */
private static List<String> parseCsvLine(String line) {
    List<String> cells = new ArrayList<>();
    if (line.isEmpty()) {
        return cells;
    }
    StringBuilder cell = new StringBuilder();
    boolean inQuotes = false;
    boolean atCellStart = true;
    for (int i = 0; i < line.length(); i++) {
        char c = line.charAt(i);
        if (inQuotes) {
            if (c != '"') {
                cell.append(c);
            } else if (i + 1 < line.length() && line.charAt(i + 1) == '"') {
                // Escaped quote: consume both characters, emit one.
                cell.append('"');
                i++;
            } else {
                inQuotes = false;
            }
        } else if (c == ',') {
            cells.add(cell.toString());
            cell.setLength(0);
            atCellStart = true;
        } else if (c == '"' && atCellStart) {
            inQuotes = true;
            atCellStart = false;
        } else {
            cell.append(c);
            atCellStart = false;
        }
    }
    // The last cell has no terminating comma, so it is added after the loop.
    cells.add(cell.toString());
    return cells;
}
解析pdf文件
需要的jar包,配置到maven
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.6</version>
</dependency>
//demo
/**
 * Demo: prints the text of a PDF file to standard output, one line at a time.
 *
 * <p>Nothing is printed for an encrypted document. A failure to open or read the file is
 * reported by printing the stack trace.
 *
 * @param args optional; if present, {@code args[0]} is the path of the PDF to read,
 *             otherwise the built-in placeholder path is used
 */
public static void main(String[] args) {
    String pdfPath = (args != null && args.length > 0) ? args[0] : "pdf文件路径";
    try (PDDocument document = PDDocument.load(new File(pdfPath))) {
        if (!document.isEncrypted()) {
            // NOTE(review): the stripper's default ordering is used; call
            // setSortByPosition(true) on it if visual reading order is required.
            PDFTextStripper tStripper = new PDFTextStripper();
            String pdfFileInText = tStripper.getText(document);
            for (String line : pdfFileInText.split("\\r?\\n")) {
                System.out.println(line);
            }
        }
    } catch (IOException e) {
        // Also covers PDFBox's InvalidPasswordException, which is an IOException subclass.
        e.printStackTrace();
    }
}