引入依赖:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.15</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox-app -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-app</artifactId>
<version>2.0.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/net.sourceforge.jexcelapi/jxl -->
<dependency>
<groupId>net.sourceforge.jexcelapi</groupId>
<artifactId>jxl</artifactId>
<version>2.6.12</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/fontbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.3</version>
</dependency>
将 PDF 解析为格式化文本文档,用于后续读取数据:
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
public class PdfUtil {
// Relative path of the file that doc2Obj() opens and reads line by line as UTF-8 text.
public static final File input = new File("pdf2excel/1");
/**
 * Program entry point: collects the parsed rows from {@link #doc2Obj()} and hands them,
 * together with a fixed title and fixed column headers, to an {@code Export2Excel}
 * instance whose {@code export()} method is then invoked.
 *
 * @param args command-line arguments (unused)
 * @throws Exception propagated unchanged from {@code doc2Obj()} or the exporter
 */
public static void main(String[] args) throws Exception {
    final List<Object[]> parsedRows = doc2Obj();
    final String[] columnHeaders = {
        "序号", "保/批单号", "险种", "投保人", "被保险人", "保单币种", "交费期数",
        "不含税保费", "销项税", "不含税手续费", "手续费进项税", "手续费价税合计",
        "业务部门", "业务员"
    };
    final String sheetTitle = "鼎和财产保险股份有限公司双流支公司手续费结算单";
    new Export2Excel(sheetTitle, columnHeaders, parsedRows).export();
}
/**
 * Loads {@code 1.pdf} (resolved against the working directory), saves a copy of it as
 * {@code CopyOf1.pdf}, and writes its extracted text to {@code 1.txt}.
 *
 * <p>Both the document and the writer are managed by try-with-resources, so they are
 * closed even when saving or text extraction fails. The text file is written as UTF-8,
 * the same charset {@code doc2Obj()} uses when reading its input.
 *
 * <p>Failures are not propagated: any exception is printed to standard error via
 * {@code printStackTrace()}, matching the original best-effort behavior.
 */
public static void pdf2Txt(){
    File sourceFile = new File("1.pdf");
    // File name up to the first dot; reused for both the .txt and the copied .pdf.
    String baseName = sourceFile.getName().split("\\.")[0];
    File output = new File(baseName + ".txt");
    try (PDDocument pd = PDDocument.load(sourceFile)) {
        pd.save("CopyOf" + baseName + ".pdf"); // Creates a copy of the source PDF
        PDFTextStripper stripper = new PDFTextStripper();
        // The writer is opened only after the copy and stripper succeed, so a failure
        // before this point does not leave an empty text file behind.
        try (BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(output), "utf-8"))) {
            stripper.writeText(pd, wr);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
public static List<Object[]> doc2Obj() throws Exception{
InputStreamReader isr = new InputStreamReader(new FileInputStream(input), "utf-8");
BufferedReader br = new BufferedReader(isr);
String lineTxt = null;
List<String> doc = new ArrayList<String>();
List<String> res = new ArrayList<String>();
while ((lineTxt = br.readLine()) != null) {
doc.add(lineTxt);
}
br.close();
int cs = 1;
System.out.println("文档行数:" + doc.size());
for (int i = 0; i < doc.size(); i++) {
// System.out.println(i);
String line = doc.get(i).trim();
//