自己记录一下，方便下次使用：判断文件是否为 PDF 类型，并解析文件内容。
1. 需要依赖的包（Maven 依赖）
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.19</version>
</dependency>
2. Util 类
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;
import sun.misc.BASE64Decoder;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Iterator;
/**
* 解析PDF文件內容
* @author: Tang qiqi
* @create: 2023-02-21 15:11
* @Description:
*/
@Slf4j
public class PDFParserUtils {
public static final String PDF = "PDF";
/**
* base64文件字符串
* @param base64Content
* @return
*/
public static String parserFileContent(String base64Content, String fileName){
// 判斷base64字符串內容是否為空
if(base64Content == null || base64Content.trim().length() == 0) {
return "";
}
FileOutputStream fos = null;
try {
File tempFile = File.createTempFile(fileName, ".PDF");
fos = new FileOutputStream(tempFile);
BASE64Decoder decoder = new BASE64Decoder();
// Base64解码,对字节数组字符串进行Base64解码并生成文件
byte[] byt = decoder.decodeBuffer(base64Content);
for (int i = 0, len = byt.length;<