依赖
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.19</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.19</version>
</dependency>
package com.sensetime.cpic_ocr.service;
import com.sensetime.cpic_ocr.CpicOcrApplication;
import com.sensetime.cpic_ocr.common.entity.pdfconv.ImageObj;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Renders every page of a PDF file to a JPEG image with Apache PDFBox.
 *
 * <p>The conversion can be started through JUnit ({@link #pdfPageToImageTest()}) or
 * directly through {@link #main(String[])}. The input file and output directory are
 * fixed paths (see {@link #SOURCE_PDF} and {@link #OUTPUT_DIR}).
 */
@RunWith(SpringRunner.class)
@SpringBootTest(classes = CpicOcrApplication.class)
public class PdfTest {

    /** PDF document whose pages are rendered. */
    private static final String SOURCE_PDF = "D://pdftest.pdf";

    /** Directory that receives one JPEG file per page; created on demand. */
    private static final String OUTPUT_DIR = "D://pdf";

    /** Format name handed to {@link ImageIO#write}. */
    private static final String IMAGE_FORMAT = "JPEG";

    /** Scale factor handed to {@code PDFRenderer.renderImage}; PDFBox documents 1 as 72 DPI. */
    private static final float RENDER_SCALE = 2f;

    /**
     * Command-line entry point; runs the conversion without JUnit.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        pdfPageToImage();
    }

    /**
     * JUnit entry point. JUnit 4 only accepts non-static test methods, so this instance
     * method carries the {@code @Test} annotation; unlike {@link #pdfPageToImage()} it
     * lets failures propagate so a broken conversion fails the test.
     *
     * @throws IOException if the PDF cannot be loaded, rendered, or written
     */
    @Test
    public void pdfPageToImageTest() throws IOException {
        renderAllPages(new File(SOURCE_PDF), new File(OUTPUT_DIR));
    }

    /**
     * Converts {@link #SOURCE_PDF} into one JPEG per page under {@link #OUTPUT_DIR}.
     *
     * <p>Best-effort: any failure is reported on standard error and not rethrown, which
     * keeps the behaviour existing static callers rely on.
     */
    public static void pdfPageToImage() {
        try {
            renderAllPages(new File(SOURCE_PDF), new File(OUTPUT_DIR));
        } catch (Exception e) {
            // Deliberately broad: this entry point has never thrown, so only report.
            e.printStackTrace();
        }
    }

    /**
     * Loads the PDF, renders each page, and always closes the document afterwards.
     *
     * @param pdfFile   PDF to read
     * @param outputDir directory for the generated images
     * @throws IOException if loading, rendering, or writing fails
     */
    private static void renderAllPages(File pdfFile, File outputDir) throws IOException {
        // Computed once so all pages of one run share the same file-name prefix.
        String datePrefix = new SimpleDateFormat("yyyyMMdd").format(new Date());

        // try-with-resources releases the document even when a page fails to render.
        try (PDDocument document = PDDocument.load(pdfFile)) {
            PDFRenderer renderer = new PDFRenderer(document);
            int pageCount = document.getNumberOfPages();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                File target = new File(outputDir, datePrefix + pageIndex + ".JPEG");
                convertSingleImage(renderer, pageIndex, target);
            }
        }
    }

    /**
     * Renders a single page and writes it to {@code target}.
     *
     * @param pdRender renderer bound to the open document
     * @param pgIdx    zero-based index of the page to render
     * @param target   image file to create; its parent directory is created if missing
     * @throws IOException if the directory cannot be created, rendering fails, or no
     *                     image writer accepts the requested format
     */
    private static void convertSingleImage(PDFRenderer pdRender, int pgIdx, File target)
            throws IOException {
        File parent = target.getParentFile();
        // mkdirs() returns false both on failure and when the directory already exists.
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Cannot create output directory " + parent);
        }

        BufferedImage image;
        try {
            image = pdRender.renderImage(pgIdx, RENDER_SCALE);
        } catch (IOException e) {
            // Keep the cause so the failing page can be diagnosed.
            throw new IOException("Failed to render page " + pgIdx, e);
        }

        // ImageIO.write signals "no suitable writer" by returning false, not by throwing.
        if (!ImageIO.write(image, IMAGE_FORMAT, target)) {
            throw new IOException("No " + IMAGE_FORMAT + " writer available for " + target);
        }
    }
}