private boolean pdfPersistence(MultipartFile multipartFile, String url, String pafName, String fileName) throws IOException { // 是否排序 boolean sort = false; // 开始提取页数 int startPage = 1; // 结束提取页数 int endPage = Integer.MAX_VALUE; SysHitch bean = new SysHitch(); ArcDocuments documents = new ArcDocuments(); String content = null; InputStream input = null; PDDocument document = null; try { long size = multipartFile.getSize() / 1024; input = multipartFile.getInputStream(); // 加载 pdf 文档 PDFParser parser = new PDFParser(input); parser.parse(); document = parser.getPDDocument(); // 获取内容信息 PDFTextStripper pts = new PDFTextStripper(); pts.setSortByPosition(sort); endPage = document.getNumberOfPages(); for (int s = 1; s <= endPage; s++) { String[] split = fileName.split("/"); String suffix = null; if (split.length > 1) { suffix = split[split.length - 1]; } String suffixName = ""; for (int i = 0; i < suffix.length(); i++) { char item = suffix.charAt(i); if (item == '.') { break; } suffixName = suffixName + item; } String contrast = arcHitchMapper.contrast(suffixName); if (contrast == null) { throw new ServiceException(fileName + "失败"); } else { try { pts.setStartPage(s); pts.setEndPage(s); content = pts.getText(document); bean.setPagination(s); bean.setPageMaterials(content); bean.setFileName(contrast); bean.setUrl(url); bean.setCapacity((int) size); int i = arcHitchMapper.addUrl(bean); if (i != 1) { return false; } } catch (Exception e) { throw e; } } } } catch (Exception e) { throw e; } finally { if (null != input) input.close(); if (null != document) document.close(); } return true; }
上传双层 PDF 文档，提取 PDF 文字数据
最新推荐文章于 2024-07-18 14:14:40 发布