docx、ppt、xls、pdf文件转html

本文介绍了一种将各种格式的文档(如DOCX、XLS、PPT及PDF)转换为HTML的方法,以便在前端应用中进行在线预览。通过使用特定的Java库,实现了不同文档类型的解析和转换。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >


场景:

后台上传的文档要在前端app上在线预览


解决办法:

将文档转成html用于前端显示


maven引入

<!-- xdocreport core document module (same version as the XHTML converter below) -->
<!-- NOTE(review): the PDF snippet (PDDocument/PDPage) and the xls snippet (FileUtils) use classes
     that are not among the artifacts declared here; presumably PDFBox 1.x and commons-io
     must be declared as well. Confirm against the real pom. -->
<dependency>
		    <groupId>fr.opensagres.xdocreport</groupId>
		    <artifactId>fr.opensagres.xdocreport.document</artifactId>
		    <version>1.0.5</version>
		</dependency>
		<!-- XWPF (docx) to XHTML converter used by the docx snippet -->
		<dependency>  
		    <groupId>fr.opensagres.xdocreport</groupId>  
		    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>  
		    <version>1.0.5</version>  
		</dependency>
		<!-- Apache POI OOXML support (XWPFDocument) -->
		<dependency>
		    <groupId>org.apache.poi</groupId>
		    <artifactId>poi-ooxml</artifactId>
		    <version>3.9</version>
		</dependency>
		<!-- Apache POI scratchpad (HSLF SlideShow, ExcelToHtmlConverter) -->
		<dependency>
		    <groupId>org.apache.poi</groupId>
		    <artifactId>poi-scratchpad</artifactId>
		    <version>3.9</version>
		</dependency>


具体代码:

1、docx转html

/**
	 * Converts the .docx file referenced by {@code presentationDto.getWordPath()} to HTML.
	 * <p>
	 * The HTML is written to a new {@code <timestamp>_show} folder under the upload root, with
	 * embedded images extracted to an {@code image} sub-folder. On success the web-accessible
	 * path of the generated file is stored through {@code presentationDto.setHtmlPath(...)}.
	 *
	 * @param tempContextUrl base URL of the application, prefixed to the generated HTML path
	 * @return 1 on success, 0 if the conversion failed
	 */
	public int Word2007ToHtml(String tempContextUrl) {
		try {
			String path = presentationDto.getWordPath();
			// Upload root: everything up to and including the "upload" segment
			String wordPath = path.substring(0, path.indexOf("upload") + 6);
			// File name without extension
			String wordName = path.substring(path.lastIndexOf(File.separator) + 1, path.lastIndexOf("."));
			// Extension, including the leading dot
			String suffix = path.substring(path.lastIndexOf("."));
			// Folder that receives the generated HTML
			String htmlPath = wordPath + File.separator + System.currentTimeMillis() + "_show" + File.separator;
			// Name of the generated HTML file
			String htmlName = System.currentTimeMillis() + ".html";
			// Folder that receives the extracted images
			String imagePath = htmlPath + "image" + File.separator;

			File htmlFile = new File(htmlPath + htmlName);
			File wordFile = new File(wordPath + File.separator + wordName + suffix);

			// Make sure the output folder exists before opening the output stream
			File folder = new File(htmlPath);
			if (!folder.exists()) {
				folder.mkdirs();
			}

			// 1) Load the Word document; the stream is always closed, even on failure
			InputStream in = new FileInputStream(wordFile);
			try {
				XWPFDocument document = new XWPFDocument(in);

				// 2) Configure the XHTML conversion: images are extracted to imagePath and
				//    referenced from the HTML through the relative "image" folder
				XHTMLOptions options = XHTMLOptions.create();
				options.setExtractor(new FileImageExtractor(new File(imagePath)));
				options.URIResolver(new BasicURIResolver("image"));
				options.setIgnoreStylesIfUnused(false);
				options.setFragment(true);

				// 3) Convert the XWPFDocument to XHTML
				OutputStream out = new FileOutputStream(htmlFile);
				try {
					XHTMLConverter.getInstance().convert(document, out, options);
				} finally {
					out.close();
				}
			} finally {
				in.close();
			}

			// 4) Turn the filesystem path into a URL path; URLs always use '/', so
			//    platform separators (e.g. '\' on Windows) are normalised
			String absolutePath = htmlFile.getAbsolutePath();
			String relativeUrl = absolutePath.substring(absolutePath.indexOf("upload"))
					.replace(File.separatorChar, '/');
			presentationDto.setHtmlPath(tempContextUrl + relativeUrl);
		} catch (XWPFConverterException e) {
			e.printStackTrace();
			return 0;
		} catch (IOException e) {
			// Also covers FileNotFoundException
			e.printStackTrace();
			return 0;
		}
		return 1;
	}

2、xls转html

/**
	 * Converts the .xls workbook referenced by {@code presentationDto.getWordPath()} to HTML.
	 * <p>
	 * The HTML is written as {@code <uuid>.html} directly under the upload root, and its
	 * web-accessible path is stored through {@code presentationDto.setHtmlPath(...)}.
	 *
	 * @param tempContextUrl base URL of the application, prefixed to the generated HTML path
	 * @return 1 on success, 0 if the conversion failed
	 */
	private int xlsToHtml(String tempContextUrl){
		String path = presentationDto.getWordPath();
		// Upload root (with trailing separator)
		String wordPath = path.substring(0, path.indexOf("upload") + 6) + File.separator;
		// File name including extension
		String wordName = path.substring(path.lastIndexOf(File.separator) + 1);
		try {
			// Load the workbook; the stream is closed even if parsing fails
			HSSFWorkbook excelBook;
			InputStream input = new FileInputStream(wordPath + wordName);
			try {
				excelBook = new HSSFWorkbook(input);
			} finally {
				input.close();
			}

			ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
					DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
			excelToHtmlConverter.processWorkbook(excelBook);

			// Best-effort extraction of embedded pictures into the upload root.
			// NOTE(review): HSSFWorkbook.getAllPictures() is documented to return HSSFPictureData
			// entries, which presumably are not instances of the imported Picture type. The
			// instanceof guard keeps such entries from aborting the whole conversion with a
			// ClassCastException. Confirm which Picture type is intended here.
			List<?> pics = excelBook.getAllPictures();
			if (pics != null) {
				for (Object item : pics) {
					if (!(item instanceof Picture)) {
						continue;
					}
					Picture pic = (Picture) item;
					try {
						FileOutputStream picOut = new FileOutputStream(wordPath + pic.suggestFullFileName());
						try {
							pic.writeImageContent(picOut);
						} finally {
							picOut.close();
						}
					} catch (FileNotFoundException e) {
						// A picture that cannot be written must not fail the conversion
						e.printStackTrace();
					}
				}
			}

			// Serialise the generated DOM as UTF-8 HTML
			Document htmlDocument = excelToHtmlConverter.getDocument();
			ByteArrayOutputStream outStream = new ByteArrayOutputStream();
			Transformer serializer = TransformerFactory.newInstance().newTransformer();
			serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
			serializer.setOutputProperty(OutputKeys.INDENT, "yes");
			serializer.setOutputProperty(OutputKeys.METHOD, "html");
			serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

			String content = new String(outStream.toByteArray(), "utf-8");

			String uuid = UidUtil.generateUUID();
			FileUtils.writeStringToFile(new File(wordPath, uuid + ".html"), content, "utf-8");
			// URL path, so always '/' rather than the platform file separator
			presentationDto.setHtmlPath(tempContextUrl + "upload/" + uuid + ".html");
		} catch (Exception e) {
			e.printStackTrace();
			return 0;
		}
		return 1;
	}

3、ppt转html

其实只是ppt转图片,有了图片后放到页面上去显示。

/**
	 * Converts the .ppt file referenced by {@code presentationDto.getWordPath()} to HTML.
	 * <p>
	 * Each slide is rendered to a JPEG in a new UUID-named folder under the upload root;
	 * {@code createHtml} then writes a page that shows the images in order.
	 *
	 * @param tempContextUrl base URL of the application, passed on to {@code createHtml}
	 * @return the result of {@code createHtml}, or 0 if reading or rendering the slides failed
	 */
	private int pptToHtml(String tempContextUrl){
		String path = presentationDto.getWordPath();
		// Upload root: everything up to and including the "upload" segment
		String wordPath = path.substring(0, path.indexOf("upload") + 6);
		// Name of the folder that receives the slide images
		String folderName = UidUtil.generateUUID();
		List<String> imgList = new ArrayList<String>();

		File folder = new File(wordPath + File.separator + folderName);
		try {
			folder.mkdirs();

			// Load the slide show; the stream is closed even if parsing fails
			SlideShow ppt;
			FileInputStream is = new FileInputStream(new File(path));
			try {
				ppt = new SlideShow(is);
			} finally {
				is.close();
			}

			Dimension pgsize = ppt.getPageSize();
			org.apache.poi.hslf.model.Slide[] slide = ppt.getSlides();
			for (int i = 0; i < slide.length; i++) {
				// Set an explicit font on every text run before rendering
				TextRun[] truns = slide[i].getTextRuns();
				for (int k = 0; k < truns.length; k++) {
					RichTextRun[] rtruns = truns[k].getRichTextRuns();
					for (int l = 0; l < rtruns.length; l++) {
						rtruns[l].setFontIndex(1);
						rtruns[l].setFontName("宋体");
					}
				}

				// Render the slide onto an image the size of the page
				BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB);
				Graphics2D graphics = img.createGraphics();
				try {
					graphics.setPaint(Color.BLUE);
					graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));
					slide[i].draw(graphics);
				} finally {
					// Release the native resources held by the graphics context
					graphics.dispose();
				}

				// Write the image to disk; the stream is closed even if encoding fails
				String fileName = "pict_" + (i + 1) + ".jpeg";
				FileOutputStream out = new FileOutputStream(new File(folder, fileName));
				try {
					javax.imageio.ImageIO.write(img, "jpeg", out);
				} finally {
					out.close();
				}

				// URL path used in the <img> tag, so always '/' rather than File.separator
				imgList.add("/upload/" + folderName + "/" + fileName);
			}
		} catch (IOException e) {
			// Also covers FileNotFoundException
			e.printStackTrace();
			return 0;
		}
		return createHtml(wordPath, imgList, tempContextUrl);
	}
	
	/**
	 * ppt转html时生成html
	 * @param wordPath	upload根目录
	 * @param imgList	所有幻灯片路径
	 * @param tempContextUrl	项目访问路径
	 * @return
	 */
	private int createHtml(String wordPath,List<String> imgList,String tempContextUrl){
		int rv = 0;
		StringBuilder sb = new StringBuilder("<!doctype html><html><head><meta charset='utf-8'><title>无标题文档</title></head><body>");
		if (imgList != null && !imgList.isEmpty()) {
			for (String img : imgList) {
				sb.append("<img src='" + img + "' /><br>");
			}
		}
		sb.append("</body></html>");
		
		String uuid = UidUtil.generateUUID();
		try {
			File file = new File(wordPath + File.separator + uuid + ".html");
			BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"));
			bufferedWriter.write(sb.toString());
			bufferedWriter.close();
		} catch (IOException e) {
			e.printStackTrace();
			return rv;
		}
		presentationDto.setHtmlPath(tempContextUrl + "upload" + File.separator + uuid+".html");
		return 1;
	}

4、pdf转html

原理同ppt转html一样

/**
	 * Converts the PDF file referenced by {@code presentationDto.getWordPath()} to HTML.
	 * <p>
	 * Each page is rendered to a JPEG in a new UUID-named folder under the upload root;
	 * {@code createHtml} then writes a page that shows the images in order.
	 *
	 * @param tempContextUrl base URL of the application, passed on to {@code createHtml}
	 * @return the result of {@code createHtml}, or 0 if reading or rendering the PDF failed
	 */
	private int pdfToHtml(String tempContextUrl){
		String path = presentationDto.getWordPath();
		// Upload root: everything up to and including the "upload" segment
		String wordPath = path.substring(0, path.indexOf("upload") + 6);
		// Name of the folder that receives the page images
		String folderName = UidUtil.generateUUID();
		List<String> imgList = new ArrayList<String>();
		try {
			PDDocument doc = PDDocument.load(path);
			try {
				// Create the image folder once, before any page is written
				File folder = new File(wordPath + File.separator + folderName);
				folder.mkdirs();

				List<?> pages = doc.getDocumentCatalog().getAllPages();
				for (int i = 0; i < pages.size(); i++) {
					PDPage page = (PDPage) pages.get(i);
					BufferedImage image = page.convertToImage();

					// ImageIO.write opens, flushes and closes the file itself; it returns
					// false when no writer for the requested format is available
					String fileName = i + ".jpg";
					if (!ImageIO.write(image, "jpg", new File(folder, fileName))) {
						throw new IOException("No ImageIO writer available for format jpg");
					}

					// URL path used in the <img> tag, so always '/' rather than File.separator
					imgList.add("/upload/" + folderName + "/" + fileName);
				}
			} finally {
				// Release the document even when rendering fails
				doc.close();
			}
		} catch (IOException e) {
			// Also covers FileNotFoundException
			e.printStackTrace();
			return 0;
		}
		// Propagate the HTML generation result instead of always reporting success
		return createHtml(wordPath, imgList, tempContextUrl);
	}



评论 8
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值