txt、rtf、rtfd -> pdf:Mac 下有个非常好用的自带工具 TextEdit(文本编辑),简洁实用!
pdf -> txt:代码如下(依赖两个 jar 包:pdfbox-1.6.0.jar、fontbox-1.6.0.jar):
- package org.bruce.toolkit.experiments;
- import java.io.ByteArrayOutputStream;
- import java.io.File;
- import java.io.OutputStreamWriter;
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.pdfbox.util.PDFTextStripper;
- /**
-  * Extracts the text content of a PDF file into a {@link String} using
-  * PDFBox's {@link PDFTextStripper}.
-  *
-  * @author Bruce Yang
-  */
- public class Pdf2Text {
-     /** PDF parsed by {@link #main(String[])} when no path is given on the command line. */
-     private static final String DEFAULT_PDF_PATH =
-             "/Users/user/Novels/pdf/《盗墓笔记》第一季:.第二部.怒海潜沙.pdf";
-     /** Charset used for the in-memory byte round trip; UTF-8 can represent every Java char. */
-     private static final String BUFFER_CHARSET = "UTF-8";
-     /**
-      * Prints the extracted text of a PDF file to standard output.
-      *
-      * @param args optional; when present, {@code args[0]} is the path of the PDF
-      *             to parse, otherwise {@link #DEFAULT_PDF_PATH} is used
-      * @throws Exception if the file cannot be loaded or its text cannot be extracted
-      */
-     public static void main(String[] args) throws Exception {
-         String pdfPath = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
-         System.out.println(parsePDF(pdfPath));
-     }
-     /**
-      * Loads the PDF at {@code filePath} and returns the text written by
-      * {@link PDFTextStripper#writeText}.
-      *
-      * <p>As a diagnostic, the size in bytes of the UTF-8 encoded text is printed
-      * to standard output before the method returns.
-      *
-      * @param filePath path of the PDF file to read
-      * @return the text extracted from the document
-      * @throws Exception if the file cannot be loaded or the text cannot be extracted
-      */
-     public static String parsePDF(String filePath) throws Exception {
-         PDDocument document = PDDocument.load(new File(filePath));
-         try {
-             ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-             // Explicit charset on both the encode and decode side: the platform
-             // default may be unable to represent the document's characters,
-             // which would silently turn them into '?'.
-             OutputStreamWriter writer = new OutputStreamWriter(buffer, BUFFER_CHARSET);
-             try {
-                 new PDFTextStripper().writeText(document, writer);
-             } finally {
-                 // close() also flushes; OutputStreamWriter accumulates encoded
-                 // bytes internally, so the buffer is only complete after this.
-                 writer.close();
-             }
-             byte[] contents = buffer.toByteArray();
-             System.out.println(contents.length);
-             return new String(contents, BUFFER_CHARSET);
-         } finally {
-             // Release the document even when text extraction fails.
-             document.close();
-         }
-     }
- }