在实际开发中,我们经常需要向数据库导入数据。通常的做法是从页面获取数据再导入数据库,但这种方式效率低下,有时并不能满足实际需求。因此,我们需要将 Word 文档中的数据解析出来,然后封装成相应的 JavaBean,这样通常效率更高。作为一名 Java 开发工程师,这可能是我们必备的技能。我这里使用的是 Apache 组织的开源框架 POI 来解析 Word 文档,这里只展示最初级的读取(文档中无图片、无公式),后续将展示带图片的文档解析!
package com.ilike.poi;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;
import com.ilike.domain.Chapter;
/**
* 解析word文档,获取相应的数据,并封装成一个javaBean
*
* @author 桑伟东
*
*/
public class wordToHtmlDemo3 {
public static String readFile = “D:\tmp\old_doc\swdTest2.doc”;
public static String ziful = “【”;
public static String zifur = “】”;
public static void main(String[] args) throws Exception {
List<Integer> points = readFilePoints(readFile);
Map<String, String> map = AnalysePoints(points, readFile);
assembleBean(new Chapter(), map);
}
/**
* 1.获取文档中需要解析的所有节点
*
* @param fileName
* @throws Exception
*/
public static List<Integer> readFilePoints(String fileName) throws Exception {
// 1.创建字节输入流读取要解析的文档
FileInputStream in = new FileInputStream(new File(fileName));
// 2.创建文档对象
HWPFDocument doc = new HWPFDocument(in);
// 3.取得文档中字符的总数
int length = doc.characterLength();
// 4.读取文档的所有要解析的节点
List<Integer> indexs = new ArrayList<Integer>();
for (int i = 0; i < length; i++) {
// 4.1获取每一个字符
Range range = new Range(i, i + 1, doc);
CharacterRun cr = range.getCharacterRun(0);
String text = cr.text();
if (ziful.equals(text)) {
indexs.add(i + 1);
}
if (zifur.equals(text)) {
indexs.add(i);
}
if (i == length - 1) {
indexs.add(i);
}
}
System.out.println(indexs); // [4, 6, 20, 22, 35, 37, 62]
return indexs;
}
/**
* 2.获取对应节点的值,并封装在map集合中
*
* @param points
* @param fileName
* @throws Exception
*/
// [4, 6, 20, 22, 35, 37, 62]
public static Map<String, String> AnalysePoints(List<Integer> points, String fileName) throws Exception {
if (points.size() < 1) {
System.out.println("节点异常,不能解析");
}
// 1.创建字节输入流读取要解析的文档
FileInputStream in = new FileInputStream(new File(fileName));
// 2.创建文档对象
HWPFDocument doc = new HWPFDocument(in);
// 3.获取对应节点的值,并封装在map集合中
Map<String, String> map = new HashMap<String, String>();
for (int i = 0; i < points.size() - 2; i += 2) {
Range range1 = new Range(points.get(i), points.get(i + 1), doc);
CharacterRun cr1 = range1.getCharacterRun(0);
String key = cr1.text();// 获取key值
Range range2 = new Range(points.get(i + 1) + 1, points.get(i + 2), doc);
CharacterRun cr2 = range2.getCharacterRun(0);
String value = cr2.text();// 获取value值
map.put(key, value);
System.out.println("key:" + key + "------" + "value:" + value);
}
return map;
}
/**
* 3.将解析好的数据封装成相应的Bean
*
* @param chapter
* @param map
*/
public static void assembleBean(Chapter chapter, Map<String, String> map) {
//遍历map集合,封装成相应的Bean
for (String key : map.keySet()) {
if (key.equals("分析")) {
chapter.setAnalyse(map.get(key));
} else if (key.equals("考点")) {
chapter.setKeys(map.get(key));
} else if (key.equals("解答")) {
chapter.setAnswer(map.get(key));
}
}
System.out.println(chapter);
}
}