- Maven依赖
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.8.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
poi-ooxml jar包
链接:https://pan.baidu.com/s/1EjGABz_xL4LebcrH-Il2nQ
提取码:i8y0
- 代码
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.BreakType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFStyle;
import org.apache.poi.xwpf.usermodel.XWPFStyles;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.List;
/**
 * Helpers for inspecting heading (outline) levels of a .docx document and for
 * inserting page breaks in front of headings of a chosen level.
 */
public class WordFileUtil {

    /**
     * Demo entry point: lists the heading levels found in a sample document.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            listTextTitleLvl("E:\\text.docx");
        } catch (Exception e) {
            // Report the failure instead of hiding it; a missing or corrupt
            // file would otherwise produce no output at all.
            e.printStackTrace();
        }
    }

    /**
     * Prints the text of every paragraph that has a title level, together
     * with that level, to standard output.
     *
     * <p>Paths that do not end with ".docx" are ignored silently.
     *
     * @param filePath absolute path of the .docx file
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void listTextTitleLvl(String filePath) throws Exception {
        if (!filePath.endsWith(".docx")) {
            return;
        }
        // try-with-resources closes both the stream and the document, even
        // when parsing fails half-way.
        try (FileInputStream fis = new FileInputStream(new File(filePath));
             XWPFDocument xdoc = new XWPFDocument(fis)) {
            for (XWPFParagraph paragraph : xdoc.getParagraphs()) {
                String titleLvl = getTitleLvl(xdoc, paragraph);
                if (StringUtils.isNotEmpty(titleLvl)) {
                    System.out.println("text: " + paragraph.getText() + ", titleLvl: " + titleLvl);
                }
            }
        }
    }

    /**
     * Inserts a page break before every heading of the given level and writes
     * the result to a new file next to the original. The new file name is the
     * original name (without extension) followed by the current time in
     * milliseconds and ".docx".
     *
     * <p>The break is appended as a new run to the paragraph that precedes the
     * heading. A matching heading that is the very first paragraph has no
     * predecessor and is left untouched. Paragraphs whose title level is not a
     * number (a plain style id) never match.
     *
     * <p>Paths that do not end with ".docx" are ignored silently.
     *
     * @param filePath    absolute path of the .docx file
     * @param addTitleLvl title level to match, as reported by the outline
     *                    level (observed: 1 corresponds to a second-level heading)
     * @throws Exception if the file cannot be read, parsed or written
     */
    public static void addBreak(String filePath, int addTitleLvl) throws Exception {
        if (!filePath.endsWith(".docx")) {
            return;
        }
        // Strip the 5-character ".docx" suffix to build the output name.
        String fileName = filePath.substring(0, filePath.length() - 5);
        File newFile = new File(fileName + System.currentTimeMillis() + ".docx");

        try (FileInputStream fis = new FileInputStream(new File(filePath));
             XWPFDocument xdoc = new XWPFDocument(fis)) {
            List<XWPFParagraph> paragraphs = xdoc.getParagraphs();
            for (int i = 0; i < paragraphs.size(); i++) {
                XWPFParagraph paragraph = paragraphs.get(i);
                String titleLvl = getTitleLvl(xdoc, paragraph);
                if (StringUtils.isEmpty(titleLvl)) {
                    continue;
                }
                System.out.println("text: " + paragraph.getText() + ", titleLvl: " + titleLvl);

                int level;
                try {
                    level = Integer.parseInt(titleLvl);
                } catch (NumberFormatException notNumeric) {
                    // getTitleLvl may fall back to a style id such as
                    // "Heading1"; such a value cannot match a numeric level.
                    continue;
                }

                // i > 0: the first paragraph has no predecessor to carry the break.
                if (level == addTitleLvl && i > 0) {
                    paragraphs.get(i - 1).createRun().addBreak(BreakType.PAGE);
                }
            }
            try (FileOutputStream out = new FileOutputStream(newFile)) {
                xdoc.write(out);
            }
        }
    }

    /**
     * Determines the title level of a paragraph.
     *
     * <p>The outline level can be defined in three places, which are checked
     * in this order:
     * <ol>
     *   <li>directly on the paragraph;</li>
     *   <li>on the paragraph's style;</li>
     *   <li>on the style that the paragraph's style is based on.</li>
     * </ol>
     * Only the direct base style is consulted, not more distant ancestors.
     * If none of them defines an outline level, the paragraph's style id is
     * returned instead, which is not necessarily numeric.
     *
     * @param doc  document that owns the paragraph and its style table
     * @param para paragraph to inspect
     * @return the outline level as a decimal string, otherwise the style id,
     *         otherwise an empty string
     */
    private static String getTitleLvl(XWPFDocument doc, XWPFParagraph para) {
        // 1. Outline level set directly on the paragraph.
        if (para.getCTP() != null
                && para.getCTP().getPPr() != null
                && para.getCTP().getPPr().getOutlineLvl() != null) {
            return String.valueOf(para.getCTP().getPPr().getOutlineLvl().getVal());
        }

        XWPFStyles styles = doc.getStyles();
        String styleId = para.getStyle();

        // 2. Outline level set on the paragraph's style.
        String fromStyle = outlineLvlOfStyle(styles, styleId);
        if (fromStyle != null) {
            return fromStyle;
        }

        // 3. Outline level set on the style's direct base style.
        if (styles != null && styleId != null) {
            XWPFStyle style = styles.getStyle(styleId);
            if (style != null) {
                String fromBase = outlineLvlOfStyle(styles, style.getBasisStyleID());
                if (fromBase != null) {
                    return fromBase;
                }
            }
        }

        // 4. Fall back to the raw style id, or an empty string if there is none.
        String fallbackId = para.getStyleID();
        return fallbackId != null ? fallbackId : "";
    }

    /**
     * Looks up the outline level declared by a single style.
     *
     * @param styles  style table of the document, may be {@code null}
     * @param styleId id of the style to inspect, may be {@code null}
     * @return the outline level as a decimal string, or {@code null} when the
     *         style is unknown or declares no outline level
     */
    private static String outlineLvlOfStyle(XWPFStyles styles, String styleId) {
        if (styles == null || styleId == null) {
            return null;
        }
        XWPFStyle style = styles.getStyle(styleId);
        if (style == null
                || style.getCTStyle() == null
                || style.getCTStyle().getPPr() == null
                || style.getCTStyle().getPPr().getOutlineLvl() == null) {
            return null;
        }
        return String.valueOf(style.getCTStyle().getPPr().getOutlineLvl().getVal());
    }
}