注意1:本文使用的是 Apache POI 的老版本依赖(3.17),请斟酌后引用
注意2:这个方法只复制段落的纯文本,doc文档中的表格结构、图片以及文字格式都会丢失
一:依赖
小工具引入的依赖如下(老版本)
<!-- POI 核心 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<!-- HWPF (Word 97-2003) 支持 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.17</version>
</dependency>
<!-- XWPF (Word 2007+) 支持 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
二:正文
依赖不需要过多介绍,直接看代码
1.带上导入的依赖结构是为了让代码可以直接使用,小伙伴们直接cv即可
2.该方法使用 MultipartFile 作为传入传出对象,主要是业务需要,也可以改为流式传输,本文不介绍其他方式
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.web.multipart.MultipartFile;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Objects;
public class DocToDocxUtil {
/**
 * Converts an uploaded legacy Word ({@code .doc}) file into a {@code .docx} file.
 *
 * <p>Only paragraph text is carried over: the text of every paragraph is cleaned with
 * {@code cleanDocText}, and each non-empty result becomes one new paragraph holding a
 * single run in the generated document. Paragraphs that are empty after cleaning are skipped.
 *
 * @param originalFile the uploaded file; must be non-null, non-empty and have an original
 *                     filename ending in {@code .doc} (case-insensitive)
 * @return a new {@code MultipartFile} wrapping the generated document bytes
 * @throws NullPointerException     if {@code originalFile} is null
 * @throws IllegalArgumentException if the file is empty, has no original filename, or the
 *                                  filename does not end in {@code .doc}
 * @throws IOException              if anything fails while reading, converting or writing;
 *                                  the underlying exception is attached as the cause
 */
public static MultipartFile convert(MultipartFile originalFile) throws IOException {
    // Argument validation happens before any resource is opened.
    Objects.requireNonNull(originalFile, "Original file cannot be null");
    if (originalFile.isEmpty()) {
        throw new IllegalArgumentException("The provided file is empty.");
    }
    final String sourceName = originalFile.getOriginalFilename();
    if (sourceName == null) {
        throw new IllegalArgumentException("Original filename cannot be null.");
    }
    final boolean hasDocSuffix = sourceName.toLowerCase().endsWith(".doc");
    if (!hasDocSuffix) {
        throw new IllegalArgumentException("The file must have a .doc extension. Received: " + sourceName);
    }

    HWPFDocument legacyDoc = null;
    XWPFDocument modernDoc = null;
    InputStream source = null;
    ByteArrayOutputStream sink = null;
    try {
        source = originalFile.getInputStream();
        legacyDoc = new HWPFDocument(source);
        modernDoc = new XWPFDocument();
        sink = new ByteArrayOutputStream();

        // Walk every paragraph of the source document and copy its cleaned text.
        final Range body = legacyDoc.getRange();
        final int total = body.numParagraphs();
        for (int index = 0; index < total; index++) {
            final String cleaned = cleanDocText(body.getParagraph(index).text());
            if (cleaned.isEmpty()) {
                continue;
            }
            modernDoc.createParagraph().createRun().setText(cleaned);
        }

        // Serialize the new document and wrap the bytes as a MultipartFile.
        modernDoc.write(sink);
        return createMultipartFile(originalFile, sourceName, sink.toByteArray());
    } catch (Exception e) {
        throw new IOException("Failed to convert DOC to DOCX: " + e.getMessage(), e);
    } finally {
        // Resources are released manually; failures while closing are deliberately ignored.
        closeQuietly(legacyDoc);
        closeQuietly(modernDoc);
        closeQuietly(source);
        closeQuietly(sink);
    }
}
/**
 * Normalizes the raw text of a single paragraph.
 *
 * <p>{@link String#trim()} removes every leading and trailing character whose code point is
 * at or below U+0020. That already covers the trailing {@code "\r"}, {@code "\n"} and
 * {@code "\r\n"} terminators, so no separate line-terminator handling is required.
 *
 * @param text the raw paragraph text; may be {@code null}
 * @return the trimmed text, or an empty string when {@code text} is {@code null} or
 *         contains nothing but whitespace/control characters
 */
private static String cleanDocText(String text) {
    if (text == null) {
        return "";
    }
    return text.trim();
}
/**
 * Wraps the generated document bytes in an in-memory {@code MultipartFile}.
 *
 * <p>The returned file reports the form-field name of {@code originalFile}, the original
 * filename with its trailing {@code .doc} (case-insensitive) replaced by {@code .docx},
 * and the WordprocessingML content type.
 *
 * @param originalFile     the uploaded file whose form-field name is reused
 * @param originalFilename the uploaded file's original filename
 * @param content          the bytes of the generated document; the array is held by
 *                         reference, not copied
 * @return a {@code MultipartFile} view over {@code content}
 */
private static MultipartFile createMultipartFile(MultipartFile originalFile,
                                                 String originalFilename,
                                                 byte[] content) {
    String newFilename = originalFilename.replaceAll("(?i)\\.doc$", ".docx");
    return new MultipartFile() {
        @Override
        public String getName() {
            return originalFile.getName();
        }

        @Override
        public String getOriginalFilename() {
            return newFilename;
        }

        @Override
        public String getContentType() {
            return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        }

        @Override
        public boolean isEmpty() {
            return content.length == 0;
        }

        @Override
        public long getSize() {
            return content.length;
        }

        @Override
        public byte[] getBytes() {
            return content;
        }

        @Override
        public InputStream getInputStream() {
            return new ByteArrayInputStream(content);
        }

        @Override
        public void transferTo(java.io.File dest) throws IOException {
            // The payload is already fully in memory, so write it in one call.
            try (java.io.FileOutputStream fos = new java.io.FileOutputStream(dest)) {
                fos.write(content);
            }
        }
    };
}
/**
 * Closes the given resource on a best-effort basis.
 *
 * <p>A {@code null} argument is a no-op, and any exception thrown by {@code close()} is
 * intentionally discarded so that cleanup never masks the outcome of the main operation.
 *
 * @param closeable the resource to close; may be {@code null}
 */
private static void closeQuietly(AutoCloseable closeable) {
    if (closeable == null) {
        return;
    }
    try {
        closeable.close();
    } catch (Exception ignored) {
        // Quiet close: failures during cleanup are deliberately ignored.
    }
}
6852

被折叠的 条评论
为什么被折叠?



