package com.gohouse.oss.servlet;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import javax.naming.InitialContext;
import javax.naming.NamingException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileItemFactory;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.mime.MultipartEntity;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import com.gohouse.oss.util.ContextUtil;
import com.gohouse.oss.util.HttpClientHelper;
import com.gohouse.util.log.Log;
import com.gohouse.util.log.Logger;
import com.google.gson.Gson;
public class WordToHtml extends HttpServlet {
private static final long serialVersionUID = 1L;
private static Log log = Logger.getLogger(WordToHtml.class);
/**
* 回车符ASCII码
*/
private static final short ENTER_ASCII = 13;
/**
* 空格符ASCII码
*/
private static final short SPACE_ASCII = 32;
/**
* 水平制表符ASCII码
*/
private static final short TABULATION_ASCII = 9;
private String htmlText = "";
public String htmlTextTbl = "";
public int counter = 0;
public int beginPosi = 0;
public int endPosi = 0;
public int beginArray[];
public int endArray[];
public String htmlTextArray[];
public boolean tblExist = false;
public final String inputFile = "C:/Users/miju/Desktop/aa.doc";
// public static void main(String argv[]) {
// try {
// getWordAndStyle(inputFile);
// } catch (Exception e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// }
@SuppressWarnings("rawtypes")
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
request.setCharacterEncoding("UTF-8");
response.setContentType("text/html");
PrintWriter out = response.getWriter();
String firePath = getServletContext().getRealPath("/files");
FileItemFactory factory = new DiskFileItemFactory();
ServletFileUpload upload = new ServletFileUpload(factory);
upload.setHeaderEncoding("UTF-8");
File file = null;
FileInputStream in = null;
try {
List items = upload.parseRequest(request);
if (null != items) {
Iterator itr = items.iterator();
while (itr.hasNext()) {
FileItem item = (FileItem) itr.next();
String uploadName = item.getName();
if((uploadName.toLowerCase()).endsWith("doc")){
if (item.isFormField()) {
continue;
} else {
// 以当前精确到秒的日期为上传的文件的文件名
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddkkmmss");
String fileName = sdf.format(new Date());
// 创建上传文件夹,已时间命名
file = new File(firePath + File.separator + fileName);
if(!file.exists()){
file.mkdirs();
}
// 保存word文件
File savedFile = new File(file, item.getName());
item.write(savedFile);
in = new FileInputStream(savedFile);
String content = getWordAndStyle(in,firePath + File.separator + fileName);
Gson gosn = new Gson();
out.print("{'content':" + gosn.toJson(content) + "}");
// 删除上传文件
deleteFile(file);
}
}else{
out.print("{'error': '请上传正确的word-2003格式'}");
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
out.flush();
out.close();
}
/**
* 读取每个文字样式
*
* @param fileName
* @throws Exception
*/
public String getWordAndStyle(InputStream in,String path) throws Exception {
HWPFDocument doc = new HWPFDocument(in);
// 取得文档中字符的总数
int length = doc.characterLength();
// 创建图片容器
PicturesTable pTable = doc.getPicturesTable();
htmlText = "";
// 创建临时字符串,好加以判断一串字符是否存在相同格式
String tempString = "";
for (int i = 0; i < length - 1; i++) {
// 整篇文章的字符通过一个个字符的来判断,range为得到文档的范围
Range range = new Range(i, i + 1, doc);
CharacterRun cr = range.getCharacterRun(0);
if (pTable.hasPicture(cr)) {
// 读写图片
tempString += this.readPicture(pTable, cr, path);
} else {
Range range2 = new Range(i + 1, i + 2, doc);
// 第二个字符
CharacterRun cr2 = range2.getCharacterRun(0);
// 当前字符
char currentChar = cr.text().charAt(0);
// 判断是否为回车符
if (currentChar == ENTER_ASCII)
tempString += "<br/>";
// 判断是否为空格符
else if (currentChar == SPACE_ASCII)
tempString += " ";
// 判断是否为水平制表符
else if (currentChar == TABULATION_ASCII)
tempString += " ";
// 比较前后2个字符是否具有相同的格式
boolean flag = compareCharStyle(cr, cr2);
String fontStyle = "<span style='font-family:" + cr.getFontName() + ";font-size:" + cr.getFontSize()/ 2 + "pt;";
if (cr.isBold())
fontStyle += "font-weight:bold;";
if (cr.isItalic())
fontStyle += "font-style:italic;";
if (flag && i != length - 2)
tempString += currentChar;
else if (!flag) {
htmlText += fontStyle + "'>" + tempString + currentChar + "</span>";
tempString = "";
} else
htmlText += fontStyle + "'>" + tempString + currentChar + "</span>";
}
}
return htmlText;
}
/**
* 读写文档中的图片
*
* @param pTable
* @param cr
* @throws Exception
*/
private String readPicture(PicturesTable pTable, CharacterRun cr, String path)
throws Exception {
// 提取图片
Picture pic = pTable.extractPicture(cr, false);
// 返回POI建议的图片文件名
String afileName = pic.suggestFullFileName();
OutputStream out = new FileOutputStream(new File(path + File.separator + afileName));
pic.writeImageContent(out);
out.flush();
out.close();
// 上传图片
HttpClientHelper.loginApi("", "");
String token = ContextUtil.getToken(ContextUtil.API_KEY);
String url = domain() + "/rest/images/attachments/json/0/0/0/0/-1/" + token;
String guid = postUploadImg(url, new File(path + File.separator + afileName));
return "<img src='" + domain() + "/rest/images/" + guid +"'/>";
}
/**
* 上传图片
* @param url 上传图片地址
* @param file 图片文件
* @return guid
*/
private String postUploadImg(String url,File file){
try {
// 上传图片
HttpClient httpclient = new DefaultHttpClient();
HttpPost httppost = new HttpPost(url);
MultipartEntity reqEntity = new MultipartEntity();
reqEntity.addPart("files", new FileBody(file));
httppost.setEntity(reqEntity);
log.info("执行: " + httppost.getRequestLine());
HttpResponse response = httpclient.execute(httppost);
log.info("StatusCode = " + response.getStatusLine().getStatusCode());
HttpEntity resEntity = response.getEntity();
String responseText = null;
if (resEntity != null) {
log.info("----------------------------------------");
log.info(response.getStatusLine().toString());
log.info("返回长度: " + resEntity.getContentLength());
log.info("返回类型: " + resEntity.getContentType());
InputStream in = resEntity.getContent();
log.info("responseText = " + (responseText = HttpClientHelper.getStringByInputStream(in)));
}
if (resEntity != null) {
InputStream is = resEntity.getContent();
if (is != null) {
is.close();
}
}
return responseText.substring(responseText.indexOf("photo")+8, responseText.indexOf(",",responseText.indexOf("photo"))-1);
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IllegalStateException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return "";
}
private boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) {
if (cr1.isBold() == cr2.isBold() && cr1.isItalic() == cr2.isItalic()
&& cr1.getFontName().equals(cr2.getFontName())
&& cr1.getFontSize() == cr2.getFontSize()) {
return true;
}
return false;
}
/**
* 读取配置文件中的rest服务器地址
*
* @return
*/
private String domain() {
try {
InitialContext ic = new InitialContext();
return "http://" + (String) ic.lookup("java:comp/env/API_SITE_DOMAIN");
} catch (NamingException e) {
log.error("获取 domain 失败!" + e.getMessage());
}
return "";
}
/**
* 删除文件夹
* @param file
*/
private void deleteFile(File file) {
File[] files = file.listFiles();
for (File deleteFile : files) {
if (deleteFile.isDirectory()) {
// 如果是文件夹,则递归删除下面的文件后再删除该文件夹
deleteFile(deleteFile);
} else {
deleteFile.delete();
}
}
file.delete();
}
}
// Word-to-HTML conversion.
// (Source page note: latest recommended article published 2025-05-28 16:23:03.)