package com.wcpdoc.exam.exam.utils;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPOutputStream;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.w3c.dom.Document;
import com.wcpdoc.exam.exam.entity.Question;
import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
/**
 *
 * @Description: Parses Word exam-paper documents into a structured model
 * @author <a href="mailto:thoslbt@163.com">Thos</a>
 * @ClassName: WordToHtml
 * @version V1.0
 *
 */
public class WordUtils {
/**
* ASCII code of the carriage-return character
*/
private static final short ENTER_ASCII = 13;
/**
* ASCII code of the space character
*/
private static final short SPACE_ASCII = 32;
/**
* ASCII code of the horizontal-tab character
*/
private static final short TABULATION_ASCII = 9;
// HTML accumulated for the document being converted; appended to by getWordAndStyle and readPicture.
public static String htmlText = "";
// HTML of the table most recently rendered by readTable.
public static String htmlTextTbl = "";
// Index of the last table recorded by readTable (readTable sets it to -1 before iterating).
public static int counter=0;
// Start offset of the table most recently visited by readTable.
public static int beginPosi=0;
// End offset of the table most recently visited by readTable.
public static int endPosi=0;
// Start offsets of the tables, indexed by table order; allocated in getWordAndStyle.
public static int beginArray[];
// End offsets of the tables, indexed by table order; allocated in getWordAndStyle.
public static int endArray[];
// Rendered HTML of each table, indexed by table order; allocated in getWordAndStyle.
public static String htmlTextArray[];
// Set to true by readTable once at least one table has been found.
public static boolean tblExist=false;
// Hard-coded sample input document used by main.
public static final String inputFile="D:/kaoshi/gongshi/111222.doc";
// Hard-coded destination of the HTML file produced by writeFile.
public static final String htmlFile="D:/kaoshi/gongshi/abc.html";
/**
 * Command-line entry point: converts the hard-coded sample document {@link #inputFile}
 * and prints the stack trace of any failure.
 *
 * @param argv command-line arguments (not used)
 */
public static void main(String[] argv) {
    try {
        getWordAndStyle(inputFile);
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}
/**
 * Directory in which pictures extracted from a Word document are stored.
 *
 * @return the fixed base path {@code D:/kaoshi/gongshi} immediately followed by the
 *         current date as rendered by {@code DateFormatUtil.formatDate}
 */
public static String wordImageFilePath() {
    StringBuilder path = new StringBuilder("D:/kaoshi/gongshi");
    path.append(DateFormatUtil.formatDate(new Date()));
    return path.toString();
}
/**
 * Path prefix under which the extracted pictures are referenced from the generated HTML.
 *
 * @return the fixed base path {@code D:/kaoshi/gongshi}, the current date as rendered by
 *         {@code DateFormatUtil.formatDate}, and a trailing {@code /}
 */
public static String wordImgeWebPath() {
    StringBuilder path = new StringBuilder("D:/kaoshi/gongshi");
    path.append(DateFormatUtil.formatDate(new Date()));
    path.append("/");
    return path.toString();
}
/**
 * Converts a binary Word document to an HTML string and parses that HTML into questions.
 *
 * <p>The document is walked one character at a time. Tables rendered by
 * {@link #readTable} are spliced in at their start offsets, pictures are exported through
 * {@link #readPicture}, carriage returns are followed by {@code <br/>}, and the result is
 * written with {@link #writeFile} before being handed to {@link #analysisHtmlString}.
 *
 * <p>All intermediate state lives in static fields of this class, so concurrent calls
 * would interfere with each other.
 *
 * @param fileName path of the Word file to read
 * @return the questions parsed from the generated HTML
 * @throws Exception if the file cannot be opened, read or converted
 */
public static List<Question> getWordAndStyle(String fileName) throws Exception {
    System.out.println(fileName);
    HWPFDocument doc;
    // The document is fully loaded by the constructor, so the stream can be released right away.
    try (FileInputStream in = new FileInputStream(new File(fileName))) {
        doc = new HWPFDocument(in);
    }
    Range rangetbl = doc.getRange(); // reading range of the whole document
    TableIterator it = new TableIterator(rangetbl);
    int num = 100;
    beginArray = new int[num];
    endArray = new int[num];
    htmlTextArray = new String[num];
    // Reset the table bookkeeping left behind by a previous call; otherwise a stale
    // tblExist=true makes a table-free document splice in null entries.
    tblExist = false;
    counter = -1;
    // Total number of characters in the document
    int length = doc.characterLength();
    // Picture container of the document
    PicturesTable pTable = doc.getPicturesTable();
    // A missing summary or title must not end up as the literal text "null" in the output.
    String title = doc.getSummaryInformation() == null ? null : doc.getSummaryInformation().getTitle();
    htmlText = title == null ? "" : title;
    if (it.hasNext()) {
        readTable(it, rangetbl);
    }
    int cur = 0;
    // Characters collected so far that share the same formatting
    String tempString = "";
    for (int i = 0; i < length - 1; i++) {
        // Examine the document one character at a time
        Range range = new Range(i, i + 1, doc);
        CharacterRun cr = range.getCharacterRun(0);
        // Only compare against tables that were actually recorded (cur <= counter).
        if (tblExist && cur <= counter && i == beginArray[cur]) {
            htmlText += tempString + htmlTextArray[cur];
            tempString = "";
            // Jump to the table's end; the loop increment then moves past it.
            i = endArray[cur] - 1;
            cur++;
            continue;
        }
        if (pTable.hasPicture(cr)) {
            htmlText += tempString;
            // Export the picture; readPicture appends the <img> tag to htmlText itself
            readPicture(pTable, cr);
            tempString = "";
        } else {
            Range range2 = new Range(i + 1, i + 2, doc);
            // The following character, used for the formatting comparison
            CharacterRun cr2 = range2.getCharacterRun(0);
            char c = cr.text().charAt(0);
            // NOTE(review): a space or tab adds an extra " " before the character's own
            // text is appended below; confirm whether an HTML entity was intended here.
            if (c == SPACE_ASCII) {
                tempString += " ";
            } else if (c == TABULATION_ASCII) {
                tempString += " ";
            }
            // Keep buffering while this character and the next share the same formatting
            boolean flag = compareCharStyle(cr, cr2);
            if (flag && c != ENTER_ASCII) {
                tempString += cr.text();
            } else {
                htmlText += tempString + cr.text();
                tempString = "";
            }
            // A carriage return ends the line in the HTML output
            if (c == ENTER_ASCII) {
                htmlText += "<br/>";
            }
        }
    }
    htmlText += tempString;
    // Write the generated HTML file
    writeFile(htmlText);
    System.out.println("------------WordToHtml转换成功----------------");
    // Turn the HTML into question objects
    System.out.println("------------WordToHtml模型化成功----------------");
    return analysisHtmlString(htmlText);
}
/**
 * Renders every table of the document to an HTML {@code <table>} and records where it sits.
 *
 * <p>For each table the start/end offsets are stored in {@link #beginArray} and
 * {@link #endArray} and the HTML in {@link #htmlTextArray}, all at the same index;
 * {@link #counter} ends up as the index of the last table and {@link #tblExist} is set
 * once a table has been seen. Every paragraph of a cell becomes its own {@code <td>}.
 *
 * @param it       iterator over the document's tables
 * @param rangetbl document range the iterator was created from (not read by this method)
 * @throws Exception if a table cannot be read
 */
public static void readTable(TableIterator it, Range rangetbl) throws Exception {
    htmlTextTbl = "";
    counter = -1;
    // Visit every table of the document
    while (it.hasNext()) {
        tblExist = true;
        Table tb = (Table) it.next();
        beginPosi = tb.getStartOffset();
        endPosi = tb.getEndOffset();
        counter = counter + 1;
        // Keep one spare slot: getWordAndStyle reads the entry after the last table.
        ensureTableCapacity(counter + 2);
        beginArray[counter] = beginPosi;
        endArray[counter] = endPosi;
        StringBuilder html = new StringBuilder("<table border>");
        // Rows, starting at index 0
        for (int i = 0; i < tb.numRows(); i++) {
            TableRow tr = tb.getRow(i);
            html.append("<tr>");
            // Cells, starting at index 0
            for (int j = 0; j < tr.numCells(); j++) {
                TableCell td = tr.getCell(j);
                int cellWidth = td.getWidth();
                // Content of the cell, one <td> per paragraph
                for (int k = 0; k < td.numParagraphs(); k++) {
                    Paragraph para = td.getParagraph(k);
                    String s = para.text().trim();
                    // Compare by content; the former reference comparison (==) was unreliable.
                    if (s.isEmpty()) {
                        s = " ";
                    }
                    html.append("<td width=").append(cellWidth).append(">").append(s).append("</td>");
                }
            }
            // Close the row (the closing tag was previously missing).
            html.append("</tr>");
        }
        html.append("</table>");
        htmlTextTbl = html.toString();
        htmlTextArray[counter] = htmlTextTbl;
    }
}

/** Grows the three per-table arrays so that each holds at least {@code needed} entries. */
private static void ensureTableCapacity(int needed) {
    if (beginArray == null || beginArray.length < needed) {
        int[] grown = new int[Math.max(needed, 16) * 2];
        if (beginArray != null) {
            System.arraycopy(beginArray, 0, grown, 0, beginArray.length);
        }
        beginArray = grown;
    }
    if (endArray == null || endArray.length < needed) {
        int[] grown = new int[Math.max(needed, 16) * 2];
        if (endArray != null) {
            System.arraycopy(endArray, 0, grown, 0, endArray.length);
        }
        endArray = grown;
    }
    if (htmlTextArray == null || htmlTextArray.length < needed) {
        String[] grown = new String[Math.max(needed, 16) * 2];
        if (htmlTextArray != null) {
            System.arraycopy(htmlTextArray, 0, grown, 0, htmlTextArray.length);
        }
        htmlTextArray = grown;
    }
}
/**
 * Exports the picture attached to a character run and appends an {@code <img>} tag for it
 * to {@link #htmlText}.
 *
 * <p>The picture is written into the directory returned by {@link #wordImageFilePath()}.
 * A {@code wmf} picture is additionally converted to SVG via {@link #convert} and then to
 * PNG via {@code SVGUtils.svg2PNG}; the tag then points at the PNG and carries a fixed size.
 *
 * @param pTable picture table of the document
 * @param cr     character run that holds the picture
 * @throws Exception if the picture cannot be extracted, written or converted
 */
public static void readPicture(PicturesTable pTable, CharacterRun cr) throws Exception {
    // Extract the picture
    Picture pic = pTable.extractPicture(cr, false);
    // File name suggested by POI
    String afileName = pic.suggestFullFileName();
    // Tolerate a name without extension instead of failing in substring(0, -1).
    int dot = afileName.lastIndexOf(".");
    String suffix = dot < 0 ? "" : afileName.substring(dot + 1);
    String prefix = dot < 0 ? afileName : afileName.substring(0, dot);
    File file = new File(wordImageFilePath());
    System.out.println(file.mkdirs());
    // Close the stream before the file is read again by the converter (previously leaked).
    try (OutputStream out = new FileOutputStream(new File(wordImageFilePath() + File.separator + afileName))) {
        pic.writeImageContent(out);
    }
    // Resolve the path once so every reference below uses the same directory.
    String webPath = wordImgeWebPath();
    if (suffix.equals("wmf")) {
        convert(webPath + afileName, webPath + prefix + ".svg");
        SVGUtils svgUtils = new SVGUtils();
        svgUtils.svg2PNG(new File(webPath + prefix + ".svg"), new File(webPath + prefix + ".png"));
        afileName = prefix + ".png";
        htmlText += "<img src='question/getPic?path=" + webPath + afileName
                + "' mce_src='question/getPic?path=" + webPath + afileName + "' style='height:30px;width:50px'/>";
    } else {
        htmlText += "<img src='question/getPic?path=" + webPath + afileName
                + "' mce_src='question/getPic?path=" + webPath + afileName + "' />";
    }
}
/**
 * Converts a WMF file to SVG; the output is gzip-compressed when {@code dest} ends with
 * {@code .svgz}.
 *
 * <p>Conversion is best effort: any failure is reported on the console and not rethrown.
 *
 * @param file path of the WMF file to read
 * @param dest path of the SVG file to write
 */
public static void convert(String file, String dest) {
    // try-with-resources releases the input file even when parsing fails (previously leaked).
    try (InputStream in = new FileInputStream(new File(file))) {
        WmfParser parser = new WmfParser();
        final SvgGdi gdi = new SvgGdi(false);
        parser.parse(in, gdi);
        Document doc = gdi.getDocument();
        OutputStream out = new FileOutputStream(dest);
        try {
            if (dest.endsWith(".svgz")) {
                out = new GZIPOutputStream(out);
            }
            output(doc, out);
        } finally {
            // output() closes the stream on its own path; closing again is harmless and
            // covers failures that happen before output() is reached.
            out.close();
        }
    } catch (Exception e) {
        System.out.println("edn?????" + e.getMessage());
        // Keep the full cause visible instead of only the message.
        e.printStackTrace();
    }
}
/**
 * Serialises a DOM document as indented UTF-8 XML with the SVG 1.0 doctype and closes the
 * stream.
 *
 * @param doc document to serialise
 * @param out destination stream; it is flushed and always closed by this method
 * @throws Exception if the transformer cannot be created or the transformation fails
 */
public static void output(Document doc, OutputStream out) throws Exception {
    try {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN");
        transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd");
        // A single pass is enough; the former second transformation into a throw-away
        // buffer only repeated the work.
        transformer.transform(new DOMSource(doc), new StreamResult(out));
        out.flush();
    } finally {
        // Release the stream on failure as well.
        out.close();
    }
}
/**
 * Tells whether two character runs share the same formatting.
 *
 * @param cr1 first character run
 * @param cr2 second character run
 * @return {@code true} when bold, italic, font name, font size and colour are all equal
 */
public static boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) {
    if (cr1.isBold() != cr2.isBold()) {
        return false;
    }
    if (cr1.isItalic() != cr2.isItalic()) {
        return false;
    }
    if (!cr1.getFontName().equals(cr2.getFontName())) {
        return false;
    }
    if (cr1.getFontSize() != cr2.getFontSize()) {
        return false;
    }
    return cr1.getColor() == cr2.getColor();
}
/*** 字体颜色模块start ********/
/** Returns bits 0-7 of {@code c}, which {@link #rgb} treats as the red component. */
public static int red(int c) {
    final int lowestByte = c & 0xFF;
    return lowestByte;
}
/** Returns bits 8-15 of {@code c}, which {@link #rgb} treats as the green component. */
public static int green(int c) {
    final int shifted = c >>> 8;
    return shifted & 0xFF;
}
/** Returns bits 16-23 of {@code c}, which {@link #rgb} treats as the blue component. */
public static int blue(int c) {
    final int shifted = c >>> 16;
    return shifted & 0xFF;
}
/**
 * Reorders the three low bytes of {@code c}: the lowest byte moves to bits 16-23, the
 * middle byte stays at bits 8-15 and the third byte moves to bits 0-7.
 */
public static int rgb(int c) {
    int r = red(c);
    int g = green(c);
    int b = blue(c);
    return (r << 16) | (g << 8) | b;
}
/**
 * Left-pads a hex string with zeros to six characters.
 *
 * @param rgb hex digits to pad
 * @return {@code rgb} prefixed with as many {@code 0} characters as are missing to reach
 *         six; strings of six or more characters are returned unchanged
 */
public static String rgbToSix(String rgb) {
    StringBuilder padded = new StringBuilder();
    for (int missing = 6 - rgb.length(); missing > 0; missing--) {
        padded.append("0");
    }
    return padded.append(rgb).toString();
}
/**
 * Formats a colour value as {@code #} followed by six hex digits.
 *
 * @param color colour value; {@code -1} is treated as {@code 0}
 * @return the byte-reordered value (see {@link #rgb}) as a zero-padded hex string
 */
public static String getHexColor(int color) {
    int normalized = (color == -1) ? 0 : color;
    String hex = Integer.toHexString(rgb(normalized));
    return "#" + rgbToSix(hex);
}
/** 字体颜色模块end ******/
/**
 * Writes the given text to {@link #htmlFile}, encoded as GB2312.
 *
 * <p>I/O failures are printed to the console and not rethrown.
 *
 * @param s text to write
 */
public static void writeFile(String s) {
    File file = new File(htmlFile);
    // One write in the target encoding: the former first pass in the platform charset was
    // overwritten immediately, and its streams leaked when a write failed.
    try (BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(file), "GB2312"))) {
        writer.write(s);
    } catch (IOException ioe) {
        // Also covers FileNotFoundException and UnsupportedEncodingException.
        ioe.printStackTrace();
    }
}
/**
 * Parses the generated HTML into a list of questions.
 *
 * <p>Equation placeholders are removed first, then the text is cut at every
 * {@code <br/>} that is followed by a number and {@code 、}; the part before the first such
 * marker is ignored. Within a question, lines are separated by {@code <br/>}: characters
 * 1-3 of the first line are the type label and the title starts at index 5. Single-choice
 * questions read four option lines, then an answer line and an analysis line; every other
 * type reads an answer line and an analysis line.
 *
 * @param s HTML produced by the Word conversion
 * @return the parsed questions, in document order
 */
public static List<Question> analysisHtmlString(String s) {
    StringBuilder joined = new StringBuilder();
    for (String part : s.split(" EMBED Equation.DSMT4 ")) {
        joined.append(part);
    }
    String[] chunks = joined.toString().split("<br/>[0-9]+、");
    List<Question> questions = new ArrayList<>();
    // Index 0 holds whatever precedes the first numbered question and is skipped.
    for (int idx = 1; idx < chunks.length; idx++) {
        Question question = new Question();
        String[] lines = chunks[idx].split("<br/>");
        String header = lines[0];
        String ti = header.substring(1, 4);
        String timu = header.substring(5);
        System.out.println(timu);
        boolean singleChoice = false;
        switch (ti) {
            case "单选题":
            case "选择题":
                singleChoice = true;
                question.setType(1);
                break;
            case "多选题":
                question.setType(2);
                break;
            case "填空题":
                question.setType(3);
                break;
            case "判断题":
                question.setType(4);
                break;
            case "解答题":
            case "问答题":
            case "简答题":
                question.setType(5);
                break;
            default:
                // Unknown label: the type is left untouched.
                break;
        }
        question.setTitle("<p>" + timu + "</p>");
        if (singleChoice) {
            question.setOptionA("<p>" + lines[1].substring(2) + "</p>");
            question.setOptionB("<p>" + lines[2].substring(2) + "</p>");
            question.setOptionC("<p>" + lines[3].substring(2) + "</p>");
            question.setOptionD("<p>" + lines[4].substring(2) + "</p>");
            question.setAnswer(textAfterColon(lines[5]));
            question.setAnalysis("<p>" + textAfterColon(lines[6]) + "</p>");
        } else {
            question.setAnswer("<p>" + textAfterColon(lines[1]) + "</p>");
            question.setAnalysis("<p>" + textAfterColon(lines[2]) + "</p>");
        }
        questions.add(question);
    }
    return questions;
}

/** Returns what follows the first {@code :} of the line, or the whole line if there is none. */
private static String textAfterColon(String line) {
    return line.substring(line.indexOf(":") + 1);
}
/**
 * Extracts the "count,score" pair of a section heading.
 *
 * <p>Everything except digits, {@code +}, {@code |}, {@code ^} and commas is removed. A
 * full-width comma is accepted and normalised to an ASCII comma; the original second
 * branch could never run because it repeated the first condition.
 *
 * @param delHtml heading text with the HTML tags already removed
 * @return the filtered text when it contains a comma, otherwise {@code "0,0"}
 */
public static String numScore(String delHtml) {
    if (delHtml == null) {
        return "0,0";
    }
    Pattern p = Pattern.compile("[^0-9+,|^\uFF0C]");
    Matcher m = p.matcher(delHtml);
    String s = m.replaceAll("").replace('\uFF0C', ',');
    // Blank input has no comma either, so one check covers both fallbacks.
    return s.contains(",") ? s : "0,0";
}
/**
 * Reads the per-question score from the first bracketed group of a title.
 *
 * <p>Both ASCII and full-width (Chinese) brackets are recognised. The former ASCII-only
 * pattern matched the empty string at position 0, so every call failed while parsing it.
 *
 * @param delHtml title text with the HTML tags already removed
 * @return the number inside the first pair of brackets, or {@code 0} when there is no
 *         bracketed group or its content is not a non-empty run of digits
 */
public static int itemNum(String delHtml) {
    if (delHtml == null) {
        return 0;
    }
    Matcher matcher = Pattern.compile("[(\uFF08](.*?)[)\uFF09]").matcher(delHtml);
    if (!matcher.find()) {
        return 0;
    }
    String inner = matcher.group(1);
    if (inner.isEmpty()) {
        return 0;
    }
    for (int i = 0; i < inner.length(); i++) {
        char ch = inner.charAt(i);
        if (ch < '0' || ch > '9') {
            return 0;
        }
    }
    try {
        return Integer.parseInt(inner);
    } catch (NumberFormatException tooLarge) {
        // All digits but beyond the int range: treat as "no usable score".
        return 0;
    }
}
/**
 * Tells whether a string consists only of the ASCII digits 0-9.
 *
 * <p>{@code null} and the empty string are not numeric; the former pattern accepted the
 * empty string, which then failed in {@code Integer.parseInt}.
 *
 * @param str text to check
 * @return {@code true} when {@code str} is a non-empty run of ASCII digits
 */
public static boolean isNumeric(String str) {
    if (str == null || str.isEmpty()) {
        return false;
    }
    for (int i = 0; i < str.length(); i++) {
        char ch = str.charAt(i);
        if (ch < '0' || ch > '9') {
            return false;
        }
    }
    return true;
}
/**
 * Tells whether a line starts with a question number.
 *
 * @param str line to check
 * @return {@code true} when the whole line is one or more digits, then {@code -} or
 *         {@code 、}, then any text without line breaks
 */
public static boolean isTitele(String str) {
    return Pattern.matches("^([\\d]+[-\\、].*)", str);
}
/**
 * Tells whether a line is an option of a choice question.
 *
 * @param str line to check
 * @return {@code true} when the whole line is one or more ASCII letters, then {@code -}
 *         or {@code :}, then any text without line breaks
 */
public static boolean isSelecteTitele(String str) {
    return Pattern.matches("^([a-zA-Z]+[-\\:].*)", str);
}
/**
 * Tells whether a line is a section heading.
 *
 * @param str line to check
 * @return {@code true} when the line contains one of the Chinese numerals one to eight
 *         immediately followed by {@code 、}
 */
public static boolean isBigTilete(String str) {
    String[] sectionMarkers = {"一、", "二、", "三、", "四、", "五、", "六、", "七、", "八、"};
    for (String marker : sectionMarkers) {
        if (str.contains(marker)) {
            return true;
        }
    }
    return false;
}
}
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPOutputStream;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.w3c.dom.Document;
import com.wcpdoc.exam.exam.entity.Question;
import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
/**
 *
 * @Description: Parses Word exam-paper documents into a structured model
 * @author <a href="mailto:thoslbt@163.com">Thos</a>
 * @ClassName: WordToHtml
 * @version V1.0
 *
 */
public class WordUtils {
/**
* ASCII code of the carriage-return character
*/
private static final short ENTER_ASCII = 13;
/**
* ASCII code of the space character
*/
private static final short SPACE_ASCII = 32;
/**
* ASCII code of the horizontal-tab character
*/
private static final short TABULATION_ASCII = 9;
// HTML accumulated for the document being converted; appended to by getWordAndStyle and readPicture.
public static String htmlText = "";
// HTML of the table most recently rendered by readTable.
public static String htmlTextTbl = "";
// Index of the last table recorded by readTable (readTable sets it to -1 before iterating).
public static int counter=0;
// Start offset of the table most recently visited by readTable.
public static int beginPosi=0;
// End offset of the table most recently visited by readTable.
public static int endPosi=0;
// Start offsets of the tables, indexed by table order; allocated in getWordAndStyle.
public static int beginArray[];
// End offsets of the tables, indexed by table order; allocated in getWordAndStyle.
public static int endArray[];
// Rendered HTML of each table, indexed by table order; allocated in getWordAndStyle.
public static String htmlTextArray[];
// Set to true by readTable once at least one table has been found.
public static boolean tblExist=false;
// Hard-coded sample input document used by main.
public static final String inputFile="D:/kaoshi/gongshi/111222.doc";
// Hard-coded destination of the HTML file produced by writeFile.
public static final String htmlFile="D:/kaoshi/gongshi/abc.html";
/**
 * Command-line entry point: converts the hard-coded sample document {@link #inputFile}
 * and prints the stack trace of any failure.
 *
 * @param argv command-line arguments (not used)
 */
public static void main(String[] argv) {
    try {
        getWordAndStyle(inputFile);
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}
/**
 * Directory in which pictures extracted from a Word document are stored.
 *
 * @return the fixed base path {@code D:/kaoshi/gongshi} immediately followed by the
 *         current date as rendered by {@code DateFormatUtil.formatDate}
 */
public static String wordImageFilePath() {
    StringBuilder path = new StringBuilder("D:/kaoshi/gongshi");
    path.append(DateFormatUtil.formatDate(new Date()));
    return path.toString();
}
/**
 * Path prefix under which the extracted pictures are referenced from the generated HTML.
 *
 * @return the fixed base path {@code D:/kaoshi/gongshi}, the current date as rendered by
 *         {@code DateFormatUtil.formatDate}, and a trailing {@code /}
 */
public static String wordImgeWebPath() {
    StringBuilder path = new StringBuilder("D:/kaoshi/gongshi");
    path.append(DateFormatUtil.formatDate(new Date()));
    path.append("/");
    return path.toString();
}
/**
 * Converts a binary Word document to an HTML string and parses that HTML into questions.
 *
 * <p>The document is walked one character at a time. Tables rendered by
 * {@link #readTable} are spliced in at their start offsets, pictures are exported through
 * {@link #readPicture}, carriage returns are followed by {@code <br/>}, and the result is
 * written with {@link #writeFile} before being handed to {@link #analysisHtmlString}.
 *
 * <p>All intermediate state lives in static fields of this class, so concurrent calls
 * would interfere with each other.
 *
 * @param fileName path of the Word file to read
 * @return the questions parsed from the generated HTML
 * @throws Exception if the file cannot be opened, read or converted
 */
public static List<Question> getWordAndStyle(String fileName) throws Exception {
    System.out.println(fileName);
    HWPFDocument doc;
    // The document is fully loaded by the constructor, so the stream can be released right away.
    try (FileInputStream in = new FileInputStream(new File(fileName))) {
        doc = new HWPFDocument(in);
    }
    Range rangetbl = doc.getRange(); // reading range of the whole document
    TableIterator it = new TableIterator(rangetbl);
    int num = 100;
    beginArray = new int[num];
    endArray = new int[num];
    htmlTextArray = new String[num];
    // Reset the table bookkeeping left behind by a previous call; otherwise a stale
    // tblExist=true makes a table-free document splice in null entries.
    tblExist = false;
    counter = -1;
    // Total number of characters in the document
    int length = doc.characterLength();
    // Picture container of the document
    PicturesTable pTable = doc.getPicturesTable();
    // A missing summary or title must not end up as the literal text "null" in the output.
    String title = doc.getSummaryInformation() == null ? null : doc.getSummaryInformation().getTitle();
    htmlText = title == null ? "" : title;
    if (it.hasNext()) {
        readTable(it, rangetbl);
    }
    int cur = 0;
    // Characters collected so far that share the same formatting
    String tempString = "";
    for (int i = 0; i < length - 1; i++) {
        // Examine the document one character at a time
        Range range = new Range(i, i + 1, doc);
        CharacterRun cr = range.getCharacterRun(0);
        // Only compare against tables that were actually recorded (cur <= counter).
        if (tblExist && cur <= counter && i == beginArray[cur]) {
            htmlText += tempString + htmlTextArray[cur];
            tempString = "";
            // Jump to the table's end; the loop increment then moves past it.
            i = endArray[cur] - 1;
            cur++;
            continue;
        }
        if (pTable.hasPicture(cr)) {
            htmlText += tempString;
            // Export the picture; readPicture appends the <img> tag to htmlText itself
            readPicture(pTable, cr);
            tempString = "";
        } else {
            Range range2 = new Range(i + 1, i + 2, doc);
            // The following character, used for the formatting comparison
            CharacterRun cr2 = range2.getCharacterRun(0);
            char c = cr.text().charAt(0);
            // NOTE(review): a space or tab adds an extra " " before the character's own
            // text is appended below; confirm whether an HTML entity was intended here.
            if (c == SPACE_ASCII) {
                tempString += " ";
            } else if (c == TABULATION_ASCII) {
                tempString += " ";
            }
            // Keep buffering while this character and the next share the same formatting
            boolean flag = compareCharStyle(cr, cr2);
            if (flag && c != ENTER_ASCII) {
                tempString += cr.text();
            } else {
                htmlText += tempString + cr.text();
                tempString = "";
            }
            // A carriage return ends the line in the HTML output
            if (c == ENTER_ASCII) {
                htmlText += "<br/>";
            }
        }
    }
    htmlText += tempString;
    // Write the generated HTML file
    writeFile(htmlText);
    System.out.println("------------WordToHtml转换成功----------------");
    // Turn the HTML into question objects
    System.out.println("------------WordToHtml模型化成功----------------");
    return analysisHtmlString(htmlText);
}
/**
 * Renders every table of the document to an HTML {@code <table>} and records where it sits.
 *
 * <p>For each table the start/end offsets are stored in {@link #beginArray} and
 * {@link #endArray} and the HTML in {@link #htmlTextArray}, all at the same index;
 * {@link #counter} ends up as the index of the last table and {@link #tblExist} is set
 * once a table has been seen. Every paragraph of a cell becomes its own {@code <td>}.
 *
 * @param it       iterator over the document's tables
 * @param rangetbl document range the iterator was created from (not read by this method)
 * @throws Exception if a table cannot be read
 */
public static void readTable(TableIterator it, Range rangetbl) throws Exception {
    htmlTextTbl = "";
    counter = -1;
    // Visit every table of the document
    while (it.hasNext()) {
        tblExist = true;
        Table tb = (Table) it.next();
        beginPosi = tb.getStartOffset();
        endPosi = tb.getEndOffset();
        counter = counter + 1;
        // Keep one spare slot: getWordAndStyle reads the entry after the last table.
        ensureTableCapacity(counter + 2);
        beginArray[counter] = beginPosi;
        endArray[counter] = endPosi;
        StringBuilder html = new StringBuilder("<table border>");
        // Rows, starting at index 0
        for (int i = 0; i < tb.numRows(); i++) {
            TableRow tr = tb.getRow(i);
            html.append("<tr>");
            // Cells, starting at index 0
            for (int j = 0; j < tr.numCells(); j++) {
                TableCell td = tr.getCell(j);
                int cellWidth = td.getWidth();
                // Content of the cell, one <td> per paragraph
                for (int k = 0; k < td.numParagraphs(); k++) {
                    Paragraph para = td.getParagraph(k);
                    String s = para.text().trim();
                    // Compare by content; the former reference comparison (==) was unreliable.
                    if (s.isEmpty()) {
                        s = " ";
                    }
                    html.append("<td width=").append(cellWidth).append(">").append(s).append("</td>");
                }
            }
            // Close the row (the closing tag was previously missing).
            html.append("</tr>");
        }
        html.append("</table>");
        htmlTextTbl = html.toString();
        htmlTextArray[counter] = htmlTextTbl;
    }
}

/** Grows the three per-table arrays so that each holds at least {@code needed} entries. */
private static void ensureTableCapacity(int needed) {
    if (beginArray == null || beginArray.length < needed) {
        int[] grown = new int[Math.max(needed, 16) * 2];
        if (beginArray != null) {
            System.arraycopy(beginArray, 0, grown, 0, beginArray.length);
        }
        beginArray = grown;
    }
    if (endArray == null || endArray.length < needed) {
        int[] grown = new int[Math.max(needed, 16) * 2];
        if (endArray != null) {
            System.arraycopy(endArray, 0, grown, 0, endArray.length);
        }
        endArray = grown;
    }
    if (htmlTextArray == null || htmlTextArray.length < needed) {
        String[] grown = new String[Math.max(needed, 16) * 2];
        if (htmlTextArray != null) {
            System.arraycopy(htmlTextArray, 0, grown, 0, htmlTextArray.length);
        }
        htmlTextArray = grown;
    }
}
/**
 * Exports the picture attached to a character run and appends an {@code <img>} tag for it
 * to {@link #htmlText}.
 *
 * <p>The picture is written into the directory returned by {@link #wordImageFilePath()}.
 * A {@code wmf} picture is additionally converted to SVG via {@link #convert} and then to
 * PNG via {@code SVGUtils.svg2PNG}; the tag then points at the PNG and carries a fixed size.
 *
 * @param pTable picture table of the document
 * @param cr     character run that holds the picture
 * @throws Exception if the picture cannot be extracted, written or converted
 */
public static void readPicture(PicturesTable pTable, CharacterRun cr) throws Exception {
    // Extract the picture
    Picture pic = pTable.extractPicture(cr, false);
    // File name suggested by POI
    String afileName = pic.suggestFullFileName();
    // Tolerate a name without extension instead of failing in substring(0, -1).
    int dot = afileName.lastIndexOf(".");
    String suffix = dot < 0 ? "" : afileName.substring(dot + 1);
    String prefix = dot < 0 ? afileName : afileName.substring(0, dot);
    File file = new File(wordImageFilePath());
    System.out.println(file.mkdirs());
    // Close the stream before the file is read again by the converter (previously leaked).
    try (OutputStream out = new FileOutputStream(new File(wordImageFilePath() + File.separator + afileName))) {
        pic.writeImageContent(out);
    }
    // Resolve the path once so every reference below uses the same directory.
    String webPath = wordImgeWebPath();
    if (suffix.equals("wmf")) {
        convert(webPath + afileName, webPath + prefix + ".svg");
        SVGUtils svgUtils = new SVGUtils();
        svgUtils.svg2PNG(new File(webPath + prefix + ".svg"), new File(webPath + prefix + ".png"));
        afileName = prefix + ".png";
        htmlText += "<img src='question/getPic?path=" + webPath + afileName
                + "' mce_src='question/getPic?path=" + webPath + afileName + "' style='height:30px;width:50px'/>";
    } else {
        htmlText += "<img src='question/getPic?path=" + webPath + afileName
                + "' mce_src='question/getPic?path=" + webPath + afileName + "' />";
    }
}
/**
 * Converts a WMF file to SVG; the output is gzip-compressed when {@code dest} ends with
 * {@code .svgz}.
 *
 * <p>Conversion is best effort: any failure is reported on the console and not rethrown.
 *
 * @param file path of the WMF file to read
 * @param dest path of the SVG file to write
 */
public static void convert(String file, String dest) {
    // try-with-resources releases the input file even when parsing fails (previously leaked).
    try (InputStream in = new FileInputStream(new File(file))) {
        WmfParser parser = new WmfParser();
        final SvgGdi gdi = new SvgGdi(false);
        parser.parse(in, gdi);
        Document doc = gdi.getDocument();
        OutputStream out = new FileOutputStream(dest);
        try {
            if (dest.endsWith(".svgz")) {
                out = new GZIPOutputStream(out);
            }
            output(doc, out);
        } finally {
            // output() closes the stream on its own path; closing again is harmless and
            // covers failures that happen before output() is reached.
            out.close();
        }
    } catch (Exception e) {
        System.out.println("edn?????" + e.getMessage());
        // Keep the full cause visible instead of only the message.
        e.printStackTrace();
    }
}
/**
 * Serialises a DOM document as indented UTF-8 XML with the SVG 1.0 doctype and closes the
 * stream.
 *
 * @param doc document to serialise
 * @param out destination stream; it is flushed and always closed by this method
 * @throws Exception if the transformer cannot be created or the transformation fails
 */
public static void output(Document doc, OutputStream out) throws Exception {
    try {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN");
        transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd");
        // A single pass is enough; the former second transformation into a throw-away
        // buffer only repeated the work.
        transformer.transform(new DOMSource(doc), new StreamResult(out));
        out.flush();
    } finally {
        // Release the stream on failure as well.
        out.close();
    }
}
/**
 * Tells whether two character runs share the same formatting.
 *
 * @param cr1 first character run
 * @param cr2 second character run
 * @return {@code true} when bold, italic, font name, font size and colour are all equal
 */
public static boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) {
    if (cr1.isBold() != cr2.isBold()) {
        return false;
    }
    if (cr1.isItalic() != cr2.isItalic()) {
        return false;
    }
    if (!cr1.getFontName().equals(cr2.getFontName())) {
        return false;
    }
    if (cr1.getFontSize() != cr2.getFontSize()) {
        return false;
    }
    return cr1.getColor() == cr2.getColor();
}
/*** 字体颜色模块start ********/
/** Returns bits 0-7 of {@code c}, which {@link #rgb} treats as the red component. */
public static int red(int c) {
    final int lowestByte = c & 0xFF;
    return lowestByte;
}
/** Returns bits 8-15 of {@code c}, which {@link #rgb} treats as the green component. */
public static int green(int c) {
    final int shifted = c >>> 8;
    return shifted & 0xFF;
}
/** Returns bits 16-23 of {@code c}, which {@link #rgb} treats as the blue component. */
public static int blue(int c) {
    final int shifted = c >>> 16;
    return shifted & 0xFF;
}
/**
 * Reorders the three low bytes of {@code c}: the lowest byte moves to bits 16-23, the
 * middle byte stays at bits 8-15 and the third byte moves to bits 0-7.
 */
public static int rgb(int c) {
    int r = red(c);
    int g = green(c);
    int b = blue(c);
    return (r << 16) | (g << 8) | b;
}
/**
 * Left-pads a hex string with zeros to six characters.
 *
 * @param rgb hex digits to pad
 * @return {@code rgb} prefixed with as many {@code 0} characters as are missing to reach
 *         six; strings of six or more characters are returned unchanged
 */
public static String rgbToSix(String rgb) {
    StringBuilder padded = new StringBuilder();
    for (int missing = 6 - rgb.length(); missing > 0; missing--) {
        padded.append("0");
    }
    return padded.append(rgb).toString();
}
/**
 * Formats a colour value as {@code #} followed by six hex digits.
 *
 * @param color colour value; {@code -1} is treated as {@code 0}
 * @return the byte-reordered value (see {@link #rgb}) as a zero-padded hex string
 */
public static String getHexColor(int color) {
    int normalized = (color == -1) ? 0 : color;
    String hex = Integer.toHexString(rgb(normalized));
    return "#" + rgbToSix(hex);
}
/** 字体颜色模块end ******/
/**
 * Writes the given text to {@link #htmlFile}, encoded as GB2312.
 *
 * <p>I/O failures are printed to the console and not rethrown.
 *
 * @param s text to write
 */
public static void writeFile(String s) {
    File file = new File(htmlFile);
    // One write in the target encoding: the former first pass in the platform charset was
    // overwritten immediately, and its streams leaked when a write failed.
    try (BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(file), "GB2312"))) {
        writer.write(s);
    } catch (IOException ioe) {
        // Also covers FileNotFoundException and UnsupportedEncodingException.
        ioe.printStackTrace();
    }
}
/**
 * Parses the generated HTML into a list of questions.
 *
 * <p>Equation placeholders are removed first, then the text is cut at every
 * {@code <br/>} that is followed by a number and {@code 、}; the part before the first such
 * marker is ignored. Within a question, lines are separated by {@code <br/>}: characters
 * 1-3 of the first line are the type label and the title starts at index 5. Single-choice
 * questions read four option lines, then an answer line and an analysis line; every other
 * type reads an answer line and an analysis line.
 *
 * @param s HTML produced by the Word conversion
 * @return the parsed questions, in document order
 */
public static List<Question> analysisHtmlString(String s) {
    StringBuilder joined = new StringBuilder();
    for (String part : s.split(" EMBED Equation.DSMT4 ")) {
        joined.append(part);
    }
    String[] chunks = joined.toString().split("<br/>[0-9]+、");
    List<Question> questions = new ArrayList<>();
    // Index 0 holds whatever precedes the first numbered question and is skipped.
    for (int idx = 1; idx < chunks.length; idx++) {
        Question question = new Question();
        String[] lines = chunks[idx].split("<br/>");
        String header = lines[0];
        String ti = header.substring(1, 4);
        String timu = header.substring(5);
        System.out.println(timu);
        boolean singleChoice = false;
        switch (ti) {
            case "单选题":
            case "选择题":
                singleChoice = true;
                question.setType(1);
                break;
            case "多选题":
                question.setType(2);
                break;
            case "填空题":
                question.setType(3);
                break;
            case "判断题":
                question.setType(4);
                break;
            case "解答题":
            case "问答题":
            case "简答题":
                question.setType(5);
                break;
            default:
                // Unknown label: the type is left untouched.
                break;
        }
        question.setTitle("<p>" + timu + "</p>");
        if (singleChoice) {
            question.setOptionA("<p>" + lines[1].substring(2) + "</p>");
            question.setOptionB("<p>" + lines[2].substring(2) + "</p>");
            question.setOptionC("<p>" + lines[3].substring(2) + "</p>");
            question.setOptionD("<p>" + lines[4].substring(2) + "</p>");
            question.setAnswer(textAfterColon(lines[5]));
            question.setAnalysis("<p>" + textAfterColon(lines[6]) + "</p>");
        } else {
            question.setAnswer("<p>" + textAfterColon(lines[1]) + "</p>");
            question.setAnalysis("<p>" + textAfterColon(lines[2]) + "</p>");
        }
        questions.add(question);
    }
    return questions;
}

/** Returns what follows the first {@code :} of the line, or the whole line if there is none. */
private static String textAfterColon(String line) {
    return line.substring(line.indexOf(":") + 1);
}
/**
 * Extracts the "count,score" pair of a section heading.
 *
 * <p>Everything except digits, {@code +}, {@code |}, {@code ^} and commas is removed. A
 * full-width comma is accepted and normalised to an ASCII comma; the original second
 * branch could never run because it repeated the first condition.
 *
 * @param delHtml heading text with the HTML tags already removed
 * @return the filtered text when it contains a comma, otherwise {@code "0,0"}
 */
public static String numScore(String delHtml) {
    if (delHtml == null) {
        return "0,0";
    }
    Pattern p = Pattern.compile("[^0-9+,|^\uFF0C]");
    Matcher m = p.matcher(delHtml);
    String s = m.replaceAll("").replace('\uFF0C', ',');
    // Blank input has no comma either, so one check covers both fallbacks.
    return s.contains(",") ? s : "0,0";
}
/**
 * Reads the per-question score from the first bracketed group of a title.
 *
 * <p>Both ASCII and full-width (Chinese) brackets are recognised. The former ASCII-only
 * pattern matched the empty string at position 0, so every call failed while parsing it.
 *
 * @param delHtml title text with the HTML tags already removed
 * @return the number inside the first pair of brackets, or {@code 0} when there is no
 *         bracketed group or its content is not a non-empty run of digits
 */
public static int itemNum(String delHtml) {
    if (delHtml == null) {
        return 0;
    }
    Matcher matcher = Pattern.compile("[(\uFF08](.*?)[)\uFF09]").matcher(delHtml);
    if (!matcher.find()) {
        return 0;
    }
    String inner = matcher.group(1);
    if (inner.isEmpty()) {
        return 0;
    }
    for (int i = 0; i < inner.length(); i++) {
        char ch = inner.charAt(i);
        if (ch < '0' || ch > '9') {
            return 0;
        }
    }
    try {
        return Integer.parseInt(inner);
    } catch (NumberFormatException tooLarge) {
        // All digits but beyond the int range: treat as "no usable score".
        return 0;
    }
}
/**
 * Tells whether a string consists only of the ASCII digits 0-9.
 *
 * <p>{@code null} and the empty string are not numeric; the former pattern accepted the
 * empty string, which then failed in {@code Integer.parseInt}.
 *
 * @param str text to check
 * @return {@code true} when {@code str} is a non-empty run of ASCII digits
 */
public static boolean isNumeric(String str) {
    if (str == null || str.isEmpty()) {
        return false;
    }
    for (int i = 0; i < str.length(); i++) {
        char ch = str.charAt(i);
        if (ch < '0' || ch > '9') {
            return false;
        }
    }
    return true;
}
/**
 * Tells whether a line starts with a question number.
 *
 * @param str line to check
 * @return {@code true} when the whole line is one or more digits, then {@code -} or
 *         {@code 、}, then any text without line breaks
 */
public static boolean isTitele(String str) {
    return Pattern.matches("^([\\d]+[-\\、].*)", str);
}
/**
 * Tells whether a line is an option of a choice question.
 *
 * @param str line to check
 * @return {@code true} when the whole line is one or more ASCII letters, then {@code -}
 *         or {@code :}, then any text without line breaks
 */
public static boolean isSelecteTitele(String str) {
    return Pattern.matches("^([a-zA-Z]+[-\\:].*)", str);
}
/**
 * Tells whether a line is a section heading.
 *
 * @param str line to check
 * @return {@code true} when the line contains one of the Chinese numerals one to eight
 *         immediately followed by {@code 、}
 */
public static boolean isBigTilete(String str) {
    String[] sectionMarkers = {"一、", "二、", "三、", "四、", "五、", "六、", "七、", "八、"};
    for (String marker : sectionMarkers) {
        if (str.contains(marker)) {
            return true;
        }
    }
    return false;
}
}