效果图展示
需求说明
- 需要提取docx中的内容,经过工作流处理之后,重新生成文档
- 要求支持将公式、表格样式和图片大小还原到新的文档中
参考文章/网站
方案选择
由于之前使用过mammoth,并且实现过mammoth的定制开发,因此决定使用mammoth.js和docx.js来实现这个功能
方案流程
- mammoth提取docx内容转换成html
- 在html中还原docx的样式,并转换成docx.js能支持的数据格式
- 还原docx
本文主要是实现公式的提取
实现方法
- 查看docx原始的xml(将docx文件的扩展名改为zip之后解压缩),获取公式相关的各种标签
- 定制开发解析的方法
- 将解析得到的对象转换成html能显示的MathML
xml标签示例
关键代码
document.js
/**
 * Builds the node object for a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Content of the formula, stored unchanged.
 * @returns {{type: *, children: *}} Node tagged with `types.math`.
 */
function OMath(children) {
    var node = {type: types.math};
    node.children = children;
    return node;
}
/**
 * Builds the node object for a piece of text inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} value - Text content, stored unchanged.
 * @returns {{type: *, value: *}} Node tagged with `types.mathText`.
 */
function OMathText(value) {
    var node = {type: types.mathText};
    node.value = value;
    return node;
}
/**
 * Builds the node object for a fraction inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} numerator - Content above the fraction bar, stored unchanged.
 * @param {*} denominator - Content below the fraction bar, stored unchanged.
 * @returns {{type: *, numerator: *, denominator: *}} Node tagged with
 *     `types.mathFraction`.
 */
function OMathFraction(numerator, denominator) {
    var node = {type: types.mathFraction};
    node.numerator = numerator;
    node.denominator = denominator;
    return node;
}
/**
 * Builds the node object for a radical (root) inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Content under the radical sign, stored unchanged.
 * @param {*} degree - Degree of the root, stored unchanged.
 * @returns {{type: *, children: *, degree: *}} Node tagged with
 *     `types.mathRadical`.
 */
function OMathRadical(children, degree) {
    var node = {type: types.mathRadical};
    node.children = children;
    node.degree = degree;
    return node;
}
/**
 * Builds the node object for an n-ary operator inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Operand content, stored unchanged.
 * @param {*} options - Operator options, stored unchanged.
 * @param {*} subScript - Subscript content, stored unchanged.
 * @param {*} superScript - Superscript content, stored unchanged.
 * @returns {{type: *, children: *, options: *, subScript: *, superScript: *}}
 *     Node tagged with `types.mathNary`.
 */
function OMathNary(children, options, subScript, superScript) {
    var node = {type: types.mathNary};
    node.children = children;
    node.options = options;
    node.subScript = subScript;
    node.superScript = superScript;
    return node;
}
/**
 * Builds the node object for a named function inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Argument content, stored unchanged.
 * @param {*} name - Function name content, stored unchanged.
 * @returns {{type: *, children: *, name: *}} Node tagged with
 *     `types.mathFunction`.
 */
function OMathFunction(children, name) {
    var node = {type: types.mathFunction};
    node.children = children;
    node.name = name;
    return node;
}
/**
 * Builds the node object for a base with a superscript inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Base content, stored unchanged.
 * @param {*} superScript - Superscript content, stored unchanged.
 * @returns {{type: *, children: *, superScript: *}} Node tagged with
 *     `types.mathSuperScript`.
 */
function OMathSuperScript(children, superScript) {
    var node = {type: types.mathSuperScript};
    node.children = children;
    node.superScript = superScript;
    return node;
}
/**
 * Builds the node object for a base with a subscript inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Base content, stored unchanged.
 * @param {*} subScript - Subscript content, stored unchanged.
 * @returns {{type: *, children: *, subScript: *}} Node tagged with
 *     `types.mathSubScript`.
 */
function OMathSubScript(children, subScript) {
    var node = {type: types.mathSubScript};
    node.children = children;
    node.subScript = subScript;
    return node;
}
/**
 * Builds the node object for a base carrying both a subscript and a
 * superscript inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Base content, stored unchanged.
 * @param {*} subScript - Subscript content, stored unchanged.
 * @param {*} superScript - Superscript content, stored unchanged.
 * @returns {{type: *, children: *, subScript: *, superScript: *}} Node tagged
 *     with `types.mathSubSuperScript`.
 */
function OMathSubSuperScript(children, subScript, superScript) {
    var node = {type: types.mathSubSuperScript};
    node.children = children;
    node.subScript = subScript;
    node.superScript = superScript;
    return node;
}
/**
 * Builds the node object for a base carrying a pre-subscript and a
 * pre-superscript inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Base content, stored unchanged.
 * @param {*} subScript - Subscript content, stored unchanged.
 * @param {*} superScript - Superscript content, stored unchanged.
 * @returns {{type: *, children: *, subScript: *, superScript: *}} Node tagged
 *     with `types.mathPreSubSuperScript`.
 */
function OMathPreSubSuperScript(children, subScript, superScript) {
    var node = {type: types.mathPreSubSuperScript};
    node.children = children;
    node.subScript = subScript;
    node.superScript = superScript;
    return node;
}
/**
 * Builds the node object for bracketed content inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Content enclosed by the brackets, stored unchanged.
 * @param {*} bracketType - Bracket descriptor, stored unchanged.
 * @returns {{type: *, children: *, bracketType: *}} Node tagged with
 *     `types.mathBracket`.
 */
function OMathBracket(children, bracketType) {
    var node = {type: types.mathBracket};
    node.children = children;
    node.bracketType = bracketType;
    return node;
}
/**
 * Builds the node object for an upper-limit construct inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Base content, stored unchanged.
 * @param {*} limit - Limit content, stored unchanged.
 * @returns {{type: *, children: *, limit: *}} Node tagged with
 *     `types.mathLimitUpper`.
 */
function OMathLimitUpper(children, limit) {
    var node = {type: types.mathLimitUpper};
    node.children = children;
    node.limit = limit;
    return node;
}
/**
 * Builds the node object for a lower-limit construct inside a math formula.
 *
 * Returns a plain object, so the result is the same whether the function
 * is called directly or with `new`.
 *
 * @param {*} children - Base content, stored unchanged.
 * @param {*} limit - Limit content, stored unchanged.
 * @returns {{type: *, children: *, limit: *}} Node tagged with
 *     `types.mathLimitLower`.
 */
function OMathLimitLower(children, limit) {
    var node = {type: types.mathLimitLower};
    node.children = children;
    node.limit = limit;
    return node;
}
body-reader.js
var xmlElementReaders = {
_oMath: function(element) {
return elementResult(new documents.OMath(readChildElements(element)));
},
// 对应children属性
_e: readChildElements,
// 处理公式内的元素转换
_r: readChildElements,
_t: function(element) {
return elementResult(new documents.OMathText(element.text()));
},
_f: function(element) {
return elementResult(
new documents.OMathFraction(readChildElements(element.mathFirst('num')), readChildElements(element.mathFirst('den')))
);
},
_num: readChildElements,
_den: readChildElements,
_rad: function(element) {
return elementResult(
new documents.OMathRadical(readChildElements(element.mathFirst('e')), readChildElements(element.mathFirst('deg')))
);
},
// 对应开根号的指数
_deg: readChildElements,
// 积分运算
_nary: function(element) {
var narySub = element.mathFirst('sub');
var narySup = element.mathFirst('sup');
var naryPr = element.mathFirst('naryPr');
var chr = naryPr.mathFirstOrEmpty('chr').attributes[mathVersion + 'val'] || '';
var limLoc = naryPr.mathFirstOrEmpty('limLoc').attributes[mathVersion + 'val'] || '';
var options = {
chr: chr,
limLoc: limLoc
};
var subScript = narySub ? readChildEleme