最近使用 HtmlUnit 作为后端来抽取数据,在用 XPath 定位 HtmlUnit 解析出的 DOM 结点的过程中,发现了这个问题。
不知道是故意这么做,还是个bug。
于是重写了前端获取 XPath 的代码,让它也忽略不可见的 Element,与后端保持一致,这样就可以解决我们的问题:
/**
 * Builds an XPath expression that locates `element`.
 *
 * If the element has a non-empty `id`, the result is an id lookup of the form
 * `//*[@id=...]`. Otherwise the result is an absolute path of lower-cased tag
 * names from the outermost element ancestor down to `element`. A step gets a
 * positional predicate `[n]` only when at least one preceding sibling with the
 * same `localName` is reported visible by `isVisible`; invisible siblings are
 * not counted, so the index can differ from the plain DOM position.
 *
 * @param {Element} element - The element to describe.
 * @returns {?string} The XPath expression, or null when `element` is missing
 *     or is not an element node (nodeType 1).
 */
function getXPath(element) {
  // XPath 1.0 string literals have no escape syntax, so pick a delimiter the
  // text does not contain, and fall back to concat() when it contains both.
  function quoteForXPath(value) {
    var text = String(value);
    if (text.indexOf('"') === -1) return '"' + text + '"';
    if (text.indexOf("'") === -1) return "'" + text + "'";
    return 'concat("' + text.split('"').join('", \'"\', "') + '")';
  }

  if (element && element.id) {
    return '//*[@id=' + quoteForXPath(element.id) + ']';
  }

  var paths = [];
  // Walk upwards; the loop ends at the first non-element ancestor.
  for (; element && element.nodeType === 1; element = element.parentNode) {
    // Count only the visible preceding siblings that share this tag name.
    var index = 0;
    for (var sibling = element.previousSibling; sibling; sibling = sibling.previousSibling) {
      if (sibling.localName === element.localName && isVisible(sibling)) {
        ++index;
      }
    }

    var tagName = element.localName.toLowerCase();
    // XPath positions are 1-based; the first matching sibling gets no predicate.
    var pathIndex = index ? '[' + (index + 1) + ']' : '';
    paths.unshift(tagName + pathIndex);
  }

  return paths.length ? '/' + paths.join('/') : null;
}
判断是否可见:
/**
 * Tells whether an element counts as visible, based on its computed style.
 *
 * Only the element's own computed `visibility` and `display` values are
 * consulted: the result is false when visibility is "hidden" or display is
 * "none", and true otherwise.
 *
 * @param {Element} element - The element to inspect.
 * @returns {boolean} Whether the element is considered visible.
 */
function isVisible(element) {
  var ownerDoc = element.ownerDocument;

  // Reach the computed-style API through the document's default view.
  var documentView = XPCOMUtils.QI(ownerDoc, _CI.nsIDOMDocumentView);
  var cssView = XPCOMUtils.QI(documentView.defaultView, _CI.nsIDOMViewCSS);
  var style = cssView.getComputedStyle(element, "");

  var visibility = style.getPropertyCSSValue("visibility").getStringValue();
  var display = style.getPropertyCSSValue("display").getStringValue();

  if (visibility == "hidden") {
    return false;
  }
  return display != "none";
}
其中 XPCOMUtils 是对 XPCOM 的 QueryInterface、createInstance、getService 的一个简化封装:
// Short aliases for the `interfaces` and `classes` members of `Components`.
// NOTE(review): `Components` is not defined in this snippet; presumably it is
// the global supplied by the Mozilla/XPCOM host environment — confirm.
var _CI = Components.interfaces;
var _CC = Components.classes;
/**
 * Holder for static shortcuts around three XPCOM calls: getService,
 * createInstance and QueryInterface. The function itself does nothing; it
 * only serves as the object the helpers are attached to.
 */
function XPCOMUtils() {}

/**
 * Looks up a component class by contract name and returns its service.
 *
 * @param {string} cName - Key into `_CC` identifying the component class.
 * @param {string} ifaceName - Key into `_CI` identifying the interface.
 * @returns {*} The result of `getService`, or undefined (after an alert)
 *     when `_CC` has no entry for `cName`.
 */
XPCOMUtils.CCSV = function(cName, ifaceName) {
  var componentClass = _CC[cName];
  if (!componentClass) {
    alert("Can't get the components class name: " + cName);
    return undefined;
  }
  return componentClass.getService(_CI[ifaceName]);
};

/**
 * Creates a new instance of a component class.
 *
 * Unlike CCSV, no check is made that `_CC` has an entry for `cName`; a
 * missing entry makes the property access throw.
 *
 * @param {string} cName - Key into `_CC` identifying the component class.
 * @param {string} ifaceName - Key into `_CI` identifying the interface.
 * @returns {*} The result of `createInstance`.
 */
XPCOMUtils.CCIN = function(cName, ifaceName) {
  var componentClass = _CC[cName];
  var requestedInterface = _CI[ifaceName];
  return componentClass.createInstance(requestedInterface);
};

/**
 * Forwards to `obj.QueryInterface(iface)`.
 *
 * @param {Object} obj - Object exposing a QueryInterface method.
 * @param {*} iface - Interface passed through unchanged.
 * @returns {*} Whatever `QueryInterface` returns.
 */
XPCOMUtils.QI = function(obj, iface) {
  return obj.QueryInterface(iface);
};