1.相关依赖的包
Winista.Text.HtmlParser
从 NuGet 中获取即可
2.使用方法
将html文件导入
// The HTML document to parse, held as a string.
string html = "<!DOC......";
// Build a parser over the HTML text, passing "utf-8" as the charset.
Parser parser = Parser.CreateParser(html, "utf-8");
// Select the elements to look for (here: td) by wrapping the tag name in a filter.
NodeFilter filter = new TagNameFilter("td");
// Run the filter over the document to obtain the list of matching nodes.
NodeList nodes = parser.Parse(filter);
// Walk the matched nodes and extract an (id, text) pair from each one.
// NOTE(review): `result` is not declared in this snippet; it is assumed to be
// a collection of pojo.Game provided by the surrounding code.
for (int i = 0; i < nodes.Size(); i++)
{
    INode cell = nodes[i];
    // getTag returns null when the first child is missing or is not a tag;
    // skip such cells instead of dereferencing null below.
    ITag tag = getTag(cell.FirstChild);
    if (tag == null)
    {
        continue;
    }

    string id = tag.GetAttribute("value");
    string value = cell.ToPlainTextString();
    result.Add(new pojo.Game(id, value));
}
/// <summary>
/// Converts a node to an <c>ITag</c>.
/// </summary>
/// <param name="node">The node to convert; may be null.</param>
/// <returns>
/// The same node typed as <c>ITag</c>, or null when <paramref name="node"/>
/// is null or is not an <c>ITag</c>.
/// </returns>
private static ITag getTag(INode node)
{
    // `as` yields null both for a null reference and for a non-ITag node,
    // so no separate null or type check is needed.
    return node as ITag;
}
官方文档:
http://www.netomatix.com/Products/DocumentManagement/HTMLParserDocs.aspx
参考:
http://www.cnblogs.com/doll-net/archive/2007/06/29/800396.html
https://blog.csdn.net/cdefg198/article/details/8004203