在学习HTML Xpath之前呢我们先来下载一下Dll文件
下载地址:http://htmlagilitypack.codeplex.com/ (CodePlex 已停止服务,如果链接打不开,也可以通过 NuGet 安装 HtmlAgilityPack 包)
大家单击如下图片下载就行了
<ignore_js_op>
接下来就是在程序中引用一下,
<ignore_js_op>
然后就可以直接调用了,大家看看代码吧
-
// HtmlDocument is the object used to parse and query an HTML document.
HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();

// Parse the HTML text held in strhtml.
hd.LoadHtml(strhtml);

// SelectSingleNode returns null when no node matches the XPath, so check the
// result before reading OuterHtml instead of risking a NullReferenceException.
var matched = hd.DocumentNode.SelectSingleNode("//*[@id='e_font']");
string str = matched != null ? matched.OuterHtml : string.Empty;
这样就可以得到一个标签的HTML代码了。
OuterHtml是取包含标签本身的Html;如果是InnerHtml,取的就是包含在这个标签之内的所有Html代码了,
这点大家要注意了
如果大家想获取Html代码的Xpath路径就是这部分
- //*[@id='e_font']
这个其实很简单,只要大家安装一个Firebug就行了,
看下图片
<ignore_js_op>
大家只要进入选择模式,然后选择你要的内容,然后右键复制一下就行了。
然后放在SelectSingleNode()方法里就OK了
下面我说说几个方法和属性的意思吧。
方法
SelectNodes 获取的是一个集合
SelectSingleNode 获取一个标签
SetAttributeValue 设置标签的属性值例如:SetAttributeValue("name","xpath-89");这说明把name属性的值修改为xpath-89
属性
OuterHtml 是取包含本身的Html
InnerHtml 取的包含在这个标签之内的所有Html代码了
XPath 获取相对应的Xpath值
Attributes 获取一个属性的值,例如:Attributes["name"](C#中索引器要用方括号)
也可以进行添加属性例如:
-
// Look the node up by the XPath stored in item.Key. SelectSingleNode returns
// null when nothing matches, so only add the attribute when a node was found.
var target = hd.DocumentNode.SelectSingleNode(item.Key);
if (target != null)
{
    target.Attributes.Add("xpathid", "xpath_1");
}
下面我写了一个递归获取Html页面所有Xpath值的方法大家看一下吧
-
// Collected entries: Key carries a node's XPath. The original note describes
// Value as "the whole node", but the walker shown below stores "" in it.
public List<ObjXpath> XpathList = new List<ObjXpath>();

// The HTML source to analyse. For one way of fetching it, see the author's
// HttpHelper class: http://www.sufeinet.com/thread-3-1-1.html
public string strhtml = "";

// Running counter; its current value becomes the id of the next entry.
private int Index = 0;
-
/// <summary>
/// Parses <c>strhtml</c> and rebuilds <c>XpathList</c> from scratch.
/// </summary>
private void SartNode()
{
    // Parse the HTML source into a fresh document object.
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(strhtml);

    // Forget whatever an earlier run collected and restart the id counter.
    XpathList.Clear();
    Index = 0;

    // Hand each top-level node to the recursive walker. Setxpath records the
    // children of the node it is given, not that node itself.
    HtmlNodeCollection roots = document.DocumentNode.ChildNodes;
    for (int i = 0; i < roots.Count; i++)
    {
        Setxpath(roots[i]);
    }
}
-
/// <summary>
/// Walks the HTML DOM recursively and appends one <c>ObjXpath</c> entry to
/// <c>XpathList</c> for every descendant of <paramref name="node"/> whose
/// XPath does not contain '#'.
/// </summary>
/// <param name="node">Node whose children are processed; the node itself is not recorded.</param>
private void Setxpath(HtmlNode node)
{
    foreach (HtmlNode child in node.ChildNodes)
    {
        string path = child.XPath;

        // Paths containing '#' are skipped together with everything below them
        // (presumably the #text / #comment pseudo-nodes - confirm against HtmlAgilityPack).
        if (!path.Contains("#"))
        {
            // Record the node first, then descend, so parents precede their children.
            XpathList.Add(new ObjXpath() { id = Index.ToString(), Key = path, Value = "" });
            Index++;

            if (child.ChildNodes.Count > 0)
            {
                Setxpath(child);
            }
        }
    }
}
-
/// <summary>
/// Plain data holder for one entry of the XPath list.
/// </summary>
public class ObjXpath
{
    private string _id;
    private string _key;
    private string _value;

    /// <summary>Identifier of the entry, kept as text.</summary>
    public string id
    {
        get { return _id; }
        set { _id = value; }
    }

    /// <summary>Key of the entry; the walker in this article stores the node's XPath here.</summary>
    public string Key
    {
        get { return _key; }
        set { _key = value; }
    }

    /// <summary>Value of the entry; the walker in this article stores an empty string here.</summary>
    public string Value
    {
        get { return _value; }
        set { _value = value; }
    }
}
XpathList 就是获取的所有Xpath值了,大家有兴趣的话可以试试
我们先来看看效果吧
<ignore_js_op>
好了下面放出所有代码给大家
-
using
System
;
-
using
System.
Collections.
Generic
;
-
using
System.
ComponentModel
;
-
using
System.
Data
;
-
using
System.
Drawing
;
-
using
System.Linq
;
-
using
System.
Text
;
-
using
System.Windows.Forms
;
-
using
System.
Text.
RegularExpressions
;
-
using
System.
Threading
;
-
using HtmlAgilityPack
;
-
using
System.
IO
;
-
using
System.
Runtime.
Serialization.Json
;
-
-
namespace AutoXpathTools
-
{
-
/// <summary>
/// Main window of the XPath tool. It downloads the page whose URL is typed into
/// textBox1, lists the XPath of the nodes found in it, and shows the outer HTML
/// of the XPath the user picks.
/// </summary>
public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
    }

    #region Private fields and methods

    // Delegate taking a single string; used to hand an XPath over to the UI thread.
    private delegate void SetListBox(string str);

    // One entry per recorded node: Key is the node's XPath, Value is stored as "".
    private readonly List<ObjXpath> XpathList = new List<ObjXpath>();

    // Running counter; its current value becomes the id of the next entry.
    private int Index = 0;

    // Document queried by button3_Click; (re)loaded by btnGetXpath_Click.
    private readonly HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();

    #endregion

    /// <summary>
    /// Downloads the page at the URL in textBox1, shows its HTML in txtXml and
    /// starts a worker thread that lists the XPath of its nodes.
    /// </summary>
    private void btnGetXpath_Click(object sender, EventArgs e)
    {
        try
        {
            HttpHelper http = new HttpHelper();
            HttpItem item = new HttpItem() { URL = textBox1.Text.Trim(), IsToLower = false, Encoding = "gbk" };
            txtXml.Text = http.GetHtml(item);

            // Snapshot the text on the UI thread so the worker never has to read a control.
            string html = txtXml.Text;

            // The check below treats a body that is just "error" (any casing) as a failed download.
            bool failed = string.IsNullOrWhiteSpace(html)
                || string.Equals(html.Trim(), "error", StringComparison.OrdinalIgnoreCase);

            if (!failed)
            {
                // Load the document that button3_Click will query later.
                hd.LoadHtml(html);

                // XpathList and Index are reset for every run, so drop the XPaths
                // listed for the previous page as well.
                listBox1.Items.Clear();

                Thread pingTask = new Thread(new ThreadStart(delegate
                {
                    // Work done on the worker thread.
                    SartNode(html);
                }));
                // Background thread: it must not keep the process alive once the form is closed.
                pingTask.IsBackground = true;
                pingTask.Start();
            }
            else
            {
                txtXml.Text = "根据您的的ULR:" + textBox1.Text.Trim() + "无法得到任何内容";
            }
        }
        catch (Exception ex)
        {
            // Show the failure to the user instead of crashing the handler.
            txtXml.Text = ex.Message.Trim();
        }
    }

    /// <summary>
    /// Parses <paramref name="strhtml"/> and rebuilds <c>XpathList</c> from scratch.
    /// Started on a worker thread by btnGetXpath_Click.
    /// </summary>
    /// <param name="strhtml">HTML source to analyse.</param>
    private void SartNode(string strhtml)
    {
        // A separate document instance, so the field hd is not touched from the worker thread.
        HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(strhtml);

        Index = 0;
        XpathList.Clear();

        // Setxpath records the children of the node it is given, not that node itself.
        foreach (HtmlNode root in document.DocumentNode.ChildNodes)
        {
            Setxpath(root);
        }
    }

    /// <summary>
    /// Walks the HTML DOM recursively; for every descendant of <paramref name="node"/>
    /// whose XPath does not contain '#', appends an entry to <c>XpathList</c> and
    /// reports the XPath to the UI.
    /// </summary>
    /// <param name="node">Node whose children are processed.</param>
    private void Setxpath(HtmlNode node)
    {
        foreach (HtmlNode child in node.ChildNodes)
        {
            string path = child.XPath;

            // Paths containing '#' are skipped together with everything below them
            // (presumably the #text / #comment pseudo-nodes - confirm against HtmlAgilityPack).
            if (path.Contains("#"))
            {
                continue;
            }

            // Record and report the node first, then descend, so parents precede their children.
            XpathList.Add(new ObjXpath() { id = Index.ToString(), Key = path, Value = "" });
            UIContorol(path);
            Index++;

            if (child.ChildNodes.Count > 0)
            {
                Setxpath(child);
            }
        }
    }

    /// <summary>
    /// Appends <paramref name="str"/> to listBox1 and shows it in the status bar.
    /// May be called from any thread: calls arriving from the worker are
    /// re-dispatched to the UI thread through the SetListBox delegate.
    /// </summary>
    /// <param name="str">Text (an XPath) to display.</param>
    private void UIContorol(string str)
    {
        // The form may already be closed while the worker is still running.
        if (IsDisposed || !IsHandleCreated)
        {
            return;
        }

        if (InvokeRequired)
        {
            Invoke(new SetListBox(UIContorol), new object[] { str });
            return;
        }

        listBox1.Items.Add(str);
        toolStripStatusLabel1.Text = str;
    }

    /// <summary>
    /// Copies the XPath selected in listBox1 into txtPath.
    /// </summary>
    private void listBox1_SelectedValueChanged(object sender, EventArgs e)
    {
        if (listBox1.SelectedItem == null)
        {
            return;
        }

        txtPath.Text = listBox1.SelectedItem.ToString().Trim();
    }

    /// <summary>
    /// Shows in txtContents the outer HTML of the node addressed by the XPath in txtPath.
    /// </summary>
    private void button3_Click(object sender, EventArgs e)
    {
        string xpath = txtPath.Text.Trim();
        if (xpath.Length == 0)
        {
            // Nothing to look up.
            txtContents.Text = string.Empty;
            return;
        }

        try
        {
            // SelectSingleNode returns null when nothing matches the expression.
            HtmlNode found = hd.DocumentNode.SelectSingleNode(xpath);
            txtContents.Text = found != null ? found.OuterHtml : string.Empty;
        }
        catch (System.Xml.XPath.XPathException ex)
        {
            // The user can type any text into txtPath; show why the expression was rejected.
            txtContents.Text = ex.Message;
        }
    }

    /// <summary>
    /// Load handler; intentionally does nothing.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        // A commented-out login experiment that embedded an account name, a
        // password and a session cookie used to live here; it was removed so
        // that no credentials are kept in the source.
    }
}
-
/// <summary>
/// Plain data holder for one entry of the XPath list built by Form1.
/// </summary>
public class ObjXpath
{
    private string _id;
    private string _key;
    private string _value;

    /// <summary>Identifier of the entry, kept as text (Form1 assigns Index.ToString()).</summary>
    public string id
    {
        get { return _id; }
        set { _id = value; }
    }

    /// <summary>Key of the entry; Form1 stores the node's XPath here.</summary>
    public string Key
    {
        get { return _key; }
        set { _key = value; }
    }

    /// <summary>Value of the entry; Form1 stores an empty string here.</summary>
    public string Value
    {
        get { return _value; }
        set { _value = value; }
    }
}
-
}
就到这里吧,大家可以下载我的源代码试试手
打包下载:
<ignore_js_op>
AutoXpathTools.zip (76.32 KB, 下载次数: 0)
如果你感觉可以的话就给我推荐一下吧。感谢大家
1万+

被折叠的 条评论
为什么被折叠?



