/// <summary>
/// Downloads the HTML page at <paramref name="uri"/> and converts it into
/// well-formed, indented XML text by streaming it through <c>SgmlReader</c>.
/// </summary>
/// <param name="uri">
/// Address of the page to download. When null or empty, a built-in default
/// address is used instead.
/// </param>
/// <param name="xpath">XPath expression; not used by this method.</param>
/// <returns>
/// The converted markup on success; the message of the exception if the
/// download or the conversion fails.
/// </returns>
private string GetWellFormedHTML(string uri, string xpath)
{
    StringWriter sw = null;       // receives the generated XML text
    SgmlReader reader = null;     // parses the (possibly malformed) HTML
    XmlTextWriter writer = null;  // serializes the parsed nodes as XML
    try
    {
        if (String.IsNullOrEmpty(uri))
        {
            uri = "http://www.ypshop.net/list--91-940-940--search-1.html";
        }

        // Raw page content; the client is released as soon as the download finishes.
        string strWebContent;
        using (WebClient webclient = new WebClient())
        {
            webclient.Encoding = Encoding.UTF8;
            strWebContent = webclient.DownloadString(uri);
        }

        reader = new SgmlReader();
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(strWebContent);

        sw = new StringWriter();
        writer = new XmlTextWriter(sw);
        writer.Formatting = Formatting.Indented;

        while (reader.Read())
        {
            // Whitespace-only nodes are skipped; every other node is copied to the output.
            if (reader.NodeType != XmlNodeType.Whitespace)
            {
                writer.WriteNode(reader, true);
            }
        }

        writer.Flush();
        return sw.ToString();
    }
    catch (Exception exp)
    {
        // Callers receive the error text in place of the markup.
        return exp.Message;
    }
    finally
    {
        // Cleanup runs on success and failure alike. Each reference is checked
        // because a failure may occur before it has been assigned.
        if (writer != null)
        {
            writer.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
    }
}
#endregion
// The method below is kept for reference.
/// <summary>
/// Requests the HTML page at <paramref name="uri"/>, converts it into
/// well-formed XML through <c>SgmlReader</c>, and loads the result into an
/// <see cref="XPathDocument"/>.
/// </summary>
/// <param name="uri">
/// Address of the page to request. When null or empty, a built-in default
/// address is used instead.
/// </param>
/// <returns>
/// An empty string on success, because the XPath node selection is currently
/// disabled; the message of the exception if any step fails.
/// </returns>
private string GetWellFormedHTML_Handle(string uri)
{
    HttpWebResponse res = null;
    StreamReader sReader = null;
    StringWriter sw = null;
    SgmlReader reader = null;
    XmlTextWriter writer = null;
    try
    {
        if (String.IsNullOrEmpty(uri))
        {
            uri = "http://www.ypshop.net/list--91-940-940--search-1.html";
        }

        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(uri);
        res = (HttpWebResponse)req.GetResponse();
        sReader = new StreamReader(res.GetResponseStream());

        reader = new SgmlReader();
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(sReader.ReadToEnd());

        sw = new StringWriter();
        writer = new XmlTextWriter(sw);
        writer.Formatting = Formatting.Indented;

        while (reader.Read())
        {
            // Whitespace-only nodes are skipped; every other node is copied to the output.
            if (reader.NodeType != XmlNodeType.Whitespace)
            {
                writer.WriteNode(reader, true);
            }
        }
        writer.Flush();

        StringBuilder sb = new StringBuilder();
        // Loading the converted text parses it as XML; a parse failure is
        // reported through the catch block below.
        XPathDocument doc = new XPathDocument(new StringReader(sw.ToString()));
        XPathNavigator nav = doc.CreateNavigator();
        // NOTE(review): the selection below is disabled and refers to an
        // "xpath" value this method does not receive, so nothing is appended
        // to sb and the method returns an empty string on success.
        //XPathNodeIterator nodes = nav.Select(xpath);
        //while (nodes.MoveNext())
        //{
        // sb.Append(nodes.Current.Value + " ");
        //}
        return sb.ToString();
    }
    catch (Exception exp)
    {
        // Callers receive the error text in place of the result.
        return exp.Message;
    }
    finally
    {
        // Cleanup runs on success and failure alike. Each reference is checked
        // because a failure may occur before it has been assigned.
        if (writer != null)
        {
            writer.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
        if (sReader != null)
        {
            sReader.Close();
        }
        if (res != null)
        {
            res.Close();
        }
    }
}