using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
namespace CaptureHtmlConent
{
class Program
{
    /// <summary>
    /// Manual smoke test: primes a cookie session with a GET against the search
    /// landing page, then submits the search form with a POST through
    /// <c>spider.getPageContent</c>.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // Both the posted form data and the returned page are handled as UTF-8.
        const string postEncodingName = "utf-8";
        const string pageEncodingName = "utf-8";

        // Value sent in the Referer header of the POST.
        const string refererHeader = "nsearch.eastday.com";

        // Pre-encoded application/x-www-form-urlencoded search form body.
        const string formBody = "basenames=rmwsite&where=(CONTENT%3D(%E5%B7%A5)%20or%20TITLE%3D(%E5%B7%A5)%20or%20AUTHOR%3D(%E5%B7%A5))&curpage=1&pagecount=20&classvalue=ALL&classfield=CLASS2&isclass=1&keyword=%E5%B7%A5&sortfield=-INPUTTIME";

        // Page requested first (GET) so the server can hand out its cookies.
        const string landingPage = "http://search.people.com.cn/rmw/GB/rmwsearch/dj_index.jsp";

        // Endpoint that receives the form POST.
        const string searchEndpoint = "http://search.people.com.cn/rmw/GB/rmwsearch/gj_searchht.jsp";

        // The result is kept in a local only so it can be inspected in a debugger.
        string pageHtml = spider.getPageContent(
            landingPage,
            searchEndpoint,
            refererHeader,
            formBody,
            postEncodingName,
            pageEncodingName);
    }
}
class spider
{
    // Content type sent with every form POST.
    private const string FormContentType = "application/x-www-form-urlencoded";

    // Browser-like User-Agent sent with every form POST.
    private const string BrowserUserAgent = "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50";

    /// <summary>
    /// Sends a form POST to <paramref name="url"/> and returns the response body.
    /// </summary>
    /// <param name="url">URL that receives the POST.</param>
    /// <param name="referer">Value for the Referer request header.</param>
    /// <param name="data">Form data to post (already URL-encoded).</param>
    /// <param name="encondingForPostData">Name of the encoding used to turn <paramref name="data"/> into bytes.</param>
    /// <param name="encondingForPage">Name of the encoding used to decode the response body.</param>
    /// <param name="cookies">
    /// On success, the Cookie header string held by the cookie container for the
    /// request URL after the response arrived; an empty string on failure.
    /// </param>
    /// <returns>
    /// The decoded response body, or the literal string "第一次POST出错" when any
    /// exception occurs (exceptions are never propagated to the caller).
    /// </returns>
    public static string getPageContent(string url, string referer, string data, string encondingForPostData, string encondingForPage, out string cookies)
    {
        cookies = "";
        try
        {
            string cookieHeader;
            string html = PostForm(url, referer, data, encondingForPostData, encondingForPage, new CookieContainer(), out cookieHeader);
            cookies = cookieHeader;
            return html;
        }
        catch (Exception)
        {
            // Best-effort contract kept for existing callers: report the
            // failure through the returned text instead of throwing.
            cookies = "";
            return "第一次POST出错";
        }
    }

    /// <summary>
    /// Issues a GET to <paramref name="hostUrl"/> to collect cookies, then sends a
    /// form POST to <paramref name="postUrl"/> with the same cookie container and
    /// returns the response body.
    /// </summary>
    /// <param name="hostUrl">URL requested first with GET so its cookies land in the container.</param>
    /// <param name="postUrl">URL that receives the POST.</param>
    /// <param name="referer">Value for the Referer request header of the POST.</param>
    /// <param name="data">Form data to post (already URL-encoded).</param>
    /// <param name="encondingForPostData">Name of the encoding used to turn <paramref name="data"/> into bytes.</param>
    /// <param name="encondingForPage">Name of the encoding used to decode the response body.</param>
    /// <returns>
    /// The decoded response body; the literal string "获得cookie出错" when the GET
    /// step throws; or "第一次POST出错" when the POST step throws.
    /// </returns>
    public static string getPageContent(string hostUrl, string postUrl, string referer, string data, string encondingForPostData, string encondingForPage)
    {
        // One container is shared by both requests so cookies set by the GET
        // are sent with the POST.
        CookieContainer cookieJar = new CookieContainer();

        try
        {
            FetchCookieHeader(hostUrl, cookieJar);
        }
        catch (Exception)
        {
            return "获得cookie出错";
        }

        try
        {
            string unusedCookieHeader;
            return PostForm(postUrl, referer, data, encondingForPostData, encondingForPage, cookieJar, out unusedCookieHeader);
        }
        catch (Exception)
        {
            return "第一次POST出错";
        }
    }

    /// <summary>
    /// Issues a GET to http://dig.chouti.com/ and returns the resulting Cookie
    /// header string for that URL.
    /// </summary>
    /// <returns>The Cookie header string held by the cookie container after the response.</returns>
    /// <remarks>Unlike the getPageContent overloads, exceptions are not caught here.</remarks>
    public static string getCookie()
    {
        return FetchCookieHeader("http://dig.chouti.com/", new CookieContainer());
    }

    // Builds a keep-alive request that does not follow redirects and uses the given cookie container.
    private static HttpWebRequest CreateRequest(string url, string method, CookieContainer cookieJar)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Method = method;
        request.KeepAlive = true;
        request.AllowAutoRedirect = false;
        // No manual "Cookie" header is added: the container supplies the
        // cookies for the request.
        request.CookieContainer = cookieJar;
        return request;
    }

    // Sends a GET to url and returns the container's Cookie header for it, closing the response.
    private static string FetchCookieHeader(string url, CookieContainer cookieJar)
    {
        HttpWebRequest request = CreateRequest(url, "GET", cookieJar);
        try
        {
            // The response body is not needed, but the response must still be
            // closed so its connection is released.
            using (WebResponse response = request.GetResponse())
            {
                return request.CookieContainer.GetCookieHeader(request.RequestUri);
            }
        }
        catch (Exception)
        {
            request.Abort();
            throw;
        }
    }

    // Posts the form data to url, returns the decoded body and reports the container's Cookie header.
    private static string PostForm(string url, string referer, string data, string encondingForPostData, string encondingForPage, CookieContainer cookieJar, out string cookieHeader)
    {
        HttpWebRequest request = CreateRequest(url, "POST", cookieJar);
        try
        {
            byte[] payload = Encoding.GetEncoding(encondingForPostData).GetBytes(data);

            request.ContentType = FormContentType;
            request.UserAgent = BrowserUserAgent;
            request.ContentLength = payload.Length;
            request.Referer = referer;

            using (Stream body = request.GetRequestStream())
            {
                body.Write(payload, 0, payload.Length);
            }

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding(encondingForPage)))
            {
                cookieHeader = request.CookieContainer.GetCookieHeader(request.RequestUri);
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Abort only on failure, to drop a half-finished exchange.
            request.Abort();
            throw;
        }
    }
}
}
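// POST小例 (title of the source article: "A small POST example")
// 最新推荐文章于 2024-09-12 16:31:33 发布 (web-page footer captured together with the code; not part of the program)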