POST小例

本文介绍了一个使用C#实现的网页内容抓取小例:程序先通过GET请求访问入口页面以获取Cookie,再携带该Cookie以POST方式提交表单数据,获取指定URL的网页内容;POST数据的编码与响应页面的编码可以分别指定。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;

namespace CaptureHtmlConent
{
    /// <summary>
    /// Console entry point that demonstrates the cookie-aware POST helper in <c>spider</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Requests the search landing page to obtain cookies, then POSTs a
        /// URL-encoded search form to the result page using those cookies.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Value sent in the Referer header of the POST request.
            string referer = "nsearch.eastday.com";

            // Form body, already URL-encoded.
            string data = "basenames=rmwsite&where=(CONTENT%3D(%E5%B7%A5)%20or%20TITLE%3D(%E5%B7%A5)%20or%20AUTHOR%3D(%E5%B7%A5))&curpage=1&pagecount=20&classvalue=ALL&classfield=CLASS2&isclass=1&keyword=%E5%B7%A5&sortfield=-INPUTTIME";

            // Encoding names used for the POST body and for decoding the response.
            string encondingForPostData = "utf-8";
            string encondingForPage = "utf-8";

            // Page requested first (GET) to collect cookies, and page the form is posted to.
            string hostUrl = "http://search.people.com.cn/rmw/GB/rmwsearch/dj_index.jsp";
            string postUrl = "http://search.people.com.cn/rmw/GB/rmwsearch/gj_searchht.jsp";

            // Holds either the response HTML or the error text returned by the helper.
            string contentByCookie = spider.getPageContent(hostUrl, postUrl, referer, data, encondingForPostData, encondingForPage);
        }
    }
    /// <summary>
    /// Static helpers that fetch page content with HTTP POST and keep track of
    /// the cookies exchanged with the server.
    /// </summary>
    class spider
    {
        // User-Agent header sent with every POST request.
        private const string PostUserAgent = "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50";

        // URL requested by the parameterless getCookie().
        private const string DefaultCookieUrl = "http://dig.chouti.com/";

        /// <summary>
        /// POSTs form data to a URL and returns the response body.
        /// </summary>
        /// <param name="url">URL the form data is posted to.</param>
        /// <param name="referer">Value of the Referer request header.</param>
        /// <param name="data">Request body (form data) to send.</param>
        /// <param name="encondingForPostData">Name of the encoding used to turn <paramref name="data"/> into bytes.</param>
        /// <param name="encondingForPage">Name of the encoding used to decode the response body.</param>
        /// <param name="cookies">Receives the cookie header stored for <paramref name="url"/> after the response; an empty string on failure.</param>
        /// <returns>The response body, or the text "第一次POST出错" when any exception occurs.</returns>
        public static string getPageContent(string url, string referer, string data, string encondingForPostData, string encondingForPage, out string cookies)
        {
            cookies = "";
            try
            {
                CookieContainer cookieJar = new CookieContainer();
                string cookieHeader;
                string html = PostForm(url, referer, data, encondingForPostData, encondingForPage, cookieJar, out cookieHeader);

                // Publish the cookies only once the whole exchange has succeeded.
                cookies = cookieHeader;
                return html;
            }
            catch (Exception)
            {
                // Deliberate best-effort contract: callers receive an error text instead of an exception.
                cookies = "";
                return "第一次POST出错";
            }
        }

        /// <summary>
        /// Requests <paramref name="hostUrl"/> with GET to collect cookies, then POSTs
        /// form data to <paramref name="postUrl"/> using the same cookie container.
        /// </summary>
        /// <param name="hostUrl">URL requested first to obtain cookies.</param>
        /// <param name="postUrl">URL the form data is posted to.</param>
        /// <param name="referer">Value of the Referer header of the POST request.</param>
        /// <param name="data">Request body (form data) to send.</param>
        /// <param name="encondingForPostData">Name of the encoding used to turn <paramref name="data"/> into bytes.</param>
        /// <param name="encondingForPage">Name of the encoding used to decode the response body.</param>
        /// <returns>
        /// The response body of the POST; "获得cookie出错" when the initial GET fails;
        /// "第一次POST出错" when the POST fails.
        /// </returns>
        public static string getPageContent(string hostUrl, string postUrl, string referer, string data, string encondingForPostData, string encondingForPage)
        {
            // One container is shared by both requests so cookies set by the GET are available to the POST.
            CookieContainer cookieJar = new CookieContainer();

            try
            {
                FetchCookieHeader(hostUrl, cookieJar);
            }
            catch (Exception)
            {
                // Deliberate best-effort contract: report the failed step as text.
                return "获得cookie出错";
            }

            try
            {
                string unusedCookieHeader;
                return PostForm(postUrl, referer, data, encondingForPostData, encondingForPage, cookieJar, out unusedCookieHeader);
            }
            catch (Exception)
            {
                // Deliberate best-effort contract: report the failed step as text.
                return "第一次POST出错";
            }
        }

        /// <summary>
        /// Requests the default site with GET and returns the cookie header stored for it.
        /// </summary>
        /// <returns>The cookie header string for the default URL.</returns>
        public static string getCookie()
        {
            return getCookie(DefaultCookieUrl);
        }

        /// <summary>
        /// Requests <paramref name="url"/> with GET and returns the cookie header stored for it.
        /// Exceptions raised by the request are not caught here.
        /// </summary>
        /// <param name="url">URL to request.</param>
        /// <returns>The cookie header string for <paramref name="url"/>.</returns>
        public static string getCookie(string url)
        {
            return FetchCookieHeader(url, new CookieContainer());
        }

        /// <summary>
        /// Sends a GET request (redirects not followed) with the given cookie container
        /// and returns the cookie header the container holds for the request URI afterwards.
        /// </summary>
        private static string FetchCookieHeader(string url, CookieContainer cookieJar)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.KeepAlive = true;
            request.CookieContainer = cookieJar;
            request.AllowAutoRedirect = false;

            // The response body is not needed; dispose the response so the connection is released.
            using (request.GetResponse())
            {
                return cookieJar.GetCookieHeader(request.RequestUri);
            }
        }

        /// <summary>
        /// Sends a form-urlencoded POST (redirects not followed) and returns the decoded
        /// response body. Cookies are carried by <paramref name="cookieJar"/>, so no
        /// Cookie header is added by hand.
        /// </summary>
        private static string PostForm(string postUrl, string referer, string data, string postEncodingName, string pageEncodingName, CookieContainer cookieJar, out string cookieHeader)
        {
            // Resolve both encodings up front so an unknown name fails before any network traffic.
            byte[] payload = Encoding.GetEncoding(postEncodingName).GetBytes(data);
            Encoding pageEncoding = Encoding.GetEncoding(pageEncodingName);

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(postUrl);
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";
            request.UserAgent = PostUserAgent;
            request.ContentLength = payload.Length;
            request.Referer = referer;
            request.AllowAutoRedirect = false;
            request.CookieContainer = cookieJar;
            request.KeepAlive = true;

            // Write the request body.
            using (Stream body = request.GetRequestStream())
            {
                body.Write(payload, 0, payload.Length);
            }

            // Read the response; the using blocks close the reader and response on every path.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                cookieHeader = cookieJar.GetCookieHeader(request.RequestUri);
                using (StreamReader reader = new StreamReader(response.GetResponseStream(), pageEncoding))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值