/// <summary>
/// Builds a page for <paramref name="_url"/>, logging in first when login data is supplied.
/// </summary>
/// <remarks>
/// The URL is unescaped, then every run of characters outside U+0000..U+00FF is
/// percent-encoded as GB2312. If no login URL or POST body is given, or a cookie
/// container is already cached for the URL's host in <c>WebPage.webcookies</c>, the work
/// is delegated to <c>Init(_url)</c>. Otherwise the POST body is sent to the login URL,
/// the resulting cookies are cached for the host, and the page is fetched with them.
/// A failed login also falls back to <c>Init(_url)</c>; a failed page fetch is logged to
/// the console and leaves <c>m_good</c> set to <c>false</c>.
/// </remarks>
/// <param name="_url">Address of the page to fetch; may be percent-escaped.</param>
/// <param name="_loginurl">Address the login form is posted to; null/blank means no login.</param>
/// <param name="_post">URL-encoded form body for the login request; null/blank means no login.</param>
public WebPage(string _url, string _loginurl, string _post)
{
    // Responses that declare a length above 4 MiB are rejected.
    const long MaxPageBytes = 1 << 22;

    // Best-effort normalisation: keep the URL as supplied when it cannot be unescaped.
    try
    {
        _url = Uri.UnescapeDataString(_url);
    }
    catch (Exception)
    {
        // Deliberately ignored.
    }

    // Percent-encode EVERY run of non-Latin-1 characters (not just the first one) as GB2312.
    // The encoding is resolved lazily so plain ASCII URLs never need the GB2312 code page.
    Regex re = new Regex("(?<h>[^\x00-\xff]+)");
    _url = re.Replace(_url, delegate(Match m)
    {
        return System.Web.HttpUtility.UrlEncode(m.Value, Encoding.GetEncoding("GB2312"));
    });

    // Null login data is treated the same as blank login data.
    bool hasLogin = _loginurl != null && _loginurl.Trim() != ""
                 && _post != null && _post.Trim() != "";
    bool alreadyLoggedIn = false;
    if (hasLogin)
    {
        string host = new Uri(_url).Host;
        // Read under the same lock that guards the write further down.
        lock (WebPage.webcookies)
        {
            alreadyLoggedIn = WebPage.webcookies.ContainsKey(host);
        }
    }
    if (!hasLogin || alreadyLoggedIn)
    {
        Init(_url);
        return;
    }

    // ---- Log in ----
    m_post = _post;
    m_loginurl = _loginurl;
    byte[] bytes = Encoding.Default.GetBytes(_post);
    // Collects the cookies issued by the login response.
    CookieContainer myCookieContainer = new CookieContainer();
    try
    {
        HttpWebRequest loginRequest = (HttpWebRequest)WebRequest.Create(_loginurl);
        loginRequest.ContentType = "application/x-www-form-urlencoded";
        loginRequest.AllowAutoRedirect = false;
        loginRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        loginRequest.Timeout = 60000;
        loginRequest.KeepAlive = true;
        loginRequest.ContentLength = bytes.Length;
        loginRequest.Method = "POST";
        // Attach the container so the login cookies are captured.
        loginRequest.CookieContainer = myCookieContainer;

        using (Stream requestBody = loginRequest.GetRequestStream())
        {
            requestBody.Write(bytes, 0, bytes.Length);
        }
        using (HttpWebResponse loginResponse = (HttpWebResponse)loginRequest.GetResponse())
        {
            foreach (Cookie ck in loginResponse.Cookies)
            {
                myCookieContainer.Add(ck);
            }
        }
    }
    catch (Exception)
    {
        // Login failed: fall back to the Init path.
        Init(_url);
        return;
    }

    // ---- Fetch the page with the login cookies ----
    try
    {
        m_uri = new Uri(_url);
        m_links = new List<Link>();
        m_html = "";
        m_outstr = "";
        m_title = "";
        m_good = true;

        // Skip obvious binary downloads without issuing a request.
        if (_url.EndsWith(".rar", StringComparison.Ordinal)
            || _url.EndsWith(".dat", StringComparison.Ordinal)
            || _url.EndsWith(".msi", StringComparison.Ordinal))
        {
            m_good = false;
            return;
        }

        HttpWebRequest rqst = (HttpWebRequest)WebRequest.Create(m_uri);
        rqst.AllowAutoRedirect = true;
        rqst.MaximumAutomaticRedirections = 3;
        rqst.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        rqst.KeepAlive = true;
        rqst.Timeout = 30000;
        rqst.CookieContainer = myCookieContainer;

        // Cache the cookies so later pages on this host skip the login.
        lock (WebPage.webcookies)
        {
            WebPage.webcookies[m_uri.Host] = myCookieContainer;
        }

        // 'using' releases the connection on every exit path, including exceptions.
        using (HttpWebResponse rsps = (HttpWebResponse)rqst.GetResponse())
        {
            string contentType = rsps.ContentType ?? "";
            if (!contentType.StartsWith("text/", StringComparison.OrdinalIgnoreCase)
                || rsps.ContentLength > MaxPageBytes)
            {
                m_good = false;
                return;
            }

            Encoding cding = Encoding.Default;
            int ix = contentType.IndexOf("charset=", StringComparison.OrdinalIgnoreCase);
            if (ix != -1)
            {
                // Take only the charset token: stop at the next parameter and strip quotes,
                // so values such as charset="utf-8"; boundary=x still resolve.
                string charset = contentType.Substring(ix + "charset=".Length);
                int end = charset.IndexOf(';');
                if (end != -1)
                {
                    charset = charset.Substring(0, end);
                }
                charset = charset.Trim().Trim('"', '\'');
                try
                {
                    cding = Encoding.GetEncoding(charset);
                }
                catch (Exception)
                {
                    // Unknown or unsupported charset: fall back to the system default.
                    cding = Encoding.Default;
                }
            }

            using (Stream sm = rsps.GetResponseStream())
            using (StreamReader reader = new StreamReader(sm, cding))
            {
                m_html = reader.ReadToEnd();
            }
            m_pagesize = m_html.Length;
            // Record the final address after any redirects.
            m_uri = rsps.ResponseUri;
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message + (m_uri != null ? m_uri.ToString() : _url));
        m_good = false;
    }
}
#endregion
C# HttpWebRequest 抓取网页 自动编码
最新推荐文章于 2023-11-16 15:55:50 发布