/// <summary>
/// Downloads <paramref name="url"/> with one HTTP GET and returns the response body
/// decoded to text, with leading and trailing whitespace trimmed.
/// </summary>
/// <remarks>
/// The body is buffered as raw bytes first and decoded afterwards, so the page is
/// fetched exactly once even when the charset has to be sniffed from the markup.
/// Side effects: sets <c>ServicePointManager.DefaultConnectionLimit</c> to 50 and,
/// when the server reports a charset, writes it (lower-cased) into <c>textBox5</c>.
/// </remarks>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// The decoded page text. If any exception is thrown while requesting or decoding,
/// the exception is shown in a message box and the literal "连接不上服务器!" is returned.
/// </returns>
private string GetUrlHtml_mail(string url)
{
    string strHtml;
    System.Net.ServicePointManager.DefaultConnectionLimit = 50;
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0"; // user agent
        request.Method = "GET";
        request.KeepAlive = false;
        request.Timeout = 10000;

        string charset;
        byte[] body;
        // using-blocks guarantee the response and its stream are released even when
        // reading fails part-way through.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // CharacterSet may be null/empty when the server sends no usable Content-Type.
            // Invariant lower-casing keeps "ISO-8859-1" matching under every UI culture.
            charset = (response.CharacterSet ?? string.Empty).Trim().ToLowerInvariant();
            if (charset.Length != 0)
            {
                textBox5.Text = charset;
            }

            using (Stream respStream = response.GetResponseStream())
            using (MemoryStream buffer = new MemoryStream())
            {
                respStream.CopyTo(buffer);
                body = buffer.ToArray();
            }
        }

        System.Text.Encoding encoding = ResolveEncoding_mail(charset, body);

        // Decode through StreamReader (not Encoding.GetString) so a leading byte-order
        // mark is detected and stripped, as it was when reading the network stream directly.
        using (MemoryStream bodyStream = new MemoryStream(body))
        using (StreamReader reader = new StreamReader(bodyStream, encoding))
        {
            strHtml = reader.ReadToEnd().Trim();
        }
    }
    catch (Exception e1)
    {
        MessageBox.Show(e1.ToString());
        strHtml = "连接不上服务器!";
    }
    return strHtml;
}

/// <summary>
/// Chooses the text encoding for a downloaded body from the server-reported charset,
/// falling back to sniffing the markup when the charset is missing or "iso-8859-1".
/// </summary>
/// <param name="charset">Trimmed, lower-cased charset from the response; may be empty.</param>
/// <param name="body">Raw response bytes, used only for sniffing.</param>
private static System.Text.Encoding ResolveEncoding_mail(string charset, byte[] body)
{
    switch (charset)
    {
        case "utf-8":
            return System.Text.Encoding.UTF8;

        case "gbk":
        case "gb2312":
            // Ask for the code page by name so decoding does not depend on the machine's
            // locale; fall back to the system default if the code page is unavailable.
            return GetEncodingOrFallback_mail(charset, System.Text.Encoding.Default);

        case "":
        case "iso-8859-1":
            // No charset, or the HTTP default one: look for a UTF-8 declaration in the
            // markup instead. The markers are pure ASCII, so an ASCII decode of the
            // bytes is enough to search for them.
            string probe = System.Text.Encoding.ASCII.GetString(body);
            bool declaresUtf8 =
                probe.IndexOf("text/html; charset=utf-8", StringComparison.OrdinalIgnoreCase) >= 0 ||
                probe.IndexOf("text/html;charset=utf-8", StringComparison.OrdinalIgnoreCase) >= 0 ||
                probe.IndexOf("meta charset=\"utf-8\"", StringComparison.OrdinalIgnoreCase) >= 0;
            return declaresUtf8 ? System.Text.Encoding.UTF8 : System.Text.Encoding.Default;

        default:
            // Honour any other charset the runtime knows; otherwise decode as UTF-8.
            return GetEncodingOrFallback_mail(charset, System.Text.Encoding.UTF8);
    }
}

/// <summary>
/// Returns the encoding registered under <paramref name="name"/>, or
/// <paramref name="fallback"/> when the runtime does not recognise or support it.
/// </summary>
private static System.Text.Encoding GetEncodingOrFallback_mail(string name, System.Text.Encoding fallback)
{
    try
    {
        return System.Text.Encoding.GetEncoding(name);
    }
    catch (ArgumentException)
    {
        return fallback;
    }
    catch (NotSupportedException)
    {
        return fallback;
    }
}
C# 爬虫之 HttpWebRequest 中文乱码(UTF-8、GBK、GB2312)解决方法
最新推荐文章于 2022-12-07 14:27:08 发布