C# 爬虫之 HttpWebRequest 中文乱码(UTF-8、GBK、GB2312)解决方法

本文介绍了一个使用C#编写的函数,该函数通过HTTP请求获取指定URL的HTML内容,并根据响应字符集进行适当的编码转换。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

    /// <summary>
    /// Downloads the document at <paramref name="url"/> with a single HTTP GET and
    /// decodes it to text using the character set reported in the response headers.
    /// </summary>
    /// <remarks>
    /// The response body is buffered once and decoded from memory, so choosing the
    /// encoding never requires a second request. When the reported character set is
    /// non-empty it is written (lower-cased) to <c>textBox5</c>.
    /// Any exception is shown in a message box and turned into the failure string
    /// instead of being propagated.
    /// </remarks>
    /// <param name="url">Address of the page to fetch.</param>
    /// <returns>
    /// The decoded, trimmed page text, or the literal "连接不上服务器!" when the
    /// request or decoding fails.
    /// </returns>
    private string GetUrlHtml_mail(string url)
    {
        System.Net.ServicePointManager.DefaultConnectionLimit = 50;
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0";
            request.Method = "GET";
            request.KeepAlive = false;
            request.Timeout = 10000; // milliseconds

            string charset;
            byte[] body;

            // using-blocks release the response and its stream even when reading throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Guard against a null character set; the invariant lower-casing keeps
                // the switch labels matching regardless of the current UI culture.
                charset = (response.CharacterSet ?? string.Empty).Trim().ToLowerInvariant();
                if (charset.Length > 0)
                {
                    textBox5.Text = charset;
                }

                using (Stream respStream = response.GetResponseStream())
                using (MemoryStream buffer = new MemoryStream())
                {
                    respStream.CopyTo(buffer);
                    body = buffer.ToArray();
                }
            }

            System.Text.Encoding encoding = PickEncoding(charset, body);

            // Decode through StreamReader (not Encoding.GetString) so a leading
            // byte-order mark is still detected and stripped.
            using (StreamReader reader = new StreamReader(new MemoryStream(body), encoding))
            {
                return reader.ReadToEnd().Trim();
            }
        }
        catch (Exception e1)
        {
            MessageBox.Show(e1.ToString());
            return "连接不上服务器!";
        }
    }

    /// <summary>
    /// Chooses the text encoding for a response body from the (lower-cased) header
    /// character set, falling back to sniffing the markup when the header is
    /// missing or is the generic "iso-8859-1".
    /// </summary>
    private static System.Text.Encoding PickEncoding(string charset, byte[] body)
    {
        switch (charset)
        {
            case "gbk":
            case "gb2312":
                // Ask for the Chinese code page by name rather than relying on the
                // machine's ANSI code page; fall back to it only if the name is unavailable.
                return ResolveEncodingOrFallback("gb2312", System.Text.Encoding.Default);

            case "utf-8":
                return System.Text.Encoding.UTF8;

            case "":
            case "iso-8859-1":
                // NOTE(review): servers commonly report iso-8859-1 when no charset was
                // configured, so trust an explicit utf-8 declaration in the markup instead.
                return DeclaresUtf8InMarkup(body)
                    ? System.Text.Encoding.UTF8
                    : System.Text.Encoding.Default;

            default:
                // Honour any other charset the runtime knows; otherwise assume UTF-8.
                return ResolveEncodingOrFallback(charset, System.Text.Encoding.UTF8);
        }
    }

    /// <summary>
    /// Returns true when the raw markup contains one of the known utf-8 charset
    /// declarations (matched case-insensitively).
    /// </summary>
    private static bool DeclaresUtf8InMarkup(byte[] body)
    {
        // The declarations are pure ASCII, so an ASCII decode is enough for the search.
        string markup = System.Text.Encoding.ASCII.GetString(body);
        return markup.IndexOf("text/html; charset=utf-8", StringComparison.OrdinalIgnoreCase) >= 0
            || markup.IndexOf("text/html;charset=utf-8", StringComparison.OrdinalIgnoreCase) >= 0
            || markup.IndexOf("meta charset=\"utf-8\"", StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>
    /// Looks up an encoding by name, returning <paramref name="fallback"/> when the
    /// runtime does not recognise or support that name.
    /// </summary>
    private static System.Text.Encoding ResolveEncodingOrFallback(string name, System.Text.Encoding fallback)
    {
        try
        {
            return System.Text.Encoding.GetEncoding(name);
        }
        catch (ArgumentException)
        {
            return fallback;
        }
        catch (NotSupportedException)
        {
            return fallback;
        }
    }
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值