// Fetching a dynamic page:

/// <summary>
/// Issues an HTTP request for <paramref name="Url"/> and returns the response body
/// decoded with <paramref name="encode"/>.
/// </summary>
/// <param name="Url">Address of the page to download.</param>
/// <param name="encode">Encoding used to decode the response bytes into text.</param>
/// <returns>
/// The decoded response body, or an empty string when the request or the read fails
/// for any reason (the failure is reported through <c>System.Diagnostics.Trace</c>).
/// </returns>
public static string AubOrgCnGetUrltoHtml(string Url, Encoding encode)
{
    try
    {
        // Build the request through the WebRequest.Create factory; HttpWebRequest
        // is not meant to be instantiated directly with "new".
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

        // Send browser-like client data so that anti-scraping filters do not reject the request.
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50215; CrazyCoder.cn;www.aub.org.cn)";

        // For completeness, also declare a referring page to get past the request checks
        // commonly used by ASP sites. Replace this value with the home page of the site
        // you are actually scraping.
        request.Referer = "http://www.aub.org.cn/";

        // The using statements release the response (and its connection), the stream
        // and the reader even when reading the body throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (System.IO.Stream responseStream = response.GetResponseStream())
        using (System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, encode))
        {
            return reader.ReadToEnd();
        }
    }
    catch (System.Exception ex)
    {
        // Best-effort contract: callers receive "" on any failure. The exception is
        // reported through tracing instead of being discarded silently.
        System.Diagnostics.Trace.TraceWarning("AubOrgCnGetUrltoHtml failed for '{0}': {1}", Url, ex);
        return "";
    }
}

// Fetching a static page:
// (url, pageHtml and textBox1 are expected to be declared by the surrounding code.)
using (WebClient webclient = new WebClient())
{
    byte[] pageData = webclient.DownloadData(url);

    // NOTE(review): Encoding.Default is platform-dependent; if the page's charset is
    // known, decode with that encoding instead.
    pageHtml = Encoding.Default.GetString(pageData);
    textBox1.Text = pageHtml;
}
C#抓取页面数据汇总
最新推荐文章于 2022-11-09 14:25:04 发布