从网络抓取数据的方法,我知道的有以下几种。
1. 首先我用WebClient去请求此地址,核心代码如下:
// Download the XML document as a UTF-8 string with WebClient.
// WebClient is IDisposable, so it is wrapped in a using block and released once the download finishes.
string str;
using (WebClient wc = new WebClient())
{
    wc.Encoding = Encoding.UTF8;
    str = wc.DownloadString("http://typhoon.weather.gov.cn/Typhoon/data/20130041.xml");
}
2.换个方法,这次我选择使用Http请求去获取,核心代码如下:
/// <summary>
/// Sends a GET request to <paramref name="tUrl"/> and returns the response body decoded
/// with the encoding named by <paramref name="encodeType"/>.
/// </summary>
/// <param name="tUrl">URL to request.</param>
/// <param name="encodeType">Name of the encoding used to decode the response body, e.g. "utf-8".</param>
/// <returns>
/// The response body as text, or the literal string "wrong" if any exception is thrown
/// while building the request, reading the response, or resolving the encoding.
/// </returns>
internal static string Get_HttpAll(string tUrl, string encodeType)
{
    try
    {
        // Resolve the encoding first so an invalid name fails before any connection is opened.
        Encoding encoding = Encoding.GetEncoding(encodeType);

        HttpWebRequest hwr = CreateHttpWebRequest(tUrl);
        hwr.Timeout = 19990; // milliseconds
        hwr.CookieContainer = new CookieContainer();

        // using blocks release the response, stream and reader even when reading throws;
        // previously they leaked on every failure path.
        using (HttpWebResponse hwrs = (HttpWebResponse)hwr.GetResponse())
        using (Stream myStream = hwrs.GetResponseStream())
        using (StreamReader sr = new StreamReader(myStream, encoding))
        {
            return sr.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Callers rely on the "wrong" sentinel instead of an exception, so every failure maps to it.
        return "wrong";
    }
}
// Fetch the XML document as text via Common.Get_HttpAll, decoding the response as UTF-8.
const string typhoonXmlUrl = "http://typhoon.weather.gov.cn/Typhoon/data/20130041.xml";
const string pageEncoding = "utf-8";
string xmlStr = Common.Get_HttpAll(typhoonXmlUrl, pageEncoding);
3.选择使用WebBrowser类去获取数据,核心代码如下(根据WebBrowser加载文档完毕后的状态进行判断,然后获取文档):
/// <summary>
/// Keeps processing pending Windows Forms messages until the given number of milliseconds has elapsed.
/// </summary>
/// <param name="Millisecond">How long to wait, in milliseconds; zero or a negative value returns immediately.</param>
private void Delay(int Millisecond)
{
    // A Stopwatch is used instead of DateTime.Now so that wall-clock changes
    // (DST switch, manual clock adjustment) cannot stretch or cut the wait.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
    while (timer.ElapsedMilliseconds < Millisecond)
    {
        Application.DoEvents();
    }
}

// Load the page in a WebBrowser control and poll until the document is ready.
WebBrowser wb = new WebBrowser();
string xmlStr = string.Empty;
wb.Navigate("http://typhoon.weather.gov.cn/Typhoon/" + dataUrl);
while (true)
{
    Delay(50); // wait 50 ms between polls
    if (wb.ReadyState == WebBrowserReadyState.Complete) // has the document finished loading?
    {
        if (!wb.IsBusy)
        {
            xmlStr = wb.Document.Body.InnerText;
            break;
        }
    }
    continue;
    // NOTE(review): the closing brace of this loop is not part of this excerpt; it is left open here exactly as in the original.