/// <summary>
/// Decides whether the page with the given numeric id should be processed.
/// </summary>
/// <param name="i">Numeric page id.</param>
/// <returns>
/// <c>false</c> when <paramref name="i"/> lies inside one of the hard-coded
/// inclusive id ranges below; <c>true</c> for every other value.
/// </returns>
// NOTE(review): the reason these particular ranges are excluded is not
// visible in this file - presumably they are ids without usable content.
static bool ValidataLink(int i)
{
    if (i >= 9189 && i <= 9192)
    {
        return false;
    }

    if (i >= 9228 && i <= 9415)
    {
        return false;
    }

    if (i >= 9597 && i <= 9618)
    {
        return false;
    }

    if (i >= 9669 && i <= 9730)
    {
        return false;
    }

    if (i >= 9762 && i <= 10053)
    {
        return false;
    }

    if (i >= 10057 && i <= 10118)
    {
        return false;
    }

    return true;
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace ConsoleApplication2
{
    /// <summary>
    /// Console tool that downloads a numbered series of pages from
    /// www.kekenet.com, extracts selected fragments from each page with a
    /// regular expression and writes all fragments into one local HTML file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Inclusive [first, last] page-id ranges that are skipped.
        /// </summary>
        // NOTE(review): why these ids are excluded is not visible in this
        // file - presumably they are ids without usable content.
        private static readonly int[,] ExcludedRanges = new int[,]
        {
            { 9189, 9192 },
            { 9228, 9415 },
            { 9597, 9618 },
            { 9669, 9730 },
            { 9762, 10053 },
            { 10057, 10118 },
        };

        /// <summary>
        /// Matches the text between a "&lt;/script&gt;&lt;/SPAN&gt;" marker and the
        /// next "&lt;/SPAN&gt;". "[\W\w]" is used so the match can span line breaks.
        /// </summary>
        // The garbled source read "[Ww]*?", which only matches runs of the
        // letter 'w'; the backslashes had been stripped.
        private static readonly Regex FragmentRegex = new Regex(
            @"(?<=</script></SPAN>)[\W\w]*?(?=</SPAN>)",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Decides whether the page with the given numeric id should be processed.
        /// </summary>
        /// <param name="i">Numeric page id.</param>
        /// <returns>
        /// <c>false</c> when <paramref name="i"/> lies inside one of the
        /// excluded ranges; <c>true</c> otherwise.
        /// </returns>
        internal static bool ValidataLink(int i)
        {
            for (int row = 0; row < ExcludedRanges.GetLength(0); row++)
            {
                if (i >= ExcludedRanges[row, 0] && i <= ExcludedRanges[row, 1])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Walks the page ids 9010..10119, downloads every id accepted by
        /// <see cref="ValidataLink"/>, echoes each extracted fragment to the
        /// console and finally writes the combined document to d:\test.html.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            const int start = 9010;
            // For a quick trial run a smaller upper bound (e.g. 9300) can be used.
            const int end = 10119;

            // StringBuilder avoids re-copying the whole document on every page.
            StringBuilder content = new StringBuilder();
            content.Append(@"<html><head><meta http-equiv=""Content-Type"" content=""text/html; charset=UTF-8"" /></head><body>");

            for (int i = start; i <= end; i++)
            {
                if (!ValidataLink(i))
                {
                    continue;
                }

                string http = String.Format("http://www.kekenet.com/kouyu/{0}.shtml", i);
                string currentContent = GetContentByHTTP(http);
                content.Append(currentContent);
                Console.WriteLine("{0}:{1}", i, currentContent);
            }

            content.Append(@"</body></html>");

            // File.CreateText writes UTF-8, which matches the charset declared
            // in the <meta> tag above.
            // NOTE(review): the garbled source read @"d: est.html"; the path was
            // restored on the assumption that "\t" had been turned into
            // whitespace - confirm the intended location.
            using (StreamWriter sw = File.CreateText(@"d:\test.html"))
            {
                sw.Write(content.ToString());
            }
        }

        /// <summary>
        /// Downloads a page and reduces it to the extracted fragments.
        /// </summary>
        /// <param name="url">Absolute URL of the page.</param>
        /// <returns>
        /// The fragments wrapped in paragraph markup, or an empty string when
        /// the download failed or nothing matched.
        /// </returns>
        private static string GetContentByHTTP(string url)
        {
            return FiterHTML(GetHTMLfromHTTP(url));
        }

        /// <summary>
        /// Fetches the body of <paramref name="url"/> and decodes it as GB2312.
        /// </summary>
        /// <param name="url">Absolute HTTP URL to request.</param>
        /// <returns>
        /// The decoded response body; an empty string when the status is not
        /// 200 OK or when any error occurs (best effort - one failing page
        /// must not abort the whole run).
        /// </returns>
        private static string GetHTMLfromHTTP(string url)
        {
            try
            {
                // Direct casts instead of "as": a wrong type fails here with a
                // clear exception rather than a later NullReferenceException.
                HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(url);
                using (HttpWebResponse httpResponse = (HttpWebResponse)httpRequest.GetResponse())
                {
                    if (httpResponse.StatusCode != HttpStatusCode.OK)
                    {
                        return "";
                    }

                    // The encoding is fixed to GB2312; httpResponse.CharacterSet
                    // is deliberately not used (it was already disabled in the
                    // original code).
                    // NOTE(review): on .NET Core / .NET 5+ GB2312 is only
                    // available after registering CodePagesEncodingProvider -
                    // confirm the target framework.
                    Encoding encode = Encoding.GetEncoding("GB2312");
                    using (Stream receiveStream = httpResponse.GetResponseStream())
                    using (StreamReader readStream = new StreamReader(receiveStream, encode))
                    {
                        return readStream.ReadToEnd();
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best effort, but no longer silent.
                Console.Error.WriteLine("Failed to download {0}: {1}", url, ex.Message);
                return "";
            }
        }

        /// <summary>
        /// Extracts every fragment located between "&lt;/script&gt;&lt;/SPAN&gt;" and
        /// the following "&lt;/SPAN&gt;" (case-insensitive) and wraps each one as
        /// "&lt;P&gt;...&lt;/P&gt;&lt;BR/&gt;".
        /// </summary>
        /// <param name="source">Raw HTML of a page; may be null or empty.</param>
        /// <returns>
        /// The concatenated wrapped fragments, or an empty string when
        /// <paramref name="source"/> is null/empty or contains no match.
        /// </returns>
        internal static string FiterHTML(string source)
        {
            // Explicit guard replaces the former catch-all that turned the
            // ArgumentNullException from Regex.Match(null) into "".
            if (string.IsNullOrEmpty(source))
            {
                return "";
            }

            StringBuilder result = new StringBuilder();
            for (Match m = FragmentRegex.Match(source); m.Success; m = m.NextMatch())
            {
                // A Match's Captures collection holds only the match itself,
                // so m.Value is equivalent to iterating it.
                result.AppendFormat(@"<P>{0}</P><BR/>", m.Value);
            }

            return result.ToString();
        }
    }
}