/// <summary>
/// Minimal regex-based HTML helper: builds a tree of the div and table elements found in a
/// string, strips a few inline tags, and downloads page source.
/// </summary>
/// <remarks>
/// NOTE(review): this file was recovered from a copy in which whitespace had been collapsed and
/// every angle-bracketed span (generic type arguments, tag patterns inside regex literals, one
/// loop condition and one branch condition) had been stripped. Every spot marked NOTE(review)
/// below is a reconstruction and should be confirmed against the original source.
/// </remarks>
public class SimpleHtmlParser
{
    // NOTE(review): reconstructed. The surviving text was "(?=(?table.*?>)|(?div/?.*?>))";
    // the named groups and the optional "/" (needed for the closing-tag branch to ever run)
    // are inferred. The look-ahead makes the overall match zero-width, so the tag text is
    // only available through the named groups.
    private const string TagPattern = @"(?=(?<table></?table.*?>)|(?<div></?div/?.*?>))";

    /// <summary>
    /// Parses the div and table tags in <paramref name="s"/> into an element tree.
    /// </summary>
    /// <param name="s">The HTML text to parse. Null or empty yields an empty tree.</param>
    /// <param name="elements">
    /// Receives every element whose closing tag was found, in the order the closing tags
    /// appear. Elements that are never closed are not added.
    /// </param>
    /// <returns>
    /// A synthetic root <see cref="Element"/> whose <see cref="Element.Children"/> are the
    /// top-level elements found in <paramref name="s"/>.
    /// </returns>
    public static Element ParseHtml(string s, out List<Element> elements)
    {
        elements = new List<Element>();
        Element root = new Element();
        if (string.IsNullOrEmpty(s))
        {
            return root;
        }

        Stack<Element> openElements = new Stack<Element>();
        RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;
        Element current = root;

        foreach (Match match in Regex.Matches(s, TagPattern, options))
        {
            // Pick whichever named group actually captured the tag text.
            Group tableGroup = match.Groups["table"];
            Group divGroup = match.Groups["div"];
            bool isTable = tableGroup.Length > 0;
            bool isDiv = !isTable && divGroup.Length > 0;
            if (!isTable && !isDiv)
            {
                continue;
            }

            Group tag = isTable ? tableGroup : divGroup;
            string word = tag.Value;

            // NOTE(review): the original open/close test was lost in extraction; a leading
            // "</" is assumed to mark a closing tag.
            bool isClosingTag = word.StartsWith("</", System.StringComparison.Ordinal);

            if (!isClosingTag)
            {
                // New tag: create the node, attach it under the current parent, and push it.
                Element opened;
                if (isDiv)
                {
                    opened = new DivElement();
                }
                else
                {
                    opened = new TableElement();
                }

                opened.StartTagIndex = tag.Index;
                opened.StartTagLength = tag.Length;
                opened.BegTag = word;

                opened.Parent = current;
                current.Children.Add(opened);
                current = opened;

                openElements.Push(opened);
            }
            else
            {
                // A closing tag with nothing open is malformed input; skip it instead of
                // letting Stack.Pop throw InvalidOperationException.
                if (openElements.Count == 0)
                {
                    continue;
                }

                // Closing tag: it closes the most recently opened element. The tag name is
                // not compared, so mismatched nesting is paired purely by position.
                Element closed = openElements.Pop();
                closed.EndTag = word;
                closed.EndIndex = tag.Index;
                closed.EndTagLength = tag.Length;
                current = closed.Parent;

                int outerLength = (closed.EndIndex - closed.StartTagIndex) + closed.EndTagLength;
                int innerStart = closed.StartTagIndex + closed.StartTagLength;
                int innerLength = closed.EndIndex - innerStart;
                closed.OuterHtml = s.Substring(closed.StartTagIndex, outerLength);
                closed.InnerHtml = s.Substring(innerStart, innerLength);

                elements.Add(closed);
            }
        }

        return root;
    }

    /// <summary>
    /// Removes font, span and anchor tags (opening and closing) from <paramref name="s"/>,
    /// keeping the text between them.
    /// </summary>
    /// <param name="s">The HTML text to clean. Null or empty is returned unchanged.</param>
    /// <returns>The input with the matched tags removed.</returns>
    /// <remarks>
    /// Patterns run with default options: matching is case-sensitive and "." does not match
    /// a newline, so upper-case tags and tags that span lines are left in place.
    /// </remarks>
    public static string ReplaceFontSpan(string s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return s;
        }

        // NOTE(review): the recovered source performed one more substitution before these
        // three, but only the fragment ".*?" of its pattern survived extraction. It is
        // omitted here rather than guessed; restore it from the original source.

        // NOTE(review): the leading "</" of each pattern was lost and is reconstructed; the
        // surviving text was "?font.*?>", "?span.*?>" and "?a.*?>", which are not valid
        // regular expressions. As written, the anchor pattern also matches any other tag
        // whose name starts with "a".
        s = Regex.Replace(s, "</?font.*?>", "");
        s = Regex.Replace(s, "</?span.*?>", "");
        s = Regex.Replace(s, "</?a.*?>", "");
        return s;
    }

    /// <summary>
    /// Downloads the source of the page at <paramref name="url"/> with an HTTP GET.
    /// </summary>
    /// <param name="url">Absolute HTTP(S) address of the page.</param>
    /// <returns>
    /// The response body decoded with <see cref="System.Text.Encoding.Default"/>, or an empty
    /// string if the request fails for any reason.
    /// </returns>
    public static string DownLoadHtml(string url)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";

            // Dispose the response, its stream and the reader so the connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream receiveStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(receiveStream, System.Text.Encoding.Default))
            {
                return reader.ReadToEnd();
            }
        }
        catch (System.Exception)
        {
            // Deliberate best-effort: callers rely on "" for any failure (bad URL, non-HTTP
            // scheme, network error), so nothing is rethrown.
            return "";
        }
    }
}

/// <summary>
/// A node in the tree produced by <see cref="SimpleHtmlParser.ParseHtml"/>. Index and length
/// values are offsets into the string that was parsed.
/// </summary>
public class Element : StringElement
{
    /// <summary>Offset of the first character of the opening tag.</summary>
    public int StartTagIndex { get; set; }

    /// <summary>Length of the opening tag.</summary>
    public int StartTagLength { get; set; }

    /// <summary>Offset of the first character of the closing tag.</summary>
    public int EndIndex { get; set; }

    /// <summary>Length of the closing tag.</summary>
    public int EndTagLength { get; set; }

    /// <summary>Text of the opening tag.</summary>
    public string BegTag { get; set; }

    /// <summary>Text of the closing tag; not set if the element was never closed.</summary>
    public string EndTag { get; set; }

    /// <summary>Elements opened while this element was the innermost open one.</summary>
    public List<Element> Children = new List<Element>();

    /// <summary>The element this one is attached under; not set on the synthetic root.</summary>
    public Element Parent { get; set; }
}

/// <summary>An <see cref="Element"/> created for a div tag.</summary>
public class DivElement : Element
{
}

/// <summary>An <see cref="Element"/> created for a table tag.</summary>
public class TableElement : Element
{
}

/// <summary>Element type for table rows; nothing in this file creates it.</summary>
public class TrElement : Element
{
}

/// <summary>Holds the markup captured for an element.</summary>
public class StringElement
{
    /// <summary>Markup from the start of the opening tag through the end of the closing tag.</summary>
    public string OuterHtml { get; set; }

    /// <summary>Markup between the opening and closing tags.</summary>
    public string InnerHtml { get; set; }
}