HTML 中只能有一个 table。发一个解析 HTML 的代码,目前只能解析 table 与 div。

/// <summary>
/// Minimal HTML helper that recognises only <c>table</c> and <c>div</c> tags,
/// plus small utilities for stripping inline tags and downloading a page.
/// </summary>
/// <remarks>
/// NOTE(review): the published listing lost its whitespace and every
/// angle-bracket span (generic arguments, tag literals inside regex strings).
/// Generic arguments were restored from usage. The tag pattern and the
/// opening/closing test in <see cref="ParseHtml"/> are reconstructions of the
/// evident intent, not a copy of the lost text - confirm against the author's
/// original before relying on exact matching behaviour.
/// </remarks>
public class SimpleHtmlParser
{
    // Built once instead of on every call. The "close" group is set for
    // closing tags; "name" carries the tag name. \b keeps longer names that
    // merely start with "div"/"table" from matching.
    private static readonly Regex TagPattern = new Regex(
        @"<(?<close>/)?(?<name>table|div)\b[^>]*>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Opening and closing font/span/a tags. \b stops the "a" alternative from
    // also matching tags such as <abbr>.
    private static readonly Regex InlineTagPattern = new Regex(
        @"</?(?:font|span|a)\b[^>]*>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Parses the <c>table</c> and <c>div</c> tags in <paramref name="s"/>
    /// into a tree of elements.
    /// </summary>
    /// <param name="s">The HTML text to parse. Null or empty yields an empty tree.</param>
    /// <param name="elements">
    /// Receives every element whose closing tag was found, in the order the
    /// closing tags appear (inner elements before the elements that contain them).
    /// </param>
    /// <returns>
    /// A synthetic root <see cref="Element"/> whose <see cref="Element.Children"/>
    /// are the top-level tags found in <paramref name="s"/>.
    /// </returns>
    /// <remarks>
    /// A closing tag always closes the most recently opened element; tag names
    /// are not compared. Elements that are never closed stay in the tree but
    /// are not added to <paramref name="elements"/> and keep null
    /// <see cref="StringElement.OuterHtml"/> / <see cref="StringElement.InnerHtml"/>.
    /// </remarks>
    public static Element ParseHtml(string s, out List<Element> elements)
    {
        elements = new List<Element>();
        Element root = new Element();
        if (string.IsNullOrEmpty(s))
        {
            return root;
        }

        Stack<Element> openElements = new Stack<Element>();
        Element current = root;

        foreach (Match match in TagPattern.Matches(s))
        {
            if (!match.Groups["close"].Success)
            {
                // New tag: attach it under the element currently being filled.
                Element opened = CreateElement(match.Groups["name"].Value);
                opened.StartTagIndex = match.Index;
                opened.StartTagLength = match.Length;
                opened.BegTag = match.Value;

                // Set the parent and register the child.
                opened.Parent = current;
                current.Children.Add(opened);

                // Push: this element now receives subsequent children.
                current = opened;
                openElements.Push(opened);
                continue;
            }

            // Closing tag with nothing open: skip it. Popping the empty stack
            // would throw InvalidOperationException.
            if (openElements.Count == 0)
            {
                continue;
            }

            // Closing tag: finish the most recently opened element.
            Element closed = openElements.Pop();
            closed.EndTag = match.Value;
            closed.EndIndex = match.Index;
            closed.EndTagLength = match.Length;
            current = closed.Parent;

            int innerStart = closed.StartTagIndex + closed.StartTagLength;
            closed.OuterHtml = s.Substring(
                closed.StartTagIndex,
                closed.EndIndex - closed.StartTagIndex + closed.EndTagLength);
            closed.InnerHtml = s.Substring(innerStart, closed.EndIndex - innerStart);

            elements.Add(closed);
        }

        return root;
    }

    /// <summary>
    /// Removes opening and closing <c>font</c>, <c>span</c> and <c>a</c> tags
    /// from <paramref name="s"/>, keeping the text between them.
    /// </summary>
    /// <param name="s">The HTML text to clean. Null or empty is returned unchanged.</param>
    /// <returns>The text with those tags removed.</returns>
    /// <remarks>
    /// NOTE(review): the published listing performed one more replacement
    /// before these three. Its pattern was destroyed by the HTML extraction
    /// (only <c>".*?"</c> survives), so it is not reproduced here. Restore it
    /// from the author's original if that removal is needed.
    /// </remarks>
    public static string ReplaceFontSpan(string s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return s;
        }

        return InlineTagPattern.Replace(s, "");
    }

    /// <summary>
    /// Downloads the source of the page at <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>
    /// The response body as text, or an empty string if <paramref name="url"/>
    /// is null or empty or if any error occurs.
    /// </returns>
    public static string DownLoadHtml(string url)
    {
        if (string.IsNullOrEmpty(url))
        {
            return "";
        }

        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET"; // HTTP method tokens are case-sensitive

            // Dispose the response, stream and reader so the connection is
            // released even when reading fails part-way.
            // NOTE(review): the body is decoded with Encoding.Default, as in
            // the original; the charset declared by the response is not read.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream receiveStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(receiveStream, System.Text.Encoding.Default))
            {
                return reader.ReadToEnd();
            }
        }
        catch (System.Exception)
        {
            // Deliberate best-effort: every failure is reported as "no content".
            return "";
        }
    }

    // Maps a tag name to its element type; unrecognised names get a plain Element.
    private static Element CreateElement(string tagName)
    {
        if (string.Equals(tagName, "div", System.StringComparison.OrdinalIgnoreCase))
        {
            return new DivElement();
        }

        if (string.Equals(tagName, "table", System.StringComparison.OrdinalIgnoreCase))
        {
            return new TableElement();
        }

        return new Element();
    }
}

/// <summary>
/// A node of the tree built by <see cref="SimpleHtmlParser.ParseHtml"/>:
/// tag positions within the parsed text plus parent/child links.
/// </summary>
public class Element : StringElement
{
    /// <summary>Index in the parsed text where the opening tag starts.</summary>
    public int StartTagIndex { get; set; }

    /// <summary>Length of the opening tag text.</summary>
    public int StartTagLength { get; set; }

    /// <summary>Index in the parsed text where the closing tag starts.</summary>
    public int EndIndex { get; set; }

    /// <summary>Length of the closing tag text.</summary>
    public int EndTagLength { get; set; }

    /// <summary>Text of the opening tag as it appears in the parsed text.</summary>
    public string BegTag { get; set; }

    /// <summary>Text of the closing tag as it appears in the parsed text.</summary>
    public string EndTag { get; set; }

    /// <summary>Elements opened while this element was the innermost open one.</summary>
    public List<Element> Children = new List<Element>();

    /// <summary>The containing element; the parser's root element has none.</summary>
    public Element Parent { get; set; }
}

/// <summary>Element created for a <c>div</c> tag.</summary>
public class DivElement : Element
{
}

/// <summary>Element created for a <c>table</c> tag.</summary>
public class TableElement : Element
{
}

/// <summary>
/// Element type for a <c>tr</c> tag. Declared but not created by
/// <see cref="SimpleHtmlParser.ParseHtml"/>.
/// </summary>
public class TrElement : Element
{
}

/// <summary>Holds the outer and inner HTML text of an element.</summary>
public class StringElement
{
    /// <summary>Text from the start of the opening tag through the end of the closing tag.</summary>
    public string OuterHtml { get; set; }

    /// <summary>Text between the opening tag and the closing tag.</summary>
    public string InnerHtml { get; set; }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值