// Downloads the page at http://www.sina.com.cn, strips everything that looks
// like an HTML tag, and writes the remaining text to the current response.
HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create("http://www.sina.com.cn");
myReq.Method = "GET";
myReq.ContentType = "application/x-www-form-urlencoded";

string mystring;

// The using blocks release the response, its stream and the reader even when
// reading throws; the original only closed them on the success path and never
// closed the reader at all.
using (HttpWebResponse myRes = (HttpWebResponse)myReq.GetResponse())
using (Stream resStream = myRes.GetResponseStream())
using (StreamReader streamReader = new StreamReader(resStream, System.Text.Encoding.GetEncoding("GB2312")))
{
    // The body is decoded as GB2312 regardless of the charset the server reports.
    mystring = streamReader.ReadToEnd();
}

// Remove every "<...>" run. Text between tags (including the contents of
// script/style elements) is kept as-is.
Regex Myregex = new Regex(@"<[^>]*>");
mystring = Myregex.Replace(mystring, "");

// The fetch/strip/write sequence runs exactly once: repeating it in the same
// scope would redeclare every local and fail to compile.
Response.Write(mystring);
本示例使用 C# 通过 HttpWebRequest 和 HttpWebResponse 获取新浪网站的内容，并利用正则表达式去除 HTML 标签，最终输出纯文本信息。

被折叠的 条评论
为什么被折叠?



