1.首先要引用类库
using
System.Text.RegularExpressions;
2.方法
private string StripHT(string
strHtml)
{
Regex regex = new Regex("<.+?>",
RegexOptions.IgnoreCase);
string strOutput = regex.Replace(strHtml,
"");//替换掉"<"和">"之间的内容
strOutput = strOutput.Replace("<", "");
strOutput = strOutput.Replace(">", "");
strOutput = strOutput.Replace(" ", "");
return strOutput;
}
public static string NoHTML(string
Htmlstring)
{
//删除脚本
Htmlstring = Regex.Replace(Htmlstring,
@"<script[^>]*?>.*?</script>",
"", RegexOptions.IgnoreCase);
//删除HTML
Htmlstring = Regex.Replace(Htmlstring,
@"<(.[^>]*)>", "",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"-->", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"<!--.*", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&(quot|#34);", "\"",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&(amp|#38);", "&",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&(lt|#60);", "<",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&(gt|#62);", ">",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&(nbsp|#160);", " ",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&(iexcl|#161);", "\xa1",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&(cent|#162);", "\xa2",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&(pound|#163);", "\xa3",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&(copy|#169);", "\xa9",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring,
@"&#(\d+);", "", RegexOptions.IgnoreCase);
Htmlstring.Replace("<", "");
Htmlstring.Replace(">", "");
Htmlstring.Replace("\r\n", "");
Htmlstring =
HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
return Htmlstring;
}