1、ASP版本
Function RemoveHTML(strHTML)
ON ERROR RESUME NEXT
Dim objRegExp, strOutput
strHTML = replace(strHTML,"; ",";")
strHTML = replace(strHTML,chr(13),";")
strHTML = replace(strHTML,chr(10),";")
strHTML = replace(strHTML,chr(32),";")
Set objRegExp = New Regexp
objRegExp.IgnoreCase = True ’忽略大小写
objRegExp.Global = True ’设置为全文搜索
objRegExp.Pattern = ""
strOutput = objRegExp.Replace(strHTML, "")
objRegExp.Pattern = "
]*?>.*?"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern = "
[/s/S]+?
"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern = "
]*>"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern = "<(/w[^>]*) class=([^ |>]*)([^>]*)"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="<(/w[^>]*) style=’([^’]*)’([^>]*)"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="<(/w[^>]*) lang=([^ |>]*)([^>]*)"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="<
//?/?xml[^>]*>"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="
]*>"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="<(.[^>]*)>"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="([/r/n])[/s]+"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="-->"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="
", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"
本文介绍了一种去除HTML标签的方法,并提供了ASP和ASP.NET两种版本的实现代码。通过正则表达式匹配与替换,有效清除了HTML源码中的标签与特殊字符。
1265

被折叠的 条评论
为什么被折叠?



