'过滤HTML各种标签样式脚本
'来源:http://jorkin.reallydo.com/article.asp?id=521
'需要RegReplace函数: http://jorkin.reallydo.com/article.asp?id=345
Function HTMLFilter(sHTML, sFilters)
If sHTML & "" = "" Then Exit Function
If sFilters & "" = "" Then sFilters = "SCRIPT,OBJECT"
Dim aFilters
aFilters = Split(UCase(sFilters), ",")
For i = 0 To UBound(aFilters)
Select Case UCase(Trim(aFilters(i)))
Case "JORKIN"
Do While InStr(sHTML, " ") >0
sHTML = Replace(sHTML, " ", " ")
Loop
Case "SCRIPT"
'// 去除脚本及 onload 等
sHTML = RegReplace(sHTML, "
sHTML = RegReplace(sHTML, "\s[on].+?=\s+?([\""|\'])(.*?)\1", "")
sHTML = RegReplace(sHTML, "(JAVASCRIPT|JSCRIPT|VBSCRIPT|VBS):", "$1:")
Case "FIXIMG"
sHTML = RegReplace(sHTML, "]*).*?>", "")
sHTML = RegReplace(sHTML, "", "")
Case "TABLE"
'// 去除表格
sHTML = RegReplace(sHTML, "?TABLE[^>]*>", "") sHTML = RegReplace(sHTML, "?TBODY[^>]*>", "") sHTML = RegReplace(sHTML, "<(/?)TR[^>]*>", "<$1p>") sHTML = RegReplace(sHTML, "?TH[^>]*>", " ") sHTML = RegReplace(sHTML, "?TD[^>]*>", " ") Case "CLASS" '// 去除样式类class="" sHTML = RegReplace(sHTML, "(<[^>]+) CLASS=[^ |^>]+([^>]*>)", "$1 $2") sHTML = RegReplace(sHTML, "\sCLASS\s*?=\s*?([\""|\'])(.*?)\1", "") Case "STYLE" '// 去除样式style="" sHTML = RegReplace(sHTML, "(<[^>]+) STYLE=[^ |^>]+([^>]*>)", "$1 $2") sHTML = RegReplace(sHTML, "\sSTYLE\s*?=\s*?([\""|\'])(.*?)\1", "") Case "XML" '// 去除XML<?xml> sHTML = RegReplace(sHTML, "<?XML[^>]*>", "") Case "NAMESPACE" '// 去除命名空间 sHTML = RegReplace(sHTML, "<\/?[a-z]+:[^>]*>", "") Case "FONT" '// 去除字体 sHTML = RegReplace(sHTML, "?FONT[^>]*>", "") Case "MARQUEE" '// 去除字幕 sHTML = RegReplace(sHTML, "?MARQUEE[^>]*>", "") Case "OBJECT" '// 去除对象 sHTML = RegReplace(sHTML, "?OBJECT[^>]*>", "") sHTML = RegReplace(sHTML, "?PARAM[^>]*>", "") sHTML = RegReplace(sHTML, "?EMBED[^>]*>", "") Case "COMMENT" '// 去除HTML注释, 会处理 sHTML = RegReplace(sHTML, "", "") Case Else '// 去除其它标签 sHTML = RegReplace(sHTML, "?" & aFilters(i) & "[^>]*?>", "") End Select Next HTMLFilter = sHTML End Function '功能:使用正则表示式对字符串进行替换 '来源:http://jorkin.reallydo.com/article.asp?id=345 Function RegReplace(Str, PatternStr, RepStr) Dim NewStr, regEx NewStr = Str If IsNull(NewStr) Then RegReplace = "" Exit Function End If Set regEx = New RegExp regEx.IgnoreCase = True regEx.Global = True regEx.Pattern = PatternStr NewStr = regEx.Replace(NewStr, RepStr) RegReplace = NewStr End Function %> |
---|