asp数据采集数据采集程序 'On Error Resume NextServer.Scripttimeout=300'---------------------------------------------------------------------'采集数据Function getHTTPData(url) dim http set http=Server.createobject("Msxml2.XMLHTTP") if instr(url,"http://")=0 then url="http://"&url Http.open "GET",url,false Http.send() if Http.Status<>200 then exit function getHTTPData=bytesToBSTR(Http.responseBody,"UTF-8") set http=nothing if err.number<>0 then err.Clear sCharset="" End function'--------------------------------------------------------------------- Function BytesToBstr(body,Cset) dim objstream set objstream = Server.CreateObject("adodb.stream") objstream.Type = 1 objstream.Mode =3 objstream.Open objstream.Write body objstream.Position = 0 objstream.Type = 2 objstream.Charset = Cset BytesToBstr = objstream.ReadText objstream.Close set objstream = nothingEnd Function'--------------------------------------------------------------------- '服务器登录Function login(url) dim http set http=Server.createobject("Msxml2.XMLHTTP") if instr(url,"http://")=0 then url="http://"&url Http.open "GET",url,false Http.send() if Http.Status<>200 then exit function set http=nothing if err.number<>0 then err.ClearEnd function'---------------------------------------------------------------------'正则替换Function ReplaceText(fString,patrn, replStr) Set regEx = New RegExp regEx.Pattern = patrn regEx.IgnoreCase = True regEx.Global = True ReplaceText = regEx.Replace(fString, replStr)End Function'---------------------------------------------------------------------'去标签 包括内容Function ReplaceTag(str, tag) Set regEx = New RegExp regEx.Pattern = "<"&tag&"[^>]*?>.*?<\/"&tag&">" regEx.IgnoreCase = True regEx.Global = True ReplaceTag=regEx.Replace(str, "")End Function'--------------------------------------------------------------------- '去标签 不包括内容Function ReplaceTab(str, tag) Set regEx = New RegExp regEx.Pattern = "<\/?"&tag&"[^>]*>" regEx.IgnoreCase = True regEx.Global = True ReplaceTab=regEx.Replace(str, "")End Function'--------------------------------------------------------------------- '去标签属性 保留标签Function ReplaceinnerTag(str, tag) Set regEx = New RegExp regEx.Pattern = "(<\/?"&tag&")[^>]*>" regEx.IgnoreCase = True regEx.Global = True ReplaceinnerTag=regEx.Replace(str, "$1>")End Function'--------------------------------------------------------------------- '按正则取数据Function getText(fString, patrn,n) dim Matches, tStr tStr = fString Set re = New Regexp re.IgnoreCase = True re.Global = True re.Pattern = patrn set Matches = re.Execute(tStr) set re = nothing rStr = "" For Each Match in Matches rStr = Match.SubMatches(n) exit for Next getText = rStrEnd Function'---------------------------------------------------------------------'数据过滤Function Encode_text(str) If Isnull(str) Then Encode_text = "" Exit Function End If str = ReplaceText(str, "<\/?br[^>]*>" , vbCrlf ) str = ReplaceText(str, "<\/?p[^>]*>" , vbCrlf ) str = ReplaceTab(str, "[a-zA-Z]") str = ReplaceText(str, "\n\s*\r" ,Chr(10)&Chr(13)) str = Replace(str, "&" , "&" ) str = Replace(str, ";" , ";" ) str = Replace(str, "&" , "&" ) str = Replace(str,Chr(34), """ ) str = Replace(str, "'" , "'" ) str = Replace(str, "<" , "<" ) str = Replace(str, ">" , ">" ) str = Replace(str, "(" , "(" ) str = Replace(str, ")" , ")" ) str = Replace(str, "*" , "*" ) str = Replace(str, "%" , "%" ) str = Replace(str,vbCrlf, "<br/>" ) Encode_text = strEnd Function'---------------------------------------------------------------------'通过Matches取数据dim Matchessub setMatches(str,sRe) Set re = New Regexp re.IgnoreCase = True re.Global = True re.Pattern = sRe set Matches = re.Execute(str) set re=nothing end sub'--------------------------------------------------------------------- 例子 '例子call setMatches(textcontent, re)For Each Match in Matches response.write Match.valueNext