迅雷资讯弹出网页采用UTF-8编码,需要编制自定义函数对XMLHTTP对象获得的源代码进行转换:
- ' Win32 API (kernel32): converts a multi-byte character buffer to UTF-16.
- ' lpMultiByteStr / lpWideCharStr are raw pointers; cchMultiByte is a byte
- ' count and cchWideChar is a count of UTF-16 characters (not bytes).
- ' VBA7 hosts (Office 2010+) require PtrSafe, and pointers must be LongPtr
- ' so the declaration also works in 64-bit Office. Older hosts (VB6 / VBA6)
- ' fall through to the classic 32-bit declaration.
- #If VBA7 Then
-     Private Declare PtrSafe Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As LongPtr, ByVal cchMultiByte As Long, ByVal lpWideCharStr As LongPtr, ByVal cchWideChar As Long) As Long
- #Else
-     Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
- #End If
- ' Windows code-page identifier for UTF-8.
- Private Const CP_UTF8 As Long = 65001
- ' Decodes a UTF-8 byte array into a VBA (UTF-16) String.
- '
- ' Utf    : byte array holding UTF-8 data; any lower bound is accepted.
- ' Returns: the decoded text, or an empty string when the array is
- '          unallocated, empty, or the conversion fails.
- Function Utf8ToUnicode(ByRef Utf() As Byte) As String
-     Dim lFirst As Long
-     Dim lLength As Long
-     Dim lChars As Long
-     Dim lRet As Long
-     Dim sBuffer As String
-
-     ' UBound/LBound raise error 9 on an unallocated dynamic array; in that
-     ' case the assignment is skipped and lLength stays 0.
-     On Error Resume Next
-     lLength = UBound(Utf) - LBound(Utf) + 1
-     On Error GoTo 0
-     If lLength <= 0 Then Exit Function
-
-     ' Do not assume a zero-based array when taking the data pointer.
-     lFirst = LBound(Utf)
-
-     ' First call with cchWideChar = 0 only reports the required number of
-     ' UTF-16 characters; 0 means the conversion failed.
-     lChars = MultiByteToWideChar(CP_UTF8, 0, VarPtr(Utf(lFirst)), lLength, 0, 0)
-     If lChars <= 0 Then Exit Function
-
-     ' Second call writes into a buffer of exactly that many characters.
-     sBuffer = String$(lChars, vbNullChar)
-     lRet = MultiByteToWideChar(CP_UTF8, 0, VarPtr(Utf(lFirst)), lLength, StrPtr(sBuffer), lChars)
-
-     ' On failure return an empty string instead of a buffer full of NULs.
-     If lRet > 0 Then Utf8ToUnicode = Left$(sBuffer, lRet)
- End Function
- ' Downloads one of the Xunlei news pop-up pages and returns its source text,
- ' decoded from UTF-8.
- '
- ' menuindex: 0..5, selects the page (see the URL table below).
- ' Returns  : the page source, or an empty string when menuindex is out of
- '            range, the server does not answer with a 2xx status, or the
- '            response has no body.
- Function SourcecodeofXunleizixun(ByVal menuindex As Long) As String
-     Dim b() As Byte, indexurl(5) As String
-     Dim vBody As Variant
-
-     If menuindex < 0 Or menuindex > 5 Then Exit Function
-
-     indexurl(0) = "http://recommend.xunlei.com/desknews_v2_game.html" ' 0 - games
-     indexurl(1) = "http://recommend.xunlei.com/desknews_v2_ent.html" ' 1 - entertainment
-     indexurl(2) = "http://biz5c.sandai.net/desktopnews/iframe/hot_14.htm" ' 2 - dating / marriage
-     indexurl(3) = "http://www.eachnet.com/landing/xunlei08_3.html" ' 3 - online shopping
-     indexurl(4) = "http://biz5c.sandai.net/desktopnews/iframe/hot_2.htm" ' 4 - finance
-     indexurl(5) = "http://biz5c.sandai.net/desktopnews/iframe/hot_3.htm" ' 5 - IT
-
-     With CreateObject("Msxml2.XMLHTTP")
-         .Open "GET", indexurl(menuindex), False ' False = synchronous request
-         .Send
-         ' Do not decode and return an HTTP error page as if it were content.
-         If .Status < 200 Or .Status >= 300 Then Exit Function
-         ' Read into a Variant first: assigning a missing body straight to a
-         ' Byte array would raise a type-mismatch error.
-         vBody = .ResponseBody
-     End With
-
-     If VarType(vBody) <> (vbArray Or vbByte) Then Exit Function
-     b = vBody
-     SourcecodeofXunleizixun = Utf8ToUnicode(b)
- End Function
- ' Demo entry point: prints the source of the entertainment news page
- ' (menu index 1) to the Immediate window.
- Sub Getxunlei()
-     Dim pageSource As String
-     pageSource = SourcecodeofXunleizixun(1)
-     Debug.Print pageSource
- End Sub
本文介绍了一种使用VBA自定义函数从迅雷资讯页面抓取UTF-8编码网页内容的方法。通过创建XMLHTTP对象并发送GET请求来获取指定URL的内容,并利用MultiByteToWideChar函数将获取到的UTF-8编码的网页源码转换为Unicode编码。
1507

被折叠的 条评论
为什么被折叠?



