菲度垂直搜索引擎 代码注释 3

本文介绍了一款使用Visual Basic编写的网页数据抓取器,该工具能够从指定URL抓取页面内容,并通过异步方式解析出图片链接、附加信息、价格等商品详情。此外,还实现了根据不同电商平台(如当当网、淘宝等)调整抓取策略的功能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

' 给定一个地址，进行页面抓取

Imports System
Imports System.Net
Imports System.Threading
Imports System.Text
Imports System.IO

''' <summary>
''' Downloads one page with an asynchronous HttpWebRequest, keeps the decoded
''' body in <see cref="Von"/>, and then runs the site-specific extractor chosen
''' by the template id to fill in picture, additional info, price, boost, text,
''' meta value and page title.
''' </summary>
Class ClientGetAsync
    ' Signalled by the callbacks when the download has finished or failed;
    ' GMain blocks on it before running the extraction.
    Public allDone As New ManualResetEvent(False)
    Const BUFFER_SIZE As Integer = 1024
    ' Upper bound, in seconds, on how long the body of a single page is read.
    Const READ_TIMEOUT_SECONDS As Integer = 30

    Private mC As String
    Private mText As String
    Private munode As UNode
    Private msno As String
    Private BasicPath As String
    Private encode As String
    ' Moment the current fetch was started; used to enforce the read timeout.
    Private mStarted As DateTime = DateTime.UtcNow
    ' Stateful decoder for the current fetch, created lazily by ReadCallBack so
    ' that multi-byte characters split across two reads are decoded correctly.
    Private mDecoder As Decoder
    Private mmeta As String
    Private mpagetitle As String
    Private mytempid As Integer
    ' Not referenced anywhere inside this class.
    Private mpager As PagerGeter
    Private mpic As String
    Private maddition As String
    Private mprice As Single
    Private mboostvalue As Single

    ''' <summary>Picture value produced by the extractor's GetImage.</summary>
    Public Property Picture() As String
        Get
            Return mpic
        End Get
        Set(ByVal Value As String)
            mpic = Value
        End Set
    End Property

    ''' <summary>Additional info produced by the extractor's GetAddit.</summary>
    Public Property Addtion() As String
        Get
            Return maddition
        End Get
        Set(ByVal Value As String)
            maddition = Value
        End Set
    End Property

    ''' <summary>Price read from the extractor's MyPrice.</summary>
    Public Property Price() As Single
        Get
            Return mprice
        End Get
        Set(ByVal Value As Single)
            mprice = Value
        End Set
    End Property

    ''' <summary>Boost value produced by the extractor's SetMyBoost.</summary>
    Public Property Boost() As Single
        Get
            Return mboostvalue
        End Get
        Set(ByVal Value As Single)
            mboostvalue = Value
        End Set
    End Property

    ''' <summary>Text produced by the extractor's GetInformation.</summary>
    Public Property CText() As String
        Get
            Return mText
        End Get
        Set(ByVal Value As String)
            mText = Value
        End Set
    End Property

    ''' <summary>Result of Meta(Von) for the downloaded page.</summary>
    Public Property MyMetaValue() As String
        Get
            Return mmeta
        End Get
        Set(ByVal Value As String)
            mmeta = Value
        End Set
    End Property

    ''' <summary>Result of PageTitle(Von) for the downloaded page.</summary>
    Public Property MyPageTile() As String
        Get
            Return mpagetitle
        End Get
        Set(ByVal Value As String)
            mpagetitle = Value
        End Set
    End Property

    ''' <summary>
    ''' Runs the extractor selected by <paramref name="i"/> over the downloaded
    ''' page and stores its results in the instance fields. Any other value of
    ''' <paramref name="i"/> skips the extractor and only fills meta and title.
    ''' </summary>
    ''' <param name="i">Template id: 1 = DangDang, 2 = Taobao, 3 = M2688, 4 = M18, 5 = ebay.</param>
    ''' <remarks>Exceptions are written to the console and not rethrown.</remarks>
    Private Sub GetInfo(ByVal i As Integer)
        Try
            Select Case i
                Case 1
                    Dim temp As DangDang
                    temp = New DangDang("<td class=""pad"">", "</div>", "dangdang", munode.Rank)
                    mpic = temp.GetImage(Von)
                    maddition = temp.GetAddit(Von)
                    mboostvalue = temp.SetMyBoost(Von)
                    mprice = temp.MyPrice
                    mText = temp.GetInformation(Von)
                Case 2
                    Dim temp As Taobao
                    temp = New Taobao("<div id=""DetailInfo"">", "<div id=""DetailInfoContent"">", "taobao", munode.Rank)
                    mpic = temp.GetImage(Von)
                    maddition = temp.GetAddit(Von)
                    mboostvalue = temp.SetMyBoost(Von)
                    mprice = temp.MyPrice
                    mText = temp.GetInformation(Von)
                    Console.WriteLine("price {0}", mprice)
                Case 3
                    Dim temp As M2688
                    temp = New M2688("id=""Repeater1__ctl1_Label4""", "</span>", "taobao", munode.Rank)
                    mpic = temp.GetImage(Von)
                    maddition = temp.GetAddit(Von)
                    mboostvalue = temp.SetMyBoost(Von)
                    mprice = temp.MyPrice
                    mText = temp.GetInformation(Von)
                Case 4
                    Dim temp As M18
                    temp = New M18(munode.Rank)
                    mpic = temp.GetImage(Von)
                    maddition = temp.GetAddit(Von)
                    mboostvalue = temp.SetMyBoost(Von)
                    mprice = temp.MyPrice
                    mText = temp.GetInformation(Von)
                Case 5
                    Dim temp As ebay
                    temp = New ebay(munode.Rank)
                    mpic = temp.GetImage(Von)
                    maddition = temp.GetAddit(Von)
                    mboostvalue = temp.SetMyBoost(Von)
                    mprice = temp.MyPrice
                    mText = temp.GetInformation(Von)
            End Select
            mmeta = Meta(Von)
            mpagetitle = PageTitle(Von)
        Catch ex As Exception
            Console.Write(ex.ToString)
        End Try

    End Sub

    ''' <summary>Decoded body of the downloaded page.</summary>
    Public Property Von() As String
        Get
            Return mC
        End Get
        Set(ByVal Value As String)
            mC = Value
        End Set
    End Property

    ''' <summary>Creates a fetcher bound to one node.</summary>
    ''' <param name="u">Node to fetch; its Address seeds the base path and its Rank is passed to the extractors.</param>
    ''' <param name="flag">Identifier printed as "Sn.No" when the request fails.</param>
    ''' <param name="Tempid">Template id that selects the extractor in GetInfo.</param>
    ''' <param name="mcide">Name of the encoding used to decode the response body.</param>
    Sub New(ByRef u As UNode, ByVal flag As String, ByVal Tempid As Integer, Optional ByVal mcide As String = "gb2312")
        BasicPath = u.Address
        msno = flag
        encode = mcide
        mytempid = Tempid
        munode = u
    End Sub

    ''' <summary>
    ''' Starts the asynchronous download of <paramref name="UriPath"/>, blocks
    ''' until the callbacks signal completion, then runs the extraction.
    ''' </summary>
    ''' <param name="UriPath">Address of the page to fetch (converted to String).</param>
    ''' <remarks>Exceptions are written to the console and not rethrown.</remarks>
    Sub GMain(ByVal UriPath As Object)
        Try
            ' Re-arm the event so a second call on the same instance really waits
            ' for its own download instead of falling straight through.
            allDone.Reset()
            mStarted = DateTime.UtcNow
            mDecoder = Nothing

            BasicPath = UriPath
            ' NOTE(review): MachineName is not declared in this class; presumably
            ' it is defined elsewhere in the project - confirm.
            MachineName = BasicPath.Replace("http://", "")

            Dim HttpSite As Uri = New Uri(BasicPath)
            Dim wreq As HttpWebRequest = _
               CType(WebRequest.Create(HttpSite), HttpWebRequest)
            Dim rs As RequestState = New RequestState
            rs.Request = wreq

            wreq.BeginGetResponse( _
               New AsyncCallback(AddressOf RespCallback), rs)

            allDone.WaitOne()
            GetInfo(mytempid)
            Console.WriteLine("cmst--{0}", UriPath)

        Catch ex As Exception
            allDone.Set()
            Console.WriteLine(ex.ToString)
        End Try

    End Sub

    ''' <summary>
    ''' Completion callback for BeginGetResponse: obtains the response stream
    ''' and starts the first asynchronous read.
    ''' </summary>
    ''' <param name="ar">Async result whose AsyncState is the RequestState.</param>
    Sub RespCallback(ByVal ar As IAsyncResult)
        Try
            Dim rs As RequestState = CType(ar.AsyncState, RequestState)
            Dim req As HttpWebRequest = rs.Request
            Dim resp As HttpWebResponse = _
                CType(req.EndGetResponse(ar), HttpWebResponse)

            Dim ResponseStream As Stream = resp.GetResponseStream()
            rs.ResponseStream = ResponseStream

            ' A failure to start the read must reach the handler below;
            ' swallowing it here would leave GMain blocked in WaitOne forever.
            ResponseStream.BeginRead(rs.BufferRead, 0, BUFFER_SIZE, _
                New AsyncCallback(AddressOf ReadCallBack), rs)

        Catch ex As Exception
            allDone.Set()
            Console.WriteLine(String.Format("Path{0};Sn.No{1}", Me.BasicPath, msno))
        End Try

    End Sub

    ''' <summary>
    ''' Completion callback for each BeginRead: appends the decoded chunk and
    ''' schedules the next read, or finishes once the stream is exhausted or
    ''' the read timeout has elapsed.
    ''' </summary>
    ''' <param name="asyncResult">Async result whose AsyncState is the RequestState.</param>
    Sub ReadCallBack(ByVal asyncResult As IAsyncResult)
        Try
            Dim rs As RequestState = CType(asyncResult.AsyncState, RequestState)
            Dim responseStream As Stream = rs.ResponseStream

            Dim read As Integer = responseStream.EndRead(asyncResult)
            Dim elapsedSeconds As Double = _
                DateTime.UtcNow.Subtract(mStarted).TotalSeconds

            If read > 0 AndAlso elapsedSeconds < READ_TIMEOUT_SECONDS Then
                If mDecoder Is Nothing Then
                    mDecoder = Encoding.GetEncoding(encode).GetDecoder()
                End If

                ' The decoder carries incomplete byte sequences over to the next
                ' chunk, so a character cut in half by the buffer boundary is
                ' not corrupted.
                Dim charCount As Integer = _
                    mDecoder.GetCharCount(rs.BufferRead, 0, read)
                Dim chars(charCount - 1) As Char
                charCount = mDecoder.GetChars(rs.BufferRead, 0, read, chars, 0)
                rs.RequestData.Append(New String(chars, 0, charCount))

                responseStream.BeginRead(rs.BufferRead, 0, BUFFER_SIZE, _
                    New AsyncCallback(AddressOf ReadCallBack), rs)
            Else
                If rs.RequestData.Length > 1 Then
                    Von = rs.RequestData.ToString()
                End If

                responseStream.Close()
                rs.disposeres()

                allDone.Set()
            End If

        Catch ex As Exception
            Console.WriteLine("ReadCallBack")
            allDone.Set()
        End Try

    End Sub
End Class

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值