------------------get 6642 Content--------------------------------------------
Form1.frm
'UpdateRunlight (Html Marquee Element) in vb6
'By wgscd 2008/11/7
'HTTP://www.blog.youkuaiyun.com/wgsnet
'QQ:153964481
VERSION 5.00
Object = "{EAB22AC0-30C1-11CF-A7EB-0000C05BAE0B}#1.1#0"; "shdocvw.dll"
Object = "{831FDD16-0C5C-11D2-A9FC-0000F8754DA1}#2.0#0"; "mscomctl.ocx"
Begin VB.Form Form1
BorderStyle = 1 'Fixed Single
Caption = "Form1"
ClientHeight = 3330
ClientLeft = 45
ClientTop = 330
ClientWidth = 8805
LinkTopic = "Form1"
MaxButton = 0 'False
MinButton = 0 'False
ScaleHeight = 3330
ScaleWidth = 8805
StartUpPosition = 3 'Windows Default
Begin VB.TextBox TextInitID
Height = 375
Left = 3840
TabIndex = 6
Text = "10000"
Top = 240
Width = 735
End
Begin VB.TextBox TextMaxid
Height = 375
Left = 5880
TabIndex = 5
Text = "16808"
Top = 240
Width = 1215
End
Begin MSComctlLib.ProgressBar ProgressBar1
Height = 255
Left = 240
TabIndex = 3
Top = 3000
Width = 8415
_ExtentX = 14843
_ExtentY = 450
_Version = 393216
Appearance = 1
End
Begin VB.TextBox Text1
Height = 1455
Left = 240
MultiLine = -1 'True
ScrollBars = 3 'Both
TabIndex = 2
Top = 1080
Width = 7695
End
Begin VB.CommandButton Command1
Caption = "Command1"
Height = 495
Left = 7200
TabIndex = 1
Top = 240
Width = 1215
End
Begin SHDocVwCtl.WebBrowser WebBrowser1
Height = 255
Left = 240
TabIndex = 0
Top = 1680
Width = 7695
ExtentX = 13573
ExtentY = 450
ViewMode = 0
Offline = 0
Silent = 0
RegisterAsBrowser= 0
RegisterAsDropTarget= 1
AutoArrange = 0 'False
NoClientEdge = 0 'False
AlignLeft = 0 'False
NoWebView = 0 'False
HideFileNames = 0 'False
SingleClick = 0 'False
SingleSelection = 0 'False
NoFolders = 0 'False
Transparent = 0 'False
ViewID = "{0057D0E0-3573-11CF-AE69-08002B2E1262}"
Location = "http://www.soso.com/"
End
Begin VB.Label Label3
Caption = "结束ID:"
Height = 375
Left = 5040
TabIndex = 8
Top = 240
Width = 855
End
Begin VB.Label Label2
Caption = "开始ID"
Height = 375
Left = 2760
TabIndex = 7
Top = 240
Width = 975
End
Begin VB.Label Label1
Height = 375
Left = 240
TabIndex = 4
Top = 2520
Width = 2415
End
End
Attribute VB_Name = "Form1"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
' Text accumulated by Command1_Click (one record per downloaded topic) before
' it is written to disk.
Dim strResult As String
' Highest topic ID, filled in by Form_Load. Declared Long because a 16-bit
' Integer overflows as soon as the IDs pass 32767.
Dim maxid As Long
' Form start-up: asks the server for the current highest topic ID and shows it
' in TextMaxid. When the request fails or the reply is not a number, the value
' already present in TextMaxid is left untouched.
Sub Form_Load()
    Dim reply As String

    ' GetHttpPage may raise (for example when the request cannot be sent) and
    ' the numeric conversion may overflow; neither should abort form loading.
    On Error GoTo KeepDefault

    reply = Trim$(GetHttpPage("http://www.haosemm.com/Thief/AccessTopicMaxid.asp"))
    If IsNumeric(reply) Then
        maxid = CLng(reply)
        TextMaxid.Text = maxid
    End If
    Exit Sub

KeepDefault:
    Err.Clear
End Sub
'http://www.haosemm.com/Thief/AccessTopic.asp?id=16709
'Thief/AccessTopicMaxID.asp
' Downloads every topic from the ID in TextInitID up to the ID in TextMaxid,
' appends each one to strResult in the form
'     [-title-]<field 3>[-content-]<field 4>[-itemspliter-]
' and finally writes the accumulated text to C:/1.txt.
' Topics whose download fails, or whose reply has fewer than five "<|>"
' separated fields, are skipped instead of raising "subscript out of range".
Private Sub Command1_Click()
    Const TOPIC_URL As String = "http://www.haosemm.com/Thief/AccessTopic.asp?id="
    Const FAILED As String = "$False$"   ' failure marker returned by GetHttpPage

    Dim firstId As Long
    Dim lastId As Long
    Dim i As Long
    Dim str As String
    Dim tempstr
    Dim record As String
    Dim rstr As String

    ' Reject input that the numeric conversions or the progress bar cannot take.
    If Not (IsNumeric(TextInitID.Text) And IsNumeric(TextMaxid.Text)) Then
        MsgBox "The start ID and the end ID must both be numbers."
        Exit Sub
    End If
    firstId = CLng(TextInitID.Text)
    lastId = CLng(TextMaxid.Text)
    If firstId < 0 Or lastId < 1 Then
        MsgBox "The start ID must be 0 or more and the end ID must be 1 or more."
        Exit Sub
    End If

    ' Script tag that is swapped for a plain link in every record.
    rstr = "<script language='JavaScript' src='http://www.sms888.net/web/script/photojet.js?uid=5494&t=1' type='text/javascript' id='SMS888_Pic' charset='gb2312'></script>"

    ' Start from an empty buffer so that a second click does not write the
    ' previous run's records again.
    strResult = ""
    ProgressBar1.Max = lastId

    For i = firstId To lastId
        ' A request that raises is treated like any other failed download so
        ' that one bad topic does not abort the whole run.
        str = FAILED
        On Error Resume Next
        str = GetHttpPage(TOPIC_URL & Format(i, "00000"))
        If Err.Number <> 0 Then
            str = FAILED
            Err.Clear
        End If
        On Error GoTo 0

        If str = "Null" Then
            ' NOTE(review): a literal "Null" reply is appended to the output
            ' as-is, exactly as before - confirm that this is wanted.
            strResult = strResult & str
        ElseIf str <> FAILED Then
            tempstr = Split(str, "<|>")
            If UBound(tempstr) >= 4 Then
                record = "[-title-]" & tempstr(3) & "[-content-]" & tempstr(4) & "[-itemspliter-]" & vbNewLine
                ' Only the new record needs the replacement; earlier records
                ' were already processed when they were appended.
                record = Replace(record, rstr, "<a target=""_blank"" href=""http://blog.sina.com.cn/s/blog_56fda1650100ao5p.html"">雪白少妇无法控制自己的强烈欲望</a>")
                strResult = strResult & record
            End If
        End If

        ProgressBar1.Value = i
        ProgressBar1.Refresh
        Me.Label1.Caption = i
        Me.Label1.Refresh
    Next

    SaveToFile "C:/1.txt", strResult
End Sub
' Fetches HttpUrl with a synchronous HTTP GET and returns the response body
' decoded as GB2312 text (via BytesToBstr).
' Returns the marker string "$False$" when
'   - HttpUrl is Null or is itself "$False$",
'   - the request does not reach ReadyState 4,
'   - the server answers 404, or
'   - any run-time error occurs while creating the object, sending the
'     request or decoding the body.
Function GetHttpPage(HttpUrl)
    Const FAILED = "$False$"
    Dim http

    ' Without an active handler the old trailing Err.Number check could never
    ' run: an error simply propagated to the caller.
    On Error GoTo RequestFailed

    GetHttpPage = FAILED
    If IsNull(HttpUrl) Then Exit Function
    If HttpUrl = FAILED Then Exit Function

    Set http = CreateObject("MicroSoft.XMLHTTP")
    http.Open "GET", HttpUrl, False
    http.Send

    If http.ReadyState = 4 Then
        ' 404 = page not found; every other status is passed through.
        If http.Status <> 404 Then
            GetHttpPage = BytesToBstr(http.responseBody, "GB2312")
        End If
    End If

    Set http = Nothing
    Exit Function

RequestFailed:
    Err.Clear
    Set http = Nothing
    GetHttpPage = FAILED
End Function
' Decodes the raw bytes in Body into a string using the character set named
' by Cset. The bytes are written to an ADODB stream in binary mode, then the
' same stream is switched to text mode and read back.
Function BytesToBstr(Body, Cset)
    Const STREAM_BINARY = 1
    Const STREAM_TEXT = 2
    Const MODE_READ_WRITE = 3

    Dim converter
    Set converter = CreateObject("adodb.stream")

    With converter
        .Type = STREAM_BINARY
        .Mode = MODE_READ_WRITE
        .Open
        .Write Body
        ' Rewind before changing the stream type and reading the text back.
        .position = 0
        .Type = STREAM_TEXT
        .Charset = Cset
        BytesToBstr = .ReadText
        .Close
    End With

    Set converter = Nothing
End Function
' Returns the part of ConStr that lies between StartStr and OverStr.
' The markers are located case-insensitively; the returned text keeps the
' original casing of ConStr.
'   IncluL - True to include StartStr itself in the result
'   IncluR - True to include OverStr itself in the result
' Returns "$False$" when an argument is empty, Null or "$False$", when a
' marker is not found, or when nothing lies between the markers.
Function GetBody(ConStr, StartStr, OverStr, IncluL, IncluR)
    Dim haystack, startMarker, endMarker
    Dim Start, Over

    GetBody = "$False$"
    If ConStr = "$False$" Or ConStr = "" Or IsNull(ConStr) = True Or StartStr = "" Or IsNull(StartStr) = True Or OverStr = "" Or IsNull(OverStr) = True Then
        Exit Function
    End If

    ' Work on lower-cased copies. Arguments are passed ByRef by default, so
    ' assigning to StartStr/OverStr would change the caller's variables.
    haystack = LCase(ConStr)
    startMarker = LCase(StartStr)
    endMarker = LCase(OverStr)

    ' Character-based search: the byte-based InStrB/LenB/MidB family can match
    ' in the middle of a two-byte character.
    Start = InStr(1, haystack, startMarker, vbBinaryCompare)
    If Start <= 0 Then Exit Function
    If IncluL = False Then Start = Start + Len(startMarker)

    Over = InStr(Start, haystack, endMarker, vbBinaryCompare)
    If Over <= 0 Or Over <= Start Then Exit Function
    If IncluR = True Then Over = Over + Len(endMarker)

    GetBody = Mid(ConStr, Start, Over - Start)
End Function
' Shows ErrMsg to the user in a message box.
' The previous body wrote a JavaScript alert through an object named
' "response", which is not declared anywhere in this form, so calling the
' function could only fail.
Function ShowErr(ErrMsg)
    ' "& """" turns a Null message into an empty string instead of raising.
    MsgBox ErrMsg & "", vbExclamation
End Function
'-------------------------------------------------
' Procedure  : RegUrl(TheStr)
' Description: Extracts the wanted HTML fragments from TheStr (by wgcd).
'              Every "<li><img style="cursor:pointer ... /></li>" fragment is
'              collected, the fragments are concatenated, and the surrounding
'              markup is rewritten into "|"-separated fields, one entry per
'              line.
' Returns    : String - the rewritten text (empty when nothing matched).
' Each match and the final text are also written to the Immediate window.
'----------------------------------------------------
Function RegUrl(TheStr)
    Dim regEx, Matches, hit
    Dim matchStr As String

    Set regEx = New RegExp
    regEx.IgnoreCase = True
    regEx.Global = True   ' find every occurrence, not only the first one
    regEx.Pattern = "(<li><img style=""cursor:pointer.*/></li>)"

    ' Collect all matching HTML fragments.
    Set Matches = regEx.Execute(TheStr)
    For Each hit In Matches
        ' The form has no List1 control, so the diagnostic output goes to the
        ' Immediate window instead.
        Debug.Print hit.Value
        matchStr = matchStr & hit.Value
    Next

    matchStr = Replace(matchStr, "<li><img style=""cursor:pointer; width:120px; height:90px"" alt=""", vbNewLine)
    matchStr = Replace(matchStr, """ title=""", "|")
    ' "onclick" is spelled with a Latin "o"; the earlier literal began with a
    ' Greek omicron and therefore never matched the page source.
    matchStr = Replace(matchStr, """ onclick=""window.open('/b/", "|http://www.showle.com/flvplayer.swf?auto=1&vid=")
    matchStr = regReplace(matchStr, "-.*html.*src=/""", "|")   ' regular-expression replace
    matchStr = Replace(matchStr, """ /></li>", "")

    Debug.Print matchStr
    RegUrl = matchStr
End Function
'--------------------------------------------------------
'-------------------------------------------------
' Procedure  : regReplace(str, patrn, rstr)
' Description: Regular-expression replace (by wgcd). Every match of the
'              pattern patrn in str is replaced with rstr.
' Returns    : String - the text after replacement.
'----------------------------------------------------
Function regReplace(str, patrn, rstr)
    Dim matcher
    Set matcher = New RegExp

    With matcher
        .Global = True        ' replace all matches, not just the first
        .Pattern = patrn
        regReplace = .Replace(str, rstr)
    End With
End Function
'----------------------------------------------------
'-------------------------------------------------
' Procedure  : SaveToFile(ByVal strFileName, strContent)
' Description: Saves text to a file (by wgcd). An existing file of the same
'              name is overwritten.
' Parameters : strFileName - name of the file to write
'              strContent  - text to put into the file
' Returns    : nothing is assigned to the function name.
'----------------------------------------------------
Function SaveToFile(ByVal strFileName, strContent)
    Dim fileSystem, outFile

    Set fileSystem = CreateObject("Scripting.FileSystemObject")
    ' Second argument True = overwrite an existing file.
    Set outFile = fileSystem.CreateTextFile(strFileName, True)

    With outFile
        .Write strContent
        .Close
    End With

    Set outFile = Nothing
    Set fileSystem = Nothing
End Function
' Click handler for a button named Command2: runs the text currently in Text1
' through RegUrl and puts the result back into Text1.
Private Sub Command2_Click()
    Dim extracted
    extracted = RegUrl(Me.Text1.Text)
    Me.Text1.Text = extracted
End Sub
' Fired whenever the embedded browser's status text changes. Sets the form
' caption to "OK" once the status text equals the expected string.
' NOTE(review): the compared text is presumably the browser's localized
' "done" message - confirm on the target system.
Private Sub WebBrowser1_StatusTextChange(ByVal Text As String)
    If Text <> "完毕" Then Exit Sub
    Me.Caption = "OK"
End Sub