规律抓取游戏数据

<!--#include file="conn.asp" -->
<%
' ---- Import setup -------------------------------------------------------
' The import walks every detail page in one request, so the script timeout
' is raised far above the default.
Server.ScriptTimeout = 99999
Dim SQL

' Lookup tables loaded once up front. DB_Query hands back GetRows() data,
' which the loop below indexes as (column, row): column 0 = id, column 1 = name.
mobileList = DB_Query("select id,name from mobile")
spList = DB_Query("select id,name from provider")
' NOTE(review): cpList reads the same table as spList - confirm this is intended.
cpList = DB_Query("select id,name from provider")

'Set zk = Server.CreateObject("ZKLib.ZKComLib")

' Category names; the position in each array is turned into a numeric id by the loop.
GameTypeArr = Split("动作游戏|ACT,体育游戏", "|")
GameMboxArr = Split("动作,射击", ",")

' Read the locally saved index page and narrow it to the region between the
' two marker comments before extracting the per-game links.
webstr = SeparateHTML(FSOFileRead("games.sina.com.cn/info/cmgames/cmgames.html"), "<!--采集开始-->", "<!--采集结束-->")
' NOTE (from the original author): the extracted strings must not contain
' spaces, otherwise the import fails.
webstr = Manhunt(webstr, "<td width=""25%""> <A HREF=../..", "class=pink1", True, True, False, True)
' Manhunt terminates every hit with "$url$", so the last array element is empty.
webstrs = Split(webstr, "$url$")

' Counters: num is only referenced by commented-out debug code; errNum counts
' pages for which no game name could be extracted.
num = 0
errNum = 0
'for WVP_i=1 to ubound(webstrs)-1
for WVP_i=0 to ubound(webstrs)-1
    ' One iteration per detail-page path taken from the index page. The upper
    ' bound is ubound-1 because the link list ends with a "$url$" separator,
    ' which leaves an empty last element after Split.
    Dim GameName,GameTitlePic,P1,GameIsdownload,GameDownloadDesc,GameDowntype,IntroHtml,strText1,strText,strPic
    Dim GameIntro,controlHtml,control,controlStr,GameControl,GameDesc,GameStar,GameType,GameTypeID,PriceStr
    Dim GameMbox,GamePrice,GameSize,GameSP,MobileHtml,MobileStr,MobileIDStr,cpName,spName,GameDeveloper,GameServicer
    webtemp=FSOFileRead("games.sina.com.cn"&webstrs(WVP_i))

    ' Entries 208 and 223 are still parsed and echoed, but skipped at insert time.
    flagid = (WVP_i=208 or WVP_i=223)

    ' Game name - a page without one is treated as a failed page (see Else branch).
    GameName = SeparateHTML(webtemp,"<p align=""center""><B>","</B>")
    GameTitlePic=""
    if GameName<>"" then
        ' Thumbnail: keep only the file name and prefix it with "/".
        TitlePic = SeparateHTML(webtemp,"<table width=""90%"" border=""0"" cellspacing=""4"" cellpadding=""0"">",""" width=""100"" height=""75"" class=""img01"">")
        if instr(TitlePic,"/")>0 then
            TitlePicArr = split(TitlePic,"/")
            GameTitlePic = TitlePicArr(ubound(TitlePicArr))
        else
            GameTitlePic = TitlePic
        end if
        GameTitlePic = "/"&GameTitlePic

        ' Download availability and channels.
        GameIsdownload = 1
        GameDowntype = "|22|"
        ' Download-portal channel description.
        GameDownloadDesc = SeparateHTML(webtemp,"<b>下载渠道:</b>","<br>")
        ' Optional SMS-on-demand download channel.
        GameDownloadNote=""
        if instr(webtemp,"短信点播下载") then
            GameDownloadNote = SeparateHTML(webtemp,"短信点播下载</b></TD>","</TD>")
            GameDownloadNote=replace(GameDownloadNote,"<TD bgcolor=""F8F8DF"">","")
            GameDownloadNote=replace(GameDownloadNote,"<TR>","")
            GameDownloadNote=replace(GameDownloadNote,"</TR>","")
            GameDownloadNote=GameDownloadNote&"|"
            GameDowntype=GameDowntype&"23|" ' add the SMS channel id
        end if
        GameDownloadDesc="|"&GameDownloadDesc&"|"&GameDownloadNote

        ' Game introduction (HTML fragment with table scaffolding stripped).
        IntroHtml = SeparateHTML(webtemp,"id=""table315"">","</TD>")
        IntroHtml=replace(IntroHtml,"<TD style=""font-size: 12px; color: #000000; line-height: 150%"">","")
        IntroHtml=replace(IntroHtml,"<TBODY>","")
        IntroHtml=replace(IntroHtml,"<TR>","")

        ' Short description: tag-free, space-free intro cut at a display width
        ' of 130 (double-byte characters count as 2), followed by "...".
        GameDesc = Cut_Title(replace(RemoveHTML(IntroHtml)," ",""),130)
        GameDesc=GameDesc&"..."

        ' Screenshots: rebuild <img> tags that point at the bare file names.
        strText1 = SeparateHTML(webtemp,"id=""table314"">","</tr>")
        ' NOTE(review): strText is computed but never read; Manhunt is given strText1.
        strText = replace(strText1,"'","")
        strPic = Manhunt(strText1,"<img class=img01 src=","></td>",true,true,false,true)
        PicArr = Split(strPic,"$url$")
        strPicName=""
        for picNum=0 to ubound(PicArr)
            if PicArr(picNum)&"" <>"" then
                PicNameArr=split(PicArr(picNum),"/")
                PicName=PicNameArr(ubound(PicNameArr))
                strPicName = strPicName & "<img src=/"&PicName&" border=0>  "
            end if
        Next
        ' Introduction followed by the screenshots.
        GameIntro = IntroHtml &"<DIV align=center>"&strPicName&"</DIV>"

        ' Game controls (kept as the raw HTML fragment).
        controlHtml = SeparateHTML(webtemp,"<TD style=""font-size: 12px; color:#000000; line-height:150%"">","<TD>")
        GameControl=controlHtml

        ' Fixed values: difficulty and typical play time.
        GameDiff = 1
        GameCanplay = "5分钟"

        ' Game type: text between the thumbnail and "]</td>", minus its first
        ' three characters. A missing or too-short marker yields "" instead of
        ' the runtime error Right() raises for a negative length.
        GameOnline = 0
        GameType = SeparateHTML(webtemp,"class=""img01""><br>","]</td>")
        if len(GameType)>3 then
            GameType=right(GameType,len(GameType)-3)
        else
            GameType=""
        end if

        ' Map the type name to an id (array index + 4). The default is one past
        ' the last known type. An empty type is not looked up, because
        ' InStr(x, "") returns 1 and would silently select the first category.
        GameTypeID = ubound(GameTypeArr) + 4
        if GameType<>"" then
            for gameNum=0 to ubound(GameTypeArr)
                if instr(GameTypeArr(gameNum),GameType)>0 then
                    GameTypeID = gameNum+4
                    exit for
                end if
            next
        end if
        GameTypeID = "|"&GameTypeID&"|"

        ' Same lookup against the second category list (index + 19, default 28).
        GameMbox = 28
        if GameType<>"" then
            for gameNum2=0 to ubound(GameMboxArr)
                if instr(GameMboxArr(gameNum2),GameType)>0 then
                    GameMbox = gameNum2+19
                    exit for
                end if
            next
        end if
        GameMbox = "|"&GameMbox&"|"

        GameCountry = 6
        GameMarket = 1
        GameMarketDate = "2005"
        GameIsCharge = 1

        ' Price text, stored as scraped.
        GamePrice = SeparateHTML(webtemp,"<b>收费价格:</b>","</td>")

        GameVersion = "正式版"
        ' Game size text, stored as scraped.
        GameSize = SeparateHTML(webtemp,"<b>游戏容量</b>:","</td>")

        ' NOTE(review): GameMbox is "|<number>|" at this point, so this test can
        ' never match and GameSP is always 20 - confirm which field was meant.
        if Instr(GameMbox,"联通")>0 then
            GameSP = 21
        else
            GameSP = 20
        end if

        GameSystem = 12

        ' Supported handsets: collect the id of every known model name that
        ' appears in the handset table. DB_Query returns 0 (not an array) when
        ' the lookup table is empty, so guard before UBound.
        MobileHtml = SeparateHTML(webtemp,"<table width=""120"">","</td>")
        MobileIDStr=""
        if IsArray(mobileList) then
            For n=0 to ubound(mobileList,2)
                if instr(MobileHtml,mobileList(1,n))>0 then
                    MobileIDStr = MobileIDStr&"|"&mobileList(0,n)
                end if
            next
        end if
        MobileIDStr = MobileIDStr & "|"

        ' Developer: id of the provider whose name matches exactly, else 0.
        GameDeveloper=0
        cpName = SeparateHTML(webtemp,"<b>开发厂商:</b>","<br>")
        if cpName<>"" and IsArray(cpList) then
            for cp=0 to ubound(cpList,2)
                if cpList(1,cp)=cpName then
                    GameDeveloper=cpList(0,cp)
                    exit for
                end if
            Next
        end if

        ' Operator: id of the provider whose name matches exactly, else 0.
        GameServicer=0
        spName = SeparateHTML(webtemp,"<b>运营厂商:</b>","<br>")
        if spName<>"" and IsArray(spList) then
            for sp=0 to ubound(spList,2)
                if spList(1,sp)=spName then
                    GameServicer=spList(0,sp)
                    exit for
                end if
            Next
        end if

        ' Progress output: "<name>-<index>|".
        response.write gamename&"-"&WVP_i&"|"

        if not flagid then
            ' GameName comes from scraped HTML, so single quotes are doubled
            ' before the value is embedded in the statement. GameTypeID is
            ' built locally as "|<number>|" and needs no escaping.
            SQL = "INSERT INTO table(name,id) "&_
                  "VALUES('"&replace(GameName,"'","''")&"','"&GameTypeID&"');"&vbcrlf
            conn.execute(SQL)
        end if

    else
        ' No game name found: count the page as failed and list its path.
        errNum = errNum + 1
        response.write errNum & ".  <a href=" & webstrs(WVP_i)&" target=_blank>"&webstrs(WVP_i)&"</a><br/>"
        response.Flush()
    end if
    'if (WVP_i+1) mod 10=0 then zk.sleepEx(20)
Next


Function getMobile(patrn, strng)
  ' Runs the regular expression patrn (case-insensitive, global) over strng
  ' and returns every hit joined as "|hit1|hit2...".
  '   patrn - regular expression; when it contains a capture group, the first
  '           group of each match is collected, otherwise the whole match.
  '   strng - text to search.
  ' Returns Empty when nothing matches.
  Dim regEx,Match,Matches,strText
  Set regEx = New RegExp
  regEx.Pattern= patrn
  regEx.IgnoreCase = True
  regEx.Global = True
  Set Matches=regEx.Execute(strng)
  For each Match in Matches
      ' SubMatches(0) raises a runtime error when the pattern has no capture
      ' group, so fall back to the full match text in that case.
      If Match.SubMatches.Count > 0 Then
          strText=strText& "|"&Match.SubMatches(0)
      Else
          strText=strText& "|"&Match.Value
      End If
  Next
  set Matches=nothing
  set regEx=nothing
  getMobile=strText
End Function

Function Cut_Title(Title,TLen)
    ' Returns the leading part of Title, cut once its display width exceeds TLen.
    '   Title - text to shorten.
    '   TLen  - width limit; a character whose Abs(Asc()) value is above 255
    '           counts as 2, every other character as 1.
    ' The character that pushes the width past TLen is still included, so the
    ' result can be up to one character wider than TLen. Titles that fit are
    ' returned unchanged.
    ' NOTE(review): the width rule relies on Asc returning a value outside
    ' 0-255 for double-byte characters under the active code page - confirm.
    Dim i, width, ch, result

    width = 0
    result = ""
    For i = 1 To Len(Title)
        ch = Mid(Title, i, 1)
        If Abs(Asc(ch)) > 255 Then
            width = width + 2
        Else
            width = width + 1
        End If
        result = result & ch
        If CLng(width) > CLng(TLen) Then Exit For
    Next

    Cut_Title = result
End Function

Function StrLen(strText)
    ' Display width of strText: every character counts 1, plus 1 more when
    ' Abs(Asc()) of the character is above 255.
    Dim idx, total

    total = 0
    For idx = 1 To Len(strText)
        total = total + 1
        If Abs(Asc(Mid(strText, idx, 1))) > 255 Then total = total + 1
    Next

    StrLen = total
End Function

'/*************** Strip HTML tags *************/
Function RemoveHTML(strText)
    ' Returns strText with every "<...>" sequence removed; text between tags
    ' is left as it is.
    Dim tagFilter

    Set tagFilter = New RegExp
    With tagFilter
        .Global = True
        .Pattern = "<[^>]*>"
        RemoveHTML = .Replace(strText, "")
    End With
End Function

'/************** Read a text file from the site ***************/
Function FSOFileRead(filename)
    ' Returns the full text of a file, or "" when the file is missing or empty.
    '   filename - virtual path, resolved with Server.MapPath.
    ' The file is opened read-only and is never created. The previous version
    ' opened with create=True, which left an empty file behind for every
    ' missing page and then failed in ReadAll ("Input past end of file").
    Dim objFSO, objFile, fullPath

    FSOFileRead = ""
    Set objFSO = Server.CreateObject("Scripting.FileSystemObject")
    fullPath = Server.MapPath(filename)

    If objFSO.FileExists(fullPath) Then
        Set objFile = objFSO.OpenTextFile(fullPath, 1, False)
        ' ReadAll raises an error on an empty stream, so check first.
        If Not objFile.AtEndOfStream Then FSOFileRead = objFile.ReadAll
        objFile.Close
        Set objFile = Nothing
    End If

    Set objFSO = Nothing
End Function

'/*************** Extract the text between two markers ***************/
Function SeparateHTML(strHTML,ben,und)
    ' Returns the text between the first occurrence of ben and the first
    ' occurrence of und that follows it (case-sensitive).
    '   strHTML - text to search.
    '   ben     - start marker (not included in the result).
    '   und     - end marker (not included in the result).
    ' Returns "" when ben is missing or when no und follows it. The previous
    ' version raised a runtime error when und appeared only before ben,
    ' because the computed length went negative. Character-based InStr/Mid are
    ' used instead of the byte variants so a marker can never match at a
    ' half-character offset.
    Dim startPos, contentPos, endPos

    SeparateHTML = ""
    startPos = InStr(1, strHTML, ben, vbBinaryCompare)
    If startPos > 0 Then
        contentPos = startPos + Len(ben)
        endPos = InStr(contentPos, strHTML, und, vbBinaryCompare)
        If endPos > 0 Then
            SeparateHTML = Mid(strHTML, contentPos, endPos - contentPos)
        End If
    End If
End Function

'/************** Collect every fragment delimited by two labels.
' Each hit is terminated with "$url$" so the caller can Split the result into
' an array and refine the pieces further, e.g. with SeparateHTML. **************/
Function Manhunt(webstr,Label1,Label2,IgnoreCase,Global,Include,Stir)
    ' Finds "<Label1>...<Label2>" fragments in webstr with a regular expression.
    '   webstr         - text to search.
    '   Label1, Label2 - start and end labels; they are inserted into the
    '                    pattern as-is, so regex metacharacters stay active.
    '   IgnoreCase     - passed to RegExp.IgnoreCase.
    '   Global         - True: collect all hits, each followed by "$url$";
    '                    False: first hit only, with "$url$" removed.
    '   Include        - False: strip the label text from the result.
    '   Stir           - True: remove double and single quotes from the result.
    ' Returns "" when nothing matches.
    Dim finder, hit, hits, collected

    collected = ""
    Set finder = New RegExp
    finder.IgnoreCase = IgnoreCase
    finder.Global = Global
    finder.Pattern = "(" & Label1 & ").+?(" & Label2 & ")"

    Set hits = finder.Execute(webstr)
    For Each hit In hits
        collected = collected & hit.Value & "$url$"
    Next
    Set hits = Nothing

    ' Label stripping runs before quote stripping.
    If Not Include Then
        finder.Pattern = "(" & Label1 & ")"
        collected = finder.Replace(collected, "")
        finder.Pattern = "(" & Label2 & ")"
        collected = finder.Replace(collected, "")
    End If

    If Stir Then
        collected = Replace(Replace(collected, """", ""), "'", "")
    End If

    If Not Global Then collected = Replace(collected, "$url$", "")

    Set finder = Nothing
    Manhunt = collected
End Function

' Runs sSQL through the connection object Conn and returns the result rows.
'   sSQL - SQL text passed unchanged to Conn.Execute.
' Returns the array produced by Recordset.GetRows() (callers in this file
' index it as (column, row)), or the number 0 when the recordset has no rows.
' Callers that use UBound on the result therefore need to handle the
' non-array 0 value.
' NOTE(review): Conn, QueryTotal and writeErrMsg are not defined in this file -
' presumably they come from conn.asp; confirm.
Function DB_Query(sSQL)
' Errors are suppressed for the whole function and inspected once at the end,
' so a failing Execute is only reported after the remaining statements ran.
On Error Resume Next
Err.Clear

Set Rs=Conn.Execute(sSQL)
If Not Rs.EOF And Not Rs.BOF Then
DB_Query=Rs.GetRows()
Else
' No rows: a plain 0 is returned instead of an array.
DB_Query=0
End If
Rs.Close

' Running count of executed queries.
QueryTotal=QueryTotal+1

' Report any error raised above through writeErrMsg.
If Err Then
ErrMsg="查询数据的时候发现错误。系统已关闭"
Call writeErrMsg(ErrMsg)
End If
End Function
%>
### 收集和分析游戏评论的舆情数据

#### 1. 数据收集

对于游戏评论的数据收集,通常会采用网络爬虫技术来获取目标社交平台上用户的反馈信息。例如,在天涯社区中抓取特定事件下的992条评论信息[^1]。这些信息不仅限于文本内容本身,还包括诸如ID、主题用户、评论点赞数、评论被追评数以及时间和情绪评分等元数据。

针对具体的游戏如《黑神话:悟空》,可以通过编写定制化的Python脚本访问B站API接口或其他公开可用的应用程序编程接口(API),从而自动化地提取预告片下方观众留下的意见表达[^3]。此过程可能涉及身份验证机制以遵循各网站的服务条款并确保合法合规操作。

```python
import requests

def fetch_comments(api_url, params):
    response = requests.get(url=api_url, params=params)
    if response.status_code == 200:
        data = response.json()
        comments = []
        for item in data['items']:
            comment_info = {
                'id': item['id'],
                'author': item['snippet']['topLevelComment']['snippet']['authorDisplayName'],
                'text': item['snippet']['topLevelComment']['snippet']['textDisplay'],
                'likeCount': item['snippet']['topLevelComment']['snippet']['likeCount'],
                'publishedAt': item['snippet']['topLevelComment']['snippet']['publishedAt']
            }
            comments.append(comment_info)
        return comments
    else:
        raise Exception(f"Failed to retrieve comments with status code {response.status_code}")
```

#### 2. 舆情数据分析

一旦获得了足够的原始资料之后,则需对其进行预处理以便后续更深层次的理解工作能够顺利开展。这一步骤往往包含了去除噪声(比如HTML标签)、分词处理、停用词过滤等一系列自然语言处理(NLP)任务;随后借助机器学习算法或是专用库来进行情感倾向度量——即判断每一条留言所传达出来的正面还是负面态度,并计算相应的得分情况。

最后,通过图表形式展示上述所得结论有助于更加直观清晰地呈现整体趋势变化规律。例如制作柱状图比较不同时间段内正负向声音的比例关系,亦或是构建热力地图反映地理区域间的差异特征等等[^2]。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值