# Browser User-Agent strings; one is chosen at random for every outgoing
# request so the fetch looks like it comes from an ordinary browser.
_USER_AGENTS = (
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
)


def _extract_element(html, opening, tag):
    """Return the element starting at *opening*, up to its matching close tag.

    Used to pick one fragment out of a page, e.g. everything from
    ``<div id="pager">`` to the ``</div>`` that closes it.

    Args:
        html: Page markup as text.
        opening: Literal text that begins the wanted element, for example
            ``'<div id="pager">'``. It is located with a plain substring
            search.
        tag: Tag name (for example ``'div'``) whose nested open/close pairs
            are counted to find the matching close tag.

    Returns:
        The slice of *html* from *opening* through the ``>`` of the matching
        close tag. An empty string if *opening* is absent. The rest of the
        document from *opening* onward if the markup is unbalanced.
    """
    import re

    start = html.find(opening)
    if start == -1:
        # Empty string of the same type as the input.
        return html[:0]

    # Match real tags only ("<div" / "</div"), not the bare word inside
    # text or attribute values such as "individual".
    tag_re = re.compile(r'<(/?)' + re.escape(tag) + r'\b', re.IGNORECASE)

    depth = 1  # the element introduced by `opening` is already open
    for match in tag_re.finditer(html, start + len(opening)):
        if not match.group(1):
            depth += 1
            continue
        depth -= 1
        if depth == 0:
            end = html.find('>', match.end())
            if end == -1:
                break
            return html[start:end + 1]

    # Unbalanced markup: keep everything from the opening marker onward.
    return html[start:]


def _append_slash_to_vip_links(html):
    """Append ``/`` to every double-quote-delimited piece containing ``vip``.

    The markup is split on ``"``; each piece that contains the substring
    ``vip`` gets exactly one trailing slash, and the pieces are joined back
    with ``"``.
    """
    return '"'.join(
        piece + '/' if 'vip' in piece else piece
        for piece in html.split('"')
    )


def _fetch_search_fragments(url):
    """Download *url* and return ``[body, foot]`` fragments of the page.

    ``body`` is the ``<div id="result"`` element. ``foot`` is the
    ``<div id="pager">`` element with every ``/q?`` replaced by
    ``/vip/search/`` and a trailing slash appended to the rewritten links.
    Both are text decoded from GBK.
    """
    from random import choice
    try:
        from urllib import FancyURLopener  # Python 2
    except ImportError:
        from urllib.request import FancyURLopener  # Python 3 (deprecated API)

    # Defined per call so that a fresh random User-Agent is picked each time.
    class _RandomAgentOpener(FancyURLopener, object):
        version = choice(_USER_AGENTS)

    response = _RandomAgentOpener().open(url)
    try:
        raw = response.read()
    finally:
        response.close()

    # Decode once, before searching: matching ASCII tag names inside raw
    # multi-byte GBK data could hit the middle of a character. Undecodable
    # bytes are replaced instead of failing the whole request.
    page = raw.decode('gbk', 'replace')

    body = _extract_element(page, '<div id="result"', 'div')
    pager = _extract_element(page, '<div id="pager">', 'div')
    foot = _append_slash_to_vip_links(pager.replace('/q?', '/vip/search/'))
    return [body, foot]


def sousousearch(request, url):
    """Fetch a soso.com result page and render its result list and pager.

    Args:
        request: The incoming request object; not used by this function.
        url: Query string appended verbatim to ``http://www.soso.com/q?``.

    Returns:
        Whatever ``render_to_response`` returns for ``vip/search.html`` with
        the context keys ``body`` and ``foot``. ``render_to_response`` is
        not defined in this block; presumably it is Django's shortcut
        imported at the top of the file -- confirm.
    """
    # NOTE(review): both fragments are markup taken from a remote page; how
    # the template escapes them is not visible here.
    body, foot = _fetch_search_fragments('http://www.soso.com/q?' + url)
    return render_to_response('vip/search.html', {'body': body, 'foot': foot})
# Note: this view renders its output with a Django template.