import re
import urllib
import urllib.request

import demjson
import xlwt
from bs4 import BeautifulSoup
def gydzf():
    """Scrape Hexun's 2011-12-31 social-responsibility ranking and save it to getValue.xls.

    Fetches 134 pages of JSONP data from stockdata.stock.hexun.com, parses
    each record, and writes one row per stock to an Excel worksheet.

    Side effects: network requests, stdout progress lines, and a file
    ``getValue.xls`` written in the current directory.
    """
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet("爬虫数据")
    # Hoisted out of the loop: the headers are identical for every request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
    }
    num = 0  # next worksheet row; doubles as a progress counter
    for k in range(1, 135):
        url = "http://stockdata.stock.hexun.com/zrbg/data/zrbList.aspx?date=2011-12-31&count=20&pname=20&titType=null&page=" + str(k) + "&callback=hxbase_json11621778963605"
        request = urllib.request.Request(url, headers=headers)
        # Close the HTTP response deterministically (the original leaked it).
        with urllib.request.urlopen(request) as response:
            contents = response.read()
        # BeautifulSoup is kept for its charset detection; the payload is Chinese text.
        text = str(BeautifulSoup(contents, "html.parser"))
        # The body is JSONP: callback({...}). Strip the wrapper by locating the
        # outermost parentheses rather than a hard-coded 13-char slice, which
        # silently breaks if the echoed callback name ever changes. (For the
        # expected "hxbase_json1(" prefix this is identical to the old [13:-1].)
        t = text[text.find("(") + 1 : text.rfind(")")]
        # demjson tolerates the non-strict JSON (e.g. single quotes) the site returns.
        data = demjson.decode(t)
        for i in data["list"]:
            # Column meanings inferred from the original pinyin variable names
            # (xuhao/gupiao/defen/...) — confirm against the site's schema.
            row = [
                int(i["Number"]),           # 序号: ranking number
                i["industry"],              # 股票: stock name
                float(i["industryrate"]),   # 得分: total score
                i["Pricelimit"],            # 等级: grade
                float(i["stockNumber"]),    # 股东: shareholder score
                float(i["lootingchips"]),   # 员工: employee score
                float(i["Scramble"]),       # 供应: supplier/customer score
                float(i["rscramble"]),      # 环境: environment score
                float(i["Strongstock"]),    # 社会: society score
            ]
            for j, value in enumerate(row):
                worksheet.write(num, j, value)
            num += 1
            print(num)  # progress: one line per stock written
    workbook.save("getValue.xls")
if __name__ == '__main__':
    # Run the scraper only when executed as a script, not on import.
    gydzf()