#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Crawler: fetch the Baidu Zhidao hot search ranking
import urllib.request
import json
import xlwt
import os
# Fetch the hot word list from the hotword API
cnt = 50  # number of entries to request; the API appears to accept 1-50 only
# rn is the result count; t looks like a millisecond timestamp captured when the URL was copied
url = 'https://zhidao.baidu.com/question/api/hotword?rn=' + str(cnt) + '&t=1535421904906'
print(url)
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
req = urllib.request.Request(url, headers={'User-Agent': user_agent})
response = urllib.request.urlopen(req)
content = response.read().decode('utf-8')
#print(content)
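# A rough sketch of the JSON shape this script assumes, inferred from the
# fields read below (the real API payload may contain more fields or differ):
# {"data": [{"keyword": "...", "searches": 12345, "changeRate": 678,
#            "isNew": 0, "trend": "rise"}, ...]}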
workbook = xlwt.Workbook()
sheet1 = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
# Header row (kept in Chinese to match the scraped data):
# 排名 = rank, 新闻名称 = news title, 搜索人数 = number of searches,
# 变化数量 = change in searches, 新的新闻 = new entry, 热度上升 = trend
sheet1.write(0, 0, '排名')
sheet1.write(0, 1, '新闻名称')
sheet1.write(0, 2, '搜索人数')
sheet1.write(0, 3, '变化数量')
sheet1.write(0, 4, '新的新闻')
sheet1.write(0, 5, '热度上升')
dataList = json.loads(content)['data']
j = 1  # sheet row index; row 0 is the header
for data in dataList:
    print(data)
    sheet1.write(j, 0, j)                    # rank
    sheet1.write(j, 1, data['keyword'])      # news title
    sheet1.write(j, 2, data['searches'])     # number of searches
    sheet1.write(j, 3, data['changeRate'])   # change in searches
    # isNew flag: 0 -> 否 (no), 1 -> 是 (yes)
    isNew = data['isNew']
    if isNew == 0:
        isNew = '否'
    elif isNew == 1:
        isNew = '是'
    sheet1.write(j, 4, str(isNew))
    # Colour the trend cell: xlwt colour index 2 is red, 3 is green
    trend = data['trend']
    style5 = xlwt.XFStyle()
    font = xlwt.Font()
    style5.font = font
    if trend == 'fall':
        font.colour_index = 3
        trend = '下降'  # falling
    elif trend == 'rise':
        font.colour_index = 2
        trend = '上升'  # rising
    sheet1.write(j, 5, trend, style5)
    j = j + 1
# Save the workbook; an existing file with the same name is simply overwritten
path = 'D:\\Python'
if not os.path.isdir(path):
    os.makedirs(path)
paths = path + '\\'
filename = 'test1'
workbook.save('{}{}.xls'.format(paths, filename))
print('创建excel文件完成!')  # "Excel file created"
Baidu's hot search ranking shows up whenever you use the site; the code above is a small crawler that fetches that ranking and writes it to an Excel file.
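To sanity-check the output, here is a minimal read-back sketch using xlrd (assuming the workbook was saved to D:\Python\test1.xls as above; the path and xlrd itself are not part of the original script):

import xlrd

# Open the .xls produced above and print every row, header included
book = xlrd.open_workbook('D:\\Python\\test1.xls')
sheet = book.sheet_by_index(0)
for i in range(sheet.nrows):
    print(sheet.row_values(i))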