动态讲求 ,
翻页参数:
# -*- coding: utf-8 -*-
# 斌彬电脑
# @Time : 2018/9/1 0001 3:44
import requests,json
class DouBan:
def __init__(self):
#请求头部信息
self.headers = {'User-Agent':"Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Mobile Safari/537.36"}
self.offset = 0 # 翻页参数
self.n = 0 # 页码
self.file = open('douban.json', 'a', encoding='utf8')
def start_request(self):
self.n += 1 # 页码
print('第%s页'%(str(self.n)))
# url 与 翻页参数 数拼接
# url = 'https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=' +str(self.offset)+ '&limit=1'
url = 'https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=' +str(self.offset)+ '&limit=20'
res = requests.get(url,self.headers)
# res.content.decode() # 获取源码
cont = res.json() # 获取 json 数据
# return cont
self.write_file(cont)
self.file.write('\n#############第%s页##################\n\n'%(str(self.n)))
self.offset += 20
if self.offset > 600:
return
self.start_request() # 回调下自己,
def write_file(self,cont): # 保存数据
item = {} # 定义个字典 重装
for i in cont:
item['regions'] = i['regions'][0] # 地区
item['title'] = i['title'] # 电影名
item['url1'] = i['url']
item['release_date'] = i['release_date'] #时间
item1 = json.dumps(item, ensure_ascii=False ) # ensure_ascii=False 让 json 可读 数据转换
self.file.write(item1+'\n') # 写数据
if __name__ == '__main__':
spider = DouBan()
spider.start_request()
spider.file.close()