import requests
import time
import json
from contextlib import contextmanager
# Endpoint queried for the chart data.
url = "https://movie.douban.com/j/chart/top_list"

# Query-string parameters sent with every request.
params = {
    "type": 24,  # presumably a chart/genre id -- TODO confirm against the API
    "interval_id": "100:90",  # presumably a rating band -- TODO confirm
    "action": "",
    "start": 0,  # offset of the first item; advanced by the pagination loop
    "limit": 20,  # number of items requested per page
}

# Browser-like User-Agent header sent with each request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
    ),
}
@contextmanager
def request_data(url, params, headers):
    """Fetch ``url`` and yield its decoded JSON body.

    Args:
        url: Address to send the GET request to.
        params: Query-string parameters for the request.
        headers: HTTP headers for the request.

    Yields:
        The decoded JSON payload, or an empty list when the request fails,
        returns an error status, or the body is not valid JSON. The failure
        is reported on stdout instead of being raised, so a caller that
        paginates sees "no more data" rather than crashing.
    """
    data = []
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        # Using the response as a context manager closes it even for non-2xx
        # statuses (a plain truthiness test on a Response checks ``.ok``).
        with requests.get(
            url, params=params, headers=headers, timeout=10
        ) as response:
            response.raise_for_status()
            data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        print(f"请求失败: {e}")
    # Yield outside the try block so that exceptions raised by the caller's
    # ``with`` body are not mistaken for request failures.
    yield data
def get_data(start):
    """Yield successive pages of results, beginning at offset ``start``.

    Pages are requested until one comes back with fewer items than the page
    size, which marks the end of the data. That final short page is yielded
    too (unless it is empty), so no trailing items are lost.

    Args:
        start: Offset of the first item to request.

    Yields:
        The decoded JSON payload of each non-empty page.
    """
    page_size = params.get("limit", 20)
    while True:
        # Build per-request parameters instead of mutating the shared dict.
        page_params = {**params, "start": start}
        with request_data(url, page_params, headers) as response:
            page = response
        # Yield after leaving the ``with`` so the request context is not held
        # open while the consumer processes the page.
        if page:
            yield page
        if len(page) < page_size:
            print("数据获取完毕")
            break
        start += page_size
        print(f"数据获取完毕,开始获取第{start}页数据")
        # Pause between requests to avoid hammering the server.
        time.sleep(0.5)
# Append every fetched page to douban.json, one JSON object per line.
for page in get_data(0):
    with open("douban.json", "a", encoding="utf-8") as out_file:
        # ensure_ascii=False keeps non-ASCII text readable in the output file.
        out_file.writelines(
            json.dumps(entry, ensure_ascii=False) + "\n" for entry in page
        )