import requests
from bs4 import BeautifulSoup
# Send a regular desktop-browser User-Agent so the site does not reject the
# request as coming from a crawler.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    )
}

# Walk the listing by stepping the `start` query parameter from 0 to 225 in
# increments of 25, printing "successful"/"fail" per page followed by the
# titles found on that page.
for start_num in range(0, 250, 25):
    url = "https://movie.douban.com/top250?start={}&filter=".format(start_num)

    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        # Connection errors and timeouts are reported like any other failed
        # page so the remaining pages are still attempted.
        print("fail")
        continue

    if not response.ok:
        print("fail")
        continue

    print("successful")
    soup = BeautifulSoup(response.text, "html.parser")

    for title in soup.find_all("span", attrs={"class": "title"}):
        # get_text() always returns a str, whereas `.string` is None for a tag
        # with more than one child and would make the `in` test below raise.
        title_string = title.get_text()
        if not title_string:
            continue
        # Skip title spans whose text contains '/'.
        # NOTE(review): presumably these are the alternate-title spans --
        # confirm against the page markup.
        if "/" not in title_string:
            print(title_string)