import requests
from bs4 import BeautifulSoup
# Titles collected by getlist(), in the order they are encountered.
top250 = []


def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    Best-effort: any network or HTTP-status failure reported by
    ``requests`` yields an empty string instead of raising.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or ``''`` when the request fails.
    """
    hd = {'user-agent': 'Mozilla/5.0'}
    try:
        # The request itself must be inside the try block: connection
        # errors and timeouts are raised by get(), not raise_for_status().
        r = requests.get(url, headers=hd, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        return ''
    # Decode with the encoding detected from the content rather than the
    # one guessed from the HTTP headers.
    r.encoding = r.apparent_encoding
    return r.text


def _is_main_title(text):
    """Return True when *text* should be recorded as a title.

    Blank text is rejected, as is text whose first whitespace-separated
    token is ``'/'``.
    NOTE(review): the '/'-prefixed spans are presumably the alternate
    titles listed after the primary one -- confirm against the page markup.
    """
    tokens = text.split()
    # Checking for an empty token list avoids an IndexError on blank spans.
    return bool(tokens) and tokens[0] != '/'


def getlist(url):
    """Append the titles found on the page at *url* to ``top250``.

    Every ``<span class="title">`` element is inspected; those accepted
    by ``_is_main_title`` have their text appended to the module-level
    ``top250`` list. A failed download contributes nothing.

    Args:
        url: Address of one listing page.
    """
    html = getHTMLText(url)
    soup = BeautifulSoup(html, 'html.parser')
    spans = soup.find_all('span', attrs={'class': 'title'})
    top250.extend(span.text for span in spans if _is_main_title(span.text))


def main():
    """Scrape every listing page and write the collected titles to disk."""
    # Pages are addressed by start offset: 0, 25, ..., 225.
    for start in range(0, 250, 25):
        url = 'https://movie.douban.com/top250?start={}&filter='.format(start)
        getlist(url)
    path = r'D:\MyPython\Spyder\sele\top250.txt'
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(title + '\n' for title in top250)


if __name__ == '__main__':
    main()