import requests
from bs4 import BeautifulSoup
import pandas as pd
# Fetch the movie titles from the Douban Top 250 list
def get_movies():
    """Scrape the movie titles from the Douban Top 250 list.

    Requests the ten list pages (``start=0, 25, ..., 225``) one after
    another, prints each page's HTTP status code and every title found,
    and collects the titles in page order.

    Returns:
        list: The stripped text of the first ``<span>`` inside the link of
        every ``<div class="hd">`` element, as strings. Pages that answer
        with an error status contribute no titles.

    Raises:
        requests.RequestException: If a request fails or exceeds the
            10-second timeout.
    """
    # The header name must be spelled "User-Agent". The previous key,
    # "UserAgent", is a different header name, so the browser agent string
    # below was never presented to the server as the User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'}
    title_list = []
    for i in range(0, 10):
        # Each page lists 25 movies; "start" is the offset of the first one.
        link = 'https://movie.douban.com/top250?start=' + str(i * 25)
        r = requests.get(link, headers=headers, timeout=10)
        print(str(i+1)+'页面响应码状态:', r.status_code)
        if not r.ok:
            # An error response carries no movie entries; skip parsing it.
            continue
        soup = BeautifulSoup(r.text, 'lxml')
        movies_titles = soup.find_all('div', class_='hd')
        for each in movies_titles:
            # The first <span> inside the entry's link holds the title.
            getTitle = each.a.span.text.strip()
            print(getTitle)
            title_list.append(getTitle)
    return title_list
# Fetch every title up front; the statements below work on this list.
movies = get_movies()

# Dump the scraped titles into a single-column CSV file.
name = ['movies_name']
test = pd.DataFrame(data=movies, columns=name)
test.to_csv('e:/movies.csv', encoding='gbk')
# Save the scraped data to a local txt file
# with open('movies.txt','w',encoding='utf-
# Requests: scraping the titles of Douban's Top 250 movies
# (Page footer: latest recommended article published 2021-11-25 15:50:55)
