import csv

import lxml
import requests
from bs4 import BeautifulSoup
# URL of the Douban Books Top 250 listing; the scraping loop below passes it
# to requests.get together with a "start" query parameter.
base_url = "https://book.douban.com/top250?"

# HTTP headers sent with every request: a desktop browser User-Agent string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"
    ),
}
# Fetch the listing at base_url once per "start" offset and write one CSV row
# per <table> found on each page: title, info text, rating text.
#
# newline="" leaves line endings to the csv writer, and lineterminator="\n"
# makes it end every row with a single "\n".
with open("豆瓣图书2.csv", "w", encoding="utf-8", newline="") as pf:
    # csv.writer quotes fields that contain commas, quotes or newlines, so
    # such text cannot shift the columns of a row.
    writer = csv.writer(pf, lineterminator="\n")

    # Offsets 0, 25, 50, ..., 225 (ten requests in total).
    for start in range(0, 226, 25):
        params = {"start": start}
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(
            url=base_url, params=params, headers=headers, timeout=10
        )
        # Stop on an HTTP error status rather than parsing an error page and
        # silently producing no rows for this offset.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "lxml")
        for book in soup.find_all("table"):
            # The title is the text of the first <a> inside the table's
            # first <div>; a table without that structure is skipped.
            title_link = book.div.a if book.div is not None else None
            if title_link is None:
                continue
            # Collapse newlines and runs of whitespace left over from the
            # HTML layout into single spaces and trim both ends.
            title = " ".join(title_link.get_text().split())

            # Text of the first <p>. get_text() is used instead of .string
            # because .string is None when the tag has several children.
            info_tag = book.p
            info_list = info_tag.get_text() if info_tag is not None else ""

            # Text of the first span whose class attribute is exactly
            # "rating_nums"; left empty when the table has no such span.
            rating_tag = book.select_one('span[class="rating_nums"]')
            pingfen = rating_tag.get_text() if rating_tag is not None else ""

            writer.writerow([title, info_list, pingfen])