"""
将数据写入Excel文件

Excel 2007以前的版本 ----> xls ----> xlwt(写)/ xlrd(读)
Excel 2007及以后的版本 ----> xlsx ----> openpyxl

一个工作簿(workbook)下可以创建多个工作表(worksheet)。
工作表是二维表,有行有列,行和列交汇的地方叫单元格(cell)。
"""
import re
import time
import random
import csv
import bs4
import requests
import xlwt
# Scrape the Douban Top 250 movie list (10 pages of 25 entries) and store
# each movie's title and rating in an .xls workbook.

# Create a workbook object.
wb = xlwt.Workbook()
# Add a worksheet through the workbook's add_sheet method.
sheet = wb.add_sheet('TOP250')
# Header row: column 0 holds the title, column 1 the rating.
sheet.write(0, 0, '标题')
sheet.write(0, 1, '评分')
# Next worksheet row to fill; row 0 is taken by the header.
row = 1
for page in range(10):
    try:
        resp = requests.get(
            url=f'https://movie.douban.com/top250?start={page * 25}',
            headers={
                # Send a real browser User-Agent string. The previous value was
                # a proxy-service API URL (including its app key), which is not
                # a user agent and leaked the key to the target site.
                'User-Agent': (
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                    'AppleWebKit/537.36 (KHTML, like Gecko) '
                    'Chrome/120.0.0.0 Safari/537.36'
                ),
            },
            # Without a timeout a stalled connection would block forever.
            timeout=10,
        )
    except requests.RequestException as err:
        # Skip this page but keep going, so rows collected so far are
        # still saved at the end instead of being lost to a crash.
        print(f'Request for page {page + 1} failed: {err}')
    else:
        if resp.status_code == 200:
            soup = bs4.BeautifulSoup(resp.text, 'html.parser')
            # span:nth-child(1) selects the span that is the first child of
            # the <a> tag.
            title_span_list = soup.select('div.info>div.hd>a>span:nth-child(1)')
            rating_span_list = soup.select('div.info>div.bd>div>span.rating_num')
            for title_span, rating_span in zip(title_span_list, rating_span_list):
                sheet.write(row, 0, title_span.text)
                sheet.write(row, 1, rating_span.text)
                row += 1
        else:
            # Report pages that were rejected rather than skipping silently.
            print(f'Page {page + 1} skipped: HTTP {resp.status_code}')
    # Pause 1-3 seconds (random) between page requests.
    time.sleep(random.randint(1, 3))
# Save the workbook.
wb.save('豆瓣电影.xls')