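"""Fetch the first 3 pages of KuGou Music's Douyin hot-song chart.

Key fields: rank, singer, song title, duration.
Pages are downloaded with requests,
parsed with BeautifulSoup,
and the results are saved to an Excel spreadsheet.
"""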
from bs4 import BeautifulSoup
import requests
import time
import xlwt
# Browser-like User-Agent header sent along with every page request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) '
        'Gecko/20100101 Firefox/102.0'
    ),
}
# Split a chart entry's "singer - song" title into its two parts.
def _split_title(full_title):
    """Return ``(singer, song)`` extracted from a combined title string.

    The spaced separator ' - ' is tried first so that hyphenated names are
    kept intact; a bare '-' is the fallback. Only the first separator splits
    the string, so a song name containing further hyphens is preserved.
    When no separator is present the whole text is returned as the song and
    the singer is left empty.
    """
    for separator in (' - ', '-'):
        singer, found, song = full_title.partition(separator)
        if found:
            return singer.strip(), song.strip()
    return '', full_title.strip()


# Fetch the HTML of one page of the Douyin hot-song chart and extract its rows.
def get_info(url):
    """Yield one dict per song listed on a chart page.

    Args:
        url: Address of a single chart page.

    Yields:
        dict: String values under the keys 'rank', 'singer', 'song' and
        'time' (the text of the duration cell).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly instead of parsing an error page into zero rows.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    # Rank cells
    ranks = soup.select('.pc_temp_num')
    # Cells whose 'title' attribute holds both singer and song name
    titles = soup.select('.pc_temp_songname')
    # Duration cells
    durations = soup.select('.pc_temp_time')
    # 'duration' rather than 'time' so the imported time module is not shadowed.
    for rank, title, duration in zip(ranks, titles, durations):
        singer, song = _split_title(title['title'])
        yield {
            'rank': rank.get_text().strip(),
            'singer': singer,
            'song': song,
            'time': duration.get_text().strip(),
        }
# Script entry point: scrape chart pages 1-3 and store the rows in info.xls.
if __name__ == '__main__':
    urls = [
        'https://www.kugou.com/yy/rank/home/{}-52144.html?from=rank'.format(page)
        for page in range(1, 4)
    ]
    book = xlwt.Workbook(encoding='utf-8')
    sheet1 = book.add_sheet('sheet1')
    # Captions follow the column order written below: rank, singer, song,
    # duration (the singer and song captions were previously swapped).
    header = ['排名', '歌手', '歌名', '时长']
    for col, caption in enumerate(header):
        sheet1.write(0, col, caption)
    # Keys of each get_info() record, in the same order as the captions.
    fields = ('rank', 'singer', 'song', 'time')
    row = 1  # row 0 holds the captions
    for url in urls:
        for info in get_info(url):
            for col, key in enumerate(fields):
                sheet1.write(row, col, info[key])
            row += 1
        # Pause once per page request rather than once per spreadsheet row.
        time.sleep(0.5)
    book.save('info.xls')