爬取网易云歌单里面音乐列表

最新推荐文章于 2024-10-21 13:22:35 发布

原创 · 最新推荐文章于 2024-10-21 13:22:35 发布 · 822 阅读

4 ·

CC 4.0 BY-SA版权

文章标签：

#python #正则表达式

爬虫专栏收录该内容

1 篇文章

订阅专栏

"""
爬取网易云歌单里面音乐列表
"""
import re
from html import unescape

import xlrd
import xlutils.copy
import xlwt
# The page source copied from the browser is expected in this HTML file.
HTML_PATH = '3.html'
# Existing workbook that receives the new sheet; it is read and then saved
# back under the same name.
WORKBOOK_PATH = '歌单.xls'
SHEET_NAME = 'sheet3'

# First pass: one <tr>...</tr> element per song. re.S lets '.' match newlines
# so a row spread over several lines is still captured as a whole.
ROW_PATTERN = re.compile(r'<tr.+?</tr>', flags=re.S)
# Second pass: capture the song name and the author from a single row.
NAME_AUTHOR_PATTERN = re.compile(
    r'data-res-name="(.+?)".+?data-res-author="(.+?)"', flags=re.S)


def split_rows(page_source):
    """Return every ``<tr>...</tr>`` fragment found in *page_source*."""
    return ROW_PATTERN.findall(page_source)


def extract_songs(rows):
    """Return ``(name, author)`` tuples extracted from the given table rows.

    Rows without both ``data-res-name`` and ``data-res-author`` attributes
    contribute nothing. The captured values come from HTML attributes, so
    character references such as ``&amp;`` are decoded before being returned.
    """
    return [
        (unescape(name), unescape(author))
        for row in rows
        for name, author in NAME_AUTHOR_PATTERN.findall(row)
    ]


def write_songs(songs):
    """Add a sheet listing *songs* to the workbook and save it in place.

    The first row holds the column headers; each song follows on its own row
    with the name in column 0 and the author in column 1.
    """
    source_book = xlrd.open_workbook(WORKBOOK_PATH)
    # xlrd workbooks are only read here; the copy is what gets modified.
    writable_book = xlutils.copy.copy(source_book)
    # NOTE(review): a second run against a workbook that already contains
    # this sheet name will presumably be rejected by add_sheet -- confirm and
    # decide whether the sheet should be reused or renamed.
    sheet = writable_book.add_sheet(SHEET_NAME, cell_overwrite_ok=True)
    sheet.write(0, 0, '歌名')
    sheet.write(0, 1, '作者')
    for row_index, (name, author) in enumerate(songs, start=1):
        sheet.write(row_index, 0, name)
        sheet.write(row_index, 1, author)
    writable_book.save(WORKBOOK_PATH)


def main():
    """Read the saved page, extract the songs and store them in the workbook."""
    # The file is read as bytes and decoded explicitly as UTF-8.
    with open(HTML_PATH, 'rb') as f:
        page_source = f.read().decode('utf-8')

    rows = split_rows(page_source)
    # Print the number of rows so the match can be checked for completeness.
    print(len(rows))

    songs = extract_songs(rows)
    # Print the first result as a sanity check; skipped when nothing matched
    # so an empty page no longer aborts with IndexError.
    if songs:
        print(songs[0])

    write_songs(songs)


if __name__ == '__main__':
    main()