import requests
import re,os
# 1. Starting target: the site's home page.
shouyeurl = 'https://www.hifini.com/'
# Disguise: send a browser User-Agent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/126.0.0.0 Safari/537.36'
    ),
}
# 1. Send the request
def get_data(url, timeout=10):
    """Fetch ``url`` with HTTP GET and return the response body as text.

    The request is sent with the module-level ``headers``.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        ``response.text`` when the status code is 200. For any other
        status the code is printed and ``None`` is returned.

    Raises:
        requests.RequestException: Propagated unchanged when the request
            itself fails (connection error, timeout, ...).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    print(response.status_code)
    # Explicit so callers can see that a failed fetch yields None.
    return None
# 2. Parse the data
def parse_data(data):
    """Find thread links in the listing HTML and process each one.

    For every match the captured name fragment and the detail URL are
    printed, followed by a separator line, and the detail URL is handed
    to ``get_singer_link``.

    Args:
        data: HTML text of the listing page. ``None`` or an empty string
            (e.g. after a failed fetch) is accepted and results in no work.

    Returns:
        None.
    """
    if not data:
        # A failed fetch yields None; re.findall would raise TypeError on it.
        return

    # Raw strings keep ``\s`` a regex escape instead of an invalid Python
    # string escape.
    # Group 1 is the href value. Group 2 is everything between the href's
    # closing quote and ``</a>``, so it may still contain the rest of the
    # opening tag in front of the link text.
    thread_re = (
        r'<li\sclass="media\sthread\stap\s\s".*?'
        r'<div\sclass="subject\sbreak-all">.*?'
        r'<a\shref="(.*?)"(.*?)</a>'
    )
    for path, name in re.findall(thread_re, data, re.S):
        href = "https://hifini.com/" + path  # detail-page link
        print(name)
        print(href)
        print('---' * 12)
        get_singer_link(href)
# Send a request to the detail page
def get_singer_link(link):
    """Download the song(s) referenced on a detail page into ``歌曲/``.

    The detail page is fetched with ``get_data`` and searched for a
    ``music: [ ... title: '...', ... url: '...',`` fragment. Each
    (title, url) pair found is downloaded and written to
    ``歌曲/<title>.m4a``.

    Args:
        link: URL of the detail page.

    Returns:
        None. Nothing is downloaded when the page could not be fetched or
        contains no matching fragment.

    Raises:
        requests.RequestException: Propagated when a request fails.
        OSError: Propagated when the directory or file cannot be written.
    """
    from urllib.parse import urljoin

    song_html_data = get_data(link)
    if not song_html_data:
        # Fetch failed (get_data returned None); re.findall would raise.
        return

    # Example of a song URL as noted by the original author:
    # https://www.hifini.com/get_music.php?key=<...>&p=<...>
    song_re = r"music:\s\[.*?title:\s'(.*?)',.*?url:\s'(.*?)',"
    r = re.findall(song_re, song_html_data, re.S)
    print("歌曲信息:", r)
    if not r:
        return

    # Create the output directory once, before any download.
    os.makedirs('歌曲', exist_ok=True)

    for song_name, song_link in r:
        print("歌名:", song_name)
        print("歌曲链接:", song_link)
        print('=++++++++++++++++')

        # urljoin leaves an absolute URL untouched and resolves a relative
        # one against the detail page, so both forms can be requested.
        song_url = urljoin(link, song_link)
        response = requests.get(song_url, headers=headers, timeout=30)
        if response.status_code != 200:
            # Do not save an error page under an audio file name.
            print(response.status_code)
            continue
        data_byts = response.content

        # Replace characters that are not allowed in file names; the
        # backslash is included so a title cannot introduce a path separator.
        safe_name = re.sub(r'[\\/:*?"<>|]', '-', song_name)
        # os.path.join picks the right separator on every platform.
        target = os.path.join('歌曲', '{}.m4a'.format(safe_name))
        with open(target, 'wb') as f:
            f.write(data_byts)
if __name__ == '__main__':
    # Fetch the home page and hand its HTML straight to the parser.
    parse_data(get_data(shouyeurl))
# This code is for practice only and has no other purpose.