# -*- encoding=utf-8 -*-
"""
@coder: github@akin
@since: 2022/6/8上午8:43
@desc: 根据一个BV号码,抓取一个BiliBili视频
"""
"""
实现一个爬虫需求?(装大象的过程?)
1、明确需求
想要获取的内容是什么?(视频内容,视频标题,视频概述)通过f12抓包分析
2、发送请求
3、获取数据
4、解析数据
5、保存数据
"""
# 测试BV号码:BV1fr4y1G7e8
# 找到这个BV号的网页,右键查看源代码,搜索 "playinfo",都找到了
# <script>window.__playinfo__={"code":0,"message":"0","ttl":1,"data":{"from
# print("helo")
# https://www.bilibili.com/video/BV1fr4y1G7e8
# Standard library
import json
import os
import pprint
import re
# Process module (used to invoke ffmpeg)
import subprocess

# Third-party HTTP client
import requests
# Page of the video to download (test BV id: BV1fr4y1G7e8).
url = "https://www.bilibili.com/video/BV1fr4y1G7e8"
# Browser-like request headers. The referer doubles as the anti-hotlinking
# token: without it the media URLs answer 403 Forbidden.
headers = {
    'referer': 'https://www.bilibili.com/',
    'user-agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36",
}


def sanitize_title(title):
    """Return *title* with characters that are illegal in file names removed.

    Falls back to ``'video'`` when nothing usable is left, so the result can
    always be used as a file-name stem.
    """
    cleaned = re.sub(r'[\\/:*?"<>|]', '', title).strip()
    return cleaned or 'video'


def parse_page(html):
    """Extract ``(title, audio_url, video_url)`` from a video page's source.

    The title comes from the ``<h1 title="...">`` tag (sanitized for use as a
    file name); the stream URLs come from the JSON assigned to
    ``window.__playinfo__`` in an inline ``<script>``.

    Raises:
        ValueError: if the title or the playinfo script is not in *html*.
    """
    title_match = re.search('<h1 title="(.*?)"', html)
    if title_match is None:
        raise ValueError('video title not found in page source')
    playinfo_match = re.search(
        '<script>window.__playinfo__=(.*?)</script>', html)
    if playinfo_match is None:
        raise ValueError('window.__playinfo__ not found in page source')
    # The script payload is a JSON string; decode it into a dict.
    dash = json.loads(playinfo_match.group(1))['data']['dash']
    # Take the first listed audio and video stream.
    return (
        sanitize_title(title_match.group(1)),
        dash['audio'][0]['baseUrl'],
        dash['video'][0]['baseUrl'],
    )


def download(media_url, path):
    """Fetch *media_url* with the module ``headers`` and write it to *path*.

    Raises ``requests.HTTPError`` on a non-2xx answer so that an error page
    is never saved as if it were media.
    """
    reply = requests.get(url=media_url, headers=headers, timeout=30)
    reply.raise_for_status()
    with open(path, mode='wb') as f:
        f.write(reply.content)


def merge(video_path, audio_path, output_path):
    """Mux the two streams with ffmpeg, then delete the intermediate files.

    Arguments are passed as a list (no shell), so titles containing spaces,
    quotes or shell metacharacters are handled safely. The intermediates are
    only removed after ffmpeg succeeded.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    subprocess.run(
        [
            'ffmpeg',
            '-i', video_path,
            '-i', audio_path,
            '-c:v', 'copy',
            '-c:a', 'aac',
            '-strict', 'experimental',
            output_path,
        ],
        check=True,
    )
    for leftover in (video_path, audio_path):
        os.remove(leftover)


def main():
    """Download the video at ``url`` and merge its audio and video tracks."""
    # 1. Request the page and fail early on an HTTP error.
    response = requests.get(url=url, headers=headers, timeout=30)
    response.raise_for_status()

    # 2. Parse the title and the stream URLs out of the page source.
    title, audio_url, vod_url = parse_page(response.text)
    print(audio_url)
    print(vod_url)

    # 3. Download both streams next to the script.
    audio_path = title + '.mp3'
    video_path = title + '.mp4'
    download(audio_url, audio_path)
    download(vod_url, video_path)
    print(title + ",视频下载成功!")

    # 4. Merge them into the final file.
    merge(video_path, audio_path, title + '_final.mp4')
    print(title + ", 视频合并完成!")


if __name__ == "__main__":
    main()
# py3 BiliBili: crawl a single video
# (blog page footer) Latest recommended article published 2025-07-10 22:47:51