python下载单个B站视频

下载B站单个视频

准备工作:

1.下载ffmpeg,pycharm自带的ffmpeg好像不能用,下载地址:https://github.com/BtbN/FFmpeg-Builds/releases
2.安装ffmpeg,解压即可
3.添加环境变量,将ffmpeg.exe所在路径添加到环境变量

win10安装HEVC补丁

参考:https://www.zhihu.com/tardis/zm/art/427863173?source_id=1005

1.获取audio和video地址

def get_url(url='https://www.bilibili.com/video/BV1zm4y1t7P7/'):
    """Fetch a Bilibili video page and extract its stream URLs and title.

    Args:
        url: Address of the video page to scrape. Defaults to the sample
            video used throughout this article.

    Returns:
        A tuple ``(audio_url, video_url, title)``. The URLs are the first
        ``backupUrl`` entry of the first audio / video item found under
        ``data.dash`` in the page's embedded ``window.__playinfo__`` JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page does not contain the expected title tag or
            play-info script, or the play info lacks the expected fields.
    """
    # Without a timeout requests may block forever on a stalled connection.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    page = resp.text
    resp.close()

    title_match = re.search(
        r'<title data-vue-meta="true">(.*?)_哔哩哔哩_bilibili</title>', page)
    if title_match is None:
        raise ValueError(f'video title not found in page: {url}')

    playinfo_match = re.search(
        r'<script>window.__playinfo__=(.*?)</script>', page)
    if playinfo_match is None:
        raise ValueError(f'window.__playinfo__ not found in page: {url}')

    try:
        dash = json.loads(playinfo_match.group(1))['data']['dash']
        audio_url = dash['audio'][0]['backupUrl'][0]
        video_url = dash['video'][0]['backupUrl'][0]
    except (KeyError, IndexError, TypeError) as err:
        raise ValueError(f'unexpected play info layout for: {url}') from err

    return audio_url, video_url, title_match.group(1)

2.下载audio和video

def download(audio_url, video_url):
    """Download the audio and video streams into the ``video`` directory.

    The audio stream is written to ``video/1`` and the video stream to
    ``video/2``. Each response is streamed to disk in chunks so that a large
    file is never held in memory as a whole.

    Args:
        audio_url: URL of the audio stream.
        video_url: URL of the video stream.

    Raises:
        requests.HTTPError: If either request returns an error status.
    """
    # open() does not create missing directories, so make sure it exists.
    os.makedirs('video', exist_ok=True)

    jobs = (
        ('audio', audio_url, 'video/1'),
        ('video', video_url, 'video/2'),
    )
    for label, stream_url, path in jobs:
        with requests.get(url=stream_url, headers=headers, stream=True,
                          timeout=30) as resp:
            # Fail early instead of saving an error page as media data.
            resp.raise_for_status()
            with open(path, 'wb') as out:
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    out.write(chunk)
        print(f'{label}下载完成')

3.合并audio和video

def merge(title, ffmpeg_path=None):
    """Mux ``video/1`` and ``video/2`` into ``video/<title>.mp4`` with ffmpeg.

    Both inputs are stream-copied (``-acodec copy -vcodec copy``), so nothing
    is re-encoded. The two input files are removed only after ffmpeg has
    finished successfully.

    Args:
        title: Name of the output file (without extension). Characters that
            are not allowed in Windows file names are replaced by ``_``.
        ffmpeg_path: ffmpeg executable to run. When omitted, ``ffmpeg`` is
            looked up on PATH, falling back to the install location used in
            this article.

    Raises:
        OSError: If the ffmpeg executable cannot be started.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    import shutil
    import subprocess

    if ffmpeg_path is None:
        ffmpeg_path = (
            shutil.which('ffmpeg')
            or 'E:\\ffmpeg-master-latest-win64-gpl\\bin\\ffmpeg.exe'
        )

    # The title comes from a web page; strip characters that would break the
    # output path.
    forbidden = '\\/:*?"<>|'
    safe_title = title.translate(
        str.maketrans(forbidden, '_' * len(forbidden)))

    # An argument list (no shell) keeps the title from being interpreted as
    # shell syntax.
    command = [
        ffmpeg_path,
        '-i', 'video/1',
        '-i', 'video/2',
        '-acodec', 'copy',
        '-vcodec', 'copy',
        f'video/{safe_title}.mp4',
    ]
    subprocess.run(command, check=True)

    # Only reached when ffmpeg succeeded, so a failed merge keeps the parts.
    os.remove('video/1')
    os.remove('video/2')

完整代码

import re
import requests
import json
from pprint import pprint
import os


# Request headers used by download(): a desktop-browser User-Agent and a
# Referer pointing at the video page.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43'
    ),
    'Referer': (
        'https://www.bilibili.com/video/BV1zm4y1t7P7/'
        '?vd_source=6ebafa29b438a17b55f878b8d296faea'
    ),
}


def merge(title, ffmpeg_path=None):
    """Mux ``video/1`` and ``video/2`` into ``video/<title>.mp4`` with ffmpeg.

    Both inputs are stream-copied (``-acodec copy -vcodec copy``), so nothing
    is re-encoded. The two input files are removed only after ffmpeg has
    finished successfully.

    Args:
        title: Name of the output file (without extension). Characters that
            are not allowed in Windows file names are replaced by ``_``.
        ffmpeg_path: ffmpeg executable to run. When omitted, ``ffmpeg`` is
            looked up on PATH, falling back to the install location used in
            this article.

    Raises:
        OSError: If the ffmpeg executable cannot be started.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    import shutil
    import subprocess

    if ffmpeg_path is None:
        ffmpeg_path = (
            shutil.which('ffmpeg')
            or 'E:\\ffmpeg-master-latest-win64-gpl\\bin\\ffmpeg.exe'
        )

    # The title comes from a web page; strip characters that would break the
    # output path.
    forbidden = '\\/:*?"<>|'
    safe_title = title.translate(
        str.maketrans(forbidden, '_' * len(forbidden)))

    # An argument list (no shell) keeps the title from being interpreted as
    # shell syntax.
    command = [
        ffmpeg_path,
        '-i', 'video/1',
        '-i', 'video/2',
        '-acodec', 'copy',
        '-vcodec', 'copy',
        f'video/{safe_title}.mp4',
    ]
    subprocess.run(command, check=True)

    # Only reached when ffmpeg succeeded, so a failed merge keeps the parts.
    os.remove('video/1')
    os.remove('video/2')



def download(audio_url, video_url):
    """Download the audio and video streams into the ``video`` directory.

    The audio stream is written to ``video/1`` and the video stream to
    ``video/2``. Each response is streamed to disk in chunks so that a large
    file is never held in memory as a whole.

    Args:
        audio_url: URL of the audio stream.
        video_url: URL of the video stream.

    Raises:
        requests.HTTPError: If either request returns an error status.
    """
    # open() does not create missing directories, so make sure it exists.
    os.makedirs('video', exist_ok=True)

    jobs = (
        ('audio', audio_url, 'video/1'),
        ('video', video_url, 'video/2'),
    )
    for label, stream_url, path in jobs:
        with requests.get(url=stream_url, headers=headers, stream=True,
                          timeout=30) as resp:
            # Fail early instead of saving an error page as media data.
            resp.raise_for_status()
            with open(path, 'wb') as out:
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    out.write(chunk)
        print(f'{label}下载完成')


def get_url(url='https://www.bilibili.com/video/BV1zm4y1t7P7/'):
    """Fetch a Bilibili video page and extract its stream URLs and title.

    Args:
        url: Address of the video page to scrape. Defaults to the sample
            video used throughout this article.

    Returns:
        A tuple ``(audio_url, video_url, title)``. The URLs are the first
        ``backupUrl`` entry of the first audio / video item found under
        ``data.dash`` in the page's embedded ``window.__playinfo__`` JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page does not contain the expected title tag or
            play-info script, or the play info lacks the expected fields.
    """
    # Without a timeout requests may block forever on a stalled connection.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    page = resp.text
    resp.close()

    title_match = re.search(
        r'<title data-vue-meta="true">(.*?)_哔哩哔哩_bilibili</title>', page)
    if title_match is None:
        raise ValueError(f'video title not found in page: {url}')

    playinfo_match = re.search(
        r'<script>window.__playinfo__=(.*?)</script>', page)
    if playinfo_match is None:
        raise ValueError(f'window.__playinfo__ not found in page: {url}')

    try:
        dash = json.loads(playinfo_match.group(1))['data']['dash']
        audio_url = dash['audio'][0]['backupUrl'][0]
        video_url = dash['video'][0]['backupUrl'][0]
    except (KeyError, IndexError, TypeError) as err:
        raise ValueError(f'unexpected play info layout for: {url}') from err

    return audio_url, video_url, title_match.group(1)


def main():
    """Run the whole pipeline: resolve the stream URLs, download, then merge."""
    audio_url, video_url, video_title = get_url()
    download(audio_url, video_url)
    merge(video_title)


# Run the download pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

### 如何使用 Python 编写爬虫抓取 B 站视频数据

#### 准备工作

为了实现这一目标,需要安装一些必要的库。这些库可以帮助处理 HTTP 请求、解析 JSON 数据以及管理异步操作。

```bash
pip install requests aiohttp bilibili-api-python
```

#### 抓取视频基本信息

通过调用 `bilibili-api` 库中的接口方法可以直接获取到指定 AV/BV 号的视频详情:

```python
from bilibili_api import video as bvid_video, sync

def fetch_basic_info(bv_id):
    v = bvid_video.Video(bvid=bv_id)
    info_dict = sync(v.get_info())
    title = info_dict['title']
    pub_date = info_dict['pubdate']  # 时间戳形式返回发布时间
    return {
        "标题": title,
        "发布时间": pub_date
    }
```

此部分代码利用了第三方封装好的 API 接口来简化请求过程[^1]。

#### 获取弹幕列表

针对每一条视频记录其对应的 XML 格式的弹幕文件链接,并下载保存至本地;接着读取该文件提取其中的有效字段完成进一步的数据挖掘任务。

```python
import xml.etree.ElementTree as ET
from datetime import datetime

async def download_danmaku(video_bvid, output_file='danmakus.xml'):
    vid = bvid_video.Video(bvid=video_bvid)
    danmu_url = await vid.get_dm_xml()
    async with aiohttp.ClientSession() as session:
        resp = await session.get(danmu_url[0])
        content = await resp.text()
    with open(output_file, 'w', encoding='utf8') as f:
        f.write(content)

# 解析XML格式的弹幕文档
def parse_danmaku(file_path):
    tree = ET.parse(file_path)
    root = tree.getroot()
    items = []
    for item in root.findall('d'):
        text = item.text.strip()
        timestamp_str = float(item.attrib['p'].split(',')[0])  # 提取消息显示的时间轴位置
        formatted_time = str(datetime.fromtimestamp(timestamp_str))
        items.append({
            "content": text,
            "time": formatted_time
        })
    return items
```

上述函数实现了从远程服务器拉取特定编号影片关联的所有即时聊天消息并将其转换成易于理解的形式存储下来供后续分析使用[^2]。

#### 清洗与统计分析

对于收集来的原始弹幕资料而言,在正式投入应用之前往往还需要经历一系列预处理环节,比如去除无关字符、过滤敏感词汇等。之后再基于清理后的高质量语料开展诸如词频计算之类的量化研究活动。

```python
import jieba.analyse
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from collections import Counter

# 对中文字符串做分词处理
def tokenize(texts_list):
    words = []
    for line in texts_list:
        seg_result = list(jieba.cut(line))
        filtered_words = filter(lambda w: len(w)>1 and not w.isdigit(), seg_result)  # 过滤掉单个字母/数字
        words.extend(filtered_words)
    return words

# 绘制词云图像
def plot_word_cloud(word_freq_dist):
    wc = WordCloud(font_path='/path/to/simhei.ttf', background_color="white").generate_from_frequencies(dict(word_freq_dist.most_common()))
    plt.imshow(wc, interpolation='bilinear')
    plt.axis("off")
    plt.show()

if __name__ == '__main__':
    bv_num = input("请输入要查询的BV号:")
    basic_data = fetch_basic_info(bv_num)
    print(f'视频名称:{basic_data["标题"]}\n发布日期:{datetime.utcfromtimestamp(int(basic_data["发布时间"]))}')
    asyncio.run(download_danmaku(bv_num))
    parsed_comments = parse_danmaku('./danmakus.xml')
    all_texts = ''.join([item['content'] for item in parsed_comments])
    tokens = tokenize(all_texts.split())
    freq_distribution = Counter(tokens)
    top_keywords = dict(freq_distribution.most_common(50))  # 输出最常见的前五十个关键字及其出现次数
    plot_word_cloud(top_keywords)
```

这段脚本综合运用多种技术手段完成了对所关注对象全面而深入的理解——不仅限于表面层次的信息检索,更涉及到深层次的内容解读和模式识别层面的工作[^3]。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值