import os
import requests
import json
import re
from moviepy.audio.io.AudioFileClip import AudioFileClip
from moviepy.video.io.VideoFileClip import VideoFileClip
def merge_video_audio(video_path, audio_path):
    """Mux a video-only file and an audio-only file into one playable file.

    The merged file is written next to ``video_path``, named by removing the
    last ``_video`` marker from the file name (``abc_video.mp4`` ->
    ``abc.mp4``).  Only the base name is rewritten, so a directory that
    happens to contain ``_video`` is left untouched.  If the file name has no
    marker, ``_merged`` is appended to the stem so the output never lands on
    top of the input that is deleted afterwards.

    Args:
        video_path: Path of the video-only source file.
        audio_path: Path of the audio-only source file.

    Both source files are deleted once the merged file has been written.
    If writing fails, the exception propagates and the sources are kept.
    """
    print('原始视频音频合并中,请耐心等待~')

    # Split off the base name while keeping the caller's directory prefix
    # (including its separator style) exactly as given.
    base_name = os.path.basename(video_path)
    directory_prefix = video_path[:len(video_path) - len(base_name)]
    before, marker, after = base_name.rpartition('_video')
    if marker:
        output_name = before + after
    else:
        stem, extension = os.path.splitext(base_name)
        output_name = f'{stem}_merged{extension}'
    output = directory_prefix + output_name

    video_clip = VideoFileClip(video_path)
    try:
        audio_clip = AudioFileClip(audio_path)
        try:
            video_clip.set_audio(audio_clip).write_videofile(output)
        finally:
            audio_clip.close()
    finally:
        # Close the readers before deleting: open handles keep the source
        # files locked on Windows and would make os.remove fail.
        video_clip.close()

    os.remove(video_path)
    os.remove(audio_path)
# Seconds to wait for the server before giving up, so a stalled connection
# raises instead of hanging forever.
_REQUEST_TIMEOUT = 30
# Size of the pieces streamed to disk; keeps memory use flat for large media.
_CHUNK_SIZE = 1024 * 1024


def _extract_embedded_json(pattern, html, label):
    """Return the JSON value captured by group 1 of *pattern* in *html*.

    Raises:
        ValueError: if the pattern does not match (``label`` names the missing
            blob in the message) or the captured text is not valid JSON.
    """
    match = re.search(pattern, html)
    if match is None:
        raise ValueError(f'{label} not found in page')
    return json.loads(match.group(1))


def _download_stream(url, headers, dest_path):
    """Stream *url* into *dest_path*, resuming with Range requests if needed.

    Raises:
        OSError: if a resumed request is not answered with 206 or a request
            delivers no new bytes (which would otherwise loop forever).
        requests.RequestException: on HTTP or connection errors.
    """
    received = 0
    expected = None
    # 'wb' rather than 'ab': bytes left by an interrupted earlier run must not
    # end up in front of the fresh download.
    with open(dest_path, 'wb') as output:
        while expected is None or received < expected:
            # Copy so the Range header never leaks into other requests.
            request_headers = dict(headers)
            request_headers['Range'] = f'bytes={received}-'
            before = received
            with requests.get(url, headers=request_headers, stream=True,
                              timeout=_REQUEST_TIMEOUT) as response:
                response.raise_for_status()
                if received and response.status_code != 206:
                    # A full (200) body here would be appended after the
                    # bytes already written and corrupt the file.
                    raise OSError(
                        f'server ignored Range while resuming {url}')
                if expected is None:
                    length = response.headers.get('content-length')
                    if length is not None:
                        # First request starts at byte 0, so this is the
                        # size of the whole file.
                        expected = int(length)
                for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                    output.write(chunk)
                    received += len(chunk)
            if expected is None:
                # No declared length: one complete pass is all we can verify.
                break
            if received == before:
                raise OSError(
                    f'no data received from {url} at offset {received}')


def download_video(jump_url_list, path, filename):
    """Download each Bilibili page's DASH video and audio and merge them.

    For every URL the page is fetched, the embedded ``__playinfo__`` and
    ``__INITIAL_STATE__`` JSON blobs are parsed, the first video and first
    audio stream are saved as ``{path}/{filename}_video.mp4`` and
    ``{path}/{filename}_audio.mp4``, and ``merge_video_audio`` combines them.

    Args:
        jump_url_list: Iterable of video page URLs.
        path: Output directory; created (with parents) if missing.
        filename: Base name for the output files.  It is the same for every
            URL, so with several URLs each result overwrites the previous.

    Raises:
        ValueError: if a page lacks the expected embedded JSON.
        OSError: if a media download stalls or cannot be resumed.
        requests.RequestException: on HTTP or connection errors.
    """
    for jump_url in jump_url_list:
        print('正在下载:', jump_url)
        headers = {
            'Accept': '*/*',
            'Accept-Language': 'en-US,en;q=0.5',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
        }
        response = requests.get(jump_url, headers=headers,
                                timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()

        playinfo = _extract_embedded_json(
            '__playinfo__=(.*?)</script><script>',
            response.text, '__playinfo__')
        initial_state = _extract_embedded_json(
            r'__INITIAL_STATE__=(.*?);\(function\(\)',
            response.text, '__INITIAL_STATE__')

        video_url = playinfo['data']['dash']['video'][0]['baseUrl']
        audio_url = playinfo['data']['dash']['audio'][0]['baseUrl']
        title = initial_state['videoData']['title']
        print('视频名字:', title)

        # makedirs handles nested paths and an already-existing directory.
        os.makedirs(path, exist_ok=True)

        # The media requests carry the page URL as Referer.
        media_headers = dict(headers)
        media_headers['Referer'] = jump_url

        video_path = f'{path}/{filename}_video.mp4'
        _download_stream(video_url, media_headers, video_path)

        audio_path = f'{path}/{filename}_audio.mp4'
        _download_stream(audio_url, media_headers, audio_path)

        merge_video_audio(video_path, audio_path)
if __name__ == '__main__':
    # Example run: download the listed page(s) into `path`, using `filename`
    # as the base name of the files that get written.
    filename = "abc"
    path = r"D:/data/bilibili"
    jump_url_list = ["https://www.bilibili.com/video/videoid/"]
    download_video(
        jump_url_list=jump_url_list,
        path=path,
        filename=filename,
    )