import os
import subprocess
import requests
import re
import concurrent.futures
from lxml import etree
'''
http://dalao.wahaha-kuyun.com/ 20201122 /1556_fd900088/ 1000k/hls/228f9bea95b000033.ts
Looking at a few ts links (from different videos), each link is made of three parts: date, id and ts file name.
1. Find where the ts file name comes from: copy one name and do a global search; it appears in the body returned by an index.m3u8 URL.
2. Find where that index.m3u8 URL comes from: a global search shows which request returns it.
3. Analyse the parameters of the URL found in step 2.
'''
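# A minimal sketch of the three-part structure described in the docstring above.
# It is illustrative only and is not used by the downloader below; it assumes the
# date/id/1000k/hls/<name>.ts path layout of the sample link.
def split_ts_url(ts_url):
    '''Split a ts link into the three parts noted above: date, id, ts file name.'''
    parts = ts_url.split('/')
    # e.g. ['http:', '', 'dalao.wahaha-kuyun.com', '20201122', '1556_fd900088',
    #       '1000k', 'hls', '228f9bea95b000033.ts']
    return parts[3], parts[4], parts[-1]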
def get_m3u8(url, id, nub):
    '''
    Get the m3u8 address and the video name.
    :param url: player_i.php URL that returns the m3u8 address
    :param id: video id
    :param nub: episode number
    :return: (m3u8 URL, video name)
    '''
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko)"
    }
    urls = f'http://www.milimili.tv/anime/{id}/{nub}/'
    response = requests.get(url, headers=headers).text
    # The player page embeds the m3u8 address in the vid parameter of yun.php.
    m3u8_url = re.findall(r"src='/yun/yun\.php\?vid=(.*?)'", response)[0]
    new_m3u8_url = m3u8_url.replace('index.m3u8', '1000k/hls/index.m3u8')
    # The episode page carries the video title.
    res = requests.get(urls, headers=headers).text
    ret = etree.HTML(res)
    name = ''.join(ret.xpath('/html/body/div[2]/div[1]//text()')).replace('>', '').replace(' ', '').replace('咪哩咪哩', '')
    return new_m3u8_url, name
def get_ts_url(url):
    '''
    Get the ts segment addresses from the m3u8 playlist.
    :param url: m3u8 URL
    :return: dict mapping segment name to segment URL, in playlist order
    '''
    response = requests.get(url).text
    ts_list = re.findall(r'(.*?)\.ts', response)
    li = {}
    for ts in ts_list:
        # Segment files sit next to index.m3u8 on the server.
        ts_url = url.replace('index.m3u8', ts) + '.ts'
        li[ts] = ts_url
    return li
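# Sketch of the same name-to-URL mapping that get_ts_url builds, but taking the
# playlist text directly so it can be tried without a network request. It is an
# illustrative helper under the same assumption as get_ts_url (segment files sit
# next to index.m3u8) and is not wired into the main flow below.
def build_segment_urls(index_url, playlist_text):
    names = re.findall(r'(.*?)\.ts', playlist_text)
    return {name: index_url.replace('index.m3u8', name) + '.ts' for name in names}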
def save_video(url, name):
    '''
    Download one ts segment and save it to disk.
    :param url: segment URL
    :param name: segment name, used as the file name
    :return:
    '''
    response = requests.get(url).content
    with open(name + '.ts', 'wb') as f:
        f.write(response)
    print(name)
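# Alternative merge sketch: concatenate the downloaded segments in playlist order
# (ts_dict preserves insertion order on Python 3.7+) instead of relying on the
# alphabetical glob order of 'copy /b *.ts'. It assumes every segment listed in
# ts_dict was downloaded successfully and is not wired into the main flow below.
def merge_segments(ts_dict, video_name):
    with open(video_name + '.mp4', 'wb') as out:
        for name in ts_dict:
            with open(name + '.ts', 'rb') as seg:
                out.write(seg.read())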
if __name__ == '__main__':
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
    id = int(input('Enter the video id: ').strip())
    nub = int(input('Enter the episode number: ').strip())
    url = f'http://www.milimili.tv/e/action/player_i.php?id={id}&pid={nub}'
    new_m3u8_url, video_name = get_m3u8(url, id, nub)
    ts_dict = get_ts_url(new_m3u8_url)
    # Download all segments concurrently, then wait for every download to finish.
    for name in ts_dict:
        executor.submit(save_video, ts_dict[name], name)
    executor.shutdown()
    print('==== Merging video: {} ===='.format(video_name))
    # Windows-only merge: binary-concatenate the segments into one file and
    # wait for it to finish before deleting the segments.
    command = 'copy /b *.ts {}.mp4'.format(video_name)
    subprocess.Popen(command, shell=True).wait()
    print('==== Finished merging video: {} ===='.format(video_name))
    for ts in ts_dict:
        os.remove(f'{ts}.ts')