python多线程下载网络视频ts，并合并（踩坑记录）

本文链接：https://blog.youkuaiyun.com/weixin_43941882/article/details/132698486

python多线程下载网络视频ts，并合并（踩坑记录）

由于是加密ts，使用的AES加密，首先需安装Crypto模块

安装时如果出现 “Microsoft Visual C++ 14.0 is required. Get it with "Microsoft Visual C++ Build Tools"” 错误，
解决方法如下：

//使用 conda install -c conda-forge 包名称 来安装相关包
conda install libpython m2w64-toolchain -c python_3.8

下载这个包可以解决C++依赖的问题，而不用像网上绝大多数你抄我、我抄你的博客说的那样去下载一个 Visual Studio，占用那么大的空间真的不值得。

//用到的解密代码
from Crypto.Cipher import AES
# 创建AES解密器
def AESDecrypt(cipher_text, key, iv=None):
    """Decrypt AES-CBC encrypted data (e.g. one encrypted TS segment).

    Args:
        cipher_text: Encrypted bytes to decrypt.
        key: AES key bytes, as downloaded from the playlist's key URI.
        iv: Optional initialization vector. When omitted the cipher is
            created without an explicit IV, exactly as before.
            NOTE(review): per the PyCryptodome documentation the library
            then picks a random IV, so the first 16-byte block of the
            result will not match the original plaintext -- pass the IV
            from the playlist when it is known.

    Returns:
        The decrypted bytes as returned by the cipher; no padding is removed.
    """
    if iv is None:
        aes = AES.new(key, AES.MODE_CBC)
    else:
        # Positional IV keeps the call compatible with both PyCrypto and
        # PyCryptodome, which spell the keyword differently.
        aes = AES.new(key, AES.MODE_CBC, iv)
    return aes.decrypt(cipher_text)

解析m3u8文件内容，获取密钥以及ts的URI列表

//解析m3u8文件内容
def get_real_url(baseurl,url):
    """Download an M3U8 playlist and extract its key and segment URIs.

    Args:
        baseurl: Prefix that is prepended to the (relative) key URI.
        url: URL of the M3U8 playlist.

    Returns:
        A tuple ``(key, datalist)``. ``key`` is the raw key bytes, or ``""``
        when the playlist declares no key URI. ``datalist`` is the list of
        segment lines from the playlist that do not contain ``"http"``.

    Note:
        Both downloads are retried until the server answers with status 200;
        there is no retry limit.
    """
    import re

    # Fetch the playlist, retrying until the server answers 200.
    r = requests.get(url, headers=headers)
    while r.status_code != 200:
        r = requests.get(url, headers=headers)

    key_uri = None
    datalist = []
    for line in r.text.splitlines():
        if line.startswith("#EXT-X-KEY"):
            print(line)
            # Look the URI attribute up by name instead of by position, so
            # attribute order and '=' or ',' inside the quoted URI do not
            # break the parsing.
            found = re.search(r'URI=(?:"([^"]*)"|([^,]*))', line)
            if found:
                quoted, bare = found.groups()
                key_uri = quoted if quoted is not None else bare
            continue
        # Blank lines are not segments; tags and comments start with '#'.
        if not line.strip() or line.startswith("#"):
            continue
        if "http" not in line:
            # Collect the segment URI in the result list.
            datalist.append(line)

    if not key_uri:
        key = ""
    else:
        # Download the key itself from the key URI.
        keydata = requests.get(baseurl + key_uri, headers=headers)
        while keydata.status_code != 200:
            keydata = requests.get(baseurl + key_uri, headers=headers)
        key = keydata.content
    return key, datalist

下载文件

# 下载ts文件
def DownloadVideo(savePath,videoUrl,key):
    """Download one TS segment, decrypt it if a key is given, and save it.

    The caller must have acquired ``connectlock`` before calling; it is
    released here on every exit path.

    Args:
        savePath: Prefix of the output file; the last path component of
            ``videoUrl`` is appended to it.
        videoUrl: Full URL of the segment.
        key: AES key bytes, or ``""`` when the segment is not encrypted.
    """
    try:
        # The first request is inside the try block so that a network
        # exception cannot leak the semaphore permit held by this worker.
        req = requests.get(videoUrl, headers=headers)
        while req.status_code != 200:
            req = requests.get(videoUrl, headers=headers)
        data = req.content
        if key != "":
            try:
                data = AESDecrypt(data, key=key)
            except ValueError as err:
                # Best effort: keep the raw bytes, but make the failure visible.
                print("AES decrypt failed for %s, saving raw data: %s" % (videoUrl, err))
        videoName = videoUrl[videoUrl.rfind("/") + 1:]
        with open(savePath + videoName, "wb") as f:
            f.write(data)
    finally:
        connectlock.release()

处理下载好的文件，这里使用了subprocess.Popen代替os.system，同时使用ffmpeg进行ts文件合并，使得视频不会特别掉帧，同时不会向控制台输出太多的东西

ffmpeg安装
参考这个链接: link
如果使用的是conda虚拟环境，则需要配置环境变量，否则会报错：“不是内部或外部命令，也不是可运行的程序”。
参考这个链接: link

# 合并ts文件，并删除ts文件
def merge_file(path,i):
    """Merge the TS segments in ``path`` into one MP4 with ffmpeg and clean up.

    The process working directory is changed to ``path``. The merged file is
    moved to the fixed output directory and the ``*.ts`` and ``*.txt`` files
    in ``path`` are deleted afterwards (via the Windows ``del`` command).

    Args:
        path: Directory holding the segments; must end with a path separator
            because file names are appended to it directly.
        i: Title used to build the output file name.
    """
    # Raw string: same value as before, without the invalid "\学" escape.
    target_dir = r'E:\学习资料MP4/'

    # Drop characters that are illegal in Windows file names, and spaces.
    new_filename = re.sub(r'[\\/:*?"<>| ]', '', i)
    name = new_filename + ".mp4"
    # If the file name already exists, add a timestamp suffix.
    if os.path.exists(target_dir + name):
        name = new_filename + str(int(time.time())) + ".mp4"

    # os.listdir returns names in arbitrary order; sort so the zero-padded
    # index prefix of the segment files defines the concatenation order.
    segments = sorted(f for f in os.listdir(path) if f.endswith('.ts'))
    # Write the list in one go ('w') so a stale list from an earlier run
    # cannot leak into this merge.
    with open(path + "file_list.txt", 'w', encoding='utf-8') as f:
        for segment in segments:
            f.write("file '" + segment + "'\n")

    os.chdir(path)
    # Argument list without a shell: the title can no longer be interpreted
    # by the command interpreter.
    cmd = ["ffmpeg", "-f", "concat", "-i", "file_list.txt", "-c", "copy", name]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() waits for ffmpeg and drains both pipes.
    p.communicate()
    # ffmpeg wrote the output into the current directory (``path``), so move
    # it from there rather than from a hard-coded source folder.
    shutil.move(name, target_dir)
    os.system('del /Q *.ts')
    os.system('del /Q *.txt')

多线程下载

# Multithreading
from threading import *

# Number of permits handed out by the semaphore below.
nMaxThread = 8
# chulishuju acquires one permit before starting each download thread and
# DownloadVideo releases it when it finishes, so at most nMaxThread downloads
# run at the same time.
connectlock = BoundedSemaphore(nMaxThread)

def chulishuju(baseurl,url, path, title):
    """Download every segment of a playlist in parallel, then merge them.

    Args:
        baseurl: Prefix prepended to the key URI and to each segment URI.
        url: URL of the M3U8 playlist.
        path: Directory prefix where segments are saved; the zero-padded
            segment index and a dot are appended to it directly.
        title: Title used in the log message and for the merged file name.
    """
    start1 = time.time()
    key,VideoList = get_real_url(baseurl,url)
    print(VideoList)

    workers = []
    for index, segment in enumerate(VideoList):
        # Zero-padded index prefix keeps the segment files in playlist order.
        savePath = path + "%s." % str(index).rjust(5, '0')
        # Blocks while nMaxThread downloads are already running; the worker
        # releases the permit when it is done.
        connectlock.acquire()
        t = Thread(target=DownloadVideo, args=(savePath, baseurl + segment, key))
        try:
            t.start()
        except Exception:
            # The worker never ran, so give its permit back here.
            connectlock.release()
            raise
        workers.append(t)

    # Wait for every download to finish instead of sleeping a fixed time, so
    # the merge never starts on an incomplete set of segments.
    for t in workers:
        t.join()

    print("学习资料%s下载完成耗时%.5f秒" % (title, float(time.time() - start1)))
    merge_file(path,title)