视频文字提取（方法一）

PGZXXX

已于 2025-02-24 21:06:58 修改

阅读量521

点赞数 3

CC 4.0 BY-SA版权

分类专栏： DeepSeek实用技巧系列　文章标签：音视频 人工智能

于 2025-02-24 18:57:57 首次发布

本文链接：https://blog.youkuaiyun.com/wangchenaaaa/article/details/145835474

DeepSeek实用技巧系列专栏收录该内容

119 篇文章

订阅专栏

生活中很多时候需要提取视频中的文字，借助AI工具辅助编写python程序提取视频文字可以极大提高工作效率。

以下是利用ChatGPT编写的提取视频文字的代码，供大家参考使用。该算法的基本思想是：先将视频中的音频提取出来保存为wav文件，然后再从音频文件中识别出文字。

import subprocess
import os
import speech_recognition as sr
import imageio_ffmpeg as ffmpeg

def extract_audio_with_ffmpeg(video_path, audio_output="替换为保存音频文件的路径/temp_audio.wav"):
    """Extract the audio track of a video into a WAV file via ffmpeg.

    The ffmpeg executable is the one reported by ``imageio_ffmpeg``. The
    audio is written as 16-bit PCM, 44.1 kHz, two channels, with the video
    stream dropped.

    Args:
        video_path: Path of the input video file.
        audio_output: Path of the WAV file to write. The default value is a
            placeholder and should be replaced with a real, writable path.

    Returns:
        ``audio_output`` on success, or ``None`` if ffmpeg exits with a
        non-zero status.
    """
    ffmpeg_path = ffmpeg.get_ffmpeg_exe()  # path of the ffmpeg executable
    command = [
        ffmpeg_path,
        # Overwrite an existing output without asking; otherwise a leftover
        # WAV from an earlier run makes ffmpeg stop at an interactive prompt.
        "-y",
        "-i", video_path,
        "-vn",
        "-acodec", "pcm_s16le",
        "-ar", "44100",
        "-ac", "2",
        audio_output,
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"提取音频时出错: {e}")
        return None
    print(f"音频成功提取到 {audio_output}")
    return audio_output

def audio_to_text(audio_path, language="zh-CN"):
    """Transcribe an audio file with SpeechRecognition's Google Web API.

    The entire file is recorded and passed to a single recognition call.

    Args:
        audio_path: Path of the audio file to transcribe.
        language: Language code forwarded to the recognizer
            (defaults to ``"zh-CN"``).

    Returns:
        The recognized text. On failure a message string is returned
        instead of raising: one for audio that could not be understood,
        another (including the error) for a failed API request.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as wav_source:
            # No duration is given, so the full file is recorded.
            recording = recognizer.record(wav_source)
        transcript = recognizer.recognize_google(recording, language=language)
    except sr.UnknownValueError:
        return "无法识别音频"
    except sr.RequestError as err:
        return f"API请求失败: {err}"
    return transcript

def save_text_to_file(text, file_path):
    """Write the recognized text to a UTF-8 text file and report the path.

    Args:
        text: The string to write.
        file_path: Destination path; an existing file is overwritten.
    """
    with open(file_path, mode="w", encoding="utf-8") as out_handle:
        out_handle.write(text)
    print(f"文字已保存到 {file_path}")

def main(video_file, output_file="recognized_text.txt"):
    """Run the pipeline: extract audio, transcribe it, save and print the text.

    Args:
        video_file: Path of the video to process.
        output_file: Path of the text file that receives the recognized
            text (defaults to ``"recognized_text.txt"``).

    Returns:
        ``None``. Returns early, without writing anything, when audio
        extraction fails.
    """
    # Step 1: extract the audio track.
    audio_file = extract_audio_with_ffmpeg(video_file)
    if not audio_file:
        return

    try:
        # Step 2: speech recognition.
        result = audio_to_text(audio_file)

        # Save the recognized text to a file.
        save_text_to_file(result, output_file)
    finally:
        # Always remove the temporary audio file, even if recognition or
        # saving raised, so failed runs do not leave it behind.
        if os.path.exists(audio_file):
            os.remove(audio_file)

    print("识别结果：")
    print(result)

if __name__ == "__main__":
    video_file = "video.mp4"  # replace with the path to your video file
    main(video_file)