Approach
1. Extract keyframes from the portrait livestream video
2. Score the keyframes for quality and rank them
3. Extract the content text from the speech track and the video subtitles
4. Automatically generate an article title from that text
5. Compose the video cover from a template (a minimal sketch of how these steps fit together follows)
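Before going into the individual steps, here is a rough sketch of how the five steps could be wired together. Every helper name below (extract_keyframes, score_frames, transcribe_audio, generate_title, render_cover) is a placeholder standing in for a component developed later, not a function from any library, and the stubs are left unimplemented on purpose.

# Sketch only: each helper is a placeholder for one of the five steps above.
def extract_keyframes(video_path): ...   # step 1, e.g. the PyAV-based extractor below
def score_frames(frames): ...            # step 2, quality scoring and ranking
def transcribe_audio(video_path): ...    # step 3, speech / subtitle text extraction
def generate_title(text): ...            # step 4, automatic title generation
def render_cover(frame, title): ...      # step 5, template-based cover composition

def make_cover(video_path):
    frames = extract_keyframes(video_path)
    ranked = score_frames(frames)
    text = transcribe_audio(video_path)
    title = generate_title(text)
    return render_cover(ranked[0], title)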
Portrait extraction
0. Video subtitle removal
# Install AgentOCR
!pip install agentocr
# Install the CPU build of ONNXRuntime
!pip install onnxruntime
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from agentocr import OCRSystem

def image_remove_word(img_path='images/frame.177000000.jpg', ocr=None):
    # Detect subtitle text with OCR and erase it by inpainting the text regions.
    img = cv2.imread(img_path)  # any image works, as long as it is larger than the boxes computed below
    # Configure the model via the config parameter; configs for several languages are built in
    if ocr is None:
        ocr = OCRSystem(config='ch')
    # Run the full OCR pipeline
    result = ocr.ocr(img_path)
    coordinates = []
    for line in result:
        # Expand each detected text box by 5 px on every side
        coordinate1 = []
        a0 = line[0][0][0] - 5
        a1 = line[0][0][1] - 5
        b0 = line[0][1][0] + 5
        c1 = line[0][2][1] + 5
        coordinate1.append([[a0, a1], [b0, a1], [b0, c1], [a0, c1]])
        coordinate1 = np.array(coordinate1, np.int32)
        coordinates.append(coordinate1)
        #print(line[0])
    # Binary mask of the text regions (uint8, as required by cv2.inpaint)
    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    mask = cv2.fillPoly(mask, coordinates, 255)
    cv2.imwrite('images/mask1.png', mask)  # save the mask for inspection
    # Fill the masked regions from the surrounding pixels and overwrite the frame
    dst = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)
    cv2.imwrite(img_path, dst)

#img_path = 'images/frame.177000000.jpg'
#image_remove_word(img_path)  # builds its own OCRSystem when none is passed in
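The OCR model is expensive to load, so when cleaning many frames it pays to create one OCRSystem and pass it in, as writeVideo does further down. A short usage sketch (the frame paths here are just examples):

# Reuse a single OCRSystem instance across many frames (paths are illustrative)
ocr = OCRSystem(config='ch')
for path in ['images/frame.177000000.jpg', 'images/frame.178000000.jpg']:
    image_remove_word(path, ocr)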
def getFrame(video_name, save_path):  # save every frame of the video as an image
    video = cv2.VideoCapture(video_name)
    # Frame rate
    fps = video.get(cv2.CAP_PROP_FPS)
    print(fps)
    # Frame size
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (width, height)
    # Total frame count
    frame_num = str(video.get(cv2.CAP_PROP_FRAME_COUNT))
    # Start numbering at a power of ten so that the file names sort correctly
    name = int(math.pow(10, len(frame_num)))
    ret, frame = video.read()
    #ocr = OCRSystem(config='ch')
    while ret:
        cv2.imwrite(save_path + str(name) + '.jpg', frame)
        #imagepath = save_path + str(name) + '.jpg'
        #image_remove_word(imagepath, ocr)
        ret, frame = video.read()
        name += 1
    video.release()
    return fps, size, frame_num
def writeVideo(humanseg, fps, size):  # write the cleaned frames back into a video
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter('./movies/green.mp4', fourcc, fps, size)
    # Sort the frame files by their numeric names
    files = os.listdir(humanseg)
    file_num_sort = []
    for file in files:
        file_num_sort.append(int(file.split('.')[0]))
    file_num_sort.sort()
    #print(file_num_sort)
    file_sort = []
    for file_num in file_num_sort:
        file_sort.append(os.path.join(humanseg, str(file_num) + '.jpg'))
    #print(file_sort)
    # One OCR instance shared by every frame
    ocr = OCRSystem(config='ch')
    for file in file_sort:
        print('Writing ' + file + ' into the video')
        # Remove the subtitle text before writing the frame
        image_remove_word(file, ocr)
        im_array = cv2.imread(file)
        out.write(im_array)
    out.release()
def getMusic(video_name):  # extract the audio track
    video = VideoFileClip(video_name)
    return video.audio

def addMusic(video_name, audio):  # attach the audio track to video_name
    video = VideoFileClip(video_name)    # load
    video = video.set_audio(audio)       # set the audio
    video.write_videofile(output_video)  # save to the global output path
!pip install moviepy
import os
import math
# Save the video frame by frame as images
frame_path = "./frames/"
if not os.path.exists(frame_path):
    os.makedirs(frame_path)
video_name = './movies/whitebottle.mp4'
fps, size, frame_number = getFrame(video_name, frame_path)
print(fps, size, frame_number)
from moviepy.editor import *
import cv2 as cv
# Path of the final video
output_video = './movies/result.mp4'
humanseg_path = 'frames'
writeVideo(humanseg_path, fps, size)
addMusic('./movies/green.mp4', getMusic(video_name))
# Reference: https://aistudio.baidu.com/aistudio/projectdetail/3919465?forkThirdPart=1
1. Video background removal
!pip install --quiet av pims
import torch
model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3").cuda() # or "resnet50"
convert_video = torch.hub.load("PeterL1n/RobustVideoMatting", "converter")
convert_video(
model, # The loaded model, can be on any device (cpu or cuda).
input_source='input.mp4', # A video file or an image sequence directory.
downsample_ratio=None, # [Optional] If None, make downsampled max size be 512px.
output_type='video', # Choose "video" or "png_sequence"
output_composition='com.mp4', # File path if video; directory path if png sequence.
output_alpha="pha.mp4", # [Optional] Output the raw alpha prediction.
output_foreground="fgr.mp4", # [Optional] Output the raw foreground prediction.
output_video_mbps=4, # Output video mbps. Not needed for png sequence.
seq_chunk=12, # Process n frames at once for better parallelism.
num_workers=1, # Only for image sequence input. Reader threads.
progress=True # Print conversion progress.
)
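The call above assumes a CUDA GPU because of the .cuda() call. On a CPU-only machine the same torch.hub entry points can be used without moving the model to the GPU; inference is just much slower. A minimal variant, reusing the same input/output paths as above:

# CPU-only variant of the RobustVideoMatting setup (same API, slower)
import torch

model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3")  # or "resnet50"
convert_video = torch.hub.load("PeterL1n/RobustVideoMatting", "converter")
convert_video(
    model,
    input_source='input.mp4',
    output_type='video',
    output_composition='com.mp4',
    seq_chunk=1,          # smaller chunks keep memory use modest on CPU
    progress=True,
)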
2. Video keyframe extraction
!pip install av
import av
import os
import shutil
# Utility class that extracts keyframes from videos (supports a directory of videos)
class PyAvUtils:
    def __init__(self, video_dir, keyframe_dir):
        self.video_dir = video_dir
        self.keyframe_dir = keyframe_dir
    # Extract the keyframes of every video in video_dir and save them as images.
    # (The original article is cut off at this point; the body below is a sketch
    # built on PyAV's standard skip_frame = 'NONKEY' keyframe-decoding pattern.)
    def do_video2image(self):
        for video_name in os.listdir(self.video_dir):
            video_path = os.path.join(self.video_dir, video_name)
            save_dir = os.path.join(self.keyframe_dir, os.path.splitext(video_name)[0])
            if os.path.exists(save_dir):
                shutil.rmtree(save_dir)
            os.makedirs(save_dir)
            with av.open(video_path) as container:
                stream = container.streams.video[0]
                stream.codec_context.skip_frame = 'NONKEY'  # decode only keyframes
                for frame in container.decode(stream):
                    frame.to_image().save(os.path.join(save_dir, f'keyframe.{frame.pts}.jpg'))
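A short usage example, assuming the livestream recordings sit in a local movies/ directory and the keyframes go to keyframes/ (both paths are illustrative):

# Extract the keyframes of every video under ./movies into ./keyframes
utils = PyAvUtils(video_dir='./movies', keyframe_dir='./keyframes')
utils.do_video2image()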

In summary, this article covers keyframe extraction and quality assessment for portrait livestream videos, text extraction from the speech track and the video subtitles for automatic title generation, and template-based composition of the video cover. It also touches on facial-feature analysis, such as closed-eye detection and attractiveness scoring, to make the generated covers more personalized and intelligent.
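The excerpt above does not include code for step 2 (quality scoring and ranking). A very simple stand-in, shown only as an illustrative baseline and not the article's face-based scoring, is to rank the extracted keyframes by sharpness using the variance of the Laplacian; the ./keyframes path matches the usage example above but is likewise illustrative.

# Rank keyframes by sharpness (variance of the Laplacian); higher = sharper.
# Baseline only; the article's scoring also uses facial features.
import cv2
import glob

def sharpness(path):
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    return cv2.Laplacian(gray, cv2.CV_64F).var()

frames = sorted(glob.glob('./keyframes/*/*.jpg'), key=sharpness, reverse=True)
print(frames[:3])  # the three sharpest candidates for the cover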