请为以下程序生成对应的 Tkinter(tk)图形界面:
import os
import cv2
import subprocess
import shutil
import tempfile
from PIL import Image
from tqdm import tqdm
import numpy as np
import queue
import threading
import time
import logging
import sys
# Switch stdout to UTF-8 so the block characters of the progress bar render.
# reconfigure() changes the existing stream in place instead of opening a
# second file object on the same descriptor, and the guard keeps the import
# working when stdout is missing or replaced (e.g. GUI launch without console).
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", line_buffering=True)
# Configure logging for the whole script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# ===== CUDA path fix =====
# The CUDA 11.8 toolkit location must be visible to the process before
# onnxruntime is imported, so this setup deliberately precedes the import.
cuda_path = "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.8"
# Only touch the environment when the toolkit is really installed there;
# otherwise a valid, pre-existing CUDA_PATH would be clobbered with a
# directory that does not exist.
if os.path.isdir(cuda_path):
    os.environ["CUDA_PATH"] = cuda_path
    os.environ["PATH"] = f"{cuda_path}\\bin;{cuda_path}\\libnvvp;{os.environ.get('PATH', '')}"
    # Register DLL search directories. os.add_dll_directory is Windows-only
    # and raises for a missing directory, so each one is checked separately.
    if hasattr(os, "add_dll_directory"):
        for _dll_dir in (os.path.join(cuda_path, "bin"),
                         os.path.join(cuda_path, "lib", "x64")):
            if os.path.isdir(_dll_dir):
                os.add_dll_directory(_dll_dir)
# Import onnxruntime only now that the DLL search path is prepared.
import onnxruntime as ort
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def has_ffmpeg():
    """Return True when ``ffmpeg -version`` can be launched and exits with 0.

    Returns:
        bool: False when the executable cannot be started (not on PATH, not
        executable, ...) or when it reports a non-zero exit status.
    """
    try:
        subprocess.run(
            ['ffmpeg', '-version'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,  # without this CalledProcessError is never raised
        )
    except (OSError, subprocess.CalledProcessError):
        # OSError covers FileNotFoundError as well as PermissionError etc.
        return False
    return True
def add_audio_to_video(video_path, original_video_path, output_path, fps):
    """Mux the audio of the original video into the generated silent video.

    The silent video is re-encoded with libx264/yuv420p and the first audio
    stream of the original is encoded as AAC. Whenever audio cannot be added
    (ffmpeg missing, no audio stream, ffmpeg failure) the silent video is
    copied to ``output_path`` unchanged.

    Args:
        video_path: Path of the generated video without audio.
        original_video_path: Path of the source video that may carry audio.
        output_path: Destination path of the final file.
        fps: Frame rate passed to ffmpeg as the input rate of ``video_path``.

    Returns:
        ``output_path``.
    """
    if not has_ffmpeg():
        logger.warning("ffmpeg未安装,无法添加音频。请安装ffmpeg以支持音频处理。")
        shutil.copyfile(video_path, output_path)
        return output_path

    temp_path = None
    try:
        # Ask ffprobe whether the original video has an audio stream.
        check_audio_cmd = [
            'ffprobe',
            '-v', 'error',
            '-select_streams', 'a',
            '-show_entries', 'stream=codec_type',
            '-of', 'default=noprint_wrappers=1:nokey=1',
            original_video_path
        ]
        try:
            result = subprocess.run(check_audio_cmd, capture_output=True, text=True)
        except OSError as e:
            # has_ffmpeg() only proves that ffmpeg exists; ffprobe may still be
            # missing. Attempt the merge anyway: if there is no audio stream
            # the '-map 1:a:0' below fails and the silent copy is used.
            logger.warning(f"ffprobe不可用,无法检测音频轨道,将直接尝试合并音频: {e}")
        else:
            has_audio = 'audio' in result.stdout
            if not has_audio:
                logger.warning(f"原始视频 '{os.path.basename(original_video_path)}' 没有音频轨道")
                shutil.copyfile(video_path, output_path)
                return output_path

        # Encode into a temporary file so a failed run never leaves a
        # half-written file at output_path.
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
            temp_path = temp_file.name

        # Merge audio - H.264 video with yuv420p pixel format.
        cmd = [
            'ffmpeg',
            '-y',                       # overwrite the (empty) temp file
            '-r', str(fps),             # input frame rate of the silent video
            '-i', video_path,           # silent video
            '-i', original_video_path,  # original video (audio source)
            '-c:v', 'libx264',          # H.264 video encoder
            '-preset', 'fast',          # encoding speed preset
            '-crf', '23',               # quality control
            '-pix_fmt', 'yuv420p',      # pixel format required by the target platform
            '-c:a', 'aac',              # audio encoder
            '-b:a', '128k',             # audio bitrate
            '-map', '0:v:0',            # video from the first input
            '-map', '1:a:0',            # audio from the second input
            '-shortest',                # stop at the shorter stream
            '-movflags', '+faststart',  # streaming-friendly layout
            temp_path
        ]
        # Run with all ffmpeg output hidden.
        subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        # Move the finished file to its final location.
        shutil.move(temp_path, output_path)
        return output_path
    except subprocess.CalledProcessError as e:
        logger.error(f"音频合并失败: {e}")
        logger.warning("将使用无声视频")
        shutil.copyfile(video_path, output_path)
        return output_path
    finally:
        # Remove the temp file if it was created and not moved away.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
class Videocap:
    """Read a video on a background thread and queue model-ready frames.

    Frames are resized so the longer edge does not exceed ``limit``, both
    edges are aligned with ``to_16s`` and pixel values are scaled to float32
    in [-1, 1]. Consumers pull frames with ``read()``.

    Attributes set by ``__init__``: ``ori_width``/``ori_height`` (source
    size), ``width``/``height`` (model input size), ``total`` (frame count
    reported by OpenCV), ``fps``, ``count`` (frames queued so far), ``ret``
    (result of the most recent capture read; False once the reader stopped).
    """

    def __init__(self, video, model_name, limit=1280):
        """Open ``video`` and start the reader thread.

        Args:
            video: Path (or other source accepted by ``cv2.VideoCapture``).
            model_name: Name of the model; stored on the instance only.
            limit: Maximum length of the longer edge fed to the model.

        Raises:
            IOError: If OpenCV cannot open the video.
        """
        self.model_name = model_name
        vid = cv2.VideoCapture(video)
        if not vid.isOpened():
            # Fail early instead of reporting a 0x0 video with zero frames.
            vid.release()
            raise IOError(f"无法打开视频: {video}")
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.total = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
        self.fps = vid.get(cv2.CAP_PROP_FPS)
        self.ori_width, self.ori_height = width, height
        max_edge = max(width, height)
        scale_factor = limit / max_edge if max_edge > limit else 1.
        height = int(round(height * scale_factor))
        width = int(round(width * scale_factor))
        # Model input size, aligned to a multiple of 16.
        self.width, self.height = self.to_16s(width), self.to_16s(height)
        self.count = 0
        self.cap = vid
        # Probe one frame to initialise ``ret``, then rewind to the start.
        self.ret, frame = self.cap.read()
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        self.q = queue.Queue(maxsize=100)
        # Set once the reader thread has stopped (end of stream or error).
        self._finished = threading.Event()
        t = threading.Thread(target=self._reader)
        t.daemon = True
        t.start()

    def _reader(self):
        """Thread body: decode, preprocess and queue frames until the end."""
        try:
            while True:
                self.ret, frame = self.cap.read()
                if not self.ret:
                    break
                frame = np.asarray(self.process_frame(frame, self.width, self.height))
                self.q.put(frame)  # blocks while the queue is full
                self.count += 1
        finally:
            # Runs on errors too, so the capture is always released and
            # consumers can tell that no further frames will arrive.
            self.ret = False
            self.cap.release()
            self._finished.set()

    def read(self):
        """Return the next preprocessed frame, or None if none is available.

        Waits up to 5 seconds for a frame, but returns None as soon as the
        reader thread has stopped and the queue is empty.
        """
        deadline = time.monotonic() + 5.0
        while True:
            try:
                # Short slices so the end of the stream is noticed promptly.
                f = self.q.get(timeout=0.1)
            except queue.Empty:
                if self._finished.is_set() and self.q.empty():
                    return None
                if time.monotonic() >= deadline:
                    return None
                continue
            self.q.task_done()
            return f

    def to_16s(self, x):
        """Return 256 for ``x`` below 256, else ``x`` rounded down to a multiple of 16."""
        if x < 256:
            return 256
        return x - x % 16

    def process_frame(self, img, width, height):
        """Turn one decoded frame into a model input batch.

        The channel axis is reversed (OpenCV BGR -> RGB), the image is
        resized bilinearly to ``(width, height)``, scaled to float32 in
        [-1, 1] and given a leading batch axis.
        """
        img = Image.fromarray(img[:, :, ::-1]).resize((width, height), Image.Resampling.BILINEAR)
        img = np.array(img).astype(np.float32) / 127.5 - 1.0
        return np.expand_dims(img, axis=0)
class Cartoonizer():
    """Stylise images and videos with an ONNX model via ONNX Runtime.

    Inputs are fed as RGB float32 batches scaled to [-1, 1]; the model output
    is mapped back to an RGB uint8 image by ``post_precess``.
    """

    # tqdm layout shared by both video encoding paths.
    _BAR_FORMAT = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'

    def __init__(self, model_path, device="gpu"):
        """Create the inference session.

        Args:
            model_path: Path of the ``.onnx`` model file.
            device: ``"gpu"`` requests the CUDA execution provider (used only
                when ``ort.get_device()`` reports ``'GPU'``); anything else
                selects the CPU provider.
        """
        self.model_path = model_path
        self.device = device
        self.name = os.path.basename(model_path).rsplit('.', 1)[0]
        # Choose provider options depending on the detected CUDA version.
        cuda_version = self.get_cuda_version()
        logger.info(f"检测到 CUDA 版本: {cuda_version}")
        if device.lower() == "gpu" and ort.get_device() == 'GPU':
            if cuda_version.startswith("11"):
                # CUDA 11.x configuration
                providers = ['CUDAExecutionProvider']
                provider_options = [{
                    'device_id': 0,
                    'arena_extend_strategy': 'kSameAsRequested',
                    'cudnn_conv_algo_search': 'HEURISTIC',
                    'do_copy_in_default_stream': True,
                }]
                logger.info("使用 CUDA 11.x 优化配置")
            else:
                # Any other version string is treated as CUDA 12.x.
                providers = ['CUDAExecutionProvider']
                provider_options = [{'device_id': 0}]
                logger.info("使用 CUDA 12.x 配置")
        else:
            providers = ['CPUExecutionProvider']
            provider_options = None
            logger.warning("使用 CPU 模式,处理速度可能较慢")
        # Session options - enable all graph optimisations.
        sess_options = ort.SessionOptions()
        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        sess_options.enable_mem_pattern = True
        sess_options.enable_cpu_mem_arena = True
        # Create the inference session.
        self.sess_land = ort.InferenceSession(
            model_path,
            sess_options=sess_options,
            providers=providers,
            provider_options=provider_options
        )
        logger.info(f"使用的执行提供器: {self.sess_land.get_providers()}")

    def get_cuda_version(self):
        """Return the CUDA version string, or ``"未知"`` when undetectable.

        The last path component of ``CUDA_PATH`` (without ``v``) is used when
        it is purely numeric; otherwise the ``release`` line printed by
        ``nvcc --version`` is parsed.
        """
        try:
            # First try the environment variable.
            cuda_path = os.environ.get('CUDA_PATH', '')
            if cuda_path:
                version = os.path.basename(cuda_path).replace('v', '')
                if version.replace('.', '').isdigit():
                    return version
            # Then try the nvcc compiler.
            nvcc_path = shutil.which("nvcc")
            if nvcc_path:
                output = subprocess.check_output([nvcc_path, '--version'], stderr=subprocess.STDOUT)
                version_line = [line for line in output.decode().split('\n') if 'release' in line][0]
                return version_line.split('release')[-1].strip().split(',')[0]
            return "未知"
        except Exception as e:
            logger.error(f"获取 CUDA 版本失败: {str(e)}")
            return "未知"

    @staticmethod
    def _align16(x):
        """Return 256 for ``x`` below 256, else ``x`` rounded down to a multiple of 16.

        Same rule as ``Videocap.to_16s`` so images and video frames are sized
        consistently and a tiny image can never be resized to 0 pixels.
        """
        if x < 256:
            return 256
        return x - x % 16

    def post_precess(self, img, wh):
        """Convert a model output to a uint8 image of size ``wh`` (width, height).

        Values are mapped from [-1, 1] to [0, 255], clipped, and the image is
        resized bilinearly. Channel order is left as produced by the model.
        """
        img = (img.squeeze() + 1.) / 2 * 255
        img = img.clip(0, 255).astype(np.uint8)
        img = Image.fromarray(img).resize((wh[0], wh[1]), Image.Resampling.BILINEAR)
        img = np.array(img).astype(np.uint8)
        return img

    def _run_frames(self, vid, desc, write_frame):
        """Stylise up to ``vid.total`` frames and hand each one to ``write_frame``.

        ``write_frame`` receives the stylised frame as returned by
        ``post_precess`` at the original video size. The loop also ends when
        the reader reports the end of the stream (``vid.ret`` is False and
        its queue is empty), because the frame count reported by OpenCV may
        exceed the number of frames that can actually be decoded.
        """
        input_name = self.sess_land.get_inputs()[0].name
        out_size = (vid.ori_width, vid.ori_height)
        remaining = vid.total
        pbar = tqdm(total=vid.total,
                    desc=desc,
                    mininterval=0.5,
                    maxinterval=1.0,
                    ascii=False,
                    bar_format=self._BAR_FORMAT)
        try:
            while remaining > 0:
                frame = vid.read()
                if frame is None:
                    if not vid.ret and vid.q.empty():
                        # Reader stopped and nothing is queued: no more frames.
                        logger.warning(f"视频提前结束: 已处理 {vid.total - remaining}/{vid.total} 帧")
                        break
                    # Reader is merely slow; wait and retry.
                    time.sleep(0.1)
                    continue
                fake_img = self.sess_land.run(None, {input_name: frame})[0]
                write_frame(self.post_precess(fake_img, out_size))
                pbar.update(1)
                remaining -= 1
        finally:
            pbar.close()

    def _encode_with_ffmpeg(self, vid, output_path):
        """Pipe stylised frames as raw RGB into ffmpeg and encode to H.264.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero code.
            OSError: If writing to the ffmpeg pipe fails (e.g. ffmpeg died).
        """
        cmd = [
            'ffmpeg',
            '-y',
            '-loglevel', 'error',  # errors only
            '-f', 'rawvideo',
            '-vcodec', 'rawvideo',
            '-s', f'{vid.ori_width}x{vid.ori_height}',
            '-pix_fmt', 'rgb24',
            '-r', str(vid.fps),
            '-i', '-',
            '-an',  # no audio
            '-c:v', 'h264_nvenc',
            '-preset', 'fast',
            '-crf', '23',
            '-pix_fmt', 'yuv420p',
            '-movflags', '+faststart',
            output_path
        ]
        # Start ffmpeg with its output fully hidden.
        ffmpeg_proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        )
        try:
            # The stylised frame is already RGB (the OpenCV paths reverse it
            # to BGR before writing), so it is sent unchanged to match the
            # 'rgb24' declared above.
            self._run_frames(
                vid, "处理进度",
                lambda rgb: ffmpeg_proc.stdin.write(rgb.tobytes()))
        finally:
            # Always close the pipe and reap the process so ffmpeg does not
            # linger and keep the output file locked.
            try:
                ffmpeg_proc.stdin.close()
            except OSError:
                pass  # pipe already broken because ffmpeg exited
            retcode = ffmpeg_proc.wait()
        if retcode != 0:
            raise subprocess.CalledProcessError(retcode, cmd)

    def process_video(self, video_path, output_path):
        """Stylise a video and add the original audio track.

        Frames are encoded with ffmpeg when available, otherwise (or when the
        ffmpeg encode fails) with OpenCV. Returns the value returned by
        ``add_audio_to_video``.
        """
        # Temporary silent video file.
        temp_dir = tempfile.mkdtemp()
        temp_video_path = os.path.join(temp_dir, "temp_no_audio.mp4")
        try:
            vid = Videocap(video_path, self.name)
            if has_ffmpeg():
                logger.info(f"处理视频: {os.path.basename(video_path)} - 使用FFmpeg进行视频编码")
                try:
                    self._encode_with_ffmpeg(vid, temp_video_path)
                except Exception as e:
                    logger.error(f"FFmpeg编码失败: {e}")
                    logger.warning("将回退到OpenCV编码")
                    # The failed attempt already consumed frames, so restart
                    # from the first frame with a fresh reader.
                    # NOTE(review): the abandoned reader is a daemon thread
                    # that stays blocked on its bounded queue until exit.
                    vid = Videocap(video_path, self.name)
                    self._fallback_video_encoding(vid, temp_video_path)
            else:
                logger.warning("FFmpeg不可用,使用OpenCV编码")
                self._fallback_video_encoding(vid, temp_video_path)
            # Add the original audio.
            logger.info("添加音频到视频...")
            return add_audio_to_video(temp_video_path, video_path, output_path, vid.fps)
        finally:
            # Clean up temporary files.
            if os.path.exists(temp_video_path):
                os.remove(temp_video_path)
            shutil.rmtree(temp_dir, ignore_errors=True)

    def _fallback_video_encoding(self, vid, output_path):
        """Encode the stylised frames with OpenCV's ``VideoWriter``.

        H.264 fourcc codes are tried first, then ``mp4v``.

        Raises:
            IOError: If no writer can be opened for ``output_path``.
        """
        frame_size = (vid.ori_width, vid.ori_height)
        video_out = None
        for codec in ['avc1', 'h264', 'x264']:
            try:
                fourcc = cv2.VideoWriter_fourcc(*codec)
                video_out = cv2.VideoWriter(output_path, fourcc, vid.fps, frame_size)
                if video_out.isOpened():
                    logger.info(f"使用 {codec} 编码器")
                    break
            except Exception:
                video_out = None
        if video_out is None or not video_out.isOpened():
            # None of the H.264 codes worked; use the default encoder.
            logger.warning("H.264编码器不可用,将使用默认编码器")
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            video_out = cv2.VideoWriter(output_path, fourcc, vid.fps, frame_size)
        if not video_out.isOpened():
            # Writing to a closed writer would silently drop every frame.
            video_out.release()
            raise IOError(f"无法创建视频写入器: {output_path}")
        try:
            # OpenCV expects BGR, hence the channel reversal (RGB to BGR).
            self._run_frames(
                vid, "处理进度 (OpenCV)",
                lambda rgb: video_out.write(rgb[:, :, ::-1]))
        finally:
            video_out.release()

    def process_image(self, image_path, output_path):
        """Stylise one image file and write it to ``output_path``.

        Files are read and written through ``numpy`` buffers with
        ``cv2.imdecode``/``cv2.imencode`` so paths containing non-ASCII
        characters work on Windows. The model input is limited to 1280 px on
        the longer edge and aligned with ``_align16``; the result is resized
        back to the original size.

        Returns:
            ``output_path`` on success, ``None`` when the image cannot be
            read or encoded.
        """
        try:
            raw = np.fromfile(image_path, dtype=np.uint8)
            img = cv2.imdecode(raw, cv2.IMREAD_COLOR) if raw.size else None
        except OSError:
            img = None
        if img is None:
            logger.error(f"无法读取图片: {image_path}")
            return None
        ori_height, ori_width = img.shape[:2]
        # Target size for the model.
        max_edge = max(ori_width, ori_height)
        scale_factor = 1280 / max_edge if max_edge > 1280 else 1.
        width = self._align16(int(round(ori_width * scale_factor)))
        height = self._align16(int(round(ori_height * scale_factor)))
        # Preprocess: BGR -> RGB, resize, scale to [-1, 1], add batch axis.
        img_rgb = Image.fromarray(img[:, :, ::-1]).resize((width, height), Image.Resampling.BILINEAR)
        img_np = np.array(img_rgb).astype(np.float32) / 127.5 - 1.0
        input_data = np.expand_dims(img_np, axis=0)
        # Run the model.
        fake_img = self.sess_land.run(None, {self.sess_land.get_inputs()[0].name: input_data})[0]
        # Postprocess back to the original size.
        result_img = self.post_precess(fake_img, (ori_width, ori_height))
        # Encode in memory (RGB -> BGR for OpenCV) and write the bytes.
        ext = os.path.splitext(output_path)[1]
        ok, encoded = cv2.imencode(ext, np.ascontiguousarray(result_img[:, :, ::-1]))
        if not ok:
            logger.error(f"无法保存图片: {output_path}")
            return None
        encoded.tofile(output_path)
        return output_path
def videopic_to_new(params):
    """Convert every image and video in a directory with the given model.

    Args:
        params (dict): Job description with these keys:
            "video_dir": input directory,
            "model": path of the ONNX model file,
            "output_dir": output directory (created if missing),
            "device": optional, "cpu" or "gpu"; defaults to "gpu".

    Returns:
        list: Output paths of the files that were processed successfully.
        Errors for individual files are logged and do not stop the batch.
    """
    source_dir = params["video_dir"]
    model_path = params["model"]
    target_dir = params["output_dir"]
    device = params.get("device", "gpu")

    os.makedirs(target_dir, exist_ok=True)

    # Tell the user up front whether audio/efficient encoding is available.
    if not has_ffmpeg():
        logger.warning("ffmpeg未安装,生成的视频将没有声音且编码效率较低")
    else:
        logger.info("ffmpeg已安装,将自动为视频添加音频并使用高效编码")

    cartoonizer = Cartoonizer(model_path, device)
    model_tag = os.path.basename(model_path).rsplit('.', 1)[0]

    # Supported media formats (lower-case suffixes).
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.webp')
    video_suffixes = ('.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv')

    processed_files = []
    for entry in os.listdir(source_dir):
        src = os.path.join(source_dir, entry)
        if not os.path.isfile(src):
            continue
        stem, raw_suffix = os.path.splitext(entry)
        suffix = raw_suffix.lower()
        target_name = f"{stem}_{model_tag}{suffix}"
        target = os.path.join(target_dir, target_name)
        try:
            if suffix in image_suffixes:
                logger.info(f"\n处理图片: {entry}")
                produced = cartoonizer.process_image(src, target)
            elif suffix in video_suffixes:
                logger.info(f"\n处理视频: {entry}")
                produced = cartoonizer.process_video(src, target)
            else:
                # Not a supported media type.
                continue
            if produced:
                processed_files.append(produced)
                logger.info(f"保存为: {target_name}")
        except Exception as e:
            logger.error(f"处理 {entry} 时出错: {str(e)}")

    logger.info("\n处理完成。")
    logger.info(f"共处理文件: {len(processed_files)}")
    return processed_files
def check_video_format(path):
    """Report codec name and pixel format of the first video stream.

    Returns:
        The stripped ffprobe output (CSV without section prefix), a
        "检查失败: ..." message if running ffprobe raised, or ``None`` when
        ffmpeg is not installed.
    """
    if has_ffmpeg():
        probe_cmd = ['ffprobe']
        probe_cmd += ['-v', 'error']
        probe_cmd += ['-select_streams', 'v:0']
        probe_cmd += ['-show_entries', 'stream=codec_name,pix_fmt']
        probe_cmd += ['-of', 'csv=p=0']
        probe_cmd.append(path)
        try:
            completed = subprocess.run(probe_cmd, capture_output=True, text=True)
            return completed.stdout.strip()
        except Exception as e:
            return f"检查失败: {str(e)}"
    logger.warning("无法检查视频格式: ffmpeg未安装")
    return None
if __name__ == "__main__":
    # Job description: input directory, model file, output directory and
    # device ("gpu" is also the default when the key is omitted).
    params = {
        "video_dir": r"D:\Personal\Downloads\611",
        "model": r"E:\python成品\15视频转绘\AnimeGANv3-1.1.0\AnimeGANv3-1.1.0\deploy\AnimeGANv3_Hayao_36.onnx",
        "output_dir": r"E:\软件视频类型测试\1带货测试\成品\成品",
        "device": "gpu"
    }
    # Suffixes for which the encoded format is inspected afterwards.
    video_suffixes = ('.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv')

    # Run the conversion.
    results = videopic_to_new(params)

    # Report every produced file, plus codec details for videos.
    logger.info("\n处理后的文件:")
    for res in results:
        logger.info(f" - {res}")
        if not res.lower().endswith(video_suffixes):
            continue
        logger.info(f" 视频格式: {check_video_format(res)}")
最新发布