Analyzing and Fixing Duplicate Records in the bilive Video Upload Queue
Background and Pain Points
In real-world use of the bilive project, users frequently run into duplicate records in the video upload queue. As the system works through its recording, rendering, and subtitle-recognition pipeline, the same video file is occasionally inserted into the upload queue more than once, which causes:
- Duplicate uploads: the same video is submitted to Bilibili multiple times
- Wasted resources: extra network bandwidth and system resources are consumed
- Data inconsistency: duplicate rows appear in the database and undermine system stability
- Upload failures: the Bilibili API may reject uploads it recognizes as duplicate content
A Closer Look at the Architecture
Core Components of the Upload Queue
bilive's upload queue is built on a SQLite database and consists of the following core pieces:
Table Design
-- upload_queue table structure
CREATE TABLE upload_queue (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    video_path TEXT,
    locked INTEGER DEFAULT 0
);
-- unique index keeps video paths from repeating
CREATE UNIQUE INDEX idx_video_path ON upload_queue(video_path);
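The Python snippets in this article call a connect() helper from src/db/conn.py that is never shown. For reference, a minimal sketch of what it presumably looks like (the DATA_BASE_FILE value here is an assumption; the constant itself is reused by the migration script later):

# Assumed sketch of src/db/conn.py's connect() helper; the real module is not
# shown in this article and may differ.
import sqlite3

DATA_BASE_FILE = "data/data.db"  # assumed path; match your deployment

def connect() -> sqlite3.Connection:
    # a generous timeout reduces "database is locked" errors under concurrency
    return sqlite3.connect(DATA_BASE_FILE, timeout=10)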
Root Causes of the Duplicate Records
1. Race conditions under concurrent processing
In pipeline mode, several worker threads can try to insert the same video path at the same time:
# potential problem in src/burn/render_video.py
def render_video():
    # several threads may reach this point at the same time
    if not insert_upload_queue(format_video_path):
        scan_log.error("Insert upload queue failed")
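To make the race concrete, here is a self-contained sketch (independent of bilive's real code) in which several threads run the classic check-then-insert pattern against a table that lacks the unique index; the final count regularly exceeds 1:

# Standalone reproduction of the check-then-insert race; uses a throwaway
# SQLite file and deliberately omits the unique index.
import os
import sqlite3
import tempfile
import threading

fd, db_path = tempfile.mkstemp(suffix=".db")
os.close(fd)
with sqlite3.connect(db_path) as db:
    db.execute("CREATE TABLE upload_queue (video_path TEXT)")

def racy_insert(path):
    db = sqlite3.connect(db_path)
    exists = db.execute(
        "SELECT 1 FROM upload_queue WHERE video_path = ?", (path,)
    ).fetchone()
    if not exists:  # every thread can pass this check before any insert commits
        db.execute("INSERT INTO upload_queue (video_path) VALUES (?)", (path,))
        db.commit()
    db.close()

threads = [threading.Thread(target=racy_insert, args=("/rec/a.mp4",)) for _ in range(8)]
for t in threads:
    t.start()
for t in threads:
    t.join()
with sqlite3.connect(db_path) as db:
    print(db.execute("SELECT COUNT(*) FROM upload_queue").fetchone()[0])  # often > 1
os.unlink(db_path)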
2. Incomplete exception handling
When insert_upload_queue returns False because the unique constraint fired, the system only logs the error and takes no further action:
# src/db/conn.py
def insert_upload_queue(video_path: str):
    try:
        db = connect()
        cursor = db.cursor()
        cursor.execute(
            "insert into upload_queue (video_path) values (?);", (video_path,)
        )
        db.commit()
        db.close()
        return True
    except sqlite3.IntegrityError:
        print("Insert Upload Queue failed, the video path already exists.")
        return False  # only returns False; the duplicate is never handled
3. Inconsistent path normalization
Different modules can produce different path spellings for the same video file, which defeats the unique index:
# src/burn/render_then_merge.py
def normalize_video_path(filepath):
    """Path normalization, but potentially inconsistent with other modules"""
    parts = filepath.rsplit("/", 1)[-1].split("_")
    date_time_parts = parts[1].split("-")
    new_date_time = f"{date_time_parts[0][:4]}-{date_time_parts[0][4:6]}-{date_time_parts[0][6:8]}-{date_time_parts[1]}-{date_time_parts[2]}"
    return filepath.rsplit("/", 1)[0] + "/" + parts[0] + "_" + new_date_time + "-.mp4"
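For instance, fed a rendered file named like {room_id}_{YYYYMMDD-HHMMSS-mmm} (the exact recording name pattern is assumed here for illustration), it emits a dashed timestamp plus a trailing "-", while any module that enqueues the raw recording path never applies this rewrite, so the unique index sees two different strings:

# Worked example (input naming pattern assumed for illustration):
print(normalize_video_path("/rec/12345_20240101-123456-789"))
# -> /rec/12345_2024-01-01-123456-789-.mp4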
Solutions and Implementation
Option 1: a hardened insert function (recommended)
Rework insert_upload_queue as an atomic "insert or ignore" operation:
def insert_upload_queue(video_path: str):
    """
    Hardened queue insert that tolerates duplicate records.
    Args:
        video_path: path of the video file
    Returns:
        bool: True if the row was inserted or already present
    """
    try:
        db = connect()
        cursor = db.cursor()
        # check whether the row already exists
        cursor.execute(
            "SELECT 1 FROM upload_queue WHERE video_path = ? AND locked = 0;",
            (video_path,)
        )
        existing = cursor.fetchone()
        if existing:
            # row exists and is unlocked; True means no second insert is needed
            db.close()
            return True
        # INSERT OR IGNORE avoids the IntegrityError on duplicates
        cursor.execute(
            "INSERT OR IGNORE INTO upload_queue (video_path) VALUES (?);",
            (video_path,)
        )
        db.commit()
        # verify that the row is now present
        cursor.execute(
            "SELECT 1 FROM upload_queue WHERE video_path = ?;",
            (video_path,)
        )
        inserted = cursor.fetchone()
        db.close()
        return inserted is not None
    except sqlite3.Error as e:
        print(f"Database error in insert_upload_queue: {e}")
        return False
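A quick sanity check of the intended idempotent behavior. As a design note, sqlite3 reports cursor.rowcount == 0 after an INSERT OR IGNORE that was ignored and 1 after a real insert, so the verification SELECT could be dropped if the two cases ever need to be told apart:

# Both calls report success; the second hits the early-return branch.
# The sample path is for illustration only.
path = "/rec/12345_2024-01-01-123456-789-.mp4"
assert insert_upload_queue(path)  # inserted
assert insert_upload_queue(path)  # already present, no duplicate created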
Option 2: a single path normalization standard
Create one shared path-handling utility:
# src/utils/path_utils.py
import os

def normalize_video_path(filepath):
    """
    Single normalization standard for video paths.
    Args:
        filepath: original file path
    Returns:
        str: normalized path
    """
    if not filepath:
        return filepath
    # split off directory and file name
    directory = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    if filename.endswith('.flv'):
        # sliced segments keep their original name
        return filepath
    # main video files are normalized; strip the extension first so the
    # length check below sees only the timestamp
    stem, _ext = os.path.splitext(filename)
    parts = stem.split('_')
    if len(parts) >= 2 and '-' in parts[1]:
        try:
            # canonical time format: YYYY-MM-DD-HH-MM-SS
            time_part = parts[1]
            if len(time_part) == 15:  # YYYYMMDD-HHMMSS
                date_str = time_part[:8]
                time_str = time_part[9:]
                normalized_time = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:8]}-{time_str[:2]}-{time_str[2:4]}-{time_str[4:6]}"
                new_filename = f"{parts[0]}_{normalized_time}.mp4"
                return os.path.join(directory, new_filename)
        except (ValueError, IndexError):
            pass
    # fall back to the original path when normalization is impossible
    return filepath
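Every module that touches the queue (renderer, merger, uploader) should route paths through this one helper so equal recordings collapse to one canonical spelling; a usage sketch with an assumed sample name:

# Usage sketch; the sample file name is an assumption for illustration.
canonical = normalize_video_path("/rec/12345_20240101-123456.mp4")
print(canonical)  # -> /rec/12345_2024-01-01-12-34-56.mp4
insert_upload_queue(canonical)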
Option 3: a file-based cross-process lock
For highly concurrent scenarios, serialize queue writes with a file-system lock (fcntl, so Unix-only):
import fcntl
from functools import wraps
from pathlib import Path

def with_upload_queue_lock(func):
    """
    Cross-process lock decorator for upload queue operations
    (fcntl.flock is Unix-only).
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        lock_file = Path("/tmp/bilive_upload_queue.lock")
        with open(lock_file, 'w') as f:
            # blocks until the exclusive lock is acquired
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                # run the wrapped function while holding the lock
                return func(*args, **kwargs)
            finally:
                # release explicitly; closing the file would also release it
                fcntl.flock(f, fcntl.LOCK_UN)
    return wrapper

# apply the lock decorator
@with_upload_queue_lock
def safe_insert_upload_queue(video_path: str):
    return insert_upload_queue(video_path)
Rolling Out the Full Solution
1. Database migration script
# scripts/migrate_upload_queue.py
import sqlite3
from src.db.conn import DATA_BASE_FILE

def migrate_upload_queue():
    """Remove duplicate rows and rebuild the unique index."""
    try:
        db = sqlite3.connect(DATA_BASE_FILE)
        cursor = db.cursor()
        # drop the index while deduplicating
        cursor.execute("DROP INDEX IF EXISTS idx_video_path;")
        # keep only the oldest row per path
        cursor.execute("""
            DELETE FROM upload_queue
            WHERE id NOT IN (
                SELECT MIN(id)
                FROM upload_queue
                GROUP BY video_path
            );
        """)
        # rebuild the unique index
        cursor.execute("""
            CREATE UNIQUE INDEX IF NOT EXISTS idx_video_path
            ON upload_queue(video_path);
        """)
        db.commit()
        db.close()
        print("Migration finished; duplicate records removed")
    except sqlite3.Error as e:
        print(f"Database migration failed: {e}")

if __name__ == "__main__":
    migrate_upload_queue()
2. Monitoring and logging
# src/monitoring/queue_monitor.py
import time
from src.db.conn import get_all_upload_queue
from src.log.logger import upload_log

class UploadQueueMonitor:
    def __init__(self, check_interval=300):  # check every 5 minutes
        self.check_interval = check_interval
        self.duplicate_count = 0

    def check_duplicates(self):
        """Detect and report duplicate records."""
        queues = get_all_upload_queue()
        path_count = {}
        for queue in queues:
            video_path = queue['video_path']
            path_count[video_path] = path_count.get(video_path, 0) + 1
        duplicates = {k: v for k, v in path_count.items() if v > 1}
        if duplicates:
            self.duplicate_count += 1
            upload_log.warning(
                f"Found {len(duplicates)} duplicated paths; {self.duplicate_count} duplicate events so far"
            )
            for path, count in duplicates.items():
                upload_log.debug(f"Duplicated path: {path}, occurrences: {count}")
        return len(duplicates)

    def start_monitoring(self):
        """Run the monitoring loop."""
        while True:
            self.check_duplicates()
            time.sleep(self.check_interval)
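Since start_monitoring loops forever, it should not run on the main thread; one way to wire it in, sketched here, is a daemon thread started at pipeline boot:

# Sketch: run the monitor in a daemon thread so it never blocks the pipeline
# and exits together with the main process.
import threading

monitor = UploadQueueMonitor(check_interval=300)
threading.Thread(target=monitor.start_monitoring, daemon=True).start()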
Performance Tuning and Best Practices
Concurrency
# thread-safe queue operations via a process-wide singleton
from threading import Lock

class UploadQueueManager:
    _instance = None
    _lock = Lock()

    def __new__(cls):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance.queue_lock = Lock()
            return cls._instance

    @with_upload_queue_lock
    def safe_insert(self, video_path):
        """Thread-safe queue insert."""
        return insert_upload_queue(video_path)
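Because __new__ always hands back the same instance, every call site funnels its inserts through the same lock; a small usage sketch (sample path assumed):

# Usage sketch: all constructions yield one shared instance.
manager_a = UploadQueueManager()
manager_b = UploadQueueManager()
assert manager_a is manager_b
manager_a.safe_insert("/rec/12345_2024-01-01-12-34-56.mp4")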
Batch processing
def batch_process_upload_queue(batch_size=10):
    """Process the upload queue in batches to cut down on database round trips."""
    queues = get_all_upload_queue()
    processed_paths = set()
    batch = []
    for queue in queues:
        if queue['video_path'] not in processed_paths:
            processed_paths.add(queue['video_path'])
            batch.append(queue)
            if len(batch) >= batch_size:
                process_batch(batch)
                batch = []
    if batch:
        process_batch(batch)

def process_batch(batch):
    """Handle one batch of upload tasks."""
    # the actual batch upload logic goes here
    pass
Testing and Verification
Unit tests
# tests/test_upload_queue.py
import unittest
import tempfile
import os
from concurrent.futures import ThreadPoolExecutor
from src.db.conn import insert_upload_queue, delete_upload_queue
from src.db.conn import get_all_upload_queue

class TestUploadQueue(unittest.TestCase):
    def setUp(self):
        # point the connection helper at a throwaway database
        self.test_db = tempfile.NamedTemporaryFile(delete=False)
        os.environ['TEST_DB_PATH'] = self.test_db.name

    def test_duplicate_insertion(self):
        """Duplicate inserts must be absorbed."""
        test_path = "/test/video.mp4"
        # the first insert should succeed
        result1 = insert_upload_queue(test_path)
        self.assertTrue(result1)
        # the second insert should absorb the duplicate (True means already present)
        result2 = insert_upload_queue(test_path)
        self.assertTrue(result2)
        # the queue should hold exactly one row
        queues = get_all_upload_queue()
        self.assertEqual(len(queues), 1)

    def test_concurrent_insertion(self):
        """Concurrent inserts of one path must still yield one row."""
        test_path = "/test/concurrent.mp4"
        with ThreadPoolExecutor(max_workers=8) as pool:
            results = list(pool.map(insert_upload_queue, [test_path] * 8))
        self.assertTrue(all(results))
        queues = get_all_upload_queue()
        self.assertEqual(len(queues), 1)

    def tearDown(self):
        # remove the throwaway database
        if os.path.exists(self.test_db.name):
            os.unlink(self.test_db.name)
Summary and Outlook
With the analysis and fixes above, the duplicate-record problem in bilive's upload queue is addressed end to end. The key improvements:
- Database layer: stronger unique constraints and error handling
- Business logic layer: one path normalization standard plus concurrency control
- Monitoring layer: live duplicate detection and alerting
- Testing layer: unit and integration test coverage
These changes not only fix the duplicate records seen today but also lay the groundwork for future scalability and stability. Run the database maintenance script periodically and keep the monitor enabled so the system stays healthy over the long run.
Next steps:
- Replace the file lock with a Redis-based distributed lock
- Add a scheduled task that repairs duplicate records automatically
- Build a web UI for queue management and monitoring
- Export Prometheus monitoring metrics
With continued tuning, bilive will serve large-scale live recording and automated upload workloads even better.



