目录
2.5 最后一个动作区间的最后一个视频帧之前的内容不再显示(最好用)
4.1 mmaction2训练TSN模型脚踢/非脚踢 两个分类的动作识别,需要多少训练和验证样本、需要多少正负样本?各需要多少视频才能效果好
一、前言
《动作识别5——mmaction2的训练和测试》里面说了官方文档介绍的使用kinetics400_tiny数据进行训练的demo。那我们就模仿kinetics400_tiny搞个数据集,然后训练TSN试试。
这一篇主要讲如何构建一个标注工具,以及如何进行训练、测试和推理。训练部分我只是先把流程跑通,并不代表这是最佳的训练方式;训练准确率如何提高,会在后面的文章中再考虑。我也是在探索式地训练,如果你有更好的训练建议,欢迎一起讨论。
二、找视频和剪视频
找视频然后下载视频就不说了,去找那种在线下载B站视频的网址。说一下怎么剪视频。虽然我们在《动作识别1——2D姿态估计+ 几何分析 + 有限状态机》第三节的工具中有视频剪辑和视频转图片文件夹的代码,但是我觉得那个剪视频的代码不是很好用啊,因为还得知道要剪视频的帧号范围。所以我在2.2中搞了个图像二分类标注工具代码,并进行了一些修改。
如果你是要做图像的二分类你可以直接用2.1和2.3的代码,如果你是要做视频动作识别的二分类,你可以直接用2.1和2.5的代码。
2.6和2.7因为不好用我已经废弃,但由于它们在特定情况下可能也是有用,所以没删它们。为什么2.6和2.7废弃呢?你可以去看一下2.4的问题描述就知道,实际上当时为了解决2.4中产生的问题,弄了2.6和2.7的代码,后来发现了一个更好的思路,就补充了2.5。
2.1 视频转图片文件夹
首先,我搞了个视频转图片文件夹的代码。然后我把下载的视频都放在D:\zero_track\mmaction2\input_videos,然后在mmaction2新建了一个my_tools文件夹,里面放一个mp4tojpg.py,下面脚本就会在视频的所在路径下生成一个跟视频名称一样的图片文件夹,比方说test1.mp4就会生成一个test1文件夹。
#!/usr/bin/env python3
import cv2
import os
import argparse
# ---------- User-configurable ----------
VIDEO_EXTS = ('.mp4', '.avi', '.mov', '.mkv') # file extensions treated as videos by the batch walker
# ----------------------------
def extract_frames(video_path):
    """Split one video into a folder of JPEG frames.

    Frames are written to a directory that sits next to the video and has
    the video's name without its extension (``test1.mp4`` -> ``test1/``),
    named ``000000.jpg``, ``000001.jpg``, ... in decoding order.

    Args:
        video_path: Path of the video file to decode.

    Returns:
        Number of frames actually written to disk; 0 when the video
        cannot be opened.
    """
    base_dir = os.path.dirname(video_path)
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    output_dir = os.path.join(base_dir, video_name)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print(f'[WARN] 无法打开视频: {video_path}')
        return 0

    # Create the folder only after the video is known to be readable, so an
    # unreadable file does not leave an empty directory behind.
    os.makedirs(output_dir, exist_ok=True)

    frame_idx = 0  # index of the decoded frame, also used as the file name
    saved = 0      # frames that were really written
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_path = os.path.join(output_dir, f'{frame_idx:06d}.jpg')
            # cv2.imwrite signals failure through its return value rather
            # than by raising, so the result has to be checked explicitly.
            if cv2.imwrite(frame_path, frame):
                saved += 1
            else:
                print(f'[WARN] 写入失败: {frame_path}')
            frame_idx += 1
    finally:
        # Release the capture even if decoding or writing raised.
        cap.release()

    print(f'[INFO] 提取完成: {saved} 张 -> {output_dir}')
    return saved
def walk_and_extract(root_dir):
    """Walk ``root_dir`` recursively and extract frames from every video found.

    A file counts as a video when its lower-cased name ends with one of
    ``VIDEO_EXTS``. Prints a summary with the number of videos handled.
    """
    # Lazy generator: each video is handed to extract_frames as soon as the
    # walk reaches it.
    video_paths = (
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root_dir)
        for name in names
        if name.lower().endswith(VIDEO_EXTS)
    )
    processed = 0
    for path in video_paths:
        extract_frames(path)
        processed += 1
    print(f'[INFO] 批量提取完成,共处理 {processed} 个视频')
if __name__ == '__main__':
    # To convert every video in a folder, leave input_path as None and edit
    # the fallback directory below. To convert a single video, set
    # input_path to the path of that video file.
    input_path = None

    parser = argparse.ArgumentParser(description='视频逐帧提取为图片(支持单文件或整目录)')
    parser.add_argument('--input', default=input_path, help='单个视频路径')
    parser.add_argument('--dir', help='批量提取目录')
    args = parser.parse_args()

    input_given = args.input is not None
    dir_given = args.dir is not None

    # The two options are mutually exclusive.
    if input_given and dir_given:
        parser.error('不能同时指定 --input 和 --dir,请二选一')

    # Neither option given: fall back to the hard-coded video folder.
    if not input_given and not dir_given:
        args.dir = r'D:\zero_track\mmaction2\input_videos'  # change to your own video folder

    if args.input:
        extract_frames(args.input)
    else:
        walk_and_extract(args.dir)
2.2 图像二分类标注工具
然后,我搞了一个图像二分类的标注工具
import os
import pygame
import sys
import shutil
import time
import json
from pygame.locals import *
# Initialise pygame (must happen before the display and font calls below)
pygame.init()
# Configuration: the window is the screen size minus a 100 px margin in each dimension
SCREEN_WIDTH, SCREEN_HEIGHT = pygame.display.Info().current_w, pygame.display.Info().current_h
WINDOW_WIDTH, WINDOW_HEIGHT = SCREEN_WIDTH - 100, SCREEN_HEIGHT - 100
BG_COLOR = (40, 44, 52)
TEXT_COLOR = (220, 220, 220)
HIGHLIGHT_COLOR = (97, 175, 239)
BUTTON_COLOR = (56, 58, 66)
BUTTON_HOVER_COLOR = (72, 74, 82)
WARNING_COLOR = (255, 152, 0)
CONFIRM_COLOR = (76, 175, 80)
# Create the window
screen = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("图像分类标注工具")
# Fonts (SimHei is requested so that the Chinese UI strings can be rendered)
font = pygame.font.SysFont("SimHei", 24)
small_font = pygame.font.SysFont("SimHei", 18)
class ImageLabelingTool:
    """Model behind the binary image-labelling window.

    Keeps track of the folders under ``root_path`` that contain images, the
    image currently shown, the label of each image path (``'positive'`` or
    ``'negative'``), the continuous-labelling range, key-repeat timing and
    an undo history. Labels are mirrored to ``labels_backup.json`` inside
    ``root_path`` and reloaded from there on start-up.
    """

    # File extensions (compared lower-case) that count as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

    def __init__(self, root_path):
        """Scan ``root_path`` for image folders and restore saved labels."""
        self.root_path = root_path
        self.folders = []                   # every folder that contains images
        self.current_folder_index = 0       # index into self.folders
        self.images = []                    # image paths of the current folder
        self.current_image_index = 0        # index into self.images
        self.labels = {}                    # image path -> 'positive' / 'negative'
        # Continuous-labelling state
        self.continuous_mode = False        # True between "start" and "end"
        self.continuous_label = None        # label applied to the whole range
        self.continuous_start_index = None  # image index where the range began
        # Key-repeat state
        self.key_pressed = {"left": False, "right": False}
        self.key_pressed_time = 0           # time.time() of the last key press
        self.last_key_time = 0              # time of the last repeat, 0 = none yet
        self.key_repeat_delay = 0.8         # seconds before the first repeat
        self.key_repeat_interval = 0.15     # seconds between later repeats
        # Undo history
        self.undo_stack = []
        self.max_undo_steps = 50
        # Confirmation dialog state
        self.show_confirm_dialog = False
        self.confirm_message = ""
        self.confirm_action = ""            # action to run when "yes" is clicked

        self.find_image_folders()
        if self.folders:
            self.load_current_folder_images()
        self.load_labels()

    def find_image_folders(self):
        """Append every folder under ``root_path`` that holds an image."""
        for root, _dirs, files in os.walk(self.root_path):
            if any(name.lower().endswith(self.IMAGE_EXTENSIONS) for name in files):
                self.folders.append(root)

    def load_current_folder_images(self):
        """Reload the sorted image list of the current folder; go to image 0."""
        self.images = []
        self.current_image_index = 0
        if not self.folders:
            # Nothing to list; avoids an IndexError when no folder has images.
            return
        folder_path = self.folders[self.current_folder_index]
        self.images = sorted(
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def get_current_image(self):
        """Return the path of the current image, or None if there is none."""
        if 0 <= self.current_image_index < len(self.images):
            return self.images[self.current_image_index]
        return None

    def next_image(self):
        """Advance to the next image; return False when already at the end."""
        if self.current_image_index < len(self.images) - 1:
            # Navigation is deliberately not recorded for undo: prev_image
            # never recorded it, and recording every forward step pushed the
            # labelling snapshots out of the bounded undo stack.
            self.current_image_index += 1
            return True
        return False

    def prev_image(self):
        """Go back one image; return False when already at the start."""
        if self.current_image_index > 0:
            self.current_image_index -= 1
            return True
        return False

    def label_current_image(self, label):
        """Assign ``label`` to the current image and persist the labels."""
        current_image = self.get_current_image()
        if current_image:
            self.save_state()
            self.labels[current_image] = label
            self.save_labels()

    def start_continuous_labeling(self):
        """Start a continuous range at the current image.

        The range uses the current image's label; an unlabelled image is
        marked 'positive' and that label is used.

        Returns:
            True if a range was started, False if there is no current image.
        """
        current_image = self.get_current_image()
        if not current_image:
            return False
        self.save_state()
        if current_image in self.labels:
            self.continuous_label = self.labels[current_image]
        else:
            self.continuous_label = "positive"
            self.labels[current_image] = self.continuous_label
        self.continuous_mode = True
        self.continuous_start_index = self.current_image_index
        self.save_labels()
        return True

    def end_continuous_labeling(self):
        """Label every image between the range start and the current image.

        Returns:
            True if a range was active and has been applied, else False.
        """
        if not self.continuous_mode or self.continuous_start_index is None:
            return False
        self.save_state()
        first = min(self.continuous_start_index, self.current_image_index)
        last = max(self.continuous_start_index, self.current_image_index)
        # Slicing tolerates a start index that is stale because the image
        # list shrank after the range was opened.
        for path in self.images[first:last + 1]:
            self.labels[path] = self.continuous_label
        self.continuous_mode = False
        self.continuous_start_index = None
        self.save_labels()
        return True

    def move_labeled_files(self, positive_dir, negative_dir):
        """Move every labelled file that still exists into its class folder.

        Name clashes in the destination are resolved by appending ``_1``,
        ``_2``, ... to the base name. Moved files are dropped from the label
        map, the current folder is reloaded and the labels are persisted.

        Returns:
            Number of files moved.
        """
        os.makedirs(positive_dir, exist_ok=True)
        os.makedirs(negative_dir, exist_ok=True)

        moved = []
        for img_path, label in self.labels.items():
            if label not in ("positive", "negative") or not os.path.exists(img_path):
                continue
            dest_dir = positive_dir if label == "positive" else negative_dir
            filename = os.path.basename(img_path)
            base_name, ext = os.path.splitext(filename)
            new_filename = filename
            counter = 1
            while os.path.exists(os.path.join(dest_dir, new_filename)):
                new_filename = f"{base_name}_{counter}{ext}"
                counter += 1
            try:
                shutil.move(img_path, os.path.join(dest_dir, new_filename))
            except OSError as e:
                # Best effort: report and carry on with the remaining files.
                print(f"移动文件失败: {e}")
            else:
                moved.append(img_path)

        for img_path in moved:
            self.labels.pop(img_path, None)

        self.load_current_folder_images()
        self.save_labels()
        return len(moved)

    def next_folder(self):
        """Switch to the next folder, asking first if labelled files remain.

        Returns:
            True if the folder was switched; False if this is the last
            folder or a confirmation dialog was requested instead.
        """
        if self.current_folder_index >= len(self.folders) - 1:
            return False
        current_folder = os.path.normpath(self.folders[self.current_folder_index])
        # Compare the parent directory exactly: a plain prefix test would
        # also match sibling folders such as "test10" and sub-folders.
        has_unmoved_labels = any(
            os.path.normpath(os.path.dirname(img_path)) == current_folder
            and os.path.exists(img_path)
            for img_path in self.labels
        )
        if has_unmoved_labels:
            self.show_confirm_dialog = True
            self.confirm_action = "next_folder"
            self.confirm_message = "当前文件夹有未移动的标记文件,确定要切换到下一个文件夹吗?"
            return False
        self.current_folder_index += 1
        self.load_current_folder_images()
        return True

    def prev_folder(self):
        """Switch to the previous folder; return False at the first one."""
        if self.current_folder_index > 0:
            self.current_folder_index -= 1
            self.load_current_folder_images()
            return True
        return False

    def handle_key_repeats(self):
        """Step one image when a held navigation key is due for a repeat.

        The first repeat fires ``key_repeat_delay`` seconds after the key
        press, later ones every ``key_repeat_interval`` seconds.
        """
        if not any(self.key_pressed.values()):
            return
        now = time.time()
        if self.last_key_time == 0:
            due = now - self.key_pressed_time > self.key_repeat_delay
        else:
            due = now - self.last_key_time > self.key_repeat_interval
        if not due:
            return
        if self.key_pressed["left"]:
            self.prev_image()
        elif self.key_pressed["right"]:
            self.next_image()
        self.last_key_time = now

    def save_state(self):
        """Push a snapshot of the labelling state onto the undo stack."""
        if len(self.undo_stack) >= self.max_undo_steps:
            self.undo_stack.pop(0)  # drop the oldest snapshot
        self.undo_stack.append({
            "current_image_index": self.current_image_index,
            "labels": self.labels.copy(),
            "continuous_mode": self.continuous_mode,
            "continuous_start_index": self.continuous_start_index,
            "continuous_label": self.continuous_label,
        })

    def undo(self):
        """Restore the latest snapshot; return False if there is none."""
        if not self.undo_stack:
            return False
        state = self.undo_stack.pop()
        # The image list may be shorter than when the snapshot was taken
        # (files moved away), so keep the restored index inside the list.
        last_valid = max(len(self.images) - 1, 0)
        self.current_image_index = min(state["current_image_index"], last_valid)
        self.labels = state["labels"]
        self.continuous_mode = state["continuous_mode"]
        self.continuous_start_index = state["continuous_start_index"]
        self.continuous_label = state["continuous_label"]
        # Keep the backup file in step with the restored labels.
        self.save_labels()
        return True

    def save_labels(self):
        """Write the labels of files that still exist to the backup file."""
        labels_file = os.path.join(self.root_path, "labels_backup.json")
        existing_labels = {k: v for k, v in self.labels.items() if os.path.exists(k)}
        try:
            with open(labels_file, 'w', encoding='utf-8') as f:
                json.dump(existing_labels, f)
        except (OSError, TypeError, ValueError) as e:
            print(f"保存标记状态失败: {e}")

    def load_labels(self):
        """Load labels from the backup file if it exists and holds a dict."""
        labels_file = os.path.join(self.root_path, "labels_backup.json")
        if not os.path.exists(labels_file):
            return
        try:
            with open(labels_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            print(f"加载标记状态失败: {e}")
            return
        if isinstance(loaded, dict):
            self.labels = loaded
        else:
            print("加载标记状态失败: 文件内容不是字典")
def draw_button(screen, text, rect, hover=False, color=None):
    """Draw a rounded button with a grey border and centred caption.

    Args:
        screen: Surface to draw on.
        text: Caption, rendered with ``small_font``.
        rect: Button rectangle.
        hover: Selects the hover fill colour when ``color`` is not given.
        color: Explicit fill colour; overrides the hover/normal colours.
    """
    fill = color
    if fill is None:
        fill = BUTTON_HOVER_COLOR if hover else BUTTON_COLOR
    # Filled body first, then the 2 px outline on top of it.
    pygame.draw.rect(screen, fill, rect, border_radius=5)
    pygame.draw.rect(screen, (100, 100, 100), rect, 2, border_radius=5)
    caption = small_font.render(text, True, TEXT_COLOR)
    screen.blit(caption, caption.get_rect(center=rect.center))
def _wrap_message(message, max_width, measure):
    """Greedily wrap ``message`` into lines no wider than ``max_width``.

    Breaks at whitespace when possible; a run without spaces that is still
    too wide (for example Chinese text) is broken between characters.
    ``measure`` maps a string to its rendered width.
    """
    lines = []
    current = ""
    for word in message.split():
        candidate = f"{current} {word}" if current else word
        if measure(candidate) <= max_width:
            current = candidate
            continue
        if current:
            lines.append(current)
            current = ""
        for ch in word:
            if current and measure(current + ch) > max_width:
                lines.append(current)
                current = ch
            else:
                current += ch
    if current:
        lines.append(current)
    return lines


def draw_confirm_dialog(screen, message, width=400, height=200):
    """Draw a centred yes/no dialog and return its rectangles.

    Args:
        screen: Surface to draw on.
        message: Text shown in the dialog; wrapped to the dialog width
            minus a 20 px margin on each side.
        width: Dialog width in pixels.
        height: Dialog height in pixels.

    Returns:
        Tuple ``(dialog_rect, yes_button, no_button)`` of ``pygame.Rect``.
    """
    dialog_rect = pygame.Rect(
        (WINDOW_WIDTH - width) // 2,
        (WINDOW_HEIGHT - height) // 2,
        width, height
    )
    # Background and outline
    pygame.draw.rect(screen, BG_COLOR, dialog_rect, border_radius=10)
    pygame.draw.rect(screen, TEXT_COLOR, dialog_rect, 2, border_radius=10)

    # Message: whitespace-only wrapping left space-free text on a single
    # line wider than the dialog, so wrapping also breaks inside long runs.
    lines = _wrap_message(message, width - 40, lambda s: small_font.size(s)[0])
    for i, line in enumerate(lines):
        text_surface = small_font.render(line, True, TEXT_COLOR)
        screen.blit(text_surface, (dialog_rect.x + 20, dialog_rect.y + 30 + i * 25))

    # Buttons
    yes_button = pygame.Rect(dialog_rect.x + width // 2 - 100, dialog_rect.y + height - 50, 80, 30)
    no_button = pygame.Rect(dialog_rect.x + width // 2 + 20, dialog_rect.y + height - 50, 80, 30)
    draw_button(screen, "是", yes_button, color=CONFIRM_COLOR)
    draw_button(screen, "否", no_button, color=WARNING_COLOR)
    return dialog_rect, yes_button, no_button
def main():
    """Run the labelling window until it is closed.

    Keys: left/right arrows step through images (hold to repeat), ``w``/``s``
    label the image positive/negative, up/down arrows start/end a
    continuous range, ``x`` moves labelled files into the ``1``/``0``
    folders under the root path, ``c``/``z`` go to the next/previous
    folder, ``Ctrl+Z`` undoes, ``Esc`` dismisses the confirmation dialog.
    The on-screen buttons trigger the same actions. Labels are saved once
    more when the loop ends, then the process exits.
    """
    # Placeholder root path; change it before use.
    root_path = r"D:\zero_track\mmaction2\input_videos\test1"
    tool = ImageLabelingTool(root_path)

    # Output folders for positive / negative samples
    positive_dir = os.path.join(root_path, "1")
    negative_dir = os.path.join(root_path, "0")

    running = True
    clock = pygame.time.Clock()

    # Two rows of buttons along the bottom edge
    button_height = 40
    button_width = 140
    button_margin = 15
    button_row1_y = WINDOW_HEIGHT - button_height - button_margin
    button_row2_y = WINDOW_HEIGHT - 2 * button_height - 2 * button_margin

    def button_rect(column, y):
        """Rectangle of the button in ``column`` (0-based) of the row at ``y``."""
        x = button_margin * (column + 1) + button_width * column
        return pygame.Rect(x, y, button_width, button_height)

    # Upper row: navigation
    nav_buttons = {
        name: button_rect(column, button_row2_y)
        for column, name in enumerate(
            ("prev", "next", "prev_folder", "next_folder", "undo"))
    }
    # Lower row: labelling
    label_buttons = {
        name: button_rect(column, button_row1_y)
        for column, name in enumerate(
            ("positive", "negative", "continuous_start", "continuous_end", "move_files"))
    }

    # Area in which the image is shown
    image_area = pygame.Rect(50, 80, WINDOW_WIDTH - 100, WINDOW_HEIGHT - 220)

    # Time of the last navigation key press, read by handle_key_repeats()
    tool.key_pressed_time = 0

    def do_undo():
        if tool.undo():
            print("已撤销上一次操作")
        else:
            print("没有可撤销的操作")

    def do_move():
        moved = tool.move_labeled_files(positive_dir, negative_dir)
        print(f"已移动 {moved} 个文件")

    while running:
        mouse_pos = pygame.mouse.get_pos()

        # Repeat navigation while an arrow key is held
        tool.handle_key_repeats()

        for event in pygame.event.get():
            if event.type == QUIT:
                running = False

            elif event.type == KEYDOWN:
                if event.key == K_RIGHT:
                    tool.key_pressed["right"] = True
                    tool.key_pressed["left"] = False
                    tool.key_pressed_time = time.time()
                    tool.next_image()  # respond once immediately
                elif event.key == K_LEFT:
                    tool.key_pressed["left"] = True
                    tool.key_pressed["right"] = False
                    tool.key_pressed_time = time.time()
                    tool.prev_image()  # respond once immediately
                elif event.key == K_w:
                    tool.label_current_image("positive")
                elif event.key == K_s:
                    tool.label_current_image("negative")
                elif event.key == K_UP:
                    if not tool.start_continuous_labeling():
                        print("无法开始连续标记")
                elif event.key == K_DOWN:
                    if not tool.end_continuous_labeling():
                        print("没有激活的连续标记")
                elif event.key == K_x:
                    do_move()
                elif event.key == K_c:
                    tool.next_folder()
                elif event.key == K_z:
                    # Ctrl+Z must be tested before plain z, otherwise the
                    # plain-z branch always wins and undo is unreachable.
                    if pygame.key.get_mods() & KMOD_CTRL:
                        do_undo()
                    else:
                        tool.prev_folder()
                elif event.key == K_ESCAPE:
                    if tool.show_confirm_dialog:
                        tool.show_confirm_dialog = False

            elif event.type == KEYUP:
                if event.key == K_RIGHT:
                    tool.key_pressed["right"] = False
                    tool.last_key_time = 0  # reset the repeat timer
                elif event.key == K_LEFT:
                    tool.key_pressed["left"] = False
                    tool.last_key_time = 0  # reset the repeat timer

            elif event.type == MOUSEBUTTONDOWN and event.button == 1:
                if tool.show_confirm_dialog:
                    # Re-drawing the dialog is how its button rects are obtained.
                    _dialog_rect, yes_button, no_button = draw_confirm_dialog(
                        screen, tool.confirm_message)
                    if yes_button.collidepoint(mouse_pos):
                        tool.show_confirm_dialog = False
                        if tool.confirm_action == "next_folder":
                            tool.current_folder_index += 1
                            tool.load_current_folder_images()
                    elif no_button.collidepoint(mouse_pos):
                        tool.show_confirm_dialog = False
                elif nav_buttons["prev"].collidepoint(mouse_pos):
                    tool.prev_image()
                elif nav_buttons["next"].collidepoint(mouse_pos):
                    tool.next_image()
                elif nav_buttons["prev_folder"].collidepoint(mouse_pos):
                    tool.prev_folder()
                elif nav_buttons["next_folder"].collidepoint(mouse_pos):
                    tool.next_folder()
                elif nav_buttons["undo"].collidepoint(mouse_pos):
                    do_undo()
                elif label_buttons["positive"].collidepoint(mouse_pos):
                    tool.label_current_image("positive")
                elif label_buttons["negative"].collidepoint(mouse_pos):
                    tool.label_current_image("negative")
                elif label_buttons["continuous_start"].collidepoint(mouse_pos):
                    if not tool.start_continuous_labeling():
                        print("无法开始连续标记")
                elif label_buttons["continuous_end"].collidepoint(mouse_pos):
                    if not tool.end_continuous_labeling():
                        print("没有激活的连续标记")
                elif label_buttons["move_files"].collidepoint(mouse_pos):
                    do_move()

        # ---- drawing ----
        screen.fill(BG_COLOR)

        # Current folder and its position among all folders
        if tool.folders:
            folder_name = os.path.basename(tool.folders[tool.current_folder_index])
            folder_text = f"当前文件夹: {folder_name} ({tool.current_folder_index + 1}/{len(tool.folders)})"
            text_surface = small_font.render(folder_text, True, TEXT_COLOR)
            screen.blit(text_surface, (20, 20))

        # Current image
        current_image_path = tool.get_current_image()
        if current_image_path and os.path.exists(current_image_path):
            try:
                img = pygame.image.load(current_image_path)
                img_rect = img.get_rect()
                # Scale to fit the image area while keeping the aspect ratio
                scale = min(image_area.width / img_rect.width,
                            image_area.height / img_rect.height)
                new_size = (int(img_rect.width * scale), int(img_rect.height * scale))
                img = pygame.transform.smoothscale(img, new_size)
                screen.blit(img, img.get_rect(center=image_area.center))

                # File name, position and label above the image
                info_text = f"{os.path.basename(current_image_path)} ({tool.current_image_index + 1}/{len(tool.images)})"
                if current_image_path in tool.labels:
                    label = tool.labels[current_image_path]
                    info_text += f" - 已标记: {'正样本' if label == 'positive' else '负样本'}"
                text_surface = font.render(info_text, True, TEXT_COLOR)
                text_rect = text_surface.get_rect(center=(WINDOW_WIDTH // 2, image_area.y - 20))
                screen.blit(text_surface, text_rect)

                # Range being marked in continuous mode
                if tool.continuous_mode and tool.continuous_start_index is not None:
                    start_idx = min(tool.continuous_start_index, tool.current_image_index)
                    end_idx = max(tool.continuous_start_index, tool.current_image_index)
                    range_text = f"标记范围: {start_idx + 1} - {end_idx + 1}"
                    range_surface = small_font.render(range_text, True, HIGHLIGHT_COLOR)
                    screen.blit(range_surface, (20, 50))
                    # Bar under the image showing the range relative to the folder
                    marker_width = image_area.width / len(tool.images)
                    start_x = image_area.x + start_idx * marker_width
                    end_x = image_area.x + (end_idx + 1) * marker_width
                    pygame.draw.rect(screen, HIGHLIGHT_COLOR,
                                     (start_x, image_area.y + image_area.height + 5,
                                      end_x - start_x, 5))
            except Exception as e:
                # UI boundary: show the problem in the window instead of crashing.
                error_text = f"无法加载图片: {e}"
                text_surface = font.render(error_text, True, (255, 0, 0))
                screen.blit(text_surface,
                            (image_area.centerx - text_surface.get_width() // 2,
                             image_area.centery - text_surface.get_height() // 2))
        else:
            no_image_text = "没有图片可显示"
            text_surface = font.render(no_image_text, True, TEXT_COLOR)
            screen.blit(text_surface,
                        (image_area.centerx - text_surface.get_width() // 2,
                         image_area.centery - text_surface.get_height() // 2))

        # Continuous-mode banner
        if tool.continuous_mode:
            mode_text = f"连续标记模式已启动 - 标记类型: {'正样本' if tool.continuous_label == 'positive' else '负样本'}"
            text_surface = small_font.render(mode_text, True, HIGHLIGHT_COLOR)
            screen.blit(text_surface, (WINDOW_WIDTH - text_surface.get_width() - 20, 50))

        # Navigation buttons
        draw_button(screen, "上一张 (←)", nav_buttons["prev"], nav_buttons["prev"].collidepoint(mouse_pos))
        draw_button(screen, "下一张 (→)", nav_buttons["next"], nav_buttons["next"].collidepoint(mouse_pos))
        draw_button(screen, "上个文件夹 (z)", nav_buttons["prev_folder"], nav_buttons["prev_folder"].collidepoint(mouse_pos))
        draw_button(screen, "下个文件夹 (c)", nav_buttons["next_folder"], nav_buttons["next_folder"].collidepoint(mouse_pos))
        draw_button(screen, "撤销 (Ctrl+Z)", nav_buttons["undo"], nav_buttons["undo"].collidepoint(mouse_pos))
        # Labelling buttons
        draw_button(screen, "正样本 (w)", label_buttons["positive"], label_buttons["positive"].collidepoint(mouse_pos))
        draw_button(screen, "负样本 (s)", label_buttons["negative"], label_buttons["negative"].collidepoint(mouse_pos))
        draw_button(screen, "开始连续标(↑)", label_buttons["continuous_start"], label_buttons["continuous_start"].collidepoint(mouse_pos))
        draw_button(screen, "结束连续标(↓)", label_buttons["continuous_end"], label_buttons["continuous_end"].collidepoint(mouse_pos))
        draw_button(screen, "移动文件 (x)", label_buttons["move_files"], label_buttons["move_files"].collidepoint(mouse_pos))

        # Confirmation dialog is drawn last so it sits on top
        if tool.show_confirm_dialog:
            draw_confirm_dialog(screen, tool.confirm_message)

        pygame.display.flip()
        clock.tick(30)

    # Persist labels before leaving
    tool.save_labels()
    pygame.quit()
    sys.exit()


if __name__ == "__main__":
    main()
上面这个工具用方向键左、右控制向前、向后查看图片;w表示把图片标记为正样本,s表示把图片标记为负样本;方向键上表示开始连续标记(如果当前图片是正样本就连续标记为正样本,如果当前图片是负样本就连续标记为负样本,如果当前图片还没有标记则默认按正样本连续标记),方向键下表示结束连续标记;x表示把正负样本分别移动到新建的文件夹1和0。注意,我发现我运行这个py程序的时候,搜狗输入法会自动变成中文输入法状态(真坑),要手动按shift键切回英文,再用鼠标点一下界面,才能确保字母键w、s、x能够正常使用;如果你强制设定电脑默认输入法是英文,应该不会有这个问题。
还有就是运行一次之后它会在图片文件夹下面生成labels_backup.json,如果下次运行你不手动删除它,是会加载上一次的结果的。
2.3 改为方向键自动播放
把原先用方向键左、右实现的单张后退和单张前进改到 a 和 d 上;然后方向键左改成自动倒放(逐张切换到上一张),方向键右改成自动正放(逐张切换到下一张),空格表示暂停/继续播放。
这样我们就可以用方向键去自动播放图片了,不用一直长按,长按太累了,同时我们把原先的方向键的功能改到a和d,也就是说要是觉得自动播放太快了,也可以使用a和d去慢慢看、或者长按播放。
把“单张前进/后退”的热键从方向键改成 A / D
位置:main() 函数里的KEYDOWN分支
原代码:elif event.key == K_RIGHT: tool.key_pressed["right"] = True tool.key_pressed["left"] = False tool.key_pressed_time = time.time() tool.next_image() # 立即响应一次 elif event.key == K_LEFT: tool.key_pressed["left"] = True tool.key_pressed["right"] = False tool.key_pressed_time = time.time() tool.prev_image() # 立即响应一次改成:
elif event.key == K_d: # 单张前进 tool.key_pressed["right"] = True tool.key_pressed["left"] = False tool.key_pressed_time = time.time() tool.next_image() elif event.key == K_a: # 单张后退 tool.key_pressed["left"] = True tool.key_pressed["right"] = False tool.key_pressed_time = time.time() tool.prev_image()对应的
KEYUP分支也同步改掉:elif event.key == K_d: tool.key_pressed["right"] = False tool.last_key_time = 0 elif event.key == K_a: tool.key_pressed["left"] = False tool.last_key_time = 0
把方向键 ← → 变成“自动播放”
仍在KEYDOWN分支,在刚刚改完 A/D 的下面新增:elif event.key == K_RIGHT: # 向后自动播放 tool.play_direction = 1 tool.playing = True tool.last_play_tick = pygame.time.get_ticks() elif event.key == K_LEFT: # 向前自动播放 tool.play_direction = -1 tool.playing = True tool.last_play_tick = pygame.time.get_ticks() elif event.key == K_SPACE: # 暂停/继续 tool.playing = not tool.playing if tool.playing: tool.last_play_tick = pygame.time.get_ticks()
在 ImageLabelingTool.__init__ 里加 4 个状态量(playing、play_direction、last_play_tick、play_interval)
任意位置(例如self.max_undo_steps = 50之后)追加:# 自动播放相关 self.playing = False # 是否处于自动播放 self.play_direction = 1 # 1 下一张,-1 上一张 self.last_play_tick = 0 # 上一次翻片的时间 self.play_interval = 400 # 毫秒,每 0.4 s 翻一张
主循环里真正执行“定时翻片”
在while running:的最前面(靠近handle_key_repeats()即可)插入:# 自动播放逻辑 if tool.playing: now = pygame.time.get_ticks() if now - tool.last_play_tick > tool.play_interval: if tool.play_direction == 1: tool.next_image() else: tool.prev_image() tool.last_play_tick = now
按钮提示文字同步改一下(可选)
把下面两行:draw_button(screen, "上一张 (←)", nav_buttons["prev"], nav_buttons["prev"].collidepoint(mouse_pos)) draw_button(screen, "下一张 (→)", nav_buttons["next"], nav_buttons["next"].collidepoint(mouse_pos))改成:
draw_button(screen, "上一张 (A)", nav_buttons["prev"], nav_buttons["prev"].collidepoint(mouse_pos)) draw_button(screen, "下一张 (D)", nav_buttons["next"], nav_buttons["next"].collidepoint(mouse_pos))
下面是修改之后的完整代码
import os
import pygame
import sys
import shutil
import time
import json
from pygame.locals import *
# Initialise pygame (must happen before the display and font calls below)
pygame.init()
# Configuration: the window is the screen size minus a 100 px margin in each dimension
SCREEN_WIDTH, SCREEN_HEIGHT = pygame.display.Info().current_w, pygame.display.Info().current_h
WINDOW_WIDTH, WINDOW_HEIGHT = SCREEN_WIDTH - 100, SCREEN_HEIGHT - 100
BG_COLOR = (40, 44, 52)
TEXT_COLOR = (220, 220, 220)
HIGHLIGHT_COLOR = (97, 175, 239)
BUTTON_COLOR = (56, 58, 66)
BUTTON_HOVER_COLOR = (72, 74, 82)
WARNING_COLOR = (255, 152, 0)
CONFIRM_COLOR = (76, 175, 80)
# Create the window
screen = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("图像分类标注工具")
# Fonts (SimHei is requested so that the Chinese UI strings can be rendered)
font = pygame.font.SysFont("SimHei", 24)
small_font = pygame.font.SysFont("SimHei", 18)
class ImageLabelingTool:
    """Model behind the binary image-labelling window (autoplay version).

    Keeps track of the folders under ``root_path`` that contain images, the
    image currently shown, the label of each image path (``'positive'`` or
    ``'negative'``), autoplay settings, the continuous-labelling range,
    key-repeat timing and an undo history. Labels are mirrored to
    ``labels_backup.json`` inside ``root_path`` and reloaded on start-up.
    """

    # File extensions (compared lower-case) that count as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

    def __init__(self, root_path):
        """Scan ``root_path`` for image folders and restore saved labels."""
        self.root_path = root_path
        self.folders = []                   # every folder that contains images
        self.current_folder_index = 0       # index into self.folders
        self.images = []                    # image paths of the current folder
        self.current_image_index = 0        # index into self.images
        self.labels = {}                    # image path -> 'positive' / 'negative'
        # Autoplay state
        self.playing = False                # True while autoplay is running
        self.play_direction = 1             # 1 = next image, -1 = previous image
        self.last_play_tick = 0             # tick of the last automatic step
        self.play_interval = 30             # milliseconds between automatic steps
        # Continuous-labelling state
        self.continuous_mode = False        # True between "start" and "end"
        self.continuous_label = None        # label applied to the whole range
        self.continuous_start_index = None  # image index where the range began
        # Key-repeat state
        self.key_pressed = {"left": False, "right": False}
        self.key_pressed_time = 0           # time.time() of the last key press
        self.last_key_time = 0              # time of the last repeat, 0 = none yet
        self.key_repeat_delay = 0.8         # seconds before the first repeat
        self.key_repeat_interval = 0.15     # seconds between later repeats
        # Undo history
        self.undo_stack = []
        self.max_undo_steps = 50
        # Confirmation dialog state
        self.show_confirm_dialog = False
        self.confirm_message = ""
        self.confirm_action = ""            # action to run when "yes" is clicked

        self.find_image_folders()
        if self.folders:
            self.load_current_folder_images()
        self.load_labels()

    def find_image_folders(self):
        """Append every folder under ``root_path`` that holds an image."""
        for root, _dirs, files in os.walk(self.root_path):
            if any(name.lower().endswith(self.IMAGE_EXTENSIONS) for name in files):
                self.folders.append(root)

    def load_current_folder_images(self):
        """Reload the sorted image list of the current folder; go to image 0."""
        self.images = []
        self.current_image_index = 0
        if not self.folders:
            # Nothing to list; avoids an IndexError when no folder has images.
            return
        folder_path = self.folders[self.current_folder_index]
        self.images = sorted(
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def get_current_image(self):
        """Return the path of the current image, or None if there is none."""
        if 0 <= self.current_image_index < len(self.images):
            return self.images[self.current_image_index]
        return None

    def next_image(self):
        """Advance to the next image; return False when already at the end."""
        if self.current_image_index < len(self.images) - 1:
            # Navigation is deliberately not recorded for undo: prev_image
            # never recorded it, and with autoplay stepping every few
            # milliseconds the forward steps pushed all labelling snapshots
            # out of the bounded undo stack.
            self.current_image_index += 1
            return True
        return False

    def prev_image(self):
        """Go back one image; return False when already at the start."""
        if self.current_image_index > 0:
            self.current_image_index -= 1
            return True
        return False

    def label_current_image(self, label):
        """Assign ``label`` to the current image and persist the labels."""
        current_image = self.get_current_image()
        if current_image:
            self.save_state()
            self.labels[current_image] = label
            self.save_labels()

    def start_continuous_labeling(self):
        """Start a continuous range at the current image.

        The range uses the current image's label; an unlabelled image is
        marked 'positive' and that label is used.

        Returns:
            True if a range was started, False if there is no current image.
        """
        current_image = self.get_current_image()
        if not current_image:
            return False
        self.save_state()
        if current_image in self.labels:
            self.continuous_label = self.labels[current_image]
        else:
            self.continuous_label = "positive"
            self.labels[current_image] = self.continuous_label
        self.continuous_mode = True
        self.continuous_start_index = self.current_image_index
        self.save_labels()
        return True

    def end_continuous_labeling(self):
        """Label every image between the range start and the current image.

        Returns:
            True if a range was active and has been applied, else False.
        """
        if not self.continuous_mode or self.continuous_start_index is None:
            return False
        self.save_state()
        first = min(self.continuous_start_index, self.current_image_index)
        last = max(self.continuous_start_index, self.current_image_index)
        # Slicing tolerates a start index that is stale because the image
        # list shrank after the range was opened.
        for path in self.images[first:last + 1]:
            self.labels[path] = self.continuous_label
        self.continuous_mode = False
        self.continuous_start_index = None
        self.save_labels()
        return True

    def move_labeled_files(self, positive_dir, negative_dir):
        """Move every labelled file that still exists into its class folder.

        Name clashes in the destination are resolved by appending ``_1``,
        ``_2``, ... to the base name. Moved files are dropped from the label
        map, the current folder is reloaded and the labels are persisted.

        Returns:
            Number of files moved.
        """
        os.makedirs(positive_dir, exist_ok=True)
        os.makedirs(negative_dir, exist_ok=True)

        moved = []
        for img_path, label in self.labels.items():
            if label not in ("positive", "negative") or not os.path.exists(img_path):
                continue
            dest_dir = positive_dir if label == "positive" else negative_dir
            filename = os.path.basename(img_path)
            base_name, ext = os.path.splitext(filename)
            new_filename = filename
            counter = 1
            while os.path.exists(os.path.join(dest_dir, new_filename)):
                new_filename = f"{base_name}_{counter}{ext}"
                counter += 1
            try:
                shutil.move(img_path, os.path.join(dest_dir, new_filename))
            except OSError as e:
                # Best effort: report and carry on with the remaining files.
                print(f"移动文件失败: {e}")
            else:
                moved.append(img_path)

        for img_path in moved:
            self.labels.pop(img_path, None)

        self.load_current_folder_images()
        self.save_labels()
        return len(moved)

    def next_folder(self):
        """Switch to the next folder, asking first if labelled files remain.

        Returns:
            True if the folder was switched; False if this is the last
            folder or a confirmation dialog was requested instead.
        """
        if self.current_folder_index >= len(self.folders) - 1:
            return False
        current_folder = os.path.normpath(self.folders[self.current_folder_index])
        # Compare the parent directory exactly: a plain prefix test would
        # also match sibling folders such as "test10" and sub-folders.
        has_unmoved_labels = any(
            os.path.normpath(os.path.dirname(img_path)) == current_folder
            and os.path.exists(img_path)
            for img_path in self.labels
        )
        if has_unmoved_labels:
            self.show_confirm_dialog = True
            self.confirm_action = "next_folder"
            self.confirm_message = "当前文件夹有未移动的标记文件,确定要切换到下一个文件夹吗?"
            return False
        self.current_folder_index += 1
        self.load_current_folder_images()
        return True

    def prev_folder(self):
        """Switch to the previous folder; return False at the first one."""
        if self.current_folder_index > 0:
            self.current_folder_index -= 1
            self.load_current_folder_images()
            return True
        return False

    def handle_key_repeats(self):
        """Step one image when a held navigation key is due for a repeat.

        The first repeat fires ``key_repeat_delay`` seconds after the key
        press, later ones every ``key_repeat_interval`` seconds.
        """
        if not any(self.key_pressed.values()):
            return
        now = time.time()
        if self.last_key_time == 0:
            due = now - self.key_pressed_time > self.key_repeat_delay
        else:
            due = now - self.last_key_time > self.key_repeat_interval
        if not due:
            return
        if self.key_pressed["left"]:
            self.prev_image()
        elif self.key_pressed["right"]:
            self.next_image()
        self.last_key_time = now

    def save_state(self):
        """Push a snapshot of the labelling state onto the undo stack."""
        if len(self.undo_stack) >= self.max_undo_steps:
            self.undo_stack.pop(0)  # drop the oldest snapshot
        self.undo_stack.append({
            "current_image_index": self.current_image_index,
            "labels": self.labels.copy(),
            "continuous_mode": self.continuous_mode,
            "continuous_start_index": self.continuous_start_index,
            "continuous_label": self.continuous_label,
        })

    def undo(self):
        """Restore the latest snapshot; return False if there is none."""
        if not self.undo_stack:
            return False
        state = self.undo_stack.pop()
        # The image list may be shorter than when the snapshot was taken
        # (files moved away), so keep the restored index inside the list.
        last_valid = max(len(self.images) - 1, 0)
        self.current_image_index = min(state["current_image_index"], last_valid)
        self.labels = state["labels"]
        self.continuous_mode = state["continuous_mode"]
        self.continuous_start_index = state["continuous_start_index"]
        self.continuous_label = state["continuous_label"]
        # Keep the backup file in step with the restored labels.
        self.save_labels()
        return True

    def save_labels(self):
        """Write the labels of files that still exist to the backup file."""
        labels_file = os.path.join(self.root_path, "labels_backup.json")
        existing_labels = {k: v for k, v in self.labels.items() if os.path.exists(k)}
        try:
            with open(labels_file, 'w', encoding='utf-8') as f:
                json.dump(existing_labels, f)
        except (OSError, TypeError, ValueError) as e:
            print(f"保存标记状态失败: {e}")

    def load_labels(self):
        """Load labels from the backup file if it exists and holds a dict."""
        labels_file = os.path.join(self.root_path, "labels_backup.json")
        if not os.path.exists(labels_file):
            return
        try:
            with open(labels_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            print(f"加载标记状态失败: {e}")
            return
        if isinstance(loaded, dict):
            self.labels = loaded
        else:
            print("加载标记状态失败: 文件内容不是字典")
def draw_button(screen, text, rect, hover=False, color=None):
    """Draw a rounded button with a grey border and centred caption.

    Args:
        screen: Surface to draw on.
        text: Caption, rendered with ``small_font``.
        rect: Button rectangle.
        hover: Selects the hover fill colour when ``color`` is not given.
        color: Explicit fill colour; overrides the hover/normal colours.
    """
    if color is not None:
        fill = color
    elif hover:
        fill = BUTTON_HOVER_COLOR
    else:
        fill = BUTTON_COLOR
    # Filled body first, then the 2 px outline on top of it.
    pygame.draw.rect(screen, fill, rect, border_radius=5)
    pygame.draw.rect(screen, (100, 100, 100), rect, 2, border_radius=5)
    caption = small_font.render(text, True, TEXT_COLOR)
    caption_rect = caption.get_rect(center=rect.center)
    screen.blit(caption, caption_rect)
def _wrap_message(message, max_width, measure):
    """Greedily wrap ``message`` into lines no wider than ``max_width``.

    Breaks at whitespace when possible; a run without spaces that is still
    too wide (for example Chinese text) is broken between characters.
    ``measure`` maps a string to its rendered width.
    """
    lines = []
    current = ""
    for word in message.split():
        candidate = f"{current} {word}" if current else word
        if measure(candidate) <= max_width:
            current = candidate
            continue
        if current:
            lines.append(current)
            current = ""
        for ch in word:
            if current and measure(current + ch) > max_width:
                lines.append(current)
                current = ch
            else:
                current += ch
    if current:
        lines.append(current)
    return lines


def draw_confirm_dialog(screen, message, width=400, height=200):
    """Draw a centred yes/no dialog and return its rectangles.

    Args:
        screen: Surface to draw on.
        message: Text shown in the dialog; wrapped to the dialog width
            minus a 20 px margin on each side.
        width: Dialog width in pixels.
        height: Dialog height in pixels.

    Returns:
        Tuple ``(dialog_rect, yes_button, no_button)`` of ``pygame.Rect``.
    """
    dialog_rect = pygame.Rect(
        (WINDOW_WIDTH - width) // 2,
        (WINDOW_HEIGHT - height) // 2,
        width, height
    )
    # Background and outline
    pygame.draw.rect(screen, BG_COLOR, dialog_rect, border_radius=10)
    pygame.draw.rect(screen, TEXT_COLOR, dialog_rect, 2, border_radius=10)

    # Message: whitespace-only wrapping left space-free text on a single
    # line wider than the dialog, so wrapping also breaks inside long runs.
    lines = _wrap_message(message, width - 40, lambda s: small_font.size(s)[0])
    for i, line in enumerate(lines):
        text_surface = small_font.render(line, True, TEXT_COLOR)
        screen.blit(text_surface, (dialog_rect.x + 20, dialog_rect.y + 30 + i * 25))

    # Buttons
    yes_button = pygame.Rect(dialog_rect.x + width // 2 - 100, dialog_rect.y + height - 50, 80, 30)
    no_button = pygame.Rect(dialog_rect.x + width // 2 + 20, dialog_rect.y + height - 50, 80, 30)
    draw_button(screen, "是", yes_button, color=CONFIRM_COLOR)
    draw_button(screen, "否", no_button, color=WARNING_COLOR)
    return dialog_rect, yes_button, no_button
def main():
    """Run the labeling UI: build the tool, then loop on events and redraw.

    Keyboard bindings handled here:
        a / d          step one image back / forward (hold to repeat)
        LEFT / RIGHT   start auto-play backward / forward
        SPACE          pause or resume auto-play
        w / s          label the current image positive / negative
        UP / DOWN      start / end a continuous labeling range
        x              move the labeled files to the output folders
        z / c          previous / next folder
        Ctrl+Z         undo the last operation
        ESC            dismiss the confirmation dialog

    The same actions are available through the on-screen buttons. Labels are
    saved when the window is closed, after which the process exits.
    """
    # Assumed root path; change it to the frame directory to be labeled.
    root_path = r"D:\zero_track\mmaction2\input_videos\test1"
    tool = ImageLabelingTool(root_path)
    # Output folders: "1" receives positive samples, "0" negative samples.
    positive_dir = os.path.join(root_path, "1")
    negative_dir = os.path.join(root_path, "0")

    clock = pygame.time.Clock()

    # Two rows of equally sized buttons along the bottom edge.
    button_height = 40
    button_width = 140
    button_margin = 15
    button_row1_y = WINDOW_HEIGHT - button_height - button_margin
    button_row2_y = WINDOW_HEIGHT - 2 * button_height - 2 * button_margin

    def button_rect(column, y):
        """Return the rectangle of the button in ``column`` of the row at ``y``."""
        x = button_margin * (column + 1) + button_width * column
        return pygame.Rect(x, y, button_width, button_height)

    # Upper row: navigation.
    nav_buttons = {
        "prev": button_rect(0, button_row2_y),
        "next": button_rect(1, button_row2_y),
        "prev_folder": button_rect(2, button_row2_y),
        "next_folder": button_rect(3, button_row2_y),
        "undo": button_rect(4, button_row2_y),
    }
    # Lower row: labeling.
    label_buttons = {
        "positive": button_rect(0, button_row1_y),
        "negative": button_rect(1, button_row1_y),
        "continuous_start": button_rect(2, button_row1_y),
        "continuous_end": button_rect(3, button_row1_y),
        "move_files": button_rect(4, button_row1_y),
    }
    all_buttons = {**nav_buttons, **label_buttons}
    captions = {
        "prev": "上一张 (a)",
        "next": "下一张 (d)",
        "prev_folder": "上个文件夹 (z)",
        "next_folder": "下个文件夹 (c)",
        "undo": "撤销 (Ctrl+Z)",
        "positive": "正样本 (w)",
        "negative": "负样本 (s)",
        "continuous_start": "开始连续标(↑)",
        "continuous_end": "结束连续标(↓)",
        "move_files": "移动文件 (x)",
    }

    def undo_last():
        """Undo one operation and report the outcome on the console."""
        if tool.undo():
            print("已撤销上一次操作")
        else:
            print("没有可撤销的操作")

    def move_files():
        """Move the labeled files and report what the tool returned."""
        moved = tool.move_labeled_files(positive_dir, negative_dir)
        print(f"已移动 {moved} 个文件")

    def start_range():
        """Start continuous labeling, reporting when it cannot start."""
        if not tool.start_continuous_labeling():
            print("无法开始连续标记")

    def end_range():
        """End continuous labeling, reporting when none is active."""
        if not tool.end_continuous_labeling():
            print("没有激活的连续标记")

    # One action per button so mouse clicks share the keyboard code paths.
    click_actions = {
        "prev": tool.prev_image,
        "next": tool.next_image,
        "prev_folder": tool.prev_folder,
        "next_folder": tool.next_folder,
        "undo": undo_last,
        "positive": lambda: tool.label_current_image("positive"),
        "negative": lambda: tool.label_current_image("negative"),
        "continuous_start": start_range,
        "continuous_end": end_range,
        "move_files": move_files,
    }

    # Area in which the current image is shown.
    image_area = pygame.Rect(50, 80, WINDOW_WIDTH - 100, WINDOW_HEIGHT - 220)

    def draw_centered_message(text, color):
        """Draw ``text`` in the middle of the image area."""
        surface = font.render(text, True, color)
        screen.blit(surface, (image_area.centerx - surface.get_width() // 2,
                              image_area.centery - surface.get_height() // 2))

    # Time of the last a/d key-down; read by tool.handle_key_repeats().
    tool.key_pressed_time = 0

    running = True
    while running:
        mouse_pos = pygame.mouse.get_pos()
        tool.handle_key_repeats()

        # Auto-play: advance one image every play_interval milliseconds.
        if tool.playing:
            now = pygame.time.get_ticks()
            if now - tool.last_play_tick > tool.play_interval:
                if tool.play_direction == 1:
                    tool.next_image()
                else:
                    tool.prev_image()
                tool.last_play_tick = now

        for event in pygame.event.get():
            if event.type == QUIT:
                running = False
            elif event.type == KEYDOWN:
                key = event.key
                if key == K_d:
                    tool.key_pressed["right"] = True
                    tool.key_pressed["left"] = False
                    tool.key_pressed_time = time.time()
                    tool.next_image()  # respond once immediately
                elif key == K_a:
                    tool.key_pressed["left"] = True
                    tool.key_pressed["right"] = False
                    tool.key_pressed_time = time.time()
                    tool.prev_image()  # respond once immediately
                elif key == K_RIGHT:
                    tool.play_direction = 1
                    tool.playing = True
                    tool.last_play_tick = pygame.time.get_ticks()
                elif key == K_LEFT:
                    tool.play_direction = -1
                    tool.playing = True
                    tool.last_play_tick = pygame.time.get_ticks()
                elif key == K_SPACE:
                    tool.playing = not tool.playing
                    if tool.playing:
                        tool.last_play_tick = pygame.time.get_ticks()
                elif key == K_w:
                    tool.label_current_image("positive")
                elif key == K_s:
                    tool.label_current_image("negative")
                elif key == K_UP:
                    start_range()
                elif key == K_DOWN:
                    end_range()
                elif key == K_x:
                    move_files()
                elif key == K_c:
                    tool.next_folder()
                elif key == K_z:
                    # Ctrl+Z must be tested before plain z; with the plain
                    # test first the undo shortcut could never fire.
                    if pygame.key.get_mods() & KMOD_CTRL:
                        undo_last()
                    else:
                        tool.prev_folder()
                elif key == K_ESCAPE:
                    if tool.show_confirm_dialog:
                        tool.show_confirm_dialog = False
            elif event.type == KEYUP:
                if event.key == K_d:
                    tool.key_pressed["right"] = False
                    tool.last_key_time = 0  # restart the repeat timing
                elif event.key == K_a:
                    tool.key_pressed["left"] = False
                    tool.last_key_time = 0  # restart the repeat timing
            elif event.type == MOUSEBUTTONDOWN and event.button == 1:
                if tool.show_confirm_dialog:
                    # While the dialog is open only its two buttons react.
                    _, yes_button, no_button = draw_confirm_dialog(screen, tool.confirm_message)
                    if yes_button.collidepoint(mouse_pos):
                        tool.show_confirm_dialog = False
                        if tool.confirm_action == "next_folder":
                            tool.current_folder_index += 1
                            tool.load_current_folder_images()
                    elif no_button.collidepoint(mouse_pos):
                        tool.show_confirm_dialog = False
                else:
                    for name, rect in all_buttons.items():
                        if rect.collidepoint(mouse_pos):
                            click_actions[name]()
                            break

        screen.fill(BG_COLOR)

        # Current folder name and its position in the folder list.
        if tool.folders:
            folder_text = f"当前文件夹: {os.path.basename(tool.folders[tool.current_folder_index])} ({tool.current_folder_index + 1}/{len(tool.folders)})"
            screen.blit(small_font.render(folder_text, True, TEXT_COLOR), (20, 20))

        current_image_path = tool.get_current_image()
        if current_image_path and os.path.exists(current_image_path):
            try:
                img = pygame.image.load(current_image_path)
                img_rect = img.get_rect()
                # Scale to fit the image area while keeping the aspect ratio.
                scale = min(image_area.width / img_rect.width, image_area.height / img_rect.height)
                new_size = (int(img_rect.width * scale), int(img_rect.height * scale))
                img = pygame.transform.smoothscale(img, new_size)
                screen.blit(img, img.get_rect(center=image_area.center))

                # File name, position and label state above the image.
                info_text = f"{os.path.basename(current_image_path)} ({tool.current_image_index + 1}/{len(tool.images)})"
                if current_image_path in tool.labels:
                    label = tool.labels[current_image_path]
                    info_text += f" - 已标记: {'正样本' if label == 'positive' else '负样本'}"
                text_surface = font.render(info_text, True, TEXT_COLOR)
                screen.blit(text_surface, text_surface.get_rect(center=(WINDOW_WIDTH // 2, image_area.y - 20)))

                # While a continuous range is open, show it as text and as a
                # bar below the image.
                if tool.continuous_mode and tool.continuous_start_index is not None:
                    start_idx = min(tool.continuous_start_index, tool.current_image_index)
                    end_idx = max(tool.continuous_start_index, tool.current_image_index)
                    range_text = f"标记范围: {start_idx + 1} - {end_idx + 1}"
                    screen.blit(small_font.render(range_text, True, HIGHLIGHT_COLOR), (20, 50))
                    marker_width = image_area.width / len(tool.images)
                    start_x = image_area.x + start_idx * marker_width
                    end_x = image_area.x + (end_idx + 1) * marker_width
                    pygame.draw.rect(screen, HIGHLIGHT_COLOR,
                                     (start_x, image_area.y + image_area.height + 5,
                                      end_x - start_x, 5))
            except Exception as e:
                # Best effort: a frame that cannot be shown must not end the
                # session, so the error is displayed instead of raised.
                draw_centered_message(f"无法加载图片: {e}", (255, 0, 0))
        else:
            draw_centered_message("没有图片可显示", TEXT_COLOR)

        if tool.continuous_mode:
            mode_text = f"连续标记模式已启动 - 标记类型: {'正样本' if tool.continuous_label == 'positive' else '负样本'}"
            text_surface = small_font.render(mode_text, True, HIGHLIGHT_COLOR)
            screen.blit(text_surface, (WINDOW_WIDTH - text_surface.get_width() - 20, 50))

        for name, rect in all_buttons.items():
            draw_button(screen, captions[name], rect, rect.collidepoint(mouse_pos))

        if tool.show_confirm_dialog:
            draw_confirm_dialog(screen, tool.confirm_message)

        pygame.display.flip()
        clock.tick(30)

    # Persist the labels before leaving.
    tool.save_labels()
    pygame.quit()
    sys.exit()


if __name__ == "__main__":
    main()
2.4 新增转视频功能
上面的逻辑是按x就把正样本和负样本移动到文件夹1和0中,但是我想新增一个变量控制是否在转移的时候同时把转移的这些图片转成视频(图片不要了),比如说现在第10到第20帧是正样本,第21帧到30帧是负样本,用户按了x之后,原先的逻辑是第10到第20帧移动到文件夹1中,第21帧到30帧移动到文件夹0中,现在的逻辑变成第10到第20帧转成一个视频片段放到文件夹1中,第21帧到30帧也转成一个视频片段放到文件夹0中。允许没有负样本或者没有正样本。
这个脚本的使用方法就是先用右方向键前进到正样本的位置,然后找到脚踢动作之后,用a回退到动作一开始的视频帧,然后按w将这一帧标记为正样本,然后按方向键的上键,表示开启连续标记,然后长按d找到动作结束的视频帧(或者方向键右找,空格键暂停),在动作结束的视频帧按方向键的下键,表示结束连续标记。这就完成了这一区间的正样本标记。如果你要标记一个负样本区间,也是同理,只是动作开始的第一帧改成按s标记为负样本即可。当你把整个视频的正样本区间以及负样本区间标记好后,按x即可将标记的区间转为视频。正样本区间的视频帧会转为视频存在文件夹1中,负样本区间的视频帧会转为视频放在文件夹0中。
但是这个脚本有个缺陷:如果你想把第10到30帧拆成两个负样本视频(例如第10到20帧一个、第21到30帧一个),是做不到的。因为这相当于把第10到30帧全部标记为负样本,最后只会生成一个负样本视频而不是两个,除非你每标记完一个区间就按一次x,立即生成一个视频。
import os
import pygame
import sys
import shutil
import time
import json
from pygame.locals import *
import cv2 # pip install opencv-python
# Initialise pygame before the display is queried.
pygame.init()

# Window geometry: the window is 100px smaller than the screen in each direction.
_display_info = pygame.display.Info()
SCREEN_WIDTH = _display_info.current_w
SCREEN_HEIGHT = _display_info.current_h
WINDOW_WIDTH = SCREEN_WIDTH - 100
WINDOW_HEIGHT = SCREEN_HEIGHT - 100

# Colour palette (RGB).
BG_COLOR = (40, 44, 52)
TEXT_COLOR = (220, 220, 220)
HIGHLIGHT_COLOR = (97, 175, 239)
BUTTON_COLOR = (56, 58, 66)
BUTTON_HOVER_COLOR = (72, 74, 82)
WARNING_COLOR = (255, 152, 0)
CONFIRM_COLOR = (76, 175, 80)

# Main window.
screen = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("图像分类标注工具")

# Fonts: SimHei is requested so the Chinese captions can be rendered.
font = pygame.font.SysFont("SimHei", 24)
small_font = pygame.font.SysFont("SimHei", 18)
class ImageLabelingTool:
    """Labeling state and file operations behind the pygame annotation UI.

    The tool walks ``root_path`` for folders that contain images, lets the
    caller step through the images of one folder at a time, records a
    ``'positive'`` / ``'negative'`` label per image path and finally moves the
    labeled images (or encodes them as video clips) into output folders.
    Labels are mirrored to ``labels_backup.json`` under ``root_path``.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

    def __init__(self, root_path):
        """Scan ``root_path``, load the first folder and restore saved labels."""
        self.root_path = root_path
        self.folders = []                    # every folder that holds images
        self.current_folder_index = 0        # index into self.folders
        self.images = []                     # image paths of the current folder
        self.current_image_index = 0         # index into self.images
        self.labels = {}                     # path -> 'positive' / 'negative'
        self.convert_to_video = True         # encode clips instead of moving frames
        self.video_fps = 10                  # frame rate of the encoded clips
        # Auto-play.
        self.playing = False                 # whether auto-play is running
        self.play_direction = 1              # 1 = forward, -1 = backward
        self.last_play_tick = 0              # tick of the last auto-play step
        self.play_interval = 100             # milliseconds between steps
        # Continuous labeling.
        self.continuous_mode = False         # whether a range is open
        self.continuous_label = None         # label applied to the whole range
        self.continuous_start_index = None   # image index where the range began
        # Key-repeat handling for held a/d keys.
        self.key_pressed = {"left": False, "right": False}
        self.key_pressed_time = 0            # set by the caller on key-down; read by handle_key_repeats
        self.last_key_time = 0               # time of the last repeat, 0 = none yet
        self.key_repeat_delay = 0.8          # seconds before the first repeat
        self.key_repeat_interval = 0.15      # seconds between later repeats
        # Undo history.
        self.undo_stack = []
        self.max_undo_steps = 50
        # Confirmation dialog.
        self.show_confirm_dialog = False
        self.confirm_message = ""
        self.confirm_action = ""             # action to run when "yes" is chosen
        self.find_image_folders()
        if self.folders:
            self.load_current_folder_images()
        self.load_labels()

    def images_to_video(self, image_paths, output_path, fps=10):
        """Encode ``image_paths`` into one MP4 and delete the encoded frames.

        Source images are removed only after the video was written, so a
        failed conversion never loses frames. Frames that cannot be read are
        skipped and kept on disk.

        Returns:
            bool: True when the video was written, False when nothing was
            converted (empty input, unreadable first frame, or the writer
            could not be opened).
        """
        if not image_paths:
            return False
        # The first frame fixes the video size.
        first_frame = cv2.imread(image_paths[0])
        if first_frame is None:
            print(f"无法读取图片: {image_paths[0]}")
            return False
        height, width = first_frame.shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not writer.isOpened():
            writer.release()
            print(f"无法创建视频: {output_path}")
            return False
        written = []
        try:
            for img_path in image_paths:
                frame = cv2.imread(img_path)
                if frame is None:
                    print(f"无法读取图片: {img_path}")
                    continue
                writer.write(frame)
                written.append(img_path)
        finally:
            writer.release()
        for img_path in written:
            if os.path.exists(img_path):
                os.remove(img_path)
        return True

    def find_image_folders(self):
        """Append every folder under ``root_path`` that directly holds an image."""
        for root, _dirs, files in os.walk(self.root_path):
            if any(name.lower().endswith(self.IMAGE_EXTENSIONS) for name in files):
                self.folders.append(root)

    def load_current_folder_images(self):
        """Load the sorted image list of the current folder and rewind to index 0.

        An open continuous range is discarded as well: its start index refers
        to the previous image list and would point at unrelated images.
        """
        folder_path = self.folders[self.current_folder_index]
        self.images = sorted(
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )
        self.current_image_index = 0
        self.continuous_mode = False
        self.continuous_start_index = None

    def get_current_image(self):
        """Return the current image path, or None when the folder has no images."""
        if not self.images:
            return None
        return self.images[self.current_image_index]

    def next_image(self):
        """Step to the next image; return False when already at the last one."""
        if self.current_image_index < len(self.images) - 1:
            # NOTE(review): every forward step pushes an undo snapshot while
            # prev_image() pushes none; confirm whether navigation is meant to
            # be undoable, since long playback evicts label snapshots.
            self.save_state()
            self.current_image_index += 1
            return True
        return False

    def prev_image(self):
        """Step to the previous image; return False when already at the first."""
        if self.current_image_index > 0:
            self.current_image_index -= 1
            return True
        return False

    def label_current_image(self, label):
        """Label the current image with ``label`` and persist the labels."""
        current_image = self.get_current_image()
        if current_image:
            self.save_state()
            self.labels[current_image] = label
            self.save_labels()

    def start_continuous_labeling(self):
        """Open a continuous range at the current image.

        The range reuses the current image's label, or ``'positive'`` when it
        has none yet. Returns False when there is no current image.
        """
        current_image = self.get_current_image()
        if not current_image:
            return False
        self.save_state()
        if current_image in self.labels:
            self.continuous_label = self.labels[current_image]
        else:
            self.continuous_label = "positive"
            self.labels[current_image] = self.continuous_label
        self.continuous_mode = True
        self.continuous_start_index = self.current_image_index
        self.save_labels()
        return True

    def end_continuous_labeling(self):
        """Label every image between the range start and the current image.

        Returns False when no continuous range is open.
        """
        if not self.continuous_mode or self.continuous_start_index is None:
            return False
        self.save_state()
        start = min(self.continuous_start_index, self.current_image_index)
        end = max(self.continuous_start_index, self.current_image_index)
        for img_path in self.images[start:end + 1]:
            self.labels[img_path] = self.continuous_label
        self.continuous_mode = False
        self.continuous_start_index = None
        self.save_labels()
        return True

    @staticmethod
    def _frame_index(path):
        """Return the frame number encoded in a name like ``frame_0010.jpg`` or ``0010.jpg``, else None."""
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            return int(stem.split('_')[-1])
        except ValueError:
            return None

    @classmethod
    def _frame_sort_key(cls, path):
        """Order paths by frame number; names without a number sort last by path."""
        index = cls._frame_index(path)
        return (index is None, 0 if index is None else index, path)

    @classmethod
    def _split_into_runs(cls, image_paths):
        """Split ordered paths into runs of consecutive frame numbers.

        A new run starts only where two neighbouring frame numbers are both
        known and not consecutive; names without a frame number never split.
        """
        runs = []
        previous_index = None
        for path in image_paths:
            index = cls._frame_index(path)
            gap = (previous_index is not None and index is not None
                   and index != previous_index + 1)
            if not runs or gap:
                runs.append([])
            runs[-1].append(path)
            previous_index = index
        return runs

    def move_labeled_files(self, positive_dir, negative_dir):
        """Export every labeled image that still exists on disk.

        With ``convert_to_video`` enabled each run of consecutive frames that
        shares a folder and a label becomes one clip in the matching output
        folder, so two separate actions are not glued into a single video.
        Otherwise the image files themselves are moved.

        Returns:
            int: number of image files that were moved or turned into video.
        """
        from collections import defaultdict

        os.makedirs(positive_dir, exist_ok=True)
        os.makedirs(negative_dir, exist_ok=True)

        # Group by (source folder, label) so frames of different folders are
        # never mixed into one clip.
        groups = defaultdict(list)
        for img_path, label in self.labels.items():
            if label in ("positive", "negative") and os.path.exists(img_path):
                groups[(os.path.dirname(img_path), label)].append(img_path)

        processed = 0
        for (folder, label), image_paths in groups.items():
            image_paths.sort(key=self._frame_sort_key)
            dest_dir = positive_dir if label == "positive" else negative_dir
            if self.convert_to_video:
                for run in self._split_into_runs(image_paths):
                    start_idx = self._frame_index(run[0])
                    end_idx = self._frame_index(run[-1])
                    if start_idx is not None and end_idx is not None:
                        range_str = f"{start_idx}to{end_idx}"
                    else:
                        range_str = "unknown_range"
                    video_name = f"{os.path.basename(folder)}_{label}_{range_str}.mp4"
                    video_path = os.path.join(dest_dir, video_name)
                    # Labels are dropped only when the clip was written, so a
                    # failed conversion can simply be retried.
                    if self.images_to_video(run, video_path, fps=self.video_fps):
                        processed += len(run)
                        for img_path in run:
                            self.labels.pop(img_path, None)
            else:
                for img_path in image_paths:
                    shutil.move(img_path, os.path.join(dest_dir, os.path.basename(img_path)))
                    self.labels.pop(img_path, None)
                    processed += 1

        if self.folders:
            self.load_current_folder_images()
        self.save_labels()
        return processed

    def next_folder(self):
        """Switch to the next folder, asking first when labeled files remain.

        Returns True when the switch happened. Returns False at the last
        folder, or when the confirmation dialog was raised instead.
        """
        if self.current_folder_index >= len(self.folders) - 1:
            return False
        current_folder = self.folders[self.current_folder_index]
        # Compare the parent directory itself: a prefix test would also match
        # sibling folders such as "test10" when the current one is "test1".
        has_unmoved_labels = any(
            os.path.dirname(img_path) == current_folder and os.path.exists(img_path)
            for img_path in self.labels
        )
        if has_unmoved_labels:
            self.show_confirm_dialog = True
            self.confirm_action = "next_folder"
            self.confirm_message = "当前文件夹有未移动的标记文件,确定要切换到下一个文件夹吗?"
            return False
        self.current_folder_index += 1
        self.load_current_folder_images()
        return True

    def prev_folder(self):
        """Switch to the previous folder; return False when already at the first."""
        if self.current_folder_index > 0:
            self.current_folder_index -= 1
            self.load_current_folder_images()
            return True
        return False

    def handle_key_repeats(self):
        """Repeat the held a/d navigation key.

        The first repeat fires ``key_repeat_delay`` seconds after
        ``key_pressed_time``; later ones every ``key_repeat_interval`` seconds.
        """
        if not any(self.key_pressed.values()):
            return
        current_time = time.time()
        if self.last_key_time == 0:
            due = current_time - self.key_pressed_time > self.key_repeat_delay
        else:
            due = current_time - self.last_key_time > self.key_repeat_interval
        if not due:
            return
        if self.key_pressed["left"]:
            self.prev_image()
        elif self.key_pressed["right"]:
            self.next_image()
        self.last_key_time = current_time

    def save_state(self):
        """Push a snapshot of the labeling state onto the bounded undo stack."""
        if len(self.undo_stack) >= self.max_undo_steps:
            self.undo_stack.pop(0)  # drop the oldest snapshot
        self.undo_stack.append({
            "current_image_index": self.current_image_index,
            "labels": self.labels.copy(),
            "continuous_mode": self.continuous_mode,
            "continuous_start_index": self.continuous_start_index,
            "continuous_label": self.continuous_label,
        })

    def undo(self):
        """Restore the most recent snapshot; return False when there is none.

        The image list may have shrunk since the snapshot was taken, so the
        restored indices are clamped to the current list and the restored
        labels are written back to the backup file.
        """
        if not self.undo_stack:
            return False
        state = self.undo_stack.pop()
        last_index = max(len(self.images) - 1, 0)
        self.current_image_index = min(state["current_image_index"], last_index)
        self.labels = state["labels"]
        self.continuous_mode = state["continuous_mode"]
        self.continuous_start_index = state["continuous_start_index"]
        self.continuous_label = state["continuous_label"]
        if (self.continuous_start_index is not None
                and self.continuous_start_index > last_index):
            self.continuous_mode = False
            self.continuous_start_index = None
        self.save_labels()
        return True

    def save_labels(self):
        """Write the labels of files that still exist to ``labels_backup.json``."""
        labels_file = os.path.join(self.root_path, "labels_backup.json")
        try:
            existing_labels = {k: v for k, v in self.labels.items() if os.path.exists(k)}
            with open(labels_file, 'w') as f:
                json.dump(existing_labels, f)
        except (OSError, TypeError, ValueError) as e:
            print(f"保存标记状态失败: {e}")

    def load_labels(self):
        """Restore labels from ``labels_backup.json`` when the file is usable.

        A missing file is ignored; an unreadable, malformed or non-dict file
        is reported and leaves ``self.labels`` unchanged.
        """
        labels_file = os.path.join(self.root_path, "labels_backup.json")
        if not os.path.exists(labels_file):
            return
        try:
            with open(labels_file, 'r') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            print(f"加载标记状态失败: {e}")
            return
        if isinstance(loaded, dict):
            self.labels = loaded
        else:
            print(f"加载标记状态失败: {labels_file} 格式不正确")
def draw_button(screen, text, rect, hover=False, color=None):
    """Render a rounded button with a centred caption.

    Args:
        screen: Surface the button is drawn on.
        text: Caption, rendered with ``small_font`` in ``TEXT_COLOR``.
        rect: Rectangle occupied by the button.
        hover: Picks the hover fill when no explicit ``color`` is given.
        color: Optional fill colour; takes precedence over ``hover``.
    """
    fill = color
    if fill is None:
        fill = BUTTON_COLOR if not hover else BUTTON_HOVER_COLOR
    # Filled body first, then a 2px grey outline on top of it.
    pygame.draw.rect(screen, fill, rect, border_radius=5)
    pygame.draw.rect(screen, (100, 100, 100), rect, 2, border_radius=5)
    caption = small_font.render(text, True, TEXT_COLOR)
    screen.blit(caption, caption.get_rect(center=rect.center))
def draw_confirm_dialog(screen, message, width=400, height=200):
    """Draw a centred confirmation dialog and return its hit boxes.

    The message is wrapped to the dialog width. Wrapping prefers whitespace
    boundaries, but a single "word" that is still too wide (for example a
    Chinese sentence, which contains no spaces) is broken between characters
    so the text never runs past the dialog border.

    Args:
        screen: Surface to draw on.
        message: Text shown inside the dialog.
        width: Dialog width in pixels.
        height: Dialog height in pixels.

    Returns:
        tuple: ``(dialog_rect, yes_button, no_button)`` rectangles, so the
        caller can hit-test mouse clicks against the two buttons.
    """
    dialog_rect = pygame.Rect(
        (WINDOW_WIDTH - width) // 2,
        (WINDOW_HEIGHT - height) // 2,
        width, height,
    )
    # Background plus a 2px border.
    pygame.draw.rect(screen, BG_COLOR, dialog_rect, border_radius=10)
    pygame.draw.rect(screen, TEXT_COLOR, dialog_rect, 2, border_radius=10)

    # Greedy wrap; 20px padding is kept on both sides of the text.
    max_text_width = width - 40
    lines = []
    current_line = ""
    for word in message.split():
        candidate = f"{current_line} {word}" if current_line else word
        if small_font.size(candidate)[0] < max_text_width:
            current_line = candidate
            continue
        if current_line:
            lines.append(current_line)
            current_line = ""
        # The word did not fit on the current line; place it on a new one,
        # breaking between characters wherever it is wider than the dialog.
        for char in word:
            if current_line and small_font.size(current_line + char)[0] >= max_text_width:
                lines.append(current_line)
                current_line = char
            else:
                current_line += char
    if current_line:
        lines.append(current_line)

    for i, line in enumerate(lines):
        text_surface = small_font.render(line, True, TEXT_COLOR)
        screen.blit(text_surface, (dialog_rect.x + 20, dialog_rect.y + 30 + i * 25))

    # Yes / No buttons along the bottom edge.
    yes_button = pygame.Rect(dialog_rect.x + width // 2 - 100, dialog_rect.y + height - 50, 80, 30)
    no_button = pygame.Rect(dialog_rect.x + width // 2 + 20, dialog_rect.y + height - 50, 80, 30)
    draw_button(screen, "是", yes_button, color=CONFIRM_COLOR)
    draw_button(screen, "否", no_button, color=WARNING_COLOR)
    return dialog_rect, yes_button, no_button
def main():
    """Run the labeling UI: build the tool, then loop on events and redraw.

    Keyboard bindings handled here:
        a / d          step one image back / forward (hold to repeat)
        LEFT / RIGHT   start auto-play backward / forward
        SPACE          pause or resume auto-play
        v              toggle the convert-to-video mode
        w / s          label the current image positive / negative
        UP / DOWN      start / end a continuous labeling range
        x              move (or convert) the labeled files
        z / c          previous / next folder
        Ctrl+Z         undo the last operation
        ESC            dismiss the confirmation dialog

    The same actions, except the video toggle, are available through the
    on-screen buttons. Labels are saved when the window is closed, after
    which the process exits.
    """
    # Assumed root path; change it to the frame directory to be labeled.
    root_path = r"D:\zero_track\mmaction2\input_videos\test1"
    tool = ImageLabelingTool(root_path)
    # Output folders: "1" receives positive samples, "0" negative samples.
    positive_dir = os.path.join(root_path, "1")
    negative_dir = os.path.join(root_path, "0")

    clock = pygame.time.Clock()

    # Two rows of equally sized buttons along the bottom edge.
    button_height = 40
    button_width = 140
    button_margin = 15
    button_row1_y = WINDOW_HEIGHT - button_height - button_margin
    button_row2_y = WINDOW_HEIGHT - 2 * button_height - 2 * button_margin

    def button_rect(column, y):
        """Return the rectangle of the button in ``column`` of the row at ``y``."""
        x = button_margin * (column + 1) + button_width * column
        return pygame.Rect(x, y, button_width, button_height)

    # Upper row: navigation.
    nav_buttons = {
        "prev": button_rect(0, button_row2_y),
        "next": button_rect(1, button_row2_y),
        "prev_folder": button_rect(2, button_row2_y),
        "next_folder": button_rect(3, button_row2_y),
        "undo": button_rect(4, button_row2_y),
    }
    # Lower row: labeling.
    label_buttons = {
        "positive": button_rect(0, button_row1_y),
        "negative": button_rect(1, button_row1_y),
        "continuous_start": button_rect(2, button_row1_y),
        "continuous_end": button_rect(3, button_row1_y),
        "move_files": button_rect(4, button_row1_y),
    }
    all_buttons = {**nav_buttons, **label_buttons}
    captions = {
        "prev": "上一张 (a)",
        "next": "下一张 (d)",
        "prev_folder": "上个文件夹 (z)",
        "next_folder": "下个文件夹 (c)",
        "undo": "撤销 (Ctrl+Z)",
        "positive": "正样本 (w)",
        "negative": "负样本 (s)",
        "continuous_start": "开始连续标(↑)",
        "continuous_end": "结束连续标(↓)",
        "move_files": "移动文件 (x)",
    }

    def undo_last():
        """Undo one operation and report the outcome on the console."""
        if tool.undo():
            print("已撤销上一次操作")
        else:
            print("没有可撤销的操作")

    def move_files():
        """Move or convert the labeled files and report what the tool returned."""
        moved = tool.move_labeled_files(positive_dir, negative_dir)
        print(f"已移动 {moved} 个文件")

    def start_range():
        """Start continuous labeling, reporting when it cannot start."""
        if not tool.start_continuous_labeling():
            print("无法开始连续标记")

    def end_range():
        """End continuous labeling, reporting when none is active."""
        if not tool.end_continuous_labeling():
            print("没有激活的连续标记")

    # One action per button so mouse clicks share the keyboard code paths.
    click_actions = {
        "prev": tool.prev_image,
        "next": tool.next_image,
        "prev_folder": tool.prev_folder,
        "next_folder": tool.next_folder,
        "undo": undo_last,
        "positive": lambda: tool.label_current_image("positive"),
        "negative": lambda: tool.label_current_image("negative"),
        "continuous_start": start_range,
        "continuous_end": end_range,
        "move_files": move_files,
    }

    # Area in which the current image is shown.
    image_area = pygame.Rect(50, 80, WINDOW_WIDTH - 100, WINDOW_HEIGHT - 220)

    def draw_centered_message(text, color):
        """Draw ``text`` in the middle of the image area."""
        surface = font.render(text, True, color)
        screen.blit(surface, (image_area.centerx - surface.get_width() // 2,
                              image_area.centery - surface.get_height() // 2))

    # Time of the last a/d key-down; read by tool.handle_key_repeats().
    tool.key_pressed_time = 0

    running = True
    while running:
        mouse_pos = pygame.mouse.get_pos()
        tool.handle_key_repeats()

        # Auto-play: advance one image every play_interval milliseconds.
        if tool.playing:
            now = pygame.time.get_ticks()
            if now - tool.last_play_tick > tool.play_interval:
                if tool.play_direction == 1:
                    tool.next_image()
                else:
                    tool.prev_image()
                tool.last_play_tick = now

        for event in pygame.event.get():
            if event.type == QUIT:
                running = False
            elif event.type == KEYDOWN:
                key = event.key
                if key == K_d:
                    tool.key_pressed["right"] = True
                    tool.key_pressed["left"] = False
                    tool.key_pressed_time = time.time()
                    tool.next_image()  # respond once immediately
                elif key == K_a:
                    tool.key_pressed["left"] = True
                    tool.key_pressed["right"] = False
                    tool.key_pressed_time = time.time()
                    tool.prev_image()  # respond once immediately
                elif key == K_RIGHT:
                    tool.play_direction = 1
                    tool.playing = True
                    tool.last_play_tick = pygame.time.get_ticks()
                elif key == K_LEFT:
                    tool.play_direction = -1
                    tool.playing = True
                    tool.last_play_tick = pygame.time.get_ticks()
                elif key == K_SPACE:
                    tool.playing = not tool.playing
                    if tool.playing:
                        tool.last_play_tick = pygame.time.get_ticks()
                elif key == K_v:
                    tool.convert_to_video = not tool.convert_to_video
                    print("转视频模式:" + ("开启" if tool.convert_to_video else "关闭"))
                elif key == K_w:
                    tool.label_current_image("positive")
                elif key == K_s:
                    tool.label_current_image("negative")
                elif key == K_UP:
                    start_range()
                elif key == K_DOWN:
                    end_range()
                elif key == K_x:
                    move_files()
                elif key == K_c:
                    tool.next_folder()
                elif key == K_z:
                    # Ctrl+Z must be tested before plain z; with the plain
                    # test first the undo shortcut could never fire.
                    if pygame.key.get_mods() & KMOD_CTRL:
                        undo_last()
                    else:
                        tool.prev_folder()
                elif key == K_ESCAPE:
                    if tool.show_confirm_dialog:
                        tool.show_confirm_dialog = False
            elif event.type == KEYUP:
                if event.key == K_d:
                    tool.key_pressed["right"] = False
                    tool.last_key_time = 0  # restart the repeat timing
                elif event.key == K_a:
                    tool.key_pressed["left"] = False
                    tool.last_key_time = 0  # restart the repeat timing
            elif event.type == MOUSEBUTTONDOWN and event.button == 1:
                if tool.show_confirm_dialog:
                    # While the dialog is open only its two buttons react.
                    _, yes_button, no_button = draw_confirm_dialog(screen, tool.confirm_message)
                    if yes_button.collidepoint(mouse_pos):
                        tool.show_confirm_dialog = False
                        if tool.confirm_action == "next_folder":
                            tool.current_folder_index += 1
                            tool.load_current_folder_images()
                    elif no_button.collidepoint(mouse_pos):
                        tool.show_confirm_dialog = False
                else:
                    for name, rect in all_buttons.items():
                        if rect.collidepoint(mouse_pos):
                            click_actions[name]()
                            break

        screen.fill(BG_COLOR)

        # Current folder name and its position in the folder list.
        if tool.folders:
            folder_text = f"当前文件夹: {os.path.basename(tool.folders[tool.current_folder_index])} ({tool.current_folder_index + 1}/{len(tool.folders)})"
            screen.blit(small_font.render(folder_text, True, TEXT_COLOR), (20, 20))

        current_image_path = tool.get_current_image()
        if current_image_path and os.path.exists(current_image_path):
            try:
                img = pygame.image.load(current_image_path)
                img_rect = img.get_rect()
                # Scale to fit the image area while keeping the aspect ratio.
                scale = min(image_area.width / img_rect.width, image_area.height / img_rect.height)
                new_size = (int(img_rect.width * scale), int(img_rect.height * scale))
                img = pygame.transform.smoothscale(img, new_size)
                screen.blit(img, img.get_rect(center=image_area.center))

                # File name, position and label state above the image.
                info_text = f"{os.path.basename(current_image_path)} ({tool.current_image_index + 1}/{len(tool.images)})"
                if current_image_path in tool.labels:
                    label = tool.labels[current_image_path]
                    info_text += f" - 已标记: {'正样本' if label == 'positive' else '负样本'}"
                text_surface = font.render(info_text, True, TEXT_COLOR)
                screen.blit(text_surface, text_surface.get_rect(center=(WINDOW_WIDTH // 2, image_area.y - 20)))

                # While a continuous range is open, show it as text and as a
                # bar below the image.
                if tool.continuous_mode and tool.continuous_start_index is not None:
                    start_idx = min(tool.continuous_start_index, tool.current_image_index)
                    end_idx = max(tool.continuous_start_index, tool.current_image_index)
                    range_text = f"标记范围: {start_idx + 1} - {end_idx + 1}"
                    screen.blit(small_font.render(range_text, True, HIGHLIGHT_COLOR), (20, 50))
                    marker_width = image_area.width / len(tool.images)
                    start_x = image_area.x + start_idx * marker_width
                    end_x = image_area.x + (end_idx + 1) * marker_width
                    pygame.draw.rect(screen, HIGHLIGHT_COLOR,
                                     (start_x, image_area.y + image_area.height + 5,
                                      end_x - start_x, 5))
            except Exception as e:
                # Best effort: a frame that cannot be shown must not end the
                # session, so the error is displayed instead of raised.
                draw_centered_message(f"无法加载图片: {e}", (255, 0, 0))
        else:
            draw_centered_message("没有图片可显示", TEXT_COLOR)

        if tool.continuous_mode:
            mode_text = f"连续标记模式已启动 - 标记类型: {'正样本' if tool.continuous_label == 'positive' else '负样本'}"
            text_surface = small_font.render(mode_text, True, HIGHLIGHT_COLOR)
            screen.blit(text_surface, (WINDOW_WIDTH - text_surface.get_width() - 20, 50))

        for name, rect in all_buttons.items():
            draw_button(screen, captions[name], rect, rect.collidepoint(mouse_pos))

        if tool.show_confirm_dialog:
            draw_confirm_dialog(screen, tool.confirm_message)

        pygame.display.flip()
        clock.tick(30)

    # Persist the labels before leaving.
    tool.save_labels()
    pygame.quit()
    sys.exit()


if __name__ == "__main__":
    main()
2.5 最后一个动作区间的最后一个视频帧之前的内容不再显示(最好用)
我想改一下这个脚本。现在标完一个正样本区间或者一个负样本区间之后,按x确实会生成视频,但是我希望新增一个变量,用于控制是否不再显示最后一个动作区间及其之前的所有帧。举个例子:第0到9帧没被标记,第10到20帧是负样本,第21到30帧是正样本,这时候按x,第10到20帧转成视频放到文件夹0,第21到30帧转成视频放到文件夹1。现在的行为是界面显示又回到了第0帧,而我希望的是直接跳到第31帧,即最后一个动作区间的最后一个视频帧(第30帧)及其之前的内容都不再显示。
import os
import pygame
import sys
import shutil
import time
import json
from pygame.locals import *
import cv2 # pip install opencv-python
# Initialise pygame before the display is queried.
pygame.init()

# Window geometry: the window is 100px smaller than the screen in each direction.
_display_info = pygame.display.Info()
SCREEN_WIDTH = _display_info.current_w
SCREEN_HEIGHT = _display_info.current_h
WINDOW_WIDTH = SCREEN_WIDTH - 100
WINDOW_HEIGHT = SCREEN_HEIGHT - 100

# Colour palette (RGB).
BG_COLOR = (40, 44, 52)
TEXT_COLOR = (220, 220, 220)
HIGHLIGHT_COLOR = (97, 175, 239)
BUTTON_COLOR = (56, 58, 66)
BUTTON_HOVER_COLOR = (72, 74, 82)
WARNING_COLOR = (255, 152, 0)
CONFIRM_COLOR = (76, 175, 80)

# Main window.
screen = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("图像分类标注工具")

# Fonts: SimHei is requested so the Chinese captions can be rendered.
font = pygame.font.SysFont("SimHei", 24)
small_font = pygame.font.SysFont("SimHei", 18)
class ImageLabelingTool:
def __init__(self, root_path):
    """Set up the labeling state, scan ``root_path`` and restore saved labels.

    Args:
        root_path: Directory that is walked for folders containing images.
    """
    self.root_path = root_path

    # Folder / image navigation.
    self.folders = list()                # every folder that holds images
    self.current_folder_index = 0        # index into self.folders
    self.images = list()                 # image paths of the current folder
    self.current_image_index = 0         # index into self.images
    self.labels = dict()                 # path -> 'positive' / 'negative'

    # Export settings.
    self.convert_to_video = True         # whether convert-to-video mode is on
    self.video_fps = 10                  # video frame rate

    # Whether frames before the last action range are discarded, and the
    # inclusive end index of that last range.
    self.discard_before_last_action = True
    self.last_ended_index = -1

    # Auto-play.
    self.playing = False                 # whether auto-play is running
    self.play_direction = 1              # 1 = next image, -1 = previous image
    self.last_play_tick = 0              # time of the last auto-play step
    self.play_interval = 100             # milliseconds between steps

    # Continuous labeling.
    self.continuous_mode = False         # whether continuous labeling is active
    self.continuous_label = None         # label shared by the whole range
    self.continuous_start_index = None   # index where the range started

    # Held-key repeat handling.
    self.key_pressed = dict(left=False, right=False)
    self.last_key_time = 0               # repeat timer
    self.key_repeat_delay = 0.8          # initial delay in seconds
    self.key_repeat_interval = 0.15      # repeat interval in seconds

    # Undo history.
    self.undo_stack = list()
    self.max_undo_steps = 50

    # Confirmation dialog.
    self.show_confirm_dialog = False
    self.confirm_message = ""
    self.confirm_action = ""             # action that raised the dialog

    # Discover the image folders, load the first one, then restore labels.
    self.find_image_folders()
    if self.folders:
        self.load_current_folder_images()
    self.load_labels()
def images_to_video(self, image_paths, output_path, fps=10):
"""将图片序列转为视频"""
if not image_paths:
return
# 读取第一张图获取尺寸
frame = cv2.imread(image_paths[0])
h, w, _ = frame.shape
# 初始化视频写入器
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
for img_path in image_paths:
frame = cv2.imread(img_path)
out.write(frame)
out.release()
# 删除原图
for img_path in image_paths:
if os.path.exists(img_path):
os.remove(img_path)
def find_image_folders(self):
"""查找所有包含图片的文件夹"""
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')
for root, dirs, files in os.walk(self.root_path):
has_images = any(file.lower().endswith(image_extensions) for file in files)
if has_images:
self.folders.append(root)
def load_current_folder_images(self):
"""加载当前文件夹中的所有图片"""
folder_path = self.folders[self.current_folder_index]
self.images = []
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')
for file in os.listdir(folder_path):
if file.lower().endswith(image_extensions):
self.images.append(os.path.join(folder_path, file))
# 重置指针
self.current_image_index = 0
# 按文件名排序
self.images.sort()
self.current_image_index = 0
def get_current_image(self):
"""获取当前图片"""
if not self.images:
return None
return self.images[self.current_image_index]
def next_image(self):
"""切换到下一张图片"""
if self.current_image_index < len(self.images) - 1:
self.save_state() # 保存状态以便撤销
self.current_image_index += 1
return True
return False
def prev_image(self):
"""切换到上一张图片"""
if self.current_image_index > 0:
self.current_image_index -= 1
return True
return False
def label_current_image(self, label):
"""标记当前图片"""
current_image= self.get_current_image()
if current_image:
self.save_state() # 保存状态以便撤销
self.labels[current_image] = label
# 自动保存标记状态
self.save_labels()
def start_continuous_labeling(self):
"""开始连续标记"""
current_image = self.get_current_image()
if current_image:
self.save_state() # 保存状态以便撤销
# 如果当前图片已经有标签,使用该标签
if current_image in self.labels:
self.continuous_label = self.labels[current_image]
else:
# 如果没有标签,默认为正样本
self.continuous_label = "positive"
self.labels[current_image] = self.continuous_label
self.continuous_mode = True
self.continuous_start_index = self.current_image_index
# 自动保存标记状态
self.save_labels()
return True
return False
def end_continuous_labeling(self):
"""结束连续标记"""
if self.continuous_mode and self.continuous_start_index is not None:
self.save_state() # 保存状态以便撤销
start = min(self.continuous_start_index, self.current_image_index)
end = max(self.continuous_start_index, self.current_image_index)
for i in range(start, end + 1):
self.labels[self.images[i]] = self.continuous_label
self.continuous_mode = False
self.continuous_start_index = None
# 自动保存标记状态
self.save_labels()
return True
return False
def move_labeled_files(self, positive_dir, negative_dir):
"""移动已标记的文件到正负样本文件夹"""
if not os.path.exists(positive_dir):
os.makedirs(positive_dir)
if not os.path.exists(negative_dir):
os.makedirs(negative_dir)
# 按标签分组
from collections import defaultdict
groups = defaultdict(list)
# 按标签分组
from collections import defaultdict
groups = defaultdict(list)
for img_path, label in self.labels.items():
if label in ["positive", "negative"] and os.path.exists(img_path):
groups[label].append(img_path)
# 排序每组图片(按文件名)
for label in groups:
groups[label].sort()
# 处理每组
for label, image_paths in groups.items():
if not image_paths:
continue
dest_dir = positive_dir if label == "positive" else negative_dir
if self.convert_to_video:
# 合成视频
# 提取帧号范围
def extract_index(path):
# 假设文件名格式为 frame_0010.jpg 或 0010.jpg
name = os.path.splitext(os.path.basename(path))[0]
try:
return int(name.split('_')[-1])
except ValueError:
return None
start_idx = extract_index(image_paths[0])
end_idx = extract_index(image_paths[-1])
if start_idx is not None and end_idx is not None:
range_str = f"{start_idx}to{end_idx}"
else:
range_str = "unknown_range"
folder_name = os.path.basename(os.path.dirname(image_paths[0]))
video_name = f"{folder_name}_{label}_{range_str}.mp4"
video_path = os.path.join(dest_dir, video_name)
self.images_to_video(image_paths, video_path, fps=self.video_fps)
else:
# 原逻辑:移动文件
for img_path in image_paths:
filename = os.path.basename(img_path)
dest_path = os.path.join(dest_dir, filename)
shutil.move(img_path, dest_path)
# 从标签中移除
for img_path in image_paths:
self.labels.pop(img_path, None)
# ===== 新增:在删图前完成列表裁剪 =====
if self.discard_before_last_action and image_paths:
# 用同样方法拿到本组最大帧号
def get_frame(p):
name = os.path.splitext(os.path.basename(p))[0]
return int(name) # 你的 6 位数字
self.last_ended_index = max(get_frame(p) for p in image_paths)
# 直接对 self.images 做“内存级”裁剪
drop_idx = -1
for i, p in enumerate(self.images):
if get_frame(p) == self.last_ended_index:
drop_idx = i
break
if drop_idx >= 0:
self.images = self.images[drop_idx + 1:] # 扔掉 前面的
# 指针归零(此时 images[0] 已是 31)
self.current_image_index = 0
# =====================================
# 重新加载当前文件夹
# self.load_current_folder_images()
self.save_labels()
def next_folder(self):
"""切换到下一个文件夹"""
if self.current_folder_index < len(self.folders) - 1:
# 检查当前文件夹是否有未移动的标记文件
current_folder = self.folders[self.current_folder_index]
has_unmoved_labels = any(
img_path.startswith(current_folder) and os.path.exists(img_path)
for img_path in self.labels.keys()
)
if has_unmoved_labels:
# 显示确认对话框
self.show_confirm_dialog = True
self.confirm_action = "next_folder"
self.confirm_message = "当前文件夹有未移动的标记文件,确定要切换到下一个文件夹吗?"
return False
else:
# 直接切换文件夹
self.current_folder_index += 1
self.load_current_folder_images()
return True
return False
def prev_folder(self):
"""切换到上一个文件夹"""
if self.current_folder_index > 0:
self.current_folder_index -= 1
self.load_current_folder_images()
return True
return False
def handle_key_repeats(self):
"""处理方向键长按"""
current_time = time.time()
# 检查是否需要触发按键重复
if any(self.key_pressed.values()):
# 如果是第一次按下,等待较长时间
if self.last_key_time == 0:
if current_time - self.key_pressed_time > self.key_repeat_delay:
if self.key_pressed["left"]:
self.prev_image()
elif self.key_pressed["right"]:
self.next_image()
self.last_key_time = current_time
# 后续重复,使用较短的间隔
elif current_time - self.last_key_time > self.key_repeat_interval:
if self.key_pressed["left"]:
self.prev_image()
elif self.key_pressed["right"]:
self.next_image()
self.last_key_time = current_time
def save_state(self):
"""保存当前状态以便撤销"""
if len(self.undo_stack) >= self.max_undo_steps:
self.undo_stack.pop(0) # 移除最旧的状态
state = {
"current_image_index": self.current_image_index,
"labels": self.labels.copy(),
"continuous_mode": self.continuous_mode,
"continuous_start_index": self.continuous_start_index,
"continuous_label": self.continuous_label
}
self.undo_stack.append(state)
def undo(self):
"""撤销上一次操作"""
if self.undo_stack:
state = self.undo_stack.pop()
self.current_image_index = state["current_image_index"]
self.labels = state["labels"]
self.continuous_mode = state["continuous_mode"]
self.continuous_start_index = state["continuous_start_index"]
self.continuous_label = state["continuous_label"]
return True
return False
def save_labels(self):
"""保存标记状态到文件"""
labels_file = os.path.join(self.root_path, "labels_backup.json")
try:
# 只保存仍然存在的文件的标记
existing_labels = {k: v for k, v in self.labels.items() if os.path.exists(k)}
with open(labels_file, 'w') as f:
json.dump(existing_labels, f)
except Exception as e:
print(f"保存标记状态失败: {e}")
def load_labels(self):
"""从文件加载标记状态"""
labels_file = os.path.join(self.root_path, "labels_backup.json")
if os.path.exists(labels_file):
try:
with open(labels_file, 'r') as f:
self.labels = json.load(f)
except Exception as e:
print(f"加载标记状态失败: {e}")
def draw_button(screen, text, rect, hover=False, color=None):
    """Paint a rounded button with a centred caption onto ``screen``.

    When ``color`` is omitted the fill is the hover or idle button colour,
    chosen by ``hover``.
    """
    fill = color
    if fill is None:
        fill = BUTTON_HOVER_COLOR if hover else BUTTON_COLOR
    # Body first, then a 2 px outline on top of it.
    pygame.draw.rect(screen, fill, rect, border_radius=5)
    pygame.draw.rect(screen, (100, 100, 100), rect, 2, border_radius=5)
    # Caption centred inside the button.
    caption = small_font.render(text, True, TEXT_COLOR)
    screen.blit(caption, caption.get_rect(center=rect.center))
def wrap_dialog_text(message, max_width, measure):
    """Break ``message`` into lines whose measured width is below ``max_width``.

    Words are kept whole where possible.  A single word that is too wide on
    its own (e.g. Chinese text, which contains no spaces) is broken between
    characters instead of overflowing.

    Args:
        message: Text to wrap.
        max_width: Exclusive upper bound for a line's width.
        measure: Callable returning ``(width, height)`` for a string, with
            the same shape as ``pygame.font.Font.size``.

    Returns:
        The list of wrapped lines (empty for an empty message).
    """
    lines = []
    current = ""
    for word in message.split():
        candidate = f"{current} {word}" if current else word
        if measure(candidate)[0] < max_width:
            current = candidate
            continue
        if current:
            lines.append(current)
            current = ""
        for ch in word:
            if current and measure(current + ch)[0] >= max_width:
                lines.append(current)
                current = ch
            else:
                current += ch
    if current:
        lines.append(current)
    return lines


def draw_confirm_dialog(screen, message, width=400, height=200):
    """Draw a centred yes/no dialog and return its rectangles.

    Returns:
        ``(dialog_rect, yes_button, no_button)`` so the caller can hit-test
        mouse clicks against the two buttons.
    """
    dialog_rect = pygame.Rect(
        (WINDOW_WIDTH - width) // 2,
        (WINDOW_HEIGHT - height) // 2,
        width, height
    )
    # Background and border
    pygame.draw.rect(screen, BG_COLOR, dialog_rect, border_radius=10)
    pygame.draw.rect(screen, TEXT_COLOR, dialog_rect, 2, border_radius=10)
    # Message, wrapped to the dialog width minus a 20 px margin on each side
    for row, line in enumerate(wrap_dialog_text(message, width - 40, small_font.size)):
        text_surface = small_font.render(line, True, TEXT_COLOR)
        screen.blit(text_surface, (dialog_rect.x + 20, dialog_rect.y + 30 + row * 25))
    # Buttons
    yes_button = pygame.Rect(dialog_rect.x + width // 2 - 100, dialog_rect.y + height - 50, 80, 30)
    no_button = pygame.Rect(dialog_rect.x + width // 2 + 20, dialog_rect.y + height - 50, 80, 30)
    draw_button(screen, "是", yes_button, color=CONFIRM_COLOR)
    draw_button(screen, "否", no_button, color=WARNING_COLOR)
    return dialog_rect, yes_button, no_button
def main():
    """Run the labeling UI: create the tool, then loop on events and redraw.

    Keys: d/a step forward/back (repeat while held), RIGHT/LEFT start
    auto-play, SPACE pause/resume, v toggle video export, w/s label
    positive/negative, UP/DOWN start/end a labeled range, x export labeled
    frames, c/z next/previous folder, Ctrl+Z undo, ESC close the dialog.
    """
    # Root folder of the extracted frames; adjust before use.
    root_path = r"D:\zero_track\mmaction2\input_videos\test1"
    tool = ImageLabelingTool(root_path)
    # Output folders: "1" receives positive samples, "0" negative samples.
    positive_dir = os.path.join(root_path, "1")
    negative_dir = os.path.join(root_path, "0")

    running = True
    clock = pygame.time.Clock()

    # Two rows of five buttons along the bottom edge.
    button_height = 40
    button_width = 140
    button_margin = 15
    button_row1_y = WINDOW_HEIGHT - button_height - button_margin
    button_row2_y = WINDOW_HEIGHT - 2 * button_height - 2 * button_margin

    def button_rect(column, y):
        x = button_margin * (column + 1) + button_width * column
        return pygame.Rect(x, y, button_width, button_height)

    nav_names = ("prev", "next", "prev_folder", "next_folder", "undo")
    label_names = ("positive", "negative", "continuous_start", "continuous_end", "move_files")
    nav_buttons = {name: button_rect(i, button_row2_y) for i, name in enumerate(nav_names)}
    label_buttons = {name: button_rect(i, button_row1_y) for i, name in enumerate(label_names)}
    all_buttons = {**nav_buttons, **label_buttons}
    captions = {
        "prev": "上一张 (a)",
        "next": "下一张 (d)",
        "prev_folder": "上个文件夹 (z)",
        "next_folder": "下个文件夹 (c)",
        "undo": "撤销 (Ctrl+Z)",
        "positive": "正样本 (w)",
        "negative": "负样本 (s)",
        "continuous_start": "开始连续标(↑)",
        "continuous_end": "结束连续标(↓)",
        "move_files": "移动文件 (x)",
    }

    image_area = pygame.Rect(50, 80, WINDOW_WIDTH - 100, WINDOW_HEIGHT - 220)
    tool.key_pressed_time = 0

    def export_labeled():
        pending = len(tool.labels)
        moved = tool.move_labeled_files(positive_dir, negative_dir)
        if moved is None:
            # The tool did not report a count; derive it from the labels
            # that were consumed by the export.
            moved = pending - len(tool.labels)
        print(f"已移动 {moved} 个文件")

    def report_undo():
        if tool.undo():
            print("已撤销上一次操作")
        else:
            print("没有可撤销的操作")

    def start_range():
        if not tool.start_continuous_labeling():
            print("无法开始连续标记")

    def end_range():
        if not tool.end_continuous_labeling():
            print("没有激活的连续标记")

    button_actions = {
        "prev": tool.prev_image,
        "next": tool.next_image,
        "prev_folder": tool.prev_folder,
        "next_folder": tool.next_folder,
        "undo": report_undo,
        "positive": lambda: tool.label_current_image("positive"),
        "negative": lambda: tool.label_current_image("negative"),
        "continuous_start": start_range,
        "continuous_end": end_range,
        "move_files": export_labeled,
    }

    # The scaled surface of the image on screen, reused while the path is
    # unchanged instead of decoding and rescaling the file on every frame.
    cached_path = None
    cached_surface = None

    while running:
        mouse_pos = pygame.mouse.get_pos()
        tool.handle_key_repeats()

        # Auto-play
        if tool.playing:
            now = pygame.time.get_ticks()
            if now - tool.last_play_tick > tool.play_interval:
                if tool.play_direction == 1:
                    tool.next_image()
                else:
                    tool.prev_image()
                tool.last_play_tick = now

        for event in pygame.event.get():
            if event.type == QUIT:
                running = False
            elif event.type == KEYDOWN:
                if event.key == K_d:
                    tool.key_pressed["right"] = True
                    tool.key_pressed["left"] = False
                    tool.key_pressed_time = time.time()
                    tool.next_image()  # respond immediately once
                elif event.key == K_a:
                    tool.key_pressed["left"] = True
                    tool.key_pressed["right"] = False
                    tool.key_pressed_time = time.time()
                    tool.prev_image()  # respond immediately once
                elif event.key == K_RIGHT:
                    tool.play_direction = 1
                    tool.playing = True
                    tool.last_play_tick = pygame.time.get_ticks()
                elif event.key == K_LEFT:
                    tool.play_direction = -1
                    tool.playing = True
                    tool.last_play_tick = pygame.time.get_ticks()
                elif event.key == K_SPACE:
                    tool.playing = not tool.playing
                    if tool.playing:
                        tool.last_play_tick = pygame.time.get_ticks()
                elif event.key == K_v:
                    tool.convert_to_video = not tool.convert_to_video
                    print("转视频模式:" + ("开启" if tool.convert_to_video else "关闭"))
                elif event.key == K_w:
                    tool.label_current_image("positive")
                elif event.key == K_s:
                    tool.label_current_image("negative")
                elif event.key == K_UP:
                    start_range()
                elif event.key == K_DOWN:
                    end_range()
                elif event.key == K_x:
                    export_labeled()
                elif event.key == K_c:
                    tool.next_folder()
                elif event.key == K_z:
                    # Ctrl+Z must be tested before plain z, otherwise the
                    # undo shortcut can never be reached.
                    if pygame.key.get_mods() & KMOD_CTRL:
                        report_undo()
                    else:
                        tool.prev_folder()
                elif event.key == K_ESCAPE:
                    if tool.show_confirm_dialog:
                        tool.show_confirm_dialog = False
            elif event.type == KEYUP:
                if event.key == K_d:
                    tool.key_pressed["right"] = False
                    tool.last_key_time = 0  # restart the repeat timing
                elif event.key == K_a:
                    tool.key_pressed["left"] = False
                    tool.last_key_time = 0  # restart the repeat timing
            elif event.type == MOUSEBUTTONDOWN and event.button == 1:
                click_pos = event.pos  # position of this click, not of the frame start
                if tool.show_confirm_dialog:
                    _, yes_button, no_button = draw_confirm_dialog(screen, tool.confirm_message)
                    if yes_button.collidepoint(click_pos):
                        tool.show_confirm_dialog = False
                        if tool.confirm_action == "next_folder":
                            tool.current_folder_index += 1
                            tool.load_current_folder_images()
                    elif no_button.collidepoint(click_pos):
                        tool.show_confirm_dialog = False
                else:
                    for name, rect in all_buttons.items():
                        if rect.collidepoint(click_pos):
                            button_actions[name]()
                            break

        screen.fill(BG_COLOR)

        # Folder information
        if tool.folders:
            folder_text = f"当前文件夹: {os.path.basename(tool.folders[tool.current_folder_index])} ({tool.current_folder_index + 1}/{len(tool.folders)})"
            text_surface = small_font.render(folder_text, True, TEXT_COLOR)
            screen.blit(text_surface, (20, 20))

        # Current image
        current_image_path = tool.get_current_image()
        if current_image_path and os.path.exists(current_image_path):
            try:
                if current_image_path != cached_path:
                    img = pygame.image.load(current_image_path)
                    img_rect = img.get_rect()
                    # Scale to fit the display area while keeping the aspect ratio.
                    scale = min(image_area.width / img_rect.width, image_area.height / img_rect.height)
                    new_size = (int(img_rect.width * scale), int(img_rect.height * scale))
                    cached_surface = pygame.transform.smoothscale(img, new_size)
                    cached_path = current_image_path
                screen.blit(cached_surface, cached_surface.get_rect(center=image_area.center))

                # File name, position and label above the image
                info_text = f"{os.path.basename(current_image_path)} ({tool.current_image_index + 1}/{len(tool.images)})"
                if current_image_path in tool.labels:
                    label = tool.labels[current_image_path]
                    info_text += f" - 已标记: {'正样本' if label == 'positive' else '负样本'}"
                text_surface = font.render(info_text, True, TEXT_COLOR)
                text_rect = text_surface.get_rect(center=(WINDOW_WIDTH // 2, image_area.y - 20))
                screen.blit(text_surface, text_rect)

                # Range being labeled, as text and as a bar under the image
                if tool.continuous_mode and tool.continuous_start_index is not None:
                    start_idx = min(tool.continuous_start_index, tool.current_image_index)
                    end_idx = max(tool.continuous_start_index, tool.current_image_index)
                    range_text = f"标记范围: {start_idx + 1} - {end_idx + 1}"
                    range_surface = small_font.render(range_text, True, HIGHLIGHT_COLOR)
                    screen.blit(range_surface, (20, 50))
                    marker_width = image_area.width / len(tool.images)
                    start_x = image_area.x + start_idx * marker_width
                    end_x = image_area.x + (end_idx + 1) * marker_width
                    pygame.draw.rect(screen, HIGHLIGHT_COLOR,
                                     (start_x, image_area.y + image_area.height + 5,
                                      end_x - start_x, 5))
            except Exception as e:
                # Keep the UI alive when a frame cannot be decoded.
                error_text = f"无法加载图片: {e}"
                text_surface = font.render(error_text, True, (255, 0, 0))
                screen.blit(text_surface, (image_area.centerx - text_surface.get_width() // 2, image_area.centery - text_surface.get_height() // 2))
        else:
            no_image_text = "没有图片可显示"
            text_surface = font.render(no_image_text, True, TEXT_COLOR)
            screen.blit(text_surface, (image_area.centerx - text_surface.get_width() // 2, image_area.centery - text_surface.get_height() // 2))

        # Continuous-labeling status
        if tool.continuous_mode:
            mode_text = f"连续标记模式已启动 - 标记类型: {'正样本' if tool.continuous_label == 'positive' else '负样本'}"
            text_surface = small_font.render(mode_text, True, HIGHLIGHT_COLOR)
            screen.blit(text_surface, (WINDOW_WIDTH - text_surface.get_width() - 20, 50))

        # Buttons
        for name, rect in all_buttons.items():
            draw_button(screen, captions[name], rect, rect.collidepoint(mouse_pos))

        # Confirmation dialog on top of everything else
        if tool.show_confirm_dialog:
            draw_confirm_dialog(screen, tool.confirm_message)

        pygame.display.flip()
        clock.tick(30)

    # Persist labels before leaving.
    tool.save_labels()
    pygame.quit()
    sys.exit()
# Start the UI only when the file is executed as a script.
if __name__ == "__main__":
    main()
2.6 基于2.4的新增自动补标负样本功能(已废弃)
我想新增一个变量用于控制是否仅标注正样本,如果变量为True的话,即使用户只标注了正样本,也会把正样本之前的帧全都视为负样本,比如说用户标注了10到20帧是正样本,但是0到10帧没有标注,在按x移动的时候,不但会把10-20帧视为正样本转为一个视频并放到1的文件夹,也会将0-10视为负样本并且转为一个视频并放到0的文件夹。
我的意思是从第一帧开始找到第一个正样本,在此之前的自动视为负样本,并不是说之后的只要没标的都是负样本,这两种情况要区分一下。
新增一个枚举变量
auto_neg_mode,可切换三种行为:
auto_neg_mode = 0关闭自动补标(原逻辑)
auto_neg_mode = 1仅把“第一个正样本之前”的帧补成负样本(你要的新需求)
auto_neg_mode = 2把所有未标帧都补成负样本
下面给出“仅把第一个正样本之前的帧自动补成负样本”的完整实现。
新增一个枚举变量auto_neg_mode,可切换三种行为:
auto_neg_mode = 0关闭自动补标(原逻辑)
auto_neg_mode = 1仅把“第一个正样本之前”的帧补成负样本(你要的新需求)
auto_neg_mode = 2 把所有未标帧都补成负样本(上一版实现)
---------------- 代码改动 ----------------
在 __init__ 里加一个变量:
# 自动补标负样本模式
# 0=关闭 1=仅第一个正样本之前 2=全部未标
self.auto_neg_mode = 1
在 move_labeled_files 里替换原来的“补标”逻辑。
找到你原来已经有的:
# 按标签分组
from collections import defaultdict
groups = defaultdict(list)
把上一回我给的补标代码整块删掉,换成下面这段:
# ---------- 自动补标负样本 ----------
if self.auto_neg_mode:
    # 当前文件夹已标正样本的索引
    pos_idx = [i for i, p in enumerate(self.images)
               if p in self.labels and self.labels[p] == 'positive']
    if pos_idx:  # 有正样本才需要补
        first_pos = min(pos_idx)  # 第一个正样本序号
        end_idx = len(self.images) if self.auto_neg_mode == 2 else first_pos
        for i in range(0, end_idx):
            p = self.images[i]
            if p not in self.labels:  # 未标才补
                self.labels[p] = 'negative'
# -----------------------------------
如果想在界面上随时切换模式,可在 main() 里再加一组热键,例如:
elif event.key == K_1:
    tool.auto_neg_mode = (tool.auto_neg_mode + 1) % 3
    modes = ['关闭', '首正之前', '全部未标']
    print('自动补标模式:', modes[tool.auto_neg_mode])
---------------- 效果 ----------------
设 auto_neg_mode = 1 后,用户只标 10-20 帧为正样本:
– 0-9 帧 → 自动补标为负样本,生成 xxx_negative_0to9.mp4 放入 0 文件夹;
– 10-20 帧 → 正样本视频放入 1 文件夹;
– 21 帧以后若未标,保持未标状态,不会被处理。
若想回到“全部未标都当负样本”或完全关闭,只需把 auto_neg_mode 切到 2 或 0 即可,互不干扰。
import os
import pygame
import sys
import shutil
import time
import json
from pygame.locals import *
import cv2 # pip install opencv-python
# Initialise pygame (must happen before display/font queries below)
pygame.init()
# Layout and colour configuration
# The window is sized 100 px smaller than the current display in both directions.
SCREEN_WIDTH, SCREEN_HEIGHT = pygame.display.Info().current_w, pygame.display.Info().current_h
WINDOW_WIDTH, WINDOW_HEIGHT = SCREEN_WIDTH - 100, SCREEN_HEIGHT - 100
# RGB colours used by the drawing helpers (dialog background, caption text,
# highlight, button idle/hover fill, "否"/"是" dialog buttons).
BG_COLOR = (40, 44, 52)
TEXT_COLOR = (220, 220, 220)
HIGHLIGHT_COLOR = (97, 175, 239)
BUTTON_COLOR = (56, 58, 66)
BUTTON_HOVER_COLOR = (72, 74, 82)
WARNING_COLOR = (255, 152, 0)
CONFIRM_COLOR = (76, 175, 80)
# Create the window
screen = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("图像分类标注工具")
# Fonts (SimHei is requested so the Chinese captions can be rendered)
font = pygame.font.SysFont("SimHei", 24)
small_font = pygame.font.SysFont("SimHei", 18)
class ImageLabelingTool:
    """Labeling state and file operations, with automatic negative labels.

    The tool walks ``root_path`` for folders that contain images, exposes one
    folder at a time as ``self.images``, records a ``'positive'`` or
    ``'negative'`` label per image path, and exports labeled images on
    request.  Depending on ``auto_neg_mode`` unlabeled frames are turned into
    negatives right before the export.  Labels are persisted to
    ``labels_backup.json`` inside ``root_path``.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')
    LABELS_FILENAME = "labels_backup.json"

    def __init__(self, root_path):
        """Scan ``root_path`` for image folders and restore saved labels.

        Args:
            root_path: Directory that is searched recursively for images and
                that holds the label backup file.
        """
        self.root_path = root_path
        self.folders = []                 # absolute paths of folders that contain images
        self.current_folder_index = 0     # index into self.folders
        self.images = []                  # image paths of the folder being shown
        self.current_image_index = 0      # index into self.images
        self.labels = {}                  # image path -> 'positive' / 'negative'

        # Automatic negative labeling applied at export time:
        # 0 = off, 1 = only frames before the first positive, 2 = every unlabeled frame
        self.auto_neg_mode = 1

        self.convert_to_video = True      # export labeled frames as a video
        self.video_fps = 10               # frame rate of exported videos

        # Auto-play
        self.playing = False
        self.play_direction = 1           # 1 = forward, -1 = backward
        self.last_play_tick = 0           # tick of the last auto-play step
        self.play_interval = 100          # milliseconds between auto-play steps

        # Continuous (range) labeling
        self.continuous_mode = False
        self.continuous_label = None
        self.continuous_start_index = None

        # Held-key repeat.  key_pressed_time is initialised here so that
        # handle_key_repeats() never reads an attribute that does not exist.
        self.key_pressed = {"left": False, "right": False}
        self.key_pressed_time = 0
        self.last_key_time = 0
        self.key_repeat_delay = 0.8       # seconds before the first repeat
        self.key_repeat_interval = 0.15   # seconds between later repeats

        # Undo history
        self.undo_stack = []
        self.max_undo_steps = 50

        # Confirmation dialog state
        self.show_confirm_dialog = False
        self.confirm_message = ""
        self.confirm_action = ""

        self.find_image_folders()
        if self.folders:
            self.load_current_folder_images()
        self.load_labels()

    @staticmethod
    def _frame_number(path):
        """Return the trailing integer of a file name, or None.

        Handles both ``0010.jpg`` and ``frame_0010.jpg`` style names.
        """
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            return int(stem.split('_')[-1])
        except ValueError:
            return None

    def images_to_video(self, image_paths, output_path, fps=10):
        """Encode ``image_paths`` into an mp4 and delete the encoded frames.

        Source images are removed only after they were actually written, so a
        writer that cannot be opened or an unreadable frame never causes
        frames to be lost.

        Returns:
            The number of frames written to the video.
        """
        if not image_paths:
            return 0
        first = cv2.imread(image_paths[0])
        if first is None:
            print(f"[WARN] 无法读取图片: {image_paths[0]}")
            return 0
        height, width = first.shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not writer.isOpened():
            print(f"[WARN] 无法创建视频: {output_path}")
            return 0
        written = []
        try:
            for img_path in image_paths:
                frame = cv2.imread(img_path)
                if frame is None:
                    # Leave the unreadable file on disk for inspection.
                    print(f"[WARN] 无法读取图片: {img_path}")
                    continue
                writer.write(frame)
                written.append(img_path)
        finally:
            writer.release()
        for img_path in written:
            if os.path.exists(img_path):
                os.remove(img_path)
        return len(written)

    def find_image_folders(self):
        """Append every folder under ``root_path`` that directly holds images."""
        for root, dirs, files in os.walk(self.root_path):
            dirs.sort()  # deterministic traversal order
            if any(name.lower().endswith(self.IMAGE_EXTENSIONS) for name in files):
                self.folders.append(root)

    def load_current_folder_images(self):
        """Load the sorted image list of the current folder and rewind to 0.

        Indices stored in undo snapshots and in the continuous-labeling state
        refer to the previous list, so both are reset here.
        """
        folder_path = self.folders[self.current_folder_index]
        self.images = sorted(
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )
        self.current_image_index = 0
        self.continuous_mode = False
        self.continuous_start_index = None
        self.undo_stack.clear()

    def get_current_image(self):
        """Return the path of the current image, or None if there is none."""
        if not 0 <= self.current_image_index < len(self.images):
            return None
        return self.images[self.current_image_index]

    def next_image(self):
        """Step to the next image; return True if the position changed."""
        if self.current_image_index < len(self.images) - 1:
            self.save_state()
            self.current_image_index += 1
            return True
        return False

    def prev_image(self):
        """Step to the previous image; return True if the position changed."""
        if self.current_image_index > 0:
            self.current_image_index -= 1
            return True
        return False

    def label_current_image(self, label):
        """Assign ``label`` to the current image and persist the labels."""
        current_image = self.get_current_image()
        if current_image:
            self.save_state()
            self.labels[current_image] = label
            self.save_labels()

    def start_continuous_labeling(self):
        """Begin a labeled range at the current image.

        The range inherits the current image's label, defaulting to
        ``'positive'`` when it has none.  Returns True when a range started.
        """
        current_image = self.get_current_image()
        if not current_image:
            return False
        self.save_state()
        if current_image in self.labels:
            self.continuous_label = self.labels[current_image]
        else:
            self.continuous_label = "positive"
            self.labels[current_image] = self.continuous_label
        self.continuous_mode = True
        self.continuous_start_index = self.current_image_index
        self.save_labels()
        return True

    def end_continuous_labeling(self):
        """Label every image between the range start and the current image.

        Returns True when an active range was closed.
        """
        if not self.continuous_mode or self.continuous_start_index is None:
            return False
        self.save_state()
        first = min(self.continuous_start_index, self.current_image_index)
        last = max(self.continuous_start_index, self.current_image_index)
        for img_path in self.images[first:last + 1]:
            self.labels[img_path] = self.continuous_label
        self.continuous_mode = False
        self.continuous_start_index = None
        self.save_labels()
        return True

    def _auto_label_negatives(self):
        """Mark unlabeled frames of the current folder as negative.

        Nothing happens when the mode is 0 or when the folder has no positive
        frame.  Mode 1 covers the frames before the first positive one, mode
        2 covers every unlabeled frame.  Existing labels are never changed.
        """
        if not self.auto_neg_mode:
            return
        first_pos = next(
            (i for i, p in enumerate(self.images) if self.labels.get(p) == 'positive'),
            None,
        )
        if first_pos is None:
            return
        stop = len(self.images) if self.auto_neg_mode == 2 else first_pos
        for img_path in self.images[:stop]:
            self.labels.setdefault(img_path, 'negative')

    def _export_group(self, label, image_paths, dest_dir):
        """Export one label's sorted images as a video or as moved files."""
        if self.convert_to_video:
            start_idx = self._frame_number(image_paths[0])
            end_idx = self._frame_number(image_paths[-1])
            if start_idx is not None and end_idx is not None:
                range_str = f"{start_idx}to{end_idx}"
            else:
                range_str = "unknown_range"
            folder_name = os.path.basename(os.path.dirname(image_paths[0]))
            video_name = f"{folder_name}_{label}_{range_str}.mp4"
            self.images_to_video(image_paths, os.path.join(dest_dir, video_name),
                                 fps=self.video_fps)
        else:
            for img_path in image_paths:
                shutil.move(img_path, os.path.join(dest_dir, os.path.basename(img_path)))

    def move_labeled_files(self, positive_dir, negative_dir):
        """Export all labeled images into the positive/negative folders.

        Automatic negative labels are applied first.  Images are then grouped
        per label; each group becomes one video when ``convert_to_video`` is
        set, otherwise the files are moved.  Labels are dropped only for
        images that really left their folder, and the current folder is
        reloaded afterwards.

        Returns:
            The number of image files that were exported.
        """
        from collections import defaultdict

        os.makedirs(positive_dir, exist_ok=True)
        os.makedirs(negative_dir, exist_ok=True)

        self._auto_label_negatives()

        groups = defaultdict(list)
        for img_path, label in self.labels.items():
            if label in ("positive", "negative") and os.path.exists(img_path):
                groups[label].append(img_path)

        exported = []
        for label, image_paths in groups.items():
            image_paths.sort()
            dest_dir = positive_dir if label == "positive" else negative_dir
            self._export_group(label, image_paths, dest_dir)
            gone = [p for p in image_paths if not os.path.exists(p)]
            for img_path in gone:
                self.labels.pop(img_path, None)
            exported.extend(gone)

        if self.folders:
            self.load_current_folder_images()
        self.save_labels()
        return len(exported)

    def next_folder(self):
        """Switch to the next folder, asking first if labels are pending.

        Returns True when the folder changed.  When the current folder still
        has labeled files on disk, the confirmation dialog is requested and
        False is returned.
        """
        if self.current_folder_index >= len(self.folders) - 1:
            return False
        current_folder = self.folders[self.current_folder_index]
        # Compare the parent directory instead of a string prefix so that
        # "test1" does not match labels stored under "test10".
        has_unmoved_labels = any(
            os.path.dirname(img_path) == current_folder and os.path.exists(img_path)
            for img_path in self.labels
        )
        if has_unmoved_labels:
            self.show_confirm_dialog = True
            self.confirm_action = "next_folder"
            self.confirm_message = "当前文件夹有未移动的标记文件,确定要切换到下一个文件夹吗?"
            return False
        self.current_folder_index += 1
        self.load_current_folder_images()
        return True

    def prev_folder(self):
        """Switch to the previous folder; return True if the folder changed."""
        if self.current_folder_index > 0:
            self.current_folder_index -= 1
            self.load_current_folder_images()
            return True
        return False

    def handle_key_repeats(self):
        """Step through images while a navigation key is being held.

        The first repeat waits ``key_repeat_delay`` seconds after the key was
        pressed; later repeats fire every ``key_repeat_interval`` seconds.
        """
        if not any(self.key_pressed.values()):
            return
        now = time.time()
        if self.last_key_time == 0:
            due = now - self.key_pressed_time > self.key_repeat_delay
        else:
            due = now - self.last_key_time > self.key_repeat_interval
        if not due:
            return
        if self.key_pressed["left"]:
            self.prev_image()
        elif self.key_pressed["right"]:
            self.next_image()
        self.last_key_time = now

    def save_state(self):
        """Push a snapshot of position, labels and range state for undo."""
        if len(self.undo_stack) >= self.max_undo_steps:
            self.undo_stack.pop(0)  # drop the oldest snapshot
        self.undo_stack.append({
            "current_image_index": self.current_image_index,
            "labels": self.labels.copy(),
            "continuous_mode": self.continuous_mode,
            "continuous_start_index": self.continuous_start_index,
            "continuous_label": self.continuous_label,
        })

    def undo(self):
        """Restore the most recent snapshot; return False if there is none."""
        if not self.undo_stack:
            return False
        state = self.undo_stack.pop()
        self.current_image_index = state["current_image_index"]
        self.labels = state["labels"]
        self.continuous_mode = state["continuous_mode"]
        self.continuous_start_index = state["continuous_start_index"]
        self.continuous_label = state["continuous_label"]
        return True

    def save_labels(self):
        """Write labels of still-existing files to the backup file (best effort)."""
        labels_file = os.path.join(self.root_path, self.LABELS_FILENAME)
        try:
            existing_labels = {k: v for k, v in self.labels.items() if os.path.exists(k)}
            with open(labels_file, 'w', encoding='utf-8') as f:
                json.dump(existing_labels, f)
        except (OSError, TypeError, ValueError) as e:
            print(f"保存标记状态失败: {e}")

    def load_labels(self):
        """Load labels from the backup file if it exists and holds a dict."""
        labels_file = os.path.join(self.root_path, self.LABELS_FILENAME)
        if not os.path.exists(labels_file):
            return
        try:
            with open(labels_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            print(f"加载标记状态失败: {e}")
            return
        if isinstance(loaded, dict):
            self.labels = loaded
        else:
            print(f"加载标记状态失败: {labels_file}")
def draw_button(screen, text, rect, hover=False, color=None):
    """Paint a rounded button with a centred caption onto ``screen``.

    When ``color`` is omitted the fill is the hover or idle button colour,
    chosen by ``hover``.
    """
    fill = color
    if fill is None:
        fill = BUTTON_HOVER_COLOR if hover else BUTTON_COLOR
    # Body first, then a 2 px outline on top of it.
    pygame.draw.rect(screen, fill, rect, border_radius=5)
    pygame.draw.rect(screen, (100, 100, 100), rect, 2, border_radius=5)
    # Caption centred inside the button.
    caption = small_font.render(text, True, TEXT_COLOR)
    screen.blit(caption, caption.get_rect(center=rect.center))
def wrap_dialog_text(message, max_width, measure):
    """Break ``message`` into lines whose measured width is below ``max_width``.

    Words are kept whole where possible.  A single word that is too wide on
    its own (e.g. Chinese text, which contains no spaces) is broken between
    characters instead of overflowing.

    Args:
        message: Text to wrap.
        max_width: Exclusive upper bound for a line's width.
        measure: Callable returning ``(width, height)`` for a string, with
            the same shape as ``pygame.font.Font.size``.

    Returns:
        The list of wrapped lines (empty for an empty message).
    """
    lines = []
    current = ""
    for word in message.split():
        candidate = f"{current} {word}" if current else word
        if measure(candidate)[0] < max_width:
            current = candidate
            continue
        if current:
            lines.append(current)
            current = ""
        for ch in word:
            if current and measure(current + ch)[0] >= max_width:
                lines.append(current)
                current = ch
            else:
                current += ch
    if current:
        lines.append(current)
    return lines


def draw_confirm_dialog(screen, message, width=400, height=200):
    """Draw a centred yes/no dialog and return its rectangles.

    Returns:
        ``(dialog_rect, yes_button, no_button)`` so the caller can hit-test
        mouse clicks against the two buttons.
    """
    dialog_rect = pygame.Rect(
        (WINDOW_WIDTH - width) // 2,
        (WINDOW_HEIGHT - height) // 2,
        width, height
    )
    # Background and border
    pygame.draw.rect(screen, BG_COLOR, dialog_rect, border_radius=10)
    pygame.draw.rect(screen, TEXT_COLOR, dialog_rect, 2, border_radius=10)
    # Message, wrapped to the dialog width minus a 20 px margin on each side
    for row, line in enumerate(wrap_dialog_text(message, width - 40, small_font.size)):
        text_surface = small_font.render(line, True, TEXT_COLOR)
        screen.blit(text_surface, (dialog_rect.x + 20, dialog_rect.y + 30 + row * 25))
    # Buttons
    yes_button = pygame.Rect(dialog_rect.x + width // 2 - 100, dialog_rect.y + height - 50, 80, 30)
    no_button = pygame.Rect(dialog_rect.x + width // 2 + 20, dialog_rect.y + height - 50, 80, 30)
    draw_button(screen, "是", yes_button, color=CONFIRM_COLOR)
    draw_button(screen, "否", no_button, color=WARNING_COLOR)
    return dialog_rect, yes_button, no_button
def main():
    """Run the labeling window until the user closes it.

    Keyboard: a/d step one image (hold to repeat), LEFT/RIGHT start
    auto-play, SPACE pauses/resumes auto-play, w/s label the current image
    positive/negative, UP/DOWN start/end a continuous range, x moves the
    labeled files, z/c switch folder, Ctrl+Z undoes, v toggles video
    conversion, ESC dismisses the confirmation dialog. Every on-screen
    button triggers the same action as its key.

    Labels are saved and the process exits when the window is closed.
    """
    # Placeholder root path; change it before real use.
    root_path = r"D:\zero_track\mmaction2\input_videos\test1"
    tool = ImageLabelingTool(root_path)

    # Output folders: "1" receives positive samples, "0" negative samples.
    positive_dir = os.path.join(root_path, "1")
    negative_dir = os.path.join(root_path, "0")
    os.makedirs(positive_dir, exist_ok=True)
    os.makedirs(negative_dir, exist_ok=True)

    running = True
    clock = pygame.time.Clock()

    # Two rows of buttons along the bottom edge of the window.
    button_height = 40
    button_width = 140
    button_margin = 15
    button_row1_y = WINDOW_HEIGHT - button_height - button_margin
    button_row2_y = WINDOW_HEIGHT - 2 * button_height - 2 * button_margin

    def button_rect(column, y):
        """Rectangle of the button in the given 0-based column of a row."""
        x = button_margin * (column + 1) + button_width * column
        return pygame.Rect(x, y, button_width, button_height)

    # Upper row: navigation buttons
    nav_buttons = {
        "prev": button_rect(0, button_row2_y),
        "next": button_rect(1, button_row2_y),
        "prev_folder": button_rect(2, button_row2_y),
        "next_folder": button_rect(3, button_row2_y),
        "undo": button_rect(4, button_row2_y),
    }
    # Lower row: labeling buttons
    label_buttons = {
        "positive": button_rect(0, button_row1_y),
        "negative": button_rect(1, button_row1_y),
        "continuous_start": button_rect(2, button_row1_y),
        "continuous_end": button_rect(3, button_row1_y),
        "move_files": button_rect(4, button_row1_y),
    }
    # Area in which the current image is shown
    image_area = pygame.Rect(50, 80, WINDOW_WIDTH - 100, WINDOW_HEIGHT - 220)

    # Time of the last a/d key press; set here before handle_key_repeats() runs.
    tool.key_pressed_time = 0

    def undo_last():
        if tool.undo():
            print("已撤销上一次操作")
        else:
            print("没有可撤销的操作")

    def move_files():
        moved = tool.move_labeled_files(positive_dir, negative_dir)
        print(f"已移动 {moved} 个文件")

    def start_range():
        if not tool.start_continuous_labeling():
            print("无法开始连续标记")

    def end_range():
        if not tool.end_continuous_labeling():
            print("没有激活的连续标记")

    def start_autoplay(direction):
        tool.play_direction = direction
        tool.playing = True
        tool.last_play_tick = pygame.time.get_ticks()

    # Buttons do not overlap, so the first rectangle hit decides the action.
    click_actions = (
        (nav_buttons["prev"], tool.prev_image),
        (nav_buttons["next"], tool.next_image),
        (nav_buttons["prev_folder"], tool.prev_folder),
        (nav_buttons["next_folder"], tool.next_folder),
        (nav_buttons["undo"], undo_last),
        (label_buttons["positive"], lambda: tool.label_current_image("positive")),
        (label_buttons["negative"], lambda: tool.label_current_image("negative")),
        (label_buttons["continuous_start"], start_range),
        (label_buttons["continuous_end"], end_range),
        (label_buttons["move_files"], move_files),
    )
    # Captions in drawing order.
    button_captions = (
        ("上一张 (a)", nav_buttons["prev"]),
        ("下一张 (d)", nav_buttons["next"]),
        ("上个文件夹 (z)", nav_buttons["prev_folder"]),
        ("下个文件夹 (c)", nav_buttons["next_folder"]),
        ("撤销 (Ctrl+Z)", nav_buttons["undo"]),
        ("正样本 (w)", label_buttons["positive"]),
        ("负样本 (s)", label_buttons["negative"]),
        ("开始连续标(↑)", label_buttons["continuous_start"]),
        ("结束连续标(↓)", label_buttons["continuous_end"]),
        ("移动文件 (x)", label_buttons["move_files"]),
    )

    while running:
        mouse_pos = pygame.mouse.get_pos()

        # Long-press repeat for a/d
        tool.handle_key_repeats()

        # Auto-play: advance one image every play_interval milliseconds
        if tool.playing:
            now = pygame.time.get_ticks()
            if now - tool.last_play_tick > tool.play_interval:
                if tool.play_direction == 1:
                    tool.next_image()
                else:
                    tool.prev_image()
                tool.last_play_tick = now

        for event in pygame.event.get():
            if event.type == QUIT:
                running = False
            elif event.type == KEYDOWN:
                if event.key == K_d:
                    tool.key_pressed["right"] = True
                    tool.key_pressed["left"] = False
                    tool.key_pressed_time = time.time()
                    tool.next_image()  # respond once immediately
                elif event.key == K_a:
                    tool.key_pressed["left"] = True
                    tool.key_pressed["right"] = False
                    tool.key_pressed_time = time.time()
                    tool.prev_image()  # respond once immediately
                elif event.key == K_RIGHT:
                    start_autoplay(1)
                elif event.key == K_LEFT:
                    start_autoplay(-1)
                elif event.key == K_SPACE:
                    tool.playing = not tool.playing
                    if tool.playing:
                        tool.last_play_tick = pygame.time.get_ticks()
                elif event.key == K_v:
                    tool.convert_to_video = not tool.convert_to_video
                    print("转视频模式:" + ("开启" if tool.convert_to_video else "关闭"))
                elif event.key == K_w:
                    tool.label_current_image("positive")
                elif event.key == K_s:
                    tool.label_current_image("negative")
                elif event.key == K_UP:
                    start_range()
                elif event.key == K_DOWN:
                    end_range()
                elif event.key == K_x:
                    move_files()
                elif event.key == K_c:
                    tool.next_folder()
                # Ctrl+Z must be tested before plain z, otherwise the plain-z
                # branch swallows it and undo can never be reached.
                elif event.key == K_z and (pygame.key.get_mods() & KMOD_CTRL):
                    undo_last()
                elif event.key == K_z:
                    tool.prev_folder()
                elif event.key == K_ESCAPE:
                    if tool.show_confirm_dialog:
                        tool.show_confirm_dialog = False
            elif event.type == KEYUP:
                if event.key == K_d:
                    tool.key_pressed["right"] = False
                    tool.last_key_time = 0  # reset the repeat timer
                elif event.key == K_a:
                    tool.key_pressed["left"] = False
                    tool.last_key_time = 0  # reset the repeat timer
            elif event.type == MOUSEBUTTONDOWN and event.button == 1:
                click_pos = event.pos
                if tool.show_confirm_dialog:
                    # While the dialog is open only its two buttons react.
                    _, yes_button, no_button = draw_confirm_dialog(screen, tool.confirm_message)
                    if yes_button.collidepoint(click_pos):
                        tool.show_confirm_dialog = False
                        if tool.confirm_action == "next_folder":
                            tool.current_folder_index += 1
                            tool.load_current_folder_images()
                    elif no_button.collidepoint(click_pos):
                        tool.show_confirm_dialog = False
                else:
                    for rect, action in click_actions:
                        if rect.collidepoint(click_pos):
                            action()
                            break

        # Clear the frame
        screen.fill(BG_COLOR)

        # Folder info
        if tool.folders:
            folder_text = f"当前文件夹: {os.path.basename(tool.folders[tool.current_folder_index])} ({tool.current_folder_index + 1}/{len(tool.folders)})"
            text_surface = small_font.render(folder_text, True, TEXT_COLOR)
            screen.blit(text_surface, (20, 20))

        # Current image
        current_image_path = tool.get_current_image()
        if current_image_path and os.path.exists(current_image_path):
            try:
                img = pygame.image.load(current_image_path)
                img_rect = img.get_rect()
                # Scale to fit the image area while keeping the aspect ratio
                scale = min(image_area.width / img_rect.width, image_area.height / img_rect.height)
                new_size = (int(img_rect.width * scale), int(img_rect.height * scale))
                img = pygame.transform.smoothscale(img, new_size)
                img_rect = img.get_rect(center=image_area.center)
                screen.blit(img, img_rect)

                # File name, position and label, centered above the image
                info_text = f"{os.path.basename(current_image_path)} ({tool.current_image_index + 1}/{len(tool.images)})"
                if current_image_path in tool.labels:
                    label = tool.labels[current_image_path]
                    info_text += f" - 已标记: {'正样本' if label == 'positive' else '负样本'}"
                text_surface = font.render(info_text, True, TEXT_COLOR)
                text_rect = text_surface.get_rect(center=(WINDOW_WIDTH // 2, image_area.y - 20))
                screen.blit(text_surface, text_rect)

                # Pending continuous range: text plus a bar under the image
                if tool.continuous_mode and tool.continuous_start_index is not None:
                    start_idx = min(tool.continuous_start_index, tool.current_image_index)
                    end_idx = max(tool.continuous_start_index, tool.current_image_index)
                    range_text = f"标记范围: {start_idx + 1} - {end_idx + 1}"
                    range_surface = small_font.render(range_text, True, HIGHLIGHT_COLOR)
                    screen.blit(range_surface, (20, 50))
                    marker_width = image_area.width / len(tool.images)
                    start_x = image_area.x + start_idx * marker_width
                    end_x = image_area.x + (end_idx + 1) * marker_width
                    pygame.draw.rect(screen, HIGHLIGHT_COLOR,
                                     (start_x, image_area.y + image_area.height + 5,
                                      end_x - start_x, 5))
            except Exception as e:
                # Best-effort display: show the error instead of crashing the loop.
                error_text = f"无法加载图片: {e}"
                text_surface = font.render(error_text, True, (255, 0, 0))
                screen.blit(text_surface, (image_area.centerx - text_surface.get_width() // 2, image_area.centery - text_surface.get_height() // 2))
        else:
            no_image_text = "没有图片可显示"
            text_surface = font.render(no_image_text, True, TEXT_COLOR)
            screen.blit(text_surface, (image_area.centerx - text_surface.get_width() // 2, image_area.centery - text_surface.get_height() // 2))

        # Continuous-mode status, right-aligned
        if tool.continuous_mode:
            mode_text = f"连续标记模式已启动 - 标记类型: {'正样本' if tool.continuous_label == 'positive' else '负样本'}"
            text_surface = small_font.render(mode_text, True, HIGHLIGHT_COLOR)
            screen.blit(text_surface, (WINDOW_WIDTH - text_surface.get_width() - 20, 50))

        # Buttons, highlighted while hovered
        for caption, rect in button_captions:
            draw_button(screen, caption, rect, rect.collidepoint(mouse_pos))

        # Confirmation dialog on top of everything else
        if tool.show_confirm_dialog:
            draw_confirm_dialog(screen, tool.confirm_message)

        pygame.display.flip()
        clock.tick(30)

    # Persist labels before leaving
    tool.save_labels()
    pygame.quit()
    sys.exit()
# Start the labeling window only when this file is executed as a script.
if __name__ == "__main__":
    main()
2.7 基于2.4的新增负样本模式(已废弃)
我想新增一个变量,用于切换标注方式。现在的逻辑是:当auto_neg_mode为1时,第一个正样本之前的图片帧会被视为负样本。我希望可以切换到以负样本为主的标注方式:用户只标负样本,按x之后会把负样本的视频帧区间合成视频片段并放到0文件夹;第一个负样本之前的图片帧不会被自动补标为正样本,而是会从当前读取的图片帧中丢弃。
import os
import pygame
import sys
import shutil
import time
import json
from pygame.locals import *
import cv2 # pip install opencv-python
# Initialize pygame (required before querying the display or creating fonts)
pygame.init()
# Configuration: the window is the screen size minus 100 px in each dimension
SCREEN_WIDTH, SCREEN_HEIGHT = pygame.display.Info().current_w, pygame.display.Info().current_h
WINDOW_WIDTH, WINDOW_HEIGHT = SCREEN_WIDTH - 100, SCREEN_HEIGHT - 100
# RGB colors used by the UI
BG_COLOR = (40, 44, 52)
TEXT_COLOR = (220, 220, 220)
HIGHLIGHT_COLOR = (97, 175, 239)
BUTTON_COLOR = (56, 58, 66)
BUTTON_HOVER_COLOR = (72, 74, 82)
WARNING_COLOR = (255, 152, 0)
CONFIRM_COLOR = (76, 175, 80)
# Create the window; `screen` is the surface every draw function paints on
screen = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("图像分类标注工具")
# Fonts: SimHei is requested so that the Chinese UI strings can be rendered
font = pygame.font.SysFont("SimHei", 24)
small_font = pygame.font.SysFont("SimHei", 18)
class ImageLabelingTool:
    """State and file operations behind the binary image labeling window.

    The tool walks ``root_path`` for folders containing images, keeps a
    ``path -> 'positive' / 'negative'`` label map (backed up to
    ``labels_backup.json`` in the root path), and can either move labeled
    images into the output folders or encode them as MP4 clips.
    """

    def __init__(self, root_path):
        """Scan ``root_path`` for image folders and load any saved labels."""
        self.root_path = root_path
        self.folders = []                 # absolute paths of folders containing images
        self.current_folder_index = 0     # index into self.folders
        self.images = []                  # absolute image paths of the current folder
        self.current_image_index = 0      # index into self.images
        self.labels = {}                  # path -> 'positive' / 'negative'
        # Automatic fill-in mode:
        # 0 = off, 1 = only before the first explicit label, 2 = every unlabeled image
        self.auto_neg_mode = 1
        # Labeling mode:
        # "positive" = the user labels positives, negatives are filled in
        # "negative" = the user labels negatives only
        self.labeling_mode = "negative"
        self.convert_to_video = True      # encode labeled frames as video instead of moving them
        self.video_fps = 10               # frame rate of the generated clips
        # Auto-play
        self.playing = False              # whether auto-play is running
        self.play_direction = 1           # 1 = forward, -1 = backward
        self.last_play_tick = 0           # tick of the last automatic step
        self.play_interval = 100          # milliseconds between automatic steps
        # Continuous labeling
        self.continuous_mode = False      # whether a range is currently open
        self.continuous_label = None      # label applied to the whole range
        self.continuous_start_index = None  # index where the range started
        # Long-press handling
        self.key_pressed = {"left": False, "right": False}
        # Read by handle_key_repeats(); initialised here so the method is
        # safe even if the caller never assigns it.
        self.key_pressed_time = 0
        self.last_key_time = 0            # time of the last repeat, 0 = none yet
        self.key_repeat_delay = 0.8       # seconds before the first repeat
        self.key_repeat_interval = 0.15   # seconds between later repeats
        # Undo history
        self.undo_stack = []
        self.max_undo_steps = 50
        # Confirmation dialog state
        self.show_confirm_dialog = False
        self.confirm_message = ""
        self.confirm_action = ""          # action to run when the user confirms
        self.find_image_folders()
        if self.folders:
            self.load_current_folder_images()
        self.load_labels()

    def images_to_video(self, image_paths, output_path, fps=10):
        """Encode ``image_paths`` as an MP4 and delete the frames that were written.

        The frame size is taken from the first image. Source images are only
        deleted after the writer was opened successfully and released, so a
        failed encode never destroys the frames.

        Returns:
            True if a video was written, False otherwise.
        """
        if not image_paths:
            return False
        first = cv2.imread(image_paths[0])
        if first is None:  # cv2.imread signals failure by returning None
            print(f"无法读取图片: {image_paths[0]}")
            return False
        h, w = first.shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
        if not out.isOpened():
            out.release()
            print(f"无法创建视频: {output_path}")
            return False
        written = []
        try:
            for img_path in image_paths:
                frame = cv2.imread(img_path)
                if frame is None:
                    print(f"无法读取图片: {img_path}")
                    continue
                out.write(frame)
                written.append(img_path)
        finally:
            out.release()
        # Remove only the frames that actually went into the video.
        for img_path in written:
            if os.path.exists(img_path):
                os.remove(img_path)
        return True

    def find_image_folders(self):
        """Collect every folder under the root path that contains an image."""
        image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')
        for root, dirs, files in os.walk(self.root_path):
            # Sorting in place makes the traversal, and therefore the folder
            # indexes, deterministic across runs.
            dirs.sort()
            if any(file.lower().endswith(image_extensions) for file in files):
                self.folders.append(root)

    def load_current_folder_images(self):
        """Load the sorted image list of the current folder and rewind to index 0."""
        self.images = []
        self.current_image_index = 0
        # Range indexes refer to the old list; drop any open range.
        self.continuous_mode = False
        self.continuous_start_index = None
        if not self.folders:
            return
        folder_path = self.folders[self.current_folder_index]
        image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')
        self.images = sorted(
            os.path.join(folder_path, file)
            for file in os.listdir(folder_path)
            if file.lower().endswith(image_extensions)
        )

    def get_current_image(self):
        """Return the path of the current image, or None if there is none."""
        if not self.images:
            return None
        return self.images[self.current_image_index]

    def next_image(self):
        """Step to the next image; return False when already at the last one.

        Navigation is not recorded in the undo history (same as
        ``prev_image``), so auto-play and key repeat cannot push labeling
        actions out of the bounded undo stack.
        """
        if self.current_image_index < len(self.images) - 1:
            self.current_image_index += 1
            return True
        return False

    def prev_image(self):
        """Step to the previous image; return False when already at the first one."""
        if self.current_image_index > 0:
            self.current_image_index -= 1
            return True
        return False

    def label_current_image(self, label):
        """Assign ``label`` to the current image and persist the labels."""
        current_image = self.get_current_image()
        if current_image:
            self.save_state()
            self.labels[current_image] = label
            self.save_labels()

    def start_continuous_labeling(self):
        """Open a continuous range at the current image.

        The range label is the current image's label, or "positive" if it
        has none. Returns False when there is no current image.
        """
        current_image = self.get_current_image()
        if not current_image:
            return False
        self.save_state()
        if current_image in self.labels:
            self.continuous_label = self.labels[current_image]
        else:
            self.continuous_label = "positive"
            self.labels[current_image] = self.continuous_label
        self.continuous_mode = True
        self.continuous_start_index = self.current_image_index
        self.save_labels()
        return True

    def end_continuous_labeling(self):
        """Label every image between the range start and the current image.

        Returns False when no range is open.
        """
        if not (self.continuous_mode and self.continuous_start_index is not None):
            return False
        self.save_state()
        start = min(self.continuous_start_index, self.current_image_index)
        end = max(self.continuous_start_index, self.current_image_index)
        for img_path in self.images[start:end + 1]:
            self.labels[img_path] = self.continuous_label
        self.continuous_mode = False
        self.continuous_start_index = None
        self.save_labels()
        return True

    @staticmethod
    def _frame_index(path):
        """Parse the trailing frame number of names like ``frame_0010.jpg`` or ``0010.jpg``."""
        name = os.path.splitext(os.path.basename(path))[0]
        try:
            return int(name.split('_')[-1])
        except ValueError:
            return None

    def move_labeled_files(self, positive_dir, negative_dir):
        """Move (or encode as video) every labeled image that still exists.

        Before moving, labels are adjusted according to ``labeling_mode``
        and ``auto_neg_mode``:

        * positive mode: unlabeled images before the first positive (mode 1)
          or all unlabeled images (mode 2) of the current folder become
          negative;
        * otherwise: labels of current-folder images that precede the first
          negative are dropped so those frames are not processed. Labels
          belonging to other folders are kept.

        Images are grouped per source folder and label, so one video never
        mixes frames from different folders. If a group fails to encode,
        its labels are kept so the move can be retried.

        Returns:
            Number of image files moved or handed to the video encoder.
        """
        from collections import defaultdict

        os.makedirs(positive_dir, exist_ok=True)
        os.makedirs(negative_dir, exist_ok=True)

        if self.auto_neg_mode:
            if self.labeling_mode == "positive":
                pos_idx = [i for i, p in enumerate(self.images)
                           if self.labels.get(p) == 'positive']
                if pos_idx:
                    first_pos = pos_idx[0]
                    end_idx = len(self.images) if self.auto_neg_mode == 2 else first_pos
                    for p in self.images[:end_idx]:
                        self.labels.setdefault(p, 'negative')  # only fill unlabeled
            else:
                neg_idx = [i for i, p in enumerate(self.images)
                           if self.labels.get(p) == 'negative']
                if neg_idx:
                    first_neg = neg_idx[0]
                    for p in self.images[:first_neg]:
                        self.labels.pop(p, None)
                    self.images = self.images[first_neg:]
                    self.current_image_index = 0

        groups = defaultdict(list)
        for img_path, label in self.labels.items():
            if label in ("positive", "negative") and os.path.exists(img_path):
                groups[(os.path.dirname(img_path), label)].append(img_path)

        moved = 0
        for (folder, label), image_paths in groups.items():
            image_paths.sort()
            dest_dir = positive_dir if label == "positive" else negative_dir
            if self.convert_to_video:
                start_idx = self._frame_index(image_paths[0])
                end_idx = self._frame_index(image_paths[-1])
                if start_idx is not None and end_idx is not None:
                    range_str = f"{start_idx}to{end_idx}"
                else:
                    range_str = "unknown_range"
                folder_name = os.path.basename(folder)
                video_name = f"{folder_name}_{label}_{range_str}.mp4"
                video_path = os.path.join(dest_dir, video_name)
                if not self.images_to_video(image_paths, video_path, fps=self.video_fps):
                    continue
            else:
                for img_path in image_paths:
                    shutil.move(img_path, os.path.join(dest_dir, os.path.basename(img_path)))
            for img_path in image_paths:
                self.labels.pop(img_path, None)
            moved += len(image_paths)

        self.load_current_folder_images()
        self.save_labels()
        return moved

    def next_folder(self):
        """Switch to the next folder, asking for confirmation first if needed.

        When the current folder still has labeled images on disk, the
        confirmation dialog is requested and False is returned; the caller
        performs the switch once the user confirms.
        """
        if self.current_folder_index >= len(self.folders) - 1:
            return False
        current_folder = self.folders[self.current_folder_index]
        # Compare the parent directory rather than a string prefix, so that
        # e.g. "clip" does not match images stored in "clip2".
        has_unmoved_labels = any(
            os.path.dirname(img_path) == current_folder and os.path.exists(img_path)
            for img_path in self.labels
        )
        if has_unmoved_labels:
            self.show_confirm_dialog = True
            self.confirm_action = "next_folder"
            self.confirm_message = "当前文件夹有未移动的标记文件,确定要切换到下一个文件夹吗?"
            return False
        self.current_folder_index += 1
        self.load_current_folder_images()
        return True

    def prev_folder(self):
        """Switch to the previous folder; return False when already at the first."""
        if self.current_folder_index > 0:
            self.current_folder_index -= 1
            self.load_current_folder_images()
            return True
        return False

    def handle_key_repeats(self):
        """Repeat the held navigation key: first after the delay, then per interval."""
        if not any(self.key_pressed.values()):
            return
        current_time = time.time()
        if self.last_key_time == 0:
            # No repeat yet for this press: wait for the longer initial delay.
            due = current_time - self.key_pressed_time > self.key_repeat_delay
        else:
            due = current_time - self.last_key_time > self.key_repeat_interval
        if not due:
            return
        if self.key_pressed["left"]:
            self.prev_image()
        elif self.key_pressed["right"]:
            self.next_image()
        self.last_key_time = current_time

    def save_state(self):
        """Push a snapshot of the labeling state onto the bounded undo stack."""
        if len(self.undo_stack) >= self.max_undo_steps:
            self.undo_stack.pop(0)  # drop the oldest snapshot
        self.undo_stack.append({
            "current_image_index": self.current_image_index,
            "labels": self.labels.copy(),
            "continuous_mode": self.continuous_mode,
            "continuous_start_index": self.continuous_start_index,
            "continuous_label": self.continuous_label
        })

    def undo(self):
        """Restore the most recent snapshot; return False if there is none.

        The restored image index is clamped to the current image list (the
        snapshot may have been taken in another folder) and the restored
        labels are written to the backup file.
        """
        if not self.undo_stack:
            return False
        state = self.undo_stack.pop()
        last_index = max(len(self.images) - 1, 0)
        self.current_image_index = min(max(state["current_image_index"], 0), last_index)
        self.labels = state["labels"]
        self.continuous_mode = state["continuous_mode"]
        self.continuous_start_index = state["continuous_start_index"]
        self.continuous_label = state["continuous_label"]
        self.save_labels()
        return True

    def save_labels(self):
        """Write the labels of files that still exist to ``labels_backup.json``."""
        labels_file = os.path.join(self.root_path, "labels_backup.json")
        try:
            existing_labels = {k: v for k, v in self.labels.items() if os.path.exists(k)}
            with open(labels_file, 'w', encoding='utf-8') as f:
                json.dump(existing_labels, f)
        except (OSError, TypeError, ValueError) as e:
            print(f"保存标记状态失败: {e}")

    def load_labels(self):
        """Load labels from ``labels_backup.json``; keep current labels on any failure."""
        labels_file = os.path.join(self.root_path, "labels_backup.json")
        if not os.path.exists(labels_file):
            return
        try:
            with open(labels_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            print(f"加载标记状态失败: {e}")
            return
        # Anything but a JSON object would break later dict operations.
        if not isinstance(loaded, dict):
            print("加载标记状态失败: 标记文件内容不是字典")
            return
        self.labels = loaded
def draw_button(screen, text, rect, hover=False, color=None):
    """Paint a rounded button with a centered caption onto ``screen``.

    When ``color`` is omitted the fill is picked from the hover / normal
    button colors; an explicit ``color`` always wins over ``hover``.
    """
    fill = color
    if fill is None:
        if hover:
            fill = BUTTON_HOVER_COLOR
        else:
            fill = BUTTON_COLOR
    outline = (100, 100, 100)
    # Body first, then the 2px outline on top of it.
    pygame.draw.rect(screen, fill, rect, border_radius=5)
    pygame.draw.rect(screen, outline, rect, 2, border_radius=5)
    # Caption centered on the button rectangle.
    caption = small_font.render(text, True, TEXT_COLOR)
    screen.blit(caption, caption.get_rect(center=rect.center))
def draw_confirm_dialog(screen, message, width=400, height=200):
    """Draw a centered yes/no confirmation dialog and return its rectangles.

    The message is wrapped to the dialog width. Wrapping is word-based, but
    a single "word" wider than the dialog (typically CJK text, which has no
    spaces) is broken character by character so it no longer overflows.

    Returns:
        ``(dialog_rect, yes_button, no_button)`` as ``pygame.Rect`` objects,
        which callers use for click hit-testing.
    """
    dialog_rect = pygame.Rect(
        (WINDOW_WIDTH - width) // 2,
        (WINDOW_HEIGHT - height) // 2,
        width, height
    )
    # Background and border
    pygame.draw.rect(screen, BG_COLOR, dialog_rect, border_radius=10)
    pygame.draw.rect(screen, TEXT_COLOR, dialog_rect, 2, border_radius=10)

    # Wrap the message; 20px padding on each side of the text.
    max_text_width = width - 40
    lines = []
    current_line = ""
    for word in message.split():
        candidate = current_line + word + " "
        if small_font.size(candidate)[0] < max_text_width:
            current_line = candidate
            continue
        # The word does not fit on the current line: flush what we have
        # (but never emit an empty line) and place the word on a new one.
        if current_line:
            lines.append(current_line)
            current_line = ""
        for ch in word:
            if current_line and small_font.size(current_line + ch)[0] >= max_text_width:
                lines.append(current_line)
                current_line = ch
            else:
                current_line += ch
        current_line += " "
    if current_line:
        lines.append(current_line)

    for i, line in enumerate(lines):
        text_surface = small_font.render(line, True, TEXT_COLOR)
        screen.blit(text_surface, (dialog_rect.x + 20, dialog_rect.y + 30 + i * 25))

    # Yes / no buttons along the bottom edge
    yes_button = pygame.Rect(dialog_rect.x + width // 2 - 100, dialog_rect.y + height - 50, 80, 30)
    no_button = pygame.Rect(dialog_rect.x + width // 2 + 20, dialog_rect.y + height - 50, 80, 30)
    draw_button(screen, "是", yes_button, color=CONFIRM_COLOR)
    draw_button(screen, "否", no_button, color=WARNING_COLOR)
    return dialog_rect, yes_button, no_button
def main():
    """Run the labeling window until the user closes it.

    Keyboard: a/d step one image (hold to repeat), LEFT/RIGHT start
    auto-play, SPACE pauses/resumes auto-play, w/s label the current image
    positive/negative, UP/DOWN start/end a continuous range, x moves the
    labeled files, z/c switch folder, Ctrl+Z undoes, v toggles video
    conversion, ESC dismisses the confirmation dialog. Every on-screen
    button triggers the same action as its key; the extra mode button
    toggles between positive-first and negative-first labeling.

    Labels are saved and the process exits when the window is closed.
    """
    # Placeholder root path; change it before real use.
    root_path = r"D:\zero_track\mmaction2\input_videos\test1"
    tool = ImageLabelingTool(root_path)

    # Output folders: "1" receives positive samples, "0" negative samples.
    positive_dir = os.path.join(root_path, "1")
    negative_dir = os.path.join(root_path, "0")
    os.makedirs(positive_dir, exist_ok=True)
    os.makedirs(negative_dir, exist_ok=True)

    running = True
    clock = pygame.time.Clock()

    # Two rows of buttons along the bottom edge of the window.
    button_height = 40
    button_width = 140
    button_margin = 15
    button_row1_y = WINDOW_HEIGHT - button_height - button_margin
    button_row2_y = WINDOW_HEIGHT - 2 * button_height - 2 * button_margin

    def button_rect(column, y):
        """Rectangle of the button in the given 0-based column of a row."""
        x = button_margin * (column + 1) + button_width * column
        return pygame.Rect(x, y, button_width, button_height)

    # Upper row: navigation buttons
    nav_buttons = {
        "prev": button_rect(0, button_row2_y),
        "next": button_rect(1, button_row2_y),
        "prev_folder": button_rect(2, button_row2_y),
        "next_folder": button_rect(3, button_row2_y),
        "undo": button_rect(4, button_row2_y),
    }
    # Labeling-mode toggle, placed right after the navigation buttons
    mode_button = button_rect(5, button_row2_y)
    # Lower row: labeling buttons
    label_buttons = {
        "positive": button_rect(0, button_row1_y),
        "negative": button_rect(1, button_row1_y),
        "continuous_start": button_rect(2, button_row1_y),
        "continuous_end": button_rect(3, button_row1_y),
        "move_files": button_rect(4, button_row1_y),
    }
    # Area in which the current image is shown
    image_area = pygame.Rect(50, 80, WINDOW_WIDTH - 100, WINDOW_HEIGHT - 220)

    # Time of the last a/d key press, read by tool.handle_key_repeats().
    tool.key_pressed_time = 0

    def undo_last():
        if tool.undo():
            print("已撤销上一次操作")
        else:
            print("没有可撤销的操作")

    def move_files():
        moved = tool.move_labeled_files(positive_dir, negative_dir)
        print(f"已移动 {moved} 个文件")

    def start_range():
        if not tool.start_continuous_labeling():
            print("无法开始连续标记")

    def end_range():
        if not tool.end_continuous_labeling():
            print("没有激活的连续标记")

    def toggle_mode():
        tool.labeling_mode = "negative" if tool.labeling_mode == "positive" else "positive"
        print(f"切换到{'负样本' if tool.labeling_mode == 'negative' else '正样本'}为主模式")

    def start_autoplay(direction):
        tool.play_direction = direction
        tool.playing = True
        tool.last_play_tick = pygame.time.get_ticks()

    # Buttons do not overlap, so the first rectangle hit decides the action.
    click_actions = (
        (nav_buttons["prev"], tool.prev_image),
        (nav_buttons["next"], tool.next_image),
        (nav_buttons["prev_folder"], tool.prev_folder),
        (nav_buttons["next_folder"], tool.next_folder),
        (nav_buttons["undo"], undo_last),
        (mode_button, toggle_mode),
        (label_buttons["positive"], lambda: tool.label_current_image("positive")),
        (label_buttons["negative"], lambda: tool.label_current_image("negative")),
        (label_buttons["continuous_start"], start_range),
        (label_buttons["continuous_end"], end_range),
        (label_buttons["move_files"], move_files),
    )
    # Captions in drawing order (the mode button is drawn between the groups).
    nav_captions = (
        ("上一张 (a)", nav_buttons["prev"]),
        ("下一张 (d)", nav_buttons["next"]),
        ("上个文件夹 (z)", nav_buttons["prev_folder"]),
        ("下个文件夹 (c)", nav_buttons["next_folder"]),
        ("撤销 (Ctrl+Z)", nav_buttons["undo"]),
    )
    label_captions = (
        ("正样本 (w)", label_buttons["positive"]),
        ("负样本 (s)", label_buttons["negative"]),
        ("开始连续标(↑)", label_buttons["continuous_start"]),
        ("结束连续标(↓)", label_buttons["continuous_end"]),
        ("移动文件 (x)", label_buttons["move_files"]),
    )

    while running:
        mouse_pos = pygame.mouse.get_pos()

        # Long-press repeat for a/d
        tool.handle_key_repeats()

        # Auto-play: advance one image every play_interval milliseconds
        if tool.playing:
            now = pygame.time.get_ticks()
            if now - tool.last_play_tick > tool.play_interval:
                if tool.play_direction == 1:
                    tool.next_image()
                else:
                    tool.prev_image()
                tool.last_play_tick = now

        for event in pygame.event.get():
            if event.type == QUIT:
                running = False
            elif event.type == KEYDOWN:
                if event.key == K_d:
                    tool.key_pressed["right"] = True
                    tool.key_pressed["left"] = False
                    tool.key_pressed_time = time.time()
                    tool.next_image()  # respond once immediately
                elif event.key == K_a:
                    tool.key_pressed["left"] = True
                    tool.key_pressed["right"] = False
                    tool.key_pressed_time = time.time()
                    tool.prev_image()  # respond once immediately
                elif event.key == K_RIGHT:
                    start_autoplay(1)
                elif event.key == K_LEFT:
                    start_autoplay(-1)
                elif event.key == K_SPACE:
                    tool.playing = not tool.playing
                    if tool.playing:
                        tool.last_play_tick = pygame.time.get_ticks()
                elif event.key == K_v:
                    tool.convert_to_video = not tool.convert_to_video
                    print("转视频模式:" + ("开启" if tool.convert_to_video else "关闭"))
                elif event.key == K_w:
                    tool.label_current_image("positive")
                elif event.key == K_s:
                    tool.label_current_image("negative")
                elif event.key == K_UP:
                    start_range()
                elif event.key == K_DOWN:
                    end_range()
                elif event.key == K_x:
                    move_files()
                elif event.key == K_c:
                    tool.next_folder()
                # Ctrl+Z must be tested before plain z, otherwise the plain-z
                # branch swallows it and undo can never be reached.
                elif event.key == K_z and (pygame.key.get_mods() & KMOD_CTRL):
                    undo_last()
                elif event.key == K_z:
                    tool.prev_folder()
                elif event.key == K_ESCAPE:
                    if tool.show_confirm_dialog:
                        tool.show_confirm_dialog = False
            elif event.type == KEYUP:
                if event.key == K_d:
                    tool.key_pressed["right"] = False
                    tool.last_key_time = 0  # reset the repeat timer
                elif event.key == K_a:
                    tool.key_pressed["left"] = False
                    tool.last_key_time = 0  # reset the repeat timer
            elif event.type == MOUSEBUTTONDOWN and event.button == 1:
                click_pos = event.pos
                if tool.show_confirm_dialog:
                    # While the dialog is open only its two buttons react.
                    _, yes_button, no_button = draw_confirm_dialog(screen, tool.confirm_message)
                    if yes_button.collidepoint(click_pos):
                        tool.show_confirm_dialog = False
                        if tool.confirm_action == "next_folder":
                            tool.current_folder_index += 1
                            tool.load_current_folder_images()
                    elif no_button.collidepoint(click_pos):
                        tool.show_confirm_dialog = False
                else:
                    for rect, action in click_actions:
                        if rect.collidepoint(click_pos):
                            action()
                            break

        # Clear the frame
        screen.fill(BG_COLOR)

        # Folder info
        if tool.folders:
            folder_text = f"当前文件夹: {os.path.basename(tool.folders[tool.current_folder_index])} ({tool.current_folder_index + 1}/{len(tool.folders)})"
            text_surface = small_font.render(folder_text, True, TEXT_COLOR)
            screen.blit(text_surface, (20, 20))

        # Current image
        current_image_path = tool.get_current_image()
        if current_image_path and os.path.exists(current_image_path):
            try:
                img = pygame.image.load(current_image_path)
                img_rect = img.get_rect()
                # Scale to fit the image area while keeping the aspect ratio
                scale = min(image_area.width / img_rect.width, image_area.height / img_rect.height)
                new_size = (int(img_rect.width * scale), int(img_rect.height * scale))
                img = pygame.transform.smoothscale(img, new_size)
                img_rect = img.get_rect(center=image_area.center)
                screen.blit(img, img_rect)

                # File name, position and label, centered above the image
                info_text = f"{os.path.basename(current_image_path)} ({tool.current_image_index + 1}/{len(tool.images)})"
                if current_image_path in tool.labels:
                    label = tool.labels[current_image_path]
                    info_text += f" - 已标记: {'正样本' if label == 'positive' else '负样本'}"
                text_surface = font.render(info_text, True, TEXT_COLOR)
                text_rect = text_surface.get_rect(center=(WINDOW_WIDTH // 2, image_area.y - 20))
                screen.blit(text_surface, text_rect)

                # Pending continuous range: text plus a bar under the image
                if tool.continuous_mode and tool.continuous_start_index is not None:
                    start_idx = min(tool.continuous_start_index, tool.current_image_index)
                    end_idx = max(tool.continuous_start_index, tool.current_image_index)
                    range_text = f"标记范围: {start_idx + 1} - {end_idx + 1}"
                    range_surface = small_font.render(range_text, True, HIGHLIGHT_COLOR)
                    screen.blit(range_surface, (20, 50))
                    marker_width = image_area.width / len(tool.images)
                    start_x = image_area.x + start_idx * marker_width
                    end_x = image_area.x + (end_idx + 1) * marker_width
                    pygame.draw.rect(screen, HIGHLIGHT_COLOR,
                                     (start_x, image_area.y + image_area.height + 5,
                                      end_x - start_x, 5))
            except Exception as e:
                # Best-effort display: show the error instead of crashing the loop.
                error_text = f"无法加载图片: {e}"
                text_surface = font.render(error_text, True, (255, 0, 0))
                screen.blit(text_surface, (image_area.centerx - text_surface.get_width() // 2, image_area.centery - text_surface.get_height() // 2))
        else:
            no_image_text = "没有图片可显示"
            text_surface = font.render(no_image_text, True, TEXT_COLOR)
            screen.blit(text_surface, (image_area.centerx - text_surface.get_width() // 2, image_area.centery - text_surface.get_height() // 2))

        # Continuous-mode status, right-aligned
        if tool.continuous_mode:
            mode_text = f"连续标记模式已启动 - 标记类型: {'正样本' if tool.continuous_label == 'positive' else '负样本'}"
            text_surface = small_font.render(mode_text, True, HIGHLIGHT_COLOR)
            screen.blit(text_surface, (WINDOW_WIDTH - text_surface.get_width() - 20, 50))

        # Navigation buttons, highlighted while hovered
        for caption, rect in nav_captions:
            draw_button(screen, caption, rect, rect.collidepoint(mouse_pos))
        # Mode toggle; the caption shows the active mode
        mode_text = f"{'负样本' if tool.labeling_mode == 'negative' else '正样本'}模式"
        draw_button(screen, mode_text, mode_button, mode_button.collidepoint(mouse_pos))
        # Labeling buttons
        for caption, rect in label_captions:
            draw_button(screen, caption, rect, rect.collidepoint(mouse_pos))

        # Confirmation dialog on top of everything else
        if tool.show_confirm_dialog:
            draw_confirm_dialog(screen, tool.confirm_message)

        pygame.display.flip()
        clock.tick(30)

    # Persist labels before leaving
    tool.save_labels()
    pygame.quit()
    sys.exit()
# Start the labeling window only when this file is executed as a script.
if __name__ == "__main__":
    main()
三、数据整合
用os.walk读取一个输入路径(跳过output文件夹),找到这个路径下面所有名字为“1”的文件夹路径,然后逐个遍历,将每个名字为“1” 的文件夹里面的视频文件移动到 新建的 文件夹路径:输入路径下的output下的“1”文件夹。
运行一次后,把my_dir_name = "1"改为my_dir_name = "0"再运行一遍,就整合了所有正样本视频和负样本视频,它们都在输入路径的output下面。
import os
import shutil
# Common video file extensions; files are matched against these
# case-insensitively (the file name is lowercased before the check)
VIDEO_EXTENSIONS = ('.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv', '.rmvb', '.ts')
def move_videos_from_1_folders(input_path):
output_dir = os.path.join(input_path, 'output', my_dir_name)
os.makedirs(output_dir, exist_ok=True)
for root, dirs, files in os.walk(input_path):
# 跳过 output 文件夹
dirs[:] = [d for d in dirs if os.path.join(root, d) != os.path.join(input_path, 'output')]
folder_name = os.path.basename(root)
if folder_name == my_dir_name:
for file in files:
if file.lower().endswith(VIDEO_EXTENSIONS):
src = os.path.join(root, file)
dest = os.path.join(output_dir, file)
# 防止重名
counter = 1
base, ext = os.path.splitext(file)
while os.path.exists(dest):
dest = os.path.join(output_dir, f"{base}_{counter}{ext}")
counter += 1
print(f"移动: {src} -> {dest}")
shutil.move(src, dest)
if __name__ == "__main__":
    # Root folder that holds the labelled clips produced by the annotation tool.
    input_path = r"D:\zero_track\mmaction2\input_videos"
    # Label folder to collect; switch to "0" to gather the negative samples.
    my_dir_name = "1"
    if os.path.isdir(input_path):
        move_videos_from_1_folders(input_path)
        print("处理完成。")
    else:
        print("输入路径无效,请检查。")
四、数据量的问题
4.1 mmaction2训练TSN模型脚踢/非脚踢 两个分类的动作识别,需要多少训练和验证样本、需要多少正负样本?各需要多少视频才能效果好
经验表明,用 MMAction2 训练 TSN 做“脚踢 / 非脚踢”二分类,想“效果比较好”至少需要满足以下数据量:
正样本(脚踢)
100~200 段干净、已精准裁好的“脚踢”片段是“不爆”的底线;若场景复杂或要稳定 90%+ 的召回,建议 300 段左右。
负样本(非脚踢)
按 1:1 ∼ 1:3 的比例配套,即 100 ∼ 600 段。背景越复杂、动作越多样,负样本越要向 1:3 靠。
训练 / 验证划分
随机 8:2 或 9:1 切开即可。上面给出的数字是“训练+验证”的总量,验证集一般占 20% 左右。
总视频数
把正负加在一起,300 ∼ 800 段是“起步价”;低于 200 段几乎必然过拟合,1000 段以上收益逐渐递减但会更稳。
快速验证技巧
如果标注成本高,可先收集几十段正例 + 等量负例(共 ≈100 段)做小规模微调,同时用 VideoMAE V2 之类自监督模型做 backbone,也能在 10-20 epoch 内收敛到可用模型,之后再逐步补充数据迭代。
4.2 每段视频有时间的要求吗?
在 MMAction2 里,TSN 对“每段视频”本身没有强制时长,但模型采样方式决定了“太短”和“太长”都会影响收敛和精度。经验值如下,你可以直接当硬性规范来剪片:
最短
至少包含 1 个完整动作周期。脚踢动作从抬腿到落脚通常 0.5–1 s;因此不要低于 8–10 帧(30 fps 下 ≈0.3 s)。
如果剪得太短,TSN 的稀疏采样容易采到全背景,标签就失效了。
推荐
1–3 s(30–90 帧)是最佳区间(“甜点”):
能包住动作完整边界,又不会引入过多无关背景;
与 Kinetics、Something-Something 等主流数据集的片段长度一致,复用预训练权重时域差异最小。
最长
单段不要超过 5–6 s。
TSN 只从整个视频里均匀抽 K 段、每段取 1 帧(本文使用的 1x1x3 配置中 num_clips=3,即 K=3),片段越长,抽到的帧越稀疏,动作占比被稀释,精度反而会下降;
如果原始视频很长,先用工具(trim_raw_video.py 或 MMAction2 的 trim.py)把脚踢事件精确裁剪成 1–3 s 的小段,再送进训练管道。
统一尺寸
剪完后把帧率统一成 30 fps(或 25 fps),分辨率 256×256 或 340×256,这样后续做 RawFrameDecode + Resize 不会重复采样/掉帧。
一句话总结:
脚踢/非脚踢二分类,每段 1–3 s、30–90 帧、包住完整动作,即可让 TSN 在 MMAction2 上稳定收敛;短于 0.3 s 或长于 6 s 都会直接掉点。
五、转数据
我用2.5的脚本标了70个正样本视频,140个负样本视频。为什么正样本跟负样本是1比2,因为标1比3太累了,如果1比1 和 1比2 和1比3 都是可以的话,我们先试一下1比2 吧。然后第三节的代码整合了视频之后就放到了output里面,里面有文件夹0和1,因为我们这里要做一个踢脚的动作识别,所以1里面放踢脚的视频,0里面放不是踢脚的视频。然后写个脚本把它转换为跟kinetics400_tiny一样的格式。
import os
import random
import shutil
# Source data root: contains one sub-folder per class label (0, 1, 2, ...).
src_root = r"D:\zero_track\mmaction2\input_videos\output"  # change to your data path
dst_root = r"D:\zero_track\mmaction2\input_videos\my_kinetics_data"  # output path
# Fraction of every class that goes into the training split.
train_ratio = 0.8


def build_kinetics_style_dataset(src_root, dst_root, train_ratio=0.8, seed=None):
    """Split per-class video folders into a kinetics400_tiny style layout.

    For every class folder under ``src_root`` the videos are shuffled, the
    first ``int(n * train_ratio)`` are copied to ``dst_root/train`` and the
    rest to ``dst_root/val``. One ``"<file name> <class folder name>"`` line
    per copied video is written to ``train_video.txt`` / ``val_video.txt``.

    Compared with a plain ``os.listdir`` copy this:
      * ignores sub-folders and non-video files, which would otherwise end up
        in the annotation lists;
      * renames a clip whose name was already used by another class in the
        same split instead of silently overwriting it;
      * writes the annotation files as UTF-8;
      * accepts ``seed`` so a split can be reproduced.

    Args:
        src_root: Folder holding one sub-folder per class.
        dst_root: Destination folder (created when missing).
        train_ratio: Share of each class assigned to the training split.
        seed: Optional seed for the shuffle; ``None`` keeps it random.

    Returns:
        ``{"train": <copied count>, "val": <copied count>}``.
    """
    video_exts = ('.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv', '.rmvb', '.ts')
    train_dir = os.path.join(dst_root, "train")
    val_dir = os.path.join(dst_root, "val")
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)
    train_txt = os.path.join(dst_root, "train_video.txt")
    val_txt = os.path.join(dst_root, "val_video.txt")

    rng = random.Random(seed)
    counts = {"train": 0, "val": 0}
    # Output names already written in this run, tracked per split.
    used_names = {"train": set(), "val": set()}

    with open(train_txt, "w", encoding="utf-8") as ftrain, \
            open(val_txt, "w", encoding="utf-8") as fval:
        # Sorted so that a fixed seed always yields the same split.
        for cls_name in sorted(os.listdir(src_root)):
            cls_path = os.path.join(src_root, cls_name)
            if not os.path.isdir(cls_path):
                continue
            videos = sorted(
                v for v in os.listdir(cls_path)
                if v.lower().endswith(video_exts)
                and os.path.isfile(os.path.join(cls_path, v))
            )
            rng.shuffle(videos)
            split_idx = int(len(videos) * train_ratio)
            plan = (
                ("train", train_dir, ftrain, videos[:split_idx]),
                ("val", val_dir, fval, videos[split_idx:]),
            )
            for split, out_dir, ann_file, chosen in plan:
                for v in chosen:
                    # Keep the original name unless another class already used it.
                    stem, ext = os.path.splitext(v)
                    out_name = v
                    attempt = 0
                    while out_name in used_names[split]:
                        attempt += 1
                        out_name = f"{stem}_cls{cls_name}_{attempt}{ext}"
                    used_names[split].add(out_name)
                    shutil.copy(os.path.join(cls_path, v),
                                os.path.join(out_dir, out_name))
                    ann_file.write(f"{out_name} {cls_name}\n")
                    counts[split] += 1
    return counts


if __name__ == "__main__":
    build_kinetics_style_dataset(src_root, dst_root, train_ratio)
六、训练
6.2 训练配置
第一次训练,我直接原封不动按照《动作识别5——mmaction2的训练和测试》的方法训练,打开 configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py(按动作识别5修改配置) ,只改了数据名称:
# Name of the dataset folder under data/; change it to match your own dataset.
data_name = "my_kinetics_data"
# Folders holding the training / validation videos.
data_root = f'data/{data_name}/train'
data_root_val = f'data/{data_name}/val'
# Text lists for the two splits (train_video.txt / val_video.txt).
ann_file_train = f'data/{data_name}/train_video.txt'
ann_file_val = f'data/{data_name}/val_video.txt'
然后就训练了(看完第六节内容再训练)
python tools/train.py configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py
6.1 训练结果1(失败):准确率0.6429
因为batch_size=4爆显存了,显存不够,我改了一下batch_size=1
结果是在epoch=10时准确率只有0.6429。训练结果保存在work_dirs/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb/20251205_085342 (时间),最佳的epoch居然是epoch=3时,acc/top1: 0.6667
12/05 08:58:15 - mmengine - INFO - Saving checkpoint at 10 epochs
12/05 08:58:18 - mmengine - INFO - Epoch(val) [10][2/2] acc/top1: 0.6429 acc/top5: 1.0000 acc/mean1: 0.5179 data_time: 0.5654 time: 0.5654 time: 0.7220
把 batch_size 直接砍到 1 以后,“梯度噪声爆炸 + BN 失效 + 学习率相对过大” 三件事同时发生,
10 个 epoch 内模型根本学不动,于是第 3 个 epoch 就提前达到最高点后开始震荡/退化,
0.6667 → 0.6429 只是随机波动,不是“过拟合”而是“训练失败”。
| 问题 | 原理 | 现象 |
|------|------|------|
| BatchNorm 失效 | BN 需要在 batch 维度上统计均值和方差,batch=1 时参与统计的样本极少,均值/方差估计很不稳定,导致输出分布漂移。 | train/val 同时掉点。 |
| 梯度噪声↑ | 单样本梯度方差大,更新方向抖动剧烈。 | loss 曲线锯齿状,acc 震荡。 |
| LR 相对过大 | 官方 8×GPU×32=256 的 LR=0.01,你 batch=1 时等效 LR 放大了 256 倍,一步飞出去。 | loss 发 NaN 或一直不降。 |
6.2 训练结果2:准确率0.7619
还是把batch_size=4,这次突然又不爆显存了,奇怪。这次是在epoch=10的时候最佳,准确率是0.7619。一般刚好在最后一个epoch最佳,可能是epoch没给够
Epoch(val) [10][2/2] acc/top1: 0.7619 acc/top5: 1.0000 acc/mean1: 0.6607 data_time: 0.5662 time: 0.7350
6.3 训练结果3:准确率0.8095
官方给的例子是训练集30个,验证集10个视频,我是训练集168个,验证集42个视频。数据量变大 5 倍。
epoch 数:数据量变大 5 倍,如果担心 10 个 epoch 不够,可加到 20~30。
# Epoch-based training loop: 20 epochs in total (max_epochs=20), val_interval=1.
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=20, val_interval=1)
学习率 schedule | 把 epoch 数改了以后,milestones 要对应拉长。
# Step learning-rate schedule for the 20-epoch run: multiply the LR by 0.1
# at epoch 8 and again at epoch 16.
param_scheduler = [
    dict(
        type='MultiStepLR',
        begin=0,
        # `end` must cover the whole run (max_epochs=20); with end=10 the
        # milestone at epoch 16 lies outside the scheduler's range.
        end=20,
        by_epoch=True,
        milestones=[8, 16],
        gamma=0.1)
]
把 epoch 数改了以后,milestones 要对应拉长。milestones是什么?为什么要对应拉长?
一句话先答:
milestones = [4, 8]告诉学习率调度器“在第 4 个 epoch 末和第 8 个 epoch 末把 LR 乘以 0.1”。
如果你把总 epoch 从 10 改成 20,而 milestones 还停在 [4, 8],那么 LR 过早地连降两次,模型还没充分训练就被“踩刹车”,性能就会掉。因此要把里程碑等比例往后移,例如改成 [8, 16]。
它到底是什么
在 MMAction2(更准确地说是 MMEngine 的 MultiStepLR)里:
param_scheduler = [
    dict(
        type='MultiStepLR',  # 调度器名字
        begin=0,
        end=10,              # 总训练 epoch 数
        by_epoch=True,
        milestones=[4, 8],   # 里程碑
        gamma=0.1)           # 降到 1/10
]
训练过程中,EpochBasedTrainLoop 每跑完一个 epoch 会检查:“当前 epoch 序号是否在 milestones 列表里?”如果在,就把优化器里的学习率乘以 gamma,即 lr *= gamma(这里是 0.1)。
为什么要“等比例拉长”
官方 demo 只有 30 条训练视频,10 epoch 足够;
你现在 168 条视频,往往要把 epoch 数翻倍(20)或翻三倍(30)才能让模型真正“看够”数据。
如果 milestones 仍保持 [4, 8],会出现:
epoch 4 LR 从 0.01 → 0.001
epoch 8 LR 从 0.001 → 0.0001
epoch 9–20 LR 一直 0.0001,几乎不再更新
结果:
前期 LR 骤降,模型还没收敛就进入“微调”阶段;
后期 LR 太小,loss 几乎不动,浪费计算;
最终精度偏低。
怎么“对应拉长”
最简单的做法——按 epoch 比例平移:
| 总 epoch | 推荐 milestones |
|----------|-----------------|
| 10 | [4, 8] |
| 20 | [8, 16] |
| 30 | [12, 24] |
(也可以把第一次降 LR 放在 40%~50% 处,第二次放在 80% 处。)
一句话记住
milestones 是“学习率打折日”,总训练时间变长,就得把打折日往后顺延,否则模型会被提前“掐灭”。
训练结果是在epoch=14的时候是最佳,acc/top1: 0.8095
Epoch(val) [14][2/2] acc/top1: 0.8095 acc/top5: 1.0000 acc/mean1: 0.7143 data_time: 0.5568 time: 0.7272
acc/top1: 0.8095 acc/top5: 1.0000 acc/mean1: 0.7143 这三个指标意味着什么?
这三条数字都是在 验证集(42 个视频) 上算出来的,含义一句话就能分清:
acc/top1: 0.8095
模型“第一次猜”就对的视频占 80.95 %(≈ 34/42)。
acc/top5: 1.0000
把“概率最高的 5 个类别”都算作候选时,42/42 全对。注意本任务只有 2 个类别,top-5 必然覆盖全部类别,所以这个指标恒为 1.0,不提供任何信息,可以忽略。
acc/mean1: 0.7143
这是 每个类别分别算 top-1 准确率再求平均(mean class accuracy)。
你总共有 2 类,结合验证集的实际构成(负样本 28 条、正样本 14 条)可以反推出:
负样本 28 条里对了 28 条 → 100 %
正样本 14 条里对了 6 条 → 42.9 %
平均 (100 + 42.9)/2 ≈ 71.4 %,即报告里的 0.7143(同时 28 + 6 = 34,与 top1 的 34/42 吻合)。
它比“总体 top-1”低,说明 样本少的那个类拖了后腿,也是类别不平衡的典型信号。
一句话总结
模型整体已经不错(80 % 以上)。
由于只有 2 类,acc/top5 恒为 1.0,它并不能说明“没有完全误判”,评估时应以 top1 和 mean1 为准。
小类(样本少的那一类)精度明显更低,想再提升就去 加权采样/加权 loss/数据增强 给小类加料。
七、测试:准确率0.8333
你看下你生成的best_acc_top1_epoch_x.pth那个x是什么数字就是代表你在哪个epoch达到最佳,我的是epoch=14。你的可能有所不同
python tools/test.py configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py work_dirs/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb/best_acc_top1_epoch_14.pth
测试结果:
Epoch(test) [42/42] acc/top1: 0.8333 acc/top5: 1.0000 acc/mean1: 0.7500 data_time: 0.2028 time: 0.7357
啊哈,我没有建测试集,它跑的应该也是验证集,怎么突然从0.8095变成0.8333?可能是因为“做了不同数据增强 / 不同裁剪” 。我们看一下配置文件:
configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py
# Training pipeline: sample frames, decode, resize, multi-scale crop, random flip.
train_pipeline = [
# file_client_args is defined elsewhere in the config file.
dict(type='DecordInit', **file_client_args),
# 3 clips of 1 frame each (clip_len=1, num_clips=3).
dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),
dict(type='DecordDecode'),
dict(type='Resize', scale=(-1, 256)),
# Crop at one of the listed scales, then force the result to 224x224 below.
dict(
type='MultiScaleCrop',
input_size=224,
scales=(1, 0.875, 0.75, 0.66),
random_crop=False,
max_wh_scale_gap=1),
dict(type='Resize', scale=(224, 224), keep_ratio=False),
# Horizontal flip with probability 0.5 (flip_ratio=0.5).
dict(type='Flip', flip_ratio=0.5),
dict(type='FormatShape', input_format='NCHW'),
dict(type='PackActionInputs')
]
# Validation pipeline: same 3-clip sampling as training but in test mode,
# with a single 224 centre crop and no flip.
val_pipeline = [
# file_client_args is defined elsewhere in the config file.
dict(type='DecordInit', **file_client_args),
dict(
type='SampleFrames',
clip_len=1,
frame_interval=1,
num_clips=3,
test_mode=True),
dict(type='DecordDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='CenterCrop', crop_size=224),
dict(type='FormatShape', input_format='NCHW'),
dict(type='PackActionInputs')
]
# Test pipeline: 25 clips per video (num_clips=25) combined with TenCrop,
# i.e. far more views per video than the 3-clip validation pipeline.
test_pipeline = [
# file_client_args is defined elsewhere in the config file.
dict(type='DecordInit', **file_client_args),
dict(
type='SampleFrames',
clip_len=1,
frame_interval=1,
num_clips=25,
test_mode=True),
dict(type='DecordDecode'),
dict(type='Resize', scale=(-1, 256)),
# TenCrop: ten 224 crops per frame (crop_size=224).
dict(type='TenCrop', crop_size=224),
dict(type='FormatShape', input_format='NCHW'),
dict(type='PackActionInputs')
]
把三段 pipeline 摆在一起就能一眼看出为什么 test.py 的指标更高:
| 阶段 | 剪辑数/视图数 | 裁剪方式 | 集成效果 |
|------|---------------|----------|----------|
| train | num_clips=3 | 随机 MultiScaleCrop + 随机 Flip | 单视图,强增广,用于训练 |
| val | num_clips=3 | 单次 CenterCrop | 3 视图 × 1 裁剪 = 3 个特征平均 |
| test | num_clips=25 | TenCrop(10 裁剪) | 25 视图 × 10 裁剪 = 250 个特征平均 |
→ 推理时 250 个 logits 做平均,相当于自带“轻量级模型集成”,噪声被进一步抹平,于是 acc/top1 比只用 3 视图的验证集高出一截(0.8095 → 0.8333)。
一句话总结
test_pipeline 用 25-clips+TenCrop=250 视图投票,val_pipeline 只用 3 视图,
同 42 条视频不同“集成力度”,所以数字好看,不是数据或代码出错。
八、推理:准确率0.8333
参考《动作识别3——mmpose和mmaction2》的第三节,我的验证集视频放在data\my_kinetics_data\val,我随便找个验证集视频试试,我试了几个,推理结果大部分都是负样本,怎么往负样本倾斜啊,我现在怀疑1:2是不是不对,难道1:1比较好。或者说正负样本的权重没有设置?
from mmaction.apis import inference_recognizer, init_recognizer
# Config used for training and the best checkpoint it produced.
config_path = 'configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py'
checkpoint_path = 'work_dirs/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb/best_acc_top1_epoch_14.pth'
img_path = r'data\my_kinetics_data\val\test1_negative_0to34.mp4' # you can point this at your own video
# Build the model from the config file and the checkpoint file.
model = init_recognizer(config_path, checkpoint_path, device="cuda")
# Run inference on a single video.
result = inference_recognizer(model, img_path)
print(f"result:{result}")
推理结果示例
result:<ActionDataSample(
META INFORMATION
num_classes: 2
img_shape: (224, 224)
DATA FIELDS
gt_label: tensor([-1], device='cuda:0')
pred_label: tensor([1], device='cuda:0')
) at 0x1e36c985690>
data\my_kinetics_data\val 里面有所有验证集的视频,data\my_kinetics_data\val_video.txt里面有验证集的真值,其中一行是这样的:test2_negative_19to777_negative_319to348.mp4 0, 后面的数字是0或者1,表示负样本还是正样本。现在我要写一个代码可视化推理结果,就是给定一个输入路径,里面有很多视频,os.walk寻找输入路径下面的所有视频(跳过output文件夹),将每个带有推理结果的视频保存到output下面,视频名称改成output_xxx.mp4,推理结果用比较大的文字显示在视频的左上角,显示的方式是:推理结果:0或者1, 如果跟真值不同,则后面再加上:真值:0或者1
如果没有给出gt_file 或者gt_file 是空的,那就代表没有真值,只显示推理结果,不对比推理结果与真值,最后统计输出总共推理视频数,推理正样本数 推理的负样本数 如果确实有真值,使得最后输出总共推理视频数 推理正样本数 推理的负样本数 实际正样本数 实际的负样本数 正确推理数 错误推理数,正确率
新增一个变量,用于控制将所有推理错误的视频叠加为一个视频,并输出到输入路径/output/output_infer_error.mp4。叠加的视频都是左上角带有推理结果的。
如果你要保存将所有推理错误的合集视频,设置SAVE_ERROR_VIDEO = True;
如果你要关闭保存每个推理视频,设置SAVE_INFER_VIDEO = False
# infer_and_write.py
import os
import cv2
import numpy as np
from tqdm import tqdm
from mmaction.apis import init_recognizer, inference_recognizer
# ========== 参数区 ==========
config_path = r'configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py'
checkpoint_path = r'work_dirs/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb/best_acc_top1_epoch_14.pth'
input_root = r'data\my_kinetics_data\val'  # root directory to scan for videos
gt_file = r'data\my_kinetics_data\val_video.txt'  # ground-truth list; may be empty
SAVE_INFER_VIDEO = True  # whether to save one annotated video per input
output_dir = input_root + os.sep + 'output'  # where result videos are saved
# The directory must exist before any video writer opens a file in it.
os.makedirs(output_dir, exist_ok=True)
SAVE_ERROR_VIDEO = True  # whether to save the merged clip of wrong predictions (output_infer_error.mp4)
device = 'cuda:0'
# ============================
# 读取真值表
def load_gt(path):
    """Load the ground-truth table ``{video file name: int label}``.

    Every non-empty line must be ``<file name> <label>``. The label is the
    last whitespace-separated field, so file names containing spaces and
    tab- or multi-space-separated files are parsed correctly. The file is
    read as UTF-8 and a leading BOM is ignored.

    Args:
        path: Path of the annotation file. A falsy value, a missing file or
            an empty file all mean "no ground truth".

    Returns:
        A dict mapping file name to label; empty when no ground truth exists.

    Raises:
        ValueError: If a line has no label field or the label is not an int.
    """
    gt = {}
    if not path or not os.path.isfile(path) or os.path.getsize(path) == 0:
        return gt
    with open(path, 'r', encoding='utf-8-sig') as f:
        for line_no, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            # Split on any run of whitespace, from the right, exactly once.
            parts = line.rsplit(None, 1)
            if len(parts) != 2:
                raise ValueError(
                    f'{path}:{line_no}: expected "<video> <label>", got {line!r}')
            video_name, label = parts
            try:
                gt[video_name] = int(label)
            except ValueError as err:
                raise ValueError(
                    f'{path}:{line_no}: label {label!r} is not an integer') from err
    return gt
# Ground-truth table built from gt_file via load_gt.
gt_dict = load_gt(gt_file)
# Build the recognizer from the config and checkpoint.
model = init_recognizer(config_path, checkpoint_path, device=device)
# Text rendering parameters passed to cv2.putText in draw_text.
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 2
thickness = 4
def draw_text(frame, text, color):
    """Draw ``text`` in large letters near the top-left corner of ``frame``.

    The text is split on newline characters only (there is no automatic
    wrapping). The first line is drawn at (50, 60) and every further line
    70 px lower. ``frame`` is modified in place.

    Args:
        frame: Image to draw on.
        text: Text to draw; may contain ``\\n`` for several lines.
        color: Colour tuple passed to ``cv2.putText``.
    """
    y0 = 60  # baseline of the first line
    dy = 70  # vertical distance between lines
    for i, line in enumerate(text.split('\n')):
        cv2.putText(frame, line, (50, y0 + i * dy), font, font_scale, color, thickness)
def process_one_video(video_path, save_path):
    """Classify one video, optionally save an annotated copy, update the stats.

    The prediction (and the ground truth when they disagree) is drawn on
    every frame. Frames of wrongly classified videos are appended to
    ``error_clip_list`` when ``SAVE_ERROR_VIDEO`` is on. A video that cannot
    be opened is reported and skipped without touching the counters.

    Args:
        video_path: Path of the input video.
        save_path: Path of the annotated output video; only used when
            ``SAVE_INFER_VIDEO`` is true.

    Side effects:
        Updates the module-level counters (total_videos, pred_pos, pred_neg,
        real_pos, real_neg, correct, wrong, TP, TN, FP, FN).
    """
    global total_videos, pred_pos, pred_neg, real_pos, real_neg, correct, wrong
    global TP, TN, FP, FN

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print(f'[WARN] 无法打开视频: {video_path}')
        return

    out = None
    try:
        # Keep the fractional frame rate (e.g. 29.97); fall back to 25 when the
        # container reports nothing usable, because a writer cannot run at 0 fps.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 25.0
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Only create the writer when per-video output is requested.
        if SAVE_INFER_VIDEO:
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(save_path, fourcc, fps, (w, h))

        # Inference. In MMAction2 1.x inference_recognizer returns an
        # ActionDataSample; older versions returned [(label_id, score), ...].
        result = inference_recognizer(model, video_path)
        pred_label = int(result.pred_label)

        # Ground truth lookup by file name.
        video_name = os.path.basename(video_path)
        true_label = gt_dict.get(video_name)
        if true_label is None:
            if has_gt:
                # Only worth a warning when a ground-truth file was supplied.
                print(f'[WARN] 找不到真值:{video_name},按仅显示推理结果处理')
            text = f'pred:{pred_label}'
            color = (0, 255, 0)  # green
        else:
            match = pred_label == true_label
            color = (0, 255, 0) if match else (0, 0, 255)  # green / red
            text = f'pred: {pred_label}'
            if not match:
                text += f'\nGT:{true_label}'
        print(f"video_name:{video_name}, text:{text}")

        is_wrong = true_label is not None and pred_label != true_label
        collect_error_frames = SAVE_ERROR_VIDEO and is_wrong

        # Read until the decoder runs dry: the reported frame count is only
        # used for the progress bar because it can be missing or inaccurate.
        frames_num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        progress = tqdm(total=frames_num if frames_num > 0 else None,
                        desc=f'Writing {os.path.basename(save_path)}')
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                draw_text(frame, text, color)
                if out is not None:
                    out.write(frame)
                if collect_error_frames:
                    # Store the frame with the text already drawn on it.
                    error_clip_list.append(frame.copy())
                progress.update(1)
        finally:
            progress.close()
    finally:
        # Release the capture and writer even when inference or decoding fails.
        cap.release()
        if out is not None:
            out.release()

    # ---------------- statistics ----------------
    total_videos += 1
    if pred_label == 1:
        pred_pos += 1
    else:
        pred_neg += 1
    if true_label is not None:
        if true_label == 1:
            real_pos += 1
        else:
            real_neg += 1
        # Confusion-matrix counters.
        if pred_label == 1 and true_label == 1:
            TP += 1
        elif pred_label == 0 and true_label == 0:
            TN += 1
        elif pred_label == 1 and true_label == 0:
            FP += 1
        else:  # pred=0, true=1
            FN += 1
        if pred_label == true_label:
            correct += 1
        else:
            wrong += 1
# Main flow
supported_ext = ('.mp4', '.avi', '.mov', '.mkv')
# Global counters updated by process_one_video
total_videos = 0
pred_pos = 0
pred_neg = 0
real_pos = 0
real_neg = 0
correct = 0
wrong = 0
has_gt = bool(gt_dict)  # whether ground truth was supplied
error_clip_list = []  # annotated frames of every wrongly classified video
TP = 0  # predicted 1, truth 1
TN = 0  # predicted 0, truth 0
FP = 0  # predicted 1, truth 0
FN = 0  # predicted 0, truth 1

if SAVE_INFER_VIDEO:
    # The per-video writers need the target directory to exist.
    os.makedirs(output_dir, exist_ok=True)

for root, dirs, files in os.walk(input_root):
    # Skip any folder named "output" so results are never re-processed.
    dirs[:] = [d for d in dirs if d.lower() != 'output']
    for file in sorted(files):
        if file.lower().endswith(supported_ext):
            src = os.path.join(root, file)
            dst = os.path.join(output_dir, f'output_{file}')
            print(f'\n>>> 处理:{src}')
            process_one_video(src, dst)

# Merge the frames of all wrong predictions into one video.
if SAVE_ERROR_VIDEO and error_clip_list:
    os.makedirs(output_dir, exist_ok=True)
    error_video_path = os.path.join(output_dir, 'output_infer_error.mp4')
    h, w = error_clip_list[0].shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out_err = cv2.VideoWriter(
        error_video_path,
        fourcc,
        25,  # fixed 25 fps for the merged clip; change if needed
        (w, h)
    )
    for frm in tqdm(error_clip_list, desc='Writing error merge'):
        # The writer only accepts frames of its own size, so source videos
        # with another resolution are resized instead of being dropped.
        if frm.shape[:2] != (h, w):
            frm = cv2.resize(frm, (w, h))
        out_err.write(frm)
    out_err.release()
    print(f'已生成错误合集视频:{error_video_path}')

# -------------- summary --------------
if has_gt:
    # Accuracy is measured over the videos that actually have ground truth;
    # every ratio falls back to 0 when its denominator is 0.
    evaluated = correct + wrong
    accuracy = correct / evaluated if evaluated else 0.0
    precision = TP / (TP + FP) if (TP + FP) else 0.0
    recall = TP / (TP + FN) if (TP + FN) else 0.0
    print('\n===== 带真值统计 =====')
    print(f'总推理视频数: {total_videos}')
    print(f'推理正样本数: {pred_pos} 推理负样本数: {pred_neg}')
    print(f'实际正样本数: {real_pos} 实际负样本数: {real_neg}')
    print(f'正确推理数: {correct} 错误推理数: {wrong}')
    print(f'正确率: {accuracy:.2%}')
    print(f'TP: {TP} TN: {TN} FP: {FP} FN: {FN}')
    print(f'Precision: {precision:.3f} Recall: {recall:.3f}')
else:
    print('\n===== 无真值统计 =====')
    print(f'总推理视频数: {total_videos}')
    print(f'推理正样本数: {pred_pos} 推理负样本数: {pred_neg}')
print('全部完成!结果保存在', output_dir)
因为每个视频只有2到3秒,根本来不及看左上角的推理结果,所以我直接打印了推理结果。
===== 带真值统计 =====
总推理视频数: 42
推理正样本数: 7 推理负样本数: 35
实际正样本数: 14 实际负样本数: 28
正确推理数: 35 错误推理数: 7
正确率: 83.33%
TP: 7 TN: 28 FP: 0 FN: 7
Precision: 1.000 Recall: 0.500
全部完成!结果保存在 data\my_kinetics_data
然后看看那些输出了GT的行,就是推理错误的视频。即输入路径/output/output_infer_error.mp4。感觉特点是:全是真值为1预测为0,也就是说把脚踢动作识别为非脚踢。原因可能是(只是初步猜测):
1、正样本:负样本是1比2,预测向样本数多的负样本倾斜
2、预测错误的那些正样本的角度都比较奇特,可能训练集里比较少那种角度的脚踢动作或者说人物身长比例、动作特殊。总之一句话,泛化不到。
九、接下来做什么
推理结果大部分都是负样本,怎么往负样本倾斜啊,我现在怀疑1:2是不是不对,难道1:1比较好。或者说正负样本的权重没有设置?接下来考虑训练一个1:1,或者更换个模型训练
325

被折叠的 条评论
为什么被折叠?



