# -*- coding: utf-8 -*-
# RockyVision 截图识别工具 V2.1 - By Rocky 2025
import os
import time
import cv2
import numpy as np
import keyboard
import sys
from PIL import ImageGrab
from paddleocr import PaddleOCR
from PyQt5.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QTextEdit, QWidget, QFileDialog,
QMessageBox, QInputDialog, QProgressBar)
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QTimer
from PyQt5.QtGui import QIcon, QPixmap
class CaptureTool:
    """Interactive region-screenshot tool built on a full-screen OpenCV window.

    ``capture()`` takes one snapshot of the desktop, shows it in a full-screen
    window and lets the user drag a rectangle over it. The selected region is
    cropped from that same snapshot and written as a JPEG under
    ``screenshots/``.
    """

    WINDOW_NAME = "截图模式 (ESC退出)"
    OUTPUT_DIR = "screenshots"

    def __init__(self):
        self.reset_state()

    def reset_state(self):
        """Clear all per-capture state so the instance can be reused."""
        self.dragging = False
        self.start_pos = None
        self.end_pos = None
        self.saved = False
        self.saved_path = None
        # BGR snapshot of the desktop taken at the start of capture().
        self.screen = None

    @staticmethod
    def _selection_box(start, end, width, height):
        """Normalise a drag into a ``(left, top, right, bottom)`` box.

        The two corners may be given in any order; the box is clamped to an
        image of ``width`` x ``height`` pixels.

        Returns:
            The box as a tuple of ints, or ``None`` when a corner is missing
            or the clamped selection has no area.
        """
        if start is None or end is None:
            return None
        left, right = sorted((start[0], end[0]))
        top, bottom = sorted((start[1], end[1]))
        left, right = max(0, left), min(width, right)
        top, bottom = max(0, top), min(height, bottom)
        if right <= left or bottom <= top:
            return None
        return left, top, right, bottom

    @staticmethod
    def _grab_screen_bgr():
        """Grab the desktop and return it as a BGR array for OpenCV."""
        # PIL delivers RGB, while cv2.imshow / cv2.imwrite expect BGR.
        rgb = np.array(ImageGrab.grab().convert("RGB"))
        return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    def capture(self):
        """Run one interactive capture.

        Returns:
            The path of the saved screenshot, or ``None`` if the user
            pressed ESC before a region was saved.
        """
        self.reset_state()
        self.screen = self._grab_screen_bgr()
        name = self.WINDOW_NAME
        try:
            cv2.namedWindow(name, cv2.WINDOW_NORMAL)
            cv2.setWindowProperty(name, cv2.WND_PROP_FULLSCREEN,
                                  cv2.WINDOW_FULLSCREEN)
            cv2.setMouseCallback(name, self.mouse_handler)
            while True:
                frame = self.screen.copy()
                if self.dragging and self.start_pos and self.end_pos:
                    x1, y1 = self.start_pos
                    x2, y2 = self.end_pos
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    # Hershey fonts only render ASCII, so use a plain "x".
                    cv2.putText(frame, f"{abs(x2 - x1)}x{abs(y2 - y1)}",
                                (x1 + 10, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
                cv2.imshow(name, frame)
                # Mask to the low byte; some backends set higher bits.
                key = cv2.waitKey(10) & 0xFF
                if key == 27:  # ESC aborts the capture
                    return None
                if self.saved:
                    return self.saved_path
        finally:
            # Always tear the overlay down, even if the loop raised.
            cv2.destroyAllWindows()

    def mouse_handler(self, event, x, y, flags, param):
        """OpenCV mouse callback that tracks the drag and saves on release."""
        if event == cv2.EVENT_LBUTTONDOWN:
            self.dragging = True
            self.start_pos = (x, y)
            # Forget the end point of any earlier, unsaved drag.
            self.end_pos = None
        elif event == cv2.EVENT_MOUSEMOVE and self.dragging:
            self.end_pos = (x, y)
        elif event == cv2.EVENT_LBUTTONUP and self.dragging:
            self.dragging = False
            self.end_pos = (x, y)
            self.save_crop()

    def save_crop(self):
        """Crop the current selection from the snapshot and write it to disk.

        On success ``saved_path`` is set and ``saved`` becomes True. An empty
        selection is ignored so the user can drag again. Errors are printed,
        not raised, because this runs inside the OpenCV mouse callback.
        """
        try:
            frame = self.screen
            if frame is None:
                # Called outside capture(): fall back to a fresh grab.
                frame = self._grab_screen_bgr()
            height, width = frame.shape[:2]
            box = self._selection_box(self.start_pos, self.end_pos,
                                      width, height)
            if box is None:
                return
            left, top, right, bottom = box
            os.makedirs(self.OUTPUT_DIR, exist_ok=True)
            path = f"{self.OUTPUT_DIR}/{time.strftime('%Y%m%d_%H%M%S')}.jpg"
            # Crop from the snapshot rather than re-grabbing the screen, which
            # would capture the overlay window itself. The mouse coordinates
            # are treated as pixel positions in that snapshot.
            if not cv2.imwrite(path, frame[top:bottom, left:right]):
                raise OSError(f"cv2.imwrite failed for {path}")
            self.saved_path = path
            self.saved = True
        except Exception as e:
            print(f"截图保存错误: {e}")
class OCRThread(QThread):
    """Background worker that runs PaddleOCR over every image in a folder.

    Signals:
        finished(str): final status message, emitted when ``run()`` ends.
        progress(int): percentage (0-100) of images handled so far.
        log_signal(str): human-readable progress and error lines.
    """

    # NOTE: this redefines QThread's own argument-less ``finished`` signal
    # with a str payload. It is kept because callers connect to it.
    finished = pyqtSignal(str)
    progress = pyqtSignal(int)
    log_signal = pyqtSignal(str)

    def __init__(self, folder_path):
        super().__init__()
        self.folder_path = folder_path
        self.supported_formats = ('.jpg', '.jpeg', '.png', '.bmp',
                                  '.tif', '.tiff')
        self.ocr = None

    def initialize_ocr(self):
        """Create the PaddleOCR engine.

        Returns:
            True on success. On failure the error is reported through
            ``log_signal`` and False is returned.
        """
        try:
            self.log_signal.emit("正在初始化OCR引擎,首次使用可能需要较长时间...")
            self.ocr = PaddleOCR(
                lang="ch",
                use_gpu=True,
                det_db_unclip_ratio=2.0,
                rec_image_shape="3, 48, 320",
                use_angle_cls=True,
                show_log=False
            )
            return True
        except Exception as e:
            self.log_signal.emit(f"OCR引擎初始化失败: {str(e)}")
            return False

    def _list_images(self):
        """Return the sorted paths of supported image files in the folder."""
        folder = self.folder_path
        paths = (os.path.join(folder, name) for name in os.listdir(folder))
        return sorted(
            path for path in paths
            if os.path.splitext(path)[1].lower() in self.supported_formats
            and os.path.isfile(path)
        )

    @staticmethod
    def _output_name(filename, used_names):
        """Pick a result file name that no earlier image in this run used.

        ``a.jpg`` normally maps to ``a.txt``. If that name is already taken
        (for example by ``a.png``), the full file name is kept instead, giving
        ``a.jpg.txt``. Names are compared case-insensitively.
        """
        candidate = f"{os.path.splitext(filename)[0]}.txt"
        if candidate.lower() in used_names:
            candidate = f"{filename}.txt"
        used_names.add(candidate.lower())
        return candidate

    @staticmethod
    def _format_result(filename, result):
        """Turn one OCR result into the lines of its report file."""
        lines = [
            "=== OCR识别结果 ===\n",
            f"文件: {filename}\n",
            f"时间: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n",
        ]
        # A missing or empty result means no text was found, not an error.
        detections = (result[0] if result else None) or []
        for item in detections:
            text, confidence = item[1]
            if confidence > 0.7:
                lines.append(f"{text}\n")
            else:
                lines.append(f"[低置信度: {confidence:.2f}] {text}\n")
        return lines

    def run(self):
        """Process every supported image and write one text file per image.

        The folder is validated before the engine is initialised or the
        output directory is created, so a bad selection fails fast and leaves
        no empty result directory behind.
        """
        try:
            image_files = self._list_images()
        except Exception as e:
            self.finished.emit(f"无法读取文件夹内容: {str(e)}")
            return
        if not image_files:
            self.finished.emit("所选文件夹中没有支持的图片文件!")
            return
        if not self.initialize_ocr():
            self.finished.emit("OCR引擎初始化失败!")
            return

        output_dir = f"OCR_Results_{time.strftime('%Y%m%d_%H%M%S')}"
        try:
            os.makedirs(output_dir, exist_ok=True)
        except OSError as e:
            self.finished.emit(f"无法创建输出目录: {e}")
            return

        total_files = len(image_files)
        self.log_signal.emit(f"发现 {total_files} 个图片文件,开始批量处理...")
        processed = 0
        used_names = set()
        for idx, img_path in enumerate(image_files, 1):
            filename = os.path.basename(img_path)
            self.log_signal.emit(f"正在处理 ({idx}/{total_files}): {filename}")
            try:
                result = self.ocr.ocr(img_path, cls=True)
                lines = self._format_result(filename, result)
                output_file = os.path.join(
                    output_dir, self._output_name(filename, used_names))
                with open(output_file, "w", encoding='utf-8') as f:
                    f.writelines(lines)
                processed += 1
            except Exception as e:
                # One bad image must not abort the whole batch.
                self.log_signal.emit(f"处理失败: {filename} - {str(e)}")
            self.progress.emit(int((idx / total_files) * 100))
        self.finished.emit(f"处理完成! {processed}/{total_files} 个文件已处理. 结果保存在: {output_dir}")
class MainWindow(QMainWindow):
    """Main window: screenshot button, batch-OCR button, hotkey and log."""

    # Emitted by the global-hotkey callback and connected to start_capture.
    # The keyboard library invokes that callback outside the Qt GUI thread,
    # so the signal is used to hand the request over to the GUI thread.
    hotkey_triggered = pyqtSignal()

    def __init__(self):
        super().__init__()
        self.setWindowTitle("RockyVision 截图识别工具 V2.1 - By Rocky 2025")
        self.setGeometry(100, 100, 800, 600)
        # State
        self.capture_tool = CaptureTool()
        self.hotkey = "alt+q"
        self._hotkey_handle = None  # handle returned by keyboard.add_hotkey
        self.ocr_thread = None
        self.hotkey_triggered.connect(self.start_capture)
        self.setup_ui()
        self.setup_hotkey()
        self.check_environment()

    def check_environment(self):
        """Log whether PaddlePaddle can be imported."""
        self.log("正在检查系统环境...")
        try:
            import paddle
            self.log(f"PaddlePaddle版本: {paddle.__version__}")
            self.log("环境检查通过,工具已准备好")
        except Exception as e:
            self.log(f"环境检查错误: {str(e)}")
            self.log("请确保已正确安装PaddleOCR: pip install paddlepaddle paddleocr")

    @staticmethod
    def _make_button(text, handler):
        """Create one of the large toolbar buttons wired to ``handler``."""
        button = QPushButton(text)
        button.setStyleSheet("font-size: 16px; padding: 10px;")
        button.clicked.connect(handler)
        return button

    def setup_ui(self):
        """Build the title, button row, progress bar and log area."""
        main_widget = QWidget()
        layout = QVBoxLayout(main_widget)
        # Title
        title = QLabel("RockyVision 截图识别工具 V2.1")
        title.setStyleSheet("font-size: 20px; font-weight: bold; margin-bottom: 20px;")
        title.setAlignment(Qt.AlignCenter)
        # Button row
        btn_layout = QHBoxLayout()
        self.capture_btn = self._make_button("截图 (快捷键: Alt+Q)", self.start_capture)
        self.ocr_btn = self._make_button("批量OCR识别", self.start_ocr)
        self.setting_btn = self._make_button("设置快捷键", self.set_hotkey)
        for button in (self.capture_btn, self.ocr_btn, self.setting_btn):
            btn_layout.addWidget(button)
        # Progress bar, hidden until a batch run starts
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setVisible(False)
        # Log area
        self.log_area = QTextEdit()
        self.log_area.setReadOnly(True)
        self.log_area.setStyleSheet("font-family: Consolas; font-size: 12px;")
        layout.addWidget(title)
        layout.addLayout(btn_layout)
        layout.addWidget(self.progress_bar)
        layout.addWidget(self.log_area)
        self.setCentralWidget(main_widget)

    def _on_hotkey(self):
        """Global-hotkey callback: only emit the signal, touch no widgets."""
        self.hotkey_triggered.emit()

    def _register_hotkey(self, combo):
        """Make ``combo`` the global capture hotkey.

        If ``combo`` is rejected by the keyboard library, the previous hotkey
        is registered again and the original exception is re-raised, so a
        typo never leaves the application without a working hotkey.
        """
        previous = self._hotkey_handle
        if previous is not None:
            keyboard.remove_hotkey(previous)
            self._hotkey_handle = None
        try:
            self._hotkey_handle = keyboard.add_hotkey(combo, self._on_hotkey)
        except Exception:
            if previous is not None:
                self._hotkey_handle = keyboard.add_hotkey(self.hotkey,
                                                          self._on_hotkey)
            raise

    def setup_hotkey(self):
        """Register the initial hotkey and show it on the capture button."""
        try:
            self._register_hotkey(self.hotkey)
            self.update_hotkey_display()
            self.log("快捷键已设置: " + self.hotkey)
        except Exception as e:
            self.log("快捷键设置失败: " + str(e))

    def start_capture(self):
        """Start a capture unless one is already in progress."""
        if not self.capture_btn.isEnabled():
            return
        self.log("截图已激活 (拖动鼠标选择区域,ESC退出)")
        self.capture_btn.setEnabled(False)
        # Defer briefly so the UI can repaint before the blocking capture.
        QTimer.singleShot(100, self.execute_capture)

    def execute_capture(self):
        """Run the blocking capture and always re-enable the button."""
        try:
            saved_path = self.capture_tool.capture()
        except Exception as e:
            self.log(f"截图失败: {e}")
            return
        finally:
            self.capture_btn.setEnabled(True)
        if saved_path:
            self.log(f"截图已保存: {saved_path}")

    def start_ocr(self):
        """Ask for a folder and start batch OCR in a worker thread."""
        folder_path = QFileDialog.getExistingDirectory(
            self, "选择图片文件夹",
            os.getcwd(), QFileDialog.ShowDirsOnly)
        if not folder_path:
            return
        self.log(f"开始批量处理文件夹: {folder_path}")
        self.ocr_btn.setEnabled(False)
        self.progress_bar.setVisible(True)
        self.progress_bar.setValue(0)
        # Keep a reference so the thread object outlives this method.
        self.ocr_thread = OCRThread(folder_path)
        self.ocr_thread.log_signal.connect(self.log)
        self.ocr_thread.progress.connect(self.progress_bar.setValue)
        self.ocr_thread.finished.connect(self.ocr_finished)
        self.ocr_thread.start()

    def ocr_finished(self, message):
        """Log the worker's final message and restore the OCR controls."""
        self.log(message)
        self.ocr_btn.setEnabled(True)
        self.progress_bar.setVisible(False)

    def set_hotkey(self):
        """Ask for a new hotkey and apply it, keeping the old one on error."""
        new_key, ok = QInputDialog.getText(
            self, "设置快捷键",
            "输入新的截图快捷键组合 (如: ctrl+shift+q):",
            text=self.hotkey)
        # Register exactly the normalised form that is stored and displayed.
        combo = new_key.strip().lower() if ok else ""
        if not combo:
            return
        try:
            self._register_hotkey(combo)
        except Exception as e:
            self.log(f"快捷键设置失败: {str(e)}")
            self.log("请尝试其他组合如: alt+q, ctrl+shift+s")
            return
        self.hotkey = combo
        self.update_hotkey_display()
        self.log(f"快捷键已更新为: {self.hotkey}")

    def update_hotkey_display(self):
        """Show the current hotkey on the capture button."""
        self.capture_btn.setText(f"截图 (快捷键: {self.hotkey.upper()})")

    def log(self, message):
        """Append a timestamped line to the log area and scroll to the end."""
        timestamp = time.strftime("%H:%M:%S")
        self.log_area.append(f"[{timestamp}] {message}")
        scroll_bar = self.log_area.verticalScrollBar()
        scroll_bar.setValue(scroll_bar.maximum())

    def closeEvent(self, event):
        """Remove keyboard hooks on close; refuse to close during OCR."""
        if self.ocr_thread is not None and self.ocr_thread.isRunning():
            # Destroying a QThread that is still running aborts the process.
            QMessageBox.information(
                self, "提示",
                "批量OCR识别仍在进行中,请等待处理完成后再关闭窗口。")
            event.ignore()
            return
        keyboard.unhook_all()
        event.accept()
def main():
    """Create the Qt application, show the main window, run the event loop.

    Returns:
        The exit code reported by the Qt event loop.
    """
    qt_app = QApplication(sys.argv)
    qt_app.setStyle('Fusion')  # use the more modern-looking style
    # Make sure the screenshot directory exists
    os.makedirs("screenshots", exist_ok=True)
    main_window = MainWindow()
    main_window.show()
    return qt_app.exec_()


if __name__ == "__main__":
    sys.exit(main())
请将识别的文本都写在一个 txt 文件里,存放位置就在程序的同一目录下面。