Python练手项目:PDF小工具-fitz版

Python练手项目:PDF小工具-fitz版
python对办公一族来说,是个提高办公效率的得力利器。熟练使用它,可以轻松完成很多繁杂的事情。它可以轻松地操作Word、Excel、PDF这种文档。
今天需要整理一份资料,需要把多个pdf合并为一个,wps这些软件自然是有这个功能,但一般都是收费的,百度上也有很多网站,但资料上传到别人的网站,始终觉得还是不太可靠,故自己搜索了一下使用python来处理pdf文件,故此用tk制作了PDF小工具。
安装以下库:
python -m pip install -U pip
pip install PyMuPDF==1.19.0 -i https://pypi.mirrors.ustc.edu.cn/simple/
pdf_tool_fitz.py
源码如下:

#pip install PyMuPDF==1.19.0 -i https://pypi.mirrors.ustc.edu.cn/simple/
from pathlib import Path
import subprocess
import time
import sys
import tkinter as tk
from tkinter import ttk
import tkinter.filedialog as fd
import tkinter.messagebox as mb
import base64
import fitz
import threading

class PdfHandler:
    page_count=None
    page_index=None
    img_path=None
    def pdf_to_imgs(self,pdf_path,img_path,zoom_x,zoom_y,rotation_angle):
        '''
        pdf文件每页内容都转换成图片
        :param pdf_path:pdf文件的路径
        :param img_path:图像要保存的路径
        :param zoom_x:x方向的缩放系数
        :param zoom_y:y方向的缩放系数
            zoom_x和zoom_y一般取相同值,值越大,图像分辨率越高
        :param rotation_angle:旋转角度
        :return:
        '''
        # 如果没有存储文件的目录,则创建
        if not Path(img_path).exists():
            Path(img_path).mkdir()
            
        # 打开PDF文件
        with fitz.open(pdf_path) as pdf:
            self.page_count=pdf.page_count
            # 逐页读取PDF
            for page_index in range(0, pdf.pageCount):
                self.page_index=page_index
                page = pdf[page_index]
                # 设置缩放和旋转系数,zoom_x, zoom_y取相同值,表示等比例缩放
                trans = fitz.Matrix(zoom_x, zoom_y).prerotate(rotation_angle)
                pm = page.getPixmap(matrix=trans, alpha=False)
                self.img_path=f'{img_path}/{page_index}.png'
                self.img_path=self.img_path.replace('/',"\\")
                # 开始写图像
                pm.writePNG(self.img_path) # 第1张图片名:0.png,以此类推
                pm=None

    def imgs_to_pdf(self,img_path, new_pdf_name):
        '''
        将图片转为pdf
        :param imgPath: 图片路径
        :param new_pdf_name: 多张图片合并成的pdf文件名
        :return:
        '''
        with fitz.open() as doc:  # 新建一个空文档
            img_list=[]
            for item in Path(img_path).iterdir():
                img_list.append(item.name)
            for page_index in range(len(img_list)):
                self.page_index=self.page_index+1
                img = fitz.open(f'{img_path}/{page_index}.png')  # 打开图片
                pdfbytes = img.convertToPDF()  # 使用图片创建单页的 PDF
                with fitz.open("pdf", pdfbytes) as imgpdf:
                    doc.insert_pdf(imgpdf)
                img.close()
            doc.save(new_pdf_name)
    
    def imgs_2_pdf(self,old_pdf_names, new_pdf_name):
        '''
        将图片转为pdf
        :param imgPath: 图片路径
        :param new_pdf_name: 多张图片合并成的pdf文件名
        :return:
        '''
        with fitz.open() as doc:  # 新建一个空文档
            for old_pdf_name in old_pdf_names:
                img = fitz.open(old_pdf_name)  # 打开图片
                pdfbytes = img.convertToPDF()  # 使用图片创建单页的 PDF
                with fitz.open("pdf", pdfbytes) as imgpdf:
                    doc.insert_pdf(imgpdf)
                img.close()
            doc.save(new_pdf_name)
    
    def img_invert_img(self,old_img_name, new_img_name):
        '''
        将图片转为pdf
        :param imgPath: 图片路径
        :param new_pdf_name: 多张图片合并成的pdf文件名
        :return:
        '''
        with fitz.open() as doc:  # 新建一个空文档
            img = fitz.open(old_img_name)  # 打开图片
            pdfbytes = img.convertToPDF()  # 使用图片创建单页的 PDF
            with fitz.open("pdf", pdfbytes) as imgpdf:
                doc.insert_pdf(imgpdf)
            img.close()
            img = doc.get_page_images(0)[0]
            xref = img[0] #图片的xref编号,
            base = img[1] #图片的基本名字,比如 'img0'
            pix = fitz.Pixmap(doc, xref)
            if pix.n < 5:  # 这是一个灰度或者RGB图片
                pix.invertIRect()
                pix.writePNG(new_img_name)
            else:  # 这是一个CMYK图片
                pix1 = fitz.Pixmap(fitz.csRGB, pix)
                pix1.invertIRect()
                pix1.writePNG(new_img_name)
                pix1 = None
            pix = None
            '''
            pm=doc[0].getPixmap(dpi=600,alpha=False)
            pm.invertIRect()
            pm.writePNG(new_img_name)
            pm=None
            '''
    
    def merge_pdfs(self,old_pdf_names,new_pdf_name):
        '''
        从pdf文件合成为新的pdf文件
        :param old_pdf_names: pdf文件名数组
        :param new_pdf_name: 合并出的新pdf文件名
        :return:
        '''
        print(old_pdf_names)
        print(new_pdf_name)
        file_writer = fitz.open()
        for pdf in old_pdf_names:
            file_reader = fitz.open(pdf)# 循环读取需要合并pdf文件
            file_writer.insert_pdf(file_reader)
            file_reader.close()
        file_writer.save(new_pdf_name)
        file_writer.close()
   
    def split_pdf(self,old_pdf_name, new_pdf_name, from_page, to_page):
        '''
        从pdf文件中拆分出部分,拆出部分成为新的pdf文件
        :param old_pdf_name: 原pdf文件名
        :param new_pdf_name: 拆分后合并出的新pdf文件名
        :param from_page: 原pdf文件起始页码索引(0开始,默认0)
        :param to_page: 原pdf文件结尾页码索引(0开始,默认最后一页)
        :return:
        '''
        with fitz.open(old_pdf_name) as old_pdf, fitz.open() as new_pdf:
            print(f"从:{from_page}")
            to_page-=1
            print(f"到:{to_page}")
            new_pdf.insert_pdf(old_pdf,from_page,to_page)
            new_pdf.save(new_pdf_name)
            
    def get_pdf_text(self,pdf_path):
        '''
        获取每页pdf文件文本
        :param pdf_path: pdf文件路径
        :return:
        '''
        # 打开pdf文件,并新建html文件
        with fitz.open(pdf_path) as pdf:

            # 遍历每一页pdf,并显示进度条
            for page in pdf:
                text = page.get_text() #提取文本,传入参数'html'即:page.get_text('html') 则提取每页内容为html
            print(f'第{page+1}页解析内容:\n{text}')
    
    def get_pdf_images(self,img_path,pdf_path):
        '''
        获取每页pdf文件所有图片
        :param pdf_path: pdf文件路径
        :return:
        '''
        # 如果没有存储文件的目录,则创建
        if not Path(img_path).exists():
            Path(img_path).mkdir()
        doc = fitz.open(pdf_path)
        self.page_count=doc.page_count
        page_index=0
        for i in range(len(doc)):
            self.page_index=i
            for img in doc.get_page_images(i):
                xref = img[0] #图片的xref编号,
                #base = img[1] #图片的基本名字,比如 'img0'
                pix = fitz.Pixmap(doc, xref)
                self.img_path=f"{img_path}/{page_index}.png"
                if pix.n < 5:  # 这是一个灰度或者RGB图片
                    pix.writePNG(self.img_path)
                else:  # 这是一个CMYK图片
                    pix1 = fitz.Pixmap(fitz.csRGB, pix)
                    pix1.writePNG(self.img_path)
                    pix1 = None
                pix = None  # 让图片像素图清除内存
                page_index+=1
        doc.close()

# 防止字符串乱码
# os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
data=[]
id=0
mydir='D:/'
kind='PDF'
PdfHandler=PdfHandler()

def thread_it(func,args):
	'''将函数打包进线程'''
	# 创建
	t = threading.Thread(target=func, args=args)
	# 守护 !!!
	t.setDaemon(True)
	# 启动
	t.start()
	# 阻塞--卡死界面!
	# t.join()

def base64_to_image(base64_string, image_path):
    base64_data = base64_string.encode("utf-8")
    image_data = base64.b64decode(base64_data)
    with open(image_path, "wb") as image_file:
        image_file.write(image_data)

base64_string = '''
AAABAAcAEBAAAAAAIADNAQAAdgAAABgYAAAAACAALQMAAEMCAAAgIAAAAAAgAFMEAABwBQAAMDAAAAAAIAAzBwAAwwkAAEBAAAAAACAAPwoAAPYQAACAgAAAAAAgAB4WAAA1GwAAAAAAAAAAIAAYIwAAUzEAAIlQTkcNChoKAAAADUlIRFIAAAAQAAAAEAgGAAAAH/P/YQAAAZRJREFUeJyNk0FKK0EQhv+ZnhndiotZeAjXQWNwHbyGF/AmBsxOiDgR3YsushG8gAcQVCTwIu7kacykPhfVGgkRbSiYqvr+7uqpahWFkedGCIZkNJtG78i4vTf+T9xu7z3WbDoTgmuKwlCWgQRFAfv78Drmx/U6dqYoXJNloDyHpSUYDCJl8FRDz2APt16MYY4MBq7Jc5AEnU4Uj+EEWAM0Z2t4jlhhp+NVqNGA2oAaTm0myObsM34a2dqg0QBVfd/xqYZVIAHCggpCzK1GFqDqgx6G7hza7OR58feqFFmAhyGkZSlJ0k0iJfp9JZGVpLKU0s8EfxAvYtPRyD/W+dsmRFaSRiMpvb5yZ8ekFUlTSWGBMMTcSmQl6fpK0uYGTGNr+vzexn4ctvcJbG3FQTrozgbpCCgXdKDEc7z5gVXlg5RkGeSZdHEptVp+yX8mnaezv72O1Dap/Ha34VDa3ZW+HtPyMnS7MK5/fkzvUzg7g+dn9ycT/DWGAEniG7VacFzB3SO81W53jx7b3nam3YaXFzCDD2tbU1K76U/cAAAAAElFTkSuQmCCiVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAC9ElEQVR4nJ2Vu24TURCG/72EGFDCtUECRYYiFBFGvAKPAFUQBYKKOhU9gqcAKQUXCcQTWKKgSipKQkFkaLgpsWXZCbt7PoqZZY8tRyIZabS7szPfnDPnMsmJE4Gy1D/JMmk8tvdOR7p9W7p5U1pelhYXzT4YSJ8+Sd2u9OaN9PGj2Y8fl6qqYeW5pDwHyTRN7dluw7NnMBgwIaVrLIOB+bbbkwwJ8hw0Nxd9CO7ehX6/AexXQAWEiBrMtl81pn7fYmPW3JwnyDIzrK01AXtFA90FPgAvXT+4rU62VzRxa2vGyjJP0GqZ4c4dcygK+OMj2wYeAhcATekF/7ft4D+VxYKxJGi1QGkKS0swHEJVNfC3wLkImACZaxLZz7lvnaSqjLW0VK+HYH3d6+0jeBcB8ilgnDCPvt8xyVhf98VeWYHxGMoAIUAPWIxGPA2e1npGi1hsCMYaj2FlBdLVVanVkqogJYn0WNJAUiYp2tIHSuW+A1lskhir1ZJWVyVtbtpOCBXsAGd9RLPKcpDW/mcxRvBtvbkJ6vebLf7eA9JDwDUV895ZATsb6cJCU4qepERS+h+lmZbUY3tR6RYWjsY6XOLh0BZJki5JQlI4Aih47CX/ziQNh1K6tWVTI0gdSafdMTkEPPGY0zIGwWxbW1La7ZpTgXRG0i13zmaiZkvmMbdkjAKzd7uSOh3Y22sO2hfgpG+7/9lNqfuexGJDgCoY89o1vypevJg85q+PcFW8nroqXr3yqyJN4fJlGI38svND8RI4NTXS3DWe2Sn3BSi8Cj9+wMWL3hfq6/rePXcqzBHgM3AfOD9jBuf93+cIXpaW4Ns3uHLFZxA3nEeP+CdxE/kJdIHnrl231bIf9dHS33s9uH59Rst88MDuc7Djvl9ycMuMwDs78OtXUwWA37+jBHWbk2B52RZ+NGJCCtdYRiPzbbfh6lX4+nVyJpqft85T67FjTcIbN+DJE9jYgN1dq28I9r6xAU+fmk/tX8d8/24bpizhLx7ka2lX8XPpAAAAAElFTkSuQmCCiVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAEGklEQVR4nLVXz2tTWRT+3k37kjahgt1oY6OgLRbF2J0rV1opuPIvEISu/D+kIKK46KYy/8Ewq+kiduFmxOJQI1RQBpQS4pRadxqTJu+bxTln7s3zxbbaHji85N57vvOd++ucG42NJex0kCm5HPDli/yuVIDr10WrVeDUKaJQkL5v34BGI0K9DtRqopub0lcsAr1eNn4cAxgeJoHBOj1NPn5MNhrctzQaYjM9/WPs4WEScdzfmMvJt1Ag790jWy0PnCRkr0f2ErLHlGpfkvjxrZZgFAr92KZxnCJgA6amyLU1D7S7K8Bdkrs/iHyXMiZJxMZkbU0w0yT6CFjH7Cz58aMYdjoatUYZSpPkhmoz1Wfjk0QwSMGcne339T+BKJKGs2fJrS0fNTUik79I3iV5kWSJJFRL2nZXx5iYrWFtbYkPQHzGMYl8nnRO1ml9Pdv5PyRvBQ730ltqk0VifV18OUfm8yRGR4XR4qKf9tDwT5LjChyRHCLp9DeCdqd91j6utiGWYS8uis/RURLOyQbpdMhu1282klxRUATf/WhosxKQSBLx0emIT+dIAOTSkp+mHsmE5HuSxxQodwDnSNkcU6yEgm1LsbSkp6FcJre3pdF2PEne/InIB83ETcW0k0GKz3KZxMKCdvb81D//hcgHzcTzYCl6GuXCAunm5oK7Se/o3/QbZV/hBxLDMExZdNG5OcBVq0AUAZEDhgB0ATzTwckhEDCMZ4o9BPEVRZLUXLncT/VfAJsB218Vw9hU7NBXuQy4fL7f4DOA1hEQaCl2KIUC4AYZHIVkYbt0MXIcgNYZh7oJC4odSrsNuGazn95JAJNHQGBSsUNfzSbg6nU9FonfpVfV8Lv1+QlxinUV/pQxEZ/1OuBqNT2GkWd7W0ke1iakYkLJmL9aDUClQu7sfH8V3zjEq/hGxlW8s0NWKpqMlpelMUxG7yiFhqXagzq3lF1SrHQyWl7W+9A5cmZG0mS3KyMtJ/yREc1BIodiWA5g4gvbmRlNx1aQPHzoZ4H0xefvJMeCqPYqSGy2xtQ2xOpqZPfvi8+REfqSrFgkNzb6SdhMbJCcz4g0ShExnVebECPR6L9+JU+fFgLFIvuL0vPn/YbMKkpXSd4hOUUyHzjMa9sdHWMSOif9u+HlS3J8XPdAuiy/coX89EkMrIazjWnSoVQ5f6u+1zYT23BWgpnz8PviBTkxweyHyYUL5OvXnn34MAlnJC3Wn36YpEnY/7dvOfhpViqRjx75wQaw19PMnNj4Bw/IS5fIN2+yyWQ+TqVaFa1WySdPfN24H9neFptq1eOcOOHfHeFyRCMjzHyeRxHgHGB9Z84A8/PAtWvA5cvAxARgtUS7LYnl1Svg6VNgZQX48EH64liw2m1gchJYXQXOnRNazgH/AQsN/3KKQ50eAAAAAElFTkSuQmCCiVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAG+k
import sys import os import fitz # PyMuPDF import pandas as pd import numpy as np import tempfile import shutil import re import time from datetime import datetime from PIL import Image, ImageDraw import cv2 import csv import json from collections import defaultdict from PyQt5.QtWidgets import ( QApplication, QMainWindow, QWidget, QStackedWidget, QVBoxLayout, QHBoxLayout, QLabel, QPushButton, QFileDialog, QListWidget, QTableWidget, QTableWidgetItem, QAbstractItemView, QHeaderView, QLineEdit, QFrame, QSizePolicy, QProgressBar, QMessageBox, QComboBox, QGridLayout, QTextEdit, QDialog, QDialogButtonBox ) from PyQt5.QtCore import Qt, QSize, QTimer, QThread, pyqtSignal from PyQt5.QtGui import QIcon, QFont, QColor, QPixmap, QBrush, QPainter # 集成PaddleOCR try: from paddleocr import PaddleOCR except ImportError: print("PaddleOCR not installed. Please install with: pip install paddlepaddle paddleocr") sys.exit(1) class OCRWorker(QThread): progress_updated = pyqtSignal(int, str) extraction_complete = pyqtSignal(list, str) watermark_removed = pyqtSignal(str, str) def __init__(self, pdf_path, output_dir, watermark_text=None, parent=None): super().__init__(parent) self.pdf_path = pdf_path self.output_dir = output_dir self.watermark_text = watermark_text self.file_name = os.path.basename(pdf_path) self.canceled = False self.ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=True) def run(self): try: # 第一步:去除水印(如果需要) processed_path = self.pdf_path if self.watermark_text: processed_path = self.remove_watermark() if processed_path: self.watermark_removed.emit(processed_path, self.file_name) else: self.progress_updated.emit(100, "水印去除失败") return # 第二步:提取内容 self.extract_content(processed_path) except Exception as e: self.progress_updated.emit(100, f"处理失败: {str(e)}") def remove_watermark(self): """使用OCR检测并去除水印""" try: doc = fitz.open(self.pdf_path) new_doc = fitz.open() output_path = os.path.join(self.output_dir, f"processed_{self.file_name}") total_pages = len(doc) for page_num in range(total_pages): if self.canceled: return None self.progress_updated.emit(int(30 * page_num / total_pages), f"正在处理第 {page_num+1} 页水印") page = doc.load_page(page_num) pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) img_np = np.array(img) # 使用PaddleOCR检测文本 result = self.ocr.ocr(img_np, cls=True) # 检测水印文本 watermark_boxes = [] for line in result: for word_info in line: text = word_info[1][0] if self.watermark_text.lower() in text.lower(): box = word_info[0] # 将浮点坐标转换为整数 int_box = [(int(x), int(y)) for x, y in box] watermark_boxes.append(int_box) # 去除水印(用白色覆盖) if watermark_boxes: img_pil = Image.fromarray(img_np) draw = ImageDraw.Draw(img_pil) for box in watermark_boxes: # 创建覆盖矩形 min_x = min(point[0] for point in box) max_x = max(point[0] for point in box) min_y = min(point[1] for point in box) max_y = max(point[1] for point in box) # 扩展矩形范围确保完全覆盖 expand = 5 draw.rectangle( [min_x - expand, min_y - expand, max_x + expand, max_y + expand], fill=(255, 255, 255) img_np = np.array(img_pil) # 保存处理后的页面 img_bytes = Image.fromarray(img_np).tobytes() new_page = new_doc.new_page(width=img_np.shape[1], height=img_np.shape[0]) new_page.insert_image(fitz.Rect(0, 0, img_np.shape[1], img_np.shape[0]), stream=img_bytes) new_doc.save(output_path) new_doc.close() doc.close() return output_path except Exception as e: print(f"Error removing watermark: {e}") return None def extract_content(self, pdf_path): """使用PaddleOCR提取内容""" try: doc = fitz.open(pdf_path) extracted_data = [] total_pages = len(doc) for page_num in range(total_pages): if self.canceled: return self.progress_updated.emit(30 + int(70 * page_num / total_pages), f"正在提取第 {page_num+1} 页内容") page = doc.load_page(page_num) pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) img_np = np.array(img) # 使用PaddleOCR提取文本 result = self.ocr.ocr(img_np, cls=True) # 处理OCR结果 page_text = "" for line in result: line_text = " ".join([word_info[1][0] for word_info in line]) page_text += line_text + "\n" # 提取结构化数据(示例逻辑) extracted = self.extract_structured_data(page_text, page_num + 1) extracted_data.extend(extracted) doc.close() self.extraction_complete.emit(extracted_data, self.file_name) except Exception as e: self.progress_updated.emit(100, f"内容提取失败: {str(e)}") def extact_structured_data(self, text, page_num): """从文本中提取结构化数据(示例实现)""" extracted = [] # 提取发票信息 invoice_match = re.search(r'Invoice\s+Number\s*:\s*(\w+)', text, re.IGNORECASE) date_match = re.search(r'Date\s*:\s*(\d{2}/\d{2}/\d{4})', text, re.IGNORECASE) total_match = re.search(r'Total\s+Amount\s*:\s*([\d,]+\.\d{2})', text, re.IGNORECASE) if invoice_match or date_match or total_match: extracted.append({ "Document": self.file_name, "Page": page_num, "Type": "Invoice", "Invoice Number": invoice_match.group(1) if invoice_match else "N/A", "Date": date_match.group(1) if date_match else "N/A", "Amount": f"${total_match.group(1)}" if total_match else "N/A" }) # 提取报告信息 report_match = re.search(r'Report\s+Title\s*:\s*(.+)', text, re.IGNORECASE) author_match = re.search(r'Author\s*:\s*(.+)', text, re.IGNORECASE) if report_match or author_match: extracted.append({ "Document": self.file_name, "Page": page_num, "Type": "Report", "Report Title": report_match.group(1) if report_match else "N/A", "Author": author_match.group(1) if author_match else "N/A", "Summary": text[:200] + "..." if len(text) > 200 else text }) # 如果没有匹配到特定结构,返回整个页面文本 if not extracted: extracted.append({ "Document": self.file_name, "Page": page_num, "Type": "General", "Content": text }) return extracted def cancel(self): self.canceled = True class MainWindow(QMainWindow): def __init__(self): super().__init__() self.setWindowTitle("PDF智能处理工具") self.setGeometry(100, 100, 1200, 800) self.setMinimumSize(1000, 700) # 应用主色调 self.primary_color = "#2c3e50" self.secondary_color = "#3498db" self.accent_color = "#e67e22" self.light_color = "#ecf0f1" self.dark_color = "#34495e" # 初始化状态 self.current_files = [] self.extracted_data = [] self.history_data = [] self.ocr_worker = None self.temp_dir = tempfile.mkdtemp() # 创建中央部件和主布局 central_widget = QWidget() self.setCentralWidget(central_widget) main_layout = QVBoxLayout(central_widget) main_layout.setContentsMargins(0, 0, 0, 0) main_layout.setSpacing(0) # 创建顶部栏 self.create_header(main_layout) # 创建主内容区域 self.content_stack = QStackedWidget() main_layout.addWidget(self.content_stack, 1) # 创建各个页面 self.home_page = self.create_home_page() self.upload_page = self.create_upload_page() self.history_page = self.create_history_page() self.analysis_page = self.create_analysis_page() self.content_stack.addWidget(self.home_page) self.content_stack.addWidget(self.upload_page) self.content_stack.addWidget(self.history_page) self.content_stack.addWidget(self.analysis_page) # 创建底部导航栏 self.create_footer(main_layout) # 模拟一些历史数据 self.simulate_history_data() # 设置首页为默认页面 self.content_stack.setCurrentIndex(0) self.update_home_page() def closeEvent(self, event): """清理临时文件""" try: shutil.rmtree(self.temp_dir, ignore_errors=True) except: pass event.accept() def simulate_history_data(self): """模拟一些历史数据用于展示""" for i in range(5): self.history_data.append({ "id": i, "file_name": f"document_{i+1}.pdf", "date": f"2023-0{i+1}-15", "status": "Completed", "pages": i+3, "type": "Invoice" if i % 2 == 0 else "Report", "extracted_data": [ { "Document": f"document_{i+1}.pdf", "Page": 1, "Type": "Invoice", "Invoice Number": f"INV-2023-{i+1:04d}", "Date": f"2023-0{i+1}-15", "Amount": f"${(i+1)*250:.2f}" } ] }) def create_header(self, main_layout): """创建应用头部""" header = QWidget() header.setStyleSheet(f"background-color: {self.primary_color}; padding: 15px;") header_layout = QHBoxLayout(header) header_layout.setContentsMargins(20, 10, 20, 10) # 应用标题 title_label = QLabel("PDF智能处理工具") title_label.setStyleSheet(f"color: white; font-size: 24px; font-weight: bold;") header_layout.addWidget(title_label) # 右侧用户区域 user_widget = QWidget() user_layout = QHBoxLayout(user_widget) user_layout.setSpacing(15) user_icon = QLabel() user_icon.setPixmap(self.create_icon("👤", 40)) user_layout.addWidget(user_icon) user_name = QLabel("管理员") user_name.setStyleSheet("color: white; font-size: 16px;") user_layout.addWidget(user_name) header_layout.addWidget(user_widget) main_layout.addWidget(header) def create_footer(self, main_layout): """创建底部导航栏""" footer = QWidget() footer.setStyleSheet(f"background-color: {self.dark_color};") footer.setFixedHeight(60) footer_layout = QHBoxLayout(footer) footer_layout.setContentsMargins(0, 0, 0, 0) footer_layout.setSpacing(0) # 导航按钮 nav_items = [ ("首页", "home", 0), ("上传与提取", "upload", 1), ("历史记录", "history", 2), ("数据分析", "analysis", 3) ] for text, icon_name, index in nav_items: btn = QPushButton(text) btn.setIcon(QIcon(self.create_icon("🏠" if icon_name=="home" else "📤" if icon_name=="upload" else "📋" if icon_name=="history" else "📊", 24))) btn.setIconSize(QSize(24, 24)) btn.setFixedHeight(60) btn.setStyleSheet(f""" QPushButton {{ color: {self.light_color}; font-size: 14px; font-weight: bold; border: none; background-color: {self.dark_color}; }} QPushButton:hover {{ background-color: {self.primary_color}; }} """) btn.clicked.connect(lambda _, idx=index: self.navigate_to(idx)) footer_layout.addWidget(btn) main_layout.addWidget(footer) def create_icon(self, emoji, size=24): """创建表情符号图标""" pixmap = QPixmap(size, size) pixmap.fill(Qt.transparent) painter = QPainter(pixmap) painter.setFont(QFont("Arial", size - 4)) painter.drawText(pixmap.rect(), Qt.AlignCenter, emoji) painter.end() return pixmap def navigate_to(self, index): """导航到指定页面""" self.content_stack.setCurrentIndex(index) # 更新页面内容 if index == 0: # 首页 self.update_home_page() elif index == 1: # 上传与提取 pass elif index == 2: # 历史记录 self.update_history_page() elif index == 3: # 数据分析 self.update_analysis_page() def create_home_page(self): """创建首页""" page = QWidget() layout = QVBoxLayout(page) layout.setContentsMargins(30, 20, 30, 20) layout.setSpacing(20) # 欢迎卡片 welcome_card = QFrame() welcome_card.setStyleSheet(f""" QFrame {{ background-color: white; border-radius: 10px; padding: 20px; }} """) welcome_layout = QVBoxLayout(welcome_card) welcome_title = QLabel("欢迎使用PDF智能处理工具") welcome_title.setStyleSheet("font-size: 24px; font-weight: bold; color: #2c3e50;") welcome_layout.addWidget(welcome_title) welcome_text = QLabel("本工具提供PDF水印去除、内容提取和数据分析功能,支持批量处理PDF文件,快速提取结构化数据并导出为Excel或CSV格式。") welcome_text.setStyleSheet("font-size: 16px; color: #7f8c8d;") welcome_text.setWordWrap(True) welcome_layout.addWidget(welcome_text) # OCR信息 ocr_info = QLabel("当前使用PaddleOCR引擎,支持中英文识别") ocr_info.setStyleSheet("font-size: 14px; color: #3498db; font-weight: bold;") welcome_layout.addWidget(ocr_info) layout.addWidget(welcome_card) # 最近上传区域 recent_label = QLabel("最近上传") recent_label.setStyleSheet("font-size: 20px; font-weight: bold; color: #2c3e50;") layout.addWidget(recent_label) # 最近上传列表 self.recent_list = QListWidget() self.recent_list.setStyleSheet(""" QListWidget { background-color: white; border-radius: 10px; border: 1px solid #ddd; } QListWidget::item { padding: 15px; border-bottom: 1px solid #eee; } QListWidget::item:selected { background-color: #e6f7ff; } """) self.recent_list.setAlternatingRowColors(True) layout.addWidget(self.recent_list, 1) # 快捷操作按钮 quick_actions = QWidget() quick_layout = QHBoxLayout(quick_actions) quick_layout.setSpacing(15) actions = [ ("上传新文件", "upload", self.navigate_to_upload), ("查看历史记录", "history", lambda: self.navigate_to(2)), ("数据分析", "analysis", lambda: self.navigate_to(3)) ] for text, icon_name, action in actions: btn = QPushButton(text) btn.setIcon(QIcon(self.create_icon("📤" if icon_name=="upload" else "📋" if icon_name=="history" else "📊", 24))) btn.setIconSize(QSize(24, 24)) btn.setStyleSheet(f""" QPushButton {{ background-color: {self.secondary_color}; color: white; font-size: 16px; font-weight: bold; padding: 12px 20px; border-radius: 8px; }} QPushButton:hover {{ background-color: #2980b9; }} """) btn.clicked.connect(action) quick_layout.addWidget(btn) layout.addWidget(quick_actions) return page def update_home_page(self): """更新首页内容""" self.recent_list.clear() # 显示最近的5条记录 for item in self.history_data[:5]: list_item = QLabel(f""" <div style="font-size: 16px; font-weight: bold;">{item['file_name']}</div> <div style="color: #7f8c8d; font-size: 14px;"> 上传时间: {item['date']} | 状态: <span style="color: #27ae60;">{item['status']}</span> | 类型: {item['type']} </div> """) list_widget = QListWidgetItem(self.recent_list) list_widget.setSizeHint(list_item.sizeHint()) self.recent_list.addItem(list_widget) self.recent_list.setItemWidget(list_widget, list_item) def navigate_to_upload(self): """导航到上传页面""" self.content_stack.setCurrentIndex(1) def create_upload_page(self): """创建上传与提取页面""" page = QWidget() layout = QVBoxLayout(page) layout.setContentsMargins(30, 20, 30, 20) layout.setSpacing(20) # 标题 title = QLabel("上传与提取") title.setStyleSheet("font-size: 24px; font-weight: bold; color: #2c3e50;") layout.addWidget(title) # 上传区域 upload_card = QFrame() upload_card.setStyleSheet(f""" QFrame {{ background-color: white; border-radius: 10px; border: 2px dashed {self.secondary_color}; padding: 40px; }} """) upload_layout = QVBoxLayout(upload_card) upload_layout.setAlignment(Qt.AlignCenter) upload_icon = QLabel() upload_icon.setPixmap(self.create_icon("📤", 80)) upload_icon.setAlignment(Qt.AlignCenter) upload_layout.addWidget(upload_icon) upload_text = QLabel("拖放PDF文件到此处,或点击选择文件") upload_text.setStyleSheet("font-size: 18px; color: #7f8c8d; margin-top: 20px;") upload_text.setAlignment(Qt.AlignCenter) upload_layout.addWidget(upload_text) upload_btn = QPushButton("选择PDF文件") upload_btn.setStyleSheet(f""" QPushButton {{ background-color: {self.secondary_color}; color: white; font-size: 16px; padding: 10px 20px; border-radius: 5px; margin-top: 20px; }} QPushButton:hover {{ background-color: #2980b9; }} """) upload_btn.clicked.connect(self.select_pdf_files) upload_layout.addWidget(upload_btn, alignment=Qt.AlignCenter) # 水印选项 watermark_layout = QHBoxLayout() watermark_layout.setSpacing(10) watermark_label = QLabel("水印文本(可选):") watermark_label.setStyleSheet("font-size: 14px;") watermark_layout.addWidget(watermark_label) self.watermark_input = QLineEdit() self.watermark_input.setPlaceholderText("输入要删除的水印文本") self.watermark_input.setStyleSheet("padding: 5px; border: 1px solid #ddd; border-radius: 3px;") watermark_layout.addWidget(self.watermark_input, 1) upload_layout.addLayout(watermark_layout) layout.addWidget(upload_card, 1) # 进度区域 progress_layout = QVBoxLayout() progress_layout.setSpacing(10) progress_label = QLabel("处理进度") progress_label.setStyleSheet("font-size: 18px; font-weight: bold; color: #2c3e50;") progress_layout.addWidget(progress_label) self.progress_bar = QProgressBar() self.progress_bar.setStyleSheet(""" QProgressBar { border: 1px solid #ddd; border-radius: 5px; text-align: center; height: 25px; } QProgressBar::chunk { background-color: #3498db; width: 10px; } """) progress_layout.addWidget(self.progress_bar) self.progress_text = QLabel("等待处理文件...") self.progress_text.setStyleSheet("font-size: 14px; color: #7f8c8d;") progress_layout.addWidget(self.progress_text) # 取消按钮 self.cancel_btn = QPushButton("取消处理") self.cancel_btn.setStyleSheet(""" QPushButton { background-color: #e74c3c; color: white; padding: 8px 20px; border-radius: 5px; font-weight: bold; } QPushButton:hover { background-color: #c0392b; } """) self.cancel_btn.clicked.connect(self.cancel_processing) self.cancel_btn.setVisible(False) progress_layout.addWidget(self.cancel_btn, alignment=Qt.AlignRight) layout.addLayout(progress_layout) return page def select_pdf_files(self): """选择PDF文件""" files, _ = QFileDialog.getOpenFileNames( self, "选择PDF文件", "", "PDF文件 (*.pdf)" ) if files: self.current_files = files self.process_files() def process_files(self): """处理选中的文件""" if not self.current_files: return # 重置状态 self.extracted_data = [] # 显示取消按钮 self.cancel_btn.setVisible(True) # 处理第一个文件 file_path = self.current_files[0] file_name = os.path.basename(file_path) # 获取水印文本 watermark_text = self.watermark_input.text().strip() or None # 创建工作线程 self.ocr_worker = OCRWorker( file_path, self.temp_dir, watermark_text ) # 连接信号 self.ocr_worker.progress_updated.connect(self.update_progress) self.ocr_worker.watermark_removed.connect(self.on_watermark_removed) self.ocr_worker.extraction_complete.connect(self.on_extraction_complete) # 开始处理 self.ocr_worker.start() def update_progress(self, progress, message): """更新处理进度""" self.progress_bar.setValue(progress) self.progress_text.setText(message) def on_watermark_removed(self, output_path, file_name): """水印去除完成""" self.progress_text.setText(f"水印已移除: {file_name}") def on_extraction_complete(self, extracted_data, file_name): """内容提取完成""" self.extracted_data.extend(extracted_data) # 保存到历史记录 self.history_data.insert(0, { "id": len(self.history_data), "file_name": file_name, "date": datetime.now().strftime("%Y-%m-%d %H:%M"), "status": "Completed", "pages": len(extracted_data), "type": "Invoice" if any(d['Type'] == 'Invoice' for d in extracted_data) else "Report", "extracted_data": extracted_data }) # 显示预览 self.show_preview(extracted_data, file_name) # 处理下一个文件(如果有) if len(self.current_files) > 1: self.current_files.pop(0) self.process_files() else: self.current_files = [] self.cancel_btn.setVisible(False) def cancel_processing(self): """取消处理""" if self.ocr_worker and self.ocr_worker.isRunning(): self.ocr_worker.cancel() self.ocr_worker.wait() self.progress_text.setText("处理已取消") self.cancel_btn.setVisible(False) def show_preview(self, data, file_name): """显示预览窗口""" # 创建预览对话框 preview_dialog = QDialog(self) preview_dialog.setWindowTitle(f"预览 - {file_name}") preview_dialog.resize(1000, 700) layout = QVBoxLayout(preview_dialog) layout.setContentsMargins(20, 20, 20, 20) layout.setSpacing(15) # 标题 title = QLabel(f"文件内容提取结果: {file_name}") title.setStyleSheet("font-size: 20px; font-weight: bold; color: #2c3e50;") layout.addWidget(title) # OCR引擎信息 ocr_info = QLabel("使用PaddleOCR引擎提取内容") ocr_info.setStyleSheet("font-size: 14px; color: #3498db; font-weight: bold;") layout.addWidget(ocr_info) # 数据表格 if data: table = QTableWidget() table.setRowCount(len(data)) # 获取所有可能的列 all_columns = set() for item in data: all_columns.update(item.keys()) columns = sorted(all_columns) table.setColumnCount(len(columns)) table.setHorizontalHeaderLabels(columns) # 填充数据 for row_idx, row_data in enumerate(data): for col_idx, col_name in enumerate(columns): value = str(row_data.get(col_name, "")) item = QTableWidgetItem(value) table.setItem(row_idx, col_idx, item) # 表格样式 table.setStyleSheet(""" QTableWidget { background-color: white; border: 1px solid #ddd; gridline-color: #eee; } QHeaderView::section { background-color: #f8f9fa; padding: 8px; border: none; font-weight: bold; } """) table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) table.setEditTriggers(QAbstractItemView.DoubleClicked | QAbstractItemView.SelectedClicked) table.setSelectionMode(QAbstractItemView.SingleSelection) table.setSelectionBehavior(QAbstractItemView.SelectRows) layout.addWidget(table, 1) else: no_data_label = QLabel("未提取到有效数据") no_data_label.setStyleSheet("font-size: 16px; color: #7f8c8d;") no_data_label.setAlignment(Qt.AlignCenter) layout.addWidget(no_data_label, 1) # 操作按钮 btn_layout = QHBoxLayout() btn_layout.setSpacing(15) export_excel_btn = QPushButton("导出为Excel") export_excel_btn.setStyleSheet(f""" QPushButton {{ background-color: {self.secondary_color}; color: white; padding: 10px 20px; border-radius: 5px; font-weight: bold; }} QPushButton:hover {{ background-color: #2980b9; }} """) export_excel_btn.clicked.connect(self.export_to_excel) btn_layout.addWidget(export_excel_btn) export_csv_btn = QPushButton("导出为CSV") export_csv_btn.setStyleSheet(f""" QPushButton {{ background-color: {self.accent_color}; color: white; padding: 10px 20px; border-radius: 5px; font-weight: bold; }} QPushButton:hover {{ background-color: #d35400; }} """) export_csv_btn.clicked.connect(self.export_to_csv) btn_layout.addWidget(export_csv_btn) close_btn = QPushButton("关闭") close_btn.setStyleSheet(""" QPushButton { background-color: #95a5a6; color: white; padding: 10px 20px; border-radius: 5px; font-weight: bold; } QPushButton:hover { background-color: #7f8c8d; } """) close_btn.clicked.connect(preview_dialog.accept) btn_layout.addWidget(close_btn) layout.addLayout(btn_layout) preview_dialog.exec_() def export_to_excel(self): """导出为Excel""" if not self.extracted_data: QMessageBox.warning(self, "导出失败", "没有可导出的数据") return file_path, _ = QFileDialog.getSaveFileName( self, "保存Excel文件", "", "Excel文件 (*.xlsx)" ) if file_path: if not file_path.endswith('.xlsx'): file_path += '.xlsx' try: # 将数据转换为DataFrame df = pd.DataFrame(self.extracted_data) # 导出到Excel df.to_excel(file_path, index=False) QMessageBox.information(self, "导出成功", f"数据已成功导出到: {file_path}") except Exception as e: QMessageBox.critical(self, "导出失败", f"导出过程中发生错误: {str(e)}") def export_to_csv(self): """导出为CSV""" if not self.extracted_data: QMessageBox.warning(self, "导出失败", "没有可导出的数据") return file_path, _ = QFileDialog.getSaveFileName( self, "保存CSV文件", "", "CSV文件 (*.csv)" ) if file_path: if not file_path.endswith('.csv'): file_path += '.csv' try: # 获取所有可能的字段 all_fields = set() for item in self.extracted_data: all_fields.update(item.keys()) # 写入CSV with open(file_path, 'w', newline='', encoding='utf-8') as csvfile: writer = csv.DictWriter(csvfile, fieldnames=sorted(all_fields)) writer.writeheader() writer.writerows(self.extracted_data) QMessageBox.information(self, "导出成功", f"数据已成功导出到: {file_path}") except Exception as e: QMessageBox.critical(self, "导出失败", f"导出过程中发生错误: {str(e)}") def create_history_page(self): """创建历史记录页面""" page = QWidget() layout = QVBoxLayout(page) layout.setContentsMargins(30, 20, 30, 20) layout.setSpacing(20) # 标题和搜索 header_layout = QHBoxLayout() title = QLabel("历史记录") title.setStyleSheet("font-size: 24px; font-weight: bold; color: #2c3e50;") header_layout.addWidget(title) search_layout = QHBoxLayout() search_layout.setSpacing(10) self.search_input = QLineEdit() self.search_input.setPlaceholderText("搜索文件名...") self.search_input.setStyleSheet(""" QLineEdit { padding: 8px 15px; border: 1px solid #ddd; border-radius: 5px; font-size: 14px; } """) self.search_input.setFixedWidth(300) self.search_input.returnPressed.connect(self.search_history) search_layout.addWidget(self.search_input) search_btn = QPushButton("搜索") search_btn.setStyleSheet(f""" QPushButton {{ background-color: {self.secondary_color}; color: white; padding: 8px 20px; border-radius: 5px; }} """) search_btn.clicked.connect(self.search_history) search_layout.addWidget(search_btn) header_layout.addLayout(search_layout) layout.addLayout(header_layout) # 历史记录表格 self.history_table = QTableWidget() self.history_table.setColumnCount(5) self.history_table.setHorizontalHeaderLabels(["文件名", "上传时间", "状态", "页数", "类型"]) self.history_table.setStyleSheet(""" QTableWidget { background-color: white; border: 1px solid #ddd; gridline-color: #eee; } QHeaderView::section { background-color: #f8f9fa; padding: 12px; border: none; font-weight: bold; } """) self.history_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) self.history_table.verticalHeader().setVisible(False) self.history_table.setSelectionBehavior(QAbstractItemView.SelectRows) self.history_table.setEditTriggers(QAbstractItemView.NoEditTriggers) self.history_table.setSortingEnabled(True) layout.addWidget(self.history_table, 1) # 操作按钮 btn_layout = QHBoxLayout() btn_layout.setSpacing(15) view_btn = QPushButton("查看详情") view_btn.setStyleSheet(f""" QPushButton {{ background-color: {self.secondary_color}; color: white; padding: 10px 20px; border-radius: 5px; font-weight: bold; }} """) view_btn.clicked.connect(self.view_history_detail) btn_layout.addWidget(view_btn) export_btn = QPushButton("导出记录") export_btn.setStyleSheet(f""" QPushButton {{ background-color: {self.accent_color}; color: white; padding: 10px 20px; border-radius: 5px; font-weight: bold; }} """) export_btn.clicked.connect(self.export_history) btn_layout.addWidget(export_btn) delete_btn = QPushButton("删除记录") delete_btn.setStyleSheet(""" QPushButton { background-color: #e74c3c; color: white; padding: 10px 20px; border-radius: 5px; font-weight: bold; } """) delete_btn.clicked.connect(self.delete_history) btn_layout.addWidget(delete_btn) layout.addLayout(btn_layout) return page def update_history_page(self): """更新历史记录页面""" self.history_table.setRowCount(len(self.history_data)) for row_idx, item in enumerate(self.history_data): self.history_table.setItem(row_idx, 0, QTableWidgetItem(item["file_name"])) self.history_table.setItem(row_idx, 1, QTableWidgetItem(item["date"])) status_item = QTableWidgetItem(item["status"]) if item["status"] == "Completed": status_item.setForeground(QBrush(QColor("#27ae60"))) else: status_item.setForeground(QBrush(QColor("#e74c3c"))) self.history_table.setItem(row_idx, 2, status_item) self.history_table.setItem(row_idx, 3, QTableWidgetItem(str(item["pages"]))) self.history_table.setItem(row_idx, 4, QTableWidgetItem(item["type"])) def search_history(self): """搜索历史记录""" search_text = self.search_input.text().lower() if not search_text: self.update_history_page() return filtered_data = [item for item in self.history_data if search_text in item["file_name"].lower()] self.history_table.setRowCount(len(filtered_data)) for row_idx, item in enumerate(filtered_data): self.history_table.setItem(row_idx, 0, QTableWidgetItem(item["file_name"])) self.history_table.setItem(row_idx, 1, QTableWidgetItem(item["date"])) status_item = QTableWidgetItem(item["status"]) if item["status"] == "Completed": status_item.setForeground(QBrush(QColor("#27ae60"))) else: status_item.setForeground(QBrush(QColor("#e74c3c"))) self.history_table.setItem(row_idx, 2, status_item) self.history_table.setItem(row_idx, 3, QTableWidgetItem(str(item["pages"]))) self.history_table.setItem(row_idx, 4, QTableWidgetItem(item["type"])) def view_history_detail(self): """查看历史记录详情""" selected_row = self.history_table.currentRow() if selected_row >= 0: file_name = self.history_table.item(selected_row, 0).text() history_item = next((item for item in self.history_data if item["file_name"] == file_name), None) if history_item: self.show_preview(history_item["extracted_data"], file_name) else: QMessageBox.warning(self, "选择记录", "请先选择一条历史记录") def export_history(self): """导出历史记录""" if not self.history_data: QMessageBox.warning(self, "导出失败", "没有可导出的历史记录") return file_path, _ = QFileDialog.getSaveFileName( self, "保存历史记录", "", "CSV文件 (*.csv)" ) if file_path: if not file_path.endswith('.csv'): file_path += '.csv' try: with open(file_path, 'w', newline='', encoding='utf-8') as csvfile: fieldnames = ['id', 'file_name', 'date', 'status', 'pages', 'type'] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for item in self.history_data: writer.writerow({ 'id': item['id'], 'file_name': item['file_name'], 'date': item['date'], 'status': item['status'], 'pages': item['pages'], 'type': item['type'] }) QMessageBox.information(self, "导出成功", f"历史记录已成功导出到: {file_path}") except Exception as e: QMessageBox.critical(self, "导出失败", f"导出过程中发生错误: {str(e)}") def delete_history(self): """删除历史记录""" selected_row = self.history_table.currentRow() if selected_row >= 0: file_name = self.history_table.item(selected_row, 0).text() reply = QMessageBox.question( self, '确认删除', f"确定要删除 '{file_name}' 的记录吗?", QMessageBox.Yes | QMessageBox.No, QMessageBox.No ) if reply == QMessageBox.Yes: self.history_data = [item for item in self.history_data if item['file_name'] != file_name] self.update_history_page() else: QMessageBox.warning(self, "选择记录", "请先选择一条历史记录") def create_analysis_page(self): """创建数据分析页面""" page = QWidget() layout = QVBoxLayout(page) layout.setContentsMargins(30, 20, 30, 20) layout.setSpacing(20) # 标题 title = QLabel("数据分析") title.setStyleSheet("font-size: 24px; font-weight: bold; color: #2c3e50;") layout.addWidget(title) # 选择历史记录 select_layout = QHBoxLayout() select_label = QLabel("选择历史记录:") select_label.setStyleSheet("font-size: 16px;") select_layout.addWidget(select_label) self.history_combo = QComboBox() self.history_combo.setFixedWidth(300) self.history_combo.setStyleSheet("padding: 5px; border: 1px solid #ddd; border-radius: 3px;") select_layout.addWidget(self.history_combo) analyze_btn = QPushButton("分析数据") analyze_btn.setStyleSheet(f""" QPushButton {{ background-color: {self.secondary_color}; color: white; padding: 8px 20px; border-radius: 5px; }} """) analyze_btn.clicked.connect(self.analyze_data) select_layout.addWidget(analyze_btn) layout.addLayout(select_layout) # 图表区域 chart_container = QWidget() chart_layout = QGridLayout(chart_container) # 图表1 self.chart1_label = QLabel() self.chart1_label.setStyleSheet("background-color: white; border-radius: 10px; padding: 10px;") self.chart1_label.setAlignment(Qt.AlignCenter) chart_layout.addWidget(self.chart1_label, 0, 0) # 图表2 self.chart2_label = QLabel() self.chart2_label.setStyleSheet("background-color: white; border-radius: 10px; padding: 10px;") self.chart2_label.setAlignment(Qt.AlignCenter) chart_layout.addWidget(self.chart2_label, 0, 1) # 图表3 self.chart3_label = QLabel() self.chart3_label.setStyleSheet("background-color: white; border-radius: 10px; padding: 10px;") self.chart3_label.setAlignment(Qt.AlignCenter) chart_layout.addWidget(self.chart3_label, 1, 0) # 图表4 self.chart4_label = QLabel() self.chart4_label.setStyleSheet("background-color: white; border-radius: 10px; padding: 10px;") self.chart4_label.setAlignment(Qt.AlignCenter) chart_layout.addWidget(self.chart4_label, 1, 1) layout.addWidget(chart_container, 1) # 操作按钮 btn_layout = QHBoxLayout() btn_layout.setSpacing(15) refresh_btn = QPushButton("刷新数据") refresh_btn.setStyleSheet(f""" QPushButton {{ background-color: {self.secondary_color}; color: white; padding: 10px 20px; border-radius: 5px; font-weight: bold; }} """) refresh_btn.clicked.connect(self.update_analysis_page) btn_layout.addWidget(refresh_btn) export_btn = QPushButton("导出报告") export_btn.setStyleSheet(f""" QPushButton {{ background-color: {self.accent_color}; color: white; padding: 10px 20px; border-radius: 5px; font-weight: bold; }} """) export_btn.clicked.connect(self.export_report) btn_layout.addWidget(export_btn) layout.addLayout(btn_layout) return page def update_analysis_page(self): """更新数据分析页面""" # 更新历史记录选择框 self.history_combo.clear() for item in self.history_data: self.history_combo.addItem(f"{item['file_name']} - {item['date']}", item) # 显示初始图表 self.show_chart(self.chart1_label, "📊", "数据统计") self.show_chart(self.chart2_label, "📈", "趋势分析") self.show_chart(self.chart3_label, "📉", "比较分析") self.show_chart(self.chart4_label, "🧮", "类型分布") def show_chart(self, label, emoji, title): """显示模拟图表""" pixmap = QPixmap(400, 250) pixmap.fill(Qt.white) painter = QPainter(pixmap) painter.setRenderHint(QPainter.Antialiasing) # 绘制标题 painter.setFont(QFont("Arial", 14, QFont.Bold)) painter.drawText(pixmap.rect().adjusted(0, 10, 0, 0), Qt.AlignTop | Qt.AlignHCenter, title) # 绘制图表图标 painter.setFont(QFont("Arial", 80)) painter.drawText(pixmap.rect(), Qt.AlignCenter, emoji) # 绘制边框 painter.setPen(QColor("#ddd")) painter.drawRect(pixmap.rect().adjusted(0, 0, -1, -1)) painter.end() label.setPixmap(pixmap) def analyze_data(self): """分析数据""" if self.history_combo.currentIndex() < 0: QMessageBox.warning(self, "选择记录", "请先选择一条历史记录") return selected_item = self.history_combo.currentData() extracted_data = selected_item.get('extracted_data', []) if not extracted_data: QMessageBox.warning(self, "分析失败", "所选记录没有可分析的数据") return # 在实际应用中,这里会使用真实的数据分析逻辑 # 这里仅显示消息 QMessageBox.information( self, "数据分析", f"已对 '{selected_item['file_name']}' 进行数据分析\n" f"包含 {len(extracted_data)} 条记录" ) def export_report(self): """导出分析报告""" if self.history_combo.currentIndex() < 0: QMessageBox.warning(self, "选择记录", "请先选择一条历史记录") return selected_item = self.history_combo.currentData() file_path, _ = QFileDialog.getSaveFileName( self, "保存分析报告", "", "PDF文件 (*.pdf)" ) if file_path: if not file_path.endswith('.pdf'): file_path += '.pdf' try: # 在实际应用中,这里会生成真实的PDF报告 # 这里仅模拟导出 time.sleep(1) # 模拟生成报告的时间 QMessageBox.information( self, "导出成功", f"分析报告已成功导出到: {file_path}\n" f"包含对 '{selected_item['file_name']}' 的分析结果" ) except Exception as e: QMessageBox.critical(self, "导出失败", f"导出过程中发生错误: {str(e)}") if __name__ == "__main__": # 设置应用程序 app = QApplication(sys.argv) # 设置应用程序样式 app.setStyle("Fusion") # 创建并显示主窗口 window = MainWindow() window.show() # 执行应用程序 sys.exit(app.exec_())将数据分析功能改为接入本地部署的AI大模型进行分析和绘制图表
最新发布
07-22
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值