pdf图片识别分类

解析pdf数据

试了几种方法
fitz-get_image后面方法不适用,用pixmap分辨率低
用pypdf2版本低方法用不了
用pdf2image还要下依赖工具
用spire.pdf的SaveAsImage分辨率低,ExtractImages可以但运行慢
先用spire.pdf的ExtractImages吧

from spire.pdf.common import ImageFormat
from spire.pdf import PdfDocument

# Extract the embedded images of a PDF into a folder.
def extract_images_from_pdf(file_path, output_folder):
    """Extract every embedded image of a PDF and save it as a JPEG file.

    Args:
        file_path: Path of the PDF document to read.
        output_folder: Directory that receives the images. It is created when
            missing; a trailing path separator is optional. An empty string
            means the current directory.

    Returns:
        The list of file paths written, in page order
        (``Image-1.jpg``, ``Image-2.jpg``, ...).
    """
    import os  # local import: the file's import block only brings in spire.pdf

    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    doc = PdfDocument()
    saved = []
    try:
        doc.LoadFromFile(file_path)
        # Collect the images of all pages first, then number them globally.
        images = []
        for i in range(doc.Pages.Count):
            page = doc.Pages.get_Item(i)
            images.extend(page.ExtractImages())
        for number, image in enumerate(images, start=1):
            target = os.path.join(output_folder, f"Image-{number}.jpg")
            image.Save(target, ImageFormat.get_Jpeg())
            saved.append(target)
    finally:
        # Release the document even when loading or saving fails.
        doc.Close()
    return saved

# 调用函数实现从PDF提取图片
file_path = "副本.pdf"
output_folder = "tup/"
extract_images_from_pdf(file_path, output_folder)

ocr识别

试验几种方法paddleocr umiocr cnocr 微信截图等
识别效果都差不多 最先进的也就这样了,就用cnocr 运行效率高 其他更慢
调了调一些参数,发现不调还好一些
ocr.ocr_for_single_lines用了不行啥结果都没有,用cnstd先看检测框再检测也不行,基础的ocr.ocr已经是最优了
cnocr也和opencv一样不支持中文路径,直接用PIL读取传对象给cnocr就可以

from cnocr import CnOcr # 识别
from PIL import Image
def ocr_cn(path):  # path: image file to recognise
    """Run OCR on one image and return its text lines, size and positions.

    The image is opened with PIL and the image object (not the path) is
    handed to the OCR engine, converted to greyscale first.

    NOTE(review): relies on a module-level ``ocr`` object exposing
    ``ocr.ocr(img)`` (presumably ``CnOcr()``); it is not created in this
    snippet -- confirm it exists where this function is used.

    Args:
        path: Path of the image file.

    Returns:
        A tuple ``(texts, size, boxes)``:
        ``texts`` is the list of recognised strings in engine order,
        ``size`` is the PIL image size,
        ``boxes`` maps each text to ``position[0] + position[2]`` of its
        detection (top-left and bottom-right corner). Lines with identical
        text share one key, so the last one wins.
    """
    # convert() loads the pixel data, so the file can be closed right away.
    with Image.open(path) as source:
        img = source.convert('L')  # greyscale, single channel
    # To OCR only a region, crop first, e.g. img.crop((left, top, right, bottom)).
    r = ocr.ocr(img)
    t = []   # plain text lines
    t1 = {}  # text -> [x1, y1, x2, y2]
    for each in r:
        t.append(each['text'])
        t1[each['text']] = list(each['position'][0]) + list(each['position'][2])
    return t, img.size, t1

分类方法

分类方法有基于规则、基于词频统计、nlp算法-朴素贝叶斯 textcnn什么的
一般数据量小就规则,一般数据量大也先用规则看看 一些能看到的共性可以直接提出来,剩下的再用AI学习

正则匹配

.所有 \d数字 \D非数字 \s空格 \S非空格 \w字母数字汉字下划线  \W非\w
*匹配0-无限次,+匹配1-无限次,?匹配0-1次,{n}匹配n次,{m,n}匹配m-n次
(19|20)  [1-9]
[^\d]  [^https://]  取非
'^www.*com$'  www开头com结尾
'[a-zA-Z0-9_]{4,20}@(163|126|qq|sina|yahoo)\.com' 邮件
'\d{3}-\d{8}|\d{4}-\d{7,8}' 电话号
re.match("<[a-zA-Z]+>(.*)</[a-zA-Z]+>", "<html>zzz</div>").group(1) 提取html内容
re.sub(r'[0-9]', '*', s)  # 把数字替换为*
re.sub(r'[0-9]+', "*", s) # 多个数字替换为1个*
re.sub(r'[^0-9]+', "*", s)# 匹配非数字

提取年月
import re

a, b, c = '2023-1-12', '2024.2.23', '24年1月'
# Two-digit year, one separator (- space / . 年), month 1-12, one separator (- space / . 月).
# Raw string so that \d reaches the regex engine untouched.
d = r"(\d{2})[- /.年](1[0-2]|0?[1-9])[- /.月]"
line = re.search(d, b)  # None when nothing matches; re.match only matches at the start
print(line)  # <re.Match object; span=(2, 7), match='24.2.'>
print(line.span())  # (2, 7)
print(line.group())  # 24.2.
print(line.group(1))  # 24  (parentheses create groups)
print(line.group(2))  # 2
print(re.findall(d, a))  # [('23', '1')]  all matches; [] when there is none
print(re.split('[-年.]', line.group()))  # ['24', '2', '']  (trailing separator gives an empty item)

词频统计

from collections import Counter

# Count how often each word occurs. ``words`` is the list of tokens to
# classify and must be defined before this snippet runs.
worddict = dict(Counter(words))
print(worddict)
highest_key = max(worddict, key=worddict.get) if worddict else None
print(highest_key)  # most frequent word = predicted category

# Custom categories: score each category by the keywords found in ``words``.
worddict = {'a': 0, 'b': 0, 'c': 0}
# NOTE(review): the original tested the keywords against ``worddict`` and
# incremented a non-existent key 'x'; mapping them to category 'a' is an
# assumption -- adjust the keyword/category pairs to the real rules.
if 'xxx' in words or 'xcc' in words:
    worddict['a'] += 1
highest_key = max(worddict, key=worddict.get) if max(worddict.values()) != 0 else None
print(highest_key)  # winning category, or None when nothing scored

# Sort by count, largest first.
worddict = dict(sorted(worddict.items(), key=lambda item: item[1], reverse=True))

分类模型


分类完提示

用Tkinter或pyqt做窗口提示和交互

## Tell the user that classification has finished
import tkinter as tk
from tkinter import messagebox
root = tk.Tk()
root.withdraw()  # hide the empty root window; only the dialogs are shown
messagebox.showinfo("提示", "程序运行完成!")
messagebox.showinfo("提示", "分类完成!"+'\n'+'分类成功 条'+'\n'+'分类失败 条')
root.destroy()  # dispose of the hidden root so it does not linger behind later windows

解析word

# Raw string: in a normal literal '\1' is an octal escape (chr(1)) and the path is wrong.
f = r'D:\code2024\1.doc'

# Convert the .doc to .docx through Word automation so it can be parsed as docx below.
import win32com.client as win32
word = win32.Dispatch('Word.Application')
try:
    doc = word.Documents.Open(f)
    try:
        print(doc)
        # 12 = wdFormatXMLDocument (.docx); 16 would be wdFormatDocumentDefault.
        # f + 'x' is the same name the docx parsing step opens.
        doc.SaveAs(f + 'x', 12)
    finally:
        doc.Close()  # close the document
finally:
    word.Quit()  # always shut Word down, even when Open/SaveAs failed
# exit()

# Parse the .docx: collect paragraph texts and table cell texts.
from docx import Document
doc=Document(f+'x')
print(doc) # a docx Document object
print(doc.paragraphs) # list of Paragraph objects
print(doc.tables) # list of Table objects
# ptext = [p.text for p in doc.paragraphs]  # one entry per paragraph
ptext = []
for i in doc.paragraphs:
    ptext.append(i.text)
print(ptext)

# First pass over the tables: one list of non-blank cell texts per table.
ttest=[]
for t in doc.tables: # one table
    onet=[]
    for row in t.rows: # one row
        for cell in row.cells: # one cell
            if (cell.text).replace(' ','')!='':
                onet+=[cell.text]
    ttest.append(onet)
print(ttest)

# Reading table content this way yields repeated texts because of merged cells.
# Second pass: walk the cells row by row and column by column, record the
# [row, column] index of each cell and drop the indexes of repeated cells.
# NOTE(review): the membership tests rely on how python-docx compares cell
# objects -- confirm this detects merged cells with the installed version.
for table in doc.tables:
    row_cells, column_cells = [], []
    index = []
    width, length = len(table.columns), len(table.rows)
    # Row-major walk: k counts cells, so k // width is the row and k % width the column.
    k = 0
    for row in table.rows:
        for cell in row.cells:
            if cell not in row_cells:
                index.append([k // width, k % width])
                row_cells.append(cell)
            k += 1
    # Column-major walk: here k % length is the row and k // length the column.
    # A cell already seen in this walk has its index removed again.
    k = 0
    for column in table.columns:
        for cell in column.cells:
            if cell not in column_cells:
                column_cells.append(cell)
            elif [k % length, k // length] in index:
                index.remove([k % length, k // length])
            k += 1
    for i in index:      # remaining cell indexes
        if (table.rows[i[0]].cells[i[1]].text).replace(' ', '') != '':
            # Appended to the same ttest list as the first pass, as flat strings with spaces removed.
            ttest.append(table.rows[i[0]].cells[i[1]].text.replace(' ', ''))
print(ttest)

追加excel

import pandas as pd
data=[[1,2,300,4],[1,2,32,4]]
# Upsert ``data`` into bg.xlsx, keyed on the first column: a row whose key is
# already present is overwritten, any other row is appended. The file is
# created from ``data`` when it does not exist yet.
try:
    c = pd.read_excel('bg.xlsx')
except FileNotFoundError:
    c = pd.DataFrame(data)
    c.to_excel("bg.xlsx", index=False)
    print("新建文件bg.xlsx")
else:
    havebh = list(c.iloc[:, 0])  # keys already in the sheet (first column)
    for itm in data:
        if itm[0] in havebh:
            # Overwrite the existing row; object dtype keeps mixed values as they are.
            c.iloc[havebh.index(itm[0])] = pd.array(itm, dtype=object)
        else:
            # iloc cannot grow the frame; loc with the next label appends.
            c.loc[len(c)] = itm
            # Remember the new key so a later row with the same key overwrites
            # this one instead of being appended a second time.
            havebh.append(itm[0])
    c.to_excel("bg.xlsx", index=False)
    print("数据追加写入bg.xlsx")

print(list(c.iloc[0])) # 取一行
print(list(c.iloc[:,2]))  # 取一列
print(list(c.iloc[:,2]).index('SL20'))  # 判断值位置 没有就ValueError

c.iloc[1,11]='asd'  # 数字索引 行,列,更改dataframe的值
c.loc[2]=[1,2,234,213,4,123,3,2,2] # 修改一行,要和shape长度对应
c.iloc[3]=[1,2,234,213,4,123,3,2,2] # iloc无法拓展 添加新行数据要用loc
c.iloc[3]=pd.array([1,2,123,3,[2,2],2,2,2],dtype=object) # 内置不均匀维度要先转换为array obj不然报错
c.iloc[:,4]=[2,3,2,32,3,3,3,3,3]  # 修改一列,要和shape长度对应 不然报错
c.iloc[:,4]=3

遍历
for i in c.iterrows():
    print(i[0], i[1].tolist()) # 行号,数据。读取文档默认str,转原数据格式用eval(str)

pxd=px[ (px['年']==year) & (px['月']==mon) & (px['人']==man)]  # 条件查询
if pxd.empty:tst+=[man]   # 判断有无取出数据

# print(yzd[ (bgd[2][:6] in yzd['编号']) & (str(int(bgd[2][-5:])) in yzd['编号']) ]) # 判断值是否在数据,错误写法
print(yzd[ (yzd['编号'].str.contains(bgd[2][:6])) &
 (yzd['编号'].str.contains(str(int(bgd[2][-5:])))) &
  (yzd['日期']==str(mon)+'.'+str(day)) ])  # 正确写法

to sql

import psycopg2 as pg
def doDB(sql, params=None):
    """Execute one statement on the local ``zzz`` database and commit it.

    Args:
        sql: The SQL statement. Use ``%s`` placeholders together with
            ``params`` instead of formatting values into the string.
        params: Optional sequence (or mapping) of values for the
            placeholders; ``None`` runs ``sql`` as it is.

    The connection is closed even when the statement fails; in that case
    nothing is committed and the exception propagates to the caller.
    """
    conn = pg.connect(database="zzz", user="postgres", password="postgres", host="127.0.0.1",
                      port="5432")
    try:
        with conn.cursor() as cur:
            cur.execute(sql, params)
        conn.commit()
    finally:
        conn.close()
# save or update; "end" is quoted because END is a reserved word in PostgreSQL
doDB("""insert into "end" values ('1', '12', 1, 1) ON conflict (编号) do update set 年=22,日期='233'""")


# # 一次性多条插入
# sql = "insert into acmr values "
# for i in datas:
#     sql += "('%s','%s',%s,%s,%s,%s)," % tuple(i)
# sql = sql[:-1]
# doDB(sql)


# Upsert every row of the DataFrame ``c`` (five columns expected) into "end".
conn = pg.connect(database="zzz", user="postgres", password="postgres", host="127.0.0.1",port="5432")
try:
    # Placeholders are filled in by the driver: no manual quoting and no SQL
    # injection. The original '...' % list raised TypeError because the %
    # operator needs a tuple. "end" is quoted because END is reserved.
    sql = ('insert into "end" values (%s,%s,%s,%s,%s) '
           'on conflict (编号) do update set 类型=%s,年=%s,日期=%s,图片大小=%s')
    with conn.cursor() as cur:
        for i in c.iterrows():
            one = i[1].tolist()
            # NaN becomes NULL, which is valid for text and numeric columns alike.
            one = [None if pd.isnull(j) else j for j in one]
            # Five values for the insert, then columns 2..5 again for the update.
            cur.execute(sql, one + one[1:])
    conn.commit()
finally:
    conn.close()


# Read the rows back from the database.
conn = pg.connect(database="zzz", user="postgres", password="postgres", host="127.0.0.1", port="5432")
try:
    with conn.cursor() as cur:
        cur.execute('select * from "end"')  # quoted: END is a reserved word
        res = cur.fetchall()  # or fetchone()
finally:
    conn.close()  # closed even when the query fails
print(res)

·查看数据库连接数。还能通过工具-服务器监控
SELECT COUNT(*) FROM pg_stat_activity 

·数据库的save or update,可以do nothing
insert into "end" values (%s,%s) on conflict (编号) do update set 类型='%s',年=%s

·查一段时间的数据,一段时间每个人的工作量
-- Rows within a date range ("end" is quoted because END is a reserved word).
SELECT * FROM "end" WHERE "日期" BETWEEN '20240601' AND '20240801';

-- Workload per person in the same range: number of records approved (批准),
-- checked (校核) and inspected (检验), plus their total, largest total first.
-- NOTE(review): the alias of the total column was lost in the original text;
-- 合计 is a reconstruction.
SELECT
  p.name AS 姓名,
  COALESCE(a1.批准, 0) AS 批准,
  COALESCE(a2.校核, 0) AS 校核,
  COALESCE(a3.检验, 0) AS 检验,
  (COALESCE(a1.批准, 0) + COALESCE(a2.校核, 0) + COALESCE(a3.检验, 0)) AS 合计
FROM
  human p
LEFT JOIN (
  SELECT "批准人", COUNT(*) AS 批准 FROM "end"
  WHERE "日期" BETWEEN '20240601' AND '20240801'
  GROUP BY "批准人"
) a1 ON p.name = a1."批准人"
LEFT JOIN (
  SELECT "校核人", COUNT(*) AS 校核 FROM "end"
  WHERE "日期" BETWEEN '20240601' AND '20240801'
  GROUP BY "校核人"
) a2 ON p.name = a2."校核人"
LEFT JOIN (
  SELECT "检验人", COUNT(*) AS 检验 FROM "end"
  WHERE "日期" BETWEEN '20240601' AND '20240801'
  GROUP BY "检验人"
) a3 ON p.name = a3."检验人"
ORDER BY 合计 DESC;

统计界面

tkinter

from tkinter import messagebox
import tkinter as tk

w1 = tk.Tk()
w1.title('统计')
w1.geometry('900x700')

txt = '输入年份'
label_text = tk.Label(w1, text=txt, font=('楷体',15,'bold'), padx=15, pady=15) # borderwidth框,0无框
label_text.pack(side='top', padx=30) # 位置 右偏左

s1s = tk.StringVar()
def login():
    """Check the typed year and, when valid, replace the window with the yearly overview.

    The year must be one of ``s1``. The overview shows one line per entry of
    ``s2`` with the number of ``.pdf`` files found below
    ``fpath + year + '/' + entry``, followed by a month input whose value is
    checked against ``s3``. An invalid year or month only shows a message box.
    """
    # Read the value before w1 is destroyed: the StringVar belongs to that window.
    year = str(s1s.get())
    if year not in s1:
        txt = '输入错误'
        messagebox.showinfo(title='登陆失败', message=txt)
        return

    # Valid year: switch to the next window.
    w1.destroy()

    w2 = tk.Tk()
    w2.title('统计')
    w2.geometry('900x700')

    # Heading, then the number of documents per entry for the chosen year.
    txt = year + '年的资料 '
    message = tk.Message(w2, text=txt, font=('楷体', 15, 'bold'), width=250, bg='#FFFFFF', padx=5, pady=5)
    message.pack(fill='both')
    for one in s2:
        count = 0
        for _folder, _subdirs, files in os.walk(fpath + year + '/' + one):
            count += len([j for j in files if j.endswith('.pdf')])
        # Built after the walk so a missing or empty folder reports 0 instead
        # of repeating the previous label text.
        txt = '资料  有' + str(count) + '份'
        message = tk.Message(w2, text=txt, font=('楷体', 15, 'bold'), width=250, bg='#FFFFFF', padx=5,
                             pady=5)
        message.pack(fill='both')

    txt = '输入月份'
    label_text1 = tk.Label(w2, text=txt, font=('楷体', 15, 'bold'), padx=10, pady=10)  # borderwidth=0 removes the frame
    label_text1.pack(side='top', padx=30)
    s2s = tk.StringVar()

    def login1():
        """Check the typed month; the follow-up window is not implemented yet."""
        if s2s.get() + '月' in s3:
            # Valid month: the next window would be opened here.
            pass
        else:
            txt = '输入错误'
            messagebox.showinfo(title='登陆失败', message=txt)

    entry_username = tk.Entry(w2, textvariable=s2s)
    button1 = tk.Button(w2, text='选择', font=('宋体', 10, 'bold'), width=8, height=1, command=login1)
    entry_username.pack(pady=10)
    button1.pack(pady=10)

    w2.mainloop()

entry_username = tk.Entry (w1, textvariable=s1s)
button = tk.Button(w1, text='选择', font=('宋体',10,'bold'), width=8, height=1, command=login)
# button.place(x=425, y=425)
entry_username.pack(pady=10)
button.pack(pady=10)
w1.mainloop()

在这里插入图片描述

pyqt6

# 1 A minimal window with one button
from PyQt6.QtWidgets import QApplication, QWidget, QPushButton
app = QApplication([])
window = QWidget()
def met():
    """Slot for the button click: prints 2."""
    print(2)
button = QPushButton('Click me', window)
button.clicked.connect(met)

button.move(50, 100) # author's note: the button appeared centred no matter where it was moved
window.show()
app.exec()

在这里插入图片描述

常用
# 2几个常用功能
import sys
from PyQt6.QtWidgets import (
    QApplication,
    QLabel,
    QLineEdit,
    QMainWindow,
    QPushButton,
    QVBoxLayout,
    QWidget
)
from PyQt6.QtGui import QIcon, QFont

class MainWindow(QMainWindow):
    """Demo window that stacks the three basic widgets vertically."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Widgets App")
        column = QVBoxLayout()  # vertical layout; QHBoxLayout() is the horizontal one

        # One instance each of label (text), line edit (input) and push button.
        for widget_cls in (QLabel, QLineEdit, QPushButton):
            column.addWidget(widget_cls('a'))

        # A label whose text, colour and font size are changed after creation.
        red_label = QLabel('2323')
        red_label.setText('111')
        red_label.setStyleSheet('color:red')
        bigger = red_label.font()
        bigger.setPointSize(23)
        red_label.setFont(bigger)
        column.addWidget(red_label)

        # A button with explicit geometry and font (both calls work on other widgets too).
        push = QPushButton('but')
        push.setGeometry(100, 100, 20, 20)
        push.setFont(QFont('time', 14))
        column.addWidget(push)

        container = QWidget()
        container.setLayout(column)
        self.setCentralWidget(container)

app = QApplication(sys.argv)
window = MainWindow()
window.show()
app.exec()

在这里插入图片描述

登录
# 3登录界面。点击登录调用login方法,实现方法中的逻辑
import sys
from PyQt6.QtWidgets import QApplication, QWidget, QLabel, QLineEdit, QPushButton, QVBoxLayout, QHBoxLayout

class LoginApp(QWidget):
    """Login form with a user name field, a masked password field and two buttons.

    Both buttons currently just print what was typed; real validation
    belongs in ``login`` / ``login2``.
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Login")
        self.setGeometry(100, 100, 300, 200)
        self.username_label = QLabel("Username:")
        self.username_input = QLineEdit()
        self.password_label = QLabel("Password:")
        self.password_input = QLineEdit()
        self.password_input.setEchoMode(QLineEdit.EchoMode.Password)  # hide the typed characters
        self.login_button = QPushButton("Login")
        self.login_button.clicked.connect(self.login)
        self.login_button2 = QPushButton("Login2")
        self.login_button2.clicked.connect(self.login2)

        # One horizontal row per label/input pair, stacked vertically above the buttons.
        vbox = QVBoxLayout()
        hbox1 = QHBoxLayout()
        hbox2 = QHBoxLayout()
        hbox1.addWidget(self.username_label)
        hbox1.addWidget(self.username_input)
        hbox2.addWidget(self.password_label)
        hbox2.addWidget(self.password_input)
        vbox.addLayout(hbox1)
        vbox.addLayout(hbox2)
        vbox.addWidget(self.login_button)
        vbox.addWidget(self.login_button2)
        self.setLayout(vbox)

    def _print_credentials(self):
        """Print the current user name and password (placeholder for real login logic)."""
        username = self.username_input.text()
        password = self.password_input.text()
        print(f"Username: {username}")
        print(f"Password: {password}")

    def login(self):
        """Slot of the "Login" button."""
        self._print_credentials()

    def login2(self):
        """Slot of the "Login2" button."""
        self._print_credentials()
if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = LoginApp()
    window.show()
    sys.exit(app.exec())

在这里插入图片描述

# 4统计界面,按钮后下面显示统计结果
import sys
from PyQt6.QtWidgets import (QApplication, QWidget, QVBoxLayout, QHBoxLayout,QLabel, QLineEdit, QPushButton, QTextEdit, QGroupBox)

class StatisticsWidget(QWidget):
    """Small statistics window: an input line, a button and a read-only result area."""

    def __init__(self):
        super().__init__()

        self.setWindowTitle('统计界面')
        self.setGeometry(100, 100, 400, 300)

        # Input row inside an untitled group box.
        self.edit_input = QLineEdit()
        input_row = QHBoxLayout()
        input_row.addWidget(QLabel('输入数据:'))
        input_row.addWidget(self.edit_input)
        input_box = QGroupBox('')
        input_box.setLayout(input_row)

        # Button that triggers the calculation.
        run_button = QPushButton('统计')
        run_button.clicked.connect(self.calculate_statistics)

        # Read-only text area for the result.
        self.text_result = QTextEdit()
        self.text_result.setReadOnly(True)

        # Stack everything vertically.
        main_column = QVBoxLayout()
        main_column.addWidget(input_box)
        main_column.addWidget(run_button)
        main_column.addWidget(self.text_result)
        self.setLayout(main_column)

    def calculate_statistics(self):
        """Show the length of the typed text as the "count" (placeholder logic)."""
        typed = self.edit_input.text()
        self.text_result.setText(f'数据数量: {len(typed)}')

if __name__ == '__main__':
    app = QApplication(sys.argv)
    statistics_widget = StatisticsWidget()
    statistics_widget.show()
    sys.exit(app.exec())

在这里插入图片描述

# 5更完善的统计界面
import sys
from PyQt6.QtWidgets import (QApplication, QWidget, QVBoxLayout, QHBoxLayout,QLabel, QLineEdit, QPushButton, QTextEdit, QGroupBox)

class StatisticsWidget(QWidget):
    """Statistics window mock-up: year / month / department inputs and two report buttons.

    The reports are placeholders: they echo the inputs and use the length of
    the year text as a stand-in count.
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle('统计界面')
        self.setGeometry(100, 100, 600, 400)
        self.vbox = QVBoxLayout()

        # Input row: year, month and department inside one untitled group box.
        groupbox_input = QGroupBox('')
        hbox_input = QHBoxLayout()
        self.edit_input = QLineEdit()
        hbox_input.addWidget(QLabel('输入年:'))
        hbox_input.addWidget(self.edit_input)
        self.edit_input2 = QLineEdit()
        hbox_input.addWidget(QLabel('输入月:'))
        hbox_input.addWidget(self.edit_input2)
        self.edit_input3 = QLineEdit()
        hbox_input.addWidget(QLabel('输入科室:'))
        hbox_input.addWidget(self.edit_input3)
        groupbox_input.setLayout(hbox_input)
        self.vbox.addWidget(groupbox_input)

        # Row with the two report buttons.
        hbox_input2 = QHBoxLayout()
        button = QPushButton('统计资料数量')
        button2 = QPushButton('统计人完成情况')
        button.clicked.connect(self.calculate_statistics)
        button2.clicked.connect(self.calculate_statistics2)
        hbox_input2.addWidget(button)
        hbox_input2.addWidget(button2)
        self.vbox.addLayout(hbox_input2)

        # Read-only result area.
        self.text_result = QTextEdit()
        self.text_result.setReadOnly(True)
        self.vbox.addWidget(self.text_result)

        self.setLayout(self.vbox)

        # "Open folder" button below the result area; not connected to anything yet.
        jump_button = QPushButton('跳转到文件夹')
        self.vbox.addWidget(jump_button)

    def calculate_statistics(self):
        """Show the placeholder document counts for the typed year and month."""
        data = self.edit_input.text()
        data2 = self.edit_input2.text()
        # Placeholder: the "count" is just the length of the year text.
        count = len(data)
        self.text_result.setText(data+'年'+data2+'月'+f'\n资料:{count}\n资料:{count}\n资料:{count}\n资料:{count}')

    def calculate_statistics2(self):
        """Show the placeholder completion report for the typed year, month and department."""
        data = self.edit_input.text()
        data2 = self.edit_input2.text()
        data3 = self.edit_input3.text()
        self.text_result.setText(data+'年'+data2+'月'+data3+'已完成:\nxx\n未完成:\nxx')

if __name__ == '__main__':
    app = QApplication(sys.argv)
    statistics_widget = StatisticsWidget()
    statistics_widget.show()
    sys.exit(app.exec())

在这里插入图片描述

统计
# 6最终
import sys
from PyQt6.QtWidgets import (QApplication, QWidget, QVBoxLayout, QHBoxLayout,QLabel, QLineEdit, QPushButton, QTextEdit, QGroupBox)
from PyQt6.QtGui import QIcon, QFont
# 4 kimi生成的统计界面,按钮后下面显示统计结果,自用
class StatisticsWidget(QWidget):
    """Statistics window working on the classified folder tree.

    Uses names defined outside this class: ``fpath`` (root folder), ``s1``
    (valid years), ``s2`` (departments), ``s3`` (month names such as
    ``'12月'``), ``s4`` (sub-folders of a month) and ``names`` (people per
    department).
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle('统计界面')
        self.setGeometry(100, 100, 600, 400)
        self.vbox = QVBoxLayout()

        # Input row: year, month and department inside one untitled group box.
        groupbox_input = QGroupBox('')
        hbox_input = QHBoxLayout()
        self.edit_input = QLineEdit()
        hbox_input.addWidget(QLabel('输入年:'))
        hbox_input.addWidget(self.edit_input)
        self.edit_input2 = QLineEdit()
        hbox_input.addWidget(QLabel('输入月:'))
        hbox_input.addWidget(self.edit_input2)
        self.edit_input3 = QLineEdit()
        hbox_input.addWidget(QLabel('输入科室:'))
        hbox_input.addWidget(self.edit_input3)
        groupbox_input.setLayout(hbox_input)
        self.vbox.addWidget(groupbox_input)

        # Row with the two report buttons.
        hbox_input2 = QHBoxLayout()
        button = QPushButton('统计各科室资料数量')
        button2 = QPushButton('统计该科室人完成情况')
        button.clicked.connect(self.calculate_statistics)
        button2.clicked.connect(self.calculate_statistics2)
        hbox_input2.addWidget(button)
        hbox_input2.addWidget(button2)
        self.vbox.addLayout(hbox_input2)

        # Read-only result area.
        self.text_result = QTextEdit()
        self.text_result.setFont(QFont('楷体',14))
        self.text_result.setReadOnly(True)
        self.vbox.addWidget(self.text_result)

        self.setLayout(self.vbox)

        # Button that opens the matching folder in the file manager.
        jump_button = QPushButton('跳转到文件夹')
        jump_button.clicked.connect(self.jump)
        self.vbox.addWidget(jump_button)

    @staticmethod
    def _count_pdfs(folder):
        """Return the number of ``.pdf`` files anywhere below ``folder`` (0 when it does not exist)."""
        return sum(
            name.endswith('.pdf')
            for _root, _dirs, files in os.walk(folder)
            for name in files
        )

    def jump(self):
        """Open the folder of the typed year/department/month; do nothing on invalid input."""
        data = self.edit_input.text()
        data2 = self.edit_input2.text()
        data3 = self.edit_input3.text()
        if data in s1 and data2+'月' in s3 and data3 in s2:
            os.startfile(fpath+'/'+data+'/'+data3+'/'+data2+'月/')

    def calculate_statistics(self):
        """Show the number of PDFs per department for the year, or for the month when one is given."""
        data = self.edit_input.text()  # e.g. 2024
        data2 = self.edit_input2.text()  # e.g. 12
        if data not in s1:
            self.text_result.setText('输入错误,至少要输入 年')
            return

        # With a valid month only that month's folder is counted, otherwise the whole year.
        if data2 + '月' in s3:
            suffix = '/' + data2 + '月'
            header = data + '年' + data2 + '月\n'
        else:
            suffix = ''
            header = data + '年\n'

        txt = ''
        for one in s2:
            count = self._count_pdfs(fpath + data + '/' + one + suffix)
            txt += one + ' 资料有' + str(count) + '份\n'
        self.text_result.setText(header + txt)

    def calculate_statistics2(self):
        """Show PDFs per sub-folder and who has / has not finished for one department and month."""
        data = self.edit_input.text()  # e.g. 2024
        data2 = self.edit_input2.text()  # e.g. 12
        data3 = self.edit_input3.text()  # department
        if not (data in s1 and data2 + '月' in s3 and data3 in s2):
            self.text_result.setText( '输入错误,需要输入 年 月 科室')
            return

        spath = fpath+'/'+data+'/'+data3+'/'+data2+'月/'
        txt = ''
        for one in s4:
            pdfs = [i for i in os.listdir(spath + one) if i.endswith('.pdf')]
            txt += one[1:3] + '资料 有' + str(len(pdfs)) + '份\n'

        # x.txt in the second sub-folder holds the names found so far: a person
        # whose name occurs in it is done, everyone else of the department is not.
        namefile = spath+s4[1]+'/x.txt'
        if os.path.isfile(namefile):
            with open(namefile,'r') as o:
                namestxt = o.read()
            people = list(dict.fromkeys(names[data3]))  # de-duplicated, order kept
            n1 = ''.join(i + ' ' for i in people if i in namestxt)
            # The original computed done - all, which is always empty.
            n2 = ''.join(i + ' ' for i in people if i not in namestxt)
        else:
            n1, n2 = 'n', 'n'
        self.text_result.setText(data+'年'+data2+'月'+data3+'\n'+txt+'\n'+'已完成:\n'+n1+'\n\n未完成:\n'+n2)

if __name__ == '__main__':
    app = QApplication(sys.argv)
    statistics_widget = StatisticsWidget()
    statistics_widget.show()
    sys.exit(app.exec())

在这里插入图片描述

主界面调用
# 7主程序
import sys
from PyQt6.QtWidgets import QApplication, QPushButton, QWidget, QGridLayout, QFileDialog
from z1mkdir import fun1_mkdir
from z2classify2 import fun2_classify
from z3stat2 import fun3_stat

class App(QWidget):
    """Main menu with three buttons: create folders, classify, statistics."""

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Build the window: three buttons in a one-column grid."""
        self.setWindowTitle('pyqt6 main')
        self.setGeometry(100, 100, 400, 300)

        self.button1 = QPushButton('建立文件夹', self)
        self.button1.setFixedSize(200,20)
        self.button2 = QPushButton('分类', self)
        self.button3 = QPushButton('统计', self)

        grid = QGridLayout()
        buttons_and_slots = (
            (self.button1, self.fun1),
            (self.button2, self.fun2),
            (self.button3, self.fun3),
        )
        for slot_row, (btn, handler) in enumerate(buttons_and_slots):
            btn.clicked.connect(handler)
            grid.addWidget(btn, slot_row, 0)
        grid.setSpacing(10)  # gap between the widgets
        self.setLayout(grid)

    def fun1(self):
        """Ask for a folder, run ``fun1_mkdir`` on it and close the window."""
        print(1)
        chosen = QFileDialog.getExistingDirectory(self, "选择文件夹", "/")
        if not chosen:
            return  # dialog cancelled
        print(f"选择的文件夹路径是: {chosen}")
        fun1_mkdir(chosen)
        self.close()

    def fun2(self):
        """Ask for a folder, run ``fun2_classify`` on it and close the window."""
        print(2)
        chosen = QFileDialog.getExistingDirectory(self, "选择文件夹", "/")
        if not chosen:
            return  # dialog cancelled
        print(f"选择的文件夹路径是: {chosen}")
        fun2_classify(chosen)
        self.close()

    def fun3(self):
        """Set the module-level ``flag`` to 2 and close the window."""
        global flag
        print(3)
        flag = 2
        self.close()  # the statistics window is opened by the caller afterwards

# flag is set to 2 by App.fun3 to request the statistics window after the menu closes.
flag=0
# if __name__ == '__main__':
app = QApplication(sys.argv)
ex = App()
ex.show()
app.exec()  # returns once the menu's event loop has ended
print('flag',flag)
# sys.exit(app.exec())
if flag==2:
    fun3_stat()

在这里插入图片描述

# 8优化界面
import sys
from PyQt6.QtWidgets import QApplication, QWidget, QVBoxLayout, QHBoxLayout,QLabel, QLineEdit, QPushButton, QTextEdit, QGroupBox,QComboBox
from PyQt6.QtGui import QIcon, QFont
import pandas as pd
class StatisticsWidget(QWidget):
    """Statistics window backed by the summary sheet ``fpath + 'bg.xlsx'``.

    Each sheet row is read positionally: column 0 is the document type,
    1 the year, 3 the date text (month taken from characters 5-6), and 4, 5, 6
    the approver, checker and inspector. ``s1`` (years), ``names`` (people)
    and ``fpath`` come from outside this class.
    """

    _ROLE_TITLES = ('批准人有 ', '校核人有 ', '检验人有 ')

    def __init__(self):
        super().__init__()
        self.setWindowTitle('统计界面')
        self.setGeometry(100, 100, 600, 400)
        self.vbox = QVBoxLayout()

        # Selection row: year, month and person inside one untitled group box.
        groupbox_input = QGroupBox('')
        hbox_input = QHBoxLayout()
        self.edit_input = QComboBox()
        self.edit_input.addItems(s1)
        hbox_input.addWidget(QLabel('选择年:'))
        hbox_input.addWidget(self.edit_input)
        self.edit_input2 = QComboBox()
        self.edit_input2.addItems([str(m) for m in range(1, 13)])
        hbox_input.addWidget(QLabel('选择月:'))
        hbox_input.addWidget(self.edit_input2)
        self.edit_input3 = QComboBox()
        self.edit_input3.addItems(names)
        hbox_input.addWidget(QLabel('选择人:'))
        hbox_input.addWidget(self.edit_input3)
        groupbox_input.setLayout(hbox_input)
        self.vbox.addWidget(groupbox_input)

        # Row with the three report buttons.
        hbox_input2 = QHBoxLayout()
        for caption, handler in (
            ('统计一年的资料', self.calculate_statistics),
            ('统计某年某月的资料', self.calculate_statistics2),
            ('统计某月某人的资料', self.calculate_statistics3),
        ):
            report_button = QPushButton(caption)
            report_button.clicked.connect(handler)
            hbox_input2.addWidget(report_button)
        self.vbox.addLayout(hbox_input2)

        # Read-only result area.
        self.text_result = QTextEdit()
        self.text_result.setFont(QFont('楷体',14))
        self.text_result.setReadOnly(True)
        self.vbox.addWidget(self.text_result)

        self.setLayout(self.vbox)

        # Button that opens the root folder in the file manager.
        jump_button = QPushButton('跳转到文件夹')
        jump_button.clicked.connect(self.jump)
        self.vbox.addWidget(jump_button)

    def jump(self):
        """Open the root folder ``fpath``."""
        os.startfile(fpath)

    # ---- helpers -------------------------------------------------------

    def _load_rows(self):
        """Return the sheet rows as plain lists, or None (after showing a message) when the file is missing."""
        try:
            sheet = pd.read_excel(fpath+'bg.xlsx')
        except FileNotFoundError:
            self.text_result.setText('未发现统计文件bg.xlsx')
            return None
        return [row.tolist() for _, row in sheet.iterrows()]

    @staticmethod
    def _month_of(value):
        """Return the month number found at characters 5-6 of ``value``, or None when there is none."""
        # str() keeps this working for empty cells (NaN) and parsed dates.
        try:
            return int(str(value)[5:7])
        except ValueError:
            return None

    @classmethod
    def _describe_types(cls, rows):
        """Return the per-type part of a report: count plus approvers, checkers and inspectors."""
        counts = {}
        people = ({}, {}, {})  # type -> set of names, one dict per role
        for one in rows:
            counts[one[0]] = counts.get(one[0], 0) + 1
            for bucket, person in zip(people, one[4:7]):
                bucket.setdefault(one[0], set()).add(person)

        t = ''
        for kind, amount in counts.items():
            t += '\n\n' + kind + ' 资料有' + str(amount) + '份'
            for title, bucket in zip(cls._ROLE_TITLES, people):
                t += '\n' + title
                # Empty cells arrive as NaN (a float); only real names are listed, sorted for stable output.
                for person in sorted(p for p in bucket[kind] if isinstance(p, str)):
                    t += person + ' '
        return t

    # ---- reports -------------------------------------------------------

    def calculate_statistics(self):
        """Report all documents of the selected year, grouped by type."""
        data = self.edit_input.currentText()  # e.g. 2024
        rows = self._load_rows()
        if rows is None:
            return

        selected = [one for one in rows if one[1] == int(data)]
        if selected:
            t = data + '共有资料' + str(len(selected)) + '份,其中' + self._describe_types(selected)
        else:
            # Nothing for that year: list the years that do have documents.
            t = data + '没有资料\n只有'
            for year in sorted({str(one[1]) for one in rows}):
                t += year + ' '
            t += '年的资料'
        self.text_result.setText(t)

    def calculate_statistics2(self):
        """Report all documents of the selected year and month, grouped by type."""
        data = self.edit_input.currentText()  # e.g. 2024
        data2 = self.edit_input2.currentText()  # e.g. 12
        rows = self._load_rows()
        if rows is None:
            return

        selected = [
            one for one in rows
            if one[1] == int(data) and self._month_of(one[3]) == int(data2)
        ]
        if selected:
            t = data + '年' + data2 + '月' + '共有资料' + str(len(selected)) + '份,其中'
            t += self._describe_types(selected)
        else:
            # Nothing for that month: list the year+month combinations that exist.
            t = data + '年' + data2 + '月' + '没有资料\n只有'
            for period in sorted({str(one[1]) + str(one[3])[5:7] for one in rows}):
                t += period + ' '
            t += '的资料'
        self.text_result.setText(t)

    def calculate_statistics3(self):
        """Report the documents of the selected year and month that involve the selected person."""
        data = self.edit_input.currentText()  # e.g. 2024
        data2 = self.edit_input2.currentText()  # e.g. 12
        data3 = self.edit_input3.currentText()  # person
        rows = self._load_rows()
        if rows is None:
            return

        worddict = {}
        r1 = {}  # type -> role of the person, taken from the first matching row of that type
        for one in rows:
            involved = data3 in one[4:7]
            if not (one[1] == int(data) and self._month_of(one[3]) == int(data2) and involved):
                continue
            if one[0] in worddict:
                worddict[one[0]] += 1
            else:
                # When the person holds several roles in the row, the last one listed wins.
                if data3 == one[4]: r1[one[0]] = '批准'
                if data3 == one[5]: r1[one[0]] = '校核'
                if data3 == one[6]: r1[one[0]] = '检验'
                worddict[one[0]] = 1

        if worddict:
            t = data + '年' + data2 + '月' + data3 + '共有资料' + str(sum(worddict.values())) + '份,其中'
            for kind in worddict:
                t += '\n\n'
                t += kind + ' 资料有' + str(worddict[kind]) + '份  ' + data3 + r1[kind]
        else:
            t = '没有资料'
        self.text_result.setText(t)

# if __name__ == '__main__':
def fun3_stat():
    """Print the data folder, open the statistics window and run the Qt event loop."""
    print(fpath)
    qt_app = QApplication(sys.argv)
    window = StatisticsWidget()
    window.show()
    qt_app.exec()


# Runs unconditionally when this script is executed or imported.
fun3_stat()

在这里插入图片描述

表格
# 9表格结果
import os
from config import fpath,s1,cls,cls1,names,names1 # 分类文件夹目录,类型
# Drop the entries that also appear in names1 / cls1 (the ambiguous ones), and
# turn the remaining cls mapping into "key value" strings for the combo box.
names = [name for name in names if name not in names1]
cls = [code + ' ' + label for code, label in cls.items() if code not in cls1]

import sys
from PyQt6.QtWidgets import QApplication, QWidget, QVBoxLayout, QHBoxLayout,QLabel, QLineEdit, QPushButton, QTextEdit, QGroupBox,QComboBox,QTableWidget,QTableWidgetItem
from PyQt6.QtGui import QIcon, QFont,QColor
from pandas import read_excel

class StatisticsWidget(QWidget):
    """Self-check statistics window backed by ``end.xlsx``.

    The spreadsheet is read once, at construction time, from
    ``fpath + 'end.xlsx'`` and kept in ``self.c`` (``None`` when the file is
    missing). Each query button filters that frame and renders the result into
    ``self.table``.

    Rows are partly addressed by position (``row.tolist()``): index 1 is shown
    under 编号, 5 under 报告日期, 6-8 under 批准人/校核人/检验人, 13 under 共查 and
    14 under 独查.
    NOTE(review): these positions are taken from the code only - confirm them
    against the real sheet layout.
    """

    # Positional index of each self-check result column.
    _CHECK_COLUMNS = {'共查': 13, '独查': 14}
    # Query mode (combo box text) -> result columns displayed for that mode.
    _MODES = {'共查': ('共查',), '独查': ('独查',), '共查独查': ('共查', '独查')}
    _BASE_HEADERS = ("编号", "报告日期", "批准人", "校核人", "检验人")
    _MISSING_FILE_MSG = '未发现统计文件end.xlsx'
    _BAD_RANGE_MSG = '编号输入错误。比如1-100应该输入1 100'
    _MAX_NUMBER = 50000  # upper bound accepted for the number range
    # Result value that is NOT highlighted. NOTE(review): presumably "success".
    _OK_VALUE = 'sus'

    def __init__(self):
        """Build the widgets, wire the buttons and load ``end.xlsx``."""
        super().__init__()
        self.setWindowTitle('统计界面')
        self.setGeometry(500, 300, 800, 600)
        self.vbox = QVBoxLayout()

        # Input row: query mode, year, record type and the two number-range fields.
        groupbox_input = QGroupBox('')
        hbox_input = QHBoxLayout()
        self.edit_input = QComboBox()
        self.edit_input.addItems(list(self._MODES))
        hbox_input.addWidget(QLabel('查询模式:'))
        hbox_input.addWidget(self.edit_input)
        groupbox_input.setLayout(hbox_input)
        self.vbox.addWidget(groupbox_input)

        self.edit_input2 = QComboBox()
        self.edit_input2.addItems(s1)
        hbox_input.addWidget(QLabel('选择年:'))
        hbox_input.addWidget(self.edit_input2)

        self.edit_input3 = QComboBox()
        self.edit_input3.addItems(cls)
        hbox_input.addWidget(QLabel('选择类型:'))
        hbox_input.addWidget(self.edit_input3)

        self.et1 = QLineEdit()  # first number of the range
        hbox_input.addWidget(self.et1)
        self.et2 = QLineEdit()  # last number of the range
        hbox_input.addWidget(self.et2)

        # Query buttons.
        hbox_input2 = QHBoxLayout()
        button = QPushButton('统计一年的资料')
        button2 = QPushButton('统计某类型自查情况')
        button3 = QPushButton('统计编号范围的自查')
        hbox_input2.addWidget(button)
        hbox_input2.addWidget(button2)
        hbox_input2.addWidget(button3)
        self.vbox.addLayout(hbox_input2)

        # Result table; read-only from the start.
        self.table = QTableWidget()
        self.table.setFont(QFont('楷体', 12))
        self.table.setEditTriggers(QTableWidget.EditTrigger.NoEditTriggers)
        self.vbox.addWidget(self.table)
        self.setLayout(self.vbox)

        button.clicked.connect(self.calculate_statistics)
        button2.clicked.connect(self.calculate_statistics2)
        button3.clicked.connect(self.calculate_statistics3)

        jump_button = QPushButton('跳转到文件夹')
        jump_button.clicked.connect(self.jump)
        self.vbox.addWidget(jump_button)

        # Always define self.c so the handlers can detect a missing file
        # instead of failing with AttributeError.
        self.c = None
        try:
            self.c = read_excel(fpath+'end.xlsx')
        except FileNotFoundError:
            self._show_lines([self._MISSING_FILE_MSG])
        else:
            print(self.c.shape)

    # ------------------------------------------------------------------ helpers

    def _fit_table(self):
        """Resize rows and columns to their contents and keep the table read-only."""
        self.table.resizeRowsToContents()
        self.table.resizeColumnsToContents()
        self.table.setEditTriggers(QTableWidget.EditTrigger.NoEditTriggers)

    def _show_lines(self, lines):
        """Display each string of ``lines`` as one row of a single-column table."""
        # clear() also drops header labels left over from a previous query.
        self.table.clear()
        self.table.setColumnCount(1)
        self.table.setRowCount(len(lines))
        for row, text in enumerate(lines):
            # QTableWidget.setItem takes (row, column, item).
            self.table.setItem(row, 0, QTableWidgetItem(text))
        self._fit_table()

    def _require_data(self):
        """Return True when the spreadsheet is loaded; otherwise show the missing-file message."""
        if self.c is None:
            self._show_lines([self._MISSING_FILE_MSG])
            return False
        return True

    @staticmethod
    def _as_int(value):
        """Return ``int(value)``, or ``value`` unchanged when it cannot be converted (e.g. NaN)."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return value

    def _result_row(self, one, mode):
        """Pick the displayed cells out of the positional row ``one`` for query ``mode``."""
        row = [one[1], self._as_int(one[5]), one[6], one[7], one[8]]
        row.extend(one[self._CHECK_COLUMNS[name]] for name in self._MODES[mode])
        return row

    def _show_records(self, rows, mode, blank_is_missing=False):
        """Render result ``rows`` with headers for ``mode``.

        A row is coloured red when any of its self-check cells differs from
        ``_OK_VALUE``. With ``blank_is_missing`` set, such a row is coloured
        blue instead when its last cell is ``''`` (placeholder for a number
        that has no record).
        """
        headers = list(self._BASE_HEADERS) + list(self._MODES[mode])
        first_check = len(self._BASE_HEADERS)

        self.table.clear()
        self.table.setColumnCount(len(headers))
        self.table.setRowCount(len(rows))
        self.table.setHorizontalHeaderLabels(headers)

        for row_index, row in enumerate(rows):
            flagged = any(value != self._OK_VALUE for value in row[first_check:])
            if blank_is_missing and row[-1] == '':
                colour = QColor(0, 0, 255)
            else:
                colour = QColor(255, 0, 0)
            for col_index, value in enumerate(row):
                text = str(value)
                if text.endswith('\n'):
                    text = text[:-1]
                item = QTableWidgetItem(text)
                if flagged:
                    item.setForeground(colour)
                self.table.setItem(row_index, col_index, item)
        self._fit_table()

    # ------------------------------------------------------------------- slots

    def jump(self):
        """Print the current selections (opening the folder is disabled)."""
        data = self.edit_input.currentText()
        data2 = self.edit_input2.currentText()
        data3 = self.edit_input3.currentText()
        print(data,data2,data3)
        # os.startfile(fpath)  # raised an error, possibly no permission on the shared drive

    def calculate_statistics(self):
        """Show how many records each record type has in the selected year."""
        data = self.edit_input2.currentText()  # e.g. 2024
        if not self._require_data():
            return

        years = self.c['年'].drop_duplicates().to_list()
        year = int(data)
        c1 = self.c[self.c['年'] == year]

        worddict = {}  # value of column 0 -> number of rows
        for _, record in c1.iterrows():
            one = record.tolist()
            # NOTE(review): column 2 is compared with the year again, as in the
            # original code; presumably it is the same '年' column - confirm.
            if one[2] == year:
                worddict[one[0]] = worddict.get(one[0], 0) + 1
        if not c1.empty:
            print(worddict)

        if worddict:
            lines = [data + '共有资料' + str(sum(worddict.values())) + '份,其中']
            lines.extend(f'{kind} 资料有{amount}份' for kind, amount in worddict.items())
        else:
            lines = [
                data + '没有资料',
                '只有' + ''.join(str(y) + ' ' for y in years) + '年的资料',
            ]
        self._show_lines(lines)

    def calculate_statistics2(self):
        """List the self-check results of every record of the selected type and year."""
        data = self.edit_input.currentText()    # query mode
        data2 = self.edit_input2.currentText()  # e.g. 2024
        data3 = self.edit_input3.currentText()  # "<key> <type>" entry built from cls
        print(data,data2,data3)
        if not self._require_data():
            return

        c1 = self.c[(self.c['类型'] == data3.split(' ')[1]) & (self.c['年'] == int(data2))]
        rows = [self._result_row(record.tolist(), data) for _, record in c1.iterrows()]

        if rows:
            self._show_records(rows, data)
        else:
            self._show_lines([data2 + '年' + data3 + '没有资料'])

    def calculate_statistics3(self):
        """List the self-check results for a range of record numbers.

        The two line edits give the first and last number (1 <= first <= last
        <= 50000). Each number is expanded to
        ``<key><year><number zero-padded to 5 digits>`` and looked up in the
        编号 column; numbers without a record get a '没数据' placeholder row.
        """
        data = self.edit_input.currentText()    # query mode
        data2 = self.edit_input2.currentText()  # e.g. 2024
        data3 = self.edit_input3.currentText()  # "<key> <type>" entry built from cls
        print(data,data2,data3)

        # Only the user input is validated here, so that unrelated failures are
        # no longer reported as an input error.
        try:
            bh1, bh2 = int(self.et1.text()), int(self.et2.text())
        except ValueError:
            self._show_lines([self._BAD_RANGE_MSG])
            return
        print(bh1, bh2)
        if bh1 > bh2 or bh1 < 1 or bh2 > self._MAX_NUMBER:
            self._show_lines([self._BAD_RANGE_MSG])
            return
        if not self._require_data():
            return

        parts = data3.split(' ')
        prefix = parts[0] + data2
        c1 = self.c[(self.c['类型'] == parts[1]) & (self.c['年'] == int(data2))]

        rows = []
        if not c1.empty:
            # Index the records by number once instead of filtering the frame
            # for every number in the range.
            by_number = {}
            for _, record in c1.iterrows():
                by_number.setdefault(record['编号'], []).append(record.tolist())

            blanks = [''] * (3 + len(self._MODES[data]))
            for number in range(bh1, bh2 + 1):
                full_number = prefix + str(number).zfill(5)
                found = by_number.get(full_number)
                if found:
                    rows.extend(self._result_row(one, data) for one in found)
                else:
                    rows.append([full_number, '没数据'] + blanks)

        if rows:
            self._show_records(rows, data, blank_is_missing=True)
        else:
            self._show_lines([data2 + '年' + data3 + '没有资料'])


# if __name__ == '__main__':
def fun3_stat():
    """Entry point: print the data folder and show the statistics window until it is closed."""
    print(fpath)
    application = QApplication(sys.argv)
    main_widget = StatisticsWidget()
    main_widget.show()
    application.exec()


if __name__ == '__main__':
    fun3_stat()

在这里插入图片描述

内嵌matplotlib
# 10 内置左右两个Matplotlib图
from PyQt6.QtWidgets import QApplication, QMainWindow, QVBoxLayout, QWidget
# pyqt嵌入Matplotlib图表,用 FigureCanvasQTAgg 类来实现
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
class MainWindow(QWidget):
    """Widget embedding one Matplotlib figure with a line chart and a pie chart side by side."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle('统计界面')

        # The figure is drawn through a Qt canvas widget.
        self.figure = Figure()
        self.canvas = FigureCanvas(self.figure)

        box = QVBoxLayout()
        box.addWidget(self.canvas)
        self.setLayout(box)

        # Left half of the 1x2 grid: line chart.
        line_axes = self.figure.add_subplot(121)
        line_axes.plot([1, 2, 3, 4], [10, 20, 25, 30])
        line_axes.set_title('Matplotlib in PyQt')

        # Right half: pie chart.
        pie_axes = self.figure.add_subplot(122)
        pie_axes.pie(
            [10, 20, 25, 30],
            explode=[0.0, 0.0, 0.0, 0.0],
            labels=['HN', 'SS', 'SN', 'HJ'],
            colors=['blue', 'yellow', 'red', 'green'],
        )
        pie_axes.axis('equal')  # equal aspect ratio so the pie is a circle
        pie_axes.set_title('Matplot1lib in PyQt')

# Script entry point: show the chart window and run the Qt event loop.
if __name__ == "__main__":
    app = QApplication([])
    window = MainWindow()
    window.show()
    app.exec()

在这里插入图片描述

跳转
# 11 窗口跳转 一次性
import sys
from PyQt6.QtWidgets import QApplication, QPushButton, QWidget, QVBoxLayout, QMainWindow,QLabel

class FirstWindow(QWidget):
    """Start window with two buttons, each opening a different secondary window."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("First Window")
        self.setGeometry(100, 100, 280, 80)
        # Kept in a local: assigning to self.layout would shadow the inherited
        # QWidget.layout() method on this instance.
        layout = QVBoxLayout()

        self.button = QPushButton("Go to Second Window", self)
        self.button.clicked.connect(self.open_second_window)
        self.button2 = QPushButton("Go to 2", self)
        self.button2.clicked.connect(self.open_second_window2)
        layout.addWidget(self.button)
        layout.addWidget(self.button2)
        self.setLayout(layout)

    def open_second_window(self):
        """Create and show ``SecondWindow``."""
        # Stored on self so the window object stays referenced; with a local
        # variable the new window disappears immediately.
        self.second_window = SecondWindow()
        self.second_window.show()

    def open_second_window2(self):
        """Create and show ``SecondWindow2``."""
        # Stored on self for the same reason as above.
        self.second_window2 = SecondWindow2()
        self.second_window2.show()

class SecondWindow(QWidget):
    """Secondary window that only shows a text label."""

    def __init__(self):
        super().__init__()
        self.setGeometry(300, 300, 280, 80)
        self.setWindowTitle("Second Window")

        self.label = QLabel("This is the second window.", self)
        box = QVBoxLayout()
        box.addWidget(self.label)
        self.setLayout(box)

class SecondWindow2(QWidget):
    """Alternative secondary window; differs from ``SecondWindow`` only in its title."""

    def __init__(self):
        super().__init__()
        self.setGeometry(300, 300, 280, 80)
        self.setWindowTitle("Second Window2")

        self.label = QLabel("This is the second window.", self)
        box = QVBoxLayout()
        box.addWidget(self.label)
        self.setLayout(box)

# Script entry point: show the first window and run the Qt event loop.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    first_window = FirstWindow()
    first_window.show()
    app.exec()
# 12 pyqt界面跳转并返回
import sys
from PyQt6.QtWidgets import QApplication, QPushButton, QWidget, QVBoxLayout

class MainWindow(QWidget):
    """Main window whose button opens ``SecondWindow`` and hides this window."""

    def __init__(self):
        super().__init__()
        self.setGeometry(100, 100, 280, 200)
        self.setWindowTitle("Main Window")

        self.button = QPushButton("Open Second Window", self)
        self.button.clicked.connect(self.open_second_window)

        box = QVBoxLayout()
        box.addWidget(self.button)
        self.setLayout(box)

    def open_second_window(self):
        """Show a new ``SecondWindow`` that knows this window, then hide this window."""
        child = SecondWindow(self)
        self.second_window = child  # keep a reference on the instance
        child.show()
        self.hide()  # hide the main window; self.close() would work as well

class SecondWindow(QWidget):
    """Secondary window with a button that closes it and shows the main window again."""

    def __init__(self, main_window):
        super().__init__()
        self.setGeometry(300, 300, 280, 200)
        self.setWindowTitle("Second Window")

        def go_back():
            # Closure that hands the main-window reference to the slot.
            self.return_to_main(main_window)

        self.return_button = QPushButton("Return to Main Window", self)
        self.return_button.clicked.connect(go_back)

        box = QVBoxLayout()
        box.addWidget(self.return_button)
        self.setLayout(box)

    def return_to_main(self, main_window):
        """Close this window and show ``main_window``."""
        self.close()
        main_window.show()

# Script entry point: show the main window and run the Qt event loop.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    app.exec()
颜色
import sys
sys.path.append('.')
import os
from config import s1
from stat import StatisticsWidget,StatisticsWidget2
from PyQt6.QtWidgets import QApplication, QWidget, QVBoxLayout, QHBoxLayout,QLabel, QLineEdit, QPushButton, QTextEdit, QGroupBox,QComboBox,QSizePolicy
# from PyQt6.QtGui import QIcon, QFont,QPalette,QColor
# pyqt嵌入Matplotlib图表,用 FigureCanvasQTAgg 类来实现
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # 指定默认字体,显示中文
class tmz(QWidget):
    """Coloured dashboard: year/month selectors, two embedded pie charts and jump buttons.

    The pie values drawn by ``cct`` are fixed demo numbers combined with the
    selected year and month; no data file is read here.
    """

    def __init__(self):
        super().__init__()

        self.setWindowTitle('统计界面')
        self.setGeometry(500, 300, 1100, 560)
        self.setStyleSheet("background-color: #E3FFFF;")  # window background colour
        self.vbox = QVBoxLayout()

        # 1. Input row.
        groupbox_input = QGroupBox('')
        # Fixed height, expandable width when the window is resized.
        groupbox_input.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed)
        hbox_input = QHBoxLayout()
        self.edit_input = QComboBox()
        self.edit_input.addItems(s1)
        hbox_input.addWidget(QLabel('选择年:'))
        hbox_input.addWidget(self.edit_input)
        groupbox_input.setLayout(hbox_input)
        self.vbox.addWidget(groupbox_input)

        self.edit_input2 = QComboBox()
        self.edit_input2.addItems([str(month) for month in range(1, 13)])
        hbox_input.addWidget(QLabel('选择月:'))
        hbox_input.addWidget(self.edit_input2)

        button = QPushButton('统计某年某月的资料')
        button.setStyleSheet("background-color: #A8FF6F;")
        hbox_input.addWidget(button)
        button.clicked.connect(self.cct)

        # 2. Matplotlib figure embedded through a Qt canvas.
        self.figure = Figure()
        self.figure.set_facecolor('#E3FFFF')  # same colour as the window background
        self.canvas = FigureCanvas(self.figure)
        # Not stored as self.layout: that name would shadow the inherited
        # QWidget.layout() method on this instance.
        self.canvas_layout = QVBoxLayout()
        self.canvas_layout.addWidget(self.canvas)
        self.vbox.addLayout(self.canvas_layout)

        # 3. Buttons that open the other windows.
        self.downb = QHBoxLayout()
        button1 = QPushButton('跳转到界面1')
        button1.clicked.connect(self.jump1)
        button2 = QPushButton('跳转到界面2')
        button2.clicked.connect(self.jump2)
        button1.setStyleSheet("background-color: #FFFF91;")
        button2.setStyleSheet("background-color: #D8C2FF;")
        self.downb.addWidget(button1)
        self.downb.addWidget(button2)
        self.vbox.addLayout(self.downb)

        self.setLayout(self.vbox)

    def jump1(self):
        """Create and show a ``StatisticsWidget`` window."""
        # Stored on self so the window object stays referenced; with a local
        # variable the new window disappears immediately.
        self.second_window = StatisticsWidget()
        self.second_window.show()

    def jump2(self):
        """Create and show a ``StatisticsWidget2`` window."""
        self.second_window2 = StatisticsWidget2()
        self.second_window2.show()

    def cct(self):
        """Redraw both pie charts for the selected year and month."""
        data = self.edit_input.currentText()    # e.g. 2024
        data2 = self.edit_input2.currentText()  # e.g. 12

        self.figure.clf()  # start from an empty figure

        # Left chart: one wedge sized from the year (year - 2000).
        ax = self.figure.add_subplot(121)
        ax.pie(
            [10, 20, int(data)-2000, 30],
            explode=[0.02, 0.0, 0.0, 0.0],
            labels=['e3', 'g5', data, 'mo'],
            colors=['blue', 'yellow', 'red', 'green'],
        )
        ax.axis('equal')  # equal aspect ratio so the pie is a circle
        ax.set_title('年'+data)

        # Right chart: one wedge sized from the month (month * 5).
        ax1 = self.figure.add_subplot(122)
        ax1.pie(
            [100, 20, int(data2)*5, 30, 2, 33],
            labels=['qwe', 'SSe', data2, 'H2J1', 'Se1N', 'H2J1'],
        )
        ax1.axis('equal')
        ax1.set_title('月'+data2)

        self.figure.suptitle(data + '年' + data2 + '月')
        self.canvas.draw()  # push the new figure content to the widget

# Script entry point: show the dashboard window and run the Qt event loop.
if __name__ == '__main__':
    # fun3_stat()
    app = QApplication(sys.argv)
    statistics_widget = tmz()
    statistics_widget.show()
    app.exec()
    # sys.exit(app.exec())

在这里插入图片描述

打包程序

pyinstaller打包成exe程序,是把解释器和import的一些依赖一起打包
在cmd运行 pyinstaller --onefile(-F) script.py ,打包成一个大的exe
pyinstaller --onedir(-D) script.py,打包成文件夹-默认,文件夹运行比exe快一些
-i "icon.ico"  设置打包后的exe图标
-w 打包后运行隐藏cmd控制台,-c 不隐藏

打包完成,dist里是程序,build文件夹不需要

运行闪退,把程序拖到cmd里执行就能看到报错
运行报错not found in PYZ,我把所有相对路径改为绝对路径就好了
运行报错没找到包或包的方法错误,缺什么就把对应的包复制到dist下的__internal就行
运行找不到资源-图片文件,要把图片复制到和exe在一起的文件夹

打包太大了,很多没用的包,试了虚拟环境、钩子什么的 累,还是用手删吧 更快,保留程序import包的文件夹 保留使程序运行的最低限度
### PDF文档识别的技术方法与工具

#### 一、PDF文档识别的核心技术

光学字符识别(Optical Character Recognition, OCR)是用于从图像或扫描文件中提取可编辑和可搜索数据的主要技术[^1]。OCR 技术通过分析输入的图片或扫描件,将其转换为机器编码文本。对于PDF文件而言,尤其是那些由扫描仪生成的仅含图像的PDF文件,OCR 是必不可少的一环。

#### 二、自动化工具的功能特点

为了应对企业级需求,一些专门针对PDF文件设计的工具被开发出来。这些工具不仅支持基本的文字提取功能,还具备更高级的能力,比如自动重命名文件以及分类管理等功能。例如,在某些应用环境中,用户可以选择包含多个PDF文件的目录并通过单击按钮来启动整个处理流程[^2]。

#### 三、具体操作过程概述

当使用基于OCR 的软件解决方案时,通常会经历以下几个方面的工作流描述:

- **选择目标路径**:指定待处理的PDF 文件所在的存储位置;
- **触发处理机制**:"Process PDFs" 动作一旦被执行,则意味着系统开始逐一对选定范围内的每一个PDF 进行解析工作;
- **执行核心算法**:在此阶段内部实现了复杂的模式匹配逻辑以便准确获取所需信息片段;最后完成相应的动作如更改名称等附加任务。

#### 四、实际应用场景举例说明

假设某公司拥有大量历史存档资料需要迁移到新的信息系统当中去,那么借助于上述提到过的那种集成了先进OCR引擎并且易于部署使用的桌面端或者云端服务形式的产品将会极大地提高工作效率减少人工干预成本的同时也保证了数据质量达到预期效果。

```python
import pytesseract
from pdf2image import convert_from_path

def extract_text_from_pdf(pdf_path):
    pages = convert_from_path(pdf_path, dpi=300)
    text = ""
    for page in pages:
        txt = pytesseract.image_to_string(page)
        text += txt
    return text

pdf_file = 'example.pdf'
extracted_content = extract_text_from_pdf(pdf_file)
print(extracted_content)
```

以上是一个简单的Python脚本例子,它展示了如何利用`pytesseract`库配合`pdf2image`模块将PDF文件转成图片再做OCR识别的过程。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值