1.环境条件:python 3.5.2 版本;pandas 0.25.3 ;xlrd 1.1.0
2.由两个python文件组成,
类文件:classreatewriteexcel.py
主文件:main_fanka_total.py
3.辅助文件:
节假日文件:csv_condition_nonworkdays.txt
里面包含每一年的节假日和周末日期
摘选条件日期:人员名单_模板.xlsx
4.生成效果:
5.资源连接:https://download.youkuaiyun.com/download/qq_35871505/86747635
classreatewriteexcel.py
# __author__ = 'lzc'
# -*- coding: UTF-8 -*-
import os
import pandas as pd
import datetime
import time
from datetime import datetime
class excel_class():
"""
excelcalss 用于读写excel操作,需要安装插件:pandas、xlrd、openpyxl
"""
def __init__(self):
#全局结果字符串,日志结果
res_str=""
self.res_str= res_str
res_count = 0
self.res_count = res_count
def read_excel(self, path):
# 读取excel文件,返回一个标头list和values二维list
df = pd.read_excel(path)
# print (df)
data_head = df.columns.tolist()
data_value = df.values
return data_head, data_value
def write_excel(self, path, head, data):
# 将list写入excel
df = pd.DataFrame(data, columns=head)
df.to_excel(path, index=False, header=True)
self.res_str = self.res_str+"摘选并生成:"+path+"\n"
# print('写excel完成!')
def read_writ_excel(self, path, condition):
#单个条件摘选
# 读excel
flag=0#符合跳进标识
data_head, data_value = self.read_excel(path)
listdate = [] # 收集数据list
for list in data_value:
string = "".join(str(list))#转换成字符串
if condition in string:
# print (string)
listdate.append(list)
flag=1
if flag==1:
self.res_str = self.res_str+"符合条件:"+path+"\n"
if listdate: # 判断是否为空值
# 写excel
file, ext = self.split_url(path)
write_path = file + "_" + condition + ".xlsx"
self.write_excel(write_path, data_head, listdate)
def split_url(selt, url):
# 分离文件和拓展名
file, ext = os.path.splitext(url)
# print (file)
# print (ext)
return file, ext
def juede_folder(self, path):
# 判断是路径还是文件,是目录返回1,是excel文件返回0,其他文件返回2,未找到路径返回3
if os.path.isdir(path):
return 1
elif os.path.isfile(path):
file, ext = os.path.splitext(path)
if ext in ['.xlsx', '.xls']:
return 0
else:
# print ("非excel文件,请检查。")
return 2
else:
return 3
def get_file_list(self, path):
# 返回excel文件的list
filelist = []
for dirpath, dirnames, filenames in os.walk(path):
# print (dirpath)
# print (dirnames)
for filename in filenames:
filedir = os.path.join(dirpath, filename)
file, ext = os.path.splitext(filedir)
if ext in ['.xlsx', '.xls']:
filelist.append(filedir)
# print (filedir)
return filelist
# 将时间转换成横杠形式,先转换成时间戳,然后再转换成所需字符串
def time_str_format(self,str):
if '-' in str:
return str
else:
timeC = self.str_to_time(str)
timeArray = time.localtime(timeC)
timeformat = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
return timeformat
# 时间字符串变为时间戳
def str_to_time(self,str):
format = "%Y/%m/%d %H:%M"
time_array = time.strptime(str, format)
time_stamp = int(time.mktime(time_array))
return time_stamp
# 判断是否在项目期间
def jub_project_time(self,date):
project1_start = '2017/12/01 00:00:00'
project1_stop = '2019/5/31 23:59:00'
project2_start = '2020/12/01 00:00:00'
project2_stop = '2021/2/28 23:59:00'
p1_start = self.str_to_time(project1_start)
p1_stop = self.str_to_time(project1_stop)
p2_start = self.str_to_time(project2_start)
p2_stop = self.str_to_time(project2_stop)
date_stamp = self.str_to_time(self,date)
if date_stamp >= p1_start and date_stamp <= p1_stop:
return True
elif date_stamp >= p2_start and date_stamp <= p2_stop:
return True
else:
return False
# 判断文件夹是否存在,不存在建立文件夹
def judge_folder(self,folder):
if not os.path.exists(folder):
os.makedirs(folder)
# 读取txt文档参数,返回list
def open_text_file(self,path):
list = []
with open(path, 'r', encoding='utf-8', ) as f:
for line in f.readlines():
line = line.strip('\n') # 去掉每行带出来了 \n
line = line.strip(' ') # 去掉空行
list.append(str(line))
# print (list)
return list
#计算两个日期的月份差距,返回yyyy-mm日期list
def countdatemonth(self,startdate,enddate):
list=[] #用于收集日期字符串:yyyy-mm
# startdate='2019-01-25'
# enddate='2021-12-17'
year_end = datetime.strptime(enddate,'%Y-%m-%d').year
month_end =datetime.strptime(enddate,'%Y-%m-%d').month
year_start = datetime.strptime(startdate, '%Y-%m-%d').year
month_start = datetime.strptime(startdate, '%Y-%m-%d').month
# print(year_end, month_end, year_start, month_start)
value = (year_end-year_start)*12 + (month_end-month_start)
for i in range(0,value+1):
month_add=month_start + i
month_new=month_add % 12
year_add = month_add // 12
year_new = year_start + year_add
if month_new==0:
month_new=12
year_new=year_new-1
if month_new >=10:
datestr = str(year_new) + '-' + str(month_new)
else:
datestr=str(year_new)+'-0'+str(month_new)
# print (datestr)
list.append(datestr)
return list
#返回日志结果
def return_res(self):
# print (self.res_str)
return self.res_str
main_fanka_total.py
# __author__ = 'lzc'
# -*- coding: UTF-8 -*-
from classreatewriteexcel import *
from collections import OrderedDict
import pandas as pd
def select_content_toexcel(path,path_model):
'''
思路:1.以名字-日期开头制作一个list,2.以名字-原始数据制作一个词典,3.list和dict进行比对,统计数据
支持单个文件摘选,也支持多个文件摘选
:param path原始数据路径,path_model:统计模板的路径
:return: res,日志记录
'''
ex = excel_class() #实例化类
# 非工作日
nonworkdays_list = ex.open_text_file('csv_condition_nonworkdays.txt')
# 31天数
# row_daylist = ex.open_text_file('csv_condition_day_row.txt')
row_daylist = ['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15',
'16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31']
#判断是路径还是文件,是目录返回1,是excel文件返回0,其他文件返回2,未找到路径返回3
juebe = ex.juede_folder(path)
data_list_extend =[] #所有数据的集合
# head_list_extend=[] #头文件的全局变量
if juebe == 1:#也支持多个文件摘选
# print(path+"非excel文件,是路径")
filelist = ex.get_file_list(path)
for file in filelist:
if ex.juede_folder(file) == 0:
# print (file)
head,value=ex.read_excel(file) # 读excel-原始数据
data_list_extend.extend(value) #收集所有数据
else:
print(file,"is not excel file")
elif juebe == 0: #支持单个文件摘选
# 读excel-原始数据
data_head, data_value = ex.read_excel(path)
data_list_extend.extend(data_value) #收集所有数据
# head_list_extend.extend(data_head)
elif juebe == 2:
return ("非excel文件,请检查。")
elif juebe == 3:
return ("未找到该路径:" + path)
# 读取条件-- 统计表模板
# 读excel
head_model, value_model = ex.read_excel(path_model)
# 名字集合,收集模板所有的人名
condition_name_list = [] # 名字集合
#收集模板所有的人名
for list in value_model:
condition_name = list[0] #人名
if condition_name not in condition_name_list:
condition_name_list.append(condition_name)
#以人名建立词典,制作人名-日期的字典
dic_day_list = {name: [] for name in condition_name_list}
# print (dic_day_list)
# 制作人名-日期的字典
for list in value_model:
condition_name = list[0] #人名
startdate=str(list[1])[0:10] #开始时间
enddate = str(list[2])[0:10] #结束时间
# print (startdate)
list = ex.countdatemonth(startdate,enddate)
for datestr in list:
dic_day_list[condition_name].append(datestr)
# print (datestr)
# # 对字典进行排序
dic_day_list = OrderedDict(sorted(dic_day_list.items(), key=lambda d: d[0], reverse=True))
# print (dic_day_list)
#以人名建立词典,用于收集符合人名的数据
dic_data_list = {name:[] for name in condition_name_list}
#以名字的字典 dic_nam_list,统计所有符合名字的数据
for datalist in data_list_extend:
dataliststr="".join(str(datalist))
for name in condition_name_list:
if name in dataliststr:
dic_data_list[name].append(dataliststr)
# # 对字典进行排序
dic_data_list = OrderedDict(sorted(dic_data_list.items(), key=lambda d: d[0], reverse=True))
# print (dic_data_list)
export_list = [] #收集输出数据list
listmpt=""
#两个词典数据比对
for key_model, value_model in dic_day_list.items(): # 遍历字典-模板
for key_name, value_list in dic_data_list.items(): # 遍历字典,key_name-所有的姓名,value_list-打卡日期list
value_list = "".join(str(value_list)) #转换成字符串,set(),去掉重复项
#判断key是否相同
if key_model in key_name:
for month in value_model:
list = []
firtcol = str(key_name) # 第一列
list.append(firtcol) # 第一列
seccol = str(month) # 第二列
list.append(seccol) # 第二列
# 循环天数
for day in row_daylist:
daystr = month + "-" + day # 日期字符串
if daystr in nonworkdays_list: # 如果属于非工作日
if daystr in value_list: # 项目的日期是否在timelist中
list.append("加班") # 统计周末加班
else:
list.append("/") # 统计周末
else:
if daystr in value_list: # 项目的日期是否在timelist中
list.append("钉钉") # 统计工作日打饭卡
else:
list.append("缺卡") # 统计工作未打饭卡
# print (list)
export_list.append(list)
export_list.append(listmpt)#添加一行空格
#写excel
if juebe == 1:
write_path = str(path) + "/01统计结果.xlsx"
ex.write_excel(write_path, head_model, export_list)
else:
file, ext = ex.split_url(path)
write_path = file + "_统计结果.xlsx"
ex.write_excel(write_path, head_model, export_list)
res=ex.return_res()
return res
if __name__ =="__main__":
#源文件路径
path ='原始记录.xlsx'
# path = '钉钉打卡'
#模型路径
path_model = '人员名单_模板.xlsx'
res=select_content_toexcel(path,path_model)
print (res)