常用的python工具方法积累汇总,避免重复造轮子
# -*- coding: utf-8 -*-
# ******************************************** #
# Cause i know you are my polaris. - 星尘 #
# ******************************************** #
import os,re,shutil,time, configparser
import xml.dom.minidom
from typing import Dict, List
from fnmatch import fnmatch, fnmatchcase
# 第三方库
# ------------------------------------------ #
try:
import xlrd
except:
os.system('pip install xlrd')
os.system('python -m pip install xlrd')
import xlrd
try:
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Font, PatternFill
except:
os.system('pip install openpyxl')
os.system('python -m pip install openpyxl')
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Font, PatternFill
# ------------------------------------------ #
# 处理表格文件
class MyTable:
def __init__(self):
self._isBold = False
self._fontColor = "000000"
self._bgColor = "FFFFFF"
#加载xlsx文件
def LoadXLSXFile(self, strPath: str, sheetName: str=""):
'''
参数:
strPath: 文件路径
sheetName: 表名 注:为空时默认取第一张表
返回
listData: 字典列表
'''
try:
workBookObj = load_workbook(strPath)
sheetObj = None
if "" == sheetName:
sheetObj = workBookObj.worksheets[0]
elif sheetName in workBookObj.sheetnames:
sheetObj = workBookObj[sheetName]
else:
print("sheetName is not exist")
return []
except:
print("can not load file")
return []
# 取列名
listTabColName = []
iColNameIndex = sheetObj.min_row # 第一行数据当作列名
for cellItem in sheetObj[iColNameIndex]:
listTabColName.append(str(cellItem.value))
# 保存数据
listData = [] # 最终获取到的数据结果
mergeArea = sheetObj.merged_cells
iMinRow = sheetObj.min_row + 1 # 跳过首行列名
iMinCol = sheetObj.min_column
iMaxRow = sheetObj.max_row
iMaxCol = sheetObj.max_column
for iRow in range(iMinRow, iMaxRow+1):
dictRowData = {} # 遍历行时保存的单行字典(key为列名)
for iCol in range(iMinCol, iMaxCol+1):
iRowTmp = iRow
iColTmp = iCol
# 判断是否处在合并区间内
for mergeItem in mergeArea:
if mergeItem.min_row <= iRow <= mergeItem.max_row and mergeItem.min_col <= iCol <= mergeItem.max_col:
# 替换为合并区间的左上角坐标索引
iRowTmp = mergeItem.min_row
iColTmp = mergeItem.min_col
break
# 保存单行数据
if iCol > len(listTabColName):
continue
content = sheetObj.cell(iRowTmp, iColTmp).value
if content is None:
dictRowData[listTabColName[iCol-1]] = ''
else:
dictRowData[listTabColName[iCol-1]] = str(content)
# 存储行字典
listData.append(dictRowData)
return listData
#加载xls文件 (excel 2007以前)
def LoadXLSFile(self, strPath: str, sheetName: str=""):
'''
参数:
strPath: 文件路径
sheetName: 表名 注:为空时默认取第一张表
返回
listData: 字典列表
'''
try:
workBookObj = xlrd.open_workbook(strPath, formatting_info=True)
sheetObj = None
if "" == sheetName:
sheetObj = workBookObj[0]
elif sheetName in workBookObj._sheet_names:
sheetObj = workBookObj.sheet_by_name(sheetName)
else:
return []
except:
return []
# 取列名
listTabColName = []
iColNameIndex = 0 # 第一行数据当作列名
obj = sheetObj[0]
for cellItem in sheetObj[iColNameIndex]:
listTabColName.append(str(cellItem.value))
# 保存数据
listData = [] # 最终获取到的数据结果
mergeArea = sheetObj.merged_cells
iMinRow = 1 # 跳过首行列名
iMinCol = 0
iMaxRow = sheetObj.nrows
iMaxCol = sheetObj.ncols
for iRow in range(iMinRow, iMaxRow):
dictRowData = {} # 遍历行时保存的单行字典(key为列名)
for iCol in range(iMinCol, iMaxCol):
iRowTmp = iRow
iColTmp = iCol
# 判断是否处在合并区间内
for mergeItem in mergeArea:
if mergeItem[0] <= iRow < mergeItem[1] and mergeItem[2] <= iCol < mergeItem[3]:
# 替换为合并区间的左上角坐标索引
iRowTmp = mergeItem[0]
iColTmp = mergeItem[2]
break
# 保存单行数据
if iCol >= len(listTabColName):
continue
content = sheetObj.cell(iRowTmp, iColTmp).value
if content is None:
dictRowData[listTabColName[iCol]] = ''
else:
dictRowData[listTabColName[iCol]] = str(content)
# 存储行字典
listData.append(dictRowData)
return listData
def LoadExcelSheetName(self, strPath: str):
try:
workBookObj = xlrd.open_workbook(strPath, formatting_info=True)
return workBookObj._sheet_names
except:
try:
workBookObj = load_workbook(strPath)
return workBookObj.sheetnames
except:
return []
# 加载excel文件
def LoadExcelFile(self, strPath: str, sheetName: str=""):
'''
参数:
strPath: 文件路径
sheetName: 表名 注:为空时默认取第一张表
返回
listData: 字典列表
'''
try:
load_workbook(strPath)
return self.LoadXLSXFile(strPath, sheetName)
except:
try:
xlrd.open_workbook(strPath)
return self.LoadXLSFile(strPath, sheetName)
except:
return []
def LoadTxt(self, strTxt: str, linkTag: str = "\t", isExistHead: bool = True):
if "" == strTxt:
return []
# 保存列名
listFirstLineByKeyName = []
# 保存数据
listData = []
linesData = strTxt.split("\n")
for lineItem in linesData:
if "" == lineItem.strip():
continue
pass
lineItem = lineItem.replace("\r", "")
lineItem = lineItem.replace("\n", "")
if 0 == len(listFirstLineByKeyName) and True == isExistHead:
listFirstLineByKeyName = lineItem.split(linkTag)
continue
dictLineData = {}
arrLineItem = lineItem.split(linkTag)
if True == isExistHead:
for iIndex in range(0, len(listFirstLineByKeyName)):
strVal = ""
if iIndex < len(arrLineItem):
strVal = arrLineItem[iIndex]
dictLineData[listFirstLineByKeyName[iIndex]] = strVal
else:
for iIndex in range(0, arrLineItem):
dictLineData[str(iIndex)] = arrLineItem[iIndex]
listData.append(dictLineData)
return listData
# 加载文本文件
def LoadTxtFile(self, strPath: str, linkTag: str = "\t", isExistHead: bool = True):
'''
参数:
strPath: 文件路径
linkTag: 数据连接符
isExistHead: 是否将第一行当作表头处理
返回
listData: 字典列表
'''
try:
fileObj = open(strPath, "r", encoding="utf-8")
strContent = fileObj.read()
fileObj.close()
except:
return []
listData = self.LoadTxt(strContent, linkTag, isExistHead)
return listData
# 设置保存excel的表头样式
def SetFontStyle(self, isBold=False, fontColor="000000"):
self._isBold = isBold
self._fontColor = fontColor
def SetFillStyle(self, bgColor="FFFFFF"):
self._bgColor = bgColor
# 保存文件
def SaveXLSXFile(self, listData: List[Dict[str, str]], strPath = "", sheetName = "", isReplaceSheet = True):
'''
参数:
listData: 字典列表(注: 列表的key会加入表头)
strPath: 文件路径
sheetName: 表名
isReplaceSheet: 是否覆盖已存在的表
返回
True : False
'''
try:
if os.path.exists(strPath):
workbookObj = load_workbook(strPath)
else:
workbookObj = Workbook()
workbookObj.remove(workbookObj["Sheet"])
except:
return False
if "" == sheetName:
sheetNewObj = workbookObj.create_sheet()
elif sheetName not in workbookObj.sheetnames:
sheetNewObj = workbookObj.create_sheet(sheetName)
elif True == isReplaceSheet:
workbookObj.remove(workbookObj[sheetName])
sheetNewObj = workbookObj.create_sheet(sheetName)
else:
print("sheetName is alread exist")
return False
listKeyName = []
for iColIndex in range(0, len(listData)):
dataItem = listData[iColIndex]
for keyName in dataItem:
if keyName not in listKeyName:
listKeyName.append(keyName)
iRowIndex = listKeyName.index(keyName)
sheetNewObj.cell(iColIndex+2, iRowIndex+1).value = dataItem[keyName]
# 表头样式
headFontStyle = Font(bold=self._isBold, color=self._fontColor)
headFillStyle = PatternFill(start_color=self._bgColor)
# sheetNewObj.insert_rows(1)
for iIndex in range(0, len(listKeyName)):
sheetNewObj.cell(1, iIndex+1).font = headFontStyle
sheetNewObj.cell(1, iIndex+1).fill = headFillStyle
sheetNewObj.cell(1, iIndex+1).value = listKeyName[iIndex]
while(True):
try:
workbookObj.save(strPath)
except:
print("-------------------------")
print("Error:文件保存失败,请检查文件是否正在被excel打开("+strPath+")", flush=True)
print("是否重试?(Y/N)", flush=True)
inRes = input()
if 'Y' == inRes:
continue
else:
print("文件保存失败", flush=True)
break
return True
# 数据分组 相同值合并
def GetGroupDataByTabName(self, listData: List[Dict[str, str]], colName: str = "", splitChar: str = ""):
'''
参数:
listData: 字典列表(注: 列表的key会加入表头)
colName: 字典索引
strRefDefRefTxt: 切割字符(默认不切割)
返回
dictData: 列表字典
'''
dictData = {}
for dataItem in listData:
if 0 == len(dataItem) or colName not in dataItem:
continue
colNameItems = [colName]
if "" != splitChar:
colNameItems = dataItem[colName].split(splitChar)
for colNameItem in colNameItems:
if colNameItem not in dictData:
dictData[colNameItem] = []
dictData[colNameItem].append(dataItem)
return dictData
# 数据过滤
# 注:Condition可以设置一行判断逻辑语句,最终会当作python语句执行,值变量为%val% (字符串类型)
# 例:查找listData中Name包含‘_’的项 =》 FilterDataByTabName(listData, "Name", "'_' in %val%")
def FilterDataByTabName(self, listData: List[Dict[str, str]], colName: str = "", Condition: str="True"):
'''
参数:
listData: 字典列表(注: 列表的key会加入表头)
colName: 字典索引
Condition: 过滤条件 注:值变量使用%val%参数
返回
listFilterData: 字典列表
'''
listFilterData = []
for dataItem in listData:
if 0 == len(dataItem) or colName not in dataItem:
continue
itemVal = "'" + dataItem[colName] +"'"
ConditionTmp = Condition
ConditionTmp = ConditionTmp.replace("%val%", itemVal)
try:
if True == eval(ConditionTmp):
listFilterData.append(dataItem)
except:
pass
return listFilterData
# class Table
# 文件目录操作
class MyFile:
def __init__(self):
pass
# 判断文件类型是否满足
def IsTrueFileType(self, strFile: str, fileType: set={"*.*"}, isStrictCase: bool = False):
'''
参数:
strFile: 文件路径
fileType: 文件类型集合 注:支持通配符
isStrictCase: 是否严格匹配大小写
返回
True : False
'''
if False == os.path.isfile(strFile):
return False
strFile = strFile.replace("/", "\\")
strFileName = strFile[strFile.rfind("\\")+1:]
funcMatch = None
if True == isStrictCase:
funcMatch = fnmatchcase
else:
funcMatch = fnmatch
for typeItem in fileType:
if True == funcMatch(strFileName, typeItem):
return True
return False
# 遍历目录获取文件列表
def GetFileList(self, strRootDir: str, fileType: set={"*.*"}, isFindChildDir: bool = True, isStrictCase: bool = False):
'''
参数:
strRootDir: 目录路径
fileType: 文件类型集合 注:支持通配符
isFindChildDir: 是否查找子目录
isStrictCase: 是否严格匹配大小写
返回
fileList: 文件列表
'''
fileList = []
strRootDir = strRootDir.replace("/", "\\")
if True == os.path.isfile(strRootDir):
if True == self.IsTrueFileType(strRootDir, fileType, isStrictCase):
fileList.append(strRootDir)
return fileList
if os.path.isdir(strRootDir):
for item in os.listdir(strRootDir):
file = os.path.join(strRootDir, item)
if True == os.path.isdir(file) and False == isFindChildDir:
continue
fileList.extend(self.GetFileList(file, fileType, isFindChildDir, isStrictCase))
self.strDirPath = strRootDir
self.fileList = fileList
return fileList
# 批量拷贝文件到指定目录
def CopyFileList2Dir(self, fileList: List[str], strTargetDir: str):
'''
参数:
fileList: 文件列表
strTargetDir: 目标目录
返回
void
'''
strTargetDir = os.path.join(strTargetDir, ".\\")
if False == os.path.isdir(strTargetDir):
return
for file in fileList:
try:
shutil.copy(file, os.path.dirname(strTargetDir))
except:
print("copy fail: '%s' to '%s'"%(file, strTargetDir))
pass
def CopyFileList2Makedirs(self, fileList: List[str], strTargetDir: str, strRelativeDir: str = ""):
'''
参数:
fileList: 文件列表
strTargetDir: 目标目录
strRelativeDir: 文件列表相对根目录
返回
void
'''
strTargetDir = os.path.join(strTargetDir, ".\\")
strRelativeDir = os.path.join(strRelativeDir, ".\\")
if False == os.path.isdir(strTargetDir) or False == os.path.isdir(strRelativeDir):
return
for file in fileList:
relFile = os.path.relpath(file, strRelativeDir)
newFile = os.path.join(strTargetDir, relFile)
newDir = os.path.dirname(newFile)
if False == os.path.isdir(newDir):
os.makedirs(newDir)
try:
shutil.copy(file, newDir)
except:
print("copy fail: '%s' to '%s'"%(file, newDir))
# 删除目录及子目录和文件
def RemoveDir(self, strDir: str):
'''
参数:
strDir: 目标目录
返回
void
'''
if os.path.isfile(strDir):
os.remove(strDir)
return
if os.path.isdir(strDir):
for item in os.listdir(strDir):
file = os.path.join(strDir, item)
if os.path.isfile(file):
os.remove(file)
continue
if 0 == len(os.listdir(file)):
os.rmdir(file)
else:
self.RemoveDir(file)
if 0 == len(os.listdir(strDir)):
os.rmdir(strDir)
pass
@staticmethod
def Log(strLogContent: str="", strLogFile: str="log.log"):
strLogContent = time.strftime("[%Y/%m/%d %H:%M:%S]\t", time.localtime()) + strLogContent + "\n"
fileObj = open(strLogFile, "a", encoding="utf-8")
fileObj.write(strLogContent)
fileObj.close()
pass
# class MyFile:
class MyXML:
def __init__(self) -> None:
self.xmldoc = None
pass
def PrettyXML(self, strText = "", strIndent:str = " ") -> str:
isAddRoot = False
strXMLHead = '<?xml version="1.0" ?>'
strKeyTag = '<?xml'
strRootTmp = "ROOT_TO_PRETTY"
listLines = strText.split("\n")
strTextTmp = ""
for lineItem in listLines:
strTextTmp += lineItem.strip()
strText = strTextTmp
strText = strText.replace("<?xml", "<!-- <?xml")
strText = strText.replace("?>", "?> -->")
if 0 != strText.find(strKeyTag):
isAddRoot = True
strText = "<" + strRootTmp + ">" + strText + "</" + strRootTmp + ">"
strText = strXMLHead + strText
xmlText = xml.dom.minidom.parseString(strText)
strText = xmlText.toprettyxml(indent=strIndent)
strText = strText.replace("<" + strRootTmp + ">" , "")
strText = strText.replace("</" + strRootTmp + ">" , "")
strText = strText.replace("<" + strRootTmp + "/>" , "")
strText = strText.replace(strXMLHead, "")
if True == isAddRoot:
strText = strText.replace("\n"+strIndent, "\n")
strText = strText.replace("<!-- <?xml", "<?xml")
strText = strText.replace("?> -->", "?>")
strText = strText.strip()
return strText
def PrettyXMLFile(self, strFile = "") -> None:
if False is os.path.isfile(strFile):
return
fileObj = open(strFile, "r", encoding="utf-8")
strText = fileObj.read()
fileObj.close()
strText = self.PrettyXML(strText)
fileObj = open(strFile, "w", encoding="utf-8")
fileObj.write(strText)
fileObj.close()
# 字符串处理
class MyStr:
def __init__(self):
pass
# 按有效空白符切割,并返回列表
def MySpiltByBlank(self, strTarget: str=""):
'''
参数:
strTarget: 目标字符串
返回
listData: 切割后的列表数组
'''
strTmp = strTarget.strip()
listData = []
strTmp = strTmp.replace("\t", " ")
while(" " in strTmp):
iPos = strTmp.find(" ")
strItem = strTmp[0:iPos]
strTmp = strTmp[iPos:]
strTmp = strTmp.strip()
listData.append(strItem)
listData.append(strTmp)
return listData
# 删除开头为strRemove的字符串
def RemoveBeginStr(self, strTarget: str="", strRemove: str=""):
if 0 == strTarget.find(strRemove):
strTarget = strTarget[len(strRemove):]
return strTarget
# 删除结尾为strRemove的字符串
def RemoveEndStr(self, strTarget: str="", strRemove: str=""):
if strTarget.endswith(strRemove):
strTarget = strTarget[0:0-len(strRemove):]
return strTarget
# 正则表达式提取指定开头和结尾包含的内容,返回列表
def GetContentBetweenStartAndEnd(self, strTarget: str="", strStart: str="", strEnd: str="", isLine: bool=False):
'''
参数:
strTarget: 目标字符串
strStart : 起始字符串
strEnd : 结尾字符串
返回
listData: 匹配到的内容列表
备注:
使用正则表达式匹配
'''
listData = []
if "" == strStart or "" == strEnd:
listData.append(strTarget)
return listData
strTmp = "(?<={start})(.*?)(?={end})".format(start=strStart, end=strEnd)
if False is isLine:
strTmp = "(?<={start})((.|\r|\n)*?)(?={end})".format(start=strStart, end=strEnd)
pattern = re.compile(strTmp)
listData = pattern.findall(strTarget)
return listData
# class MyStr:
# ini文件
class MyConfigParser(configparser.ConfigParser):
def __init__(self):
super().__init__()
def optionxform(self, optionstr):
# 重写optionxform方法, 保留参数大小写
return optionstr
def LoadConfigIni(self, strFile:str = "", encoding: str = "utf-8")->dict[str, dict[str, str]]:
'''
参数:
strFile : 配置文件ini
encoding : 配置文件编码(默认utf-8)
返回
dictdata : 配置文件字典
备注:
'''
dictdata = {}
if not os.path.exists(strFile):
return dictdata
self.read(strFile, encoding)
for sectionItem in self.sections():
dictSubdata = {}
for itemset in self.items(sectionItem):
itemKey = itemset[0]
itemVal = itemset[1]
dictSubdata[itemKey] = itemVal
dictdata[sectionItem] = dictSubdata
return dictdata
# class MyConfigParser(configparser.ConfigParser):