1.python切分excel
"""
@Project :pythonProject
@File :splitFile.py
@IDE :PyCharm
@Author :alice
@Date :2025/3/21 13:48
"""
import pandas as pd
import os
def split_excel(input_path, output_prefix,total_num,single_one,num_columns):
"""
分割大型Excel文件
参数:
input_path: 输入文件路径
output_prefix: 输出文件前缀
"""
output_dir = os.path.dirname(output_prefix)
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir)
if(len(num_columns)==0):
reader = pd.read_excel(input_path, sheet_name='Sheet1',dtype=str)
else:
reader = pd.read_excel(input_path, sheet_name='Sheet1', dtype=num_columns)
num = 0
for i in range(0,total_num,single_one):
new_data = reader.iloc[i:i+single_one]
new_data.to_excel(output_prefix+str(num)+'.xlsx', index=False)
print(f"文件分割完成!生成 {output_prefix+str(num)+'.xlsx'}")
num += 1
print("分割完成!!!")
if __name__ == "__main__":
total_num = 80535
single_one = 30000
input_file = "/Users/zhaotongxue/Downloads/sss.xlsx"
output_prefix = "/Users/zhaotongxue/Desktop/ss/前缀"
num_columns = {}
split_excel(input_file, output_prefix,total_num,single_one,num_columns)
2.python检查excel输出每列最大长度[目录下所有文件 or 目录下每个文件]
"""
@Project :pythonProject
@File :fileUtils.py
@IDE :PyCharm
@Author :alice
@Date :2024/7/18 10:10
"""
import os
import openpyxl
import pandas as pd
class FileUtils:
def __init__(self, path):
self.path = path
def get_files(self):
files_path = []
for root, dirs, files in os.walk(self.path):
files = [file for file in files if not file.startswith(".")]
for file in files:
files_path.append(os.path.join(root, file))
return files_path
def showMaxLen(self, dic, skp,sheet_name,type):
global data
print("文件名称:"+dic)
if type == "xlsx":
data = pd.read_excel(dic,sheet_name=sheet_name, skiprows=skp)
if type == "csv":
data = pd.read_csv(dic,skiprows=skp)
for i, col in enumerate(data.columns.tolist()):
data[col] = data[col].astype(str)
n_max = data[col].str.len().max()
print(i, col, n_max)
def showMaxLen2(self, dic, skp,type):
num = 0
cols = {}
for paths in dic:
num = num + 1
if type=="xlsx":
data = pd.read_excel(paths, skiprows=skp)
elif type=="csv":
data = pd.read_csv(paths, skiprows=skp)
for i, col in enumerate(data.columns.tolist()):
data[col] = data[col].astype(str)
n_max = data[col].str.len().max()
if cols.get(col) is not None and cols.get(col) < n_max:
cols.update({col: n_max})
elif cols.get(col) is None:
cols.update({col: n_max})
print("总文件数:{}".format(num))
return cols
if __name__ == '__main__':
path = "/Users/zhaotongxue/Desktop/黑龙江"
fileUtils = FileUtils(path)
files = fileUtils.get_files()
columns = fileUtils.showMaxLen2(files,skp=0,type="xlsx")
print(columns)
for file in files:
fileUtils.showMaxLen(file,skp=0,sheet_name="Sheet1",type="xlsx")