将以下脚本 生成绿色免安装exe文件 并部分内容可修改 位置 如文件夹位置 列内容等 """
优化脚本内容 减少体积
文件名规则:工序明细号 工程名称中提取的五位文字
"""
import pandas as pd
import xlwt
import sys
import os
import glob
import logging
import re
import time
import math
def setup_logging():
"""设置日志"""
log_dir = os.path.join(os.getcwd(), "处理日志")
if not os.path.exists(log_dir):
os.makedirs(log_dir)
timestamp = time.strftime("%Y%m%d_%H%M%S")
log_file = os.path.join(log_dir, f"excel_processor_{timestamp}.log")
logging.getLogger().handlers.clear()
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler(log_file, encoding='utf-8'),
logging.StreamHandler(sys.stdout)
]
)
return logging.getLogger(__name__)
logger = setup_logging()
def read_smart_file(file_path):
"""智能读取Excel/HTML文件"""
file_name = os.path.basename(file_path)
logger.info(f"读取: {file_name}")
try:
with open(file_path, 'rb') as f:
file_start = f.read(500).decode('utf-8', errors='ignore').lower()
if any(tag in file_start for tag in ['<!doctype', '<html', '<table', '<tr>', '<td>']):
logger.info("检测到HTML格式")
encodings = ['utf-8', 'gbk', 'gb2312']
for encoding in encodings:
try:
dfs = pd.read_html(file_path, encoding=encoding)
if dfs:
df = dfs[0]
if len(df) > 0 and df.iloc[0].astype(str).str.contains('设备编码|订单号|需求日期').any():
df.columns = df.iloc[0]
df = df[1:].reset_index(drop=True)
return df
except:
continue
raise ValueError("无法读取HTML文件")
else:
try:
return pd.read_excel(file_path, engine='xlrd')
except:
return pd.read_excel(file_path, engine='openpyxl')
except Exception as e:
logger.error(f"读取失败: {e}")
raise
def get_cell_display_width(text):
"""计算单元格显示宽度"""
if not text:
return 0
width = 0
for char in str(text):
if '\u4e00' <= char <= '\u9fff':
width += 2
else:
width += 1
return width
def get_filename_from_data(df, input_file):
"""从数据中提取文件名(新规则:工序明细号 工程名称前五个字)"""
process_no = ""
project_name = ""
if '工序明细号' in df.columns:
process_series = df['工序明细号'].dropna()
if not process_series.empty:
process_no = str(process_series.iloc[0]).strip()
if '工程名称' in df.columns:
project_series = df['工程名称'].dropna()
if not project_series.empty:
project_name = str(project_series.iloc[0]).strip()
# 提取工程名称中的五位文字
if project_name:
# 移除空格和特殊字符,只保留中文文字
clean_name = re.sub(r'[^\u4e00-\u9fff]', '', project_name)
if len(clean_name) >= 5:
project_short = clean_name[:5]
elif clean_name:
project_short = clean_name
else:
# 如果没有中文字符,取前5个字符
project_short = project_name[:5] if len(project_name) >= 5 else project_name
else:
project_short = ""
# 构建文件名:工序明细号 工程名称前五个字
if process_no and project_short:
base_name = f"{process_no} {project_short}"
elif project_short:
base_name = project_short
elif process_no:
base_name = process_no
else:
base_name = os.path.splitext(os.path.basename(input_file))[0]
base_name = re.sub(r'[\\/*?:"<>|]', '', base_name)
return base_name
def format_numeric_value(value):
"""格式化数值,如果是数值类型且是浮点数,则转换为整数(去掉小数点后的部分)"""
if pd.isna(value):
return None
try:
# 尝试转换为数值
num_value = float(value)
# 如果是浮点数且没有小数部分,转换为整数
if num_value.is_integer():
return int(num_value)
else:
# 否则直接去掉小数部分(向下取整)
return int(math.floor(num_value))
except (ValueError, TypeError):
# 如果不是数值类型,返回原值
return value
def create_excel_with_format(df_selected, available_cols, output_file, combined_header):
"""创建格式化的Excel文件"""
wb = xlwt.Workbook(encoding='utf-8')
ws = wb.add_sheet('处理结果')
# 样式定义
borders = xlwt.Borders()
borders.left = borders.right = borders.top = borders.bottom = xlwt.Borders.THIN
title_style = xlwt.XFStyle()
title_font = xlwt.Font()
title_font.name = '宋体'
title_font.bold = True
title_font.height = 280
title_style.font = title_font
title_alignment = xlwt.Alignment()
title_alignment.horz = xlwt.Alignment.HORZ_CENTER
title_alignment.vert = xlwt.Alignment.VERT_CENTER
title_alignment.wrap = 0
title_style.alignment = title_alignment
header_style = xlwt.XFStyle()
header_font = xlwt.Font()
header_font.name = '宋体'
header_font.bold = True
header_font.height = 220
header_style.font = header_font
header_alignment = xlwt.Alignment()
header_alignment.horz = xlwt.Alignment.HORZ_CENTER
header_alignment.vert = xlwt.Alignment.VERT_CENTER
header_alignment.wrap = 0
header_style.alignment = header_alignment
header_style.borders = borders
data_style = xlwt.XFStyle()
data_font = xlwt.Font()
data_font.name = '宋体'
data_font.height = 200
data_style.font = data_font
data_alignment = xlwt.Alignment()
data_alignment.horz = xlwt.Alignment.HORZ_CENTER
data_alignment.vert = xlwt.Alignment.VERT_CENTER
data_alignment.wrap = 0
data_style.alignment = data_alignment
data_style.borders = borders
# 整数样式 - 用于数值列,不显示小数部分
integer_style = xlwt.XFStyle()
integer_font = xlwt.Font()
integer_font.name = '宋体'
integer_font.height = 200
integer_style.font = integer_font
integer_alignment = xlwt.Alignment()
integer_alignment.horz = xlwt.Alignment.HORZ_CENTER
integer_alignment.vert = xlwt.Alignment.VERT_CENTER
integer_alignment.wrap = 0
integer_style.alignment = integer_alignment
integer_style.borders = borders
integer_style.num_format_str = '0' # 设置格式为整数,不显示小数
# 计算列宽(改进版)
col_widths = {}
for col_idx, col_name in enumerate(available_cols):
max_width = get_cell_display_width(str(col_name))
# 检查该列的所有数据
for row_idx in range(len(df_selected)):
cell_value = df_selected.iloc[row_idx, col_idx]
if not pd.isna(cell_value):
cell_width = get_cell_display_width(str(cell_value))
if cell_width > max_width:
max_width = cell_width
# 设置合理的列宽:最小8,最大40
col_width = max(8, min(max_width + 2, 40))
col_widths[col_idx] = col_width
# 写入标题
ws.write_merge(0, 0, 0, len(available_cols)-1, combined_header, title_style)
# 第一行行高设置为50(xlwt中1/20点,所以50*20=1000)
ws.row(0).height = 1000 # 50点
# 写入表头
for col_idx, col_name in enumerate(available_cols):
ws.write(1, col_idx, col_name, header_style)
# 设置列宽(xlwt中256为单位)
ws.col(col_idx).width = 256 * col_widths[col_idx]
# 第二行行高设置为25(25*20=500)
ws.row(1).height = 500 # 25点
# 定义需要转换为整数的列
integer_columns = ['自由项2', '库存数量', '未领数量', '出库数量']
# 写入数据行
if len(df_selected) > 0:
for row_idx, row in enumerate(df_selected.itertuples(index=False), 2):
for col_idx, value in enumerate(row):
col_name = available_cols[col_idx]
if pd.isna(value):
cell_value = ""
else:
cell_value = str(value)
# 检查是否是需要转换为整数的列
if col_name in integer_columns:
formatted_value = format_numeric_value(value)
if formatted_value is not None:
# 使用整数样式写入
ws.write(row_idx, col_idx, formatted_value, integer_style)
else:
ws.write(row_idx, col_idx, "", integer_style)
else:
ws.write(row_idx, col_idx, cell_value, data_style)
# 数据行行高设置为25
ws.row(row_idx).height = 500 # 25点
else:
ws.write(2, 0, "(无数据)", data_style)
ws.row(2).height = 500
# 写入汇总行
total_row = len(df_selected) + 2
# 汇总样式
summary_style = xlwt.XFStyle()
summary_font = xlwt.Font()
summary_font.name = '宋体'
summary_font.bold = True
summary_font.height = 200
summary_style.font = summary_font
summary_alignment = xlwt.Alignment()
summary_alignment.horz = xlwt.Alignment.HORZ_CENTER
summary_alignment.vert = xlwt.Alignment.VERT_CENTER
summary_alignment.wrap = 0
summary_style.alignment = summary_alignment
summary_style.borders = borders
# 汇总背景色(淡黄色)
summary_pattern = xlwt.Pattern()
summary_pattern.pattern = xlwt.Pattern.SOLID_PATTERN
summary_pattern.pattern_fore_colour = 0x2C # 淡黄色
summary_style.pattern = summary_pattern
# 整数汇总样式(用于整数列的汇总)
integer_summary_style = xlwt.XFStyle()
integer_summary_font = xlwt.Font()
integer_summary_font.name = '宋体'
integer_summary_font.bold = True
integer_summary_font.height = 200
integer_summary_style.font = integer_summary_font
integer_summary_alignment = xlwt.Alignment()
integer_summary_alignment.horz = xlwt.Alignment.HORZ_CENTER
integer_summary_alignment.vert = xlwt.Alignment.VERT_CENTER
integer_summary_alignment.wrap = 0
integer_summary_style.alignment = integer_summary_alignment
integer_summary_style.borders = borders
integer_summary_style.pattern = summary_pattern
integer_summary_style.num_format_str = '0' # 设置格式为整数
# 计算汇总
numeric_columns = ['库存数量', '未领数量', '出库数量']
summary_values = {}
for col in numeric_columns:
if col in df_selected.columns:
try:
df_selected[col] = pd.to_numeric(df_selected[col], errors='coerce')
# 汇总值也转换为整数
summary_values[col] = format_numeric_value(df_selected[col].sum(skipna=True))
except:
summary_values[col] = 0
# 自由项2不计算汇总,但也要使用整数格式
if '自由项2' in df_selected.columns:
# 自由项2在汇总行留空
pass
# 汇总标签放在第8列(索引7),如果列数不足则放在最后一列
summary_col_idx = 7 if len(available_cols) > 7 else len(available_cols) - 1
# 写入汇总标签
ws.write(total_row, summary_col_idx, "汇总", summary_style)
# 写入汇总数值
for col_idx, col_name in enumerate(available_cols):
if col_idx == summary_col_idx:
continue
if col_name in integer_columns:
# 整数列使用整数汇总样式
if col_name in numeric_columns:
# 数值列写入汇总值
ws.write(total_row, col_idx, float(summary_values.get(col_name, 0)), integer_summary_style)
else:
# 非数值整数列(如自由项2)留空
ws.write(total_row, col_idx, "", integer_summary_style)
else:
# 非整数列留空
ws.write(total_row, col_idx, "", summary_style)
# 汇总行行高设置为25
ws.row(total_row).height = 500 # 25点
wb.save(output_file)
def process_excel_data(input_file, output_folder):
"""处理Excel数据"""
try:
df = read_smart_file(input_file)
logger.info(f"读取成功: {df.shape}行×{df.shape[1]}列")
df.columns = [str(col).strip().replace('\n', '') for col in df.columns]
required_columns = [
'设备编码', '订单号', '需求日期', '物料描述', '厂家模号',
'自由项1', '自由项3', '自由项4', '自由项2',
'库存数量', '未领数量', '出库数量', '物料编码', '批号'
]
available_cols = [col for col in required_columns if col in df.columns]
df_selected = df[available_cols].copy()
# 对需要转换为整数的列进行预处理
integer_columns = ['自由项2', '库存数量', '未领数量', '出库数量']
for col in integer_columns:
if col in df_selected.columns:
try:
# 转换为数值类型
df_selected[col] = pd.to_numeric(df_selected[col], errors='coerce')
# 应用整数格式化
df_selected[col] = df_selected[col].apply(format_numeric_value)
except Exception as e:
logger.warning(f"列 {col} 转换为数值时出错: {e}")
if '需求日期' in df_selected.columns:
df_selected['需求日期'] = pd.to_datetime(df_selected['需求日期'], errors='coerce')
df_selected['需求日期'] = df_selected['需求日期'].dt.strftime('%Y-%m-%d')
# 提取标题
project_name = ""
material_order = ""
if '工程名称' in df.columns and len(df) > 0:
project_name = str(df['工程名称'].iloc[0]) if not pd.isna(df['工程名称'].iloc[0]) else ""
if '备料单号' in df.columns and len(df) > 0:
material_order = str(df['备料单号'].iloc[0]) if not pd.isna(df['备料单号'].iloc[0]) else ""
combined_header = f"{project_name} {material_order}".strip() or "表格处理结果"
# 需求主数量汇总
if '需求主数量' in df.columns:
try:
total_demand = pd.to_numeric(df['需求主数量'], errors='coerce').sum()
if isinstance(total_demand, float) and total_demand.is_integer():
total_demand_str = f"{int(total_demand)}kg"
else:
total_demand_str = f"{total_demand:.2f}kg"
combined_header = f"{combined_header} {total_demand_str}"
logger.info(f"需求主数量汇总: {total_demand_str}")
except:
pass
# 数据排序
sort_cols = [col for col in ['自由项1', '自由项3', '自由项2'] if col in df_selected.columns]
if sort_cols:
df_selected = df_selected.sort_values(by=sort_cols, na_position='last')
# 生成文件名(新规则)
base_name = get_filename_from_data(df, input_file)
# 避免重名
counter = 0
while True:
if counter == 0:
file_name = f"{base_name}.xls"
else:
file_name = f"{base_name}{counter}.xls"
output_file = os.path.join(output_folder, file_name)
if not os.path.exists(output_file):
break
counter += 1
logger.info(f"输出文件: {file_name}")
# 创建Excel
create_excel_with_format(df_selected, available_cols, output_file, combined_header)
print(f"✓ 成功: {file_name} ({len(df_selected)}行)")
print(f" 输出格式: .xls (Excel 97-2003兼容)")
print(f" 文件名: {file_name}")
print(f" 特殊处理: 自由项2、库存数量、未领数量、出库数量不保留小数")
return True
except Exception as e:
logger.error(f"处理失败: {e}")
print(f"✗ 失败: {os.path.basename(input_file)} - {str(e)}")
return False
def main():
"""主函数"""
print("\n智能表格处理器")
print("=" * 60)
print("文件名规则: 工序明细号 工程名称前五个字")
print("输出格式: .xls (Excel 97-2003兼容)")
print("行高设置: 第一行50,其他行25")
print("特殊处理: 自由项2、库存数量、未领数量、出库数量不保留小数")
print("=" * 60)
current_dir = os.getcwd()
input_folder = os.path.join(current_dir, "待处理")
output_folder = os.path.join(current_dir, "已处理")
if not os.path.exists(input_folder):
print(f"错误: 请在当前目录创建 '{os.path.basename(input_folder)}' 文件夹")
print(f"当前目录: {current_dir}")
input("\n按回车键退出...")
return
if not os.path.exists(output_folder):
os.makedirs(output_folder)
print(f"已创建输出文件夹: {output_folder}")
print(f"输入文件夹: {input_folder}")
print(f"输出文件夹: {output_folder}")
print("-" * 60)
patterns = ['*.xls', '*.xlsx', '*.xlsm']
files = []
for pattern in patterns:
files.extend(glob.glob(os.path.join(input_folder, pattern)))
if not files:
print("提示: 待处理文件夹中没有支持的文件 (.xls, .xlsx, .xlsm)")
print("请将文件放入 '待处理' 文件夹中")
input("\n按回车键退出...")
return
print(f"找到 {len(files)} 个文件:")
for i, file_path in enumerate(files, 1):
file_name = os.path.basename(file_path)
size_kb = os.path.getsize(file_path) / 1024
print(f" {i:2d}. {file_name} ({size_kb:.1f} KB)")
print("\n开始处理...")
print("-" * 60)
processed = 0
failed = 0
failed_files = []
for input_file in files:
success = process_excel_data(input_file, output_folder)
if success:
processed += 1
else:
failed += 1
failed_files.append(os.path.basename(input_file))
print("-" * 40)
print("\n" + "=" * 60)
print("处理完成!")
print("=" * 60)
print(f"✅ 成功处理: {processed} 个文件")
if failed > 0:
print(f"❌ 处理失败: {failed} 个文件")
print("失败文件:")
for f in failed_files:
print(f" • {f}")
print(f"\n输出文件位置: {output_folder}")
print("输出格式特点:")
print(" 1. 📄 格式: .xls (Excel 97-2003兼容)")
print(" 2. 📁 文件名: 工序明细号 工程名称前五个字")
print(" 3. 📏 行高: 第一行50,其他行25")
print(" 4. 🔧 列宽: 根据内容自动调整")
print(" 5. 🟦 边框: 所有单元格都有边框")
print(" 6. ⚖️ 需求主数量汇总值显示在标题行")
print(" 7. 🔢 整数格式: 自由项2、库存数量、未领数量、出库数量不保留小数")
print(f"\n详细日志位置: 处理日志/")
print("=" * 60)
input("\n按回车键退出程序...")
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
print("\n\n程序被用户中断")
logger.info("程序被用户中断")
except Exception as e:
error_msg = f"程序发生致命错误: {str(e)}"
print(f"\n{error_msg}")
logger.critical(error_msg)
input("\n按回车键退出程序...")
最新发布