#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
最小化Excel读取工具
使用最基础的方法读取Excel文件
"""
import os
import sys
def install_requirements():
    """Ensure the ``openpyxl`` package is importable, installing it if needed.

    Tries to import ``openpyxl``; on ``ImportError`` it runs
    ``pip install openpyxl`` using the interpreter that is executing this
    script.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
    """
    import importlib
    import subprocess

    try:
        import openpyxl  # noqa: F401 -- imported only to probe availability
    except ImportError:
        print("正在安装 openpyxl...")
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'openpyxl'])
        # The import system caches finder state; refresh it so the package
        # installed a moment ago can be imported later in this same process.
        importlib.invalidate_caches()
def read_excel_minimal(file_path, start_row=2):
    """Read every worksheet of an Excel workbook into lists of string rows.

    The workbook is opened with ``openpyxl.load_workbook`` using
    ``read_only=True`` and ``data_only=True``. Each cell value is converted
    with ``str()`` (``None`` becomes ``""``), and rows whose cells are all
    blank after stripping whitespace are dropped. Progress information and a
    preview of the first three kept rows of each sheet are printed.

    Args:
        file_path (str): Path of the Excel file to read.
        start_row (int): 1-based worksheet row at which reading starts.

    Returns:
        dict: Maps each sheet name to a list of rows, each row being a list
        of strings. A sheet that fails while being processed maps to ``[]``.
        An empty dict is returned when the workbook cannot be read at all
        (including when ``openpyxl`` is not installed).
    """
    try:
        import openpyxl

        print(f"正在读取Excel文件: {file_path}")
        print("=" * 40)
        workbook = openpyxl.load_workbook(file_path, read_only=True, data_only=True)
        try:
            sheet_names = workbook.sheetnames
            print(f"发现 {len(sheet_names)} 个sheet: {sheet_names}")
            all_data = {}
            for sheet_name in sheet_names:
                print(f"\n处理sheet: {sheet_name}")
                print("-" * 20)
                try:
                    sheet = workbook[sheet_name]
                    data_rows = []
                    # (worksheet row number, row) pairs for the preview, so
                    # the printed numbers stay right when blank rows are
                    # skipped in between.
                    preview = []
                    rows = sheet.iter_rows(min_row=start_row, values_only=True)
                    for row_number, row in enumerate(rows, start=start_row):
                        row_data = ["" if cell is None else str(cell) for cell in row]
                        # Keep the row only if at least one cell has content.
                        if any(cell.strip() for cell in row_data):
                            data_rows.append(row_data)
                            if len(preview) < 3:
                                preview.append((row_number, row_data))
                    all_data[sheet_name] = data_rows
                    print(f"从第 {start_row} 行开始读取了 {len(data_rows)} 行数据")
                    if data_rows:
                        print("前3行数据:")
                        for row_number, row_data in preview:
                            print(f" 行 {row_number}: {row_data}")
                        if len(data_rows) > 3:
                            print(f" ... 还有 {len(data_rows) - 3} 行")
                    else:
                        print("没有有效数据")
                except Exception as e:
                    # Best effort: one broken sheet must not abort the rest.
                    print(f"处理sheet {sheet_name} 时出错: {e}")
                    all_data[sheet_name] = []
            return all_data
        finally:
            # Read-only workbooks keep the file open; always release it,
            # even when processing fails part-way through.
            workbook.close()
    except Exception as e:
        print(f"读取Excel失败: {e}")
        return {}
def main():
    """Pick an Excel file in the current directory and print its contents.

    Steps:
      1. List files in the current directory ending in ``.xlsx`` or ``.xls``
         (case-insensitive), skipping names that start with ``~$``.
      2. Select the only file automatically, or prompt for a 1-based choice;
         any invalid choice falls back to the first file.
      3. Call ``install_requirements()`` and then
         ``read_excel_minimal(selected_file, start_row=2)``.
      4. Print per-sheet row counts, the total, and every row that was read.

    Returns:
        None. Returns early when no Excel file is found or no data is read.
    """
    print("最小化Excel读取工具")
    print("=" * 30)

    # Names starting with "~$" are skipped as temporary files.
    excel_files = [
        f for f in os.listdir('.')
        if f.lower().endswith(('.xlsx', '.xls')) and not f.startswith('~$')
    ]
    if not excel_files:
        print("当前目录下没有找到Excel文件")
        return

    print(f"找到 {len(excel_files)} 个Excel文件:")
    for i, excel_file in enumerate(excel_files, 1):
        print(f"{i}. {excel_file}")

    if len(excel_files) == 1:
        selected_file = excel_files[0]
        print(f"\n自动选择: {selected_file}")
    else:
        try:
            choice = int(input(f"\n请选择文件 (1-{len(excel_files)}): ")) - 1
        except (ValueError, EOFError):
            # Non-numeric input or closed stdin: force the fallback below.
            choice = -1
        # Range-check explicitly: a zero or negative entry would otherwise
        # become a negative index and silently pick a file from the end.
        if 0 <= choice < len(excel_files):
            selected_file = excel_files[choice]
        else:
            selected_file = excel_files[0]
            print(f"输入无效,默认选择: {selected_file}")

    install_requirements()

    data = read_excel_minimal(selected_file, start_row=2)
    if not data:
        print("没有读取到任何数据")
        return

    print("\n" + "=" * 40)
    print("数据统计:")
    total_rows = 0
    for sheet_name, sheet_data in data.items():
        rows = len(sheet_data)
        total_rows += rows
        print(f" {sheet_name}: {rows} 行")
    print(f"总计: {total_rows} 行数据")

    print("\n" + "=" * 40)
    print("完整数据:")
    for sheet_name, sheet_data in data.items():
        print(f"\nSheet: {sheet_name}")
        print("-" * 20)
        # The number printed is the position within the kept rows, not the
        # worksheet row number.
        for i, row in enumerate(sheet_data, 1):
            print(f"行 {i}: {row}")
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
python 提取excel 数据
最新推荐文章于 2025-12-05 17:02:52 发布
1126

被折叠的 条评论
为什么被折叠?



