python IOError: invalid mode ('r') or filename

本文解决使用pandas.read_table()读取数据表时遇到的'invalidmode'错误,提供了正确的文件路径和打开模式示例。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

我想要用pandas.read_table()将数据表中的数据读到一个pandas DataFrame对象中:

import pandas as pd
unames = [‘user_id‘, ‘gender‘, ‘age‘, ‘occupation‘, ‘zip‘]
users = pd.read_table(‘C:\1M data\ml-1m\users.dat‘, sep=‘::‘, header = None, names = unames)
但是报错:invalid mode (‘r‘) or filename : ‘C:\x01M data\ml-1m\users.dat‘

这种错误的出现是在使用built-in函数file()或者open()的时候。或者是因为文件的打开模式不对,或者是文件名有问题。前者的话只需要注意文件是否可读或者可写就可以了。后者则是与文件路径相关的问题,需要在文件名前加r或者R转义,如:file(r”e:\Test.txt”,‘r‘).或者将反斜杠\变成两个,如file(“e:\Test.txt”,‘r‘).

因此,要将代码改为:

users = pd.read_table(r’C:\1M data\ml-1m\users.dat’, sep=‘::‘, header = None, names = unames)

#!/usr/bin/env python # -*- coding: utf-8 -*- import os import re import sys import argparse import xlwt from collections import defaultdict # 分区名称映射表(前缀 → 友好名称) PARTITION_NAME_MAP = { '02_': 'system', '03_': 'vendor', '04_': 'product', '05_': 'odm', '06_': 'my_product', '07_': 'my_engineering', '08_': 'my_stock', '09_': 'my_heytap', '10_': 'my_company', '11_': 'my_carrier', '12_': 'my_region', '13_': 'my_preload', '14_': 'data', '15_': 'my_bigball', '16_': 'my_manifest', '17_system_dlkm': 'system_dlkm', # 新增独立分区 '17_vendor_dlkm': 'vendor_dlkm', # 新增独立分区 '17_cache': 'cache' # 新增独立分区 } def parse_du_file(file_path): """解析du命令输出文件并转换为MB""" data = {} try: with open(file_path, 'r') as f: for line in f: if 'Permission denied' in line or 'No such file' in line or not line.strip(): continue match = re.match(r'(\d+\.?\d*)\s*([KMG]?)[Bb]?\s+(.*)', line.strip()) if match: size, unit, path = match.groups() size = float(size) # 单位转换到MB if unit == 'K': size = size / 1024.0 elif unit == '': size = size / (1024*1024.0) # 默认字节转MB elif unit == 'M': pass # 已经是MB elif unit == 'G': size = size * 1024.0 data[path] = round(size, 4) # 保留两位小数 except IOError as e: print("警告: 无法读取文件 {}: {}".format(file_path, str(e))) return data def extract_file_prefix(filename): """提取文件前缀""" if filename.startswith('17_'): # 移除.txt扩展名后返回完整标识 return filename.replace('.txt', '') match = re.match(r'^(\d+_)', filename) return match.group(1) if match else "other_" def is_main_partition_file(filename, prefix): """检查是否为主分区文件""" # 17_开头的文件都视为主分区文件 if prefix.startswith('17_'): return True # 使用兼容的字符串拼接方式替代f-string expected_name = prefix + PARTITION_NAME_MAP[prefix] + ".txt" return filename == expected_name def generate_dual_report(folder1, folder2, output_xlsx): """生成双机对比报告""" # 获取文件夹名称用于显示 folder1_name = os.path.basename(os.path.normpath(folder1)) folder2_name = os.path.basename(os.path.normpath(folder2)) # 验证文件夹是否存在 for folder in [folder1, folder2]: if not os.path.exists(folder): print("错误: 目录不存在 - {}".format(folder)) return "目录 {} 不存在,请检查路径".format(folder) if not os.path.isdir(folder): print("错误: 路径不是目录 - {}".format(folder)) return "{} 不是有效目录".format(folder) # 初始化数据结构 # 主文件数据(用于总览页) machine1_main_data = {} # {prefix: {path: size}} machine2_main_data = {} # {prefix: {path: size}} # 所有文件数据(用于分区详细页) machine1_all_files = defaultdict(dict) # {filename: {path: size}} machine2_all_files = defaultdict(dict) # {filename: {path: size}} # 收集数据 for folder_path, main_dict, all_dict in [ (folder1, machine1_main_data, machine1_all_files), (folder2, machine2_main_data, machine2_all_files) ]: print("处理目录: {}".format(folder_path)) try: for filename in os.listdir(folder_path): if not filename.endswith('.txt'): continue # 提取文件前缀 prefix = extract_file_prefix(filename) # 跳过01_mount.txt和无效前缀 if prefix == '01_' or prefix not in PARTITION_NAME_MAP: continue file_path = os.path.join(folder_path, filename) partition_name = PARTITION_NAME_MAP[prefix] # 解析文件数据 file_data = parse_du_file(file_path) # 添加到所有文件数据 all_dict[filename] = file_data # 如果是主文件,添加到主文件数据(用于总览页) if is_main_partition_file(filename, prefix): print("解析主分区文件: {}".format(file_path)) main_dict[prefix] = file_data except OSError as e: print("目录访问错误: {}".format(str(e))) return "无法访问目录 {}: {}".format(folder_path, str(e)) # 创建Excel工作簿 try: wb = xlwt.Workbook(encoding='utf-8') header_style = xlwt.easyxf('font: bold on') title_style = xlwt.easyxf('font: bold on, height 280; align: wrap on, vert centre') normal_style = xlwt.easyxf() added_style = xlwt.easyxf('pattern: pattern solid, fore_colour light_green;') removed_style = xlwt.easyxf('pattern: pattern solid, fore_colour rose;') summary_style = xlwt.easyxf('font: bold on, color blue;') # ====== 创建总览Sheet页(添加备注列) ====== ws_overview = wb.add_sheet('总览') print("创建总览Sheet页(仅主文件数据)") current_row = 0 # 写入总览标题(扩展列数) ws_overview.write_merge( current_row, current_row, 0, 5, # 扩展到6列 "存储使用总览(仅主分区文件)", title_style ) current_row += 1 # 写入文件夹名称 ws_overview.write(current_row, 1, folder1_name, header_style) ws_overview.write(current_row, 2, folder2_name, header_style) current_row += 1 # 写入表头(增加备注列) headers = ['分区', '总大小(MB)', '总大小(MB)', '差值(MB)', '标记', '增大TOP路径'] for col, header in enumerate(headers): ws_overview.write(current_row, col, header, header_style) current_row += 1 # 存储各分区汇总数据 overview_data = [] total_machine1 = 0.0 total_machine2 = 0.0 # 按分区顺序处理数据 for prefix in sorted(PARTITION_NAME_MAP.keys()): partition_name = PARTITION_NAME_MAP[prefix] # 跳过data分区 if partition_name == 'data': continue # 获取主文件数据 data1 = machine1_main_data.get(prefix, {}) data2 = machine2_main_data.get(prefix, {}) # 计算主文件总大小 partition_total1 = round(sum(data1.values()), 2) partition_total2 = round(sum(data2.values()), 2) diff = partition_total1 - partition_total2 # 更新总计 total_machine1 += partition_total1 total_machine2 += partition_total2 # 确定标记样式 if diff > 0: mark = "增加" style = added_style elif diff < 0: mark = "减少" style = removed_style else: mark = "无变化" style = normal_style # 计算分区中增大的TOP3路径(仅当分区增大时) top_notes = [] if diff > 0: path_diffs = [] all_paths = set(data1.keys()) | set(data2.keys()) for path in all_paths: size1 = data1.get(path, 0.0) size2 = data2.get(path, 0.0) path_diff = size1 - size2 if path_diff > 0: # 只记录增大的路径 path_diffs.append((path, path_diff)) # 按增大值降序排序,取TOP3 path_diffs.sort(key=lambda x: x[1], reverse=True) for i, (path, diff_val) in enumerate(path_diffs[:3]): # 简化长路径显示 if len(path) > 50: path = "..." + path[-47:] top_notes.append(f"{i+1}. {path}: +{diff_val:.2f}MB") # 保存分区数据 overview_data.append({ 'name': partition_name, 'machine1': partition_total1, 'machine2': partition_total2, 'diff': diff, 'style': style, 'mark': mark, 'notes': "\n".join(top_notes) if top_notes else "无显著增大路径" }) # 写入行数据到总览页(新增备注列) ws_overview.write(current_row, 0, partition_name, style) ws_overview.write(current_row, 1, partition_total1, style) ws_overview.write(current_row, 2, partition_total2, style) ws_overview.write(current_row, 3, diff, style) ws_overview.write(current_row, 4, mark, style) ws_overview.write(current_row, 5, overview_data[-1]['notes'], wrap_style) current_row += 1 # 设置备注列宽度(100字符) ws_overview.col(5).width = 256 * 100 # 添加空行 current_row += 1 # 写入总计行(使用新的差值计算方式) total_diff = total_machine1 - total_machine2 if total_diff > 0: total_mark = "总增加" # folder1 > folder2 total_style = added_style elif total_diff < 0: total_mark = "总减少" # folder1 < folder2 total_style = removed_style else: total_mark = "无变化" total_style = normal_style ws_overview.write(current_row, 0, "总计", header_style) ws_overview.write(current_row, 1, total_machine1, header_style) ws_overview.write(current_row, 2, total_machine2, header_style) ws_overview.write(current_row, 3, total_diff, header_style) ws_overview.write(current_row, 4, total_mark, header_style) # ====== 为每个文件创建单独的Sheet页 ====== # 获取所有唯一的文件名(两个文件夹的并集) all_filenames = sorted(set(machine1_all_files.keys()) | set(machine2_all_files.keys())) for filename in all_filenames: # 提取文件前缀 prefix = extract_file_prefix(filename) # 跳过无效前缀 if prefix not in PARTITION_NAME_MAP: continue # 获取分区名称 partition_name = PARTITION_NAME_MAP[prefix] # 创建Sheet页名称(文件名不带扩展名) sheet_name = filename.replace('.txt', '') if len(sheet_name) > 31: # Excel sheet名称长度限制 sheet_name = sheet_name[:31] # 创建Sheet页 ws = wb.add_sheet(sheet_name) print("创建文件Sheet页: {}".format(sheet_name)) # 当前行指针 current_row = 0 # 写入分区标题 title = "分区: {} - 文件: {}".format(partition_name, filename) ws.write_merge( current_row, current_row, 0, 5, title, title_style ) current_row += 1 # 写入文件夹名称(新格式) ws.write_merge(current_row, current_row, 0, 1, folder1_name, header_style) ws.write_merge(current_row, current_row, 2, 3, folder2_name, header_style) ws.write(current_row, 4, "差异(M)", header_style) ws.write(current_row, 5, "标记", header_style) current_row += 1 # 写入表头(新格式) headers = ['路径', '大小(M)', '路径', '大小(M)', '差异(M)', '标记'] for col, header in enumerate(headers): ws.write(current_row, col, header, header_style) current_row += 1 # 获取文件数据 data1 = machine1_all_files.get(filename, {}) data2 = machine2_all_files.get(filename, {}) # 获取所有路径(合并两个文件夹的路径) all_paths = sorted(set(data1.keys()) | set(data2.keys())) # 初始化变化统计数据 total_increase = 0.0 # 增大总和 total_decrease = 0.0 # 减小总和 total_added = 0.0 # 新增文件总和 total_removed = 0.0 # 去除文件总和 # 写入数据行(新格式) for path in all_paths: size1 = data1.get(path, 0.0) size2 = data2.get(path, 0.0) # 修改为 folder1 - folder2 的差值计算方式 diff = size1 - size2 # 确定标记和样式(根据新的差值计算方式调整) if size1 == 0 and size2 > 0: mark = "除去" cell_style = removed_style total_removed += size2 elif size1 > 0 and size2 == 0: mark = "新增" cell_style = added_style total_added += size1 else: if diff > 0: mark = "增大" cell_style = added_style total_increase += diff elif diff < 0: mark = "减小" cell_style = removed_style total_decrease += abs(diff) else: mark = "相同" cell_style = normal_style # 写入行数据(新格式) # folder1列 if size1 > 0: ws.write(current_row, 0, path, cell_style) ws.write(current_row, 1, size1, cell_style) else: ws.write(current_row, 0, "", cell_style) ws.write(current_row, 1, "", cell_style) # folder2列 if size2 > 0: ws.write(current_row, 2, path, cell_style) ws.write(current_row, 3, size2, cell_style) else: ws.write(current_row, 2, "", cell_style) ws.write(current_row, 3, "", cell_style) # 差异和标记列 ws.write(current_row, 4, diff, cell_style) ws.write(current_row, 5, mark, cell_style) current_row += 1 # 添加文件汇总行(新格式) file_total1 = sum(data1.values()) file_total2 = sum(data2.values()) file_diff = file_total1 - file_total2 # 写入汇总行(新格式) ws.write(current_row, 0, "文件汇总", header_style) ws.write(current_row, 1, file_total1, header_style) ws.write(current_row, 2, "", header_style) ws.write(current_row, 3, file_total2, header_style) ws.write(current_row, 4, file_diff, header_style) ws.write(current_row, 5, "", header_style) current_row += 1 # 添加变化分类统计行 - 修改为Python 2.7兼容的格式 message = ( u"{partition_name}路径下: " u"减小{total_decrease:.2f}M " u"增大{total_increase:.2f}M " u"新增文件{total_added:.2f}M " u"减少文件{total_removed:.2f}M" ).format( partition_name=partition_name, total_decrease=total_decrease, total_increase=total_increase, total_added=total_added, total_removed=total_removed ) ws.write_merge( current_row, current_row, 0, 5, message, summary_style ) # 保存文件 wb.save(output_xlsx) return "对比报告已成功生成: {}".format(output_xlsx) except Exception as e: import traceback traceback.print_exc() return "生成Excel文件时出错: {}".format(str(e)) def generate_single_report(folder, output_xlsx): """生成单机拆解报告""" # 获取文件夹名称用于显示 folder_name = os.path.basename(os.path.normpath(folder)) # 验证文件夹是否存在 if not os.path.exists(folder): print("错误: 目录不存在 - {}".format(folder)) return "目录 {} 不存在,请检查路径".format(folder) if not os.path.isdir(folder): print("错误: 路径不是目录 - {}".format(folder)) return "{} 不是有效目录".format(folder) # 初始化数据结构 main_data = {} # {prefix: {path: size}} 主文件数据 all_files = defaultdict(dict) # {filename: {path: size}} 所有文件数据 print("处理目录: {}".format(folder)) try: for filename in os.listdir(folder): if not filename.endswith('.txt'): continue # 提取文件前缀 prefix = extract_file_prefix(filename) # 跳过01_mount.txt和无效前缀 if prefix == '01_' or prefix not in PARTITION_NAME_MAP: continue file_path = os.path.join(folder, filename) partition_name = PARTITION_NAME_MAP[prefix] # 解析文件数据 file_data = parse_du_file(file_path) # 添加到所有文件数据 all_files[filename] = file_data # 如果是主文件,添加到主文件数据(用于总览页) if is_main_partition_file(filename, prefix): print("解析主分区文件: {}".format(file_path)) main_data[prefix] = file_data except OSError as e: print("目录访问错误: {}".format(str(e))) return "无法访问目录 {}: {}".format(folder, str(e)) # 创建Excel工作簿 try: wb = xlwt.Workbook(encoding='utf-8') header_style = xlwt.easyxf('font: bold on') title_style = xlwt.easyxf('font: bold on, height 280; align: wrap on, vert centre') normal_style = xlwt.easyxf() # ====== 创建总览Sheet页(只包含主文件数据) ====== ws_overview = wb.add_sheet('总览') print("创建总览Sheet页(仅主文件数据)") # 当前行指针 current_row = 0 # 写入总览标题 ws_overview.write_merge( current_row, current_row, 0, 1, "存储使用总览(仅主分区文件) - 单机报告", title_style ) current_row += 1 # 写入文件夹名称 ws_overview.write(current_row, 0, "分区", header_style) ws_overview.write(current_row, 1, "总大小(MB)", header_style) current_row += 1 # 存储各分区汇总数据 total_size = 0.0 # 按分区顺序处理数据 for prefix in sorted(PARTITION_NAME_MAP.keys()): partition_name = PARTITION_NAME_MAP[prefix] # 跳过data分区 if partition_name == 'data': continue # 获取主文件数据 data = main_data.get(prefix, {}) partition_total = round(sum(data.values()), 2) total_size += partition_total # 写入行数据到总览页 ws_overview.write(current_row, 0, partition_name, normal_style) ws_overview.write(current_row, 1, partition_total, normal_style) current_row += 1 # 添加空行 current_row += 1 # 写入总计行 ws_overview.write(current_row, 0, "总计", header_style) ws_overview.write(current_row, 1, total_size, header_style) # ====== 为每个文件创建单独的Sheet页 ====== for filename in sorted(all_files.keys()): # 提取文件前缀 prefix = extract_file_prefix(filename) # 跳过无效前缀 if prefix not in PARTITION_NAME_MAP: continue # 获取分区名称 partition_name = PARTITION_NAME_MAP[prefix] # 创建Sheet页名称(文件名不带扩展名) sheet_name = filename.replace('.txt', '') if len(sheet_name) > 31: # Excel sheet名称长度限制 sheet_name = sheet_name[:31] # 创建Sheet页 ws = wb.add_sheet(sheet_name) print("创建文件Sheet页: {}".format(sheet_name)) # 当前行指针 current_row = 0 # 写入分区标题 title = "分区: {} - 文件: {}".format(partition_name, filename) ws.write_merge( current_row, current_row, 0, 1, title, title_style ) current_row += 1 # 写入表头 headers = ['路径', '大小(M)'] for col, header in enumerate(headers): ws.write(current_row, col, header, header_style) current_row += 1 # 获取文件数据 data = all_files[filename] # 按路径排序 sorted_paths = sorted(data.keys()) # 写入数据行 for path in sorted_paths: size = data[path] ws.write(current_row, 0, path, normal_style) ws.write(current_row, 1, size, normal_style) current_row += 1 # 写入汇总行 file_total = sum(data.values()) ws.write(current_row, 0, "文件汇总", header_style) ws.write(current_row, 1, file_total, header_style) # 保存文件 wb.save(output_xlsx) return "单机报告已成功生成: {}".format(output_xlsx) except Exception as e: import traceback traceback.print_exc() return "生成Excel文件时出错: {}".format(str(e)) if __name__ == "__main__": # 创建参数解析器 parser = argparse.ArgumentParser(description='存储空间分析工具') subparsers = parser.add_subparsers(dest='mode', help='运行模式') # 双机对比模式 dual_parser = subparsers.add_parser('dual', help='双机对比模式') dual_parser.add_argument('folder1', help='第一个文件夹路径') dual_parser.add_argument('folder2', help='第二个文件夹路径') dual_parser.add_argument('output', help='输出Excel文件路径') # 单机拆解模式 single_parser = subparsers.add_parser('single', help='单机拆解模式') single_parser.add_argument('folder', help='待分析文件夹路径') single_parser.add_argument('output', help='输出Excel文件路径') # 解析参数 args = parser.parse_args() if args.mode == 'dual': print("运行双机对比模式...") result = generate_dual_report(args.folder1, args.folder2, args.output) elif args.mode == 'single': print("运行单机拆解模式...") result = generate_single_report(args.folder, args.output) else: result = "错误:请选择 'dual' 或 'single' 模式" print(result)V27960021@dg03podv27960021kj4p:~/存储/hawaii$ python storage.py dual ./22610/存储/测试机 ./22610/存储/对比机 22610_report_dual.xlsx File "storage.py", line 222 top_notes.append(f"{i+1}. {path}: +{diff_val:.2f}MB") ^ SyntaxError: invalid syntax
07-16
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值