dataset = load_dataset()报错:AttributeError: readonly attribute

博客展示了一段Python代码报错信息,报错源于导入datasets库时出现问题。解决方案是更新datasets库版本,更新后问题得到解决。
部署运行你感兴趣的模型镜像
from datasets import load_dataset

# Hub identifier of the IMDb dataset and the local folder it is saved to.
DATASET_ID = "pietrolesci/imdb"
OUTPUT_DIR = "./dataset_imdb"

# Download the dataset, keep a copy on disk, then print its summary.
dataset = load_dataset(DATASET_ID)
dataset.save_to_disk(OUTPUT_DIR)
print(dataset)

上述代码报错:
Traceback (most recent call last):
  File "e:\3_DesktopMigration\AI_Lee20231227\1_Codes\1_NegativeSampling_pytorch_sgns_master\1_downloadDataset_Lee20240228.py", line 1, in <module>
    from datasets import load_dataset
  File "D:\1_Software\1_SoftwareApplication\miniconda3\miniconda3\envs\myAI_LearnEnv\Lib\site-packages\datasets\__init__.py", line 34, in <module>
    from .arrow_dataset import Dataset, concatenate_datasets
  File "D:\1_Software\1_SoftwareApplication\miniconda3\miniconda3\envs\myAI_LearnEnv\Lib\site-packages\datasets\arrow_dataset.py", line 46, in <module>
    from .arrow_reader import ArrowReader
  File "D:\1_Software\1_SoftwareApplication\miniconda3\miniconda3\envs\myAI_LearnEnv\Lib\site-packages\datasets\arrow_reader.py", line 33, in <module>
    from .table import InMemoryTable, MemoryMappedTable, Table, concat_tables
  File "D:\1_Software\1_SoftwareApplication\miniconda3\miniconda3\envs\myAI_LearnEnv\Lib\site-packages\datasets\table.py", line 331, in <module>
    class InMemoryTable(TableBlock):
  File "D:\1_Software\1_SoftwareApplication\miniconda3\miniconda3\envs\myAI_LearnEnv\Lib\site-packages\datasets\table.py", line 356, in InMemoryTable
    @inject_arrow_table_documentation(pa.Table.from_pandas)
     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\1_Software\1_SoftwareApplication\miniconda3\miniconda3\envs\myAI_LearnEnv\Lib\site-packages\datasets\table.py", line 21, in wrapper
    out = wraps(arrow_table_method)(method)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\1_Software\1_SoftwareApplication\miniconda3\miniconda3\envs\myAI_LearnEnv\Lib\functools.py", line 61, in update_wrapper
    wrapper.__wrapped__ = wrapped
    ^^^^^^^^^^^^^^^^^^^
AttributeError: readonly attribute

解决方案:

更新datasets库版本。

pip install datasets --upgrade

 问题解决:

有如下显示:

您可能感兴趣的与本文相关的镜像

Python3.11

Python3.11

Conda
Python

Python 是一种高级、解释型、通用的编程语言,以其简洁易读的语法而闻名,适用于广泛的应用,包括Web开发、数据分析、人工智能和自动化脚本

点击导出错误信息按钮,提示 noneType object has no attribute keys ,以下为代码import tkinter as tk from tkinter import filedialog, messagebox, ttk import pandas as pd import os import subprocess import math import tkinter as tk from tkinter import messagebox import datetime class ExcelViewerApp: def init(self, root): self.root = root self.root.title(“TPC效率化工具”) self.root.geometry(“1200x800”) # 使用网格布局的主容器 self.main_frame = tk.Frame(root, bg='#f0f0f0', padx=15, pady=15) self.main_frame.pack(fill=tk.BOTH, expand=True) # 配置网格 self.main_frame.columnconfigure(0, weight=0) self.main_frame.columnconfigure(1, weight=1) self.main_frame.columnconfigure(2, weight=0) self.main_frame.columnconfigure(3, weight=0) # === 主表控件 === # 文件路径行 tk.Label(self.main_frame, text="主表文件路径:", bg='#f0f0f0').grid(row=0, column=0, sticky='w', padx=(0, 5)) self.path_label = tk.Label( self.main_frame, text="未选择主表文件", anchor='w', relief=tk.SUNKEN, bg="#ffffff", padx=5, pady=5 ) self.path_label.grid(row=0, column=1, sticky='ew', padx=(0, 10)) # 工作表行 tk.Label(self.main_frame, text="主表工作表:", bg='#f0f0f0').grid(row=1, column=0, sticky='w', padx=(0, 5), pady=(10, 0)) self.sheet_label = tk.Label( self.main_frame, text="未选择主表工作表", anchor='w', relief=tk.SUNKEN, bg="#ffffff", padx=5, pady=5 ) self.sheet_label.grid(row=1, column=1, sticky='ew', padx=(0, 10), pady=(10, 0)) # 打开文件按钮 self.open_btn = tk.Button( self.main_frame, text="打开主表文件", command=self.open_excel, bg='#f0f0f0', fg='black', padx=15, pady=8, width=10 ) self.open_btn.grid(row=0, column=2, rowspan=2, sticky='ns', padx=(0, 5)) # 选择主表按钮 self.select_btn = tk.Button( self.main_frame, text="选择主表", command=self.load_excel, bg='#f0f0f0', fg='black', padx=15, pady=8, width=10 ) self.select_btn.grid(row=0, column=3, rowspan=2, sticky='ns', padx=(0, 5)) # === 副表控件 === (添加在下方) tk.Label(self.main_frame, text="副表文件路径:", bg='#f0f0f0').grid(row=2, column=0, sticky='w', padx=(0, 5), pady=(20, 0)) self.aux_path_label = tk.Label( self.main_frame, text="未选择副表文件", anchor='w', 
relief=tk.SUNKEN, bg="#ffffff", padx=5, pady=5 ) self.aux_path_label.grid(row=2, column=1, sticky='ew', padx=(0, 10), pady=(20, 0)) tk.Label(self.main_frame, text="副表工作表:", bg='#f0f0f0').grid(row=3, column=0, sticky='w', padx=(0, 5), pady=(10, 0)) self.aux_sheet_label = tk.Label( self.main_frame, text="未选择副表工作表", anchor='w', relief=tk.SUNKEN, bg="#ffffff", padx=5, pady=5 ) self.aux_sheet_label.grid(row=3, column=1, sticky='ew', padx=(0, 10), pady=(10, 0)) # 打开副表文件按钮 self.aux_open_btn = tk.Button( self.main_frame, text="打开副表文件", command=self.open_aux_excel, bg='#f0f0f0', fg='black', padx=15, pady=8, width=10 ) self.aux_open_btn.grid(row=2, column=2, rowspan=2, sticky='ns', padx=(0, 5), pady=(20, 0)) # 选择副表按钮 self.aux_select_btn = tk.Button( self.main_frame, text="选择副表", command=self.load_aux_excel, bg='#f0f0f0', fg='black', padx=15, pady=8, width=10 ) self.aux_select_btn.grid(row=2, column=3, rowspan=2, sticky='ns', padx=(0, 5), pady=(20, 0)) # 使用数组存储数据 self.dataset = [] # 主表数据 self.aux_dataset = [] # 副表数据 # 窗口居中 self.center_window(self.root) # 添加对比按钮 self.compare_btn = tk.Button( self.main_frame, text="对比数据", command=self.compare_data, bg='#4CAF50', fg='white', padx=15, pady=8, width=15 ) self.compare_btn.grid(row=4, column=1, columnspan=2, pady=20) # 在对比按钮下方添加导出错误按钮 self.export_btn = tk.Button( self.main_frame, text="导出错误信息", command=self.export_errors, bg='#FF9800', fg='white', padx=15, pady=8, width=15 ) self.export_btn.grid(row=4, column=2, columnspan=2, pady=20) # 添加过滤控制区域(在结果区域上方) self.filter_frame = tk.Frame(root, bg='#f0f0f0') self.filter_frame.pack(fill=tk.X, padx=10, pady=(0,5)) # 添加两个过滤复选框 self.show_correct_var = tk.BooleanVar(value=True) self.show_error_var = tk.BooleanVar(value=True) cb_correct = tk.Checkbutton( self.filter_frame, text="显示正确", variable=self.show_correct_var, command=self.filter_results, bg='#f0f0f0' ) cb_correct.pack(side=tk.LEFT, padx=(10,5)) cb_error = tk.Checkbutton( self.filter_frame, text="显示错误", variable=self.show_error_var, 
command=self.filter_results, bg='#f0f0f0' ) cb_error.pack(side=tk.LEFT, padx=(5,10)) # 创建结果显示区域 self.result_frame = tk.Frame(root, bg='#f0f0f0') self.result_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=(0,10)) # 添加Treeview显示结果 self.tree = ttk.Treeview(self.result_frame) self.tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) # 添加滚动条 scrollbar = ttk.Scrollbar(self.result_frame, orient="vertical", command=self.tree.yview) scrollbar.pack(side=tk.RIGHT, fill=tk.Y) self.tree.configure(yscrollcommand=scrollbar.set) # 状态标签 self.status_label = tk.Label(root, text="就绪", bg='#f0f0f0', anchor='w') self.status_label.pack(fill=tk.X, padx=10, pady=5) def center_window(self, window): window.update_idletasks() width = window.winfo_width() height = window.winfo_height() x = (window.winfo_screenwidth() // 2) - (width // 2) y = (window.winfo_screenheight() // 2) - (height // 2) window.geometry(f"+{x}+{y}") # ===== 主表功能 ===== def open_excel(self): """打开主表文件""" file_path = self.path_label.cget("text") if not file_path or file_path == "未选择主表文件": messagebox.showwarning("警告", "请先选择主表Excel文件") return try: if os.name == 'nt': os.startfile(file_path) elif os.name == 'posix': subprocess.call(('open', file_path)) else: subprocess.call(('xdg-open', file_path)) except Exception as e: messagebox.showerror("错误", f"打开主表文件失败: {str(e)}") def load_excel(self): """加载主表Excel""" file_path = filedialog.askopenfilename( title="选择主表Excel文件", filetypes=[("Excel文件", "*.xlsx *.xls")] ) if not file_path: return try: self.path_label.config(text=file_path) self.select_main_sheet(file_path) except Exception as e: messagebox.showerror("错误", f"读取主表文件失败: {str(e)}") def select_main_sheet(self, file_path): """选择主表工作表""" try: xl = pd.ExcelFile(file_path) sheet_names = xl.sheet_names selector = tk.Toplevel(self.root) selector.title("选择主表工作表") selector.geometry("300x150") tk.Label(selector, text="请选择主表工作表:").pack(pady=10) sheet_var = tk.StringVar(selector) combobox = ttk.Combobox( selector, textvariable=sheet_var, 
values=sheet_names, state="readonly", width=40 ) combobox.pack(pady=10, padx=20, fill=tk.X) combobox.current(0) tk.Button( selector, text="确认选择", command=lambda: self.process_main_sheet_selection( file_path, sheet_var.get(), selector ), bg='#f0f0f0', fg='black', padx=10, pady=5 ).pack(pady=15) self.center_window(selector) except Exception as e: messagebox.showerror("错误", f"读取主表文件失败: {str(e)}") def process_main_sheet_selection(self, file_path, sheet_name, selector): """处理主表工作表选择结果""" try: df = pd.read_excel(file_path, sheet_name=sheet_name, header=0, skiprows=list(range(0,9))) self.dataset = df.to_dict(orient='records') row_count = len(self.dataset) col_count = len(df.columns) if row_count > 0 else 0 self.sheet_label.config(text=f"{sheet_name} ({row_count}行×{col_count}列)") selector.destroy() messagebox.showinfo("加载成功", f"主表工作表 [{sheet_name}] 已载入\n" f"数据维度: {row_count}行 × {col_count}列" ) print(f"主表数据示例: {self.dataset[0] if self.dataset else '空'}") except Exception as e: messagebox.showerror("错误", f"加载主表数据失败: {str(e)}") # ===== 副表功能 ===== def open_aux_excel(self): """打开副表文件""" file_path = self.aux_path_label.cget("text") if not file_path or file_path == "未选择副表文件": messagebox.showwarning("警告", "请先选择副表Excel文件") return try: if os.name == 'nt': os.startfile(file_path) elif os.name == 'posix': subprocess.call(('open', file_path)) else: subprocess.call(('xdg-open', file_path)) except Exception as e: messagebox.showerror("错误", f"打开副表文件失败: {str(e)}") def load_aux_excel(self): """加载副表Excel""" file_path = filedialog.askopenfilename( title="选择副表Excel文件", filetypes=[("Excel文件", "*.xlsx *.xls")] ) if not file_path: return try: self.aux_path_label.config(text=file_path) self.select_aux_sheet(file_path) except Exception as e: messagebox.showerror("错误", f"读取副表文件失败: {str(e)}") def select_aux_sheet(self, file_path): """选择副表工作表""" try: xl = pd.ExcelFile(file_path) sheet_names = xl.sheet_names selector = tk.Toplevel(self.root) selector.title("选择副表工作表") selector.geometry("300x150") 
tk.Label(selector, text="请选择副表工作表:").pack(pady=10) sheet_var = tk.StringVar(selector) combobox = ttk.Combobox( selector, textvariable=sheet_var, values=sheet_names, state="readonly", width=40 ) combobox.pack(pady=10, padx=20, fill=tk.X) combobox.current(0) tk.Button( selector, text="确认选择", command=lambda: self.process_aux_sheet_selection( file_path, sheet_var.get(), selector ), bg='#f0f0f0', fg='black', padx=10, pady=5 ).pack(pady=15) self.center_window(selector) except Exception as e: messagebox.showerror("错误", f"读取副表文件失败: {str(e)}") def process_aux_sheet_selection(self, file_path, sheet_name, selector): """处理副表工作表选择结果""" try: #df = pd.read_excel(file_path, sheet_name=sheet_name, header=0, skiprows=list(range(0,9))) df = pd.read_excel(file_path, sheet_name=sheet_name, header=0) self.aux_dataset = df.to_dict(orient='records') row_count = len(self.aux_dataset) col_count = len(df.columns) if row_count > 0 else 0 self.aux_sheet_label.config(text=f"{sheet_name} ({row_count}行×{col_count}列)") selector.destroy() messagebox.showinfo("加载成功", f"副表工作表 [{sheet_name}] 已载入\n" f"数据维度: {row_count}行 × {col_count}列" ) print(f"副表数据示例: {self.aux_dataset[0] if self.aux_dataset else '空'}") except Exception as e: messagebox.showerror("错误", f"加载副表数据失败: {str(e)}") def compare_data(self): """对比主表和副表数据并设置背景色,将空字符串和NaN视为相同""" if not self.dataset or not self.aux_dataset: messagebox.showwarning("警告", "请先加载主表和副表数据") return # 初始化错误行收集列表 self.error_rows = [] self.all_items = [] self.row_status = {} # 辅助函数:判断两个值是否相等 def values_equal(a, b): if (a == "" or a is None or (isinstance(a, float) and math.isnan(a))): return (b == "" or b is None or (isinstance(b, float) and math.isnan(b))) return a == b # 获取列名 main_columns = set(self.dataset[0].keys()) if self.dataset else set() aux_columns = set(self.aux_dataset[0].keys()) if self.aux_dataset else set() # 获取共有列(排除Z2)- 修复:确保列名一致性 common_columns = sorted([col for col in (main_columns & aux_columns) if col != 'Z2']) # 验证Z2列存在 if 'Z2' not in main_columns or 
'Z2' not in aux_columns: messagebox.showerror("错误", "主表或副表缺少Z2列") return # 配置Treeview - 修复:确保列配置正确 self.tree["columns"] = ["Z2"] + common_columns self.tree["show"] = "headings" self.tree.heading("Z2", text="Z2(匹配状态)") for col in common_columns: self.tree.heading(col, text=col) # 设置列宽 self.tree.column("Z2", width=200, anchor=tk.CENTER) for col in common_columns: self.tree.column(col, width=100, anchor=tk.CENTER) # 清空现有数据 for item in self.tree.get_children(): self.tree.delete(item) # 创建颜色标记 self.tree.tag_configure('match', background='#DFF0D8') # 绿色匹配 self.tree.tag_configure('mismatch', background='#F8D7DA') # 红色不匹配 self.tree.tag_configure('not_found', background='#F8D7DA') # 红色未找到 self.tree.tag_configure('extra', background='#FFE69C') # 黄色副表多余 # 统计变量 match_count = mismatch_count = not_found_count = extra_count = 0 error_samples = [] # 收集所有错误示例 # 创建映射表 # 收集主表所有Z2值 - 修复:处理None值 main_z2_values = set() for row in self.dataset: z2_value = row.get('Z2') if z2_value is not None: main_z2_values.add(z2_value) # 记录所有副表Z2值和行 aux_map = {} for aux_row in self.aux_dataset: z2_value = aux_row.get('Z2') if z2_value is not None: aux_map.setdefault(z2_value, []).append(aux_row) # 找出副表多余的Z2值 - 修复:确保正确识别多余行 extra_z2_values = [z2 for z2 in aux_map.keys() if z2 not in main_z2_values] # 第一步:处理主表数据 for main_row in self.dataset: main_z2 = main_row.get('Z2') found_in_aux = False all_matched = True mismatched_columns = [] matching_aux_rows = aux_map.get(main_z2, []) if main_z2 is not None else [] if matching_aux_rows: found_in_aux = True row_match = False for aux_row in matching_aux_rows: current_match = True for col in common_columns: main_val = main_row.get(col, '') aux_val = aux_row.get(col, '') if not values_equal(main_val, aux_val): current_match = False if col not in mismatched_columns: mismatched_columns.append(col) if current_match: row_match = True mismatched_columns = [] break all_matched = row_match # 准备显示内容 if not found_in_aux: z2_display = f"{main_z2} ✗ (未找到)" if main_z2 is not 
None else "✗ (未找到-Z2为空)" not_found_count += 1 status = 'not_found' error_samples.append(f"[主表] Z2={main_z2}: 未找到") elif all_matched: z2_display = f"{main_z2} ✓" match_count += 1 status = 'match' else: error_cols = ", ".join(mismatched_columns[:3]) if len(mismatched_columns) > 3: error_cols += f" 等{len(mismatched_columns)}处" z2_display = f"{main_z2} ✗ ({error_cols})" if main_z2 is not None else f"✗ ({error_cols})" mismatch_count += 1 error_samples.append(f"[主表] Z2={main_z2}: 列不匹配({error_cols})") status = 'mismatch' values = [z2_display] + [main_row.get(col, '') for col in common_columns] item_id = self.tree.insert("", "end", values=values) self.tree.item(item_id, tags=(status,)) self.all_items.append(item_id) self.row_status[item_id] = status if not found_in_aux or not all_matched: self.error_rows.append({ 'status': '未找到' if not found_in_aux else '不匹配', 'main_z2': main_z2, 'main_row': main_row, 'aux_row': matching_aux_rows[0] if matching_aux_rows else None, 'mismatched_columns': mismatched_columns.copy() }) # 第二步:处理副表多余数据 - 修复:确保正确显示副表错误 for z2 in extra_z2_values: for aux_row in aux_map[z2]: extra_count += 1 error_samples.append(f"[副表] Z2={z2}: 多余数据") z2_display = f"{z2} ✗ (副表多余)" values = [z2_display] + [aux_row.get(col, '') for col in common_columns] item_id = self.tree.insert("", "end", values=values) self.tree.item(item_id, tags=('extra',)) self.all_items.append(item_id) self.row_status[item_id] = 'extra' self.error_rows.append({ 'status': '副表多余', 'main_z2': z2, 'main_row': None, 'aux_row': aux_row, 'mismatched_columns': [] }) # 更新状态信息 - 修复:正确处理所有错误类型 detail_text = "" if error_samples: sample_display = "\n".join(error_samples[:3]) # 显示前3个错误示例 if len(error_samples) > 3: sample_display += f"\n...等{len(error_samples)}处错误" detail_text = f"\n错误详情:\n{sample_display}" self.status_label.config( text=(f"对比完成 | 匹配: {match_count}行 | " f"主表错误: {mismatch_count + not_found_count}行 | " f"副表错误: {extra_count}行 | " f"共有列: {', '.join(common_columns)}{detail_text}") ) # 初始显示所有行 
self.filter_results() def filter_results(self): """根据复选框状态过滤结果 - 修复:确保副表错误行正确显示""" if not hasattr(self, 'all_items') or not self.all_items: return show_correct = self.show_correct_var.get() show_error = self.show_error_var.get() # 处理所有行的显示/隐藏 for item_id in self.all_items: status = self.row_status[item_id] visible = False if status == 'match' and show_correct: visible = True elif status in ['mismatch', 'not_found', 'extra'] and show_error: visible = True if visible: self.tree.reattach(item_id, '', 'end') else: self.tree.detach(item_id) def export_errors(self): """导出错误信息到Excel文件""" if not hasattr(self, 'error_rows') or not self.error_rows: messagebox.showinfo("提示", "没有需要导出的错误信息") return # 获取副表路径 aux_path = self.aux_path_label.cget("text") if not aux_path or aux_path == "未选择副表文件": messagebox.showwarning("警告", "请先选择副表文件") return # 创建错误文件路径(副表所在目录) aux_dir = os.path.dirname(aux_path) timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") error_filename = f"错误信息_{timestamp}.xlsx" error_path = os.path.join(aux_dir, error_filename) try: # 准备导出数据 export_data = [] # 获取所有列名(主表和副表的并集) all_columns = set() for row in self.error_rows: all_columns.update(row['main_row'].keys()) if row['aux_row']: all_columns.update(row['aux_row'].keys()) # 创建列名映射(避免重复) columns = ['状态', '主表-Z2', '错误列'] for col in sorted(all_columns): if col != 'Z2': columns.extend([f'主表-{col}', f'副表-{col}']) # 填充数据 for row in self.error_rows: data_row = { '状态': row['status'], '主表-Z2': row['main_z2'], '错误列': ', '.join(row['mismatched_columns']) } for col in sorted(all_columns): if col == 'Z2': continue # 主表数据 main_val = row['main_row'].get(col, '') data_row[f'主表-{col}'] = main_val if pd.notnull(main_val) else '' # 副表数据 aux_val = row['aux_row'].get(col, '') if row['aux_row'] else '' data_row[f'副表-{col}'] = aux_val if pd.notnull(aux_val) else '' export_data.append(data_row) # 创建DataFrame并导出 df = pd.DataFrame(export_data, columns=columns) df.to_excel(error_path, index=False) # 提示用户 messagebox.showinfo("导出成功", 
f"错误信息已导出到:\n{error_path}") # 询问是否打开文件 if messagebox.askyesno("打开文件", "是否打开导出的错误信息文件?"): if os.name == 'nt': os.startfile(error_path) elif os.name == 'posix': subprocess.call(('open', error_path)) else: subprocess.call(('xdg-open', error_path)) except Exception as e: messagebox.showerror("导出错误", f"导出错误信息时出错:\n{str(e)}") if name == “main”: root = tk.Tk() app = ExcelViewerApp(root) root.mainloop()
09-13
[main] 正在生成文件夹: d:/testcode/exp1-1024/build [main] 缺少包含 CMake 缓存的文件夹。将重新生成缓存。 [main] 正在配置项目: exp1-1024 [proc] 正在执行命令: C:/msys64/ucrt64/bin/cmake.exe -DCMAKE_BUILD_TYPE:STRING=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE -DCMAKE_C_COMPILER:FILEPATH=c:\msys64\ucrt64\bin\gcc.exe -DCMAKE_CXX_COMPILER:FILEPATH=c:\msys64\ucrt64\bin\g++.exe --no-warn-unused-cli -S D:/testcode/exp1-1024 -B d:/testcode/exp1-1024/build -G Ninja [cmake] Not searching for unused variables given on the command line. [cmake] -- The C compiler identification is GNU 15.2.0 [cmake] -- The CXX compiler identification is GNU 15.2.0 [cmake] -- Detecting C compiler ABI info [cmake] -- Detecting C compiler ABI info - done [cmake] -- Check for working C compiler: c:/msys64/ucrt64/bin/gcc.exe - skipped [cmake] -- Detecting C compile features [cmake] -- Detecting C compile features - done [cmake] -- Detecting CXX compiler ABI info [cmake] -- Detecting CXX compiler ABI info - done [cmake] -- Check for working CXX compiler: c:/msys64/ucrt64/bin/g++.exe - skipped [cmake] -- Detecting CXX compile features [cmake] -- Detecting CXX compile features - done [cmake] -- Current System: Windows [cmake] -- LIBRARY_PATH: C:/msys64/ucrt64/lib [cmake] -- INCLUDE_PATH: C:/msys64/ucrt64/include [cmake] -- QT_LIBS_PATH: C:/msys64/ucrt64/lib [cmake] -- QT_INCL_PATH: C:/msys64/ucrt64/include [cmake] -- GDAL_DIR: C:/msys64/ucrt64 [cmake] -- Found OpenMP_C: -fopenmp (found version "4.5") [cmake] -- Found OpenMP_CXX: -fopenmp (found version "4.5") [cmake] -- Found OpenMP: TRUE (found version "4.5") [cmake] -- found openmp [cmake] -- dirs: C:/msys64/ucrt64/lib [cmake] -- Qt5 found! 
[cmake] -- LIBS: Qt5::Core,Qt5::Gui,Qt5::Widgets,Qt5::PrintSupport [cmake] -- ================= Skip QCPLOT EXE ================= [cmake] -- DLib Library: C:/msys64/ucrt64/lib/libdlib.dll.a [cmake] -- Armadillo Library: C:/msys64/ucrt64/lib/libarmadillo.dll.a [cmake] -- Lapack Library: C:/msys64/ucrt64/lib/liblapack.dll.a [cmake] -- Blas Library: C:/msys64/ucrt64/lib/libblas.dll.a [cmake] -- CBlas Library: C:/msys64/ucrt64/lib/libcblas.dll.a [cmake] -- GSL Library: C:/msys64/ucrt64/lib/libgsl.dll.a [cmake] -- GSL CBLAS Library: C:/msys64/ucrt64/lib/libgslcblas.dll.a [cmake] -- QtCore Library: C:/msys64/ucrt64/lib/libQt5Core.dll.a [cmake] -- Configuring done (7.8s) [cmake] -- Generating done (1.2s) [cmake] -- Build files have been written to: D:/testcode/exp1-1024/build [build] 正在启动生成 [proc] 正在执行命令: chcp [proc] 正在执行命令: C:/msys64/ucrt64/bin/cmake.exe --build d:/testcode/exp1-1024/build --config Debug --target all -- [build] [1/4 25% :: 0.165] Automatic MOC and UIC for target exp1 [build] [3/4 50% :: 0.216] Building CXX object CMakeFiles/exp1.dir/exp1_autogen/mocs_compilation.cpp.obj [build] [3/4 75% :: 19.822] Building CXX object CMakeFiles/exp1.dir/src/test/main.cpp.obj [build] In file included from C:/msys64/ucrt64/include/armadillo:26, [build] from D:/testcode/exp1-1024/src/test/main.cpp:2: [build] C:/msys64/ucrt64/include/armadillo_bits/config.hpp:308:111: note: '#pragma message: WARNING: use of ARMA_BLAS_LONG_LONG in conjunction with ARMA_USE_WRAPPER is not supported' [build] 308 | #pragma message ("WARNING: use of ARMA_BLAS_LONG_LONG in conjunction with ARMA_USE_WRAPPER is not supported") [build] | ^ [build] In file included from D:/testcode/exp1-1024/include/Common/Common.h:15, [build] from D:/testcode/exp1-1024/include/Geometry/Ellipsoid2.h:21, [build] from D:/testcode/exp1-1024/include/Geometry/Geometry.h:17, [build] from D:/testcode/exp1-1024/include/FileIO/SLCParameter.h:35, [build] from D:/testcode/exp1-1024/include/FileIO/FileIO.h:14, [build] from 
D:/testcode/exp1-1024/src/test/main.cpp:4: [build] D:/testcode/exp1-1024/include/Common/setOperation.h:14:17: warning: inline function 'arma::Mat<double> xbinsar::unique_rows(const arma::Mat<double>&, arma::Mat<int>&)' declared as dllimport: attribute ignored [-Wattributes] [build] 14 | unique_rows(const arma::Mat<double>& inMat, arma::Mat<int>& index); [build] | ^~~~~~~~~~~ [build] D:/testcode/exp1-1024/include/Common/setOperation.h:19:17: warning: inline function 'arma::Mat<double> xbinsar::sort_rows(const arma::Mat<double>&, arma::Mat<int>&)' declared as dllimport: attribute ignored [-Wattributes] [build] 19 | sort_rows(const arma::Mat<double>& inMat, arma::Mat<int>& index); [build] | ^~~~~~~~~ [build] D:/testcode/exp1-1024/include/Common/setOperation.h:25:17: warning: inline function 'arma::Mat<float> xbinsar::unique_rows(const arma::Mat<float>&, arma::Mat<int>&)' declared as dllimport: attribute ignored [-Wattributes] [build] 25 | unique_rows(const arma::Mat<float>& inMat, arma::Mat<int>& index); [build] | ^~~~~~~~~~~ [build] D:/testcode/exp1-1024/include/Common/setOperation.h:30:17: warning: inline function 'arma::Mat<float> xbinsar::sort_rows(const arma::Mat<float>&, arma::Mat<int>&)' declared as dllimport: attribute ignored [-Wattributes] [build] 30 | sort_rows(const arma::Mat<float>& inMat, arma::Mat<int>& index); [build] | ^~~~~~~~~ [build] D:/testcode/exp1-1024/include/Common/setOperation.h:36:17: warning: inline function 'arma::Mat<int> xbinsar::unique_rows(const arma::Mat<int>&, arma::Mat<int>&)' declared as dllimport: attribute ignored [-Wattributes] [build] 36 | unique_rows(const arma::Mat<int>& inMat, arma::Mat<int>& index); [build] | ^~~~~~~~~~~ [build] D:/testcode/exp1-1024/include/Common/setOperation.h:41:17: warning: inline function 'arma::Mat<int> xbinsar::sort_rows(const arma::Mat<int>&, arma::Mat<int>&)' declared as dllimport: attribute ignored [-Wattributes] [build] 41 | sort_rows(const arma::Mat<int>& inMat, arma::Mat<int>& index); [build] 
| ^~~~~~~~~ [build] [4/4 100% :: 20.037] Linking CXX executable D:\testcode\exp1-1024\bin\exp1.exe [build] FAILED: [code=1] D:/testcode/exp1-1024/bin/exp1.exe [build] C:\Windows\system32\cmd.exe /C "cd . && c:\msys64\ucrt64\bin\g++.exe -std=c++17 -g CMakeFiles/exp1.dir/exp1_autogen/mocs_compilation.cpp.obj CMakeFiles/exp1.dir/src/test/main.cpp.obj -o D:\testcode\exp1-1024\bin\exp1.exe -Wl,--out-implib,D:\testcode\exp1-1024\lib\libexp1.dll.a -Wl,--major-image-version,0,--minor-image-version,0 C:/msys64/ucrt64/lib/libdlib.dll.a C:/msys64/ucrt64/lib/libarmadillo.dll.a C:/msys64/ucrt64/lib/liblapack.dll.a C:/msys64/ucrt64/lib/libblas.dll.a C:/msys64/ucrt64/lib/libcblas.dll.a C:/msys64/ucrt64/lib/libgsl.dll.a C:/msys64/ucrt64/lib/libgslcblas.dll.a C:/msys64/ucrt64/lib/libQt5Core.dll.a D:/testcode/exp1-1024/lib/libCommon.dll.a D:/testcode/exp1-1024/lib/libGeometry.dll.a D:/testcode/exp1-1024/lib/libFileIO.dll.a D:/testcode/exp1-1024/lib/libDataImport.dll.a D:/testcode/exp1-1024/lib/libCoreg.dll.a D:/testcode/exp1-1024/lib/libSARTools.dll.a D:/testcode/exp1-1024/lib/libGeo.dll.a D:/testcode/exp1-1024/lib/libBasicTools.dll.a -lkernel32 -luser32 -lgdi32 -lwinspool -lshell32 -lole32 -loleaut32 -luuid -lcomdlg32 -ladvapi32 && cd ." 
[build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: CMakeFiles/exp1.dir/src/test/main.cpp.obj: in function `main': [build] D:/testcode/exp1-1024/src/test/main.cpp:58:(.text+0x4dc): undefined reference to `image_downscale_by_lin2di(QString const&, QString const&, QString&)' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: CMakeFiles/exp1.dir/src/test/main.cpp.obj: in function `read_tiff(QString const&, double**&, int&, int&, QString&)': [build] D:/testcode/exp1-1024/src/test/main.cpp:214:(.text+0xac6): undefined reference to `GDALAllRegister' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:218:(.text+0xaef): undefined reference to `GDALOpen' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:226:(.text+0xb56): undefined reference to `GDALDataset::GetRasterXSize() const' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:227:(.text+0xb68): undefined reference to `GDALDataset::GetRasterYSize() const' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:228:(.text+0xb7f): undefined reference to `GDALDataset::GetRasterBand(int)' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:232:(.text+0xba9): undefined reference to `GDALClose' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:242:(.text+0xcb0): undefined reference to `GDALRasterBand::RasterIO(GDALRWFlag, int, int, int, 
int, void*, int, int, GDALDataType, long long, long long, GDALRasterIOExtraArg*)' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:252:(.text+0xdcd): undefined reference to `GDALClose' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:258:(.text+0xdf3): undefined reference to `GDALClose' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: CMakeFiles/exp1.dir/src/test/main.cpp.obj: in function `image_downscale(QString const&, QString const&, QString&)': [build] D:/testcode/exp1-1024/src/test/main.cpp:323:(.text+0x1336): undefined reference to `GDALAllRegister' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:324:(.text+0x133b): undefined reference to `GetGDALDriverManager' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:324:(.text+0x134d): undefined reference to `GDALDriverManager::GetDriverByName(char const*)' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:339:(.text+0x14c4): undefined reference to `GDALDriver::Create(char const*, int, int, int, GDALDataType, char const* const*)' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:356:(.text+0x1629): undefined reference to `GDALDataset::GetRasterBand(int)' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:359:(.text+0x16c6): undefined reference to 
`GDALRasterBand::RasterIO(GDALRWFlag, int, int, int, int, void*, int, int, GDALDataType, long long, long long, GDALRasterIOExtraArg*)' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:364:(.text+0x176c): undefined reference to `GDALClose' [build] C:/msys64/ucrt64/bin/../lib/gcc/x86_64-w64-mingw32/15.2.0/../../../../x86_64-w64-mingw32/bin/ld.exe: D:/testcode/exp1-1024/src/test/main.cpp:377:(.text+0x188a): undefined reference to `GDALClose' [build] collect2.exe: error: ld returned 1 exit status [build] ninja: build stopped: subcommand failed. [proc] 命令“C:/msys64/ucrt64/bin/cmake.exe --build d:/testcode/exp1-1024/build --config Debug --target all --”已退出,代码为 1 [driver] 生成完毕: 00:00:20.098 [build] 生成已完成,退出代码为 1
最新发布
11-13
<think>好的,用户在使用Hugging Face的datasets库时遇到了问题,特别是load_dataset和load_from_disk函数报错。我需要根据提供的引用信息和常见解决方法,一步步分析可能的原因和解决方案。

首先,引用[1]提到,某些数据集需要手动下载,导致load_dataset抛出AssertionError。这时候用户需要按照提示下载文件,然后用data_dir参数指定路径。所以第一个步骤应该是检查错误信息中的提示,手动下载数据集,并通过data_dir参数正确指向路径。

接下来,引用[2]指出git下载的文件无法直接使用,load_dataset执行时会失败,尤其是通过Google Drive下载的情况。这可能是因为数据集格式不符合要求,或者下载方式不正确。用户需要确认数据集是否来自官方源,并且下载后的文件结构正确。另外,如果用户试图用load_from_disk加载非dist格式的文件夹,也会出错,所以需要确保文件夹是正确保存的格式。

然后,关于参数的使用,比如data_dir、data_files和cache_dir。data_dir用于指定包含数据文件的目录,data_files可以直接指定文件路径或模式,而cache_dir可以设置缓存路径避免权限问题。用户需要检查这些参数是否正确使用,路径是否存在,文件是否完整。

编码问题方面,如果数据集文件使用GBK编码,而默认是UTF-8,可能需要指定encoding参数。但有些情况下,比如处理CSV文件时,可能需要先转换编码格式,或者使用正确的编码参数,比如encoding='gbk'。

另外,引用[3]中提到的模型加载方式可能不直接相关,但分片加载的思路可能有参考价值。不过这里可能不需要涉及模型加载部分。

总结可能的解决步骤:手动下载数据集并指定路径、检查数据集的来源和格式、正确使用参数、处理编码问题、确保缓存目录正确,以及处理网络问题。需要逐一排查这些可能性,并提供具体的代码示例帮助用户调整参数。

最后,生成相关问题时,需要围绕数据集加载错误、参数使用、编码问题、缓存处理等方面展开,确保覆盖用户可能遇到的其他相关问题。</think>

### 分步解决Hugging Face datasets加载数据集报错问题

#### 一、手动下载数据集并指定路径

当遇到`AssertionError`时,需根据错误提示手动下载数据集文件,并使用`data_dir`参数指定路径:

```python
from datasets import load_dataset

# 示例:手动下载后指定文件路径
dataset = load_dataset("数据集名称", data_dir="/path/to/manually_downloaded_files")
```

引用[1]指出,某些数据集因许可问题需手动下载后通过`data_dir`参数指定路径[^1]。

#### 二、正确处理数据集格式

1. **避免直接使用Git/Google Drive文件**
   若数据集通过`git`或Google Drive下载,需确保文件格式符合标准。例如:

   ```python
   # 使用data_files参数直接指定文件路径
   dataset = load_dataset("csv", data_files="/path/to/data.csv")
   ```

   引用[2]强调,非标准格式文件会导致`load_from_disk`失败[^2]。

2. **验证数据集格式**
   使用`load_from_disk`前需确保数据集通过`save_to_disk`保存:

   ```python
   from datasets import load_from_disk

   dataset.save_to_disk("/path/to/saved_dataset")
   reloaded_dataset = load_from_disk("/path/to/saved_dataset")
   ```

#### 三、参数配置优化

| 参数名 | 作用场景 | 示例值 |
|--------------|-------------------------------|---------------------------|
| `data_dir` | 手动下载文件的目录 | `data_dir="data/"` |
| `data_files` | 直接指定文件路径或通配符 | `data_files=["data/*.csv"]` |
| `cache_dir` | 自定义缓存目录(避免权限问题) | `cache_dir="./custom_cache"` |
| `encoding` | 处理GBK编码文件 | `encoding="gbk"` |

#### 四、编码问题解决方案

若数据集文件为GBK编码:

```python
# 处理CSV文件时指定编码
dataset = load_dataset("csv", data_files="data_gbk.csv", encoding="gbk")
```

若报错`UnicodeDecodeError`,建议将文件转换为UTF-8编码后再加载。

#### 五、其他常见问题处理

- **网络连接问题**:使用`HF_ENDPOINT=https://hf-mirror.com`设置镜像源加速下载
- **缓存清理**:通过`rm -rf ~/.cache/huggingface/datasets`清除损坏的缓存文件

$$ \text{错误处理流程:检查参数配置} \rightarrow \text{验证文件格式} \rightarrow \text{手动下载或编码转换} $$
评论 1
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值