Visual Studio Code (1.106.2, undefined, desktop)
Jupyter Extension Version: 2025.9.1.
Python Extension Version: 2025.18.0.
Python Environment Extension Version: 1.12.0.
Pylance Extension Version: 2025.9.1.
Platform: win32 (x64).
Home = c:\Users\13636
Temp Storage folder ~\AppData\Roaming\Code\User\globalStorage\ms-toolsai.jupyter\version-2025.9.1
Workspace folder f:\Programmer\python\MyAI
13:06:47.445 [info] Starting Kernel (Python Path: e:\Python311\python11\python.exe, Unknown, 3.11.9) for 'f:\Programmer\python\MyAI\train_sakura.ipynb' (disableUI=true)
13:06:47.817 [warn] Failed to get activated env vars for e:\Python311\python11\python.exe in 267ms
13:06:47.821 [warn] Failed to get activated env vars for e:\Python311\python11\python.exe in 271ms
13:06:47.871 [info] Process Execution: e:\Python311\python11\python.exe -c "import site;print("USER_BASE_VALUE");print(site.USER_SITE);print("USER_BASE_VALUE");"
13:06:47.968 [info] Process Execution: e:\Python311\python11\python.exe -m pip list
13:06:47.974 [info] Process Execution: e:\Python311\python11\python.exe -c "import ipykernel; print(ipykernel.__version__); print("5dc3a68c-e34e-4080-9c3e-2a532b2ccb4d"); print(ipykernel.__file__)"
13:06:47.983 [info] Process Execution: e:\Python311\python11\python.exe ~\.vscode\extensions\ms-toolsai.jupyter-2025.9.1-win32-x64\pythonFiles\vscode_datascience_helpers\kernel_interrupt_daemon.py --ppid 3168
> cwd: ~\.vscode\extensions\ms-toolsai.jupyter-2025.9.1-win32-x64\pythonFiles\vscode_datascience_helpers
13:06:50.567 [info] Process Execution: e:\Python311\python11\python.exe -m ipykernel_launcher --f=~\AppData\Roaming\jupyter\runtime\kernel-v32c5d7b38bf28bbca760de1bcdeb909b302a795d1.json
> cwd: f:\Programmer\python\MyAI
13:06:52.321 [info] Kernel successfully started
13:06:52.331 [info] Process Execution: e:\Python311\python11\python.exe ~\.vscode\extensions\ms-toolsai.jupyter-2025.9.1-win32-x64\pythonFiles\printJupyterDataDir.py
13:11:46.790 [info] Interrupt kernel execution
13:11:46.790 [info] Interrupt requested f:\Programmer\python\MyAI\train_sakura.ipynb
13:11:46.790 [info] Interrupt kernel execution
13:11:46.790 [info] Interrupting kernel: python3119jvsc74a57bd0c80a7e5537753b1d2c99e421d729a0b214b8c11307fb1943043dc1eba0a677b8
13:11:46.790 [info] Interrupting kernel via custom event (Win32)
13:11:56.792 [info] Interrupt requested & sent for f:\Programmer\python\MyAI\train_sakura.ipynb in notebookEditor.
13:12:00.258 [info] Restart requested f:\Programmer\python\MyAI\train_sakura.ipynb
13:12:00.269 [info] Process Execution: c:\WINDOWS\System32\taskkill.exe /F /T /PID 17992
13:12:00.344 [warn] Failed to get activated env vars for e:\Python311\python11\python.exe in 73ms
13:12:00.350 [info] Process Execution: e:\Python311\python11\python.exe -c "import ipykernel; print(ipykernel.__version__); print("5dc3a68c-e34e-4080-9c3e-2a532b2ccb4d"); print(ipykernel.__file__)"
13:12:00.360 [info] Process Execution: e:\Python311\python11\python.exe -m ipykernel_launcher --f=~\AppData\Roaming\jupyter\runtime\kernel-v38bdf80aa4fbeb1a2e1af040d7fc75b420477ba7a.json
> cwd: f:\Programmer\python\MyAI
13:12:01.509 [info] Restarted 53607fdc-7887-4227-839f-45729ad69e01
13:46:17.218 [warn] Cell completed with errors Iu [Error]: name 'AutoTokenizer' is not defined
at n.execute (F:\data\.vscode\extensions\ms-toolsai.jupyter-2025.9.1-win32-x64\dist\extension.node.js:302:4958) {
ename: 'NameError',
evalue: "name 'AutoTokenizer' is not defined",
traceback: [
'\x1B[31m---------------------------------------------------------------------------\x1B[39m',
'\x1B[31mNameError\x1B[39m Traceback (most recent call last)',
'\x1B[36mCell\x1B[39m\x1B[36m \x1B[39m\x1B[32mIn[1]\x1B[39m\x1B[32m, line 3\x1B[39m\n' +
'\x1B[32m 1\x1B[39m \x1B[38;5;66;03m# 1. 加载 tokenizer 和 model\x1B[39;00m\n' +
'\x1B[32m 2\x1B[39m model_name = \x1B[33m"\x1B[39m\x1B[33mqwen/Qwen3-8B\x1B[39m\x1B[33m"\x1B[39m\n' +
'\x1B[32m----> \x1B[39m\x1B[32m3\x1B[39m tokenizer = \x1B[43mAutoTokenizer\x1B[49m.from_pretrained(model_name, trust_remote_code=\x1B[38;5;28;01mTrue\x1B[39;00m, use_fast=\x1B[38;5;28;01mFalse\x1B[39;00m)\n' +
'\x1B[32m 4\x1B[39m model = AutoModelForCausalLM.from_pretrained(\n' +
'\x1B[32m 5\x1B[39m model_name,\n' +
'\x1B[32m 6\x1B[39m dtype=torch.bfloat16,\n' +
'\x1B[32m 7\x1B[39m device_map=\x1B[33m"\x1B[39m\x1B[33mauto\x1B[39m\x1B[33m"\x1B[39m,\n' +
'\x1B[32m 8\x1B[39m trust_remote_code=\x1B[38;5;28;01mTrue\x1B[39;00m\n' +
'\x1B[32m 9\x1B[39m )\n',
"\x1B[31mNameError\x1B[39m: name 'AutoTokenizer' is not defined"
]
}
13:47:06.017 [warn] Cell completed with errors Iu [Error]: An error occurred while generating the dataset
at n.execute (F:\data\.vscode\extensions\ms-toolsai.jupyter-2025.9.1-win32-x64\dist\extension.node.js:302:4958) {
ename: 'DatasetGenerationError',
evalue: 'An error occurred while generating the dataset',
traceback: [
'\x1B[31m---------------------------------------------------------------------------\x1B[39m',
'\x1B[31mValueError\x1B[39m Traceback (most recent call last)',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\packaged_modules\\json\\json.py:174\x1B[39m, in \x1B[36mJson._generate_tables\x1B[39m\x1B[34m(self, files)\x1B[39m\n' +
'\x1B[32m 171\x1B[39m \x1B[38;5;28;01mwith\x1B[39;00m \x1B[38;5;28mopen\x1B[39m(\n' +
'\x1B[32m 172\x1B[39m file, encoding=\x1B[38;5;28mself\x1B[39m.config.encoding, errors=\x1B[38;5;28mself\x1B[39m.config.encoding_errors\n' +
'\x1B[32m 173\x1B[39m ) \x1B[38;5;28;01mas\x1B[39;00m f:\n' +
'\x1B[32m--> \x1B[39m\x1B[32m174\x1B[39m df = \x1B[43mpandas_read_json\x1B[49m\x1B[43m(\x1B[49m\x1B[43mf\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 175\x1B[39m \x1B[38;5;28;01mexcept\x1B[39;00m \x1B[38;5;167;01mValueError\x1B[39;00m:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\packaged_modules\\json\\json.py:38\x1B[39m, in \x1B[36mpandas_read_json\x1B[39m\x1B[34m(path_or_buf, **kwargs)\x1B[39m\n' +
'\x1B[32m 37\x1B[39m kwargs[\x1B[33m"\x1B[39m\x1B[33mdtype_backend\x1B[39m\x1B[33m"\x1B[39m] = \x1B[33m"\x1B[39m\x1B[33mpyarrow\x1B[39m\x1B[33m"\x1B[39m\n' +
'\x1B[32m---> \x1B[39m\x1B[32m38\x1B[39m \x1B[38;5;28;01mreturn\x1B[39;00m \x1B[43mpd\x1B[49m\x1B[43m.\x1B[49m\x1B[43mread_json\x1B[49m\x1B[43m(\x1B[49m\x1B[43mpath_or_buf\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mkwargs\x1B[49m\x1B[43m)\x1B[49m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:815\x1B[39m, in \x1B[36mread_json\x1B[39m\x1B[34m(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, precise_float, date_unit, encoding, encoding_errors, lines, chunksize, compression, nrows, storage_options, dtype_backend, engine)\x1B[39m\n' +
'\x1B[32m 814\x1B[39m \x1B[38;5;28;01melse\x1B[39;00m:\n' +
'\x1B[32m--> \x1B[39m\x1B[32m815\x1B[39m \x1B[38;5;28;01mreturn\x1B[39;00m \x1B[43mjson_reader\x1B[49m\x1B[43m.\x1B[49m\x1B[43mread\x1B[49m\x1B[43m(\x1B[49m\x1B[43m)\x1B[49m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:1014\x1B[39m, in \x1B[36mJsonReader.read\x1B[39m\x1B[34m(self)\x1B[39m\n' +
'\x1B[32m 1013\x1B[39m \x1B[38;5;28;01melse\x1B[39;00m:\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1014\x1B[39m obj = \x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_get_object_parser\x1B[49m\x1B[43m(\x1B[49m\x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43mdata\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 1015\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m \x1B[38;5;28mself\x1B[39m.dtype_backend \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;129;01mnot\x1B[39;00m lib.no_default:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:1040\x1B[39m, in \x1B[36mJsonReader._get_object_parser\x1B[39m\x1B[34m(self, json)\x1B[39m\n' +
'\x1B[32m 1039\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m typ == \x1B[33m"\x1B[39m\x1B[33mframe\x1B[39m\x1B[33m"\x1B[39m:\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1040\x1B[39m obj = \x1B[43mFrameParser\x1B[49m\x1B[43m(\x1B[49m\x1B[43mjson\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mkwargs\x1B[49m\x1B[43m)\x1B[49m\x1B[43m.\x1B[49m\x1B[43mparse\x1B[49m\x1B[43m(\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 1042\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m typ == \x1B[33m"\x1B[39m\x1B[33mseries\x1B[39m\x1B[33m"\x1B[39m \x1B[38;5;129;01mor\x1B[39;00m obj \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;28;01mNone\x1B[39;00m:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:1176\x1B[39m, in \x1B[36mParser.parse\x1B[39m\x1B[34m(self)\x1B[39m\n' +
'\x1B[32m 1174\x1B[39m \x1B[38;5;129m@final\x1B[39m\n' +
'\x1B[32m 1175\x1B[39m \x1B[38;5;28;01mdef\x1B[39;00m\x1B[38;5;250m \x1B[39m\x1B[34mparse\x1B[39m(\x1B[38;5;28mself\x1B[39m):\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1176\x1B[39m \x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_parse\x1B[49m\x1B[43m(\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 1178\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m \x1B[38;5;28mself\x1B[39m.obj \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;28;01mNone\x1B[39;00m:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:1392\x1B[39m, in \x1B[36mFrameParser._parse\x1B[39m\x1B[34m(self)\x1B[39m\n' +
'\x1B[32m 1390\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m orient == \x1B[33m"\x1B[39m\x1B[33mcolumns\x1B[39m\x1B[33m"\x1B[39m:\n' +
'\x1B[32m 1391\x1B[39m \x1B[38;5;28mself\x1B[39m.obj = DataFrame(\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1392\x1B[39m \x1B[43mujson_loads\x1B[49m\x1B[43m(\x1B[49m\x1B[43mjson\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mprecise_float\x1B[49m\x1B[43m=\x1B[49m\x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43mprecise_float\x1B[49m\x1B[43m)\x1B[49m, dtype=\x1B[38;5;28;01mNone\x1B[39;00m\n' +
'\x1B[32m 1393\x1B[39m )\n' +
'\x1B[32m 1394\x1B[39m \x1B[38;5;28;01melif\x1B[39;00m orient == \x1B[33m"\x1B[39m\x1B[33msplit\x1B[39m\x1B[33m"\x1B[39m:\n',
'\x1B[31mValueError\x1B[39m: Trailing data',
'\nDuring handling of the above exception, another exception occurred:\n',
'\x1B[31mArrowInvalid\x1B[39m Traceback (most recent call last)',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:1818\x1B[39m, in \x1B[36mArrowBasedBuilder._prepare_split_single\x1B[39m\x1B[34m(self, gen_kwargs, fpath, file_format, max_shard_size, job_id)\x1B[39m\n' +
'\x1B[32m 1817\x1B[39m _time = time.time()\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1818\x1B[39m \x1B[43m\x1B[49m\x1B[38;5;28;43;01mfor\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43m_\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mtable\x1B[49m\x1B[43m \x1B[49m\x1B[38;5;129;43;01min\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mgenerator\x1B[49m\x1B[43m:\x1B[49m\n' +
'\x1B[32m 1819\x1B[39m \x1B[43m \x1B[49m\x1B[38;5;28;43;01mif\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mmax_shard_size\x1B[49m\x1B[43m \x1B[49m\x1B[38;5;129;43;01mis\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[38;5;129;43;01mnot\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[38;5;28;43;01mNone\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[38;5;129;43;01mand\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mwriter\x1B[49m\x1B[43m.\x1B[49m\x1B[43m_num_bytes\x1B[49m\x1B[43m \x1B[49m\x1B[43m>\x1B[49m\x1B[43m \x1B[49m\x1B[43mmax_shard_size\x1B[49m\x1B[43m:\x1B[49m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\packaged_modules\\json\\json.py:177\x1B[39m, in \x1B[36mJson._generate_tables\x1B[39m\x1B[34m(self, files)\x1B[39m\n' +
`\x1B[32m 176\x1B[39m logger.error(\x1B[33mf\x1B[39m\x1B[33m"\x1B[39m\x1B[33mFailed to load JSON from file \x1B[39m\x1B[33m'\x1B[39m\x1B[38;5;132;01m{\x1B[39;00mfile\x1B[38;5;132;01m}\x1B[39;00m\x1B[33m'\x1B[39m\x1B[33m with error \x1B[39m\x1B[38;5;132;01m{\x1B[39;00m\x1B[38;5;28mtype\x1B[39m(e)\x1B[38;5;132;01m}\x1B[39;00m\x1B[33m: \x1B[39m\x1B[38;5;132;01m{\x1B[39;00me\x1B[38;5;132;01m}\x1B[39;00m\x1B[33m"\x1B[39m)\n` +
'\x1B[32m--> \x1B[39m\x1B[32m177\x1B[39m \x1B[38;5;28;01mraise\x1B[39;00m e\n' +
'\x1B[32m 178\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m df.columns.tolist() == [\x1B[32m0\x1B[39m]:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\packaged_modules\\json\\json.py:151\x1B[39m, in \x1B[36mJson._generate_tables\x1B[39m\x1B[34m(self, files)\x1B[39m\n' +
'\x1B[32m 150\x1B[39m \x1B[38;5;28;01mtry\x1B[39;00m:\n' +
'\x1B[32m--> \x1B[39m\x1B[32m151\x1B[39m pa_table = \x1B[43mpaj\x1B[49m\x1B[43m.\x1B[49m\x1B[43mread_json\x1B[49m\x1B[43m(\x1B[49m\n' +
'\x1B[32m 152\x1B[39m \x1B[43m \x1B[49m\x1B[43mio\x1B[49m\x1B[43m.\x1B[49m\x1B[43mBytesIO\x1B[49m\x1B[43m(\x1B[49m\x1B[43mbatch\x1B[49m\x1B[43m)\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mread_options\x1B[49m\x1B[43m=\x1B[49m\x1B[43mpaj\x1B[49m\x1B[43m.\x1B[49m\x1B[43mReadOptions\x1B[49m\x1B[43m(\x1B[49m\x1B[43mblock_size\x1B[49m\x1B[43m=\x1B[49m\x1B[43mblock_size\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 153\x1B[39m \x1B[43m \x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 154\x1B[39m \x1B[38;5;28;01mbreak\x1B[39;00m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pyarrow\\_json.pyx:342\x1B[39m, in \x1B[36mpyarrow._json.read_json\x1B[39m\x1B[34m()\x1B[39m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pyarrow\\error.pxi:155\x1B[39m, in \x1B[36mpyarrow.lib.pyarrow_internal_check_status\x1B[39m\x1B[34m()\x1B[39m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pyarrow\\error.pxi:92\x1B[39m, in \x1B[36mpyarrow.lib.check_status\x1B[39m\x1B[34m()\x1B[39m\n',
'\x1B[31mArrowInvalid\x1B[39m: JSON parse error: Invalid value. in row 14',
'\nThe above exception was the direct cause of the following exception:\n',
'\x1B[31mDatasetGenerationError\x1B[39m Traceback (most recent call last)',
'\x1B[36mCell\x1B[39m\x1B[36m \x1B[39m\x1B[32mIn[5]\x1B[39m\x1B[32m, line 2\x1B[39m\n' +
'\x1B[32m 1\x1B[39m \x1B[38;5;66;03m# 3. 从JSONL文件加载数据集\x1B[39;00m\n' +
'\x1B[32m----> \x1B[39m\x1B[32m2\x1B[39m dataset = \x1B[43mload_dataset\x1B[49m\x1B[43m(\x1B[49m\x1B[33;43m"\x1B[39;49m\x1B[33;43mjson\x1B[39;49m\x1B[33;43m"\x1B[39;49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mdata_files\x1B[49m\x1B[43m=\x1B[49m\x1B[33;43m"\x1B[39;49m\x1B[33;43mtrain_data.jsonl\x1B[39;49m\x1B[33;43m"\x1B[39;49m\x1B[43m)\x1B[49m[\x1B[33m"\x1B[39m\x1B[33mtrain\x1B[39m\x1B[33m"\x1B[39m] \x1B[38;5;66;03m# 加载JSONL文件\x1B[39;00m\n' +
'\x1B[32m 4\x1B[39m \x1B[38;5;66;03m# Tokenize 数据\x1B[39;00m\n' +
'\x1B[32m 5\x1B[39m \x1B[38;5;28;01mdef\x1B[39;00m\x1B[38;5;250m \x1B[39m\x1B[34mtokenize_function\x1B[39m(examples):\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\load.py:1417\x1B[39m, in \x1B[36mload_dataset\x1B[39m\x1B[34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, keep_in_memory, save_infos, revision, token, streaming, num_proc, storage_options, **config_kwargs)\x1B[39m\n' +
'\x1B[32m 1414\x1B[39m \x1B[38;5;28;01mreturn\x1B[39;00m builder_instance.as_streaming_dataset(split=split)\n' +
'\x1B[32m 1416\x1B[39m \x1B[38;5;66;03m# Download and prepare data\x1B[39;00m\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1417\x1B[39m \x1B[43mbuilder_instance\x1B[49m\x1B[43m.\x1B[49m\x1B[43mdownload_and_prepare\x1B[49m\x1B[43m(\x1B[49m\n' +
'\x1B[32m 1418\x1B[39m \x1B[43m \x1B[49m\x1B[43mdownload_config\x1B[49m\x1B[43m=\x1B[49m\x1B[43mdownload_config\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1419\x1B[39m \x1B[43m \x1B[49m\x1B[43mdownload_mode\x1B[49m\x1B[43m=\x1B[49m\x1B[43mdownload_mode\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1420\x1B[39m \x1B[43m \x1B[49m\x1B[43mverification_mode\x1B[49m\x1B[43m=\x1B[49m\x1B[43mverification_mode\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1421\x1B[39m \x1B[43m \x1B[49m\x1B[43mnum_proc\x1B[49m\x1B[43m=\x1B[49m\x1B[43mnum_proc\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1422\x1B[39m \x1B[43m \x1B[49m\x1B[43mstorage_options\x1B[49m\x1B[43m=\x1B[49m\x1B[43mstorage_options\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1423\x1B[39m \x1B[43m\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 1425\x1B[39m \x1B[38;5;66;03m# Build dataset for splits\x1B[39;00m\n' +
'\x1B[32m 1426\x1B[39m keep_in_memory = (\n' +
'\x1B[32m 1427\x1B[39m keep_in_memory \x1B[38;5;28;01mif\x1B[39;00m keep_in_memory \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;129;01mnot\x1B[39;00m \x1B[38;5;28;01mNone\x1B[39;00m \x1B[38;5;28;01melse\x1B[39;00m is_small_dataset(builder_instance.info.dataset_size)\n' +
'\x1B[32m 1428\x1B[39m )\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:897\x1B[39m, in \x1B[36mDatasetBuilder.download_and_prepare\x1B[39m\x1B[34m(self, output_dir, download_config, download_mode, verification_mode, dl_manager, base_path, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)\x1B[39m\n' +
'\x1B[32m 895\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m num_proc \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;129;01mnot\x1B[39;00m \x1B[38;5;28;01mNone\x1B[39;00m:\n' +
'\x1B[32m 896\x1B[39m prepare_split_kwargs[\x1B[33m"\x1B[39m\x1B[33mnum_proc\x1B[39m\x1B[33m"\x1B[39m] = num_proc\n' +
'\x1B[32m--> \x1B[39m\x1B[32m897\x1B[39m \x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_download_and_prepare\x1B[49m\x1B[43m(\x1B[49m\n' +
'\x1B[32m 898\x1B[39m \x1B[43m \x1B[49m\x1B[43mdl_manager\x1B[49m\x1B[43m=\x1B[49m\x1B[43mdl_manager\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 899\x1B[39m \x1B[43m \x1B[49m\x1B[43mverification_mode\x1B[49m\x1B[43m=\x1B[49m\x1B[43mverification_mode\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 900\x1B[39m \x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mprepare_split_kwargs\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 901\x1B[39m \x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mdownload_and_prepare_kwargs\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 902\x1B[39m \x1B[43m\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 903\x1B[39m \x1B[38;5;66;03m# Sync info\x1B[39;00m\n' +
'\x1B[32m 904\x1B[39m \x1B[38;5;28mself\x1B[39m.info.dataset_size = \x1B[38;5;28msum\x1B[39m(split.num_bytes \x1B[38;5;28;01mfor\x1B[39;00m split \x1B[38;5;129;01min\x1B[39;00m \x1B[38;5;28mself\x1B[39m.info.splits.values())\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:973\x1B[39m, in \x1B[36mDatasetBuilder._download_and_prepare\x1B[39m\x1B[34m(self, dl_manager, verification_mode, **prepare_split_kwargs)\x1B[39m\n' +
'\x1B[32m 969\x1B[39m split_dict.add(split_generator.split_info)\n' +
'\x1B[32m 971\x1B[39m \x1B[38;5;28;01mtry\x1B[39;00m:\n' +
'\x1B[32m 972\x1B[39m \x1B[38;5;66;03m# Prepare split will record examples associated to the split\x1B[39;00m\n' +
'\x1B[32m--> \x1B[39m\x1B[32m973\x1B[39m \x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_prepare_split\x1B[49m\x1B[43m(\x1B[49m\x1B[43msplit_generator\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mprepare_split_kwargs\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 974\x1B[39m \x1B[38;5;28;01mexcept\x1B[39;00m \x1B[38;5;167;01mOSError\x1B[39;00m \x1B[38;5;28;01mas\x1B[39;00m e:\n' +
'\x1B[32m 975\x1B[39m \x1B[38;5;28;01mraise\x1B[39;00m \x1B[38;5;167;01mOSError\x1B[39;00m(\n' +
'\x1B[32m 976\x1B[39m \x1B[33m"\x1B[39m\x1B[33mCannot find data file. \x1B[39m\x1B[33m"\x1B[39m\n' +
'\x1B[32m 977\x1B[39m + (\x1B[38;5;28mself\x1B[39m.manual_download_instructions \x1B[38;5;129;01mor\x1B[39;00m \x1B[33m"\x1B[39m\x1B[33m"\x1B[39m)\n' +
'\x1B[32m 978\x1B[39m + \x1B[33m"\x1B[39m\x1B[38;5;130;01m\\n\x1B[39;00m\x1B[33mOriginal error:\x1B[39m\x1B[38;5;130;01m\\n\x1B[39;00m\x1B[33m"\x1B[39m\n' +
'\x1B[32m 979\x1B[39m + \x1B[38;5;28mstr\x1B[39m(e)\n' +
'\x1B[32m 980\x1B[39m ) \x1B[38;5;28;01mfrom\x1B[39;00m\x1B[38;5;250m \x1B[39m\x1B[38;5;28;01mNone\x1B[39;00m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:1705\x1B[39m, in \x1B[36mArrowBasedBuilder._prepare_split\x1B[39m\x1B[34m(self, split_generator, file_format, num_proc, max_shard_size)\x1B[39m\n' +
'\x1B[32m 1703\x1B[39m job_id = \x1B[32m0\x1B[39m\n' +
'\x1B[32m 1704\x1B[39m \x1B[38;5;28;01mwith\x1B[39;00m pbar:\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1705\x1B[39m \x1B[43m \x1B[49m\x1B[38;5;28;43;01mfor\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mjob_id\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mdone\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mcontent\x1B[49m\x1B[43m \x1B[49m\x1B[38;5;129;43;01min\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_prepare_split_single\x1B[49m\x1B[43m(\x1B[49m\n' +
'\x1B[32m 1706\x1B[39m \x1B[43m \x1B[49m\x1B[43mgen_kwargs\x1B[49m\x1B[43m=\x1B[49m\x1B[43mgen_kwargs\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mjob_id\x1B[49m\x1B[43m=\x1B[49m\x1B[43mjob_id\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43m_prepare_split_args\x1B[49m\n' +
'\x1B[32m 1707\x1B[39m \x1B[43m \x1B[49m\x1B[43m)\x1B[49m\x1B[43m:\x1B[49m\n' +
'\x1B[32m 1708\x1B[39m \x1B[43m \x1B[49m\x1B[38;5;28;43;01mif\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mdone\x1B[49m\x1B[43m:\x1B[49m\n' +
'\x1B[32m 1709\x1B[39m \x1B[43m \x1B[49m\x1B[43mresult\x1B[49m\x1B[43m \x1B[49m\x1B[43m=\x1B[49m\x1B[43m \x1B[49m\x1B[43mcontent\x1B[49m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:1861\x1B[39m, in \x1B[36mArrowBasedBuilder._prepare_split_single\x1B[39m\x1B[34m(self, gen_kwargs, fpath, file_format, max_shard_size, job_id)\x1B[39m\n' +
'\x1B[32m 1859\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m \x1B[38;5;28misinstance\x1B[39m(e, DatasetGenerationError):\n' +
'\x1B[32m 1860\x1B[39m \x1B[38;5;28;01mraise\x1B[39;00m\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1861\x1B[39m \x1B[38;5;28;01mraise\x1B[39;00m DatasetGenerationError(\x1B[33m"\x1B[39m\x1B[33mAn error occurred while generating the dataset\x1B[39m\x1B[33m"\x1B[39m) \x1B[38;5;28;01mfrom\x1B[39;00m\x1B[38;5;250m \x1B[39m\x1B[34;01me\x1B[39;00m\n' +
'\x1B[32m 1863\x1B[39m \x1B[38;5;28;01myield\x1B[39;00m job_id, \x1B[38;5;28;01mTrue\x1B[39;00m, (total_num_examples, total_num_bytes, writer._features, num_shards, shard_lengths)\n',
'\x1B[31mDatasetGenerationError\x1B[39m: An error occurred while generating the dataset'
]
}
13:55:06.053 [info] Restart requested f:\Programmer\python\MyAI\train_sakura.ipynb
13:55:06.076 [info] Process Execution: c:\WINDOWS\System32\taskkill.exe /F /T /PID 19512
13:55:06.126 [warn] Failed to get activated env vars for e:\Python311\python11\python.exe in 47ms
13:55:06.131 [info] Process Execution: e:\Python311\python11\python.exe -c "import ipykernel; print(ipykernel.__version__); print("5dc3a68c-e34e-4080-9c3e-2a532b2ccb4d"); print(ipykernel.__file__)"
13:55:06.145 [info] Process Execution: e:\Python311\python11\python.exe -m ipykernel_launcher --f=~\AppData\Roaming\jupyter\runtime\kernel-v36aceef2c7df0625522d930946aa56731cd8fcd86.json
> cwd: f:\Programmer\python\MyAI
13:55:07.583 [info] Restarted 53607fdc-7887-4227-839f-45729ad69e01
13:55:36.006 [warn] Cell completed with errors Iu [Error]: An error occurred while generating the dataset
at n.execute (F:\data\.vscode\extensions\ms-toolsai.jupyter-2025.9.1-win32-x64\dist\extension.node.js:302:4958) {
ename: 'DatasetGenerationError',
evalue: 'An error occurred while generating the dataset',
traceback: [
'\x1B[31m---------------------------------------------------------------------------\x1B[39m',
'\x1B[31mValueError\x1B[39m Traceback (most recent call last)',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\packaged_modules\\json\\json.py:174\x1B[39m, in \x1B[36mJson._generate_tables\x1B[39m\x1B[34m(self, files)\x1B[39m\n' +
'\x1B[32m 171\x1B[39m \x1B[38;5;28;01mwith\x1B[39;00m \x1B[38;5;28mopen\x1B[39m(\n' +
'\x1B[32m 172\x1B[39m file, encoding=\x1B[38;5;28mself\x1B[39m.config.encoding, errors=\x1B[38;5;28mself\x1B[39m.config.encoding_errors\n' +
'\x1B[32m 173\x1B[39m ) \x1B[38;5;28;01mas\x1B[39;00m f:\n' +
'\x1B[32m--> \x1B[39m\x1B[32m174\x1B[39m df = \x1B[43mpandas_read_json\x1B[49m\x1B[43m(\x1B[49m\x1B[43mf\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 175\x1B[39m \x1B[38;5;28;01mexcept\x1B[39;00m \x1B[38;5;167;01mValueError\x1B[39;00m:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\packaged_modules\\json\\json.py:38\x1B[39m, in \x1B[36mpandas_read_json\x1B[39m\x1B[34m(path_or_buf, **kwargs)\x1B[39m\n' +
'\x1B[32m 37\x1B[39m kwargs[\x1B[33m"\x1B[39m\x1B[33mdtype_backend\x1B[39m\x1B[33m"\x1B[39m] = \x1B[33m"\x1B[39m\x1B[33mpyarrow\x1B[39m\x1B[33m"\x1B[39m\n' +
'\x1B[32m---> \x1B[39m\x1B[32m38\x1B[39m \x1B[38;5;28;01mreturn\x1B[39;00m \x1B[43mpd\x1B[49m\x1B[43m.\x1B[49m\x1B[43mread_json\x1B[49m\x1B[43m(\x1B[49m\x1B[43mpath_or_buf\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mkwargs\x1B[49m\x1B[43m)\x1B[49m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:815\x1B[39m, in \x1B[36mread_json\x1B[39m\x1B[34m(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, precise_float, date_unit, encoding, encoding_errors, lines, chunksize, compression, nrows, storage_options, dtype_backend, engine)\x1B[39m\n' +
'\x1B[32m 814\x1B[39m \x1B[38;5;28;01melse\x1B[39;00m:\n' +
'\x1B[32m--> \x1B[39m\x1B[32m815\x1B[39m \x1B[38;5;28;01mreturn\x1B[39;00m \x1B[43mjson_reader\x1B[49m\x1B[43m.\x1B[49m\x1B[43mread\x1B[49m\x1B[43m(\x1B[49m\x1B[43m)\x1B[49m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:1014\x1B[39m, in \x1B[36mJsonReader.read\x1B[39m\x1B[34m(self)\x1B[39m\n' +
'\x1B[32m 1013\x1B[39m \x1B[38;5;28;01melse\x1B[39;00m:\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1014\x1B[39m obj = \x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_get_object_parser\x1B[49m\x1B[43m(\x1B[49m\x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43mdata\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 1015\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m \x1B[38;5;28mself\x1B[39m.dtype_backend \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;129;01mnot\x1B[39;00m lib.no_default:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:1040\x1B[39m, in \x1B[36mJsonReader._get_object_parser\x1B[39m\x1B[34m(self, json)\x1B[39m\n' +
'\x1B[32m 1039\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m typ == \x1B[33m"\x1B[39m\x1B[33mframe\x1B[39m\x1B[33m"\x1B[39m:\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1040\x1B[39m obj = \x1B[43mFrameParser\x1B[49m\x1B[43m(\x1B[49m\x1B[43mjson\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mkwargs\x1B[49m\x1B[43m)\x1B[49m\x1B[43m.\x1B[49m\x1B[43mparse\x1B[49m\x1B[43m(\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 1042\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m typ == \x1B[33m"\x1B[39m\x1B[33mseries\x1B[39m\x1B[33m"\x1B[39m \x1B[38;5;129;01mor\x1B[39;00m obj \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;28;01mNone\x1B[39;00m:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:1176\x1B[39m, in \x1B[36mParser.parse\x1B[39m\x1B[34m(self)\x1B[39m\n' +
'\x1B[32m 1174\x1B[39m \x1B[38;5;129m@final\x1B[39m\n' +
'\x1B[32m 1175\x1B[39m \x1B[38;5;28;01mdef\x1B[39;00m\x1B[38;5;250m \x1B[39m\x1B[34mparse\x1B[39m(\x1B[38;5;28mself\x1B[39m):\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1176\x1B[39m \x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_parse\x1B[49m\x1B[43m(\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 1178\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m \x1B[38;5;28mself\x1B[39m.obj \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;28;01mNone\x1B[39;00m:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pandas\\io\\json\\_json.py:1392\x1B[39m, in \x1B[36mFrameParser._parse\x1B[39m\x1B[34m(self)\x1B[39m\n' +
'\x1B[32m 1390\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m orient == \x1B[33m"\x1B[39m\x1B[33mcolumns\x1B[39m\x1B[33m"\x1B[39m:\n' +
'\x1B[32m 1391\x1B[39m \x1B[38;5;28mself\x1B[39m.obj = DataFrame(\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1392\x1B[39m \x1B[43mujson_loads\x1B[49m\x1B[43m(\x1B[49m\x1B[43mjson\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mprecise_float\x1B[49m\x1B[43m=\x1B[49m\x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43mprecise_float\x1B[49m\x1B[43m)\x1B[49m, dtype=\x1B[38;5;28;01mNone\x1B[39;00m\n' +
'\x1B[32m 1393\x1B[39m )\n' +
'\x1B[32m 1394\x1B[39m \x1B[38;5;28;01melif\x1B[39;00m orient == \x1B[33m"\x1B[39m\x1B[33msplit\x1B[39m\x1B[33m"\x1B[39m:\n',
'\x1B[31mValueError\x1B[39m: Trailing data',
'\nDuring handling of the above exception, another exception occurred:\n',
'\x1B[31mArrowInvalid\x1B[39m Traceback (most recent call last)',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:1818\x1B[39m, in \x1B[36mArrowBasedBuilder._prepare_split_single\x1B[39m\x1B[34m(self, gen_kwargs, fpath, file_format, max_shard_size, job_id)\x1B[39m\n' +
'\x1B[32m 1817\x1B[39m _time = time.time()\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1818\x1B[39m \x1B[43m\x1B[49m\x1B[38;5;28;43;01mfor\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43m_\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mtable\x1B[49m\x1B[43m \x1B[49m\x1B[38;5;129;43;01min\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mgenerator\x1B[49m\x1B[43m:\x1B[49m\n' +
'\x1B[32m 1819\x1B[39m \x1B[43m \x1B[49m\x1B[38;5;28;43;01mif\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mmax_shard_size\x1B[49m\x1B[43m \x1B[49m\x1B[38;5;129;43;01mis\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[38;5;129;43;01mnot\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[38;5;28;43;01mNone\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[38;5;129;43;01mand\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mwriter\x1B[49m\x1B[43m.\x1B[49m\x1B[43m_num_bytes\x1B[49m\x1B[43m \x1B[49m\x1B[43m>\x1B[49m\x1B[43m \x1B[49m\x1B[43mmax_shard_size\x1B[49m\x1B[43m:\x1B[49m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\packaged_modules\\json\\json.py:177\x1B[39m, in \x1B[36mJson._generate_tables\x1B[39m\x1B[34m(self, files)\x1B[39m\n' +
`\x1B[32m 176\x1B[39m logger.error(\x1B[33mf\x1B[39m\x1B[33m"\x1B[39m\x1B[33mFailed to load JSON from file \x1B[39m\x1B[33m'\x1B[39m\x1B[38;5;132;01m{\x1B[39;00mfile\x1B[38;5;132;01m}\x1B[39;00m\x1B[33m'\x1B[39m\x1B[33m with error \x1B[39m\x1B[38;5;132;01m{\x1B[39;00m\x1B[38;5;28mtype\x1B[39m(e)\x1B[38;5;132;01m}\x1B[39;00m\x1B[33m: \x1B[39m\x1B[38;5;132;01m{\x1B[39;00me\x1B[38;5;132;01m}\x1B[39;00m\x1B[33m"\x1B[39m)\n` +
'\x1B[32m--> \x1B[39m\x1B[32m177\x1B[39m \x1B[38;5;28;01mraise\x1B[39;00m e\n' +
'\x1B[32m 178\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m df.columns.tolist() == [\x1B[32m0\x1B[39m]:\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\packaged_modules\\json\\json.py:151\x1B[39m, in \x1B[36mJson._generate_tables\x1B[39m\x1B[34m(self, files)\x1B[39m\n' +
'\x1B[32m 150\x1B[39m \x1B[38;5;28;01mtry\x1B[39;00m:\n' +
'\x1B[32m--> \x1B[39m\x1B[32m151\x1B[39m pa_table = \x1B[43mpaj\x1B[49m\x1B[43m.\x1B[49m\x1B[43mread_json\x1B[49m\x1B[43m(\x1B[49m\n' +
'\x1B[32m 152\x1B[39m \x1B[43m \x1B[49m\x1B[43mio\x1B[49m\x1B[43m.\x1B[49m\x1B[43mBytesIO\x1B[49m\x1B[43m(\x1B[49m\x1B[43mbatch\x1B[49m\x1B[43m)\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mread_options\x1B[49m\x1B[43m=\x1B[49m\x1B[43mpaj\x1B[49m\x1B[43m.\x1B[49m\x1B[43mReadOptions\x1B[49m\x1B[43m(\x1B[49m\x1B[43mblock_size\x1B[49m\x1B[43m=\x1B[49m\x1B[43mblock_size\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 153\x1B[39m \x1B[43m \x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 154\x1B[39m \x1B[38;5;28;01mbreak\x1B[39;00m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pyarrow\\_json.pyx:342\x1B[39m, in \x1B[36mpyarrow._json.read_json\x1B[39m\x1B[34m()\x1B[39m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pyarrow\\error.pxi:155\x1B[39m, in \x1B[36mpyarrow.lib.pyarrow_internal_check_status\x1B[39m\x1B[34m()\x1B[39m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\pyarrow\\error.pxi:92\x1B[39m, in \x1B[36mpyarrow.lib.check_status\x1B[39m\x1B[34m()\x1B[39m\n',
'\x1B[31mArrowInvalid\x1B[39m: JSON parse error: Invalid value. in row 14',
'\nThe above exception was the direct cause of the following exception:\n',
'\x1B[31mDatasetGenerationError\x1B[39m Traceback (most recent call last)',
'\x1B[36mCell\x1B[39m\x1B[36m \x1B[39m\x1B[32mIn[4]\x1B[39m\x1B[32m, line 2\x1B[39m\n' +
'\x1B[32m 1\x1B[39m \x1B[38;5;66;03m# 3. 从JSONL文件加载数据集\x1B[39;00m\n' +
'\x1B[32m----> \x1B[39m\x1B[32m2\x1B[39m dataset = \x1B[43mload_dataset\x1B[49m\x1B[43m(\x1B[49m\x1B[33;43m"\x1B[39;49m\x1B[33;43mjson\x1B[39;49m\x1B[33;43m"\x1B[39;49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mdata_files\x1B[49m\x1B[43m=\x1B[49m\x1B[33;43m"\x1B[39;49m\x1B[33;43mtrain_data.jsonl\x1B[39;49m\x1B[33;43m"\x1B[39;49m\x1B[43m)\x1B[49m[\x1B[33m"\x1B[39m\x1B[33mtrain\x1B[39m\x1B[33m"\x1B[39m] \x1B[38;5;66;03m# 加载JSONL文件\x1B[39;00m\n' +
'\x1B[32m 4\x1B[39m \x1B[38;5;66;03m# Tokenize 数据\x1B[39;00m\n' +
'\x1B[32m 5\x1B[39m \x1B[38;5;28;01mdef\x1B[39;00m\x1B[38;5;250m \x1B[39m\x1B[34mtokenize_function\x1B[39m(examples):\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\load.py:1417\x1B[39m, in \x1B[36mload_dataset\x1B[39m\x1B[34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, keep_in_memory, save_infos, revision, token, streaming, num_proc, storage_options, **config_kwargs)\x1B[39m\n' +
'\x1B[32m 1414\x1B[39m \x1B[38;5;28;01mreturn\x1B[39;00m builder_instance.as_streaming_dataset(split=split)\n' +
'\x1B[32m 1416\x1B[39m \x1B[38;5;66;03m# Download and prepare data\x1B[39;00m\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1417\x1B[39m \x1B[43mbuilder_instance\x1B[49m\x1B[43m.\x1B[49m\x1B[43mdownload_and_prepare\x1B[49m\x1B[43m(\x1B[49m\n' +
'\x1B[32m 1418\x1B[39m \x1B[43m \x1B[49m\x1B[43mdownload_config\x1B[49m\x1B[43m=\x1B[49m\x1B[43mdownload_config\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1419\x1B[39m \x1B[43m \x1B[49m\x1B[43mdownload_mode\x1B[49m\x1B[43m=\x1B[49m\x1B[43mdownload_mode\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1420\x1B[39m \x1B[43m \x1B[49m\x1B[43mverification_mode\x1B[49m\x1B[43m=\x1B[49m\x1B[43mverification_mode\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1421\x1B[39m \x1B[43m \x1B[49m\x1B[43mnum_proc\x1B[49m\x1B[43m=\x1B[49m\x1B[43mnum_proc\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1422\x1B[39m \x1B[43m \x1B[49m\x1B[43mstorage_options\x1B[49m\x1B[43m=\x1B[49m\x1B[43mstorage_options\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 1423\x1B[39m \x1B[43m\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 1425\x1B[39m \x1B[38;5;66;03m# Build dataset for splits\x1B[39;00m\n' +
'\x1B[32m 1426\x1B[39m keep_in_memory = (\n' +
'\x1B[32m 1427\x1B[39m keep_in_memory \x1B[38;5;28;01mif\x1B[39;00m keep_in_memory \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;129;01mnot\x1B[39;00m \x1B[38;5;28;01mNone\x1B[39;00m \x1B[38;5;28;01melse\x1B[39;00m is_small_dataset(builder_instance.info.dataset_size)\n' +
'\x1B[32m 1428\x1B[39m )\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:897\x1B[39m, in \x1B[36mDatasetBuilder.download_and_prepare\x1B[39m\x1B[34m(self, output_dir, download_config, download_mode, verification_mode, dl_manager, base_path, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)\x1B[39m\n' +
'\x1B[32m 895\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m num_proc \x1B[38;5;129;01mis\x1B[39;00m \x1B[38;5;129;01mnot\x1B[39;00m \x1B[38;5;28;01mNone\x1B[39;00m:\n' +
'\x1B[32m 896\x1B[39m prepare_split_kwargs[\x1B[33m"\x1B[39m\x1B[33mnum_proc\x1B[39m\x1B[33m"\x1B[39m] = num_proc\n' +
'\x1B[32m--> \x1B[39m\x1B[32m897\x1B[39m \x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_download_and_prepare\x1B[49m\x1B[43m(\x1B[49m\n' +
'\x1B[32m 898\x1B[39m \x1B[43m \x1B[49m\x1B[43mdl_manager\x1B[49m\x1B[43m=\x1B[49m\x1B[43mdl_manager\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 899\x1B[39m \x1B[43m \x1B[49m\x1B[43mverification_mode\x1B[49m\x1B[43m=\x1B[49m\x1B[43mverification_mode\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 900\x1B[39m \x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mprepare_split_kwargs\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 901\x1B[39m \x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mdownload_and_prepare_kwargs\x1B[49m\x1B[43m,\x1B[49m\n' +
'\x1B[32m 902\x1B[39m \x1B[43m\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 903\x1B[39m \x1B[38;5;66;03m# Sync info\x1B[39;00m\n' +
'\x1B[32m 904\x1B[39m \x1B[38;5;28mself\x1B[39m.info.dataset_size = \x1B[38;5;28msum\x1B[39m(split.num_bytes \x1B[38;5;28;01mfor\x1B[39;00m split \x1B[38;5;129;01min\x1B[39;00m \x1B[38;5;28mself\x1B[39m.info.splits.values())\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:973\x1B[39m, in \x1B[36mDatasetBuilder._download_and_prepare\x1B[39m\x1B[34m(self, dl_manager, verification_mode, **prepare_split_kwargs)\x1B[39m\n' +
'\x1B[32m 969\x1B[39m split_dict.add(split_generator.split_info)\n' +
'\x1B[32m 971\x1B[39m \x1B[38;5;28;01mtry\x1B[39;00m:\n' +
'\x1B[32m 972\x1B[39m \x1B[38;5;66;03m# Prepare split will record examples associated to the split\x1B[39;00m\n' +
'\x1B[32m--> \x1B[39m\x1B[32m973\x1B[39m \x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_prepare_split\x1B[49m\x1B[43m(\x1B[49m\x1B[43msplit_generator\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43mprepare_split_kwargs\x1B[49m\x1B[43m)\x1B[49m\n' +
'\x1B[32m 974\x1B[39m \x1B[38;5;28;01mexcept\x1B[39;00m \x1B[38;5;167;01mOSError\x1B[39;00m \x1B[38;5;28;01mas\x1B[39;00m e:\n' +
'\x1B[32m 975\x1B[39m \x1B[38;5;28;01mraise\x1B[39;00m \x1B[38;5;167;01mOSError\x1B[39;00m(\n' +
'\x1B[32m 976\x1B[39m \x1B[33m"\x1B[39m\x1B[33mCannot find data file. \x1B[39m\x1B[33m"\x1B[39m\n' +
'\x1B[32m 977\x1B[39m + (\x1B[38;5;28mself\x1B[39m.manual_download_instructions \x1B[38;5;129;01mor\x1B[39;00m \x1B[33m"\x1B[39m\x1B[33m"\x1B[39m)\n' +
'\x1B[32m 978\x1B[39m + \x1B[33m"\x1B[39m\x1B[38;5;130;01m\\n\x1B[39;00m\x1B[33mOriginal error:\x1B[39m\x1B[38;5;130;01m\\n\x1B[39;00m\x1B[33m"\x1B[39m\n' +
'\x1B[32m 979\x1B[39m + \x1B[38;5;28mstr\x1B[39m(e)\n' +
'\x1B[32m 980\x1B[39m ) \x1B[38;5;28;01mfrom\x1B[39;00m\x1B[38;5;250m \x1B[39m\x1B[38;5;28;01mNone\x1B[39;00m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:1705\x1B[39m, in \x1B[36mArrowBasedBuilder._prepare_split\x1B[39m\x1B[34m(self, split_generator, file_format, num_proc, max_shard_size)\x1B[39m\n' +
'\x1B[32m 1703\x1B[39m job_id = \x1B[32m0\x1B[39m\n' +
'\x1B[32m 1704\x1B[39m \x1B[38;5;28;01mwith\x1B[39;00m pbar:\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1705\x1B[39m \x1B[43m \x1B[49m\x1B[38;5;28;43;01mfor\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mjob_id\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mdone\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mcontent\x1B[49m\x1B[43m \x1B[49m\x1B[38;5;129;43;01min\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[38;5;28;43mself\x1B[39;49m\x1B[43m.\x1B[49m\x1B[43m_prepare_split_single\x1B[49m\x1B[43m(\x1B[49m\n' +
'\x1B[32m 1706\x1B[39m \x1B[43m \x1B[49m\x1B[43mgen_kwargs\x1B[49m\x1B[43m=\x1B[49m\x1B[43mgen_kwargs\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43mjob_id\x1B[49m\x1B[43m=\x1B[49m\x1B[43mjob_id\x1B[49m\x1B[43m,\x1B[49m\x1B[43m \x1B[49m\x1B[43m*\x1B[49m\x1B[43m*\x1B[49m\x1B[43m_prepare_split_args\x1B[49m\n' +
'\x1B[32m 1707\x1B[39m \x1B[43m \x1B[49m\x1B[43m)\x1B[49m\x1B[43m:\x1B[49m\n' +
'\x1B[32m 1708\x1B[39m \x1B[43m \x1B[49m\x1B[38;5;28;43;01mif\x1B[39;49;00m\x1B[43m \x1B[49m\x1B[43mdone\x1B[49m\x1B[43m:\x1B[49m\n' +
'\x1B[32m 1709\x1B[39m \x1B[43m \x1B[49m\x1B[43mresult\x1B[49m\x1B[43m \x1B[49m\x1B[43m=\x1B[49m\x1B[43m \x1B[49m\x1B[43mcontent\x1B[49m\n',
'\x1B[36mFile \x1B[39m\x1B[32me:\\Python311\\python11\\Lib\\site-packages\\datasets\\builder.py:1861\x1B[39m, in \x1B[36mArrowBasedBuilder._prepare_split_single\x1B[39m\x1B[34m(self, gen_kwargs, fpath, file_format, max_shard_size, job_id)\x1B[39m\n' +
'\x1B[32m 1859\x1B[39m \x1B[38;5;28;01mif\x1B[39;00m \x1B[38;5;28misinstance\x1B[39m(e, DatasetGenerationError):\n' +
'\x1B[32m 1860\x1B[39m \x1B[38;5;28;01mraise\x1B[39;00m\n' +
'\x1B[32m-> \x1B[39m\x1B[32m1861\x1B[39m \x1B[38;5;28;01mraise\x1B[39;00m DatasetGenerationError(\x1B[33m"\x1B[39m\x1B[33mAn error occurred while generating the dataset\x1B[39m\x1B[33m"\x1B[39m) \x1B[38;5;28;01mfrom\x1B[39;00m\x1B[38;5;250m \x1B[39m\x1B[34;01me\x1B[39;00m\n' +
'\x1B[32m 1863\x1B[39m \x1B[38;5;28;01myield\x1B[39;00m job_id, \x1B[38;5;28;01mTrue\x1B[39;00m, (total_num_examples, total_num_bytes, writer._features, num_shards, shard_lengths)\n',
'\x1B[31mDatasetGenerationError\x1B[39m: An error occurred while generating the dataset'
]
}
13:59:30.098 [info] Restart requested f:\Programmer\python\MyAI\train_sakura.ipynb
13:59:30.106 [info] Process Execution: c:\WINDOWS\System32\taskkill.exe /F /T /PID 12080
13:59:30.492 [warn] Failed to get activated env vars for e:\Python311\python11\python.exe in 321ms
13:59:30.497 [info] Process Execution: e:\Python311\python11\python.exe -c "import ipykernel; print(ipykernel.__version__); print("5dc3a68c-e34e-4080-9c3e-2a532b2ccb4d"); print(ipykernel.__file__)"
13:59:30.506 [info] Process Execution: e:\Python311\python11\python.exe -m ipykernel_launcher --f=~\AppData\Roaming\jupyter\runtime\kernel-v34bd4939ffc86a7fed2a59716c56f5f4971bfebc7.json
> cwd: f:\Programmer\python\MyAI
13:59:31.590 [info] Restarted 53607fdc-7887-4227-839f-45729ad69e01
13:59:56.108 [error] Disposing session as kernel process died ExitCode: 3221225477, Reason:
最新发布