---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[2], line 9
      2 model_name = "qwen/Qwen3-8B"
      4 tokenizer = AutoTokenizer.from_pretrained(
      5     model_name,
      6     trust_remote_code=True,
      7     use_fast=False
      8 )
----> 9 model = AutoModelForCausalLM.from_pretrained(
     10     # attn_implementation="flash_attention_2",  # Optional: speeds up attention computation
     11     model_name,
     12     device_map="auto",
     13     trust_remote_code=True
     14 )

File e:\Python311\python11\Lib\site-packages\modelscope\utils\hf_util\patcher.py:285, in _patch_pretrained_class.<locals>.get_wrapped_class.<locals>.ClassWrapper.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    281 with file_pattern_context(kwargs, module_class, cls):
    282     model_dir = get_model_dir(pretrained_model_name_or_path,
    283                               **kwargs)
--> 285     module_obj = module_class.from_pretrained(
    286         model_dir, *model_args, **kwargs)
    288 if module_class.__name__.startswith('AutoModel'):
    289     module_obj.model_dir = model_dir

File e:\Python311\python11\Lib\site-packages\transformers\models\auto\auto_factory.py:604, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    602 if model_class.config_class == config.sub_configs.get("text_config", None):
    603     config = config.get_text_config()
--> 604 return model_class.from_pretrained(
    605     pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    606 )
    607 raise ValueError(
    608     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    609     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping)}."
    610 )

File e:\Python311\python11\Lib\site-packages\transformers\modeling_utils.py:277, in restore_default_dtype.<locals>._wrapper(*args, **kwargs)
    275 old_dtype = torch.get_default_dtype()
    276 try:
--> 277     return func(*args, **kwargs)
    278 finally:
    279     torch.set_default_dtype(old_dtype)

File e:\Python311\python11\Lib\site-packages\transformers\modeling_utils.py:5140, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
   5137     device_map_kwargs["offload_buffers"] = True
   5139 if not is_fsdp_enabled() and not is_deepspeed_zero3_enabled():
-> 5140     dispatch_model(model, **device_map_kwargs)
   5142 if hf_quantizer is not None:
   5143     model.hf_quantizer = hf_quantizer

File e:\Python311\python11\Lib\site-packages\accelerate\big_modeling.py:504, in dispatch_model(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)
    502     model.to(device)
    503 else:
--> 504     raise ValueError(
    505         "You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead."
    506     )
    507 # Convert OrderedDict back to dict for easier usage
    508 model.hf_device_map = dict(device_map)

ValueError: You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.
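
The ValueError comes from accelerate's `dispatch_model`: the device map computed for `device_map="auto"` placed every module on "disk", meaning neither the GPU nor CPU RAM had room for any of the weights. Since no `torch_dtype` was passed, `from_pretrained` materializes the 8B checkpoint in float32 (roughly 32 GB), which overflows both. Loading in half precision usually keeps at least part of the model in memory, and `offload_folder` then lets any remainder spill to disk legally. A minimal sketch of that fix, assuming a machine with a CUDA GPU and that an `offload/` scratch directory is acceptable:

import torch
from modelscope import AutoTokenizer, AutoModelForCausalLM

model_name = "qwen/Qwen3-8B"

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_fast=False,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,   # ~16 GB of weights instead of ~32 GB in fp32
    device_map="auto",
    trust_remote_code=True,
    offload_folder="offload",     # assumed scratch dir; holds any layers that still overflow
)

If the computed map still puts everything on disk, the machine cannot hold the model even at half precision; see the 4-bit sketch after the original cell below.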
For reference, the cell that produced the traceback:

# 1. Load the tokenizer and model
from modelscope import AutoTokenizer, AutoModelForCausalLM

model_name = "qwen/Qwen3-8B"

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_fast=False
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
    # attn_implementation="flash_attention_2",  # Optional: speeds up attention computation
)
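
If even half precision overflows the combined GPU and CPU memory, a 4-bit quantized load shrinks the weights to roughly 5 GB. A sketch assuming `bitsandbytes` is installed and a CUDA GPU is present (bitsandbytes quantization does not run on CPU-only machines); the traceback shows modelscope's patched `from_pretrained` forwarding keyword arguments to transformers unchanged, so `quantization_config` should pass through as well:

import torch
from modelscope import AutoModelForCausalLM
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4 bits at load time
    bnb_4bit_quant_type="nf4",              # NormalFloat4 generally beats the plain fp4 default
    bnb_4bit_compute_dtype=torch.bfloat16,  # matmuls still run in bf16
)

model = AutoModelForCausalLM.from_pretrained(
    "qwen/Qwen3-8B",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)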