Traceback (most recent call last):
File "/models/z50051264/bitsandbytes-master/examples/inference/inference.py", line 75, in <module>
generated_ids = model.generate(
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/generation/utils.py", line 2597, in generate
result = self._sample(
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/generation/utils.py", line 3557, in _sample
outputs = self(**model_inputs, return_dict=True)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/utils/generic.py", line 969, in wrapper
output = func(self, *args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 703, in forward
outputs: BaseModelOutputWithPast = self.model(
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/utils/generic.py", line 969, in wrapper
output = func(self, *args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 436, in forward
layer_outputs = decoder_layer(
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/modeling_layers.py", line 48, in __call__
return super().__call__(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 257, in forward
hidden_states, self_attn_weights = self.self_attn(
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 160, in forward
key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/models/z50051264/bitsandbytes-main/bitsandbytes/nn/modules.py", line 525, in forward
return bnb.matmul_4bit(x, weight, bias=bias, quant_state=self.weight.quant_state).to(inp_dtype)
File "/models/z50051264/bitsandbytes-main/bitsandbytes/autograd/_functions.py", line 466, in matmul_4bit
return MatMul4Bit.apply(A, B, out, bias, quant_state)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/autograd/function.py", line 575, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/models/z50051264/bitsandbytes-main/bitsandbytes/autograd/_functions.py", line 380, in forward
output = torch.nn.functional.linear(A, F.dequantize_4bit(B, quant_state).to(A.dtype).t(), bias)
RuntimeError: The size of tensor a (512) must match the size of tensor b (3584) at non-singleton dimension 1
[ERROR] 2025-07-25-08:11:14 (PID:4222, Device:0, RankID:-1) ERR99999 UNKNOWN applicaiton exception
分析报错
最新发布