Flask make_response(*args)

This article shows how to use make_response in Flask to create and return a response object with custom headers. The examples demonstrate how to set response headers and the different argument types that make_response accepts.

A view function ultimately returns a response object (you can also construct a Response yourself), but once you have written the return statement there is no convenient place to set headers or other attributes. It is therefore more flexible to build the response object first and return it afterwards. The default pattern simply returns the rendered template:

def index():
    return render_template('index.html')
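
For context, Flask wraps whatever the view returns into a Response object. A minimal sketch of the roughly equivalent explicit construction (assuming the same index.html template exists) looks like this:

from flask import Response, render_template

def index():
    # Roughly what Flask builds when the view returns a rendered template string
    return Response(render_template('index.html'), status=200, mimetype='text/html')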

make_response lets you set additional attributes on the response before returning it, for example a custom header:

from flask import make_response, render_template

def index():
    response = make_response(render_template('index.html'))
    # Add a custom header before returning the response
    response.headers['X-Parachutes'] = 'parachutes are cool'
    return response
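
Beyond headers, the same response object exposes other attributes you may want to adjust before returning. A minimal sketch (the view name, cookie name, and header values below are illustrative, not part of the original example):

from flask import make_response, render_template

def themed_page():
    response = make_response(render_template('index.html'))
    # Status code, cookies, and headers can all be set on the same object
    response.status_code = 200
    response.set_cookie('theme', 'dark', max_age=3600, httponly=True)
    response.headers['Cache-Control'] = 'no-store'
    return response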
make_response accepts the following argument types:

Parameter   Description
str         create the response object from a string
bytes       create the response object from bytes
dict        create the response object from a dict (serialized to JSON)
tuple       (body, status, headers), (body, status) or (body, headers)

When using the tuple form:
body can be a str, bytes or dict
status can be a str or an integer
headers can be a dict or a list of (key, value) pairs
If body is itself a response object, the given status overwrites the response's existing status and the given headers are added to its existing headers
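
The following sketch illustrates these argument types; the route names and header values are made up for the example:

from flask import Flask, make_response

app = Flask(__name__)

@app.route('/text')
def text():
    # str body, with explicit status and headers passed as a tuple
    return make_response('plain text body', 200, {'X-Example': 'demo'})

@app.route('/json')
def as_json():
    # dict body is serialized into a JSON response (Flask 1.1+)
    return make_response({'ok': True}, 201)

@app.route('/tuple')
def as_tuple():
    # The same tuple forms also work as a plain return value without make_response
    return 'created', 201, [('X-Example', 'demo')]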

References:
https://flask.palletsprojects.com/en/1.1.x/api/#flask.make_response
https://www.cnblogs.com/zhuchunyu/p/10466509.html
