[Triton Tutorial] triton_language.tensor

Triton is a language and compiler for parallel programming. It aims to provide a Python-based programming environment for efficiently writing custom DNN compute kernels that run at maximal throughput on modern GPU hardware.
For more Triton documentation in Chinese, visit → https://triton.hyper.ai/

class triton.language.tensor(self, handle, type: dtype)

Represents an N-dimensional array of values or pointers.

In a Triton program, tensor is the most fundamental data structure. Most functions in triton.language operate on tensors and return them.

Most of the named member functions here duplicate the free functions in triton.language. For example, triton.language.sqrt(x) is equivalent to x.sqrt().

tensor also defines most of the magic/dunder methods, so you can write expressions like x + y, x << 2, and so on.
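For example, the following minimal sketch (assuming a CUDA-capable GPU; the kernel name demo_kernel and its parameters are made up for illustration) shows member methods such as x.abs() and x.sqrt() used interchangeably with the free functions, plus a dunder operator applied directly to a tensor:

import torch
import triton
import triton.language as tl

@triton.jit
def demo_kernel(x_ptr, out_ptr, n_elements, BLOCK: tl.constexpr):
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK + tl.arange(0, BLOCK)   # offsets is a tl.tensor
    mask = offsets < n_elements
    x = tl.load(x_ptr + offsets, mask=mask)       # x is a tl.tensor
    y = x.abs().sqrt()                            # same as tl.sqrt(tl.abs(x))
    y = y + 1.0                                   # dunder __add__ on a tensor
    tl.store(out_ptr + offsets, y, mask=mask)

x = torch.randn(1024, device="cuda")
out = torch.empty_like(x)
grid = (triton.cdiv(x.numel(), 256),)
demo_kernel[grid](x, out, x.numel(), BLOCK=256)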

Constructor

__init__(self, handle, type: dtype)

Not called by user code.

Methods

__init__(self, handle, type) - Not called by user code
abs(self) - Forwards to the abs() free function
advance(self, offsets) - Forwards to the advance() free function
argmax(self, *kwargs) - Forwards to the argmax() free function
argmin(self, *kwargs) - Forwards to the argmin() free function
associative_scan(self, axis, combine_fn[, …]) - Forwards to the associative_scan() free function
atomic_add(self, val[, mask, sem, scope]) - Forwards to the atomic_add() free function
atomic_and(self, val[, mask, sem, scope]) - Forwards to the atomic_and() free function
atomic_cas(self, cmp, val[, sem, scope]) - Forwards to the atomic_cas() free function
atomic_max(self, val[, mask, sem, scope]) - Forwards to the atomic_max() free function
atomic_min(self, val[, mask, sem, scope]) - Forwards to the atomic_min() free function
atomic_or(self, val[, mask, sem, scope]) - Forwards to the atomic_or() free function
atomic_xchg(self, val[, mask, sem, scope]) - Forwards to the atomic_xchg() free function
atomic_xor(self, val[, mask, sem, scope]) - Forwards to the atomic_xor() free function
broadcast_to(self, *shape) - Forwards to the broadcast_to() free function
cast(self, dtype[, fp_downcast_rounding, …]) - Forwards to the cast() free function
cdiv(*self, **kwargs) - Forwards to the cdiv() free function
ceil(self) - Forwards to the ceil() free function
cos(self) - Forwards to the cos() free function
cumprod(*self, **kwargs) - Forwards to the cumprod() free function
cumsum(*self, **kwargs) - Forwards to the cumsum() free function
erf(self) - Forwards to the erf() free function
exp(self) - Forwards to the exp() free function
exp2(self) - Forwards to the exp2() free function
expand_dims(self, axis) - Forwards to the expand_dims() free function
flip(*self, **kwargs) - Forwards to the flip() free function
floor(self) - Forwards to the floor() free function
histogram(self, num_bins) - Forwards to the histogram() free function
log(self) - Forwards to the log() free function
log2(self) - Forwards to the log2() free function
logical_and(self, other)
logical_or(self, other)
max(*self, **kwargs) - Forwards to the max() free function
min(*self, **kwargs) - Forwards to the min() free function
permute(self, *dims) - Forwards to the permute() free function
ravel(*self, **kwargs) - Forwards to the ravel() free function
reduce(self, axis, combine_fn[, keep_dims]) - Forwards to the reduce() free function
reshape(self, *shape[, can_reorder]) - Forwards to the reshape() free function
rsqrt(self) - Forwards to the rsqrt() free function
sigmoid(*self, **kwargs) - Forwards to the sigmoid() free function
sin(self) - Forwards to the sin() free function
softmax(*self, **kwargs) - Forwards to the softmax() free function
sort(*self, **kwargs) - Forwards to the sort() free function
split(self) - Forwards to the split() free function
sqrt(self) - Forwards to the sqrt() free function
sqrt_rn(self) - Forwards to the sqrt_rn() free function
store(self, value[, mask, boundary_check, …]) - Forwards to the store() free function
sum(*self, **kwargs) - Forwards to the sum() free function
to(self, dtype[, fp_downcast_rounding, bitcast]) - Alias for tensor.cast()
trans(self, *dims) - Forwards to the trans() free function
view(self, *shape) - Forwards to the view() free function
xor_sum(self[, axis, keep_dims]) - Forwards to the xor_sum() free function
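To illustrate the reduction and atomic entries in the table above, here is a sketch (the kernel name block_sum_kernel is hypothetical, and out_ptr is assumed to point to a single float32 value initialized to zero) that computes a per-block sum with the member form x.sum() and accumulates it with the tl.atomic_add() free function:

import triton
import triton.language as tl

@triton.jit
def block_sum_kernel(x_ptr, out_ptr, n_elements, BLOCK: tl.constexpr):
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK + tl.arange(0, BLOCK)
    mask = offsets < n_elements
    x = tl.load(x_ptr + offsets, mask=mask, other=0.0)  # masked lanes contribute 0
    partial = x.sum(axis=0)            # member form; same as tl.sum(x, axis=0)
    tl.atomic_add(out_ptr, partial)    # one atomic accumulation per program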

Attributes

T - Transposes a 2D tensor
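A small sketch of the T attribute (hypothetical kernel name, row-major layout assumed; M and N must be powers of two because tl.arange requires it): an (M, N) block is loaded, transposed with x.T, which is equivalent to x.trans(), and stored as an (N, M) block:

import triton
import triton.language as tl

@triton.jit
def transpose_block_kernel(x_ptr, out_ptr, M: tl.constexpr, N: tl.constexpr):
    rows = tl.arange(0, M)
    cols = tl.arange(0, N)
    x = tl.load(x_ptr + rows[:, None] * N + cols[None, :])    # (M, N) block
    xt = x.T                                                  # (N, M), same as x.trans()
    tl.store(out_ptr + cols[:, None] * M + rows[None, :], xt)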