【Triton 教程】triton.language.expand_dims

Triton 是一种用于并行编程的语言和编译器。它旨在提供一个基于 Python 的编程环境,以高效编写自定义 DNN 计算内核,并能够在现代 GPU 硬件上以最大吞吐量运行。

更多 Triton 中文文档可访问 →https://triton.hyper.ai/

triton.language.expand_dims(input, axis)

通过插入新的长度为 1 的维度来扩展张量的形状。

轴索引是相对于结果张量而言的,因此对于 axis 中给出的每个轴,result.shape[axis] 都将为 1。

参数

  • input (tl.tensor) - 输入张量。
  • axis (int | Sequence[int]) - 要添加新轴的索引。

该函数也可作为 tensor 的成员函数调用,使用 x.expand_dims(...) 而不是 expand_dims(x, ...)。

模型是应用yolov8n 分割模型训练: from ultralytics import YOLO import torch # Load a model model = YOLO('yolov8-seg.yaml') # build a new model from YAML model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training) model = YOLO('yolov8-seg.yaml').load('yolov8n.pt') # build from YAML and transfer weights results = model.train(pose=True, data='./datasets/Dataset_A2C_2025-08-05-1/Dataset_A2C_2025-08-05-1.yaml', epochs=200, imgsz=256) 现想实现预测,如下代码,但报错,请帮我修正: import numpy as np import cv2 from numpy.array_api import uint8 from skimage.measure import label, regionprops from openvino.runtime import Core from PIL import Image import torch import openvino as ov import matplotlib.pyplot as plt from sympy.codegen.ast import int32 from triton.language import dtype class YiAtrium(): def __init__(self, device="CPU"): self.core = Core() self.model = self.core.read_model("/Work/zhangxin/ultralytics/runs/segment/train/weights/best_openvino_model/best.xml") self.compiled_model = self.core.compile_model(self.model, device) # 修改1:使用Opencv库读取数据,适配原有的代码。 # 修改2:输入cvmat图像数据 def preprocess(self, image): # img = cv2.imread(img_path,cv2.IMREAD_GRAYSCALE) # img = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY) img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) img_resized = cv2.resize(img, (256,256)) img_ndarray = img_resized if img_ndarray.ndim == 2: img_ndarray = img_ndarray[np.newaxis, ...] img_ndarray = img_ndarray[np.newaxis, ...] 
img_ndarray = img_ndarray / 255.0 tensor = torch.from_numpy(img_ndarray) img_tensor = tensor.permute(2, 0, 1).float().contiguous() # 或 tensor = tensor.permute(2, 0, 1).contiguous() img_tensor = img_tensor.unsqueeze(0) # 形状变为:(1, 3, 256, 256) #img_ndarray = img_ndarray / 255.0 #img_tensor = torch.as_tensor(img_ndarray.copy()).float().contiguous() return img_tensor @staticmethod def keep_largest_region(mask): if mask.max() == 0: return mask labeled_mask = label(mask, connectivity=1) regions = regionprops(labeled_mask) if not regions: return mask largest_region = max(regions, key=lambda r: r.area) result_mask = np.zeros_like(mask) result_mask[labeled_mask == largest_region.label] = 1 return result_mask @staticmethod def fill_hole(image): """填充图像中的空洞""" # 确保是二值图像 if len(np.unique(image)) > 2: _, image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY) src = image.copy() mask = np.zeros([src.shape[0] + 2, src.shape[1] + 2], np.uint8) # 找到背景点 isbreak = False for i in range(src.shape[0]): for j in range(src.shape[1]): if src[i, j] == 0: seedpoint = (j, i) # 注意坐标顺序 (x,y) isbreak = True break if isbreak: break cv2.floodFill(src, mask, seedpoint, 255) img_floofill_inv = cv2.bitwise_not(src) im_out = image | img_floofill_inv return im_out @staticmethod def get_edge_points(mask, scalex, scaley): contours, _ = cv2.findContours( mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE ) edge_points = [] for contour in contours: epsilon = 0.005 * cv2.arcLength(contour, True) approx = cv2.approxPolyDP(contour, epsilon, True) for point in approx: x, y = point[0] up_x = int(x * scaley) up_y = int(y * scalex) edge_points.append((up_x, up_y)) final_result = {"detection": {}, "segmentation": {}} point_dict = {"1-LVIntima_pylogon": edge_points.copy()} final_result["segmentation"] = point_dict return final_result # 原始:输入的是图像的路径 # def Yi_Segment(self, image_path): # input_tensor = self.preprocess(image_path) # predict_tensor = self.compiled_model(input_tensor) # output_ndarray = 
predict_tensor[0].argmax(1).squeeze(0) # prediction = output_ndarray.astype(np.uint8) # prediction = prediction * 255 # output_mask = self.fill_hole(self.keep_largest_region(prediction)) # final_output = self.get_edge_points(output_mask) # return final_output # 修改:输入cvmat图像数据 def Yi_Segment(self, image): input_tensor = self.preprocess(image) predict_tensor = self.compiled_model(input_tensor) output_ndarray = predict_tensor[0].argmax(1).squeeze(0) prediction = output_ndarray.astype(np.uint8) prediction = prediction * 255 output_mask = self.fill_hole(self.keep_largest_region(prediction)) img_h, img_w = image.shape[:2] scalex = img_h / 256.0 scaley = img_w / 256.0 final_output = self.get_edge_points(output_mask, scalex, scaley) #final_output = self.get_edge_points(final_output) return final_output if __name__ == "__main__": image_path = "/Work/zhangxin/ultralytics/runs/test/Heart_A2C_0000003_20240416_Comen_cropped_105.bmp" # segmenter = YiAtrium() image = cv2.imread(image_path) final_result = segmenter.Yi_Segment(image) img = cv2.imread(image_path) new_img = np.zeros((image.shape[0], image.shape[1]), dtype=uint8) cv2.drawContours(new_img, [np.asarray(final_result["segmentation"]["1-LVIntima_pylogon"])], -1, [255, 0], thickness=cv2.FILLED) cv2.imshow("1", new_img) cv2.imshow(("2"),img) cv2.waitKey(0)
09-03
Traceback (most recent call last): File "/root/anaconda3/envs/mamba/lib/python3.9/site-packages/triton/language/core.py", line 35, in wrapper return fn(*args, **kwargs) File "/root/anaconda3/envs/mamba/lib/python3.9/site-packages/triton/language/core.py", line 1192, in arange return semantic.arange(start, end, _builder) File "/root/anaconda3/envs/mamba/lib/python3.9/site-packages/triton/language/semantic.py", line 512, in arange raise ValueError("arange's range must be a power of 2") ValueError: arange's range must be a power of 2 The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/home/samwang1010/Triton/nsa.py", line 268, in <module> output = nsa(seq, Q, K, V, Br, Bc, window_size, n) File "/home/samwang1010/Triton/nsa.py", line 232, in nsa nsa_kernal[grid]( File "/root/anaconda3/envs/mamba/lib/python3.9/site-packages/triton/runtime/jit.py", line 345, in <lambda> return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs) File "/root/anaconda3/envs/mamba/lib/python3.9/site-packages/triton/runtime/jit.py", line 662, in run kernel = self.compile( File "/root/anaconda3/envs/mamba/lib/python3.9/site-packages/triton/compiler/compiler.py", line 276, in compile module = src.make_ir(options, codegen_fns, context) File "/root/anaconda3/envs/mamba/lib/python3.9/site-packages/triton/compiler/compiler.py", line 113, in make_ir return ast_to_ttir(self.fn, self, context=context, options=options, codegen_fns=codegen_fns) triton.compiler.errors.CompilationError: at 183:64: O_window = o / tl.expand_dims(Li, 1) tl.store(O_Window + q_offsets[:, None] * stride_owinn + d_offsets[None, :] * stride_owind, O_window, mask=q_offsets[:, None] * stride_owinn < N) ''' 门控融合 ''' l = tl.load(seq + q_offsets[:, None] * stride_ln + d_offsets[None, :] * stride_ld) w_out = tl.load(W_out + d_offsets[:, None] * stride_wout1 + tl.arange(0, 3)[None, :] * stride_wout2)
03-08
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值