Using row pointers: what does *(*(pointer + row) + col) mean?

This article looks at how to use a row pointer to access the elements of a two-dimensional array, and in particular what the expression `*(*(pointer + row) + col)` means. A worked example shows how a row pointer can be used to find the maximum of each row of a 3x3 two-dimensional array and store the results in a one-dimensional array.

When one pointer points to a single element of a two-dimensional array and another points to one of its sub-arrays (rows), the pointer to a single element is informally called a column pointer, and the pointer to a sub-array is called a row pointer.
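As a minimal sketch of the difference (a 3x4 array and the names `col_ptr` and `row_ptr` are chosen only for illustration): a column pointer has type `int *`, so adding 1 moves it to the next element, while a row pointer has type `int (*)[4]`, so adding 1 moves it to the next row. In `*(*(pointer + row) + col)`, the inner `*(pointer + row)` selects the `row`-th sub-array, and the outer dereference picks the `col`-th element inside it:

```c
#include <stdio.h>

int main(void)
{
	int array[3][4] = { {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12} };

	int *col_ptr = &array[0][0];   /* column pointer: points to one int          */
	int (*row_ptr)[4] = array;     /* row pointer: points to one row, an int[4]  */

	/* All three expressions name the same element, array[1][2] == 7 */
	printf("%d %d %d\n",
	       array[1][2],
	       *(col_ptr + 1 * 4 + 2),    /* column pointer: offset = row * width + col */
	       *(*(row_ptr + 1) + 2));    /* row pointer: *(*(pointer + row) + col)     */

	return 0;
}
```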

The program below declares a row pointer `p` to the array and uses `*(*(p + row) + col)` to visit and print every element:

#include <stdio.h>

int main(void)
{
	int array[3][4] = { {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12} };
	int (*p)[4] = array;	/* row pointer: p + row points to the row-th sub-array */

	for (int row = 0; row < 3; row++) {
		for (int col = 0; col < 4; col++) {
			/* *(p + row) is the row-th row; adding col and dereferencing gives the element */
			printf("%4d", *(*(p + row) + col));
		}
		printf("\n");
	}
	return 0;
}
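The example described in the summary, finding the maximum of each row of a 3x3 array with a row pointer and storing the results in a one-dimensional array, can be sketched as follows (the array values and the names `max_of_row` and `p` are illustrative, not the original listing):

```c
#include <stdio.h>

int main(void)
{
	int array[3][3] = { {3, 16, 87}, {65, 4, 32}, {99, 2, 10} };
	int max_of_row[3];          /* holds the maximum of each row */
	int (*p)[3] = array;        /* row pointer: p + row points to a row of 3 ints */

	for (int row = 0; row < 3; row++) {
		int max = *(*(p + row) + 0);            /* start with the first element of the row */
		for (int col = 1; col < 3; col++) {
			if (*(*(p + row) + col) > max) {
				max = *(*(p + row) + col);
			}
		}
		max_of_row[row] = max;
	}

	for (int row = 0; row < 3; row++) {
		printf("max of row %d: %d\n", row, max_of_row[row]);
	}
	return 0;
}
```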