TBuf和const char*的转换

本文介绍如何在Symbian环境下实现TBuf与constchar*之间的相互转换,并提供了使用固定大小缓冲区和动态缓冲区两种方法。同时,讨论了在转换过程中可能遇到的问题及解决方案。

TBuf和const char*的转换

http://wiki.forum.nokia.com/index.php/How_to_Convert_TBuf_to_Char_and_Vice_Versa

How to Convert TBuf to Char and Vice Versa
From Forum Nokia Wiki

void stringToDescriptor(const char* aString, TDes& aDescriptor)
{
    TPtrC8 ptr(reinterpret_cast<const TUint8*>(aString));
    aDescriptor.Copy(ptr);
}

Usage:

const char* str = "Hello, world!";
    TBuf<32> buffer;  // Make it large enough for str
    stringToDescriptor(str, buffer);

Problem with the code above is that defining a TBuf not large enough will raise a USER 23 panic.

Some ways to avoid this include:

Using either __ASSERT_DEBUG or __ASSERT_ALWAYS, depending on your needs. Note that you can use alternatives to User::Panic(), as long as you avoid executing sensitive code.

void stringToDescriptor(const char* aString, TDes& aDescriptor)
{
    TPtrC8 ptr(reinterpret_cast<const TUint8*>(aString));
    _LIT(KMyPanicDescriptor, "My panic text");
    __ASSERT_ALWAYS(User::StringLength(reinterpret_cast<const TUint8*>(aString))
 <= aDescriptor.MaxLength(), User::Panic(KMyPanicDescriptor, 0));
    aDescriptor.Copy(ptr);
}

The other way is relying on a dynamic buffer, using HBufC for instance:

HBufC* stringToDescriptorL(const char* aString)
{
    TPtrC8 ptr(reinterpret_cast<const TUint8*>(aString));
    HBufC* buffer = HBufC::NewL(ptr.Length());
    buffer->Des().Copy(ptr);
 
    return buffer;
}

Note that the caller is responsible of freeing the HBufC returned. Also note the trailing "L" in the function's name.

Depending on your code, you may prefere one of these over the others. Also, you may need to add extra checks (for instance, checking whether the char pointer is null or not).

Converting descriptors to C-strings may be done this way:

const char* descriptorToStringL(const TDesC& aDescriptor)
{
    TInt length = aDescriptor.Length();
 
    HBufC8* buffer = HBufC8::NewLC(length);
    buffer->Des().Copy(aDescriptor);
 
    char* str = new(ELeave) char[length + 1];
    Mem::Copy(str, buffer->Ptr(), length);
    str[length] = '/0';
 
    CleanupStack::PopAndDestroy(buffer);
 
    return str;
}

#include "sys.h" #include "delay.h" #include "usart.h" #include "lcd.h" #include "rtc.h" #include "exti.h" #include "key.h" #include "beep.h" #include "adc.h" #include "math.h" #include "alarm.h" // 包含闹钟功能的头文件 #include <string.h> //===================== 宏定义部分 ===================== // 时钟指针长度定义(像素) #define SECOND_HAND_LENGTH 100 // 秒针长度 #define MINUTE_HAND_LENGTH 75 // 分针长度 #define HOUR_HAND_LENGTH 50 // 时针长度 // 圆周率定义 #ifndef PI #define PI 3.14159265358979323846 #endif #define UART_BUF_SIZE 32 // 增大缓冲区大小 //===================== 全局变量部分 ===================== u8 alarmSetting; // 当前设置的闹钟编号(1-3) u8 alarmSettingMode; // 闹钟设置模式(0-设置小时,1-设置分钟) u8 alarmBeepCount; // 闹钟蜂鸣器计数(用于控制蜂鸣频率) // 串口接收相关变量 u8 uart_buf[UART_BUF_SIZE]; // 串口接收缓冲区(最多存储10字节) u8 uart_len = 0; // 接收数据长度 u8 uart_receive_finish = 0; // 接收完成标志 // 系统状态变量 u8 set; // 未明确使用的标志位 int sign; // 整点报时标志 int choice=0; // 菜单选择项索引 int th,tm,ts; // 时分秒(用于绘制时钟) int dy,dm,dd; // 年月日 int w; // 星期 //===================== 函数声明部分 ===================== u8 calculate_weekday(int year, int month, int day); //===================== 函数定义部分 ===================== /** * @brief 计算星期几(蔡勒公式) * @param year 年份(0-99) * @param month 月份(1-12)++ * @param day 日期(1-31) * @return 星期几(1-7,1表示星期一) */ u8 calculate_weekday(int year, int month, int day) { // 所有变量声明放在函数开头 int c, y, w; if (month < 3) { month += 12; year--; } c = year / 100; y = year % 100; w = c / 4 - 2 * c + y + y / 4 + (13 * (month + 1)) / 5 + day - 1; w = (w % 7 + 7) % 7; // 转换为0-6(0表示星期日) return w + 1; // 转换为1-7(1表示星期一) } /** * @brief 解析串口命令并执行相应操作 * @param cmd 命令缓冲区指针 */ void parse_command(u8 *cmd) { switch(cmd[0]) { // 设置系统时间命令(格式:Hxx:yy) case 'H': { int hour, minute; if(sscanf((char*)&cmd[1], "%d:%d", &hour, &minute) == 2) { if(hour >= 0 && hour < 24 && minute >= 0 && minute < 60) { RTC_Set_Time(hour, minute, 0, RTC_H12_AM); printf("Time set to: %02d:%02d:00\r\n", hour, minute); } else { printf("Invalid time! Range: 00:00-23:59\r\n"); } } else { printf("Invalid format! Use Hxx:yy (e.g., H14:30)\r\n"); } break; } // 设置闹钟时间命令(格式:AxxxHH:MM) case 'A': { int alarm_idx, hour, minute; if(sscanf((char*)&cmd[1], "%d%d:%d", &alarm_idx, &hour, &minute) == 3) { if(alarm_idx >= 1 && alarm_idx <= MAX_ALARMS && hour >= 0 && hour < 24 && minute >= 0 && minute < 60) { alarms[alarm_idx-1].hour = hour; alarms[alarm_idx-1].minute = minute; printf("Alarm %d set to: %02d:%02d\r\n", alarm_idx, hour, minute); } else { printf("Invalid alarm or time! Alarm: 1-%d, Time: 00:00-23:59\r\n", MAX_ALARMS); } } else { printf("Invalid format! Use AxxxHH:MM (e.g., A108:30)\r\n"); } break; } // 启用闹钟命令(格式:Exxx) case 'E': { int alarm_idx; if(sscanf((char*)&cmd[1], "%d", &alarm_idx) == 1) { if(alarm_idx >= 1 && alarm_idx <= MAX_ALARMS) { alarms[alarm_idx-1].enable = 1; printf("Alarm %d enabled\r\n", alarm_idx); } else { printf("Invalid alarm! Range: 1-%d\r\n", MAX_ALARMS); } } else { printf("Invalid format! Use Exxx (e.g., E1)\r\n"); } break; } // 禁用闹钟命令(格式:Dxxx) case 'D': { int alarm_idx; if(sscanf((char*)&cmd[1], "%d", &alarm_idx) == 1) { if(alarm_idx >= 1 && alarm_idx <= MAX_ALARMS) { alarms[alarm_idx-1].enable = 0; alarms[alarm_idx-1].alarmState = 0; printf("Alarm %d disabled\r\n", alarm_idx); } else { printf("Invalid alarm! Range: 1-%d\r\n", MAX_ALARMS); } } else { printf("Invalid format! Use Dxxx (e.g., D1)\r\n"); } break; } // 设置日期命令(格式:Yyy-mm-dd) case 'Y': { int year, month, day; u8 max_day; u8 week; const u8 month_days[13] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; //printf("Received Y command: %s\r\n", cmd); // 尝试解析两种格式:Yyy-mm-dd 或 YYYYY-mm-dd if(sscanf((char*)&cmd[1], "%d-%d-%d", &year, &month, &day) == 3) { // 如果解析出的年份是四位数(如2025),转换为两位数(如25) if(year >= 2000) year -= 2000; if(year < 0 || year > 99) { printf("Invalid year! Range: 00-99\r\n"); break; } if(month < 1 || month > 12) { printf("Invalid month! Range: 01-12\r\n"); break; } max_day = month_days[month]; if(month == 2) { if((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)) { max_day = 29; } } if(day < 1 || day > max_day) { printf("Invalid day! Max day for %02d is %d\r\n", month, max_day); break; } week = calculate_weekday(year, month, day); RTC_Set_Date(year, month, day, week); dy = year; dm = month; dd = day; w = week; printf("Date set to: 20%02d-%02d-%02d, Week: %d\r\n", year, month, day, week); } else { printf("sscanf failed, cmd[1]: %s\r\n", &cmd[1]); printf("Invalid format! Use Yyy-mm-dd (e.g., Y25-06-20)\r\n"); } break; } // 未知命令处理 default: printf("Unknown command: %s\r\n", cmd); printf("Supported commands:\r\n"); printf("Hxx:yy - Set time (e.g., H14:30)\r\n"); printf("AxxxHH:MM - Set alarm (e.g., A1 08:30)\r\n"); printf("Exxx - Enable alarm (e.g., E1)\r\n"); printf("Dxxx - Disable alarm (e.g., D1)\r\n"); printf("Yyy-mm-dd - Set date (e.g., Y23-06-20)\r\n"); break; } } /** * @brief 绘制时钟指针 * @param hour 小时 * @param minute 分钟 * @param second 秒 */ void draw_clock_hands(int hour, int minute, int second) { // 计算指针角度(弧度制) // 注意:0度对应12点方向,顺时针为正方向 float second_angle = (second * 6.0) * PI / 180.0; // 秒针:每秒6度 float minute_angle = (minute * 6.0 + second * 0.1) * PI / 180.0; // 分针:每分钟6度,每秒0.1度 float hour_angle = ((hour % 12) * 30.0 + minute * 0.5) * PI / 180.0; // 时针:每小时30度,每分钟0.5度 // 计算指针端点坐标(中心点为(240, 180)) // 注意:LCD坐标Y轴向下为正,需要取反以正确显示 int sec_x = 240 + SECOND_HAND_LENGTH * sin(second_angle); int sec_y = 180 - SECOND_HAND_LENGTH * cos(second_angle); int min_x = 240 + MINUTE_HAND_LENGTH * sin(minute_angle); int min_y = 180 - MINUTE_HAND_LENGTH * cos(minute_angle); int hour_x = 240 + HOUR_HAND_LENGTH * sin(hour_angle); int hour_y = 180 - HOUR_HAND_LENGTH * cos(hour_angle); // 绘制指针(时针黑色、分针蓝色、秒针红色) POINT_COLOR = BLACK; // 设置时针颜色为黑色 LCD_DrawLine(240, 180, hour_x, hour_y); // 绘制时针 POINT_COLOR = BLUE; // 设置分针颜色为蓝色 LCD_DrawLine(240, 180, min_x, min_y); // 绘制分针 POINT_COLOR = RED; // 设置秒针颜色为红色 LCD_DrawLine(240, 180, sec_x, sec_y); // 绘制秒针 } /** * @brief 主函数 */ int main(void) { RTC_TimeTypeDef RTC_TimeStruct; // RTC时间结构体 RTC_DateTypeDef RTC_DateStruct; // RTC日期结构体 short temp; u8 tbuf[40]; // 临时字符串缓冲区 u16 t=0; // 计时变量 int i; // 循环变量 // 系统初始化 set=0; NVIC_PriorityGroupConfig(NVIC_PriorityGroup_2); // 配置系统中断优先级分组2 delay_init(168); // 初始化延时函数 uart_init(115200); // 初始化串口,波特率115200 LCD_Init(); // 初始化LCD My_RTC_Init(); // 初始化RTC(实时时钟) EXTIX_Init(); // 初始化外部中断 BEEP_Init(); // 初始化蜂鸣器 Adc_Init(); // 初始化温度采集ADC RTC_Set_WakeUp(RTC_WakeUpClock_CK_SPRE_16bits,0); // 配置WAKE UP中断,1秒中断一次 BEEP=0; // 蜂鸣器初始状态关闭 // 闹钟初始化 for(i=0; i<MAX_ALARMS; i++){ alarms[i].enable = 0; // 禁用所有闹钟 alarms[i].hour = 7; // 默认闹钟时间为7:00 alarms[i].minute = 0; alarms[i].alarmState = 0; // 闹钟状态初始化为未触发 } alarmSetting = 0; alarmSettingMode = 0; alarmBeepCount = 0; // 初始化LCD显示 POINT_COLOR=BLACK; // 恢复字体颜色为黑色 // 绘制时钟背景 LCD_DrawRectangle(90, 30, 390, 330); // 绘制外框 LCD_DrawRectangle(120, 60, 360, 300); // 绘制内框 // 定义时钟绘制参数 #define SIDE_LENGTH 300 // 时钟边长 #define MARGIN 10 // 边距 // =========绘制时钟刻度(1-12小时标记) LCD_ShowString(235, 30+MARGIN, 10, 16, 16, "12"); // 12点 LCD_ShowString(390-MARGIN-16, 180, 5, 16, 16, "3"); // 3点 LCD_ShowString(240, 330-MARGIN-16, 5, 16, 16, "6"); // 6点 LCD_ShowString(90+MARGIN, 180, 5, 16, 16, "9"); // 9点 LCD_ShowString(315, 30+MARGIN, 5, 16, 16, "1"); // 1点 LCD_ShowString(390-MARGIN-16, 105, 5, 16, 16, "2"); // 2点 LCD_ShowString(390-MARGIN-16, 255, 5, 16, 16, "4"); // 4点 LCD_ShowString(315, 330-MARGIN-16, 5, 16, 16, "5"); // 5点 LCD_ShowString(165, 330-MARGIN-16, 5, 16, 16, "7"); // 7点 LCD_ShowString(90+MARGIN, 255, 5, 16, 16, "8"); // 8点 LCD_ShowString(90+MARGIN, 105, 10, 16, 16, "10"); // 10点 LCD_ShowString(165, 30+MARGIN, 10, 16, 16, "11"); // 11点 //--------------------串口操作提示 LCD_ShowString(160, 600, 600, 16, 16, "SetTime: H18:00"); LCD_ShowString(160, 620, 600, 16, 16, "SetDate: Y2025-06-20"); // 使用Y前缀 LCD_ShowString(160, 640, 600, 16, 16, "SetAlarm: A1 08:00"); LCD_ShowString(160, 660, 600, 16, 16, "Enable/Disable: E1/D1"); //============主循环 while(1) { t++; if((t%10)==0) // 每100ms刷新一次显示(t++一次为10ms,10次为100ms) { // ======刷新时钟指针 POINT_COLOR=BLACK; LCD_Fill(125, 65, 355, 295, WHITE); // 清除指针区域 draw_clock_hands(th, tm, ts); // 绘制时钟指针 // ======显示时间日期 RTC_GetTime(RTC_Format_BIN,&RTC_TimeStruct); // 读取RTC时间 RTC_GetDate(RTC_Format_BIN, &RTC_DateStruct); // 读取RTC日期 th=RTC_TimeStruct.RTC_Hours; tm=RTC_TimeStruct.RTC_Minutes; ts=RTC_TimeStruct.RTC_Seconds; dy=RTC_DateStruct.RTC_Year; dm=RTC_DateStruct.RTC_Month; dd=RTC_DateStruct.RTC_Date; w=RTC_DateStruct.RTC_WeekDay; POINT_COLOR=BLUE; sprintf((char*)tbuf,"20%02d-%02d-%02d",RTC_DateStruct.RTC_Year,RTC_DateStruct.RTC_Month,RTC_DateStruct.RTC_Date); LCD_ShowString(160, 340, 200, 24, 24, tbuf); // 显示日期 //=======显示周 switch(RTC_DateStruct.RTC_WeekDay) { case 1: LCD_ShowString(160, 370, 200, 24, 24, "Monday "); break; case 2: LCD_ShowString(160, 370, 200, 24, 24, "Tuesday "); break; case 3: LCD_ShowString(160, 370, 200, 24, 24, "Wednesday"); break; case 4: LCD_ShowString(160, 370, 200, 24, 24, "Thursday "); break; case 5: LCD_ShowString(160, 370, 200, 24, 24, "Friday "); break; case 6: LCD_ShowString(160, 370, 200, 24, 24, "Saturday "); break; case 7: LCD_ShowString(160, 370, 200, 24, 24, "Sunday "); break; } sprintf((char*)tbuf,"%02d:%02d:%02d",RTC_TimeStruct.RTC_Hours,RTC_TimeStruct.RTC_Minutes,RTC_TimeStruct.RTC_Seconds); LCD_ShowString(160, 400, 200, 24, 24, tbuf); // 显示时间 //======== 整点报时功能 if(RTC_TimeStruct.RTC_Minutes==0&&RTC_TimeStruct.RTC_Seconds==0){sign=1;} if(RTC_TimeStruct.RTC_Minutes==0&&RTC_TimeStruct.RTC_Seconds==10){sign=10;} if(sign==1){BEEP=!BEEP;delay_ms(20);BEEP=!BEEP;}//整点时蜂鸣器短鸣一次 // =============读取并显示温度 temp = Get_Temprate(); //获取内部温度传感器温度值 sprintf((char*)tbuf,"Temperature: %.2fC",(float)temp/100); //显示温度 LCD_ShowString(160, 430, 200, 24 , 24 , tbuf); } // =============闹钟状态显示与处理 for(i=0; i<MAX_ALARMS; i++){ char alarmBuf[40]; u16 textColor = BLUE; // 默认显示颜色为蓝色 // 构建闹钟状态显示字符串 if(alarms[i].enable){ if(alarms[i].alarmState == 1){ // 闹钟触发中 sprintf(alarmBuf, "Alarm%d: %02d:%02d ON ", i+1, alarms[i].hour, alarms[i].minute); textColor = RED; // 触发中的闹钟显示为红色 } else { sprintf(alarmBuf, "Alarm%d: %02d:%02d ON ", i+1, alarms[i].hour, alarms[i].minute); } } else { sprintf(alarmBuf, "Alarm%d: %02d:%02d OFF", i+1, alarms[i].hour, alarms[i].minute); } // 在LCD上显示闹钟状态 POINT_COLOR = textColor; LCD_ShowString(160, 460 + i*30, 200, 24, 24, (u8*)alarmBuf); } // ======闹钟触发检测 if(alarms[0].enable && th == alarms[0].hour && tm == alarms[0].minute && alarms[0].alarmState == 0){ alarms[0].alarmState = 1; alarmBeepCount = 0; BEEP = 1; // 启动蜂鸣器 } if(alarms[1].enable && th == alarms[1].hour && tm == alarms[1].minute && alarms[1].alarmState == 0){ alarms[1].alarmState = 1; alarmBeepCount = 0; BEEP = 1; // 启动蜂鸣器 } if(alarms[2].enable && th == alarms[2].hour && tm == alarms[2].minute && alarms[2].alarmState == 0){ alarms[2].alarmState = 1; alarmBeepCount = 0; BEEP = 1; // 启动蜂鸣器 } // 蜂鸣器控制(闹钟触发时) if(alarms[0].alarmState == 1 || alarms[1].alarmState == 1 || alarms[2].alarmState == 1){ alarmBeepCount++; // 每500ms切换蜂鸣器状态(20次*10ms=200ms) if(alarmBeepCount % 20 == 0){ BEEP = !BEEP; } // 10秒后自动停止蜂鸣(200次*10ms=2000ms=2秒) if(alarmBeepCount >= 200){ for(i=0; i<MAX_ALARMS; i++){ if(alarms[i].alarmState == 1){ alarms[i].alarmState = 2; } } BEEP = 0; // 确保蜂鸣器停止 } } // ===============选择菜单栏显示 switch(choice){ case 0: LCD_ShowString(160,560,200,24,24," "); break; // 时分设置 case 1: LCD_ShowString(160,560,200,24,24,"SetHour "); break; // 设置小时 case 2: LCD_ShowString(160,560,200,24,24,"SetMinute "); break; //年月日周设置 case 3: LCD_ShowString(160,560,200,24,24,"SetYear "); break; // 设置年份 case 4: LCD_ShowString(160,560,200,24,24,"SetMonth "); break; // 修正拼写错误:Mounth -> Month case 5: LCD_ShowString(160,560,200,24,24,"SetDay "); break; // 设置日期 // 闹钟1设置 case 6: LCD_ShowString(160,560,200,24,24,"SetAlarm1 Hour "); break; case 7: LCD_ShowString(160,560,200,24,24,"SetAlarm1 Minute "); break; case 8: LCD_ShowString(160,560,200,24,24,"Enable Alarm1 "); break; // 闹钟2设置 case 9: LCD_ShowString(160,560,200,24,24,"SetAlarm2 Hour "); break; case 10: LCD_ShowString(160,560,200,24,24,"SetAlarm2 Minute "); break; case 11: LCD_ShowString(160,560,200,24,24,"Enable Alarm2 "); break; // 闹钟3设置 case 12: LCD_ShowString(160,560,200,24,24,"SetAlarm3 Hour "); break; case 13: LCD_ShowString(160,560,200,24,24,"SetAlarm3 Minute "); break; case 14: LCD_ShowString(160,560,200,24,24,"Enable Alarm3 "); break; } // =============处理串口命令 if(uart_receive_finish) { uart_receive_finish = 0; parse_command(uart_buf); // 解析并执行串口命令 } } }详细解释一下
06-26
/** * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. * This file is a part of the CANN Open Software. * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). * Please refer to the License for details. You may not use this file except in compliance with the License. * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. * See LICENSE in the root of the software repository for the full text of the License. */ /*! * \file quant_batch_matmul_swiglu.cpp * \brief */ #include "register/op_def_registry.h" #include "tiling/tiling_api.h" #include "quant_batch_matmul_swiglu_tiling.h" namespace optiling { static ge::graphStatus TilingFunc(gert::TilingContext* context) { auto shape_a = context->GetInputTensor(0)->GetOriginShape(); auto shape_b = context->GetInputTensor(1)->GetOriginShape(); int32_t M = shape_a.GetDim(0); int32_t N = shape_b.GetDim(1); int32_t K = shape_a.GetDim(1); int32_t baseM = 64; int32_t baseN = 128; int32_t baseK = 128; auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); // 获取实际核心数 int32_t blockDim = ascendcPlatform.GetCoreNumAic(); matmul_tiling::MultiCoreMatmulTiling cubeTiling(ascendcPlatform); cubeTiling.SetDim(blockDim); // 设置核心数 // 计算每个核心处理的M维度大小(向上取整) int32_t perCoreM = (M + blockDim - 1) / blockDim; // 设置分块参数 - 使用SetFixSplit替代SetSplitStrategy cubeTiling.SetFixSplit(perCoreM, baseN, baseK); cubeTiling.SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); cubeTiling.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); cubeTiling.SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); cubeTiling.SetBias(false); cubeTiling.SetBufferSpace(-1, -1, -1); cubeTiling.SetShape(M, N, K); cubeTiling.SetOrgShape(M, N, K); cubeTiling.SetSingleShape(M, N, K); QuantBatchMatmulSwigluTilingData tiling; if (cubeTiling.GetTiling(tiling.cubeTilingData) == -1){ return ge::GRAPH_FAILED; } uint32_t stepM = 1; uint32_t stepN = 1; tiling.cubeTilingData.set_stepM(stepM); tiling.cubeTilingData.set_stepN(stepN); context->SetBlockDim(blockDim); context->SetTilingKey(1); tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); // 多核工作空间计算 size_t perCoreOutputSize = perCoreM * N * sizeof(int32_t); size_t totalUserWorkspace = perCoreOutputSize * blockDim; size_t systemWorkspaceSize = ascendcPlatform.GetLibApiWorkSpaceSize(); size_t *currentWorkspace = context->GetWorkspaceSizes(1); currentWorkspace[0] = totalUserWorkspace + systemWorkspaceSize; return ge::GRAPH_SUCCESS; } } namespace ge { static ge::graphStatus InferShape(gert::InferShapeContext* context) { const gert::Shape* x1_shape = context->GetInputShape(0); const gert::Shape* x2_shape = context->GetInputShape(1); gert::Shape* y_shape = context->GetOutputShape(0); y_shape->SetDimNum(x1_shape->GetDimNum()); y_shape->SetDim(1, x1_shape->GetDim(x1_shape->GetDimNum() - 2)); y_shape->SetDim(0, x2_shape->GetDim(x1_shape->GetDimNum() - 1)); return GRAPH_SUCCESS; } } namespace ops { class QuantBatchMatmulSwiglu : public OpDef { public: explicit QuantBatchMatmulSwiglu(const char *name) : OpDef(name) { this->Input("x1") .ParamType(REQUIRED) .DataType({ge::DT_INT8}) .Format({ge::FORMAT_ND}) .UnknownShapeFormat({ge::FORMAT_ND}) .IgnoreContiguous(); this->Input("x2") .ParamType(REQUIRED) .DataType({ge::DT_INT8}) .Format({ge::FORMAT_ND}) .UnknownShapeFormat({ge::FORMAT_ND}) .IgnoreContiguous(); this->Input("scale") .ParamType(REQUIRED) .DataType({ge::DT_FLOAT}) .Format({ge::FORMAT_ND}) .UnknownShapeFormat({ge::FORMAT_ND}); this->Input("offset") .ParamType(OPTIONAL) .DataType({ge::DT_FLOAT}) .Format({ge::FORMAT_ND}) .UnknownShapeFormat({ge::FORMAT_ND}); this->Input("bias") .ParamType(OPTIONAL) .DataType({ge::DT_BF16}) .Format({ge::FORMAT_ND}) .UnknownShapeFormat({ge::FORMAT_ND}); this->Input("pertoken_scale") .ParamType(OPTIONAL) .DataType({ge::DT_FLOAT}) .Format({ge::FORMAT_ND}) .UnknownShapeFormat({ge::FORMAT_ND}); this->Output("y") .ParamType(REQUIRED) .DataType({ge::DT_BF16}) .Format({ge::FORMAT_ND}) .UnknownShapeFormat({ge::FORMAT_ND}); // this->Attr("dtype").AttrType(REQUIRED).Int(); this->Attr("transpose_x1").AttrType(OPTIONAL).Bool(false); this->Attr("transpose_x2").AttrType(OPTIONAL).Bool(false); OpAICoreConfig aicore_config; aicore_config.DynamicCompileStaticFlag(true) .DynamicFormatFlag(true) .DynamicRankSupportFlag(true) .DynamicShapeSupportFlag(true) .NeedCheckSupportFlag(false) .PrecisionReduceFlag(true) .ExtendCfgInfo("aclnnSupport.value", "support_aclnn"); this->AICore().AddConfig("ascend910b", aicore_config); this->AICore().SetTiling(optiling::TilingFunc); this->SetInferShape(ge::InferShape); } }; OP_ADD(QuantBatchMatmulSwiglu); } // namespace ops #include "kernel_operator.h" #include "lib/matmul_intf.h" constexpr int BUFFER_NUM = 2; using namespace AscendC; using namespace matmul; // 算子模板类 template <typename aType, typename bType, typename cType, typename biasType, typename outType> class QuantBatchMatmulSwigluKernel { public: __aicore__ inline QuantBatchMatmulSwigluKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR y, GM_ADDR scale, GM_ADDR workspace, const TCubeTiling &tiling, TPipe *pipe); __aicore__ inline void Process(TPipe *pipe); __aicore__ inline void EpilogueCopyIn(int id); __aicore__ inline void EpilogueCompute(int id); __aicore__ inline void EpilogueCopyOut(int id); // 当前只考虑ND格式 Matmul<MatmulType<TPosition::GM, CubeFormat::ND, aType>, MatmulType<TPosition::GM, CubeFormat::ND, bType>, MatmulType<TPosition::GM, CubeFormat::ND, cType>, MatmulType<TPosition::GM, CubeFormat::ND, biasType>> matmulObj; GlobalTensor<aType> aGlobal; GlobalTensor<bType> bGlobal; GlobalTensor<outType> yGlobal; // Matmult+Swiglu计算结果 GlobalTensor<cType> cGlobal; // Matmult计算结果 GlobalTensor<float> scaleGlobal; TCubeTiling tiling; TQue<QuePosition::VECIN, BUFFER_NUM> inQueueCast; TQue<QuePosition::VECIN, 1> inQueueScale; TQue<QuePosition::VECOUT, BUFFER_NUM> outQueueCast; TBuf<TPosition::VECCALC> castTmp; }; template <typename aType, typename bType, typename cType, typename biasType, typename outType> __aicore__ inline void QuantBatchMatmulSwigluKernel<aType, bType, cType, biasType, outType>::Init( GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR y, GM_ADDR scale, GM_ADDR workspace, const TCubeTiling &tiling, TPipe *pipe) { this->tiling = tiling; aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ aType *>(a), tiling.M * tiling.Ka); bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ bType *>(b), tiling.Kb * tiling.N); cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ cType *>(c), tiling.M * tiling.N); // biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ biasType*>(bias), tiling.N); yGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ outType *>(y), tiling.M * tiling.N / 2); scaleGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(scale), tiling.N); // 应当设置 aGlobal = aGlobal[0]; bGlobal = bGlobal[0]; cGlobal = cGlobal[0]; // biasGlobal = biasGlobal[offsetBias]; yGlobal = yGlobal[0]; pipe->InitBuffer(inQueueCast, BUFFER_NUM, tiling.N * sizeof(cType)); pipe->InitBuffer(outQueueCast, BUFFER_NUM, tiling.N / 2 * sizeof(outType)); pipe->InitBuffer(castTmp, tiling.N * sizeof(float) * 2); pipe->InitBuffer(inQueueScale, 1, tiling.N * sizeof(float)); SetSysWorkspace(workspace); if (GetSysWorkSpacePtr() == nullptr) { return; } } // 总计算函数 template <typename aType, typename bType, typename cType, typename biasType, typename outType> __aicore__ inline void QuantBatchMatmulSwigluKernel<aType, bType, cType, biasType, outType>::Process(TPipe *pipe) { matmulObj.SetTensorA(aGlobal); matmulObj.SetTensorB(bGlobal); matmulObj.DisableBias(); // matmulObj.SetBias(biasGlobal); matmulObj.template IterateAll<false>(cGlobal, 0, false, true); matmulObj.WaitIterateAll(); matmulObj.End(); // 这里CV被分段了,思考如何CV流水并行。 PipeBarrier<PIPE_ALL>(); auto scaleLocal = inQueueScale.AllocTensor<float>(); DataCopy(scaleLocal, scaleGlobal, this->tiling.N); inQueueScale.EnQue(scaleLocal); // 提示 一个Cube Core 2个Vector Core 方法 GetSubBlockIdx() int epilogueCount = this->tiling.M / (GetBlockNum() * 2); for (int i = GetBlockIdx() * epilogueCount; i < (GetBlockIdx() + 1) * epilogueCount; i++) { EpilogueCopyIn(i); EpilogueCompute(i); EpilogueCopyOut(i); } } template <typename aType, typename bType, typename cType, typename biasType, typename outType> __aicore__ inline void QuantBatchMatmulSwigluKernel<aType, bType, cType, biasType, outType>::EpilogueCopyIn(int id) { // 输入Matmul计算结果 auto epilogueInLocal = inQueueCast.AllocTensor<cType>(); DataCopy(epilogueInLocal, cGlobal[id * tiling.N], tiling.N); inQueueCast.EnQue(epilogueInLocal); } template <typename aType, typename bType, typename cType, typename biasType, typename outType> __aicore__ inline void QuantBatchMatmulSwigluKernel<aType, bType, cType, biasType, outType>::EpilogueCompute(int id) { // scale反量化 swiglu激活计算 auto epilogueOutLocal = outQueueCast.AllocTensor<outType>(); auto epilogueInLocal = inQueueCast.DeQue<cType>(); auto scaleLocal = inQueueScale.DeQue<float>(); LocalTensor<float> tmpFloatBuf = castTmp.Get<float>(); LocalTensor<float> tmpSiluBuf = tmpFloatBuf[this->tiling.N]; Cast(tmpFloatBuf, epilogueInLocal, AscendC::RoundMode::CAST_CEIL, this->tiling.N); Mul(tmpFloatBuf, tmpFloatBuf, scaleLocal, this->tiling.N); Silu(tmpSiluBuf, tmpFloatBuf, this->tiling.N / 2); Mul(tmpFloatBuf, tmpSiluBuf, tmpFloatBuf[this->tiling.N / 2], this->tiling.N / 2); Cast(epilogueOutLocal, tmpFloatBuf, AscendC::RoundMode::CAST_CEIL, this->tiling.N / 2); outQueueCast.EnQue(epilogueOutLocal); inQueueCast.FreeTensor(epilogueInLocal); } template <typename aType, typename bType, typename cType, typename biasType, typename outType> __aicore__ inline void QuantBatchMatmulSwigluKernel<aType, bType, cType, biasType, outType>::EpilogueCopyOut(int id) { // 输出结果 auto epilogueOutLocal = outQueueCast.DeQue<outType>(); DataCopy(yGlobal[tiling.N / 2 * id], epilogueOutLocal, tiling.N / 2); outQueueCast.FreeTensor(epilogueOutLocal); } extern "C" __global__ __aicore__ void quant_batch_matmul_swiglu(GM_ADDR x1, GM_ADDR x2, GM_ADDR scale, GM_ADDR offset, GM_ADDR bias, GM_ADDR pertokenScale, GM_ADDR y, GM_ADDR workSpace, GM_ADDR tiling) { GM_ADDR user1 = GetUserWorkspace(workSpace); GET_TILING_DATA(tilingData, tiling); // TODO: user kernel impl QuantBatchMatmulSwigluKernel<int8_t, int8_t, int32_t, int32_t, bfloat16_t> QuantBatchMatmulSwigluKernel; TPipe pipe; QuantBatchMatmulSwigluKernel.Init(x1, x2, nullptr, user1, y, scale, workSpace, tilingData.cubeTilingData, &pipe); REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), QuantBatchMatmulSwigluKernel.matmulObj, &tilingData.cubeTilingData); if (TILING_KEY_IS(1)) { QuantBatchMatmulSwigluKernel.Process(&pipe); } }两个代码优化 是整个跑通的程序时间更简短 在昇腾npu AScend 910b处理器
07-10
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值