sSMtoCores 结构体由 SM 版本号和与之对应的 CUDA 核心数两个字段组成:
// One table entry pairing an SM version with its core count.
typedef struct {
int SM; // 0xMm (hexadecimal notation), M = SM major version
// and m = SM minor version, e.g. 3.0 -> 0x30, 5.0 -> 0x50
int Cores; // core count for this SM version
} sSMtoCores;
遍历下面的数组,就可以查到对应的 CUDA 核心数。例如 5.0 对应 0x50,其核心数为 128 个:
// Lookup table from SM version (0xMm: M = major, m = minor) to the number of
// CUDA cores per SM. The table is scanned linearly; the {-1, -1} entry
// terminates the scan and must remain the last element.
sSMtoCores nGpuArchCoresPerSM[] = {
    {0x30, 192},
    {0x32, 192},
    {0x35, 192},
    {0x37, 192},
    {0x50, 128},
    {0x52, 128},
    {0x53, 128},
    {0x60, 64},
    {0x61, 128},
    {0x62, 128},
    {0x70, 64},
    {0x72, 64},
    {0x75, 64},
    // Compute capability 8.x and 9.0 were missing, so lookups for those
    // devices fell off the end of the table.
    {0x80, 64},
    {0x86, 128},
    {0x87, 128},
    {0x89, 128},
    {0x90, 128},
    {-1, -1}};
((major << 4) + minor) 将整型的主版本号(如 5)和次版本号(如 0)组合成十六进制形式的 0x50:
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}