2.1
//program unroll
// Phase 1: element-wise products. Every (i, j, k) triple uses its own device,
// device_id = i*n^2+j*n+k (n^2 is read as "n squared" here, not XOR -- TODO confirm).
// "program unroll" presumably means the iterations are spread across devices
// instead of being executed one after another -- TODO confirm.
for (int i=0; i<n; i++){
for (int j=0; j<n; j++){
for (int k=0; k<n; k++){
// Operand from A at address i*n+k (i.e. A[i][k] if A is stored row-major -- assumed).
int A_data = ReadData(device_id=i*n^2+j*n+k, A_memory_address=i*n+k);
// Operand from B at address k*n+j (i.e. B[k][j] if B is stored row-major -- assumed).
int B_data = ReadData(device_id=i*n^2+j*n+k, B_memory_address=k*n+j);
// Partial product for this (i, j, k), computed on the same device that read the operands.
int C_data = A_data*B_data on device_id=i*n^2+j*n+k;
}
}
}
// Phase 2: tree reduction of the n partial products of every (i, j) pair.
// On entry, device i*n^2+j*n+k holds one partial product in C_data (n^2 means n squared).
// On exit, device i*n^2+j*n (the k=0 device) holds the sum over all k in C_data.
// Assumes n is a power of two, so that every level splits into equal halves.
//program unroll: the (i, j) pairs are independent of each other
for (int i=0; i<n; i++){
    for (int j=0; j<n; j++){
        // stride = size of the device groups merged at this level: 2, 4, ..., n.
        // Levels run sequentially (log2(n) of them): each level consumes the
        // partial sums produced by the previous one.
        for (int stride=2; stride<=n; stride*=2){
            //program unroll: the n/stride groups of one level are independent
            for (int krange_idx=0; krange_idx<n; krange_idx+=stride){
                // The group leader (offset 0) already holds the sum of the lower half
                // of its group; the device at offset stride/2 holds the sum of the
                // upper half. One move + add per group merges the two halves.
                move C_data from device_id=krange_idx+stride/2+i*n^2+j*n to device_id=krange_idx+i*n^2+j*n
                and add to C_data on device_id=krange_idx+i*n^2+j*n
            }
        }
    }
}
//program unroll
// Phase 3: write back one result per (i, j) pair.
// The value is taken from device_id = i*n^2+j*n, i.e. the k=0 device of that pair,
// which is the device the reduction phase moves data toward when krange_idx=0.
// NOTE(review): the destination address for C is not given to StoreData here -- confirm
// how StoreData decides where the result is written.
for (int i=0; i<n; i++){
for (int j=0; j<n; j++){
StoreData(device_id=i*n^2+j*n, C_data);
}
}
第二问:当 n 不是 2 的幂时,可以对数据进行补零,使 n 扩展为 2 的幂。
2.2
K | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 3 | 3 | 4 | 5 | 5 | 5 | 5 | 6 | 7 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
temp | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
在 device 上分配 n 个空间(temp),从左到右对相邻元素求差:令 temp[0]=1,当 K[i]≠K[i-1] 时 temp[i]=1,否则 temp[i]=0;然后将 temp=1 的位置(即每个不同取值首次出现的位置)提取出来。