通过矩阵乘法性能的测量,理解Cache的作用

本文通过代码测量矩阵乘法的性能,以理解 Cache 的作用。代码定义了 ijk、kij、jki 三种循环顺序的矩阵乘法函数,在测量前先清空 Cache,然后对 N = 100 到 1000 的不同规模矩阵各重复测量 10 次。每次测量用 clock() 计时,并把耗时(clock 计时单位,而非真正的 CPU 周期数)除以 N×N 后输出,最后给出 10 次的平均值,以此比较不同访存顺序下 Cache 对矩阵乘法性能的影响。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

通过矩阵乘法性能的测量,理解Cache的作用

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Code to clear cache */
/* Core i7 has 8M L3 cache, which is 1M longs */
#define ASIZE (1 << 20)
/* Cache block size is 64 bytes */
#define STRIDE 8

/*
 * Both objects are volatile so the compiler cannot prove that the reads in
 * clear() are dead (stuff is never written) and drop the whole loop.
 */
static volatile long stuff[ASIZE];
static volatile long sink;

/*
 * Walk over `stuff`, reading one element every STRIDE longs, and fold the
 * values into `sink`. Intended to be called before a measurement so that
 * previously cached data is displaced.
 */
static void clear(void)
{
    long x = sink;
    int i;

    for (i = 0; i < ASIZE; i += STRIDE)
        x += stuff[i];
    sink = x;
}

/*
 * Time the i-j-k ordering of an N x N matrix multiplication.
 *
 * Three N x N matrices are allocated; A and B are filled with 1.0 and C
 * with 0.0. The multiplication is then run 10 times. For each run the
 * elapsed clock() ticks divided by N*N are printed, followed by
 * "average is <mean>".
 *
 * N       matrix dimension.
 * returns the mean over the 10 runs of (clock() ticks / (N*N));
 *         0.0 if N <= 0 (nothing is measured or printed);
 *         -1.0 if memory could not be allocated.
 *
 * Note: the figure is in clock() ticks, not CPU cycles, and is normalised
 * by N*N; divide by a further N for a per-inner-iteration cost.
 */
double ijk(int N)
{
    enum { RUNS = 10 };
    double **A, **B, **C;
    double elapsed[RUNS];
    double total = 0.0;
    double average = -1.0;
    int rows = 0; /* number of fully allocated rows, for cleanup */
    int done = 0;
    int i, j, k, t;

    if (N <= 0)
        return 0.0;

    A = malloc((size_t)N * sizeof *A);
    B = malloc((size_t)N * sizeof *B);
    C = malloc((size_t)N * sizeof *C);
    if (A == NULL || B == NULL || C == NULL)
        goto cleanup;

    for (rows = 0; rows < N; rows++) {
        A[rows] = malloc((size_t)N * sizeof **A);
        B[rows] = malloc((size_t)N * sizeof **B);
        C[rows] = malloc((size_t)N * sizeof **C);
        if (A[rows] == NULL || B[rows] == NULL || C[rows] == NULL) {
            /* Release the partial row here; `rows` only counts full rows. */
            free(A[rows]);
            free(B[rows]);
            free(C[rows]);
            goto cleanup;
        }
        /* Give every element a defined value before it is read. */
        for (j = 0; j < N; j++) {
            A[rows][j] = 1.0;
            B[rows][j] = 1.0;
            C[rows][j] = 0.0;
        }
    }

    for (t = 0; t < RUNS; t++) {
        clock_t start = clock();
        clock_t stop;

        for (i = 0; i < N; i++) {
            for (j = 0; j < N; j++) {
                double sum = 0.0;

                for (k = 0; k < N; k++)
                    sum += A[i][k] * B[k][j];
                C[i][j] += sum;
            }
        }

        stop = clock();
        /* Divide in double so N*N cannot overflow int. */
        elapsed[t] = (double)(stop - start) / ((double)N * (double)N);
    }

    for (t = 0; t < RUNS; t++) {
        total += elapsed[t];
        printf("%lf ", elapsed[t]);
    }
    average = total / RUNS;
    printf("average is %lf", average);
    done = 1;

cleanup:
    if (!done)
        fprintf(stderr, "ijk: out of memory for N = %d\n", N);
    for (i = 0; i < rows; i++) {
        free(A[i]);
        free(B[i]);
        free(C[i]);
    }
    free(A);
    free(B);
    free(C);
    return average;
}
/*
 * Time the k-i-j ordering of an N x N matrix multiplication.
 *
 * Three N x N matrices are allocated; A and B are filled with 1.0 and C
 * with 0.0. The multiplication is then run 10 times. For each run the
 * elapsed clock() ticks divided by N*N are printed, followed by
 * "average is <mean>".
 *
 * In this ordering A[i][k] is loop-invariant in the inner loop, and the
 * inner loop walks one row of B and one row of C.
 *
 * N       matrix dimension.
 * returns the mean over the 10 runs of (clock() ticks / (N*N));
 *         0.0 if N <= 0 (nothing is measured or printed);
 *         -1.0 if memory could not be allocated.
 *
 * Note: the figure is in clock() ticks, not CPU cycles, and is normalised
 * by N*N; divide by a further N for a per-inner-iteration cost.
 */
double kij(int N)
{
    enum { RUNS = 10 };
    double **A, **B, **C;
    double elapsed[RUNS];
    double total = 0.0;
    double average = -1.0;
    int rows = 0; /* number of fully allocated rows, for cleanup */
    int done = 0;
    int i, j, k, t;

    if (N <= 0)
        return 0.0;

    A = malloc((size_t)N * sizeof *A);
    B = malloc((size_t)N * sizeof *B);
    C = malloc((size_t)N * sizeof *C);
    if (A == NULL || B == NULL || C == NULL)
        goto cleanup;

    for (rows = 0; rows < N; rows++) {
        A[rows] = malloc((size_t)N * sizeof **A);
        B[rows] = malloc((size_t)N * sizeof **B);
        C[rows] = malloc((size_t)N * sizeof **C);
        if (A[rows] == NULL || B[rows] == NULL || C[rows] == NULL) {
            /* Release the partial row here; `rows` only counts full rows. */
            free(A[rows]);
            free(B[rows]);
            free(C[rows]);
            goto cleanup;
        }
        /* Give every element a defined value before it is read. */
        for (j = 0; j < N; j++) {
            A[rows][j] = 1.0;
            B[rows][j] = 1.0;
            C[rows][j] = 0.0;
        }
    }

    for (t = 0; t < RUNS; t++) {
        clock_t start = clock();
        clock_t stop;

        for (k = 0; k < N; k++) {
            for (i = 0; i < N; i++) {
                /* Each (k, i) pair adds A[i][k] * (row k of B) to row i of C. */
                const double r = A[i][k];

                for (j = 0; j < N; j++)
                    C[i][j] += r * B[k][j];
            }
        }

        stop = clock();
        /* Divide in double so N*N cannot overflow int. */
        elapsed[t] = (double)(stop - start) / ((double)N * (double)N);
    }

    for (t = 0; t < RUNS; t++) {
        total += elapsed[t];
        printf("%lf ", elapsed[t]);
    }
    average = total / RUNS;
    printf("average is %lf", average);
    done = 1;

cleanup:
    if (!done)
        fprintf(stderr, "kij: out of memory for N = %d\n", N);
    for (i = 0; i < rows; i++) {
        free(A[i]);
        free(B[i]);
        free(C[i]);
    }
    free(A);
    free(B);
    free(C);
    return average;
}

/*
 * Time the j-k-i ordering of an N x N matrix multiplication.
 *
 * Three N x N matrices are allocated; A and B are filled with 1.0 and C
 * with 0.0. The multiplication is then run 10 times. For each run the
 * elapsed clock() ticks divided by N*N are printed, followed by
 * "average is <mean>".
 *
 * In this ordering B[k][j] is loop-invariant in the inner loop, and the
 * inner loop walks down one column of A and one column of C.
 *
 * N       matrix dimension.
 * returns the mean over the 10 runs of (clock() ticks / (N*N));
 *         0.0 if N <= 0 (nothing is measured or printed);
 *         -1.0 if memory could not be allocated.
 *
 * Note: the figure is in clock() ticks, not CPU cycles, and is normalised
 * by N*N; divide by a further N for a per-inner-iteration cost.
 */
double jki(int N)
{
    enum { RUNS = 10 };
    double **A, **B, **C;
    double elapsed[RUNS];
    double total = 0.0;
    double average = -1.0;
    int rows = 0; /* number of fully allocated rows, for cleanup */
    int done = 0;
    int i, j, k, t;

    if (N <= 0)
        return 0.0;

    A = malloc((size_t)N * sizeof *A);
    B = malloc((size_t)N * sizeof *B);
    C = malloc((size_t)N * sizeof *C);
    if (A == NULL || B == NULL || C == NULL)
        goto cleanup;

    for (rows = 0; rows < N; rows++) {
        A[rows] = malloc((size_t)N * sizeof **A);
        B[rows] = malloc((size_t)N * sizeof **B);
        C[rows] = malloc((size_t)N * sizeof **C);
        if (A[rows] == NULL || B[rows] == NULL || C[rows] == NULL) {
            /* Release the partial row here; `rows` only counts full rows. */
            free(A[rows]);
            free(B[rows]);
            free(C[rows]);
            goto cleanup;
        }
        /* Give every element a defined value before it is read. */
        for (j = 0; j < N; j++) {
            A[rows][j] = 1.0;
            B[rows][j] = 1.0;
            C[rows][j] = 0.0;
        }
    }

    for (t = 0; t < RUNS; t++) {
        clock_t start = clock();
        clock_t stop;

        for (j = 0; j < N; j++) {
            for (k = 0; k < N; k++) {
                /* Each (j, k) pair adds (column k of A) * B[k][j] to column j of C. */
                const double r = B[k][j];

                for (i = 0; i < N; i++)
                    C[i][j] += A[i][k] * r;
            }
        }

        stop = clock();
        /* Divide in double so N*N cannot overflow int. */
        elapsed[t] = (double)(stop - start) / ((double)N * (double)N);
    }

    for (t = 0; t < RUNS; t++) {
        total += elapsed[t];
        printf("%lf ", elapsed[t]);
    }
    average = total / RUNS;
    printf("average is %lf", average);
    done = 1;

cleanup:
    if (!done)
        fprintf(stderr, "jki: out of memory for N = %d\n", N);
    for (i = 0; i < rows; i++) {
        free(A[i]);
        free(B[i]);
        free(C[i]);
    }
    free(A);
    free(B);
    free(C);
    return average;
}

/*
 * Run the three loop orderings for N = 100, 200, ..., 1000 and print the
 * timings each one reports.
 *
 * The timing routines allocate and initialise their own matrices, so no
 * matrices are set up here.
 */
int main(void)
{
    int N;

    for (N = 100; N <= 1000; N += 100) {
        printf("\n");
        printf("dang N wei %d shi\n", N);

        /*
         * Clear the cache before each measurement. NOTE(review): jki()
         * touches its matrices while initialising them, after this call,
         * so this mainly displaces data left over from the previous run.
         */
        clear();
        printf("ijk matrix mult time :");
        ijk(N);
        printf("\n");

        clear();
        printf("kij matrix mult time :");
        kij(N);
        printf("\n");

        clear();
        printf("jki matrix mult time :");
        jki(N);
        printf("\n");
    }
    return 0;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值