测试在kernel里面交换全局内存的值

最新推荐文章于 2023-04-03 23:55:52 发布

maowenge

最新推荐文章于 2023-04-03 23:55:52 发布

阅读量407

点赞数

CC 4.0 BY-SA版权

文章标签： cuda gpu

本文链接：https://blog.youkuaiyun.com/maowenge/article/details/40743847

本文用一个单线程（<<<1,1>>>）启动的CUDA内核遍历全局内存中的两个数组，最初用于测试在内核中交换两者的值（交换代码目前被注释掉，实际执行的是逐元素相加）；程序用CUDA事件为GPU端计时、用clock()为CPU端的相同循环计时，以对比两者在相同任务上的运行耗时。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

// Test: swapping values held in global memory from inside a kernel.
#include<iostream>
#include<cuda.h>
#include<cuda_runtime.h>
#include<time.h>
using namespace std;
// Number of int elements in each of the two arrays; used as the host array
// length, the device allocation size and the loop bound inside the kernel.
const int N=10000;

// Walks the first N elements of two global-memory arrays and combines each
// pair a[i], b[i].
//
// Launch layout: any 1D grid of 1D blocks. Elements are distributed over the
// launched threads with a grid-stride loop, so each index is visited by
// exactly one thread. With the <<<1,1>>> launch used in this file the single
// thread visits every index 0..N-1 in order.
// Shared memory: none.
// Preconditions: a and b must each point to at least N ints of device memory.
//
// NOTE: the sum is only kept in the local `temp` and never stored, so the
// kernel has no observable effect on a or b, and an optimizing compiler may
// remove the loop entirely.
__global__ void change(int *a,int *b)
{
    const int first=blockIdx.x*blockDim.x+threadIdx.x;
    const int step=gridDim.x*blockDim.x;
    int temp;
    for(int i=first;i<N;i+=step)
    {
        /* Disabled swap variant. The last line must store temp (the old
           a[i]) into b[i]; writing a[i] back would just copy b over both.
        temp=a[i];
        a[i]=b[i];
        b[i]=temp;*/
        temp=a[i]+b[i];
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err,const char *what)
{
    if(err==cudaSuccess) return true;
    cerr<<what<<" failed: "<<cudaGetErrorString(err)<<endl;
    return false;
}

// Times 1000 launches of `change` on a single GPU thread (CUDA events) and
// the same 1000 x N loop on the CPU (clock()), printing both in milliseconds
// as "gpu__<ms>" and "cpu__<ms>".
// Returns 0 on success and 1 if any CUDA runtime call fails.
int main(void)
{
    int ha[N],hb[N];
    int *da=0,*db=0;
    cudaEvent_t start=0,stop=0;
    float elapsed=0.0f;
    const size_t bytes=N*sizeof(int);

    // Host input: ha[i]=i, hb[i]=2*i.
    for(int i=0;i<N;i++)
    {
        ha[i]=i;
        hb[i]=2*i;
    }

    // Allocate, upload and start the GPU timer; && stops at the first failure.
    bool good=cudaOk(cudaMalloc((void**)&da,bytes),"cudaMalloc(da)")
            &&cudaOk(cudaMalloc((void**)&db,bytes),"cudaMalloc(db)")
            &&cudaOk(cudaMemcpy(da,ha,bytes,cudaMemcpyHostToDevice),"cudaMemcpy(ha->da)")
            &&cudaOk(cudaMemcpy(db,hb,bytes,cudaMemcpyHostToDevice),"cudaMemcpy(hb->db)")
            &&cudaOk(cudaEventCreate(&start),"cudaEventCreate(start)")
            &&cudaOk(cudaEventCreate(&stop),"cudaEventCreate(stop)")
            &&cudaOk(cudaEventRecord(start,0),"cudaEventRecord(start)");

    if(good)
    {
        for(int i=0;i<1000;i++)
        {change<<<1,1>>>(da,db);}

        // Launch-configuration errors only surface through cudaGetLastError();
        // execution errors surface at the following synchronization.
        good=cudaOk(cudaGetLastError(),"change<<<1,1>>> launch")
           &&cudaOk(cudaDeviceSynchronize(),"cudaDeviceSynchronize")
           &&cudaOk(cudaEventRecord(stop,0),"cudaEventRecord(stop)")
           &&cudaOk(cudaEventSynchronize(stop),"cudaEventSynchronize(stop)")
           &&cudaOk(cudaEventElapsedTime(&elapsed,start,stop),"cudaEventElapsedTime");
    }

    if(good)
    {
        // cudaEventElapsedTime reports milliseconds.
        cout<<"gpu__"<<elapsed<<endl;

        good=cudaOk(cudaMemcpy(ha,da,bytes,cudaMemcpyDeviceToHost),"cudaMemcpy(da->ha)")
           &&cudaOk(cudaMemcpy(hb,db,bytes,cudaMemcpyDeviceToHost),"cudaMemcpy(db->hb)");
    }

    if(good)
    {
        /*for(int i=0;i<N;i++)
        {
        cout<<ha[i]<<" "<<hb[i]<<endl;
        }*/
        const clock_t h_start=clock();
        int temp;
        // NOTE: temp is never read afterwards, so an optimizing compiler may
        // drop this loop; build without optimization for a meaningful number.
        for(int j=0;j<1000;j++)
        {
            for(int i=0;i<N;i++)
            {
                /*temp=ha[i];
                ha[i]=hb[i];
                hb[i]=temp;*/
                temp=ha[i]+hb[i];
            }
        }
        const clock_t h_elapsed=clock()-h_start;
        // clock() counts ticks; convert to milliseconds so the value uses the
        // same unit as the GPU timing above.
        cout<<"cpu__"<<1000.0*(double)h_elapsed/CLOCKS_PER_SEC<<endl;
        /*for(int i=0;i<N;i++)
        {
        cout<<ha[i]<<" "<<hb[i]<<endl;
        }*/
    }

    // Best-effort cleanup on every path; failures here do not change the
    // exit status. cudaFree accepts a null pointer.
    if(stop) cudaEventDestroy(stop);
    if(start) cudaEventDestroy(start);
    cudaFree(db);
    cudaFree(da);
    return good?0:1;
}