CUDA 中的计时方式(使用 CUDA 事件,需要包含头文件):#include <cuda_runtime.h>
<pre code_snippet_id="1724831" snippet_file_name="blog_20160622_1_6734195" name="code" class="cpp"> cudaEvent_t start1;//stream计时
// Second event handle; it is recorded right after the kernel launch below.
cudaEvent_t stop1;

// Create both events before any timing starts.
cudaEventCreate(&start1);
cudaEventCreate(&stop1);

// Record the start event on the NULL stream, launch the kernel, then record
// the stop event on the same stream so the two events bracket the kernel.
cudaEventRecord(start1, NULL);
Kernel<<<numblocks, numthreads>>>(a, b, c);
cudaEventRecord(stop1, NULL);

// The kernel launch returns to the host immediately, so wait for stop1 to
// complete before asking for the elapsed time.
cudaEventSynchronize(stop1);

// Store the time elapsed between the two events in msecTotal.
// NOTE(review): msecTotal is not declared in this snippet; presumably a float
// (milliseconds) declared by the surrounding code -- confirm.
cudaEventElapsedTime(&msecTotal, start1, stop1);
需要注意的是,函数 cudaEventSynchronize() 不可或缺:CUDA 的 kernel 函数以异步方式执行,调用后立刻返回,如果不等待 stop1 事件完成就去读取时间,会导致计时不准确。cudaEventSynchronize(stop1)会使得直到GP