#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <stdio.h>
using namespace std;
#define ARR_LEN 4096
/**
 * Abort the program if a CUDA runtime call did not succeed.
 *
 * @param err       Status code returned by a CUDA runtime API call.
 * @param err_info  Caller-supplied label identifying the call that was made;
 *                  it is echoed in the error message. May be null.
 *
 * When err is cudaSuccess the function returns normally. Otherwise it writes
 * the label, the runtime's description of the error, and the numeric code to
 * stderr, then terminates the process with exit status 1.
 */
void checkErr(cudaError_t err, const char* err_info)
{
    if (err != cudaSuccess)
    {
        // Streaming a null C string into an ostream is undefined; substitute a placeholder.
        const char* label = (err_info != nullptr) ? err_info : "(unknown)";
        // Report the actual failure reason, not just where it happened.
        std::cerr << "ERROR IN: " << label << ": " << cudaGetErrorString(err)
                  << " (code " << static_cast<int>(err) << ")" << std::endl;
        exit(1);
    }
}
/**
 * Element-wise addition: c[i] = a[i] + b[i] for every i in [0, ARR_LEN).
 *
 * Launch layout: only the x components of blockIdx / blockDim / threadIdx are
 * used, so the kernel expects a 1D grid of 1D blocks. Each thread handles at
 * most one element; threads whose global index is >= ARR_LEN do nothing, so
 * the launch must supply at least ARR_LEN threads in total to cover the array.
 *
 * @param a  First input array, at least ARR_LEN floats, readable from the device.
 * @param b  Second input array, at least ARR_LEN floats, readable from the device.
 * @param c  Output array, at least ARR_LEN floats, writable from the device.
 */
__global__ void addKernel(const float* a, const float* b, float* c)
{
    // Keep the index in a wide unsigned type: narrowing the unsigned product to
    // int could yield a negative value on very large launches, which would slip
    // past the bounds check below and index out of range.
    const size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx < static_cast<size_t>(ARR_LEN))
    {
        c[idx] = a[idx] + b[idx];
    }
}
int main()
{
float *host_arr1, *host_arr2, *host_out;
float *dev_arr1, *dev_arr2, *dev_out;
//srand(1099);
int idev = 0;
cudaDeviceProp deviceProp;
checkErr(cudaSetDevice(idev),"cudaSetDevice");
checkErr(cudaGetDeviceProperties(&deviceProp, idev),"cudaGetDeviceProperties");
if (!deviceProp.canMapHostMemory)
{
fprintf(