// Two ways to dynamically allocate a 2D array and to copy its data to and from device (GPU) memory are shown below:
/*
* Copyright XuHongzhi. All rights reserved.
* Date: 2012-4-22
*/
//
// This program demonstrates two ways to allocate a dynamic 2D array and to copy its data to and from device memory.
#include <stdio.h>
#include <cutil_inline.h>
#include <iostream>
using namespace std;
// Demonstrates two ways of round-tripping a dynamically allocated 2D float
// array (height x width) between host memory and device memory:
//   Method 1 (enabled):  one device allocation and one cudaMemcpy per row.
//   Method 2 (disabled): a single pitched device allocation moved with
//                        cudaMemcpy2D.
// In both methods the data copied back from the device is printed row by row,
// and every host and device allocation is released before exit.
//
// Parameters:
//   argc, argv - command line, forwarded to the CUT_DEVICE_INIT / CUT_EXIT
//                macros.
int main(int argc, char **argv)
{
    CUT_DEVICE_INIT(argc, argv); // initialize CUDA
#if 1
    // Method 1: row-by-row copy.
    float **CPU_ORIGN, **CPU_RET;          // host source data / data copied back
    float **GPU;                           // host array holding one device pointer per row
    int width = 5, height = 3;             // array width and height in elements
    size_t size = sizeof(float) * width;   // width of one row in bytes
    int row, col;

    // Allocate host memory and initialize it.
    CPU_ORIGN = new float*[height];
    CPU_RET = new float*[height];
    for (row = 0; row < height; ++row)
    {
        CPU_ORIGN[row] = new float[width];
        CPU_RET[row] = new float[width];
        for (col = 0; col < width; ++col)
        {
            CPU_ORIGN[row][col] = (float)(row + col);
            CPU_RET[row][col] = 0.0f;
        }
    }

    // Allocate device memory (one buffer per row) and zero it.
    GPU = new float*[height];
    for (row = 0; row < height; ++row)
    {
        cutilSafeCall(cudaMalloc((void**)&GPU[row], size));
        cutilSafeCall(cudaMemset(GPU[row], 0, size));
    }

    // Copy the host source data to the device, one row at a time.
    for (row = 0; row < height; ++row)
        cutilSafeCall(cudaMemcpy(GPU[row], CPU_ORIGN[row], size, cudaMemcpyHostToDevice));

    // Copy the device data back into the host result array, one row at a time.
    for (row = 0; row < height; ++row)
        cutilSafeCall(cudaMemcpy(CPU_RET[row], GPU[row], size, cudaMemcpyDeviceToHost));

    // Print the data that came back from the device.
    for (row = 0; row < height; ++row)
    {
        for (col = 0; col < width; ++col)
            std::cout << CPU_RET[row][col] << " ";
        std::cout << std::endl;
    }

    // Release device and host memory. Everything on the host was obtained with
    // new[], so it must be released with delete[] (not free()), and every row
    // must be released before the array of row pointers.
    for (row = 0; row < height; ++row)
    {
        cutilSafeCall(cudaFree(GPU[row]));
        delete[] CPU_ORIGN[row];
        delete[] CPU_RET[row];
    }
    delete[] GPU;
    delete[] CPU_ORIGN;
    delete[] CPU_RET;
#endif
#if 0
    // Method 2: whole-array copy.
    float **CPU_ORIGN, **CPU_RET;          // host row-pointer tables (source / copied back)
    float *GPU;                            // base address of the pitched device allocation
    int width = 5, height = 3;             // array width and height in elements
    size_t size = sizeof(float) * width;   // width of one row in bytes
    size_t pitch;                          // device row pitch in bytes, set by cudaMallocPitch
    int row, col;

    // Allocate host memory and initialize it. cudaMemcpy2D addresses host row r
    // at (base + r * pitch), so each host matrix is backed by ONE contiguous
    // buffer; the row-pointer tables only provide [row][col] indexing into it.
    CPU_ORIGN = new float*[height];
    CPU_RET = new float*[height];
    CPU_ORIGN[0] = new float[width * height];
    CPU_RET[0] = new float[width * height];
    for (row = 0; row < height; ++row)
    {
        CPU_ORIGN[row] = CPU_ORIGN[0] + row * width;
        CPU_RET[row] = CPU_RET[0] + row * width;
        for (col = 0; col < width; ++col)
        {
            CPU_ORIGN[row][col] = (float)(row + col);
            CPU_RET[row][col] = 0.0f;
        }
    }

    // Allocate pitched device memory and zero it.
    cutilSafeCall(cudaMallocPitch((void**)&GPU, &pitch, size, height));
    cutilSafeCall(cudaMemset2D(GPU, pitch, 0, size, height));

    // Copy the host source data to the device. The host pointer passed is the
    // contiguous data buffer, not the row-pointer table; host pitch == size.
    cutilSafeCall(cudaMemcpy2D(GPU, pitch, CPU_ORIGN[0], size, size, height, cudaMemcpyHostToDevice));

    // Copy the device data back into the contiguous host result buffer.
    cutilSafeCall(cudaMemcpy2D(CPU_RET[0], size, GPU, pitch, size, height, cudaMemcpyDeviceToHost));

    // Print the data that came back from the device.
    for (row = 0; row < height; ++row)
    {
        for (col = 0; col < width; ++col)
            std::cout << CPU_RET[row][col] << " ";
        std::cout << std::endl;
    }

    // Release device and host memory: data buffers first, then the
    // row-pointer tables (all allocated with new[], so delete[] is required).
    cutilSafeCall(cudaFree(GPU));
    delete[] CPU_ORIGN[0];
    delete[] CPU_RET[0];
    delete[] CPU_ORIGN;
    delete[] CPU_RET;
#endif
    CUT_EXIT(argc, argv); // shut down CUDA
    return 0;
}