Buffers 测试套件专注于验证 OpenCL 缓冲区对象的各种操作,包括读写、拷贝、填充、映射/解映射以及子缓冲区功能。这些测试确保内存对象在主机和设备之间正确传输和操作。
5.1 缓冲区创建与内存标志
5.1.1 内存标志组合测试
OpenCL 支持多种内存标志控制缓冲区的分配和访问行为:
// Memory-flag combinations exercised by the buffer tests. Each entry is
// passed as the `flags` argument when a buffer is created.
const cl_mem_flags flag_set[] = {
CL_MEM_ALLOC_HOST_PTR, // allocate the buffer in host-accessible memory
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, // allocate host-accessible memory and copy the host data into it
CL_MEM_USE_HOST_PTR, // use the memory behind the host-supplied pointer
CL_MEM_COPY_HOST_PTR, // copy the host data into a newly allocated buffer
0 // no flags: default allocation
};
内存标志说明
| 标志 | 行为 | 使用场景 |
|---|---|---|
| CL_MEM_ALLOC_HOST_PTR | 在主机可访问内存中分配缓冲区 | 频繁的主机-设备数据交换 |
| CL_MEM_USE_HOST_PTR | 直接使用主机提供的内存指针 | 可能实现零拷贝(取决于实现,设备端仍可能缓存一份副本),减少内存占用 |
| CL_MEM_COPY_HOST_PTR | 拷贝主机数据到新分配的设备内存 | 需要独立的设备副本 |
| 0 (默认) | 在设备内存中分配 | 纯设备端计算 |
5.1.2 缓冲区创建测试
/*
 * Creates a 1024-element cl_int buffer with the given memory flags and checks
 * that the flags reported by clGetMemObjectInfo(CL_MEM_FLAGS) contain every
 * requested bit.
 *
 * context  OpenCL context in which the buffer is created.
 * flags    Memory flags passed unchanged to clCreateBuffer.
 *
 * Returns 0 on success and -1 when host allocation fails or a requested flag
 * is missing. OpenCL failures are reported through test_error, which is
 * defined outside this snippet and is assumed to log and return on failure.
 */
int test_buffer_creation(cl_context context, cl_mem_flags flags)
{
    const size_t element_count = 1024;
    const size_t buffer_size = element_count * sizeof(cl_int);
    cl_int error;
    cl_int *host_data = NULL;
    int status = 0;

    // Host memory is only needed when the buffer is created from a host pointer.
    if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))
    {
        size_t min_alignment = get_min_alignment(context);
        host_data = (cl_int *)align_malloc(buffer_size, min_alignment);
        if (host_data == NULL)
        {
            log_error("ERROR: Unable to allocate %zu bytes of aligned host memory\n",
                      buffer_size);
            return -1;
        }
        for (size_t i = 0; i < element_count; i++)
            host_data[i] = (cl_int)i;
    }

    cl_mem buffer = clCreateBuffer(context, flags, buffer_size, host_data, &error);
    if (error != 0) // CL_SUCCESS is 0
    {
        // Release the host allocation before reporting, so it cannot leak.
        if (host_data) align_free(host_data);
        test_error(error, "Unable to create buffer");
        return -1; // reached only if test_error does not return by itself
    }

    // cl_mem_flags is a 64-bit bitfield; print it with a matching conversion.
    log_info("Buffer created with flags: 0x%llx\n", (unsigned long long)flags);

    // Query the flags back and make sure nothing that was requested got lost.
    cl_mem_flags returned_flags = 0;
    error = clGetMemObjectInfo(buffer, CL_MEM_FLAGS,
                               sizeof(returned_flags), &returned_flags, NULL);
    if (error == 0 && (returned_flags & flags) != flags)
    {
        log_error("ERROR: Returned flags (0x%llx) don't match requested (0x%llx)\n",
                  (unsigned long long)returned_flags, (unsigned long long)flags);
        status = -1;
    }

    // Clean up on every path. The buffer goes first because with
    // CL_MEM_USE_HOST_PTR it still refers to host_data.
    clReleaseMemObject(buffer);
    if (host_data) align_free(host_data);

    test_error(error, "Unable to get mem flags");
    return status;
}
5.2 缓冲区读写操作
5.2.1 buffer_read - 缓冲区读取测试
验证从设备缓冲区读取数据到主机的功能。
测试内核
__kernel void test_buffer_read_int(__global int *dst)
{
    // Every work-item stores the sentinel 65537 (2^16 + 1) at its own index.
    const int index = get_global_id(0);
    const int sentinel = (1 << 16) + 1;
    dst[index] = sentinel;
}
主机端测试代码
/*
 * Runs the test_buffer_read_int kernel over a freshly created buffer, reads
 * the buffer back with a blocking clEnqueueReadBuffer and checks that every
 * element equals (1 << 16) + 1.
 *
 * deviceID      Unused in this function.
 * context       Context used to create the buffer and the kernel.
 * queue         Command queue used for the kernel launch and the read.
 * num_elements  Number of cl_int elements in the buffer.
 *
 * Returns 0 on success and -1 on allocation failure or data mismatch.
 * OpenCL failures go through test_error (defined elsewhere; assumed to log
 * and return on failure).
 */
int test_buffer_read_int(cl_device_id deviceID, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    cl_int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffer;
    const size_t byte_count = sizeof(cl_int) * num_elements;

    buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, byte_count, NULL, &error);
    test_error(error, "Unable to create buffer");

    error = create_single_kernel_helper(context, &program, &kernel, 1,
                                        buffer_read_int_kernel_code,
                                        "test_buffer_read_int");
    test_error(error, "Unable to create kernel");

    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
    test_error(error, "Unable to set kernel arg");

    size_t global_size = num_elements;
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
                                   &global_size, NULL, 0, NULL, NULL);
    test_error(error, "Unable to enqueue kernel");

    // Host memory is acquired only now, so the early returns above cannot leak it.
    cl_int *host_buffer = (cl_int *)malloc(byte_count);
    if (host_buffer == NULL)
    {
        log_error("ERROR: Unable to allocate host buffer\n");
        return -1;
    }

    error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, byte_count,
                                host_buffer, 0, NULL, NULL);
    if (error != 0) // CL_SUCCESS is 0
    {
        free(host_buffer);
        test_error(error, "Unable to read buffer");
        return -1; // reached only if test_error does not return by itself
    }

    // Compare against the value the kernel writes; stop at the first mismatch.
    const cl_int expected = (1 << 16) + 1;
    int status = 0;
    for (int i = 0; i < num_elements; i++)
    {
        if (host_buffer[i] != expected)
        {
            log_error("ERROR: Data mismatch at index %d: expected %d, got %d\n",
                      i, expected, host_buffer[i]);
            status = -1;
            break;
        }
    }

    if (status == 0)
        log_info("Buffer read test passed\n");

    free(host_buffer);
    return status;
}
5.2.2 向量类型读取测试
测试不同向量类型的缓冲区读取:
// int2 variant: both components of each element receive 65537.
__kernel void test_buffer_read_int2(__global int2 *dst)
{
    const int index = get_global_id(0);
    // Explicit splat of the scalar sentinel into both vector components.
    dst[index] = (int2)((1 << 16) + 1);
}
// int4 variant: all four components of each element receive 65537.
__kernel void test_buffer_read_int4(__global int4 *dst)
{
    const int index = get_global_id(0);
    // Explicit splat of the scalar sentinel into all vector components.
    dst[index] = (int4)((1 << 16) + 1);
}
// The int8 and int16 kernels follow the same pattern...
验证向量数据:
// 验证 int4 数据
cl_int4 *host_buffer = (cl_int4*)malloc(sizeof(cl_int4) * num_elements);
error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
sizeof(cl_int4) * num_elements,
host_buffer, 0, NULL, NULL);
for (int i = 0; i < num_elements; i++)
{
if (host_buffer[i].s[0] != expected ||
host_buffer[i].s[1] != expected ||
host_buffer[i].s[2] != expected ||
host_buffer[i].s[3] != expected)
{
log_error("ERROR: int4 data mismatch at index %d\n", i);
return -1;
}
}
5.2.3 buffer_write - 缓冲区写入测试
验证从主机写入数据到设备缓冲区的功能。
测试内核(验证写入的数据)
__kernel void test_buffer_write_int(__global int *src, __global int *dst)
{
    // Work-item i forwards src[i] to dst[i] unchanged.
    const int index = get_global_id(0);
    const int value = src[index];
    dst[index] = value;
}
主机端测试流程
/*
 * Writes random cl_int data to a source buffer with clEnqueueWriteBuffer,
 * copies it to a destination buffer with the test_buffer_write_int kernel,
 * reads the destination back and checks it matches what was written.
 *
 * deviceID      Unused in this function.
 * context       Context used to create the buffers and the kernel.
 * queue         Command queue used for all enqueued commands.
 * num_elements  Number of cl_int elements to transfer.
 *
 * Returns 0 on success and -1 on allocation failure or data mismatch.
 * OpenCL failures go through test_error (defined elsewhere; assumed to log
 * and return on failure).
 */
int test_buffer_write_int(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue, int num_elements)
{
    cl_int error;
    clMemWrapper src_buffer, dst_buffer;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const size_t byte_count = sizeof(cl_int) * num_elements;

    // All device-side setup happens before any host memory is acquired, so
    // these early returns have nothing to clean up by hand.
    src_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, byte_count, NULL, &error);
    test_error(error, "Unable to create source buffer");
    dst_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, byte_count, NULL, &error);
    test_error(error, "Unable to create dest buffer");

    error = create_single_kernel_helper(context, &program, &kernel, 1,
                                        buffer_write_int_kernel_code,
                                        "test_buffer_write_int");
    test_error(error, "Unable to create kernel");

    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_buffer);
    error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &dst_buffer);
    test_error(error, "Unable to set kernel args");

    cl_int *host_data = (cl_int *)malloc(byte_count);
    cl_int *result = (cl_int *)malloc(byte_count);
    if (host_data == NULL || result == NULL)
    {
        log_error("ERROR: Unable to allocate host memory\n");
        free(host_data);
        free(result);
        return -1;
    }

    // The generator is only needed to fill host_data, so release it at once.
    MTdata d = init_genrand(gRandomSeed);
    for (int i = 0; i < num_elements; i++)
        host_data[i] = (cl_int)genrand_int32(d);
    free_mtdata(d);

    error = clEnqueueWriteBuffer(queue, src_buffer, CL_TRUE, 0, byte_count,
                                 host_data, 0, NULL, NULL);
    if (error != 0) // CL_SUCCESS is 0
    {
        free(host_data);
        free(result);
        test_error(error, "Unable to write buffer");
        return -1; // reached only if test_error does not return by itself
    }

    size_t global_size = num_elements;
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
                                   &global_size, NULL, 0, NULL, NULL);
    if (error != 0)
    {
        free(host_data);
        free(result);
        test_error(error, "Unable to enqueue kernel");
        return -1;
    }

    error = clEnqueueReadBuffer(queue, dst_buffer, CL_TRUE, 0, byte_count,
                                result, 0, NULL, NULL);
    if (error != 0)
    {
        free(host_data);
        free(result);
        test_error(error, "Unable to read result");
        return -1;
    }

    // The data must survive the write, the kernel copy and the read unchanged.
    int status = 0;
    for (int i = 0; i < num_elements; i++)
    {
        if (result[i] != host_data[i])
        {
            log_error("ERROR: Data mismatch at %d: expected %d, got %d\n",
                      i, host_data[i], result[i]);
            status = -1;
            break;
        }
    }

    if (status == 0)
        log_info("Buffer write test passed\n");

    free(host_data);
    free(result);
    return status;
}
5.2.4 异步读写测试
测试异步(非阻塞)的缓冲区读写操作:
/*
 * Enqueues a non-blocking write of host_data into buffer, then a non-blocking
 * read that waits on the write's event, waits for the read to finish and
 * checks that the data read back equals host_data.
 *
 * queue         Command queue used for both transfers.
 * buffer        Buffer of at least num_elements cl_int values.
 * host_data     Source data; must stay valid until this function returns.
 * num_elements  Number of cl_int elements to transfer.
 *
 * Returns 0 on success and -1 on allocation failure or data mismatch.
 * OpenCL failures go through test_error (defined elsewhere; assumed to log
 * and return on failure).
 */
int test_buffer_async_read_write(cl_command_queue queue, cl_mem buffer,
                                 cl_int *host_data, int num_elements)
{
    cl_int error;
    cl_event write_event = NULL;
    cl_event read_event = NULL;
    const size_t byte_count = sizeof(cl_int) * num_elements;

    // Allocate before enqueuing anything, so a failed allocation leaves no
    // command in flight.
    cl_int *result = (cl_int *)malloc(byte_count);
    if (result == NULL)
    {
        log_error("ERROR: Unable to allocate host memory\n");
        return -1;
    }

    // Asynchronous (non-blocking) write.
    error = clEnqueueWriteBuffer(queue, buffer, CL_FALSE, 0, byte_count,
                                 host_data, 0, NULL, &write_event);
    if (error != 0) // CL_SUCCESS is 0
    {
        free(result);
        test_error(error, "Unable to enqueue async write");
        return -1; // reached only if test_error does not return by itself
    }

    // Asynchronous read that depends on the write having completed.
    error = clEnqueueReadBuffer(queue, buffer, CL_FALSE, 0, byte_count,
                                result, 1, &write_event, &read_event);
    if (error != 0)
    {
        // Best effort: let the pending write finish with host_data before
        // handing control back to the caller.
        clWaitForEvents(1, &write_event);
        clReleaseEvent(write_event);
        free(result);
        test_error(error, "Unable to enqueue async read");
        return -1;
    }

    error = clWaitForEvents(1, &read_event);
    if (error != 0)
    {
        // The read may still be writing into result, so result is
        // deliberately not freed here; only the event handles are released.
        clReleaseEvent(write_event);
        clReleaseEvent(read_event);
        test_error(error, "Unable to wait for events");
        return -1;
    }

    int status = 0;
    for (int i = 0; i < num_elements; i++)
    {
        if (result[i] != host_data[i])
        {
            log_error("ERROR: Async read/write data mismatch at %d\n", i);
            status = -1;
            break;
        }
    }

    // Shared cleanup for both the pass and the mismatch path.
    clReleaseEvent(write_event);
    clReleaseEvent(read_event);
    free(result);
    return status;
}
5.3 缓冲区拷贝与填充
5.3.1 buffer_copy - 缓冲区拷贝
验证 clEnqueueCopyBuffer 在设备端拷贝缓冲区数据。
/*
 * Creates a source buffer initialised from random host data, copies it to a
 * destination buffer with clEnqueueCopyBuffer, reads the destination back and
 * checks that it equals the input.
 *
 * deviceID      Unused in this function.
 * context       Context used to create both buffers.
 * queue         Command queue used for the copy and the read.
 * num_elements  Number of cl_int elements to copy.
 *
 * Returns 0 on success and -1 on allocation failure or data mismatch.
 * OpenCL failures go through test_error (defined elsewhere; assumed to log
 * and return on failure).
 */
int test_buffer_copy(cl_device_id deviceID, cl_context context,
                     cl_command_queue queue, int num_elements)
{
    cl_int error;
    clMemWrapper src_buffer, dst_buffer;
    const size_t byte_count = sizeof(cl_int) * num_elements;

    // The destination needs no host data, so create it before any host
    // memory is held.
    dst_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, byte_count, NULL, &error);
    test_error(error, "Unable to create dest buffer");

    cl_int *input = (cl_int *)malloc(byte_count);
    cl_int *output = (cl_int *)malloc(byte_count);
    if (input == NULL || output == NULL)
    {
        log_error("ERROR: Unable to allocate host memory\n");
        free(input);
        free(output);
        return -1;
    }

    // Random input; output is pre-filled with a poison value so stale data
    // cannot pass verification. The cast makes the wrap to a negative cl_int
    // explicit.
    MTdata d = init_genrand(gRandomSeed);
    for (int i = 0; i < num_elements; i++)
    {
        input[i] = (cl_int)genrand_int32(d);
        output[i] = (cl_int)0xDEADBEEF;
    }
    free_mtdata(d);

    src_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                byte_count, input, &error);
    if (error != 0) // CL_SUCCESS is 0
    {
        free(input);
        free(output);
        test_error(error, "Unable to create source buffer");
        return -1; // reached only if test_error does not return by itself
    }

    // Device-side copy of the whole buffer.
    error = clEnqueueCopyBuffer(queue, src_buffer, dst_buffer, 0, 0,
                                byte_count, 0, NULL, NULL);
    if (error != 0)
    {
        free(input);
        free(output);
        test_error(error, "Unable to copy buffer");
        return -1;
    }

    error = clEnqueueReadBuffer(queue, dst_buffer, CL_TRUE, 0, byte_count,
                                output, 0, NULL, NULL);
    if (error != 0)
    {
        free(input);
        free(output);
        test_error(error, "Unable to read buffer");
        return -1;
    }

    int status = 0;
    for (int i = 0; i < num_elements; i++)
    {
        if (output[i] != input[i])
        {
            log_error("ERROR: Copy failed at %d: expected %d, got %d\n",
                      i, input[i], output[i]);
            status = -1;
            break;
        }
    }

    if (status == 0)
        log_info("Buffer copy test passed\n");

    free(input);
    free(output);
    return status;
}
5.3.2 buffer_partial_copy - 部分拷贝
测试缓冲区的部分区域拷贝:
/*
 * Copies `size` elements from element offset src_start of a source buffer to
 * element offset dst_start of a destination buffer, then checks that the
 * copied range matches the source and that every other destination element
 * still holds its initial value (-1).
 *
 * queue         Command queue used for the copy and the read.
 * context       Context used to create both buffers.
 * num_elements  Number of cl_int elements in each buffer.
 * src_start     First source element to copy.
 * dst_start     First destination element to overwrite.
 * size          Number of elements to copy.
 *
 * Returns 0 on success and -1 on allocation failure or verification failure.
 * OpenCL failures go through test_error (defined elsewhere; assumed to log
 * and return on failure). Range validation is left to clEnqueueCopyBuffer.
 */
int test_buffer_partial_copy(cl_command_queue queue, cl_context context,
                             int num_elements, cl_uint src_start,
                             cl_uint dst_start, int size)
{
    cl_int error;
    clMemWrapper src_buffer, dst_buffer;
    const size_t byte_count = sizeof(cl_int) * num_elements;

    cl_int *input = (cl_int *)malloc(byte_count);
    cl_int *output = (cl_int *)malloc(byte_count);
    if (input == NULL || output == NULL)
    {
        log_error("ERROR: Unable to allocate host memory\n");
        free(input);
        free(output);
        return -1;
    }

    for (int i = 0; i < num_elements; i++)
    {
        input[i] = i * 100;
        output[i] = -1; // marker that must survive outside the copied range
    }

    // Check each creation separately; otherwise a failure of the first call
    // would be overwritten by the result of the second.
    src_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                byte_count, input, &error);
    if (error != 0) // CL_SUCCESS is 0
    {
        free(input);
        free(output);
        test_error(error, "Unable to create source buffer");
        return -1; // reached only if test_error does not return by itself
    }

    dst_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                byte_count, output, &error);
    if (error != 0)
    {
        free(input);
        free(output);
        test_error(error, "Unable to create dest buffer");
        return -1;
    }

    // Partial copy: offsets and length are passed in bytes.
    error = clEnqueueCopyBuffer(queue, src_buffer, dst_buffer,
                                src_start * sizeof(cl_int),
                                dst_start * sizeof(cl_int),
                                size * sizeof(cl_int),
                                0, NULL, NULL);
    if (error != 0)
    {
        free(input);
        free(output);
        test_error(error, "Unable to copy buffer region");
        return -1;
    }

    error = clEnqueueReadBuffer(queue, dst_buffer, CL_TRUE, 0, byte_count,
                                output, 0, NULL, NULL);
    if (error != 0)
    {
        free(input);
        free(output);
        test_error(error, "Unable to read buffer");
        return -1;
    }

    // Use size_t throughout so signed and unsigned indices are not mixed.
    const size_t region_begin = dst_start;
    const size_t region_end = region_begin + (size_t)size;
    int status = 0;
    for (size_t i = 0; i < (size_t)num_elements; i++)
    {
        if (i >= region_begin && i < region_end)
        {
            // Inside the copied range: must equal the matching source element.
            const size_t src_idx = i - region_begin + src_start;
            if (output[i] != input[src_idx])
            {
                log_error("ERROR: Copied region mismatch at %d\n", (int)i);
                status = -1;
                break;
            }
        }
        else if (output[i] != -1)
        {
            // Outside the copied range: the initial marker must be intact.
            log_error("ERROR: Non-copied region changed at %d\n", (int)i);
            status = -1;
            break;
        }
    }

    if (status == 0)
        log_info("Partial buffer copy test passed\n");

    free(input);
    free(output);
    return status;
}
5.3.3 buffer_fill - 缓冲区填充
使用 clEnqueueFillBuffer 填充缓冲区(OpenCL 1.2+):
/*
 * Fills a whole buffer with a 4-byte pattern (TEST_PRIME_INT) using
 * clEnqueueFillBuffer, reads it back and checks that every element equals
 * the pattern.
 *
 * deviceID      Unused in this function.
 * context       Context used to create the buffer.
 * queue         Command queue used for the fill and the read.
 * num_elements  Number of cl_int elements in the buffer.
 *
 * Returns 0 on success and -1 on allocation failure or pattern mismatch.
 * OpenCL failures go through test_error (defined elsewhere; assumed to log
 * and return on failure).
 */
int test_buffer_fill_int(cl_device_id deviceID, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    cl_int error;
    clMemWrapper buffer;
    const size_t byte_count = sizeof(cl_int) * num_elements;

    buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, byte_count, NULL, &error);
    test_error(error, "Unable to create buffer");

    // The pattern is one cl_int, repeated over the whole buffer.
    cl_int fill_pattern = TEST_PRIME_INT;
    error = clEnqueueFillBuffer(queue, buffer, &fill_pattern,
                                sizeof(cl_int), 0, byte_count,
                                0, NULL, NULL);
    test_error(error, "Unable to fill buffer");

    // Host memory is acquired only now, so the returns above cannot leak it.
    cl_int *result = (cl_int *)malloc(byte_count);
    if (result == NULL)
    {
        log_error("ERROR: Unable to allocate host memory\n");
        return -1;
    }

    error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, byte_count,
                                result, 0, NULL, NULL);
    if (error != 0) // CL_SUCCESS is 0
    {
        free(result);
        test_error(error, "Unable to read buffer");
        return -1; // reached only if test_error does not return by itself
    }

    int status = 0;
    for (int i = 0; i < num_elements; i++)
    {
        if (result[i] != fill_pattern)
        {
            log_error("ERROR: Fill pattern mismatch at %d: "
                      "expected %d, got %d\n",
                      i, fill_pattern, result[i]);
            status = -1;
            break;
        }
    }

    if (status == 0)
        log_info("Buffer fill test passed\n");

    free(result);
    return status;
}
填充结构体数据:
// Two-member record used as the fill pattern in the struct fill example.
typedef struct {
cl_int a; // integer member of the pattern
cl_float b; // floating-point member of the pattern
} TestStruct;
// Schematic example (parameter list and verification elided): fills
// num_elements TestStruct records of `buffer` with one struct-sized pattern.
// NOTE(review): error, queue, buffer and num_elements are not declared in
// this excerpt; they presumably come from the elided parameter list.
// NOTE(review): clEnqueueFillBuffer only accepts power-of-two pattern sizes
// up to 128 bytes - confirm sizeof(TestStruct) satisfies this on the target.
int test_buffer_fill_struct(...)
{
TestStruct fill_pattern;
fill_pattern.a = TEST_PRIME_INT;
fill_pattern.b = TEST_PRIME_FLOAT;
// Pattern size is one whole struct; the filled range is num_elements structs.
error = clEnqueueFillBuffer(queue, buffer, &fill_pattern,
sizeof(TestStruct), 0,
sizeof(TestStruct) * num_elements,
0, NULL, NULL);
// ... verification ...
}
5.4 子缓冲区测试
5.4.1 sub_buffers_read_write - 子缓冲区读写
子缓冲区允许创建父缓冲区的区域视图:
/*
 * Splits a parent buffer into two non-overlapping sub-buffers (first and
 * second half), writes distinct data through each sub-buffer, reads the
 * whole parent back and checks that both halves hold what was written.
 *
 * deviceID      Unused in this function.
 * context       Context used to create the parent buffer.
 * queue         Command queue used for the writes and the read.
 * num_elements  Number of cl_int elements in the parent buffer.
 *
 * Returns 0 on success and -1 on allocation failure or data mismatch.
 * OpenCL failures go through test_error (defined elsewhere; assumed to log
 * and return on failure).
 *
 * NOTE(review): the second sub-buffer's origin is not aligned to
 * CL_DEVICE_MEM_BASE_ADDR_ALIGN here; clCreateSubBuffer fails with
 * CL_MISALIGNED_SUB_BUFFER_OFFSET if the device requires more.
 */
int test_sub_buffers_read_write(cl_device_id deviceID, cl_context context,
                                cl_command_queue queue, int num_elements)
{
    cl_int error;
    clMemWrapper parent_buffer;
    // Wrappers instead of raw cl_mem so the sub-buffers are released on every
    // exit path. clMemWrapper is presumed to release on scope exit, which the
    // parent buffer already relies on - confirm.
    clMemWrapper sub_buffer1, sub_buffer2;

    // Work in whole elements so both regions stay element-sized even when
    // num_elements is odd (the trailing element is then left untouched).
    const size_t half_count = (size_t)num_elements / 2;
    const size_t half_bytes = half_count * sizeof(cl_int);
    const size_t buffer_size = sizeof(cl_int) * num_elements;

    parent_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                   buffer_size, NULL, &error);
    test_error(error, "Unable to create parent buffer");

    // Sub-buffer 1: first half of the parent.
    cl_buffer_region region1;
    region1.origin = 0;
    region1.size = half_bytes;
    sub_buffer1 = clCreateSubBuffer(parent_buffer, CL_MEM_READ_WRITE,
                                    CL_BUFFER_CREATE_TYPE_REGION,
                                    &region1, &error);
    test_error(error, "Unable to create sub-buffer 1");

    // Sub-buffer 2: second half of the parent.
    cl_buffer_region region2;
    region2.origin = half_bytes;
    region2.size = half_bytes;
    sub_buffer2 = clCreateSubBuffer(parent_buffer, CL_MEM_READ_WRITE,
                                    CL_BUFFER_CREATE_TYPE_REGION,
                                    &region2, &error);
    test_error(error, "Unable to create sub-buffer 2");

    // Host memory is acquired only after all OpenCL objects exist.
    cl_int *data1 = (cl_int *)malloc(half_bytes);
    cl_int *data2 = (cl_int *)malloc(half_bytes);
    cl_int *result = (cl_int *)malloc(buffer_size);
    if (data1 == NULL || data2 == NULL || result == NULL)
    {
        log_error("ERROR: Unable to allocate host memory\n");
        free(data1);
        free(data2);
        free(result);
        return -1;
    }

    // Distinct value ranges so the two halves cannot be confused.
    for (size_t i = 0; i < half_count; i++)
    {
        data1[i] = (cl_int)i;
        data2[i] = (cl_int)i + 1000;
    }

    error = clEnqueueWriteBuffer(queue, sub_buffer1, CL_TRUE, 0,
                                 half_bytes, data1, 0, NULL, NULL);
    if (error != 0) // CL_SUCCESS is 0
    {
        free(data1);
        free(data2);
        free(result);
        test_error(error, "Unable to write to sub-buffer 1");
        return -1; // reached only if test_error does not return by itself
    }

    error = clEnqueueWriteBuffer(queue, sub_buffer2, CL_TRUE, 0,
                                 half_bytes, data2, 0, NULL, NULL);
    if (error != 0)
    {
        free(data1);
        free(data2);
        free(result);
        test_error(error, "Unable to write to sub-buffer 2");
        return -1;
    }

    // Read everything back through the parent to see both halves at once.
    error = clEnqueueReadBuffer(queue, parent_buffer, CL_TRUE, 0,
                                buffer_size, result, 0, NULL, NULL);
    if (error != 0)
    {
        free(data1);
        free(data2);
        free(result);
        test_error(error, "Unable to read from parent buffer");
        return -1;
    }

    int status = 0;
    for (size_t i = 0; i < half_count && status == 0; i++)
    {
        if (result[i] != data1[i])
        {
            log_error("ERROR: Sub-buffer 1 data mismatch at %d\n", (int)i);
            status = -1;
        }
    }
    for (size_t i = 0; i < half_count && status == 0; i++)
    {
        if (result[i + half_count] != data2[i])
        {
            log_error("ERROR: Sub-buffer 2 data mismatch at %d\n", (int)i);
            status = -1;
        }
    }

    if (status == 0)
        log_info("Sub-buffer read/write test passed\n");

    free(data1);
    free(data2);
    free(result);
    return status;
}
5.4.2 sub_buffers_overlapping - 重叠子缓冲区
测试子缓冲区区域重叠的行为(应该被允许,但需要谨慎同步):
/*
 * Creates two overlapping sub-buffers of one parent - A covering the first
 * three quarters, B covering the last three quarters - writes through A,
 * reads through B, and checks that the overlapping region agrees.
 *
 * deviceID      Unused in this function.
 * context       Context used to create the parent buffer.
 * queue         Command queue used for the write and the read.
 * num_elements  Number of cl_int elements in the parent buffer.
 *
 * Returns 0 on success and -1 on allocation failure or data mismatch.
 * OpenCL failures go through test_error (defined elsewhere; assumed to log
 * and return on failure).
 *
 * NOTE(review): region_B.origin is not aligned to
 * CL_DEVICE_MEM_BASE_ADDR_ALIGN here; clCreateSubBuffer fails with
 * CL_MISALIGNED_SUB_BUFFER_OFFSET if the device requires more.
 */
int test_sub_buffers_overlapping(cl_device_id deviceID, cl_context context,
                                 cl_command_queue queue, int num_elements)
{
    cl_int error;
    clMemWrapper parent_buffer;
    // Wrappers instead of raw cl_mem so the sub-buffers are released on every
    // exit path. clMemWrapper is presumed to release on scope exit, which the
    // parent buffer already relies on - confirm.
    clMemWrapper sub_buffer_A, sub_buffer_B;

    // Work in whole elements: computing the quarters in bytes gives regions
    // that are not a multiple of sizeof(cl_int) unless num_elements % 4 == 0.
    const size_t quarter_count = (size_t)num_elements / 4;
    const size_t quarter_bytes = quarter_count * sizeof(cl_int);
    const size_t buffer_size = sizeof(cl_int) * num_elements;

    parent_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                   buffer_size, NULL, &error);
    test_error(error, "Unable to create parent buffer");

    // Sub-buffer A: [0, 3/4) of the parent.
    cl_buffer_region region_A;
    region_A.origin = 0;
    region_A.size = 3 * quarter_bytes;
    sub_buffer_A = clCreateSubBuffer(parent_buffer, CL_MEM_READ_WRITE,
                                     CL_BUFFER_CREATE_TYPE_REGION,
                                     &region_A, &error);
    test_error(error, "Unable to create sub-buffer A");

    // Sub-buffer B: [1/4, 1) of the parent, overlapping A on [1/4, 3/4).
    cl_buffer_region region_B;
    region_B.origin = quarter_bytes;
    region_B.size = 3 * quarter_bytes;
    sub_buffer_B = clCreateSubBuffer(parent_buffer, CL_MEM_READ_WRITE,
                                     CL_BUFFER_CREATE_TYPE_REGION,
                                     &region_B, &error);
    test_error(error, "Unable to create sub-buffer B");

    // Host memory is acquired only after all OpenCL objects exist.
    cl_int *data_A = (cl_int *)malloc(region_A.size);
    cl_int *data_B = (cl_int *)malloc(region_B.size);
    if (data_A == NULL || data_B == NULL)
    {
        log_error("ERROR: Unable to allocate host memory\n");
        free(data_A);
        free(data_B);
        return -1;
    }

    for (size_t i = 0; i < region_A.size / sizeof(cl_int); i++)
        data_A[i] = (cl_int)i;

    // Write through A ...
    error = clEnqueueWriteBuffer(queue, sub_buffer_A, CL_TRUE, 0,
                                 region_A.size, data_A, 0, NULL, NULL);
    if (error != 0) // CL_SUCCESS is 0
    {
        free(data_A);
        free(data_B);
        test_error(error, "Unable to write to sub-buffer A");
        return -1; // reached only if test_error does not return by itself
    }

    // ... make sure the write has completed before touching the overlap ...
    error = clFinish(queue);
    if (error != 0)
    {
        free(data_A);
        free(data_B);
        test_error(error, "Unable to finish queue");
        return -1;
    }

    // ... and read through B, which includes the overlapping region.
    error = clEnqueueReadBuffer(queue, sub_buffer_B, CL_TRUE, 0,
                                region_B.size, data_B, 0, NULL, NULL);
    if (error != 0)
    {
        free(data_A);
        free(data_B);
        test_error(error, "Unable to read from sub-buffer B");
        return -1;
    }

    // The overlap starts at B's first element; that element sits at index
    // region_B.origin / sizeof(cl_int) within A.
    const size_t overlap_size = region_A.size - region_B.origin;
    const size_t offset_in_A = region_B.origin / sizeof(cl_int);
    int status = 0;
    for (size_t i = 0; i < overlap_size / sizeof(cl_int); i++)
    {
        if (data_B[i] != data_A[offset_in_A + i])
        {
            log_error("ERROR: Overlapping region data mismatch at %zu\n", i);
            status = -1;
            break;
        }
    }

    if (status == 0)
        log_info("Overlapping sub-buffers test passed\n");

    free(data_A);
    free(data_B);
    return status;
}
5.4.3 子缓冲区对齐要求
创建子缓冲区时必须满足对齐要求:
// Query the device's base-address alignment requirement.
// This is a fragment: error, deviceID, region and offset come from the
// surrounding code, and align_to is a helper defined elsewhere.
cl_uint base_align;
error = clGetDeviceInfo(deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
sizeof(base_align), &base_align, NULL);
test_error(error, "Unable to get base address alignment");
log_info("Device requires %u-bit alignment\n", base_align);
// CL_DEVICE_MEM_BASE_ADDR_ALIGN is reported in bits, so a sub-buffer origin
// must be a multiple of (base_align / 8) bytes.
size_t align_bytes = base_align / 8;
// NOTE(review): align_to presumably rounds offset to a multiple of
// align_bytes - confirm the rounding direction against its definition.
region.origin = align_to(offset, align_bytes);
5.5 缓冲区映射与解映射
5.5.1 buffer_map_read - 映射读取
通过映射访问缓冲区数据,避免显式拷贝:
/*
 * Populates a buffer with the test_buffer_read_int kernel, maps it for
 * reading with a blocking clEnqueueMapBuffer and compares every element with
 * TEST_PRIME_INT through the mapped pointer.
 *
 * Returns 0 when all elements match and -1 on the first mismatch (after
 * unmapping). OpenCL failures go through test_error, defined elsewhere.
 */
int test_buffer_map_read_int(cl_device_id deviceID, cl_context context,
                             cl_command_queue queue, int num_elements)
{
    cl_int error;
    clMemWrapper buffer;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const size_t byte_count = sizeof(cl_int) * num_elements;

    // Device buffer that the kernel will populate.
    buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, byte_count, NULL, &error);
    test_error(error, "Unable to create buffer");

    // Build the kernel that produces the data.
    error = create_single_kernel_helper(context, &program, &kernel, 1,
                                        buffer_read_int_kernel_code,
                                        "test_buffer_read_int");
    test_error(error, "Unable to create kernel");

    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
    test_error(error, "Unable to set kernel arg");

    size_t work_items = num_elements;
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
                                   &work_items, NULL, 0, NULL, NULL);
    test_error(error, "Unable to enqueue kernel");

    // Blocking map of the whole buffer for reading.
    void *mapping = clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_READ,
                                       0, byte_count,
                                       0, NULL, NULL, &error);
    cl_int *mapped_ptr = (cl_int *)mapping;
    test_error(error, "Unable to map buffer for reading");

    // Locate the first element, if any, that differs from the expected value.
    const cl_int expected = TEST_PRIME_INT;
    int first_bad = -1;
    for (int idx = 0; idx < num_elements && first_bad < 0; idx++)
    {
        if (mapped_ptr[idx] != expected)
            first_bad = idx;
    }

    if (first_bad >= 0)
    {
        log_error("ERROR: Mapped data mismatch at %d: "
                  "expected %d, got %d\n",
                  first_bad, expected, mapped_ptr[first_bad]);
        // Unmap before bailing out so the mapping is not left dangling.
        clEnqueueUnmapMemObject(queue, buffer, mapped_ptr, 0, NULL, NULL);
        return -1;
    }

    error = clEnqueueUnmapMemObject(queue, buffer, mapped_ptr, 0, NULL, NULL);
    test_error(error, "Unable to unmap buffer");

    log_info("Buffer map read test passed\n");
    return 0;
}
5.5.2 buffer_map_write - 映射写入
通过映射修改缓冲区内容:
/*
 * Maps a buffer for writing, stores random data through the mapped pointer,
 * unmaps it, reads the buffer back with clEnqueueReadBuffer and checks that
 * the data matches what was written.
 *
 * deviceID      Unused in this function.
 * context       Context used to create the buffer.
 * queue         Command queue used for map, unmap and read.
 * num_elements  Number of cl_int elements in the buffer.
 *
 * Returns 0 on success and -1 on allocation failure or data mismatch.
 * OpenCL failures go through test_error (defined elsewhere; assumed to log
 * and return on failure).
 */
int test_buffer_map_write_int(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements)
{
    cl_int error;
    clMemWrapper buffer;
    const size_t byte_count = sizeof(cl_int) * num_elements;

    // Create the buffer before acquiring host memory, so this early return
    // has nothing to clean up.
    buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, byte_count, NULL, &error);
    test_error(error, "Unable to create buffer");

    cl_int *expected = (cl_int *)malloc(byte_count);
    cl_int *result = (cl_int *)malloc(byte_count);
    if (expected == NULL || result == NULL)
    {
        log_error("ERROR: Unable to allocate host memory\n");
        free(expected);
        free(result);
        return -1;
    }

    // The generator is only needed to produce the reference data.
    MTdata d = init_genrand(gRandomSeed);
    for (int i = 0; i < num_elements; i++)
        expected[i] = (cl_int)genrand_int32(d);
    free_mtdata(d);

    // Blocking map of the whole buffer for writing.
    cl_int *mapped_ptr = (cl_int *)clEnqueueMapBuffer(
        queue, buffer, CL_TRUE, CL_MAP_WRITE,
        0, byte_count,
        0, NULL, NULL, &error);
    if (error != 0) // CL_SUCCESS is 0
    {
        free(expected);
        free(result);
        test_error(error, "Unable to map buffer for writing");
        return -1; // reached only if test_error does not return by itself
    }

    for (int i = 0; i < num_elements; i++)
        mapped_ptr[i] = expected[i];

    // Unmapping hands the written contents back to the buffer.
    error = clEnqueueUnmapMemObject(queue, buffer, mapped_ptr, 0, NULL, NULL);
    if (error != 0)
    {
        free(expected);
        free(result);
        test_error(error, "Unable to unmap buffer");
        return -1;
    }

    error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, byte_count,
                                result, 0, NULL, NULL);
    if (error != 0)
    {
        free(expected);
        free(result);
        test_error(error, "Unable to read buffer");
        return -1;
    }

    int status = 0;
    for (int i = 0; i < num_elements; i++)
    {
        if (result[i] != expected[i])
        {
            log_error("ERROR: Map write verification failed at %d\n", i);
            status = -1;
            break;
        }
    }

    if (status == 0)
        log_info("Buffer map write test passed\n");

    free(expected);
    free(result);
    return status;
}
5.5.3 映射标志组合
不同的映射标志控制访问权限:
// The four calls below are alternatives, one per map flag; they are not
// meant to share one scope (each one declares `ptr`).
// Read-only mapping.
void *ptr = clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_READ,
0, size, 0, NULL, NULL, &error);
// Write mapping. The region's existing contents stay valid, so the runtime
// may still have to make the old data visible to the host; use
// CL_MAP_WRITE_INVALIDATE_REGION when the old contents are not needed.
void *ptr = clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
// Read-write mapping.
void *ptr = clEnqueueMapBuffer(queue, buffer, CL_TRUE,
CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
// Write mapping that discards the previous contents (OpenCL 1.2+).
void *ptr = clEnqueueMapBuffer(queue, buffer, CL_TRUE,
CL_MAP_WRITE_INVALIDATE_REGION,
0, size, 0, NULL, NULL, &error);
性能提示:
- CL_MAP_WRITE_INVALIDATE_REGION:告诉驱动不需要保留旧数据,可以提升性能
- 映射/解映射有开销,适合大块数据访问
- 小数据量用 Read/Write API 可能更快
本章小结
第5章详细介绍了 Buffers 测试套件的核心功能:
- 缓冲区创建:各种内存标志组合(USE_HOST_PTR、COPY_HOST_PTR、ALLOC_HOST_PTR)
- 读写操作:同步/异步读写,支持标量和向量类型
- 拷贝填充:设备端缓冲区拷贝、部分拷贝、模式填充
- 子缓冲区:区域视图创建、重叠子缓冲区、对齐要求
- 映射操作:零拷贝访问、不同映射标志、性能优化
这些测试确保 OpenCL 缓冲区对象的各种操作正确、高效,是内存管理验证的关键。下一章将介绍 Images 图像对象测试。
1947

被折叠的 条评论
为什么被折叠?



