Allocations 测试套件验证 OpenCL 内存对象分配的健壮性和容量限制。该测试重点测试设备内存分配能力、多次分配、大内存对象管理,以及在接近内存限制时的行为。
7.1 测试概述
7.1.1 测试目标
Allocations 测试套件旨在:
- 验证设备能否分配接近 CL_DEVICE_MAX_MEM_ALLOC_SIZE 的内存对象
- 测试多次分配直到达到 CL_DEVICE_GLOBAL_MEM_SIZE 的总量
- 验证分配后内存对象的可用性(通过内核访问)
- 测试阻塞和非阻塞内存操作
- 检查内存分配失败时的错误处理
7.1.2 测试类型
| 测试名称 | 内存对象类型 | 访问模式 | 写入方式 |
|---|---|---|---|
| buffer | Buffer | 读写 | 阻塞写入 |
| image2d_read | 2D Image | 只读 | 阻塞写入 |
| image2d_write | 2D Image | 只写 | 阻塞写入 |
| buffer_non_blocking | Buffer | 读写 | 非阻塞写入 |
| image2d_read_non_blocking | 2D Image | 只读 | 非阻塞写入 |
| image2d_write_non_blocking | 2D Image | 只写 | 非阻塞写入 |
7.1.3 关键设备限制
/*
 * Query the device's memory limits, log them, and sanity-check them.
 *
 * Reads CL_DEVICE_MAX_MEM_ALLOC_SIZE, CL_DEVICE_GLOBAL_MEM_SIZE and
 * CL_DEVICE_HOST_UNIFIED_MEMORY through clGetDeviceInfo.
 *
 * Returns 0 when every query succeeds and the single-allocation limit does
 * not exceed the total global memory size, -1 when it does. A failed query is
 * handed to test_error (defined elsewhere; presumably it logs and returns).
 */
int query_device_limits(cl_device_id device)
{
    cl_int error;
    /* Initialised so nothing indeterminate is ever logged. */
    cl_ulong max_alloc_size = 0;
    cl_ulong global_mem_size = 0;
    cl_bool unified_memory = CL_FALSE;

    // Largest single allocation the device supports.
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                            sizeof(max_alloc_size), &max_alloc_size, NULL);
    test_error(error, "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE");

    // Total global memory size.
    error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,
                            sizeof(global_mem_size), &global_mem_size, NULL);
    test_error(error, "Failed to query CL_DEVICE_GLOBAL_MEM_SIZE");

    // Whether device and host share one memory subsystem.
    error = clGetDeviceInfo(device, CL_DEVICE_HOST_UNIFIED_MEMORY,
                            sizeof(unified_memory), &unified_memory, NULL);
    test_error(error, "Failed to query CL_DEVICE_HOST_UNIFIED_MEMORY");

    // cl_ulong is not necessarily `unsigned long long`; cast explicitly so the
    // argument type matches the %llu conversion on every platform.
    log_info("Device Memory Limits:\n");
    log_info(" CL_DEVICE_MAX_MEM_ALLOC_SIZE: %llu bytes (%.2f MB)\n",
             (unsigned long long)max_alloc_size,
             max_alloc_size / (1024.0 * 1024.0));
    log_info(" CL_DEVICE_GLOBAL_MEM_SIZE: %llu bytes (%.2f MB)\n",
             (unsigned long long)global_mem_size,
             global_mem_size / (1024.0 * 1024.0));
    log_info(" Host Unified Memory: %s\n",
             unified_memory ? "Yes" : "No");

    // A single-allocation limit larger than the whole global memory is
    // inconsistent; report it as a failure.
    if (max_alloc_size > global_mem_size)
    {
        log_error("ERROR: MAX_MEM_ALLOC_SIZE (%llu) > GLOBAL_MEM_SIZE (%llu)\n",
                  (unsigned long long)max_alloc_size,
                  (unsigned long long)global_mem_size);
        return -1;
    }
    return 0;
}
7.2 Buffer 分配测试
7.2.1 单个大型 Buffer 分配
测试分配单个接近最大限制的 Buffer。
/*
 * Try to allocate one buffer as close as possible to
 * CL_DEVICE_MAX_MEM_ALLOC_SIZE.
 *
 * Starts at the reported maximum and, on CL_OUT_OF_RESOURCES or
 * CL_MEM_OBJECT_ALLOCATION_FAILURE, retries with the size reduced by 1/16 of
 * the maximum, as long as the size stays above 1/8 of the maximum.
 *
 * Returns 0 as soon as one allocation succeeds, -1 if no size above 1/8 of
 * the maximum can be allocated or an unexpected error occurs.
 */
int test_single_buffer_allocation(cl_context context, cl_device_id device)
{
    cl_int error;
    cl_ulong max_alloc_size = 0;

    // The limit drives the whole loop below, so a failed query must not be
    // ignored (the value would otherwise be indeterminate).
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                            sizeof(max_alloc_size), &max_alloc_size, NULL);
    test_error(error, "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE");

    size_t target_size = (size_t)max_alloc_size;
    size_t reduction_step = target_size / 16; // shrink by 1/16 per retry
    if (reduction_step == 0)
        reduction_step = 1; // a zero step would retry the same size forever
    cl_mem buffer = NULL;

    log_info("Attempting to allocate single buffer of size %.2f MB\n",
             target_size / (1024.0 * 1024.0));

    while (target_size > max_alloc_size / 8)
    {
        buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                target_size, NULL, &error);
        if (error == CL_SUCCESS)
        {
            size_t actual_size = 0;

            log_info("SUCCESS: Allocated %.2f MB\n",
                     target_size / (1024.0 * 1024.0));

            // Confirm the size the runtime reports for the object.
            error = clGetMemObjectInfo(buffer, CL_MEM_SIZE,
                                       sizeof(actual_size),
                                       &actual_size, NULL);
            if (error != CL_SUCCESS)
            {
                // Release before bailing out so the buffer is not leaked.
                log_error("Failed to query buffer size (error %d)\n", error);
                clReleaseMemObject(buffer);
                return -1;
            }
            if (actual_size != target_size)
            {
                log_error("WARNING: Requested %zu bytes, got %zu bytes\n",
                          target_size, actual_size);
            }
            clReleaseMemObject(buffer);
            return 0;
        }
        else if (error == CL_OUT_OF_RESOURCES || error == CL_MEM_OBJECT_ALLOCATION_FAILURE)
        {
            // Out of memory: retry with a smaller request.
            target_size -= reduction_step;
            log_info("Allocation failed, trying %.2f MB\n",
                     target_size / (1024.0 * 1024.0));
        }
        else
        {
            // Any other error is not an expected out-of-memory condition.
            log_error("Unexpected error: %d\n", error);
            return -1;
        }
    }

    // Nothing above 1/8 of the maximum could be allocated.
    log_error("FAIL: Could not allocate more than 1/8 of max size\n");
    return -1;
}
7.2.2 多个 Buffer 分配
测试分配多个 Buffer 直到达到全局内存限制。
/* Upper bound on the number of buffers the multi-allocation test creates. */
#define MAX_ALLOCATIONS 1024

/*
 * Allocate many buffers until about 60% of CL_DEVICE_GLOBAL_MEM_SIZE is in
 * use or the device reports it is out of memory.
 *
 * Each buffer is 1/4 of CL_DEVICE_MAX_MEM_ALLOC_SIZE and at most
 * MAX_ALLOCATIONS buffers are created. All buffers are released before
 * returning.
 *
 * Returns 0 when at least 1/8 of the target total was allocated and no
 * unexpected error occurred, -1 otherwise.
 */
int test_multiple_buffer_allocations(cl_context context, cl_device_id device)
{
    cl_int error;
    cl_ulong global_mem_size = 0;
    cl_ulong max_alloc_size = 0;

    error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,
                            sizeof(global_mem_size), &global_mem_size, NULL);
    test_error(error, "Failed to query CL_DEVICE_GLOBAL_MEM_SIZE");
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                            sizeof(max_alloc_size), &max_alloc_size, NULL);
    test_error(error, "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE");

    // Aim for 60% of global memory, leaving room for the system and driver.
    cl_ulong target_total = (cl_ulong)(global_mem_size * 0.6);

    // Per-buffer size: a quarter of the single-allocation limit.
    size_t alloc_size = (size_t)(max_alloc_size / 4);
    if (alloc_size == 0)
    {
        // Guards the division below against a zero divisor.
        log_error("CL_DEVICE_MAX_MEM_ALLOC_SIZE is too small to derive a buffer size\n");
        return -1;
    }

    // Clamp while still in the wide type so the narrowing cast cannot overflow.
    cl_ulong wanted = target_total / alloc_size;
    int num_allocations =
        (wanted > MAX_ALLOCATIONS) ? MAX_ALLOCATIONS : (int)wanted;

    log_info("Attempting to allocate %d buffers of %.2f MB each\n",
             num_allocations, alloc_size / (1024.0 * 1024.0));
    log_info("Target total: %.2f MB\n", target_total / (1024.0 * 1024.0));

    // Always request at least one slot so a NULL result means real failure.
    cl_mem *buffers = (cl_mem *)malloc(
        sizeof(cl_mem) * (size_t)(num_allocations > 0 ? num_allocations : 1));
    if (buffers == NULL)
    {
        log_error("Failed to allocate host array for %d buffer handles\n",
                  num_allocations);
        return -1;
    }

    int allocated_count = 0;
    int unexpected_error = 0;
    size_t total_allocated = 0;

    for (int i = 0; i < num_allocations; i++)
    {
        buffers[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                    alloc_size, NULL, &error);
        if (error == CL_SUCCESS)
        {
            allocated_count++;
            total_allocated += alloc_size;
            if ((i + 1) % 10 == 0)
            {
                log_info(" Allocated %d buffers (%.2f MB total)\n",
                         allocated_count,
                         total_allocated / (1024.0 * 1024.0));
            }
        }
        else if (error == CL_OUT_OF_RESOURCES ||
                 error == CL_MEM_OBJECT_ALLOCATION_FAILURE)
        {
            // Device memory exhausted: stop allocating.
            log_info("Memory exhausted after %d allocations\n", allocated_count);
            break;
        }
        else
        {
            // Not an out-of-memory condition; remember it so the test fails.
            log_error("Unexpected error at allocation %d: %d\n", i, error);
            unexpected_error = 1;
            break;
        }
    }

    log_info("Successfully allocated %d buffers, total: %.2f MB (%.1f%% of target)\n",
             allocated_count,
             total_allocated / (1024.0 * 1024.0),
             (total_allocated * 100.0) / target_total);

    // The loop stops at the first failure, so the successful buffers are
    // exactly indices [0, allocated_count).
    for (int i = 0; i < allocated_count; i++)
    {
        clReleaseMemObject(buffers[i]);
    }
    free(buffers);

    if (unexpected_error)
        return -1;

    // Fail when less than 1/8 of the target could be allocated.
    if (total_allocated < target_total / 8)
    {
        log_error("FAIL: Allocated less than 1/8 of target\n");
        return -1;
    }
    return 0;
}
7.2.3 阻塞 vs 非阻塞写入
比较对相同大小的 Buffer 进行阻塞写入与非阻塞写入(提交后等待事件完成)所花费的时间。
/*
 * Time a blocking and a non-blocking 256 MB clEnqueueWriteBuffer and log the
 * ratio of the two durations.
 *
 * The non-blocking write is followed by clWaitForEvents, so both timings
 * cover the complete transfer. Timing uses clock().
 * NOTE(review): clock() reports processor time, not wall-clock time, so the
 * figures may under-report time spent waiting on the device.
 *
 * `device` is not used by this function.
 *
 * Returns 0 on success, -1 if host memory or any OpenCL call fails. All
 * resources are released on every path.
 */
int test_buffer_write_performance(cl_context context,
                                  cl_command_queue queue,
                                  cl_device_id device)
{
    cl_int error;
    int result = -1;
    size_t buffer_size = 256 * 1024 * 1024; // 256 MB
    unsigned char *test_data = NULL;
    cl_mem buffer_blocking = NULL;
    cl_mem buffer_non_blocking = NULL;
    cl_event write_event = NULL;
    clock_t start;
    clock_t end;
    double blocking_time = 0.0;
    double non_blocking_time = 0.0;
    size_t i;

    (void)device;

    // Host-side source data: a repeating 0..255 byte ramp.
    test_data = (unsigned char *)malloc(buffer_size);
    if (test_data == NULL)
    {
        log_error("Failed to allocate %zu bytes of host test data\n",
                  buffer_size);
        return -1;
    }
    for (i = 0; i < buffer_size; i++)
        test_data[i] = (unsigned char)(i % 256);

    // --- Blocking write ---
    log_info("Testing blocking write...\n");
    buffer_blocking = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                     buffer_size, NULL, &error);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to create buffer (error %d)\n", error);
        goto cleanup;
    }
    start = clock();
    error = clEnqueueWriteBuffer(queue, buffer_blocking, CL_TRUE,
                                 0, buffer_size, test_data,
                                 0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed blocking write (error %d)\n", error);
        goto cleanup;
    }
    end = clock();
    blocking_time = (double)(end - start) / CLOCKS_PER_SEC;
    log_info(" Blocking write took %.3f seconds\n", blocking_time);

    // --- Non-blocking write, then wait on its event ---
    log_info("Testing non-blocking write...\n");
    buffer_non_blocking = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                         buffer_size, NULL, &error);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to create buffer (error %d)\n", error);
        goto cleanup;
    }
    start = clock();
    error = clEnqueueWriteBuffer(queue, buffer_non_blocking, CL_FALSE,
                                 0, buffer_size, test_data,
                                 0, NULL, &write_event);
    if (error != CL_SUCCESS)
    {
        log_error("Failed non-blocking write (error %d)\n", error);
        goto cleanup;
    }
    error = clWaitForEvents(1, &write_event);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to wait for event (error %d)\n", error);
        goto cleanup;
    }
    end = clock();
    non_blocking_time = (double)(end - start) / CLOCKS_PER_SEC;
    log_info(" Non-blocking write took %.3f seconds\n", non_blocking_time);

    // clock() resolution can make the denominator zero; skip the ratio then.
    if (non_blocking_time > 0.0)
    {
        log_info("Blocking vs Non-blocking ratio: %.2fx\n",
                 blocking_time / non_blocking_time);
    }
    else
    {
        log_info("Blocking vs Non-blocking ratio: n/a (non-blocking time below timer resolution)\n");
    }
    result = 0;

cleanup:
    // Release only what was actually created; test_data is freed last because
    // the non-blocking write may still reference it until the event completes
    // or the objects are released.
    if (write_event != NULL)
        clReleaseEvent(write_event);
    if (buffer_blocking != NULL)
        clReleaseMemObject(buffer_blocking);
    if (buffer_non_blocking != NULL)
        clReleaseMemObject(buffer_non_blocking);
    free(test_data);
    return result;
}
7.3 Image 分配测试
7.3.1 图像尺寸计算
图像分配受尺寸限制,需要计算合适的宽高。
/*
 * Derive a roughly square width/height for a 2D image holding about
 * `target_size` bytes, with 4 * sizeof(cl_uint) bytes per pixel.
 *
 * The result is limited by CL_DEVICE_IMAGE2D_MAX_WIDTH and
 * CL_DEVICE_IMAGE2D_MAX_HEIGHT, and each dimension is at least 1.
 *
 * On success stores the dimensions in *width and *height, logs them, and
 * returns 0. Returns -1 when the requested pixel count exceeds
 * max_width * max_height. Failed device queries go through test_error.
 */
int calculate_image_dimensions(cl_device_id device,
                               size_t target_size,
                               size_t *width,
                               size_t *height)
{
    const size_t bytes_per_pixel = 4 * sizeof(cl_uint); // RGBA, one cl_uint per channel
    size_t limit_w, limit_h;
    cl_int error;

    // Device limits for 2D images.
    error = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH,
                            sizeof(limit_w), &limit_w, NULL);
    test_error(error, "Failed to query max image width");
    error = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
                            sizeof(limit_h), &limit_h, NULL);
    test_error(error, "Failed to query max image height");

    // Number of whole pixels that fit into the byte budget.
    const size_t wanted_pixels = target_size / bytes_per_pixel;

    // Reject requests that the largest possible image cannot hold.
    const unsigned long long pixel_capacity =
        (unsigned long long)limit_w * limit_h;
    if (wanted_pixels > pixel_capacity)
    {
        log_error("Target size too large for image\n");
        return -1;
    }

    // Start from a square and clamp; the clamp-to-max precedes the
    // clamp-to-one so a zero limit still yields 1.
    size_t w = (size_t)sqrt((double)wanted_pixels);
    w = (w > limit_w) ? limit_w : w;
    w = (w == 0) ? 1 : w;

    size_t h = wanted_pixels / w;
    h = (h > limit_h) ? limit_h : h;
    h = (h == 0) ? 1 : h;

    *width = w;
    *height = h;

    const size_t bytes_used = w * h * bytes_per_pixel;
    log_info("Image dimensions: %zux%zu, actual size: %.2f MB\n",
             w, h,
             bytes_used / (1024.0 * 1024.0));
    return 0;
}
7.3.2 2D 只读图像分配
/*
 * Allocate a ~128 MB read-only 2D image (CL_RGBA / CL_UNSIGNED_INT32) and log
 * the dimensions the runtime reports for it.
 *
 * Returns 0 when the image is created (or when the device reports no image
 * support, in which case the test is skipped), -1 when the dimensions cannot
 * be computed, the image cannot be created, or its dimensions cannot be
 * queried.
 */
int test_read_only_image_allocation(cl_context context, cl_device_id device)
{
    cl_int error;
    size_t width, height;

    // Skip on devices without image support.
    cl_bool image_support = CL_FALSE;
    error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
                            sizeof(image_support), &image_support, NULL);
    test_error(error, "Failed to query image support");
    if (!image_support)
    {
        log_info("Device does not support images, skipping test\n");
        return 0;
    }

    // Target size: 128 MB.
    size_t target_size = 128 * 1024 * 1024;
    if (calculate_image_dimensions(device, target_size, &width, &height) != 0)
        return -1;

    cl_image_format format;
    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_UNSIGNED_INT32;

    log_info("Creating read-only image: %zux%zu\n", width, height);
    cl_mem image = create_image_2d(context, CL_MEM_READ_ONLY,
                                   &format, width, height,
                                   0, NULL, &error);
    if (error != CL_SUCCESS)
    {
        // Distinguish the anticipated allocation failures from anything else
        // in the log; both fail the test.
        if (error == CL_OUT_OF_RESOURCES ||
            error == CL_MEM_OBJECT_ALLOCATION_FAILURE ||
            error == CL_INVALID_IMAGE_SIZE)
        {
            log_error("Failed to allocate image: %d\n", error);
        }
        else
        {
            log_error("Unexpected error: %d\n", error);
        }
        return -1;
    }

    log_info("SUCCESS: Read-only image allocated\n");

    // Query the dimensions back; check the calls so indeterminate values are
    // never logged.
    size_t actual_width = 0;
    size_t actual_height = 0;
    error = clGetImageInfo(image, CL_IMAGE_WIDTH,
                           sizeof(actual_width), &actual_width, NULL);
    if (error == CL_SUCCESS)
    {
        error = clGetImageInfo(image, CL_IMAGE_HEIGHT,
                               sizeof(actual_height), &actual_height, NULL);
    }
    if (error != CL_SUCCESS)
    {
        log_error("Failed to query image dimensions (error %d)\n", error);
        clReleaseMemObject(image);
        return -1;
    }
    log_info("Actual dimensions: %zux%zu\n", actual_width, actual_height);

    clReleaseMemObject(image);
    return 0;
}
7.3.3 2D 只写图像分配
/*
 * Allocate a ~128 MB write-only 2D image (CL_RGBA / CL_UNSIGNED_INT32).
 *
 * Returns 0 when the image is created (or when the device reports no image
 * support, in which case the test is skipped), -1 when the dimensions cannot
 * be computed or the image cannot be created.
 */
int test_write_only_image_allocation(cl_context context, cl_device_id device)
{
    cl_int error;
    size_t width, height;

    // Check the query result, as the read-only variant does; otherwise
    // image_support would be read while indeterminate if the call failed.
    cl_bool image_support = CL_FALSE;
    error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
                            sizeof(image_support), &image_support, NULL);
    test_error(error, "Failed to query image support");
    if (!image_support)
    {
        log_info("Device does not support images, skipping test\n");
        return 0;
    }

    // Target size: 128 MB.
    size_t target_size = 128 * 1024 * 1024;
    if (calculate_image_dimensions(device, target_size, &width, &height) != 0)
        return -1;

    cl_image_format format;
    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_UNSIGNED_INT32;

    log_info("Creating write-only image: %zux%zu\n", width, height);
    cl_mem image = create_image_2d(context, CL_MEM_WRITE_ONLY,
                                   &format, width, height,
                                   0, NULL, &error);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to allocate write-only image: %d\n", error);
        return -1;
    }

    log_info("SUCCESS: Write-only image allocated\n");
    clReleaseMemObject(image);
    return 0;
}
7.4 内存填充与验证
7.4.1 随机数据填充
分配后填充随机数据以验证内存可用性。
/*
 * Fill `buffer` with `size` pseudo-random bytes, read them back, and compare.
 *
 * The byte sequence comes from rand() after srand(seed), so this function
 * reseeds the process-wide generator. Both transfers are blocking. The first
 * 10 mismatching offsets are logged individually.
 *
 * Returns 0 when the data read back matches what was written, -1 on host
 * allocation failure, OpenCL error, or data mismatch. Host buffers are freed
 * on every path.
 */
int fill_buffer_with_random_data(cl_command_queue queue,
                                 cl_mem buffer,
                                 size_t size,
                                 unsigned int seed)
{
    cl_int error;
    int result = -1;
    size_t mismatches = 0; // size_t: an int could overflow on multi-GB buffers
    size_t i;
    unsigned char *random_data = (unsigned char *)malloc(size);
    unsigned char *read_back = (unsigned char *)malloc(size);

    if (random_data == NULL || read_back == NULL)
    {
        log_error("Failed to allocate %zu bytes of host memory for verification\n",
                  size);
        goto cleanup;
    }

    // Generate the reference data.
    srand(seed);
    for (i = 0; i < size; i++)
    {
        random_data[i] = (unsigned char)(rand() % 256);
    }

    // Write it to the device buffer.
    error = clEnqueueWriteBuffer(queue, buffer, CL_TRUE,
                                 0, size, random_data,
                                 0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to write buffer (error %d)\n", error);
        goto cleanup;
    }

    // Read it back.
    error = clEnqueueReadBuffer(queue, buffer, CL_TRUE,
                                0, size, read_back,
                                0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to read buffer (error %d)\n", error);
        goto cleanup;
    }

    // Compare byte by byte.
    for (i = 0; i < size; i++)
    {
        if (random_data[i] != read_back[i])
        {
            if (mismatches < 10) // only report the first 10 mismatches
            {
                log_error("Mismatch at offset %zu: wrote %u, read %u\n",
                          i, random_data[i], read_back[i]);
            }
            mismatches++;
        }
    }

    if (mismatches > 0)
    {
        log_error("Total mismatches: %zu\n", mismatches);
        goto cleanup;
    }

    log_info("Buffer verification passed\n");
    result = 0;

cleanup:
    free(random_data);
    free(read_back);
    return result;
}
7.4.2 图像数据填充
/*
 * Write a gradient pattern into a width x height image with four cl_uint
 * channels per pixel, read it back, and compare with memcmp.
 *
 * Channel values: R ramps with x, G with y, B with x + y (each scaled to
 * 0..254), A is 255. Both transfers are blocking and use row/slice pitch 0.
 *
 * Returns 0 when the data read back matches, -1 on size overflow, host
 * allocation failure, OpenCL error, or mismatch. Host buffers are freed on
 * every path.
 */
int fill_image_with_pattern(cl_command_queue queue,
                            cl_mem image,
                            size_t width,
                            size_t height)
{
    cl_int error;
    int result = -1;
    size_t pixel_size = 4 * sizeof(cl_uint);
    size_t data_size;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {width, height, 1};
    cl_uint *pattern = NULL;
    cl_uint *read_back = NULL;
    size_t x, y;

    // Reject dimensions whose byte size does not fit in size_t; the wrapped
    // product would under-allocate and the loops below would overrun it.
    if (width != 0 && height > ((size_t)-1) / pixel_size / width)
    {
        log_error("Image of %zux%zu pixels is too large for host memory\n",
                  width, height);
        return -1;
    }
    data_size = width * height * pixel_size;

    pattern = (cl_uint *)malloc(data_size);
    read_back = (cl_uint *)malloc(data_size);
    if (pattern == NULL || read_back == NULL)
    {
        log_error("Failed to allocate %zu bytes of host memory for image data\n",
                  data_size);
        goto cleanup;
    }

    // Build the gradient, four channels per pixel in R, G, B, A order.
    for (y = 0; y < height; y++)
    {
        for (x = 0; x < width; x++)
        {
            size_t idx = (y * width + x) * 4;
            pattern[idx + 0] = (cl_uint)(x * 255 / width);                  // R
            pattern[idx + 1] = (cl_uint)(y * 255 / height);                 // G
            pattern[idx + 2] = (cl_uint)((x + y) * 255 / (width + height)); // B
            pattern[idx + 3] = 255;                                         // A
        }
    }

    // Write the whole image.
    error = clEnqueueWriteImage(queue, image, CL_TRUE,
                                origin, region,
                                0, 0, // row_pitch, slice_pitch
                                pattern,
                                0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to write image (error %d)\n", error);
        goto cleanup;
    }

    // Read it back.
    error = clEnqueueReadImage(queue, image, CL_TRUE,
                               origin, region,
                               0, 0,
                               read_back,
                               0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to read image (error %d)\n", error);
        goto cleanup;
    }

    if (memcmp(pattern, read_back, data_size) != 0)
    {
        log_error("Image data mismatch\n");
        goto cleanup;
    }

    log_info("Image verification passed\n");
    result = 0;

cleanup:
    free(pattern);
    free(read_back);
    return result;
}
7.5 内核执行验证
7.5.1 Buffer 访问内核
通过内核访问分配的内存以验证其可用性。
/*
 * OpenCL C source for the `access_buffers` kernel.
 *
 * Arguments: `buffer` (uint elements to read), `buffer_size` (size in bytes,
 * divided by sizeof(uint) to get the element count), and `checksum` (a single
 * uint accumulator).
 *
 * Each work-item starts at its global id, advances by the global size, sums
 * the elements it visits, and adds that partial sum to *checksum with
 * atomic_add. The host is expected to zero *checksum before launching.
 *
 * The comments inside the raw string are part of the program text passed to
 * the OpenCL compiler and are left as written.
 */
const char *buffer_access_kernel_source = R"(
__kernel void access_buffers(__global uint *buffer,
uint buffer_size,
__global uint *checksum)
{
int gid = get_global_id(0);
int num_elements = buffer_size / sizeof(uint);
// 计算校验和
uint local_sum = 0;
for (int i = gid; i < num_elements; i += get_global_size(0))
{
local_sum += buffer[i];
}
// 原子累加到全局校验和
atomic_add(checksum, local_sum);
}
)";
/*
 * Run the `access_buffers` kernel over `buffer` and log the checksum it
 * produces, proving the allocation can be read from device code.
 *
 * Builds buffer_access_kernel_source for `device`, creates a one-element
 * checksum buffer initialised to 0, launches 256 work-items, waits for
 * completion, and reads the checksum back. The checksum is only logged, not
 * compared against an expected value.
 *
 * The kernel takes the size as a 32-bit uint, so for buffers larger than that
 * can express, only the leading part is summed (this is logged).
 *
 * Returns 0 on success, -1 if any OpenCL call fails. The program, kernel and
 * checksum buffer are released on every path.
 */
int test_buffer_kernel_access(cl_context context,
                              cl_command_queue queue,
                              cl_device_id device,
                              cl_mem buffer,
                              size_t buffer_size)
{
    cl_int error;
    int result = -1;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_mem checksum_buffer = NULL;
    cl_uint zero = 0;
    cl_uint checksum = 0;
    cl_uint size_arg;
    size_t global_size = 256;

    // Build the program and fetch the kernel.
    program = clCreateProgramWithSource(context, 1,
                                        &buffer_access_kernel_source,
                                        NULL, &error);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to create program (error %d)\n", error);
        goto cleanup;
    }
    error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to build program (error %d)\n", error);
        goto cleanup;
    }
    kernel = clCreateKernel(program, "access_buffers", &error);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to create kernel (error %d)\n", error);
        goto cleanup;
    }

    // One-element accumulator, zeroed before the launch.
    checksum_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                     sizeof(cl_uint), NULL, &error);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to create checksum buffer (error %d)\n", error);
        goto cleanup;
    }
    error = clEnqueueWriteBuffer(queue, checksum_buffer, CL_TRUE,
                                 0, sizeof(cl_uint), &zero,
                                 0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to initialize checksum (error %d)\n", error);
        goto cleanup;
    }

    // A plain cast would wrap for sizes that do not fit in 32 bits (e.g.
    // exactly 4 GiB becomes 0 and the kernel would touch nothing); clamp to
    // the largest representable value instead.
    size_arg = (cl_uint)buffer_size;
    if ((size_t)size_arg != buffer_size)
    {
        size_arg = (cl_uint)-1;
        log_info("Buffer size %zu exceeds the kernel's 32-bit size argument; "
                 "only the first %u bytes are checked\n",
                 buffer_size, size_arg);
    }

    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
    error |= clSetKernelArg(kernel, 1, sizeof(cl_uint), &size_arg);
    error |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &checksum_buffer);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to set kernel arguments (error %d)\n", error);
        goto cleanup;
    }

    // Launch and wait.
    error = clEnqueueNDRangeKernel(queue, kernel, 1,
                                   NULL, &global_size, NULL,
                                   0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to execute kernel (error %d)\n", error);
        goto cleanup;
    }
    error = clFinish(queue);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to finish queue (error %d)\n", error);
        goto cleanup;
    }

    // Fetch the result.
    error = clEnqueueReadBuffer(queue, checksum_buffer, CL_TRUE,
                                0, sizeof(cl_uint), &checksum,
                                0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to read checksum (error %d)\n", error);
        goto cleanup;
    }
    log_info("Kernel checksum: 0x%08X\n", checksum);
    result = 0;

cleanup:
    if (checksum_buffer != NULL)
        clReleaseMemObject(checksum_buffer);
    if (kernel != NULL)
        clReleaseKernel(kernel);
    if (program != NULL)
        clReleaseProgram(program);
    return result;
}
7.5.2 Image 访问内核
/*
 * OpenCL C source for the `access_image` kernel.
 *
 * Arguments: `image` (a read-only 2D image) and `checksum` (a single uint
 * accumulator).
 *
 * Each work-item in the 2D range maps to one pixel (x, y). Work-items outside
 * the image bounds return immediately. The others read their pixel with
 * read_imageui through an unnormalised, clamped, nearest-filter sampler, add
 * the four channel values together, and add that sum to *checksum with
 * atomic_add. The host is expected to zero *checksum before launching.
 *
 * The comments inside the raw string are part of the program text passed to
 * the OpenCL compiler and are left as written.
 */
const char *image_access_kernel_source = R"(
__kernel void access_image(__read_only image2d_t image,
__global uint *checksum)
{
int x = get_global_id(0);
int y = get_global_id(1);
int width = get_image_width(image);
int height = get_image_height(image);
if (x >= width || y >= height)
return;
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
int2 coord = (int2)(x, y);
uint4 pixel = read_imageui(image, sampler, coord);
// 计算像素校验和
uint pixel_sum = pixel.x + pixel.y + pixel.z + pixel.w;
// 原子累加
atomic_add(checksum, pixel_sum);
}
)";
/*
 * Run the `access_image` kernel over `image` and log the checksum it
 * produces, proving the image can be read from device code.
 *
 * Builds image_access_kernel_source for `device`, creates a one-element
 * checksum buffer initialised to 0, launches one work-item per pixel
 * (width x height), waits for completion, and reads the checksum back. The
 * checksum is only logged, not compared against an expected value.
 *
 * Returns 0 on success, -1 if any OpenCL call fails. The program, kernel and
 * checksum buffer are released on every path.
 */
int test_image_kernel_access(cl_context context,
                             cl_command_queue queue,
                             cl_device_id device,
                             cl_mem image,
                             size_t width,
                             size_t height)
{
    cl_int error;
    int result = -1;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_mem checksum_buffer = NULL;
    cl_uint zero = 0;
    cl_uint checksum = 0;
    size_t global_size[2] = {width, height};

    // Build the program and fetch the kernel.
    program = clCreateProgramWithSource(context, 1,
                                        &image_access_kernel_source,
                                        NULL, &error);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to create program (error %d)\n", error);
        goto cleanup;
    }
    error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to build program (error %d)\n", error);
        goto cleanup;
    }
    kernel = clCreateKernel(program, "access_image", &error);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to create kernel (error %d)\n", error);
        goto cleanup;
    }

    // One-element accumulator. The zeroing write is checked: if it failed
    // silently the kernel would accumulate onto indeterminate contents.
    checksum_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                     sizeof(cl_uint), NULL, &error);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to create checksum buffer (error %d)\n", error);
        goto cleanup;
    }
    error = clEnqueueWriteBuffer(queue, checksum_buffer, CL_TRUE,
                                 0, sizeof(cl_uint), &zero, 0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to initialize checksum (error %d)\n", error);
        goto cleanup;
    }

    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &image);
    error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &checksum_buffer);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to set kernel arguments (error %d)\n", error);
        goto cleanup;
    }

    // Launch one work-item per pixel and wait.
    error = clEnqueueNDRangeKernel(queue, kernel, 2,
                                   NULL, global_size, NULL,
                                   0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to execute kernel (error %d)\n", error);
        goto cleanup;
    }
    error = clFinish(queue);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to finish queue (error %d)\n", error);
        goto cleanup;
    }

    // Fetch the result; checked so a failed read is not reported as a
    // checksum.
    error = clEnqueueReadBuffer(queue, checksum_buffer, CL_TRUE,
                                0, sizeof(cl_uint), &checksum, 0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("Failed to read checksum (error %d)\n", error);
        goto cleanup;
    }
    log_info("Image kernel checksum: 0x%08X\n", checksum);
    result = 0;

cleanup:
    if (checksum_buffer != NULL)
        clReleaseMemObject(checksum_buffer);
    if (kernel != NULL)
        clReleaseKernel(kernel);
    if (program != NULL)
        clReleaseProgram(program);
    return result;
}
7.6 统一内存设备处理
7.6.1 检测统一内存
/*
 * Scale *global_mem_size down to the amount the tests should target, based on
 * CL_DEVICE_HOST_UNIFIED_MEMORY.
 *
 * Unified-memory devices get half of the original value; other devices get
 * 60% of it. The choice is logged.
 *
 * Returns 1 for a unified-memory device and 0 otherwise. A failed device
 * query goes through test_error.
 */
int adjust_for_unified_memory(cl_device_id device, cl_ulong *global_mem_size)
{
    cl_bool shares_host_memory;
    cl_int error = clGetDeviceInfo(device, CL_DEVICE_HOST_UNIFIED_MEMORY,
                                   sizeof(shares_host_memory),
                                   &shares_host_memory, NULL);
    test_error(error, "Failed to query unified memory");

    const cl_ulong before = *global_mem_size;

    if (!shares_host_memory)
    {
        // Discrete memory: keep 60% to leave room for driver/system overhead.
        *global_mem_size = (cl_ulong)(before * 0.6);
        log_info("Discrete memory device detected\n");
        log_info("Using 60%% of memory: %.2f MB\n",
                 *global_mem_size / (1024.0 * 1024.0));
        return 0;
    }

    // Memory is shared with the host: halve the target to avoid paging.
    *global_mem_size = before / 2;
    log_info("Unified memory device detected\n");
    log_info("Reducing target from %.2f MB to %.2f MB\n",
             before / (1024.0 * 1024.0),
             *global_mem_size / (1024.0 * 1024.0));
    return 1;
}
7.7 测试命令行选项
7.7.1 可用选项
Allocations 测试支持以下命令行选项:
| 选项 | 说明 | 示例 |
|---|---|---|
| single | 测试单个大内存分配 | ./test_allocations single |
| multiple | 测试多个小内存分配 | ./test_allocations multiple |
| <number> | 重复测试次数 | ./test_allocations 5 |
| <percentage>% | 内存目标百分比 | ./test_allocations 75% |
| do_not_force_fill | 不填充数据(跳过写入) | ./test_allocations do_not_force_fill |
| do_not_execute | 不执行内核验证 | ./test_allocations do_not_execute |
7.7.2 使用示例
# 运行单个大 buffer 分配测试
./test_allocations single buffer
# 运行多个小 buffer 分配测试,使用 50% 内存
./test_allocations multiple 50% buffer
# 重复测试 3 次
./test_allocations 3 buffer
# 测试所有类型,不执行内核
./test_allocations do_not_execute
# 测试图像分配,使用 75% 内存
./test_allocations single 75% image2d_read
7.8 错误处理与调试
7.8.1 常见错误
| 错误代码 | 含义 | 解决方法 |
|---|---|---|
| CL_OUT_OF_RESOURCES | 设备资源不足 | 减小分配大小或数量 |
| CL_MEM_OBJECT_ALLOCATION_FAILURE | 内存分配失败 | 检查内存限制,减小目标 |
| CL_INVALID_IMAGE_SIZE | 图像尺寸无效 | 检查尺寸是否超过设备限制 |
| CL_OUT_OF_HOST_MEMORY | 主机内存不足 | 释放其他应用内存 |
7.8.2 调试技巧
/*
 * Diagnostic helper for a failed allocation.
 *
 * Logs the failing error code and requested size, logs the device's
 * CL_DEVICE_MAX_MEM_ALLOC_SIZE and CL_DEVICE_GLOBAL_MEM_SIZE, then probes
 * with buffers of requested_size / 2, / 4, ... (while the probe size is above
 * 1 MB) and logs the first size that can be allocated. Every probe buffer is
 * released immediately.
 *
 * Always returns 0; the function only reports.
 */
int debug_allocation_failure(cl_context context,
                             cl_device_id device,
                             size_t requested_size,
                             int error_code)
{
    cl_ulong max_alloc = 0;
    cl_ulong global_mem = 0;
    cl_int query_error;
    cl_int probe_error;
    int found = 0;

    log_error("Allocation failed with error: %d\n", error_code);
    log_error("Requested size: %.2f MB\n", requested_size / (1024.0 * 1024.0));

    // Report the device limits only if both queries succeeded; otherwise the
    // values would be meaningless.
    query_error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                                  sizeof(max_alloc), &max_alloc, NULL);
    if (query_error == CL_SUCCESS)
    {
        query_error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,
                                      sizeof(global_mem), &global_mem, NULL);
    }
    if (query_error != CL_SUCCESS)
    {
        log_error("Could not query device limits (error %d)\n", query_error);
    }
    else
    {
        log_error("Device limits:\n");
        log_error(" MAX_MEM_ALLOC_SIZE: %.2f MB\n", max_alloc / (1024.0 * 1024.0));
        log_error(" GLOBAL_MEM_SIZE: %.2f MB\n", global_mem / (1024.0 * 1024.0));
        if (requested_size > max_alloc)
        {
            log_error("Requested size exceeds MAX_MEM_ALLOC_SIZE!\n");
            log_error("Try using multiple smaller allocations\n");
        }
    }

    // Halve the request until an allocation succeeds or the size drops to
    // 1 MB or below.
    size_t test_size = requested_size / 2;
    while (test_size > 1024 * 1024)
    {
        cl_mem test_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                         test_size, NULL, &probe_error);
        if (probe_error == CL_SUCCESS)
        {
            log_info("Successful allocation at %.2f MB\n",
                     test_size / (1024.0 * 1024.0));
            clReleaseMemObject(test_mem);
            found = 1;
            break;
        }
        test_size /= 2;
    }
    if (!found)
    {
        log_info("No probe allocation above 1 MB succeeded\n");
    }
    return 0;
}
本章小结
第7章详细介绍了 Allocations 测试套件的核心内容:
- 设备限制:查询 MAX_MEM_ALLOC_SIZE 和 GLOBAL_MEM_SIZE
- Buffer 分配:单个大分配、多个小分配、阻塞/非阻塞写入
- Image 分配:尺寸计算、只读/只写图像分配
- 内存验证:随机数据填充、内核访问验证、校验和计算
- 统一内存:检测和调整策略
- 错误处理:常见错误、调试技巧
这些测试确保 OpenCL 实现能够正确处理各种内存分配场景,特别是接近设备限制时的行为。下一章将介绍 event测试。
2028

被折叠的 条评论
为什么被折叠?



