CUDA纹理对象与引用详解-CSDN博客

纹理对象API：
纹理对象是使用cudaCreateTextureObject（）创建的，创建时需要提供一个指定纹理的struct cudaResourceDesc类型的资源描述，以及一个如下定义的纹理描述：

// Texture description handed to cudaCreateTextureObject() together with the
// resource description. Fields without a description here are covered by the
// CUDA reference manual.
struct cudaTextureDesc
{
    enum cudaTextureAddressMode addressMode[3];    // addressing mode
    enum cudaTextureFilterMode filterMode;         // filtering mode
    enum cudaTextureReadMode readMode;             // read mode
    int sRGB;                                      // see the reference manual
    int normalizedCoords;                          // whether texture coordinates are normalized
    unsigned int maxAnisotropy;                    // see the reference manual
    enum cudaTextureFilterMode mipmapFilterMode;   // see the reference manual
    float mipmapLevelBias;                         // see the reference manual
    float minMipmapLevelClamp;                     // see the reference manual
    float maxMipmapLevelClamp;                     // see the reference manual
};

addressMode指定寻址模式;
filterMode指定过滤模式;
readMode指定读取模式;
normalizedCoords指定纹理坐标是否标准化;
关于sRGB、maxAnisotropy、mipmapFilterMode、mipmapLevelBias、minMipmapLevelClamp和maxMipmapLevelClamp，请参阅参考手册。
以下代码示例将一些简单的转换内核应用于纹理：

// Simple transformation kernel.
//
// For every output pixel (x, y) the kernel computes normalized texture
// coordinates, rotates them by theta around the point (0.5, 0.5), fetches
// the texture at the rotated position and stores the value in output.
//
// Launch layout: 2D grid of 2D blocks, one thread per output pixel. The grid
// may be larger than width x height (the host rounds the grid size up), so
// threads that fall outside the image return without touching memory.
//
// Parameters:
//   output  - device buffer of at least width * height floats, row-major
//   texObj  - texture object read through tex2D<float>
//   width   - image width in pixels
//   height  - image height in pixels
//   theta   - rotation angle passed to cosf/sinf (radians)
__global__ void transformKernel(float* output,
    cudaTextureObject_t texObj,
    int width, int height,
    float theta)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Tail threads of a rounded-up grid must not write past the buffer.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Calculate normalized texture coordinates
    float u = x / (float)width;
    float v = y / (float)height;

    // Transform coordinates: rotate around the texture centre
    u -= 0.5f;
    v -= 0.5f;
    const float cosTheta = cosf(theta);
    const float sinTheta = sinf(theta);
    float tu = u * cosTheta - v * sinTheta + 0.5f;
    float tv = v * cosTheta + u * sinTheta + 0.5f;

    // Read from texture and write to global memory; the index is widened so
    // the row offset cannot wrap for large images.
    output[(size_t)y * width + x] = tex2D<float>(texObj, tu, tv);
}
// Host code
// Host code: uploads h_data into a CUDA array, wraps the array in a texture
// object, runs transformKernel on it and releases every resource.
//
// NOTE(review): width, height, h_data, size and angle are not declared in
// this snippet; they are assumed to be provided by the surrounding program.
//
// Returns 0 on success and 1 if any CUDA call or the kernel itself fails.
int main()
{
    cudaError_t err = cudaSuccess;
    cudaArray* cuArray = NULL;
    cudaTextureObject_t texObj = 0;
    bool texCreated = false;
    float* output = NULL;

    // Allocate CUDA array in device memory (one 32-bit float channel)
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(32, 0, 0, 0,
            cudaChannelFormatKindFloat);
    err = cudaMallocArray(&cuArray, &channelDesc, width, height);

    // Copy to device memory some data located at address h_data
    // in host memory.
    // NOTE(review): cudaMemcpyToArray is marked deprecated in recent CUDA
    // toolkits; cudaMemcpy2DToArray is the documented replacement.
    if (err == cudaSuccess)
        err = cudaMemcpyToArray(cuArray, 0, 0, h_data, size,
            cudaMemcpyHostToDevice);

    if (err == cudaSuccess) {
        // Specify texture
        struct cudaResourceDesc resDesc;
        memset(&resDesc, 0, sizeof(resDesc));
        resDesc.resType = cudaResourceTypeArray;
        resDesc.res.array.array = cuArray;

        // Specify texture object parameters
        struct cudaTextureDesc texDesc;
        memset(&texDesc, 0, sizeof(texDesc));
        texDesc.addressMode[0] = cudaAddressModeWrap;
        texDesc.addressMode[1] = cudaAddressModeWrap;
        texDesc.filterMode = cudaFilterModeLinear;
        texDesc.readMode = cudaReadModeElementType;
        texDesc.normalizedCoords = 1;

        // Create texture object
        err = cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
        texCreated = (err == cudaSuccess);
    }

    // Allocate result of transformation in device memory; the size is
    // computed in size_t so width * height cannot overflow an int.
    if (err == cudaSuccess)
        err = cudaMalloc(&output, (size_t)width * height * sizeof(float));

    if (err == cudaSuccess) {
        // Invoke kernel; the grid is rounded up to cover the whole image
        dim3 dimBlock(16, 16);
        dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
            (height + dimBlock.y - 1) / dimBlock.y);
        transformKernel<<<dimGrid, dimBlock>>>(output,
            texObj, width, height,
            angle);
        // A launch does not return an error itself; query it explicitly.
        err = cudaGetLastError();
    }

    // The launch is asynchronous: wait for the kernel before releasing the
    // texture it reads from, and pick up any execution error.
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();

    // Destroy texture object
    if (texCreated)
        cudaDestroyTextureObject(texObj);
    // Free device memory
    cudaFreeArray(cuArray);
    cudaFree(output);
    return err == cudaSuccess ? 0 : 1;
}

纹理引用API：
纹理引用的一些属性是不可变的，必须在编译时确定；它们在声明纹理引用时指定。纹理引用在文件作用域内声明为texture类型的变量：

// Schematic file-scope declaration; DataType, Type and ReadMode are placeholders.
texture<DataType, Type, ReadMode> texRef;

其中：

DataType指定纹素（texel）的类型;
Type指定纹理引用的类型：对于一维、二维或三维纹理，分别为cudaTextureType1D、cudaTextureType2D或cudaTextureType3D；对于一维或二维分层纹理，分别为cudaTextureType1DLayered或cudaTextureType2DLayered；Type是一个可选参数，默认为cudaTextureType1D;
ReadMode指定读取模式; 它是一个可选参数，默认为cudaReadModeElementType

纹理引用只能被声明为静态全局变量，不能作为参数传递给函数。
纹理引用的其他属性是可变的，可以在运行时通过主机运行时API更改。如参考手册中所述，运行时API具有低级的C风格接口和高级的C++风格接口。texture类型在高级API中定义为一个结构体，它公开派生自低级API中定义的textureReference类型，后者定义如下：

// Low-level texture reference type; the high-level texture type derives
// publicly from it. Fields without a description here are covered by the
// CUDA reference manual.
struct textureReference {
    int normalized;                                // whether texture coordinates are normalized
    enum cudaTextureFilterMode filterMode;         // filtering mode
    enum cudaTextureAddressMode addressMode[3];    // addressing mode
    struct cudaChannelFormatDesc channelDesc;      // texel format; must match the DataType
                                                   // argument of the texture reference declaration
    int sRGB;                                      // see the reference manual
    unsigned int maxAnisotropy;                    // see the reference manual
    enum cudaTextureFilterMode mipmapFilterMode;   // see the reference manual
    float mipmapLevelBias;                         // see the reference manual
    float minMipmapLevelClamp;                     // see the reference manual
    float maxMipmapLevelClamp;                     // see the reference manual
};

normalized指定纹理坐标是否标准化;
filterMode指定过滤模式;
addressMode指定寻址模式;
channelDesc描述纹理的格式; 它必须匹配纹理引用声明的DataType参数; channelDesc是以下类型的：

// Describes the format of the value returned when fetching a texture.
struct cudaChannelFormatDesc {
    int x, y, z, w;                 // number of bits of each component of the returned value
    enum cudaChannelFormatKind f;   // component kind: cudaChannelFormatKindSigned,
                                    // cudaChannelFormatKindUnsigned or cudaChannelFormatKindFloat
};

其中x，y，z和w等于返回值的每个分量的位数，f是：

如果这些分量是有符号整数类型，则为cudaChannelFormatKindSigned，
如果它们是无符号整数类型，则为cudaChannelFormatKindUnsigned，
如果它们是浮点类型，则为cudaChannelFormatKindFloat。

关于sRGB、maxAnisotropy、mipmapFilterMode、mipmapLevelBias、minMipmapLevelClamp和maxMipmapLevelClamp，请参阅参考手册。

normalized、addressMode和filterMode可以在主机代码中直接修改。在内核可以使用纹理引用从纹理存储器读取之前，必须先绑定纹理引用：使用cudaBindTexture（）或cudaBindTexture2D（）将其绑定到线性内存，或者使用cudaBindTextureToArray（）将其绑定到CUDA数组。cudaUnbindTexture（）用于解除纹理引用的绑定。一旦纹理引用被解除绑定，即使使用先前绑定的纹理的内核还没有完成，也可以安全地将其重新绑定到另一个数组。建议使用cudaMallocPitch（）在线性内存中分配二维纹理，并将cudaMallocPitch（）返回的间距（pitch）作为cudaBindTexture2D（）的输入参数。
以下代码示例将2D纹理引用绑定到由devPtr指向的线性内存：
使用低级API：

// Low-level API: bind a 2D texture reference to the linear memory at devPtr.
// NOTE(review): devPtr, width, height and pitch are not declared in this
// snippet; they are assumed to come from the surrounding program.
texture<float, cudaTextureType2D,
    cudaReadModeElementType> texRef;
// cudaGetTextureReference() takes a const textureReference**, so the
// pointer it fills in must itself point to const.
const textureReference* texRefPtr;
cudaGetTextureReference(&texRefPtr, &texRef);
cudaChannelFormatDesc channelDesc =
cudaCreateChannelDesc<float>();
// Receives the offset reported by cudaBindTexture2D().
size_t offset;
cudaBindTexture2D(&offset, texRefPtr, devPtr, &channelDesc,
    width, height, pitch);

使用高级API：

// High-level API: bind a 2D texture reference to the linear memory at devPtr.
// Unlike the low-level call, the texture reference and the channel
// descriptor are passed directly rather than through pointers.
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
// Receives the offset reported by cudaBindTexture2D().
size_t offset;
cudaBindTexture2D(&offset, texRef, devPtr, channelDesc, width, height, pitch);

以下代码示例将2D纹理引用绑定到CUDA数组cuArray：
使用低级API：

// Low-level API: bind a 2D texture reference to the CUDA array cuArray.
// NOTE(review): cuArray is not declared in this snippet; it is assumed to
// come from the surrounding program.
texture<float, cudaTextureType2D,
    cudaReadModeElementType> texRef;
// cudaGetTextureReference() takes a const textureReference**, so the
// pointer it fills in must itself point to const.
const textureReference* texRefPtr;
cudaGetTextureReference(&texRefPtr, &texRef);
cudaChannelFormatDesc channelDesc;
cudaGetChannelDesc(&channelDesc, cuArray);
// The pointer-based overload expects the textureReference pointer obtained
// above, not the texture object itself.
cudaBindTextureToArray(texRefPtr, cuArray, &channelDesc);

使用高级API：

// High-level API: bind a 2D texture reference to the CUDA array cuArray.
// No channel descriptor is passed in this form.
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
cudaBindTextureToArray(texRef, cuArray);

将纹理绑定到纹理引用时指定的格式必须与声明纹理引用时指定的参数相匹配；否则，纹理提取的结果是未定义的。
如表所示，可以绑定到内核的纹理数量是有限制的

以下代码示例将一些简单的转换内核应用于纹理。

// File-scope texture reference: 2D texture of float texels read with
// cudaReadModeElementType. The kernel reads through it and the host code
// configures and binds it.
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
// Simple transformation kernel.
//
// For every output pixel (x, y) the kernel computes normalized texture
// coordinates, rotates them by theta around the point (0.5, 0.5), fetches
// the file-scope texture reference texRef at the rotated position and stores
// the value in output.
//
// Launch layout: 2D grid of 2D blocks, one thread per output pixel. The grid
// may be larger than width x height (the host rounds the grid size up), so
// threads that fall outside the image return without touching memory.
//
// Parameters:
//   output  - device buffer of at least width * height floats, row-major
//   width   - image width in pixels
//   height  - image height in pixels
//   theta   - rotation angle passed to cosf/sinf (radians)
__global__ void transformKernel(float* output,
    int width, int height,
    float theta)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Tail threads of a rounded-up grid must not write past the buffer.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Calculate normalized texture coordinates
    float u = x / (float)width;
    float v = y / (float)height;

    // Transform coordinates: rotate around the texture centre
    u -= 0.5f;
    v -= 0.5f;
    const float cosTheta = cosf(theta);
    const float sinTheta = sinf(theta);
    float tu = u * cosTheta - v * sinTheta + 0.5f;
    float tv = v * cosTheta + u * sinTheta + 0.5f;

    // Read from texture and write to global memory; the index is widened so
    // the row offset cannot wrap for large images.
    output[(size_t)y * width + x] = tex2D(texRef, tu, tv);
}
// Host code
// Host code: uploads h_data into a CUDA array, binds the array to the
// file-scope texture reference texRef, runs transformKernel and releases
// every resource.
//
// NOTE(review): width, height, h_data, size and angle are not declared in
// this snippet; they are assumed to be provided by the surrounding program.
//
// Returns 0 on success and 1 if any CUDA call or the kernel itself fails.
int main()
{
    cudaError_t err = cudaSuccess;
    cudaArray* cuArray = 0;
    float* output = 0;
    bool texBound = false;

    // Allocate CUDA array in device memory (one 32-bit float channel)
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(32, 0, 0, 0,
            cudaChannelFormatKindFloat);
    err = cudaMallocArray(&cuArray, &channelDesc, width, height);

    // Copy to device memory some data located at address h_data
    // in host memory.
    // NOTE(review): cudaMemcpyToArray is marked deprecated in recent CUDA
    // toolkits; cudaMemcpy2DToArray is the documented replacement.
    if (err == cudaSuccess)
        err = cudaMemcpyToArray(cuArray, 0, 0, h_data, size,
            cudaMemcpyHostToDevice);

    if (err == cudaSuccess) {
        // Set texture reference parameters
        texRef.addressMode[0] = cudaAddressModeWrap;
        texRef.addressMode[1] = cudaAddressModeWrap;
        texRef.filterMode = cudaFilterModeLinear;
        texRef.normalized = true;

        // Bind the array to the texture reference
        err = cudaBindTextureToArray(texRef, cuArray, channelDesc);
        texBound = (err == cudaSuccess);
    }

    // Allocate result of transformation in device memory; the size is
    // computed in size_t so width * height cannot overflow an int.
    if (err == cudaSuccess)
        err = cudaMalloc(&output, (size_t)width * height * sizeof(float));

    if (err == cudaSuccess) {
        // Invoke kernel; the grid is rounded up to cover the whole image
        dim3 dimBlock(16, 16);
        dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
            (height + dimBlock.y - 1) / dimBlock.y);
        transformKernel<<<dimGrid, dimBlock>>>(output, width, height,
            angle);
        // A launch does not return an error itself; query it explicitly.
        err = cudaGetLastError();
    }

    // The launch is asynchronous: wait for the kernel before releasing the
    // array it reads from, and pick up any execution error.
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();

    // Unbind the texture reference before its backing array is freed
    if (texBound)
        cudaUnbindTexture(texRef);
    // Free device memory
    cudaFreeArray(cuArray);
    cudaFree(output);
    return err == cudaSuccess ? 0 : 1;
}

v2_b3e48725bdfbdcfc7b4f3ec994ef012b_hd