借 attribute 引入 The GNU C Reference Manual

最新推荐文章于 2025-02-14 21:07:52 发布

Eloudy

最新推荐文章于 2025-02-14 21:07:52 发布

阅读量656

点赞数

分类专栏： blas cuda 并行计算文章标签： gnu c语言 p2p

本文链接：https://blog.youkuaiyun.com/eloudy/article/details/123031804

版权

blas 同时被 3 个专栏收录

65 篇文章

订阅专栏

cuda

34 篇文章

订阅专栏

并行计算

21 篇文章

订阅专栏

__attribute__ 是 GNU C 扩展提供的关键字，用于在编译期为声明（变量、类型、函数等）附加属性；

话题文档主页：

The GNU C Reference Manual - GNU Project - Free Software Foundation

在安装了 CUDA Toolkit 的 Linux 系统中（该头文件随 CUDA 提供，并非 Linux 自带），文件 /usr/include/crt/host_defines.h 的第 70 多行处有这么一个定义：

#define __align__(n) \
        __attribute__((aligned(n)))

在这个文件的前面几行有如下信息：

#if defined(__GNUC__) || defined(__CUDA_LIBDEVICE__) || defined(__CUDACC_RTC__)

也就是如下意思：

#if defined(__GNUC__) || defined(__CUDA_LIBDEVICE__) || defined(__CUDACC_RTC__)

    ...

#define __align__(n) \
        __attribute__((aligned(n)))

    ...

#endif

可以做两个认识：

1. CUDA 的设备代码，也就是 cuda kernel，很大程度上遵循了 GNU C 的规范；换句话说，基于 LLVM 的 nvcc 编译器支持 GNU C 的扩展语法。

2. 需要明确：按这种方式定义的 __align__(n) 仅适用于支持 GNU C 扩展的编译器，如 gcc 和 clang（基于 LLVM）；

————————————————————————————————————

再看一段安装了 cuda 驱动后的头文件中的信息：

#if defined(__CUDACC__)
#define __CUDA_ALIGN__(align) __align__(align)
#else
/* Define alignment macro based on compiler type (cannot assume C11 "_Alignas" is available) */
#if __cplusplus >= 201103L
#define __CUDA_ALIGN__(n) alignas(n)    /* C++11 kindly gives us a keyword for this */
#else /* !(__cplusplus >= 201103L)*/
#if defined(__GNUC__) /* || defined(__IBMC__) || defined(__clang__) || defined(__PGI) */
#define __CUDA_ALIGN__(n) __attribute__ ((aligned(n)))
#elif defined(_MSC_VER) /* || defined(__ICC) */
#define __CUDA_ALIGN__(n) __declspec(align(n))
#else
#define __CUDA_ALIGN__(n)
#endif /* defined(__GNUC__) */
#endif /* __cplusplus >= 201103L */
#endif /* defined(__CUDACC__) */

其中第一个分支的条件是定义了 __CUDACC__；该分支定义的宏在展开之后，与 __GNUC__ 分支中的定义实际上相同：

#if defined(__CUDACC__)
#define __CUDA_ALIGN__(align) __align__(align)


...


#if defined(__GNUC__) /* || defined(__IBMC__) || defined(__clang__) || defined(__PGI) */
#define __CUDA_ALIGN__(n) __attribute__ ((aligned(n)))

在 Linux中 /usr/include/crt/host_defines.h的定义：

#define __align__(n) \
        __attribute__((aligned(n)))

即，由上面这一行可得：

#define __CUDA_ALIGN__(align) __align__(align)

就是

#define __CUDA_ALIGN__(n) __attribute__ ((aligned(n)))

根本出处在于 GNU C的 __attribute__ 的 aligned(n) 操作。

——————————————————————————————————————————

__attribute__ ((aligned(n))) 小示例：

#include <stdio.h>

/*
 * A struct holding three shorts, with its alignment raised to 8 bytes.
 * The program below prints sizeof for this type and for a plain short[3]
 * so the effect of the attribute on the object size can be compared.
 */
struct Score {
        short b[3];
} __attribute__ ((aligned (8)));

/*
 * An int typedef whose requested alignment (8) is larger than sizeof(int)
 * on targets where int is 4 bytes.
 * NOTE(review): the name is the same as the <stdint.h> typedef; this example
 * only includes <stdio.h>, so adding <stdint.h> here may cause a clash.
 */
typedef int int32_t __attribute__ ((aligned (8)));

/*
 * Print the sizes of plain objects/arrays next to their aligned(8)
 * counterparts. The variables are only used as sizeof operands.
 */
int main(void)
{
        short or_B[3];
        struct Score B;

        short or_B_array[9];
        struct Score B_array[3];

        int or_C[9];
        /*
         * "int32_t C[9];" cannot be compiled: the alignment of the array
         * elements is greater than the element size
         * (sizeof(int) is 4 bytes, but the typedef is aligned(8), and 8 > 4).
         */

        /* sizeof yields size_t, so the matching conversion is %zu, not %ld. */
        printf("sizeof(or_B)=%zu, sizeof(B)=%zu, sizeof(or_B_array)=%zu, "
               "sizeof(B_array)=%zu, sizeof(or_C)=%zu\n",
               sizeof(or_B), sizeof(B), sizeof(or_B_array),
               sizeof(B_array), sizeof(or_C));

        return 0;
}

效果图：

副作用：typedef int int32_t __attribute__ ((aligned (8)));

这时无法用 int32_t 来定义数组：数组元素必须连续存放，相邻元素的间隔等于元素大小，即 sizeof(int) = 4 个 bytes；而该类型又要求每个元素按 8 个 bytes 对齐，两者无法同时满足，编译器也就无法让程序按照类型和索引来寻址数组的元素，因此拒绝编译。

可以用

/*
 * Wraps a single int in a struct whose alignment is raised to 8 bytes.
 * Unlike an over-aligned int typedef, this struct type can be used as an
 * array element type (the second example program declares such an array).
 */
struct int32_str{
        int x;
} __attribute__((aligned(8)));

这时，结构体的大小会被填充为对齐值 8 bytes 的整数倍，数组中相邻元素的间隔也就是 8 bytes，编译器可以按照对齐的 8 bytes 来索引这个类型的数组元素。

示例代码：

#include <stdio.h>

/*
 * A struct holding three shorts, with its alignment raised to 8 bytes.
 */
struct Score {
        short b[3];
} __attribute__ ((aligned (8)));

/*
 * A single int wrapped in a struct aligned to 8 bytes. Because the alignment
 * is applied to the struct, the type can be used as an array element
 * (see D_array below).
 */
struct int32_str{
        int x;
} __attribute__((aligned(8)));

/*
 * An int typedef whose requested alignment (8) is larger than sizeof(int)
 * on targets where int is 4 bytes.
 * NOTE(review): the name is the same as the <stdint.h> typedef; this example
 * only includes <stdio.h>, so adding <stdint.h> here may cause a clash.
 */
typedef int int32_t __attribute__ ((aligned (8)));

/*
 * Print the sizes of plain objects/arrays next to their aligned(8)
 * counterparts. The variables are only used as sizeof operands.
 */
int main(void)
{
        short or_B[3];
        struct Score B;

        short or_B_array[9];
        struct Score B_array[3];

        int or_C[9];
        /*
         * "int32_t C[9];" cannot be compiled: the alignment of the array
         * elements is greater than the element size
         * (sizeof(int) is 4 bytes, but the typedef is aligned(8), and 8 > 4).
         */
        struct int32_str D_array[9];

        /* sizeof yields size_t, so the matching conversion is %zu, not %ld. */
        printf(" sizeof(or_B)=%zu \n sizeof(B)=%zu \n\n"
               "sizeof(or_B_array)=%zu \n sizeof(B_array)=%zu \n\n"
               " sizeof(or_C)=%zu \n sizeof(D_array)=%zu\n\n",
               sizeof(or_B),       sizeof(B),
               sizeof(or_B_array), sizeof(B_array),
               sizeof(or_C),       sizeof(D_array));

        return 0;
}

效果图：