1、2线程中，无竞争的原子操作的性能_线程竞争原子操作-CSDN博客

本文链接：https://blog.csdn.net/superzmy/article/details/106267287

#include <windows.h>
#include <thread>


	enum {Count = 40000000};
 	union 
    {
        volatile long valueA;
        char _1[64];
    };

    union
    {
        volatile long valueB;
        char _2[64];
    };

    union
    {
        volatile long valueC;
        char _3[64];
    };
    for (int j = 0; j < 10; ++j)
    {
        if (j == 3) //第3+1轮开启一个读线程读A
        {
            puts("以下开启线程 read A-------------------");
            std::thread
            ([pA = &valueA, pB = &valueB, pC = &valueC]()
                {
                    volatile long v;
                    while (true)
                    {
                        v = *pA;
                        for (int i = 0; i < 1; ++i)
                            _mm_pause();
                        Sleep(0);
                    }
                }).detach();
        }
        else if (j == 6) //第6+1轮再开启一个读线程读B
        {
            puts("以下开启线程 read B-------------------");
            std::thread
            ([pA = &valueA, pB = &valueB, pC = &valueC]()
                {
                    volatile long v;
                    while (true)
                    {
                        v = *pB;
                        for (int i = 0; i < 1; ++i)
                            _mm_pause();
                        Sleep(0);
                    }
                }).detach();
        }

        {
            uint32_t t = GetTickCount();
            for (int i = 0; i < Count; ++i)
            {
                InterlockedIncrement(&valueA);
            }
            t = GetTickCount() - t;
            printf("同一个变量          {incA} =              %dM组/s,  *1=%dM/s\n", Count / t / 1000, 1 * Count / t / 1000);
        }

        {
            uint32_t t = GetTickCount();
            for (int i = 0; i < Count; ++i)
            {
                InterlockedIncrement(&valueA);
                InterlockedDecrement(&valueA);
            }
            t = GetTickCount() - t;
            printf("同一个变量          {incA, decA} =         %dM组/s,  *2=%dM/s\n", Count / t / 1000, 2 * Count / t / 1000);
        }

        {
            uint32_t t = GetTickCount();
            for (int i = 0; i < Count; ++i)
            {
                InterlockedIncrement(&valueA);
                InterlockedIncrement(&valueB);
            }
            t = GetTickCount() - t;
            printf("不同cacheline变量   {incA, incB } =        %dM组/s,  *2=%dM/s\n", Count / t / 1000, 2 * Count / t / 1000);
        }

        {
            uint32_t t = GetTickCount();
            for (int i = 0; i < Count; ++i)
            {
                InterlockedIncrement(&valueA);
                InterlockedIncrement(&valueB);
                InterlockedIncrement(&valueC);
            }
            t = GetTickCount() - t;
            printf("不同cacheline变量   {incA, incB, incC } =  %dM组/s,  *3=%dM/s\n", Count / t / 1000, 3 * Count / t / 1000);
        }

        puts("");
    }

同一个变量 {incA} = 150M组/s, *1=150M/s
同一个变量 {incA, decA} = 75M组/s, *2=150M/s
不同cacheline变量 {incA, incB } = 88M组/s, *2=176M/s
不同cacheline变量 {incA, incB, incC } = 49M组/s, *3=147M/s

同一个变量 {incA} = 160M组/s, *1=160M/s
同一个变量 {incA, decA} = 77M组/s, *2=155M/s
不同cacheline变量 {incA, incB } = 88M组/s, *2=176M/s
不同cacheline变量 {incA, incB, incC } = 49M组/s, *3=147M/s

同一个变量 {incA} = 160M组/s, *1=160M/s
同一个变量 {incA, decA} = 77M组/s, *2=155M/s
不同cacheline变量 {incA, incB } = 91M组/s, *2=183M/s
不同cacheline变量 {incA, incB, incC } = 49M组/s, *3=147M/s

以下开启线程 read A-------------------
同一个变量 {incA} = 121M组/s, *1=121M/s
同一个变量 {incA, decA} = 64M组/s, *2=128M/s
不同cacheline变量 {incA, incB } = 71M组/s, *2=142M/s
不同cacheline变量 {incA, incB, incC } = 39M组/s, *3=118M/s

同一个变量 {incA} = 128M组/s, *1=128M/s
同一个变量 {incA, decA} = 61M组/s, *2=122M/s
不同cacheline变量 {incA, incB } = 71M组/s, *2=142M/s
不同cacheline变量 {incA, incB, incC } = 41M组/s, *3=124M/s

同一个变量 {incA} = 121M组/s, *1=121M/s
同一个变量 {incA, decA} = 61M组/s, *2=122M/s
不同cacheline变量 {incA, incB } = 73M组/s, *2=146M/s
不同cacheline变量 {incA, incB, incC } = 41M组/s, *3=124M/s

以下开启线程 read B-------------------
同一个变量 {incA} = 121M组/s, *1=121M/s
同一个变量 {incA, decA} = 64M组/s, *2=128M/s
不同cacheline变量 {incA, incB } = 64M组/s, *2=128M/s
不同cacheline变量 {incA, incB, incC } = 37M组/s, *3=111M/s

同一个变量 {incA} = 128M组/s, *1=128M/s
同一个变量 {incA, decA} = 62M组/s, *2=125M/s
不同cacheline变量 {incA, incB } = 64M组/s, *2=128M/s
不同cacheline变量 {incA, incB, incC } = 36M组/s, *3=108M/s

同一个变量 {incA} = 121M组/s, *1=121M/s
同一个变量 {incA, decA} = 64M组/s, *2=128M/s
不同cacheline变量 {incA, incB } = 64M组/s, *2=128M/s
不同cacheline变量 {incA, incB, incC } = 37M组/s, *3=111M/s

同一个变量 {incA} = 121M组/s, *1=121M/s
同一个变量 {incA, decA} = 62M组/s, *2=125M/s
不同cacheline变量 {incA, incB } = 64M组/s, *2=128M/s
不同cacheline变量 {incA, incB, incC } = 34M组/s, *3=103M/s

此外，同时运行两份此测试进程时，每份的结果与只运行一份时几乎无差异；同时运行三份时，每份的数字也能达到只运行一份时的 2/3 以上。