Windows和pthread中提供的自旋锁

Windows和POSIX中都提供了自旋锁,我们也可以通过C++11的atomic来实现自旋锁。那么系统提供的自旋锁与基于atomic自行实现的自旋锁,在性能上是什么关系?先给出实现代码:

#ifndef __spinlock_h__
#define __spinlock_h__

#include <atomic>

#ifdef _WIN32

#include <Windows.h>

class spinlock_mutex
{
public:
    static constexpr DWORD SPINLOCK_COUNT = -1;
public:
    // 在初始化时,会出现资源不足的问题,这里忽略这个问题
    // 具体参考Critical Sections and Error Handling(Windows via C/C++)
    spinlock_mutex()
    {
        InitializeCriticalSectionAndSpinCount(&m_cs, SPINLOCK_COUNT);
    }

    ~spinlock_mutex()
    {
        DeleteCriticalSection(&m_cs);
    }

    void lock()
    {
        EnterCriticalSection(&m_cs);
    }

    bool try_lock()
    {
        return TryEnterCriticalSection(&m_cs) == TRUE;
    }

    void unlock()
    {
        LeaveCriticalSection(&m_cs);
    }

private:
    CRITICAL_SECTION m_cs;
};

#elif defined(_POSIX_C_SOURCE)

#include <pthread.h>

// Lockable wrapper (lock / try_lock / unlock) around a process-private
// pthread_spinlock_t.
class spinlock_mutex
{
public:
    // Return codes of the pthread calls are not checked anywhere in this class.
    spinlock_mutex()
    {
        pthread_spin_init(&m_cs, PTHREAD_PROCESS_PRIVATE);
    }

    ~spinlock_mutex()
    {
        pthread_spin_destroy(&m_cs);
    }

    // The object owns a pthread spinlock: a copy would duplicate the lock
    // state and destroy the same lock twice, so copying is disabled.
    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    // Spins until the lock has been acquired.
    void lock()
    {
        pthread_spin_lock(&m_cs);
    }

    // Attempts to acquire the lock without spinning; returns true on success
    // (pthread_spin_trylock returns 0 when the lock was taken).
    bool try_lock()
    {
        return pthread_spin_trylock(&m_cs) == 0;
    }

    // Releases the lock.
    void unlock()
    {
        pthread_spin_unlock(&m_cs);
    }

private:
    pthread_spinlock_t m_cs;
};

#else

// Portable fallback: a test-and-set spinlock built on std::atomic_flag.
// Provides lock / try_lock / unlock, so it works with std::lock_guard.
class spinlock_mutex
{
    // `= ATOMIC_FLAG_INIT` is the one initialization form the standard
    // specifies for this macro; it leaves the flag clear (unlocked).
    std::atomic_flag flag = ATOMIC_FLAG_INIT;

public:
    spinlock_mutex() = default;

    // std::atomic_flag cannot be copied; state that explicitly for the lock.
    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    // Busy-waits until the flag could be set by this thread.
    void lock()
    {
        while (flag.test_and_set(std::memory_order_acquire))
        {
            // Intentionally empty: keep retrying until the holder clears the flag.
        }
    }

    // Clears the flag; the release pairs with the acquire in lock()/try_lock().
    void unlock()
    {
        flag.clear(std::memory_order_release);
    }

    // Single attempt to set the flag; true when the flag was previously clear.
    bool try_lock()
    {
        return !flag.test_and_set(std::memory_order_acquire);
    }
};

#endif


#endif    // __spinlock_h__

下面给出一个简单测试,两组线程,一组用来插入,另外一组用来取出。测试结果显示:

(1)无论是Windows,还是POSIX提供的C语言版本的自旋锁,都和C++11使用atomic构建的自旋锁效率相近。

(2)在插入线程数和取出线程数相同的情况下,线程数越多,效率越低。

下面是测试代码:

#include <cassert>
#include <ctime>

#include <atomic>
#include <chrono>
#include <forward_list>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <random>
#include <thread>
#include <vector>

#include "spinlock.h"


// Singly linked node that stores one age value.
struct student_name
{
    // Value carried by this node.
    int age;

    // Following node; a newly created node is not linked to anything.
    student_name* next = nullptr;

    // Builds an unlinked node. The age is 0 when no argument is given.
    student_name(int initialAge = 0)
        : age(initialAge)
    {
    }
};



// Lock taken around every access to g_students.
spinlock_mutex g_mtx;
// Shared list: producers push ages onto it, consumers swap it out and drain it.
std::forward_list<int> g_students;


// Bookkeeping counters; every update in this file uses relaxed atomics.
std::atomic<int> g_inserts{ 0 };   // number of successful inserts
std::atomic<int> g_drops{ 0 };     // number of successful drops

std::atomic<int> g_printNum{ 0 };  // incremented together with g_drops

std::atomic<long long> g_ageInSum{ 0 };   // sum of all ages generated by producers
std::atomic<long long> g_ageOutSum{ 0 };  // sum of all ages seen by consumers

// Consumers keep polling while this is true.
std::atomic<bool> goOn{ true };

// Number of producer and consumer threads started by main().
constexpr int INSERT_THREAD_NUM = 1;
constexpr int DROP_THREAD_NUM = 1;

// Items produced by each producer thread. The original note warns against
// raising it: about 2,000,000 * 100 * 10 is already close to INT_MAX when up
// to 10 threads are used.
constexpr int ONE_THREAD_PRODUCE_NUM = 5000000;

// Accounts for one consumed node in the global counters and then frees it.
// The caller hands over ownership of `t`; the pointer must not be used afterwards.
inline void printOne(student_name* t)
{
    constexpr std::memory_order relaxed = std::memory_order_relaxed;

    g_printNum.fetch_add(1, relaxed);
    g_ageOutSum.fetch_add(t->age, relaxed);
    g_drops.fetch_add(1, relaxed);

    delete t;
}

// Producer: generates ONE_THREAD_PRODUCE_NUM random ages in [1, 99] and pushes
// each one onto g_students under g_mtx, recording the running age sum and the
// insert count in the global counters.
//
// idNo: index of this producer; mixed into the seed so that producers started
//       at the same moment do not all generate the same age sequence.
void insert_students(int idNo)
{
    // Seeding from std::random_device (already available through <random>)
    // replaces the former time(nullptr) seed, which was identical for every
    // thread started within the same second.
    std::default_random_engine dre(std::random_device{}() + static_cast<unsigned int>(idNo));
    std::uniform_int_distribution<int> ageDi(1, 99);

    for (int i = 0; i < ONE_THREAD_PRODUCE_NUM; ++i)
    {
        const int newAge = ageDi(dre);
        g_ageInSum.fetch_add(newAge, std::memory_order_relaxed);

        {
            // Hold the lock only for the push itself.
            std::lock_guard<spinlock_mutex> lock(g_mtx);
            g_students.push_front(newAge);
        }

        // Relaxed on purpose: the counter is pure bookkeeping and should not
        // add ordering constraints of its own to the code being measured.
        g_inserts.fetch_add(1, std::memory_order_relaxed);
    }
}

void drop_students(int idNo)
{
    while (auto go = goOn.load(std::memory_order_consume))
    {
        {
            std::forward_list<int> tmp;
            {
                std::lock_guard<spinlock_mutex> lock(g_mtx);
                std::swap(g_students, tmp);
            }
            auto it = tmp.begin();
            while (it != tmp.end())
            {
                g_printNum.fetch_add(1, std::memory_order_relaxed);
                g_ageOutSum.fetch_add(*it, std::memory_order_relaxed);
                g_drops.fetch_add(1, std::memory_order_relaxed);
                ++it;
            }
        }
    }
}

// Benchmark driver: starts the producer and consumer tasks, waits for the
// producers, stops the consumers, drains whatever is still in the shared list
// and prints the elapsed time together with the bookkeeping counters.
int main()
{
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by adjustments of the wall clock.
    const auto start = std::chrono::steady_clock::now();

    std::vector<std::future<void>> insert_threads;
    std::vector<std::future<void>> drop_threads;
    insert_threads.reserve(INSERT_THREAD_NUM);
    drop_threads.reserve(DROP_THREAD_NUM);

    for (int i = 0; i != INSERT_THREAD_NUM; ++i)
    {
        insert_threads.push_back(std::async(std::launch::async, insert_students, i));
    }

    for (int i = 0; i != DROP_THREAD_NUM; ++i)
    {
        drop_threads.push_back(std::async(std::launch::async, drop_students, i));
    }

    for (auto& producer : insert_threads)
    {
        producer.get();
    }

    // No grace-period sleep here: it only added a fixed second to the measured
    // time. Anything the consumers have not picked up yet is drained below.
    goOn.store(false, std::memory_order_release);

    for (auto& consumer : drop_threads)
    {
        consumer.get();
    }

    {
        // Final drain of items left behind after the consumers stopped.
        std::forward_list<int> leftover;
        {
            std::lock_guard<spinlock_mutex> lock(g_mtx);
            g_students.swap(leftover);
        }

        for (const int age : leftover)
        {
            g_printNum.fetch_add(1, std::memory_order_relaxed);
            g_ageOutSum.fetch_add(age, std::memory_order_relaxed);
            g_drops.fetch_add(1, std::memory_order_relaxed);
        }
    }

    const auto end = std::chrono::steady_clock::now();
    const std::chrono::duration<double> diff = end - start;
    std::cout << "Time to insert and drop is: " << diff.count() << " s\n";

    // Insert and drop counts (and the two age sums) are expected to match.
    std::cout << "insert count1: " << g_inserts.load() << '\n';
    std::cout << "drop count1: " << g_drops.load() << '\n';
    std::cout << "print num1: " << g_printNum.load() << '\n';

    std::cout << "age in1: " << g_ageInSum.load() << '\n';
    std::cout << "age out1: " << g_ageOutSum.load() << '\n';

    // Single flush at the end instead of one per line.
    std::cout << std::endl;
}

关于自旋锁,还有以下内容需要说明:

(1)应用层用spinlock的最大问题是不能像kernel一样关中断(cli/sti)。假设并发稍微多一点,线程1在lock之后、unlock之前发生了时钟中断,
要过一段时间才会被切回来调用unlock,那么这段时间中另一个调用lock的线程不就得空跑while了?这才是最浪费cpu时间的地方。
所以不能关中断就只能sleep了,怎么着都存在巨大的冲突代价。

(2)具体参考:https://www.zhihu.com/question/55764216

转载于:https://www.cnblogs.com/albizzia/p/8870886.html

<think>我们正在讨论Windows环境下替代pthread_mutex_t的同步机制。在Windows中,最接近的替代品是CRITICAL_SECTION互斥体(Mutex)。以下将详细说明这两种机制。 ### 1. CRITICAL_SECTION(临界区) - **特点**:轻量级,仅在同一进程的线程间同步,速度较快。 - **使用步骤**: 1. 声明:`CRITICAL_SECTION cs;` 2. 初始化:`InitializeCriticalSection(&cs);` 3. 进入临界区:`EnterCriticalSection(&cs);` 4. 离开临界区:`LeaveCriticalSection(&cs);` 5. 销毁:`DeleteCriticalSection(&cs);` - **示例代码**: ```c #include <windows.h> CRITICAL_SECTION cs; int shared_data = 0; DWORD WINAPI ThreadFunc(LPVOID lpParam) { EnterCriticalSection(&cs); shared_data++; LeaveCriticalSection(&cs); return 0; } int main() { InitializeCriticalSection(&cs); HANDLE hThreads[2]; hThreads[0] = CreateThread(NULL, 0, ThreadFunc, NULL, 0, NULL); hThreads[1] = CreateThread(NULL, 0, ThreadFunc, NULL, 0, NULL); WaitForMultipleObjects(2, hThreads, TRUE, INFINITE); DeleteCriticalSection(&cs); return 0; } ``` ### 2. Mutex(互斥体) - **特点**:重量级,支持跨进程同步,但速度相对较慢。 - **使用步骤**: 1. 创建:`CreateMutex(NULL, FALSE, NULL);` // 第二个参数为初始拥有权,FALSE表示创建后不立即拥有 2. 等待获取:`WaitForSingleObject(hMutex, INFINITE);` 3. 释放:`ReleaseMutex(hMutex);` 4. 关闭句柄:`CloseHandle(hMutex);` - **示例代码**: ```c #include <windows.h> HANDLE hMutex; int shared_data = 0; DWORD WINAPI ThreadFunc(LPVOID lpParam) { WaitForSingleObject(hMutex, INFINITE); // 等待互斥体 shared_data++; ReleaseMutex(hMutex); // 释放互斥体 return 0; } int main() { hMutex = CreateMutex(NULL, FALSE, NULL); HANDLE hThreads[2]; hThreads[0] = CreateThread(NULL, 0, ThreadFunc, NULL, 0, NULL); hThreads[1] = CreateThread(NULL, 0, ThreadFunc, NULL, 0, NULL); WaitForMultipleObjects(2, hThreads, TRUE, INFINITE); CloseHandle(hMutex); return 0; } ``` ### 3. 选择建议 - **CRITICAL_SECTION**:适用于同一进程内线程同步,性能高[^1]。 - **Mutex**:需要跨进程同步时使用,或需要等待超时功能时(`WaitForSingleObject`支持超时设置)。 ### 4. 
与pthread_mutex_t的对比 | 特性 | pthread_mutex_t | Windows CRITICAL_SECTION | Windows Mutex | |-------------------|----------------------|---------------------------|---------------| | 进程内同步 | 支持 | 支持 | 支持 | | 跨进程同步 | 不支持 | 不支持 | 支持 | | 递归 | 支持(通过属性设置) | 支持(默认) | 支持 | | 性能 | 较高 | 高 | 较低 | | 超时等待 | 支持`pthread_mutex_timedlock` | 不支持(需自旋) | 支持`WaitForSingleObject`超时参数 | ### 5. 补充:Slim读写Windows Vista+) 对于读写场景,Windows提供了`SRWLOCK`(轻量读写): ```c #include <windows.h> SRWLOCK srwLock; // 初始化(无需销毁) InitializeSRWLock(&srwLock); // 获取共享(读) AcquireSRWLockShared(&srwLock); // ... 读操作 ... ReleaseSRWLockShared(&srwLock); // 获取独占(写) AcquireSRWLockExclusive(&srwLock); // ... 写操作 ... ReleaseSRWLockExclusive(&srwLock); ``` > 📌 **注意**:`SRWLOCK`不支持递归加,且无销毁函数。 ### 总结 在Windows环境下: - 优先使用`CRITICAL_SECTION`替代进程内互斥[^2]。 - 需要跨进程时使用`Mutex`。 - 读写场景考虑`SRWLOCK`(Vista及以上系统)。 §§ 相关问题 §§ 1. Windows中如何实现条件变量同步? 2. CRITICAL_SECTIONMutex在性能上有多少差异? 3. 如何在Windows中模拟pthread的递归?</think>### Windows 环境下替代 `pthread_mutex_t` 的同步机制 在 Windows 平台中,替代 POSIX `pthread_mutex_t` 的主要同步结构体是 **CRITICAL_SECTION**(临界区) **Mutex**(互斥体)。以下是详细用法对比: --- #### 一、轻量级替代:CRITICAL_SECTION(临界区) **特点**:仅适用于**同一进程内**的线程同步,性能高,不跨进程。 ```c #include <windows.h> // 声明初始化 CRITICAL_SECTION cs; InitializeCriticalSection(&cs); // 相当于 pthread_mutex_init // 加 EnterCriticalSection(&cs); // 相当于 pthread_mutex_lock // 临界区代码 // ... // 解 LeaveCriticalSection(&cs); // 相当于 pthread_mutex_unlock // 销毁 DeleteCriticalSection(&cs); // 相当于 pthread_mutex_destroy ``` > **适用场景**:进程内线程同步,如全局变量保护[^1]。 --- #### 二、跨进程替代:Mutex(互斥体) **特点**:支持**跨进程**同步,通过命名互斥体实现系统级资源保护。 ```c #include <windows.h> // 创建或打开互斥体 HANDLE hMutex = CreateMutex( NULL, // 安全属性 FALSE, // 初始是否拥有(FALSE=不拥有) L"Global\\MyMutex" // 命名(跨进程唯一标识) ); // 等待获取(阻塞式) DWORD waitResult = WaitForSingleObject(hMutex, INFINITE); if (waitResult == WAIT_OBJECT_0) { // 临界区代码 // ... 
// 释放 ReleaseMutex(hMutex); } // 销毁 CloseHandle(hMutex); ``` > **适用场景**:多进程共享资源保护(如共享内存)[^2]。 --- ### 功能对比表 | 特性 | `pthread_mutex_t` (Linux) | `CRITICAL_SECTION` (Windows) | `Mutex` (Windows) | |----------------------|---------------------------|------------------------------|------------------| | **进程内同步** | ✅ | ✅ | ✅ | | **跨进程同步** | ❌ | ❌ | ✅ | | **递归支持** | ✅(需设置属性) | ✅(默认) | ✅ | | **超时等待** | ✅ (`pthread_mutex_timedlock`) | ❌ | ✅ (`WaitForSingleObject` 支持超时参数) | | **性能** | 高 | 高(比Mutex快5-10倍) | 中 | --- ### 关键注意事项 1. **递归实现**: ```c // Windows 临界区默认支持递归加(同一线程多次进入不会死) EnterCriticalSection(&cs); EnterCriticalSection(&cs); // 允许 LeaveCriticalSection(&cs); LeaveCriticalSection(&cs); ``` 2. **错误处理**: ```c if (WaitForSingleObject(hMutex, 1000) == WAIT_TIMEOUT) { // 处理超时逻辑 } ``` 3. **替代读写**: - 使用 `SRWLOCK`(Windows Vista+): ```c SRWLOCK srwLock; InitializeSRWLock(&srwLock); AcquireSRWLockExclusive(&srwLock); // 写 ReleaseSRWLockExclusive(&srwLock); ``` --- ### 完整示例(CRITICAL_SECTION) ```c #include <windows.h> #include <stdio.h> CRITICAL_SECTION cs; int counter = 0; DWORD WINAPI ThreadFunc(LPVOID lpParam) { for (int i = 0; i < 100000; i++) { EnterCriticalSection(&cs); counter++; LeaveCriticalSection(&cs); } return 0; } int main() { InitializeCriticalSection(&cs); HANDLE threads[2]; threads[0] = CreateThread(NULL, 0, ThreadFunc, NULL, 0, NULL); threads[1] = CreateThread(NULL, 0, ThreadFunc, NULL, 0, NULL); WaitForMultipleObjects(2, threads, TRUE, INFINITE); printf("Counter: %d\n", counter); // 正确输出200000 DeleteCriticalSection(&cs); return 0; } ``` > 💡 **最佳实践**:优先使用 `CRITICAL_SECTION` 进行进程内同步,仅在需要跨进程时使用 `Mutex`[^1][^2]。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值