C++无锁编程——无锁栈(lock-free stack)

C++无锁编程：实现与优化无锁栈

原创

已于 2023-07-26 14:49:38 修改 · 2.4k 阅读

10 ·

CC 4.0 BY-SA版权

文章标签：

#数据结构 #c++ #多线程 #无锁编程

于 2023-06-28 10:09:43 首次发布

本文介绍了C++中实现无锁栈的几种方法，包括使用智能指针std::shared_ptr管理内存，手动管理内存以避免锁竞争，以及利用风险指针和引用计数判断节点是否可删除。文章详细讨论了内存顺序对性能的影响，并提供了相应的放宽内存顺序的代码示例。此外，还提供了一个简单的测试代码来验证无锁栈的正确性。

C++无锁编程——无锁栈(lock-free stack)

贺志国
2023.6.28

无锁数据结构意味着线程可以并发地访问数据结构而不出错。例如，一个无锁栈能同时允许一个线程压入数据，另一个线程弹出数据。不仅如此，当调度器中途挂起其中一个访问线程时，其他线程必须能够继续完成自己的工作，而无需等待挂起线程。
无锁栈一个很大的问题在于，如何在不加锁的前提下，正确地分配和释放节点的内存，同时不引起逻辑错误和程序崩溃。

一、使用智能指针`std::shared_ptr<T>`实现

一个最朴素的想法是，使用智能指针管理节点。事实上，如果在目标平台上std::atomic_is_lock_free(&some_shared_ptr)返回true，那么所有内存回收问题就都迎刃而解了（我在X86和Arm平台上测试，均返回false）。示例代码（文件命名为 lock_free_stack.h）如下：

#pragma once

#include <atomic>
#include <memory>

// A stack of T stored as a singly linked list whose links are
// std::shared_ptr<Node>. Every access to a shared link goes through the
// std::atomic_* free functions for std::shared_ptr, so node lifetime is
// handled entirely by reference counting. Whether those functions are
// actually lock-free is implementation-defined
// (see std::atomic_is_lock_free).
template <typename T>
class LockFreeStack {
 public:
  // Starts out empty (null head).
  LockFreeStack() : head_(nullptr) {}

  // Pops element by element until Pop() reports that nothing is left.
  ~LockFreeStack() {
    while (Pop() != nullptr) {
      // Nothing to do with the popped value; it is released immediately.
    }
  }

  LockFreeStack(const LockFreeStack& other) = delete;
  LockFreeStack& operator=(const LockFreeStack& other) = delete;

  // Returns true when the head observed at the time of the call is null.
  bool IsEmpty() const {
    const std::shared_ptr<Node> top = std::atomic_load(&head_);
    return !top;
  }

  // Copies `data` into a freshly allocated node and links it in as the new
  // head.
  void Push(const T& data) {
    const std::shared_ptr<Node> fresh = std::make_shared<Node>(data);
    fresh->next = std::atomic_load(&head_);
    for (;;) {
      // On success head_ becomes `fresh`. On failure the call refreshes
      // fresh->next with the current head_, so the next attempt is already
      // set up.
      if (std::atomic_compare_exchange_weak(&head_, &fresh->next, fresh)) {
        return;
      }
    }
  }

  // Unlinks the head node and returns its payload. Returns an empty
  // shared_ptr when the stack is empty.
  std::shared_ptr<T> Pop() {
    std::shared_ptr<Node> top = std::atomic_load(&head_);
    while (top != nullptr) {
      // Try to swing head_ from `top` to its successor. A failed attempt
      // reloads `top` from head_, which may turn it into nullptr and end the
      // loop.
      if (std::atomic_compare_exchange_weak(&head_, &top,
                                            std::atomic_load(&top->next))) {
        // Clear the popped node's link before handing out its payload.
        std::atomic_store(&top->next, std::shared_ptr<Node>());
        return top->data;
      }
    }
    return std::shared_ptr<T>();
  }

 private:
  struct Node {
    // The payload is copied into its own shared allocation here, at push
    // time; Pop() later only copies the shared_ptr.
    Node(const T& input_data)
        : data(std::make_shared<T>(input_data)), next(nullptr) {}

    std::shared_ptr<T> data;
    std::shared_ptr<Node> next;
  };

  std::shared_ptr<Node> head_;
};

上述代码希望借助std::shared_ptr<>来完成节点内存的动态分配和回收，因为其有内置的引用计数机制。不幸的是，std::shared_ptr<>虽然可以用于原子操作，但在大多数平台上不是无锁的，需要通过C++标准库添加内部锁来实现原子操作，这样会带来极大的性能开销，无法满足高并发访问的需求。

如果编译器支持C++20标准，std::atomic<std::shared_ptr<T>>允许用户原子地操纵 std::shared_ptr，即在确保原子操作的同时，还能正确地处理引用计数。与其他原子类型一样，其实现也不确定是否无锁。使用std::atomic<std::shared_ptr<T>>实现无锁栈（表面上看肯定无锁，实际上是否无锁取决于std::atomic<std::shared_ptr<T>>的is_lock_free函数返回值是否为true）的示例代码（文件命名为 lock_free_stack.h）如下：

#pragma once

#include <atomic>
#include <memory>

// A stack of T stored as a singly linked list whose head and per-node links
// are std::atomic<std::shared_ptr<Node>>, a C++20 library feature. Node
// lifetime is handled by shared_ptr reference counting. Whether the atomic
// operations are really lock-free is implementation-defined; query
// is_lock_free() on the atomic to find out.
template <typename T>
class LockFreeStack {
   
   
 public:
  // Starts out empty (null head).
  LockFreeStack() : head_(nullptr) {
   
   }
  // Drains the stack by popping until Pop() returns an empty pointer.
  ~LockFreeStack() {
   
   
    while (Pop()) {
   
   
      // Nothing to do: each iteration removes one element.
    }
  }
  LockFreeStack(const LockFreeStack& other) = delete;
  LockFreeStack& operator=(const LockFreeStack& other) = delete;

  // Returns true when the head observed at the time of the call is null.
  // Unlike the rest of the class this uses the free function
  // std::atomic_load rather than head_.load().
  bool IsEmpty() const {
   
    return std::atomic_load(&head_) == nullptr; }

  // Copies `data` into a new node and installs that node as the new head.
  void Push(const T& data) {
   
   
    const auto new_node = std::make_shared<Node>(data);
    std::shared_ptr<Node> old_head = head_.load();
    new_node->next = old_head;
    // compare_exchange_weak: if head_ still equals old_head, head_ becomes
    // new_node and the call returns true. Otherwise (head_ changed, or the
    // weak form failed spuriously) old_head is overwritten with the current
    // head_ and the call returns false.
    while (!head_.compare_exchange_weak(old_head, new_node)) {
   
   
      // Re-point the new node at the head that was just observed and retry.
      new_node->next = old_head;
    }
  }

  // Unlinks the head node and returns its payload. Returns an empty
  // shared_ptr when the stack is empty.
  std::shared_ptr<T> Pop() {
   
   
    std::shared_ptr<Node> old_head = head_.load();
    // While there is a head: if head_ still equals old_head, head_ becomes
    // old_head->next and the call returns true, ending the loop. Otherwise
    // old_head is overwritten with the current head_ (possibly nullptr) and
    // the loop re-evaluates.
    while (old_head != nullptr &&
           !head_.compare_exchange_weak(old_head, old_head->next.load())) {
   
   
      // Nothing to do: the failed exchange already refreshed old_head.
    }

    if (old_head != nullptr) {
   
   
      // Clear the popped node's link before handing out its payload.
      // NOTE(review): presumably so that a lingering reference to this node
      // does not keep the rest of the chain alive - confirm.
      old_head->next = std::shared_ptr<Node>();
      return old_head->data;
    }

    return std::shared_ptr<T>();
  }

 private:
  struct Node {
   
   
    // The payload is copied into its own shared allocation here, at push
    // time. std::make_shared can throw (std::bad_alloc, or whatever T's copy
    // constructor throws); Pop() later only copies the resulting shared_ptr.
    Node(const T& input_data)
        : data(std::make_shared<T>(input_data)), next(nullptr) {
   
   }

    std::shared_ptr<T> data;
    std::atomic<std::shared_ptr<Node>> next;
  };

  // Requires a C++20 standard library. With an older one (GCC 9's libstdc++
  // in the report below) the declaration is rejected with:
  //   /usr/include/c++/9/atomic:191:21: error: static assertion failed:
  //   std::atomic requires a trivially copyable type
  //   static_assert(__is_trivially_copyable(_Tp),
  std::atomic<std::shared_ptr<Node>> head_;
};

我的编译器目前只支持C++17标准，上述代码会出现如下编译错误：

In file included from /home/zhiguohe/code/excercise/lock_freee/lock_free_stack_with_shared_ptr_cpp/lock_free_stack_with_shared_ptr.h:3,
                 from /home/zhiguohe/code/excercise/lock_freee/lock_free_stack_with_shared_ptr_cpp/lock_free_stack_with_shared_ptr.cpp:1:
/usr/include/c++/9/atomic: In instantiation of ‘struct std::atomic<std::shared_ptr<LockFreeStack<int>::Node> >’:
/home/zhiguohe/code/excercise/lock_freee/lock_free_stack_with_shared_ptr_cpp/lock_free_stack_with_shared_ptr.h:61:38:   required from ‘class LockFreeStack<int>’
/home/zhiguohe/code/excercise/lock_freee/lock_free_stack_with_shared_ptr_cpp/lock_free_stack_with_shared_ptr.cpp:16:22:   required from here
/usr/include/c++/9/atomic:191:21: error: static assertion failed: std::atomic requires a trivially copyable type
  191 |       static_assert(__is_trivially_copyable(_Tp),
      |                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
make[2]: *** [CMakeFiles/lock_free_stack_with_shared_ptr_cpp.dir/build.make:63: CMakeFiles/lock_free_stack_with_shared_ptr_cpp.dir/lock_free_stack_with_shared_ptr.cpp.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:644: CMakeFiles/lock_free_stack_with_shared_ptr_cpp.dir/all] Error 2
make: *** [Makefile:117: all] Error 2

二、手动管理内存——使用简单的计数器判断是否存在线程调用`Pop`函数

2.1 不考虑放宽内存顺序

如果编译器不支持C++20标准，我们需要手动管理节点的内存分配和回收。一种简单的思路是，判断当前有无线程访问Pop函数，如果不存在，则删除所有弹出的节点，否则将弹出的节点存储到待删除列表to_be_deleted_中，等到最终无线程访问Pop函数时再释放to_be_deleted_。下面展示该思路的实现代码（文件命名为 lock_free_stack.h，示例来源于C++ Concurrency In Action, 2ed 2019，修复了其中的bug）：

#pragma once

#include <atomic>
#include <memory>

template <typename T>
class LockFreeStack {
   
   
 public:
   // Creates an empty stack: no head node, nothing queued for deferred
   // deletion, and a zero count in threads_in_pop_.
   LockFreeStack()
       : head_(nullptr), to_be_deleted_(nullptr), threads_in_pop_(0) {}
  // Drains the stack: keeps calling Pop() until it returns an empty pointer.
  ~LockFreeStack() {
    while (Pop() != nullptr) {
      // Nothing to do with the popped value; it is released immediately.
    }
  }
  LockFreeStack(const LockFreeStack& other) = delete;
  LockFreeStack& operator=(const LockFreeStack& other) = delete;
  
  bool IsEmpty() const {
   
    return head_.load() == nullptr; }

  void Push(const T& data) {
   
   
    Node* new_node = new Node(data);
    new_node->next = head_.load();
    // If new_node->next is the same as head_, update head_ to new_node and
    // return true.
    // If new_node->next and head_ are not equal, update new_node->next to head_
    // and return false.
    while (!head_.compare_exchange_weak(new_node->next, new_node)) {
   
   
      // Do nothing and wait for the head_ is updated to new_node.
    }
  }

  // Unlinks the head node and returns its payload. Returns an empty
  // shared_ptr when the stack is empty.
  //
  // threads_in_pop_ is incremented BEFORE head_ is read. The retry loop below
  // dereferences old_head->next, so this thread must already be counted by
  // then. Otherwise another thread could pop the same node, see itself as the
  // only thread in Pop() and delete the node while it is still being read
  // here (use-after-free).
  std::shared_ptr<T> Pop() {
    ++threads_in_pop_;

    Node* old_head = head_.load();
    // If head_ still equals old_head, head_ becomes old_head->next and the
    // loop ends. Otherwise old_head is overwritten with the current head_
    // (possibly nullptr) and the condition is re-evaluated.
    while (old_head != nullptr &&
           !head_.compare_exchange_weak(old_head, old_head->next)) {
      // Nothing to do: the failed exchange already refreshed old_head.
    }

    std::shared_ptr<T> res;
    if (old_head != nullptr) {
      // Take the payload out of the node rather than copying it, then hand
      // the node over for reclamation.
      res.swap(old_head->data);
      // NOTE(review): relies on TryReclaim() decrementing threads_in_pop_
      // exactly once for this call, as the original increment/TryReclaim
      // pairing implies - confirm against its definition.
      TryReclaim(old_head);
    } else {
      // Empty stack: no node to reclaim, so undo the increment here.
      --threads_in_pop_;
    }

    return res;
  }

  // Intentionally no second destructor here: ~LockFreeStack() is already
  // defined next to the constructor, and a member function cannot be
  // redeclared inside the class body.

 private:
  // If the struct definition of Node is placed in the private data member
  // field where 'head_' is defined, the following compilation error will occur:
  //
  // error: 'Node' has not been declared ...
  //
  // It should be a bug of the compiler. The struct definition of Node is put in
  // front of the private member function `DeleteNodes` to eliminate this error.
  struct Node {
   
   
    // std::make_shared does not throw an exception.
    Node(const T& input_data)
        : data(std