C++死锁问题详解
1. 死锁基本概念
1.1 什么是死锁
死锁是指两个或多个线程在执行过程中,因争夺资源而造成的一种互相等待的现象,若无外力作用,它们都将无法继续执行。
#include <chrono>
#include <cstddef>
#include <iostream>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <thread>
// Minimal deadlock demonstration: thread1_work() and thread2_work() take the
// same two mutexes in opposite orders, so running them concurrently can leave
// each thread waiting for the mutex the other one holds.
class BasicDeadlock {
    std::mutex mutex1, mutex2;

    // Pause taken while the first mutex is held, widening the window in which
    // the other worker can grab its own first mutex.
    static void hold_first_lock_for_a_while() {
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

public:
    // Acquires mutex1, pauses, then blocks until mutex2 can be acquired.
    void thread1_work() {
        std::unique_lock<std::mutex> first(mutex1);
        std::cout << "Thread 1 acquired mutex1" << std::endl;
        hold_first_lock_for_a_while();
        std::unique_lock<std::mutex> second(mutex2);  // waits for mutex2
        std::cout << "Thread 1 acquired mutex2" << std::endl;
    }

    // Acquires mutex2, pauses, then blocks until mutex1 can be acquired.
    void thread2_work() {
        std::unique_lock<std::mutex> first(mutex2);
        std::cout << "Thread 2 acquired mutex2" << std::endl;
        hold_first_lock_for_a_while();
        std::unique_lock<std::mutex> second(mutex1);  // waits for mutex1
        std::cout << "Thread 2 acquired mutex1" << std::endl;
    }
};
void demonstrate_basic_deadlock() {
BasicDeadlock example;
std::thread t1([&] { example.thread1_work(); });
std::thread t2([&] { example.thread2_work(); });
t1.join();
t2.join(); // 可能永远阻塞在这里
}
1.2 死锁的四个必要条件
- 互斥条件:资源不能被共享,只能由一个线程使用
- 请求与保持条件:线程持有至少一个资源,并等待获取其他资源
- 不可剥夺条件:资源只能由持有者释放,不能被强制剥夺
- 循环等待条件:存在一个线程-资源的循环等待链
2. 常见的死锁场景
2.1 锁顺序不一致
// Account example in which withdraw() and audit() acquire the same two mutexes
// in opposite orders; calling them from different threads can deadlock.
class LockOrderDeadlock {
    std::mutex account_mutex;
    std::mutex log_mutex;
    double balance = 1000.0;

public:
    // Lock order: account_mutex first, log_mutex second.
    // Deducts `amount` and prints the new balance only when funds suffice.
    void withdraw(double amount) {
        std::unique_lock<std::mutex> hold_account(account_mutex);
        // Simulated processing time while only the account lock is held.
        const auto simulated_work = std::chrono::milliseconds(10);
        std::this_thread::sleep_for(simulated_work);
        std::unique_lock<std::mutex> hold_log(log_mutex);
        if (!(balance >= amount)) {
            return;
        }
        balance -= amount;
        std::cout << "Withdrew " << amount << ", balance: " << balance << std::endl;
    }

    // Lock order: log_mutex first, account_mutex second - the reverse of
    // withdraw(), which is the deadlock risk this class illustrates.
    void audit() {
        std::unique_lock<std::mutex> hold_log(log_mutex);
        // Simulated processing time while only the log lock is held.
        const auto simulated_work = std::chrono::milliseconds(10);
        std::this_thread::sleep_for(simulated_work);
        std::unique_lock<std::mutex> hold_account(account_mutex);
        std::cout << "Audit: balance is " << balance << std::endl;
    }
};
2.2 递归锁误用
// Shows that a std::recursive_mutex may be re-locked by the thread that owns
// it, and that mixing it with a second, unrelated mutex can still deadlock.
class RecursiveLockIssue {
    std::recursive_mutex mutex;

public:
    // Locks the recursive mutex and calls method_b(), which locks it again.
    void method_a() {
        std::unique_lock<std::recursive_mutex> outer(mutex);
        method_b();  // direct call into another method that needs the lock
        std::cout << "Method A completed" << std::endl;
    }

    // Re-locking succeeds when the caller is the thread that already owns the
    // recursive mutex; with a different lock the nested acquisition would block.
    void method_b() {
        std::unique_lock<std::recursive_mutex> inner(mutex);
        std::cout << "Method B completed" << std::endl;
    }

    // Dangerous pattern: holds the recursive mutex while acquiring a mutex
    // supplied by the caller, so lock order depends on the caller.
    void dangerous_method(std::mutex& other_mutex) {
        std::unique_lock<std::recursive_mutex> own(mutex);
        std::unique_lock<std::mutex> foreign(other_mutex);  // may deadlock
    }
};
2.3 回调函数中的死锁
// Callback registry that invokes callbacks while still holding its own mutex.
// A callback that calls back into register_callback() therefore tries to lock
// a mutex its own thread already holds.
class CallbackDeadlock {
    std::mutex mutex;
    std::vector<std::function<void()>> callbacks;

public:
    // Appends a copy of `callback` to the list under the mutex.
    void register_callback(std::function<void()> callback) {
        std::unique_lock<std::mutex> guard(mutex);
        callbacks.push_back(callback);
    }

    // Invokes every registered callback in registration order, holding the
    // mutex for the whole loop.
    void notify_all() {
        std::unique_lock<std::mutex> guard(mutex);
        for (auto it = callbacks.begin(), last = callbacks.end(); it != last; ++it) {
            // Danger: the callback may call register_callback() again.
            (*it)();  // may deadlock
        }
    }

    // Registers a callback that itself registers another callback, then
    // notifies - reproducing the self-deadlock described above.
    void problematic_usage() {
        auto reentrant = [this] {
            // This callback tries to take the same lock again.
            register_callback([] {
                std::cout << "Nested callback" << std::endl;
            });
        };
        register_callback(reentrant);
        notify_all();  // deadlock!
    }
};
2.4 条件变量与死锁
// Producer/consumer example built on one mutex and one condition variable.
// The producer keeps the mutex locked while it sleeps, so the consumer cannot
// make progress during that time; the consumer loop has no exit condition.
class ConditionVariableDeadlock {
std::mutex mutex;
std::condition_variable cv;
// Predicate for cv: set by producer(), cleared by consumer() once the queue is empty.
bool data_ready = false;
std::queue<int> data_queue;
public:
// Pushes the values 0..9, notifying one waiter after each push.
void producer() {
for (int i = 0; i < 10; ++i) {
std::unique_lock<std::mutex> lock(mutex);
// If the queue were full we should wait for the consumer (no actual "full" check exists here)
data_queue.push(i);
data_ready = true;
cv.notify_one();
// Forgetting to release the lock, or holding it at an inappropriate time
std::this_thread::sleep_for(std::chrono::milliseconds(100));
// The lock is still held here, so the consumer cannot acquire it
}
}
// Loops forever: waits until data_ready is true, pops and prints one value,
// and clears data_ready when the queue becomes empty.
void consumer() {
while (true) {
std::unique_lock<std::mutex> lock(mutex);
cv.wait(lock, [this] { return data_ready; });
if (!data_queue.empty()) {
int value = data_queue.front();
data_queue.pop();
std::cout << "Consumed: " << value << std::endl;
}
if (data_queue.empty()) {
data_ready = false;
}
}
}
};
3. 死锁检测方法
3.1 运行时死锁检测
#include <stack>
#include <unordered_set>
// Simplified runtime lock tracker.
//
// Each LockTracker registers one mutex as "in use" for its lifetime. When a
// tracker is created while the calling thread already tracks a different
// mutex, a warning is written to std::cerr. This is a nested-locking
// heuristic, not a full wait-for-graph cycle detector.
//
// The static members are C++17 inline variables so that they are defined here;
// declaring them without a definition leaves undefined references at link time.
class DeadlockDetector {
    // Mutexes tracked by the calling thread, most recent on top.
    inline static thread_local std::stack<std::mutex*> lock_stack;
    // Protects all_locks.
    inline static std::mutex detector_mutex;
    // Mutexes currently tracked by any thread.
    inline static std::unordered_set<std::mutex*> all_locks;

public:
    // RAII registration of a single mutex with the detector.
    class LockTracker {
        std::mutex* mutex_ptr;

    public:
        // Registers `mutex` and warns on std::cerr if this thread already
        // tracks another registered mutex.
        explicit LockTracker(std::mutex& mutex) : mutex_ptr(&mutex) {
            std::lock_guard<std::mutex> lock(detector_mutex);
            for (auto* held_lock : all_locks) {
                if (held_lock == mutex_ptr) {
                    // The requested mutex is already registered; not a
                    // nested acquisition of a different lock.
                    continue;
                }
                // Simplified check: a different registered mutex is on this
                // thread's stack, i.e. the thread is nesting locks.
                if (is_in_lock_stack(held_lock)) {
                    std::cerr << "POTENTIAL DEADLOCK DETECTED!" << std::endl;
                    print_lock_stack();
                }
            }
            all_locks.insert(mutex_ptr);
            lock_stack.push(mutex_ptr);
        }

        // A copy would unregister the same mutex twice.
        LockTracker(const LockTracker&) = delete;
        LockTracker& operator=(const LockTracker&) = delete;

        // Unregisters the mutex. The per-thread stack entry is removed only
        // when it is on top, so trackers are expected to be destroyed in
        // reverse order of creation.
        ~LockTracker() {
            std::lock_guard<std::mutex> lock(detector_mutex);
            if (!lock_stack.empty() && lock_stack.top() == mutex_ptr) {
                lock_stack.pop();
            }
            all_locks.erase(mutex_ptr);
        }

    private:
        // Returns true when `mutex` is anywhere on the calling thread's stack.
        static bool is_in_lock_stack(std::mutex* mutex) {
            // std::stack has no iteration, so walk a copy.
            std::stack<std::mutex*> temp = lock_stack;
            while (!temp.empty()) {
                if (temp.top() == mutex) return true;
                temp.pop();
            }
            return false;
        }

        // Prints the calling thread's stack, most recent entry first.
        static void print_lock_stack() {
            std::stack<std::mutex*> temp = lock_stack;
            std::cerr << "Lock stack: ";
            while (!temp.empty()) {
                std::cerr << temp.top() << " -> ";
                temp.pop();
            }
            std::cerr << "current" << std::endl;
        }
    };
};
// 使用包装的锁
class InstrumentedMutex {
std::mutex real_mutex;
public:
void lock() {
DeadlockDetector::LockTracker tracker(real_mutex);
real_mutex.lock();
}
void unlock() {
real_mutex.unlock();
}
};
3.2 静态与动态分析工具
# 使用Clang静态分析器
clang --analyze -Xanalyzer -analyzer-output=text program.cpp
# 使用ThreadSanitizer
g++ -fsanitize=thread -g -O1 program.cpp -o program
# 使用Helgrind (Valgrind)
valgrind --tool=helgrind ./program
4. 死锁预防解决方案
4.1 锁顺序策略
// Lock-ordering policy enforced at run time: a thread may only acquire a mutex
// whose level is strictly lower than the level of the mutex it acquired last.
class LockHierarchy {
    // Hierarchy levels. LEVEL_UNLOCKED is a sentinel above every real level
    // and represents "this thread holds no hierarchical mutex".
    enum LockLevel {
        LEVEL_LOW = 1,
        LEVEL_MEDIUM = 2,
        LEVEL_HIGH = 3,
        LEVEL_UNLOCKED = 4
    };

    // Mutex that checks the calling thread's current level before locking.
    class HierarchicalMutex {
        std::mutex internal_mutex;
        const LockLevel level;
        // Level the owning thread had before taking this mutex; written and
        // read only while internal_mutex is held.
        LockLevel previous_level = LEVEL_UNLOCKED;
        // Level of the innermost hierarchical mutex held by this thread.
        // Starts at the sentinel so that a LEVEL_HIGH mutex can be taken first.
        inline static thread_local LockLevel current_level = LEVEL_UNLOCKED;

    public:
        explicit HierarchicalMutex(LockLevel lvl) : level(lvl) {}

        // Acquires the mutex.
        // Throws std::logic_error when this mutex's level is not strictly
        // below the calling thread's current level.
        void lock() {
            if (level >= current_level) {
                throw std::logic_error("Lock hierarchy violation!");
            }
            internal_mutex.lock();
            previous_level = current_level;
            current_level = level;
        }

        // Releases the mutex and restores the level that was current when it
        // was acquired. Restoring (rather than resetting to the top) keeps the
        // check effective while outer mutexes are still held. Mutexes are
        // expected to be released in reverse order of acquisition.
        void unlock() {
            current_level = previous_level;
            internal_mutex.unlock();
        }
    };

    HierarchicalMutex low_mutex{LEVEL_LOW};
    HierarchicalMutex medium_mutex{LEVEL_MEDIUM};
    HierarchicalMutex high_mutex{LEVEL_HIGH};

public:
    // Acquires high -> medium -> low, which respects the hierarchy.
    void correct_operation1() {
        std::lock_guard<HierarchicalMutex> lock1(high_mutex);    // level 3
        std::lock_guard<HierarchicalMutex> lock2(medium_mutex);  // level 2 - allowed
        std::lock_guard<HierarchicalMutex> lock3(low_mutex);     // level 1 - allowed
        // work...
    }

    // Acquires the lowest level first; taking a higher level afterwards
    // (the commented-out line) would throw std::logic_error.
    void incorrect_operation() {
        std::lock_guard<HierarchicalMutex> lock1(low_mutex);  // level 1
        // The next line would throw because it requests a higher-level lock:
        // std::lock_guard<HierarchicalMutex> lock2(high_mutex); // error!
    }
};
4.2 同时获取多个锁
// Three ways of taking several mutexes without risking a lock-order deadlock.
class SimultaneousLock {
    std::mutex mutex1, mutex2, mutex3;

public:
    // Approach 1: std::lock acquires all three using its deadlock-avoidance
    // algorithm; ownership is then handed to RAII guards.
    void operation_with_std_lock() {
        std::lock(mutex1, mutex2, mutex3);
        std::lock_guard<std::mutex> guard1(mutex1, std::adopt_lock);
        std::lock_guard<std::mutex> guard2(mutex2, std::adopt_lock);
        std::lock_guard<std::mutex> guard3(mutex3, std::adopt_lock);
        // All locks are held here.
        std::cout << "All locks acquired safely" << std::endl;
    }

    // Approach 2: C++17 std::scoped_lock acquires and releases all of them.
    void operation_with_scoped_lock() {
        std::scoped_lock<std::mutex, std::mutex, std::mutex> all(mutex1, mutex2, mutex3);
        std::cout << "All locks acquired with scoped_lock" << std::endl;
    }

    // Approach 3: always acquire in one fixed order (1, then 2, then 3).
    void operation_with_defined_order() {
        std::unique_lock<std::mutex> first(mutex1);
        std::unique_lock<std::mutex> second(mutex2);
        std::unique_lock<std::mutex> third(mutex3);
        std::cout << "Locks acquired in fixed order" << std::endl;
    }
};
4.3 超时和尝试锁
// Avoids waiting forever by using timed and non-blocking lock attempts.
class TimeoutLock {
    std::timed_mutex mutex1, mutex2;

public:
    // Tries mutex1 and then mutex2, waiting at most `timeout` for each.
    // Returns true after doing the work with both held, false if either
    // attempt timed out (any lock already taken is released on return).
    bool try_operation(std::chrono::milliseconds timeout) {
        // This constructor performs try_lock_for(timeout).
        std::unique_lock<std::timed_mutex> first(mutex1, timeout);
        if (!first) {
            std::cout << "Failed to acquire mutex1 within timeout" << std::endl;
            return false;
        }

        std::unique_lock<std::timed_mutex> second(mutex2, timeout);
        if (!second) {
            std::cout << "Failed to acquire mutex2 within timeout" << std::endl;
            // `first` is released automatically when it goes out of scope.
            return false;
        }

        // Both locks are held.
        perform_critical_work();
        return true;
    }

    // Makes one non-blocking attempt on each mutex; does the work only when
    // both attempts succeeded, otherwise gives up immediately.
    bool try_all_locks_simultaneously() {
        std::unique_lock<std::timed_mutex> first(mutex1, std::try_to_lock);
        std::unique_lock<std::timed_mutex> second(mutex2, std::try_to_lock);
        const bool got_both = first.owns_lock() && second.owns_lock();
        if (!got_both) {
            std::cout << "Could not acquire all locks, backing off..." << std::endl;
            return false;
        }
        perform_critical_work();
        return true;
    }

private:
    // Stand-in for the protected work: prints a line and sleeps 50 ms.
    void perform_critical_work() {
        std::cout << "Performing critical work..." << std::endl;
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
    }
};
4.4 无锁编程
#include <atomic>
// Integer counter built on std::atomic; no mutex is involved, so operations on
// it cannot take part in a lock-based deadlock.
class LockFreeCounter {
    std::atomic<int> value{0};

public:
    // Adds one. Relaxed ordering: only the count itself is synchronized.
    void increment() {
        value.fetch_add(1, std::memory_order_relaxed);
    }

    // Returns the current value.
    int get() const {
        return value.load(std::memory_order_acquire);
    }

    // Sets the counter to `new_value` if it currently equals `expected`.
    // Returns true when the swap happened.
    //
    // Uses the strong form because this is a single attempt, not a retry
    // loop: compare_exchange_weak is allowed to fail spuriously even when the
    // value matches, which would make a false return ambiguous for callers.
    bool compare_and_swap(int expected, int new_value) {
        return value.compare_exchange_strong(expected, new_value,
                                             std::memory_order_acq_rel,
                                             std::memory_order_acquire);
    }
};
// Lock-free queue example
// Singly linked FIFO whose head and tail pointers are std::atomic. `head`
// points at a dummy node; the first stored element lives in head->next.
// NOTE(review): pop() deletes the old head node immediately after its CAS on
// `head` succeeds. A thread that loaded the same node pointer earlier (in pop()
// via `old_head`, or in push() via `old_tail`) may still dereference it, so
// concurrent use looks like it needs a safe memory-reclamation scheme -
// confirm before using this with more than one thread.
template<typename T>
class LockFreeQueue {
private:
// List node: one value plus an atomic link to the next node.
struct Node {
T data;
std::atomic<Node*> next;
Node(const T& data) : data(data), next(nullptr) {}
};
std::atomic<Node*> head;
std::atomic<Node*> tail;
public:
// Creates the initial dummy node; requires T to be constructible via T{}.
LockFreeQueue() {
Node* dummy = new Node(T{});
head.store(dummy);
tail.store(dummy);
}
// Frees every remaining node, including the dummy.
~LockFreeQueue() {
while (Node* old_head = head.load()) {
head.store(old_head->next);
delete old_head;
}
}
// Appends a copy of `data` at the tail.
void push(const T& data) {
Node* new_node = new Node(data);
Node* old_tail = tail.load();
while (true) {
Node* next = old_tail->next.load();
if (!next) {
// old_tail has no successor yet: try to link the new node after it.
if (old_tail->next.compare_exchange_weak(next, new_node)) {
// Linked; try to swing tail to the new node. The result is not checked.
tail.compare_exchange_weak(old_tail, new_node);
return;
}
} else {
// old_tail already has a successor: try to advance tail. On failure the
// CAS reloads old_tail with the current tail value.
tail.compare_exchange_weak(old_tail, next);
}
}
}
// Removes the front element into `result`.
// Returns false when the dummy node has no successor (queue empty).
bool pop(T& result) {
Node* old_head = head.load();
while (true) {
Node* next = old_head->next.load();
if (!next) return false;
// On failure the CAS reloads old_head with the current head value.
if (head.compare_exchange_weak(old_head, next)) {
// `next` is now the head (the new dummy); its value is moved out.
result = std::move(next->data);
delete old_head;
return true;
}
}
}
};
5. 死锁恢复策略
5.1 死锁检测和恢复
// Retries a two-lock operation with non-blocking attempts and falls back to a
// degraded path when no attempt succeeds within five seconds.
class DeadlockRecovery {
    std::mutex mutex1, mutex2;
    std::atomic<bool> shutdown_requested{false};

public:
    // Returns true as soon as one attempt succeeds. Returns false when the
    // five-second budget is exceeded (after running the recovery path) or when
    // shutdown has been requested.
    bool cooperative_operation_with_timeout() {
        const auto began = std::chrono::steady_clock::now();
        const auto budget = std::chrono::seconds(5);
        for (;;) {
            if (shutdown_requested) {
                break;
            }
            if (try_operation_with_backoff()) {
                return true;
            }
            const auto elapsed = std::chrono::steady_clock::now() - began;
            if (elapsed > budget) {
                std::cout << "Operation timeout, possible deadlock detected" << std::endl;
                recover_from_deadlock();
                return false;
            }
            // Wait before the next attempt.
            std::this_thread::sleep_for(std::chrono::milliseconds(100));
        }
        return false;
    }

    // Makes cooperative_operation_with_timeout() stop retrying.
    void request_shutdown() {
        shutdown_requested = true;
    }

private:
    // One non-blocking attempt at both mutexes; runs the work only when both
    // were obtained. A lock taken before a failure is released on return.
    bool try_operation_with_backoff() {
        std::unique_lock<std::mutex> first(mutex1, std::try_to_lock);
        if (first.owns_lock()) {
            std::unique_lock<std::mutex> second(mutex2, std::try_to_lock);
            if (second.owns_lock()) {
                // Both locks are held.
                perform_work();
                return true;
            }
        }
        return false;
    }

    // Recovery path. Alternatives considered in the original notes:
    //   1. force-release resources (dangerous, may break consistency)
    //   2. roll the operation back
    //   3. alert an administrator / log a severe error
    // This example chooses graceful degradation.
    void recover_from_deadlock() {
        std::cout << "Attempting deadlock recovery..." << std::endl;
        perform_graceful_degradation();
    }

    void perform_work() {
        std::cout << "Performing normal work..." << std::endl;
    }

    // Work that does not need all of the locks.
    void perform_graceful_degradation() {
        std::cout << "Performing degraded operation..." << std::endl;
    }
};
5.2 事务性内存
#include <vector>
#include <functional>
// All-or-nothing batch of operations, each paired with a rollback action.
//
// operations[i] and rollback_ops[i] always belong together, so rollback works
// by index. std::function objects cannot be compared with each other, so
// searching the operation list for a given callable is not an option.
class TransactionalMemory {
    std::vector<std::function<void()>> operations;
    std::vector<std::function<void()>> rollback_ops;

public:
    // Appends an operation and the action that undoes it.
    template<typename Operation, typename Rollback>
    void add_operation(Operation&& op, Rollback&& rollback) {
        operations.emplace_back(std::forward<Operation>(op));
        rollback_ops.emplace_back(std::forward<Rollback>(rollback));
    }

    // Runs the operations in insertion order.
    // Returns true when all of them completed. If one throws, the rollbacks of
    // the operations that had already completed are run in reverse order and
    // false is returned; the failing operation's own rollback is not run.
    bool execute() {
        std::size_t executed_count = 0;
        for (auto& op : operations) {
            try {
                op();
                ++executed_count;
            } catch (...) {
                // Any failure rolls back everything executed so far.
                std::cout << "Operation failed, rolling back..." << std::endl;
                rollback_all(executed_count);
                return false;
            }
        }
        return true;
    }

private:
    // Runs rollback_ops[executed_count-1] down to rollback_ops[0]. A rollback
    // that throws is reported on std::cerr and the remaining ones still run.
    void rollback_all(std::size_t executed_count) {
        for (std::size_t i = executed_count; i > 0; --i) {
            try {
                rollback_ops[i - 1]();
            } catch (...) {
                std::cerr << "Rollback operation failed" << std::endl;
            }
        }
    }
};
6. 最佳实践总结
6.1 设计原则
// Collection of small examples, one per deadlock-avoidance principle.
class DeadlockFreeDesign {
public:
    // Principle 1: always acquire locks in the same order (A, then B, then C).
    void consistent_lock_order() {
        static std::mutex mutex_a, mutex_b, mutex_c;
        std::unique_lock<std::mutex> hold_a(mutex_a);
        std::unique_lock<std::mutex> hold_b(mutex_b);
        std::unique_lock<std::mutex> hold_c(mutex_c);
        // safe to work here
    }

    // Principle 2: let an RAII object own the lock; the mutex is locked in the
    // constructor and released when the manager is destroyed.
    class RAIILockManager {
        std::unique_lock<std::mutex> lock;
    public:
        explicit RAIILockManager(std::mutex& m) : lock(m) {}
    };

    // Principle 3: do not call unknown code while holding a lock.
    void avoid_unknown_calls_while_locked() {
        std::mutex resource_mutex;
        std::unique_lock<std::mutex> guard(resource_mutex);
        // Only operations known to be safe run under the lock; anything that
        // might take other locks (potentially_dangerous_operation) must wait.
        known_safe_operation();
        guard.unlock();
        // The lock has been released, so arbitrary code may run now.
        potentially_dangerous_operation();
    }

    // Principle 4: use a lock hierarchy (see the hierarchical mutex example).
    void use_lock_hierarchy() {
    }

    // Principle 5: prefer lock-free data structures where possible.
    void prefer_lock_free() {
        std::atomic<int> counter{0};
        LockFreeQueue<int> queue;
        // Neither operation takes a lock.
        ++counter;
        queue.push(42);
    }

private:
    void known_safe_operation() {}
    void potentially_dangerous_operation() {}
};
6.2 代码审查清单
// Prints the code-review checklist for deadlock prevention to std::cout.
class DeadlockPreventionChecklist {
public:
    // Writes the title followed by the eight checklist questions, one per
    // line, flushing after each line.
    static void review_code() {
        static const char* const kLines[] = {
            "Deadlock Prevention Checklist:",
            "1. 所有锁是否按照固定顺序获取?",
            "2. 是否避免了嵌套锁?",
            "3. 是否使用RAII管理锁?",
            "4. 锁的持有时间是否尽可能短?",
            "5. 是否避免了在持有锁时调用虚函数?",
            "6. 是否避免了在持有锁时分配内存?",
            "7. 是否考虑了使用无锁数据结构?",
            "8. 是否设置了合理的锁超时时间?",
        };
        for (const char* line : kLines) {
            std::cout << line << std::endl;
        }
    }
};
7. 测试死锁
7.1 死锁测试框架
#include <gtest/gtest.h>
// GoogleTest fixture providing a multi-threaded stress harness.
class DeadlockTest : public ::testing::Test {
protected:
// Spawns `num_threads` workers that repeatedly call system.operation_a()
// (even thread index) or system.operation_b() (odd thread index) until
// `duration` has elapsed or `stop` is set, then joins them and asserts that
// at least one call completed.
// SystemUnderTest must provide operation_a() and operation_b().
// NOTE(review): if a worker blocks forever inside operation_a()/operation_b(),
// the join loop below never returns, so the final ASSERT_GT is never reached -
// a real deadlock shows up as a hung test rather than a failed assertion.
template<typename SystemUnderTest>
void stress_test_deadlock(SystemUnderTest& system, int num_threads,
std::chrono::seconds duration) {
std::atomic<bool> stop{false};
std::vector<std::thread> threads;
std::atomic<int> completed_operations{0};
// Counts exceptions thrown by the operations; a blocked thread is not counted here.
std::atomic<int> deadlock_count{0};
auto start_time = std::chrono::steady_clock::now();
for (int i = 0; i < num_threads; ++i) {
// Locals are captured by reference; the thread index is captured by value.
threads.emplace_back([&, i] {
while (!stop) {
if (std::chrono::steady_clock::now() - start_time > duration) {
break;
}
try {
if (i % 2 == 0) {
system.operation_a();
} else {
system.operation_b();
}
completed_operations++;
} catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
deadlock_count++;
}
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
});
}
// Let the workers run for the requested time, then ask them to stop.
std::this_thread::sleep_for(duration);
stop = true;
for (auto& t : threads) {
if (t.joinable()) t.join();
}
std::cout << "Completed operations: " << completed_operations << std::endl;
std::cout << "Potential deadlocks: " << deadlock_count << std::endl;
ASSERT_GT(completed_operations, 0) << "System appears deadlocked!";
}
};
// Runs the stress harness against BasicDeadlock with 4 threads for 5 seconds.
// NOTE(review): stress_test_deadlock() calls system.operation_a() and
// system.operation_b(), while BasicDeadlock as defined in this file only
// exposes thread1_work() and thread2_work() - confirm an adapter or renamed
// methods exist, otherwise this instantiation will not compile.
TEST_F(DeadlockTest, BasicSystemTest) {
BasicDeadlock system;
stress_test_deadlock(system, 4, std::chrono::seconds(5));
}
通过遵循这些模式和最佳实践,可以有效地预防、检测和解决死锁问题,构建出更健壮的并发C++应用程序。
5万+

被折叠的 条评论
为什么被折叠?



