c/c++linux后台开发学习笔记 3.2.4 死锁

最新推荐文章于 2024-07-01 17:31:48 发布

jsc723

最新推荐文章于 2024-07-01 17:31:48 发布

阅读量814

点赞数

分类专栏： c/c++linux后台开发文章标签： c语言 linux

本文链接：https://blog.youkuaiyun.com/jsc723/article/details/122138950

版权

c/c++linux后台开发专栏收录该内容

19 篇文章

订阅专栏

死锁存在的条件

如果在一个系统中以下四个条件同时成立，那么就能引起死锁：

互斥：至少有一个资源必须处于非共享模式，即一次只有一个进程可使用。如果另一进程申请该资源，那么申请进程应等到该资源释放为止。
占有并等待：一个进程应占有至少一个资源，并等待另一个资源，而该资源为其他进程所占有。
非抢占：资源不能被抢占，即资源只能被进程在完成任务后自愿释放。
循环等待：有一组等待进程 {P0，P1，…，Pn}，P0 等待的资源为 P1 占有，P1 等待的资源为 P2 占有，……，Pn-1 等待的资源为 Pn 占有，Pn 等待的资源为 P0 占有。

预防死锁

最好的方法：规定所有的锁只能按一定的顺序被获取（破坏循环等待条件）
其他方法：

同时获取所有资源（效率低，要预先知道需要多少资源）
在获取锁失败时不进行等待，而是释放已持有的资源（局限性较大）

避免死锁

银行家算法（需要知道每个线程需要的最大资源）

死锁检测

分配图

有两种节点：P线程节点，R资源节点
每次线程P向资源R申请资源，如果R没有资源，则添加一条等待边（P->R）
当P得到资源R时，删除等待边（如果有），并立即添加一条分配边（R->P）
当释放资源时，删除分配边

注意操作图的时候要加锁
在这里插入图片描述
当每种资源只有一个实例时（例如本文中的锁），当且仅当分配图有环时，存在死锁

代码实现

通过dlsym在pthread_mutex_lock和pthread_spin_lock上挂钩子，在加锁解锁前后维护了分配图
新建一个监控线程，每隔五秒检测图里是否有环（这里假设每个资源最多只有一个owner）
具体代码就不解释了吧
整个图需要的数据结构都保存在全局变量checker中
其中jshashmap（哈希表）和jsgraph（图）是我自己手写的简单数据结构，这里为了简洁就省略了

#define _GNU_SOURCE
#include <dlfcn.h>

#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#include <unistd.h>
#include "jshashmap.h"
#include "jsgraph.h"

// Not referenced anywhere in the visible code.
#define THREAD_NUM      10

// Integer type used to carry both thread ids and lock addresses as map keys.
typedef unsigned long int uint64;

// Signature of the real pthread_mutex_lock; the pointer is filled in by init_hook().
typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex);

pthread_mutex_lock_t pthread_mutex_lock_f;

// Signature of the real pthread_mutex_unlock; the pointer is filled in by init_hook().
typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex);

pthread_mutex_unlock_t pthread_mutex_unlock_f;

// Signature of the real pthread_spin_lock; the pointer is filled in by init_hook().
typedef int (*pthread_spin_lock_t)(pthread_spinlock_t *spin);

pthread_spin_lock_t pthread_spin_lock_f;

// Signature of the real pthread_spin_unlock; the pointer is filled in by init_hook().
typedef int (*pthread_spin_unlock_t)(pthread_spinlock_t *spin);

pthread_spin_unlock_t pthread_spin_unlock_f;

// Vertex ids are split in two halves: thread vertices are handed out starting
// at THREAD_VERTEX_START and lock vertices starting at RESOURCE_VERTEX_START.
// print_cycle() treats any id below THREAD_VERTEX_MAX as a thread.
#define THREAD_VERTEX_START 0
#define THREAD_VERTEX_MAX (JS_GSIZE >> 1)
#define RESOURCE_VERTEX_START THREAD_VERTEX_MAX
#define RESOURCE_VERTEX_MAX JS_GSIZE
// Capacity of the path buffer used while searching for a cycle.
#define WAITING_PATH_MAX 512

// Global state of the deadlock checker. Every function that touches the map
// or the graph from a hooked lock/unlock call takes `lock` first.
struct {
	pthread_spinlock_t lock;  // taken through pthread_spin_lock_f, i.e. the un-hooked spin lock
	//js_hashmap_t lock_owner; //lock addr => thread vertex_id
	js_hashmap_t addr_vertex; // thread id or lock address => vertex_id
	int next_thread_vid;      // next thread vertex id to allocate
	int next_lock_vid;        // next lock vertex id to allocate
	js_graph_t waiting_graph; // edges: thread => lock while waiting, lock => thread while owned;
	                          // data[vertex_id] stores the thread id or lock address
} checker;

/*
 * Print the cycle stored in path[start .. end-1] on stdout.
 *
 * path  - vertex ids in the order they were visited
 * start - index in path of the first vertex of the cycle
 * end   - one past the index of the last vertex of the cycle
 *
 * Every vertex is printed as "[T|L idx=<vertex id> id=<thread id or lock addr>]".
 * The first vertex is printed again at the end to close the cycle. Nothing is
 * printed when the range is empty or path is NULL.
 */
void print_cycle(int *path, int start, int end) {
	if (path == NULL || start < 0 || start >= end) {
		return;
	}

	printf("cycle detected: ");
	for (int i = start; i < end; i++) {
		int vid = path[i];
		printf("[%c idx=%d id=%lu] -> ",
		       (vid < THREAD_VERTEX_MAX) ? 'T' : 'L',
		       vid,
		       (unsigned long)checker.waiting_graph.data[vid]);
	}

	/* Close the cycle with the vertex it started from, not the last one. */
	int first = path[start];
	printf("[%c idx=%d id=%lu]\n",
	       (first < THREAD_VERTEX_MAX) ? 'T' : 'L',
	       first,
	       (unsigned long)checker.waiting_graph.data[first]);
}

/*
 * Walk the waiting graph starting from `cur`, looking for a cycle.
 *
 * visited - per-vertex flags; a flagged successor is not followed
 * path    - vertices on the current walk, path[0 .. npath-1]
 * npath   - number of valid entries in path
 * cur     - vertex being examined
 *
 * If `cur` is already on the path the cycle is reported through
 * print_cycle(). Only the first entry of adj[cur] is followed.
 */
void check_dead_lock_dfs(int *visited, int *path, int npath, int cur) {
	int pos = 0;

	/* Already on the current path: path[pos .. npath-1] is a cycle. */
	while (pos < npath) {
		if (path[pos] == cur) {
			print_cycle(path, pos, npath);
			return;
		}
		pos++;
	}

	if (!checker.waiting_graph.adj[cur]) {
		return;
	}

	int successor = checker.waiting_graph.adj[cur]->vertex_id;
	if (visited[successor]) {
		return;
	}

	path[npath] = cur;
	npath += 1;
	if (npath == WAITING_PATH_MAX) {
		printf("waiting path too long\n");
		return;
	}

	check_dead_lock_dfs(visited, path, npath, successor);
	visited[successor] = 1;
}

/*
 * Dump the waiting graph and report every cycle found in it.
 *
 * The scratch arrays are static, so this function is not reentrant.
 * checker.lock is held for the whole scan because the hooked lock/unlock
 * calls modify the graph under that same lock from other threads.
 */
void check_dead_lock(void) {
	static int visited[JS_GSIZE];
	static int path[WAITING_PATH_MAX];

	pthread_spin_lock_f(&checker.lock);

	js_graph_print(&checker.waiting_graph);
	/* Size each clear from the array itself: path is smaller than visited. */
	memset(visited, 0, sizeof visited);
	memset(path, 0, sizeof path);

	for (int i = 0; i < JS_GSIZE; i++) {
		if (!visited[i]) {
			check_dead_lock_dfs(visited, path, 0, i);
			visited[i] = 1;
		}
	}

	pthread_spin_unlock_f(&checker.lock);
}

/* Monitor thread body: run a deadlock check every five seconds, forever. */
static void *thread_routine(void *args) {
	(void)args;
	for (;;) {
		sleep(5);
		check_dead_lock();
	}
}

/*
 * Reset the global checker: vertex id counters go back to the start of their
 * ranges, and the spin lock, address map and waiting graph are initialised.
 */
void init_checker() {
	checker.next_lock_vid = RESOURCE_VERTEX_START;
	checker.next_thread_vid = THREAD_VERTEX_START;

	pthread_spin_init(&checker.lock, PTHREAD_PROCESS_PRIVATE);
	js_hashmap_init(&checker.addr_vertex);
	js_graph_init(&checker.waiting_graph, 0);
}

/*
 * Start the background thread that runs thread_routine().
 *
 * The thread is detached because nothing ever joins it. A creation failure
 * is reported on stderr and the program continues without monitoring.
 */
void start_check(void) {
	pthread_t tid;
	int rc = pthread_create(&tid, NULL, thread_routine, NULL);

	if (rc != 0) {
		fprintf(stderr, "start_check: pthread_create failed: %s\n", strerror(rc));
		return;
	}
	pthread_detach(tid);
}

// NOT thread safe: the caller must hold checker.lock.
// Returns the vertex id registered for thread_id. On first sight of the
// thread, the next free thread vertex id is allocated, stored in the address
// map and recorded in the graph's data array.
// NOTE(review): nothing here stops next_thread_vid from running past
// THREAD_VERTEX_MAX - confirm the expected number of threads.
int checker_get_thread_vid(uint64 thread_id) {
	js_hashmap_node_t *entry = js_hashmap_get(&checker.addr_vertex, thread_id);
	if (entry) {
		return (int)entry->v;
	}

	int fresh = checker.next_thread_vid;
	checker.next_thread_vid = fresh + 1;
	js_hashmap_put(&checker.addr_vertex, thread_id, fresh);
	checker.waiting_graph.data[fresh] = (void*)thread_id;
	return fresh;
}

// NOT thread safe: the caller must hold checker.lock.
// Returns the vertex id registered for lockaddr. On first sight of the lock,
// the next free lock vertex id is allocated, stored in the address map and
// recorded in the graph's data array.
// NOTE(review): nothing here stops next_lock_vid from running past
// RESOURCE_VERTEX_MAX - confirm the expected number of locks.
int checker_get_lock_vid(uint64 lockaddr) {
	js_hashmap_node_t *entry = js_hashmap_get(&checker.addr_vertex, lockaddr);
	if (entry) {
		return (int)entry->v;
	}

	int fresh = checker.next_lock_vid;
	checker.next_lock_vid = fresh + 1;
	js_hashmap_put(&checker.addr_vertex, lockaddr, fresh);
	checker.waiting_graph.data[fresh] = (void*)lockaddr;
	return fresh;
}


// Thread safe (takes checker.lock).
// Called before the real lock call: records that thread_id is waiting for
// lockaddr by adding a thread => lock edge.
void lock_before(uint64 thread_id, uint64 lockaddr) {
	int waiter;
	int resource;

	pthread_spin_lock_f(&checker.lock);
	waiter = checker_get_thread_vid(thread_id);
	resource = checker_get_lock_vid(lockaddr);
	js_graph_add_edge(&checker.waiting_graph, waiter, resource); // thread => lock
	pthread_spin_unlock_f(&checker.lock);
}

// Thread safe (takes checker.lock).
// Called after the real lock call returned: thread_id now holds lockaddr, so
// the waiting edge is replaced by an ownership edge.
void lock_after(uint64 thread_id, uint64 lockaddr) {
	int owner;
	int resource;

	pthread_spin_lock_f(&checker.lock);
	owner = checker_get_thread_vid(thread_id);
	resource = checker_get_lock_vid(lockaddr);
	// flip the direction: drop thread => lock, add lock => thread
	js_graph_erase_edge(&checker.waiting_graph, owner, resource);
	js_graph_add_edge(&checker.waiting_graph, resource, owner);
	pthread_spin_unlock_f(&checker.lock);
}

// Thread safe (takes checker.lock).
// Called after the real unlock call: removes the lock => thread ownership
// edge between lockaddr and thread_id.
void unlock_after(uint64 thread_id, uint64 lockaddr) {
	int owner;
	int resource;

	pthread_spin_lock_f(&checker.lock);
	owner = checker_get_thread_vid(thread_id);
	resource = checker_get_lock_vid(lockaddr);
	js_graph_erase_edge(&checker.waiting_graph, resource, owner); // drop ownership
	pthread_spin_unlock_f(&checker.lock);
}



/*
 * Hook for pthread_mutex_lock: records the wait in the checker graph, calls
 * the real function through pthread_mutex_lock_f, then records ownership.
 *
 * Returns whatever the real pthread_mutex_lock returned.
 */
int pthread_mutex_lock(pthread_mutex_t *mutex) {
	pthread_t selfid = pthread_self();

	lock_before(selfid, (uint64)mutex);
	int rc = pthread_mutex_lock_f(mutex);
	/* NOTE(review): ownership is recorded even when rc != 0 - confirm whether
	 * a failed acquisition should be skipped here. */
	lock_after(selfid, (uint64)mutex);
	return rc;
}

/*
 * Hook for pthread_mutex_unlock: calls the real function through
 * pthread_mutex_unlock_f, then removes the ownership edge from the graph.
 *
 * Returns whatever the real pthread_mutex_unlock returned.
 */
int pthread_mutex_unlock(pthread_mutex_t *mutex) {
	pthread_t selfid = pthread_self();

	int rc = pthread_mutex_unlock_f(mutex);
	/* NOTE(review): the edge is removed even when rc != 0 - confirm whether a
	 * failed unlock should leave the graph untouched. */
	unlock_after(selfid, (uint64)mutex);
	return rc;
}

/*
 * Hook for pthread_spin_lock: records the wait in the checker graph, calls
 * the real function through pthread_spin_lock_f, then records ownership.
 *
 * Returns whatever the real pthread_spin_lock returned.
 */
int pthread_spin_lock(pthread_spinlock_t *spin) {
	pthread_t selfid = pthread_self();

	lock_before(selfid, (uint64)spin);
	int rc = pthread_spin_lock_f(spin);
	/* NOTE(review): ownership is recorded even when rc != 0 - confirm whether
	 * a failed acquisition should be skipped here. */
	lock_after(selfid, (uint64)spin);
	return rc;
}

/*
 * Hook for pthread_spin_unlock: calls the real function through
 * pthread_spin_unlock_f, then removes the ownership edge from the graph.
 *
 * Returns whatever the real pthread_spin_unlock returned.
 */
int pthread_spin_unlock(pthread_spinlock_t *spin) {
	pthread_t selfid = pthread_self();

	int rc = pthread_spin_unlock_f(spin);
	/* NOTE(review): the edge is removed even when rc != 0 - confirm whether a
	 * failed unlock should leave the graph untouched. */
	unlock_after(selfid, (uint64)spin);
	return rc;
}

/*
 * Resolve the next definitions of the four hooked pthread functions with
 * dlsym(RTLD_NEXT, ...) and store them in the *_f pointers.
 *
 * Returns 0 when all four symbols were found, -1 (after a message on stderr)
 * when at least one lookup returned NULL.
 */
static int init_hook() {
	pthread_mutex_lock_f = dlsym(RTLD_NEXT, "pthread_mutex_lock");
	pthread_mutex_unlock_f = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
	pthread_spin_lock_f = dlsym(RTLD_NEXT, "pthread_spin_lock");
	pthread_spin_unlock_f = dlsym(RTLD_NEXT, "pthread_spin_unlock");

	if (!pthread_mutex_lock_f || !pthread_mutex_unlock_f ||
	    !pthread_spin_lock_f || !pthread_spin_unlock_f) {
		fprintf(stderr, "init_hook: dlsym could not resolve a pthread lock symbol\n");
		return -1;
	}
	return 0;
}

测试代码


// Mutexes taken in a ring by thread_rountine_1..4 (1->2, 2->3, 3->4, 4->1).
pthread_mutex_t mutex_1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex_2 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex_3 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex_4 = PTHREAD_MUTEX_INITIALIZER;
// Spin locks taken in opposite order by thread_rountine_5 and thread_rountine_6;
// they are initialised in main().
pthread_spinlock_t spin_1;
pthread_spinlock_t spin_2;

/*
 * Test thread 1: takes mutex_1, sleeps one second, then takes mutex_2.
 * Both are released in reverse order once acquired. Always returns NULL.
 */
void *thread_rountine_1(void *args)
{
	pthread_t selfid = pthread_self();

	(void)args;
	/* pthread_t is printed unsigned, matching the ids in the cycle report. */
	printf("thread_routine 1 : %lu \n", (unsigned long)selfid);

	pthread_mutex_lock(&mutex_1);
	sleep(1);
	pthread_mutex_lock(&mutex_2);

	pthread_mutex_unlock(&mutex_2);
	pthread_mutex_unlock(&mutex_1);

	return NULL;
}

/*
 * Test thread 2: takes mutex_2, sleeps one second, then takes mutex_3.
 * Both are released in reverse order once acquired. Always returns NULL.
 */
void *thread_rountine_2(void *args)
{
	pthread_t selfid = pthread_self();

	(void)args;
	/* pthread_t is printed unsigned, matching the ids in the cycle report. */
	printf("thread_routine 2 : %lu \n", (unsigned long)selfid);

	pthread_mutex_lock(&mutex_2);
	sleep(1);
	pthread_mutex_lock(&mutex_3);

	pthread_mutex_unlock(&mutex_3);
	pthread_mutex_unlock(&mutex_2);

	return NULL;
}

/*
 * Test thread 3: takes mutex_3, sleeps one second, then takes mutex_4.
 * Both are released in reverse order once acquired. Always returns NULL.
 */
void *thread_rountine_3(void *args)
{
	pthread_t selfid = pthread_self();

	(void)args;
	/* pthread_t is printed unsigned, matching the ids in the cycle report. */
	printf("thread_routine 3 : %lu \n", (unsigned long)selfid);

	pthread_mutex_lock(&mutex_3);
	sleep(1);
	pthread_mutex_lock(&mutex_4);

	pthread_mutex_unlock(&mutex_4);
	pthread_mutex_unlock(&mutex_3);

	return NULL;
}

/*
 * Test thread 4: takes mutex_4, sleeps one second, then takes mutex_1,
 * which closes the ring formed with test threads 1-3.
 * Both are released in reverse order once acquired. Always returns NULL.
 */
void *thread_rountine_4(void *args)
{
	pthread_t selfid = pthread_self();

	(void)args;
	/* pthread_t is printed unsigned, matching the ids in the cycle report. */
	printf("thread_routine 4 : %lu \n", (unsigned long)selfid);

	pthread_mutex_lock(&mutex_4);
	sleep(1);
	pthread_mutex_lock(&mutex_1);

	pthread_mutex_unlock(&mutex_1);
	pthread_mutex_unlock(&mutex_4);

	return NULL;
}

/*
 * Test thread 5: takes spin_1, sleeps one second, then takes spin_2.
 * Both are released in reverse order once acquired. Always returns NULL.
 */
void *thread_rountine_5(void *args)
{
	pthread_t selfid = pthread_self();

	(void)args;
	/* pthread_t is printed unsigned, matching the ids in the cycle report. */
	printf("thread_routine 5 : %lu \n", (unsigned long)selfid);

	pthread_spin_lock(&spin_1);
	printf("thread_routine 5 get spin 1\n");
	sleep(1);
	pthread_spin_lock(&spin_2);
	printf("thread_routine 5 get spin 2\n");

	pthread_spin_unlock(&spin_2);
	pthread_spin_unlock(&spin_1);

	return NULL;
}

/*
 * Test thread 6: takes spin_2, sleeps one second, then takes spin_1 -
 * the opposite order from test thread 5.
 * Both are released in reverse order once acquired. Always returns NULL.
 */
void *thread_rountine_6(void *args)
{
	pthread_t selfid = pthread_self();

	(void)args;
	/* pthread_t is printed unsigned, matching the ids in the cycle report. */
	printf("thread_routine 6 : %lu \n", (unsigned long)selfid);

	pthread_spin_lock(&spin_2);
	printf("thread_routine 6 get spin 2\n");
	sleep(1);
	pthread_spin_lock(&spin_1);
	printf("thread_routine 6 get spin 1\n");

	pthread_spin_unlock(&spin_1);
	pthread_spin_unlock(&spin_2);

	return NULL;
}


/*
 * Test driver: installs the hooks, starts the checker thread, then launches
 * the six test threads and waits for them.
 *
 * A thread that could not be created is reported on stderr and is not joined.
 */
int main()
{
	static void *(*const routines[])(void *) = {
		thread_rountine_1, thread_rountine_2, thread_rountine_3,
		thread_rountine_4, thread_rountine_5, thread_rountine_6,
	};
	enum { ROUTINE_COUNT = sizeof routines / sizeof routines[0] };
	pthread_t tids[ROUTINE_COUNT];
	int started[ROUTINE_COUNT];

	init_hook();
	init_checker();
	start_check();

	printf("start_check\n");
	pthread_spin_init(&spin_1, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&spin_2, PTHREAD_PROCESS_PRIVATE);

	for (int i = 0; i < ROUTINE_COUNT; i++) {
		int rc = pthread_create(&tids[i], NULL, routines[i], NULL);
		started[i] = (rc == 0);
		if (rc != 0) {
			fprintf(stderr, "main: pthread_create failed for routine %d: %s\n",
			        i + 1, strerror(rc));
		}
	}

	/* Join only what was actually created; tids[i] is indeterminate otherwise. */
	for (int i = 0; i < ROUTINE_COUNT; i++) {
		if (started[i]) {
			pthread_join(tids[i], NULL);
		}
	}

	return 0;
}

修复死锁

死锁是逻辑bug，完全修复死锁需要修改业务代码
临时补救方法（选一个，从简单到难）：

把所有陷入死锁的线程都干掉
把所有陷入死锁的线程恢复到之前的一个checkpoint
把死锁进程一个个干掉直到不再死锁
把死锁进程的资源一个个释放直到不再死锁

参考资料

零声教育c/c++linux后台开发3.2.4
https://blog.youkuaiyun.com/zhangpower1993/article/details/89518780#:~:text=%E6%AD%BB%E9%94%81%E9%81%BF%E5%85%8D%E6%98%AF%E5%88%A9%E7%94%A8,%E9%94%81%EF%BC%8C%E5%88%99%E6%8B%92%E7%BB%9D%E8%AF%A5%E7%94%B3%E8%AF%B7%E3%80%82
http://c.biancheng.net/view/1236.html