- 一、概述
- 在Linux系统里,假设有两处代码(比如不同线程的两个函数F1和F2)都要获取两个锁(分别为L1和L2),如果F1持有L1后再去获取L2,而此时恰好由F2持有L2且它也正在尝试获取L1,那么此时就是处于死锁的状态,这是一个最简单的死锁例子,也即所谓的AB-BA死锁。
-
- 死锁导致的最终结果无需多说,关于如何避免死锁在教科书上也有提到,最简单直观的做法就是按顺序上锁,以破坏死锁的环形等待条件。但对于拥有成千上万个锁的整个系统来说,完全定义它们之间的顺序是非常困难的,所以一种更可行的办法就是尽量提前发现这其中潜在的死锁风险,而不是等到最后真正出现死锁时给用户带来切实的困惑。
- 已有很多工具用于发现可能的死锁风险,而本文介绍的调试/检测模块lockdep,即是属于这一类工具的一种。调试模块lockdep从2006年引入内核,经过实践验证,其对提前发现死锁起到了巨大的作用。
-
- 官方文档有介绍调试模块lockdep的设计原理,这里按照我自己的理解描述一下。
- 1,lockdep操作的基本单元并非单个的锁实例,而是锁类(lock-class)。比如,struct inode结构体中的自旋锁i_lock字段就代表了这一类锁,而具体每个inode节点的锁只是该类锁中的一个实例。对所有这些实例,lockdep会把它们当作一个整体做处理,即把判断粒度放大,否则对可能有成千上万个的实例进行逐一判断,那处理难度可想而知,而且也没有必要。当然,在具体的处理中,可能会记录某些特定情况下的实例的部分相关信息,以便事后排查问题。
- 2,lockdep跟踪每个锁类的自身状态,也跟踪各个锁类之间的依赖关系,通过一系列的验证规则,以确保锁类状态和锁类之间的依赖总是正确的。另外,锁类一旦在初次使用时被注册,那么后续就会一直存在,所有它的具体实例都会关联到它。
-
- lockdep是linux内核的一个调试模块,用来检查内核互斥机制尤其是自旋锁潜在的死锁问题。自旋锁采用忙等(轮询)的方式等待,等待期间不释放处理器,比一般的互斥机制更容易死锁,故引入lockdep检查以下几种可能导致死锁的情况。
- 1.同一个进程递归地加锁同一把锁.
- 2.一把锁既在中断(或中断下半部)使能的情况下执行过加锁操作,又在中断(或中断下半部)里执行过加锁操作。这样,该锁有可能在被持有期间发生中断,而中断处理程序又试图在同一处理器上对它加锁,使依赖图形成闭环,这是典型的死锁现象。
-
- 二、 lockdep验证规则
- (1)单锁状态规则(Single-lock state rules)
- 1,一个软中断不安全(softirq-unsafe)的锁类同样也是硬中断不安全(hardirq-unsafe)的。
- 2,对于任何一个锁类,它不可能同时是hardirq-safe和hardirq-unsafe,也不可能同时是softirq-safe和softirq-unsafe,即这两对对应状态是互斥的。
- 上面这两条就是lockdep判断单锁是否会发生死锁的检测规则。
- (2)多锁依赖规则(Multi-lock dependency rules)
- 1,同一个锁类不能被获取两次,因为这会导致递归死锁。
- 2,不能以不同的顺序获取两个锁类,即如此这样:
- <L1> -> <L2>
- <L2> -> <L1>
- 是不行的。因为这会非常容易的导致本文最先提到的AB-BA死锁。当然,下面这样的情况也不行:
- <L1> -> <L3> -> <L4> -> <L2>
- <L2> -> <L3> -> <L4> -> <L1>
- 即在中间插入了其它正常顺序的锁也能被lockdep检测出来。
- 3,任何两个锁类之间不能出现如下依赖关系(箭头表示在持有前者的情况下再去获取后者):
- <hardirq-safe> -> <hardirq-unsafe>
- <softirq-safe> -> <softirq-unsafe>
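- 这里的hardirq-safe是指该锁类曾在硬中断上下文中被获取过,hardirq-unsafe是指该锁类曾在硬中断开启的情况下被获取过(例如直接使用spin_lock(…)而不是spin_lock_irqsave(…))。如果在持有hardirq-safe锁的情况下再去获取hardirq-unsafe锁,就存在死锁的风险:hardirq-unsafe锁的持有者随时可能被硬中断打断,而硬中断处理函数可能去获取那个hardirq-safe锁,从而形成锁顺序反转。单个锁的情况更容易理解,比如在进程上下文中持有锁A,并且锁A是hardirq-unsafe,如果此时触发硬中断,而硬中断处理函数又要去获取锁A,那么就导致了死锁。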
- 在锁类状态发生变化时,进行如下几个规则检测,判断是否存在潜在死锁。比较简单,就是判断hardirq-safe和hardirq-unsafe以及softirq-safe和softirq-unsafe是否发生了碰撞.
-
- 三、相关结构体
- 1.struct held_lock
- 在每个进程的task_struct结构体中定义了struct held_lock held_locks[MAX_LOCK_DEPTH]成员,用来记录锁。
- struct held_lock { /* One entry of the per-task held_locks[] array */
- 215 /*
- 216 * One-way hash of the dependency chain up to this point. We
- 217 * hash the hashes step by step as the dependency chain grows.
- 218 *
- 219 * We use it for dependency-caching and we skip detection
- 220 * passes and dependency-updates if there is a cache-hit, so
- 221 * it is absolutely critical for 100% coverage of the validator
- 222 * to have a unique key value for every unique dependency path
- 223 * that can occur in the system, to make a unique hash value
- 224 * as likely as possible - hence the 64-bit width.
- 225 *
- 226 * The task struct holds the current hash value (initialized
- 227 * with zero), here we store the previous hash value:
- 228 */
- u64 prev_chain_key;
- unsigned long acquire_ip; /* address printed after ", at: " by print_lock() */
- struct lockdep_map *instance; /* lockdep_map of the held lock; range-checked by debug_check_no_locks_freed() */
- struct lockdep_map *nest_lock;
- #ifdef CONFIG_LOCK_STAT
- u64 waittime_stamp;
- u64 holdtime_stamp;
- #endif
- unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS; /* 1-based index into lock_classes[]; 0 is rejected by hlock_class() */
- 238 /*
- 239 * The lock-stack is unified in that the lock chains of interrupt
- 240 * contexts nest ontop of process context chains, but we 'separate'
- 241 * the hashes by starting with 0 if we cross into an interrupt
- 242 * context, and we also keep do not add cross-context lock
- 243 * dependencies - the lock usage graph walking covers that area
- 244 * anyway, and we'd just unnecessarily increase the number of
- 245 * dependencies otherwise. [Note: hardirq and softirq contexts
- 246 * are separated from each other too.]
- 247 *
- 248 * The following field is used to detect when we cross into an
- 249 * interrupt context:
- 250 */
- unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */
- unsigned int trylock:1; /* 16 bits */
-
- unsigned int read:2; /* see lock_acquire() comment */
- unsigned int check:2; /* see lock_acquire() comment */
- unsigned int hardirqs_off:1;
- unsigned int references:11; /* 32 bits */
- };
-
- 2.lockdep_map
- mutex、raw_spinlock、semaphore等各种锁结构体中都内嵌了该结构体,用于对锁进行检测。
- struct lockdep_map { /* Per-lock lockdep bookkeeping, filled in by lockdep_init_map() */
- struct lock_class_key *key; /* stored by lockdep_init_map() once the key passes static_obj() */
- struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; /* every slot is reset to NULL by lockdep_init_map() */
- const char *name; /* caller-supplied name; lockdep_init_map() stores "NULL" when none is given */
- #ifdef CONFIG_LOCK_STAT
- int cpu; // CPU number on which this structure was initialized
- unsigned long ip;
- #endif
- };
-
- 3.lock_class
- struct lock_class { /* Per-class lockdep state; hlock_class() returns entries of lock_classes[] as this type */
- struct list_head hash_entry;
- struct list_head lock_entry;
-
- struct lockdep_subclass_key *key; /* handed to __get_key_name() when the class has no name */
- unsigned int subclass; /* printed as "/<n>" by __print_lock_name() when non-zero */
- unsigned int dep_gen_id;
-
- unsigned long usage_mask;
- struct stack_trace usage_traces[XXX_LOCK_USAGE_STATES];
-
- struct list_head locks_after, locks_before;
- unsigned int version;
- unsigned long ops;
-
- const char *name; /* printed by __print_lock_name(); may be NULL */
- int name_version; /* printed as "#<n>" by __print_lock_name() when greater than 1 */
-
- #ifdef CONFIG_LOCK_STAT
- unsigned long contention_point[LOCKSTAT_POINTS];
- unsigned long contending_point[LOCKSTAT_POINTS];
- #endif
- };
-
- 4.lock_class_key
- struct lock_class_key { /* Key object passed to lockdep_init_map(); it must satisfy static_obj() there */
- struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; /* one sub-key per subclass slot */
- };
-
- 5.lockdep_subclass_key
- struct lockdep_subclass_key { /* Packed single-byte element; lock_class_key holds MAX_LOCKDEP_SUBCLASSES of these */
- char __one_byte;
- } __attribute__ ((__packed__));
-
-
- 三、lockdep初始化
- 建立两个散列表classhash_table和chainhash_table,并设置全局变量lockdep_initialized,标志已初始化完成。
- static struct list_head classhash_table[CLASSHASH_SIZE]; /* hash buckets initialised by lockdep_init() */
- static struct list_head chainhash_table[CHAINHASH_SIZE]; /* hash buckets initialised by lockdep_init() */
- void lockdep_init(void) /* One-time setup of both hash tables; repeated calls return immediately */
- {
- int i;
-
- if (lockdep_initialized) /* already done: nothing to do */
- return;
-
- for (i = 0; i < CLASSHASH_SIZE; i++)
- INIT_LIST_HEAD(classhash_table + i); /* every bucket starts as an empty list */
-
- for (i = 0; i < CHAINHASH_SIZE; i++)
- INIT_LIST_HEAD(chainhash_table + i);
-
- lockdep_initialized = 1; /* mark initialisation as complete */
- }
-
- 四、提供接口
- 1. lockdep_init_map
- 用于初始化锁内嵌的lockdep_map结构体
- static inline void sema_init(struct semaphore *sem, int val) /* Example caller: initialise *sem and set up the lockdep_map of its inner lock */
- {
- static struct lock_class_key __key; /* static storage; presumably so the key passes lockdep_init_map()'s static_obj() check */
- *sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
- lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0); /* subclass 0: no class is registered at this point */
- }
-
- void lockdep_init_map(struct lockdep_map *lock, const char *name,struct lock_class_key *key, int subclass) /* Initialise the lockdep_map embedded in a lock: clear the class cache, store name and key */
- {
- int i;
-
- // no-op on ARM (original author's note)
- kmemcheck_mark_initialized(lock, sizeof(*lock));
-
- // reset the class_cache slots of the lockdep_map
- for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
- lock->class_cache[i] = NULL;
-
- #ifdef CONFIG_LOCK_STAT
- lock->cpu = raw_smp_processor_id();
- #endif
- // name must not be NULL; warn and fall back to the literal "NULL"
- if (DEBUG_LOCKS_WARN_ON(!name)) {
- lock->name = "NULL";
- return;
- }
- // store the name
- lock->name = name;
-
- // key must not be NULL
- if (DEBUG_LOCKS_WARN_ON(!key))
- return;
-
- // sanity-check the key address: per the original author's note it must lie in kernel .data, per-cpu or module space (static_obj() itself is not shown here)
- if (!static_obj(key)) {
- printk("BUG: key %p not in .data!\n", key);
- DEBUG_LOCKS_WARN_ON(1);
- return;
- }
- // store the key
- lock->key = key;
-
- if (unlikely(!debug_locks)) /* lock debugging is switched off: skip class registration */
- return;
-
- // non-zero subclass: register the lock class for this lockdep_map right away
- if (subclass)
- register_lock_class(lock, subclass, 1);
- }
-
- 2. lock_acquire
- 在关闭本地中断并设置lockdep_recursion防重入标志之后,调用__lock_acquire完成实际的处理。
- void lock_acquire(struct lockdep_map *lock, unsigned int subclass,int trylock, int read, int check,struct lockdep_map *nest_lock, unsigned long ip) /* Entry point that forwards to __lock_acquire() with local IRQs saved and a recursion guard set */
- {
- unsigned long flags;
-
- if (unlikely(current->lockdep_recursion)) /* this task is already inside the guarded section below: do nothing */
- return;
-
- raw_local_irq_save(flags);
- check_flags(flags);
-
- current->lockdep_recursion = 1; /* guard against re-entry while __lock_acquire() runs */
- // empty function (original author's note)
- trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
- __lock_acquire(lock, subclass, trylock, read, check,irqs_disabled_flags(flags), nest_lock, ip, 0); /* passes whether IRQs were disabled in the saved flags */
- current->lockdep_recursion = 0;
- raw_local_irq_restore(flags);
- }
-
- 3. debug_check_no_locks_freed
- 用于检测一个锁是不是被多次初始化,或者一块内存在释放时还持有锁。
- void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) /* Report the first lock held by the current task whose lockdep_map overlaps [mem_from, mem_from + mem_len) */
- {
- struct task_struct *curr = current;
- struct held_lock *hlock;
- unsigned long flags;
- int i;
-
- if (unlikely(!debug_locks)) /* lock debugging is off */
- return;
-
- local_irq_save(flags);
- // walk every held_lock entry of the current task
- for (i = 0; i < curr->lockdep_depth; i++) {
- hlock = curr->held_locks + i;
-
- // skip entries whose lockdep_map does not overlap [mem_from, mem_from + mem_len)
- if (not_in_range(mem_from, mem_len, hlock->instance,sizeof(*hlock->instance)))
- continue;
-
- print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock);
- break; /* only the first overlapping lock is reported */
- }
- local_irq_restore(flags);
- }
-
- static inline int not_in_range(const void* mem_from, unsigned long mem_len, /* Returns non-zero when [lock_from, lock_from + lock_len) and [mem_from, mem_from + mem_len) do not overlap */
- const void* lock_from, unsigned long lock_len)
- {
- return lock_from + lock_len <= mem_from || mem_from + mem_len <= lock_from; /* lock range ends before the memory range starts, or begins at/after its end; void * arithmetic is a GNU C extension */
- }
-
- static void print_freed_lock_bug(struct task_struct *curr, const void *mem_from, /* Print the "held lock freed" report for hlock, then the task's held locks and a stack dump */
- const void *mem_to, struct held_lock *hlock)
- {
- // debug_locks_off() returns 0 when debugging was already off or silent mode is set: nothing to report
- if (!debug_locks_off())
- return;
- // silent mode: print nothing
- if (debug_locks_silent)
- return;
-
- printk("\n");
- printk("=========================\n");
- printk("[ BUG: held lock freed! ]\n");
- print_kernel_ident();
- printk("-------------------------\n");
- printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
- curr->comm, task_pid_nr(curr), mem_from, mem_to-1); /* mem_to is one past the end, so print the last byte's address */
- print_lock(hlock);// print the offending lock
- lockdep_print_held_locks(curr);
-
- printk("\nstack backtrace:\n");
- dump_stack();// print the stack trace
- }
-
- //Generic 'turn off all lock debugging' function:
- int debug_locks_off(void) /* Returns 1 only if this call switched debugging off and silent mode is not set; otherwise 0 */
- {
- if (__debug_locks_off()) { /* non-zero: debug_locks was still set before this call */
- if (!debug_locks_silent) {
- console_verbose();
- return 1;
- }
- }
- return 0;
- }
-
- // debug_locks == 1 means lock debugging is on; 0 means all lock debugging is off
- static inline int __debug_locks_off(void) /* Clears debug_locks and returns the result of xchg(), i.e. its previous value */
- {
- return xchg(&debug_locks, 0);
- }
-
- static void print_kernel_ident(void) /* Print one line: kernel release, first word of the version string, taint string */
- {
- printk("%s %.*s %s\n", init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "), /* precision for %.*s: length of version up to the first space */
- init_utsname()->version,
- print_tainted());
- }
-
- static void print_lock(struct held_lock *hlock) /* Print the lock's class name followed by ", at: " and its acquire_ip symbol */
- {
- print_lock_name(hlock_class(hlock)); /* NOTE(review): hlock_class() may return NULL; that value is passed on unchecked here */
- printk(", at: ");
- print_ip_sym(hlock->acquire_ip);
- }
-
- static inline struct lock_class *hlock_class(struct held_lock *hlock) /* Map hlock->class_idx to its lock_classes[] entry; warns and returns NULL when the index is 0 */
- {
- if (!hlock->class_idx) { /* index 0 is not a valid class */
- DEBUG_LOCKS_WARN_ON(1);
- return NULL;
- }
- return lock_classes + hlock->class_idx - 1; /* class_idx is 1-based */
- }
-
- static void print_lock_name(struct lock_class *class) /* Print " (<class name>){<usage chars>}" for the given class */
- {
- 529 char usage[LOCK_USAGE_CHARS]; /* buffer filled by get_usage_chars() */
- 530
- 531 get_usage_chars(class, usage);
- 532
- 533 printk(" (");
- 534 __print_lock_name(class);
- 535 printk("){%s}", usage);
- }
-
- static void __print_lock_name(struct lock_class *class) /* Print the class name, or a name obtained from its key when the class has none */
- {
- 511 char str[KSYM_NAME_LEN]; /* scratch buffer handed to __get_key_name() */
- 512 const char *name;
- 513
- 514 name = class->name;
- 515 if (!name) { /* unnamed class: fall back to __get_key_name() */
- 516 name = __get_key_name(class->key, str);
- 517 printk("%s", name);
- 518 } else {
- 519 printk("%s", name);
- 520 if (class->name_version > 1) /* append "#<version>" only for versions above 1 */
- 521 printk("#%d", class->name_version);
- 522 if (class->subclass) /* append "/<subclass>" only for a non-zero subclass */
- 523 printk("/%d", class->subclass);
- 524 }
- }
-
- static inline void print_ip_sym(unsigned long ip) /* Print ip once with %p inside "[<...>]" and once with %pS, followed by a newline */
- {
- printk("[<%p>] %pS\n", (void *) ip, (void *) ip);
- }
-
- static void lockdep_print_held_locks(struct task_struct *curr) /* Print a header with the number of locks held by curr, then one print_lock() line per entry */
- {
- int i, depth = curr->lockdep_depth;
-
- if (!depth) { /* the task holds no locks */
- printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
- return;
- }
- printk("%d lock%s held by %s/%d:\n",
- depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr)); /* plural "s" only when more than one lock is held */
-
- for (i = 0; i < depth; i++) {
- printk(" #%d: ", i);
- print_lock(curr->held_locks + i);
- }
- }
-
- 2.
-
-
- 参考http://www.lenky.info/archives/2013/04/2253