spinlock bug (finally solved by removing the debug option)

本篇博客详细记录了一次spinlock调试修复的过程,包括错误复现、根本原因分析及最终解决方案。作者发现当前任务PID在SMP环境下并非全局唯一,导致spinlock调试代码出现问题。通过将owner_pid替换为指向task_struct的指针解决了该问题。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

spinlock-debug fix From: Ingo Molnar To: Andrew Morton Subject: [patch] spinlock-debug fix Date: Mon, 27 Jun 2005 11:48:44 +0200 Cc: Reuben Farrelly , linux-kernel@vger.kernel.org Archive-link: Article, Thread * Andrew Morton wrote: > Reuben Farrelly wrote: > > > > > Anyway, scary trace. It look like some spinlock is thought to be in the > > > wrong state in schedule(). Send the .config, please. > > > > Now online at http://www.reub.net/kernel/.config > > Me too. > > BUG: spinlock recursion on CPU#0, swapper/0, c120d520 > [] dump_stack+0x19/0x20 > [] spin_bug+0x42/0x54 > [] _raw_spin_lock+0x3e/0x84 > [] _spin_lock+0x9/0x10 > [] schedule+0x479/0xbc8 > [] cpu_idle+0x88/0x8c > [] rest_init+0x21/0x28 > [] start_kernel+0x151/0x158 > [] 0xc010020f > Kernel panic - not syncing: bad locking > > The bug is in the new spinlock debugging code itself. Ingo, can you > test that .config please? couldnt reproduce it on an UP box, nor on an SMP/HT 2/4-way box, but it finally triggered on a 2-way SMP box. the bug is that current->pid is not a unique identifier on SMP (doh!). The patch below fixes the bug - which also happens to be a speedup for the debugging code, as the ->pid dereferencing does not have to be done anymore. Also, i've disabled the panicing for now. 
Ingo - change owner_pid to owner, to fix bad pid uniqueness assumption on SMP - some more debug output printed - don't panic for now Signed-off-by: Ingo Molnar include/linux/spinlock_types.h | 16 ++++++++++------ kernel/sched.c | 2 +- lib/spinlock_debug.c | 30 +++++++++++++++++++----------- 3 files changed, 30 insertions(+), 18 deletions(-) Index: linux/include/linux/spinlock_types.h =================================================================== --- linux.orig/include/linux/spinlock_types.h +++ linux/include/linux/spinlock_types.h @@ -21,11 +21,12 @@ typedef struct { unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK - unsigned int magic, owner_pid, owner_cpu; + unsigned int magic, owner_cpu; + void *owner; #endif } spinlock_t; -#define SPINLOCK_MAGIC 0xdead4ead +#define SPINLOCK_MAGIC 0xdead4ead typedef struct { raw_rwlock_t raw_lock; @@ -33,22 +34,25 @@ typedef struct { unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK - unsigned int magic, owner_pid, owner_cpu; + unsigned int magic, owner_cpu; + void *owner; #endif } rwlock_t; -#define RWLOCK_MAGIC 0xdeaf1eed +#define RWLOCK_MAGIC 0xdeaf1eed + +#define SPINLOCK_OWNER_INIT ((void *)-1L) #ifdef CONFIG_DEBUG_SPINLOCK # define SPIN_LOCK_UNLOCKED \ (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ .magic = SPINLOCK_MAGIC, \ - .owner_pid = -1, \ + .owner = SPINLOCK_OWNER_INIT, \ .owner_cpu = -1 } #define RW_LOCK_UNLOCKED \ (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ .magic = RWLOCK_MAGIC, \ - .owner_pid = -1, \ + .owner = SPINLOCK_OWNER_INIT, \ .owner_cpu = -1 } #else # define SPIN_LOCK_UNLOCKED \ Index: linux/kernel/sched.c =================================================================== --- linux.orig/kernel/sched.c +++ linux/kernel/sched.c @@ -1604,7 +1604,7 @@ static inline void finish_task_switch(ru prev_task_flags = prev->flags; #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ - rq->lock.owner_pid = current->pid; + 
rq->lock.owner = current; #endif finish_arch_switch(prev); finish_lock_switch(rq, prev); Index: linux/lib/spinlock_debug.c =================================================================== --- linux.orig/lib/spinlock_debug.c +++ linux/lib/spinlock_debug.c @@ -14,16 +14,24 @@ static void spin_bug(spinlock_t *lock, const char *msg) { static long print_once = 1; + struct task_struct *owner = NULL; if (xchg(&print_once, 0)) { - printk("BUG: spinlock %s on CPU#%d, %s/%d, %p\n", msg, - smp_processor_id(), current->comm, current->pid, lock); + if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) + owner = lock->owner; + printk("BUG: spinlock %s on CPU#%d, %s/%d\n", + msg, smp_processor_id(), current->comm, current->pid); + printk(" lock: %p, .magic: %08x, .owner: %s/%d, .owner_cpu: %d\n", + lock, lock->magic, + owner ? owner->comm : "<none>", + owner ? owner->pid : -1, + lock->owner_cpu); dump_stack(); #ifdef CONFIG_SMP /* * We cannot continue on SMP: */ - panic("bad locking"); +// panic("bad locking"); #endif } } @@ -33,7 +41,7 @@ static void spin_bug(spinlock_t *lock, c static inline void debug_spin_lock_before(spinlock_t *lock) { SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(lock->owner_pid == current->pid, lock, "recursion"); + SPIN_BUG_ON(lock->owner == current, lock, "recursion"); SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), lock, "cpu recursion"); } @@ -41,17 +49,17 @@ static inline void debug_spin_lock_befor static inline void debug_spin_lock_after(spinlock_t *lock) { lock->owner_cpu = raw_smp_processor_id(); - lock->owner_pid = current->pid; + lock->owner = current; } static inline void debug_spin_unlock(spinlock_t *lock) { SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); SPIN_BUG_ON(!spin_is_locked(lock), lock, "already unlocked"); - SPIN_BUG_ON(lock->owner_pid != current->pid, lock, "wrong owner"); + SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); SPIN_BUG_ON(lock->owner_cpu != 
raw_smp_processor_id(), lock, "wrong CPU"); - lock->owner_pid = -1; + lock->owner = SPINLOCK_OWNER_INIT; lock->owner_cpu = -1; } @@ -176,7 +184,7 @@ void _raw_read_unlock(rwlock_t *lock) static inline void debug_write_lock_before(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - RWLOCK_BUG_ON(lock->owner_pid == current->pid, lock, "recursion"); + RWLOCK_BUG_ON(lock->owner == current, lock, "recursion"); RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), lock, "cpu recursion"); } @@ -184,16 +192,16 @@ static inline void debug_write_lock_befo static inline void debug_write_lock_after(rwlock_t *lock) { lock->owner_cpu = raw_smp_processor_id(); - lock->owner_pid = current->pid; + lock->owner = current; } static inline void debug_write_unlock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - RWLOCK_BUG_ON(lock->owner_pid != current->pid, lock, "wrong owner"); + RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), lock, "wrong CPU"); - lock->owner_pid = -1; + lock->owner = SPINLOCK_OWNER_INIT; lock->owner_cpu = -1; }

### 自旋锁(Spin Lock)的实现原理及使用场景 #### 1. 自旋锁的实现原理 自旋锁是一种轻量级的同步原语,用于保护共享资源的访问。其核心思想是通过让线程在一个循环中不断检查锁的状态,直到成功获取锁为止,这种行为被称为“自旋”。以下是其实现的关键步骤: - **获取锁**: 当一个线程尝试获取锁时,它会检查锁的状态。如果锁未被占用,则直接获取锁并标记为已占用。如果锁已被占用,则线程进入一个循环(即“自旋”),持续检查锁的状态,直到锁被释放[^4]。 ```c static inline void spin_lock(spinlock_t *lock) { while (!spin_trylock(lock)) { // 尝试获取锁,失败则继续循环 cpu_relax(); // 提示编译器和处理器优化自旋等待 } } ``` - **释放锁**: 当持有锁的线程完成对共享资源的操作后,它会释放锁,允许其他线程获取锁。释放锁的过程通常涉及清除锁的状态标志,并确保内存屏障以保证多核环境下的正确性[^1]。 ```c static inline void spin_unlock(spinlock_t *lock) { raw_spin_unlock(&lock->rlock); } ``` - **内存屏障与多核同步**: 在多核系统中,为了确保不同CPU之间的内存可见性,自旋锁的实现通常会包含内存屏障指令(如 `smp_mb()` 或 `dsb_sev()`)。这些指令确保在释放锁之前,所有对该共享资源的修改都已完成并写入内存[^5]。 #### 2. 自旋锁的使用场景 自旋锁适用于以下场景: - **短时间持有锁**: 自旋锁最适合于那些持有锁的时间非常短的场景。在这种情况下,自旋锁可以避免线程进入内核态(如使用系统调用的休眠和唤醒),从而提供更高的性能[^2]。 - **低延迟要求**: 在需要低延迟的场景中,自旋锁可以减少线程切换带来的开销。例如,在实时系统或高性能计算中,自旋锁可以确保线程快速响应共享资源的变化[^3]。 - **高竞争环境**: 在多核系统中,当多个线程频繁竞争同一资源时,自旋锁可能比传统的互斥锁更高效。尽管自旋锁会消耗CPU资源,但在高竞争环境下,线程切换的开销可能会更大[^3]。 #### 3. 自旋锁的优缺点 - **优点**: - 实现简单,开销小。 - 在短时间持有锁的情况下,性能优于互斥锁。 - 避免了线程切换带来的开销。 - **缺点**: - 如果锁的持有时间过长,会导致CPU资源浪费。 - 不适合单核系统或资源受限的环境,因为自旋会占用CPU时间[^5]。 #### 4. 示例代码 以下是一个简单的自旋锁实现示例: ```c #include <stdatomic.h> #include <stdbool.h> typedef struct { atomic_bool locked; } spinlock_t; void spin_lock(spinlock_t *lock) { while (atomic_exchange(&lock->locked, true)) { // 尝试获取锁 cpu_relax(); // 自旋等待 } } void spin_unlock(spinlock_t *lock) { atomic_store(&lock->locked, false); // 释放锁 } ``` ###
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值