原理:
软锁需要开启CONFIG_LOCKUP_DETECTOR内核选项,源码kernel/watchdog.c。
详见宋宝华的博文:Linux内核为什么会发生soft lockup?-优快云博客
watchdog_timer_fn :per_cpu 定时器回调
判断(softlockup_completion) 已经completion ?
yes: stop_one_cpu_nowait //借用stop_machine 内核线程来工作(migration/X),工作函数softlockup_fn
cpu_stop_queue_work //将cpu_stop_work 插入,
wake_up_q //并唤醒 wake_q_head关联的stop_machine内核线程(migration/X)
no:
检测是否超过20s ?
工作函数:softlockup_fn
/*
 * softlockup_fn - work item run on the per-CPU stopper thread (migration/X).
 *
 * Queued by watchdog_timer_fn via stop_one_cpu_nowait(). If this CPU can
 * still schedule the stopper thread, no soft lockup is in progress: the
 * function refreshes the watchdog timestamp and signals the per-CPU
 * completion so the next timer tick can queue the work again.
 */
static int softlockup_fn(void *data)
{
/* Remember how many watchdog hrtimer interrupts had fired by now. */
__this_cpu_write(soft_lockup_hrtimer_cnt,
__this_cpu_read(hrtimer_interrupts));
__touch_watchdog();
complete(this_cpu_ptr(&softlockup_completion)); /* mark this period's check as done */
return 0;
}
文章提到:在开启 CONFIG_PREEMPT=y 的时候,且没有关闭本地irq的时候,在内核中断el1_irq:
/*
 * arm64 EL1 IRQ entry. After the interrupt is handled, and only when
 * CONFIG_PREEMPTION=y, the saved preempt_count of the interrupted task
 * is checked: if it is zero (and we are not on an NMI return path),
 * arm64_preempt_schedule_irq is called to perform IRQ-driven preemption.
 */
SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
kernel_entry 1
gic_prio_irq_setup pmr=x20, tmp=x1
enable_da_f
mov x0, sp
bl enter_el1_irq_or_nmi
irq_handler
#ifdef CONFIG_PREEMPTION
ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
/*
* DA_F were cleared at start of handling. If anything is set in DAIF,
* we come back from an NMI, so skip preemption
*/
mrs x0, daif
orr x24, x24, x0
alternative_else_nop_endif
cbnz x24, 1f // preempt count != 0 || NMI return path
bl arm64_preempt_schedule_irq // irq en/disable is done inside
1:
#endif
mov x0, sp
bl exit_el1_irq_or_nmi
kernel_exit 1
SYM_CODE_END(el1_irq)
当发生中断时,会执行el1_irq来处理中断,来读取当前任务的thread_info.preempt_count 判断thread_info.preempt_count是否为0,如果为0 则调用arm64_preempt_schedule_irq 进行抢占式调度。 参考本人的实验:深入理解Linux内核之内核抢占-优快云博客
下面看下抢占式调度:
arm64_preempt_schedule_irq
->preempt_schedule_irq
->__schedule(true) //调用主调度器进行抢占式调度
需要满足可抢占条件:
/*
 * preempt_schedule_irq - entry point for IRQ-driven preemption.
 *
 * Called from the interrupt return path (arm64_preempt_schedule_irq)
 * with preemption allowed (preempt_count == 0) and IRQs disabled; the
 * BUG_ON below enforces both preconditions. __schedule(true) invokes
 * the main scheduler for a preemptive context switch.
 */
asmlinkage __visible void __sched preempt_schedule_irq(void)
{
enum ctx_state prev_state;
/* Catch callers which need to be fixed */
BUG_ON(preempt_count() || !irqs_disabled());
prev_state = exception_enter();
do {
preempt_disable();
local_irq_enable();
__schedule(true); /* main scheduler, preemptive path */
local_irq_disable();
sched_preempt_enable_no_resched();
} while (need_resched());
exception_exit(prev_state);
}
打开抢占的时候
开启抢占:
preempt_enable
->if (unlikely(preempt_count_dec_and_test())) \ //抢占计数器减一 为0
__preempt_schedule(); \
->preempt_schedule //kernel/sched/core.c
-> __schedule(true) //调用主调度器进行抢占式调度
在打开抢占PREEMPT的内核中,while循环将不会有任何问题。如果使用关闭抢占,比如通过自旋锁的方式会如何呢?
打开本地irq的自旋锁:
lock_up_module.c
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/spinlock.h>

/*
 * Demo: soft-lockup reproducer with local IRQs left enabled.
 *
 * spin_lock() disables preemption (see __raw_spin_lock), so even with
 * CONFIG_PREEMPT=y the endless loop below can never be preempted. The
 * watchdog hrtimer still fires on this CPU (IRQs are on), but the
 * stopper thread queued by watchdog_timer_fn never runs, producing a
 * soft-lockup report and an RCU self-detected stall.
 */
static int hog_thread(void *data)
{
	static DEFINE_SPINLOCK(lock);
	unsigned long flags;	/* used only by the irqsave variant below */

	printk(KERN_INFO "Hogging a CPU now task %s %d \n",
	       current->comm, smp_processor_id());

	/* spin_lock_irqsave(&lock, flags); */
	spin_lock(&lock);
	while (1)
		;
	spin_unlock(&lock);
	/* spin_unlock_irqrestore(&lock, flags); */

	/* unreached */
	printk(KERN_INFO "Hogging a CPU end %d \n", smp_processor_id());
	return 0;
}

static int __init hog_init(void)
{
	struct task_struct *t;

	/* kthread_run() returns ERR_PTR() on failure, never NULL. */
	t = kthread_run(hog_thread, NULL, "hog");
	if (IS_ERR(t))
		return PTR_ERR(t);
	return 0;
}
module_init(hog_init);

MODULE_LICENSE("GPL v2");	/* license accepted by the module code */
MODULE_AUTHOR("www");		/* module author */
MODULE_DESCRIPTION("soft lock up module");	/* short description */
MODULE_ALIAS("soft_lock_up_module");	/* module alias */
上面的代码,即使内核开启 CONFIG_PREEMPT,也会报rcu错误和softlockup错误:该rcu是self-detected自我检测print_cpu_stall
[ 4085.374398] rcu: INFO: rcu_preempt self-detected stall on CPU
[ 4085.381455] rcu: 0-....: (20972 ticks this GP) idle=39e/1/0x4000000000000002 softirq=9507/9507 fqs=8514
[ 4085.393648] (t=21003 jiffies g=14621 q=9063)
[ 4085.399292] Task dump for CPU 0:
[ 4085.403795] task:softlockup_thre state:R running task stack: 0 pid: 1878 ppid: 2 flags:0x0000022a
[ 4085.416336] Call trace:
[ 4085.420054] dump_backtrace+0x0/0x1b4
[ 4085.424940] show_stack+0x24/0x80
[ 4085.429504] sched_show_task+0x150/0x17c
[ 4085.434698] dump_cpu_task+0x50/0x60
print_cpu_stall
check_cpu_stall
rcu_pending
[ 4085.439509] rcu_dump_cpu_stacks+0xb8/0xf8
[ 4085.444882] rcu_sched_clock_irq+0xb34/0xe54
[ 4085.450446] update_process_times+0x6c/0xb0
[ 4085.455890] tick_sched_handle+0x3c/0x60
[ 4085.461074] tick_sched_timer+0x58/0xb0
[ 4085.466149] __hrtimer_run_queues+0x18c/0x3cc
[ 4085.471751] hrtimer_interrupt+0xf4/0x29c
[ 4085.476990] arch_timer_handler_phys+0x40/0x50
[ 4085.482685] handle_percpu_devid_irq+0x94/0x290
[ 4085.488461] __handle_domain_irq+0x88/0xec
[ 4085.493818] gic_handle_irq+0xcc/0x14c
[ 4085.498826] el1_irq+0xc4/0x180
[ 4085.503218] task+0x7c/0x98 [soft_lock_up_module]
[ 4085.509172] kthread+0x164/0x16c
[ 4085.513668] ret_from_fork+0x10/0x3c
[ 240.615578] watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [hog:1473]
[ 240.623623] Modules linked in: hard_lock_up_module(O) ip6table_filter ip6_tables iptable_filter ip_tables x_tables spidev drm fuse cdc_ncm cdc_ether crct10dif_ce ax88179_178a usbnet rtc_ds1307 ipv6
[ 240.645338] CPU: 2 PID: 1473 Comm: hog Tainted: G O 5.10.0-with-dc-g1c56f5c219bc-dirty #38
[ 240.657609] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 240.664479] pstate: 60000005 (nZCv daif -PAN -UAO -TCO BTYPE=--)
[ 240.671887] pc : hog_thread+0x4c/0xfff0 [hard_lock_up_module]
[ 240.678995] lr : hog_thread+0x4c/0xfff0 [hard_lock_up_module]
[ 240.686111] sp : ffff80001654fe30
[ 240.690804] x29: ffff80001654fe30 x28: 0000000000000000
[ 240.697494] x27: ffff001f809c1fc8 x26: ffff800012828688
[ 240.704200] x25: ffff8000115d1840 x24: 0000000000000000
[ 240.710921] x23: ffff800009680010 x22: ffff8000166efa78
[ 240.717621] x21: ffff001f8d0bc800 x20: ffff001f809c1f80
[ 240.724303] x19: ffff800011a20000 x18: 0000000000000030
[ 240.731006] x17: 0000000000000000 x16: 0000000000000000
[ 240.737702] x15: 0000000000000001 x14: ffff80009654fb37
[ 240.744392] x13: 0000000000000a00 x12: ffff80001116f000
[ 240.751092] x11: 0000000000000001 x10: 0000000000000a00
[ 240.757761] x9 : ffff80000968005c x8 : ffff001f8d0bd260
[ 240.764474] x7 : 0000000000000000 x6 : 0000000000000000
[ 240.771192] x5 : 0000000000000000 x4 : 0000000000000000
[ 240.777910] x3 : ffff8000096a03c0 x2 : 0000000000000001
[ 240.784617] x1 : 0000000000000000 x0 : 0000000000000000
[ 240.791329] Call trace:
[ 240.795153] hog_thread+0x4c/0xfff0 [hard_lock_up_module]
[ 240.801947] kthread+0x164/0x16c
[ 240.806562] ret_from_fork+0x10/0x3c
Linux 内核分析 rcu_sched self-detected stall on CPU_小立爱学习的博客-优快云博客
spin_lock最终会调用preempt_disable:关闭抢占,此时CONFIG_PREEMPT=y。不满足preempt_schedule_irq抢占,导致无法进程调度。
/*
 * __raw_spin_lock - core of spin_lock().
 *
 * Note the first action is preempt_disable(): while the lock is held,
 * preempt_count != 0, so the interrupt-return preemption check in
 * el1_irq fails and preempt_schedule_irq is never reached. This is why
 * a spin_lock + infinite loop soft-locks the CPU even with
 * CONFIG_PREEMPT=y.
 */
static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
preempt_disable();
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}
关闭本地irq的自旋锁:
demo驱动:
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/spinlock.h>

/*
 * Demo: lockup reproducer with local IRQs disabled.
 *
 * spin_lock_irqsave() masks local interrupts before spinning forever,
 * so the watchdog hrtimer never fires on this CPU and the soft-lockup
 * detector stays silent here. The stall is instead reported by RCU
 * from another CPU (print_other_cpu_stall).
 */
static int hog_thread(void *data)
{
	static DEFINE_SPINLOCK(lock);
	unsigned long flags;

	printk(KERN_INFO "Hogging a CPU now task %s %d \n",
	       current->comm, smp_processor_id());

	spin_lock_irqsave(&lock, flags);
	/* spin_lock(&lock); */
	while (1)
		;
	/* spin_unlock(&lock); */
	spin_unlock_irqrestore(&lock, flags);

	/* unreached */
	printk(KERN_INFO "Hogging a CPU end %d \n", smp_processor_id());
	return 0;
}

static int __init hog_init(void)
{
	struct task_struct *t;

	/* kthread_run() returns ERR_PTR() on failure, never NULL. */
	t = kthread_run(hog_thread, NULL, "hog");
	if (IS_ERR(t))
		return PTR_ERR(t);
	return 0;
}
module_init(hog_init);

MODULE_LICENSE("GPL v2");	/* license accepted by the module code */
MODULE_AUTHOR("www");		/* module author */
MODULE_DESCRIPTION("soft lock up module");	/* short description */
MODULE_ALIAS("soft_lock_up_module");	/* module alias */
内核日志:此时rcu是 依靠其它cpu检查print_other_cpu_stall。因为本地irq关闭了,softlockup也检测不到了。在print_other_cpu_stall中加入dump_stack以观察其调用栈:
[ 38.779118] hard_lock_up_module: loading out-of-tree module taints kernel.
[ 38.786519] hot init task insmod 6
[ 38.786524] Hogging a CPU now task hog 1 //内核线程在1号cpu
[ 59.787816] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G O 5.10.0-with-dc-g1c56f5c219bc-dirty #39
[ 59.797713] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 59.803229] Call trace:
[ 59.805689] dump_backtrace+0x0/0x1b4
[ 59.809369] show_stack+0x24/0x80
[ 59.812701] dump_stack+0xd0/0x12c
print_other_cpu_stall
check_cpu_stall
rcu_pending
[ 59.816120] rcu_sched_clock_irq+0x5f0/0xe64
[ 59.820413] update_process_times+0x6c/0xb0
[ 59.824618] tick_sched_handle+0x38/0x74
[ 59.828560] tick_sched_timer+0x58/0xb0
[ 59.832413] __hrtimer_run_queues+0x18c/0x3cc
[ 59.836791] hrtimer_interrupt+0xf4/0x29c
[ 59.840823] arch_timer_handler_phys+0x40/0x50
[ 59.845291] handle_percpu_devid_irq+0x94/0x290
[ 59.849845] __handle_domain_irq+0x88/0xec
[ 59.853962] gic_handle_irq+0xcc/0x14c
[ 59.857729] el1_irq+0xc4/0x180
[ 59.860884] arch_cpu_idle+0x18/0x30
[ 59.864478] default_idle_call+0x4c/0x1e4
[ 59.868508] do_idle+0x238/0x2b0
[ 59.871751] cpu_startup_entry+0x30/0x7c
[ 59.875694] secondary_start_kernel+0x144/0x190 //这里是加了dump_stack的输出
[ 59.881517] rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
[ 59.888999] rcu: 1-...0: (1 ticks this GP) idle=f4e/1/0x4000000000000000 softirq=2189/2189 fqs=2625
[ 59.900852] (detected by 5, t=5252 jiffies, g=1753, q=152)
//此处被5号cpu检测到。检测到1号cpu的rcu问题
//kernel/sched/core.c
/*
 * sched_show_task - dump one task's state, pid/ppid, stack usage and
 * backtrace to the kernel log. Used by the stall/lockup reporting
 * paths (e.g. dump_cpu_task, rcu_check_gp_kthread_starvation).
 */
void sched_show_task(struct task_struct *p)
{
unsigned long free = 0;
int ppid;
/* Pin the task's stack; bail out if it is already gone. */
if (!try_get_task_stack(p))
return;
pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p));
if (p->state == TASK_RUNNING)
pr_cont(" running task ");
#ifdef CONFIG_DEBUG_STACK_USAGE
free = stack_not_used(p);
#endif
ppid = 0;
/* real_parent is RCU-protected; read it under rcu_read_lock. */
rcu_read_lock();
if (pid_alive(p))
ppid = task_pid_nr(rcu_dereference(p->real_parent));
rcu_read_unlock();
pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n",
free, task_pid_nr(p), ppid,
(unsigned long)task_thread_info(p)->flags);
print_worker_info(KERN_INFO, p);
show_stack(p, NULL, KERN_INFO);
put_task_stack(p);
}
EXPORT_SYMBOL_GPL(sched_show_task);
这里调用:kernel/rcu/tree_stall.h
/*
 * rcu_check_gp_kthread_starvation - report when the RCU grace-period
 * kthread has not run for too long, dump its stack, and kick it awake.
 * (kernel/rcu/tree_stall.h)
 */
static void rcu_check_gp_kthread_starvation(void)
{
struct task_struct *gpk = rcu_state.gp_kthread;
unsigned long j;
if (rcu_is_gp_kthread_starving(&j)) {
pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
rcu_state.name, j,
(long)rcu_seq_current(&rcu_state.gp_seq),
data_race(rcu_state.gp_flags),
gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
if (gpk) {
pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
pr_err("RCU grace-period kthread stack dump:\n");
sched_show_task(gpk); /* dump the rcu_sched grace-period kthread's stack */
wake_up_process(gpk);
}
}
}
为什么softlockup打印时间总是22s或23s ?
经常看到软锁打印的时间总是22或者23秒,难道时间不增加,难道中间得到缓解了?其实不是。
kernel:[ 9000.631301] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [softlockup_thre:1519]
q
[root@localhost linux-5.10]#
Message from syslogd@localhost at Jan 5 13:55:50 ...
kernel:[ 9064.631381] watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [softlockup_thre:1519]
Message from syslogd@localhost at Jan 5 13:56:50 ...
kernel:[ 9124.631487] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [softlockup_thre:1519]
Message from syslogd@localhost at Jan 5 13:57:54 ...
kernel:[ 9188.631505] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [softlockup_thre:1519]
5.10内核:watchdog_timer_fn里面有个参数soft_watchdog_warn默认为false ,在首次发生软锁后会设置为true,它的作用是为了不让同一个cpu发生软锁后,每4s定时器检测都报错。实际上duration时间会增加,只是后面不报了。等到一定时间后rcu检测机制调用print_cpu_stall_info->touch_nmi_watchdog->touch_softlockup_watchdog->touch_softlockup_watchdog_sched将时间戳设置为SOFTLOCKUP_RESET(即ULONG_MAX)。
然后watchdog_timer_fn 会判断touch_ts == SOFTLOCKUP_RESET。然后__touch_watchdog()。重置时间戳。走下一轮时间判断。下一次打印又是22s或者23s。
/* watchdog kicker functions */
/*
 * watchdog_timer_fn - per-CPU hrtimer callback (default period 4s).
 *
 * Each tick it (1) kicks the hardlockup detector, (2) re-queues
 * softlockup_fn on the stopper thread if the previous run completed,
 * and (3) compares the last touch timestamp against the threshold
 * (default 20s) to decide whether to report a soft lockup.
 */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
if (!watchdog_enabled)
return HRTIMER_NORESTART;
/* kick the hardlockup detector */
watchdog_interrupt_count();
/* kick the softlockup detector */
if (completion_done(this_cpu_ptr(&softlockup_completion))) {
reinit_completion(this_cpu_ptr(&softlockup_completion));
/* Queue softlockup_fn on this CPU's stopper thread (migration/X). */
stop_one_cpu_nowait(smp_processor_id(),
softlockup_fn, NULL,
this_cpu_ptr(&softlockup_stop_work));
}
/* .. and repeat */
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
/* Timestamp was reset (e.g. by touch_nmi_watchdog): restart the window. */
if (touch_ts == SOFTLOCKUP_RESET) {
if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
__this_cpu_write(softlockup_touch_sync, false);
sched_clock_tick();
}
/* Clear the guest paused flag on watchdog reset */
kvm_check_and_clear_guest_paused();
__touch_watchdog();
return HRTIMER_RESTART;
}
/* check for a softlockup
* This is done by making sure a high priority task is
* being scheduled. The task touches the watchdog to
* indicate it is getting cpu time. If it hasn't then
* this is a good indication some task is hogging the cpu
*/
duration = is_softlockup(touch_ts);
if (unlikely(duration)) { /* soft lockup: no watchdog touch for over the threshold (default 20s) */
/*
* If a virtual machine is stopped by the host it can look to
* the watchdog like a soft lockup, check to see if the host
* stopped the vm before we issue the warning
*/
if (kvm_check_and_clear_guest_paused())
return HRTIMER_RESTART;
/* only warn once */
if (__this_cpu_read(soft_watchdog_warn) == true)
return HRTIMER_RESTART;
if (softlockup_all_cpu_backtrace) {
/* Prevent multiple soft-lockup reports if one cpu is already
* engaged in dumping cpu back traces
*/
if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
/* Someone else will report us. Let's give up */
__this_cpu_write(soft_watchdog_warn, true);
return HRTIMER_RESTART;
}
}
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
if (softlockup_all_cpu_backtrace) {
/* Avoid generating two back traces for current
* given that one is already made above
*/
trigger_allbutself_cpu_backtrace();
clear_bit(0, &soft_lockup_nmi_warn);
/* Barrier to sync with other cpus */
smp_mb__after_atomic();
}
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
/* Suppress repeat reports until the timestamp is reset again. */
__this_cpu_write(soft_watchdog_warn, true);
} else
__this_cpu_write(soft_watchdog_warn, false);
return HRTIMER_RESTART;
}
log: 发生软锁必然伴随rcu报错 ??? 否则rcu怎么更新时间戳。。
softlockup 的定时器是4s钟。如果某个cpu发生softlockup,
[ 180.859790] ===is_softlockup set to 1
[ 184.617839] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 26
[ 188.617805] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 30
[ 192.617776] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 34
[ 196.617747] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 38
[ 200.617718] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 41
[ 204.617689] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 45
[ 208.617660] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 49
[ 212.617631] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 53
[ 215.803853] ===touch_softlockup_watchdog_sched at cpu 0 comm softlockup_thre
[ 215.812472] CPU: 0 PID: 1519 Comm: softlockup_thre Tainted: G O L 5.10.0-with-dc-g3d8cd6f7086d-dirty #83
[ 215.826085] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 215.833101] Call trace:
[ 215.837053] dump_backtrace+0x0/0x1b4
[ 215.842201] show_stack+0x24/0x70
[ 215.846967] dump_stack+0xd0/0x12c
[ 215.851770] touch_softlockup_watchdog_sched+0x70/0x80
[ 215.858319] touch_softlockup_watchdog+0x10/0x20
touch_nmi_watchdog
[ 215.864301] print_cpu_stall_info+0x64/0x258
[ 215.869920] rcu_sched_clock_irq+0x5e4/0x9d0
[ 215.875536] update_process_times+0x6c/0xb0
[ 215.881044] tick_sched_handle+0x3c/0x80
[ 215.886282] tick_sched_timer+0x58/0xb0
[ 215.891469] __hrtimer_run_queues+0x14c/0x390
[ 215.897167] hrtimer_interrupt+0xfc/0x290
[ 215.902523] arch_timer_handler_phys+0x40/0x50
[ 215.908308] handle_percpu_devid_irq+0x94/0x220
[ 215.914219] __handle_domain_irq+0x88/0xf0
[ 215.919683] gic_handle_irq+0xcc/0x150
[ 215.924806] el1_irq+0xc4/0x180
[ 215.929287] task+0xac/0xd8 [soft_lock_up_module]
[ 215.935330] kthread+0x130/0x134
[ 215.939899] ret_from_fork+0x10/0x30
[ 216.617603] ===is_softlockup_cpu0 1 will __touch_watchdog now touch_ts ffffffffffffffff
[ 216.628271] ===__touch_watchdog after softlockup at cpu 0 comm softlockup_thre
[ 216.638307] CPU: 0 PID: 1519 Comm: softlockup_thre Tainted: G O L 5.10.0-with-dc-g3d8cd6f7086d-dirty #83
[ 216.651684] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 216.658616] Call trace:
[ 216.662466] dump_backtrace+0x0/0x1b4
[ 216.667494] show_stack+0x24/0x70
[ 216.672171] dump_stack+0xd0/0x12c
[ 216.676922] __touch_watchdog+0x88/0x90
[ 216.682144] watchdog_timer_fn+0x170/0x37c
[ 216.687603] __hrtimer_run_queues+0x14c/0x390
[ 216.693297] hrtimer_interrupt+0xfc/0x290
[ 216.698656] arch_timer_handler_phys+0x40/0x50
[ 216.704468] handle_percpu_devid_irq+0x94/0x220
[ 216.710388] __handle_domain_irq+0x88/0xf0
[ 216.715855] gic_handle_irq+0xcc/0x150
[ 216.720971] el1_irq+0xc4/0x180
[ 216.725478] task+0xac/0xd8 [soft_lock_up_module]
[ 216.731546] kthread+0x130/0x134
[ 216.736142] ret_from_fork+0x10/0x30
[ 220.617573] ===is_softlockup set to 0
[ 224.617544] ===is_softlockup set to 0
[ 228.617516] ===is_softlockup set to 0
[ 232.617487] ===is_softlockup set to 0
[ 236.617459] ===is_softlockup set to 0
6.6版本的内核。增加通过period_ts变量来控制:但是在软锁时候依然需要rcu调用->touch_nmi_watchdog来更新period_ts:
依赖定时器检测核watchdog_timer_fn将softlockup_fn加入到migration/X ,等待下次调度时执行,并更新period_ts和touch_ts
is_softlockup:
if (time_after(now, period_ts + get_softlockup_thresh()))
{
时间戳 > 上一次上报softlockup+20s
}
watchdog_timer_fn
{
period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts));
touch_ts = __this_cpu_read(watchdog_touch_ts);
duration = is_softlockup(touch_ts, period_ts, now);
if(duration)
{
update_report_ts();//记录period_ts为当前时间
}
}
正常情况:
period_ts一直在更新,now 一直小于period_ts+20s: is_softlockup 返回false
如果period_ts没有更新,is_softlockup返回true。
打印watchdog
update_report_ts,导致在20s内不会重复打印watchdog
20s后。 继续打印。。
下面log:
touch_ts 在83秒,period_ts在105秒
...
===watchdog_timer_fn Y now 116 period_ts 105 touch_ts 83 duration 0 cpu 5
===watchdog_timer_fn Y now 120 period_ts 105 touch_ts 83 duration 0 cpu 5
===watchdog_timer_fn Y now 124 period_ts 105 touch_ts 83 duration 0 cpu 5
===watchdog_timer_fn Y now 127 period_ts 105 touch_ts 83 duration 44 cpu 5
atchdog: BUG: soft lockup - CPU#5 stuck for 44s! [hog:5341]
===watchdog_timer_fn Y now 131 period_ts 127 touch_ts 83 duration 0 5
另外NMI watchdog (touch_nmi_watchdog)也更新period_ts:
touch_nmi_watchdog->touch_softlockup_watchdog->touch_softlockup_watchdog_sched->SOFTLOCKUP_DELAY_REPORT
因此定时器watchdog_timer_fn 通常会快速判断。更新period_ts返回。这时候将不会判断是否发生softlockup。
由于touch_nmi_watchdog频率高于softlockup_fn,period_ts超前于touch_ts
且大多数时候是这个在更新,该函数是stop_machine里面执行调用的,参考multi_cpu_stop:
multi_cpu_stop->touch_nmi_watchdog->touch_softlockup_watchdog
[ 178.043687] ===touch_nmi_watchdog
[ 178.043687] CPU: 3 PID: 30 Comm: migration/3 Not tainted 6.6.88-virt-test-gb4a37d4b822a-dirty #26
[ 178.055839] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 178.061309] Stopper: multi_cpu_stop+0x0/0x1c8 <- stop_machine_cpuslocked+0x14c/0x1a0
[ 178.069039] Call trace:
[ 178.071472] dump_backtrace+0x9c/0x100
[ 178.075209] show_stack+0x20/0x40
[ 178.078512] dump_stack_lvl+0x60/0xb0
[ 178.082161] dump_stack+0x18/0x28
[ 178.085462] multi_cpu_stop+0x158/0x1c8
[ 178.089285] cpu_stopper_thread+0xc8/0x198
[ 178.093368] smpboot_thread_fn+0x140/0x190
[ 178.097453] kthread+0xe8/0xf8
[ 178.100494] ret_from_fork+0x10/0x20
[ 116.378009] ===touch_softlockup_watchdog_sched set SOFTLOCKUP_DELAY_REPORT
[ 116.378009] CPU: 0 PID: 17 Comm: migration/0 Not tainted 6.6.88-virt-test-gb4a37d4b822a-dirty #23
[ 116.393719] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 116.399189] Stopper: multi_cpu_stop+0x0/0x1a8 <- stop_machine_cpuslocked+0x14c/0x1a0
[ 116.406919] Call trace:
[ 116.409351] dump_backtrace+0x9c/0x100
[ 116.413089] show_stack+0x20/0x40
[ 116.416392] dump_stack_lvl+0x60/0xb0
[ 116.420041] dump_stack+0x18/0x28
[ 116.423343] touch_softlockup_watchdog_sched+0x60/0x88
[ 116.428467] touch_softlockup_watchdog+0x10/0x28
[ 116.433071] multi_cpu_stop+0x138/0x1a8
[ 116.436894] cpu_stopper_thread+0xc8/0x198
[ 116.440977] smpboot_thread_fn+0x140/0x190
[ 116.445061] kthread+0xe8/0xf8
[ 116.448103] ret_from_fork+0x10/0x20
6.6版本内核内核死循环构建软锁日志分析,核2:由于触发软锁,migration/2内核线程得不到调度,无法更新touch_ts,一直停留在145s,touch_nmi_watchdog只能由rcu来报。
代码分析显示20s判断超时。判断由period_ts决定。
time_after(now, period_ts + get_softlockup_thresh())
如果now > period_ts+20 成立,返回now-touch_ts;否则返回0
内核死循环下,migration/2内核线程得不到调度,无法更新touch_ts,一直停留在145s:
[ 164.276098] ===watchdog_timer_fn Y now 152 period_ts 149 touch_ts 145 duration 0 2
[ 168.275988] ===watchdog_timer_fn Y now 156 period_ts 149 touch_ts 145 duration 0 2
[ 172.275878] ===watchdog_timer_fn Y now 160 period_ts 149 touch_ts 145 duration 0 2
[ 176.275768] ===watchdog_timer_fn Y now 164 period_ts 149 touch_ts 145 duration 0 2 //此前一直未超过20s
[ 179.747674] rcu: INFO: rcu_sched self-detected stall on CPU //先中断softirq->rcu->更新period_ts为167s (print_cpu_stall_info->touch_nmi_watchdog)
[ 179.753233] ===print_cpu_stall_info cpu 2
[ 184.275549] ===watchdog_timer_fn Y now 171 period_ts 167 touch_ts 145 duration 0 2
[ 188.275439] ===watchdog_timer_fn Y now 175 period_ts 167 touch_ts 145 duration 0 2
[ 192.275329] ===watchdog_timer_fn Y now 179 period_ts 167 touch_ts 145 duration 0 2
[ 196.275220] ===watchdog_timer_fn Y now 182 period_ts 167 touch_ts 145 duration 0 2
[ 200.275110] ===watchdog_timer_fn Y now 186 period_ts 167 touch_ts 145 duration 0 2
[ 204.275000] ===watchdog_timer_fn Y now 190 period_ts 167 touch_ts 145 duration 45 2 //超过20s,返回now-touch_ts = 45s
[ 204.289750] watchdog: BUG: soft lockup - CPU#2 stuck for 45s! [hog:5267] //报watchdog,且更新period_ts 为190s
[ 208.274890] ===watchdog_timer_fn Y now 193 period_ts 190 touch_ts 145 duration 0 2 //更新后period_ts,重新判断duration是否大于20s
[ 212.274780] ===watchdog_timer_fn Y now 197 period_ts 190 touch_ts 145 duration 0 2
[ 216.274671] ===watchdog_timer_fn Y now 201 period_ts 190 touch_ts 145 duration 0 2
[ 220.274598] ===watchdog_timer_fn Y now 205 period_ts 190 touch_ts 145 duration 0 2
[ 224.274657] ===watchdog_timer_fn Y now 208 period_ts 190 touch_ts 145 duration 0 2
[ 228.274716] ===watchdog_timer_fn Y now 212 period_ts 190 touch_ts 145 duration 67 2 //超过20s,返回now-touch_ts = 67s
[ 228.289466] watchdog: BUG: soft lockup - CPU#2 stuck for 67s! [hog:5267] //报watchdog,且更新period_ts 为212s
[ 232.274774] ===watchdog_timer_fn Y now 216 period_ts 212 touch_ts 145 duration 0 2
[ 236.274833] ===watchdog_timer_fn Y now 220 period_ts 212 touch_ts 145 duration 0 2
[ 240.274892] ===watchdog_timer_fn Y now 223 period_ts 212 touch_ts 145 duration 0 2
[ 242.906931] rcu: INFO: rcu_sched self-detected stall on CPU //中断softirq->rcu->更新period_ts为227s
[ 242.912489] ===print_cpu_stall_info cpu 2
[ 248.275010] ===watchdog_timer_fn Y now 231 period_ts 227 touch_ts 145 duration 0 2
[ 252.275069] ===watchdog_timer_fn Y now 234 period_ts 227 touch_ts 145 duration 0 2
[ 256.275127] ===watchdog_timer_fn Y now 238 period_ts 227 touch_ts 145 duration 0 2
[ 260.275186] ===watchdog_timer_fn Y now 242 period_ts 227 touch_ts 145 duration 0 2
[ 264.275245] ===watchdog_timer_fn Y now 246 period_ts 227 touch_ts 145 duration 0 2
[ 268.275304] ===watchdog_timer_fn Y now 249 period_ts 227 touch_ts 145 duration 104 2 //超过20s,返回now-touch_ts = 104s
[ 268.290139] watchdog: BUG: soft lockup - CPU#2 stuck for 104s! [hog:5267] //报watchdog,且更新period_ts 为249s
[ 272.275363] ===watchdog_timer_fn Y now 253 period_ts 249 touch_ts 145 duration 0 2
[ 276.275421] ===watchdog_timer_fn Y now 257 period_ts 249 touch_ts 145 duration 0 2
[ 280.275480] ===watchdog_timer_fn Y now 261 period_ts 249 touch_ts 145 duration 0 2
[ 284.275535] ===watchdog_timer_fn Y now 264 period_ts 249 touch_ts 145 duration 0 2
[ 288.275544] ===watchdog_timer_fn Y now 268 period_ts 249 touch_ts 145 duration 0 2
[ 292.275553] ===watchdog_timer_fn Y now 272 period_ts 249 touch_ts 145 duration 127 2 //超过20s,返回now-touch_ts = 127s
[ 292.290391] watchdog: BUG: soft lockup - CPU#2 stuck for 127s! [hog:5267] //报watchdog,且更新period_ts 为272s
[ 296.275563] ===watchdog_timer_fn Y now 275 period_ts 272 touch_ts 145 duration 0 2
[ 300.275572] ===watchdog_timer_fn Y now 279 period_ts 272 touch_ts 145 duration 0 2
[ 304.275582] ===watchdog_timer_fn Y now 283 period_ts 272 touch_ts 145 duration 0 2
上述情形是开启irq,发生rcu的核和watchdog为相同的核。下面是另外一种情况:关闭irq。理论上关闭irq,watchdog在该核应该是不会有时钟检测。
关闭irq的死循环 日志:
[ 3173.164736] ===watchdog_timer_fn Y now 2955 period_ts 2951 touch_ts 2951 duration 0 1
[ 3173.168736] ===watchdog_timer_fn Y now 2955 period_ts 2951 touch_ts 2951 duration 0 2
[ 3173.176736] ===watchdog_timer_fn Y now 2955 period_ts 2951 touch_ts 2951 duration 0 3
[ 3173.180738] ===watchdog_timer_fn Y now 2955 period_ts 2951 touch_ts 2951 duration 0 4
[ 3173.184736] ===watchdog_timer_fn Y now 2955 period_ts 2951 touch_ts 2951 duration 0 5
[ 3173.188737] ===watchdog_timer_fn Y now 2955 period_ts 2951 touch_ts 2951 duration 0 6
[ 3173.192736] ===watchdog_timer_fn Y now 2955 period_ts 2951 touch_ts 2951 duration 0 7
[ 3177.164728] ===watchdog_timer_fn Y now 2958 period_ts 2955 touch_ts 2955 duration 0 1
[ 3177.168728] ===watchdog_timer_fn Y now 2958 period_ts 2955 touch_ts 2955 duration 0 2
[ 3177.176728] ===watchdog_timer_fn Y now 2958 period_ts 2955 touch_ts 2955 duration 0 3
[ 3177.180728] ===watchdog_timer_fn Y now 2958 period_ts 2955 touch_ts 2955 duration 0 4
[ 3177.184728] ===watchdog_timer_fn Y now 2958 period_ts 2955 touch_ts 2955 duration 0 5
[ 3177.188729] ===watchdog_timer_fn Y now 2958 period_ts 2955 touch_ts 2955 duration 0 6
[ 3177.192728] ===watchdog_timer_fn Y now 2958 period_ts 2955 touch_ts 2955 duration 0 7
[ 3181.164720] ===watchdog_timer_fn Y now 2962 period_ts 2958 touch_ts 2958 duration 0 1
[ 3181.168720] ===watchdog_timer_fn Y now 2962 period_ts 2958 touch_ts 2958 duration 0 2
[ 3181.176720] ===watchdog_timer_fn Y now 2962 period_ts 2958 touch_ts 2958 duration 0 3
[ 3181.180718] ===watchdog_timer_fn Y now 2962 period_ts 2955 touch_ts 2955 duration 0 4
[ 3181.184720] ===watchdog_timer_fn Y now 2962 period_ts 2958 touch_ts 2958 duration 0 5
[ 3181.188720] ===watchdog_timer_fn Y now 2962 period_ts 2959 touch_ts 2959 duration 0 6
[ 3181.192720] ===watchdog_timer_fn Y now 2962 period_ts 2958 touch_ts 2958 duration 0 7
[ 3185.164712] ===watchdog_timer_fn Y now 2966 period_ts 2962 touch_ts 2962 duration 0 1
[ 3185.168712] ===watchdog_timer_fn Y now 2966 period_ts 2962 touch_ts 2962 duration 0 2
[ 3185.176712] ===watchdog_timer_fn Y now 2966 period_ts 2962 touch_ts 2962 duration 0 3
[ 3185.180710] ===watchdog_timer_fn Y now 2966 period_ts 2955 touch_ts 2955 duration 0 4
[ 3185.184712] ===watchdog_timer_fn Y now 2966 period_ts 2962 touch_ts 2962 duration 0 5
[ 3185.188712] ===watchdog_timer_fn Y now 2966 period_ts 2962 touch_ts 2962 duration 0 6
[ 3185.192712] ===watchdog_timer_fn Y now 2966 period_ts 2962 touch_ts 2962 duration 0 7
首先rcu的报错,显示cpu 0 出现问题,被其它核 5 检测到。且上面日志中的watchdog_timer_fn确实没有上报核0的,证明关闭了中断不会有watchdog定时器:
[ 3187.996705] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[ 3188.002612] ===print_cpu_stall_info cpu 0
[ 3188.006695] rcu: 0-...0: (0 ticks this GP) idle=fb0c/1/0x4000000000000000 softirq=6941/6941 fqs=1781
[ 3188.015902] rcu: (detected by 5, t=5256 jiffies, g=13437, q=799 ncpus=8)
[ 3188.022675] Task dump for CPU 0:
[ 3188.025889] task:hog state:R running task stack:0 pid:2551 ppid:2 flags:0x0000020a
[ 3188.035791] Call trace:
[ 3188.038224] __switch_to+0xd8/0x118
核4出现watchdog报错,且调用栈为smp_call_function_many_cond,说明在调用核0的时候一直没有得到响应。
[ 3197.180686] ===watchdog_timer_fn Y now 2977 period_ts 2955 touch_ts 2955 duration 22 4
[ 3197.196031] watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [khugepaged:63]
[ 3197.244572] CPU: 4 PID: 63 Comm: khugepaged Kdump: loaded Tainted: G O 6.6.88-virt-test-gb4a37d4b822a-dirty #27
[ 3197.256032] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 3197.261502] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 3197.268450] pc : smp_call_function_many_cond+0x154/0x5c0
[ 3197.273836] lr : smp_call_function_many_cond+0x114/0x5c0