Principle:
The soft lockup detector requires the CONFIG_LOCKUP_DETECTOR kernel option; the implementation lives in kernel/watchdog.c.
For background, see Song Baohua's post: Linux内核为什么会发生soft lockup? (CSDN blog)
watchdog_timer_fn: the per-CPU hrtimer callback.
Has softlockup_completion already completed?
  yes: stop_one_cpu_nowait, which borrows the stop_machine kernel thread (migration/X) to run the work function softlockup_fn
       cpu_stop_queue_work: queue the cpu_stop_work
       wake_up_q: wake the stop_machine kernel thread (migration/X) attached to the wake_q_head
  no:  check whether the stall has exceeded 20 s (see the sketch below)
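The 20 s figure comes from the softlockup threshold: watchdog_thresh defaults to 10 and the soft threshold is twice that. A sketch of the check, paraphrased from kernel/watchdog.c in v5.10:

static int get_softlockup_thresh(void)
{
    return watchdog_thresh * 2;     /* watchdog_thresh defaults to 10 -> 20 s */
}

static int is_softlockup(unsigned long touch_ts)
{
    unsigned long now = get_timestamp();

    if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh) {
        /* Warn about unreasonable delays. */
        if (time_after(now, touch_ts + get_softlockup_thresh()))
            return now - touch_ts;  /* the "duration" printed in the report */
    }
    return 0;
}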
The work function, softlockup_fn:
static int softlockup_fn(void *data)
{
    __this_cpu_write(soft_lockup_hrtimer_cnt,
                     __this_cpu_read(hrtimer_interrupts));
    __touch_watchdog();
    complete(this_cpu_ptr(&softlockup_completion));  /* signal completion */

    return 0;
}
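For reference, the completion, stop-work, and counters that softlockup_fn touches are all per-CPU objects (abridged from kernel/watchdog.c, v5.10):

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
static DEFINE_PER_CPU(struct completion, softlockup_completion);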
The post notes: with CONFIG_PREEMPT=y, and while local irqs are not disabled, the kernel interrupt vector el1_irq (arch/arm64/kernel/entry.S) looks like this:
SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
    kernel_entry 1
    gic_prio_irq_setup pmr=x20, tmp=x1
    enable_da_f

    mov     x0, sp
    bl      enter_el1_irq_or_nmi

    irq_handler

#ifdef CONFIG_PREEMPTION
    ldr     x24, [tsk, #TSK_TI_PREEMPT]     // get preempt count
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
    /*
     * DA_F were cleared at start of handling. If anything is set in DAIF,
     * we come back from an NMI, so skip preemption
     */
    mrs     x0, daif
    orr     x24, x24, x0
alternative_else_nop_endif
    cbnz    x24, 1f                         // preempt count != 0 || NMI return path
    bl      arm64_preempt_schedule_irq      // irq en/disable is done inside
1:
#endif

    mov     x0, sp
    bl      exit_el1_irq_or_nmi

    kernel_exit 1
SYM_CODE_END(el1_irq)
When an interrupt arrives, el1_irq handles it: it reads the current task's thread_info.preempt_count and, if the count is 0, calls arm64_preempt_schedule_irq to perform preemptive scheduling. See my own experiment write-up: 深入理解Linux内核之内核抢占 (CSDN blog)
Now look at the preemptive-scheduling path:
arm64_preempt_schedule_irq
  -> preempt_schedule_irq
    -> __schedule(true)   // the main scheduler performs preemptive scheduling
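On arm64 the wrapper only forwards to the generic scheduler once all cpufeatures are finalized; from arch/arm64/kernel/process.c in v5.10:

asmlinkage void __sched arm64_preempt_schedule_irq(void)
{
    lockdep_assert_irqs_disabled();

    /*
     * Preempting a task from an IRQ means we leave copies of PSTATE
     * on the stack. cpufeature's enable calls may modify PSTATE, but
     * resuming one of these preempted tasks would undo those changes.
     *
     * Only allow a task to be preempted once cpufeatures have been
     * enabled.
     */
    if (system_capabilities_finalized())
        preempt_schedule_irq();
}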
The preemption preconditions must hold: preempt_schedule_irq() is entered with preempt_count == 0 and irqs disabled (note the BUG_ON), and it re-enables irqs only around __schedule(true):
asmlinkage __visible void __sched preempt_schedule_irq(void)
{
    enum ctx_state prev_state;

    /* Catch callers which need to be fixed */
    BUG_ON(preempt_count() || !irqs_disabled());

    prev_state = exception_enter();

    do {
        preempt_disable();
        local_irq_enable();
        __schedule(true);
        local_irq_disable();
        sched_preempt_enable_no_resched();
    } while (need_resched());

    exception_exit(prev_state);
}
The same path is also reached when preemption is re-enabled:
preempt_enable
  -> if (unlikely(preempt_count_dec_and_test()))  // decrement the preempt counter; if it reaches 0
         __preempt_schedule();
    -> preempt_schedule    // kernel/sched/core.c
      -> __schedule(true)  // the main scheduler performs preemptive scheduling
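The macro itself, from include/linux/preempt.h with CONFIG_PREEMPTION=y:

#define preempt_enable() \
do { \
    barrier(); \
    if (unlikely(preempt_count_dec_and_test())) \
        __preempt_schedule(); \
} while (0)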
In a kernel with CONFIG_PREEMPT enabled, the bare while loop therefore causes no problem: the timer interrupt still preempts it. What happens if preemption is disabled instead, for example via a spinlock?

A spinlock that leaves local irqs enabled:
lock_up_module.c:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/spinlock.h>

static int hog_thread(void *data)
{
    static DEFINE_SPINLOCK(lock);
    unsigned long flags;

    printk(KERN_INFO "Hogging a CPU now task %s %d\n",
           current->comm, smp_processor_id());
    //spin_lock_irqsave(&lock, flags);
    spin_lock(&lock);               /* disables preemption; local irqs stay enabled */
    while (1)
        ;
    spin_unlock(&lock);
    //spin_unlock_irqrestore(&lock, flags);
    /* unreached */
    printk(KERN_INFO "Hogging a CPU end %d\n", smp_processor_id());
    return 0;
}

static int __init hog_init(void)
{
    kthread_run(&hog_thread, NULL, "hog");
    return 0;
}
module_init(hog_init);

MODULE_LICENSE("GPL v2");                   // license for the module code
MODULE_AUTHOR("www");                       // module author
MODULE_DESCRIPTION("soft lock up module");  // short description of the module
MODULE_ALIAS("soft_lock_up_module");        // alias for the module
Even with CONFIG_PREEMPT enabled, the code above triggers both an RCU stall and a softlockup. The RCU stall is self-detected on the stalled CPU itself, via print_cpu_stall:
[ 4085.374398] rcu: INFO: rcu_preempt self-detected stall on CPU
[ 4085.381455] rcu: 0-....: (20972 ticks this GP) idle=39e/1/0x4000000000000002 softirq=9507/9507 fqs=8514
[ 4085.393648] (t=21003 jiffies g=14621 q=9063)
[ 4085.399292] Task dump for CPU 0:
[ 4085.403795] task:softlockup_thre state:R running task stack: 0 pid: 1878 ppid: 2 flags:0x0000022a
[ 4085.416336] Call trace:
[ 4085.420054] dump_backtrace+0x0/0x1b4
[ 4085.424940] show_stack+0x24/0x80
[ 4085.429504] sched_show_task+0x150/0x17c
[ 4085.434698] dump_cpu_task+0x50/0x60
                   (annotation: rcu_dump_cpu_stacks is reached via rcu_pending -> check_cpu_stall -> print_cpu_stall)
[ 4085.439509] rcu_dump_cpu_stacks+0xb8/0xf8
[ 4085.444882] rcu_sched_clock_irq+0xb34/0xe54
[ 4085.450446] update_process_times+0x6c/0xb0
[ 4085.455890] tick_sched_handle+0x3c/0x60
[ 4085.461074] tick_sched_timer+0x58/0xb0
[ 4085.466149] __hrtimer_run_queues+0x18c/0x3cc
[ 4085.471751] hrtimer_interrupt+0xf4/0x29c
[ 4085.476990] arch_timer_handler_phys+0x40/0x50
[ 4085.482685] handle_percpu_devid_irq+0x94/0x290
[ 4085.488461] __handle_domain_irq+0x88/0xec
[ 4085.493818] gic_handle_irq+0xcc/0x14c
[ 4085.498826] el1_irq+0xc4/0x180
[ 4085.503218] task+0x7c/0x98 [soft_lock_up_module]
[ 4085.509172] kthread+0x164/0x16c
[ 4085.513668] ret_from_fork+0x10/0x3c
[ 240.615578] watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [hog:1473]
[ 240.623623] Modules linked in: hard_lock_up_module(O) ip6table_filter ip6_tables iptable_filter ip_tables x_tables spidev drm fuse cdc_ncm cdc_ether crct10dif_ce ax88179_178a usbnet rtc_ds1307 ipv6
[ 240.645338] CPU: 2 PID: 1473 Comm: hog Tainted: G O 5.10.0-with-dc-g1c56f5c219bc-dirty #38
[ 240.657609] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 240.664479] pstate: 60000005 (nZCv daif -PAN -UAO -TCO BTYPE=--)
[ 240.671887] pc : hog_thread+0x4c/0xfff0 [hard_lock_up_module]
[ 240.678995] lr : hog_thread+0x4c/0xfff0 [hard_lock_up_module]
[ 240.686111] sp : ffff80001654fe30
[ 240.690804] x29: ffff80001654fe30 x28: 0000000000000000
[ 240.697494] x27: ffff001f809c1fc8 x26: ffff800012828688
[ 240.704200] x25: ffff8000115d1840 x24: 0000000000000000
[ 240.710921] x23: ffff800009680010 x22: ffff8000166efa78
[ 240.717621] x21: ffff001f8d0bc800 x20: ffff001f809c1f80
[ 240.724303] x19: ffff800011a20000 x18: 0000000000000030
[ 240.731006] x17: 0000000000000000 x16: 0000000000000000
[ 240.737702] x15: 0000000000000001 x14: ffff80009654fb37
[ 240.744392] x13: 0000000000000a00 x12: ffff80001116f000
[ 240.751092] x11: 0000000000000001 x10: 0000000000000a00
[ 240.757761] x9 : ffff80000968005c x8 : ffff001f8d0bd260
[ 240.764474] x7 : 0000000000000000 x6 : 0000000000000000
[ 240.771192] x5 : 0000000000000000 x4 : 0000000000000000
[ 240.777910] x3 : ffff8000096a03c0 x2 : 0000000000000001
[ 240.784617] x1 : 0000000000000000 x0 : 0000000000000000
[ 240.791329] Call trace:
[ 240.795153] hog_thread+0x4c/0xfff0 [hard_lock_up_module]
[ 240.801947] kthread+0x164/0x16c
[ 240.806562] ret_from_fork+0x10/0x3c
Reference: Linux 内核分析 rcu_sched self-detected stall on CPU (CSDN blog, 小立爱学习)
spin_lock() ultimately calls preempt_disable(), so even with CONFIG_PREEMPT=y the preempt count is non-zero. The cbnz x24, 1f check in el1_irq therefore skips arm64_preempt_schedule_irq, and no scheduling can occur:
static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
    preempt_disable();
    spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
    LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}
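preempt_disable() merely bumps the per-thread preempt counter (include/linux/preempt.h, CONFIG_PREEMPT_COUNT=y); a non-zero counter is exactly what el1_irq sees, so the interrupted hog thread is resumed instead of preempted:

#define preempt_disable() \
do { \
    preempt_count_inc(); \
    barrier(); \
} while (0)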
A spinlock that also disables local irqs. Demo driver:
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/spinlock.h>

static int hog_thread(void *data)
{
    static DEFINE_SPINLOCK(lock);
    unsigned long flags;

    printk(KERN_INFO "Hogging a CPU now task %s %d\n",
           current->comm, smp_processor_id());
    spin_lock_irqsave(&lock, flags);    /* disables preemption AND local irqs */
    //spin_lock(&lock);
    while (1)
        ;
    //spin_unlock(&lock);
    spin_unlock_irqrestore(&lock, flags);
    /* unreached */
    printk(KERN_INFO "Hogging a CPU end %d\n", smp_processor_id());
    return 0;
}

static int __init hog_init(void)
{
    kthread_run(&hog_thread, NULL, "hog");
    return 0;
}
module_init(hog_init);

MODULE_LICENSE("GPL v2");                   // license for the module code
MODULE_AUTHOR("www");                       // module author
MODULE_DESCRIPTION("soft lock up module");  // short description of the module
MODULE_ALIAS("soft_lock_up_module");        // alias for the module
Kernel log: this time the RCU stall is detected by another CPU, via print_other_cpu_stall, because with local irqs off the stalled CPU can no longer run its own checks; the softlockup detector is silenced for the same reason. A dump_stack() was added inside print_other_cpu_stall to observe its call path:
[ 38.779118] hard_lock_up_module: loading out-of-tree module taints kernel.
[ 38.786519] hot init task insmod 6
[ 38.786524] Hogging a CPU now task hog 1  // the hogging kernel thread runs on CPU 1
[ 59.787816] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G O 5.10.0-with-dc-g1c56f5c219bc-dirty #39
[ 59.797713] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 59.803229] Call trace:
[ 59.805689] dump_backtrace+0x0/0x1b4
[ 59.809369] show_stack+0x24/0x80
[ 59.812701] dump_stack+0xd0/0x12c
                   (annotation: rcu_sched_clock_irq -> rcu_pending -> check_cpu_stall -> print_other_cpu_stall)
[ 59.816120] rcu_sched_clock_irq+0x5f0/0xe64
[ 59.820413] update_process_times+0x6c/0xb0
[ 59.824618] tick_sched_handle+0x38/0x74
[ 59.828560] tick_sched_timer+0x58/0xb0
[ 59.832413] __hrtimer_run_queues+0x18c/0x3cc
[ 59.836791] hrtimer_interrupt+0xf4/0x29c
[ 59.840823] arch_timer_handler_phys+0x40/0x50
[ 59.845291] handle_percpu_devid_irq+0x94/0x290
[ 59.849845] __handle_domain_irq+0x88/0xec
[ 59.853962] gic_handle_irq+0xcc/0x14c
[ 59.857729] el1_irq+0xc4/0x180
[ 59.860884] arch_cpu_idle+0x18/0x30
[ 59.864478] default_idle_call+0x4c/0x1e4
[ 59.868508] do_idle+0x238/0x2b0
[ 59.871751] cpu_startup_entry+0x30/0x7c
[ 59.875694] secondary_start_kernel+0x144/0x190  // output from the dump_stack added above
[ 59.881517] rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
[ 59.888999] rcu: 1-...0: (1 ticks this GP) idle=f4e/1/0x4000000000000000 softirq=2189/2189 fqs=2625
[ 59.900852] (detected by 5, t=5252 jiffies, g=1753, q=152)
// the stall is detected by CPU 5, which reports the RCU problem on CPU 1
The "task:... state:..." line in the stall report is printed by sched_show_task() in kernel/sched/core.c:
void sched_show_task(struct task_struct *p)
{
    unsigned long free = 0;
    int ppid;

    if (!try_get_task_stack(p))
        return;

    pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p));

    if (p->state == TASK_RUNNING)
        pr_cont(" running task ");
#ifdef CONFIG_DEBUG_STACK_USAGE
    free = stack_not_used(p);
#endif
    ppid = 0;
    rcu_read_lock();
    if (pid_alive(p))
        ppid = task_pid_nr(rcu_dereference(p->real_parent));
    rcu_read_unlock();
    pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n",
            free, task_pid_nr(p), ppid,
            (unsigned long)task_thread_info(p)->flags);

    print_worker_info(KERN_INFO, p);
    show_stack(p, NULL, KERN_INFO);
    put_task_stack(p);
}
EXPORT_SYMBOL_GPL(sched_show_task);
The grace-period kthread starvation check that calls it lives in kernel/rcu/tree_stall.h:
static void rcu_check_gp_kthread_starvation(void)
{
    struct task_struct *gpk = rcu_state.gp_kthread;
    unsigned long j;

    if (rcu_is_gp_kthread_starving(&j)) {
        pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
               rcu_state.name, j,
               (long)rcu_seq_current(&rcu_state.gp_seq),
               data_race(rcu_state.gp_flags),
               gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
               gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
        if (gpk) {
            pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
            pr_err("RCU grace-period kthread stack dump:\n");
            sched_show_task(gpk);   /* dumps the stack of the rcu_sched GP kthread */
            wake_up_process(gpk);
        }
    }
}
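The starvation test itself compares how long the grace-period machinery has been idle, as I read v5.10's kernel/rcu/tree_stall.h:

static bool rcu_is_gp_kthread_starving(unsigned long *jp)
{
    unsigned long j = jiffies - READ_ONCE(rcu_state.gp_activity);

    if (jp)
        *jp = j;                /* reported as "kthread starved for %ld jiffies" */
    return j > 2 * HZ;
}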
Why does the softlockup report always say 22 s or 23 s?

Softlockup reports routinely show 22 or 23 seconds. Does the stall time stop growing, or does the lockup ease off in between? Neither.
kernel:[ 9000.631301] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [softlockup_thre:1519]
[root@localhost linux-5.10]#
Message from syslogd@localhost at Jan 5 13:55:50 ...
kernel:[ 9064.631381] watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [softlockup_thre:1519]
Message from syslogd@localhost at Jan 5 13:56:50 ...
kernel:[ 9124.631487] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [softlockup_thre:1519]
Message from syslogd@localhost at Jan 5 13:57:54 ...
kernel:[ 9188.631505] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [softlockup_thre:1519]
watchdog_timer_fn uses a per-CPU flag, soft_watchdog_warn, which is false by default and set to true after the first report; its purpose is to keep the same CPU from printing a new report on every 4 s timer tick. The stall duration does keep growing, it is just no longer printed. Eventually the RCU stall code calls print_cpu_stall_info -> touch_nmi_watchdog -> touch_softlockup_watchdog -> touch_softlockup_watchdog_sched, which sets the per-CPU timestamp to SOFTLOCKUP_RESET (ULONG_MAX).
On the next tick, watchdog_timer_fn sees touch_ts == SOFTLOCKUP_RESET and calls __touch_watchdog() to reset the timestamp, starting a fresh measuring window; the next report therefore shows 22 s or 23 s again.
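The touch path boils down to writing the per-CPU timestamp (abridged from kernel/watchdog.c, v5.10; the first comment is mine):

#define SOFTLOCKUP_RESET    ULONG_MAX

notrace void touch_softlockup_watchdog_sched(void)
{
    /* Invalidate this CPU's timestamp; the next timer tick re-arms it. */
    raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
}

static void __touch_watchdog(void)
{
    __this_cpu_write(watchdog_touch_ts, get_timestamp());  /* fresh window */
}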
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
    unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
    struct pt_regs *regs = get_irq_regs();
    int duration;
    int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

    if (!watchdog_enabled)
        return HRTIMER_NORESTART;

    /* kick the hardlockup detector */
    watchdog_interrupt_count();

    /* kick the softlockup detector */
    if (completion_done(this_cpu_ptr(&softlockup_completion))) {
        reinit_completion(this_cpu_ptr(&softlockup_completion));
        stop_one_cpu_nowait(smp_processor_id(),
                            softlockup_fn, NULL,
                            this_cpu_ptr(&softlockup_stop_work));
    }

    /* .. and repeat */
    hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

    if (touch_ts == SOFTLOCKUP_RESET) {
        if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
            /*
             * If the time stamp was touched atomically
             * make sure the scheduler tick is up to date.
             */
            __this_cpu_write(softlockup_touch_sync, false);
            sched_clock_tick();
        }

        /* Clear the guest paused flag on watchdog reset */
        kvm_check_and_clear_guest_paused();
        __touch_watchdog();
        return HRTIMER_RESTART;
    }

    /* check for a softlockup
     * This is done by making sure a high priority task is
     * being scheduled. The task touches the watchdog to
     * indicate it is getting cpu time. If it hasn't then
     * this is a good indication some task is hogging the cpu
     */
    duration = is_softlockup(touch_ts);
    if (unlikely(duration)) {   /* a soft lockup lasting more than 20 s */
        /*
         * If a virtual machine is stopped by the host it can look to
         * the watchdog like a soft lockup, check to see if the host
         * stopped the vm before we issue the warning
         */
        if (kvm_check_and_clear_guest_paused())
            return HRTIMER_RESTART;

        /* only warn once */
        if (__this_cpu_read(soft_watchdog_warn) == true)
            return HRTIMER_RESTART;

        if (softlockup_all_cpu_backtrace) {
            /* Prevent multiple soft-lockup reports if one cpu is already
             * engaged in dumping cpu back traces
             */
            if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
                /* Someone else will report us. Let's give up */
                __this_cpu_write(soft_watchdog_warn, true);
                return HRTIMER_RESTART;
            }
        }

        pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
                 smp_processor_id(), duration,
                 current->comm, task_pid_nr(current));
        print_modules();
        print_irqtrace_events(current);
        if (regs)
            show_regs(regs);
        else
            dump_stack();

        if (softlockup_all_cpu_backtrace) {
            /* Avoid generating two back traces for current
             * given that one is already made above
             */
            trigger_allbutself_cpu_backtrace();

            clear_bit(0, &soft_lockup_nmi_warn);
            /* Barrier to sync with other cpus */
            smp_mb__after_atomic();
        }

        add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
        if (softlockup_panic)
            panic("softlockup: hung tasks");
        __this_cpu_write(soft_watchdog_warn, true);
    } else
        __this_cpu_write(soft_watchdog_warn, false);

    return HRTIMER_RESTART;
}
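Where does the 4 s period in the log below come from? sample_period is the soft threshold divided by five, so 20 s / 5 = 4 s (kernel/watchdog.c, v5.10):

static void set_sample_period(void)
{
    /*
     * Convert watchdog_thresh from seconds to ns: the divide by 5
     * gives the hrtimer several chances to fire before the
     * hardlockup detector generates a warning.
     */
    sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
    watchdog_update_hrtimer_threshold(sample_period);
}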
Log: the softlockup hrtimer fires every 4 s. When a CPU soft-locks, the instrumented kernel (the === lines are debug prints I added) logs:
[ 180.859790] ===is_softlockup set to 1
[ 184.617839] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 26
[ 188.617805] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 30
[ 192.617776] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 34
[ 196.617747] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 38
[ 200.617718] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 41
[ 204.617689] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 45
[ 208.617660] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 49
[ 212.617631] ===watchdog_timer_fn at cpu 0 comm softlockup_thre soft_watchdog_warn 1 duration 53
[ 215.803853] ===touch_softlockup_watchdog_sched at cpu 0 comm softlockup_thre
[ 215.812472] CPU: 0 PID: 1519 Comm: softlockup_thre Tainted: G O L 5.10.0-with-dc-g3d8cd6f7086d-dirty #83
[ 215.826085] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 215.833101] Call trace:
[ 215.837053] dump_backtrace+0x0/0x1b4
[ 215.842201] show_stack+0x24/0x70
[ 215.846967] dump_stack+0xd0/0x12c
[ 215.851770] touch_softlockup_watchdog_sched+0x70/0x80
[ 215.858319] touch_softlockup_watchdog+0x10/0x20
                   (annotation: print_cpu_stall_info calls touch_nmi_watchdog, which lands in touch_softlockup_watchdog)
[ 215.864301] print_cpu_stall_info+0x64/0x258
[ 215.869920] rcu_sched_clock_irq+0x5e4/0x9d0
[ 215.875536] update_process_times+0x6c/0xb0
[ 215.881044] tick_sched_handle+0x3c/0x80
[ 215.886282] tick_sched_timer+0x58/0xb0
[ 215.891469] __hrtimer_run_queues+0x14c/0x390
[ 215.897167] hrtimer_interrupt+0xfc/0x290
[ 215.902523] arch_timer_handler_phys+0x40/0x50
[ 215.908308] handle_percpu_devid_irq+0x94/0x220
[ 215.914219] __handle_domain_irq+0x88/0xf0
[ 215.919683] gic_handle_irq+0xcc/0x150
[ 215.924806] el1_irq+0xc4/0x180
[ 215.929287] task+0xac/0xd8 [soft_lock_up_module]
[ 215.935330] kthread+0x130/0x134
[ 215.939899] ret_from_fork+0x10/0x30
[ 216.617603] ===is_softlockup_cpu0 1 will __touch_watchdog now touch_ts ffffffffffffffff
[ 216.628271] ===__touch_watchdog after softlockup at cpu 0 comm softlockup_thre
[ 216.638307] CPU: 0 PID: 1519 Comm: softlockup_thre Tainted: G O L 5.10.0-with-dc-g3d8cd6f7086d-dirty #83
[ 216.651684] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 216.658616] Call trace:
[ 216.662466] dump_backtrace+0x0/0x1b4
[ 216.667494] show_stack+0x24/0x70
[ 216.672171] dump_stack+0xd0/0x12c
[ 216.676922] __touch_watchdog+0x88/0x90
[ 216.682144] watchdog_timer_fn+0x170/0x37c
[ 216.687603] __hrtimer_run_queues+0x14c/0x390
[ 216.693297] hrtimer_interrupt+0xfc/0x290
[ 216.698656] arch_timer_handler_phys+0x40/0x50
[ 216.704468] handle_percpu_devid_irq+0x94/0x220
[ 216.710388] __handle_domain_irq+0x88/0xf0
[ 216.715855] gic_handle_irq+0xcc/0x150
[ 216.720971] el1_irq+0xc4/0x180
[ 216.725478] task+0xac/0xd8 [soft_lock_up_module]
[ 216.731546] kthread+0x130/0x134
[ 216.736142] ret_from_fork+0x10/0x30
[ 220.617573] ===is_softlockup set to 0
[ 224.617544] ===is_softlockup set to 0
[ 228.617516] ===is_softlockup set to 0
[ 232.617487] ===is_softlockup set to 0
[ 236.617459] ===is_softlockup set to 0