摘自:http://blog.chinaunix.net/xmlrpc.php?r=blog/article&uid=14528823&id=4215888
softlockup(watchdog)用于檢測系統調度是否正常,即軟鎖的情況,當發生softlockup時,內核不能調度,但還能響應中斷,對用戶的表現可能為:能ping通,但無法登錄系統,無法進行正常操作。
其基本原理為:為每個CPU啟動一個內核線程(watchdog/x),此線程為優先級最高的實時線程,在該線程得到調度時,會更新相應的計數(時間戳),同時會啟動定時器,當定時器到期時檢查相應的時間戳,如果超過指定時間,都沒有更新,則說明這段時間內都沒有發生調度(因為此線程優先級最高),則打印相應告警或根據配置可以進入panic流程。
基本代碼分析(2.6.32)
rest_init->kernel_init->lockup_detector_init->cpu_callback->watchdog_prepare_cpu(初始化watchdog定時器):
- /*
-  * Set up the per-CPU watchdog hrtimer for @cpu.
-  *
-  * Emits a WARN_ON if a softlockup_watchdog task is already recorded for
-  * @cpu, then initialises the CPU's watchdog_hrtimer as a relative
-  * CLOCK_MONOTONIC timer and installs watchdog_timer_fn() as its expiry
-  * callback. The timer is only initialised here, not started.
-  *
-  * Always returns 0.
-  */
- static int watchdog_prepare_cpu(int cpu)
- {
-     struct hrtimer *timer;
-
-     WARN_ON(per_cpu(softlockup_watchdog, cpu));
-
-     timer = &per_cpu(watchdog_hrtimer, cpu);
-     /* high-resolution timer, relative expiry on the monotonic clock */
-     hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-     /* expiry handler */
-     timer->function = watchdog_timer_fn;
-
-     return 0;
- }
看門狗定時器處理函數:
- static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
- {
- /*
-  * Expiry callback of the watchdog hrtimer; always returns HRTIMER_RESTART.
-  *
-  * On every expiry it bumps the interrupt count, wakes this CPU's
-  * softlockup_watchdog task and forwards the timer by one sample period.
-  * It then inspects the timestamp read below:
-  *   - touch_ts == 0: re-touch the watchdog (after a sched_clock_tick()
-  *     if softlockup_touch_sync was set) and skip the check;
-  *   - otherwise ask is_softlockup() how long the timestamp has been
-  *     stale and, on a non-zero answer, report once per episode.
-  *
-  * touch_ts is this CPU's watchdog_touch_ts, the timestamp the watchdog
-  * kernel thread refreshes each time it gets scheduled.
-  */
- unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
- struct pt_regs *regs = get_irq_regs();
- int duration;
- 
- /* kick the hardlockup detector */
- // increment the interrupt count; per the article this shows that no hard lockup (deadlock with interrupts disabled) has occurred
- watchdog_interrupt_count();
- 
- /* kick the softlockup detector */
- // wake the watchdog kernel thread
- wake_up_process(__get_cpu_var(softlockup_watchdog));
- 
- /* .. and repeat */
- // push the timer's expiry forward by one sample period
- hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
- if (touch_ts == 0) {
- if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
- /*
- * If the time stamp was touched atomically
- * make sure the scheduler tick is up to date.
- */
- __get_cpu_var(softlockup_touch_sync) = false;
- sched_clock_tick();
- }
- __touch_watchdog();
- return HRTIMER_RESTART;
- }
- 
- /* check for a softlockup
- * This is done by making sure a high priority task is
- * being scheduled. The task touches the watchdog to
- * indicate it is getting cpu time. If it hasn't then
- * this is a good indication some task is hogging the cpu
- */
- // decide whether a soft lockup happened: is_softlockup() checks whether touch_ts has gone without an update for too long
- duration = is_softlockup(touch_ts);
- if (unlikely(duration)) {
- /* only warn once */
- if (__get_cpu_var(soft_watchdog_warn) == true)
- return HRTIMER_RESTART;
- // soft lockup detected: log the event and dump diagnostic information
- printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
- smp_processor_id(), duration,
- current->comm, task_pid_nr(current));
- print_modules();
- print_irqtrace_events(current);
- if (regs)
- show_regs(regs);
- else
- dump_stack();
- // panic if softlockup_panic is set (per the article, it is configured through proc)
- if (softlockup_panic)
- panic("softlockup: hung tasks");
- __get_cpu_var(soft_watchdog_warn) = true;
- } else
- __get_cpu_var(soft_watchdog_warn) = false;
- 
- return HRTIMER_RESTART;
- }
啟動看門狗,即創建watchdog內核線程。
- /*
-  * Enable lockup detection for @cpu.
-  *
-  * watchdog_nmi_enable() is called first; its result is kept but never
-  * prevents the soft-lockup side from being set up. If no watchdog task
-  * is recorded for @cpu yet, a "watchdog/<cpu>" kernel thread is created,
-  * bound to the CPU, the CPU's watchdog_touch_ts is reset to 0, the task
-  * is stored in softlockup_watchdog and finally woken.
-  *
-  * Returns the watchdog_nmi_enable() result; if that was 0 and
-  * kthread_create() failed, returns the kthread_create() error instead.
-  */
- static int watchdog_enable(int cpu)
- {
-     struct task_struct *thread = per_cpu(softlockup_watchdog, cpu);
-     int err;
-
-     /* enable the perf event */
-     err = watchdog_nmi_enable(cpu);
-
-     /* whatever err is, carry on with the softlockup part */
-     if (thread)
-         return err;     /* thread already exists, nothing to create */
-
-     /* create the watchdog kernel thread */
-     thread = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
-     if (IS_ERR(thread)) {
-         printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
-         /* keep an earlier hardlockup error if there was one */
-         return err ? err : PTR_ERR(thread);
-     }
-
-     kthread_bind(thread, cpu);
-     per_cpu(watchdog_touch_ts, cpu) = 0;
-     per_cpu(softlockup_watchdog, cpu) = thread;
-     wake_up_process(thread);
-
-     return err;
- }
watchdog內核線程執行主函數,主要是要更新計數(時間戳)
- /*
-  * Main function of the per-CPU watchdog kernel thread.
-  *
-  * Switches the calling thread to SCHED_FIFO at priority MAX_RT_PRIO-1,
-  * touches the watchdog timestamp, arms this CPU's watchdog hrtimer and
-  * then loops: touch the timestamp, sleep in TASK_INTERRUPTIBLE until
-  * woken (watchdog_timer_fn() wakes it on every timer expiry), repeat
-  * until kthread_should_stop() reports true.
-  *
-  * @unused: thread argument; this function does not read it.
-  *
-  * Returns 0 once the thread has been asked to stop.
-  */
- static int watchdog(void *unused)
- {
-     /* highest real-time priority */
-     struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-     struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
-
-     /*
-      * Make this a real-time thread. The third argument is the address
-      * of param; the listing previously showed it with the "&para"
-      * prefix mangled into a pilcrow, which is not valid C.
-      */
-     sched_setscheduler(current, SCHED_FIFO, &param);
-
-     /* initialize timestamp */
-     __touch_watchdog();
-
-     /* kick off the timer for the hardlockup detector */
-     /* done here because hrtimer_start can only pin to smp_processor_id() */
-     /* this same timer also drives the soft-lockup check */
-     hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
-                   HRTIMER_MODE_REL_PINNED);
-
-     /* mark ourselves sleeping before the first schedule() */
-     set_current_state(TASK_INTERRUPTIBLE);
-     /*
-      * Run briefly once per second to reset the softlockup timestamp.
-      * If this gets delayed for more than 60 seconds then the
-      * debug-printout triggers in watchdog_timer_fn().
-      */
-     while (!kthread_should_stop()) {
-         /* refresh the timestamp, then give up the CPU */
-         __touch_watchdog();
-         schedule();
-
-         if (kthread_should_stop())
-             break;
-
-         set_current_state(TASK_INTERRUPTIBLE);
-     }
-     __set_current_state(TASK_RUNNING);
-
-     return 0;
- }
判斷是否發生軟鎖:is_softlockup
- /*
-  * Report whether the watchdog timestamp has gone stale.
-  *
-  * @touch_ts: the timestamp last recorded for the watchdog.
-  *
-  * Compares the current CPU's get_timestamp() value with
-  * @touch_ts + softlockup_thresh. Returns 0 while the current time is not
-  * after that deadline; otherwise returns the elapsed time
-  * (now - touch_ts), which the caller treats as the lockup duration.
-  * NOTE(review): units follow get_timestamp()/softlockup_thresh, which are
-  * defined outside this listing - presumably seconds; confirm.
-  */
- static int is_softlockup(unsigned long touch_ts)
- {
-     unsigned long now = get_timestamp(smp_processor_id());
-
-     /* still within the threshold: no lockup */
-     if (!time_after(now, touch_ts + softlockup_thresh))
-         return 0;
-
-     /* Warn about unreasonable delays: hand back how long it has been */
-     return now - touch_ts;
- }
本站僅提供存儲服務,所有內容均由用戶發布,如發現有害或侵權內容,請
點擊舉報。