Commit bb81a09e authored by Andrew Morton's avatar Andrew Morton Committed by Andi Kleen

[PATCH] x86: all cpu backtrace

When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu
backtrace, so we get to see which CPU is holding the lock, and where.

Cc: Andi Kleen <ak@muc.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
parent e5e3a042
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/dmi.h> #include <linux/dmi.h>
#include <linux/kprobes.h> #include <linux/kprobes.h>
#include <linux/cpumask.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/nmi.h> #include <asm/nmi.h>
...@@ -42,6 +43,8 @@ int nmi_watchdog_enabled; ...@@ -42,6 +43,8 @@ int nmi_watchdog_enabled;
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
static cpumask_t backtrace_mask = CPU_MASK_NONE;
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
* offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
*/ */
...@@ -907,6 +910,16 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) ...@@ -907,6 +910,16 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
touched = 1; touched = 1;
} }
if (cpu_isset(cpu, backtrace_mask)) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
printk("NMI backtrace for cpu %d\n", cpu);
dump_stack();
spin_unlock(&lock);
cpu_clear(cpu, backtrace_mask);
}
sum = per_cpu(irq_stat, cpu).apic_timer_irqs; sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
/* if the apic timer isn't firing, this cpu isn't doing much */ /* if the apic timer isn't firing, this cpu isn't doing much */
...@@ -1033,6 +1046,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, ...@@ -1033,6 +1046,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
#endif #endif
/*
 * Request an NMI-driven stack dump from every online CPU.
 *
 * Sets every online CPU's bit in backtrace_mask; each CPU's NMI
 * watchdog tick (nmi_watchdog_tick) notices its own bit, prints a
 * backtrace via dump_stack() and clears the bit.  This function then
 * busy-waits until the mask drains or a timeout expires.
 *
 * NOTE(review): backtrace_mask is assigned and polled without atomics
 * or memory barriers; presumably the benign race with the NMI handlers
 * is intentional (worst case a CPU is missed or reported twice) —
 * confirm against the NMI handler side.
 */
void __trigger_all_cpu_backtrace(void)
{
int i;
backtrace_mask = cpu_online_map; /* mark every online CPU as pending */
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
if (cpus_empty(backtrace_mask)) /* all CPUs have reported in */
break;
mdelay(1); /* 1 ms per iteration -> 10*1000 ms = 10 s cap */
}
}
EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
......
...@@ -12,14 +12,15 @@ ...@@ -12,14 +12,15 @@
* Mikael Pettersson : PM converted to driver model. Disable/enable API. * Mikael Pettersson : PM converted to driver model. Disable/enable API.
*/ */
#include <linux/nmi.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sysdev.h> #include <linux/sysdev.h>
#include <linux/nmi.h>
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/kprobes.h> #include <linux/kprobes.h>
#include <linux/cpumask.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/nmi.h> #include <asm/nmi.h>
...@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi; ...@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi;
static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
static cpumask_t backtrace_mask = CPU_MASK_NONE;
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
* offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
*/ */
...@@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) ...@@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{ {
int sum; int sum;
int touched = 0; int touched = 0;
int cpu = smp_processor_id();
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
u64 dummy; u64 dummy;
int rc=0; int rc=0;
...@@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) ...@@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
touched = 1; touched = 1;
} }
if (cpu_isset(cpu, backtrace_mask)) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
printk("NMI backtrace for cpu %d\n", cpu);
dump_stack();
spin_unlock(&lock);
cpu_clear(cpu, backtrace_mask);
}
#ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_MCE
/* Could check oops_in_progress here too, but it's safer /* Could check oops_in_progress here too, but it's safer
not too */ not too */
...@@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, ...@@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
#endif #endif
/*
 * Request an NMI-driven stack dump from every online CPU (x86_64 copy).
 *
 * Sets every online CPU's bit in backtrace_mask; each CPU's NMI
 * watchdog tick (nmi_watchdog_tick) notices its own bit, prints a
 * backtrace via dump_stack() and clears the bit.  This function then
 * busy-waits until the mask drains or a timeout expires.
 *
 * NOTE(review): backtrace_mask is assigned and polled without atomics
 * or memory barriers; presumably the benign race with the NMI handlers
 * is intentional (worst case a CPU is missed or reported twice) —
 * confirm against the NMI handler side.
 */
void __trigger_all_cpu_backtrace(void)
{
int i;
backtrace_mask = cpu_online_map; /* mark every online CPU as pending */
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
if (cpus_empty(backtrace_mask)) /* all CPUs have reported in */
break;
mdelay(1); /* 1 ms per iteration -> 10*1000 ms = 10 s cap */
}
}
EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
......
...@@ -5,6 +5,9 @@ ...@@ -5,6 +5,9 @@
#define ASM_NMI_H #define ASM_NMI_H
#include <linux/pm.h> #include <linux/pm.h>
#include <asm/irq.h>
#ifdef ARCH_HAS_NMI_WATCHDOG
/** /**
* do_nmi_callback * do_nmi_callback
...@@ -42,4 +45,9 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, ...@@ -42,4 +45,9 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
void __user *, size_t *, loff_t *); void __user *, size_t *, loff_t *);
extern int unknown_nmi_panic; extern int unknown_nmi_panic;
void __trigger_all_cpu_backtrace(void);
#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
#endif
#endif /* ASM_NMI_H */ #endif /* ASM_NMI_H */
...@@ -77,4 +77,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, ...@@ -77,4 +77,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
extern int unknown_nmi_panic; extern int unknown_nmi_panic;
void __trigger_all_cpu_backtrace(void);
#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
#endif /* ASM_NMI_H */ #endif /* ASM_NMI_H */
...@@ -15,9 +15,14 @@ ...@@ -15,9 +15,14 @@
* disables interrupts for a long time. This call is stateless. * disables interrupts for a long time. This call is stateless.
*/ */
#ifdef ARCH_HAS_NMI_WATCHDOG #ifdef ARCH_HAS_NMI_WATCHDOG
#include <asm/nmi.h>
extern void touch_nmi_watchdog(void); extern void touch_nmi_watchdog(void);
#else #else
# define touch_nmi_watchdog() touch_softlockup_watchdog() # define touch_nmi_watchdog() touch_softlockup_watchdog()
#endif #endif
#ifndef trigger_all_cpu_backtrace
#define trigger_all_cpu_backtrace() do { } while (0)
#endif
#endif #endif
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
*/ */
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/nmi.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/debug_locks.h> #include <linux/debug_locks.h>
#include <linux/delay.h> #include <linux/delay.h>
...@@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock) ...@@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock)
raw_smp_processor_id(), current->comm, raw_smp_processor_id(), current->comm,
current->pid, lock); current->pid, lock);
dump_stack(); dump_stack();
#ifdef CONFIG_SMP
trigger_all_cpu_backtrace();
#endif
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment