Commit 1d991001 authored by Ingo Molnar

Merge branch 'x86/mce3' into x86/urgent

parents bc3f5d3d b1f49f95
arch/x86/include/asm/mce.h

@@ -102,15 +102,39 @@ struct mce_log {

 #ifdef __KERNEL__

+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+
 extern int mce_disabled;
+extern int mce_p5_enabled;

-#include <asm/atomic.h>
-#include <linux/percpu.h>
+#ifdef CONFIG_X86_MCE
+void mcheck_init(struct cpuinfo_x86 *c);
+#else
+static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+#endif
+
+#ifdef CONFIG_X86_OLD_MCE
+extern int nr_mce_banks;
+void amd_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+#endif
+
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void enable_p5_mce(void) {}
+#endif

 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
-extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

 /*
  * To support more than 128 would need to escape the predefined

@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);

-void mce_log_therm_throt_event(__u64 status);
-
 extern atomic_t mce_entry;

-void do_machine_check(struct pt_regs *, long);
-
 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);

@@ -167,13 +187,32 @@ void mce_notify_process(void);
 DECLARE_PER_CPU(struct mce, injectm);
 extern struct file_operations mce_chrdev_ops;

-#ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
-#else
-#define mcheck_init(c) do { } while (0)
-#endif
+/*
+ * Exception handler
+ */
+
+/* Call the installed machine check handler for this CPU setup. */
+extern void (*machine_check_vector)(struct pt_regs *, long error_code);
+void do_machine_check(struct pt_regs *, long);
+
+/*
+ * Threshold handler
+ */
 extern void (*mce_threshold_vector)(void);
+extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
+
+/*
+ * Thermal handler
+ */
+void intel_init_thermal(struct cpuinfo_x86 *c);
+
+#ifdef CONFIG_X86_NEW_MCE
+void mce_log_therm_throt_event(__u64 status);
+#else
+static inline void mce_log_therm_throt_event(__u64 status) {}
+#endif

 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
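One detail worth noting in this hunk: the old header stubbed out mcheck_init() with a function-like macro (`#define mcheck_init(c) do { } while (0)`), while the new header uses an empty static inline. The inline form still type-checks its argument in disabled configurations, and callers can invoke mcheck_init() unconditionally with no #ifdef at the call site. A minimal user-space sketch of that pattern (MY_CONFIG_MCE is a hypothetical stand-in for CONFIG_X86_MCE; illustration only, not kernel code):

#include <stdio.h>

struct cpuinfo_x86 { int family; };

#define MY_CONFIG_MCE 1                 /* flip to 0 to build the stub */

#if MY_CONFIG_MCE
static void mcheck_init(struct cpuinfo_x86 *c)
{
        printf("MCE init for CPU family %d\n", c->family);
}
#else
/* empty inline: compiles to nothing but still type-checks 'c' */
static inline void mcheck_init(struct cpuinfo_x86 *c) {}
#endif

int main(void)
{
        struct cpuinfo_x86 cpu = { .family = 6 };

        mcheck_init(&cpu);      /* call site needs no #ifdef either way */
        return 0;
}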
arch/x86/include/asm/therm_throt.h (deleted; therm_throt_en and therm_throt_process() become static to therm_throt.c)

-#ifndef _ASM_X86_THERM_THROT_H
-#define _ASM_X86_THERM_THROT_H
-
-#include <asm/atomic.h>
-
-extern atomic_t therm_throt_en;
-int therm_throt_process(int curr);
-
-#endif /* _ASM_X86_THERM_THROT_H */
arch/x86/kernel/cpu/mcheck/Makefile

-obj-y = mce.o therm_throt.o
+obj-y = mce.o
 obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
 obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
 obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
-obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o
-obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o
-obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
+obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
 obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
 obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
+obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
arch/x86/kernel/cpu/mcheck/k7.c

@@ -10,10 +10,9 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>

-#include "mce.h"
-
 /* Machine Check Handler For AMD Athlon/Duron: */
 static void k7_machine_check(struct pt_regs *regs, long error_code)
 {
...
[A large diff is collapsed in the original view here — presumably the arch/x86/kernel/cpu/mcheck/mce.c unification, which is not expanded on this page.]
arch/x86/kernel/cpu/mcheck/mce.h (deleted; its declarations move into <asm/mce.h> above)

-#include <linux/init.h>
-#include <asm/mce.h>
-
-#ifdef CONFIG_X86_OLD_MCE
-void amd_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
-#endif
-
-#ifdef CONFIG_X86_ANCIENT_MCE
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void winchip_mcheck_init(struct cpuinfo_x86 *c);
-extern int mce_p5_enable;
-static inline int mce_p5_enabled(void) { return mce_p5_enable; }
-static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
-#else
-static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline int mce_p5_enabled(void) { return 0; }
-static inline void enable_p5_mce(void) { }
-#endif
-
-/* Call the installed machine check handler for this CPU setup. */
-extern void (*machine_check_vector)(struct pt_regs *, long error_code);
-
-#ifdef CONFIG_X86_OLD_MCE
-extern int nr_mce_banks;
-void intel_set_thermal_handler(void);
-#else
-static inline void intel_set_thermal_handler(void) { }
-#endif
-
-void intel_init_thermal(struct cpuinfo_x86 *c);
arch/x86/kernel/cpu/mcheck/mce_intel.c (rewritten: the old thermal-setup code moves to therm_throt.c below, and the CMCI code moves in from mce_intel_64.c)

-/*
- * Common code for Intel machine checks
- */
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/therm_throt.h>
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/apic.h>
-#include <asm/msr.h>
-
-#include "mce.h"
-
-void intel_init_thermal(struct cpuinfo_x86 *c)
-{
-	unsigned int cpu = smp_processor_id();
-	int tm2 = 0;
-	u32 l, h;
-
-	/*
-	 * Thermal monitoring depends on ACPI, clock modulation
-	 * and APIC as well
-	 */
-	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC) ||
-	    !cpu_has(c, X86_FEATURE_APIC)) {
-		pr_debug("Thermal monitoring disabled\n");
-		return;
-	}
-
-	/*
-	 * First check if its enabled already, in which case there might
-	 * be some SMM goo which handles it, so we can't even put a handler
-	 * since it might be delivered via SMI already:
-	 */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
-	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
-		return;
-	}
-
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
-		tm2 = 1;
-
-	/* Check whether a vector already exists */
-	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
-		       cpu, (h & APIC_VECTOR_MASK));
-		return;
-	}
-
-	/* We'll mask the thermal vector in the lapic till we're ready: */
-	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
-	apic_write(APIC_LVTTHMR, h);
-
-	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT,
-	      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
-
-	intel_set_thermal_handler();
-
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
-	/* Unmask the thermal vector: */
-	l = apic_read(APIC_LVTTHMR);
-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-
-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
-	       cpu, tm2 ? "TM2" : "TM1");
-
-	/* enable thermal throttle processing */
-	atomic_set(&therm_throt_en, 1);
-}
+/*
+ * Intel specific MCE features.
+ * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
+ * Copyright (C) 2008, 2009 Intel Corporation
+ * Author: Andi Kleen
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/mce.h>
+
+/*
+ * Support for Intel Correct Machine Check Interrupts. This allows
+ * the CPU to raise an interrupt when a corrected machine check happened.
+ * Normally we pick those up using a regular polling timer.
+ * Also supports reliable discovery of shared banks.
+ */
+
+static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
+
+/*
+ * cmci_discover_lock protects against parallel discovery attempts
+ * which could race against each other.
+ */
+static DEFINE_SPINLOCK(cmci_discover_lock);
+
+#define CMCI_THRESHOLD 1
+
+static int cmci_supported(int *banks)
+{
+	u64 cap;
+
+	if (mce_cmci_disabled || mce_ignore_ce)
+		return 0;
+
+	/*
+	 * Vendor check is not strictly needed, but the initial
+	 * initialization is vendor keyed and this
+	 * makes sure none of the backdoors are entered otherwise.
+	 */
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return 0;
+	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+		return 0;
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
+	return !!(cap & MCG_CMCI_P);
+}
+
+/*
+ * The interrupt handler. This is called on every event.
+ * Just call the poller directly to log any events.
+ * This could in theory increase the threshold under high load,
+ * but doesn't for now.
+ */
+static void intel_threshold_interrupt(void)
+{
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+	mce_notify_irq();
+}
+
+static void print_update(char *type, int *hdr, int num)
+{
+	if (*hdr == 0)
+		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
+	*hdr = 1;
+	printk(KERN_CONT " %s:%d", type, num);
+}
+
+/*
+ * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
+ * on this CPU. Use the algorithm recommended in the SDM to discover shared
+ * banks.
+ */
+static void cmci_discover(int banks, int boot)
+{
+	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
+	unsigned long flags;
+	int hdr = 0;
+	int i;
+
+	spin_lock_irqsave(&cmci_discover_lock, flags);
+	for (i = 0; i < banks; i++) {
+		u64 val;
+
+		if (test_bit(i, owned))
+			continue;
+
+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+
+		/* Already owned by someone else? */
+		if (val & CMCI_EN) {
+			if (test_and_clear_bit(i, owned) || boot)
+				print_update("SHD", &hdr, i);
+			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+			continue;
+		}
+
+		val |= CMCI_EN | CMCI_THRESHOLD;
+		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+
+		/* Did the enable bit stick? -- the bank supports CMCI */
+		if (val & CMCI_EN) {
+			if (!test_and_set_bit(i, owned) || boot)
+				print_update("CMCI", &hdr, i);
+			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+		} else {
+			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
+		}
+	}
+	spin_unlock_irqrestore(&cmci_discover_lock, flags);
+	if (hdr)
+		printk(KERN_CONT "\n");
+}
+
+/*
+ * Just in case we missed an event during initialization check
+ * all the CMCI owned banks.
+ */
+void cmci_recheck(void)
+{
+	unsigned long flags;
+	int banks;
+
+	if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
+		return;
+	local_irq_save(flags);
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+	local_irq_restore(flags);
+}
+
+/*
+ * Disable CMCI on this CPU for all banks it owns when it goes down.
+ * This allows other CPUs to claim the banks on rediscovery.
+ */
+void cmci_clear(void)
+{
+	unsigned long flags;
+	int i;
+	int banks;
+	u64 val;
+
+	if (!cmci_supported(&banks))
+		return;
+	spin_lock_irqsave(&cmci_discover_lock, flags);
+	for (i = 0; i < banks; i++) {
+		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+			continue;
+		/* Disable CMCI */
+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
+		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		__clear_bit(i, __get_cpu_var(mce_banks_owned));
+	}
+	spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
+/*
+ * After a CPU went down cycle through all the others and rediscover
+ * Must run in process context.
+ */
+void cmci_rediscover(int dying)
+{
+	int banks;
+	int cpu;
+	cpumask_var_t old;
+
+	if (!cmci_supported(&banks))
+		return;
+	if (!alloc_cpumask_var(&old, GFP_KERNEL))
+		return;
+	cpumask_copy(old, &current->cpus_allowed);
+
+	for_each_online_cpu(cpu) {
+		if (cpu == dying)
+			continue;
+		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
+			continue;
+		/* Recheck banks in case CPUs don't all have the same */
+		if (cmci_supported(&banks))
+			cmci_discover(banks, 0);
+	}
+
+	set_cpus_allowed_ptr(current, old);
+	free_cpumask_var(old);
+}
+
+/*
+ * Reenable CMCI on this CPU in case a CPU down failed.
+ */
+void cmci_reenable(void)
+{
+	int banks;
+
+	if (cmci_supported(&banks))
+		cmci_discover(banks, 0);
+}
+
+static void intel_init_cmci(void)
+{
+	int banks;
+
+	if (!cmci_supported(&banks))
+		return;
+
+	mce_threshold_vector = intel_threshold_interrupt;
+	cmci_discover(banks, 1);
+	/*
+	 * For CPU #0 this runs with still disabled APIC, but that's
+	 * ok because only the vector is set up. We still do another
+	 * check for the banks later for CPU #0 just to make sure
+	 * to not miss any events.
+	 */
+	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
+	cmci_recheck();
+}
+
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+	intel_init_thermal(c);
+	intel_init_cmci();
+}
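The discovery loop in cmci_discover() relies on one hardware behavior: setting CMCI_EN in a bank's MCi_CTL2 MSR only sticks if the bank supports CMCI, and a bit that is already set means another CPU owns the bank. A toy user-space model of that probe, with a plain struct standing in for the MSR (hypothetical code for illustration, not kernel APIs):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define CMCI_EN (1ULL << 30)

/* stand-in for one MSR_IA32_MCi_CTL2 register */
struct bank {
	uint64_t ctl2;
	bool supports_cmci;
};

/* model of wrmsr: hardware silently drops CMCI_EN if unsupported */
static void wrmsr_model(struct bank *b, uint64_t val)
{
	if (!b->supports_cmci)
		val &= ~CMCI_EN;
	b->ctl2 = val;
}

static const char *probe(struct bank *b)
{
	if (b->ctl2 & CMCI_EN)			/* owned elsewhere */
		return "SHD (shared, owned by another CPU)";
	wrmsr_model(b, b->ctl2 | CMCI_EN);	/* try to claim it */
	if (b->ctl2 & CMCI_EN)			/* did the bit stick? */
		return "CMCI (claimed by this CPU)";
	return "poll (no CMCI; stays on the polling timer)";
}

int main(void)
{
	struct bank banks[] = {
		{ 0,       true  },	/* free bank with CMCI support */
		{ CMCI_EN, true  },	/* already claimed by another CPU */
		{ 0,       false },	/* bank without CMCI support */
	};

	for (int i = 0; i < 3; i++)
		printf("bank %d: %s\n", i, probe(&banks[i]));
	return 0;
}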
arch/x86/kernel/cpu/mcheck/mce_intel_64.c (deleted)

-/*
- * Intel specific MCE features.
- * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
- * Copyright (C) 2008, 2009 Intel Corporation
- * Author: Andi Kleen
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/percpu.h>
-#include <asm/processor.h>
-#include <asm/apic.h>
-#include <asm/msr.h>
-#include <asm/mce.h>
-#include <asm/hw_irq.h>
-#include <asm/idle.h>
-#include <asm/therm_throt.h>
-
-#include "mce.h"
-
-asmlinkage void smp_thermal_interrupt(void)
-{
-	__u64 msr_val;
-
-	ack_APIC_irq();
-
-	exit_idle();
-	irq_enter();
-
-	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
-		mce_log_therm_throt_event(msr_val);
-
-	inc_irq_stat(irq_thermal_count);
-	irq_exit();
-}
-
-[... the remainder of the deleted file, from the "Support for Intel Correct Machine Check Interrupts" comment through mce_intel_feature_init(), is the same CMCI code that reappears essentially verbatim as the new mce_intel.c above, and is not repeated here ...]
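One technique carried over unchanged from this deleted file into the new mce_intel.c: cmci_rediscover() runs per-CPU work by temporarily rebinding the current task to each online CPU with set_cpus_allowed_ptr(), then restoring the saved mask. The same walk can be sketched from user space with the affinity syscalls (a hypothetical demo, not part of the commit):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t old, one;
	long cpu, ncpus = sysconf(_SC_NPROCESSORS_ONLN);

	/* save the current affinity mask, like cpumask_copy(old, ...) */
	if (sched_getaffinity(0, sizeof(old), &old))
		return 1;

	for (cpu = 0; cpu < ncpus; cpu++) {
		CPU_ZERO(&one);
		CPU_SET(cpu, &one);
		/* pin to one CPU; skip it if the move fails */
		if (sched_setaffinity(0, sizeof(one), &one))
			continue;
		printf("running per-CPU rediscovery work on CPU %ld\n", cpu);
	}

	/* restore the original mask, like the final set_cpus_allowed_ptr() */
	sched_setaffinity(0, sizeof(old), &old);
	return 0;
}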
arch/x86/kernel/cpu/mcheck/non-fatal.c

@@ -17,10 +17,9 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>

-#include "mce.h"
-
 static int firstbank;

 #define MCE_RATE	(15*HZ)	/* timer rate is 15s */
...
arch/x86/kernel/cpu/mcheck/p4.c

 /*
  * P4 specific Machine Check Exception Reporting
  */
-#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/smp.h>

-#include <asm/therm_throt.h>
 #include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/apic.h>
+#include <asm/mce.h>
 #include <asm/msr.h>

-#include "mce.h"
-
 /* as supported by the P4/Xeon family */
 struct intel_mce_extended_msrs {
 	u32 eax;

@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {

 static int mce_num_extended_msrs;

-#ifdef CONFIG_X86_MCE_P4THERMAL
-
-static void unexpected_thermal_interrupt(struct pt_regs *regs)
-{
-	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
-	       smp_processor_id());
-	add_taint(TAINT_MACHINE_CHECK);
-}
-
-/* P4/Xeon Thermal transition interrupt handler: */
-static void intel_thermal_interrupt(struct pt_regs *regs)
-{
-	__u64 msr_val;
-
-	ack_APIC_irq();
-
-	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
-}
-
-/* Thermal interrupt handler for this CPU setup: */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
-					unexpected_thermal_interrupt;
-
-void smp_thermal_interrupt(struct pt_regs *regs)
-{
-	irq_enter();
-	vendor_thermal_interrupt(regs);
-	__get_cpu_var(irq_stat).irq_thermal_count++;
-	irq_exit();
-}
-
-void intel_set_thermal_handler(void)
-{
-	vendor_thermal_interrupt = intel_thermal_interrupt;
-}
-
-#endif /* CONFIG_X86_MCE_P4THERMAL */
-
 /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
 static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 {
...
arch/x86/kernel/cpu/mcheck/p5.c

@@ -10,12 +10,11 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>

-#include "mce.h"
-
 /* By default disabled */
-int mce_p5_enable;
+int mce_p5_enabled __read_mostly;

 /* Machine check handler for Pentium class Intel CPUs: */
 static void pentium_machine_check(struct pt_regs *regs, long error_code)

@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;

-	/* Check for MCE support: */
-	if (!cpu_has(c, X86_FEATURE_MCE))
+	/* Default P5 to off as its often misconnected: */
+	if (!mce_p5_enabled)
 		return;

-#ifdef CONFIG_X86_OLD_MCE
-	/* Default P5 to off as its often misconnected: */
-	if (mce_disabled != -1)
+	/* Check for MCE support: */
+	if (!cpu_has(c, X86_FEATURE_MCE))
 		return;
-#endif

 	machine_check_vector = pentium_machine_check;
 	/* Make sure the vector pointer is visible before we enable MCEs: */
...
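The comment retained at the end of this hunk ("Make sure the vector pointer is visible before we enable MCEs") names a real ordering requirement: publish the handler pointer first, then enable the facility that may invoke it, with a barrier in between so no CPU can observe the enable without the pointer. A self-contained C11 sketch of that publish-then-enable idiom (the kernel itself uses wmb() here, not C11 atomics):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static void pentium_machine_check(void)
{
	puts("machine check handled");
}

static _Atomic(void (*)(void)) machine_check_vector;
static atomic_bool mce_enabled;

static void mcheck_init(void)
{
	/* 1. publish the handler ... */
	atomic_store_explicit(&machine_check_vector, pentium_machine_check,
			      memory_order_release);
	/* 2. ... only then allow it to be invoked */
	atomic_store_explicit(&mce_enabled, true, memory_order_release);
}

static void exception_entry(void)
{
	if (!atomic_load_explicit(&mce_enabled, memory_order_acquire))
		return;
	/* acquire pairs with the release above: pointer is never stale */
	void (*vec)(void) = atomic_load_explicit(&machine_check_vector,
						 memory_order_acquire);
	vec();
}

int main(void)
{
	mcheck_init();
	exception_entry();
	return 0;
}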
arch/x86/kernel/cpu/mcheck/p6.c

@@ -10,10 +10,9 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>

-#include "mce.h"
-
 /* Machine Check Handler For PII/PIII */
 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
...
arch/x86/kernel/cpu/mcheck/therm_throt.c

@@ -13,13 +13,23 @@
  * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
  * Inspired by Ross Biro's and Al Borchers' counter code.
  */
+#include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/jiffies.h>
+#include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/sysdev.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>
 #include <linux/cpu.h>

-#include <asm/therm_throt.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/mce.h>
+#include <asm/msr.h>

 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)

@@ -27,7 +37,7 @@
 static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);

-atomic_t therm_throt_en = ATOMIC_INIT(0);
+static atomic_t therm_throt_en = ATOMIC_INIT(0);

 #ifdef CONFIG_SYSFS
 #define define_therm_throt_sysdev_one_ro(_name)

@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
  * 1 : Event should be logged further, and a message has been
  *     printed to the syslog.
  */
-int therm_throt_process(int curr)
+static int therm_throt_process(int curr)
 {
 	unsigned int cpu = smp_processor_id();
 	__u64 tmp_jiffs = get_jiffies_64();

@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
 	return 0;
 }
 device_initcall(thermal_throttle_init_device);
 #endif /* CONFIG_SYSFS */
+
+/* Thermal transition interrupt handler */
+static void intel_thermal_interrupt(void)
+{
+	__u64 msr_val;
+
+	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
+		mce_log_therm_throt_event(msr_val);
+}
+
+static void unexpected_thermal_interrupt(void)
+{
+	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
+	       smp_processor_id());
+	add_taint(TAINT_MACHINE_CHECK);
+}
+
+static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
+
+asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+{
+	exit_idle();
+	irq_enter();
+	inc_irq_stat(irq_thermal_count);
+	smp_thermal_vector();
+	irq_exit();
+	/* Ack only at the end to avoid potential reentry */
+	ack_APIC_irq();
+}
+
+void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	unsigned int cpu = smp_processor_id();
+	int tm2 = 0;
+	u32 l, h;
+
+	/* Thermal monitoring depends on ACPI and clock modulation */
+	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+		return;
+
+	/*
+	 * First check if its enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already:
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+		return;
+	}
+
+	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
+		tm2 = 1;
+
+	/* Check whether a vector already exists */
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
+		       cpu, (h & APIC_VECTOR_MASK));
+		return;
+	}
+
+	/* We'll mask the thermal vector in the lapic till we're ready: */
+	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+	apic_write(APIC_LVTTHMR, h);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT,
+	      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
+
+	smp_thermal_vector = intel_thermal_interrupt;
+
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+	/* Unmask the thermal vector: */
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+	       cpu, tm2 ? "TM2" : "TM1");
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
+}
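therm_throt_process(), referenced above, is a rate limiter: every throttling event is counted, but a syslog message is emitted at most once per CHECK_INTERVAL on each CPU. The same pattern, condensed to wall-clock time and one global instead of jiffies and per-CPU variables (a sketch, not the driver code):

#include <stdio.h>
#include <time.h>

#define CHECK_INTERVAL 300	/* seconds; the driver uses 300 * HZ */

static time_t next_check;
static unsigned long throttle_count;

/* returns 1 if a message was logged, 0 if the event was only counted */
static int throt_process(int curr, time_t now)
{
	if (curr)
		throttle_count++;	/* always account the event */
	if (now < next_check)
		return 0;		/* inside the quiet window */
	next_check = now + CHECK_INTERVAL;
	if (curr) {
		printf("CPU throttled (total events: %lu)\n", throttle_count);
		return 1;
	}
	return 0;
}

int main(void)
{
	time_t t0 = time(NULL);

	throt_process(1, t0);		/* logged */
	throt_process(1, t0 + 10);	/* counted but suppressed */
	throt_process(1, t0 + 400);	/* interval elapsed: logged again */
	return 0;
}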
arch/x86/kernel/cpu/mcheck/winchip.c

@@ -9,10 +9,9 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>

-#include "mce.h"
-
 /* Machine check handler for WinChip C6: */
 static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
...
arch/x86/kernel/traps.c

@@ -53,6 +53,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
+#include <asm/mce.h>

 #include <asm/mach_traps.h>

@@ -64,8 +65,6 @@
 #include <asm/setup.h>
 #include <asm/traps.h>

-#include "cpu/mcheck/mce.h"
-
 asmlinkage int system_call(void);

 /* Do we ignore FPU interrupts ? */
...