Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
L
linux-davinci
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
linux
linux-davinci
Commits
caab36b5
Commit
caab36b5
authored
Mar 05, 2009
by
Ingo Molnar
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'x86/mce2' into x86/core
parents
a1413c89
73af76df
Changes
12
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
709 additions
and
172 deletions
+709
-172
arch/x86/Kconfig
arch/x86/Kconfig
+5
-0
arch/x86/include/asm/apicdef.h
arch/x86/include/asm/apicdef.h
+1
-0
arch/x86/include/asm/mce.h
arch/x86/include/asm/mce.h
+32
-3
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/msr-index.h
+5
-0
arch/x86/kernel/alternative.c
arch/x86/kernel/alternative.c
+11
-6
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/apic.c
+15
-0
arch/x86/kernel/cpu/mcheck/Makefile
arch/x86/kernel/cpu/mcheck/Makefile
+1
-0
arch/x86/kernel/cpu/mcheck/mce_32.c
arch/x86/kernel/cpu/mcheck/mce_32.c
+0
-14
arch/x86/kernel/cpu/mcheck/mce_64.c
arch/x86/kernel/cpu/mcheck/mce_64.c
+395
-135
arch/x86/kernel/cpu/mcheck/mce_amd_64.c
arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+9
-13
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+206
-1
arch/x86/kernel/cpu/mcheck/threshold.c
arch/x86/kernel/cpu/mcheck/threshold.c
+29
-0
No files found.
arch/x86/Kconfig
View file @
caab36b5
...
...
@@ -783,6 +783,11 @@ config X86_MCE_AMD
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.
config X86_MCE_THRESHOLD
depends on X86_MCE_AMD || X86_MCE_INTEL
bool
default y
config X86_MCE_NONFATAL
tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
depends on X86_32 && X86_MCE
...
...
arch/x86/include/asm/apicdef.h
View file @
caab36b5
...
...
@@ -53,6 +53,7 @@
#define APIC_ESR_SENDILL 0x00020
#define APIC_ESR_RECVILL 0x00040
#define APIC_ESR_ILLREGA 0x00080
#define APIC_LVTCMCI 0x2f0
#define APIC_ICR 0x300
#define APIC_DEST_SELF 0x40000
#define APIC_DEST_ALLINC 0x80000
...
...
arch/x86/include/asm/mce.h
View file @
caab36b5
...
...
@@ -11,6 +11,8 @@
*/
#define MCG_CTL_P (1UL<<8)
/* MCG_CAP register available */
#define MCG_EXT_P (1ULL<<9)
/* Extended registers available */
#define MCG_CMCI_P (1ULL<<10)
/* CMCI supported */
#define MCG_STATUS_RIPV (1UL<<0)
/* restart ip valid */
#define MCG_STATUS_EIPV (1UL<<1)
/* ip points to correct instruction */
...
...
@@ -90,14 +92,29 @@ extern int mce_disabled;
#include <asm/atomic.h>
void
mce_setup
(
struct
mce
*
m
);
void
mce_log
(
struct
mce
*
m
);
DECLARE_PER_CPU
(
struct
sys_device
,
device_mce
);
extern
void
(
*
threshold_cpu_callback
)(
unsigned
long
action
,
unsigned
int
cpu
);
/*
* To support more than 128 would need to escape the predefined
* Linux defined extended banks first.
*/
#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
#ifdef CONFIG_X86_MCE_INTEL
void
mce_intel_feature_init
(
struct
cpuinfo_x86
*
c
);
void
cmci_clear
(
void
);
void
cmci_reenable
(
void
);
void
cmci_rediscover
(
int
dying
);
void
cmci_recheck
(
void
);
#else
static
inline
void
mce_intel_feature_init
(
struct
cpuinfo_x86
*
c
)
{
}
static
inline
void
cmci_clear
(
void
)
{}
static
inline
void
cmci_reenable
(
void
)
{}
static
inline
void
cmci_rediscover
(
int
dying
)
{}
static
inline
void
cmci_recheck
(
void
)
{}
#endif
#ifdef CONFIG_X86_MCE_AMD
...
...
@@ -106,11 +123,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
static
inline
void
mce_amd_feature_init
(
struct
cpuinfo_x86
*
c
)
{
}
#endif
void
mce_log_therm_throt_event
(
unsigned
int
cpu
,
__u64
status
);
extern
int
mce_available
(
struct
cpuinfo_x86
*
c
);
void
mce_log_therm_throt_event
(
__u64
status
);
extern
atomic_t
mce_entry
;
extern
void
do_machine_check
(
struct
pt_regs
*
,
long
);
typedef
DECLARE_BITMAP
(
mce_banks_t
,
MAX_NR_BANKS
);
DECLARE_PER_CPU
(
mce_banks_t
,
mce_poll_banks
);
enum
mcp_flags
{
MCP_TIMESTAMP
=
(
1
<<
0
),
/* log time stamp */
MCP_UC
=
(
1
<<
1
),
/* log uncorrected errors */
};
extern
void
machine_check_poll
(
enum
mcp_flags
flags
,
mce_banks_t
*
b
);
extern
int
mce_notify_user
(
void
);
#endif
/* !CONFIG_X86_32 */
...
...
@@ -120,8 +149,8 @@ extern void mcheck_init(struct cpuinfo_x86 *c);
#else
#define mcheck_init(c) do { } while (0)
#endif
extern
void
stop_mce
(
void
);
extern
void
restart_mce
(
void
);
extern
void
(
*
mce_threshold_vector
)
(
void
);
#endif
/* __KERNEL__ */
#endif
/* _ASM_X86_MCE_H */
arch/x86/include/asm/msr-index.h
View file @
caab36b5
...
...
@@ -77,6 +77,11 @@
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
/* These are consecutive and not in the normal four-register MCE bank block */
#define MSR_IA32_MC0_CTL2 0x00000280
#define CMCI_EN (1ULL << 30)
#define CMCI_THRESHOLD_MASK 0xffffULL
#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186
...
...
arch/x86/kernel/alternative.c
View file @
caab36b5
...
...
@@ -414,9 +414,17 @@ void __init alternative_instructions(void)
that might execute the to be patched code.
Other CPUs are not running. */
stop_nmi
();
#ifdef CONFIG_X86_MCE
stop_mce
();
#endif
/*
* Don't stop machine check exceptions while patching.
* MCEs only happen when something got corrupted and in this
* case we must do something about the corruption.
* Ignoring it is worse than an unlikely patching race.
* Also machine checks tend to be broadcast and if one CPU
* goes into machine check the others follow quickly, so we don't
* expect a machine check to cause undue problems during code
* patching.
*/
apply_alternatives
(
__alt_instructions
,
__alt_instructions_end
);
...
...
@@ -456,9 +464,6 @@ void __init alternative_instructions(void)
(
unsigned
long
)
__smp_locks_end
);
restart_nmi
();
#ifdef CONFIG_X86_MCE
restart_mce
();
#endif
}
/**
...
...
arch/x86/kernel/apic/apic.c
View file @
caab36b5
...
...
@@ -46,6 +46,7 @@
#include <asm/idle.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/mce.h>
unsigned
int
num_processors
;
...
...
@@ -842,6 +843,14 @@ void clear_local_APIC(void)
apic_write
(
APIC_LVTTHMR
,
v
|
APIC_LVT_MASKED
);
}
#endif
#ifdef CONFIG_X86_MCE_INTEL
if
(
maxlvt
>=
6
)
{
v
=
apic_read
(
APIC_LVTCMCI
);
if
(
!
(
v
&
APIC_LVT_MASKED
))
apic_write
(
APIC_LVTCMCI
,
v
|
APIC_LVT_MASKED
);
}
#endif
/*
* Clean APIC state for other OSs:
*/
...
...
@@ -1241,6 +1250,12 @@ void __cpuinit setup_local_APIC(void)
apic_write
(
APIC_LVT1
,
value
);
preempt_enable
();
#ifdef CONFIG_X86_MCE_INTEL
/* Recheck CMCI information after local APIC is up on CPU #0 */
if
(
smp_processor_id
()
==
0
)
cmci_recheck
();
#endif
}
void
__cpuinit
end_local_APIC_setup
(
void
)
...
...
arch/x86/kernel/cpu/mcheck/Makefile
View file @
caab36b5
...
...
@@ -4,3 +4,4 @@ obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
obj-$(CONFIG_X86_MCE_INTEL)
+=
mce_intel_64.o
obj-$(CONFIG_X86_MCE_AMD)
+=
mce_amd_64.o
obj-$(CONFIG_X86_MCE_NONFATAL)
+=
non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD)
+=
threshold.o
arch/x86/kernel/cpu/mcheck/mce_32.c
View file @
caab36b5
...
...
@@ -60,20 +60,6 @@ void mcheck_init(struct cpuinfo_x86 *c)
}
}
static
unsigned
long
old_cr4
__initdata
;
void
__init
stop_mce
(
void
)
{
old_cr4
=
read_cr4
();
clear_in_cr4
(
X86_CR4_MCE
);
}
void
__init
restart_mce
(
void
)
{
if
(
old_cr4
&
X86_CR4_MCE
)
set_in_cr4
(
X86_CR4_MCE
);
}
static
int
__init
mcheck_disable
(
char
*
str
)
{
mce_disabled
=
1
;
...
...
arch/x86/kernel/cpu/mcheck/mce_64.c
View file @
caab36b5
This diff is collapsed.
Click to expand it.
arch/x86/kernel/cpu/mcheck/mce_amd_64.c
View file @
caab36b5
...
...
@@ -79,6 +79,8 @@ static unsigned char shared_bank[NR_BANKS] = {
static
DEFINE_PER_CPU
(
unsigned
char
,
bank_map
);
/* see which banks are on */
static
void
amd_threshold_interrupt
(
void
);
/*
* CPU Initialization
*/
...
...
@@ -174,6 +176,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
tr
.
reset
=
0
;
tr
.
old_limit
=
0
;
threshold_restart_bank
(
&
tr
);
mce_threshold_vector
=
amd_threshold_interrupt
;
}
}
}
...
...
@@ -187,19 +191,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
* the interrupt goes off when error_count reaches threshold_limit.
* the handler will simply log mcelog w/ software defined bank number.
*/
asmlinkage
void
mce
_threshold_interrupt
(
void
)
static
void
amd
_threshold_interrupt
(
void
)
{
unsigned
int
bank
,
block
;
struct
mce
m
;
u32
low
=
0
,
high
=
0
,
address
=
0
;
ack_APIC_irq
();
exit_idle
();
irq_enter
();
memset
(
&
m
,
0
,
sizeof
(
m
));
rdtscll
(
m
.
tsc
);
m
.
cpu
=
smp_processor_id
();
mce_setup
(
&
m
);
/* assume first bank caused it */
for
(
bank
=
0
;
bank
<
NR_BANKS
;
++
bank
)
{
...
...
@@ -233,7 +231,8 @@ asmlinkage void mce_threshold_interrupt(void)
/* Log the machine check that caused the threshold
event. */
do_machine_check
(
NULL
,
0
);
machine_check_poll
(
MCP_TIMESTAMP
,
&
__get_cpu_var
(
mce_poll_banks
));
if
(
high
&
MASK_OVERFLOW_HI
)
{
rdmsrl
(
address
,
m
.
misc
);
...
...
@@ -243,13 +242,10 @@ asmlinkage void mce_threshold_interrupt(void)
+
bank
*
NR_BLOCKS
+
block
;
mce_log
(
&
m
);
goto
out
;
return
;
}
}
}
out:
inc_irq_stat
(
irq_threshold_count
);
irq_exit
();
}
/*
...
...
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
View file @
caab36b5
/*
* Intel specific MCE features.
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
* Copyright (C) 2008, 2009 Intel Corporation
* Author: Andi Kleen
*/
#include <linux/init.h>
...
...
@@ -13,6 +15,7 @@
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>
#include <asm/apic.h>
asmlinkage
void
smp_thermal_interrupt
(
void
)
{
...
...
@@ -25,7 +28,7 @@ asmlinkage void smp_thermal_interrupt(void)
rdmsrl
(
MSR_IA32_THERM_STATUS
,
msr_val
);
if
(
therm_throt_process
(
msr_val
&
1
))
mce_log_therm_throt_event
(
smp_processor_id
(),
msr_val
);
mce_log_therm_throt_event
(
msr_val
);
inc_irq_stat
(
irq_thermal_count
);
irq_exit
();
...
...
@@ -85,7 +88,209 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
return
;
}
/*
* Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
* the CPU to raise an interrupt when a corrected machine check happened.
* Normally we pick those up using a regular polling timer.
* Also supports reliable discovery of shared banks.
*/
static
DEFINE_PER_CPU
(
mce_banks_t
,
mce_banks_owned
);
/*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
static
DEFINE_SPINLOCK
(
cmci_discover_lock
);
/* Threshold value cmci_discover() writes into a bank's CTL2 MSR along with CMCI_EN */
#define CMCI_THRESHOLD 1
/*
 * Report whether CMCI can be used on this CPU.
 *
 * Returns 0 (leaving *banks untouched) when the boot CPU vendor is not
 * Intel, when there is no local APIC, or when lapic_get_maxlvt() is
 * below 6. Otherwise stores the bank count taken from the low byte of
 * MSR_IA32_MCG_CAP, clamped to MAX_NR_BANKS, in *banks and returns 1 if
 * MCG_CMCI_P is set in that MSR, 0 if it is not.
 */
static int cmci_supported(int *banks)
{
	u64 mcg_cap;

	/*
	 * Vendor check is not strictly needed, but the initial
	 * initialization is vendor keyed and this
	 * makes sure none of the backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;

	if (!cpu_has_apic)
		return 0;
	if (lapic_get_maxlvt() < 6)
		return 0;

	rdmsrl(MSR_IA32_MCG_CAP, mcg_cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, mcg_cap & 0xff);

	return (mcg_cap & MCG_CMCI_P) ? 1 : 0;
}
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
* This could in theory increase the threshold under high load,
* but doesn't for now.
*/
static
void
intel_threshold_interrupt
(
void
)
{
machine_check_poll
(
MCP_TIMESTAMP
,
&
__get_cpu_var
(
mce_banks_owned
));
mce_notify_user
();
}
/*
 * Append one " <type>:<num>" item to the per-CPU bank report line.
 * The "CPU %d MCA banks" prefix is printed first if *hdr is still 0;
 * *hdr is set to 1 afterwards so the caller knows a line was started.
 */
static void print_update(char *type, int *hdr, int num)
{
	if (!*hdr) {
		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
	}
	*hdr = 1;

	printk(KERN_CONT " %s:%d", type, num);
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
static
void
cmci_discover
(
int
banks
,
int
boot
)
{
unsigned
long
*
owned
=
(
void
*
)
&
__get_cpu_var
(
mce_banks_owned
);
int
hdr
=
0
;
int
i
;
spin_lock
(
&
cmci_discover_lock
);
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
u64
val
;
if
(
test_bit
(
i
,
owned
))
continue
;
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Already owned by someone else? */
if
(
val
&
CMCI_EN
)
{
if
(
test_and_clear_bit
(
i
,
owned
)
||
boot
)
print_update
(
"SHD"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
continue
;
}
val
|=
CMCI_EN
|
CMCI_THRESHOLD
;
wrmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Did the enable bit stick? -- the bank supports CMCI */
if
(
val
&
CMCI_EN
)
{
if
(
!
test_and_set_bit
(
i
,
owned
)
||
boot
)
print_update
(
"CMCI"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
}
else
{
WARN_ON
(
!
test_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
)));
}
}
spin_unlock
(
&
cmci_discover_lock
);
if
(
hdr
)
printk
(
KERN_CONT
"
\n
"
);
}
/*
 * Poll all CMCI owned banks once, in case an event was missed during
 * initialization. Does nothing when MCE is unavailable on the current
 * CPU or CMCI is unsupported. The poll runs with local interrupts
 * disabled.
 */
void cmci_recheck(void)
{
	unsigned long irqflags;
	int banks;

	if (!mce_available(&current_cpu_data))
		return;
	if (!cmci_supported(&banks))
		return;

	local_irq_save(irqflags);
	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
	local_irq_restore(irqflags);
}
/*
 * Give up every bank this CPU owns when it goes down: clear CMCI_EN and
 * the threshold field in the bank's CTL2 MSR and drop the bank from
 * mce_banks_owned. Other CPUs can then claim the banks on rediscovery.
 * Runs under cmci_discover_lock; a no-op when CMCI is unsupported.
 */
void cmci_clear(void)
{
	int nbanks;
	int bank;

	if (!cmci_supported(&nbanks))
		return;

	spin_lock(&cmci_discover_lock);
	for (bank = 0; bank < nbanks; bank++) {
		u64 ctl2;

		if (test_bit(bank, __get_cpu_var(mce_banks_owned))) {
			/* Disable CMCI */
			rdmsrl(MSR_IA32_MC0_CTL2 + bank, ctl2);
			ctl2 &= ~(CMCI_EN | CMCI_THRESHOLD_MASK);
			wrmsrl(MSR_IA32_MC0_CTL2 + bank, ctl2);
			__clear_bit(bank, __get_cpu_var(mce_banks_owned));
		}
	}
	spin_unlock(&cmci_discover_lock);
}
/*
 * After a CPU went down, visit every other online CPU and rerun bank
 * discovery there. The current task is moved to each CPU in turn and
 * its original affinity mask is restored at the end.
 * Must run in process context.
 *
 * @dying: CPU number to leave out of the walk
 */
void cmci_rediscover(int dying)
{
	cpumask_var_t saved_mask;
	int banks;
	int cpu;

	if (!cmci_supported(&banks))
		return;
	if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL))
		return;
	cpumask_copy(saved_mask, &current->cpus_allowed);

	for_each_online_cpu(cpu) {
		/* Skip the dying CPU and any CPU we fail to move onto */
		if (cpu == dying ||
		    set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
			continue;

		/* Recheck banks in case CPUs don't all have the same */
		if (cmci_supported(&banks))
			cmci_discover(banks, 0);
	}

	set_cpus_allowed_ptr(current, saved_mask);
	free_cpumask_var(saved_mask);
}
/*
 * Reenable CMCI on this CPU in case a CPU down failed: rerun bank
 * discovery (non-boot mode) if CMCI is supported.
 */
void cmci_reenable(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	cmci_discover(banks, 0);
}
/*
 * Set up CMCI on the current CPU; returns early when cmci_supported()
 * reports no support. Installs intel_threshold_interrupt as the
 * threshold handler, runs bank discovery in boot mode, programs the
 * CMCI LVT entry and finally polls the owned banks via cmci_recheck().
 */
static __cpuinit void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks, 1);
	/*
	 * For CPU #0 this runs with still disabled APIC, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * to not miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}
void
mce_intel_feature_init
(
struct
cpuinfo_x86
*
c
)
{
intel_init_thermal
(
c
);
intel_init_cmci
();
}
arch/x86/kernel/cpu/mcheck/threshold.c
0 → 100644
View file @
caab36b5
/*
* Common corrected MCE threshold handler code:
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <asm/irq_vectors.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
/*
 * Fallback threshold handler: only logs that the interrupt was not
 * expected. It is the initial value of mce_threshold_vector.
 */
static void default_threshold_interrupt(void)
{
	printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
			 THRESHOLD_APIC_VECTOR);
}
/*
 * Handler that mce_threshold_interrupt() dispatches to. Starts out as
 * default_threshold_interrupt; vendor setup code assigns its own handler.
 */
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
/*
 * Common entry point for the threshold interrupt. Between irq_enter()
 * and irq_exit() it bumps irq_threshold_count and calls whatever
 * handler is currently installed in mce_threshold_vector.
 */
asmlinkage void mce_threshold_interrupt(void)
{
	exit_idle();
	irq_enter();
	inc_irq_stat(irq_threshold_count);
	mce_threshold_vector();
	irq_exit();
	/* Ack only at the end to avoid potential reentry */
	ack_APIC_irq();
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment