Commit b9917028 authored by Andi Kleen, committed by Robert Richter

oprofile: Implement Intel architectural perfmon support

Newer Intel CPUs (Core1+) have support for architectural
events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.

The advantage of this is that it can be done without knowing about
the specific CPU, because the CPU describes by itself what
performance events are supported. This is only a fallback
because only a limited set of 6 events are supported.
This allows profiling on Nehalem and on Atom systems
(the latter not tested).

This patch implements support for that in oprofile's Intel
Family 6 profiling module. It also has the advantage of supporting
an arbitrary number of events now as reported by the CPU.
Also allow arbitrary counter widths >32bit while we're at it.

Requires a patched oprofile userland to support the new
architecture.

v2: update for latest oprofile tree
    remove force_arch_perfmon
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
parent f645f640
...@@ -429,6 +429,16 @@ static int __init ppro_init(char **cpu_type) ...@@ -429,6 +429,16 @@ static int __init ppro_init(char **cpu_type)
return 1; return 1;
} }
/*
 * Fallback model selection based on architectural perfmon.
 *
 * When the CPU advertises architectural perfmon, store the
 * "i386/arch_perfmon" cpu type string through @cpu_type, select
 * op_arch_perfmon_spec as the active model and let
 * arch_perfmon_setup_counters() fill in the counter count.
 *
 * Returns 1 when the architectural perfmon model was selected,
 * 0 when the CPU does not advertise the feature (nothing is modified).
 */
static int __init arch_perfmon_init(char **cpu_type)
{
	if (cpu_has_arch_perfmon) {
		*cpu_type = "i386/arch_perfmon";
		model = &op_arch_perfmon_spec;
		arch_perfmon_setup_counters();
		return 1;
	}

	return 0;
}
/* in order to get sysfs right */ /* in order to get sysfs right */
static int using_nmi; static int using_nmi;
...@@ -436,7 +446,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) ...@@ -436,7 +446,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
{ {
__u8 vendor = boot_cpu_data.x86_vendor; __u8 vendor = boot_cpu_data.x86_vendor;
__u8 family = boot_cpu_data.x86; __u8 family = boot_cpu_data.x86;
char *cpu_type; char *cpu_type = NULL;
int ret = 0; int ret = 0;
if (!cpu_has_apic) if (!cpu_has_apic)
...@@ -474,19 +484,20 @@ int __init op_nmi_init(struct oprofile_operations *ops) ...@@ -474,19 +484,20 @@ int __init op_nmi_init(struct oprofile_operations *ops)
switch (family) { switch (family) {
/* Pentium IV */ /* Pentium IV */
case 0xf: case 0xf:
if (!p4_init(&cpu_type)) p4_init(&cpu_type);
return -ENODEV;
break; break;
/* A P6-class processor */ /* A P6-class processor */
case 6: case 6:
if (!ppro_init(&cpu_type)) ppro_init(&cpu_type);
return -ENODEV;
break; break;
default: default:
return -ENODEV; break;
} }
if (!cpu_type && !arch_perfmon_init(&cpu_type))
return -ENODEV;
break; break;
default: default:
......
/* /*
* @file op_model_ppro.h * @file op_model_ppro.h
* pentium pro / P6 model-specific MSR operations * Family 6 perfmon and architectural perfmon MSR operations
* *
* @remark Copyright 2002 OProfile authors * @remark Copyright 2002 OProfile authors
* @remark Copyright 2008 Intel Corporation
* @remark Read the file COPYING * @remark Read the file COPYING
* *
* @author John Levon * @author John Levon
* @author Philippe Elie * @author Philippe Elie
* @author Graydon Hoare * @author Graydon Hoare
* @author Andi Kleen
*/ */
#include <linux/oprofile.h> #include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/nmi.h> #include <asm/nmi.h>
#include <asm/intel_arch_perfmon.h>
#include "op_x86_model.h" #include "op_x86_model.h"
#include "op_counter.h" #include "op_counter.h"
#define NUM_COUNTERS 2 static int num_counters = 2;
#define NUM_CONTROLS 2 static int counter_width = 32;
#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
#define CTR_32BIT_WRITE(l, msrs, c) \ #define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1))))
do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
...@@ -40,20 +42,20 @@ ...@@ -40,20 +42,20 @@
#define CTRL_SET_UM(val, m) (val |= (m << 8)) #define CTRL_SET_UM(val, m) (val |= (m << 8))
#define CTRL_SET_EVENT(val, e) (val |= e) #define CTRL_SET_EVENT(val, e) (val |= e)
static unsigned long reset_value[NUM_COUNTERS]; static u64 *reset_value;
static void ppro_fill_in_addresses(struct op_msrs * const msrs) static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{ {
int i; int i;
for (i = 0; i < NUM_COUNTERS; i++) { for (i = 0; i < num_counters; i++) {
if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
else else
msrs->counters[i].addr = 0; msrs->counters[i].addr = 0;
} }
for (i = 0; i < NUM_CONTROLS; i++) { for (i = 0; i < num_counters; i++) {
if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
else else
...@@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) ...@@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
unsigned int low, high; unsigned int low, high;
int i; int i;
/*
 * Lazily allocate one reset value per counter.
 *
 * reset_value is a u64 array, so size the allocation from the element
 * type itself rather than from sizeof(unsigned), which would only
 * reserve half of the required bytes. The buffer must also start out
 * zeroed: only enabled counters get a value assigned below, while the
 * check/start/stop paths treat a non-zero reset_value[i] as "counter
 * in use". kcalloc() provides both the zeroing and a multiplication
 * overflow check.
 */
if (!reset_value) {
	reset_value = kcalloc(num_counters, sizeof(*reset_value),
			      GFP_ATOMIC);
	if (!reset_value)
		return;
}
if (cpu_has_arch_perfmon) {
union cpuid10_eax eax;
eax.full = cpuid_eax(0xa);
if (counter_width < eax.split.bit_width)
counter_width = eax.split.bit_width;
}
/* clear all counters */ /* clear all counters */
for (i = 0 ; i < NUM_CONTROLS; ++i) { for (i = 0 ; i < num_counters; ++i) {
if (unlikely(!CTRL_IS_RESERVED(msrs, i))) if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
continue; continue;
CTRL_READ(low, high, msrs, i); CTRL_READ(low, high, msrs, i);
...@@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) ...@@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
} }
/* avoid a false detection of ctr overflows in NMI handler */ /* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < NUM_COUNTERS; ++i) { for (i = 0; i < num_counters; ++i) {
if (unlikely(!CTR_IS_RESERVED(msrs, i))) if (unlikely(!CTR_IS_RESERVED(msrs, i)))
continue; continue;
CTR_32BIT_WRITE(1, msrs, i); wrmsrl(msrs->counters[i].addr, -1LL);
} }
/* enable active counters */ /* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) { for (i = 0; i < num_counters; ++i) {
if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
reset_value[i] = counter_config[i].count; reset_value[i] = counter_config[i].count;
CTR_32BIT_WRITE(counter_config[i].count, msrs, i); wrmsrl(msrs->counters[i].addr, -reset_value[i]);
CTRL_READ(low, high, msrs, i); CTRL_READ(low, high, msrs, i);
CTRL_CLEAR(low); CTRL_CLEAR(low);
...@@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, ...@@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
unsigned int low, high; unsigned int low, high;
int i; int i;
for (i = 0 ; i < NUM_COUNTERS; ++i) { for (i = 0 ; i < num_counters; ++i) {
if (!reset_value[i]) if (!reset_value[i])
continue; continue;
CTR_READ(low, high, msrs, i); CTR_READ(low, high, msrs, i);
if (CTR_OVERFLOWED(low)) { if (CTR_OVERFLOWED(low)) {
oprofile_add_sample(regs, i); oprofile_add_sample(regs, i);
CTR_32BIT_WRITE(reset_value[i], msrs, i); wrmsrl(msrs->counters[i].addr, -reset_value[i]);
} }
} }
...@@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs) ...@@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs)
unsigned int low, high; unsigned int low, high;
int i; int i;
for (i = 0; i < NUM_COUNTERS; ++i) { for (i = 0; i < num_counters; ++i) {
if (reset_value[i]) { if (reset_value[i]) {
CTRL_READ(low, high, msrs, i); CTRL_READ(low, high, msrs, i);
CTRL_SET_ACTIVE(low); CTRL_SET_ACTIVE(low);
...@@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs) ...@@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
unsigned int low, high; unsigned int low, high;
int i; int i;
for (i = 0; i < NUM_COUNTERS; ++i) { for (i = 0; i < num_counters; ++i) {
if (!reset_value[i]) if (!reset_value[i])
continue; continue;
CTRL_READ(low, high, msrs, i); CTRL_READ(low, high, msrs, i);
...@@ -169,21 +185,65 @@ static void ppro_shutdown(struct op_msrs const * const msrs) ...@@ -169,21 +185,65 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
{ {
int i; int i;
for (i = 0 ; i < NUM_COUNTERS ; ++i) { for (i = 0 ; i < num_counters ; ++i) {
if (CTR_IS_RESERVED(msrs, i)) if (CTR_IS_RESERVED(msrs, i))
release_perfctr_nmi(MSR_P6_PERFCTR0 + i); release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
} }
for (i = 0 ; i < NUM_CONTROLS ; ++i) { for (i = 0 ; i < num_counters ; ++i) {
if (CTRL_IS_RESERVED(msrs, i)) if (CTRL_IS_RESERVED(msrs, i))
release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
} }
if (reset_value) {
kfree(reset_value);
reset_value = NULL;
}
} }
struct op_x86_model_spec const op_ppro_spec = { struct op_x86_model_spec const op_ppro_spec = {
.num_counters = NUM_COUNTERS, .num_counters = 2,
.num_controls = NUM_CONTROLS, .num_controls = 2,
.fill_in_addresses = &ppro_fill_in_addresses,
.setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs,
.start = &ppro_start,
.stop = &ppro_stop,
.shutdown = &ppro_shutdown
};
/*
* Architectural performance monitoring.
*
* Newer Intel CPUs (Core1+) have support for architectural
* events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
* The advantage of this is that it can be done without knowing about
* the specific CPU.
*/
void arch_perfmon_setup_counters(void)
{
union cpuid10_eax eax;
eax.full = cpuid_eax(0xa);
/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
current_cpu_data.x86_model == 15) {
eax.split.version_id = 2;
eax.split.num_counters = 2;
eax.split.bit_width = 40;
}
num_counters = eax.split.num_counters;
op_arch_perfmon_spec.num_counters = num_counters;
op_arch_perfmon_spec.num_controls = num_counters;
}
struct op_x86_model_spec op_arch_perfmon_spec = {
/* num_counters/num_controls filled in at runtime */
.fill_in_addresses = &ppro_fill_in_addresses, .fill_in_addresses = &ppro_fill_in_addresses,
/* user space does the cpuid check for available events */
.setup_ctrs = &ppro_setup_ctrs, .setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs, .check_ctrs = &ppro_check_ctrs,
.start = &ppro_start, .start = &ppro_start,
......
...@@ -49,5 +49,8 @@ extern struct op_x86_model_spec const op_ppro_spec; ...@@ -49,5 +49,8 @@ extern struct op_x86_model_spec const op_ppro_spec;
extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_spec;
extern struct op_x86_model_spec const op_p4_ht2_spec; extern struct op_x86_model_spec const op_p4_ht2_spec;
extern struct op_x86_model_spec const op_amd_spec; extern struct op_x86_model_spec const op_amd_spec;
extern struct op_x86_model_spec op_arch_perfmon_spec;
extern void arch_perfmon_setup_counters(void);
#endif /* OP_X86_MODEL_H */ #endif /* OP_X86_MODEL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment