Commit a1110654, authored by Scott Wood, committed by Kumar Gala

powerpc/perf: e500 support

This implements perf_event support for the Freescale embedded performance
monitor, based on the existing perf_event.c that supports server/classic
chips.

Some limitations:
- Performance monitor interrupts are regular EE interrupts, and thus you
  can't profile places with interrupts disabled.  We may want to implement
  soft IRQ-disabling, with perfmon interrupts exempted and treated as NMIs.
- When trying to schedule multiple event groups at once, and using
  restricted events, situations could arise where scheduling fails even
  though it would be possible.  Consider three groups, each with two events.
  One group has restricted events, the others don't.  The two non-restricted
  groups are scheduled, then the one that happens to occupy the two
  counters that can't do restricted events is removed.  The remaining
  non-restricted group will not be moved onto those freed counters to make
  room when the restricted group tries to be scheduled.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
parent 9d6df3fd
/* /*
* Performance event support - PowerPC-specific definitions. * Performance event support - hardware-specific disambiguation
* *
* Copyright 2008-2009 Paul Mackerras, IBM Corporation. * For now this is a compile-time decision, but eventually it should be
* runtime. This would allow multiplatform perf event support for e300 (fsl
* embedded perf counters) plus server/classic, and would accommodate
* devices other than the core which provide their own performance counters.
*
* Copyright 2010 Freescale Semiconductor, Inc.
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License * modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version * as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version. * 2 of the License, or (at your option) any later version.
*/ */
#include <linux/types.h>
#include <asm/hw_irq.h>
#define MAX_HWEVENTS 8
#define MAX_EVENT_ALTERNATIVES 8
#define MAX_LIMITED_HWCOUNTERS 2
/*
 * This struct provides the constants and functions needed to
 * describe the PMU on a particular POWER-family CPU.
 *
 * An instance is handed to register_power_pmu().  The callbacks take
 * raw 64-bit event ids; their exact return conventions are not visible
 * in this header, so they are not restated here.
 */
struct power_pmu {
/* Name identifying this PMU description. */
const char *name;
/* NOTE(review): presumably the number of hardware counters - confirm. */
int n_counter;
/*
 * NOTE(review): presumably an upper bound on the number of entries
 * get_alternatives() stores in alt[] - confirm against the callers.
 */
int max_alternatives;
/*
 * Constants used when checking "add" and "NAND" constraint fields.
 * add_fields has a 1 in the least significant bit of each add field.
 * test_adder holds 2^(k-1) - 1 - N in a k-bit add field that limits a
 * class to N events, and has the least significant bit set in each
 * NAND field.
 */
unsigned long add_fields;
unsigned long test_adder;
/*
 * NOTE(review): judging by the parameter names, takes n_ev events
 * and fills in hwc[] and mmcr[]; semantics are not shown here.
 */
int (*compute_mmcr)(u64 events[], int n_ev,
unsigned int hwc[], unsigned long mmcr[]);
/*
 * Yields the 32/64-bit constraint mask and value for event_id,
 * expressing the constraints between this event and other events
 * (select, add and NAND bitfields).  NOTE(review): the mask and
 * value are presumably stored through mskp and valp.
 */
int (*get_constraint)(u64 event_id, unsigned long *mskp,
unsigned long *valp);
/*
 * The flags argument takes the PPMU_LIMITED_PMC_OK,
 * PPMU_LIMITED_PMC_REQD and PPMU_ONLY_COUNT_RUN values.
 * NOTE(review): alternatives are presumably written to alt[].
 */
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
/* NOTE(review): presumably edits mmcr[] so that counter pmc is off. */
void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
/*
 * NOTE(review): presumably reports whether event_id can be counted
 * on a limited PMC - confirm.
 */
int (*limited_pmc_event)(u64 event_id);
/* Bitmask of PPMU_LIMITED_PMC5_6 and PPMU_ALT_SIPR. */
u32 flags;
/*
 * NOTE(review): presumably the number of entries in generic_events
 * and a table indexed by generic hardware event type - confirm.
 */
int n_generic;
int *generic_events;
/*
 * Pointer to a three-dimensional table of ints dimensioned by cache
 * type, cache operation and cache result.
 */
int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
};
/*
* Values for power_pmu.flags
*/
#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */
#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */
/*
* Values for flags to get_alternatives()
*/
#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */
#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
extern int register_power_pmu(struct power_pmu *);
struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
#define PERF_EVENT_INDEX_OFFSET 1
/*
* Only override the default definitions in include/linux/perf_event.h
* if we have hardware PMU support.
*/
#ifdef CONFIG_PPC_PERF_CTRS #ifdef CONFIG_PPC_PERF_CTRS
#define perf_misc_flags(regs) perf_misc_flags(regs) #include <asm/perf_event_server.h>
#endif #endif
/* #ifdef CONFIG_FSL_EMB_PERF_EVENT
* The power_pmu.get_constraint function returns a 32/64-bit value and #include <asm/perf_event_fsl_emb.h>
* a 32/64-bit mask that express the constraints between this event_id and #endif
* other events.
*
* The value and mask are divided up into (non-overlapping) bitfields
* of three different types:
*
* Select field: this expresses the constraint that some set of bits
* in MMCR* needs to be set to a specific value for this event_id. For a
* select field, the mask contains 1s in every bit of the field, and
* the value contains a unique value for each possible setting of the
* MMCR* bits. The constraint checking code will ensure that two events
* that set the same field in their masks have the same value in their
* value dwords.
*
* Add field: this expresses the constraint that there can be at most
* N events in a particular class. A field of k bits can be used for
* N <= 2^(k-1) - 1. The mask has the most significant bit of the field
* set (and the other bits 0), and the value has only the least significant
* bit of the field set. In addition, the 'add_fields' and 'test_adder'
* in the struct power_pmu for this processor come into play. The
* add_fields value contains 1 in the LSB of the field, and the
* test_adder contains 2^(k-1) - 1 - N in the field.
*
* NAND field: this expresses the constraint that you may not have events
* in all of a set of classes. (For example, on PPC970, you can't select
* events from the FPU, ISU and IDU simultaneously, although any two are
* possible.) For N classes, the field is N+1 bits wide, and each class
* is assigned one bit from the least-significant N bits. The mask has
* only the most-significant bit set, and the value has only the bit
* for the event_id's class set. The test_adder has the least significant
* bit set in the field.
*
* If an event_id is not subject to the constraint expressed by a particular
* field, then it will have 0 in both the mask and value for that field.
*/
/*
* Performance event support - Freescale embedded specific definitions.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
* Copyright 2010 Freescale Semiconductor, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#include <asm/hw_irq.h>
#define MAX_HWEVENTS 4
/*
 * event flags
 *
 * Bits of the u64 returned by fsl_emb_pmu.xlate_event().
 * FSL_EMB_EVENT_VALID marks an event the PMU accepts;
 * FSL_EMB_EVENT_RESTRICTED marks a restricted event (see
 * fsl_emb_pmu.n_restricted for where such events may be placed).
 */
#define FSL_EMB_EVENT_VALID 1
#define FSL_EMB_EVENT_RESTRICTED 2
/*
 * upper half of event flags is PMLCb
 *
 * FSL_EMB_EVENT_THRESHMUL covers bits 40-42 and FSL_EMB_EVENT_THRESH
 * covers bits 32-37 of the 64-bit value.
 * NOTE(review): presumably the PMLCb threshold-multiplier and threshold
 * fields shifted up by 32 - confirm against the core manual.
 */
#define FSL_EMB_EVENT_THRESHMUL 0x0000070000000000ULL
#define FSL_EMB_EVENT_THRESH 0x0000003f00000000ULL
/*
 * Description of one Freescale embedded performance monitor
 * implementation.  An instance is passed to register_fsl_emb_pmu().
 */
struct fsl_emb_pmu {
/* Name identifying this PMU description, e.g. "e500 family". */
const char *name;
int n_counter; /* total number of counters */
/*
* The number of contiguous counters starting at zero that
* can hold restricted events, or zero if there are no
* restricted events.
*
* This isn't a very flexible method of expressing constraints,
* but it's very simple and is adequate for existing chips.
*/
int n_restricted;
/*
 * Returns event flags and PMLCb (FSL_EMB_EVENT_*) for the raw
 * event_id.  The e500 implementation returns 0 for an event it
 * rejects.
 */
u64 (*xlate_event)(u64 event_id);
/*
 * generic_events maps generic hardware event types to raw event
 * codes, zero if unsupported; n_generic is the number of entries
 * (the e500 PMU sets it with ARRAY_SIZE()).
 */
int n_generic;
int *generic_events;
/*
 * Pointer to the table of generalized cache events, indexed by cache
 * type, operation and result.  In the e500 table 0 means not
 * supported, -1 means nonsensical, other values are event codes.
 */
int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
};
int register_fsl_emb_pmu(struct fsl_emb_pmu *);
/*
* Performance event support - PowerPC classic/server specific definitions.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#include <asm/hw_irq.h>
#define MAX_HWEVENTS 8
#define MAX_EVENT_ALTERNATIVES 8
#define MAX_LIMITED_HWCOUNTERS 2
/*
 * This struct provides the constants and functions needed to
 * describe the PMU on a particular POWER-family CPU.
 *
 * An instance is handed to register_power_pmu().  The callbacks take
 * raw 64-bit event ids; their exact return conventions are not visible
 * in this header, so they are not restated here.
 */
struct power_pmu {
/* Name identifying this PMU description. */
const char *name;
/* NOTE(review): presumably the number of hardware counters - confirm. */
int n_counter;
/*
 * NOTE(review): presumably an upper bound on the number of entries
 * get_alternatives() stores in alt[] - confirm against the callers.
 */
int max_alternatives;
/*
 * Constants used when checking "add" and "NAND" constraint fields.
 * add_fields has a 1 in the least significant bit of each add field.
 * test_adder holds 2^(k-1) - 1 - N in a k-bit add field that limits a
 * class to N events, and has the least significant bit set in each
 * NAND field.
 */
unsigned long add_fields;
unsigned long test_adder;
/*
 * NOTE(review): judging by the parameter names, takes n_ev events
 * and fills in hwc[] and mmcr[]; semantics are not shown here.
 */
int (*compute_mmcr)(u64 events[], int n_ev,
unsigned int hwc[], unsigned long mmcr[]);
/*
 * Yields the 32/64-bit constraint mask and value for event_id,
 * expressing the constraints between this event and other events
 * (select, add and NAND bitfields).  NOTE(review): the mask and
 * value are presumably stored through mskp and valp.
 */
int (*get_constraint)(u64 event_id, unsigned long *mskp,
unsigned long *valp);
/*
 * The flags argument takes the PPMU_LIMITED_PMC_OK,
 * PPMU_LIMITED_PMC_REQD and PPMU_ONLY_COUNT_RUN values.
 * NOTE(review): alternatives are presumably written to alt[].
 */
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
/* NOTE(review): presumably edits mmcr[] so that counter pmc is off. */
void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
/*
 * NOTE(review): presumably reports whether event_id can be counted
 * on a limited PMC - confirm.
 */
int (*limited_pmc_event)(u64 event_id);
/* Bitmask of PPMU_LIMITED_PMC5_6 and PPMU_ALT_SIPR. */
u32 flags;
/*
 * NOTE(review): presumably the number of entries in generic_events
 * and a table indexed by generic hardware event type - confirm.
 */
int n_generic;
int *generic_events;
/*
 * Pointer to a three-dimensional table of ints dimensioned by cache
 * type, cache operation and cache result.
 */
int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
};
/*
* Values for power_pmu.flags
*/
#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */
#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */
/*
* Values for flags to get_alternatives()
*/
#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */
#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
extern int register_power_pmu(struct power_pmu *);
struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
#define PERF_EVENT_INDEX_OFFSET 1
/*
* Only override the default definitions in include/linux/perf_event.h
* if we have hardware PMU support.
*/
#ifdef CONFIG_PPC_PERF_CTRS
#define perf_misc_flags(regs) perf_misc_flags(regs)
#endif
/*
* The power_pmu.get_constraint function returns a 32/64-bit value and
* a 32/64-bit mask that express the constraints between this event_id and
* other events.
*
* The value and mask are divided up into (non-overlapping) bitfields
* of three different types:
*
* Select field: this expresses the constraint that some set of bits
* in MMCR* needs to be set to a specific value for this event_id. For a
* select field, the mask contains 1s in every bit of the field, and
* the value contains a unique value for each possible setting of the
* MMCR* bits. The constraint checking code will ensure that two events
* that set the same field in their masks have the same value in their
* value dwords.
*
* Add field: this expresses the constraint that there can be at most
* N events in a particular class. A field of k bits can be used for
* N <= 2^(k-1) - 1. The mask has the most significant bit of the field
* set (and the other bits 0), and the value has only the least significant
* bit of the field set. In addition, the 'add_fields' and 'test_adder'
* in the struct power_pmu for this processor come into play. The
* add_fields value contains 1 in the LSB of the field, and the
* test_adder contains 2^(k-1) - 1 - N in the field.
*
* NAND field: this expresses the constraint that you may not have events
* in all of a set of classes. (For example, on PPC970, you can't select
* events from the FPU, ISU and IDU simultaneously, although any two are
* possible.) For N classes, the field is N+1 bits wide, and each class
* is assigned one bit from the least-significant N bits. The mask has
* only the most-significant bit set, and the value has only the bit
* for the event_id's class set. The test_adder has the least significant
* bit set in the field.
*
* If an event_id is not subject to the constraint expressed by a particular
* field, then it will have 0 in both the mask and value for that field.
*/
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#define PMLCA_FCM0 0x08000000 /* Freeze when PMM==0 */ #define PMLCA_FCM0 0x08000000 /* Freeze when PMM==0 */
#define PMLCA_CE 0x04000000 /* Condition Enable */ #define PMLCA_CE 0x04000000 /* Condition Enable */
#define PMLCA_EVENT_MASK 0x007f0000 /* Event field */ #define PMLCA_EVENT_MASK 0x00ff0000 /* Event field */
#define PMLCA_EVENT_SHIFT 16 #define PMLCA_EVENT_SHIFT 16
#define PMRN_PMLCB0 0x110 /* PM Local Control B0 */ #define PMRN_PMLCB0 0x110 /* PM Local Control B0 */
......
...@@ -99,11 +99,15 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o ...@@ -99,11 +99,15 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o
obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o power5+-pmu.o power6-pmu.o power7-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT) += perf_event_fsl_emb.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
ifneq ($(CONFIG_PPC_INDIRECT_IO),y) ifneq ($(CONFIG_PPC_INDIRECT_IO),y)
......
...@@ -1808,7 +1808,7 @@ static struct cpu_spec __initdata cpu_specs[] = { ...@@ -1808,7 +1808,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.icache_bsize = 64, .icache_bsize = 64,
.dcache_bsize = 64, .dcache_bsize = 64,
.num_pmcs = 4, .num_pmcs = 4,
.oprofile_cpu_type = "ppc/e500", /* xxx - galak, e500mc? */ .oprofile_cpu_type = "ppc/e500mc",
.oprofile_type = PPC_OPROFILE_FSL_EMB, .oprofile_type = PPC_OPROFILE_FSL_EMB,
.cpu_setup = __setup_cpu_e500mc, .cpu_setup = __setup_cpu_e500mc,
.machine_check = machine_check_e500, .machine_check = machine_check_e500,
......
/*
* Performance counter support for e500 family processors.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
* Copyright 2010 Freescale Semiconductor, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
#include <linux/perf_event.h>
#include <asm/reg.h>
#include <asm/cputable.h>
/*
 * Map of generic hardware event types to hardware events
 * Zero if unsupported
 *
 * The array is indexed by PERF_COUNT_HW_* and the values are raw e500
 * event numbers.  Any generic event type without an initializer below
 * is implicitly zero, i.e. unsupported.  The array is published through
 * e500_pmu.generic_events, with its length in e500_pmu.n_generic.
 */
static int e500_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 1,
[PERF_COUNT_HW_INSTRUCTIONS] = 2,
[PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */
/* Same event codes as the BPU read access/miss entries in the cache table */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12,
[PERF_COUNT_HW_BRANCH_MISSES] = 15,
};
/* Shorthand: C(L1D) expands to PERF_COUNT_HW_CACHE_L1D, and so on. */
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
 * Table of generalized cache-related events.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 *
 * Indexed as [cache type][operation][result]; each innermost pair is
 * { RESULT_ACCESS, RESULT_MISS }.  Cache types with no initializer
 * below are implicitly all zero, i.e. not supported.
 */
static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
/*
 * D-cache misses are not split into read/write/prefetch;
 * use raw event 41.
 *
 * Hence every RESULT_MISS slot for L1D is 0 (not supported).
 */
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 27, 0 },
[C(OP_WRITE)] = { 28, 0 },
[C(OP_PREFETCH)] = { 29, 0 },
},
/*
 * The L1I read-access code (2) is the same code the generic event
 * table uses for PERF_COUNT_HW_INSTRUCTIONS.
 */
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 2, 60 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
/*
* Assuming LL means L2, it's not a good match for this model.
* It allocates only on L1 castout or explicit prefetch, and
* does not have separate read/write events (but it does have
* separate instruction/data events).
*
* All LL entries are therefore left as 0 (not supported).
*/
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0, 0 },
},
/*
* There are data/instruction MMU misses, but that's a miss on
* the chip's internal level-one TLB which is probably not
* what the user wants. Instead, unified level-two TLB misses
* are reported here.
*/
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 26, 66 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
/*
 * Branch events: same codes as PERF_COUNT_HW_BRANCH_INSTRUCTIONS (12)
 * and PERF_COUNT_HW_BRANCH_MISSES (15) in the generic event table.
 */
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 12, 15 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
/*
 * Number of raw event codes accepted; event codes at or above this
 * value are rejected.  Defaults to 128 and is raised to 256 by
 * init_e500_pmu() when running on an e500mc.
 */
static int num_events = 128;

/*
 * Translate a raw event id into FSL_EMB_EVENT_* flags.
 *
 * The low 32 bits of event_id hold the event code; the upper half is
 * PMLCb, used for threshold events.
 *
 * Returns 0 if the event code is out of range, or if threshold bits
 * are requested on an event outside the threshold-capable range
 * 76..81.  Otherwise returns FSL_EMB_EVENT_VALID; for events 76..81
 * FSL_EMB_EVENT_RESTRICTED and the caller's threshold bits are OR-ed in
 * as well.
 */
static u64 e500_xlate_event(u64 event_id)
{
	const u32 code = (u32)event_id;
	const u64 thresh_bits = event_id &
		(FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH);

	if (code >= num_events)
		return 0;

	if (code < 76 || code > 81) {
		/* Threshold requested on non-threshold event */
		return thresh_bits ? 0 : FSL_EMB_EVENT_VALID;
	}

	return FSL_EMB_EVENT_VALID | FSL_EMB_EVENT_RESTRICTED | thresh_bits;
}
/*
 * PMU description for the e500 family; handed to
 * register_fsl_emb_pmu() by init_e500_pmu().
 */
static struct fsl_emb_pmu e500_pmu = {
.name = "e500 family",
/* Four counters in total. */
.n_counter = 4,
/*
 * Restricted events (those e500_xlate_event() flags with
 * FSL_EMB_EVENT_RESTRICTED) can only be held by the first two
 * counters.
 */
.n_restricted = 2,
.xlate_event = e500_xlate_event,
.n_generic = ARRAY_SIZE(e500_generic_events),
.generic_events = e500_generic_events,
.cache_events = &e500_cache_events,
};
/*
 * Register the e500 PMU description if the running CPU is an e500 or
 * e500mc, as identified by cur_cpu_spec->oprofile_cpu_type.
 *
 * On e500mc the accepted event range is widened to 256 codes first.
 * Returns -ENODEV if the CPU type string is missing or names any other
 * CPU; otherwise returns the result of register_fsl_emb_pmu().
 */
static int init_e500_pmu(void)
{
	const char *cpu_type = cur_cpu_spec->oprofile_cpu_type;

	if (!cpu_type)
		return -ENODEV;

	if (strcmp(cpu_type, "ppc/e500mc") == 0) {
		num_events = 256;
	} else if (strcmp(cpu_type, "ppc/e500") != 0) {
		return -ENODEV;
	}

	return register_fsl_emb_pmu(&e500_pmu);
}
arch_initcall(init_e500_pmu);
This diff is collapsed.
...@@ -144,6 +144,16 @@ config FSL_EMB_PERFMON ...@@ -144,6 +144,16 @@ config FSL_EMB_PERFMON
and some e300 cores (c3 and c4). Select this only if your and some e300 cores (c3 and c4). Select this only if your
core supports the Embedded Performance Monitor APU core supports the Embedded Performance Monitor APU
# Promptless symbol: it has no user-visible prompt and defaults to y
# whenever its dependencies are met.  It builds perf_event_fsl_emb.o and
# is only available when PPC_PERF_CTRS (server/classic perf counters) is
# disabled.
config FSL_EMB_PERF_EVENT
bool
depends on FSL_EMB_PERFMON && PERF_EVENTS && !PPC_PERF_CTRS
default y
# Promptless symbol: defaults to y on E500 when FSL_EMB_PERF_EVENT is
# enabled.  It builds e500-pmu.o, the e500-specific event tables.
config FSL_EMB_PERF_EVENT_E500
bool
depends on FSL_EMB_PERF_EVENT && E500
default y
config 4xx config 4xx
bool bool
depends on 40x || 44x depends on 40x || 44x
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment