linux / linux-davinci

Commit 1d2f3794, authored Jul 22, 2009 by Peter Zijlstra

    Merge commit 'tip/perfcounters/core' into perf-counters-for-linus

Parents: 1483b19f, f1c6a581

Showing 10 changed files with 556 additions and 142 deletions (+556 -142):
arch/x86/kernel/cpu/perf_counter.c        +235  -20
kernel/perf_counter.c                      +17  -19
tools/perf/Documentation/perf-report.txt   +15   -0
tools/perf/builtin-report.c               +180  -47
tools/perf/perf.h                           +7   -1
tools/perf/util/include/linux/kernel.h      +8   -0
tools/perf/util/strlist.c                  +18   -2
tools/perf/util/strlist.h                   +9   -2
tools/perf/util/symbol.c                   +66  -51
tools/perf/util/symbol.h                    +1   -0
arch/x86/kernel/cpu/perf_counter.c

@@ -65,6 +65,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
 	.enabled = 1,
 };
 
+/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0000,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0000,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
+};
+
+static u64 p6_pmu_event_map(int event)
+{
+	return p6_perfmon_event_map[event];
+}
+
+/*
+ * Counter setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_COUNTER			0x0000002EULL
+
+static u64 p6_pmu_raw_event(u64 event)
+{
+#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define P6_EVNTSEL_INV_MASK		0x00800000ULL
+#define P6_EVNTSEL_COUNTER_MASK		0xFF000000ULL
+
+#define P6_EVNTSEL_MASK			\
+	(P6_EVNTSEL_EVENT_MASK |	\
+	 P6_EVNTSEL_UNIT_MASK  |	\
+	 P6_EVNTSEL_EDGE_MASK  |	\
+	 P6_EVNTSEL_INV_MASK   |	\
+	 P6_EVNTSEL_COUNTER_MASK)
+
+	return event & P6_EVNTSEL_MASK;
+}
+
 /*
  * Intel PerfMon v3. Used on Core2 and later.
  */
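
Aside: the effect of p6_pmu_raw_event() is easiest to see in isolation. Below is a standalone sketch (not part of the patch; the mask values are copied from the hunk above, and main() is purely illustrative). Note that bit 22, the per-counter enable bit, is not covered by P6_EVNTSEL_MASK, so a stray enable bit in a user-supplied raw config is stripped and the enable/disable paths OR it back in themselves:

	#include <stdio.h>
	#include <stdint.h>

	#define P6_EVNTSEL_EVENT_MASK	0x000000FFULL
	#define P6_EVNTSEL_UNIT_MASK	0x0000FF00ULL
	#define P6_EVNTSEL_EDGE_MASK	0x00040000ULL
	#define P6_EVNTSEL_INV_MASK	0x00800000ULL
	#define P6_EVNTSEL_COUNTER_MASK	0xFF000000ULL
	#define P6_EVNTSEL_MASK			\
		(P6_EVNTSEL_EVENT_MASK |	\
		 P6_EVNTSEL_UNIT_MASK  |	\
		 P6_EVNTSEL_EDGE_MASK  |	\
		 P6_EVNTSEL_INV_MASK   |	\
		 P6_EVNTSEL_COUNTER_MASK)

	int main(void)
	{
		/* event 0x2e (L2_RQSTS), unit mask 0x4f, plus a stray enable bit */
		uint64_t raw = 0x2eULL | (0x4fULL << 8) | (1ULL << 22);

		/* bit 22 is not in the mask, so it is filtered out */
		printf("raw=%#llx masked=%#llx\n",
		       (unsigned long long)raw,
		       (unsigned long long)(raw & P6_EVNTSEL_MASK));
		return 0;
	}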
@@ -666,6 +712,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 {
 	struct perf_counter_attr *attr = &counter->attr;
 	struct hw_perf_counter *hwc = &counter->hw;
+	u64 config;
 	int err;
 
 	if (!x86_pmu_initialized())
@@ -718,14 +765,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
...
@@ -718,14 +765,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
if
(
attr
->
config
>=
x86_pmu
.
max_events
)
if
(
attr
->
config
>=
x86_pmu
.
max_events
)
return
-
EINVAL
;
return
-
EINVAL
;
/*
/*
* The generic map:
* The generic map:
*/
*/
hwc
->
config
|=
x86_pmu
.
event_map
(
attr
->
config
);
config
=
x86_pmu
.
event_map
(
attr
->
config
);
if
(
config
==
0
)
return
-
ENOENT
;
if
(
config
==
-
1LL
)
return
-
EINVAL
;
hwc
->
config
|=
config
;
return
0
;
return
0
;
}
}
static
void
p6_pmu_disable_all
(
void
)
{
struct
cpu_hw_counters
*
cpuc
=
&
__get_cpu_var
(
cpu_hw_counters
);
u64
val
;
if
(
!
cpuc
->
enabled
)
return
;
cpuc
->
enabled
=
0
;
barrier
();
/* p6 only has one enable register */
rdmsrl
(
MSR_P6_EVNTSEL0
,
val
);
val
&=
~
ARCH_PERFMON_EVENTSEL0_ENABLE
;
wrmsrl
(
MSR_P6_EVNTSEL0
,
val
);
}
static
void
intel_pmu_disable_all
(
void
)
static
void
intel_pmu_disable_all
(
void
)
{
{
wrmsrl
(
MSR_CORE_PERF_GLOBAL_CTRL
,
0
);
wrmsrl
(
MSR_CORE_PERF_GLOBAL_CTRL
,
0
);
@@ -767,6 +840,23 @@ void hw_perf_disable(void)
 	return x86_pmu.disable_all();
 }
 
+static void p6_pmu_enable_all(void)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	unsigned long val;
+
+	if (cpuc->enabled)
+		return;
+
+	cpuc->enabled = 1;
+	barrier();
+
+	/* p6 only has one enable register */
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
 static void intel_pmu_enable_all(void)
 {
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
@@ -784,13 +874,13 @@ static void amd_pmu_enable_all(void)
 	barrier();
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		struct perf_counter *counter = cpuc->counters[idx];
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
-		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
-		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
-			continue;
+
+		val = counter->hw.config;
 		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
 	}
@@ -819,16 +909,13 @@ static inline void intel_pmu_ack_status(u64 ack)
 static inline void
 x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 {
-	int err;
-	err = checking_wrmsrl(hwc->config_base + idx,
+	(void)checking_wrmsrl(hwc->config_base + idx,
 			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
 }
 
 static inline void
 x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
 {
-	int err;
-	err = checking_wrmsrl(hwc->config_base + idx, hwc->config);
+	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
 }
 
 static inline void
@@ -836,13 +923,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
-	int err;
 
 	mask = 0xfULL << (idx * 4);
 
 	rdmsrl(hwc->config_base, ctrl_val);
 	ctrl_val &= ~mask;
-	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
+p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	u64 val = P6_NOP_COUNTER;
+
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
 }
 
 static inline void
@@ -943,6 +1041,19 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
 	err = checking_wrmsrl(hwc->config_base, ctrl_val);
 }
 
+static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
+}
+
 static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 {
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -959,8 +1070,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 	if (cpuc->enabled)
 		x86_pmu_enable_counter(hwc, idx);
-	else
-		x86_pmu_disable_counter(hwc, idx);
 }
 
 static int
@@ -1176,6 +1285,49 @@ static void intel_pmu_reset(void)
 	local_irq_restore(flags);
 }
 
+static int p6_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_counters *cpuc;
+	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
+	int idx, handled = 0;
+	u64 val;
+
+	data.regs = regs;
+	data.addr = 0;
+
+	cpuc = &__get_cpu_var(cpu_hw_counters);
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		counter = cpuc->counters[idx];
+		hwc = &counter->hw;
+
+		val = x86_perf_counter_update(counter, hwc, idx);
+		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+			continue;
+
+		/*
+		 * counter overflow
+		 */
+		handled		= 1;
+		data.period	= counter->hw.last_period;
+
+		if (!x86_perf_counter_set_period(counter, hwc, idx))
+			continue;
+
+		if (perf_counter_overflow(counter, 1, &data))
+			p6_pmu_disable_counter(hwc, idx);
+	}
+
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
+	return handled;
+}
+
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
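
Aside: the overflow test above, val & (1ULL << (x86_pmu.counter_bits - 1)), relies on counters being armed with a negative start value: while the sign bit is still set the counter is mid-period, and a clear sign bit means the period has elapsed. A minimal userspace sketch of that arithmetic (values made up for illustration; this is not code from the patch):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int counter_bits = 32;			/* P6 effective width */
		uint64_t period  = 100000;
		uint64_t mask    = (1ULL << 32) - 1;
		uint64_t val     = (0ULL - period) & mask;	/* armed value */

		/* mid-period: sign bit still set, the handler skips the counter */
		printf("overflowed: %d\n", !(val & (1ULL << (counter_bits - 1))));

		val = (val + period) & mask;		/* 'period' events later */
		/* sign bit cleared: p6_pmu_handle_irq() treats this as overflow */
		printf("overflowed: %d\n", !(val & (1ULL << (counter_bits - 1))));
		return 0;
	}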
@@ -1185,14 +1337,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
 	struct cpu_hw_counters *cpuc;
-	int bit, cpu, loops;
+	int bit, loops;
 	u64 ack, status;
 
 	data.regs = regs;
 	data.addr = 0;
 
-	cpu = smp_processor_id();
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
+	cpuc = &__get_cpu_var(cpu_hw_counters);
 
 	perf_disable();
 	status = intel_pmu_get_status();
@@ -1249,14 +1400,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 	struct cpu_hw_counters *cpuc;
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
-	int cpu, idx, handled = 0;
+	int idx, handled = 0;
 	u64 val;
 
 	data.regs = regs;
 	data.addr = 0;
 
-	cpu = smp_processor_id();
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
+	cpuc = &__get_cpu_var(cpu_hw_counters);
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
@@ -1353,6 +1503,32 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 	.priority		= 1
 };
 
+static struct x86_pmu p6_pmu = {
+	.name			= "p6",
+	.handle_irq		= p6_pmu_handle_irq,
+	.disable_all		= p6_pmu_disable_all,
+	.enable_all		= p6_pmu_enable_all,
+	.enable			= p6_pmu_enable_counter,
+	.disable		= p6_pmu_disable_counter,
+	.eventsel		= MSR_P6_EVNTSEL0,
+	.perfctr		= MSR_P6_PERFCTR0,
+	.event_map		= p6_pmu_event_map,
+	.raw_event		= p6_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
+	.max_period		= (1ULL << 31) - 1,
+	.version		= 0,
+	.num_counters		= 2,
+	/*
+	 * Counters have 40 bits implemented. However they are designed such
+	 * that bits [32-39] are sign extensions of bit 31. As such the
+	 * effective width of a counter for P6-like PMU is 32 bits only.
+	 *
+	 * See IA-32 Intel Architecture Software developer manual Vol 3B
+	 */
+	.counter_bits		= 32,
+	.counter_mask		= (1ULL << 32) - 1,
+};
+
 static struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
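
Aside: the .counter_bits/.counter_mask comment above can be made concrete. On P6 the hardware implements 40 counter bits, but bits 32-39 always mirror bit 31, so only 32 bits carry information. A sketch of that behaviour (the sign-extension helper is a model of the hardware, not kernel code):

	#include <stdio.h>
	#include <stdint.h>

	/* model: the P6 counter sign-extends bit 31 into bits 32-39 */
	static uint64_t p6_read_model(uint32_t low32)
	{
		uint64_t val = low32;

		if (val & (1ULL << 31))
			val |= 0xffULL << 32;
		return val;
	}

	int main(void)
	{
		uint64_t hw = p6_read_model(0x90000000);	/* bit 31 set */

		/* masking with counter_mask recovers all the real information */
		printf("hw=%#llx low32=%#llx\n",
		       (unsigned long long)hw,
		       (unsigned long long)(hw & ((1ULL << 32) - 1)));
		return 0;
	}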
@@ -1392,6 +1568,39 @@ static struct x86_pmu amd_pmu = {
 	.max_period		= (1ULL << 47) - 1,
 };
 
+static int p6_pmu_init(void)
+{
+	int high, low;
+
+	switch (boot_cpu_data.x86_model) {
+	case 1:
+	case 3:  /* Pentium Pro */
+	case 5:
+	case 6:  /* Pentium II */
+	case 7:
+	case 8:
+	case 11: /* Pentium III */
+		break;
+	case 9:
+	case 13:
+		/* Pentium M */
+		break;
+	default:
+		pr_cont("unsupported p6 CPU model %d ",
+			boot_cpu_data.x86_model);
+		return -ENODEV;
+	}
+
+	if (!cpu_has_apic) {
+		pr_info("no Local APIC, try rebooting with lapic");
+		return -ENODEV;
+	}
+
+	x86_pmu = p6_pmu;
+
+	return 0;
+}
+
 static int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
@@ -1400,8 +1609,14 @@ static int intel_pmu_init(void)
 	unsigned int ebx;
 	int version;
 
-	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
-		return -ENODEV;
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+		/* check for P6 processor family */
+		if (boot_cpu_data.x86 == 6) {
+			return p6_pmu_init();
+		} else {
+			return -ENODEV;
+		}
+	}
 
 	/*
 	 * Check whether the Architectural PerfMon supports
kernel/perf_counter.c

@@ -146,6 +146,14 @@ static void put_ctx(struct perf_counter_context *ctx)
 	}
 }
 
+static void unclone_ctx(struct perf_counter_context *ctx)
+{
+	if (ctx->parent_ctx) {
+		put_ctx(ctx->parent_ctx);
+		ctx->parent_ctx = NULL;
+	}
+}
+
 /*
  * Get the perf_counter_context for a task and lock it.
  * This has to cope with with the fact that until it is locked,
@@ -1463,10 +1471,8 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
 	/*
 	 * Unclone this context if we enabled any counter.
 	 */
-	if (enabled && ctx->parent_ctx) {
-		put_ctx(ctx->parent_ctx);
-		ctx->parent_ctx = NULL;
-	}
+	if (enabled)
+		unclone_ctx(ctx);
 
 	spin_unlock(&ctx->lock);
@@ -1526,7 +1532,6 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 {
-	struct perf_counter_context *parent_ctx;
 	struct perf_counter_context *ctx;
 	struct perf_cpu_context *cpuctx;
 	struct task_struct *task;
@@ -1586,11 +1591,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
  retry:
 	ctx = perf_lock_task_context(task, &flags);
 	if (ctx) {
-		parent_ctx = ctx->parent_ctx;
-		if (parent_ctx) {
-			put_ctx(parent_ctx);
-			ctx->parent_ctx = NULL;		/* no longer a clone */
-		}
+		unclone_ctx(ctx);
 		spin_unlock_irqrestore(&ctx->lock, flags);
 	}
@@ -4262,15 +4263,12 @@ void perf_counter_exit_task(struct task_struct *child)
 	 */
 	spin_lock(&child_ctx->lock);
 	child->perf_counter_ctxp = NULL;
-	if (child_ctx->parent_ctx) {
-		/*
-		 * This context is a clone; unclone it so it can't get
-		 * swapped to another process while we're removing all
-		 * the counters from it.
-		 */
-		put_ctx(child_ctx->parent_ctx);
-		child_ctx->parent_ctx = NULL;
-	}
+	/*
+	 * If this context is a clone; unclone it so it can't get
+	 * swapped to another process while we're removing all
+	 * the counters from it.
+	 */
+	unclone_ctx(child_ctx);
 	spin_unlock(&child_ctx->lock);
 	local_irq_restore(flags);
tools/perf/Documentation/perf-report.txt

@@ -24,6 +24,9 @@ OPTIONS
 --dsos=::
 	Only consider symbols in these dsos. CSV that understands
 	file://filename entries.
+-n
+--show-nr-samples
+	Show the number of samples for each symbol
 -C::
 --comms=::
 	Only consider symbols in these comms. CSV that understands
@@ -33,6 +36,18 @@ OPTIONS
 	Only consider these symbols. CSV that understands
 	file://filename entries.
+-w::
+--field-width=::
+	Force each column width to the provided list, for large terminal
+	readability.
+
+-t::
+--field-separator=::
+
+	Use a special separator character and don't pad with spaces, replacing
+	all occurances of this separator in symbol names (and other output)
+	with a '.' character, that thus it's the only non valid separator.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
tools/perf/builtin-report.c

(diff collapsed in the page capture; +180 -47, not shown)
tools/perf/perf.h

 #ifndef _PERF_PERF_H
 #define _PERF_PERF_H
 
-#if defined(__x86_64__) || defined(__i386__)
+#if defined(__i386__)
+#include "../../arch/x86/include/asm/unistd.h"
+#define rmb()		asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#if defined(__x86_64__)
 #include "../../arch/x86/include/asm/unistd.h"
 #define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
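
Aside: a hypothetical userspace pairing of these macros (adapted for the demo: the trailing semicolon is dropped and a no-op fallback is added so non-x86 still compiles; the reader/writer scenario is invented, not perf code). cpu_relax() is the polite busy-wait, and rmb() orders the flag read before the data read:

	#include <stdio.h>

	#if defined(__i386__)
	#define rmb()		asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
	#define cpu_relax()	asm volatile("rep; nop" ::: "memory")
	#elif defined(__x86_64__)
	#define rmb()		asm volatile("lfence" ::: "memory")
	#define cpu_relax()	asm volatile("rep; nop" ::: "memory")
	#else
	#define rmb()		asm volatile("" ::: "memory")	/* demo fallback */
	#define cpu_relax()	asm volatile("" ::: "memory")	/* demo fallback */
	#endif

	static volatile int ready;
	static int payload;

	static int wait_for_payload(void)
	{
		while (!ready)
			cpu_relax();	/* spin without hogging the pipeline */
		rmb();			/* read 'ready' before reading 'payload' */
		return payload;
	}

	int main(void)
	{
		payload = 42;		/* normally stored by another thread */
		ready = 1;
		printf("%d\n", wait_for_payload());
		return 0;
	}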
tools/perf/util/include/linux/kernel.h

@@ -18,4 +18,12 @@
 	(type *)((char *)__mptr - offsetof(type, member)); })
 #endif
 
+#ifndef max
+#define max(x, y) ({				\
+	typeof(x) _max1 = (x);			\
+	typeof(y) _max2 = (y);			\
+	(void) (&_max1 == &_max2);		\
+	_max1 > _max2 ? _max1 : _max2; })
+#endif
+
 #endif
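
Aside: in the max() macro just added, the line (void) (&_max1 == &_max2) is never used for its value; comparing two pointers of incompatible types is what makes the compiler warn when x and y differ in type. A minimal sketch (GCC extensions, as in the original):

	#include <stdio.h>

	#define max(x, y) ({				\
		typeof(x) _max1 = (x);			\
		typeof(y) _max2 = (y);			\
		(void) (&_max1 == &_max2);		\
		_max1 > _max2 ? _max1 : _max2; })

	int main(void)
	{
		int a = 3, b = 7;

		printf("%d\n", max(a, b));	/* 7; max(a, 1.5) would warn */
		return 0;
	}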
tools/perf/util/strlist.c

@@ -64,6 +64,7 @@ int strlist__add(struct strlist *self, const char *new_entry)
 	rb_link_node(&sn->rb_node, parent, p);
 	rb_insert_color(&sn->rb_node, &self->entries);
+	++self->nr_entries;
 
 	return 0;
 }
@@ -157,6 +158,7 @@ struct strlist *strlist__new(bool dupstr, const char *slist)
 	if (self != NULL) {
 		self->entries	 = RB_ROOT;
 		self->dupstr	 = dupstr;
+		self->nr_entries = 0;
 		if (slist && strlist__parse_list(self, slist) != 0)
 			goto out_error;
 	}
@@ -182,3 +184,17 @@ void strlist__delete(struct strlist *self)
 		free(self);
 	}
 }
+
+struct str_node *strlist__entry(const struct strlist *self, unsigned int idx)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
+		struct str_node *pos = rb_entry(nd, struct str_node, rb_node);
+
+		if (!idx--)
+			return pos;
+	}
+
+	return NULL;
+}
tools/perf/util/strlist.h

@@ -11,6 +11,7 @@ struct str_node {
 struct strlist {
 	struct rb_root entries;
+	unsigned int   nr_entries;
 	bool	       dupstr;
 };
@@ -21,11 +22,17 @@ void strlist__remove(struct strlist *self, struct str_node *sn);
 int strlist__load(struct strlist *self, const char *filename);
 int strlist__add(struct strlist *self, const char *str);
+struct str_node *strlist__entry(const struct strlist *self, unsigned int idx);
 bool strlist__has_entry(struct strlist *self, const char *entry);
 
 static inline bool strlist__empty(const struct strlist *self)
 {
-	return rb_first(&self->entries) == NULL;
+	return self->nr_entries == 0;
+}
+
+static inline unsigned int strlist__nr_entries(const struct strlist *self)
+{
+	return self->nr_entries;
 }
 
 int strlist__parse_list(struct strlist *self, const char *s);
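
Aside: taken together, the strlist.c and strlist.h hunks add O(1) size queries and indexed access. A hypothetical caller (assumes it is compiled inside tools/perf and linked against util/strlist.o; it also assumes the str_node 's' member holds the string, per strlist.h of this tree):

	#include <stdio.h>
	#include <stdbool.h>
	#include "util/strlist.h"

	int main(void)
	{
		struct strlist *sl = strlist__new(true, "foo,bar,baz");
		unsigned int i;

		if (sl == NULL)
			return 1;

		/* entries come back in rb-tree (sorted) order */
		for (i = 0; i < strlist__nr_entries(sl); ++i)
			printf("%u: %s\n", i, strlist__entry(sl, i)->s);

		strlist__delete(sl);
		return 0;
	}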
tools/perf/util/symbol.c

@@ -65,6 +65,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size)
 		self->syms = RB_ROOT;
 		self->sym_priv_size = sym_priv_size;
 		self->find_symbol = dso__find_symbol;
+		self->slen_calculated = 0;
 	}
 
 	return self;
@@ -373,36 +374,61 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
 		idx < nr_entries; \
 	     ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))
 
-static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf,
-				       GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym,
-				       GElf_Shdr *shdr_dynsym,
-				       size_t dynsym_idx, int verbose)
+/*
+ * We need to check if we have a .dynsym, so that we can handle the
+ * .plt, synthesizing its symbols, that aren't on the symtabs (be it
+ * .dynsym or .symtab).
+ * And always look at the original dso, not at debuginfo packages, that
+ * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
+ */
+static int dso__synthesize_plt_symbols(struct dso *self, int verbose)
 {
 	uint32_t nr_rel_entries, idx;
 	GElf_Sym sym;
 	u64 plt_offset;
 	GElf_Shdr shdr_plt;
 	struct symbol *f;
-	GElf_Shdr shdr_rel_plt;
+	GElf_Shdr shdr_rel_plt, shdr_dynsym;
 	Elf_Data *reldata, *syms, *symstrs;
-	Elf_Scn *scn_plt_rel, *scn_symstrs;
+	Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym;
+	size_t dynsym_idx;
+	GElf_Ehdr ehdr;
 	char sympltname[1024];
-	int nr = 0, symidx;
+	Elf *elf;
+	int nr = 0, symidx, fd, err = 0;
+
+	fd = open(self->name, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		goto out_close;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		goto out_elf_end;
+
+	scn_dynsym = elf_section_by_name(elf, &ehdr, &shdr_dynsym,
+					 ".dynsym", &dynsym_idx);
+	if (scn_dynsym == NULL)
+		goto out_elf_end;
 
-	scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+	scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
 					  ".rela.plt", NULL);
 	if (scn_plt_rel == NULL) {
-		scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+		scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
 						  ".rel.plt", NULL);
 		if (scn_plt_rel == NULL)
-			return 0;
+			goto out_elf_end;
 	}
 
+	err = -1;
+
 	if (shdr_rel_plt.sh_link != dynsym_idx)
-		return 0;
+		goto out_elf_end;
 
-	if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL)
-		return 0;
+	if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL)
+		goto out_elf_end;
 
 	/*
 	 * Fetch the relocation section to find the indexes to the GOT
@@ -410,19 +436,19 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf,
 	 */
 	reldata = elf_getdata(scn_plt_rel, NULL);
 	if (reldata == NULL)
-		return -1;
+		goto out_elf_end;
 
 	syms = elf_getdata(scn_dynsym, NULL);
 	if (syms == NULL)
-		return -1;
+		goto out_elf_end;
 
-	scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link);
+	scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link);
 	if (scn_symstrs == NULL)
-		return -1;
+		goto out_elf_end;
 
 	symstrs = elf_getdata(scn_symstrs, NULL);
 	if (symstrs == NULL)
-		return -1;
+		goto out_elf_end;
 
 	nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
 	plt_offset = shdr_plt.sh_offset;
@@ -441,7 +467,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf,
 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
 					sympltname, self->sym_priv_size, 0, verbose);
 			if (!f)
-				return -1;
+				goto out_elf_end;
 
 			dso__insert_symbol(self, f);
 			++nr;
@@ -459,19 +485,25 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf,
 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
 					sympltname, self->sym_priv_size, 0, verbose);
 			if (!f)
-				return -1;
+				goto out_elf_end;
 
 			dso__insert_symbol(self, f);
 			++nr;
 		}
+	} else {
+		/*
+		 * TODO: There are still one more shdr_rel_plt.sh_type
+		 * I have to investigate, but probably should be ignored.
+		 */
 	}
 
-	return nr;
+	err = 0;
+out_elf_end:
+	elf_end(elf);
+out_close:
+	close(fd);
+
+	if (err == 0)
+		return nr;
+out:
+	fprintf(stderr, "%s: problems reading %s PLT info.\n",
+		__func__, self->name);
+	return 0;
 }
 
 static int dso__load_sym(struct dso *self, int fd, const char *name,
@@ -485,9 +517,8 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
...
@@ -485,9 +517,8 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
GElf_Shdr
shdr
;
GElf_Shdr
shdr
;
Elf_Data
*
syms
;
Elf_Data
*
syms
;
GElf_Sym
sym
;
GElf_Sym
sym
;
Elf_Scn
*
sec
,
*
sec_
dynsym
,
*
sec_
strndx
;
Elf_Scn
*
sec
,
*
sec_strndx
;
Elf
*
elf
;
Elf
*
elf
;
size_t
dynsym_idx
;
int
nr
=
0
;
int
nr
=
0
;
elf
=
elf_begin
(
fd
,
ELF_C_READ_MMAP
,
NULL
);
elf
=
elf_begin
(
fd
,
ELF_C_READ_MMAP
,
NULL
);
@@ -504,32 +535,11 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 		goto out_elf_end;
 	}
 
-	/*
-	 * We need to check if we have a .dynsym, so that we can handle the
-	 * .plt, synthesizing its symbols, that aren't on the symtabs (be it
-	 * .dynsym or .symtab)
-	 */
-	sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr,
-					 ".dynsym", &dynsym_idx);
-	if (sec_dynsym != NULL) {
-		nr = dso__synthesize_plt_symbols(self, elf, &ehdr,
-						 sec_dynsym, &shdr,
-						 dynsym_idx, verbose);
-		if (nr < 0)
-			goto out_elf_end;
-	}
-
-	/*
-	 * But if we have a full .symtab (that is a superset of .dynsym) we
-	 * should add the symbols not in the .dynsyn
-	 */
 	sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
 	if (sec == NULL) {
-		if (sec_dynsym == NULL)
-			goto out_elf_end;
-
-		sec = sec_dynsym;
-		gelf_getshdr(sec, &shdr);
+		sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL);
+		if (sec == NULL)
+			goto out_elf_end;
 	}
 
 	syms = elf_getdata(sec, NULL);
@@ -668,6 +678,11 @@ more:
 	if (!ret)
 		goto more;
 
+	if (ret > 0) {
+		int nr_plt = dso__synthesize_plt_symbols(self, verbose);
+		if (nr_plt > 0)
+			ret += nr_plt;
+	}
 out:
 	free(name);
 	return ret;
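
Aside: the reworked dso__synthesize_plt_symbols() now opens the dso itself and unwinds with the kernel's usual goto-ladder: resources acquired in order are released in reverse through labelled exits, so every early failure path stays a one-liner. A generic standalone sketch of the idiom (file name and processing invented for illustration; not code from the patch):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>

	static int process_file(const char *name)
	{
		char buf[64];
		int err = -1;
		int fd = open(name, O_RDONLY);

		if (fd < 0)
			goto out;		/* nothing acquired yet */

		if (read(fd, buf, sizeof(buf)) < 0)
			goto out_close;		/* fd must be released */

		err = 0;			/* success path */
	out_close:
		close(fd);			/* undone in reverse order */
	out:
		return err;
	}

	int main(void)
	{
		printf("%d\n", process_file("/etc/hostname"));
		return 0;
	}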
tools/perf/util/symbol.h

@@ -25,6 +25,7 @@ struct dso {
 	struct symbol    *(*find_symbol)(struct dso *, u64 ip);
 	unsigned int	 sym_priv_size;
 	unsigned char	 adjust_symbols;
+	unsigned char	 slen_calculated;
 	char		 name[0];
 };