Commit d00aa669 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'perfcounters-fixes-for-linus' of...

Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (27 commits)
  perf_counter: Zero dead bytes from ftrace raw samples size alignment
  perf_counter: Subtract the buffer size field from the event record size
  perf_counter: Require CAP_SYS_ADMIN for raw tracepoint data
  perf_counter: Correct PERF_SAMPLE_RAW output
  perf tools: callchain: Fix bad rounding of minimum rate
  perf_counter tools: Fix libbfd detection for systems with libz dependency
  perf: "Longum est iter per praecepta, breve et efficax per exempla"
  perf_counter: Fix a race on perf_counter_ctx
  perf_counter: Fix tracepoint sampling to be part of generic sampling
  perf_counter: Work around gcc warning by initializing tracepoint record unconditionally
  perf tools: callchain: Fix sum of percentages to be 100% by displaying amount of ignored chains in fractal mode
  perf tools: callchain: Fix 'perf report' display to be callchain by default
  perf tools: callchain: Fix spurious 'perf report' warnings: ignore empty callchains
  perf record: Fix the -A UI for empty or non-existent perf.data
  perf util: Fix do_read() to fail on EOF instead of busy-looping
  perf list: Fix the output to not include tracepoints without an id
  perf_counter/powerpc: Fix oops on cpus without perf_counter hardware support
  perf stat: Fix tool option consistency: rename -S/--scale to -c/--scale
  perf report: Add debug help for the finding of symbol bugs - show the symtab origin (DSO, build-id, kernel, etc)
  perf report: Fix per task mult-counter stat reporting
  ...
parents cec36911 1853db0e
......@@ -518,6 +518,8 @@ void hw_perf_disable(void)
struct cpu_hw_counters *cpuhw;
unsigned long flags;
if (!ppmu)
return;
local_irq_save(flags);
cpuhw = &__get_cpu_var(cpu_hw_counters);
......@@ -572,6 +574,8 @@ void hw_perf_enable(void)
int n_lim;
int idx;
if (!ppmu)
return;
local_irq_save(flags);
cpuhw = &__get_cpu_var(cpu_hw_counters);
if (!cpuhw->disabled) {
......@@ -737,6 +741,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
long i, n, n0;
struct perf_counter *sub;
if (!ppmu)
return 0;
cpuhw = &__get_cpu_var(cpu_hw_counters);
n0 = cpuhw->n_counters;
n = collect_events(group_leader, ppmu->n_counter - n0,
......@@ -1281,6 +1287,8 @@ void hw_perf_counter_setup(int cpu)
{
struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
if (!ppmu)
return;
memset(cpuhw, 0, sizeof(*cpuhw));
cpuhw->mmcr[0] = MMCR0_FC;
}
......
......@@ -121,7 +121,7 @@ enum perf_counter_sample_format {
PERF_SAMPLE_CPU = 1U << 7,
PERF_SAMPLE_PERIOD = 1U << 8,
PERF_SAMPLE_STREAM_ID = 1U << 9,
PERF_SAMPLE_TP_RECORD = 1U << 10,
PERF_SAMPLE_RAW = 1U << 10,
PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
};
......@@ -369,6 +369,8 @@ enum perf_event_type {
*
* { u64 nr,
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
* };
*/
PERF_EVENT_SAMPLE = 9,
......@@ -414,9 +416,9 @@ struct perf_callchain_entry {
__u64 ip[PERF_MAX_STACK_DEPTH];
};
struct perf_tracepoint_record {
int size;
char *record;
struct perf_raw_record {
u32 size;
void *data;
};
struct task_struct;
......@@ -687,7 +689,7 @@ struct perf_sample_data {
struct pt_regs *regs;
u64 addr;
u64 period;
void *private;
struct perf_raw_record *raw;
};
extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
......
......@@ -637,12 +637,20 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
* pc = preempt_count();
*
* __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
* __entry_size = __data_size + sizeof(*entry);
*
* // Below we want to get the aligned size by taking into account
* // the u32 field that will later store the buffer size
* __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),
* sizeof(u64));
* __entry_size -= sizeof(u32);
*
* do {
* char raw_data[__entry_size]; <- allocate our sample in the stack
* struct trace_entry *ent;
*
* zero dead bytes from alignment to avoid stack leak to userspace:
*
* *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
* entry = (struct ftrace_raw_<call> *)raw_data;
* ent = &entry->ent;
* tracing_generic_entry_update(ent, irq_flags, pc);
......@@ -685,12 +693,15 @@ static void ftrace_profile_##call(proto) \
pc = preempt_count(); \
\
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
__entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\
__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
sizeof(u64)); \
__entry_size -= sizeof(u32); \
\
do { \
char raw_data[__entry_size]; \
struct trace_entry *ent; \
\
*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
entry = (struct ftrace_raw_##call *)raw_data; \
ent = &entry->ent; \
tracing_generic_entry_update(ent, irq_flags, pc); \
......
This diff is collapsed.
------------------------------
****** perf by examples ******
------------------------------
[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
First, discovery/enumeration of available counters can be done via
'perf list':
titan:~> perf list
[...]
kmem:kmalloc [Tracepoint event]
kmem:kmem_cache_alloc [Tracepoint event]
kmem:kmalloc_node [Tracepoint event]
kmem:kmem_cache_alloc_node [Tracepoint event]
kmem:kfree [Tracepoint event]
kmem:kmem_cache_free [Tracepoint event]
kmem:mm_page_free_direct [Tracepoint event]
kmem:mm_pagevec_free [Tracepoint event]
kmem:mm_page_alloc [Tracepoint event]
kmem:mm_page_alloc_zone_locked [Tracepoint event]
kmem:mm_page_pcpu_drain [Tracepoint event]
kmem:mm_page_alloc_extfrag [Tracepoint event]
Then any (or all) of the above event sources can be activated and
measured. For example the page alloc/free properties of a 'hackbench
run' are:
titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
-e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10
Time: 0.575
Performance counter stats for './hackbench 10':
13857 kmem:mm_page_pcpu_drain
27576 kmem:mm_page_alloc
6025 kmem:mm_pagevec_free
20934 kmem:mm_page_free_direct
0.613972165 seconds time elapsed
You can observe the statistical properties as well, by using the
'repeat the workload N times' feature of perf stat:
titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
kmem:mm_page_free_direct ./hackbench 10
Time: 0.627
Time: 0.644
Time: 0.564
Time: 0.559
Time: 0.626
Performance counter stats for './hackbench 10' (5 runs):
12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
25035 kmem:mm_page_alloc ( +- 3.783% )
6104 kmem:mm_pagevec_free ( +- 0.934% )
18376 kmem:mm_page_free_direct ( +- 4.941% )
0.643954516 seconds time elapsed ( +- 2.363% )
Furthermore, these tracepoints can be used to sample the workload as
well. For example the page allocations done by a 'git gc' can be
captured the following way:
titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
Writing objects: 100% (1148/1148), done.
Total 1148 (delta 690), reused 1148 (delta 690)
[ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
To check which functions generated page allocations:
titan:~/git> perf report
# Samples: 10646
#
# Overhead Command Shared Object
# ........ ............... ..........................
#
23.57% git-repack /lib64/libc-2.5.so
21.81% git /lib64/libc-2.5.so
14.59% git ./git
11.79% git-repack ./git
7.12% git /lib64/ld-2.5.so
3.16% git-repack /lib64/libpthread-2.5.so
2.09% git-repack /bin/bash
1.97% rm /lib64/libc-2.5.so
1.39% mv /lib64/ld-2.5.so
1.37% mv /lib64/libc-2.5.so
1.12% git-repack /lib64/ld-2.5.so
0.95% rm /lib64/ld-2.5.so
0.90% git-update-serv /lib64/libc-2.5.so
0.73% git-update-serv /lib64/ld-2.5.so
0.68% perf /lib64/libpthread-2.5.so
0.64% git-repack /usr/lib64/libz.so.1.2.3
Or to see it on a more finegrained level:
titan:~/git> perf report --sort comm,dso,symbol
# Samples: 10646
#
# Overhead Command Shared Object Symbol
# ........ ............... .......................... ......
#
9.35% git-repack ./git [.] insert_obj_hash
9.12% git ./git [.] insert_obj_hash
7.31% git /lib64/libc-2.5.so [.] memcpy
6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc
6.24% git-repack /lib64/libc-2.5.so [.] memcpy
5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork
5.47% git /lib64/libc-2.5.so [.] _int_malloc
2.99% git /lib64/libc-2.5.so [.] memset
Furthermore, call-graph sampling can be done too, of page
allocations - to see precisely what kind of page allocations there
are:
titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
Writing objects: 100% (1148/1148), done.
Total 1148 (delta 690), reused 1148 (delta 690)
[ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
titan:~/git> perf report -g
# Samples: 10686
#
# Overhead Command Shared Object
# ........ ............... ..........................
#
23.25% git-repack /lib64/libc-2.5.so
|
|--50.00%-- _int_free
|
|--37.50%-- __GI___fork
| make_child
|
|--12.50%-- ptmalloc_unlock_all2
| make_child
|
--6.25%-- __GI_strcpy
21.61% git /lib64/libc-2.5.so
|
|--30.00%-- __GI_read
| |
| --83.33%-- git_config_from_file
| git_config
| |
[...]
Or you can observe the whole system's page allocations for 10
seconds:
titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
kmem:mm_page_free_direct sleep 10
Performance counter stats for 'sleep 10':
171585 kmem:mm_page_pcpu_drain
322114 kmem:mm_page_alloc
73623 kmem:mm_pagevec_free
254115 kmem:mm_page_free_direct
10.000591410 seconds time elapsed
Or observe how fluctuating the page allocations are, via statistical
analysis done over ten 1-second intervals:
titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
kmem:mm_page_free_direct sleep 1
Performance counter stats for 'sleep 1' (10 runs):
17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
34394 kmem:mm_page_alloc ( +- 4.617% )
7509 kmem:mm_pagevec_free ( +- 4.820% )
25653 kmem:mm_page_free_direct ( +- 3.672% )
1.058135029 seconds time elapsed ( +- 3.089% )
Or you can annotate the recorded 'git gc' run on a per symbol basis
and check which instructions/source-code generated page allocations:
titan:~/git> perf annotate __GI___fork
------------------------------------------------
Percent | Source code & Disassembly of libc-2.5.so
------------------------------------------------
:
:
: Disassembly of section .plt:
: Disassembly of section .text:
:
: 00000031a2e95560 <__fork>:
[...]
0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax
0.00 : 31a2e95607: 0f 05 syscall
83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202>
0.00 : 31a2e95615: 85 c0 test %eax,%eax
( this shows that 83.42% of __GI___fork's page allocations come from
the 0x38 system call it performs. )
etc. etc. - a lot more is possible. I could list a dozen of
other different usecases straight away - neither of which is
possible via /proc/vmstat.
/proc/vmstat is not in the same league really, in terms of
expressive power of system analysis and performance
analysis.
All that the above results needed were those new tracepoints
in include/tracing/events/kmem.h.
Ingo
......@@ -40,7 +40,7 @@ OPTIONS
-a::
system-wide collection
-S::
-c::
scale counter values
EXAMPLES
......
......@@ -3,36 +3,122 @@ perf-top(1)
NAME
----
perf-top - Run a command and profile it
perf-top - System profiling tool.
SYNOPSIS
--------
[verse]
'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
'perf top' [-e <EVENT> | --event=EVENT] [<options>]
DESCRIPTION
-----------
This command runs a command and gathers a performance counter profile
from it.
This command generates and displays a performance counter profile in realtime.
OPTIONS
-------
<command>...::
Any command you can specify in a shell.
-a::
--all-cpus::
System-wide collection. (default)
-c <count>::
--count=<count>::
Event period to sample.
-C <cpu>::
--CPU=<cpu>::
CPU to profile.
-d <seconds>::
--delay=<seconds>::
Number of seconds to delay between refreshes.
-e::
--event=::
-e <event>::
--event=<event>::
Select the PMU event. Selection can be a symbolic event name
(use 'perf list' to list all events) or a raw PMU
event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
hexadecimal event descriptor.
-a::
system-wide collection
-E <entries>::
--entries=<entries>::
Display this many functions.
-f <count>::
--count-filter=<count>::
Only display functions with more events than this.
-F <freq>::
--freq=<freq>::
Profile at this frequency.
-i::
--inherit::
Child tasks inherit counters, only makes sens with -p option.
-k <path>::
--vmlinux=<path>::
Path to vmlinux. Required for annotation functionality.
-m <pages>::
--mmap-pages=<pages>::
Number of mmapped data pages.
-p <pid>::
--pid=<pid>::
Profile events on existing pid.
-r <priority>::
--realtime=<priority>::
Collect data with this RT SCHED_FIFO priority.
-s <symbol>::
--sym-annotate=<symbol>::
Annotate this symbol. Requires -k option.
-v::
--verbose::
Be more verbose (show counter open errors, etc).
-z::
--zero::
Zero history across display updates.
INTERACTIVE PROMPTING KEYS
--------------------------
[d]::
Display refresh delay.
[e]::
Number of entries to display.
[E]::
Event to display when multiple counters are active.
[f]::
Profile display filter (>= hit count).
[F]::
Annotation display filter (>= % of total).
[s]::
Annotate symbol.
[S]::
Stop annotation, return to full profile display.
[w]::
Toggle between weighted sum and individual count[E]r profile.
[z]::
Toggle event count zeroing across display updates.
[qQ]::
Quit.
Pressing any unmapped key displays a menu, and prompts for input.
-l::
scale counter values
SEE ALSO
--------
......
......@@ -387,10 +387,14 @@ else
has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
ifeq ($(has_bfd),y)
EXTLIBS += -lbfd
else ifeq ($(has_bfd_iberty),y)
EXTLIBS += -lbfd -liberty
else ifeq ($(has_bfd_iberty_z),y)
EXTLIBS += -lbfd -liberty -lz
else
msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
BASIC_CFLAGS += -DNO_DEMANGLE
......
......@@ -525,10 +525,14 @@ static int __cmd_record(int argc, const char **argv)
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
if (!stat(output_name, &st) && !force && !append_file) {
fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
output_name);
exit(-1);
if (!stat(output_name, &st) && st.st_size) {
if (!force && !append_file) {
fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
output_name);
exit(-1);
}
} else {
append_file = 0;
}
flags = O_CREAT|O_RDWR;
......
......@@ -68,7 +68,7 @@ static int callchain;
static
struct callchain_param callchain_param = {
.mode = CHAIN_GRAPH_ABS,
.mode = CHAIN_GRAPH_REL,
.min_percent = 0.5
};
......@@ -112,7 +112,9 @@ struct read_event {
struct perf_event_header header;
u32 pid,tid;
u64 value;
u64 format[3];
u64 time_enabled;
u64 time_running;
u64 id;
};
typedef union event_union {
......@@ -698,7 +700,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
size_t ret = 0;
if (verbose)
ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip);
ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip,
dso__symtab_origin(self->dso));
ret += repsep_fprintf(fp, "[%c] ", self->level);
if (self->sym) {
......@@ -888,6 +891,21 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
return ret;
}
static struct symbol *rem_sq_bracket;
static struct callchain_list rem_hits;
static void init_rem_hits(void)
{
rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6);
if (!rem_sq_bracket) {
fprintf(stderr, "Not enough memory to display remaining hits\n");
return;
}
strcpy(rem_sq_bracket->name, "[...]");
rem_hits.sym = rem_sq_bracket;
}
static size_t
callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
u64 total_samples, int depth, int depth_mask)
......@@ -897,25 +915,34 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
struct callchain_list *chain;
int new_depth_mask = depth_mask;
u64 new_total;
u64 remaining;
size_t ret = 0;
int i;
if (callchain_param.mode == CHAIN_GRAPH_REL)
new_total = self->cumul_hit;
new_total = self->children_hit;
else
new_total = total_samples;
remaining = new_total;
node = rb_first(&self->rb_root);
while (node) {
u64 cumul;
child = rb_entry(node, struct callchain_node, rb_node);
cumul = cumul_hits(child);
remaining -= cumul;
/*
* The depth mask manages the output of pipes that show
* the depth. We don't want to keep the pipes of the current
* level for the last child of this depth
* level for the last child of this depth.
* Except if we have remaining filtered hits. They will
* supersede the last child
*/
next = rb_next(node);
if (!next)
if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining))
new_depth_mask &= ~(1 << (depth - 1));
/*
......@@ -930,7 +957,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
ret += ipchain__fprintf_graph(fp, chain, depth,
new_depth_mask, i++,
new_total,
child->cumul_hit);
cumul);
}
ret += callchain__fprintf_graph(fp, child, new_total,
depth + 1,
......@@ -938,6 +965,19 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
node = next;
}
if (callchain_param.mode == CHAIN_GRAPH_REL &&
remaining && remaining != new_total) {
if (!rem_sq_bracket)
return ret;
new_depth_mask &= ~(1 << (depth - 1));
ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
new_depth_mask, 0, new_total,
remaining);
}
return ret;
}
......@@ -1358,6 +1398,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples)
unsigned int width;
char *col_width = col_width_list_str;
init_rem_hits();
fprintf(fp, "# Samples: %Ld\n", (u64)total_samples);
fprintf(fp, "#\n");
......@@ -1429,6 +1471,8 @@ print_entries:
}
fprintf(fp, "\n");
free(rem_sq_bracket);
return ret;
}
......@@ -1690,14 +1734,37 @@ static void trace_event(event_t *event)
dprintf(".\n");
}
static struct perf_header *header;
static struct perf_counter_attr *perf_header__find_attr(u64 id)
{
int i;
for (i = 0; i < header->attrs; i++) {
struct perf_header_attr *attr = header->attr[i];
int j;
for (j = 0; j < attr->ids; j++) {
if (attr->id[j] == id)
return &attr->attr;
}
}
return NULL;
}
static int
process_read_event(event_t *event, unsigned long offset, unsigned long head)
{
dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n",
struct perf_counter_attr *attr = perf_header__find_attr(event->read.id);
dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->read.pid,
event->read.tid,
attr ? __event_name(attr->type, attr->config)
: "FAIL",
event->read.value);
return 0;
......@@ -1743,8 +1810,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
return 0;
}
static struct perf_header *header;
static u64 perf_header__sample_type(void)
{
u64 sample_type = 0;
......@@ -1812,6 +1877,13 @@ static int __cmd_report(void)
" -g?\n");
exit(-1);
}
} else if (callchain_param.mode != CHAIN_NONE && !callchain) {
callchain = 1;
if (register_callchain_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain"
" params\n");
exit(-1);
}
}
if (load_kernel() < 0) {
......@@ -1950,6 +2022,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
else if (!strncmp(tok, "fractal", strlen(arg)))
callchain_param.mode = CHAIN_GRAPH_REL;
else if (!strncmp(tok, "none", strlen(arg))) {
callchain_param.mode = CHAIN_NONE;
callchain = 0;
return 0;
}
else
return -1;
......
......@@ -496,7 +496,7 @@ static const struct option options[] = {
"stat events on existing pid"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('S', "scale", &scale,
OPT_BOOLEAN('c', "scale", &scale,
"scale/normalize counters"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
......
This diff is collapsed.
......@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <math.h>
#include "callchain.h"
......@@ -26,10 +27,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct callchain_node *rnode;
u64 chain_cumul = cumul_hits(chain);
while (*p) {
u64 rnode_cumul;
parent = *p;
rnode = rb_entry(parent, struct callchain_node, rb_node);
rnode_cumul = cumul_hits(rnode);
switch (mode) {
case CHAIN_FLAT:
......@@ -40,7 +45,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
break;
case CHAIN_GRAPH_ABS: /* Falldown */
case CHAIN_GRAPH_REL:
if (rnode->cumul_hit < chain->cumul_hit)
if (rnode_cumul < chain_cumul)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
......@@ -87,7 +92,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
chain_for_each_child(child, node) {
__sort_chain_graph_abs(child, min_hit);
if (child->cumul_hit >= min_hit)
if (cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_ABS);
}
......@@ -108,11 +113,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
u64 min_hit;
node->rb_root = RB_ROOT;
min_hit = node->cumul_hit * min_percent / 100.0;
min_hit = ceil(node->children_hit * min_percent);
chain_for_each_child(child, node) {
__sort_chain_graph_rel(child, min_percent);
if (child->cumul_hit >= min_hit)
if (cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_REL);
}
......@@ -122,7 +127,7 @@ static void
sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root,
u64 min_hit __used, struct callchain_param *param)
{
__sort_chain_graph_rel(chain_root, param->min_percent);
__sort_chain_graph_rel(chain_root, param->min_percent / 100.0);
rb_root->rb_node = chain_root->rb_root.rb_node;
}
......@@ -211,7 +216,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain,
new = create_child(parent, false);
fill_node(new, chain, start, syms);
new->cumul_hit = new->hit = 1;
new->children_hit = 0;
new->hit = 1;
}
/*
......@@ -241,7 +247,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
/* split the hits */
new->hit = parent->hit;
new->cumul_hit = parent->cumul_hit;
new->children_hit = parent->children_hit;
parent->children_hit = cumul_hits(new);
new->val_nr = parent->val_nr - idx_local;
parent->val_nr = idx_local;
......@@ -249,6 +256,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
if (idx_total < chain->nr) {
parent->hit = 0;
add_child(parent, chain, idx_total, syms);
parent->children_hit++;
} else {
parent->hit = 1;
}
......@@ -269,13 +277,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain,
unsigned int ret = __append_chain(rnode, chain, start, syms);
if (!ret)
goto cumul;
goto inc_children_hit;
}
/* nothing in children, add to the current node */
add_child(root, chain, start, syms);
cumul:
root->cumul_hit++;
inc_children_hit:
root->children_hit++;
}
static int
......@@ -317,8 +325,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
/* we match 100% of the path, increment the hit */
if (i - start == root->val_nr && i == chain->nr) {
root->hit++;
root->cumul_hit++;
return 0;
}
......@@ -331,5 +337,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
void append_chain(struct callchain_node *root, struct ip_callchain *chain,
struct symbol **syms)
{
if (!chain->nr)
return;
__append_chain_children(root, chain, syms, 0);
}
......@@ -7,6 +7,7 @@
#include "symbol.h"
enum chain_mode {
CHAIN_NONE,
CHAIN_FLAT,
CHAIN_GRAPH_ABS,
CHAIN_GRAPH_REL
......@@ -21,7 +22,7 @@ struct callchain_node {
struct rb_root rb_root; /* sorted tree of children */
unsigned int val_nr;
u64 hit;
u64 cumul_hit; /* hit + hits of children */
u64 children_hit;
};
struct callchain_param;
......@@ -48,6 +49,11 @@ static inline void callchain_init(struct callchain_node *node)
INIT_LIST_HEAD(&node->val);
}
static inline u64 cumul_hits(struct callchain_node *node)
{
return node->hit + node->children_hit;
}
int register_callchain_param(struct callchain_param *param);
void append_chain(struct callchain_node *root, struct ip_callchain *chain,
struct symbol **syms);
......
......@@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size)
if (ret < 0)
die("failed to read");
if (ret == 0)
die("failed to read: missing data");
size -= ret;
buf += ret;
......@@ -213,9 +215,10 @@ struct perf_header *perf_header__read(int fd)
for (i = 0; i < nr_attrs; i++) {
struct perf_header_attr *attr;
off_t tmp = lseek(fd, 0, SEEK_CUR);
off_t tmp;
do_read(fd, &f_attr, sizeof(f_attr));
tmp = lseek(fd, 0, SEEK_CUR);
attr = perf_header_attr__new(&f_attr.attr);
......
......@@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = {
(strcmp(sys_dirent.d_name, ".")) && \
(strcmp(sys_dirent.d_name, "..")))
static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
{
char evt_path[MAXPATHLEN];
int fd;
snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
sys_dir->d_name, evt_dir->d_name);
fd = open(evt_path, O_RDONLY);
if (fd < 0)
return -EINVAL;
close(fd);
return 0;
}
#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \
while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \
if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \
sys_dirent.d_name, evt_dirent.d_name) && \
(!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \
(strcmp(evt_dirent.d_name, ".")) && \
(strcmp(evt_dirent.d_name, "..")))
(strcmp(evt_dirent.d_name, "..")) && \
(!tp_event_has_id(&sys_dirent, &evt_dirent)))
#define MAX_EVENT_LENGTH 30
......@@ -223,9 +239,15 @@ char *event_name(int counter)
{
u64 config = attrs[counter].config;
int type = attrs[counter].type;
return __event_name(type, config);
}
char *__event_name(int type, u64 config)
{
static char buf[32];
if (attrs[counter].type == PERF_TYPE_RAW) {
if (type == PERF_TYPE_RAW) {
sprintf(buf, "raw 0x%llx", config);
return buf;
}
......
......@@ -10,6 +10,7 @@ extern int nr_counters;
extern struct perf_counter_attr attrs[MAX_COUNTERS];
extern char *event_name(int ctr);
extern char *__event_name(int type, u64 config);
extern int parse_events(const struct option *opt, const char *str, int unset);
......
......@@ -24,6 +24,16 @@ const char *sym_hist_filter;
#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
#endif
enum dso_origin {
DSO__ORIG_KERNEL = 0,
DSO__ORIG_JAVA_JIT,
DSO__ORIG_FEDORA,
DSO__ORIG_UBUNTU,
DSO__ORIG_BUILDID,
DSO__ORIG_DSO,
DSO__ORIG_NOT_FOUND,
};
static struct symbol *symbol__new(u64 start, u64 len,
const char *name, unsigned int priv_size,
u64 obj_start, int verbose)
......@@ -81,6 +91,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size)
self->sym_priv_size = sym_priv_size;
self->find_symbol = dso__find_symbol;
self->slen_calculated = 0;
self->origin = DSO__ORIG_NOT_FOUND;
}
return self;
......@@ -710,7 +721,7 @@ static char *dso__read_build_id(struct dso *self, int verbose)
++raw;
bid += 2;
}
if (verbose)
if (verbose >= 2)
printf("%s(%s): %s\n", __func__, self->name, build_id);
out_elf_end:
elf_end(elf);
......@@ -720,11 +731,26 @@ out:
return build_id;
}
char dso__symtab_origin(const struct dso *self)
{
static const char origin[] = {
[DSO__ORIG_KERNEL] = 'k',
[DSO__ORIG_JAVA_JIT] = 'j',
[DSO__ORIG_FEDORA] = 'f',
[DSO__ORIG_UBUNTU] = 'u',
[DSO__ORIG_BUILDID] = 'b',
[DSO__ORIG_DSO] = 'd',
};
if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND)
return '!';
return origin[self->origin];
}
int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
{
int size = PATH_MAX;
char *name = malloc(size), *build_id = NULL;
int variant = 0;
int ret = -1;
int fd;
......@@ -733,19 +759,26 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
self->adjust_symbols = 0;
if (strncmp(self->name, "/tmp/perf-", 10) == 0)
return dso__load_perf_map(self, filter, verbose);
if (strncmp(self->name, "/tmp/perf-", 10) == 0) {
ret = dso__load_perf_map(self, filter, verbose);
self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT :
DSO__ORIG_NOT_FOUND;
return ret;
}
self->origin = DSO__ORIG_FEDORA - 1;
more:
do {
switch (variant) {
case 0: /* Fedora */
self->origin++;
switch (self->origin) {
case DSO__ORIG_FEDORA:
snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
break;
case 1: /* Ubuntu */
case DSO__ORIG_UBUNTU:
snprintf(name, size, "/usr/lib/debug%s", self->name);
break;
case 2:
case DSO__ORIG_BUILDID:
build_id = dso__read_build_id(self, verbose);
if (build_id != NULL) {
snprintf(name, size,
......@@ -754,16 +787,15 @@ more:
free(build_id);
break;
}
variant++;
self->origin++;
/* Fall thru */
case 3: /* Sane people */
case DSO__ORIG_DSO:
snprintf(name, size, "%s", self->name);
break;
default:
goto out;
}
variant++;
fd = open(name, O_RDONLY);
} while (fd < 0);
......@@ -899,6 +931,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux,
if (err <= 0)
err = dso__load_kallsyms(self, filter, verbose);
if (err > 0)
self->origin = DSO__ORIG_KERNEL;
return err;
}
......
......@@ -26,6 +26,7 @@ struct dso {
unsigned int sym_priv_size;
unsigned char adjust_symbols;
unsigned char slen_calculated;
unsigned char origin;
char name[0];
};
......@@ -49,6 +50,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose);
int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
size_t dso__fprintf(struct dso *self, FILE *fp);
char dso__symtab_origin(const struct dso *self);
void symbol__init(void);
#endif /* _PERF_SYMBOL_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment