Commit cdce9d73 authored by Frederic Weisbecker's avatar Frederic Weisbecker Committed by Ingo Molnar

perf sched: Add sched latency profiling

Add the -l --latency option that reports statistics about the
scheduler latencies.

For now, the latencies are measured in the following sequence
scope:

- task A is sleeping (D or S state)
- task B wakes up A
         ^
         |
         |

   latency timeframe

         |
         |
         v
- task A is scheduled in

Start by recording every scheduler events:

	perf record -e sched:*

and then fetch the results:

	perf sched -l

 Tasks                     count          total              avg            max

migration/0                  2             39849            19924           28826
ksoftirqd/0                  7            756383           108054          373014
migration/1                  5             45391             9078           10452
ksoftirqd/1                  2            399055           199527          359130
events/0                     8           4780110           597513         4500250
events/1                     9           6353057           705895         2986012
kblockd/0                   42          37805097           900121         5077684

The snapshot are in nanoseconds.

- Count: number of snapshots taken for the given task
- Total: total latencies in nanosec
- Avg  : average of latency between wake up and sched in
- Max  : max snapshot latency
Signed-off-by: default avatarFrederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 419ab0d6
...@@ -31,6 +31,7 @@ static struct perf_header *header; ...@@ -31,6 +31,7 @@ static struct perf_header *header;
static u64 sample_type; static u64 sample_type;
static int replay_mode; static int replay_mode;
static int lat_mode;
/* /*
...@@ -847,6 +848,269 @@ static struct trace_sched_handler replay_ops = { ...@@ -847,6 +848,269 @@ static struct trace_sched_handler replay_ops = {
.fork_event = replay_fork_event, .fork_event = replay_fork_event,
}; };
#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
enum thread_state {
THREAD_SLEEPING,
THREAD_WAKED_UP,
THREAD_SCHED_IN,
THREAD_IGNORE
};
struct lat_snapshot {
struct list_head list;
enum thread_state state;
u64 wake_up_time;
u64 sched_in_time;
};
struct thread_latency {
struct list_head snapshot_list;
struct thread *thread;
struct rb_node node;
};
static struct rb_root lat_snapshot_root;
static struct thread_latency *
thread_latency_search(struct rb_root *root, struct thread *thread)
{
struct rb_node *node = root->rb_node;
while (node) {
struct thread_latency *lat;
lat = container_of(node, struct thread_latency, node);
if (thread->pid < lat->thread->pid)
node = node->rb_left;
else if (thread->pid > lat->thread->pid)
node = node->rb_right;
else {
return lat;
}
}
return NULL;
}
static void
__thread_latency_insert(struct rb_root *root, struct thread_latency *data)
{
struct rb_node **new = &(root->rb_node), *parent = NULL;
while (*new) {
struct thread_latency *this;
this = container_of(*new, struct thread_latency, node);
parent = *new;
if (data->thread->pid < this->thread->pid)
new = &((*new)->rb_left);
else if (data->thread->pid > this->thread->pid)
new = &((*new)->rb_right);
else
die("Double thread insertion\n");
}
rb_link_node(&data->node, parent, new);
rb_insert_color(&data->node, root);
}
static void thread_latency_insert(struct thread *thread)
{
struct thread_latency *lat;
lat = calloc(sizeof(*lat), 1);
if (!lat)
die("No memory");
lat->thread = thread;
INIT_LIST_HEAD(&lat->snapshot_list);
__thread_latency_insert(&lat_snapshot_root, lat);
}
static void
latency_fork_event(struct trace_fork_event *fork_event __used,
struct event *event __used,
int cpu __used,
u64 timestamp __used,
struct thread *thread __used)
{
/* should insert the newcomer */
}
static char sched_out_state(struct trace_switch_event *switch_event)
{
const char *str = TASK_STATE_TO_CHAR_STR;
return str[switch_event->prev_state];
}
static void
lat_sched_out(struct thread_latency *lat,
struct trace_switch_event *switch_event)
{
struct lat_snapshot *snapshot;
if (sched_out_state(switch_event) == 'R')
return;
snapshot = calloc(sizeof(*snapshot), 1);
if (!snapshot)
die("Non memory");
list_add_tail(&snapshot->list, &lat->snapshot_list);
}
static void
lat_sched_in(struct thread_latency *lat, u64 timestamp)
{
struct lat_snapshot *snapshot;
if (list_empty(&lat->snapshot_list))
return;
snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
list);
if (snapshot->state != THREAD_WAKED_UP)
return;
if (timestamp < snapshot->wake_up_time) {
snapshot->state = THREAD_IGNORE;
return;
}
snapshot->state = THREAD_SCHED_IN;
snapshot->sched_in_time = timestamp;
}
static void
latency_switch_event(struct trace_switch_event *switch_event,
struct event *event __used,
int cpu __used,
u64 timestamp,
struct thread *thread __used)
{
struct thread_latency *out_lat, *in_lat;
struct thread *sched_out, *sched_in;
sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
if (!in_lat) {
thread_latency_insert(sched_in);
in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
if (!in_lat)
die("Internal latency tree error");
}
out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
if (!out_lat) {
thread_latency_insert(sched_out);
out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
if (!out_lat)
die("Internal latency tree error");
}
lat_sched_in(in_lat, timestamp);
lat_sched_out(out_lat, switch_event);
}
static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
struct event *event __used,
int cpu __used,
u64 timestamp,
struct thread *thread __used)
{
struct thread_latency *lat;
struct lat_snapshot *snapshot;
struct thread *wakee;
/* Note for later, it may be interesting to observe the failing cases */
if (!wakeup_event->success)
return;
wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
lat = thread_latency_search(&lat_snapshot_root, wakee);
if (!lat) {
thread_latency_insert(wakee);
return;
}
if (list_empty(&lat->snapshot_list))
return;
snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
list);
if (snapshot->state != THREAD_SLEEPING)
return;
snapshot->state = THREAD_WAKED_UP;
snapshot->wake_up_time = timestamp;
}
static struct trace_sched_handler lat_ops = {
.wakeup_event = latency_wakeup_event,
.switch_event = latency_switch_event,
.fork_event = latency_fork_event,
};
static void output_lat_thread(struct thread_latency *lat)
{
struct lat_snapshot *shot;
int count = 0;
int i;
int ret;
u64 max = 0, avg;
u64 total = 0, delta;
list_for_each_entry(shot, &lat->snapshot_list, list) {
if (shot->state != THREAD_SCHED_IN)
continue;
count++;
delta = shot->sched_in_time - shot->wake_up_time;
if (delta > max)
max = delta;
total += delta;
}
if (!count)
return;
ret = printf("%s", lat->thread->comm);
for (i = 0; i < 25 - ret; i++)
printf(" ");
avg = total / count;
printf("%5d %10llu %10llu %10llu\n", count, total, avg, max);
}
static void output_lat_results(void)
{
struct rb_node *next;
printf(" Tasks");
printf(" count");
printf(" total");
printf(" avg");
printf(" max\n\n");
next = rb_first(&lat_snapshot_root);
while (next) {
struct thread_latency *lat;
lat = rb_entry(next, struct thread_latency, node);
output_lat_thread(lat);
next = rb_next(next);
}
}
static struct trace_sched_handler *trace_handler; static struct trace_sched_handler *trace_handler;
...@@ -1154,6 +1418,8 @@ static const struct option options[] = { ...@@ -1154,6 +1418,8 @@ static const struct option options[] = {
"dump raw trace in ASCII"), "dump raw trace in ASCII"),
OPT_BOOLEAN('r', "replay", &replay_mode, OPT_BOOLEAN('r', "replay", &replay_mode,
"replay sched behaviour from traces"), "replay sched behaviour from traces"),
OPT_BOOLEAN('l', "latency", &lat_mode,
"measure various latencies"),
OPT_BOOLEAN('v', "verbose", &verbose, OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"), "be more verbose (show symbol address, etc)"),
OPT_END() OPT_END()
...@@ -1180,9 +1446,12 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) ...@@ -1180,9 +1446,12 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
if (replay_mode) if (replay_mode)
trace_handler = &replay_ops; trace_handler = &replay_ops;
else /* We may need a default subcommand */ else if (lat_mode)
trace_handler = &lat_ops;
else /* We may need a default subcommand (perf trace?) */
die("Please select a sub command (-r)\n"); die("Please select a sub command (-r)\n");
if (replay_mode) {
calibrate_run_measurement_overhead(); calibrate_run_measurement_overhead();
calibrate_sleep_measurement_overhead(); calibrate_sleep_measurement_overhead();
...@@ -1196,6 +1465,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) ...@@ -1196,6 +1465,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
printf("------------------------------------------------------------\n"); printf("------------------------------------------------------------\n");
for (i = 0; i < nr_iterations; i++) for (i = 0; i < nr_iterations; i++)
run_one_test(); run_one_test();
} else if (lat_mode) {
setup_pager();
__cmd_sched();
output_lat_results();
}
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment