Commit b0b7065b authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'tracing-fixes-for-linus' of...

Merge branch 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (24 commits)
  tracing/urgent: warn in case of ftrace_start_up inbalance
  tracing/urgent: fix unbalanced ftrace_start_up
  function-graph: add stack frame test
  function-graph: disable when both x86_32 and optimize for size are configured
  ring-buffer: have benchmark test print to trace buffer
  ring-buffer: do not grab locks in nmi
  ring-buffer: add locks around rb_per_cpu_empty
  ring-buffer: check for less than two in size allocation
  ring-buffer: remove useless compile check for buffer_page size
  ring-buffer: remove useless warn on check
  ring-buffer: use BUF_PAGE_HDR_SIZE in calculating index
  tracing: update sample event documentation
  tracing/filters: fix race between filter setting and module unload
  tracing/filters: free filter_string in destroy_preds()
  ring-buffer: use commit counters for commit pointer accounting
  ring-buffer: remove unused variable
  ring-buffer: have benchmark test handle discarded events
  ring-buffer: prevent adding write in discarded area
  tracing/filters: strloc should be unsigned short
  tracing/filters: operand can be negative
  ...

Fix up kmemcheck-induced conflict in kernel/trace/ring_buffer.c manually
parents 38df92b8 d4c40383
......@@ -586,7 +586,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) {
*parent = old;
return;
}
......
......@@ -190,7 +190,7 @@ unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent)
goto out;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
if (ftrace_push_return_trace(parent, ip, &trace.depth) == -EBUSY)
if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
goto out;
trace.func = ftrace_mcount_call_adjust(ip) & PSW_ADDR_INSN;
/* Only trace if the calling function expects to. */
......
......@@ -34,6 +34,7 @@ config X86
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
select HAVE_FTRACE_SYSCALLS
......
......@@ -1174,6 +1174,7 @@ ENTRY(ftrace_graph_caller)
pushl %edx
movl 0xc(%esp), %edx
lea 0x4(%ebp), %eax
movl (%ebp), %ecx
subl $MCOUNT_INSN_SIZE, %edx
call prepare_ftrace_return
popl %edx
......@@ -1188,6 +1189,7 @@ return_to_handler:
pushl %eax
pushl %ecx
pushl %edx
movl %ebp, %eax
call ftrace_return_to_handler
movl %eax, 0xc(%esp)
popl %edx
......
......@@ -135,6 +135,7 @@ ENTRY(ftrace_graph_caller)
leaq 8(%rbp), %rdi
movq 0x38(%rsp), %rsi
movq (%rbp), %rdx
subq $MCOUNT_INSN_SIZE, %rsi
call prepare_ftrace_return
......@@ -150,6 +151,7 @@ GLOBAL(return_to_handler)
/* Save the return values */
movq %rax, (%rsp)
movq %rdx, 8(%rsp)
movq %rbp, %rdi
call ftrace_return_to_handler
......
......@@ -408,7 +408,8 @@ int ftrace_disable_ftrace_graph_caller(void)
* Hook the return address and push it in the stack of return addrs
* in current thread info.
*/
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long frame_pointer)
{
unsigned long old;
int faulted;
......@@ -453,7 +454,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
if (ftrace_push_return_trace(old, self_addr, &trace.depth,
frame_pointer) == -EBUSY) {
*parent = old;
return;
}
......
......@@ -362,6 +362,7 @@ struct ftrace_ret_stack {
unsigned long func;
unsigned long long calltime;
unsigned long long subtime;
unsigned long fp;
};
/*
......@@ -372,7 +373,8 @@ struct ftrace_ret_stack {
extern void return_to_handler(void);
extern int
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
unsigned long frame_pointer);
/*
* Sometimes we don't want to trace a function with the function
......
......@@ -3,6 +3,8 @@
#include <linux/fs.h>
#include <asm/page.h>
/*
* Trace sequences are used to allow a function to call several other functions
* to create a string of data to use (up to a max of PAGE_SIZE.
......
......@@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER
config HAVE_FUNCTION_GRAPH_TRACER
bool
config HAVE_FUNCTION_GRAPH_FP_TEST
bool
help
An arch may pass in a unique value (frame pointer) to both the
entering and exiting of a function. On exit, the value is compared
and if it does not match, then it will panic the kernel.
config HAVE_FUNCTION_TRACE_MCOUNT_TEST
bool
help
......@@ -121,6 +128,7 @@ config FUNCTION_GRAPH_TRACER
bool "Kernel Function Graph Tracer"
depends on HAVE_FUNCTION_GRAPH_TRACER
depends on FUNCTION_TRACER
depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE
default y
help
Enable the kernel to trace a function at both its return
......
......@@ -1224,6 +1224,13 @@ static void ftrace_shutdown(int command)
return;
ftrace_start_up--;
/*
* Just warn in case of unbalance, no need to kill ftrace, it's not
* critical but the ftrace_call callers may be never nopped again after
* further ftrace uses.
*/
WARN_ON_ONCE(ftrace_start_up < 0);
if (!ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
......
......@@ -186,7 +186,7 @@ static int kmem_trace_init(struct trace_array *tr)
int cpu;
kmemtrace_array = tr;
for_each_cpu_mask(cpu, cpu_possible_map)
for_each_cpu(cpu, cpu_possible_mask)
tracing_reset(tr, cpu);
kmemtrace_start_probes();
......
This diff is collapsed.
......@@ -102,8 +102,10 @@ static enum event_status read_page(int cpu)
event = (void *)&rpage->data[i];
switch (event->type_len) {
case RINGBUF_TYPE_PADDING:
/* We don't expect any padding */
/* failed writes may be discarded events */
if (!event->time_delta)
KILL_TEST();
inc = event->array[0] + 4;
break;
case RINGBUF_TYPE_TIME_EXTEND:
inc = 8;
......@@ -119,7 +121,7 @@ static enum event_status read_page(int cpu)
KILL_TEST();
break;
}
inc = event->array[0];
inc = event->array[0] + 4;
break;
default:
entry = ring_buffer_event_data(event);
......@@ -201,7 +203,7 @@ static void ring_buffer_producer(void)
* Hammer the buffer for 10 secs (this may
* make the system stall)
*/
pr_info("Starting ring buffer hammer\n");
trace_printk("Starting ring buffer hammer\n");
do_gettimeofday(&start_tv);
do {
struct ring_buffer_event *event;
......@@ -237,7 +239,7 @@ static void ring_buffer_producer(void)
#endif
} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
pr_info("End ring buffer hammer\n");
trace_printk("End ring buffer hammer\n");
if (consumer) {
/* Init both completions here to avoid races */
......@@ -260,49 +262,50 @@ static void ring_buffer_producer(void)
overruns = ring_buffer_overruns(buffer);
if (kill_test)
pr_info("ERROR!\n");
pr_info("Time: %lld (usecs)\n", time);
pr_info("Overruns: %lld\n", overruns);
trace_printk("ERROR!\n");
trace_printk("Time: %lld (usecs)\n", time);
trace_printk("Overruns: %lld\n", overruns);
if (disable_reader)
pr_info("Read: (reader disabled)\n");
trace_printk("Read: (reader disabled)\n");
else
pr_info("Read: %ld (by %s)\n", read,
trace_printk("Read: %ld (by %s)\n", read,
read_events ? "events" : "pages");
pr_info("Entries: %lld\n", entries);
pr_info("Total: %lld\n", entries + overruns + read);
pr_info("Missed: %ld\n", missed);
pr_info("Hit: %ld\n", hit);
trace_printk("Entries: %lld\n", entries);
trace_printk("Total: %lld\n", entries + overruns + read);
trace_printk("Missed: %ld\n", missed);
trace_printk("Hit: %ld\n", hit);
/* Convert time from usecs to millisecs */
do_div(time, USEC_PER_MSEC);
if (time)
hit /= (long)time;
else
pr_info("TIME IS ZERO??\n");
trace_printk("TIME IS ZERO??\n");
pr_info("Entries per millisec: %ld\n", hit);
trace_printk("Entries per millisec: %ld\n", hit);
if (hit) {
/* Calculate the average time in nanosecs */
avg = NSEC_PER_MSEC / hit;
pr_info("%ld ns per entry\n", avg);
trace_printk("%ld ns per entry\n", avg);
}
if (missed) {
if (time)
missed /= (long)time;
pr_info("Total iterations per millisec: %ld\n", hit + missed);
trace_printk("Total iterations per millisec: %ld\n",
hit + missed);
/* it is possible that hit + missed will overflow and be zero */
if (!(hit + missed)) {
pr_info("hit + missed overflowed and totalled zero!\n");
trace_printk("hit + missed overflowed and totalled zero!\n");
hit--; /* make it non zero */
}
/* Caculate the average time in nanosecs */
avg = NSEC_PER_MSEC / (hit + missed);
pr_info("%ld ns per entry\n", avg);
trace_printk("%ld ns per entry\n", avg);
}
}
......@@ -353,7 +356,7 @@ static int ring_buffer_producer_thread(void *arg)
ring_buffer_producer();
pr_info("Sleeping for 10 secs\n");
trace_printk("Sleeping for 10 secs\n");
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ * SLEEP_TIME);
__set_current_state(TASK_RUNNING);
......
......@@ -2191,11 +2191,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
return -ENOMEM;
mutex_lock(&tracing_cpumask_update_lock);
err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
if (err)
goto err_unlock;
mutex_lock(&tracing_cpumask_update_lock);
local_irq_disable();
__raw_spin_lock(&ftrace_max_lock);
for_each_tracing_cpu(cpu) {
......@@ -2223,8 +2224,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
return count;
err_unlock:
mutex_unlock(&tracing_cpumask_update_lock);
free_cpumask_var(tracing_cpumask);
free_cpumask_var(tracing_cpumask_new);
return err;
}
......@@ -3626,7 +3626,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
struct trace_seq *s;
unsigned long cnt;
s = kmalloc(sizeof(*s), GFP_ATOMIC);
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return ENOMEM;
......
......@@ -27,8 +27,6 @@
#include "trace.h"
#include "trace_output.h"
static DEFINE_MUTEX(filter_mutex);
enum filter_op_ids
{
OP_OR,
......@@ -178,7 +176,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
static int filter_pred_strloc(struct filter_pred *pred, void *event,
int val1, int val2)
{
int str_loc = *(int *)(event + pred->offset);
unsigned short str_loc = *(unsigned short *)(event + pred->offset);
char *addr = (char *)(event + str_loc);
int cmp, match;
......@@ -294,12 +292,12 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
{
struct event_filter *filter = call->filter;
mutex_lock(&filter_mutex);
mutex_lock(&event_mutex);
if (filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
trace_seq_printf(s, "none\n");
mutex_unlock(&filter_mutex);
mutex_unlock(&event_mutex);
}
void print_subsystem_event_filter(struct event_subsystem *system,
......@@ -307,12 +305,12 @@ void print_subsystem_event_filter(struct event_subsystem *system,
{
struct event_filter *filter = system->filter;
mutex_lock(&filter_mutex);
mutex_lock(&event_mutex);
if (filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
trace_seq_printf(s, "none\n");
mutex_unlock(&filter_mutex);
mutex_unlock(&event_mutex);
}
static struct ftrace_event_field *
......@@ -381,6 +379,7 @@ void destroy_preds(struct ftrace_event_call *call)
filter_free_pred(filter->preds[i]);
}
kfree(filter->preds);
kfree(filter->filter_string);
kfree(filter);
call->filter = NULL;
}
......@@ -433,7 +432,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
filter->n_preds = 0;
}
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
if (!call->define_fields)
continue;
......@@ -443,7 +441,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
remove_filter_string(call->filter);
}
}
mutex_unlock(&event_mutex);
}
static int filter_add_pred_fn(struct filter_parse_state *ps,
......@@ -546,6 +543,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
filter_pred_fn_t fn;
unsigned long long val;
int string_type;
int ret;
pred->fn = filter_pred_none;
......@@ -581,7 +579,11 @@ static int filter_add_pred(struct filter_parse_state *ps,
pred->not = 1;
return filter_add_pred_fn(ps, call, pred, fn);
} else {
if (strict_strtoull(pred->str_val, 0, &val)) {
if (field->is_signed)
ret = strict_strtoll(pred->str_val, 0, &val);
else
ret = strict_strtoull(pred->str_val, 0, &val);
if (ret) {
parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
return -EINVAL;
}
......@@ -625,7 +627,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
filter->preds[filter->n_preds] = pred;
filter->n_preds++;
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
if (!call->define_fields)
......@@ -636,14 +637,12 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
err = filter_add_pred(ps, call, pred);
if (err) {
mutex_unlock(&event_mutex);
filter_free_subsystem_preds(system);
parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
goto out;
}
replace_filter_string(call->filter, filter_string);
}
mutex_unlock(&event_mutex);
out:
return err;
}
......@@ -1070,12 +1069,12 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
struct filter_parse_state *ps;
mutex_lock(&filter_mutex);
mutex_lock(&event_mutex);
if (!strcmp(strstrip(filter_string), "0")) {
filter_disable_preds(call);
remove_filter_string(call->filter);
mutex_unlock(&filter_mutex);
mutex_unlock(&event_mutex);
return 0;
}
......@@ -1103,7 +1102,7 @@ out:
postfix_clear(ps);
kfree(ps);
out_unlock:
mutex_unlock(&filter_mutex);
mutex_unlock(&event_mutex);
return err;
}
......@@ -1115,12 +1114,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
struct filter_parse_state *ps;
mutex_lock(&filter_mutex);
mutex_lock(&event_mutex);
if (!strcmp(strstrip(filter_string), "0")) {
filter_free_subsystem_preds(system);
remove_filter_string(system->filter);
mutex_unlock(&filter_mutex);
mutex_unlock(&event_mutex);
return 0;
}
......@@ -1148,7 +1147,7 @@ out:
postfix_clear(ps);
kfree(ps);
out_unlock:
mutex_unlock(&filter_mutex);
mutex_unlock(&event_mutex);
return err;
}
......
......@@ -193,8 +193,10 @@ static void tracing_start_function_trace(void)
static void tracing_stop_function_trace(void)
{
ftrace_function_enabled = 0;
/* OK if they are not registered */
if (func_flags.val & TRACE_FUNC_OPT_STACK)
unregister_ftrace_function(&trace_stack_ops);
else
unregister_ftrace_function(&trace_ops);
}
......
......@@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = {
/* Add a function return address to the trace stack on thread info.*/
int
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
unsigned long frame_pointer)
{
unsigned long long calltime;
int index;
......@@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
current->ret_stack[index].func = func;
current->ret_stack[index].calltime = calltime;
current->ret_stack[index].subtime = 0;
current->ret_stack[index].fp = frame_pointer;
*depth = index;
return 0;
......@@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
/* Retrieve a function return address to the trace stack on thread info.*/
static void
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
unsigned long frame_pointer)
{
int index;
......@@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
return;
}
#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
/*
* The arch may choose to record the frame pointer used
* and check it here to make sure that it is what we expect it
* to be. If gcc does not set the place holder of the return
* address in the frame pointer, and does a copy instead, then
* the function graph trace will fail. This test detects this
* case.
*
* Currently, x86_32 with optimize for size (-Os) makes the latest
* gcc do the above.
*/
if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
ftrace_graph_stop();
WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
" from func %pF return to %lx\n",
current->ret_stack[index].fp,
frame_pointer,
(void *)current->ret_stack[index].func,
current->ret_stack[index].ret);
*ret = (unsigned long)panic;
return;
}
#endif
*ret = current->ret_stack[index].ret;
trace->func = current->ret_stack[index].func;
trace->calltime = current->ret_stack[index].calltime;
......@@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
* Send the trace to the ring-buffer.
* @return the original return address.
*/
unsigned long ftrace_return_to_handler(void)
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
{
struct ftrace_graph_ret trace;
unsigned long ret;
ftrace_pop_return_trace(&trace, &ret);
ftrace_pop_return_trace(&trace, &ret, frame_pointer);
trace.rettime = trace_clock_local();
ftrace_graph_return(&trace);
barrier();
......
# builds the trace events example kernel modules;
# then to use one (as root): insmod <module_name.ko>
# If you include a trace header outside of include/trace/events
# then the file that does the #define CREATE_TRACE_POINTS must
# have that tracer file in its main search path. This is because
# define_trace.h will include it, and must be able to find it from
# the include/trace directory.
#
# Here trace-events-sample.c does the CREATE_TRACE_POINTS.
#
CFLAGS_trace-events-sample.o := -I$(src)
obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
......@@ -19,16 +19,21 @@
* If TRACE_SYSTEM is defined, that will be the directory created
* in the ftrace directory under /debugfs/tracing/events/<system>
*
* The define_trace.h belowe will also look for a file name of
* The define_trace.h below will also look for a file name of
* TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
* In this case, it would look for sample.h
*
* If you want a different system than file name, you can override
* the header name by defining TRACE_INCLUDE_FILE
* If the header name will be different than the system name
* (as in this case), then you can override the header name that
* define_trace.h will look up by defining TRACE_INCLUDE_FILE
*
* If this file was called, goofy.h, then we would define:
* This file is called trace-events-sample.h but we want the system
* to be called "sample". Therefore we must define the name of this
* file:
*
* #define TRACE_INCLUDE_FILE goofy
* #define TRACE_INCLUDE_FILE trace-events-sample
*
* As we do an the bottom of this file.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sample
......@@ -99,13 +104,13 @@ TRACE_EVENT(foo_bar,
*
* #define TRACE_INCLUDE_PATH ../../samples/trace_events
*
* But I chose to simply make it use the current directory and then in
* the Makefile I added:
* But the safest and easiest way to simply make it use the directory
* that the file is in is to add in the Makefile:
*
* CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/
* CFLAGS_trace-events-sample.o := -I$(src)
*
* This will make sure the current path is part of the include
* structure for our file so that we can find it.
* structure for our file so that define_trace.h can find it.
*
* I could have made only the top level directory the include:
*
......@@ -115,8 +120,8 @@ TRACE_EVENT(foo_bar,
*
* #define TRACE_INCLUDE_PATH samples/trace_events
*
* But then if something defines "samples" or "trace_events" then we
* could risk that being converted too, and give us an unexpected
* But then if something defines "samples" or "trace_events" as a macro
* then we could risk that being converted too, and give us an unexpected
* result.
*/
#undef TRACE_INCLUDE_PATH
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment