Commit 5622f295 authored by Markus Metzger's avatar Markus Metzger Committed by Ingo Molnar

x86, perf_counter, bts: Optimize BTS overflow handling

Draining the BTS buffer on a buffer overflow interrupt takes too
long resulting in a kernel lockup when tracing the kernel.

Restructure perf_counter sampling into sample creation and sample
output.

Prepare a single reference sample for BTS sampling and update the
from and to address fields when draining the BTS buffer. Drain the
entire BTS buffer between a single perf_output_begin() /
perf_output_end() pair.
Signed-off-by: default avatarMarkus Metzger <markus.t.metzger@intel.com>
Signed-off-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090915130023.A16204@sedona.ch.intel.com>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 4b77a729
......@@ -36,10 +36,10 @@ static u64 perf_counter_mask __read_mostly;
#define BTS_RECORD_SIZE 24
/* The size of a per-cpu BTS buffer in bytes: */
#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024)
#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048)
/* The BTS overflow threshold in bytes from the end of the buffer: */
#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64)
#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128)
/*
......@@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void)
local_irq_restore(flags);
}
static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
struct perf_sample_data *data)
static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc)
{
struct debug_store *ds = cpuc->ds;
struct bts_record {
......@@ -1498,8 +1497,11 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
u64 flags;
};
struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
unsigned long orig_ip = data->regs->ip;
struct bts_record *at, *top;
struct perf_output_handle handle;
struct perf_event_header header;
struct perf_sample_data data;
struct pt_regs regs;
if (!counter)
return;
......@@ -1510,19 +1512,38 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
top = (struct bts_record *)(unsigned long)ds->bts_index;
if (top <= at)
return;
ds->bts_index = ds->bts_buffer_base;
data.period = counter->hw.last_period;
data.addr = 0;
regs.ip = 0;
/*
* Prepare a generic sample, i.e. fill in the invariant fields.
* We will overwrite the from and to address before we output
* the sample.
*/
perf_prepare_sample(&header, &data, counter, &regs);
if (perf_output_begin(&handle, counter,
header.size * (top - at), 1, 1))
return;
for (; at < top; at++) {
data->regs->ip = at->from;
data->addr = at->to;
data.ip = at->from;
data.addr = at->to;
perf_counter_output(counter, 1, data);
perf_output_sample(&handle, &header, &data, counter);
}
data->regs->ip = orig_ip;
data->addr = 0;
perf_output_end(&handle);
/* There's new data available. */
counter->hw.interrupts++;
counter->pending_kill = POLL_IN;
}
......@@ -1552,13 +1573,9 @@ static void x86_pmu_disable(struct perf_counter *counter)
x86_perf_counter_update(counter, hwc, idx);
/* Drain the remaining BTS records. */
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
struct perf_sample_data data;
struct pt_regs regs;
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
intel_pmu_drain_bts_buffer(cpuc);
data.regs = &regs;
intel_pmu_drain_bts_buffer(cpuc, &data);
}
cpuc->counters[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
......@@ -1619,7 +1636,6 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
data.regs = regs;
data.addr = 0;
cpuc = &__get_cpu_var(cpu_hw_counters);
......@@ -1644,7 +1660,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
if (!x86_perf_counter_set_period(counter, hwc, idx))
continue;
if (perf_counter_overflow(counter, 1, &data))
if (perf_counter_overflow(counter, 1, &data, regs))
p6_pmu_disable_counter(hwc, idx);
}
......@@ -1665,13 +1681,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
int bit, loops;
u64 ack, status;
data.regs = regs;
data.addr = 0;
cpuc = &__get_cpu_var(cpu_hw_counters);
perf_disable();
intel_pmu_drain_bts_buffer(cpuc, &data);
intel_pmu_drain_bts_buffer(cpuc);
status = intel_pmu_get_status();
if (!status) {
perf_enable();
......@@ -1702,7 +1717,7 @@ again:
data.period = counter->hw.last_period;
if (perf_counter_overflow(counter, 1, &data))
if (perf_counter_overflow(counter, 1, &data, regs))
intel_pmu_disable_counter(&counter->hw, bit);
}
......@@ -1729,7 +1744,6 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
data.regs = regs;
data.addr = 0;
cpuc = &__get_cpu_var(cpu_hw_counters);
......@@ -1754,7 +1768,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
if (!x86_perf_counter_set_period(counter, hwc, idx))
continue;
if (perf_counter_overflow(counter, 1, &data))
if (perf_counter_overflow(counter, 1, &data, regs))
amd_pmu_disable_counter(hwc, idx);
}
......
......@@ -691,6 +691,17 @@ struct perf_cpu_context {
int recursion[4];
};
struct perf_output_handle {
struct perf_counter *counter;
struct perf_mmap_data *data;
unsigned long head;
unsigned long offset;
int nmi;
int sample;
int locked;
unsigned long flags;
};
#ifdef CONFIG_PERF_COUNTERS
/*
......@@ -722,16 +733,38 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
extern void perf_counter_update_userpage(struct perf_counter *counter);
struct perf_sample_data {
struct pt_regs *regs;
u64 type;
u64 ip;
struct {
u32 pid;
u32 tid;
} tid_entry;
u64 time;
u64 addr;
u64 id;
u64 stream_id;
struct {
u32 cpu;
u32 reserved;
} cpu_entry;
u64 period;
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
};
extern void perf_output_sample(struct perf_output_handle *handle,
struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_counter *counter);
extern void perf_prepare_sample(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_counter *counter,
struct pt_regs *regs);
extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
struct perf_sample_data *data);
extern void perf_counter_output(struct perf_counter *counter, int nmi,
struct perf_sample_data *data);
struct perf_sample_data *data,
struct pt_regs *regs);
/*
* Return 1 for a software counter, 0 for a hardware counter
......@@ -781,6 +814,12 @@ extern void perf_tpcounter_event(int event_id, u64 addr, u64 count,
#define perf_instruction_pointer(regs) instruction_pointer(regs)
#endif
extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_counter *counter, unsigned int size,
int nmi, int sample);
extern void perf_output_end(struct perf_output_handle *handle);
extern void perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len);
#else
static inline void
perf_counter_task_sched_in(struct task_struct *task, int cpu) { }
......@@ -807,7 +846,28 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) { }
static inline void perf_counter_comm(struct task_struct *tsk) { }
static inline void perf_counter_fork(struct task_struct *tsk) { }
static inline void perf_counter_init(void) { }
static inline int
perf_output_begin(struct perf_output_handle *handle, struct perf_counter *c,
unsigned int size, int nmi, int sample) { }
static inline void perf_output_end(struct perf_output_handle *handle) { }
static inline void
perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len) { }
static inline void
perf_output_sample(struct perf_output_handle *handle,
struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_counter *counter) { }
static inline void
perf_prepare_sample(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_counter *counter,
struct pt_regs *regs) { }
#endif
#define perf_output_put(handle, x) \
perf_output_copy((handle), &(x), sizeof(x))
#endif /* __KERNEL__ */
#endif /* _LINUX_PERF_COUNTER_H */
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment