Commit 96200591 authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'tracing/hw-breakpoints' into perf/core

Conflicts:
	arch/x86/kernel/kprobes.c
	kernel/trace/Makefile

Merge reason: hw-breakpoints perf integration is looking
              good in testing and in reviews, plus conflicts
              are mounting up - so merge & resolve.
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parents 7031281e 68efa37d
......@@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG
config HAVE_DEFAULT_NO_SPIN_MUTEXES
bool
config HAVE_HW_BREAKPOINT
bool
depends on HAVE_PERF_EVENTS
select ANON_INODES
select PERF_EVENTS
source "kernel/gcov/Kconfig"
......@@ -49,6 +49,7 @@ config X86
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
select HAVE_HW_BREAKPOINT
select HAVE_ARCH_KMEMCHECK
config OUTPUT_FORMAT
......
......@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
header-y += sigcontext32.h
header-y += ucontext.h
header-y += processor-flags.h
header-y += hw_breakpoint.h
unifdef-y += e820.h
unifdef-y += ist.h
......
......@@ -17,6 +17,7 @@
#include <linux/user.h>
#include <linux/elfcore.h>
#include <asm/debugreg.h>
/*
* fill in the user structure for an a.out core dump
......@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
>> PAGE_SHIFT;
dump->u_dsize -= dump->u_tsize;
dump->u_ssize = 0;
dump->u_debugreg[0] = current->thread.debugreg0;
dump->u_debugreg[1] = current->thread.debugreg1;
dump->u_debugreg[2] = current->thread.debugreg2;
dump->u_debugreg[3] = current->thread.debugreg3;
dump->u_debugreg[4] = 0;
dump->u_debugreg[5] = 0;
dump->u_debugreg[6] = current->thread.debugreg6;
dump->u_debugreg[7] = current->thread.debugreg7;
aout_dump_debugregs(dump);
if (dump->start_stack < TASK_SIZE)
dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
......
......@@ -18,6 +18,7 @@
#define DR_TRAP1 (0x2) /* db1 */
#define DR_TRAP2 (0x4) /* db2 */
#define DR_TRAP3 (0x8) /* db3 */
#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
#define DR_STEP (0x4000) /* single-step */
#define DR_SWITCH (0x8000) /* task switch */
......@@ -49,6 +50,8 @@
#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
......@@ -67,4 +70,34 @@
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
/*
* HW breakpoint additions
*/
#ifdef __KERNEL__
DECLARE_PER_CPU(unsigned long, dr7);
static inline void hw_breakpoint_disable(void)
{
/* Zero the control register for HW Breakpoint */
set_debugreg(0UL, 7);
/* Zero-out the individual HW breakpoint address registers */
set_debugreg(0UL, 0);
set_debugreg(0UL, 1);
set_debugreg(0UL, 2);
set_debugreg(0UL, 3);
}
static inline int hw_breakpoint_active(void)
{
return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK;
}
extern void aout_dump_debugregs(struct user *dump);
extern void hw_breakpoint_restore(void);
#endif /* __KERNEL__ */
#endif /* _ASM_X86_DEBUGREG_H */
#ifndef _I386_HW_BREAKPOINT_H
#define _I386_HW_BREAKPOINT_H
#ifdef __KERNEL__
#define __ARCH_HW_BREAKPOINT_H
/*
* The name should probably be something dealt in
* a higher level. While dealing with the user
* (display/resolving)
*/
struct arch_hw_breakpoint {
char *name; /* Contains name of the symbol to set bkpt */
unsigned long address;
u8 len;
u8 type;
};
#include <linux/kdebug.h>
#include <linux/percpu.h>
#include <linux/list.h>
/* Available HW breakpoint length encodings */
#define X86_BREAKPOINT_LEN_1 0x40
#define X86_BREAKPOINT_LEN_2 0x44
#define X86_BREAKPOINT_LEN_4 0x4c
#define X86_BREAKPOINT_LEN_EXECUTE 0x40
#ifdef CONFIG_X86_64
#define X86_BREAKPOINT_LEN_8 0x48
#endif
/* Available HW breakpoint type encodings */
/* trigger on instruction execute */
#define X86_BREAKPOINT_EXECUTE 0x80
/* trigger on memory write */
#define X86_BREAKPOINT_WRITE 0x81
/* trigger on memory read or write */
#define X86_BREAKPOINT_RW 0x83
/* Total number of available HW breakpoint registers */
#define HBP_NUM 4
struct perf_event;
struct pmu;
extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
struct task_struct *tsk);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
int arch_install_hw_breakpoint(struct perf_event *bp);
void arch_uninstall_hw_breakpoint(struct perf_event *bp);
void hw_breakpoint_pmu_read(struct perf_event *bp);
void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
extern void
arch_fill_perf_breakpoint(struct perf_event *bp);
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
extern int arch_bp_generic_fields(int x86_len, int x86_type,
int *gen_len, int *gen_type);
extern struct pmu perf_ops_bp;
#endif /* __KERNEL__ */
#endif /* _I386_HW_BREAKPOINT_H */
......@@ -30,6 +30,7 @@ struct mm_struct;
#include <linux/math64.h>
#include <linux/init.h>
#define HBP_NUM 4
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
......@@ -422,6 +423,8 @@ extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
struct perf_event;
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
......@@ -443,13 +446,10 @@ struct thread_struct {
unsigned long fs;
#endif
unsigned long gs;
/* Hardware debugging registers: */
unsigned long debugreg0;
unsigned long debugreg1;
unsigned long debugreg2;
unsigned long debugreg3;
unsigned long debugreg6;
unsigned long debugreg7;
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */
unsigned long debugreg6;
/* Fault info: */
unsigned long cr2;
unsigned long trap_no;
......
......@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
obj-y += alternative.o i8253.o pci-nommu.o
obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
......
This diff is collapsed.
......@@ -43,6 +43,7 @@
#include <linux/smp.h>
#include <linux/nmi.h>
#include <asm/debugreg.h>
#include <asm/apicdef.h>
#include <asm/system.h>
......@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
"resuming...\n");
kgdb_arch_handle_exception(args->trapnr, args->signr,
args->err, "c", "", regs);
/*
* Reset the BS bit in dr6 (pointed by args->err) to
* denote completion of processing
*/
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
return NOTIFY_STOP;
}
......
......@@ -56,6 +56,7 @@
#include <asm/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
void jprobe_return_end(void);
......@@ -945,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
if (post_kprobe_handler(args->regs))
if (post_kprobe_handler(args->regs)) {
/*
* Reset the BS bit in dr6 (pointed by args->err) to
* denote completion of processing
*/
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
}
break;
case DIE_GPF:
/*
......
......@@ -25,6 +25,7 @@
#include <asm/desc.h>
#include <asm/system.h>
#include <asm/cacheflush.h>
#include <asm/debugreg.h>
static void set_idt(void *newidt, __u16 limit)
{
......@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
......
......@@ -18,6 +18,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
unsigned long addr)
......@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
......
......@@ -10,6 +10,7 @@
#include <linux/clockchips.h>
#include <linux/random.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
......@@ -17,6 +18,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
#include <asm/debugreg.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
......@@ -103,14 +105,7 @@ void flush_thread(void)
}
#endif
clear_tsk_thread_flag(tsk, TIF_DEBUG);
tsk->thread.debugreg0 = 0;
tsk->thread.debugreg1 = 0;
tsk->thread.debugreg2 = 0;
tsk->thread.debugreg3 = 0;
tsk->thread.debugreg6 = 0;
tsk->thread.debugreg7 = 0;
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
......@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
set_debugreg(next->debugreg0, 0);
set_debugreg(next->debugreg1, 1);
set_debugreg(next->debugreg2, 2);
set_debugreg(next->debugreg3, 3);
/* no 4 and 5 */
set_debugreg(next->debugreg6, 6);
set_debugreg(next->debugreg7, 7);
}
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
......
......@@ -58,6 +58,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
#include <asm/debugreg.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
......@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
task_user_gs(p) = get_user_gs(regs);
p->thread.io_bitmap_ptr = NULL;
tsk = current;
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
......
......@@ -52,6 +52,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
#include <asm/debugreg.h>
asmlinkage extern void ret_from_fork(void);
......@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
......@@ -341,6 +346,7 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
return err;
}
......@@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
if (preload_fpu)
__math_state_restore();
return prev_p;
}
......
......@@ -22,6 +22,8 @@
#include <linux/seccomp.h>
#include <linux/signal.h>
#include <linux/workqueue.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
......@@ -34,6 +36,7 @@
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/ds.h>
#include <asm/hw_breakpoint.h>
#include "tls.h"
......@@ -249,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
return 0;
}
static unsigned long debugreg_addr_limit(struct task_struct *task)
{
return TASK_SIZE - 3;
}
#else /* CONFIG_X86_64 */
#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
......@@ -378,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
return 0;
}
static unsigned long debugreg_addr_limit(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
return IA32_PAGE_OFFSET - 3;
#endif
return TASK_SIZE_MAX - 7;
}
#endif /* CONFIG_X86_32 */
static unsigned long get_flags(struct task_struct *task)
......@@ -566,98 +555,228 @@ static int genregs_set(struct task_struct *target,
return ret;
}
static void ptrace_triggered(struct perf_event *bp, void *data)
{
int i;
struct thread_struct *thread = &(current->thread);
/*
* Store in the virtual DR6 register the fact that the breakpoint
* was hit so the thread's debugger will see it.
*/
for (i = 0; i < HBP_NUM; i++) {
if (thread->ptrace_bps[i] == bp)
break;
}
thread->debugreg6 |= (DR_TRAP0 << i);
}
/*
* This function is trivial and will be inlined by the compiler.
* Having it separates the implementation details of debug
* registers from the interface details of ptrace.
* Walk through every ptrace breakpoints for this thread and
* build the dr7 value on top of their attributes.
*
*/
static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
static unsigned long ptrace_get_dr7(struct perf_event *bp[])
{
switch (n) {
case 0: return child->thread.debugreg0;
case 1: return child->thread.debugreg1;
case 2: return child->thread.debugreg2;
case 3: return child->thread.debugreg3;
case 6: return child->thread.debugreg6;
case 7: return child->thread.debugreg7;
int i;
int dr7 = 0;
struct arch_hw_breakpoint *info;
for (i = 0; i < HBP_NUM; i++) {
if (bp[i] && !bp[i]->attr.disabled) {
info = counter_arch_bp(bp[i]);
dr7 |= encode_dr7(i, info->len, info->type);
}
}
return 0;
return dr7;
}
static int ptrace_set_debugreg(struct task_struct *child,
int n, unsigned long data)
/*
* Handle ptrace writes to debug register 7.
*/
static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
{
int i;
struct thread_struct *thread = &(tsk->thread);
unsigned long old_dr7;
int i, orig_ret = 0, rc = 0;
int enabled, second_pass = 0;
unsigned len, type;
int gen_len, gen_type;
struct perf_event *bp;
data &= ~DR_CONTROL_RESERVED;
old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
restore:
/*
* Loop through all the hardware breakpoints, making the
* appropriate changes to each.
*/
for (i = 0; i < HBP_NUM; i++) {
enabled = decode_dr7(data, i, &len, &type);
bp = thread->ptrace_bps[i];
if (!enabled) {
if (bp) {
/*
* Don't unregister the breakpoints right-away,
* unless all register_user_hw_breakpoint()
* requests have succeeded. This prevents
* any window of opportunity for debug
* register grabbing by other users.
*/
if (!second_pass)
continue;
thread->ptrace_bps[i] = NULL;
unregister_hw_breakpoint(bp);
}
continue;
}
if (unlikely(n == 4 || n == 5))
return -EIO;
/*
* We shoud have at least an inactive breakpoint at this
* slot. It means the user is writing dr7 without having
* written the address register first
*/
if (!bp) {
rc = -EINVAL;
break;
}
if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
return -EIO;
rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
if (rc)
break;
switch (n) {
case 0: child->thread.debugreg0 = data; break;
case 1: child->thread.debugreg1 = data; break;
case 2: child->thread.debugreg2 = data; break;
case 3: child->thread.debugreg3 = data; break;
/*
* This is a temporary thing as bp is unregistered/registered
* to simulate modification
*/
bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
gen_type, bp->callback,
tsk, true);
thread->ptrace_bps[i] = NULL;
case 6:
if ((data & ~0xffffffffUL) != 0)
return -EIO;
child->thread.debugreg6 = data;
break;
if (!bp) { /* incorrect bp, or we have a bug in bp API */
rc = -EINVAL;
break;
}
if (IS_ERR(bp)) {
rc = PTR_ERR(bp);
bp = NULL;
break;
}
thread->ptrace_bps[i] = bp;
}
/*
* Make a second pass to free the remaining unused breakpoints
* or to restore the original breakpoints if an error occurred.
*/
if (!second_pass) {
second_pass = 1;
if (rc < 0) {
orig_ret = rc;
data = old_dr7;
}
goto restore;
}
return ((orig_ret < 0) ? orig_ret : rc);
}
case 7:
/*
* Handle PTRACE_PEEKUSR calls for the debug register area.
*/
static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
{
struct thread_struct *thread = &(tsk->thread);
unsigned long val = 0;
if (n < HBP_NUM) {
struct perf_event *bp;
bp = thread->ptrace_bps[n];
if (!bp)
return 0;
val = bp->hw.info.address;
} else if (n == 6) {
val = thread->debugreg6;
} else if (n == 7) {
val = ptrace_get_dr7(thread->ptrace_bps);
}
return val;
}
static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
unsigned long addr)
{
struct perf_event *bp;
struct thread_struct *t = &tsk->thread;
if (!t->ptrace_bps[nr]) {
/*
* Sanity-check data. Take one half-byte at once with
* check = (val >> (16 + 4*i)) & 0xf. It contains the
* R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
* 2 and 3 are LENi. Given a list of invalid values,
* we do mask |= 1 << invalid_value, so that
* (mask >> check) & 1 is a correct test for invalid
* values.
*
* R/Wi contains the type of the breakpoint /
* watchpoint, LENi contains the length of the watched
* data in the watchpoint case.
*
* The invalid values are:
* - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
* - R/Wi == 0x10 (break on I/O reads or writes), so
* mask |= 0x4444.
* - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
* 0x1110.
*
* Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
*
* See the Intel Manual "System Programming Guide",
* 15.2.4
*
* Note that LENi == 0x10 is defined on x86_64 in long
* mode (i.e. even for 32-bit userspace software, but
* 64-bit kernel), so the x86_64 mask value is 0x5454.
* See the AMD manual no. 24593 (AMD64 System Programming)
* Put stub len and type to register (reserve) an inactive but
* correct bp
*/
#ifdef CONFIG_X86_32
#define DR7_MASK 0x5f54
#else
#define DR7_MASK 0x5554
#endif
data &= ~DR_CONTROL_RESERVED;
for (i = 0; i < 4; i++)
if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
return -EIO;
child->thread.debugreg7 = data;
if (data)
set_tsk_thread_flag(child, TIF_DEBUG);
else
clear_tsk_thread_flag(child, TIF_DEBUG);
break;
bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
HW_BREAKPOINT_W,
ptrace_triggered, tsk,
false);
} else {
bp = t->ptrace_bps[nr];
t->ptrace_bps[nr] = NULL;
bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
bp->attr.bp_type,
bp->callback,
tsk,
bp->attr.disabled);
}
if (!bp)
return -EIO;
/*
* CHECKME: the previous code returned -EIO if the addr wasn't a
* valid task virtual addr. The new one will return -EINVAL in this
* case.
* -EINVAL may be what we want for in-kernel breakpoints users, but
* -EIO looks better for ptrace, since we refuse a register writing
* for the user. And anyway this is the previous behaviour.
*/
if (IS_ERR(bp))
return PTR_ERR(bp);
t->ptrace_bps[nr] = bp;
return 0;
}
/*
* Handle PTRACE_POKEUSR calls for the debug register area.
*/
int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
{
struct thread_struct *thread = &(tsk->thread);
int rc = 0;
/* There are no DR4 or DR5 registers */
if (n == 4 || n == 5)
return -EIO;
if (n == 6) {
thread->debugreg6 = val;
goto ret_path;
}
if (n < HBP_NUM) {
rc = ptrace_set_breakpoint_addr(tsk, n, val);
if (rc)
return rc;
}
/* All that's left is DR7 */
if (n == 7)
rc = ptrace_write_dr7(tsk, val);
ret_path:
return rc;
}
/*
* These access the current or another (stopped) task's io permission
* bitmap for debugging or core dump.
......
......@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/*
* Re-enable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
if (current->thread.debugreg7)
set_debugreg(current->thread.debugreg7, 7);
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
......
......@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
unsigned long condition;
unsigned long dr6;
int si_code;
get_debugreg(condition, 6);
get_debugreg(dr6, 6);
/* Catch kmemcheck conditions first of all! */
if (condition & DR_STEP && kmemcheck_trap(regs))
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
/* DR6 may or may not be cleared by the CPU */
set_debugreg(0, 6);
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
tsk->thread.debugctlmsr = 0;
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
SIGTRAP) == NOTIFY_STOP)
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
if (!tsk->thread.debugreg7)
goto clear_dr7;
if (regs->flags & X86_VM_MASK) {
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
return;
}
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK)
goto debug_vm86;
#endif
/* Save debug status register where ptrace can see it */
tsk->thread.debugreg6 = condition;
/*
* Single-stepping through TF: make sure we ignore any events in
* kernel space (but re-enable TF when returning to user mode).
* Single-stepping through system calls: ignore any exceptions in
* kernel space, but re-enable TF when returning to user mode.
*
* We already checked v86 mode above, so we can check for kernel mode
* by just checking the CPL of CS.
*/
if (condition & DR_STEP) {
if (!user_mode(regs))
goto clear_TF_reenable;
if ((dr6 & DR_STEP) && !user_mode(regs)) {
tsk->thread.debugreg6 &= ~DR_STEP;
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
}
si_code = get_si_code(condition);
/* Ok, finally something we can handle */
send_sigtrap(tsk, regs, error_code, si_code);
/*
* Disable additional traps. They'll be re-enabled when
* the signal is delivered.
*/
clear_dr7:
set_debugreg(0, 7);
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
return;
#ifdef CONFIG_X86_32
debug_vm86:
/* reenable preemption: handle_vm86_trap() might sleep */
dec_preempt_count();
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
conditional_cli(regs);
return;
#endif
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
preempt_conditional_cli(regs);
return;
}
......
......@@ -42,6 +42,7 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
#include <asm/debugreg.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
#include <asm/desc.h>
......@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
trace_kvm_entry(vcpu->vcpu_id);
kvm_x86_ops->run(vcpu, kvm_run);
if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
set_debugreg(current->thread.debugreg0, 0);
set_debugreg(current->thread.debugreg1, 1);
set_debugreg(current->thread.debugreg2, 2);
set_debugreg(current->thread.debugreg3, 3);
set_debugreg(current->thread.debugreg6, 6);
set_debugreg(current->thread.debugreg7, 7);
}
/*
* If the guest has used debug registers, at least dr7
* will be disabled while returning to the host.
* If we don't have active breakpoints in the host, we don't
* care about the messed up debug address registers. But if
* we have some of them active, restore the old state.
*/
if (hw_breakpoint_active())
hw_breakpoint_restore();
set_bit(KVM_REQ_KICK, &vcpu->requests);
local_irq_enable();
......
......@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
struct die_args *arg = args;
if (val == DIE_DEBUG && (arg->err & DR_STEP))
if (post_kmmio_handler(arg->err, arg->regs) == 1)
if (post_kmmio_handler(arg->err, arg->regs) == 1) {
/*
* Reset the BS bit in dr6 (pointed by args->err) to
* denote completion of processing
*/
(*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
return NOTIFY_STOP;
}
return NOTIFY_DONE;
}
......
......@@ -18,6 +18,7 @@
#include <asm/mce.h>
#include <asm/xcr.h>
#include <asm/suspend.h>
#include <asm/debugreg.h>
#ifdef CONFIG_X86_32
static struct saved_context saved_context;
......@@ -142,31 +143,6 @@ static void fix_processor_context(void)
#endif
load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */
/*
* Now maybe reload the debug registers
*/
if (current->thread.debugreg7) {
#ifdef CONFIG_X86_32
set_debugreg(current->thread.debugreg0, 0);
set_debugreg(current->thread.debugreg1, 1);
set_debugreg(current->thread.debugreg2, 2);
set_debugreg(current->thread.debugreg3, 3);
/* no 4 and 5 */
set_debugreg(current->thread.debugreg6, 6);
set_debugreg(current->thread.debugreg7, 7);
#else
/* CONFIG_X86_64 */
loaddebug(&current->thread, 0);
loaddebug(&current->thread, 1);
loaddebug(&current->thread, 2);
loaddebug(&current->thread, 3);
/* no 4 and 5 */
loaddebug(&current->thread, 6);
loaddebug(&current->thread, 7);
#endif
}
}
/**
......
#ifndef _LINUX_HW_BREAKPOINT_H
#define _LINUX_HW_BREAKPOINT_H
#include <linux/perf_event.h>
enum {
HW_BREAKPOINT_LEN_1 = 1,
HW_BREAKPOINT_LEN_2 = 2,
HW_BREAKPOINT_LEN_4 = 4,
HW_BREAKPOINT_LEN_8 = 8,
};
enum {
HW_BREAKPOINT_R = 1,
HW_BREAKPOINT_W = 2,
HW_BREAKPOINT_X = 4,
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
{
return bp->attr.bp_addr;
}
static inline int hw_breakpoint_type(struct perf_event *bp)
{
return bp->attr.bp_type;
}
static inline int hw_breakpoint_len(struct perf_event *bp)
{
return bp->attr.bp_len;
}
extern struct perf_event *
register_user_hw_breakpoint(unsigned long addr,
int len,
int type,
perf_callback_t triggered,
struct task_struct *tsk,
bool active);
/* FIXME: only change from the attr, and don't unregister */
extern struct perf_event *
modify_user_hw_breakpoint(struct perf_event *bp,
unsigned long addr,
int len,
int type,
perf_callback_t triggered,
struct task_struct *tsk,
bool active);
/*
* Kernel breakpoints are not associated with any particular thread.
*/
extern struct perf_event *
register_wide_hw_breakpoint_cpu(unsigned long addr,
int len,
int type,
perf_callback_t triggered,
int cpu,
bool active);
extern struct perf_event **
register_wide_hw_breakpoint(unsigned long addr,
int len,
int type,
perf_callback_t triggered,
bool active);
extern int register_perf_hw_breakpoint(struct perf_event *bp);
extern int __register_perf_hw_breakpoint(struct perf_event *bp);
extern void unregister_hw_breakpoint(struct perf_event *bp);
extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
extern int reserve_bp_slot(struct perf_event *bp);
extern void release_bp_slot(struct perf_event *bp);
extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
{
return &bp->hw.info;
}
#else /* !CONFIG_HAVE_HW_BREAKPOINT */
static inline struct perf_event *
register_user_hw_breakpoint(unsigned long addr,
int len,
int type,
perf_callback_t triggered,
struct task_struct *tsk,
bool active) { return NULL; }
static inline struct perf_event *
modify_user_hw_breakpoint(struct perf_event *bp,
unsigned long addr,
int len,
int type,
perf_callback_t triggered,
struct task_struct *tsk,
bool active) { return NULL; }
static inline struct perf_event *
register_wide_hw_breakpoint_cpu(unsigned long addr,
int len,
int type,
perf_callback_t triggered,
int cpu,
bool active) { return NULL; }
static inline struct perf_event **
register_wide_hw_breakpoint(unsigned long addr,
int len,
int type,
perf_callback_t triggered,
bool active) { return NULL; }
static inline int
register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
static inline int
__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
static inline void
unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { }
static inline int
reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
static inline void release_bp_slot(struct perf_event *bp) { }
static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { }
static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
{
return NULL;
}
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif /* _LINUX_HW_BREAKPOINT_H */
......@@ -18,6 +18,10 @@
#include <linux/ioctl.h>
#include <asm/byteorder.h>
#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <asm/hw_breakpoint.h>
#endif
/*
* User-space ABI bits:
*/
......@@ -31,6 +35,7 @@ enum perf_type_id {
PERF_TYPE_TRACEPOINT = 2,
PERF_TYPE_HW_CACHE = 3,
PERF_TYPE_RAW = 4,
PERF_TYPE_BREAKPOINT = 5,
PERF_TYPE_MAX, /* non-ABI */
};
......@@ -209,6 +214,15 @@ struct perf_event_attr {
__u32 wakeup_events; /* wakeup every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
union {
struct { /* Hardware breakpoint info */
__u64 bp_addr;
__u32 bp_type;
__u32 bp_len;
};
};
__u32 __reserved_2;
__u64 __reserved_3;
......@@ -478,6 +492,11 @@ struct hw_perf_event {
s64 remaining;
struct hrtimer hrtimer;
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
union { /* breakpoint */
struct arch_hw_breakpoint info;
};
#endif
};
atomic64_t prev_count;
u64 sample_period;
......@@ -546,6 +565,8 @@ struct perf_pending_entry {
void (*func)(struct perf_pending_entry *);
};
typedef void (*perf_callback_t)(struct perf_event *, void *);
/**
* struct perf_event - performance event kernel representation:
*/
......@@ -588,7 +609,7 @@ struct perf_event {
u64 tstamp_running;
u64 tstamp_stopped;
struct perf_event_attr attr;
struct perf_event_attr attr;
struct hw_perf_event hw;
struct perf_event_context *ctx;
......@@ -641,6 +662,10 @@ struct perf_event {
struct event_filter *filter;
#endif
perf_callback_t callback;
perf_callback_t event_callback;
#endif /* CONFIG_PERF_EVENTS */
};
......@@ -745,6 +770,13 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx, int cpu);
extern void perf_event_update_userpage(struct perf_event *event);
extern int perf_event_release_kernel(struct perf_event *event);
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
int cpu,
pid_t pid,
perf_callback_t callback);
extern u64 perf_event_read_value(struct perf_event *event);
struct perf_sample_data {
u64 type;
......@@ -821,6 +853,7 @@ extern int sysctl_perf_event_sample_rate;
extern void perf_event_init(void);
extern void perf_tp_event(int event_id, u64 addr, u64 count,
void *record, int entry_size);
extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
......@@ -855,6 +888,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; }
static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr) { }
static inline void
perf_bp_event(struct perf_event *event, void *data) { }
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_comm(struct task_struct *tsk) { }
......
......@@ -95,6 +95,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
......
......@@ -49,6 +49,7 @@
#include <linux/init_task.h>
#include <linux/perf_event.h>
#include <trace/events/sched.h>
#include <linux/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
......@@ -977,6 +978,10 @@ NORET_TYPE void do_exit(long code)
proc_exit_connector(tsk);
/*
* FIXME: do that only when needed, using sched_exit tracepoint
*/
flush_ptrace_hw_breakpoint(tsk);
/*
* Flush inherited counters to the parent - before the parent
* gets woken up by child-exit notifications.
......
This diff is collapsed.
......@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
}
return module_kallsyms_lookup_name(name);
}
EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
unsigned long),
......
......@@ -29,6 +29,7 @@
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/ftrace_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/irq_regs.h>
......@@ -1725,6 +1726,26 @@ static int perf_release(struct inode *inode, struct file *file)
return 0;
}
int perf_event_release_kernel(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
perf_event_remove_from_context(event);
mutex_unlock(&ctx->mutex);
mutex_lock(&event->owner->perf_event_mutex);
list_del_init(&event->owner_entry);
mutex_unlock(&event->owner->perf_event_mutex);
put_task_struct(event->owner);
free_event(event);
return 0;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
static int perf_event_read_size(struct perf_event *event)
{
int entry = sizeof(u64); /* value */
......@@ -1750,7 +1771,7 @@ static int perf_event_read_size(struct perf_event *event)
return size;
}
static u64 perf_event_read_value(struct perf_event *event)
u64 perf_event_read_value(struct perf_event *event)
{
struct perf_event *child;
u64 total = 0;
......@@ -1761,6 +1782,7 @@ static u64 perf_event_read_value(struct perf_event *event)
return total;
}
EXPORT_SYMBOL_GPL(perf_event_read_value);
static int perf_event_read_entry(struct perf_event *event,
u64 read_format, char __user *buf)
......@@ -4231,6 +4253,51 @@ static void perf_event_free_filter(struct perf_event *event)
#endif /* CONFIG_EVENT_PROFILE */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
static void bp_perf_event_destroy(struct perf_event *event)
{
release_bp_slot(event);
}
static const struct pmu *bp_perf_event_init(struct perf_event *bp)
{
int err;
/*
* The breakpoint is already filled if we haven't created the counter
* through perf syscall
* FIXME: manage to get trigerred to NULL if it comes from syscalls
*/
if (!bp->callback)
err = register_perf_hw_breakpoint(bp);
else
err = __register_perf_hw_breakpoint(bp);
if (err)
return ERR_PTR(err);
bp->destroy = bp_perf_event_destroy;
return &perf_ops_bp;
}
void perf_bp_event(struct perf_event *bp, void *regs)
{
/* TODO */
}
#else
static void bp_perf_event_destroy(struct perf_event *event)
{
}
static const struct pmu *bp_perf_event_init(struct perf_event *bp)
{
return NULL;
}
void perf_bp_event(struct perf_event *bp, void *regs)
{
}
#endif
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
static void sw_perf_event_destroy(struct perf_event *event)
......@@ -4297,6 +4364,7 @@ perf_event_alloc(struct perf_event_attr *attr,
struct perf_event_context *ctx,
struct perf_event *group_leader,
struct perf_event *parent_event,
perf_callback_t callback,
gfp_t gfpflags)
{
const struct pmu *pmu;
......@@ -4339,6 +4407,11 @@ perf_event_alloc(struct perf_event_attr *attr,
event->state = PERF_EVENT_STATE_INACTIVE;
if (!callback && parent_event)
callback = parent_event->callback;
event->callback = callback;
if (attr->disabled)
event->state = PERF_EVENT_STATE_OFF;
......@@ -4373,6 +4446,11 @@ perf_event_alloc(struct perf_event_attr *attr,
pmu = tp_perf_event_init(event);
break;
case PERF_TYPE_BREAKPOINT:
pmu = bp_perf_event_init(event);
break;
default:
break;
}
......@@ -4615,7 +4693,7 @@ SYSCALL_DEFINE5(perf_event_open,
}
event = perf_event_alloc(&attr, cpu, ctx, group_leader,
NULL, GFP_KERNEL);
NULL, NULL, GFP_KERNEL);
err = PTR_ERR(event);
if (IS_ERR(event))
goto err_put_context;
......@@ -4663,6 +4741,58 @@ err_put_context:
return err;
}
/**
* perf_event_create_kernel_counter
*
* @attr: attributes of the counter to create
* @cpu: cpu in which the counter is bound
* @pid: task to profile
*/
struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
pid_t pid, perf_callback_t callback)
{
struct perf_event *event;
struct perf_event_context *ctx;
int err;
/*
* Get the target context (task or percpu):
*/
ctx = find_get_context(pid, cpu);
if (IS_ERR(ctx))
return NULL;
event = perf_event_alloc(attr, cpu, ctx, NULL,
NULL, callback, GFP_KERNEL);
err = PTR_ERR(event);
if (IS_ERR(event))
goto err_put_context;
event->filp = NULL;
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
perf_install_in_context(ctx, event, cpu);
++ctx->generation;
mutex_unlock(&ctx->mutex);
event->owner = current;
get_task_struct(current);
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
return event;
err_put_context:
if (err < 0)
put_ctx(ctx);
return NULL;
}
EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
/*
* inherit a event from parent task to child task:
*/
......@@ -4688,7 +4818,7 @@ inherit_event(struct perf_event *parent_event,
child_event = perf_event_alloc(&parent_event->attr,
parent_event->cpu, child_ctx,
group_leader, parent_event,
GFP_KERNEL);
NULL, GFP_KERNEL);
if (IS_ERR(child_event))
return child_event;
get_ctx(child_ctx);
......
......@@ -339,6 +339,27 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
config KSYM_TRACER
bool "Trace read and write access on kernel memory locations"
depends on HAVE_HW_BREAKPOINT
select TRACING
help
This tracer helps find read and write operations on any given kernel
symbol i.e. /proc/kallsyms.
config PROFILE_KSYM_TRACER
bool "Profile all kernel memory accesses on 'watched' variables"
depends on KSYM_TRACER
help
This tracer profiles kernel accesses on variables watched through the
ksym tracer ftrace plugin. Depending upon the hardware, all read
and write operations on kernel variables can be monitored for
accesses.
The results will be displayed in:
/debugfs/tracing/profile_ksym
Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
......
......@@ -54,6 +54,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
obj-$(CONFIG_EVENT_TRACING) += power-traces.o
libftrace-y := ftrace.o
......@@ -11,6 +11,7 @@
#include <linux/ftrace.h>
#include <trace/boot.h>
#include <linux/kmemtrace.h>
#include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
......@@ -37,6 +38,7 @@ enum trace_type {
TRACE_KMEM_ALLOC,
TRACE_KMEM_FREE,
TRACE_BLK,
TRACE_KSYM,
__TRACE_LAST_TYPE,
};
......@@ -232,6 +234,7 @@ extern void __ftrace_bad_type(void);
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
__ftrace_bad_type(); \
} while (0)
......@@ -387,6 +390,8 @@ int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
int is_tracing_stopped(void);
extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
#ifdef CONFIG_TRACER_MAX_TRACE
......@@ -461,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_ksym(struct tracer *trace,
struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
......
......@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
F_printk("type:%u call_site:%lx ptr:%p",
__entry->type_id, __entry->call_site, __entry->ptr)
);
FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
TRACE_KSYM,
F_STRUCT(
__field( unsigned long, ip )
__field( unsigned char, type )
__array( char , cmd, TASK_COMM_LEN )
__field( unsigned long, addr )
),
F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
(void *)__entry->ip, (unsigned int)__entry->type,
(void *)__entry->addr, __entry->cmd)
);
This diff is collapsed.
......@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
case TRACE_HW_BRANCHES:
case TRACE_KSYM:
return 1;
}
return 0;
......@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
return ret;
}
#endif /* CONFIG_HW_BRANCH_TRACER */
#ifdef CONFIG_KSYM_TRACER
static int ksym_selftest_dummy;
int
trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
{
unsigned long count;
int ret;
/* start the tracing */
ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
}
ksym_selftest_dummy = 0;
/* Register the read-write tracing request */
ret = process_new_ksym_entry("ksym_selftest_dummy",
HW_BREAKPOINT_R | HW_BREAKPOINT_W,
(unsigned long)(&ksym_selftest_dummy));
if (ret < 0) {
printk(KERN_CONT "ksym_trace read-write startup test failed\n");
goto ret_path;
}
/* Perform a read and a write operation over the dummy variable to
* trigger the tracer
*/
if (ksym_selftest_dummy == 0)
ksym_selftest_dummy++;
/* stop the tracing. */
tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
tracing_start();
/* read & write operations - one each is performed on the dummy variable
* triggering two entries in the trace buffer
*/
if (!ret && count != 2) {
printk(KERN_CONT "Ksym tracer startup test failed");
ret = -1;
}
ret_path:
return ret;
}
#endif /* CONFIG_KSYM_TRACER */
......@@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES
default m
depends on SAMPLE_KPROBES && KRETPROBES
config SAMPLE_HW_BREAKPOINT
tristate "Build kernel hardware breakpoint examples -- loadable module only"
depends on HAVE_HW_BREAKPOINT && m
help
This builds kernel hardware breakpoint example modules.
endif # SAMPLES
# Makefile for Linux samples code
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \
hw_breakpoint/
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
/*
* data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* usage: insmod data_breakpoint.ko ksym=<ksym_name>
*
* This file is a kernel module that places a breakpoint over ksym_name kernel
* variable using Hardware Breakpoint register. The corresponding handler which
* prints a backtrace is invoked everytime a write operation is performed on
* that variable.
*
* Copyright (C) IBM Corporation, 2009
*/
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/init.h> /* Needed for the macros */
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
struct perf_event **sample_hbp;
static char ksym_name[KSYM_NAME_LEN] = "pid_max";
module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
" write operations on the kernel symbol");
static void sample_hbp_handler(struct perf_event *temp, void *data)
{
printk(KERN_INFO "%s value is changed\n", ksym_name);
dump_stack();
printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
}
static int __init hw_break_module_init(void)
{
int ret;
unsigned long addr;
addr = kallsyms_lookup_name(ksym_name);
sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
HW_BREAKPOINT_W | HW_BREAKPOINT_R,
sample_hbp_handler, true);
if (IS_ERR(sample_hbp)) {
ret = PTR_ERR(sample_hbp);
goto fail;
} else if (!sample_hbp) {
ret = -EINVAL;
goto fail;
}
printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name);
return 0;
fail:
printk(KERN_INFO "Breakpoint registration failed\n");
return ret;
}
static void __exit hw_break_module_exit(void)
{
unregister_wide_hw_breakpoint(sample_hbp);
printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
}
module_init(hw_break_module_init);
module_exit(hw_break_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("K.Prasad");
MODULE_DESCRIPTION("ksym breakpoint");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment