Commit 9b93418e authored by Carl Love, committed by Robert Richter

powerpc/oprofile: IBM CELL: cleanup and restructuring

This patch restructures and cleans up the code a bit to make it
easier to add new functionality later.  The patch makes no
functional changes to the existing code.

Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
parent 4a6908a3
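
The core of the restructuring is replacing the scattered "if (spu_cycle_reset)"
tests with a single profiling_mode selector that cell_reg_setup() sets once and
that every other entry point consults. A minimal stand-alone sketch of that
pattern follows; it is illustrative only, with hypothetical helper names
(reg_setup, start_ppu_counters, start_spu_trace_array), not the driver code
itself:

    #include <stdio.h>

    /* Profiling modes, mirroring the constants the patch adds. */
    #define PPU_PROFILING           0
    #define SPU_PROFILING_CYCLES    1
    #define SPU_PROFILING_EVENTS    2   /* reserved for a follow-on patch */

    static unsigned int profiling_mode;

    /* Hypothetical stand-ins for the per-mode helpers the patch factors out. */
    static void start_ppu_counters(void)    { printf("start PPU counters\n"); }
    static void start_spu_trace_array(void) { printf("start SPU trace array\n"); }

    /* The mode is chosen once, at registration time... */
    static void reg_setup(int first_event_is_spu_cycles)
    {
            profiling_mode = first_event_is_spu_cycles ?
                    SPU_PROFILING_CYCLES : PPU_PROFILING;
    }

    /* ...and every later entry point just dispatches on it. */
    static void global_start(void)
    {
            if (profiling_mode == SPU_PROFILING_CYCLES)
                    start_spu_trace_array();
            else
                    start_ppu_counters();
    }

    int main(void)
    {
            reg_setup(1);
            global_start();     /* prints "start SPU trace array" */
            return 0;
    }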
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -31,8 +31,8 @@ static unsigned int profiling_interval;
 
 #define SPU_PC_MASK	     0xFFFF
 
-static DEFINE_SPINLOCK(sample_array_lock);
-unsigned long sample_array_lock_flags;
+static DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
+unsigned long oprof_spu_smpl_arry_lck_flags;
 
 void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
 {
@@ -145,13 +145,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
		 * sample array must be loaded and then processed for a given
		 * cpu.  The sample array is not per cpu.
		 */
-		spin_lock_irqsave(&sample_array_lock,
-				  sample_array_lock_flags);
+		spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
+				  oprof_spu_smpl_arry_lck_flags);
		num_samples = cell_spu_pc_collection(cpu);
 
		if (num_samples == 0) {
-			spin_unlock_irqrestore(&sample_array_lock,
-					       sample_array_lock_flags);
+			spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+					       oprof_spu_smpl_arry_lck_flags);
			continue;
		}
 
@@ -162,8 +162,8 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
					num_samples);
		}
 
-		spin_unlock_irqrestore(&sample_array_lock,
-				       sample_array_lock_flags);
+		spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+				       oprof_spu_smpl_arry_lck_flags);
	}
	smp_wmb();	/* insure spu event buffer updates are written */
@@ -182,13 +182,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
 static struct hrtimer timer;
 
 /*
- * Entry point for SPU profiling.
+ * Entry point for SPU cycle profiling.
  * NOTE: SPU profiling is done system-wide, not per-CPU.
  *
  * cycles_reset is the count value specified by the user when
  * setting up OProfile to count SPU_CYCLES.
  */
-int start_spu_profiling(unsigned int cycles_reset)
+int start_spu_profiling_cycles(unsigned int cycles_reset)
 {
	ktime_t kt;
@@ -212,10 +212,10 @@ int start_spu_profiling(unsigned int cycles_reset)
	return 0;
 }
 
-void stop_spu_profiling(void)
+void stop_spu_profiling_cycles(void)
 {
	spu_prof_running = 0;
	hrtimer_cancel(&timer);
	kfree(samples);
-	pr_debug("SPU_PROF: stop_spu_profiling issued\n");
+	pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
 }
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -40,14 +40,9 @@
 #include "../platforms/cell/interrupt.h"
 #include "cell/pr_util.h"
 
-static void cell_global_stop_spu(void);
-
-/*
- * spu_cycle_reset is the number of cycles between samples.
- * This variable is used for SPU profiling and should ONLY be set
- * at the beginning of cell_reg_setup; otherwise, it's read-only.
- */
-static unsigned int spu_cycle_reset;
+#define PPU_PROFILING		 0
+#define SPU_PROFILING_CYCLES	 1
+#define SPU_PROFILING_EVENTS	 2
 
 #define NUM_SPUS_PER_NODE 8
 #define SPU_CYCLES_EVENT_NUM 2	/* event number for SPU_CYCLES */
@@ -66,6 +61,14 @@ static unsigned int spu_cycle_reset;
 
 #define MAX_SPU_COUNT 0xFFFFFF	/* maximum 24 bit LFSR value */
 
+/*
+ * spu_cycle_reset is the number of cycles between samples.
+ * This variable is used for SPU profiling and should ONLY be set
+ * at the beginning of cell_reg_setup; otherwise, it's read-only.
+ */
+static unsigned int spu_cycle_reset;
+static unsigned int profiling_mode;
+
 struct pmc_cntrl_data {
	unsigned long vcntr;
	unsigned long evnts;
@@ -122,7 +125,6 @@ static struct {
 #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
 
 static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
-
 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
 
 /*
@@ -165,7 +167,7 @@ static int spu_rtas_token;	/* token for SPU cycle profiling */
 static u32 reset_value[NR_PHYS_CTRS];
 static int num_counters;
 static int oprofile_running;
-static DEFINE_SPINLOCK(virt_cntr_lock);
+static DEFINE_SPINLOCK(cntr_lock);
 static u32 ctr_enabled;
@@ -367,7 +369,7 @@ static void write_pm_cntrl(int cpu)
	if (pm_regs.pm_cntrl.stop_at_max == 1)
		val |= CBE_PM_STOP_AT_MAX;
 
-	if (pm_regs.pm_cntrl.trace_mode == 1)
+	if (pm_regs.pm_cntrl.trace_mode != 0)
		val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
 
	if (pm_regs.pm_cntrl.freeze == 1)
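
The write_pm_cntrl() change above ("trace_mode == 1" becoming "trace_mode != 0")
stays within the commit's "no functional changes" claim, which implies that
trace_mode currently never holds a value other than 0 or 1 (an inference from
the commit message, not stated in the diff); the relaxed test simply leaves room
for an additional non-zero trace mode when SPU event profiling is added later.
A tiny stand-alone check of that equivalence, illustrative only:

    #include <assert.h>

    /* The two predicate forms from write_pm_cntrl(), old and new. */
    static int old_test(unsigned int trace_mode) { return trace_mode == 1; }
    static int new_test(unsigned int trace_mode) { return trace_mode != 0; }

    int main(void)
    {
            /* Identical results for the values assumed in use today... */
            assert(old_test(0) == new_test(0));
            assert(old_test(1) == new_test(1));
            /* ...while the new form also admits a future mode such as 2. */
            assert(!old_test(2) && new_test(2));
            return 0;
    }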
@@ -441,7 +443,7 @@ static void cell_virtual_cntr(unsigned long data)
	 * not both playing with the counters on the same node.
	 */
 
-	spin_lock_irqsave(&virt_cntr_lock, flags);
+	spin_lock_irqsave(&cntr_lock, flags);
 
	prev_hdw_thread = hdw_thread;
@@ -527,7 +529,7 @@ static void cell_virtual_cntr(unsigned long data)
		cbe_enable_pm(cpu);
	}
 
-	spin_unlock_irqrestore(&virt_cntr_lock, flags);
+	spin_unlock_irqrestore(&cntr_lock, flags);
 
	mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
 }
@@ -541,44 +543,30 @@ static void start_virt_cntrs(void)
	add_timer(&timer_virt_cntr);
 }
 
-/* This function is called once for all cpus combined */
-static int cell_reg_setup(struct op_counter_config *ctr,
+static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
			struct op_system_config *sys, int num_ctrs)
 {
-	int i, j, cpu;
-
-	spu_cycle_reset = 0;
-
-	if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
-		spu_cycle_reset = ctr[0].count;
+	spu_cycle_reset = ctr[0].count;
 
-		/*
-		 * Each node will need to make the rtas call to start
-		 * and stop SPU profiling.  Get the token once and store it.
-		 */
-		spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
+	/*
+	 * Each node will need to make the rtas call to start
+	 * and stop SPU profiling.  Get the token once and store it.
+	 */
+	spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
 
-		if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
-			printk(KERN_ERR
-			       "%s: rtas token ibm,cbe-spu-perftools unknown\n",
-			       __func__);
-			return -EIO;
-		}
+	if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+		printk(KERN_ERR
+		       "%s: rtas token ibm,cbe-spu-perftools unknown\n",
+		       __func__);
+		return -EIO;
	}
+	return 0;
+}
 
-	pm_rtas_token = rtas_token("ibm,cbe-perftools");
-
-	/*
-	 * For all events excetp PPU CYCLEs, each node will need to make
-	 * the rtas cbe-perftools call to setup and reset the debug bus.
-	 * Make the token lookup call once and store it in the global
-	 * variable pm_rtas_token.
-	 */
-	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
-		printk(KERN_ERR
-		       "%s: rtas token ibm,cbe-perftools unknown\n",
-		       __func__);
-		return -EIO;
-	}
+static int cell_reg_setup_ppu(struct op_counter_config *ctr,
+			struct op_system_config *sys, int num_ctrs)
+{
+	int i, j, cpu;
 
	num_counters = num_ctrs;
@@ -665,6 +653,41 @@
 }
 
+/* This function is called once for all cpus combined */
+static int cell_reg_setup(struct op_counter_config *ctr,
+			struct op_system_config *sys, int num_ctrs)
+{
+	int ret;
+
+	spu_cycle_reset = 0;
+
+	/*
+	 * For all events except PPU CYCLEs, each node will need to make
+	 * the rtas cbe-perftools call to setup and reset the debug bus.
+	 * Make the token lookup call once and store it in the global
+	 * variable pm_rtas_token.
+	 */
+	pm_rtas_token = rtas_token("ibm,cbe-perftools");
+
+	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+		printk(KERN_ERR
+		       "%s: rtas token ibm,cbe-perftools unknown\n",
+		       __func__);
+		return -EIO;
+	}
+
+	if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
+		profiling_mode = SPU_PROFILING_CYCLES;
+		ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
+	} else {
+		profiling_mode = PPU_PROFILING;
+		ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
+	}
+
+	return ret;
+}
+
 /* This function is called once for each cpu */
 static int cell_cpu_setup(struct op_counter_config *cntr)
@@ -673,7 +696,11 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
	u32 num_enabled = 0;
	int i;
 
-	if (spu_cycle_reset)
+	/* Cycle based SPU profiling does not use the performance
+	 * counters.  The trace array is configured to collect
+	 * the data.
+	 */
+	if (profiling_mode == SPU_PROFILING_CYCLES)
		return 0;
 
	/* There is one performance monitor per processor chip (i.e. node),
@@ -686,7 +713,6 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
	cbe_disable_pm(cpu);
	cbe_disable_pm_interrupts(cpu);
 
-	cbe_write_pm(cpu, pm_interval, 0);
	cbe_write_pm(cpu, pm_start_stop, 0);
	cbe_write_pm(cpu, group_control, pm_regs.group_control);
	cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
@@ -885,7 +911,94 @@ static struct notifier_block cpu_freq_notifier_block = {
 };
 #endif
 
-static int cell_global_start_spu(struct op_counter_config *ctr)
+/*
+ * Note the generic OProfile stop calls do not support returning
+ * an error on stop.  Hence, will not return an error if the FW
+ * calls fail on stop.  Failure to reset the debug bus is not an issue.
+ * Failure to disable the SPU profiling is not an issue.  The FW calls
+ * to enable the performance counters and debug bus will work even if
+ * the hardware was not cleanly reset.
+ */
+static void cell_global_stop_spu_cycles(void)
+{
+	int subfunc, rtn_value;
+	unsigned int lfsr_value;
+	int cpu;
+
+	oprofile_running = 0;
+
+#ifdef CONFIG_CPU_FREQ
+	cpufreq_unregister_notifier(&cpu_freq_notifier_block,
+				    CPUFREQ_TRANSITION_NOTIFIER);
+#endif
+
+	for_each_online_cpu(cpu) {
+		if (cbe_get_hw_thread_id(cpu))
+			continue;
+
+		subfunc = 3;	/*
+				 * 2 - activate SPU tracing,
+				 * 3 - deactivate
+				 */
+		lfsr_value = 0x8f100000;
+
+		rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
+				      subfunc, cbe_cpu_to_node(cpu),
+				      lfsr_value);
+
+		if (unlikely(rtn_value != 0)) {
+			printk(KERN_ERR
+			       "%s: rtas call ibm,cbe-spu-perftools " \
+			       "failed, return = %d\n",
+			       __func__, rtn_value);
+		}
+
+		/* Deactivate the signals */
+		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+	}
+
+	if (profiling_mode == SPU_PROFILING_CYCLES)
+		stop_spu_profiling_cycles();
+}
+
+static void cell_global_stop_ppu(void)
+{
+	int cpu;
+
+	/*
+	 * This routine will be called once for the system.
+	 * There is one performance monitor per node, so we
+	 * only need to perform this function once per node.
+	 */
+	del_timer_sync(&timer_virt_cntr);
+	oprofile_running = 0;
+	smp_wmb();
+
+	for_each_online_cpu(cpu) {
+		if (cbe_get_hw_thread_id(cpu))
+			continue;
+
+		cbe_sync_irq(cbe_cpu_to_node(cpu));
+		/* Stop the counters */
+		cbe_disable_pm(cpu);
+
+		/* Deactivate the signals */
+		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+
+		/* Deactivate interrupts */
+		cbe_disable_pm_interrupts(cpu);
+	}
+}
+
+static void cell_global_stop(void)
+{
+	if (profiling_mode == PPU_PROFILING)
+		cell_global_stop_ppu();
+	else
+		cell_global_stop_spu_cycles();
+}
+
+static int cell_global_start_spu_cycles(struct op_counter_config *ctr)
 {
	int subfunc;
	unsigned int lfsr_value;
@@ -955,14 +1068,14 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
 
		if (unlikely(ret != 0)) {
			printk(KERN_ERR
-			       "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
-			       __func__, ret);
+			       "%s: rtas call ibm,cbe-spu-perftools failed, " \
+			       "return = %d\n", __func__, ret);
			rtas_error = -EIO;
			goto out;
		}
	}
 
-	rtas_error = start_spu_profiling(spu_cycle_reset);
+	rtas_error = start_spu_profiling_cycles(spu_cycle_reset);
	if (rtas_error)
		goto out_stop;
@@ -970,7 +1083,7 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
	return 0;
 
 out_stop:
-	cell_global_stop_spu();		/* clean up the PMU/debug bus */
+	cell_global_stop_spu_cycles();	/* clean up the PMU/debug bus */
 out:
	return rtas_error;
 }
@@ -1024,99 +1137,15 @@ static int cell_global_start_ppu(struct op_counter_config *ctr)
 static int cell_global_start(struct op_counter_config *ctr)
 {
-	if (spu_cycle_reset)
-		return cell_global_start_spu(ctr);
+	if (profiling_mode == SPU_PROFILING_CYCLES)
+		return cell_global_start_spu_cycles(ctr);
	else
		return cell_global_start_ppu(ctr);
 }
 
-/*
- * Note the generic OProfile stop calls do not support returning
- * an error on stop.  Hence, will not return an error if the FW
- * calls fail on stop.  Failure to reset the debug bus is not an issue.
- * Failure to disable the SPU profiling is not an issue.  The FW calls
- * to enable the performance counters and debug bus will work even if
- * the hardware was not cleanly reset.
- */
-static void cell_global_stop_spu(void)
-{
-	int subfunc, rtn_value;
-	unsigned int lfsr_value;
-	int cpu;
-
-	oprofile_running = 0;
-
-#ifdef CONFIG_CPU_FREQ
-	cpufreq_unregister_notifier(&cpu_freq_notifier_block,
-				    CPUFREQ_TRANSITION_NOTIFIER);
-#endif
-
-	for_each_online_cpu(cpu) {
-		if (cbe_get_hw_thread_id(cpu))
-			continue;
-
-		subfunc = 3;	/*
-				 * 2 - activate SPU tracing,
-				 * 3 - deactivate
-				 */
-		lfsr_value = 0x8f100000;
-
-		rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
-				      subfunc, cbe_cpu_to_node(cpu),
-				      lfsr_value);
-
-		if (unlikely(rtn_value != 0)) {
-			printk(KERN_ERR
-			       "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
-			       __func__, rtn_value);
-		}
-
-		/* Deactivate the signals */
-		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
-	}
-
-	stop_spu_profiling();
-}
-
-static void cell_global_stop_ppu(void)
-{
-	int cpu;
-
-	/*
-	 * This routine will be called once for the system.
-	 * There is one performance monitor per node, so we
-	 * only need to perform this function once per node.
-	 */
-	del_timer_sync(&timer_virt_cntr);
-	oprofile_running = 0;
-	smp_wmb();
-
-	for_each_online_cpu(cpu) {
-		if (cbe_get_hw_thread_id(cpu))
-			continue;
-
-		cbe_sync_irq(cbe_cpu_to_node(cpu));
-		/* Stop the counters */
-		cbe_disable_pm(cpu);
-
-		/* Deactivate the signals */
-		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
-
-		/* Deactivate interrupts */
-		cbe_disable_pm_interrupts(cpu);
-	}
-}
-
-static void cell_global_stop(void)
-{
-	if (spu_cycle_reset)
-		cell_global_stop_spu();
-	else
-		cell_global_stop_ppu();
-}
-
-static void cell_handle_interrupt(struct pt_regs *regs,
-				struct op_counter_config *ctr)
+static void cell_handle_interrupt_ppu(struct pt_regs *regs,
+				      struct op_counter_config *ctr)
 {
	u32 cpu;
	u64 pc;
@@ -1132,7 +1161,7 @@ static void cell_handle_interrupt(struct pt_regs *regs,
	 * routine are not running at the same time. See the
	 * cell_virtual_cntr() routine for additional comments.
	 */
-	spin_lock_irqsave(&virt_cntr_lock, flags);
+	spin_lock_irqsave(&cntr_lock, flags);
 
	/*
	 * Need to disable and reenable the performance counters
@@ -1185,7 +1214,14 @@ static void cell_handle_interrupt(struct pt_regs *regs,
		 */
		cbe_enable_pm(cpu);
	}
 
-	spin_unlock_irqrestore(&virt_cntr_lock, flags);
+	spin_unlock_irqrestore(&cntr_lock, flags);
+}
+
+static void cell_handle_interrupt(struct pt_regs *regs,
+				  struct op_counter_config *ctr)
+{
+	if (profiling_mode == PPU_PROFILING)
+		cell_handle_interrupt_ppu(regs, ctr);
 }
 
 /*
@@ -1195,7 +1231,8 @@ static void cell_handle_interrupt(struct pt_regs *regs,
  */
 static int cell_sync_start(void)
 {
-	if (spu_cycle_reset)
+	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
+	    (profiling_mode == SPU_PROFILING_EVENTS))
		return spu_sync_start();
	else
		return DO_GENERIC_SYNC;
@@ -1203,7 +1240,8 @@ static int cell_sync_start(void)
 static int cell_sync_stop(void)
 {
-	if (spu_cycle_reset)
+	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
+	    (profiling_mode == SPU_PROFILING_EVENTS))
		return spu_sync_stop();
	else
		return 1;