Commit 2fb9d206 authored by Arnd Bergmann, committed by Paul Mackerras

[PATCH] spufs: set irq affinity for running threads

So far, all SPU-triggered interrupts always end up on
the first SMT thread, which is a bad solution.

This patch implements setting the affinity to the
CPU that was running last when entering execution on
an SPU. This should result in a significant reduction
in IPI calls and better cache locality for SPE thread
specific data.
Signed-off-by: Arnd Bergmann <arndb@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
parent aeb01377
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/config.h> #include <linux/config.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/irq.h> #include <linux/irq.h>
#include <linux/module.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/types.h> #include <linux/types.h>
...@@ -55,6 +56,7 @@ struct iic_regs { ...@@ -55,6 +56,7 @@ struct iic_regs {
struct iic { struct iic {
struct iic_regs __iomem *regs; struct iic_regs __iomem *regs;
u8 target_id;
}; };
static DEFINE_PER_CPU(struct iic, iic); static DEFINE_PER_CPU(struct iic, iic);
...@@ -172,12 +174,11 @@ int iic_get_irq(struct pt_regs *regs) ...@@ -172,12 +174,11 @@ int iic_get_irq(struct pt_regs *regs)
return irq; return irq;
} }
static struct iic_regs __iomem *find_iic(int cpu) static int setup_iic(int cpu, struct iic *iic)
{ {
struct device_node *np; struct device_node *np;
int nodeid = cpu / 2; int nodeid = cpu / 2;
unsigned long regs; unsigned long regs;
struct iic_regs __iomem *iic_regs;
for (np = of_find_node_by_type(NULL, "cpu"); for (np = of_find_node_by_type(NULL, "cpu");
np; np;
...@@ -188,20 +189,23 @@ static struct iic_regs __iomem *find_iic(int cpu) ...@@ -188,20 +189,23 @@ static struct iic_regs __iomem *find_iic(int cpu)
if (!np) { if (!np) {
printk(KERN_WARNING "IIC: CPU %d not found\n", cpu); printk(KERN_WARNING "IIC: CPU %d not found\n", cpu);
iic_regs = NULL; iic->regs = NULL;
} else { iic->target_id = 0xff;
regs = *(long *)get_property(np, "iic", NULL); return -ENODEV;
/* hack until we have decided on the devtree info */
regs += 0x400;
if (cpu & 1)
regs += 0x20;
printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs);
iic_regs = __ioremap(regs, sizeof(struct iic_regs),
_PAGE_NO_CACHE);
} }
return iic_regs;
regs = *(long *)get_property(np, "iic", NULL);
/* hack until we have decided on the devtree info */
regs += 0x400;
if (cpu & 1)
regs += 0x20;
printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs);
iic->regs = __ioremap(regs, sizeof(struct iic_regs),
_PAGE_NO_CACHE);
iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe);
return 0;
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -227,6 +231,12 @@ void iic_cause_IPI(int cpu, int mesg) ...@@ -227,6 +231,12 @@ void iic_cause_IPI(int cpu, int mesg)
out_be64(&per_cpu(iic, cpu).regs->generate, (IIC_NUM_IPIS - 1 - mesg) << 4); out_be64(&per_cpu(iic, cpu).regs->generate, (IIC_NUM_IPIS - 1 - mesg) << 4);
} }
/*
 * Return the interrupt-target unit id of the internal interrupt
 * controller (IIC) attached to @cpu.  The value is computed in
 * setup_iic() as (nodeid << 4) + (0xf for the odd SMT thread,
 * 0xe for the even one), or 0xff if the cpu's device-tree node
 * was not found.
 */
u8 iic_get_target_id(int cpu)
{
return per_cpu(iic, cpu).target_id;
}
EXPORT_SYMBOL_GPL(iic_get_target_id);
static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
{ {
smp_message_recv(iic_irq_to_ipi(irq), regs); smp_message_recv(iic_irq_to_ipi(irq), regs);
...@@ -276,7 +286,7 @@ void iic_init_IRQ(void) ...@@ -276,7 +286,7 @@ void iic_init_IRQ(void)
irq_offset = 0; irq_offset = 0;
for_each_cpu(cpu) { for_each_cpu(cpu) {
iic = &per_cpu(iic, cpu); iic = &per_cpu(iic, cpu);
iic->regs = find_iic(cpu); setup_iic(cpu, iic);
if (iic->regs) if (iic->regs)
out_be64(&iic->regs->prio, 0xff); out_be64(&iic->regs->prio, 0xff);
} }
......
...@@ -54,6 +54,7 @@ extern void iic_setup_cpu(void); ...@@ -54,6 +54,7 @@ extern void iic_setup_cpu(void);
extern void iic_local_enable(void); extern void iic_local_enable(void);
extern void iic_local_disable(void); extern void iic_local_disable(void);
extern u8 iic_get_target_id(int cpu);
extern void spider_init_IRQ(void); extern void spider_init_IRQ(void);
extern int spider_get_irq(unsigned long int_pending); extern int spider_get_irq(unsigned long int_pending);
......
...@@ -507,6 +507,14 @@ int spu_irq_class_1_bottom(struct spu *spu) ...@@ -507,6 +507,14 @@ int spu_irq_class_1_bottom(struct spu *spu)
return ret; return ret;
} }
/*
 * Route all interrupts raised by @spu to the IIC of @cpu, so they
 * are delivered to the CPU that last ran the SPU context.
 *
 * The same 16-bit target id is replicated into three fields of the
 * route value — presumably one field per SPU interrupt class
 * (classes 0, 1 and 2); TODO confirm against the INT_Route
 * register layout in the hardware documentation.
 */
void spu_irq_setaffinity(struct spu *spu, int cpu)
{
u64 target = iic_get_target_id(cpu);
u64 route = target << 48 | target << 32 | target << 16;
spu_int_route_set(spu, route);
}
EXPORT_SYMBOL_GPL(spu_irq_setaffinity);
static void __iomem * __init map_spe_prop(struct device_node *n, static void __iomem * __init map_spe_prop(struct device_node *n,
const char *name) const char *name)
{ {
......
...@@ -357,6 +357,11 @@ int spu_activate(struct spu_context *ctx, u64 flags) ...@@ -357,6 +357,11 @@ int spu_activate(struct spu_context *ctx, u64 flags)
if (!spu) if (!spu)
return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN; return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
bind_context(spu, ctx); bind_context(spu, ctx);
/*
* We're likely to wait for interrupts on the same
* CPU that we are now on, so send them here.
*/
spu_irq_setaffinity(spu, raw_smp_processor_id());
put_active_spu(spu); put_active_spu(spu);
return 0; return 0;
} }
......
...@@ -147,6 +147,7 @@ struct spu *spu_alloc(void); ...@@ -147,6 +147,7 @@ struct spu *spu_alloc(void);
void spu_free(struct spu *spu); void spu_free(struct spu *spu);
int spu_irq_class_0_bottom(struct spu *spu); int spu_irq_class_0_bottom(struct spu *spu);
int spu_irq_class_1_bottom(struct spu *spu); int spu_irq_class_1_bottom(struct spu *spu);
void spu_irq_setaffinity(struct spu *spu, int cpu);
extern struct spufs_calls { extern struct spufs_calls {
asmlinkage long (*create_thread)(const char __user *name, asmlinkage long (*create_thread)(const char __user *name,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment