Commit 54d5d424 authored by Ashok Raj, committed by Linus Torvalds

[PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity

When handling writes to /proc/irq, the current code re-programs the RTE
entries directly. This is not recommended and could potentially cause
chipsets to lock up, or cause missed interrupts.

CONFIG_IRQBALANCE does this correctly: it re-programs only when the
interrupt is pending. The same needs to be done for /proc/irq handling as well.
Otherwise user-space irq balancers are really not doing the right thing.

- Renamed pending_irq_balance_cpumask to pending_irq_cpumask for
  lack of a more generic name.
- Moved move_irq out from under IRQBALANCE, and added the same to X86_64.
- Added a new proc handler for writes, so the write can be deferred until
  irq handling time.
- /proc/irq/XX/smp_affinity used to display CPU_MASK_ALL; it now shows
  only the active cpu masks, or exactly what was set.
- Provided a common move_irq implementation, instead of duplicating it,
  when using the generic irq framework.

Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off.
Tested UP builds as well.

MSI testing: TBD. I have cards but need to find a crossover cable, although I
did test an earlier version of this patch.  Will test in a couple of days.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Zwane Mwaikambo <zwane@holomorphy.com>
Grudgingly-acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Coywolf Qi Hunt <coywolf@lovecn.org>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent f63ed39c
...@@ -1318,6 +1318,11 @@ config GENERIC_IRQ_PROBE ...@@ -1318,6 +1318,11 @@ config GENERIC_IRQ_PROBE
bool bool
default y default y
config GENERIC_PENDING_IRQ
bool
depends on GENERIC_HARDIRQS && SMP
default y
config X86_SMP config X86_SMP
bool bool
depends on SMP && !X86_VOYAGER depends on SMP && !X86_VOYAGER
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <linux/acpi.h> #include <linux/acpi.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sysdev.h> #include <linux/sysdev.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/desc.h> #include <asm/desc.h>
...@@ -222,13 +223,21 @@ static void clear_IO_APIC (void) ...@@ -222,13 +223,21 @@ static void clear_IO_APIC (void)
clear_IO_APIC_pin(apic, pin); clear_IO_APIC_pin(apic, pin);
} }
#ifdef CONFIG_SMP
static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
{ {
unsigned long flags; unsigned long flags;
int pin; int pin;
struct irq_pin_list *entry = irq_2_pin + irq; struct irq_pin_list *entry = irq_2_pin + irq;
unsigned int apicid_value; unsigned int apicid_value;
cpumask_t tmp;
cpus_and(tmp, cpumask, cpu_online_map);
if (cpus_empty(tmp))
tmp = TARGET_CPUS;
cpus_and(cpumask, tmp, CPU_MASK_ALL);
apicid_value = cpu_mask_to_apicid(cpumask); apicid_value = cpu_mask_to_apicid(cpumask);
/* Prepare to do the io_apic_write */ /* Prepare to do the io_apic_write */
apicid_value = apicid_value << 24; apicid_value = apicid_value << 24;
...@@ -242,6 +251,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) ...@@ -242,6 +251,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
break; break;
entry = irq_2_pin + entry->next; entry = irq_2_pin + entry->next;
} }
set_irq_info(irq, cpumask);
spin_unlock_irqrestore(&ioapic_lock, flags); spin_unlock_irqrestore(&ioapic_lock, flags);
} }
...@@ -259,7 +269,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) ...@@ -259,7 +269,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
# define Dprintk(x...) # define Dprintk(x...)
# endif # endif
cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
#define IRQBALANCE_CHECK_ARCH -999 #define IRQBALANCE_CHECK_ARCH -999
static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
...@@ -328,12 +337,7 @@ static inline void balance_irq(int cpu, int irq) ...@@ -328,12 +337,7 @@ static inline void balance_irq(int cpu, int irq)
cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
new_cpu = move(cpu, allowed_mask, now, 1); new_cpu = move(cpu, allowed_mask, now, 1);
if (cpu != new_cpu) { if (cpu != new_cpu) {
irq_desc_t *desc = irq_desc + irq; set_pending_irq(irq, cpumask_of_cpu(new_cpu));
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
spin_unlock_irqrestore(&desc->lock, flags);
} }
} }
...@@ -528,16 +532,12 @@ tryanotherirq: ...@@ -528,16 +532,12 @@ tryanotherirq:
cpus_and(tmp, target_cpu_mask, allowed_mask); cpus_and(tmp, target_cpu_mask, allowed_mask);
if (!cpus_empty(tmp)) { if (!cpus_empty(tmp)) {
irq_desc_t *desc = irq_desc + selected_irq;
unsigned long flags;
Dprintk("irq = %d moved to cpu = %d\n", Dprintk("irq = %d moved to cpu = %d\n",
selected_irq, min_loaded); selected_irq, min_loaded);
/* mark for change destination */ /* mark for change destination */
spin_lock_irqsave(&desc->lock, flags); set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
pending_irq_balance_cpumask[selected_irq] =
cpumask_of_cpu(min_loaded);
spin_unlock_irqrestore(&desc->lock, flags);
/* Since we made a change, come back sooner to /* Since we made a change, come back sooner to
* check for more variation. * check for more variation.
*/ */
...@@ -568,7 +568,8 @@ static int balanced_irq(void *unused) ...@@ -568,7 +568,8 @@ static int balanced_irq(void *unused)
/* push everything to CPU 0 to give us a starting point. */ /* push everything to CPU 0 to give us a starting point. */
for (i = 0 ; i < NR_IRQS ; i++) { for (i = 0 ; i < NR_IRQS ; i++) {
pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); pending_irq_cpumask[i] = cpumask_of_cpu(0);
set_pending_irq(i, cpumask_of_cpu(0));
} }
for ( ; ; ) { for ( ; ; ) {
...@@ -647,20 +648,9 @@ int __init irqbalance_disable(char *str) ...@@ -647,20 +648,9 @@ int __init irqbalance_disable(char *str)
__setup("noirqbalance", irqbalance_disable); __setup("noirqbalance", irqbalance_disable);
static inline void move_irq(int irq)
{
/* note - we hold the desc->lock */
if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
cpus_clear(pending_irq_balance_cpumask[irq]);
}
}
late_initcall(balanced_irq_init); late_initcall(balanced_irq_init);
#else /* !CONFIG_IRQBALANCE */
static inline void move_irq(int irq) { }
#endif /* CONFIG_IRQBALANCE */ #endif /* CONFIG_IRQBALANCE */
#endif /* CONFIG_SMP */
#ifndef CONFIG_SMP #ifndef CONFIG_SMP
void fastcall send_IPI_self(int vector) void fastcall send_IPI_self(int vector)
...@@ -820,6 +810,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); ...@@ -820,6 +810,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
* we need to reprogram the ioredtbls to cater for the cpus which have come online * we need to reprogram the ioredtbls to cater for the cpus which have come online
* so mask in all cases should simply be TARGET_CPUS * so mask in all cases should simply be TARGET_CPUS
*/ */
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void) void __init setup_ioapic_dest(void)
{ {
int pin, ioapic, irq, irq_entry; int pin, ioapic, irq, irq_entry;
...@@ -838,6 +829,7 @@ void __init setup_ioapic_dest(void) ...@@ -838,6 +829,7 @@ void __init setup_ioapic_dest(void)
} }
} }
#endif
/* /*
* EISA Edge/Level control register, ELCR * EISA Edge/Level control register, ELCR
...@@ -1249,6 +1241,7 @@ static void __init setup_IO_APIC_irqs(void) ...@@ -1249,6 +1241,7 @@ static void __init setup_IO_APIC_irqs(void)
spin_lock_irqsave(&ioapic_lock, flags); spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags); spin_unlock_irqrestore(&ioapic_lock, flags);
} }
} }
...@@ -1944,6 +1937,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) ...@@ -1944,6 +1937,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
{ {
int irq = vector_to_irq(vector); int irq = vector_to_irq(vector);
move_irq(vector);
ack_edge_ioapic_irq(irq); ack_edge_ioapic_irq(irq);
} }
...@@ -1958,6 +1952,7 @@ static void end_level_ioapic_vector (unsigned int vector) ...@@ -1958,6 +1952,7 @@ static void end_level_ioapic_vector (unsigned int vector)
{ {
int irq = vector_to_irq(vector); int irq = vector_to_irq(vector);
move_irq(vector);
end_level_ioapic_irq(irq); end_level_ioapic_irq(irq);
} }
...@@ -1975,14 +1970,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) ...@@ -1975,14 +1970,17 @@ static void unmask_IO_APIC_vector (unsigned int vector)
unmask_IO_APIC_irq(irq); unmask_IO_APIC_irq(irq);
} }
#ifdef CONFIG_SMP
static void set_ioapic_affinity_vector (unsigned int vector, static void set_ioapic_affinity_vector (unsigned int vector,
cpumask_t cpu_mask) cpumask_t cpu_mask)
{ {
int irq = vector_to_irq(vector); int irq = vector_to_irq(vector);
set_native_irq_info(vector, cpu_mask);
set_ioapic_affinity_irq(irq, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask);
} }
#endif #endif
#endif
/* /*
* Level and edge triggered IO-APIC interrupts need different handling, * Level and edge triggered IO-APIC interrupts need different handling,
...@@ -2000,7 +1998,9 @@ static struct hw_interrupt_type ioapic_edge_type = { ...@@ -2000,7 +1998,9 @@ static struct hw_interrupt_type ioapic_edge_type = {
.disable = disable_edge_ioapic, .disable = disable_edge_ioapic,
.ack = ack_edge_ioapic, .ack = ack_edge_ioapic,
.end = end_edge_ioapic, .end = end_edge_ioapic,
#ifdef CONFIG_SMP
.set_affinity = set_ioapic_affinity, .set_affinity = set_ioapic_affinity,
#endif
}; };
static struct hw_interrupt_type ioapic_level_type = { static struct hw_interrupt_type ioapic_level_type = {
...@@ -2011,7 +2011,9 @@ static struct hw_interrupt_type ioapic_level_type = { ...@@ -2011,7 +2011,9 @@ static struct hw_interrupt_type ioapic_level_type = {
.disable = disable_level_ioapic, .disable = disable_level_ioapic,
.ack = mask_and_ack_level_ioapic, .ack = mask_and_ack_level_ioapic,
.end = end_level_ioapic, .end = end_level_ioapic,
#ifdef CONFIG_SMP
.set_affinity = set_ioapic_affinity, .set_affinity = set_ioapic_affinity,
#endif
}; };
static inline void init_IO_APIC_traps(void) static inline void init_IO_APIC_traps(void)
...@@ -2569,6 +2571,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a ...@@ -2569,6 +2571,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
spin_lock_irqsave(&ioapic_lock, flags); spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags); spin_unlock_irqrestore(&ioapic_lock, flags);
return 0; return 0;
......
...@@ -434,6 +434,11 @@ config GENERIC_IRQ_PROBE ...@@ -434,6 +434,11 @@ config GENERIC_IRQ_PROBE
bool bool
default y default y
config GENERIC_PENDING_IRQ
bool
depends on GENERIC_HARDIRQS && SMP
default y
source "arch/ia64/hp/sim/Kconfig" source "arch/ia64/hp/sim/Kconfig"
source "arch/ia64/oprofile/Kconfig" source "arch/ia64/oprofile/Kconfig"
......
...@@ -91,23 +91,8 @@ skip: ...@@ -91,23 +91,8 @@ skip:
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/*
* This is updated when the user sets irq affinity via /proc
*/
static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)];
static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
/*
* Arch specific routine for deferred write to iosapic rte to reprogram
* intr destination.
*/
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
pending_irq_cpumask[irq] = mask_val;
}
void set_irq_affinity_info (unsigned int irq, int hwid, int redir) void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
{ {
cpumask_t mask = CPU_MASK_NONE; cpumask_t mask = CPU_MASK_NONE;
...@@ -116,32 +101,10 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir) ...@@ -116,32 +101,10 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
if (irq < NR_IRQS) { if (irq < NR_IRQS) {
irq_affinity[irq] = mask; irq_affinity[irq] = mask;
set_irq_info(irq, mask);
irq_redir[irq] = (char) (redir & 0xff); irq_redir[irq] = (char) (redir & 0xff);
} }
} }
void move_irq(int irq)
{
/* note - we hold desc->lock */
cpumask_t tmp;
irq_desc_t *desc = irq_descp(irq);
int redir = test_bit(irq, pending_irq_redir);
if (unlikely(!desc->handler->set_affinity))
return;
if (!cpus_empty(pending_irq_cpumask[irq])) {
cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
if (unlikely(!cpus_empty(tmp))) {
desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0),
pending_irq_cpumask[irq]);
}
cpus_clear(pending_irq_cpumask[irq]);
}
}
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
......
...@@ -441,6 +441,11 @@ config ISA_DMA_API ...@@ -441,6 +441,11 @@ config ISA_DMA_API
bool bool
default y default y
config GENERIC_PENDING_IRQ
bool
depends on GENERIC_HARDIRQS && SMP
default y
menu "Power management options" menu "Power management options"
source kernel/power/Kconfig source kernel/power/Kconfig
......
...@@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; ...@@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
#define vector_to_irq(vector) (vector) #define vector_to_irq(vector) (vector)
#endif #endif
/*
 * __DO_ACTION - apply a register update to every IO-APIC pin of an irq.
 *
 * Expands to a block that walks the irq_2_pin chain starting at
 * irq_2_pin + irq; "irq" is NOT a macro argument and must be a variable
 * in the enclosing scope.  For each pin it reads the register at
 * 0x10 + R + pin*2, applies ACTION to the local "reg" (ACTION is pasted
 * directly after "reg", e.g. "= dest"), and writes the result back with
 * io_apic_modify().  The walk stops at a pin of -1 or when entry->next
 * is 0.  FINAL is emitted as a statement after the loop.
 *
 * The macro takes no lock itself; the caller shown alongside it wraps the
 * expansion in ioapic_lock.
 */
#define __DO_ACTION(R, ACTION, FINAL) \
\
{ \
int pin; \
struct irq_pin_list *entry = irq_2_pin + irq; \
\
for (;;) { \
unsigned int reg; \
pin = entry->pin; \
if (pin == -1) \
break; \
reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
reg ACTION; \
io_apic_modify(entry->apic, reg); \
if (!entry->next) \
break; \
entry = irq_2_pin + entry->next; \
} \
FINAL; \
}
#ifdef CONFIG_SMP
/*
 * Point every IO-APIC pin that feeds @irq at the CPUs in @mask.
 *
 * The requested mask is first restricted to cpu_online_map; if nothing
 * is left, TARGET_CPUS is used instead.  The resulting mask is what gets
 * programmed and what is recorded through set_irq_info().  Both the
 * register update and the bookkeeping happen under ioapic_lock.
 */
static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
	cpumask_t online_mask;
	unsigned int apic_dest;
	unsigned long irq_flags;

	cpus_and(online_mask, mask, cpu_online_map);
	if (cpus_empty(online_mask))
		online_mask = TARGET_CPUS;

	cpus_and(mask, online_mask, CPU_MASK_ALL);

	apic_dest = cpu_mask_to_apicid(mask);

	/*
	 * Only the high 8 bits are valid.
	 */
	apic_dest = SET_APIC_LOGICAL_ID(apic_dest);

	spin_lock_irqsave(&ioapic_lock, irq_flags);
	__DO_ACTION(1, = apic_dest, )
	set_irq_info(irq, mask);
	spin_unlock_irqrestore(&ioapic_lock, irq_flags);
}
#endif
/* /*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super * shared ISA-space IRQs, so we have to support them. We are super
...@@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) ...@@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
entry->pin = pin; entry->pin = pin;
} }
#define __DO_ACTION(R, ACTION, FINAL) \
\
{ \
int pin; \
struct irq_pin_list *entry = irq_2_pin + irq; \
\
for (;;) { \
unsigned int reg; \
pin = entry->pin; \
if (pin == -1) \
break; \
reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
reg ACTION; \
io_apic_modify(entry->apic, reg); \
if (!entry->next) \
break; \
entry = irq_2_pin + entry->next; \
} \
FINAL; \
}
#define DO_ACTION(name,R,ACTION, FINAL) \ #define DO_ACTION(name,R,ACTION, FINAL) \
\ \
...@@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(void) ...@@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(void)
spin_lock_irqsave(&ioapic_lock, flags); spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags); spin_unlock_irqrestore(&ioapic_lock, flags);
} }
} }
...@@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) ...@@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
*/ */
static void ack_edge_ioapic_irq(unsigned int irq) static void ack_edge_ioapic_irq(unsigned int irq)
{ {
move_irq(irq);
if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
== (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED))
mask_IO_APIC_irq(irq); mask_IO_APIC_irq(irq);
...@@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq) ...@@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq)
static void end_level_ioapic_irq (unsigned int irq) static void end_level_ioapic_irq (unsigned int irq)
{ {
move_irq(irq);
ack_APIC_irq(); ack_APIC_irq();
} }
static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
unsigned long flags;
unsigned int dest;
dest = cpu_mask_to_apicid(mask);
/*
* Only the high 8 bits are valid.
*/
dest = SET_APIC_LOGICAL_ID(dest);
spin_lock_irqsave(&ioapic_lock, flags);
__DO_ACTION(1, = dest, )
spin_unlock_irqrestore(&ioapic_lock, flags);
}
#ifdef CONFIG_PCI_MSI #ifdef CONFIG_PCI_MSI
static unsigned int startup_edge_ioapic_vector(unsigned int vector) static unsigned int startup_edge_ioapic_vector(unsigned int vector)
{ {
...@@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) ...@@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
{ {
int irq = vector_to_irq(vector); int irq = vector_to_irq(vector);
move_native_irq(vector);
ack_edge_ioapic_irq(irq); ack_edge_ioapic_irq(irq);
} }
...@@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (unsigned int vector) ...@@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (unsigned int vector)
{ {
int irq = vector_to_irq(vector); int irq = vector_to_irq(vector);
move_native_irq(vector);
end_level_ioapic_irq(irq); end_level_ioapic_irq(irq);
} }
...@@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) ...@@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsigned int vector)
unmask_IO_APIC_irq(irq); unmask_IO_APIC_irq(irq);
} }
#ifdef CONFIG_SMP
static void set_ioapic_affinity_vector (unsigned int vector, static void set_ioapic_affinity_vector (unsigned int vector,
cpumask_t cpu_mask) cpumask_t cpu_mask)
{ {
int irq = vector_to_irq(vector); int irq = vector_to_irq(vector);
set_native_irq_info(vector, cpu_mask);
set_ioapic_affinity_irq(irq, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask);
} }
#endif #endif // CONFIG_SMP
#endif // CONFIG_PCI_MSI
/* /*
* Level and edge triggered IO-APIC interrupts need different handling, * Level and edge triggered IO-APIC interrupts need different handling,
...@@ -1432,7 +1451,9 @@ static struct hw_interrupt_type ioapic_edge_type = { ...@@ -1432,7 +1451,9 @@ static struct hw_interrupt_type ioapic_edge_type = {
.disable = disable_edge_ioapic, .disable = disable_edge_ioapic,
.ack = ack_edge_ioapic, .ack = ack_edge_ioapic,
.end = end_edge_ioapic, .end = end_edge_ioapic,
#ifdef CONFIG_SMP
.set_affinity = set_ioapic_affinity, .set_affinity = set_ioapic_affinity,
#endif
}; };
static struct hw_interrupt_type ioapic_level_type = { static struct hw_interrupt_type ioapic_level_type = {
...@@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_level_type = { ...@@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_level_type = {
.disable = disable_level_ioapic, .disable = disable_level_ioapic,
.ack = mask_and_ack_level_ioapic, .ack = mask_and_ack_level_ioapic,
.end = end_level_ioapic, .end = end_level_ioapic,
#ifdef CONFIG_SMP
.set_affinity = set_ioapic_affinity, .set_affinity = set_ioapic_affinity,
#endif
}; };
static inline void init_IO_APIC_traps(void) static inline void init_IO_APIC_traps(void)
...@@ -1918,6 +1941,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a ...@@ -1918,6 +1941,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
spin_lock_irqsave(&ioapic_lock, flags); spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags); spin_unlock_irqrestore(&ioapic_lock, flags);
return 0; return 0;
...@@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a ...@@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
* we need to reprogram the ioredtbls to cater for the cpus which have come online * we need to reprogram the ioredtbls to cater for the cpus which have come online
* so mask in all cases should simply be TARGET_CPUS * so mask in all cases should simply be TARGET_CPUS
*/ */
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void) void __init setup_ioapic_dest(void)
{ {
int pin, ioapic, irq, irq_entry; int pin, ioapic, irq, irq_entry;
...@@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void) ...@@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void)
} }
} }
#endif
...@@ -91,6 +91,7 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask) ...@@ -91,6 +91,7 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask)
{ {
struct msi_desc *entry; struct msi_desc *entry;
struct msg_address address; struct msg_address address;
unsigned int irq = vector;
entry = (struct msi_desc *)msi_desc[vector]; entry = (struct msi_desc *)msi_desc[vector];
if (!entry || !entry->dev) if (!entry || !entry->dev)
...@@ -112,6 +113,7 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask) ...@@ -112,6 +113,7 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask)
entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask);
pci_write_config_dword(entry->dev, msi_lower_address_reg(pos), pci_write_config_dword(entry->dev, msi_lower_address_reg(pos),
address.lo_address.value); address.lo_address.value);
set_native_irq_info(irq, cpu_mask);
break; break;
} }
case PCI_CAP_ID_MSIX: case PCI_CAP_ID_MSIX:
...@@ -125,22 +127,13 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask) ...@@ -125,22 +127,13 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask)
MSI_TARGET_CPU_SHIFT); MSI_TARGET_CPU_SHIFT);
entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask);
writel(address.lo_address.value, entry->mask_base + offset); writel(address.lo_address.value, entry->mask_base + offset);
set_native_irq_info(irq, cpu_mask);
break; break;
} }
default: default:
break; break;
} }
} }
#ifdef CONFIG_IRQBALANCE
static inline void move_msi(int vector)
{
if (!cpus_empty(pending_irq_balance_cpumask[vector])) {
set_msi_affinity(vector, pending_irq_balance_cpumask[vector]);
cpus_clear(pending_irq_balance_cpumask[vector]);
}
}
#endif /* CONFIG_IRQBALANCE */
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
static void mask_MSI_irq(unsigned int vector) static void mask_MSI_irq(unsigned int vector)
...@@ -191,13 +184,13 @@ static void shutdown_msi_irq(unsigned int vector) ...@@ -191,13 +184,13 @@ static void shutdown_msi_irq(unsigned int vector)
static void end_msi_irq_wo_maskbit(unsigned int vector) static void end_msi_irq_wo_maskbit(unsigned int vector)
{ {
move_msi(vector); move_native_irq(vector);
ack_APIC_irq(); ack_APIC_irq();
} }
static void end_msi_irq_w_maskbit(unsigned int vector) static void end_msi_irq_w_maskbit(unsigned int vector)
{ {
move_msi(vector); move_native_irq(vector);
unmask_MSI_irq(vector); unmask_MSI_irq(vector);
ack_APIC_irq(); ack_APIC_irq();
} }
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#define NR_HP_RESERVED_VECTORS 20 #define NR_HP_RESERVED_VECTORS 20
extern int vector_irq[NR_VECTORS]; extern int vector_irq[NR_VECTORS];
extern cpumask_t pending_irq_balance_cpumask[NR_IRQS];
extern void (*interrupt[NR_IRQS])(void); extern void (*interrupt[NR_IRQS])(void);
extern int pci_vector_resources(int last, int nr_released); extern int pci_vector_resources(int last, int nr_released);
...@@ -29,10 +28,6 @@ extern int pci_vector_resources(int last, int nr_released); ...@@ -29,10 +28,6 @@ extern int pci_vector_resources(int last, int nr_released);
#define set_msi_irq_affinity NULL #define set_msi_irq_affinity NULL
#endif #endif
#ifndef CONFIG_IRQBALANCE
static inline void move_msi(int vector) {}
#endif
/* /*
* MSI-X Address Register * MSI-X Address Register
*/ */
......
...@@ -116,13 +116,6 @@ __ia64_local_vector_to_irq (ia64_vector vec) ...@@ -116,13 +116,6 @@ __ia64_local_vector_to_irq (ia64_vector vec)
* and to obtain the irq descriptor for a given irq number. * and to obtain the irq descriptor for a given irq number.
*/ */
/* Return a pointer to the irq descriptor for IRQ. */
static inline irq_desc_t *
irq_descp (int irq)
{
return irq_desc + irq;
}
/* Extract the IA-64 vector that corresponds to IRQ. */ /* Extract the IA-64 vector that corresponds to IRQ. */
static inline ia64_vector static inline ia64_vector
irq_to_vector (int irq) irq_to_vector (int irq)
......
...@@ -30,12 +30,6 @@ extern void disable_irq_nosync (unsigned int); ...@@ -30,12 +30,6 @@ extern void disable_irq_nosync (unsigned int);
extern void enable_irq (unsigned int); extern void enable_irq (unsigned int);
extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); extern void set_irq_affinity_info (unsigned int irq, int dest, int redir);
#ifdef CONFIG_SMP
extern void move_irq(int irq);
#else
#define move_irq(irq)
#endif
struct irqaction; struct irqaction;
struct pt_regs; struct pt_regs;
int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
......
...@@ -71,16 +71,139 @@ typedef struct irq_desc { ...@@ -71,16 +71,139 @@ typedef struct irq_desc {
unsigned int irq_count; /* For detecting broken interrupts */ unsigned int irq_count; /* For detecting broken interrupts */
unsigned int irqs_unhandled; unsigned int irqs_unhandled;
spinlock_t lock; spinlock_t lock;
#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
unsigned int move_irq; /* Flag need to re-target intr dest*/
#endif
} ____cacheline_aligned irq_desc_t; } ____cacheline_aligned irq_desc_t;
extern irq_desc_t irq_desc [NR_IRQS]; extern irq_desc_t irq_desc [NR_IRQS];
/* Look up the descriptor slot for IRQ in the irq_desc[] table. */
static inline irq_desc_t *
irq_descp (int irq)
{
	return &irq_desc[irq];
}
#include <asm/hw_irq.h> /* the arch dependent stuff */ #include <asm/hw_irq.h> /* the arch dependent stuff */
extern int setup_irq(unsigned int irq, struct irqaction * new); extern int setup_irq(unsigned int irq, struct irqaction * new);
#ifdef CONFIG_GENERIC_HARDIRQS #ifdef CONFIG_GENERIC_HARDIRQS
extern cpumask_t irq_affinity[NR_IRQS]; extern cpumask_t irq_affinity[NR_IRQS];
/*
 * Record @mask as the affinity of @irq in irq_affinity[].
 * Compiles to an empty function when CONFIG_SMP is not set.
 */
static inline void set_native_irq_info(int irq, cpumask_t mask)
{
#ifdef CONFIG_SMP
	irq_affinity[irq] = mask;
#endif
}
#ifdef CONFIG_SMP
#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
extern cpumask_t pending_irq_cpumask[NR_IRQS];
/*
 * Queue an affinity change for @irq: raise the descriptor's move_irq
 * flag and stash @mask in pending_irq_cpumask[], both under desc->lock.
 * Nothing is programmed here; move_native_irq() picks the request up.
 */
static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
{
	unsigned long irq_flags;
	irq_desc_t *d = &irq_desc[irq];

	spin_lock_irqsave(&d->lock, irq_flags);
	d->move_irq = 1;
	pending_irq_cpumask[irq] = mask;
	spin_unlock_irqrestore(&d->lock, irq_flags);
}
/*
 * move_native_irq - apply a deferred affinity change for @irq, if any.
 *
 * Does nothing unless set_pending_irq() has raised desc->move_irq.
 * Otherwise the flag is cleared and, provided a non-empty mask is pending
 * and the handler implements set_affinity, the pending mask is restricted
 * to cpu_online_map and programmed via the handler.  The pending mask is
 * cleared afterwards.  If the handler has no set_affinity the pending
 * mask is left untouched.
 *
 * The caller is expected to hold desc->lock.
 */
static inline void
move_native_irq(int irq)
{
	cpumask_t tmp;
	irq_desc_t *desc = irq_descp(irq);

	/* Fast path: no migration has been requested. */
	if (likely(!desc->move_irq))
		return;

	desc->move_irq = 0;

	/*
	 * set_pending_irq() stores the mask together with the flag, so an
	 * empty mask at this point is the exceptional case.
	 */
	if (unlikely(cpus_empty(pending_irq_cpumask[irq])))
		return;

	if (!desc->handler->set_affinity)
		return;

	/* note - we hold the desc->lock */
	cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);

	/*
	 * If there is a valid mask to work with, do the disable,
	 * re-program, enable sequence.
	 * This is *not* particularly important for level-triggered
	 * interrupts, but in the edge-triggered case we might be setting
	 * the rte while an active trigger is coming in.  This could
	 * cause some ioapics to malfunction.
	 * Being paranoid, I guess!
	 */
	if (likely(!cpus_empty(tmp))) {
		desc->handler->disable(irq);
		desc->handler->set_affinity(irq, tmp);
		desc->handler->enable(irq);
	}
	cpus_clear(pending_irq_cpumask[irq]);
}
#ifdef CONFIG_PCI_MSI
/*
 * Why are these dummies when CONFIG_PCI_MSI is set?
 *
 * The *_vector() wrappers do the native work themselves on the vector
 * (e.g. set_ioapic_affinity_vector() calls set_native_irq_info(), and the
 * x86_64 ack/end *_vector() routines call move_native_irq()) and then
 * call the *_irq() counterpart after translating the vector to an irq.
 * The *_irq() routines in turn call move_irq()/set_irq_info();
 * presumably these are stubbed out here so the work is not repeated on
 * the translated irq.
 *
 * NOTE(review): the original comment also says the operation must be
 * performed on the real irq when vectors are not in use, i.e. when
 * use_pci_vector() is false -- confirm how that case is served while
 * these remain no-ops.
 */
static inline void move_irq(int irq)
{
}
static inline void set_irq_info(int irq, cpumask_t mask)
{
}
#else // CONFIG_PCI_MSI
/* Without MSI there is no vector/irq split: forward to the native helpers. */
static inline void move_irq(int irq)
{
move_native_irq(irq);
}
static inline void set_irq_info(int irq, cpumask_t mask)
{
set_native_irq_info(irq, mask);
}
#endif // CONFIG_PCI_MSI
#else // CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE
#define move_irq(x)
#define move_native_irq(x)
#define set_pending_irq(x,y)
/*
 * Neither CONFIG_GENERIC_PENDING_IRQ nor CONFIG_IRQBALANCE is set in this
 * branch: simply record the affinity via set_native_irq_info().
 */
static inline void set_irq_info(int irq, cpumask_t mask)
{
set_native_irq_info(irq, mask);
}
#endif // CONFIG_GENERIC_PENDING_IRQ
#else // CONFIG_SMP
#define move_irq(x)
#define move_native_irq(x)
#endif // CONFIG_SMP
extern int no_irq_affinity; extern int no_irq_affinity;
extern int noirqdebug_setup(char *str); extern int noirqdebug_setup(char *str);
......
...@@ -18,6 +18,10 @@ ...@@ -18,6 +18,10 @@
cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
/*
 * Per-irq destination mask waiting to be applied.  Written by
 * set_pending_irq() and consumed (then cleared) by move_native_irq().
 */
cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
#endif
/** /**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs) * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* *
......
...@@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS]; ...@@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
*/ */
static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
void __attribute__((weak)) #ifdef CONFIG_GENERIC_PENDING_IRQ
proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
/*
* Save these away for later use. Re-progam when the
* interrupt is pending
*/
set_pending_irq(irq, mask_val);
}
#else
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{ {
irq_affinity[irq] = mask_val; irq_affinity[irq] = mask_val;
irq_desc[irq].handler->set_affinity(irq, mask_val); irq_desc[irq].handler->set_affinity(irq, mask_val);
} }
#endif
static int irq_affinity_read_proc(char *page, char **start, off_t off, static int irq_affinity_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data) int count, int *eof, void *data)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment