Commit a32a8813 authored by Rusty Russell

lguest: improve interrupt handling, speed up stream networking

lguest never checked for pending interrupts when enabling interrupts, and
things still worked.  However, it makes a significant difference to TCP
performance, so it's time we fixed it by introducing a pending_irq flag
and checking it on irq_restore and irq_enable.

These two routines are now too big to patch into the 8/10 bytes
patch space, so we drop that code.

Note: The high latency on interrupt delivery had a very curious
effect: once everything else was optimized, networking without GSO was
faster than networking with GSO, since more interrupts were sent and
hence a greater chance of one getting through to the Guest!

Note2: (Almost) Closing the same loophole for iret doesn't have any
measurable effect, so I'm leaving that patch for the moment.

Before:
	1GB tcpblast Guest->Host:		30.7 seconds
	1GB tcpblast Guest->Host (no GSO):	76.0 seconds

After:
	1GB tcpblast Guest->Host:		6.8 seconds
	1GB tcpblast Guest->Host (no GSO):	27.8 seconds
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
parent abd41f03
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#define LHCALL_LOAD_TLS 16 #define LHCALL_LOAD_TLS 16
#define LHCALL_NOTIFY 17 #define LHCALL_NOTIFY 17
#define LHCALL_LOAD_GDT_ENTRY 18 #define LHCALL_LOAD_GDT_ENTRY 18
#define LHCALL_SEND_INTERRUPTS 19
#define LGUEST_TRAP_ENTRY 0x1F #define LGUEST_TRAP_ENTRY 0x1F
......
...@@ -205,6 +205,12 @@ PV_CALLEE_SAVE_REGS_THUNK(save_fl); ...@@ -205,6 +205,12 @@ PV_CALLEE_SAVE_REGS_THUNK(save_fl);
static void restore_fl(unsigned long flags) static void restore_fl(unsigned long flags)
{ {
lguest_data.irq_enabled = flags; lguest_data.irq_enabled = flags;
mb();
/* Null hcall forces interrupt delivery now, if irq_pending is
* set to X86_EFLAGS_IF (ie. an interrupt is pending, and flags
* enables interrupts). */
if (flags & lguest_data.irq_pending)
kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
} }
PV_CALLEE_SAVE_REGS_THUNK(restore_fl); PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
...@@ -219,6 +225,11 @@ PV_CALLEE_SAVE_REGS_THUNK(irq_disable); ...@@ -219,6 +225,11 @@ PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
static void irq_enable(void) static void irq_enable(void)
{ {
lguest_data.irq_enabled = X86_EFLAGS_IF; lguest_data.irq_enabled = X86_EFLAGS_IF;
mb();
/* Null hcall forces interrupt delivery now. */
if (lguest_data.irq_pending)
kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
} }
PV_CALLEE_SAVE_REGS_THUNK(irq_enable); PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
...@@ -972,10 +983,10 @@ static void lguest_restart(char *reason) ...@@ -972,10 +983,10 @@ static void lguest_restart(char *reason)
* *
* Our current solution is to allow the paravirt back end to optionally patch * Our current solution is to allow the paravirt back end to optionally patch
* over the indirect calls to replace them with something more efficient. We * over the indirect calls to replace them with something more efficient. We
* patch the four most commonly called functions: disable interrupts, enable * patch two of the simplest of the most commonly called functions: disable
* interrupts, restore interrupts and save interrupts. We usually have 6 or 10 * interrupts and save interrupts. We usually have 6 or 10 bytes to patch
* bytes to patch into: the Guest versions of these operations are small enough * into: the Guest versions of these operations are small enough that we can
* that we can fit comfortably. * fit comfortably.
* *
* First we need assembly templates of each of the patchable Guest operations, * First we need assembly templates of each of the patchable Guest operations,
* and these are in i386_head.S. */ * and these are in i386_head.S. */
...@@ -986,8 +997,6 @@ static const struct lguest_insns ...@@ -986,8 +997,6 @@ static const struct lguest_insns
const char *start, *end; const char *start, *end;
} lguest_insns[] = { } lguest_insns[] = {
[PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
[PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
[PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
[PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
}; };
......
...@@ -46,8 +46,6 @@ ENTRY(lguest_entry) ...@@ -46,8 +46,6 @@ ENTRY(lguest_entry)
.globl lgstart_##name; .globl lgend_##name .globl lgstart_##name; .globl lgend_##name
LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
/*:*/ /*:*/
......
...@@ -189,6 +189,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) ...@@ -189,6 +189,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
/* We stop running once the Guest is dead. */ /* We stop running once the Guest is dead. */
while (!cpu->lg->dead) { while (!cpu->lg->dead) {
unsigned int irq; unsigned int irq;
bool more;
/* First we run any hypercalls the Guest wants done. */ /* First we run any hypercalls the Guest wants done. */
if (cpu->hcall) if (cpu->hcall)
...@@ -213,9 +214,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) ...@@ -213,9 +214,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
/* Check if there are any interrupts which can be delivered now: /* Check if there are any interrupts which can be delivered now:
* if so, this sets up the handler to be executed when we next * run the Guest. */
* run the Guest. */ * run the Guest. */
irq = interrupt_pending(cpu); irq = interrupt_pending(cpu, &more);
if (irq < LGUEST_IRQS) if (irq < LGUEST_IRQS)
try_deliver_interrupt(cpu, irq); try_deliver_interrupt(cpu, irq, more);
/* All long-lived kernel loops need to check with this horrible /* All long-lived kernel loops need to check with this horrible
* thing called the freezer. If the Host is trying to suspend, * thing called the freezer. If the Host is trying to suspend,
...@@ -233,7 +234,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) ...@@ -233,7 +234,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
/* Just before we sleep, make sure nothing snuck in /* Just before we sleep, make sure nothing snuck in
* which we should be doing. */ * which we should be doing. */
if (interrupt_pending(cpu) < LGUEST_IRQS if (interrupt_pending(cpu, &more) < LGUEST_IRQS
|| cpu->break_out) || cpu->break_out)
set_current_state(TASK_RUNNING); set_current_state(TASK_RUNNING);
else else
......
...@@ -37,6 +37,10 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) ...@@ -37,6 +37,10 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
/* This call does nothing, except by breaking out of the Guest /* This call does nothing, except by breaking out of the Guest
* it makes us process all the asynchronous hypercalls. */ * it makes us process all the asynchronous hypercalls. */
break; break;
case LHCALL_SEND_INTERRUPTS:
/* This call does nothing too, but by breaking out of the Guest
* it makes us process any pending interrupts. */
break;
case LHCALL_LGUEST_INIT: case LHCALL_LGUEST_INIT:
/* You can't get here unless you're already initialized. Don't /* You can't get here unless you're already initialized. Don't
* do that. */ * do that. */
......
...@@ -131,7 +131,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, ...@@ -131,7 +131,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
* interrupt_pending() returns the first pending interrupt which isn't blocked * interrupt_pending() returns the first pending interrupt which isn't blocked
* by the Guest. It is called before every entry to the Guest, and just before * by the Guest. It is called before every entry to the Guest, and just before
* we go to sleep when the Guest has halted itself. */ * we go to sleep when the Guest has halted itself. */
unsigned int interrupt_pending(struct lg_cpu *cpu) unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
{ {
unsigned int irq; unsigned int irq;
DECLARE_BITMAP(blk, LGUEST_IRQS); DECLARE_BITMAP(blk, LGUEST_IRQS);
...@@ -149,13 +149,14 @@ unsigned int interrupt_pending(struct lg_cpu *cpu) ...@@ -149,13 +149,14 @@ unsigned int interrupt_pending(struct lg_cpu *cpu)
/* Find the first interrupt. */ /* Find the first interrupt. */
irq = find_first_bit(blk, LGUEST_IRQS); irq = find_first_bit(blk, LGUEST_IRQS);
*more = find_next_bit(blk, LGUEST_IRQS, irq+1);
return irq; return irq;
} }
/* This actually diverts the Guest to running an interrupt handler, once an /* This actually diverts the Guest to running an interrupt handler, once an
* interrupt has been identified by interrupt_pending(). */ * interrupt has been identified by interrupt_pending(). */
void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq) void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
{ {
struct desc_struct *idt; struct desc_struct *idt;
...@@ -178,9 +179,13 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq) ...@@ -178,9 +179,13 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
u32 irq_enabled; u32 irq_enabled;
if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled)) if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
irq_enabled = 0; irq_enabled = 0;
if (!irq_enabled) if (!irq_enabled) {
/* Make sure they know an IRQ is pending. */
put_user(X86_EFLAGS_IF,
&cpu->lg->lguest_data->irq_pending);
return; return;
} }
}
/* Look at the IDT entry the Guest gave us for this interrupt. The /* Look at the IDT entry the Guest gave us for this interrupt. The
* first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
...@@ -202,6 +207,11 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq) ...@@ -202,6 +207,11 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
* here is a compromise which means at least it gets updated every * here is a compromise which means at least it gets updated every
* timer interrupt. */ * timer interrupt. */
write_timestamp(cpu); write_timestamp(cpu);
/* If there are no other interrupts we want to deliver, clear
* the pending flag. */
if (!more)
put_user(0, &cpu->lg->lguest_data->irq_pending);
} }
/*:*/ /*:*/
......
...@@ -139,8 +139,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user); ...@@ -139,8 +139,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
#define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT)
/* interrupts_and_traps.c: */ /* interrupts_and_traps.c: */
unsigned int interrupt_pending(struct lg_cpu *cpu); unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq); void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
bool deliver_trap(struct lg_cpu *cpu, unsigned int num); bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i, void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
u32 low, u32 hi); u32 low, u32 hi);
......
...@@ -30,6 +30,10 @@ struct lguest_data ...@@ -30,6 +30,10 @@ struct lguest_data
/* Wallclock time set by the Host. */ /* Wallclock time set by the Host. */
struct timespec time; struct timespec time;
/* Interrupt pending set by the Host. The Guest should do a hypercall
* if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */
int irq_pending;
/* Async hypercall ring. Instead of directly making hypercalls, we can /* Async hypercall ring. Instead of directly making hypercalls, we can
* place them in here for processing the next time the Host wants. * place them in here for processing the next time the Host wants.
* This batching can be quite efficient. */ * This batching can be quite efficient. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment