Commit b2b47c21 authored by Rusty Russell, committed by Linus Torvalds

lguest: documentation II: Guest

Documentation: The Guest
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f938d2c8
This diff is collapsed.
...@@ -4,15 +4,15 @@ ...@@ -4,15 +4,15 @@
#include <asm/thread_info.h> #include <asm/thread_info.h>
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
/* /*G:020 This is where we begin: we have a magic signature which the launcher
* This is where we begin: we have a magic signature which the launcher looks * looks for. The plan is that the Linux boot protocol will be extended with a
* for. The plan is that the Linux boot protocol will be extended with a
* "platform type" field which will guide us here from the normal entry point, * "platform type" field which will guide us here from the normal entry point,
* but for the moment this suffices. We pass the virtual address of the boot * but for the moment this suffices. The normal boot code uses %esi for the
* info to lguest_init(). * boot header, so we do too. We convert it to a virtual address by adding
* PAGE_OFFSET, and hand it to lguest_init() as its argument (ie. %eax).
* *
* We put it in .init.text will be discarded after boot. * The .section line puts this code in .init.text so it will be discarded after
*/ * boot. */
.section .init.text, "ax", @progbits .section .init.text, "ax", @progbits
.ascii "GenuineLguest" .ascii "GenuineLguest"
/* Set up initial stack. */ /* Set up initial stack. */
...@@ -21,7 +21,9 @@ ...@@ -21,7 +21,9 @@
addl $__PAGE_OFFSET, %eax addl $__PAGE_OFFSET, %eax
jmp lguest_init jmp lguest_init
/* The templates for inline patching. */ /*G:055 We create a macro which puts the assembler code between lgstart_ and
* lgend_ markers. These templates end up in the .init.text section, so they
* are discarded after boot. */
#define LGUEST_PATCH(name, insns...) \ #define LGUEST_PATCH(name, insns...) \
lgstart_##name: insns; lgend_##name:; \ lgstart_##name: insns; lgend_##name:; \
.globl lgstart_##name; .globl lgend_##name .globl lgstart_##name; .globl lgend_##name
...@@ -30,24 +32,47 @@ LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) ...@@ -30,24 +32,47 @@ LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
/*:*/
.text .text
/* These demark the EIP range where host should never deliver interrupts. */ /* These demark the EIP range where host should never deliver interrupts. */
.global lguest_noirq_start .global lguest_noirq_start
.global lguest_noirq_end .global lguest_noirq_end
/* /*G:045 There is one final paravirt_op that the Guest implements, and glancing
* We move eflags word to lguest_data.irq_enabled to restore interrupt state. * at it you can see why I left it to last. It's *cool*! It's in *assembler*!
* For page faults, gpfs and virtual interrupts, the hypervisor has saved *
* eflags manually, otherwise it was delivered directly and so eflags reflects * The "iret" instruction is used to return from an interrupt or trap. The
* the real machine IF state, ie. interrupts on. Since the kernel always dies * stack looks like this:
* if it takes such a trap with interrupts disabled anyway, turning interrupts * old address
* back on unconditionally here is OK. * old code segment & privilege level
*/ * old processor flags ("eflags")
*
* The "iret" instruction pops those values off the stack and restores them all
* at once. The only problem is that eflags includes the Interrupt Flag which
* the Guest can't change: the CPU will simply ignore it when we do an "iret".
* So we have to copy eflags from the stack to lguest_data.irq_enabled before
* we do the "iret".
*
* There are two problems with this: firstly, we need to use a register to do
* the copy and secondly, the whole thing needs to be atomic. The first
* problem is easy to solve: push %eax on the stack so we can use it, and then
* restore it at the end just before the real "iret".
*
* The second is harder: copying eflags to lguest_data.irq_enabled will turn
* interrupts on before we're finished, so we could be interrupted before we
* return to userspace or wherever. Our solution to this is to surround the
* code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the
* Host that it is *never* to interrupt us there, even if interrupts seem to be
* enabled. */
ENTRY(lguest_iret) ENTRY(lguest_iret)
pushl %eax pushl %eax
movl 12(%esp), %eax movl 12(%esp), %eax
lguest_noirq_start: lguest_noirq_start:
/* Note the %ss: segment prefix here. Normal data accesses use the
* "ds" segment, but that will have already been restored for whatever
* we're returning to (such as userspace): we can't trust it. The %ss:
* prefix makes sure we use the stack segment, which is still valid. */
movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
popl %eax popl %eax
iret iret
......
...@@ -27,18 +27,38 @@ ...@@ -27,18 +27,38 @@
#define LG_CLOCK_MIN_DELTA 100UL #define LG_CLOCK_MIN_DELTA 100UL
#define LG_CLOCK_MAX_DELTA ULONG_MAX #define LG_CLOCK_MAX_DELTA ULONG_MAX
/*G:031 First, how does our Guest contact the Host to ask for privileged
* operations? There are two ways: the direct way is to make a "hypercall",
* to make requests of the Host Itself.
*
* Our hypercall mechanism uses the highest unused trap code (traps 32 and
* above are used by real hardware interrupts). Seventeen hypercalls are
* available: the hypercall number is put in the %eax register, and the
* arguments (when required) are placed in %edx, %ebx and %ecx. If a return
* value makes sense, it's returned in %eax.
*
* Grossly invalid calls result in Sudden Death at the hands of the vengeful
* Host, rather than returning failure. This reflects Winston Churchill's
* definition of a gentleman: "someone who is only rude intentionally". */
#define LGUEST_TRAP_ENTRY 0x1F #define LGUEST_TRAP_ENTRY 0x1F
static inline unsigned long static inline unsigned long
hcall(unsigned long call, hcall(unsigned long call,
unsigned long arg1, unsigned long arg2, unsigned long arg3) unsigned long arg1, unsigned long arg2, unsigned long arg3)
{ {
/* "int" is the Intel instruction to trigger a trap. */
asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
/* The call is in %eax (aka "a"), and can be replaced */
: "=a"(call) : "=a"(call)
/* The other arguments are in %eax, %edx, %ebx & %ecx */
: "a"(call), "d"(arg1), "b"(arg2), "c"(arg3) : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
/* "memory" means this might write somewhere in memory.
* This isn't true for all calls, but it's safe to tell
* gcc that it might happen so it doesn't get clever. */
: "memory"); : "memory");
return call; return call;
} }
/*:*/
void async_hcall(unsigned long call, void async_hcall(unsigned long call,
unsigned long arg1, unsigned long arg2, unsigned long arg3); unsigned long arg1, unsigned long arg2, unsigned long arg3);
...@@ -52,31 +72,40 @@ struct hcall_ring ...@@ -52,31 +72,40 @@ struct hcall_ring
u32 eax, edx, ebx, ecx; u32 eax, edx, ebx, ecx;
}; };
/* All the good stuff happens here: guest registers it with LGUEST_INIT */ /*G:032 The second method of communicating with the Host is via "struct
* lguest_data". The Guest's very first hypercall is to tell the Host where
* this is, and then the Guest and Host both publish information in it. :*/
struct lguest_data struct lguest_data
{ {
/* Fields which change during running: */ /* 512 == enabled (same as eflags in normal hardware). The Guest
/* 512 == enabled (same as eflags) */ * changes interrupts so often that a hypercall is too slow. */
unsigned int irq_enabled; unsigned int irq_enabled;
/* Interrupts blocked by guest. */ /* Fine-grained interrupt disabling by the Guest */
DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS); DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS);
/* Virtual address of page fault. */ /* The Host writes the virtual address of the last page fault here,
* which saves the Guest a hypercall. CR2 is the native register where
* this address would normally be found. */
unsigned long cr2; unsigned long cr2;
/* Async hypercall ring. 0xFF == done, 0 == pending. */ /* Async hypercall ring. Instead of directly making hypercalls, we can
* place them in here for processing the next time the Host wants.
* This batching can be quite efficient. */
/* 0xFF == done (set by Host), 0 == pending (set by Guest). */
u8 hcall_status[LHCALL_RING_SIZE]; u8 hcall_status[LHCALL_RING_SIZE];
/* The actual registers for the hypercalls. */
struct hcall_ring hcalls[LHCALL_RING_SIZE]; struct hcall_ring hcalls[LHCALL_RING_SIZE];
/* Fields initialized by the hypervisor at boot: */ /* Fields initialized by the Host at boot: */
/* Memory not to try to access */ /* Memory not to try to access */
unsigned long reserve_mem; unsigned long reserve_mem;
/* ID of this guest (used by network driver to set ethernet address) */ /* ID of this Guest (used by network driver to set ethernet address) */
u16 guestid; u16 guestid;
/* KHz for the TSC clock. */ /* KHz for the TSC clock. */
u32 tsc_khz; u32 tsc_khz;
/* Fields initialized by the guest at boot: */ /* Fields initialized by the Guest at boot: */
/* Instruction range to suppress interrupts even if enabled */ /* Instruction range to suppress interrupts even if enabled */
unsigned long noirq_start, noirq_end; unsigned long noirq_start, noirq_end;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment