Commit 07ad157f authored by Rusty Russell, committed by Linus Torvalds

lguest: the guest code

lguest is a simple hypervisor for Linux on Linux.  Unlike kvm it doesn't need
VT/SVM hardware.  Unlike Xen it's simply "modprobe and go".  Unlike both, it's
5000 lines and self-contained.

Performance is ok, but not great (-30% on kernel compile).  But given its
hackability, I expect this to improve, along with the paravirt_ops code which
it supplies a complete example for.  There's also a 64-bit version being
worked on and other craziness.

But most of all, lguest is awesome fun!  Too much of the kernel is a big ball
of hair.  lguest is simple enough to dive into and hack, plus has some warts
which scream "fork me!".

This patch:

This is the code and headers required to make an i386 kernel an lguest guest.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5992b6da
This diff is collapsed.
#include <linux/linkage.h>
#include <linux/lguest.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
/* FIXME: Once asm/processor-flags.h goes in, include that */
#define X86_EFLAGS_IF 0x00000200
/*
* This is where we begin: we have a magic signature which the launcher looks
* for. The plan is that the Linux boot protocol will be extended with a
* "platform type" field which will guide us here from the normal entry point,
* but for the moment this suffices.
*
* We put it in .init.text, which will be discarded after boot.
*/
/*
 * Guest entry stub.  Everything from here up to the ".text" directive is
 * emitted into .init.text, flagged allocatable ("a") and executable ("x").
 */
.section .init.text, "ax", @progbits
/* The magic signature bytes; .ascii emits them without a trailing NUL. */
.ascii "GenuineLguest"
/*
 * Set up initial stack: point %esp just past the end of the THREAD_SIZE
 * bytes starting at init_thread_union.
 */
movl $(init_thread_union+THREAD_SIZE),%esp
/* Continue in lguest_init (defined outside this file); nothing returns here. */
jmp lguest_init
/*
 * The templates for inline patching.
 *
 * LGUEST_PATCH(name, insns) emits the instruction(s) "insns" bracketed by the
 * global labels lgstart_<name> and lgend_<name>, so other code can locate the
 * template bytes and their length by symbol.
 *
 * Each template below operates on the irq_enabled word of lguest_data
 * (offset LGUEST_DATA_irq_enabled):
 *   cli   - stores 0
 *   sti   - stores X86_EFLAGS_IF
 *   popf  - stores %eax
 *   pushf - loads the word into %eax
 */
#define LGUEST_PATCH(name, insns...) \
lgstart_##name: insns; lgend_##name:; \
.globl lgstart_##name; .globl lgend_##name
LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
/* Back to the regular text section: this code must survive after boot. */
.text
/*
 * These labels mark the EIP range where the host should never deliver
 * interrupts.  They are exported so that code outside this file can refer
 * to them.
 */
.global lguest_noirq_start
.global lguest_noirq_end
/*
 * We move eflags word to lguest_data.irq_enabled to restore interrupt state.
 * For page faults, gpfs and virtual interrupts, the hypervisor has saved
 * eflags manually, otherwise it was delivered directly and so eflags reflects
 * the real machine IF state, ie. interrupts on. Since the kernel always dies
 * if it takes such a trap with interrupts disabled anyway, turning interrupts
 * back on unconditionally here is OK.
 */
ENTRY(lguest_iret)
/* Borrow %eax as scratch; it is restored by the popl below. */
pushl %eax
/*
 * With %eax pushed, the frame iret will pop (eip, cs, eflags) starts at
 * 4(%esp), so 12(%esp) is the saved eflags word.
 */
movl 12(%esp), %eax
/*
 * Start of the no-interrupt window: once irq_enabled has been written, the
 * remaining popl/iret must complete before an interrupt may be delivered.
 */
lguest_noirq_start:
/*
 * The store uses an explicit %ss segment override.
 * NOTE(review): presumably because %ds cannot be trusted at this point on
 * the return path - confirm against the caller.
 */
movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
popl %eax
iret
/* End of the no-interrupt window (first address after the iret). */
lguest_noirq_end:
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/lguest_bus.h>
#include <asm/io.h>
/*
 * "type" device attribute: format the type field of this device's
 * lguest_devices[] entry into buf as an unsigned short in decimal.
 * Returns the number of characters written.
 */
static ssize_t type_show(struct device *_dev,
			 struct device_attribute *attr, char *buf)
{
	struct lguest_device *lgdev;

	/* Recover the lguest wrapper from the embedded struct device. */
	lgdev = container_of(_dev, struct lguest_device, dev);

	return sprintf(buf, "%hu", lguest_devices[lgdev->index].type);
}
/*
 * "features" device attribute: format the features field of this device's
 * lguest_devices[] entry into buf as an unsigned short in hexadecimal.
 * Returns the number of characters written.
 */
static ssize_t features_show(struct device *_dev,
			     struct device_attribute *attr, char *buf)
{
	struct lguest_device *lgdev;

	/* Recover the lguest wrapper from the embedded struct device. */
	lgdev = container_of(_dev, struct lguest_device, dev);

	return sprintf(buf, "%hx", lguest_devices[lgdev->index].features);
}
/*
 * "pfn" device attribute: format the pfn field of this device's
 * lguest_devices[] entry into buf as an unsigned decimal number.
 * Returns the number of characters written.
 */
static ssize_t pfn_show(struct device *_dev,
			struct device_attribute *attr, char *buf)
{
	struct lguest_device *lgdev;

	/* Recover the lguest wrapper from the embedded struct device. */
	lgdev = container_of(_dev, struct lguest_device, dev);

	return sprintf(buf, "%u", lguest_devices[lgdev->index].pfn);
}
/*
 * "status" device attribute (read side): format the status field of this
 * device's lguest_devices[] entry into buf as an unsigned short in
 * hexadecimal.  Returns the number of characters written.
 */
static ssize_t status_show(struct device *_dev,
			   struct device_attribute *attr, char *buf)
{
	struct lguest_device *lgdev;

	/* Recover the lguest wrapper from the embedded struct device. */
	lgdev = container_of(_dev, struct lguest_device, dev);

	return sprintf(buf, "%hx", lguest_devices[lgdev->index].status);
}
/*
 * "status" device attribute (write side): parse one short integer from buf
 * (the "%hi" conversion accepts decimal, octal and hex notation) straight
 * into the status field of this device's lguest_devices[] entry.
 *
 * Returns count when exactly one value was converted, -EINVAL otherwise.
 */
static ssize_t status_store(struct device *_dev, struct device_attribute *attr,
			    const char *buf, size_t count)
{
	struct lguest_device *lgdev;
	int converted;

	lgdev = container_of(_dev, struct lguest_device, dev);
	converted = sscanf(buf, "%hi", &lguest_devices[lgdev->index].status);

	if (converted == 1)
		return count;

	return -EINVAL;
}
/*
 * Attributes given to every device on the lguest bus (this table is what
 * lguest_bus.bus.dev_attrs points at).  type, features and pfn are
 * read-only; status is mode 0644 and is wired to both status_show and
 * status_store.  __ATTR_NULL terminates the list.
 */
static struct device_attribute lguest_dev_attrs[] = {
__ATTR_RO(type),
__ATTR_RO(features),
__ATTR_RO(pfn),
__ATTR(status, 0644, status_show, status_store),
__ATTR_NULL
};
/*
 * Bus match callback: a driver matches a device when the driver's
 * device_type equals the type field of the device's lguest_devices[] entry.
 * Returns 1 on a match, 0 otherwise.
 */
static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
{
	struct lguest_driver *lgdrv;
	struct lguest_device *lgdev;

	lgdrv = container_of(_drv, struct lguest_driver, drv);
	lgdev = container_of(_dev, struct lguest_device, dev);

	if (lgdrv->device_type != lguest_devices[lgdev->index].type)
		return 0;

	return 1;
}
/*
 * Bundles the lguest bus type together with a struct device that represents
 * the bus itself.
 */
struct lguest_bus {
/* The bus type that devices and drivers are registered against. */
struct bus_type bus;
/* Device standing for the bus; add_lguest_device() uses it as parent. */
struct device dev;
};
/*
 * The one and only lguest bus.  The bus type is named "lguest", matches
 * drivers to devices with lguest_dev_match() and gives every device the
 * attributes in lguest_dev_attrs.  The bus device has no parent and the
 * bus_id "lguest".
 */
static struct lguest_bus lguest_bus = {
.bus = {
.name = "lguest",
.match = lguest_dev_match,
.dev_attrs = lguest_dev_attrs,
},
.dev = {
.parent = NULL,
.bus_id = "lguest",
}
};
/*
 * Generic probe hook installed on every lguest driver by
 * register_lguest_driver().  It flags the device's status with
 * LGUEST_DEVICE_S_DRIVER, calls the lguest driver's own probe(), and adds
 * LGUEST_DEVICE_S_DRIVER_OK only if that probe returned 0.
 *
 * Returns whatever the driver's probe() returned.
 */
static int lguest_dev_probe(struct device *_dev)
{
	struct lguest_device *lgdev;
	struct lguest_driver *lgdrv;
	int err;

	lgdev = container_of(_dev, struct lguest_device, dev);
	lgdrv = container_of(lgdev->dev.driver, struct lguest_driver, drv);

	/* A driver has been found for this device. */
	lguest_devices[lgdev->index].status |= LGUEST_DEVICE_S_DRIVER;

	err = lgdrv->probe(lgdev);
	if (err != 0)
		return err;

	/* The driver accepted the device. */
	lguest_devices[lgdev->index].status |= LGUEST_DEVICE_S_DRIVER_OK;
	return 0;
}
int register_lguest_driver(struct lguest_driver *drv)
{
if (!lguest_devices)
return 0;
drv->drv.bus = &lguest_bus.bus;
drv->drv.name = drv->name;
drv->drv.owner = drv->owner;
drv->drv.probe = lguest_dev_probe;
return driver_register(&drv->drv);
}
EXPORT_SYMBOL_GPL(register_lguest_driver);
/*
 * Create and register the struct lguest_device for slot @index of
 * lguest_devices[].
 *
 * The slot's status gains LGUEST_DEVICE_S_ACKNOWLEDGE up front.  If either
 * the allocation or device_register() fails, a KERN_EMERG message is logged
 * and LGUEST_DEVICE_S_FAILED is added to the status; nothing is reported to
 * the caller.
 */
static void add_lguest_device(unsigned int index)
{
	struct lguest_device *lgdev;

	lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;

	lgdev = kmalloc(sizeof(*lgdev), GFP_KERNEL);
	if (lgdev == NULL) {
		printk(KERN_EMERG "Cannot allocate lguest device %u\n", index);
		lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
		return;
	}

	/* Start from an all-zero struct device, then fill in what we need. */
	memset(&lgdev->dev, 0, sizeof(lgdev->dev));
	lgdev->private = NULL;
	lgdev->index = index;
	lgdev->dev.bus = &lguest_bus.bus;
	lgdev->dev.parent = &lguest_bus.dev;
	/* The bus id is simply the slot number. */
	sprintf(lgdev->dev.bus_id, "%u", index);

	if (device_register(&lgdev->dev) == 0)
		return;

	printk(KERN_EMERG "Cannot register lguest device %u\n", index);
	lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
	kfree(lgdev);
}
/*
 * Walk all LGUEST_MAX_DEVICES slots of lguest_devices[] and add a device for
 * every slot whose type field is non-zero.
 */
static void scan_devices(void)
{
	unsigned int slot;

	for (slot = 0; slot < LGUEST_MAX_DEVICES; slot++) {
		/* A zero type means the slot is unused. */
		if (lguest_devices[slot].type == 0)
			continue;

		add_lguest_device(slot);
	}
}
/*
 * Bring up the lguest bus (run as a postcore initcall).
 *
 * Does nothing unless paravirt_ops identifies the kernel as an lguest guest.
 * Otherwise it maps the device descriptor page, registers the bus type and
 * the bus device, and scans the descriptor page for devices.
 *
 * Always returns 0; any failure while running as an lguest guest is fatal
 * and ends in panic().
 */
static int __init lguest_bus_init(void)
{
	/* Not an lguest guest: leave lguest_devices unset and bow out. */
	if (strcmp(paravirt_ops.name, "lguest") != 0)
		return 0;

	/* Devices are in page above top of "normal" mem. */
	lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);
	/*
	 * scan_devices() dereferences lguest_devices unconditionally, so a
	 * failed mapping must be caught here rather than turning into a NULL
	 * dereference.  NOTE(review): assumes lguest_map() reports failure by
	 * returning NULL - confirm against its definition.
	 */
	if (!lguest_devices)
		panic("lguest device page mapping failed");

	if (bus_register(&lguest_bus.bus) != 0
	    || device_register(&lguest_bus.dev) != 0)
		panic("lguest bus registration failed");

	scan_devices();
	return 0;
}
postcore_initcall(lguest_bus_init);
/* Things the lguest guest needs to know. Note: like all lguest interfaces,
* this is subject to wild and random change between versions. */
#ifndef _ASM_LGUEST_H
#define _ASM_LGUEST_H
/* These are randomly chosen numbers which indicate we're an lguest at boot */
#define LGUEST_MAGIC_EBP 0x4C687970
#define LGUEST_MAGIC_EDI 0x652D4D65
#define LGUEST_MAGIC_ESI 0xFFFFFFFF
#ifndef __ASSEMBLY__
#include <asm/irq.h>
/*
 * Hypercall numbers.
 * NOTE(review): presumably these are the values passed as the "call"
 * argument of hcall() / async_hcall() - confirm against the callers.
 */
#define LHCALL_FLUSH_ASYNC 0
#define LHCALL_LGUEST_INIT 1
#define LHCALL_CRASH 2
#define LHCALL_LOAD_GDT 3
#define LHCALL_NEW_PGTABLE 4
#define LHCALL_FLUSH_TLB 5
#define LHCALL_LOAD_IDT_ENTRY 6
#define LHCALL_SET_STACK 7
#define LHCALL_TS 8
#define LHCALL_TIMER_READ 9
#define LHCALL_HALT 10
#define LHCALL_GET_WALLCLOCK 11
#define LHCALL_BIND_DMA 12
#define LHCALL_SEND_DMA 13
#define LHCALL_SET_PTE 14
#define LHCALL_SET_PMD 15
#define LHCALL_LOAD_TLS 16
/* Software interrupt vector that hcall() raises to enter the hypervisor. */
#define LGUEST_TRAP_ENTRY 0x1F
/*
 * Make a synchronous hypercall by raising software interrupt
 * LGUEST_TRAP_ENTRY.
 *
 * Register convention, as fixed by the asm constraints below:
 *   %eax = call (in) and the result (out)
 *   %edx = arg1, %ebx = arg2, %ecx = arg3
 *
 * Returns whatever is in %eax after the trap.
 */
static inline unsigned long
hcall(unsigned long call,
unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
/*
 * The "memory" clobber stops the compiler from caching memory values across
 * the trap or reordering memory accesses around it.
 */
asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
: "=a"(call)
: "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
: "memory");
return call;
}
void async_hcall(unsigned long call,
unsigned long arg1, unsigned long arg2, unsigned long arg3);
/*
 * Number of interrupts tracked in lguest_data.blocked_interrupts: NR_IRQS,
 * capped at 32.  Can't use our min() macro here: needs to be a constant
 * (it sizes a bitmap declaration).
 */
#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
/* Number of slots in the async hypercall ring of struct lguest_data. */
#define LHCALL_RING_SIZE 64
/*
 * One slot of the async hypercall ring: four 32-bit register values.
 * NOTE(review): presumably these mirror hcall()'s register assignment
 * (eax = call, edx = arg1, ebx = arg2, ecx = arg3) - confirm against
 * async_hcall().
 */
struct hcall_ring
{
u32 eax, edx, ebx, ecx;
};
/*
 * All the good stuff happens here: guest registers it with LGUEST_INIT.
 *
 * Assembly code reaches irq_enabled through the generated offset
 * LGUEST_DATA_irq_enabled, so the layout of this structure is shared with
 * assembly and must not be rearranged casually.
 */
struct lguest_data
{
/* Fields which change during running: */
/*
 * 512 (X86_EFLAGS_IF) == enabled, same bit as in eflags; the cli patch
 * template stores 0 here to disable.
 */
unsigned int irq_enabled;
/* Interrupts blocked by guest: a bitmap of LGUEST_IRQS bits. */
DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS);
/* Virtual address of page fault. */
unsigned long cr2;
/*
 * Async hypercall ring. 0xFF == done, 0 == pending.  hcall_status[] and
 * hcalls[] both have LHCALL_RING_SIZE entries.
 */
u8 hcall_status[LHCALL_RING_SIZE];
struct hcall_ring hcalls[LHCALL_RING_SIZE];
/* Fields initialized by the hypervisor at boot: */
/* Memory not to try to access */
unsigned long reserve_mem;
/* ID of this guest (used by network driver to set ethernet address) */
u16 guestid;
/* Fields initialized by the guest at boot: */
/*
 * Instruction range to suppress interrupts even if enabled.
 * NOTE(review): presumably set from the lguest_noirq_start and
 * lguest_noirq_end labels around the tail of lguest_iret - confirm in the
 * guest init code.
 */
unsigned long noirq_start, noirq_end;
};
extern struct lguest_data lguest_data;
#endif /* __ASSEMBLY__ */
#endif /* _ASM_LGUEST_H */
#ifndef _ASM_LGUEST_DEVICE_H
#define _ASM_LGUEST_DEVICE_H
/* Everything you need to know about lguest devices. */
#include <linux/device.h>
#include <linux/lguest.h>
#include <linux/lguest_launcher.h>
/*
 * One device on the lguest bus.  The generic struct device is embedded so
 * that bus code can get back to this wrapper with container_of().
 */
struct lguest_device {
/* Unique busid, and index into lguest_devices[] */
unsigned int index;
/* Embedded generic device handed to the driver core. */
struct device dev;
/* Driver can hang data off here. */
void *private;
};
/*
 * Interrupt number a device may use: by convention it is the device's index
 * plus one.
 */
static inline int lgdev_irq(const struct lguest_device *dev)
{
	const unsigned int slot = dev->index;

	return slot + 1;
}
/* dma args must not be vmalloced! */
void lguest_send_dma(unsigned long key, struct lguest_dma *dma);
int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
unsigned int num, u8 irq);
void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas);
/* Map the virtual device space */
void *lguest_map(unsigned long phys_addr, unsigned long pages);
void lguest_unmap(void *);
/*
 * An lguest device driver, as passed to register_lguest_driver().  The
 * generic struct device_driver is embedded so that bus code can get back to
 * this wrapper with container_of().
 */
struct lguest_driver {
/* Driver name; copied into drv.name on registration. */
const char *name;
/* Owning module; copied into drv.owner on registration. */
struct module *owner;
/* Device type this driver handles; compared with a device's type to match. */
u16 device_type;
/* Called when a matching device is bound; return 0 to accept the device. */
int (*probe)(struct lguest_device *dev);
/* NOTE(review): not called by any of the bus code visible here - confirm. */
void (*remove)(struct lguest_device *dev);
/* Embedded generic driver; filled in by register_lguest_driver(). */
struct device_driver drv;
};
extern int register_lguest_driver(struct lguest_driver *drv);
extern void unregister_lguest_driver(struct lguest_driver *drv);
extern struct lguest_device_desc *lguest_devices; /* Just past max_pfn */
#endif /* _ASM_LGUEST_DEVICE_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment