Commit e2c97843 authored by Rusty Russell, committed by Linus Torvalds

lguest: documentation III: Drivers

Documentation: The Drivers
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b2b47c21
This diff is collapsed.
/* Simple console for lguest. /*D:300
* The Guest console driver
* *
* Copyright (C) 2006 Rusty Russell, IBM Corporation * This is a trivial console driver: we use lguest's DMA mechanism to send
* bytes out, and register a DMA buffer to receive bytes in. It is assumed to
* be present and available from the very beginning of boot.
*
* Writing console drivers is one of the few remaining Dark Arts in Linux.
* Fortunately for us, the path of virtual consoles has been well-trodden by
* the PowerPC folks, who wrote "hvc_console.c" to generically support any
* virtual console. We use that infrastructure which only requires us to write
* the basic put_chars and get_chars functions and call the right register
* functions.
:*/
/* Copyright (C) 2006 Rusty Russell, IBM Corporation
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -21,49 +34,81 @@ ...@@ -21,49 +34,81 @@
#include <linux/lguest_bus.h> #include <linux/lguest_bus.h>
#include "hvc_console.h" #include "hvc_console.h"
/*D:340 This is our single console input buffer, with associated "struct
* lguest_dma" referring to it. Note the 0-terminated length array, and the
* use of physical address for the buffer itself. */
static char inbuf[256]; static char inbuf[256];
static struct lguest_dma cons_input = { .used_len = 0, static struct lguest_dma cons_input = { .used_len = 0,
.addr[0] = __pa(inbuf), .addr[0] = __pa(inbuf),
.len[0] = sizeof(inbuf), .len[0] = sizeof(inbuf),
.len[1] = 0 }; .len[1] = 0 };
/*D:310 The put_chars() callback is pretty straightforward.
*
* First we put the pointer and length in a "struct lguest_dma": we only have
* one pointer, so we set the second length to 0. Then we use SEND_DMA to send
* the data to (Host) buffers attached to the console key. Usually a device's
* key is a physical address within the device's memory, but because the
* console device doesn't have any associated physical memory, we use the
* LGUEST_CONSOLE_DMA_KEY constant (aka 0). */
static int put_chars(u32 vtermno, const char *buf, int count) static int put_chars(u32 vtermno, const char *buf, int count)
{ {
struct lguest_dma dma; struct lguest_dma dma;
/* FIXME: what if it's over a page boundary? */ /* FIXME: DMA buffers in a "struct lguest_dma" are not allowed
* to go over page boundaries. This never seems to happen,
* but if it did we'd need to fix this code. */
dma.len[0] = count; dma.len[0] = count;
dma.len[1] = 0; dma.len[1] = 0;
dma.addr[0] = __pa(buf); dma.addr[0] = __pa(buf);
lguest_send_dma(LGUEST_CONSOLE_DMA_KEY, &dma); lguest_send_dma(LGUEST_CONSOLE_DMA_KEY, &dma);
/* We're expected to return the amount of data we wrote: all of it. */
return count; return count;
} }
/*D:350 get_chars() is the callback from the hvc_console infrastructure when
* an interrupt is received.
*
* Firstly we see if our buffer has been filled: if not, we return. The rest
* of the code deals with the fact that the hvc_console() infrastructure only
* asks us for 16 bytes at a time. We keep a "cons_offset" variable for
* partially-read buffers. */
static int get_chars(u32 vtermno, char *buf, int count) static int get_chars(u32 vtermno, char *buf, int count)
{ {
static int cons_offset; static int cons_offset;
/* Nothing left to see here... */
if (!cons_input.used_len) if (!cons_input.used_len)
return 0; return 0;
/* You want more than we have to give? Well, try wanting less! */
if (cons_input.used_len - cons_offset < count) if (cons_input.used_len - cons_offset < count)
count = cons_input.used_len - cons_offset; count = cons_input.used_len - cons_offset;
/* Copy across to their buffer and increment offset. */
memcpy(buf, inbuf + cons_offset, count); memcpy(buf, inbuf + cons_offset, count);
cons_offset += count; cons_offset += count;
/* Finished? Zero offset, and reset cons_input so Host will use it
* again. */
if (cons_offset == cons_input.used_len) { if (cons_offset == cons_input.used_len) {
cons_offset = 0; cons_offset = 0;
cons_input.used_len = 0; cons_input.used_len = 0;
} }
return count; return count;
} }
/*:*/
static struct hv_ops lguest_cons = { static struct hv_ops lguest_cons = {
.get_chars = get_chars, .get_chars = get_chars,
.put_chars = put_chars, .put_chars = put_chars,
}; };
/*D:320 Console drivers are initialized very early so boot messages can go
* out. At this stage, the console is output-only. Our driver checks we're a
* Guest, and if so hands hvc_instantiate() the console number (0), priority
* (0), and the struct hv_ops containing the put_chars() function. */
static int __init cons_init(void) static int __init cons_init(void)
{ {
if (strcmp(paravirt_ops.name, "lguest") != 0) if (strcmp(paravirt_ops.name, "lguest") != 0)
...@@ -73,21 +118,46 @@ static int __init cons_init(void) ...@@ -73,21 +118,46 @@ static int __init cons_init(void)
} }
console_initcall(cons_init); console_initcall(cons_init);
/*D:370 To set up and manage our virtual console, we call hvc_alloc() and
* stash the result in the private pointer of the "struct lguest_device".
* Since we never remove the console device we never need this pointer again,
* but using ->private is considered good form, and you never know who's going
* to copy your driver.
*
* Once the console is set up, we bind our input buffer ready for input. */
static int lguestcons_probe(struct lguest_device *lgdev) static int lguestcons_probe(struct lguest_device *lgdev)
{ {
int err; int err;
/* The first argument of hvc_alloc() is the virtual console number, so
* we use zero. The second argument is the interrupt number.
*
* The third argument is a "struct hv_ops" containing the put_chars()
* and get_chars() pointers. The final argument is the output buffer
* size: we use 256 and expect the Host to have room for us to send
* that much. */
lgdev->private = hvc_alloc(0, lgdev_irq(lgdev), &lguest_cons, 256); lgdev->private = hvc_alloc(0, lgdev_irq(lgdev), &lguest_cons, 256);
if (IS_ERR(lgdev->private)) if (IS_ERR(lgdev->private))
return PTR_ERR(lgdev->private); return PTR_ERR(lgdev->private);
/* We bind a single DMA buffer at key LGUEST_CONSOLE_DMA_KEY.
* "cons_input" is that statically-initialized global DMA buffer we saw
* above, and we also give the interrupt we want. */
err = lguest_bind_dma(LGUEST_CONSOLE_DMA_KEY, &cons_input, 1, err = lguest_bind_dma(LGUEST_CONSOLE_DMA_KEY, &cons_input, 1,
lgdev_irq(lgdev)); lgdev_irq(lgdev));
if (err) if (err)
printk("lguest console: failed to bind buffer.\n"); printk("lguest console: failed to bind buffer.\n");
return err; return err;
} }
/* Note the use of lgdev_irq() for the interrupt number. We tell hvc_alloc()
* to expect input when this interrupt is triggered, and then tell
* lguest_bind_dma() that this is the interrupt to send us when input comes in. */
/*D:360 From now on the console driver follows standard Guest driver form:
* register_lguest_driver() registers the device type and probe function, and
* the probe function sets up the device.
*
* The standard "struct lguest_driver": */
static struct lguest_driver lguestcons_drv = { static struct lguest_driver lguestcons_drv = {
.name = "lguestcons", .name = "lguestcons",
.owner = THIS_MODULE, .owner = THIS_MODULE,
...@@ -95,6 +165,7 @@ static struct lguest_driver lguestcons_drv = { ...@@ -95,6 +165,7 @@ static struct lguest_driver lguestcons_drv = {
.probe = lguestcons_probe, .probe = lguestcons_probe,
}; };
/* The standard init function */
static int __init hvc_lguest_init(void) static int __init hvc_lguest_init(void)
{ {
return register_lguest_driver(&lguestcons_drv); return register_lguest_driver(&lguestcons_drv);
......
...@@ -46,6 +46,10 @@ static struct device_attribute lguest_dev_attrs[] = { ...@@ -46,6 +46,10 @@ static struct device_attribute lguest_dev_attrs[] = {
__ATTR_NULL __ATTR_NULL
}; };
/*D:130 The generic bus infrastructure requires a function which says whether a
* device matches a driver. For us, it is simple: "struct lguest_driver"
* contains a "device_type" field which indicates what type of device it can
* handle, so we just cast the args and compare: */
static int lguest_dev_match(struct device *_dev, struct device_driver *_drv) static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
{ {
struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
...@@ -53,6 +57,7 @@ static int lguest_dev_match(struct device *_dev, struct device_driver *_drv) ...@@ -53,6 +57,7 @@ static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
return (drv->device_type == lguest_devices[dev->index].type); return (drv->device_type == lguest_devices[dev->index].type);
} }
/*:*/
struct lguest_bus { struct lguest_bus {
struct bus_type bus; struct bus_type bus;
...@@ -71,11 +76,24 @@ static struct lguest_bus lguest_bus = { ...@@ -71,11 +76,24 @@ static struct lguest_bus lguest_bus = {
} }
}; };
/*D:140 This is the callback which occurs once the bus infrastructure matches
* up a device and driver, ie. in response to add_lguest_device() calling
* device_register(), or register_lguest_driver() calling driver_register().
*
* At the moment it's always the latter: the devices are added first, since
* scan_devices() is called from a "postcore_initcall", and the drivers
* themselves are called later as a normal "initcall". But it would work the
* other way too.
*
* So now we have the happy couple, we add the status bit to indicate that we
* found a driver. If the driver truly loves the device, it will return
* happiness from its probe function (ok, perhaps this wasn't my greatest
* analogy), and we set the final "driver ok" bit so the Host sees it's all
* green. */
static int lguest_dev_probe(struct device *_dev) static int lguest_dev_probe(struct device *_dev)
{ {
int ret; int ret;
struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); struct lguest_device*dev = container_of(_dev,struct lguest_device,dev);
struct lguest_driver *drv = container_of(dev->dev.driver, struct lguest_driver*drv = container_of(dev->dev.driver,
struct lguest_driver, drv); struct lguest_driver, drv);
lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER; lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER;
...@@ -85,6 +103,10 @@ static int lguest_dev_probe(struct device *_dev) ...@@ -85,6 +103,10 @@ static int lguest_dev_probe(struct device *_dev)
return ret; return ret;
} }
/* The last part of the bus infrastructure is the function lguest drivers use
* to register themselves. Firstly, we do nothing if there's no lguest bus
* (ie. this is not a Guest), otherwise we fill in the embedded generic "struct
* driver" fields and call the generic driver_register(). */
int register_lguest_driver(struct lguest_driver *drv) int register_lguest_driver(struct lguest_driver *drv)
{ {
if (!lguest_devices) if (!lguest_devices)
...@@ -97,12 +119,36 @@ int register_lguest_driver(struct lguest_driver *drv) ...@@ -97,12 +119,36 @@ int register_lguest_driver(struct lguest_driver *drv)
return driver_register(&drv->drv); return driver_register(&drv->drv);
} }
/* At the moment we build all the drivers into the kernel because they're so
* simple: 8144 bytes for all three of them as I type this. And as the console
* really needs to be built in, it's actually only 3527 bytes for the network
* and block drivers.
*
* If they get complex it will make sense for them to be modularized, so we
* need to explicitly export the symbol.
*
* I don't think non-GPL modules make sense, so it's a GPL-only export.
*/
EXPORT_SYMBOL_GPL(register_lguest_driver); EXPORT_SYMBOL_GPL(register_lguest_driver);
/*D:120 This is the core of the lguest bus: actually adding a new device.
* It's a separate function because it's neater that way, and because an
* earlier version of the code supported hotplug and unplug. They were removed
* early on because they were never used.
*
* As Andrew Tridgell says, "Untested code is buggy code".
*
* It's worth reading this carefully: we start with an index into the array of
* "struct lguest_device_desc"s indicating the device which is new: */
static void add_lguest_device(unsigned int index) static void add_lguest_device(unsigned int index)
{ {
struct lguest_device *new; struct lguest_device *new;
/* Each "struct lguest_device_desc" has a "status" field, which the
* Guest updates as the device is probed. In the worst case, the Host
* can look at these bits to tell what part of device setup failed,
* even if the console isn't available. */
lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE; lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;
new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL); new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL);
if (!new) { if (!new) {
...@@ -111,12 +157,17 @@ static void add_lguest_device(unsigned int index) ...@@ -111,12 +157,17 @@ static void add_lguest_device(unsigned int index)
return; return;
} }
/* The "struct lguest_device" setup is pretty straight-forward example
* code. */
new->index = index; new->index = index;
new->private = NULL; new->private = NULL;
memset(&new->dev, 0, sizeof(new->dev)); memset(&new->dev, 0, sizeof(new->dev));
new->dev.parent = &lguest_bus.dev; new->dev.parent = &lguest_bus.dev;
new->dev.bus = &lguest_bus.bus; new->dev.bus = &lguest_bus.bus;
sprintf(new->dev.bus_id, "%u", index); sprintf(new->dev.bus_id, "%u", index);
/* device_register() causes the bus infrastructure to look for a
* matching driver. */
if (device_register(&new->dev) != 0) { if (device_register(&new->dev) != 0) {
printk(KERN_EMERG "Cannot register lguest device %u\n", index); printk(KERN_EMERG "Cannot register lguest device %u\n", index);
lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED; lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
...@@ -124,6 +175,9 @@ static void add_lguest_device(unsigned int index) ...@@ -124,6 +175,9 @@ static void add_lguest_device(unsigned int index)
} }
} }
/*D:110 scan_devices() simply iterates through the device array. The type 0
* is reserved to mean "no device", and anything else means we have found a
* device: add it. */
static void scan_devices(void) static void scan_devices(void)
{ {
unsigned int i; unsigned int i;
...@@ -133,12 +187,23 @@ static void scan_devices(void) ...@@ -133,12 +187,23 @@ static void scan_devices(void)
add_lguest_device(i); add_lguest_device(i);
} }
/*D:100 Fairly early in boot, lguest_bus_init() is called to set up the lguest
* bus. We check that we are a Guest by checking paravirt_ops.name: there are
* other ways of checking, but this seems most obvious to me.
*
* So we can access the array of "struct lguest_device_desc"s easily, we map
* that memory and store the pointer in the global "lguest_devices". Then we
* register the bus with the core. Doing two registrations seems clunky to me,
* but it seems to be the correct sysfs incantation.
*
* Finally we call scan_devices() which adds all the devices found in the
* "struct lguest_device_desc" array. */
static int __init lguest_bus_init(void) static int __init lguest_bus_init(void)
{ {
if (strcmp(paravirt_ops.name, "lguest") != 0) if (strcmp(paravirt_ops.name, "lguest") != 0)
return 0; return 0;
/* Devices are in page above top of "normal" mem. */ /* Devices are in a single page above top of "normal" mem */
lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1); lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);
if (bus_register(&lguest_bus.bus) != 0 if (bus_register(&lguest_bus.bus) != 0
...@@ -148,4 +213,5 @@ static int __init lguest_bus_init(void) ...@@ -148,4 +213,5 @@ static int __init lguest_bus_init(void)
scan_devices(); scan_devices();
return 0; return 0;
} }
/* Do this after core stuff, before devices. */
postcore_initcall(lguest_bus_init); postcore_initcall(lguest_bus_init);
This diff is collapsed.
...@@ -15,11 +15,14 @@ struct lguest_device { ...@@ -15,11 +15,14 @@ struct lguest_device {
void *private; void *private;
}; };
/* By convention, each device can use irq index+1 if it wants to. */ /*D:380 Since interrupt numbers are arbitrary, we use a convention: each device
* can use the interrupt number corresponding to its index. The +1 is because
* interrupt 0 is not usable (it's actually the timer interrupt). */
static inline int lgdev_irq(const struct lguest_device *dev) static inline int lgdev_irq(const struct lguest_device *dev)
{ {
return dev->index + 1; return dev->index + 1;
} }
/*:*/
/* dma args must not be vmalloced! */ /* dma args must not be vmalloced! */
void lguest_send_dma(unsigned long key, struct lguest_dma *dma); void lguest_send_dma(unsigned long key, struct lguest_dma *dma);
......
...@@ -9,14 +9,45 @@ ...@@ -9,14 +9,45 @@
/* How many devices? Assume each one wants up to two dma arrays per device. */ /* How many devices? Assume each one wants up to two dma arrays per device. */
#define LGUEST_MAX_DEVICES (LGUEST_MAX_DMA/2) #define LGUEST_MAX_DEVICES (LGUEST_MAX_DMA/2)
/*D:200
* Lguest I/O
*
* The lguest I/O mechanism is the only way Guests can talk to devices. There
* are two hypercalls involved: SEND_DMA for output and BIND_DMA for input. In
* each case, "struct lguest_dma" describes the buffer: this contains 16
* addr/len pairs, and if there are fewer buffer elements the len array is
* terminated with a 0.
*
* I/O is organized by keys: BIND_DMA attaches buffers to a particular key, and
* SEND_DMA transfers to buffers bound to a particular key. By convention, keys
* correspond to a physical address within the device's page. This means that
* devices will never accidentally end up with the same keys, and allows the
* Host to use The Futex Trick (as we'll see later in our journey).
*
* SEND_DMA simply indicates a key to send to, and the physical address of the
* "struct lguest_dma" to send. The Host will write the number of bytes
* transferred into the "struct lguest_dma"'s used_len member.
*
* BIND_DMA indicates a key to bind to, a pointer to an array of "struct
* lguest_dma"s ready for receiving, the size of that array, and an interrupt
* to trigger when data is received. The Host will only allow transfers into
* buffers with a used_len of zero: it then sets used_len to the number of
* bytes transferred and triggers the interrupt for the Guest to process the
* new input. */
struct lguest_dma struct lguest_dma
{ {
/* 0 if free to be used, filled by hypervisor. */ /* 0 if free to be used, filled by the Host. */
u32 used_len; u32 used_len;
unsigned long addr[LGUEST_MAX_DMA_SECTIONS]; unsigned long addr[LGUEST_MAX_DMA_SECTIONS];
u16 len[LGUEST_MAX_DMA_SECTIONS]; u16 len[LGUEST_MAX_DMA_SECTIONS];
}; };
/*:*/
/*D:460 This is the layout of a block device memory page. The Launcher sets up
* the num_sectors initially to tell the Guest the size of the disk. The Guest
* puts the type, sector and length of the request in the first three fields,
* then DMAs to the Host. The Host processes the request, sets up the result,
* then DMAs back to the Guest. */
struct lguest_block_page struct lguest_block_page
{ {
/* 0 is a read, 1 is a write. */ /* 0 is a read, 1 is a write. */
...@@ -28,27 +59,47 @@ struct lguest_block_page ...@@ -28,27 +59,47 @@ struct lguest_block_page
u32 num_sectors; /* Disk length = num_sectors * 512 */ u32 num_sectors; /* Disk length = num_sectors * 512 */
}; };
/* There is a shared page of these. */ /*D:520 The network device is basically a memory page where all the Guests on
* the network publish their MAC (ethernet) addresses: it's an array of "struct
* lguest_net": */
struct lguest_net struct lguest_net
{ {
/* Simply the mac address (with multicast bit meaning promisc). */ /* Simply the mac address (with multicast bit meaning promisc). */
unsigned char mac[6]; unsigned char mac[6];
}; };
/*:*/
/* Where the Host expects the Guest to SEND_DMA console output to. */ /* Where the Host expects the Guest to SEND_DMA console output to. */
#define LGUEST_CONSOLE_DMA_KEY 0 #define LGUEST_CONSOLE_DMA_KEY 0
/* We have a page of these descriptors in the lguest_device page. */ /*D:010
* Drivers
*
* The Guest needs devices to do anything useful. Since we don't let it touch
* real devices (think of the damage it could do!) we provide virtual devices.
* We could emulate a PCI bus with various devices on it, but that is a fairly
* complex burden for the Host and suboptimal for the Guest, so we have our own
* "lguest" bus and simple drivers.
*
* Devices are described by an array of LGUEST_MAX_DEVICES of these structs,
* placed by the Launcher just above the top of physical memory:
*/
struct lguest_device_desc { struct lguest_device_desc {
/* The device type: console, network, disk etc. */
u16 type; u16 type;
#define LGUEST_DEVICE_T_CONSOLE 1 #define LGUEST_DEVICE_T_CONSOLE 1
#define LGUEST_DEVICE_T_NET 2 #define LGUEST_DEVICE_T_NET 2
#define LGUEST_DEVICE_T_BLOCK 3 #define LGUEST_DEVICE_T_BLOCK 3
/* The specific features of this device: these depend on device type
* except for LGUEST_DEVICE_F_RANDOMNESS. */
u16 features; u16 features;
#define LGUEST_NET_F_NOCSUM 0x4000 /* Don't bother checksumming */ #define LGUEST_NET_F_NOCSUM 0x4000 /* Don't bother checksumming */
#define LGUEST_DEVICE_F_RANDOMNESS 0x8000 /* IRQ is fairly random */ #define LGUEST_DEVICE_F_RANDOMNESS 0x8000 /* IRQ is fairly random */
/* This is how the Guest reports status of the device: the Host can set
* LGUEST_DEVICE_S_REMOVED to indicate removal, but the rest are only
* ever manipulated by the Guest, and only ever set. */
u16 status; u16 status;
/* 256 and above are device specific. */ /* 256 and above are device specific. */
#define LGUEST_DEVICE_S_ACKNOWLEDGE 1 /* We have seen device. */ #define LGUEST_DEVICE_S_ACKNOWLEDGE 1 /* We have seen device. */
...@@ -58,9 +109,12 @@ struct lguest_device_desc { ...@@ -58,9 +109,12 @@ struct lguest_device_desc {
#define LGUEST_DEVICE_S_REMOVED_ACK 16 /* Driver has been told. */ #define LGUEST_DEVICE_S_REMOVED_ACK 16 /* Driver has been told. */
#define LGUEST_DEVICE_S_FAILED 128 /* Something actually failed */ #define LGUEST_DEVICE_S_FAILED 128 /* Something actually failed */
/* Each device exists somewhere in Guest physical memory, over some
* number of pages. */
u16 num_pages; u16 num_pages;
u32 pfn; u32 pfn;
}; };
/*:*/
/* Write command first word is a request. */ /* Write command first word is a request. */
enum lguest_req enum lguest_req
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment