Commit 42b36cc0 authored by Rusty Russell

virtio: Force use of power-of-two for descriptor ring sizes

The virtio descriptor rings of size N-1 were nicely set up to be
aligned to an N-byte boundary.  But as Anthony Liguori points out, the
free-running indices used by virtio require that the sizes be a power
of 2, otherwise we get problems on wrap (demonstrated with lguest).
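
For illustration (this sketch is not part of the patch), the wrap problem can be reproduced in plain user-space C: the avail/used indices are free-running 16-bit counters that are reduced modulo the ring size only when indexing the ring, so the slot sequence stays continuous across the 16-bit wrap only if the size divides 65536, i.e. is a power of 2.

#include <stdint.h>
#include <stdio.h>

/* Toy demonstration (not kernel code): walk a free-running 16-bit index
 * across the 65535 -> 0 wrap and print which ring slot it selects. */
static void show_wrap(unsigned int num)
{
        uint16_t idx = 65534;           /* two steps before the 16-bit wrap */
        printf("num=%u:", num);
        for (int i = 0; i < 4; i++, idx++)
                printf(" %u", idx % num);
        printf("\n");
}

int main(void)
{
        show_wrap(127); /* prints "2 3 0 1": jumps from slot 3 back to slot 0 */
        show_wrap(128); /* prints "126 127 0 1": wraps cleanly around the ring */
        return 0;
}

With 127 entries the sequence jumps from slot 3 straight back to slot 0, skipping slots 4-126 and landing on slots that may still hold entries queued just before the wrap; with 128 the modulo is simply a mask of the low bits, so the slot advances by exactly one per increment even across the wrap.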

So we replace the clever "2^n-1" scheme with a simple "align to page
boundary" scheme: this means that all virtio rings take at least two
pages, but it's safer than guessing cache alignment.
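
As a back-of-the-envelope check of the "at least two pages" claim (again not from the patch; the struct definitions below are stand-ins with the usual 16-byte descriptor and 8-byte used-element sizes, and a 4096-byte page is assumed), the lguest ring of 128 descriptors works out as: 2048 bytes of descriptors plus 260 bytes of avail ring round up to one page, and the 1028-byte used ring then starts on the second.

#include <stdint.h>
#include <stdio.h>

/* Stand-in layouts matching the element sizes assumed above. */
struct vring_desc { uint64_t addr; uint32_t len; uint16_t flags, next; };
struct vring_used_elem { uint32_t id, len; };

/* Mirrors the patched vring_size(): descriptors + avail ring, rounded up
 * to a page boundary, followed by the used ring. */
static unsigned int vring_size(unsigned int num, unsigned int pagesize)
{
        return ((sizeof(struct vring_desc) * num + sizeof(uint16_t) * (2 + num)
                 + pagesize - 1) & ~(pagesize - 1))
                + sizeof(uint16_t) * 2 + sizeof(struct vring_used_elem) * num;
}

int main(void)
{
        /* 128*16 + 2*(2+128) = 2308 -> rounds up to 4096; + 4 + 128*8 = 5124. */
        printf("vring_size(128, 4096) = %u bytes (two 4096-byte pages)\n",
               vring_size(128, 4096));
        return 0;
}
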
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
parent 1200e646
@@ -62,8 +62,8 @@ typedef uint8_t u8;
 #endif
 /* We can have up to 256 pages for devices. */
 #define DEVICE_PAGES 256
-/* This fits nicely in a single 4096-byte page. */
-#define VIRTQUEUE_NUM 127
+/* This will occupy 2 pages: it must be a power of 2. */
+#define VIRTQUEUE_NUM 128
 
 /*L:120 verbose is both a global flag and a macro. The C preprocessor allows
  * this, and although I wouldn't recommend it, it works quite nicely here. */
@@ -1036,7 +1036,8 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	void *p;
 
 	/* First we need some pages for this virtqueue. */
-	pages = (vring_size(num_descs) + getpagesize() - 1) / getpagesize();
+	pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1)
+		/ getpagesize();
 	p = get_pages(pages);
 
 	/* Initialize the configuration. */
@@ -1045,7 +1046,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	vq->config.pfn = to_guest_phys(p) / getpagesize();
 
 	/* Initialize the vring. */
-	vring_init(&vq->vring, num_descs, p);
+	vring_init(&vq->vring, num_descs, p, getpagesize());
 
 	/* Add the configuration information to this device's descriptor. */
 	add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE,
...
@@ -200,7 +200,8 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 
 	/* Figure out how many pages the ring will take, and map that memory */
 	lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
-				DIV_ROUND_UP(vring_size(lvq->config.num),
+				DIV_ROUND_UP(vring_size(lvq->config.num,
+							PAGE_SIZE),
 					     PAGE_SIZE));
 	if (!lvq->pages) {
 		err = -ENOMEM;
...
@@ -277,11 +277,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	struct vring_virtqueue *vq;
 	unsigned int i;
 
+	/* We assume num is a power of 2. */
+	if (num & (num - 1)) {
+		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
+		return NULL;
+	}
+
 	vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
-	vring_init(&vq->vring, num, pages);
+	vring_init(&vq->vring, num, pages, PAGE_SIZE);
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.vq_ops = &vring_vq_ops;
...
@@ -67,7 +67,7 @@ struct vring {
 };
 
 /* The standard layout for the ring is a continuous chunk of memory which looks
- * like this.  The used fields will be aligned to a "num+1" boundary.
+ * like this.  We assume num is a power of 2.
  *
  * struct vring
  * {
@@ -79,8 +79,8 @@ struct vring {
  *	__u16 avail_idx;
  *	__u16 available[num];
  *
- *	// Padding so a correctly-chosen num value will cache-align used_idx.
- *	char pad[sizeof(struct vring_desc) - sizeof(avail_flags)];
+ *	// Padding to the next page boundary.
+ *	char pad[];
  *
  *	// A ring of used descriptor heads with free-running index.
  *	__u16 used_flags;
@@ -88,18 +88,21 @@ struct vring {
 *	struct vring_used_elem used[num];
 * };
 */
-static inline void vring_init(struct vring *vr, unsigned int num, void *p)
+static inline void vring_init(struct vring *vr, unsigned int num, void *p,
+			      unsigned int pagesize)
 {
 	vr->num = num;
 	vr->desc = p;
 	vr->avail = p + num*sizeof(struct vring_desc);
-	vr->used = p + (num+1)*(sizeof(struct vring_desc) + sizeof(__u16));
+	vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + pagesize-1)
+			    & ~(pagesize - 1));
 }
 
-static inline unsigned vring_size(unsigned int num)
+static inline unsigned vring_size(unsigned int num, unsigned int pagesize)
 {
-	return (num + 1) * (sizeof(struct vring_desc) + sizeof(__u16))
-		+ sizeof(__u32) + num * sizeof(struct vring_used_elem);
+	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
+		 + pagesize - 1) & ~(pagesize - 1))
+		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
 }
 
 #ifdef __KERNEL__
...