Commit f1e53987 authored by Mikulas Patocka's avatar Mikulas Patocka Committed by Alasdair G Kergon

dm io: remove extra bi_io_vec region hack

Remove the hack where we allocate an extra bi_io_vec to store additional
private data.  This hack prevents us from supporting barriers in
dm-raid1 without first making another little block layer change.
Instead of doing that, this patch eliminates the bi_io_vec abuse by
storing the region number directly in the low bits of bi_private.

We need to store two things for each bio, the pointer to the main io
structure and, if parallel writes were requested, an index indicating
which of these writes this bio belongs to.  There can be at most
BITS_PER_LONG regions - 32 or 64.

The index (region number) was stored in the last (hidden) bio vector and
the pointer to struct io was stored in bi_private.

This patch now aligns "struct io" on BITS_PER_LONG bytes and stores the
region number in the low BITS_PER_LONG bits of bi_private.
Signed-off-by: default avatarMikulas Patocka <mpatocka@redhat.com>
Signed-off-by: default avatarAlasdair G Kergon <agk@redhat.com>
parent 952b3557
...@@ -16,12 +16,19 @@ ...@@ -16,12 +16,19 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/dm-io.h> #include <linux/dm-io.h>
#define DM_MSG_PREFIX "io"
#define DM_IO_MAX_REGIONS BITS_PER_LONG
struct dm_io_client { struct dm_io_client {
mempool_t *pool; mempool_t *pool;
struct bio_set *bios; struct bio_set *bios;
}; };
/* FIXME: can we shrink this ? */ /*
* Aligning 'struct io' reduces the number of bits required to store
* its address. Refer to store_io_and_region_in_bio() below.
*/
struct io { struct io {
unsigned long error_bits; unsigned long error_bits;
unsigned long eopnotsupp_bits; unsigned long eopnotsupp_bits;
...@@ -30,7 +37,7 @@ struct io { ...@@ -30,7 +37,7 @@ struct io {
struct dm_io_client *client; struct dm_io_client *client;
io_notify_fn callback; io_notify_fn callback;
void *context; void *context;
}; } __attribute__((aligned(DM_IO_MAX_REGIONS)));
static struct kmem_cache *_dm_io_cache; static struct kmem_cache *_dm_io_cache;
...@@ -92,18 +99,29 @@ EXPORT_SYMBOL(dm_io_client_destroy); ...@@ -92,18 +99,29 @@ EXPORT_SYMBOL(dm_io_client_destroy);
/*----------------------------------------------------------------- /*-----------------------------------------------------------------
* We need to keep track of which region a bio is doing io for. * We need to keep track of which region a bio is doing io for.
* In order to save a memory allocation we store this the last * To avoid a memory allocation to store just 5 or 6 bits, we
* bvec which we know is unused (blech). * ensure the 'struct io' pointer is aligned so enough low bits are
* XXX This is ugly and can OOPS with some configs... find another way. * always zero and then combine it with the region number directly in
* bi_private.
*---------------------------------------------------------------*/ *---------------------------------------------------------------*/
static inline void bio_set_region(struct bio *bio, unsigned region) static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
unsigned region)
{ {
bio->bi_io_vec[bio->bi_max_vecs].bv_len = region; if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
DMCRIT("Unaligned struct io pointer %p", io);
BUG();
}
bio->bi_private = (void *)((unsigned long)io | region);
} }
static inline unsigned bio_get_region(struct bio *bio) static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
unsigned *region)
{ {
return bio->bi_io_vec[bio->bi_max_vecs].bv_len; unsigned long val = (unsigned long)bio->bi_private;
*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
*region = val & (DM_IO_MAX_REGIONS - 1);
} }
/*----------------------------------------------------------------- /*-----------------------------------------------------------------
...@@ -144,10 +162,8 @@ static void endio(struct bio *bio, int error) ...@@ -144,10 +162,8 @@ static void endio(struct bio *bio, int error)
/* /*
* The bio destructor in bio_put() may use the io object. * The bio destructor in bio_put() may use the io object.
*/ */
io = bio->bi_private; retrieve_io_and_region_from_bio(bio, &io, &region);
region = bio_get_region(bio);
bio->bi_max_vecs++;
bio_put(bio); bio_put(bio);
dec_count(io, region, error); dec_count(io, region, error);
...@@ -247,7 +263,10 @@ static void vm_dp_init(struct dpages *dp, void *data) ...@@ -247,7 +263,10 @@ static void vm_dp_init(struct dpages *dp, void *data)
static void dm_bio_destructor(struct bio *bio) static void dm_bio_destructor(struct bio *bio)
{ {
struct io *io = bio->bi_private; unsigned region;
struct io *io;
retrieve_io_and_region_from_bio(bio, &io, &region);
bio_free(bio, io->client->bios); bio_free(bio, io->client->bios);
} }
...@@ -292,24 +311,17 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, ...@@ -292,24 +311,17 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
while (remaining) { while (remaining) {
/* /*
* Allocate a suitably sized-bio: we add an extra * Allocate a suitably sized-bio.
* bvec for bio_get/set_region() and decrement bi_max_vecs
* to hide it from bio_add_page().
*/ */
num_bvecs = dm_sector_div_up(remaining, num_bvecs = dm_sector_div_up(remaining,
(PAGE_SIZE >> SECTOR_SHIFT)); (PAGE_SIZE >> SECTOR_SHIFT));
num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
num_bvecs);
if (unlikely(num_bvecs > BIO_MAX_PAGES))
num_bvecs = BIO_MAX_PAGES;
bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
bio->bi_sector = where->sector + (where->count - remaining); bio->bi_sector = where->sector + (where->count - remaining);
bio->bi_bdev = where->bdev; bio->bi_bdev = where->bdev;
bio->bi_end_io = endio; bio->bi_end_io = endio;
bio->bi_private = io;
bio->bi_destructor = dm_bio_destructor; bio->bi_destructor = dm_bio_destructor;
bio->bi_max_vecs--; store_io_and_region_in_bio(bio, io, region);
bio_set_region(bio, region);
/* /*
* Try and add as many pages as possible. * Try and add as many pages as possible.
...@@ -337,6 +349,8 @@ static void dispatch_io(int rw, unsigned int num_regions, ...@@ -337,6 +349,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
int i; int i;
struct dpages old_pages = *dp; struct dpages old_pages = *dp;
BUG_ON(num_regions > DM_IO_MAX_REGIONS);
if (sync) if (sync)
rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
...@@ -361,7 +375,14 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, ...@@ -361,7 +375,14 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
struct dm_io_region *where, int rw, struct dpages *dp, struct dm_io_region *where, int rw, struct dpages *dp,
unsigned long *error_bits) unsigned long *error_bits)
{ {
struct io io; /*
* gcc <= 4.3 can't do the alignment for stack variables, so we must
* align it on our own.
* volatile prevents the optimizer from removing or reusing
* "io_" field from the stack frame (allowed in ANSI C).
*/
volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
if (num_regions > 1 && (rw & RW_MASK) != WRITE) { if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1); WARN_ON(1);
...@@ -369,33 +390,33 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, ...@@ -369,33 +390,33 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
} }
retry: retry:
io.error_bits = 0; io->error_bits = 0;
io.eopnotsupp_bits = 0; io->eopnotsupp_bits = 0;
atomic_set(&io.count, 1); /* see dispatch_io() */ atomic_set(&io->count, 1); /* see dispatch_io() */
io.sleeper = current; io->sleeper = current;
io.client = client; io->client = client;
dispatch_io(rw, num_regions, where, dp, &io, 1); dispatch_io(rw, num_regions, where, dp, io, 1);
while (1) { while (1) {
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
if (!atomic_read(&io.count)) if (!atomic_read(&io->count))
break; break;
io_schedule(); io_schedule();
} }
set_current_state(TASK_RUNNING); set_current_state(TASK_RUNNING);
if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) { if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
rw &= ~(1 << BIO_RW_BARRIER); rw &= ~(1 << BIO_RW_BARRIER);
goto retry; goto retry;
} }
if (error_bits) if (error_bits)
*error_bits = io.error_bits; *error_bits = io->error_bits;
return io.error_bits ? -EIO : 0; return io->error_bits ? -EIO : 0;
} }
static int async_io(struct dm_io_client *client, unsigned int num_regions, static int async_io(struct dm_io_client *client, unsigned int num_regions,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment