Commit 7ecaa1e6 authored by NeilBrown, committed by Linus Torvalds

[PATCH] md: Infrastructure to allow normal IO to continue while array is expanding

We need to allow that different stripes are of different effective sizes, and
use the appropriate size.  Also, when a stripe is being expanded, we must
block any IO attempts until the stripe is stable again.

Key elements in this change are:
 - each stripe_head gets a 'disks' field which is part of the key,
   thus there can sometimes be two stripe heads of the same area of
   the array, but covering different numbers of devices.  One of these
   will be marked STRIPE_EXPANDING and so won't accept new requests.
 - conf->expand_progress tracks how the expansion is progressing and
   is used to determine whether the target part of the array has been
   expanded yet or not.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent ad01c9e3
...@@ -178,10 +178,10 @@ static int grow_buffers(struct stripe_head *sh, int num) ...@@ -178,10 +178,10 @@ static int grow_buffers(struct stripe_head *sh, int num)
static void raid5_build_block (struct stripe_head *sh, int i); static void raid5_build_block (struct stripe_head *sh, int i);
static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx) static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks, i; int i;
if (atomic_read(&sh->count) != 0) if (atomic_read(&sh->count) != 0)
BUG(); BUG();
...@@ -198,7 +198,9 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx) ...@@ -198,7 +198,9 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
sh->pd_idx = pd_idx; sh->pd_idx = pd_idx;
sh->state = 0; sh->state = 0;
for (i=disks; i--; ) { sh->disks = disks;
for (i = sh->disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if (dev->toread || dev->towrite || dev->written || if (dev->toread || dev->towrite || dev->written ||
...@@ -215,7 +217,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx) ...@@ -215,7 +217,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
insert_hash(conf, sh); insert_hash(conf, sh);
} }
static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector) static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, int disks)
{ {
struct stripe_head *sh; struct stripe_head *sh;
struct hlist_node *hn; struct hlist_node *hn;
...@@ -223,7 +225,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector) ...@@ -223,7 +225,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
CHECK_DEVLOCK(); CHECK_DEVLOCK();
PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
if (sh->sector == sector) if (sh->sector == sector && sh->disks == disks)
return sh; return sh;
PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
return NULL; return NULL;
...@@ -232,7 +234,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector) ...@@ -232,7 +234,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
static void unplug_slaves(mddev_t *mddev); static void unplug_slaves(mddev_t *mddev);
static void raid5_unplug_device(request_queue_t *q); static void raid5_unplug_device(request_queue_t *q);
static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, int disks,
int pd_idx, int noblock) int pd_idx, int noblock)
{ {
struct stripe_head *sh; struct stripe_head *sh;
...@@ -245,7 +247,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector ...@@ -245,7 +247,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
wait_event_lock_irq(conf->wait_for_stripe, wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0, conf->quiesce == 0,
conf->device_lock, /* nothing */); conf->device_lock, /* nothing */);
sh = __find_stripe(conf, sector); sh = __find_stripe(conf, sector, disks);
if (!sh) { if (!sh) {
if (!conf->inactive_blocked) if (!conf->inactive_blocked)
sh = get_free_stripe(conf); sh = get_free_stripe(conf);
...@@ -263,7 +265,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector ...@@ -263,7 +265,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
); );
conf->inactive_blocked = 0; conf->inactive_blocked = 0;
} else } else
init_stripe(sh, sector, pd_idx); init_stripe(sh, sector, pd_idx, disks);
} else { } else {
if (atomic_read(&sh->count)) { if (atomic_read(&sh->count)) {
if (!list_empty(&sh->lru)) if (!list_empty(&sh->lru))
...@@ -300,6 +302,7 @@ static int grow_one_stripe(raid5_conf_t *conf) ...@@ -300,6 +302,7 @@ static int grow_one_stripe(raid5_conf_t *conf)
kmem_cache_free(conf->slab_cache, sh); kmem_cache_free(conf->slab_cache, sh);
return 0; return 0;
} }
sh->disks = conf->raid_disks;
/* we just created an active stripe so... */ /* we just created an active stripe so... */
atomic_set(&sh->count, 1); atomic_set(&sh->count, 1);
atomic_inc(&conf->active_stripes); atomic_inc(&conf->active_stripes);
...@@ -483,7 +486,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, ...@@ -483,7 +486,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
{ {
struct stripe_head *sh = bi->bi_private; struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks, i; int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
if (bi->bi_size) if (bi->bi_size)
...@@ -581,7 +584,7 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done, ...@@ -581,7 +584,7 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
{ {
struct stripe_head *sh = bi->bi_private; struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks, i; int disks = sh->disks, i;
unsigned long flags; unsigned long flags;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
...@@ -735,7 +738,7 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, ...@@ -735,7 +738,7 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
static sector_t compute_blocknr(struct stripe_head *sh, int i) static sector_t compute_blocknr(struct stripe_head *sh, int i)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int raid_disks = conf->raid_disks, data_disks = raid_disks - 1; int raid_disks = sh->disks, data_disks = raid_disks - 1;
sector_t new_sector = sh->sector, check; sector_t new_sector = sh->sector, check;
int sectors_per_chunk = conf->chunk_size >> 9; int sectors_per_chunk = conf->chunk_size >> 9;
sector_t stripe; sector_t stripe;
...@@ -836,8 +839,7 @@ static void copy_data(int frombio, struct bio *bio, ...@@ -836,8 +839,7 @@ static void copy_data(int frombio, struct bio *bio,
static void compute_block(struct stripe_head *sh, int dd_idx) static void compute_block(struct stripe_head *sh, int dd_idx)
{ {
raid5_conf_t *conf = sh->raid_conf; int i, count, disks = sh->disks;
int i, count, disks = conf->raid_disks;
void *ptr[MAX_XOR_BLOCKS], *p; void *ptr[MAX_XOR_BLOCKS], *p;
PRINTK("compute_block, stripe %llu, idx %d\n", PRINTK("compute_block, stripe %llu, idx %d\n",
...@@ -867,7 +869,7 @@ static void compute_block(struct stripe_head *sh, int dd_idx) ...@@ -867,7 +869,7 @@ static void compute_block(struct stripe_head *sh, int dd_idx)
static void compute_parity(struct stripe_head *sh, int method) static void compute_parity(struct stripe_head *sh, int method)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count; int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
void *ptr[MAX_XOR_BLOCKS]; void *ptr[MAX_XOR_BLOCKS];
struct bio *chosen; struct bio *chosen;
...@@ -1055,7 +1057,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in ...@@ -1055,7 +1057,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
static void handle_stripe(struct stripe_head *sh) static void handle_stripe(struct stripe_head *sh)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks; int disks = sh->disks;
struct bio *return_bi= NULL; struct bio *return_bi= NULL;
struct bio *bi; struct bio *bi;
int i; int i;
...@@ -1649,12 +1651,10 @@ static inline void raid5_plug_device(raid5_conf_t *conf) ...@@ -1649,12 +1651,10 @@ static inline void raid5_plug_device(raid5_conf_t *conf)
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
} }
static int make_request (request_queue_t *q, struct bio * bi) static int make_request(request_queue_t *q, struct bio * bi)
{ {
mddev_t *mddev = q->queuedata; mddev_t *mddev = q->queuedata;
raid5_conf_t *conf = mddev_to_conf(mddev); raid5_conf_t *conf = mddev_to_conf(mddev);
const unsigned int raid_disks = conf->raid_disks;
const unsigned int data_disks = raid_disks - 1;
unsigned int dd_idx, pd_idx; unsigned int dd_idx, pd_idx;
sector_t new_sector; sector_t new_sector;
sector_t logical_sector, last_sector; sector_t logical_sector, last_sector;
...@@ -1678,20 +1678,48 @@ static int make_request (request_queue_t *q, struct bio * bi) ...@@ -1678,20 +1678,48 @@ static int make_request (request_queue_t *q, struct bio * bi)
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w); DEFINE_WAIT(w);
int disks;
new_sector = raid5_compute_sector(logical_sector, retry:
raid_disks, data_disks, &dd_idx, &pd_idx, conf); if (likely(conf->expand_progress == MaxSector))
disks = conf->raid_disks;
else {
spin_lock_irq(&conf->device_lock);
disks = conf->raid_disks;
if (logical_sector >= conf->expand_progress)
disks = conf->previous_raid_disks;
spin_unlock_irq(&conf->device_lock);
}
new_sector = raid5_compute_sector(logical_sector, disks, disks - 1,
&dd_idx, &pd_idx, conf);
PRINTK("raid5: make_request, sector %llu logical %llu\n", PRINTK("raid5: make_request, sector %llu logical %llu\n",
(unsigned long long)new_sector, (unsigned long long)new_sector,
(unsigned long long)logical_sector); (unsigned long long)logical_sector);
retry:
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK)); sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
if (sh) { if (sh) {
if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { if (unlikely(conf->expand_progress != MaxSector)) {
/* Add failed due to overlap. Flush everything /* expansion might have moved on while waiting for a
* stripe, so we must do the range check again.
*/
int must_retry = 0;
spin_lock_irq(&conf->device_lock);
if (logical_sector < conf->expand_progress &&
disks == conf->previous_raid_disks)
/* mismatch, need to try again */
must_retry = 1;
spin_unlock_irq(&conf->device_lock);
if (must_retry) {
release_stripe(sh);
goto retry;
}
}
if (test_bit(STRIPE_EXPANDING, &sh->state) ||
!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
/* Stripe is busy expanding or
* add failed due to overlap. Flush everything
* and wait a while * and wait a while
*/ */
raid5_unplug_device(mddev->queue); raid5_unplug_device(mddev->queue);
...@@ -1703,7 +1731,6 @@ static int make_request (request_queue_t *q, struct bio * bi) ...@@ -1703,7 +1731,6 @@ static int make_request (request_queue_t *q, struct bio * bi)
raid5_plug_device(conf); raid5_plug_device(conf);
handle_stripe(sh); handle_stripe(sh);
release_stripe(sh); release_stripe(sh);
} else { } else {
/* cannot get stripe for read-ahead, just give-up */ /* cannot get stripe for read-ahead, just give-up */
clear_bit(BIO_UPTODATE, &bi->bi_flags); clear_bit(BIO_UPTODATE, &bi->bi_flags);
...@@ -1779,9 +1806,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i ...@@ -1779,9 +1806,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
first_sector = raid5_compute_sector((sector_t)stripe*data_disks*sectors_per_chunk first_sector = raid5_compute_sector((sector_t)stripe*data_disks*sectors_per_chunk
+ chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf); + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
sh = get_active_stripe(conf, sector_nr, pd_idx, 1); sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
if (sh == NULL) { if (sh == NULL) {
sh = get_active_stripe(conf, sector_nr, pd_idx, 0); sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0);
/* make sure we don't swamp the stripe cache if someone else /* make sure we don't swamp the stripe cache if someone else
* is trying to get access * is trying to get access
*/ */
...@@ -1998,6 +2025,7 @@ static int run(mddev_t *mddev) ...@@ -1998,6 +2025,7 @@ static int run(mddev_t *mddev)
conf->level = mddev->level; conf->level = mddev->level;
conf->algorithm = mddev->layout; conf->algorithm = mddev->layout;
conf->max_nr_stripes = NR_STRIPES; conf->max_nr_stripes = NR_STRIPES;
conf->expand_progress = MaxSector;
/* device size must be a multiple of chunk size */ /* device size must be a multiple of chunk size */
mddev->size &= ~(mddev->chunk_size/1024 -1); mddev->size &= ~(mddev->chunk_size/1024 -1);
...@@ -2128,7 +2156,7 @@ static void print_sh (struct stripe_head *sh) ...@@ -2128,7 +2156,7 @@ static void print_sh (struct stripe_head *sh)
printk("sh %llu, count %d.\n", printk("sh %llu, count %d.\n",
(unsigned long long)sh->sector, atomic_read(&sh->count)); (unsigned long long)sh->sector, atomic_read(&sh->count));
printk("sh %llu, ", (unsigned long long)sh->sector); printk("sh %llu, ", (unsigned long long)sh->sector);
for (i = 0; i < sh->raid_conf->raid_disks; i++) { for (i = 0; i < sh->disks; i++) {
printk("(cache%d: %p %ld) ", printk("(cache%d: %p %ld) ",
i, sh->dev[i].page, sh->dev[i].flags); i, sh->dev[i].page, sh->dev[i].flags);
} }
......
...@@ -135,6 +135,7 @@ struct stripe_head { ...@@ -135,6 +135,7 @@ struct stripe_head {
atomic_t count; /* nr of active thread/requests */ atomic_t count; /* nr of active thread/requests */
spinlock_t lock; spinlock_t lock;
int bm_seq; /* sequence number for bitmap flushes */ int bm_seq; /* sequence number for bitmap flushes */
int disks; /* disks in stripe */
struct r5dev { struct r5dev {
struct bio req; struct bio req;
struct bio_vec vec; struct bio_vec vec;
...@@ -174,6 +175,7 @@ struct stripe_head { ...@@ -174,6 +175,7 @@ struct stripe_head {
#define STRIPE_DELAYED 6 #define STRIPE_DELAYED 6
#define STRIPE_DEGRADED 7 #define STRIPE_DEGRADED 7
#define STRIPE_BIT_DELAY 8 #define STRIPE_BIT_DELAY 8
#define STRIPE_EXPANDING 9
/* /*
* Plugging: * Plugging:
...@@ -211,6 +213,10 @@ struct raid5_private_data { ...@@ -211,6 +213,10 @@ struct raid5_private_data {
int raid_disks, working_disks, failed_disks; int raid_disks, working_disks, failed_disks;
int max_nr_stripes; int max_nr_stripes;
/* used during an expand */
sector_t expand_progress; /* MaxSector when no expand happening */
int previous_raid_disks;
struct list_head handle_list; /* stripes needing handling */ struct list_head handle_list; /* stripes needing handling */
struct list_head delayed_list; /* stripes that have plugged requests */ struct list_head delayed_list; /* stripes that have plugged requests */
struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment