Commit 6ed3003c authored by NeilBrown, committed by Linus Torvalds

md: fix an occasional deadlock in raid5

raid5's 'make_request' function calls generic_make_request on underlying
devices and if we run out of stripe heads, it could end up waiting for one of
those requests to complete.  This is bad as recursive calls to
generic_make_request go on a queue and are not even attempted until
make_request completes.

So: don't make any generic_make_request calls in raid5 make_request until all
waiting has been done.  We do this by simply setting STRIPE_HANDLE instead of
calling handle_stripe().

If we need more stripe_heads, raid5d will get called to process the pending
stripe_heads which will call generic_make_request from a different thread
where no deadlock will happen.

This change by itself causes a performance hit.  So add a change so that
raid5_activate_delayed is only called at unplug time, never in raid5d.  This
seems to bring back the performance numbers.  Calling it in raid5d was
sometimes too soon...

Neil said:

  How about we queue it for 2.6.25-rc1 and then about when -rc2 comes out,
  we queue it for 2.6.24.y?
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Tested-by: dean gaudet <dean@arctic.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 73c34431
...@@ -3159,7 +3159,8 @@ static void raid5_activate_delayed(raid5_conf_t *conf) ...@@ -3159,7 +3159,8 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
atomic_inc(&conf->preread_active_stripes); atomic_inc(&conf->preread_active_stripes);
list_add_tail(&sh->lru, &conf->handle_list); list_add_tail(&sh->lru, &conf->handle_list);
} }
} } else
blk_plug_device(conf->mddev->queue);
} }
static void activate_bit_delay(raid5_conf_t *conf) static void activate_bit_delay(raid5_conf_t *conf)
...@@ -3549,7 +3550,8 @@ static int make_request(struct request_queue *q, struct bio * bi) ...@@ -3549,7 +3550,8 @@ static int make_request(struct request_queue *q, struct bio * bi)
goto retry; goto retry;
} }
finish_wait(&conf->wait_for_overlap, &w); finish_wait(&conf->wait_for_overlap, &w);
handle_stripe(sh, NULL); set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
release_stripe(sh); release_stripe(sh);
} else { } else {
/* cannot get stripe for read-ahead, just give-up */ /* cannot get stripe for read-ahead, just give-up */
...@@ -3892,7 +3894,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) ...@@ -3892,7 +3894,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
* During the scan, completed stripes are saved for us by the interrupt * During the scan, completed stripes are saved for us by the interrupt
* handler, so that they will not have to wait for our next wakeup. * handler, so that they will not have to wait for our next wakeup.
*/ */
static void raid5d (mddev_t *mddev) static void raid5d(mddev_t *mddev)
{ {
struct stripe_head *sh; struct stripe_head *sh;
raid5_conf_t *conf = mddev_to_conf(mddev); raid5_conf_t *conf = mddev_to_conf(mddev);
...@@ -3917,12 +3919,6 @@ static void raid5d (mddev_t *mddev) ...@@ -3917,12 +3919,6 @@ static void raid5d (mddev_t *mddev)
activate_bit_delay(conf); activate_bit_delay(conf);
} }
if (list_empty(&conf->handle_list) &&
atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD &&
!blk_queue_plugged(mddev->queue) &&
!list_empty(&conf->delayed_list))
raid5_activate_delayed(conf);
while ((bio = remove_bio_from_retry(conf))) { while ((bio = remove_bio_from_retry(conf))) {
int ok; int ok;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment