Commit 894bcdfb authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: don't retry recovery of raid1 that fails due to error on source drive.
  md: Allow md devices to be created by name.
  md: make devices disappear when they are no longer needed.
  md: centralise all freeing of an 'mddev' in 'md_free'
  md: move allocation of ->queue from mddev_find to md_probe
  md: need another print_sb for mdp_superblock_1
  md: use list_for_each_entry macro directly
  md: raid0: make hash_spacing and preshift sector-based.
  md: raid0: Represent the size of strip zones in sectors.
  md: raid0 create_strip_zones(): Add KERN_INFO/KERN_ERR to printk's.
  md: raid0 create_strip_zones(): Make two local variables sector-based.
  md: raid0: Represent zone->zone_offset in sectors.
  md: raid0: Represent device offset in sectors.
  md: raid0_make_request(): Replace local variable block by sector.
  md: raid0_make_request(): Remove local variable chunk_size.
  md: raid0_make_request(): Replace chunksize_bits by chunksect_bits.
  md: use sysfs_notify_dirent to notify changes to md/sync_action.
  md: fix bitmap-on-external-file bug.
parents a419df8a 4044ba58
...@@ -215,7 +215,6 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, ...@@ -215,7 +215,6 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
/* choose a good rdev and read the page from there */ /* choose a good rdev and read the page from there */
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
sector_t target; sector_t target;
if (!page) if (!page)
...@@ -223,7 +222,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, ...@@ -223,7 +222,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
if (!page) if (!page)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
if (! test_bit(In_sync, &rdev->flags) if (! test_bit(In_sync, &rdev->flags)
|| test_bit(Faulty, &rdev->flags)) || test_bit(Faulty, &rdev->flags))
continue; continue;
...@@ -964,7 +963,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) ...@@ -964,7 +963,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
*/ */
page = bitmap->sb_page; page = bitmap->sb_page;
offset = sizeof(bitmap_super_t); offset = sizeof(bitmap_super_t);
read_sb_page(bitmap->mddev, bitmap->offset, if (!file)
read_sb_page(bitmap->mddev,
bitmap->offset,
page, page,
index, count); index, count);
} else if (file) { } else if (file) {
......
...@@ -283,7 +283,6 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size) ...@@ -283,7 +283,6 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
static int run(mddev_t *mddev) static int run(mddev_t *mddev)
{ {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
int i; int i;
conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL); conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL);
...@@ -296,7 +295,7 @@ static int run(mddev_t *mddev) ...@@ -296,7 +295,7 @@ static int run(mddev_t *mddev)
} }
conf->nfaults = 0; conf->nfaults = 0;
rdev_for_each(rdev, tmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
conf->rdev = rdev; conf->rdev = rdev;
mddev->array_sectors = mddev->size * 2; mddev->array_sectors = mddev->size * 2;
......
...@@ -105,7 +105,6 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) ...@@ -105,7 +105,6 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
int i, nb_zone, cnt; int i, nb_zone, cnt;
sector_t min_sectors; sector_t min_sectors;
sector_t curr_sector; sector_t curr_sector;
struct list_head *tmp;
conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
GFP_KERNEL); GFP_KERNEL);
...@@ -115,7 +114,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) ...@@ -115,7 +114,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
cnt = 0; cnt = 0;
conf->array_sectors = 0; conf->array_sectors = 0;
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
int j = rdev->raid_disk; int j = rdev->raid_disk;
dev_info_t *disk = conf->disks + j; dev_info_t *disk = conf->disks + j;
......
...@@ -214,19 +214,32 @@ static inline mddev_t *mddev_get(mddev_t *mddev) ...@@ -214,19 +214,32 @@ static inline mddev_t *mddev_get(mddev_t *mddev)
return mddev; return mddev;
} }
static void mddev_delayed_delete(struct work_struct *ws)
{
mddev_t *mddev = container_of(ws, mddev_t, del_work);
kobject_del(&mddev->kobj);
kobject_put(&mddev->kobj);
}
static void mddev_put(mddev_t *mddev) static void mddev_put(mddev_t *mddev)
{ {
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
return; return;
if (!mddev->raid_disks && list_empty(&mddev->disks)) { if (!mddev->raid_disks && list_empty(&mddev->disks) &&
!mddev->hold_active) {
list_del(&mddev->all_mddevs); list_del(&mddev->all_mddevs);
spin_unlock(&all_mddevs_lock); if (mddev->gendisk) {
blk_cleanup_queue(mddev->queue); /* we did a probe so need to clean up.
if (mddev->sysfs_state) * Call schedule_work inside the spinlock
sysfs_put(mddev->sysfs_state); * so that flush_scheduled_work() after
mddev->sysfs_state = NULL; * mddev_find will succeed in waiting for the
kobject_put(&mddev->kobj); * work to be done.
*/
INIT_WORK(&mddev->del_work, mddev_delayed_delete);
schedule_work(&mddev->del_work);
} else } else
kfree(mddev);
}
spin_unlock(&all_mddevs_lock); spin_unlock(&all_mddevs_lock);
} }
...@@ -236,6 +249,8 @@ static mddev_t * mddev_find(dev_t unit) ...@@ -236,6 +249,8 @@ static mddev_t * mddev_find(dev_t unit)
retry: retry:
spin_lock(&all_mddevs_lock); spin_lock(&all_mddevs_lock);
if (unit) {
list_for_each_entry(mddev, &all_mddevs, all_mddevs) list_for_each_entry(mddev, &all_mddevs, all_mddevs)
if (mddev->unit == unit) { if (mddev->unit == unit) {
mddev_get(mddev); mddev_get(mddev);
...@@ -245,6 +260,39 @@ static mddev_t * mddev_find(dev_t unit) ...@@ -245,6 +260,39 @@ static mddev_t * mddev_find(dev_t unit)
} }
if (new) { if (new) {
list_add(&new->all_mddevs, &all_mddevs);
spin_unlock(&all_mddevs_lock);
new->hold_active = UNTIL_IOCTL;
return new;
}
} else if (new) {
/* find an unused unit number */
static int next_minor = 512;
int start = next_minor;
int is_free = 0;
int dev = 0;
while (!is_free) {
dev = MKDEV(MD_MAJOR, next_minor);
next_minor++;
if (next_minor > MINORMASK)
next_minor = 0;
if (next_minor == start) {
/* Oh dear, all in use. */
spin_unlock(&all_mddevs_lock);
kfree(new);
return NULL;
}
is_free = 1;
list_for_each_entry(mddev, &all_mddevs, all_mddevs)
if (mddev->unit == dev) {
is_free = 0;
break;
}
}
new->unit = dev;
new->md_minor = MINOR(dev);
new->hold_active = UNTIL_STOP;
list_add(&new->all_mddevs, &all_mddevs); list_add(&new->all_mddevs, &all_mddevs);
spin_unlock(&all_mddevs_lock); spin_unlock(&all_mddevs_lock);
return new; return new;
...@@ -275,16 +323,6 @@ static mddev_t * mddev_find(dev_t unit) ...@@ -275,16 +323,6 @@ static mddev_t * mddev_find(dev_t unit)
new->resync_max = MaxSector; new->resync_max = MaxSector;
new->level = LEVEL_NONE; new->level = LEVEL_NONE;
new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
kfree(new);
return NULL;
}
/* Can be unlocked because the queue is new: no concurrency */
queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
blk_queue_make_request(new->queue, md_fail_request);
goto retry; goto retry;
} }
...@@ -307,25 +345,23 @@ static inline void mddev_unlock(mddev_t * mddev) ...@@ -307,25 +345,23 @@ static inline void mddev_unlock(mddev_t * mddev)
static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
{ {
mdk_rdev_t * rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->desc_nr == nr) if (rdev->desc_nr == nr)
return rdev; return rdev;
}
return NULL; return NULL;
} }
static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
{ {
struct list_head *tmp;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->bdev->bd_dev == dev) if (rdev->bdev->bd_dev == dev)
return rdev; return rdev;
}
return NULL; return NULL;
} }
...@@ -861,7 +897,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -861,7 +897,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{ {
mdp_super_t *sb; mdp_super_t *sb;
struct list_head *tmp;
mdk_rdev_t *rdev2; mdk_rdev_t *rdev2;
int next_spare = mddev->raid_disks; int next_spare = mddev->raid_disks;
...@@ -933,7 +968,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -933,7 +968,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->state |= (1<<MD_SB_BITMAP_PRESENT); sb->state |= (1<<MD_SB_BITMAP_PRESENT);
sb->disks[0].state = (1<<MD_DISK_REMOVED); sb->disks[0].state = (1<<MD_DISK_REMOVED);
rdev_for_each(rdev2, tmp, mddev) { list_for_each_entry(rdev2, &mddev->disks, same_set) {
mdp_disk_t *d; mdp_disk_t *d;
int desc_nr; int desc_nr;
if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
...@@ -1259,7 +1294,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1259,7 +1294,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{ {
struct mdp_superblock_1 *sb; struct mdp_superblock_1 *sb;
struct list_head *tmp;
mdk_rdev_t *rdev2; mdk_rdev_t *rdev2;
int max_dev, i; int max_dev, i;
/* make rdev->sb match mddev and rdev data. */ /* make rdev->sb match mddev and rdev data. */
...@@ -1307,7 +1341,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1307,7 +1341,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
} }
max_dev = 0; max_dev = 0;
rdev_for_each(rdev2, tmp, mddev) list_for_each_entry(rdev2, &mddev->disks, same_set)
if (rdev2->desc_nr+1 > max_dev) if (rdev2->desc_nr+1 > max_dev)
max_dev = rdev2->desc_nr+1; max_dev = rdev2->desc_nr+1;
...@@ -1316,7 +1350,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1316,7 +1350,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
for (i=0; i<max_dev;i++) for (i=0; i<max_dev;i++)
sb->dev_roles[i] = cpu_to_le16(0xfffe); sb->dev_roles[i] = cpu_to_le16(0xfffe);
rdev_for_each(rdev2, tmp, mddev) { list_for_each_entry(rdev2, &mddev->disks, same_set) {
i = rdev2->desc_nr; i = rdev2->desc_nr;
if (test_bit(Faulty, &rdev2->flags)) if (test_bit(Faulty, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(0xfffe); sb->dev_roles[i] = cpu_to_le16(0xfffe);
...@@ -1466,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) ...@@ -1466,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
list_add_rcu(&rdev->same_set, &mddev->disks); list_add_rcu(&rdev->same_set, &mddev->disks);
bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
/* May as well allow recovery to be retried once */
mddev->recovery_disabled = 0;
return 0; return 0;
fail: fail:
...@@ -1571,8 +1608,7 @@ static void kick_rdev_from_array(mdk_rdev_t * rdev) ...@@ -1571,8 +1608,7 @@ static void kick_rdev_from_array(mdk_rdev_t * rdev)
static void export_array(mddev_t *mddev) static void export_array(mddev_t *mddev)
{ {
struct list_head *tmp; mdk_rdev_t *rdev, *tmp;
mdk_rdev_t *rdev;
rdev_for_each(rdev, tmp, mddev) { rdev_for_each(rdev, tmp, mddev) {
if (!rdev->mddev) { if (!rdev->mddev) {
...@@ -1593,7 +1629,7 @@ static void print_desc(mdp_disk_t *desc) ...@@ -1593,7 +1629,7 @@ static void print_desc(mdp_disk_t *desc)
desc->major,desc->minor,desc->raid_disk,desc->state); desc->major,desc->minor,desc->raid_disk,desc->state);
} }
static void print_sb(mdp_super_t *sb) static void print_sb_90(mdp_super_t *sb)
{ {
int i; int i;
...@@ -1624,10 +1660,57 @@ static void print_sb(mdp_super_t *sb) ...@@ -1624,10 +1660,57 @@ static void print_sb(mdp_super_t *sb)
} }
printk(KERN_INFO "md: THIS: "); printk(KERN_INFO "md: THIS: ");
print_desc(&sb->this_disk); print_desc(&sb->this_disk);
} }
static void print_rdev(mdk_rdev_t *rdev) static void print_sb_1(struct mdp_superblock_1 *sb)
{
__u8 *uuid;
uuid = sb->set_uuid;
printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
KERN_INFO "md: Name: \"%s\" CT:%llu\n",
le32_to_cpu(sb->major_version),
le32_to_cpu(sb->feature_map),
uuid[0], uuid[1], uuid[2], uuid[3],
uuid[4], uuid[5], uuid[6], uuid[7],
uuid[8], uuid[9], uuid[10], uuid[11],
uuid[12], uuid[13], uuid[14], uuid[15],
sb->set_name,
(unsigned long long)le64_to_cpu(sb->ctime)
& MD_SUPERBLOCK_1_TIME_SEC_MASK);
uuid = sb->device_uuid;
printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
" RO:%llu\n"
KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
":%02x%02x%02x%02x%02x%02x\n"
KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
KERN_INFO "md: (MaxDev:%u) \n",
le32_to_cpu(sb->level),
(unsigned long long)le64_to_cpu(sb->size),
le32_to_cpu(sb->raid_disks),
le32_to_cpu(sb->layout),
le32_to_cpu(sb->chunksize),
(unsigned long long)le64_to_cpu(sb->data_offset),
(unsigned long long)le64_to_cpu(sb->data_size),
(unsigned long long)le64_to_cpu(sb->super_offset),
(unsigned long long)le64_to_cpu(sb->recovery_offset),
le32_to_cpu(sb->dev_number),
uuid[0], uuid[1], uuid[2], uuid[3],
uuid[4], uuid[5], uuid[6], uuid[7],
uuid[8], uuid[9], uuid[10], uuid[11],
uuid[12], uuid[13], uuid[14], uuid[15],
sb->devflags,
(unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
(unsigned long long)le64_to_cpu(sb->events),
(unsigned long long)le64_to_cpu(sb->resync_offset),
le32_to_cpu(sb->sb_csum),
le32_to_cpu(sb->max_dev)
);
}
static void print_rdev(mdk_rdev_t *rdev, int major_version)
{ {
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n", printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
...@@ -1635,15 +1718,22 @@ static void print_rdev(mdk_rdev_t *rdev) ...@@ -1635,15 +1718,22 @@ static void print_rdev(mdk_rdev_t *rdev)
test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags), test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
rdev->desc_nr); rdev->desc_nr);
if (rdev->sb_loaded) { if (rdev->sb_loaded) {
printk(KERN_INFO "md: rdev superblock:\n"); printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
print_sb((mdp_super_t*)page_address(rdev->sb_page)); switch (major_version) {
case 0:
print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
break;
case 1:
print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
break;
}
} else } else
printk(KERN_INFO "md: no rdev superblock!\n"); printk(KERN_INFO "md: no rdev superblock!\n");
} }
static void md_print_devices(void) static void md_print_devices(void)
{ {
struct list_head *tmp, *tmp2; struct list_head *tmp;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
mddev_t *mddev; mddev_t *mddev;
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
...@@ -1658,12 +1748,12 @@ static void md_print_devices(void) ...@@ -1658,12 +1748,12 @@ static void md_print_devices(void)
bitmap_print_sb(mddev->bitmap); bitmap_print_sb(mddev->bitmap);
else else
printk("%s: ", mdname(mddev)); printk("%s: ", mdname(mddev));
rdev_for_each(rdev, tmp2, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
printk("<%s>", bdevname(rdev->bdev,b)); printk("<%s>", bdevname(rdev->bdev,b));
printk("\n"); printk("\n");
rdev_for_each(rdev, tmp2, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
print_rdev(rdev); print_rdev(rdev, mddev->major_version);
} }
printk("md: **********************************\n"); printk("md: **********************************\n");
printk("\n"); printk("\n");
...@@ -1679,9 +1769,8 @@ static void sync_sbs(mddev_t * mddev, int nospares) ...@@ -1679,9 +1769,8 @@ static void sync_sbs(mddev_t * mddev, int nospares)
* with the rest of the array) * with the rest of the array)
*/ */
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->sb_events == mddev->events || if (rdev->sb_events == mddev->events ||
(nospares && (nospares &&
rdev->raid_disk < 0 && rdev->raid_disk < 0 &&
...@@ -1699,7 +1788,6 @@ static void sync_sbs(mddev_t * mddev, int nospares) ...@@ -1699,7 +1788,6 @@ static void sync_sbs(mddev_t * mddev, int nospares)
static void md_update_sb(mddev_t * mddev, int force_change) static void md_update_sb(mddev_t * mddev, int force_change)
{ {
struct list_head *tmp;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
int sync_req; int sync_req;
int nospares = 0; int nospares = 0;
...@@ -1790,7 +1878,7 @@ repeat: ...@@ -1790,7 +1878,7 @@ repeat:
mdname(mddev),mddev->in_sync); mdname(mddev),mddev->in_sync);
bitmap_update_sb(mddev->bitmap); bitmap_update_sb(mddev->bitmap);
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
dprintk(KERN_INFO "md: "); dprintk(KERN_INFO "md: ");
if (rdev->sb_loaded != 1) if (rdev->sb_loaded != 1)
...@@ -1999,7 +2087,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) ...@@ -1999,7 +2087,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
md_wakeup_thread(rdev->mddev->thread); md_wakeup_thread(rdev->mddev->thread);
} else if (rdev->mddev->pers) { } else if (rdev->mddev->pers) {
mdk_rdev_t *rdev2; mdk_rdev_t *rdev2;
struct list_head *tmp;
/* Activating a spare .. or possibly reactivating /* Activating a spare .. or possibly reactivating
* if we every get bitmaps working here. * if we every get bitmaps working here.
*/ */
...@@ -2010,7 +2097,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) ...@@ -2010,7 +2097,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
if (rdev->mddev->pers->hot_add_disk == NULL) if (rdev->mddev->pers->hot_add_disk == NULL)
return -EINVAL; return -EINVAL;
rdev_for_each(rdev2, tmp, rdev->mddev) list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
if (rdev2->raid_disk == slot) if (rdev2->raid_disk == slot)
return -EEXIST; return -EEXIST;
...@@ -2125,14 +2212,14 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) ...@@ -2125,14 +2212,14 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
*/ */
mddev_t *mddev; mddev_t *mddev;
int overlap = 0; int overlap = 0;
struct list_head *tmp, *tmp2; struct list_head *tmp;
mddev_unlock(my_mddev); mddev_unlock(my_mddev);
for_each_mddev(mddev, tmp) { for_each_mddev(mddev, tmp) {
mdk_rdev_t *rdev2; mdk_rdev_t *rdev2;
mddev_lock(mddev); mddev_lock(mddev);
rdev_for_each(rdev2, tmp2, mddev) list_for_each_entry(rdev2, &mddev->disks, same_set)
if (test_bit(AllReserved, &rdev2->flags) || if (test_bit(AllReserved, &rdev2->flags) ||
(rdev->bdev == rdev2->bdev && (rdev->bdev == rdev2->bdev &&
rdev != rdev2 && rdev != rdev2 &&
...@@ -2328,8 +2415,7 @@ abort_free: ...@@ -2328,8 +2415,7 @@ abort_free:
static void analyze_sbs(mddev_t * mddev) static void analyze_sbs(mddev_t * mddev)
{ {
int i; int i;
struct list_head *tmp; mdk_rdev_t *rdev, *freshest, *tmp;
mdk_rdev_t *rdev, *freshest;
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
freshest = NULL; freshest = NULL;
...@@ -3046,7 +3132,7 @@ action_store(mddev_t *mddev, const char *page, size_t len) ...@@ -3046,7 +3132,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
} }
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify_dirent(mddev->sysfs_action);
return len; return len;
} }
...@@ -3404,6 +3490,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, ...@@ -3404,6 +3490,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EACCES; return -EACCES;
rv = mddev_lock(mddev); rv = mddev_lock(mddev);
if (mddev->hold_active == UNTIL_IOCTL)
mddev->hold_active = 0;
if (!rv) { if (!rv) {
rv = entry->store(mddev, page, length); rv = entry->store(mddev, page, length);
mddev_unlock(mddev); mddev_unlock(mddev);
...@@ -3414,6 +3502,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, ...@@ -3414,6 +3502,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
static void md_free(struct kobject *ko) static void md_free(struct kobject *ko)
{ {
mddev_t *mddev = container_of(ko, mddev_t, kobj); mddev_t *mddev = container_of(ko, mddev_t, kobj);
if (mddev->sysfs_state)
sysfs_put(mddev->sysfs_state);
if (mddev->gendisk) {
del_gendisk(mddev->gendisk);
put_disk(mddev->gendisk);
}
if (mddev->queue)
blk_cleanup_queue(mddev->queue);
kfree(mddev); kfree(mddev);
} }
...@@ -3429,34 +3528,74 @@ static struct kobj_type md_ktype = { ...@@ -3429,34 +3528,74 @@ static struct kobj_type md_ktype = {
int mdp_major = 0; int mdp_major = 0;
static struct kobject *md_probe(dev_t dev, int *part, void *data) static int md_alloc(dev_t dev, char *name)
{ {
static DEFINE_MUTEX(disks_mutex); static DEFINE_MUTEX(disks_mutex);
mddev_t *mddev = mddev_find(dev); mddev_t *mddev = mddev_find(dev);
struct gendisk *disk; struct gendisk *disk;
int partitioned = (MAJOR(dev) != MD_MAJOR); int partitioned;
int shift = partitioned ? MdpMinorShift : 0; int shift;
int unit = MINOR(dev) >> shift; int unit;
int error; int error;
if (!mddev) if (!mddev)
return NULL; return -ENODEV;
partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
shift = partitioned ? MdpMinorShift : 0;
unit = MINOR(mddev->unit) >> shift;
/* wait for any previous instance of this device
* to be completely removed (mddev_delayed_delete).
*/
flush_scheduled_work();
mutex_lock(&disks_mutex); mutex_lock(&disks_mutex);
if (mddev->gendisk) { if (mddev->gendisk) {
mutex_unlock(&disks_mutex); mutex_unlock(&disks_mutex);
mddev_put(mddev); mddev_put(mddev);
return NULL; return -EEXIST;
} }
if (name) {
/* Need to ensure that 'name' is not a duplicate.
*/
mddev_t *mddev2;
spin_lock(&all_mddevs_lock);
list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
if (mddev2->gendisk &&
strcmp(mddev2->gendisk->disk_name, name) == 0) {
spin_unlock(&all_mddevs_lock);
return -EEXIST;
}
spin_unlock(&all_mddevs_lock);
}
mddev->queue = blk_alloc_queue(GFP_KERNEL);
if (!mddev->queue) {
mutex_unlock(&disks_mutex);
mddev_put(mddev);
return -ENOMEM;
}
/* Can be unlocked because the queue is new: no concurrency */
queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
blk_queue_make_request(mddev->queue, md_fail_request);
disk = alloc_disk(1 << shift); disk = alloc_disk(1 << shift);
if (!disk) { if (!disk) {
mutex_unlock(&disks_mutex); mutex_unlock(&disks_mutex);
blk_cleanup_queue(mddev->queue);
mddev->queue = NULL;
mddev_put(mddev); mddev_put(mddev);
return NULL; return -ENOMEM;
} }
disk->major = MAJOR(dev); disk->major = MAJOR(mddev->unit);
disk->first_minor = unit << shift; disk->first_minor = unit << shift;
if (partitioned) if (name)
strcpy(disk->disk_name, name);
else if (partitioned)
sprintf(disk->disk_name, "md_d%d", unit); sprintf(disk->disk_name, "md_d%d", unit);
else else
sprintf(disk->disk_name, "md%d", unit); sprintf(disk->disk_name, "md%d", unit);
...@@ -3464,7 +3603,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) ...@@ -3464,7 +3603,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
disk->private_data = mddev; disk->private_data = mddev;
disk->queue = mddev->queue; disk->queue = mddev->queue;
/* Allow extended partitions. This makes the /* Allow extended partitions. This makes the
* 'mdp' device redundant, but we can really * 'mdp' device redundant, but we can't really
* remove it now. * remove it now.
*/ */
disk->flags |= GENHD_FL_EXT_DEVT; disk->flags |= GENHD_FL_EXT_DEVT;
...@@ -3480,9 +3619,35 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) ...@@ -3480,9 +3619,35 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
kobject_uevent(&mddev->kobj, KOBJ_ADD); kobject_uevent(&mddev->kobj, KOBJ_ADD);
mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
} }
mddev_put(mddev);
return 0;
}
static struct kobject *md_probe(dev_t dev, int *part, void *data)
{
md_alloc(dev, NULL);
return NULL; return NULL;
} }
static int add_named_array(const char *val, struct kernel_param *kp)
{
/* val must be "md_*" where * is not all digits.
* We allocate an array with a large free minor number, and
* set the name to val. val must not already be an active name.
*/
int len = strlen(val);
char buf[DISK_NAME_LEN];
while (len && val[len-1] == '\n')
len--;
if (len >= DISK_NAME_LEN)
return -E2BIG;
strlcpy(buf, val, len+1);
if (strncmp(buf, "md_", 3) != 0)
return -EINVAL;
return md_alloc(0, buf);
}
static void md_safemode_timeout(unsigned long data) static void md_safemode_timeout(unsigned long data)
{ {
mddev_t *mddev = (mddev_t *) data; mddev_t *mddev = (mddev_t *) data;
...@@ -3501,7 +3666,6 @@ static int do_md_run(mddev_t * mddev) ...@@ -3501,7 +3666,6 @@ static int do_md_run(mddev_t * mddev)
{ {
int err; int err;
int chunk_size; int chunk_size;
struct list_head *tmp;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct gendisk *disk; struct gendisk *disk;
struct mdk_personality *pers; struct mdk_personality *pers;
...@@ -3540,7 +3704,7 @@ static int do_md_run(mddev_t * mddev) ...@@ -3540,7 +3704,7 @@ static int do_md_run(mddev_t * mddev)
} }
/* devices must have minimum size of one chunk */ /* devices must have minimum size of one chunk */
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
if (test_bit(Faulty, &rdev->flags)) if (test_bit(Faulty, &rdev->flags))
continue; continue;
if (rdev->size < chunk_size / 1024) { if (rdev->size < chunk_size / 1024) {
...@@ -3565,7 +3729,7 @@ static int do_md_run(mddev_t * mddev) ...@@ -3565,7 +3729,7 @@ static int do_md_run(mddev_t * mddev)
* the only valid external interface is through the md * the only valid external interface is through the md
* device. * device.
*/ */
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
if (test_bit(Faulty, &rdev->flags)) if (test_bit(Faulty, &rdev->flags))
continue; continue;
sync_blockdev(rdev->bdev); sync_blockdev(rdev->bdev);
...@@ -3630,10 +3794,10 @@ static int do_md_run(mddev_t * mddev) ...@@ -3630,10 +3794,10 @@ static int do_md_run(mddev_t * mddev)
*/ */
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
mdk_rdev_t *rdev2; mdk_rdev_t *rdev2;
struct list_head *tmp2;
int warned = 0; int warned = 0;
rdev_for_each(rdev, tmp, mddev) {
rdev_for_each(rdev2, tmp2, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set)
list_for_each_entry(rdev2, &mddev->disks, same_set) {
if (rdev < rdev2 && if (rdev < rdev2 &&
rdev->bdev->bd_contains == rdev->bdev->bd_contains ==
rdev2->bdev->bd_contains) { rdev2->bdev->bd_contains) {
...@@ -3647,7 +3811,7 @@ static int do_md_run(mddev_t * mddev) ...@@ -3647,7 +3811,7 @@ static int do_md_run(mddev_t * mddev)
warned = 1; warned = 1;
} }
} }
}
if (warned) if (warned)
printk(KERN_WARNING printk(KERN_WARNING
"True protection against single-disk" "True protection against single-disk"
...@@ -3684,6 +3848,7 @@ static int do_md_run(mddev_t * mddev) ...@@ -3684,6 +3848,7 @@ static int do_md_run(mddev_t * mddev)
printk(KERN_WARNING printk(KERN_WARNING
"md: cannot register extra attributes for %s\n", "md: cannot register extra attributes for %s\n",
mdname(mddev)); mdname(mddev));
mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
} else if (mddev->ro == 2) /* auto-readonly not meaningful */ } else if (mddev->ro == 2) /* auto-readonly not meaningful */
mddev->ro = 0; mddev->ro = 0;
...@@ -3694,7 +3859,7 @@ static int do_md_run(mddev_t * mddev) ...@@ -3694,7 +3859,7 @@ static int do_md_run(mddev_t * mddev)
mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
mddev->in_sync = 1; mddev->in_sync = 1;
rdev_for_each(rdev, tmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) { if (rdev->raid_disk >= 0) {
char nm[20]; char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk); sprintf(nm, "rd%d", rdev->raid_disk);
...@@ -3725,9 +3890,8 @@ static int do_md_run(mddev_t * mddev) ...@@ -3725,9 +3890,8 @@ static int do_md_run(mddev_t * mddev)
* it will remove the drives and not do the right thing * it will remove the drives and not do the right thing
*/ */
if (mddev->degraded && !mddev->sync_thread) { if (mddev->degraded && !mddev->sync_thread) {
struct list_head *rtmp;
int spares = 0; int spares = 0;
rdev_for_each(rdev, rtmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags)) !test_bit(Faulty, &rdev->flags))
...@@ -3754,7 +3918,8 @@ static int do_md_run(mddev_t * mddev) ...@@ -3754,7 +3918,8 @@ static int do_md_run(mddev_t * mddev)
mddev->changed = 1; mddev->changed = 1;
md_new_event(mddev); md_new_event(mddev);
sysfs_notify_dirent(mddev->sysfs_state); sysfs_notify_dirent(mddev->sysfs_state);
sysfs_notify(&mddev->kobj, NULL, "sync_action"); if (mddev->sysfs_action)
sysfs_notify_dirent(mddev->sysfs_action);
sysfs_notify(&mddev->kobj, NULL, "degraded"); sysfs_notify(&mddev->kobj, NULL, "degraded");
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
return 0; return 0;
...@@ -3854,9 +4019,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) ...@@ -3854,9 +4019,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
mddev->queue->merge_bvec_fn = NULL; mddev->queue->merge_bvec_fn = NULL;
mddev->queue->unplug_fn = NULL; mddev->queue->unplug_fn = NULL;
mddev->queue->backing_dev_info.congested_fn = NULL; mddev->queue->backing_dev_info.congested_fn = NULL;
if (mddev->pers->sync_request) if (mddev->pers->sync_request) {
sysfs_remove_group(&mddev->kobj, &md_redundancy_group); sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
if (mddev->sysfs_action)
sysfs_put(mddev->sysfs_action);
mddev->sysfs_action = NULL;
}
module_put(mddev->pers->owner); module_put(mddev->pers->owner);
mddev->pers = NULL; mddev->pers = NULL;
/* tell userspace to handle 'inactive' */ /* tell userspace to handle 'inactive' */
...@@ -3883,7 +4051,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) ...@@ -3883,7 +4051,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
*/ */
if (mode == 0) { if (mode == 0) {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
...@@ -3895,7 +4062,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) ...@@ -3895,7 +4062,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
} }
mddev->bitmap_offset = 0; mddev->bitmap_offset = 0;
rdev_for_each(rdev, tmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) { if (rdev->raid_disk >= 0) {
char nm[20]; char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk); sprintf(nm, "rd%d", rdev->raid_disk);
...@@ -3941,6 +4108,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) ...@@ -3941,6 +4108,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
mddev->barriers_work = 0; mddev->barriers_work = 0;
mddev->safemode = 0; mddev->safemode = 0;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
if (mddev->hold_active == UNTIL_STOP)
mddev->hold_active = 0;
} else if (mddev->pers) } else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n", printk(KERN_INFO "md: %s switched to read-only mode.\n",
...@@ -3956,7 +4125,6 @@ out: ...@@ -3956,7 +4125,6 @@ out:
static void autorun_array(mddev_t *mddev) static void autorun_array(mddev_t *mddev)
{ {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
int err; int err;
if (list_empty(&mddev->disks)) if (list_empty(&mddev->disks))
...@@ -3964,7 +4132,7 @@ static void autorun_array(mddev_t *mddev) ...@@ -3964,7 +4132,7 @@ static void autorun_array(mddev_t *mddev)
printk(KERN_INFO "md: running: "); printk(KERN_INFO "md: running: ");
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
printk("<%s>", bdevname(rdev->bdev,b)); printk("<%s>", bdevname(rdev->bdev,b));
} }
...@@ -3991,8 +4159,7 @@ static void autorun_array(mddev_t *mddev) ...@@ -3991,8 +4159,7 @@ static void autorun_array(mddev_t *mddev)
*/ */
static void autorun_devices(int part) static void autorun_devices(int part)
{ {
struct list_head *tmp; mdk_rdev_t *rdev0, *rdev, *tmp;
mdk_rdev_t *rdev0, *rdev;
mddev_t *mddev; mddev_t *mddev;
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
...@@ -4007,7 +4174,7 @@ static void autorun_devices(int part) ...@@ -4007,7 +4174,7 @@ static void autorun_devices(int part)
printk(KERN_INFO "md: considering %s ...\n", printk(KERN_INFO "md: considering %s ...\n",
bdevname(rdev0->bdev,b)); bdevname(rdev0->bdev,b));
INIT_LIST_HEAD(&candidates); INIT_LIST_HEAD(&candidates);
rdev_for_each_list(rdev, tmp, pending_raid_disks) rdev_for_each_list(rdev, tmp, &pending_raid_disks)
if (super_90_load(rdev, rdev0, 0) >= 0) { if (super_90_load(rdev, rdev0, 0) >= 0) {
printk(KERN_INFO "md: adding %s ...\n", printk(KERN_INFO "md: adding %s ...\n",
bdevname(rdev->bdev,b)); bdevname(rdev->bdev,b));
...@@ -4053,7 +4220,7 @@ static void autorun_devices(int part) ...@@ -4053,7 +4220,7 @@ static void autorun_devices(int part)
} else { } else {
printk(KERN_INFO "md: created %s\n", mdname(mddev)); printk(KERN_INFO "md: created %s\n", mdname(mddev));
mddev->persistent = 1; mddev->persistent = 1;
rdev_for_each_list(rdev, tmp, candidates) { rdev_for_each_list(rdev, tmp, &candidates) {
list_del_init(&rdev->same_set); list_del_init(&rdev->same_set);
if (bind_rdev_to_array(rdev, mddev)) if (bind_rdev_to_array(rdev, mddev))
export_rdev(rdev); export_rdev(rdev);
...@@ -4064,7 +4231,7 @@ static void autorun_devices(int part) ...@@ -4064,7 +4231,7 @@ static void autorun_devices(int part)
/* on success, candidates will be empty, on error /* on success, candidates will be empty, on error
* it won't... * it won't...
*/ */
rdev_for_each_list(rdev, tmp, candidates) { rdev_for_each_list(rdev, tmp, &candidates) {
list_del_init(&rdev->same_set); list_del_init(&rdev->same_set);
export_rdev(rdev); export_rdev(rdev);
} }
...@@ -4093,10 +4260,9 @@ static int get_array_info(mddev_t * mddev, void __user * arg) ...@@ -4093,10 +4260,9 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
mdu_array_info_t info; mdu_array_info_t info;
int nr,working,active,failed,spare; int nr,working,active,failed,spare;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
nr=working=active=failed=spare=0; nr=working=active=failed=spare=0;
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
nr++; nr++;
if (test_bit(Faulty, &rdev->flags)) if (test_bit(Faulty, &rdev->flags))
failed++; failed++;
...@@ -4614,9 +4780,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) ...@@ -4614,9 +4780,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
static int update_size(mddev_t *mddev, sector_t num_sectors) static int update_size(mddev_t *mddev, sector_t num_sectors)
{ {
mdk_rdev_t * rdev; mdk_rdev_t *rdev;
int rv; int rv;
struct list_head *tmp;
int fit = (num_sectors == 0); int fit = (num_sectors == 0);
if (mddev->pers->resize == NULL) if (mddev->pers->resize == NULL)
...@@ -4638,7 +4803,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) ...@@ -4638,7 +4803,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
* grow, and re-add. * grow, and re-add.
*/ */
return -EBUSY; return -EBUSY;
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
sector_t avail; sector_t avail;
avail = rdev->size * 2; avail = rdev->size * 2;
...@@ -5000,6 +5165,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, ...@@ -5000,6 +5165,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
done_unlock: done_unlock:
abort_unlock: abort_unlock:
if (mddev->hold_active == UNTIL_IOCTL &&
err != -EINVAL)
mddev->hold_active = 0;
mddev_unlock(mddev); mddev_unlock(mddev);
return err; return err;
...@@ -5016,14 +5184,25 @@ static int md_open(struct block_device *bdev, fmode_t mode) ...@@ -5016,14 +5184,25 @@ static int md_open(struct block_device *bdev, fmode_t mode)
* Succeed if we can lock the mddev, which confirms that * Succeed if we can lock the mddev, which confirms that
* it isn't being stopped right now. * it isn't being stopped right now.
*/ */
mddev_t *mddev = bdev->bd_disk->private_data; mddev_t *mddev = mddev_find(bdev->bd_dev);
int err; int err;
if (mddev->gendisk != bdev->bd_disk) {
/* we are racing with mddev_put which is discarding this
* bd_disk.
*/
mddev_put(mddev);
/* Wait until bdev->bd_disk is definitely gone */
flush_scheduled_work();
/* Then retry the open from the top */
return -ERESTARTSYS;
}
BUG_ON(mddev != bdev->bd_disk->private_data);
if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
goto out; goto out;
err = 0; err = 0;
mddev_get(mddev);
atomic_inc(&mddev->openers); atomic_inc(&mddev->openers);
mddev_unlock(mddev); mddev_unlock(mddev);
...@@ -5187,11 +5366,10 @@ static void status_unused(struct seq_file *seq) ...@@ -5187,11 +5366,10 @@ static void status_unused(struct seq_file *seq)
{ {
int i = 0; int i = 0;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
seq_printf(seq, "unused devices: "); seq_printf(seq, "unused devices: ");
rdev_for_each_list(rdev, tmp, pending_raid_disks) { list_for_each_entry(rdev, &pending_raid_disks, same_set) {
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
i++; i++;
seq_printf(seq, "%s ", seq_printf(seq, "%s ",
...@@ -5350,7 +5528,6 @@ static int md_seq_show(struct seq_file *seq, void *v) ...@@ -5350,7 +5528,6 @@ static int md_seq_show(struct seq_file *seq, void *v)
{ {
mddev_t *mddev = v; mddev_t *mddev = v;
sector_t size; sector_t size;
struct list_head *tmp2;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct mdstat_info *mi = seq->private; struct mdstat_info *mi = seq->private;
struct bitmap *bitmap; struct bitmap *bitmap;
...@@ -5387,7 +5564,7 @@ static int md_seq_show(struct seq_file *seq, void *v) ...@@ -5387,7 +5564,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
} }
size = 0; size = 0;
rdev_for_each(rdev, tmp2, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
seq_printf(seq, " %s[%d]", seq_printf(seq, " %s[%d]",
bdevname(rdev->bdev,b), rdev->desc_nr); bdevname(rdev->bdev,b), rdev->desc_nr);
...@@ -5694,7 +5871,6 @@ void md_do_sync(mddev_t *mddev) ...@@ -5694,7 +5871,6 @@ void md_do_sync(mddev_t *mddev)
struct list_head *tmp; struct list_head *tmp;
sector_t last_check; sector_t last_check;
int skipped = 0; int skipped = 0;
struct list_head *rtmp;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
char *desc; char *desc;
...@@ -5799,7 +5975,7 @@ void md_do_sync(mddev_t *mddev) ...@@ -5799,7 +5975,7 @@ void md_do_sync(mddev_t *mddev)
/* recovery follows the physical size of devices */ /* recovery follows the physical size of devices */
max_sectors = mddev->size << 1; max_sectors = mddev->size << 1;
j = MaxSector; j = MaxSector;
rdev_for_each(rdev, rtmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags) && !test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags) &&
...@@ -5949,7 +6125,7 @@ void md_do_sync(mddev_t *mddev) ...@@ -5949,7 +6125,7 @@ void md_do_sync(mddev_t *mddev)
} else { } else {
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
mddev->curr_resync = MaxSector; mddev->curr_resync = MaxSector;
rdev_for_each(rdev, rtmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags) && !test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags) &&
...@@ -5985,10 +6161,9 @@ EXPORT_SYMBOL_GPL(md_do_sync); ...@@ -5985,10 +6161,9 @@ EXPORT_SYMBOL_GPL(md_do_sync);
static int remove_and_add_spares(mddev_t *mddev) static int remove_and_add_spares(mddev_t *mddev)
{ {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *rtmp;
int spares = 0; int spares = 0;
rdev_for_each(rdev, rtmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&
!test_bit(Blocked, &rdev->flags) && !test_bit(Blocked, &rdev->flags) &&
(test_bit(Faulty, &rdev->flags) || (test_bit(Faulty, &rdev->flags) ||
...@@ -6003,8 +6178,8 @@ static int remove_and_add_spares(mddev_t *mddev) ...@@ -6003,8 +6178,8 @@ static int remove_and_add_spares(mddev_t *mddev)
} }
} }
if (mddev->degraded && ! mddev->ro) { if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
rdev_for_each(rdev, rtmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags) &&
!test_bit(Blocked, &rdev->flags)) !test_bit(Blocked, &rdev->flags))
...@@ -6056,7 +6231,6 @@ static int remove_and_add_spares(mddev_t *mddev) ...@@ -6056,7 +6231,6 @@ static int remove_and_add_spares(mddev_t *mddev)
void md_check_recovery(mddev_t *mddev) void md_check_recovery(mddev_t *mddev)
{ {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *rtmp;
if (mddev->bitmap) if (mddev->bitmap)
...@@ -6120,7 +6294,7 @@ void md_check_recovery(mddev_t *mddev) ...@@ -6120,7 +6294,7 @@ void md_check_recovery(mddev_t *mddev)
if (mddev->flags) if (mddev->flags)
md_update_sb(mddev, 0); md_update_sb(mddev, 0);
rdev_for_each(rdev, rtmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
if (test_and_clear_bit(StateChanged, &rdev->flags)) if (test_and_clear_bit(StateChanged, &rdev->flags))
sysfs_notify_dirent(rdev->sysfs_state); sysfs_notify_dirent(rdev->sysfs_state);
...@@ -6149,13 +6323,13 @@ void md_check_recovery(mddev_t *mddev) ...@@ -6149,13 +6323,13 @@ void md_check_recovery(mddev_t *mddev)
* information must be scrapped * information must be scrapped
*/ */
if (!mddev->degraded) if (!mddev->degraded)
rdev_for_each(rdev, rtmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
rdev->saved_raid_disk = -1; rdev->saved_raid_disk = -1;
mddev->recovery = 0; mddev->recovery = 0;
/* flag recovery needed just to double check */ /* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify_dirent(mddev->sysfs_action);
md_new_event(mddev); md_new_event(mddev);
goto unlock; goto unlock;
} }
...@@ -6216,7 +6390,7 @@ void md_check_recovery(mddev_t *mddev) ...@@ -6216,7 +6390,7 @@ void md_check_recovery(mddev_t *mddev)
mddev->recovery = 0; mddev->recovery = 0;
} else } else
md_wakeup_thread(mddev->sync_thread); md_wakeup_thread(mddev->sync_thread);
sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify_dirent(mddev->sysfs_action);
md_new_event(mddev); md_new_event(mddev);
} }
unlock: unlock:
...@@ -6224,7 +6398,8 @@ void md_check_recovery(mddev_t *mddev) ...@@ -6224,7 +6398,8 @@ void md_check_recovery(mddev_t *mddev)
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
if (test_and_clear_bit(MD_RECOVERY_RECOVER, if (test_and_clear_bit(MD_RECOVERY_RECOVER,
&mddev->recovery)) &mddev->recovery))
sysfs_notify(&mddev->kobj, NULL, "sync_action"); if (mddev->sysfs_action)
sysfs_notify_dirent(mddev->sysfs_action);
} }
mddev_unlock(mddev); mddev_unlock(mddev);
} }
...@@ -6386,14 +6561,8 @@ static __exit void md_exit(void) ...@@ -6386,14 +6561,8 @@ static __exit void md_exit(void)
unregister_sysctl_table(raid_table_header); unregister_sysctl_table(raid_table_header);
remove_proc_entry("mdstat", NULL); remove_proc_entry("mdstat", NULL);
for_each_mddev(mddev, tmp) { for_each_mddev(mddev, tmp) {
struct gendisk *disk = mddev->gendisk;
if (!disk)
continue;
export_array(mddev); export_array(mddev);
del_gendisk(disk); mddev->hold_active = 0;
put_disk(disk);
mddev->gendisk = NULL;
mddev_put(mddev);
} }
} }
...@@ -6418,6 +6587,7 @@ static int set_ro(const char *val, struct kernel_param *kp) ...@@ -6418,6 +6587,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
EXPORT_SYMBOL(register_md_personality); EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality); EXPORT_SYMBOL(unregister_md_personality);
......
...@@ -408,7 +408,6 @@ static int multipath_run (mddev_t *mddev) ...@@ -408,7 +408,6 @@ static int multipath_run (mddev_t *mddev)
int disk_idx; int disk_idx;
struct multipath_info *disk; struct multipath_info *disk;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
if (mddev->level != LEVEL_MULTIPATH) { if (mddev->level != LEVEL_MULTIPATH) {
printk("multipath: %s: raid level not set to multipath IO (%d)\n", printk("multipath: %s: raid level not set to multipath IO (%d)\n",
...@@ -441,7 +440,7 @@ static int multipath_run (mddev_t *mddev) ...@@ -441,7 +440,7 @@ static int multipath_run (mddev_t *mddev)
} }
conf->working_disks = 0; conf->working_disks = 0;
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
disk_idx = rdev->raid_disk; disk_idx = rdev->raid_disk;
if (disk_idx < 0 || if (disk_idx < 0 ||
disk_idx >= mddev->raid_disks) disk_idx >= mddev->raid_disks)
......
...@@ -53,11 +53,10 @@ static int raid0_congested(void *data, int bits) ...@@ -53,11 +53,10 @@ static int raid0_congested(void *data, int bits)
static int create_strip_zones (mddev_t *mddev) static int create_strip_zones (mddev_t *mddev)
{ {
int i, c, j; int i, c, j;
sector_t current_offset, curr_zone_offset; sector_t current_start, curr_zone_start;
sector_t min_spacing; sector_t min_spacing;
raid0_conf_t *conf = mddev_to_conf(mddev); raid0_conf_t *conf = mddev_to_conf(mddev);
mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
struct list_head *tmp1, *tmp2;
struct strip_zone *zone; struct strip_zone *zone;
int cnt; int cnt;
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
...@@ -67,19 +66,19 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -67,19 +66,19 @@ static int create_strip_zones (mddev_t *mddev)
*/ */
conf->nr_strip_zones = 0; conf->nr_strip_zones = 0;
rdev_for_each(rdev1, tmp1, mddev) { list_for_each_entry(rdev1, &mddev->disks, same_set) {
printk("raid0: looking at %s\n", printk(KERN_INFO "raid0: looking at %s\n",
bdevname(rdev1->bdev,b)); bdevname(rdev1->bdev,b));
c = 0; c = 0;
rdev_for_each(rdev2, tmp2, mddev) { list_for_each_entry(rdev2, &mddev->disks, same_set) {
printk("raid0: comparing %s(%llu)", printk(KERN_INFO "raid0: comparing %s(%llu)",
bdevname(rdev1->bdev,b), bdevname(rdev1->bdev,b),
(unsigned long long)rdev1->size); (unsigned long long)rdev1->size);
printk(" with %s(%llu)\n", printk(KERN_INFO " with %s(%llu)\n",
bdevname(rdev2->bdev,b), bdevname(rdev2->bdev,b),
(unsigned long long)rdev2->size); (unsigned long long)rdev2->size);
if (rdev2 == rdev1) { if (rdev2 == rdev1) {
printk("raid0: END\n"); printk(KERN_INFO "raid0: END\n");
break; break;
} }
if (rdev2->size == rdev1->size) if (rdev2->size == rdev1->size)
...@@ -88,19 +87,20 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -88,19 +87,20 @@ static int create_strip_zones (mddev_t *mddev)
* Not unique, don't count it as a new * Not unique, don't count it as a new
* group * group
*/ */
printk("raid0: EQUAL\n"); printk(KERN_INFO "raid0: EQUAL\n");
c = 1; c = 1;
break; break;
} }
printk("raid0: NOT EQUAL\n"); printk(KERN_INFO "raid0: NOT EQUAL\n");
} }
if (!c) { if (!c) {
printk("raid0: ==> UNIQUE\n"); printk(KERN_INFO "raid0: ==> UNIQUE\n");
conf->nr_strip_zones++; conf->nr_strip_zones++;
printk("raid0: %d zones\n", conf->nr_strip_zones); printk(KERN_INFO "raid0: %d zones\n",
conf->nr_strip_zones);
} }
} }
printk("raid0: FINAL %d zones\n", conf->nr_strip_zones); printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones);
conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
conf->nr_strip_zones, GFP_KERNEL); conf->nr_strip_zones, GFP_KERNEL);
...@@ -119,16 +119,17 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -119,16 +119,17 @@ static int create_strip_zones (mddev_t *mddev)
cnt = 0; cnt = 0;
smallest = NULL; smallest = NULL;
zone->dev = conf->devlist; zone->dev = conf->devlist;
rdev_for_each(rdev1, tmp1, mddev) { list_for_each_entry(rdev1, &mddev->disks, same_set) {
int j = rdev1->raid_disk; int j = rdev1->raid_disk;
if (j < 0 || j >= mddev->raid_disks) { if (j < 0 || j >= mddev->raid_disks) {
printk("raid0: bad disk number %d - aborting!\n", j); printk(KERN_ERR "raid0: bad disk number %d - "
"aborting!\n", j);
goto abort; goto abort;
} }
if (zone->dev[j]) { if (zone->dev[j]) {
printk("raid0: multiple devices for %d - aborting!\n", printk(KERN_ERR "raid0: multiple devices for %d - "
j); "aborting!\n", j);
goto abort; goto abort;
} }
zone->dev[j] = rdev1; zone->dev[j] = rdev1;
...@@ -149,16 +150,16 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -149,16 +150,16 @@ static int create_strip_zones (mddev_t *mddev)
cnt++; cnt++;
} }
if (cnt != mddev->raid_disks) { if (cnt != mddev->raid_disks) {
printk("raid0: too few disks (%d of %d) - aborting!\n", printk(KERN_ERR "raid0: too few disks (%d of %d) - "
cnt, mddev->raid_disks); "aborting!\n", cnt, mddev->raid_disks);
goto abort; goto abort;
} }
zone->nb_dev = cnt; zone->nb_dev = cnt;
zone->size = smallest->size * cnt; zone->sectors = smallest->size * cnt * 2;
zone->zone_offset = 0; zone->zone_start = 0;
current_offset = smallest->size; current_start = smallest->size * 2;
curr_zone_offset = zone->size; curr_zone_start = zone->sectors;
/* now do the other zones */ /* now do the other zones */
for (i = 1; i < conf->nr_strip_zones; i++) for (i = 1; i < conf->nr_strip_zones; i++)
...@@ -166,40 +167,41 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -166,40 +167,41 @@ static int create_strip_zones (mddev_t *mddev)
zone = conf->strip_zone + i; zone = conf->strip_zone + i;
zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks;
printk("raid0: zone %d\n", i); printk(KERN_INFO "raid0: zone %d\n", i);
zone->dev_offset = current_offset; zone->dev_start = current_start;
smallest = NULL; smallest = NULL;
c = 0; c = 0;
for (j=0; j<cnt; j++) { for (j=0; j<cnt; j++) {
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
rdev = conf->strip_zone[0].dev[j]; rdev = conf->strip_zone[0].dev[j];
printk("raid0: checking %s ...", bdevname(rdev->bdev,b)); printk(KERN_INFO "raid0: checking %s ...",
if (rdev->size > current_offset) bdevname(rdev->bdev, b));
{ if (rdev->size > current_start / 2) {
printk(" contained as device %d\n", c); printk(KERN_INFO " contained as device %d\n",
c);
zone->dev[c] = rdev; zone->dev[c] = rdev;
c++; c++;
if (!smallest || (rdev->size <smallest->size)) { if (!smallest || (rdev->size <smallest->size)) {
smallest = rdev; smallest = rdev;
printk(" (%llu) is smallest!.\n", printk(KERN_INFO " (%llu) is smallest!.\n",
(unsigned long long)rdev->size); (unsigned long long)rdev->size);
} }
} else } else
printk(" nope.\n"); printk(KERN_INFO " nope.\n");
} }
zone->nb_dev = c; zone->nb_dev = c;
zone->size = (smallest->size - current_offset) * c; zone->sectors = (smallest->size * 2 - current_start) * c;
printk("raid0: zone->nb_dev: %d, size: %llu\n", printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
zone->nb_dev, (unsigned long long)zone->size); zone->nb_dev, (unsigned long long)zone->sectors);
zone->zone_offset = curr_zone_offset; zone->zone_start = curr_zone_start;
curr_zone_offset += zone->size; curr_zone_start += zone->sectors;
current_offset = smallest->size; current_start = smallest->size * 2;
printk("raid0: current zone offset: %llu\n", printk(KERN_INFO "raid0: current zone start: %llu\n",
(unsigned long long)current_offset); (unsigned long long)current_start);
} }
/* Now find appropriate hash spacing. /* Now find appropriate hash spacing.
...@@ -210,16 +212,16 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -210,16 +212,16 @@ static int create_strip_zones (mddev_t *mddev)
* strip though as it's size has no bearing on the efficacy of the hash * strip though as it's size has no bearing on the efficacy of the hash
* table. * table.
*/ */
conf->hash_spacing = curr_zone_offset; conf->spacing = curr_zone_start;
min_spacing = curr_zone_offset; min_spacing = curr_zone_start;
sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*));
for (i=0; i < conf->nr_strip_zones-1; i++) { for (i=0; i < conf->nr_strip_zones-1; i++) {
sector_t sz = 0; sector_t s = 0;
for (j=i; j<conf->nr_strip_zones-1 && for (j = i; j < conf->nr_strip_zones - 1 &&
sz < min_spacing ; j++) s < min_spacing; j++)
sz += conf->strip_zone[j].size; s += conf->strip_zone[j].sectors;
if (sz >= min_spacing && sz < conf->hash_spacing) if (s >= min_spacing && s < conf->spacing)
conf->hash_spacing = sz; conf->spacing = s;
} }
mddev->queue->unplug_fn = raid0_unplug; mddev->queue->unplug_fn = raid0_unplug;
...@@ -227,7 +229,7 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -227,7 +229,7 @@ static int create_strip_zones (mddev_t *mddev)
mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_fn = raid0_congested;
mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_data = mddev;
printk("raid0: done.\n"); printk(KERN_INFO "raid0: done.\n");
return 0; return 0;
abort: abort:
return 1; return 1;
...@@ -262,10 +264,9 @@ static int raid0_mergeable_bvec(struct request_queue *q, ...@@ -262,10 +264,9 @@ static int raid0_mergeable_bvec(struct request_queue *q,
static int raid0_run (mddev_t *mddev) static int raid0_run (mddev_t *mddev)
{ {
unsigned cur=0, i=0, nb_zone; unsigned cur=0, i=0, nb_zone;
s64 size; s64 sectors;
raid0_conf_t *conf; raid0_conf_t *conf;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
if (mddev->chunk_size == 0) { if (mddev->chunk_size == 0) {
printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
...@@ -291,54 +292,54 @@ static int raid0_run (mddev_t *mddev) ...@@ -291,54 +292,54 @@ static int raid0_run (mddev_t *mddev)
/* calculate array device size */ /* calculate array device size */
mddev->array_sectors = 0; mddev->array_sectors = 0;
rdev_for_each(rdev, tmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
mddev->array_sectors += rdev->size * 2; mddev->array_sectors += rdev->size * 2;
printk("raid0 : md_size is %llu blocks.\n", printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
(unsigned long long)mddev->array_sectors / 2); (unsigned long long)mddev->array_sectors);
printk("raid0 : conf->hash_spacing is %llu blocks.\n", printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
(unsigned long long)conf->hash_spacing); (unsigned long long)conf->spacing);
{ {
sector_t s = mddev->array_sectors / 2; sector_t s = mddev->array_sectors;
sector_t space = conf->hash_spacing; sector_t space = conf->spacing;
int round; int round;
conf->preshift = 0; conf->sector_shift = 0;
if (sizeof(sector_t) > sizeof(u32)) { if (sizeof(sector_t) > sizeof(u32)) {
/*shift down space and s so that sector_div will work */ /*shift down space and s so that sector_div will work */
while (space > (sector_t) (~(u32)0)) { while (space > (sector_t) (~(u32)0)) {
s >>= 1; s >>= 1;
space >>= 1; space >>= 1;
s += 1; /* force round-up */ s += 1; /* force round-up */
conf->preshift++; conf->sector_shift++;
} }
} }
round = sector_div(s, (u32)space) ? 1 : 0; round = sector_div(s, (u32)space) ? 1 : 0;
nb_zone = s + round; nb_zone = s + round;
} }
printk("raid0 : nb_zone is %d.\n", nb_zone); printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone);
printk("raid0 : Allocating %Zd bytes for hash.\n", printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n",
nb_zone*sizeof(struct strip_zone*)); nb_zone*sizeof(struct strip_zone*));
conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL);
if (!conf->hash_table) if (!conf->hash_table)
goto out_free_conf; goto out_free_conf;
size = conf->strip_zone[cur].size; sectors = conf->strip_zone[cur].sectors;
conf->hash_table[0] = conf->strip_zone + cur; conf->hash_table[0] = conf->strip_zone + cur;
for (i=1; i< nb_zone; i++) { for (i=1; i< nb_zone; i++) {
while (size <= conf->hash_spacing) { while (sectors <= conf->spacing) {
cur++; cur++;
size += conf->strip_zone[cur].size; sectors += conf->strip_zone[cur].sectors;
} }
size -= conf->hash_spacing; sectors -= conf->spacing;
conf->hash_table[i] = conf->strip_zone + cur; conf->hash_table[i] = conf->strip_zone + cur;
} }
if (conf->preshift) { if (conf->sector_shift) {
conf->hash_spacing >>= conf->preshift; conf->spacing >>= conf->sector_shift;
/* round hash_spacing up so when we divide by it, we /* round spacing up so when we divide by it, we
* err on the side of too-low, which is safest * err on the side of too-low, which is safest
*/ */
conf->hash_spacing++; conf->spacing++;
} }
/* calculate the max read-ahead size. /* calculate the max read-ahead size.
...@@ -387,12 +388,12 @@ static int raid0_stop (mddev_t *mddev) ...@@ -387,12 +388,12 @@ static int raid0_stop (mddev_t *mddev)
static int raid0_make_request (struct request_queue *q, struct bio *bio) static int raid0_make_request (struct request_queue *q, struct bio *bio)
{ {
mddev_t *mddev = q->queuedata; mddev_t *mddev = q->queuedata;
unsigned int sect_in_chunk, chunksize_bits, chunk_size, chunk_sects; unsigned int sect_in_chunk, chunksect_bits, chunk_sects;
raid0_conf_t *conf = mddev_to_conf(mddev); raid0_conf_t *conf = mddev_to_conf(mddev);
struct strip_zone *zone; struct strip_zone *zone;
mdk_rdev_t *tmp_dev; mdk_rdev_t *tmp_dev;
sector_t chunk; sector_t chunk;
sector_t block, rsect; sector_t sector, rsect;
const int rw = bio_data_dir(bio); const int rw = bio_data_dir(bio);
int cpu; int cpu;
...@@ -407,11 +408,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) ...@@ -407,11 +408,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
bio_sectors(bio)); bio_sectors(bio));
part_stat_unlock(); part_stat_unlock();
chunk_size = mddev->chunk_size >> 10;
chunk_sects = mddev->chunk_size >> 9; chunk_sects = mddev->chunk_size >> 9;
chunksize_bits = ffz(~chunk_size); chunksect_bits = ffz(~chunk_sects);
block = bio->bi_sector >> 1; sector = bio->bi_sector;
if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) {
struct bio_pair *bp; struct bio_pair *bp;
...@@ -434,28 +433,27 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) ...@@ -434,28 +433,27 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
{ {
sector_t x = block >> conf->preshift; sector_t x = sector >> conf->sector_shift;
sector_div(x, (u32)conf->hash_spacing); sector_div(x, (u32)conf->spacing);
zone = conf->hash_table[x]; zone = conf->hash_table[x];
} }
while (block >= (zone->zone_offset + zone->size)) while (sector >= zone->zone_start + zone->sectors)
zone++; zone++;
sect_in_chunk = bio->bi_sector & ((chunk_size<<1) -1); sect_in_chunk = bio->bi_sector & (chunk_sects - 1);
{ {
sector_t x = (block - zone->zone_offset) >> chunksize_bits; sector_t x = (sector - zone->zone_start) >> chunksect_bits;
sector_div(x, zone->nb_dev); sector_div(x, zone->nb_dev);
chunk = x; chunk = x;
x = block >> chunksize_bits; x = sector >> chunksect_bits;
tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; tmp_dev = zone->dev[sector_div(x, zone->nb_dev)];
} }
rsect = (((chunk << chunksize_bits) + zone->dev_offset)<<1) rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk;
+ sect_in_chunk;
bio->bi_bdev = tmp_dev->bdev; bio->bi_bdev = tmp_dev->bdev;
bio->bi_sector = rsect + tmp_dev->data_offset; bio->bi_sector = rsect + tmp_dev->data_offset;
...@@ -467,7 +465,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) ...@@ -467,7 +465,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
bad_map: bad_map:
printk("raid0_make_request bug: can't convert block across chunks" printk("raid0_make_request bug: can't convert block across chunks"
" or bigger than %dk %llu %d\n", chunk_size, " or bigger than %dk %llu %d\n", chunk_sects / 2,
(unsigned long long)bio->bi_sector, bio->bi_size >> 10); (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
bio_io_error(bio); bio_io_error(bio);
...@@ -492,10 +490,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) ...@@ -492,10 +490,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev)
seq_printf(seq, "%s/", bdevname( seq_printf(seq, "%s/", bdevname(
conf->strip_zone[j].dev[k]->bdev,b)); conf->strip_zone[j].dev[k]->bdev,b));
seq_printf(seq, "] zo=%d do=%d s=%d\n", seq_printf(seq, "] zs=%d ds=%d s=%d\n",
conf->strip_zone[j].zone_offset, conf->strip_zone[j].zone_start,
conf->strip_zone[j].dev_offset, conf->strip_zone[j].dev_start,
conf->strip_zone[j].size); conf->strip_zone[j].sectors);
} }
#endif #endif
seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); seq_printf(seq, " %dk chunks", mddev->chunk_size/1024);
......
...@@ -1016,12 +1016,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1016,12 +1016,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
* else mark the drive as failed * else mark the drive as failed
*/ */
if (test_bit(In_sync, &rdev->flags) if (test_bit(In_sync, &rdev->flags)
&& (conf->raid_disks - mddev->degraded) == 1) && (conf->raid_disks - mddev->degraded) == 1) {
/* /*
* Don't fail the drive, act as though we were just a * Don't fail the drive, act as though we were just a
* normal single drive * normal single drive.
* However don't try a recovery from this drive as
* it is very likely to fail.
*/ */
mddev->recovery_disabled = 1;
return; return;
}
if (test_and_clear_bit(In_sync, &rdev->flags)) { if (test_and_clear_bit(In_sync, &rdev->flags)) {
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
...@@ -1919,7 +1923,6 @@ static int run(mddev_t *mddev) ...@@ -1919,7 +1923,6 @@ static int run(mddev_t *mddev)
int i, j, disk_idx; int i, j, disk_idx;
mirror_info_t *disk; mirror_info_t *disk;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
if (mddev->level != 1) { if (mddev->level != 1) {
printk("raid1: %s: raid level not set to mirroring (%d)\n", printk("raid1: %s: raid level not set to mirroring (%d)\n",
...@@ -1964,7 +1967,7 @@ static int run(mddev_t *mddev) ...@@ -1964,7 +1967,7 @@ static int run(mddev_t *mddev)
spin_lock_init(&conf->device_lock); spin_lock_init(&conf->device_lock);
mddev->queue->queue_lock = &conf->device_lock; mddev->queue->queue_lock = &conf->device_lock;
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
disk_idx = rdev->raid_disk; disk_idx = rdev->raid_disk;
if (disk_idx >= mddev->raid_disks if (disk_idx >= mddev->raid_disks
|| disk_idx < 0) || disk_idx < 0)
......
...@@ -2025,7 +2025,6 @@ static int run(mddev_t *mddev) ...@@ -2025,7 +2025,6 @@ static int run(mddev_t *mddev)
int i, disk_idx; int i, disk_idx;
mirror_info_t *disk; mirror_info_t *disk;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp;
int nc, fc, fo; int nc, fc, fo;
sector_t stride, size; sector_t stride, size;
...@@ -2108,7 +2107,7 @@ static int run(mddev_t *mddev) ...@@ -2108,7 +2107,7 @@ static int run(mddev_t *mddev)
spin_lock_init(&conf->device_lock); spin_lock_init(&conf->device_lock);
mddev->queue->queue_lock = &conf->device_lock; mddev->queue->queue_lock = &conf->device_lock;
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
disk_idx = rdev->raid_disk; disk_idx = rdev->raid_disk;
if (disk_idx >= mddev->raid_disks if (disk_idx >= mddev->raid_disks
|| disk_idx < 0) || disk_idx < 0)
......
...@@ -3998,7 +3998,6 @@ static int run(mddev_t *mddev) ...@@ -3998,7 +3998,6 @@ static int run(mddev_t *mddev)
int raid_disk, memory; int raid_disk, memory;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct disk_info *disk; struct disk_info *disk;
struct list_head *tmp;
int working_disks = 0; int working_disks = 0;
if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
...@@ -4108,7 +4107,7 @@ static int run(mddev_t *mddev) ...@@ -4108,7 +4107,7 @@ static int run(mddev_t *mddev)
pr_debug("raid5: run(%s) called.\n", mdname(mddev)); pr_debug("raid5: run(%s) called.\n", mdname(mddev));
rdev_for_each(rdev, tmp, mddev) { list_for_each_entry(rdev, &mddev->disks, same_set) {
raid_disk = rdev->raid_disk; raid_disk = rdev->raid_disk;
if (raid_disk >= conf->raid_disks if (raid_disk >= conf->raid_disks
|| raid_disk < 0) || raid_disk < 0)
...@@ -4533,7 +4532,6 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -4533,7 +4532,6 @@ static int raid5_start_reshape(mddev_t *mddev)
{ {
raid5_conf_t *conf = mddev_to_conf(mddev); raid5_conf_t *conf = mddev_to_conf(mddev);
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *rtmp;
int spares = 0; int spares = 0;
int added_devices = 0; int added_devices = 0;
unsigned long flags; unsigned long flags;
...@@ -4541,7 +4539,7 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -4541,7 +4539,7 @@ static int raid5_start_reshape(mddev_t *mddev)
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY; return -EBUSY;
rdev_for_each(rdev, rtmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk < 0 && if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags)) !test_bit(Faulty, &rdev->flags))
spares++; spares++;
...@@ -4563,7 +4561,7 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -4563,7 +4561,7 @@ static int raid5_start_reshape(mddev_t *mddev)
/* Add some new drives, as many as will fit. /* Add some new drives, as many as will fit.
* We know there are enough to make the newly sized array work. * We know there are enough to make the newly sized array work.
*/ */
rdev_for_each(rdev, rtmp, mddev) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk < 0 && if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags)) { !test_bit(Faulty, &rdev->flags)) {
if (raid5_add_disk(mddev, rdev) == 0) { if (raid5_add_disk(mddev, rdev) == 0) {
......
...@@ -1005,6 +1005,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) ...@@ -1005,6 +1005,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
} }
lock_kernel(); lock_kernel();
restart:
ret = -ENXIO; ret = -ENXIO;
disk = get_gendisk(bdev->bd_dev, &partno); disk = get_gendisk(bdev->bd_dev, &partno);
...@@ -1025,6 +1026,19 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) ...@@ -1025,6 +1026,19 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (disk->fops->open) { if (disk->fops->open) {
ret = disk->fops->open(bdev, mode); ret = disk->fops->open(bdev, mode);
if (ret == -ERESTARTSYS) {
/* Lost a race with 'disk' being
* deleted, try again.
* See md.c
*/
disk_put_part(bdev->bd_part);
bdev->bd_part = NULL;
module_put(disk->fops->owner);
put_disk(disk);
bdev->bd_disk = NULL;
mutex_unlock(&bdev->bd_mutex);
goto restart;
}
if (ret) if (ret)
goto out_clear; goto out_clear;
} }
......
...@@ -137,6 +137,9 @@ struct mddev_s ...@@ -137,6 +137,9 @@ struct mddev_s
struct gendisk *gendisk; struct gendisk *gendisk;
struct kobject kobj; struct kobject kobj;
int hold_active;
#define UNTIL_IOCTL 1
#define UNTIL_STOP 2
/* Superblock information */ /* Superblock information */
int major_version, int major_version,
...@@ -215,6 +218,9 @@ struct mddev_s ...@@ -215,6 +218,9 @@ struct mddev_s
#define MD_RECOVERY_FROZEN 9 #define MD_RECOVERY_FROZEN 9
unsigned long recovery; unsigned long recovery;
int recovery_disabled; /* if we detect that recovery
* will always fail, set this
* so we don't loop trying */
int in_sync; /* know to not need resync */ int in_sync; /* know to not need resync */
struct mutex reconfig_mutex; struct mutex reconfig_mutex;
...@@ -244,6 +250,9 @@ struct mddev_s ...@@ -244,6 +250,9 @@ struct mddev_s
struct sysfs_dirent *sysfs_state; /* handle for 'array_state' struct sysfs_dirent *sysfs_state; /* handle for 'array_state'
* file in sysfs. * file in sysfs.
*/ */
struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */
struct work_struct del_work; /* used for delayed sysfs removal */
spinlock_t write_lock; spinlock_t write_lock;
wait_queue_head_t sb_wait; /* for waiting on superblock updates */ wait_queue_head_t sb_wait; /* for waiting on superblock updates */
...@@ -334,17 +343,14 @@ static inline char * mdname (mddev_t * mddev) ...@@ -334,17 +343,14 @@ static inline char * mdname (mddev_t * mddev)
* iterates through some rdev ringlist. It's safe to remove the * iterates through some rdev ringlist. It's safe to remove the
* current 'rdev'. Dont touch 'tmp' though. * current 'rdev'. Dont touch 'tmp' though.
*/ */
#define rdev_for_each_list(rdev, tmp, list) \ #define rdev_for_each_list(rdev, tmp, head) \
\ list_for_each_entry_safe(rdev, tmp, head, same_set)
for ((tmp) = (list).next; \
(rdev) = (list_entry((tmp), mdk_rdev_t, same_set)), \
(tmp) = (tmp)->next, (tmp)->prev != &(list) \
; )
/* /*
* iterates through the 'same array disks' ringlist * iterates through the 'same array disks' ringlist
*/ */
#define rdev_for_each(rdev, tmp, mddev) \ #define rdev_for_each(rdev, tmp, mddev) \
rdev_for_each_list(rdev, tmp, (mddev)->disks) list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)
#define rdev_for_each_rcu(rdev, mddev) \ #define rdev_for_each_rcu(rdev, mddev) \
list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
......
...@@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) { ...@@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) {
return (ev<<32)| sb->events_lo; return (ev<<32)| sb->events_lo;
} }
#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1)
/* /*
* The version-1 superblock : * The version-1 superblock :
* All numeric fields are little-endian. * All numeric fields are little-endian.
......
...@@ -5,9 +5,9 @@ ...@@ -5,9 +5,9 @@
struct strip_zone struct strip_zone
{ {
sector_t zone_offset; /* Zone offset in md_dev */ sector_t zone_start; /* Zone offset in md_dev (in sectors) */
sector_t dev_offset; /* Zone offset in real dev */ sector_t dev_start; /* Zone offset in real dev (in sectors) */
sector_t size; /* Zone size */ sector_t sectors; /* Zone size in sectors */
int nb_dev; /* # of devices attached to the zone */ int nb_dev; /* # of devices attached to the zone */
mdk_rdev_t **dev; /* Devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */
}; };
...@@ -19,8 +19,8 @@ struct raid0_private_data ...@@ -19,8 +19,8 @@ struct raid0_private_data
mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
int nr_strip_zones; int nr_strip_zones;
sector_t hash_spacing; sector_t spacing;
int preshift; /* shift this before divide by hash_spacing */ int sector_shift; /* shift this before divide by spacing */
}; };
typedef struct raid0_private_data raid0_conf_t; typedef struct raid0_private_data raid0_conf_t;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment