Commit 618f0636 authored by Kirill Korotaev, committed by Linus Torvalds

[PATCH] O(1) sb list traversing on syncs

This patch removes O(n^2) super block loops in sync_inodes(),
sync_filesystems() etc.  in favour of using __put_super_and_need_restart()
which I introduced earlier.  We faced noticeably long freezes on sb
syncing when there are thousands of super blocks in the system.
Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 4fea2838
...@@ -485,32 +485,6 @@ static void set_sb_syncing(int val) ...@@ -485,32 +485,6 @@ static void set_sb_syncing(int val)
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
} }
/*
 * Find a superblock with inodes that need to be synced.
 *
 * Scans super_blocks from the tail towards the head and picks the first
 * entry whose s_syncing flag is still clear.  The chosen superblock is
 * marked s_syncing and has its s_count raised while sb_lock is held, then
 * sb_lock is dropped and s_umount is taken for reading.
 *
 * Returns the superblock with s_umount read-held and s_count elevated (the
 * callers shown with this function release it via drop_super()), or NULL
 * once no superblock with s_syncing clear remains.
 *
 * Every call, and every retry after hitting a superblock without s_root,
 * starts again from the list tail; the commit removing this helper cites
 * that rescanning as the O(n^2) behaviour.
 */
static struct super_block *get_super_to_sync(void)
{
struct super_block *sb;
restart:
spin_lock(&sb_lock);
/* Start at the last list entry and walk backwards via s_list.prev. */
sb = sb_entry(super_blocks.prev);
for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
/* Already handed out during this pass. */
if (sb->s_syncing)
continue;
/* Mark as visited and take a reference before sb_lock is released. */
sb->s_syncing = 1;
sb->s_count++;
spin_unlock(&sb_lock);
down_read(&sb->s_umount);
if (!sb->s_root) {
/*
 * No root on this superblock: give it back and rescan from the
 * tail.  NOTE(review): drop_super() presumably releases both
 * s_umount and the s_count reference - confirm in fs/super.c.
 */
drop_super(sb);
goto restart;
}
return sb;
}
/* Walked the whole list without finding a candidate. */
spin_unlock(&sb_lock);
return NULL;
}
/** /**
* sync_inodes - writes all inodes to disk * sync_inodes - writes all inodes to disk
* @wait: wait for completion * @wait: wait for completion
...@@ -530,23 +504,39 @@ restart: ...@@ -530,23 +504,39 @@ restart:
* outstanding dirty inodes, the writeback goes block-at-a-time within the * outstanding dirty inodes, the writeback goes block-at-a-time within the
* filesystem's write_inode(). This is extremely slow. * filesystem's write_inode(). This is extremely slow.
*/ */
void sync_inodes(int wait) static void __sync_inodes(int wait)
{ {
struct super_block *sb; struct super_block *sb;
set_sb_syncing(0); spin_lock(&sb_lock);
while ((sb = get_super_to_sync()) != NULL) { restart:
sync_inodes_sb(sb, 0); list_for_each_entry(sb, &super_blocks, s_list) {
if (sb->s_syncing)
continue;
sb->s_syncing = 1;
sb->s_count++;
spin_unlock(&sb_lock);
down_read(&sb->s_umount);
if (sb->s_root) {
sync_inodes_sb(sb, wait);
sync_blockdev(sb->s_bdev); sync_blockdev(sb->s_bdev);
drop_super(sb);
} }
up_read(&sb->s_umount);
spin_lock(&sb_lock);
if (__put_super_and_need_restart(sb))
goto restart;
}
spin_unlock(&sb_lock);
}
void sync_inodes(int wait)
{
set_sb_syncing(0);
__sync_inodes(0);
if (wait) { if (wait) {
set_sb_syncing(0); set_sb_syncing(0);
while ((sb = get_super_to_sync()) != NULL) { __sync_inodes(1);
sync_inodes_sb(sb, 1);
sync_blockdev(sb->s_bdev);
drop_super(sb);
}
} }
} }
......
...@@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t ...@@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
return error; return error;
} }
/*
 * Find a superblock whose quota information looks dirty.
 *
 * @type: quota type index to check, or -1 to check every type below
 *        MAXQUOTAS.
 *
 * Walks super_blocks from the head under sb_lock.  For the first superblock
 * that has quota enabled and dirty info for a matching type, s_count is
 * raised, sb_lock is dropped and s_umount is taken for reading.
 *
 * Returns that superblock with s_umount read-held and s_count elevated, or
 * NULL when no superblock passes the dirty test.
 *
 * NOTE(review): nothing here marks a superblock as visited, so a caller
 * looping on this function appears to rely on the dirty state being cleared
 * by its sync step - confirm against the caller.
 */
static struct super_block *get_super_to_sync(int type)
{
struct list_head *head;
int cnt, dirty;
restart:
spin_lock(&sb_lock);
list_for_each(head, &super_blocks) {
struct super_block *sb = list_entry(head, struct super_block, s_list);
/* This test just improves performance so it needn't be reliable... */
/* dirty becomes 1 if any matching, enabled quota type has dirty info. */
for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
&& info_any_dirty(&sb_dqopt(sb)->info[cnt]))
dirty = 1;
if (!dirty)
continue;
/* Take a reference while sb_lock is still held, then drop the lock. */
sb->s_count++;
spin_unlock(&sb_lock);
down_read(&sb->s_umount);
if (!sb->s_root) {
/*
 * No root on this superblock: give it back and rescan the list
 * from the head.  NOTE(review): drop_super() presumably releases
 * both s_umount and the s_count reference - confirm.
 */
drop_super(sb);
goto restart;
}
return sb;
}
/* Reached the end of the list without a dirty candidate. */
spin_unlock(&sb_lock);
return NULL;
}
static void quota_sync_sb(struct super_block *sb, int type) static void quota_sync_sb(struct super_block *sb, int type)
{ {
int cnt; int cnt;
...@@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type) ...@@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type)
void sync_dquots(struct super_block *sb, int type) void sync_dquots(struct super_block *sb, int type)
{ {
int cnt, dirty;
if (sb) { if (sb) {
if (sb->s_qcop->quota_sync) if (sb->s_qcop->quota_sync)
quota_sync_sb(sb, type); quota_sync_sb(sb, type);
return;
} }
else {
while ((sb = get_super_to_sync(type)) != NULL) { spin_lock(&sb_lock);
if (sb->s_qcop->quota_sync) restart:
list_for_each_entry(sb, &super_blocks, s_list) {
/* This test just improves performance so it needn't be reliable... */
for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
&& info_any_dirty(&sb_dqopt(sb)->info[cnt]))
dirty = 1;
if (!dirty)
continue;
sb->s_count++;
spin_unlock(&sb_lock);
down_read(&sb->s_umount);
if (sb->s_root && sb->s_qcop->quota_sync)
quota_sync_sb(sb, type); quota_sync_sb(sb, type);
drop_super(sb); up_read(&sb->s_umount);
} spin_lock(&sb_lock);
if (__put_super_and_need_restart(sb))
goto restart;
} }
spin_unlock(&sb_lock);
} }
/* Copy parameters and call proper function */ /* Copy parameters and call proper function */
......
...@@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb) ...@@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb)
*/ */
void sync_supers(void) void sync_supers(void)
{ {
struct super_block * sb; struct super_block *sb;
restart:
spin_lock(&sb_lock); spin_lock(&sb_lock);
sb = sb_entry(super_blocks.next); restart:
while (sb != sb_entry(&super_blocks)) list_for_each_entry(sb, &super_blocks, s_list) {
if (sb->s_dirt) { if (sb->s_dirt) {
sb->s_count++; sb->s_count++;
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
down_read(&sb->s_umount); down_read(&sb->s_umount);
write_super(sb); write_super(sb);
drop_super(sb); up_read(&sb->s_umount);
spin_lock(&sb_lock);
if (__put_super_and_need_restart(sb))
goto restart; goto restart;
} else }
sb = sb_entry(sb->s_list.next); }
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
} }
...@@ -381,20 +383,16 @@ void sync_filesystems(int wait) ...@@ -381,20 +383,16 @@ void sync_filesystems(int wait)
down(&mutex); /* Could be down_interruptible */ down(&mutex); /* Could be down_interruptible */
spin_lock(&sb_lock); spin_lock(&sb_lock);
for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); list_for_each_entry(sb, &super_blocks, s_list) {
sb = sb_entry(sb->s_list.next)) {
if (!sb->s_op->sync_fs) if (!sb->s_op->sync_fs)
continue; continue;
if (sb->s_flags & MS_RDONLY) if (sb->s_flags & MS_RDONLY)
continue; continue;
sb->s_need_sync_fs = 1; sb->s_need_sync_fs = 1;
} }
spin_unlock(&sb_lock);
restart: restart:
spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) {
for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
sb = sb_entry(sb->s_list.next)) {
if (!sb->s_need_sync_fs) if (!sb->s_need_sync_fs)
continue; continue;
sb->s_need_sync_fs = 0; sb->s_need_sync_fs = 0;
...@@ -405,7 +403,10 @@ restart: ...@@ -405,7 +403,10 @@ restart:
down_read(&sb->s_umount); down_read(&sb->s_umount);
if (sb->s_root && (wait || sb->s_dirt)) if (sb->s_root && (wait || sb->s_dirt))
sb->s_op->sync_fs(sb, wait); sb->s_op->sync_fs(sb, wait);
drop_super(sb); up_read(&sb->s_umount);
/* restart only when sb is no longer on the list */
spin_lock(&sb_lock);
if (__put_super_and_need_restart(sb))
goto restart; goto restart;
} }
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
...@@ -422,20 +423,24 @@ restart: ...@@ -422,20 +423,24 @@ restart:
struct super_block * get_super(struct block_device *bdev) struct super_block * get_super(struct block_device *bdev)
{ {
struct list_head *p; struct super_block *sb;
if (!bdev) if (!bdev)
return NULL; return NULL;
rescan:
spin_lock(&sb_lock); spin_lock(&sb_lock);
list_for_each(p, &super_blocks) { rescan:
struct super_block *s = sb_entry(p); list_for_each_entry(sb, &super_blocks, s_list) {
if (s->s_bdev == bdev) { if (sb->s_bdev == bdev) {
s->s_count++; sb->s_count++;
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
down_read(&s->s_umount); down_read(&sb->s_umount);
if (s->s_root) if (sb->s_root)
return s; return sb;
drop_super(s); up_read(&sb->s_umount);
/* restart only when sb is no longer on the list */
spin_lock(&sb_lock);
if (__put_super_and_need_restart(sb))
goto rescan; goto rescan;
} }
} }
...@@ -447,19 +452,21 @@ EXPORT_SYMBOL(get_super); ...@@ -447,19 +452,21 @@ EXPORT_SYMBOL(get_super);
struct super_block * user_get_super(dev_t dev) struct super_block * user_get_super(dev_t dev)
{ {
struct list_head *p; struct super_block *sb;
rescan:
spin_lock(&sb_lock); spin_lock(&sb_lock);
list_for_each(p, &super_blocks) { rescan:
struct super_block *s = sb_entry(p); list_for_each_entry(sb, &super_blocks, s_list) {
if (s->s_dev == dev) { if (sb->s_dev == dev) {
s->s_count++; sb->s_count++;
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
down_read(&s->s_umount); down_read(&sb->s_umount);
if (s->s_root) if (sb->s_root)
return s; return sb;
drop_super(s); up_read(&sb->s_umount);
/* restart only when sb is no longer on the list */
spin_lock(&sb_lock);
if (__put_super_and_need_restart(sb))
goto rescan; goto rescan;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment