Commit 03158cd7 authored by Nick Piggin, committed by Linus Torvalds

fs: restore nobh

Implement nobh in the new aops (write_begin/write_end).  This is a bit tricky.
FWIW, nobh_truncate is now implemented in a way that does not create blocks in
sparse regions — creating them there was a silly thing for it to have been doing.

ext2 survives fsx and fsstress. jfs is converted as well... ext3
should be easy to do (but not done yet).

[akpm@linux-foundation.org: coding-style fixes]
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b6af1bcd
...@@ -2369,7 +2369,7 @@ out_unlock: ...@@ -2369,7 +2369,7 @@ out_unlock:
} }
/* /*
* nobh_prepare_write()'s prereads are special: the buffer_heads are freed * nobh_write_begin()'s prereads are special: the buffer_heads are freed
* immediately, while under the page lock. So it needs a special end_io * immediately, while under the page lock. So it needs a special end_io
* handler which does not touch the bh after unlocking it. * handler which does not touch the bh after unlocking it.
*/ */
...@@ -2378,17 +2378,46 @@ static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate) ...@@ -2378,17 +2378,46 @@ static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
__end_buffer_read_notouch(bh, uptodate); __end_buffer_read_notouch(bh, uptodate);
} }
/*
 * Attach the singly-linked list of buffers created by nobh_write_begin() to
 * the page: convert the list to the circular form that page buffers normally
 * use, and take care of races with concurrent page dirtying.
 *
 * Caller must hold the page lock (enforced by the BUG_ON below).
 */
static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh;

	BUG_ON(!PageLocked(page));

	/* private_lock serializes against other buffer attach/detach paths. */
	spin_lock(&page->mapping->private_lock);
	bh = head;
	do {
		/*
		 * If the page was dirtied while the buffers were detached,
		 * propagate the dirty state to every buffer so that data is
		 * not lost once the buffers are attached.
		 */
		if (PageDirty(page))
			set_buffer_dirty(bh);
		/* Close the circle: the singly-linked list ends with NULL. */
		if (!bh->b_this_page)
			bh->b_this_page = head;
		bh = bh->b_this_page;
	} while (bh != head);
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}
/* /*
* On entry, the page is fully not uptodate. * On entry, the page is fully not uptodate.
* On exit the page is fully uptodate in the areas outside (from,to) * On exit the page is fully uptodate in the areas outside (from,to)
*/ */
int nobh_prepare_write(struct page *page, unsigned from, unsigned to, int nobh_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block) get_block_t *get_block)
{ {
struct inode *inode = page->mapping->host; struct inode *inode = mapping->host;
const unsigned blkbits = inode->i_blkbits; const unsigned blkbits = inode->i_blkbits;
const unsigned blocksize = 1 << blkbits; const unsigned blocksize = 1 << blkbits;
struct buffer_head *head, *bh; struct buffer_head *head, *bh;
struct page *page;
pgoff_t index;
unsigned from, to;
unsigned block_in_page; unsigned block_in_page;
unsigned block_start, block_end; unsigned block_start, block_end;
sector_t block_in_file; sector_t block_in_file;
...@@ -2397,8 +2426,23 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, ...@@ -2397,8 +2426,23 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
int ret = 0; int ret = 0;
int is_mapped_to_disk = 1; int is_mapped_to_disk = 1;
if (page_has_buffers(page)) index = pos >> PAGE_CACHE_SHIFT;
return block_prepare_write(page, from, to, get_block); from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
page = __grab_cache_page(mapping, index);
if (!page)
return -ENOMEM;
*pagep = page;
*fsdata = NULL;
if (page_has_buffers(page)) {
unlock_page(page);
page_cache_release(page);
*pagep = NULL;
return block_write_begin(file, mapping, pos, len, flags, pagep,
fsdata, get_block);
}
if (PageMappedToDisk(page)) if (PageMappedToDisk(page))
return 0; return 0;
...@@ -2413,8 +2457,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, ...@@ -2413,8 +2457,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
* than the circular one we're used to. * than the circular one we're used to.
*/ */
head = alloc_page_buffers(page, blocksize, 0); head = alloc_page_buffers(page, blocksize, 0);
if (!head) if (!head) {
return -ENOMEM; ret = -ENOMEM;
goto out_release;
}
block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
...@@ -2483,15 +2529,12 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, ...@@ -2483,15 +2529,12 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
if (is_mapped_to_disk) if (is_mapped_to_disk)
SetPageMappedToDisk(page); SetPageMappedToDisk(page);
do { *fsdata = head; /* to be released by nobh_write_end */
bh = head;
head = head->b_this_page;
free_buffer_head(bh);
} while (head);
return 0; return 0;
failed: failed:
BUG_ON(!ret);
/* /*
* Error recovery is a bit difficult. We need to zero out blocks that * Error recovery is a bit difficult. We need to zero out blocks that
* were newly allocated, and dirty them to ensure they get written out. * were newly allocated, and dirty them to ensure they get written out.
...@@ -2499,64 +2542,57 @@ failed: ...@@ -2499,64 +2542,57 @@ failed:
* the handling of potential IO errors during writeout would be hard * the handling of potential IO errors during writeout would be hard
* (could try doing synchronous writeout, but what if that fails too?) * (could try doing synchronous writeout, but what if that fails too?)
*/ */
spin_lock(&page->mapping->private_lock); attach_nobh_buffers(page, head);
bh = head; page_zero_new_buffers(page, from, to);
block_start = 0;
do {
if (PageUptodate(page))
set_buffer_uptodate(bh);
if (PageDirty(page))
set_buffer_dirty(bh);
block_end = block_start+blocksize; out_release:
if (block_end <= from) unlock_page(page);
goto next; page_cache_release(page);
if (block_start >= to) *pagep = NULL;
goto next;
if (buffer_new(bh)) { if (pos + len > inode->i_size)
clear_buffer_new(bh); vmtruncate(inode, inode->i_size);
if (!buffer_uptodate(bh)) {
zero_user_page(page, block_start, bh->b_size, KM_USER0);
set_buffer_uptodate(bh);
}
mark_buffer_dirty(bh);
}
next:
block_start = block_end;
if (!bh->b_this_page)
bh->b_this_page = head;
bh = bh->b_this_page;
} while (bh != head);
attach_page_buffers(page, head);
spin_unlock(&page->mapping->private_lock);
return ret; return ret;
} }
EXPORT_SYMBOL(nobh_prepare_write); EXPORT_SYMBOL(nobh_write_begin);
/* int nobh_write_end(struct file *file, struct address_space *mapping,
* Make sure any changes to nobh_commit_write() are reflected in loff_t pos, unsigned len, unsigned copied,
* nobh_truncate_page(), since it doesn't call commit_write(). struct page *page, void *fsdata)
*/
int nobh_commit_write(struct file *file, struct page *page,
unsigned from, unsigned to)
{ {
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; struct buffer_head *head = NULL;
struct buffer_head *bh;
if (page_has_buffers(page)) if (!PageMappedToDisk(page)) {
return generic_commit_write(file, page, from, to); if (unlikely(copied < len) && !page_has_buffers(page))
attach_nobh_buffers(page, head);
if (page_has_buffers(page))
return generic_write_end(file, mapping, pos, len,
copied, page, fsdata);
}
SetPageUptodate(page); SetPageUptodate(page);
set_page_dirty(page); set_page_dirty(page);
if (pos > inode->i_size) { if (pos+copied > inode->i_size) {
i_size_write(inode, pos); i_size_write(inode, pos+copied);
mark_inode_dirty(inode); mark_inode_dirty(inode);
} }
return 0;
unlock_page(page);
page_cache_release(page);
head = fsdata;
while (head) {
bh = head;
head = head->b_this_page;
free_buffer_head(bh);
}
return copied;
} }
EXPORT_SYMBOL(nobh_commit_write); EXPORT_SYMBOL(nobh_write_end);
/* /*
* nobh_writepage() - based on block_full_write_page() except * nobh_writepage() - based on block_full_write_page() except
...@@ -2609,44 +2645,79 @@ out: ...@@ -2609,44 +2645,79 @@ out:
} }
EXPORT_SYMBOL(nobh_writepage); EXPORT_SYMBOL(nobh_writepage);
/* int nobh_truncate_page(struct address_space *mapping,
* This function assumes that ->prepare_write() uses nobh_prepare_write(). loff_t from, get_block_t *get_block)
*/
int nobh_truncate_page(struct address_space *mapping, loff_t from)
{ {
struct inode *inode = mapping->host;
unsigned blocksize = 1 << inode->i_blkbits;
pgoff_t index = from >> PAGE_CACHE_SHIFT; pgoff_t index = from >> PAGE_CACHE_SHIFT;
unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned offset = from & (PAGE_CACHE_SIZE-1);
unsigned to; unsigned blocksize;
sector_t iblock;
unsigned length, pos;
struct inode *inode = mapping->host;
struct page *page; struct page *page;
const struct address_space_operations *a_ops = mapping->a_ops; struct buffer_head map_bh;
int ret = 0; int err;
if ((offset & (blocksize - 1)) == 0) blocksize = 1 << inode->i_blkbits;
goto out; length = offset & (blocksize - 1);
/* Block boundary? Nothing to do */
if (!length)
return 0;
length = blocksize - length;
iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
ret = -ENOMEM;
page = grab_cache_page(mapping, index); page = grab_cache_page(mapping, index);
err = -ENOMEM;
if (!page) if (!page)
goto out; goto out;
to = (offset + blocksize) & ~(blocksize - 1); if (page_has_buffers(page)) {
ret = a_ops->prepare_write(NULL, page, offset, to); has_buffers:
if (ret == 0) { unlock_page(page);
zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, page_cache_release(page);
KM_USER0); return block_truncate_page(mapping, from, get_block);
/*
* It would be more correct to call aops->commit_write()
* here, but this is more efficient.
*/
SetPageUptodate(page);
set_page_dirty(page);
} }
/* Find the buffer that contains "offset" */
pos = blocksize;
while (offset >= pos) {
iblock++;
pos += blocksize;
}
err = get_block(inode, iblock, &map_bh, 0);
if (err)
goto unlock;
/* unmapped? It's a hole - nothing to do */
if (!buffer_mapped(&map_bh))
goto unlock;
/* Ok, it's mapped. Make sure it's up-to-date */
if (!PageUptodate(page)) {
err = mapping->a_ops->readpage(NULL, page);
if (err) {
page_cache_release(page);
goto out;
}
lock_page(page);
if (!PageUptodate(page)) {
err = -EIO;
goto unlock;
}
if (page_has_buffers(page))
goto has_buffers;
}
zero_user_page(page, offset, length, KM_USER0);
set_page_dirty(page);
err = 0;
unlock:
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
out: out:
return ret; return err;
} }
EXPORT_SYMBOL(nobh_truncate_page); EXPORT_SYMBOL(nobh_truncate_page);
......
...@@ -659,6 +659,20 @@ ext2_write_begin(struct file *file, struct address_space *mapping, ...@@ -659,6 +659,20 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata); return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
} }
/*
 * write_begin for the nobh aops: delegate straight to nobh_write_begin()
 * with ext2's block mapper.
 *
 * Directory data in the page cache still goes through ext2_write_begin();
 * supporting nobh there would require reworking the directory code to pass
 * around offsets rather than struct pages.
 */
static int
ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	int ret;

	ret = nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
			       ext2_get_block);
	return ret;
}
static int ext2_nobh_writepage(struct page *page, static int ext2_nobh_writepage(struct page *page,
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
...@@ -710,7 +724,8 @@ const struct address_space_operations ext2_nobh_aops = { ...@@ -710,7 +724,8 @@ const struct address_space_operations ext2_nobh_aops = {
.readpages = ext2_readpages, .readpages = ext2_readpages,
.writepage = ext2_nobh_writepage, .writepage = ext2_nobh_writepage,
.sync_page = block_sync_page, .sync_page = block_sync_page,
/* XXX: todo */ .write_begin = ext2_nobh_write_begin,
.write_end = nobh_write_end,
.bmap = ext2_bmap, .bmap = ext2_bmap,
.direct_IO = ext2_direct_IO, .direct_IO = ext2_direct_IO,
.writepages = ext2_writepages, .writepages = ext2_writepages,
...@@ -927,7 +942,8 @@ void ext2_truncate (struct inode * inode) ...@@ -927,7 +942,8 @@ void ext2_truncate (struct inode * inode)
if (mapping_is_xip(inode->i_mapping)) if (mapping_is_xip(inode->i_mapping))
xip_truncate_page(inode->i_mapping, inode->i_size); xip_truncate_page(inode->i_mapping, inode->i_size);
else if (test_opt(inode->i_sb, NOBH)) else if (test_opt(inode->i_sb, NOBH))
nobh_truncate_page(inode->i_mapping, inode->i_size); nobh_truncate_page(inode->i_mapping,
inode->i_size, ext2_get_block);
else else
block_truncate_page(inode->i_mapping, block_truncate_page(inode->i_mapping,
inode->i_size, ext2_get_block); inode->i_size, ext2_get_block);
......
...@@ -279,8 +279,7 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping, ...@@ -279,8 +279,7 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata) struct page **pagep, void **fsdata)
{ {
*pagep = NULL; return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
jfs_get_block); jfs_get_block);
} }
...@@ -306,7 +305,7 @@ const struct address_space_operations jfs_aops = { ...@@ -306,7 +305,7 @@ const struct address_space_operations jfs_aops = {
.writepages = jfs_writepages, .writepages = jfs_writepages,
.sync_page = block_sync_page, .sync_page = block_sync_page,
.write_begin = jfs_write_begin, .write_begin = jfs_write_begin,
.write_end = generic_write_end, .write_end = nobh_write_end,
.bmap = jfs_bmap, .bmap = jfs_bmap,
.direct_IO = jfs_direct_IO, .direct_IO = jfs_direct_IO,
}; };
...@@ -359,7 +358,7 @@ void jfs_truncate(struct inode *ip) ...@@ -359,7 +358,7 @@ void jfs_truncate(struct inode *ip)
{ {
jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
IWRITE_LOCK(ip, RDWRLOCK_NORMAL); IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
jfs_truncate_nolock(ip, ip->i_size); jfs_truncate_nolock(ip, ip->i_size);
......
...@@ -226,9 +226,13 @@ sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); ...@@ -226,9 +226,13 @@ sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
int block_truncate_page(struct address_space *, loff_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *);
int file_fsync(struct file *, struct dentry *, int); int file_fsync(struct file *, struct dentry *, int);
int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*); int nobh_write_begin(struct file *, struct address_space *,
int nobh_commit_write(struct file *, struct page *, unsigned, unsigned); loff_t, unsigned, unsigned,
int nobh_truncate_page(struct address_space *, loff_t); struct page **, void **, get_block_t*);
int nobh_write_end(struct file *, struct address_space *,
loff_t, unsigned, unsigned,
struct page *, void *);
int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
int nobh_writepage(struct page *page, get_block_t *get_block, int nobh_writepage(struct page *page, get_block_t *get_block,
struct writeback_control *wbc); struct writeback_control *wbc);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment