Commit a32ea1e1 authored by NeilBrown's avatar NeilBrown Committed by Linus Torvalds

Fix read/truncate race

do_generic_mapping_read currently samples the i_size at the start and doesn't
do so again unless it needs to call ->readpage to load a page.  After
->readpage it has to re-sample i_size as a truncate may have caused that page
to be filled with zeros, and the read() call should not see these.

However there are other activities that might cause ->readpage to be called on
a page between the time that do_generic_mapping_read samples i_size and when
it finds that it has an uptodate page.  These include at least read-ahead and
possibly another thread performing a read.

So do_generic_mapping_read must sample i_size *after* it has an uptodate page.
 Thus the current sampling at the start and after a read can be replaced with
a sampling before the copy-out.

The same change applied to __generic_file_splice_read.

Note that this fixes any race with truncate_complete_page, but does not fix a
possible race with truncate_partial_page.  If a partial truncate happens after
do_generic_mapping_read samples i_size and before the copy_out, the nuls that
truncate_partial_page place in the page could be copied out incorrectly.

I think the best fix for that is to *not* zero out parts of the page in
truncate_partial_page, but rather to zero out the tail of a page when
increasing i_size.
Signed-off-by: default avatarNeil Brown <neilb@suse.de>
Cc: Jens Axboe <jens.axboe@oracle.com>
Acked-by: default avatarNick Piggin <npiggin@suse.de>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent e21ea246
...@@ -867,13 +867,11 @@ void do_generic_mapping_read(struct address_space *mapping, ...@@ -867,13 +867,11 @@ void do_generic_mapping_read(struct address_space *mapping,
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
unsigned long index; unsigned long index;
unsigned long end_index;
unsigned long offset; unsigned long offset;
unsigned long last_index; unsigned long last_index;
unsigned long next_index; unsigned long next_index;
unsigned long prev_index; unsigned long prev_index;
unsigned int prev_offset; unsigned int prev_offset;
loff_t isize;
struct page *cached_page; struct page *cached_page;
int error; int error;
struct file_ra_state ra = *_ra; struct file_ra_state ra = *_ra;
...@@ -886,27 +884,12 @@ void do_generic_mapping_read(struct address_space *mapping, ...@@ -886,27 +884,12 @@ void do_generic_mapping_read(struct address_space *mapping,
last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK; offset = *ppos & ~PAGE_CACHE_MASK;
isize = i_size_read(inode);
if (!isize)
goto out;
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
for (;;) { for (;;) {
struct page *page; struct page *page;
unsigned long end_index;
loff_t isize;
unsigned long nr, ret; unsigned long nr, ret;
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE;
if (index >= end_index) {
if (index > end_index)
goto out;
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
if (nr <= offset) {
goto out;
}
}
nr = nr - offset;
cond_resched(); cond_resched();
if (index == next_index) if (index == next_index)
next_index = page_cache_readahead(mapping, &ra, filp, next_index = page_cache_readahead(mapping, &ra, filp,
...@@ -921,6 +904,32 @@ find_page: ...@@ -921,6 +904,32 @@ find_page:
if (!PageUptodate(page)) if (!PageUptodate(page))
goto page_not_up_to_date; goto page_not_up_to_date;
page_ok: page_ok:
/*
* i_size must be checked after we know the page is Uptodate.
*
* Checking i_size after the check allows us to calculate
* the correct value for "nr", which means the zero-filled
* part of the page is not copied back to userspace (unless
* another truncate extends the file - this is desired though).
*/
isize = i_size_read(inode);
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
if (unlikely(!isize || index > end_index)) {
page_cache_release(page);
goto out;
}
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE;
if (index == end_index) {
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
if (nr <= offset) {
page_cache_release(page);
goto out;
}
}
nr = nr - offset;
/* If users can be writing to this page using arbitrary /* If users can be writing to this page using arbitrary
* virtual addresses, take care about potential aliasing * virtual addresses, take care about potential aliasing
...@@ -1007,31 +1016,6 @@ readpage: ...@@ -1007,31 +1016,6 @@ readpage:
unlock_page(page); unlock_page(page);
} }
/*
* i_size must be checked after we have done ->readpage.
*
* Checking i_size after the readpage allows us to calculate
* the correct value for "nr", which means the zero-filled
* part of the page is not copied back to userspace (unless
* another truncate extends the file - this is desired though).
*/
isize = i_size_read(inode);
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
if (unlikely(!isize || index > end_index)) {
page_cache_release(page);
goto out;
}
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE;
if (index == end_index) {
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
if (nr <= offset) {
page_cache_release(page);
goto out;
}
}
nr = nr - offset;
goto page_ok; goto page_ok;
readpage_error: readpage_error:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment