Commit f6098cf4 authored by Anton Altaparmakov

NTFS: Fix ntfs_{read,write}page() to cope with concurrent truncates better.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
parent 4e64c886
...@@ -59,39 +59,49 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) ...@@ -59,39 +59,49 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
unsigned long flags; unsigned long flags;
struct buffer_head *first, *tmp; struct buffer_head *first, *tmp;
struct page *page; struct page *page;
struct inode *vi;
ntfs_inode *ni; ntfs_inode *ni;
int page_uptodate = 1; int page_uptodate = 1;
page = bh->b_page; page = bh->b_page;
ni = NTFS_I(page->mapping->host); vi = page->mapping->host;
ni = NTFS_I(vi);
if (likely(uptodate)) { if (likely(uptodate)) {
s64 file_ofs, initialized_size; loff_t i_size;
s64 file_ofs, init_size;
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
bh_offset(bh); bh_offset(bh);
read_lock_irqsave(&ni->size_lock, flags); read_lock_irqsave(&ni->size_lock, flags);
initialized_size = ni->initialized_size; init_size = ni->initialized_size;
i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags); read_unlock_irqrestore(&ni->size_lock, flags);
if (unlikely(init_size > i_size)) {
/* Race with shrinking truncate. */
init_size = i_size;
}
/* Check for the current buffer head overflowing. */ /* Check for the current buffer head overflowing. */
if (file_ofs + bh->b_size > initialized_size) { if (unlikely(file_ofs + bh->b_size > init_size)) {
char *addr; u8 *kaddr;
int ofs = 0; int ofs;
if (file_ofs < initialized_size) ofs = 0;
ofs = initialized_size - file_ofs; if (file_ofs < init_size)
addr = kmap_atomic(page, KM_BIO_SRC_IRQ); ofs = init_size - file_ofs;
memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
memset(kaddr + bh_offset(bh) + ofs, 0,
bh->b_size - ofs);
kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
flush_dcache_page(page); flush_dcache_page(page);
kunmap_atomic(addr, KM_BIO_SRC_IRQ);
} }
} else { } else {
clear_buffer_uptodate(bh); clear_buffer_uptodate(bh);
SetPageError(page); SetPageError(page);
ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
(unsigned long long)bh->b_blocknr); "0x%llx.", (unsigned long long)bh->b_blocknr);
} }
first = page_buffers(page); first = page_buffers(page);
local_irq_save(flags); local_irq_save(flags);
...@@ -124,7 +134,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) ...@@ -124,7 +134,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
if (likely(page_uptodate && !PageError(page))) if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page); SetPageUptodate(page);
} else { } else {
char *addr; u8 *kaddr;
unsigned int i, recs; unsigned int i, recs;
u32 rec_size; u32 rec_size;
...@@ -132,12 +142,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) ...@@ -132,12 +142,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
recs = PAGE_CACHE_SIZE / rec_size; recs = PAGE_CACHE_SIZE / rec_size;
/* Should have been verified before we got here... */ /* Should have been verified before we got here... */
BUG_ON(!recs); BUG_ON(!recs);
addr = kmap_atomic(page, KM_BIO_SRC_IRQ); kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
for (i = 0; i < recs; i++) for (i = 0; i < recs; i++)
post_read_mst_fixup((NTFS_RECORD*)(addr + post_read_mst_fixup((NTFS_RECORD*)(kaddr +
i * rec_size), rec_size); i * rec_size), rec_size);
kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
flush_dcache_page(page); flush_dcache_page(page);
kunmap_atomic(addr, KM_BIO_SRC_IRQ);
if (likely(page_uptodate && !PageError(page))) if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page); SetPageUptodate(page);
} }
...@@ -168,8 +178,11 @@ still_busy: ...@@ -168,8 +178,11 @@ still_busy:
*/ */
static int ntfs_read_block(struct page *page) static int ntfs_read_block(struct page *page)
{ {
loff_t i_size;
VCN vcn; VCN vcn;
LCN lcn; LCN lcn;
s64 init_size;
struct inode *vi;
ntfs_inode *ni; ntfs_inode *ni;
ntfs_volume *vol; ntfs_volume *vol;
runlist_element *rl; runlist_element *rl;
...@@ -180,7 +193,8 @@ static int ntfs_read_block(struct page *page) ...@@ -180,7 +193,8 @@ static int ntfs_read_block(struct page *page)
int i, nr; int i, nr;
unsigned char blocksize_bits; unsigned char blocksize_bits;
ni = NTFS_I(page->mapping->host); vi = page->mapping->host;
ni = NTFS_I(vi);
vol = ni->vol; vol = ni->vol;
/* $MFT/$DATA must have its complete runlist in memory at all times. */ /* $MFT/$DATA must have its complete runlist in memory at all times. */
...@@ -199,11 +213,28 @@ static int ntfs_read_block(struct page *page) ...@@ -199,11 +213,28 @@ static int ntfs_read_block(struct page *page)
bh = head = page_buffers(page); bh = head = page_buffers(page);
BUG_ON(!bh); BUG_ON(!bh);
/*
* We may be racing with truncate. To avoid some of the problems we
* now take a snapshot of the various sizes and use those for the whole
* of the function. In case of an extending truncate it just means we
* may leave some buffers unmapped which are now allocated. This is
* not a problem since these buffers will just get mapped when a write
* occurs. In case of a shrinking truncate, we will detect this later
* on due to the runlist being incomplete and if the page is being
* fully truncated, truncate will throw it away as soon as we unlock
* it so no need to worry what we do with it.
*/
iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
read_lock_irqsave(&ni->size_lock, flags); read_lock_irqsave(&ni->size_lock, flags);
lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits; init_size = ni->initialized_size;
i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags); read_unlock_irqrestore(&ni->size_lock, flags);
if (unlikely(init_size > i_size)) {
/* Race with shrinking truncate. */
init_size = i_size;
}
zblock = (init_size + blocksize - 1) >> blocksize_bits;
/* Loop through all the buffers in the page. */ /* Loop through all the buffers in the page. */
rl = NULL; rl = NULL;
...@@ -366,6 +397,8 @@ handle_zblock: ...@@ -366,6 +397,8 @@ handle_zblock:
*/ */
static int ntfs_readpage(struct file *file, struct page *page) static int ntfs_readpage(struct file *file, struct page *page)
{ {
loff_t i_size;
struct inode *vi;
ntfs_inode *ni, *base_ni; ntfs_inode *ni, *base_ni;
u8 *kaddr; u8 *kaddr;
ntfs_attr_search_ctx *ctx; ntfs_attr_search_ctx *ctx;
...@@ -384,7 +417,8 @@ retry_readpage: ...@@ -384,7 +417,8 @@ retry_readpage:
unlock_page(page); unlock_page(page);
return 0; return 0;
} }
ni = NTFS_I(page->mapping->host); vi = page->mapping->host;
ni = NTFS_I(vi);
/* /*
* Only $DATA attributes can be encrypted and only unnamed $DATA * Only $DATA attributes can be encrypted and only unnamed $DATA
* attributes can be compressed. Index root can have the flags set but * attributes can be compressed. Index root can have the flags set but
...@@ -458,7 +492,12 @@ retry_readpage: ...@@ -458,7 +492,12 @@ retry_readpage:
read_lock_irqsave(&ni->size_lock, flags); read_lock_irqsave(&ni->size_lock, flags);
if (unlikely(attr_len > ni->initialized_size)) if (unlikely(attr_len > ni->initialized_size))
attr_len = ni->initialized_size; attr_len = ni->initialized_size;
i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags); read_unlock_irqrestore(&ni->size_lock, flags);
if (unlikely(attr_len > i_size)) {
/* Race with shrinking truncate. */
attr_len = i_size;
}
kaddr = kmap_atomic(page, KM_USER0); kaddr = kmap_atomic(page, KM_USER0);
/* Copy the data to the page. */ /* Copy the data to the page. */
memcpy(kaddr, (u8*)ctx->attr + memcpy(kaddr, (u8*)ctx->attr +
...@@ -1383,8 +1422,8 @@ retry_writepage: ...@@ -1383,8 +1422,8 @@ retry_writepage:
unsigned int ofs = i_size & ~PAGE_CACHE_MASK; unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
kaddr = kmap_atomic(page, KM_USER0); kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0); kunmap_atomic(kaddr, KM_USER0);
flush_dcache_page(page);
} }
/* Handle mst protected attributes. */ /* Handle mst protected attributes. */
if (NInoMstProtected(ni)) if (NInoMstProtected(ni))
...@@ -1447,34 +1486,33 @@ retry_writepage: ...@@ -1447,34 +1486,33 @@ retry_writepage:
BUG_ON(PageWriteback(page)); BUG_ON(PageWriteback(page));
set_page_writeback(page); set_page_writeback(page);
unlock_page(page); unlock_page(page);
/*
* Here, we do not need to zero the out of bounds area everytime
* because the below memcpy() already takes care of the
* mmap-at-end-of-file requirements. If the file is converted to a
* non-resident one, then the code path use is switched to the
* non-resident one where the zeroing happens on each ntfs_writepage()
* invocation.
*/
attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
i_size = i_size_read(vi); i_size = i_size_read(vi);
if (unlikely(attr_len > i_size)) { if (unlikely(attr_len > i_size)) {
/* Race with shrinking truncate or a failed truncate. */
attr_len = i_size; attr_len = i_size;
ctx->attr->data.resident.value_length = cpu_to_le32(attr_len); /*
* If the truncate failed, fix it up now. If a concurrent
* truncate, we do its job, so it does not have to do anything.
*/
err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
attr_len);
/* Shrinking cannot fail. */
BUG_ON(err);
} }
kaddr = kmap_atomic(page, KM_USER0); kaddr = kmap_atomic(page, KM_USER0);
/* Copy the data from the page to the mft record. */ /* Copy the data from the page to the mft record. */
memcpy((u8*)ctx->attr + memcpy((u8*)ctx->attr +
le16_to_cpu(ctx->attr->data.resident.value_offset), le16_to_cpu(ctx->attr->data.resident.value_offset),
kaddr, attr_len); kaddr, attr_len);
flush_dcache_mft_record_page(ctx->ntfs_ino);
/* Zero out of bounds area in the page cache page. */ /* Zero out of bounds area in the page cache page. */
memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0); kunmap_atomic(kaddr, KM_USER0);
flush_dcache_mft_record_page(ctx->ntfs_ino);
flush_dcache_page(page);
/* We are done with the page. */
end_page_writeback(page); end_page_writeback(page);
/* Finally, mark the mft record dirty, so it gets written back. */
/* Mark the mft record dirty, so it gets written back. */
mark_mft_record_dirty(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino);
ntfs_attr_put_search_ctx(ctx); ntfs_attr_put_search_ctx(ctx);
unmap_mft_record(base_ni); unmap_mft_record(base_ni);
......
...@@ -1166,6 +1166,8 @@ err_out: ...@@ -1166,6 +1166,8 @@ err_out:
* *
* Return 0 on success and -errno on error. In the error case, the inode will * Return 0 on success and -errno on error. In the error case, the inode will
* have had make_bad_inode() executed on it. * have had make_bad_inode() executed on it.
*
* Note this cannot be called for AT_INDEX_ALLOCATION.
*/ */
static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
{ {
...@@ -1242,8 +1244,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) ...@@ -1242,8 +1244,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
} }
} }
/* /*
* The encryption flag set in an index root just means to * The compressed/sparse flag set in an index root just means
* compress all files. * to compress all files.
*/ */
if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
ntfs_error(vi->i_sb, "Found mst protected attribute " ntfs_error(vi->i_sb, "Found mst protected attribute "
...@@ -1319,8 +1321,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) ...@@ -1319,8 +1321,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
"the mapping pairs array."); "the mapping pairs array.");
goto unm_err_out; goto unm_err_out;
} }
if ((NInoCompressed(ni) || NInoSparse(ni)) && if (NInoCompressed(ni) || NInoSparse(ni)) {
ni->type != AT_INDEX_ROOT) {
if (a->data.non_resident.compression_unit != 4) { if (a->data.non_resident.compression_unit != 4) {
ntfs_error(vi->i_sb, "Found nonstandard " ntfs_error(vi->i_sb, "Found nonstandard "
"compression unit (%u instead " "compression unit (%u instead "
......
/* /*
* malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project. * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project.
* *
* Copyright (c) 2001-2004 Anton Altaparmakov * Copyright (c) 2001-2005 Anton Altaparmakov
* *
* This program/include file is free software; you can redistribute it and/or * This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published * modify it under the terms of the GNU General Public License as published
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment