Commit 7f3c74fb authored by Chris Mason

Btrfs: Keep extent mappings in ram until pending ordered extents are done

It was possible for stale mappings from disk to be used instead of the
new pending ordered extent.  This adds a flag to the extent map struct
to keep it pinned until the pending ordered extent is actually on disk.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 211f90e6
...@@ -2000,7 +2000,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2000,7 +2000,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
struct block_device *bdev; struct block_device *bdev;
int ret; int ret;
int nr = 0; int nr = 0;
size_t page_offset = 0; size_t pg_offset = 0;
size_t blocksize; size_t blocksize;
loff_t i_size = i_size_read(inode); loff_t i_size = i_size_read(inode);
unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
...@@ -2008,9 +2008,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2008,9 +2008,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 delalloc_end; u64 delalloc_end;
WARN_ON(!PageLocked(page)); WARN_ON(!PageLocked(page));
page_offset = i_size & (PAGE_CACHE_SIZE - 1); pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index || if (page->index > end_index ||
(page->index == end_index && !page_offset)) { (page->index == end_index && !pg_offset)) {
page->mapping->a_ops->invalidatepage(page, 0); page->mapping->a_ops->invalidatepage(page, 0);
unlock_page(page); unlock_page(page);
return 0; return 0;
...@@ -2020,12 +2020,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2020,12 +2020,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
char *userpage; char *userpage;
userpage = kmap_atomic(page, KM_USER0); userpage = kmap_atomic(page, KM_USER0);
memset(userpage + page_offset, 0, memset(userpage + pg_offset, 0,
PAGE_CACHE_SIZE - page_offset); PAGE_CACHE_SIZE - pg_offset);
kunmap_atomic(userpage, KM_USER0); kunmap_atomic(userpage, KM_USER0);
flush_dcache_page(page); flush_dcache_page(page);
} }
page_offset = 0; pg_offset = 0;
set_page_extent_mapped(page); set_page_extent_mapped(page);
...@@ -2088,7 +2088,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2088,7 +2088,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
unlock_start = page_end + 1; unlock_start = page_end + 1;
break; break;
} }
em = epd->get_extent(inode, page, page_offset, cur, em = epd->get_extent(inode, page, pg_offset, cur,
end - cur + 1, 1); end - cur + 1, 1);
if (IS_ERR(em) || !em) { if (IS_ERR(em) || !em) {
SetPageError(page); SetPageError(page);
...@@ -2113,12 +2113,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2113,12 +2113,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
unlock_extent(tree, unlock_start, cur + iosize -1, unlock_extent(tree, unlock_start, cur + iosize -1,
GFP_NOFS); GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook) if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur, tree->ops->writepage_end_io_hook(page, cur,
cur + iosize - 1, cur + iosize - 1,
NULL, 1); NULL, 1);
cur = cur + iosize; cur = cur + iosize;
page_offset += iosize; pg_offset += iosize;
unlock_start = cur; unlock_start = cur;
continue; continue;
} }
...@@ -2127,7 +2128,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2127,7 +2128,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (0 && !test_range_bit(tree, cur, cur + iosize - 1, if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
EXTENT_DIRTY, 0)) { EXTENT_DIRTY, 0)) {
cur = cur + iosize; cur = cur + iosize;
page_offset += iosize; pg_offset += iosize;
continue; continue;
} }
clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
...@@ -2141,6 +2142,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2141,6 +2142,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
SetPageError(page); SetPageError(page);
} else { } else {
unsigned long max_nr = end_index + 1; unsigned long max_nr = end_index + 1;
set_range_writeback(tree, cur, cur + iosize - 1); set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) { if (!PageWriteback(page)) {
printk("warning page %lu not writeback, " printk("warning page %lu not writeback, "
...@@ -2150,14 +2152,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2150,14 +2152,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
} }
ret = submit_extent_page(WRITE, tree, page, sector, ret = submit_extent_page(WRITE, tree, page, sector,
iosize, page_offset, bdev, iosize, pg_offset, bdev,
&epd->bio, max_nr, &epd->bio, max_nr,
end_bio_extent_writepage, 0); end_bio_extent_writepage, 0);
if (ret) if (ret)
SetPageError(page); SetPageError(page);
} }
cur = cur + iosize; cur = cur + iosize;
page_offset += iosize; pg_offset += iosize;
nr++; nr++;
} }
done: done:
...@@ -2579,7 +2581,8 @@ int try_release_extent_mapping(struct extent_map_tree *map, ...@@ -2579,7 +2581,8 @@ int try_release_extent_mapping(struct extent_map_tree *map,
spin_unlock(&map->lock); spin_unlock(&map->lock);
break; break;
} }
if (em->start != start) { if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
em->start != start) {
spin_unlock(&map->lock); spin_unlock(&map->lock);
free_extent_map(em); free_extent_map(em);
break; break;
......
...@@ -173,6 +173,9 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) ...@@ -173,6 +173,9 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
static int mergable_maps(struct extent_map *prev, struct extent_map *next) static int mergable_maps(struct extent_map *prev, struct extent_map *next)
{ {
if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
return 0;
if (extent_map_end(prev) == next->start && if (extent_map_end(prev) == next->start &&
prev->flags == next->flags && prev->flags == next->flags &&
prev->bdev == next->bdev && prev->bdev == next->bdev &&
...@@ -320,6 +323,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) ...@@ -320,6 +323,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{ {
int ret = 0; int ret = 0;
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
BUG_ON(spin_trylock(&tree->lock)); BUG_ON(spin_trylock(&tree->lock));
rb_erase(&em->rb_node, &tree->map); rb_erase(&em->rb_node, &tree->map);
em->in_tree = 0; em->in_tree = 0;
......
...@@ -8,6 +8,9 @@ ...@@ -8,6 +8,9 @@
#define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_INLINE (u64)-2
#define EXTENT_MAP_DELALLOC (u64)-1 #define EXTENT_MAP_DELALLOC (u64)-1
/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
struct extent_map { struct extent_map {
struct rb_node rb_node; struct rb_node rb_node;
......
...@@ -192,7 +192,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, ...@@ -192,7 +192,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
(char *)&sector_sum->sum); (char *)&sector_sum->sum);
sector_sum->offset = page_offset(bvec->bv_page) + sector_sum->offset = page_offset(bvec->bv_page) +
bvec->bv_offset; bvec->bv_offset;
sector_sum++; sector_sum++;
bio_index++; bio_index++;
total_bytes += bvec->bv_len; total_bytes += bvec->bv_len;
...@@ -201,9 +200,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, ...@@ -201,9 +200,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
} }
btrfs_add_ordered_sum(inode, ordered, sums); btrfs_add_ordered_sum(inode, ordered, sums);
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
if (total_bytes != bio->bi_size) {
printk("warning, total bytes %lu bio size %u\n", total_bytes, bio->bi_size);
}
return 0; return 0;
} }
...@@ -372,6 +368,7 @@ next_sector: ...@@ -372,6 +368,7 @@ next_sector:
write_extent_buffer(leaf, &sector_sum->sum, write_extent_buffer(leaf, &sector_sum->sum,
(unsigned long)item, BTRFS_CRC32_SIZE); (unsigned long)item, BTRFS_CRC32_SIZE);
} }
total_bytes += root->sectorsize; total_bytes += root->sectorsize;
sector_sum++; sector_sum++;
if (total_bytes < sums->len) { if (total_bytes < sums->len) {
......
...@@ -358,9 +358,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) ...@@ -358,9 +358,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
struct extent_map *split = NULL; struct extent_map *split = NULL;
struct extent_map *split2 = NULL; struct extent_map *split2 = NULL;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *tmp;
u64 len = end - start + 1; u64 len = end - start + 1;
u64 next_start;
int ret; int ret;
int testend = 1; int testend = 1;
...@@ -381,8 +379,16 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) ...@@ -381,8 +379,16 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
spin_unlock(&em_tree->lock); spin_unlock(&em_tree->lock);
break; break;
} }
tmp = rb_entry(&em->rb_node, struct extent_map, rb_node); if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
next_start = tmp->start; start = em->start + em->len;
free_extent_map(em);
spin_unlock(&em_tree->lock);
if (start < end) {
len = end - start + 1;
continue;
}
break;
}
remove_extent_mapping(em_tree, em); remove_extent_mapping(em_tree, em);
if (em->block_start < EXTENT_MAP_LAST_BYTE && if (em->block_start < EXTENT_MAP_LAST_BYTE &&
......
...@@ -144,6 +144,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) ...@@ -144,6 +144,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
em->len = ins.offset; em->len = ins.offset;
em->block_start = ins.objectid; em->block_start = ins.objectid;
em->bdev = root->fs_info->fs_devices->latest_bdev; em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
while(1) { while(1) {
spin_lock(&em_tree->lock); spin_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em); ret = add_extent_mapping(em_tree, em);
...@@ -483,6 +484,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ...@@ -483,6 +484,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_ordered_extent *ordered_extent; struct btrfs_ordered_extent *ordered_extent;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em;
u64 alloc_hint = 0; u64 alloc_hint = 0;
struct list_head list; struct list_head list;
struct btrfs_key ins; struct btrfs_key ins;
...@@ -524,6 +527,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ...@@ -524,6 +527,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->len, ordered_extent->len,
ordered_extent->len, 0); ordered_extent->len, 0);
BUG_ON(ret); BUG_ON(ret);
spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, ordered_extent->file_offset,
ordered_extent->len);
if (em) {
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
free_extent_map(em);
}
spin_unlock(&em_tree->lock);
btrfs_drop_extent_cache(inode, ordered_extent->file_offset, btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->file_offset +
ordered_extent->len - 1); ordered_extent->len - 1);
...@@ -538,6 +552,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ...@@ -538,6 +552,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
btrfs_ordered_update_i_size(inode, ordered_extent); btrfs_ordered_update_i_size(inode, ordered_extent);
btrfs_remove_ordered_extent(inode, ordered_extent); btrfs_remove_ordered_extent(inode, ordered_extent);
/* once for us */ /* once for us */
btrfs_put_ordered_extent(ordered_extent); btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */ /* once for the tree */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment