Commit 6af118ce authored by Chris Mason's avatar Chris Mason

Btrfs: Index extent buffers in an rbtree

Before, extent buffers were a temporary object, meant to map a number of pages
at once and collect operations on them.

But, a few extra fields have crept in, and they are also the best place to
store a per-tree block lock field as well.  This commit puts the extent
buffers into an rbtree, and ensures a single extent buffer for each
tree block.
Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent 4a096752
......@@ -381,7 +381,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
end = eb->start + end - 1;
release_extent_buffer_tail_pages(eb);
err:
free_extent_buffer(eb);
out:
......@@ -563,21 +562,21 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
struct extent_map_tree *map;
int ret;
if (page_count(page) > 3) {
/* once for page->private, once for the caller, once
* once for the page cache
*/
return 0;
}
tree = &BTRFS_I(page->mapping->host)->io_tree;
map = &BTRFS_I(page->mapping->host)->extent_tree;
ret = try_release_extent_state(map, tree, page, gfp_flags);
if (!ret) {
return 0;
}
ret = try_release_extent_buffer(tree, page);
if (ret == 1) {
invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
ClearPagePrivate(page);
set_page_private(page, 0);
page_cache_release(page);
}
return ret;
}
......@@ -588,7 +587,8 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
extent_invalidatepage(tree, page, offset);
btree_releasepage(page, GFP_NOFS);
if (PagePrivate(page)) {
invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
printk("warning page private not zero on page %Lu\n",
page_offset(page));
ClearPagePrivate(page);
set_page_private(page, 0);
page_cache_release(page);
......@@ -1456,7 +1456,6 @@ fail_tree_root:
free_extent_buffer(tree_root->node);
fail_sys_array:
fail_sb_buffer:
extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
btrfs_stop_workers(&fs_info->fixup_workers);
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
......@@ -1705,13 +1704,6 @@ int close_ctree(struct btrfs_root *root)
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
extent_io_tree_empty_lru(&fs_info->free_space_cache);
extent_io_tree_empty_lru(&fs_info->block_group_cache);
extent_io_tree_empty_lru(&fs_info->pinned_extents);
extent_io_tree_empty_lru(&fs_info->pending_del);
extent_io_tree_empty_lru(&fs_info->extent_ins);
extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
btrfs_stop_workers(&fs_info->fixup_workers);
......
......@@ -91,29 +91,16 @@ void extent_io_tree_init(struct extent_io_tree *tree,
struct address_space *mapping, gfp_t mask)
{
tree->state.rb_node = NULL;
tree->buffer.rb_node = NULL;
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
spin_lock_init(&tree->lru_lock);
spin_lock_init(&tree->buffer_lock);
tree->mapping = mapping;
INIT_LIST_HEAD(&tree->buffer_lru);
tree->lru_size = 0;
tree->last = NULL;
}
EXPORT_SYMBOL(extent_io_tree_init);
void extent_io_tree_empty_lru(struct extent_io_tree *tree)
{
struct extent_buffer *eb;
while(!list_empty(&tree->buffer_lru)) {
eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
lru);
list_del_init(&eb->lru);
free_extent_buffer(eb);
}
}
EXPORT_SYMBOL(extent_io_tree_empty_lru);
struct extent_state *alloc_extent_state(gfp_t mask)
{
struct extent_state *state;
......@@ -245,6 +232,50 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
return ret;
}
static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
u64 offset, struct rb_node *node)
{
struct rb_root *root = &tree->buffer;
struct rb_node ** p = &root->rb_node;
struct rb_node * parent = NULL;
struct extent_buffer *eb;
while(*p) {
parent = *p;
eb = rb_entry(parent, struct extent_buffer, rb_node);
if (offset < eb->start)
p = &(*p)->rb_left;
else if (offset > eb->start)
p = &(*p)->rb_right;
else
return eb;
}
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return NULL;
}
static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
u64 offset)
{
struct rb_root *root = &tree->buffer;
struct rb_node * n = root->rb_node;
struct extent_buffer *eb;
while(n) {
eb = rb_entry(n, struct extent_buffer, rb_node);
if (offset < eb->start)
n = n->rb_left;
else if (offset > eb->start)
n = n->rb_right;
else
return eb;
}
return NULL;
}
/*
* utility function to look for merge candidates inside a given range.
* Any extents with matching state are merged together into a single
......@@ -1817,9 +1848,8 @@ void set_page_extent_mapped(struct page *page)
{
if (!PagePrivate(page)) {
SetPagePrivate(page);
WARN_ON(!page->mapping->a_ops->invalidatepage);
set_page_private(page, EXTENT_PAGE_PRIVATE);
page_cache_get(page);
set_page_private(page, EXTENT_PAGE_PRIVATE);
}
}
......@@ -2627,51 +2657,6 @@ out:
return sector;
}
static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb)
{
if (list_empty(&eb->lru)) {
extent_buffer_get(eb);
list_add(&eb->lru, &tree->buffer_lru);
tree->lru_size++;
if (tree->lru_size >= BUFFER_LRU_MAX) {
struct extent_buffer *rm;
rm = list_entry(tree->buffer_lru.prev,
struct extent_buffer, lru);
tree->lru_size--;
list_del_init(&rm->lru);
free_extent_buffer(rm);
}
} else
list_move(&eb->lru, &tree->buffer_lru);
return 0;
}
static struct extent_buffer *find_lru(struct extent_io_tree *tree,
u64 start, unsigned long len)
{
struct list_head *lru = &tree->buffer_lru;
struct list_head *cur = lru->next;
struct extent_buffer *eb;
if (list_empty(lru))
return NULL;
do {
eb = list_entry(cur, struct extent_buffer, lru);
if (eb->start == start && eb->len == len) {
extent_buffer_get(eb);
return eb;
}
cur = cur->next;
} while (cur != lru);
return NULL;
}
static inline unsigned long num_extent_pages(u64 start, u64 len)
{
return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
(start >> PAGE_CACHE_SHIFT);
}
static inline struct page *extent_buffer_page(struct extent_buffer *eb,
unsigned long i)
{
......@@ -2688,44 +2673,10 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb,
return p;
}
int release_extent_buffer_tail_pages(struct extent_buffer *eb)
{
unsigned long num_pages = num_extent_pages(eb->start, eb->len);
struct page *page;
unsigned long i;
if (num_pages == 1)
return 0;
for (i = 1; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
page_cache_release(page);
}
return 0;
}
int invalidate_extent_lru(struct extent_io_tree *tree, u64 start,
unsigned long len)
static inline unsigned long num_extent_pages(u64 start, u64 len)
{
struct list_head *lru = &tree->buffer_lru;
struct list_head *cur = lru->next;
struct extent_buffer *eb;
int found = 0;
spin_lock(&tree->lru_lock);
if (list_empty(lru))
goto out;
do {
eb = list_entry(cur, struct extent_buffer, lru);
if (eb->start <= start && eb->start + eb->len > start) {
eb->flags &= ~EXTENT_UPTODATE;
}
cur = cur->next;
} while (cur != lru);
out:
spin_unlock(&tree->lru_lock);
return found;
return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
(start >> PAGE_CACHE_SHIFT);
}
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
......@@ -2736,15 +2687,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
struct extent_buffer *eb = NULL;
unsigned long flags;
spin_lock(&tree->lru_lock);
eb = find_lru(tree, start, len);
spin_unlock(&tree->lru_lock);
if (eb) {
return eb;
}
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
INIT_LIST_HEAD(&eb->lru);
eb->start = start;
eb->len = len;
spin_lock_irqsave(&leak_lock, flags);
......@@ -2773,17 +2716,24 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
unsigned long i;
unsigned long index = start >> PAGE_CACHE_SHIFT;
struct extent_buffer *eb;
struct extent_buffer *exists = NULL;
struct page *p;
struct address_space *mapping = tree->mapping;
int uptodate = 1;
spin_lock(&tree->buffer_lock);
eb = buffer_search(tree, start);
if (eb) {
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
return eb;
}
spin_unlock(&tree->buffer_lock);
eb = __alloc_extent_buffer(tree, start, len, mask);
if (!eb)
return NULL;
if (eb->flags & EXTENT_BUFFER_FILLED)
goto lru_add;
if (page0) {
eb->first_page = page0;
i = 1;
......@@ -2800,7 +2750,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
if (!p) {
WARN_ON(1);
goto fail;
goto free_eb;
}
set_page_extent_mapped(p);
mark_page_accessed(p);
......@@ -2818,25 +2768,28 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
eb->flags |= EXTENT_UPTODATE;
eb->flags |= EXTENT_BUFFER_FILLED;
lru_add:
spin_lock(&tree->lru_lock);
add_lru(tree, eb);
spin_unlock(&tree->lru_lock);
spin_lock(&tree->buffer_lock);
exists = buffer_tree_insert(tree, start, &eb->rb_node);
if (exists) {
/* add one reference for the caller */
atomic_inc(&exists->refs);
spin_unlock(&tree->buffer_lock);
goto free_eb;
}
spin_unlock(&tree->buffer_lock);
/* add one reference for the tree */
atomic_inc(&eb->refs);
return eb;
fail:
spin_lock(&tree->lru_lock);
list_del_init(&eb->lru);
spin_unlock(&tree->lru_lock);
free_eb:
if (!atomic_dec_and_test(&eb->refs))
return NULL;
for (index = 1; index < i; index++) {
return exists;
for (index = 1; index < i; index++)
page_cache_release(extent_buffer_page(eb, index));
}
if (i > 0)
page_cache_release(extent_buffer_page(eb, 0));
page_cache_release(extent_buffer_page(eb, 0));
__free_extent_buffer(eb);
return NULL;
return exists;
}
EXPORT_SYMBOL(alloc_extent_buffer);
......@@ -2844,89 +2797,27 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len,
gfp_t mask)
{
unsigned long num_pages = num_extent_pages(start, len);
unsigned long i;
unsigned long index = start >> PAGE_CACHE_SHIFT;
struct extent_buffer *eb;
struct page *p;
struct address_space *mapping = tree->mapping;
int uptodate = 1;
eb = __alloc_extent_buffer(tree, start, len, mask);
if (!eb)
return NULL;
if (eb->flags & EXTENT_BUFFER_FILLED)
goto lru_add;
for (i = 0; i < num_pages; i++, index++) {
p = find_get_page(mapping, index);
if (!p) {
goto fail;
}
if (TestSetPageLocked(p)) {
page_cache_release(p);
goto fail;
}
set_page_extent_mapped(p);
mark_page_accessed(p);
if (i == 0) {
eb->first_page = p;
set_page_extent_head(p, len);
} else {
set_page_private(p, EXTENT_PAGE_PRIVATE);
}
if (!PageUptodate(p))
uptodate = 0;
unlock_page(p);
}
if (uptodate)
eb->flags |= EXTENT_UPTODATE;
eb->flags |= EXTENT_BUFFER_FILLED;
spin_lock(&tree->buffer_lock);
eb = buffer_search(tree, start);
if (eb)
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
lru_add:
spin_lock(&tree->lru_lock);
add_lru(tree, eb);
spin_unlock(&tree->lru_lock);
return eb;
fail:
spin_lock(&tree->lru_lock);
list_del_init(&eb->lru);
spin_unlock(&tree->lru_lock);
if (!atomic_dec_and_test(&eb->refs))
return NULL;
for (index = 1; index < i; index++) {
page_cache_release(extent_buffer_page(eb, index));
}
if (i > 0)
page_cache_release(extent_buffer_page(eb, 0));
__free_extent_buffer(eb);
return NULL;
}
EXPORT_SYMBOL(find_extent_buffer);
void free_extent_buffer(struct extent_buffer *eb)
{
unsigned long i;
unsigned long num_pages;
if (!eb)
return;
if (!atomic_dec_and_test(&eb->refs))
return;
WARN_ON(!list_empty(&eb->lru));
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 1; i < num_pages; i++) {
page_cache_release(extent_buffer_page(eb, i));
}
page_cache_release(extent_buffer_page(eb, 0));
__free_extent_buffer(eb);
WARN_ON(1);
}
EXPORT_SYMBOL(free_extent_buffer);
......@@ -3583,3 +3474,35 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
}
EXPORT_SYMBOL(memmove_extent_buffer);
int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
{
u64 start = page_offset(page);
struct extent_buffer *eb;
int ret = 1;
unsigned long i;
unsigned long num_pages;
spin_lock(&tree->buffer_lock);
eb = buffer_search(tree, start);
if (!eb)
goto out;
if (atomic_read(&eb->refs) > 1) {
ret = 0;
goto out;
}
/* at this point we can safely release the extent buffer */
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
struct page *page = extent_buffer_page(eb, i);
page_cache_release(page);
}
rb_erase(&eb->rb_node, &tree->buffer);
__free_extent_buffer(eb);
out:
spin_unlock(&tree->buffer_lock);
return ret;
}
EXPORT_SYMBOL(try_release_extent_buffer);
......@@ -54,13 +54,12 @@ struct extent_io_ops {
struct extent_io_tree {
struct rb_root state;
struct rb_root buffer;
struct address_space *mapping;
u64 dirty_bytes;
spinlock_t lock;
spinlock_t buffer_lock;
struct extent_io_ops *ops;
spinlock_t lru_lock;
struct list_head buffer_lru;
int lru_size;
struct extent_state *last;
};
......@@ -87,10 +86,10 @@ struct extent_buffer {
unsigned long map_start;
unsigned long map_len;
struct page *first_page;
struct list_head lru;
atomic_t refs;
int flags;
struct list_head leak_list;
struct rb_node rb_node;
};
struct extent_map_tree;
......@@ -112,10 +111,10 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode,
void extent_io_tree_init(struct extent_io_tree *tree,
struct address_space *mapping, gfp_t mask);
void extent_io_tree_empty_lru(struct extent_io_tree *tree);
int try_release_extent_mapping(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page);
int try_release_extent_state(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
......@@ -241,8 +240,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long *map_start,
unsigned long *map_len, int km);
void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
int invalidate_extent_lru(struct extent_io_tree *tree, u64 start,
unsigned long len);
int release_extent_buffer_tail_pages(struct extent_buffer *eb);
int extent_range_uptodate(struct extent_io_tree *tree,
u64 start, u64 end);
......
......@@ -2670,7 +2670,6 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
map = &BTRFS_I(page->mapping->host)->extent_tree;
ret = try_release_extent_mapping(map, tree, page, gfp_flags);
if (ret == 1) {
invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
ClearPagePrivate(page);
set_page_private(page, 0);
page_cache_release(page);
......@@ -2721,8 +2720,6 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
ClearPageChecked(page);
if (PagePrivate(page)) {
invalidate_extent_lru(tree, page_offset(page),
PAGE_CACHE_SIZE);
ClearPagePrivate(page);
set_page_private(page, 0);
page_cache_release(page);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment