Commit dcbeb0be authored by Linus Torvalds

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: always pin metadata in discard mode
  Btrfs: enable discard support
  Btrfs: add -o discard option
  Btrfs: properly wait log writers during log sync
  Btrfs: fix possible ENOSPC problems with truncate
  Btrfs: fix btrfs acl #ifdef checks
  Btrfs: streamline tree-log btree block writeout
  Btrfs: avoid tree log commit when there are no changes
  Btrfs: only write one super copy during fsync
parents 2b650df2 444528b3
......@@ -27,7 +27,7 @@
#include "btrfs_inode.h"
#include "xattr.h"
#ifdef CONFIG_BTRFS_POSIX_ACL
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
{
......@@ -313,7 +313,7 @@ struct xattr_handler btrfs_xattr_acl_access_handler = {
.set = btrfs_xattr_acl_access_set,
};
#else /* CONFIG_BTRFS_POSIX_ACL */
#else /* CONFIG_BTRFS_FS_POSIX_ACL */
int btrfs_acl_chmod(struct inode *inode)
{
......@@ -325,4 +325,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
return 0;
}
#endif /* CONFIG_BTRFS_POSIX_ACL */
#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
......@@ -86,6 +86,12 @@ struct btrfs_inode {
* transid of the trans_handle that last modified this inode
*/
u64 last_trans;
/*
* log transid when this inode was last modified
*/
u64 last_sub_trans;
/*
* transid that last logged this inode
*/
......
......@@ -1009,6 +1009,7 @@ struct btrfs_root {
atomic_t log_writers;
atomic_t log_commit[2];
unsigned long log_transid;
unsigned long last_log_commit;
unsigned long log_batch;
pid_t log_start_pid;
bool log_multiple_pids;
......@@ -1152,6 +1153,7 @@ struct btrfs_root {
#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
#define BTRFS_MOUNT_NOSSD (1 << 9)
#define BTRFS_MOUNT_DISCARD (1 << 10)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
......@@ -2373,7 +2375,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options);
int btrfs_sync_fs(struct super_block *sb, int wait);
/* acl.c */
#ifdef CONFIG_BTRFS_POSIX_ACL
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
int btrfs_check_acl(struct inode *inode, int mask);
#else
#define btrfs_check_acl NULL
......
......@@ -917,6 +917,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->log_writers, 0);
root->log_batch = 0;
root->log_transid = 0;
root->last_log_commit = 0;
extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping, GFP_NOFS);
......@@ -1087,6 +1088,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(root->log_root);
root->log_root = log_root;
root->log_transid = 0;
root->last_log_commit = 0;
return 0;
}
......
......@@ -1568,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
return ret;
}
#ifdef BIO_RW_DISCARD
/*
 * Forward a discard request for one range of @bdev to the block layer.
 *
 * @start and @len are each shifted right by 9 before being passed on
 * (presumably a byte -> 512-byte-sector conversion; confirm against
 * blkdev_issue_discard()'s contract).  The request is issued with
 * GFP_KERNEL and DISCARD_FL_BARRIER, and the callee's return value is
 * not examined.
 */
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
{
	u64 shifted_start = start >> 9;
	u64 shifted_len = len >> 9;

	blkdev_issue_discard(bdev, shifted_start, shifted_len, GFP_KERNEL,
			     DISCARD_FL_BARRIER);
}
#endif
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
u64 num_bytes)
{
#ifdef BIO_RW_DISCARD
int ret;
u64 map_length = num_bytes;
struct btrfs_multi_bio *multi = NULL;
if (!btrfs_test_opt(root, DISCARD))
return 0;
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
bytenr, &map_length, &multi, 0);
......@@ -1604,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
}
return ret;
#else
return 0;
#endif
}
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
......@@ -3690,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
if (is_data)
goto pinit;
/*
* discard is sloooow, and so triggering discards on
* individual btree blocks isn't a good plan. Just
* pin everything in discard mode.
*/
if (btrfs_test_opt(root, DISCARD))
goto pinit;
buf = btrfs_find_tree_block(root, bytenr, num_bytes);
if (!buf)
goto pinit;
......
......@@ -1086,8 +1086,10 @@ out_nolock:
btrfs_end_transaction(trans, root);
else
btrfs_commit_transaction(trans, root);
} else {
} else if (ret != BTRFS_NO_LOG_SYNC) {
btrfs_commit_transaction(trans, root);
} else {
btrfs_end_transaction(trans, root);
}
}
if (file->f_flags & O_DIRECT) {
......@@ -1137,6 +1139,13 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
int ret = 0;
struct btrfs_trans_handle *trans;
/* we wait first, since the writeback may change the inode */
root->log_batch++;
/* the VFS called filemap_fdatawrite for us */
btrfs_wait_ordered_range(inode, 0, (u64)-1);
root->log_batch++;
/*
* check the transaction that last modified this inode
* and see if it's already been committed
......@@ -1144,6 +1153,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
if (!BTRFS_I(inode)->last_trans)
goto out;
/*
* if the last transaction that changed this file was before
* the current transaction, we can bail out now without any
* syncing
*/
mutex_lock(&root->fs_info->trans_mutex);
if (BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
......@@ -1153,13 +1167,6 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
}
mutex_unlock(&root->fs_info->trans_mutex);
root->log_batch++;
filemap_fdatawrite(inode->i_mapping);
btrfs_wait_ordered_range(inode, 0, (u64)-1);
root->log_batch++;
if (datasync && !(inode->i_state & I_DIRTY_PAGES))
goto out;
/*
* ok we haven't committed the transaction yet, let's do a commit
*/
......@@ -1188,6 +1195,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
*/
mutex_unlock(&dentry->d_inode->i_mutex);
if (ret != BTRFS_NO_LOG_SYNC) {
if (ret > 0) {
ret = btrfs_commit_transaction(trans, root);
} else {
......@@ -1197,6 +1205,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
else
ret = btrfs_commit_transaction(trans, root);
}
} else {
ret = btrfs_end_transaction(trans, root);
}
mutex_lock(&dentry->d_inode->i_mutex);
out:
return ret > 0 ? EIO : ret;
......
......@@ -3032,12 +3032,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
if ((offset & (blocksize - 1)) == 0)
goto out;
ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
if (ret)
goto out;
ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
if (ret)
goto out;
ret = -ENOMEM;
again:
page = grab_cache_page(mapping, index);
if (!page)
if (!page) {
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
goto out;
}
page_start = page_offset(page);
page_end = page_start + PAGE_CACHE_SIZE - 1;
......@@ -3070,6 +3080,10 @@ again:
goto again;
}
clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
GFP_NOFS);
ret = btrfs_set_extent_delalloc(inode, page_start, page_end);
if (ret) {
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
......@@ -3088,6 +3102,9 @@ again:
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock:
if (ret)
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
unlock_page(page);
page_cache_release(page);
out:
......@@ -3111,7 +3128,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
if (size <= hole_start)
return 0;
btrfs_truncate_page(inode->i_mapping, inode->i_size);
err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
if (err)
return err;
while (1) {
struct btrfs_ordered_extent *ordered;
......@@ -3480,6 +3499,7 @@ static noinline void init_btrfs_i(struct inode *inode)
bi->generation = 0;
bi->sequence = 0;
bi->last_trans = 0;
bi->last_sub_trans = 0;
bi->logged_trans = 0;
bi->delalloc_bytes = 0;
bi->reserved_bytes = 0;
......@@ -4980,7 +5000,9 @@ again:
set_page_dirty(page);
SetPageUptodate(page);
BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
BTRFS_I(inode)->last_trans = root->fs_info->generation;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock:
......@@ -5005,7 +5027,9 @@ static void btrfs_truncate(struct inode *inode)
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return;
btrfs_truncate_page(inode->i_mapping, inode->i_size);
ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
if (ret)
return;
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
trans = btrfs_start_transaction(root, 1);
......@@ -5100,6 +5124,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
if (!ei)
return NULL;
ei->last_trans = 0;
ei->last_sub_trans = 0;
ei->logged_trans = 0;
ei->outstanding_extents = 0;
ei->reserved_extents = 0;
......
......@@ -66,7 +66,8 @@ enum {
Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
Opt_discard, Opt_err,
};
static match_table_t tokens = {
......@@ -88,6 +89,7 @@ static match_table_t tokens = {
{Opt_notreelog, "notreelog"},
{Opt_flushoncommit, "flushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
{Opt_discard, "discard"},
{Opt_err, NULL},
};
......@@ -257,6 +259,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
info->metadata_ratio);
}
break;
case Opt_discard:
btrfs_set_opt(info->mount_opt, DISCARD);
break;
default:
break;
}
......@@ -344,7 +349,7 @@ static int btrfs_fill_super(struct super_block *sb,
sb->s_export_op = &btrfs_export_ops;
sb->s_xattr = btrfs_xattr_handlers;
sb->s_time_gran = 1;
#ifdef CONFIG_BTRFS_POSIX_ACL
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
sb->s_flags |= MS_POSIXACL;
#endif
......
......@@ -344,9 +344,9 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
/*
* when btree blocks are allocated, they have some corresponding bits set for
* them in one of two extent_io trees. This is used to make sure all of
* those extents are on disk for transaction or log commit
* those extents are sent to disk but does not wait on them
*/
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
int btrfs_write_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages)
{
int ret;
......@@ -394,6 +394,29 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
page_cache_release(page);
}
}
if (err)
werr = err;
return werr;
}
/*
* when btree blocks are allocated, they have some corresponding bits set for
* them in one of two extent_io trees. This is used to make sure all of
* those extents are on disk for transaction or log commit. We wait
* on all the pages and clear them from the dirty pages state tree
*/
int btrfs_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages)
{
int ret;
int err = 0;
int werr = 0;
struct page *page;
struct inode *btree_inode = root->fs_info->btree_inode;
u64 start = 0;
u64 end;
unsigned long index;
while (1) {
ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
EXTENT_DIRTY);
......@@ -424,6 +447,22 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
return werr;
}
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit.
 *
 * Calls btrfs_write_marked_extents() and then always calls
 * btrfs_wait_marked_extents() on the same tree, even when the first call
 * reported a failure.
 *
 * Returns 0 when both steps return 0.  Otherwise returns the first step's
 * value if it is non-zero, else the second step's value, so the caller
 * receives the helper's own error code instead of a collapsed boolean 1.
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
					struct extent_io_tree *dirty_pages)
{
	int ret;
	int ret2;

	ret = btrfs_write_marked_extents(root, dirty_pages);
	ret2 = btrfs_wait_marked_extents(root, dirty_pages);

	/* the write-side error takes precedence over the wait-side one */
	if (ret)
		return ret;
	return ret2;
}
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
......
......@@ -79,6 +79,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
struct inode *inode)
{
BTRFS_I(inode)->last_trans = trans->transaction->transid;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
}
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
......@@ -107,5 +108,9 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages);
int btrfs_write_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages);
int btrfs_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
#endif
......@@ -1980,6 +1980,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
int ret;
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
u64 log_transid = 0;
mutex_lock(&root->log_mutex);
index1 = root->log_transid % 2;
......@@ -1994,12 +1995,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
wait_log_commit(trans, root, root->log_transid - 1);
while (root->log_multiple_pids) {
while (1) {
unsigned long batch = root->log_batch;
if (root->log_multiple_pids) {
mutex_unlock(&root->log_mutex);
schedule_timeout_uninterruptible(1);
mutex_lock(&root->log_mutex);
}
wait_for_writer(trans, root);
if (batch == root->log_batch)
break;
......@@ -2012,12 +2014,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out;
}
ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
/* we start IO on all the marked extents here, but we don't actually
* wait for them until later.
*/
ret = btrfs_write_marked_extents(log, &log->dirty_log_pages);
BUG_ON(ret);
btrfs_set_root_node(&log->root_item, log->node);
root->log_batch = 0;
log_transid = root->log_transid;
root->log_transid++;
log->log_transid = root->log_transid;
root->log_start_pid = 0;
......@@ -2046,6 +2052,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
index2 = log_root_tree->log_transid % 2;
if (atomic_read(&log_root_tree->log_commit[index2])) {
btrfs_wait_marked_extents(log, &log->dirty_log_pages);
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
mutex_unlock(&log_root_tree->log_mutex);
......@@ -2065,6 +2072,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* check the full commit flag again
*/
if (root->fs_info->last_trans_log_full_commit == trans->transid) {
btrfs_wait_marked_extents(log, &log->dirty_log_pages);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out_wake_log_root;
......@@ -2073,6 +2081,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_write_and_wait_marked_extents(log_root_tree,
&log_root_tree->dirty_log_pages);
BUG_ON(ret);
btrfs_wait_marked_extents(log, &log->dirty_log_pages);
btrfs_set_super_log_root(&root->fs_info->super_for_commit,
log_root_tree->node->start);
......@@ -2092,9 +2101,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* the running transaction open, so a full commit can't hop
* in and cause problems either.
*/
write_ctree_super(trans, root->fs_info->tree_root, 2);
write_ctree_super(trans, root->fs_info->tree_root, 1);
ret = 0;
mutex_lock(&root->log_mutex);
if (root->last_log_commit < log_transid)
root->last_log_commit = log_transid;
mutex_unlock(&root->log_mutex);
out_wake_log_root:
atomic_set(&log_root_tree->log_commit[index2], 0);
smp_mb();
......@@ -2862,6 +2876,21 @@ out:
return ret;
}
/*
 * Check, under the root's log_mutex, whether @inode needs no further
 * logging for the transaction behind @trans.
 *
 * Returns 1 when the inode's logged_trans equals trans->transid and its
 * last_sub_trans is not greater than the root's last_log_commit;
 * returns 0 otherwise.
 */
static int inode_in_log(struct btrfs_trans_handle *trans,
			struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int covered;

	mutex_lock(&root->log_mutex);
	covered = BTRFS_I(inode)->logged_trans == trans->transid &&
		  BTRFS_I(inode)->last_sub_trans <= root->last_log_commit;
	mutex_unlock(&root->log_mutex);

	return covered;
}
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
......@@ -2901,6 +2930,11 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
if (inode_in_log(trans, inode)) {
ret = BTRFS_NO_LOG_SYNC;
goto end_no_trans;
}
start_log_trans(trans, root);
ret = btrfs_log_inode(trans, root, inode, inode_only);
......
......@@ -19,6 +19,9 @@
#ifndef __TREE_LOG_
#define __TREE_LOG_
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256
int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
......
......@@ -260,7 +260,7 @@ err:
* attributes are handled directly.
*/
struct xattr_handler *btrfs_xattr_handlers[] = {
#ifdef CONFIG_BTRFS_POSIX_ACL
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
&btrfs_xattr_acl_access_handler,
&btrfs_xattr_acl_default_handler,
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment