Commit 257c62e1 authored by Chris Mason

Btrfs: avoid tree log commit when there are no changes

rpm has a habit of running fdatasync when the file hasn't
changed.  We already detect when a file hasn't been changed
in the current transaction, but it might have been sent to
the tree-log in this transaction and not changed since
the last call to fsync.

In this case, we want to avoid a tree log sync, which includes
a number of synchronous writes and barriers.  This commit
extends the existing tracking of the last transaction to change
a file to also track the last sub-transaction.

The end result is that rpm -ivh and -Uvh are roughly twice as fast,
and on par with ext3.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 4722607d
...@@ -86,6 +86,12 @@ struct btrfs_inode { ...@@ -86,6 +86,12 @@ struct btrfs_inode {
* transid of the trans_handle that last modified this inode * transid of the trans_handle that last modified this inode
*/ */
u64 last_trans; u64 last_trans;
/*
* log transid when this inode was last modified
*/
u64 last_sub_trans;
/* /*
* transid that last logged this inode * transid that last logged this inode
*/ */
......
...@@ -1009,6 +1009,7 @@ struct btrfs_root { ...@@ -1009,6 +1009,7 @@ struct btrfs_root {
atomic_t log_writers; atomic_t log_writers;
atomic_t log_commit[2]; atomic_t log_commit[2];
unsigned long log_transid; unsigned long log_transid;
unsigned long last_log_commit;
unsigned long log_batch; unsigned long log_batch;
pid_t log_start_pid; pid_t log_start_pid;
bool log_multiple_pids; bool log_multiple_pids;
......
...@@ -919,6 +919,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, ...@@ -919,6 +919,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->log_writers, 0); atomic_set(&root->log_writers, 0);
root->log_batch = 0; root->log_batch = 0;
root->log_transid = 0; root->log_transid = 0;
root->last_log_commit = 0;
extent_io_tree_init(&root->dirty_log_pages, extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->btree_inode->i_mapping, GFP_NOFS);
...@@ -1089,6 +1090,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, ...@@ -1089,6 +1090,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(root->log_root); WARN_ON(root->log_root);
root->log_root = log_root; root->log_root = log_root;
root->log_transid = 0; root->log_transid = 0;
root->last_log_commit = 0;
return 0; return 0;
} }
......
...@@ -1087,8 +1087,10 @@ out_nolock: ...@@ -1087,8 +1087,10 @@ out_nolock:
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
else else
btrfs_commit_transaction(trans, root); btrfs_commit_transaction(trans, root);
} else { } else if (ret != BTRFS_NO_LOG_SYNC) {
btrfs_commit_transaction(trans, root); btrfs_commit_transaction(trans, root);
} else {
btrfs_end_transaction(trans, root);
} }
} }
if (file->f_flags & O_DIRECT) { if (file->f_flags & O_DIRECT) {
...@@ -1138,6 +1140,13 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) ...@@ -1138,6 +1140,13 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
int ret = 0; int ret = 0;
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
/* we wait first, since the writeback may change the inode */
root->log_batch++;
/* the VFS called filemap_fdatawrite for us */
btrfs_wait_ordered_range(inode, 0, (u64)-1);
root->log_batch++;
/* /*
* check the transaction that last modified this inode * check the transaction that last modified this inode
* and see if its already been committed * and see if its already been committed
...@@ -1145,6 +1154,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) ...@@ -1145,6 +1154,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
if (!BTRFS_I(inode)->last_trans) if (!BTRFS_I(inode)->last_trans)
goto out; goto out;
/*
* if the last transaction that changed this file was before
* the current transaction, we can bail out now without any
* syncing
*/
mutex_lock(&root->fs_info->trans_mutex); mutex_lock(&root->fs_info->trans_mutex);
if (BTRFS_I(inode)->last_trans <= if (BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) { root->fs_info->last_trans_committed) {
...@@ -1154,13 +1168,6 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) ...@@ -1154,13 +1168,6 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
} }
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
root->log_batch++;
filemap_fdatawrite(inode->i_mapping);
btrfs_wait_ordered_range(inode, 0, (u64)-1);
root->log_batch++;
if (datasync && !(inode->i_state & I_DIRTY_PAGES))
goto out;
/* /*
* ok we haven't committed the transaction yet, lets do a commit * ok we haven't committed the transaction yet, lets do a commit
*/ */
...@@ -1189,14 +1196,18 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) ...@@ -1189,14 +1196,18 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
*/ */
mutex_unlock(&dentry->d_inode->i_mutex); mutex_unlock(&dentry->d_inode->i_mutex);
if (ret > 0) { if (ret != BTRFS_NO_LOG_SYNC) {
ret = btrfs_commit_transaction(trans, root); if (ret > 0) {
} else {
ret = btrfs_sync_log(trans, root);
if (ret == 0)
ret = btrfs_end_transaction(trans, root);
else
ret = btrfs_commit_transaction(trans, root); ret = btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_sync_log(trans, root);
if (ret == 0)
ret = btrfs_end_transaction(trans, root);
else
ret = btrfs_commit_transaction(trans, root);
}
} else {
ret = btrfs_end_transaction(trans, root);
} }
mutex_lock(&dentry->d_inode->i_mutex); mutex_lock(&dentry->d_inode->i_mutex);
out: out:
......
...@@ -3480,6 +3480,7 @@ static noinline void init_btrfs_i(struct inode *inode) ...@@ -3480,6 +3480,7 @@ static noinline void init_btrfs_i(struct inode *inode)
bi->generation = 0; bi->generation = 0;
bi->sequence = 0; bi->sequence = 0;
bi->last_trans = 0; bi->last_trans = 0;
bi->last_sub_trans = 0;
bi->logged_trans = 0; bi->logged_trans = 0;
bi->delalloc_bytes = 0; bi->delalloc_bytes = 0;
bi->reserved_bytes = 0; bi->reserved_bytes = 0;
...@@ -4980,7 +4981,9 @@ again: ...@@ -4980,7 +4981,9 @@ again:
set_page_dirty(page); set_page_dirty(page);
SetPageUptodate(page); SetPageUptodate(page);
BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; BTRFS_I(inode)->last_trans = root->fs_info->generation;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock: out_unlock:
...@@ -5100,6 +5103,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ...@@ -5100,6 +5103,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
if (!ei) if (!ei)
return NULL; return NULL;
ei->last_trans = 0; ei->last_trans = 0;
ei->last_sub_trans = 0;
ei->logged_trans = 0; ei->logged_trans = 0;
ei->outstanding_extents = 0; ei->outstanding_extents = 0;
ei->reserved_extents = 0; ei->reserved_extents = 0;
......
...@@ -79,6 +79,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, ...@@ -79,6 +79,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
struct inode *inode) struct inode *inode)
{ {
BTRFS_I(inode)->last_trans = trans->transaction->transid; BTRFS_I(inode)->last_trans = trans->transaction->transid;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
} }
int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_end_transaction(struct btrfs_trans_handle *trans,
......
...@@ -1980,6 +1980,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ...@@ -1980,6 +1980,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
int ret; int ret;
struct btrfs_root *log = root->log_root; struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
u64 log_transid = 0;
mutex_lock(&root->log_mutex); mutex_lock(&root->log_mutex);
index1 = root->log_transid % 2; index1 = root->log_transid % 2;
...@@ -2018,6 +2019,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ...@@ -2018,6 +2019,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
btrfs_set_root_node(&log->root_item, log->node); btrfs_set_root_node(&log->root_item, log->node);
root->log_batch = 0; root->log_batch = 0;
log_transid = root->log_transid;
root->log_transid++; root->log_transid++;
log->log_transid = root->log_transid; log->log_transid = root->log_transid;
root->log_start_pid = 0; root->log_start_pid = 0;
...@@ -2095,6 +2097,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ...@@ -2095,6 +2097,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
write_ctree_super(trans, root->fs_info->tree_root, 1); write_ctree_super(trans, root->fs_info->tree_root, 1);
ret = 0; ret = 0;
mutex_lock(&root->log_mutex);
if (root->last_log_commit < log_transid)
root->last_log_commit = log_transid;
mutex_unlock(&root->log_mutex);
out_wake_log_root: out_wake_log_root:
atomic_set(&log_root_tree->log_commit[index2], 0); atomic_set(&log_root_tree->log_commit[index2], 0);
smp_mb(); smp_mb();
...@@ -2862,6 +2869,21 @@ out: ...@@ -2862,6 +2869,21 @@ out:
return ret; return ret;
} }
/*
 * Decide whether the inode can skip logging entirely.
 *
 * Returns 1 when both of the following hold, and 0 otherwise:
 *   - the inode's logged_trans equals the transid of 'trans', and
 *   - the inode's last_sub_trans is not newer than the root's
 *     last_log_commit.
 *
 * The comparison is made with root->log_mutex held.  The caller in
 * btrfs_log_inode_parent turns a non-zero result into BTRFS_NO_LOG_SYNC.
 */
static int inode_in_log(struct btrfs_trans_handle *trans,
			struct inode *inode)
{
	struct btrfs_root *log_owner = BTRFS_I(inode)->root;
	int already_logged;

	mutex_lock(&log_owner->log_mutex);
	already_logged =
		BTRFS_I(inode)->logged_trans == trans->transid &&
		BTRFS_I(inode)->last_sub_trans <= log_owner->last_log_commit;
	mutex_unlock(&log_owner->log_mutex);

	return already_logged;
}
/* /*
* helper function around btrfs_log_inode to make sure newly created * helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref * parent directories also end up in the log. A minimal inode and backref
...@@ -2901,6 +2923,11 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, ...@@ -2901,6 +2923,11 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret) if (ret)
goto end_no_trans; goto end_no_trans;
if (inode_in_log(trans, inode)) {
ret = BTRFS_NO_LOG_SYNC;
goto end_no_trans;
}
start_log_trans(trans, root); start_log_trans(trans, root);
ret = btrfs_log_inode(trans, root, inode, inode_only); ret = btrfs_log_inode(trans, root, inode, inode_only);
......
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
#ifndef __TREE_LOG_ #ifndef __TREE_LOG_
#define __TREE_LOG_ #define __TREE_LOG_
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256
int btrfs_sync_log(struct btrfs_trans_handle *trans, int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root); struct btrfs_root *root);
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment