Commit 6bf13c0c authored by Sage Weil, committed by Chris Mason

Btrfs: transaction ioctls

These ioctls let a user application hold a transaction open while it
performs a series of operations.  A final ioctl does a sync on the fs
(closing the current transaction).  This is the main requirement for
Ceph's OSD to be able to keep the data it's storing in a btrfs volume
consistent, and AFAICS it works just fine.  The application would do
something like

	fd = ::open("some/file", O_RDONLY);
	::ioctl(fd, BTRFS_IOC_TRANS_START);
	/* do a bunch of stuff */
	::ioctl(fd, BTRFS_IOC_TRANS_END);
or just
	::close(fd);

And to ensure it commits to disk,

	::ioctl(fd, BTRFS_IOC_SYNC);

When a transaction is held open, the trans_handle is attached to the
struct file (via private_data) so that it will get cleaned up if the
process dies unexpectedly.  A held transaction is also ended on fsync() to
avoid a deadlock.

A misbehaving application could also deliberately hold a transaction open,
effectively locking up the FS, so it may make sense to restrict these
ioctls to root (or some similarly privileged caller).
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent eba12c7b
...@@ -1575,6 +1575,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb); ...@@ -1575,6 +1575,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode); void btrfs_destroy_inode(struct inode *inode);
int btrfs_init_cachep(void); int btrfs_init_cachep(void);
void btrfs_destroy_cachep(void); void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
struct btrfs_root *root); struct btrfs_root *root);
...@@ -1595,6 +1596,8 @@ extern struct file_operations btrfs_file_operations; ...@@ -1595,6 +1596,8 @@ extern struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans, int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode, struct btrfs_root *root, struct inode *inode,
u64 start, u64 end, u64 inline_limit, u64 *hint_block); u64 start, u64 end, u64 inline_limit, u64 *hint_block);
int btrfs_release_file(struct inode *inode, struct file *file);
/* tree-defrag.c */ /* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int cache_only); struct btrfs_root *root, int cache_only);
...@@ -1615,4 +1618,5 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, ...@@ -1615,4 +1618,5 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans,
u64 btrfs_parse_size(char *str); u64 btrfs_parse_size(char *str);
int btrfs_parse_options(char *options, struct btrfs_root *root, int btrfs_parse_options(char *options, struct btrfs_root *root,
char **subvol_name); char **subvol_name);
int btrfs_sync_fs(struct super_block *sb, int wait);
#endif #endif
...@@ -978,9 +978,11 @@ out_nolock: ...@@ -978,9 +978,11 @@ out_nolock:
return num_written ? num_written : err; return num_written ? num_written : err;
} }
static int btrfs_release_file (struct inode * inode, struct file * filp) int btrfs_release_file(struct inode * inode, struct file * filp)
{ {
btrfs_del_ordered_inode(inode); btrfs_del_ordered_inode(inode);
if (filp->private_data)
btrfs_ioctl_trans_end(filp);
return 0; return 0;
} }
...@@ -1011,6 +1013,9 @@ static int btrfs_sync_file(struct file *file, ...@@ -1011,6 +1013,9 @@ static int btrfs_sync_file(struct file *file,
/* /*
* ok we haven't committed the transaction yet, lets do a commit * ok we haven't committed the transaction yet, lets do a commit
*/ */
if (file->private_data)
btrfs_ioctl_trans_end(file);
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
if (!trans) { if (!trans) {
ret = -ENOMEM; ret = -ENOMEM;
......
...@@ -3336,6 +3336,61 @@ out_fput: ...@@ -3336,6 +3336,61 @@ out_fput:
return ret; return ret;
} }
/*
 * BTRFS_IOC_TRANS_START: start a transaction and park its handle in
 * file->private_data so it stays open until it is explicitly ended.
 *
 * Runs under fs_info->fs_mutex.
 *
 * Returns 0 on success, -EINPROGRESS if this file already has a handle
 * stored in private_data, or -ENOMEM if btrfs_start_transaction()
 * returned no handle.
 *
 * WARNING: there are many ways the trans_start and trans_end ioctls can
 * lead to deadlocks.  They should only be used by applications that
 * basically own the machine, and have a very in depth understanding
 * of all the possible deadlocks and enospc problems.
 */
long btrfs_ioctl_trans_start(struct file *file)
{
	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
	struct btrfs_trans_handle *handle;
	int err;

	mutex_lock(&root->fs_info->fs_mutex);

	if (file->private_data) {
		/* only one held transaction per open file */
		err = -EINPROGRESS;
	} else {
		handle = btrfs_start_transaction(root, 0);
		if (!handle) {
			err = -ENOMEM;
		} else {
			file->private_data = handle;
			err = 0;
		}
	}

	mutex_unlock(&root->fs_info->fs_mutex);
	return err;
}
/*
 * BTRFS_IOC_TRANS_END: end the transaction whose handle is stored in
 * file->private_data and clear that pointer.
 *
 * Runs under fs_info->fs_mutex.
 *
 * Returns 0 after ending the held transaction, or -EINVAL if
 * private_data holds no handle.
 *
 * WARNING: there are many ways the trans_start and trans_end ioctls can
 * lead to deadlocks.  They should only be used by applications that
 * basically own the machine, and have a very in depth understanding
 * of all the possible deadlocks and enospc problems.
 */
long btrfs_ioctl_trans_end(struct file *file)
{
	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
	struct btrfs_trans_handle *held;
	int err = -EINVAL;	/* reported when no transaction is held */

	mutex_lock(&root->fs_info->fs_mutex);

	held = file->private_data;
	if (held) {
		btrfs_end_transaction(held, root);
		/* drop the stored handle so it is not ended a second time */
		file->private_data = NULL;
		err = 0;
	}

	mutex_unlock(&root->fs_info->fs_mutex);
	return err;
}
long btrfs_ioctl(struct file *file, unsigned int long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg) cmd, unsigned long arg)
{ {
...@@ -3356,6 +3411,13 @@ long btrfs_ioctl(struct file *file, unsigned int ...@@ -3356,6 +3411,13 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_balance(root->fs_info->dev_root); return btrfs_balance(root->fs_info->dev_root);
case BTRFS_IOC_CLONE: case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg); return btrfs_ioctl_clone(file, arg);
case BTRFS_IOC_TRANS_START:
return btrfs_ioctl_trans_start(file);
case BTRFS_IOC_TRANS_END:
return btrfs_ioctl_trans_end(file);
case BTRFS_IOC_SYNC:
btrfs_sync_fs(file->f_dentry->d_sb, 1);
return 0;
} }
return -ENOTTY; return -ENOTTY;
...@@ -3679,6 +3741,7 @@ static struct file_operations btrfs_dir_file_operations = { ...@@ -3679,6 +3741,7 @@ static struct file_operations btrfs_dir_file_operations = {
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_ioctl, .compat_ioctl = btrfs_ioctl,
#endif #endif
.release = btrfs_release_file,
}; };
static struct extent_io_ops btrfs_extent_io_ops = { static struct extent_io_ops btrfs_extent_io_ops = {
......
...@@ -36,6 +36,14 @@ struct btrfs_ioctl_vol_args { ...@@ -36,6 +36,14 @@ struct btrfs_ioctl_vol_args {
struct btrfs_ioctl_vol_args) struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
struct btrfs_ioctl_vol_args) struct btrfs_ioctl_vol_args)
/* trans start and trans end are dangerous, and only for
* use by applications that know how to avoid the
* resulting deadlocks
*/
#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) #define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ #define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
struct btrfs_ioctl_vol_args) struct btrfs_ioctl_vol_args)
...@@ -43,4 +51,5 @@ struct btrfs_ioctl_vol_args { ...@@ -43,4 +51,5 @@ struct btrfs_ioctl_vol_args {
struct btrfs_ioctl_vol_args) struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
struct btrfs_ioctl_vol_args) struct btrfs_ioctl_vol_args)
#endif #endif
...@@ -293,7 +293,7 @@ fail_close: ...@@ -293,7 +293,7 @@ fail_close:
return err; return err;
} }
static int btrfs_sync_fs(struct super_block *sb, int wait) int btrfs_sync_fs(struct super_block *sb, int wait)
{ {
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_root *root; struct btrfs_root *root;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment