Commit 6a14b90b authored by Jens Axboe

vmsplice: add vmsplice-to-user support

A bit of a cheat, it actually just copies the data to userspace. But
this makes the interface nice and symmetric and enables people to build
on splice, with room for future improvement in performance.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent c66ab6fa
...@@ -1640,7 +1640,7 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, ...@@ -1640,7 +1640,7 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
.total_len = len, .total_len = len,
.flags = flags, .flags = flags,
.pos = *ppos, .pos = *ppos,
.file = out, .u.file = out,
}; };
ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor); ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
......
...@@ -528,7 +528,7 @@ EXPORT_SYMBOL(generic_file_splice_read); ...@@ -528,7 +528,7 @@ EXPORT_SYMBOL(generic_file_splice_read);
static int pipe_to_sendpage(struct pipe_inode_info *pipe, static int pipe_to_sendpage(struct pipe_inode_info *pipe,
struct pipe_buffer *buf, struct splice_desc *sd) struct pipe_buffer *buf, struct splice_desc *sd)
{ {
struct file *file = sd->file; struct file *file = sd->u.file;
loff_t pos = sd->pos; loff_t pos = sd->pos;
int ret, more; int ret, more;
...@@ -566,7 +566,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, ...@@ -566,7 +566,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd) struct splice_desc *sd)
{ {
struct file *file = sd->file; struct file *file = sd->u.file;
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
unsigned int offset, this_len; unsigned int offset, this_len;
struct page *page; struct page *page;
...@@ -769,7 +769,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, ...@@ -769,7 +769,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
.total_len = len, .total_len = len,
.flags = flags, .flags = flags,
.pos = *ppos, .pos = *ppos,
.file = out, .u.file = out,
}; };
/* /*
...@@ -807,7 +807,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, ...@@ -807,7 +807,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
.total_len = len, .total_len = len,
.flags = flags, .flags = flags,
.pos = *ppos, .pos = *ppos,
.file = out, .u.file = out,
}; };
ssize_t ret; ssize_t ret;
int err; int err;
...@@ -1087,7 +1087,7 @@ EXPORT_SYMBOL(splice_direct_to_actor); ...@@ -1087,7 +1087,7 @@ EXPORT_SYMBOL(splice_direct_to_actor);
static int direct_splice_actor(struct pipe_inode_info *pipe, static int direct_splice_actor(struct pipe_inode_info *pipe,
struct splice_desc *sd) struct splice_desc *sd)
{ {
struct file *file = sd->file; struct file *file = sd->u.file;
return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
} }
...@@ -1100,7 +1100,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ...@@ -1100,7 +1100,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
.total_len = len, .total_len = len,
.flags = flags, .flags = flags,
.pos = *ppos, .pos = *ppos,
.file = out, .u.file = out,
}; };
size_t ret; size_t ret;
...@@ -1289,28 +1289,131 @@ static int get_iovec_page_array(const struct iovec __user *iov, ...@@ -1289,28 +1289,131 @@ static int get_iovec_page_array(const struct iovec __user *iov,
return error; return error;
} }
/*
 * Actor for __splice_from_pipe(): copy sd->len bytes from one pipe buffer
 * (starting at buf->offset) to the user address held in sd->u.userptr.
 *
 * On success returns sd->len and advances sd->u.userptr by that amount so
 * the next buffer lands right behind this one. On failure returns either
 * the non-zero value reported by ->pin(), or -EFAULT if the copy to
 * userspace could not be completed.
 */
static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	char *src;
	int ret;

	ret = buf->ops->pin(pipe, buf);
	if (unlikely(ret))
		return ret;

	/*
	 * See if we can use the atomic maps, by prefaulting in the
	 * pages and doing an atomic copy
	 */
	if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
		src = buf->ops->map(pipe, buf, 1);
		ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
							sd->len);
		buf->ops->unmap(pipe, buf, src);
		if (!ret) {
			ret = sd->len;
			goto out;
		}
	}

	/*
	 * No dice, use slow non-atomic map and copy
	 */
	src = buf->ops->map(pipe, buf, 0);

	ret = sd->len;
	if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
		ret = -EFAULT;

	/*
	 * Drop the slow-path mapping here, before the shared exit label.
	 * The atomic path above has already released its own mapping, so
	 * unmapping after "out:" would unmap the same buffer twice.
	 */
	buf->ops->unmap(pipe, buf, src);
out:
	if (ret > 0)
		sd->u.userptr += ret;
	return ret;
}
/*
 * For lack of a better implementation, implement vmsplice() to userspace
 * as a simple copy of the pipe's pages to the user iov.
 *
 * Walks up to nr_segs entries of the user iovec array, draining the pipe
 * behind 'file' into each segment via __splice_from_pipe() with
 * pipe_to_user() as the actor. The walk stops at the first zero-length
 * segment, the first error, or the first segment that could not be filled
 * completely.
 *
 * Returns the total number of bytes copied if any were copied; otherwise
 * the first error encountered, or 0 if there was nothing to do. Returns
 * -EBADF if 'file' is not backed by a pipe.
 */
static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
			     unsigned long nr_segs, unsigned int flags)
{
	struct pipe_inode_info *pipe;
	struct splice_desc sd;
	ssize_t size;
	int error;
	long ret;

	pipe = pipe_info(file->f_path.dentry->d_inode);
	if (!pipe)
		return -EBADF;

	/* __splice_from_pipe() is called with the inode mutex held. */
	if (pipe->inode)
		mutex_lock(&pipe->inode->i_mutex);

	error = ret = 0;
	while (nr_segs) {
		void __user *base;
		size_t len;

		/*
		 * Get user address base and length for this iovec.
		 */
		error = get_user(base, &iov->iov_base);
		if (unlikely(error))
			break;
		error = get_user(len, &iov->iov_len);
		if (unlikely(error))
			break;

		/*
		 * Sanity check this iovec. 0 read succeeds.
		 */
		if (unlikely(!len))
			break;
		if (unlikely(!base)) {
			error = -EFAULT;
			break;
		}

		/*
		 * The actor writes through this pointer with
		 * fault_in_pages_writeable() and __copy_to_user_inatomic(),
		 * neither of which validates the range. Verify here that the
		 * whole destination lies in user space, otherwise a caller
		 * could aim the copy at kernel memory.
		 */
		if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
			error = -EFAULT;
			break;
		}

		sd.len = 0;
		sd.total_len = len;
		sd.flags = flags;
		sd.u.userptr = base;
		sd.pos = 0;

		size = __splice_from_pipe(pipe, &sd, pipe_to_user);
		if (size < 0) {
			/* Report the error only if nothing was copied yet. */
			if (!ret)
				ret = size;

			break;
		}

		ret += size;

		/* Short copy: this segment was not filled, so stop here. */
		if (size < len)
			break;

		nr_segs--;
		iov++;
	}

	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);

	if (!ret)
		ret = error;

	return ret;
}
/* /*
* vmsplice splices a user address range into a pipe. It can be thought of * vmsplice splices a user address range into a pipe. It can be thought of
* as splice-from-memory, where the regular splice is splice-from-file (or * as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally. * to file). In both cases the output is a pipe, naturally.
*
* Note that vmsplice only supports splicing _from_ user memory to a pipe,
* not the other way around. Splicing from user memory is a simple operation
* that can be supported without any funky alignment restrictions or nasty
* vm tricks. We simply map in the user memory and fill them into a pipe.
* The reverse isn't quite as easy, though. There are two possible solutions
* for that:
*
* - memcpy() the data internally, at which point we might as well just
* do a regular read() on the buffer anyway.
* - Lots of nasty vm tricks, that are neither fast nor flexible (it
* has restriction limitations on both ends of the pipe).
*
* Alas, it isn't here.
*
*/ */
static long do_vmsplice(struct file *file, const struct iovec __user *iov, static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
unsigned long nr_segs, unsigned int flags) unsigned long nr_segs, unsigned int flags)
{ {
struct pipe_inode_info *pipe; struct pipe_inode_info *pipe;
struct page *pages[PIPE_BUFFERS]; struct page *pages[PIPE_BUFFERS];
...@@ -1325,10 +1428,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, ...@@ -1325,10 +1428,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
pipe = pipe_info(file->f_path.dentry->d_inode); pipe = pipe_info(file->f_path.dentry->d_inode);
if (!pipe) if (!pipe)
return -EBADF; return -EBADF;
if (unlikely(nr_segs > UIO_MAXIOV))
return -EINVAL;
else if (unlikely(!nr_segs))
return 0;
spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
flags & SPLICE_F_GIFT); flags & SPLICE_F_GIFT);
...@@ -1338,6 +1437,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, ...@@ -1338,6 +1437,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
return splice_to_pipe(pipe, &spd); return splice_to_pipe(pipe, &spd);
} }
/*
* Note that vmsplice only really supports true splicing _from_ user memory
* to a pipe, not the other way around. Splicing from user memory is a simple
* operation that can be supported without any funky alignment restrictions
* or nasty vm tricks. We simply map in the user memory and fill them into
* a pipe. The reverse isn't quite as easy, though. There are two possible
* solutions for that:
*
* - memcpy() the data internally, at which point we might as well just
* do a regular read() on the buffer anyway.
* - Lots of nasty vm tricks, that are neither fast nor flexible (it
* has restriction limitations on both ends of the pipe).
*
* Currently we punt and implement it as a normal copy, see pipe_to_user().
*
*/
asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
unsigned long nr_segs, unsigned int flags) unsigned long nr_segs, unsigned int flags)
{ {
...@@ -1345,11 +1460,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, ...@@ -1345,11 +1460,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
long error; long error;
int fput; int fput;
if (unlikely(nr_segs > UIO_MAXIOV))
return -EINVAL;
else if (unlikely(!nr_segs))
return 0;
error = -EBADF; error = -EBADF;
file = fget_light(fd, &fput); file = fget_light(fd, &fput);
if (file) { if (file) {
if (file->f_mode & FMODE_WRITE) if (file->f_mode & FMODE_WRITE)
error = do_vmsplice(file, iov, nr_segs, flags); error = vmsplice_to_pipe(file, iov, nr_segs, flags);
else if (file->f_mode & FMODE_READ)
error = vmsplice_to_user(file, iov, nr_segs, flags);
fput_light(file, fput); fput_light(file, fput);
} }
......
...@@ -88,7 +88,13 @@ int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); ...@@ -88,7 +88,13 @@ int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
struct splice_desc { struct splice_desc {
unsigned int len, total_len; /* current and remaining length */ unsigned int len, total_len; /* current and remaining length */
unsigned int flags; /* splice flags */ unsigned int flags; /* splice flags */
struct file *file; /* file to read/write */ /*
* actor() private data
*/
union {
void __user *userptr; /* memory to write to */
struct file *file; /* file to read/write */
} u;
loff_t pos; /* file position */ loff_t pos; /* file position */
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment