Commit 3a307ffc authored by Mark Fasheh

ocfs2: rework ocfs2_buffered_write_cluster()

Use some ideas from the new-aops patch series and turn
ocfs2_buffered_write_cluster() into a 2 stage operation with the caller
copying data in between. The code now understands multiple cluster writes as
a result of having to deal with a full page write for greater than 4k pages.

This sets us up to easily call into the write path during ->page_mkwrite().
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
parent 2e89b2e4
This diff is collapsed.
...@@ -42,57 +42,13 @@ int walk_page_buffers( handle_t *handle, ...@@ -42,57 +42,13 @@ int walk_page_buffers( handle_t *handle,
int (*fn)( handle_t *handle, int (*fn)( handle_t *handle,
struct buffer_head *bh)); struct buffer_head *bh));
struct ocfs2_write_ctxt; int ocfs2_write_begin(struct file *file, struct address_space *mapping,
typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *, loff_t pos, unsigned len, unsigned flags,
u64 *, unsigned int *, unsigned int *); struct page **pagep, void **fsdata);
ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, int ocfs2_write_end(struct file *file, struct address_space *mapping,
size_t count, ocfs2_page_writer *actor, loff_t pos, unsigned len, unsigned copied,
void *priv); struct page *page, void *fsdata);
struct ocfs2_write_ctxt {
size_t w_count;
loff_t w_pos;
u32 w_cpos;
unsigned int w_finished_copy;
/* This is true if page_size > cluster_size */
unsigned int w_large_pages;
/* Filler callback and private data */
ocfs2_page_writer *w_write_data_page;
void *w_private;
/* Only valid for the filler callback */
struct page *w_this_page;
unsigned int w_this_page_new;
};
struct ocfs2_buffered_write_priv {
char *b_src_buf;
const struct iovec *b_cur_iov; /* Current iovec */
size_t b_cur_off; /* Offset in the
* current iovec */
};
int ocfs2_map_and_write_user_data(struct inode *inode,
struct ocfs2_write_ctxt *wc,
u64 *p_blkno,
unsigned int *ret_from,
unsigned int *ret_to);
struct ocfs2_splice_write_priv {
struct splice_desc *s_sd;
struct pipe_buffer *s_buf;
struct pipe_inode_info *s_pipe;
/* Neither offset value is ever larger than one page */
unsigned int s_offset;
unsigned int s_buf_offset;
};
int ocfs2_map_and_write_splice_data(struct inode *inode,
struct ocfs2_write_ctxt *wc,
u64 *p_blkno,
unsigned int *ret_from,
unsigned int *ret_to);
/* all ocfs2_dio_end_io()'s fault */ /* all ocfs2_dio_end_io()'s fault */
#define ocfs2_iocb_is_rw_locked(iocb) \ #define ocfs2_iocb_is_rw_locked(iocb) \
......
...@@ -1335,15 +1335,16 @@ ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes) ...@@ -1335,15 +1335,16 @@ ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
*basep = base; *basep = base;
} }
static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp, static struct page * ocfs2_get_write_source(char **ret_src_buf,
const struct iovec *cur_iov, const struct iovec *cur_iov,
size_t iov_offset) size_t iov_offset)
{ {
int ret; int ret;
char *buf; char *buf = cur_iov->iov_base + iov_offset;
struct page *src_page = NULL; struct page *src_page = NULL;
unsigned long off;
buf = cur_iov->iov_base + iov_offset; off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;
if (!segment_eq(get_fs(), KERNEL_DS)) { if (!segment_eq(get_fs(), KERNEL_DS)) {
/* /*
...@@ -1355,18 +1356,17 @@ static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp ...@@ -1355,18 +1356,17 @@ static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp
(unsigned long)buf & PAGE_CACHE_MASK, 1, (unsigned long)buf & PAGE_CACHE_MASK, 1,
0, 0, &src_page, NULL); 0, 0, &src_page, NULL);
if (ret == 1) if (ret == 1)
bp->b_src_buf = kmap(src_page); *ret_src_buf = kmap(src_page) + off;
else else
src_page = ERR_PTR(-EFAULT); src_page = ERR_PTR(-EFAULT);
} else { } else {
bp->b_src_buf = buf; *ret_src_buf = buf;
} }
return src_page; return src_page;
} }
static void ocfs2_put_write_source(struct ocfs2_buffered_write_priv *bp, static void ocfs2_put_write_source(struct page *page)
struct page *page)
{ {
if (page) { if (page) {
kunmap(page); kunmap(page);
...@@ -1382,10 +1382,12 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, ...@@ -1382,10 +1382,12 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
{ {
int ret = 0; int ret = 0;
ssize_t copied, total = 0; ssize_t copied, total = 0;
size_t iov_offset = 0; size_t iov_offset = 0, bytes;
loff_t pos;
const struct iovec *cur_iov = iov; const struct iovec *cur_iov = iov;
struct ocfs2_buffered_write_priv bp; struct page *user_page, *page;
struct page *page; char *buf, *dst;
void *fsdata;
/* /*
* handle partial DIO write. Adjust cur_iov if needed. * handle partial DIO write. Adjust cur_iov if needed.
...@@ -1393,21 +1395,38 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, ...@@ -1393,21 +1395,38 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);
do { do {
bp.b_cur_off = iov_offset; pos = *ppos;
bp.b_cur_iov = cur_iov;
page = ocfs2_get_write_source(&bp, cur_iov, iov_offset); user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
if (IS_ERR(page)) { if (IS_ERR(user_page)) {
ret = PTR_ERR(page); ret = PTR_ERR(user_page);
goto out; goto out;
} }
copied = ocfs2_buffered_write_cluster(file, *ppos, count, /* Stay within our page boundaries */
ocfs2_map_and_write_user_data, bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
&bp); (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
/* Stay within the vector boundary */
bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
/* Stay within count */
bytes = min(bytes, count);
page = NULL;
ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
&page, &fsdata);
if (ret) {
mlog_errno(ret);
goto out;
}
ocfs2_put_write_source(&bp, page); dst = kmap_atomic(page, KM_USER0);
memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
kunmap_atomic(dst, KM_USER0);
flush_dcache_page(page);
ocfs2_put_write_source(user_page);
copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
bytes, page, fsdata);
if (copied < 0) { if (copied < 0) {
mlog_errno(copied); mlog_errno(copied);
ret = copied; ret = copied;
...@@ -1415,7 +1434,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, ...@@ -1415,7 +1434,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
} }
total += copied; total += copied;
*ppos = *ppos + copied; *ppos = pos + copied;
count -= copied; count -= copied;
ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
...@@ -1585,52 +1604,46 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe, ...@@ -1585,52 +1604,46 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
struct pipe_buffer *buf, struct pipe_buffer *buf,
struct splice_desc *sd) struct splice_desc *sd)
{ {
int ret, count, total = 0; int ret, count;
ssize_t copied = 0; ssize_t copied = 0;
struct ocfs2_splice_write_priv sp; struct file *file = sd->u.file;
unsigned int offset;
struct page *page = NULL;
void *fsdata;
char *src, *dst;
ret = buf->ops->confirm(pipe, buf); ret = buf->ops->confirm(pipe, buf);
if (ret) if (ret)
goto out; goto out;
sp.s_sd = sd; offset = sd->pos & ~PAGE_CACHE_MASK;
sp.s_buf = buf;
sp.s_pipe = pipe;
sp.s_offset = sd->pos & ~PAGE_CACHE_MASK;
sp.s_buf_offset = buf->offset;
count = sd->len; count = sd->len;
if (count + sp.s_offset > PAGE_CACHE_SIZE) if (count + offset > PAGE_CACHE_SIZE)
count = PAGE_CACHE_SIZE - sp.s_offset; count = PAGE_CACHE_SIZE - offset;
do { ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0,
/* &page, &fsdata);
* splice wants us to copy up to one page at a if (ret) {
* time. For pagesize > cluster size, this means we mlog_errno(ret);
* might enter ocfs2_buffered_write_cluster() more goto out;
* than once, so keep track of our progress here. }
*/
copied = ocfs2_buffered_write_cluster(sd->u.file, src = buf->ops->map(pipe, buf, 1);
(loff_t)sd->pos + total, dst = kmap_atomic(page, KM_USER1);
count, memcpy(dst + offset, src + buf->offset, count);
ocfs2_map_and_write_splice_data, kunmap_atomic(page, KM_USER1);
&sp); buf->ops->unmap(pipe, buf, src);
copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count,
page, fsdata);
if (copied < 0) { if (copied < 0) {
mlog_errno(copied); mlog_errno(copied);
ret = copied; ret = copied;
goto out; goto out;
} }
count -= copied;
sp.s_offset += copied;
sp.s_buf_offset += copied;
total += copied;
} while (count);
ret = 0;
out: out:
return total ? total : ret; return copied ? copied : ret;
} }
static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment