Commit 9191ca3b authored by Tom Talpey, committed by Trond Myklebust

RPC/RDMA: adhere to protocol for unpadded client trailing write chunks.

The RPC/RDMA protocol allows clients and servers to avoid RDMA
operations for data which is purely the result of XDR padding.
On the client, automatically insert the necessary padding for
such server replies, and optionally don't marshal such chunks.
Signed-off-by: Tom Talpey <talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
parent fee08caf
...@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, ...@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
} }
if (xdrbuf->tail[0].iov_len) { if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
* xdr pad bytes, saving the server an RDMA operation. */
if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
return n;
if (n == nsegs) if (n == nsegs)
return 0; return 0;
seg[n].mr_page = NULL; seg[n].mr_page = NULL;
...@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b ...@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
* Scatter inline received data back into provided iov's. * Scatter inline received data back into provided iov's.
*/ */
static void static void
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{ {
int i, npages, curlen, olen; int i, npages, curlen, olen;
char *destp; char *destp;
...@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) ...@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
} else } else
rqst->rq_rcv_buf.tail[0].iov_len = 0; rqst->rq_rcv_buf.tail[0].iov_len = 0;
if (pad) {
/* implicit padding on terminal chunk */
unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
while (pad--)
p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
}
if (copy_len) if (copy_len)
dprintk("RPC: %s: %d bytes in" dprintk("RPC: %s: %d bytes in"
" %d extra segments (%d lost)\n", " %d extra segments (%d lost)\n",
...@@ -794,14 +805,20 @@ repost: ...@@ -794,14 +805,20 @@ repost:
((unsigned char *)iptr - (unsigned char *)headerp); ((unsigned char *)iptr - (unsigned char *)headerp);
status = rep->rr_len + rdmalen; status = rep->rr_len + rdmalen;
r_xprt->rx_stats.total_rdma_reply += rdmalen; r_xprt->rx_stats.total_rdma_reply += rdmalen;
/* special case - last chunk may omit padding */
if (rdmalen &= 3) {
rdmalen = 4 - rdmalen;
status += rdmalen;
}
} else { } else {
/* else ordinary inline */ /* else ordinary inline */
rdmalen = 0;
iptr = (__be32 *)((unsigned char *)headerp + 28); iptr = (__be32 *)((unsigned char *)headerp + 28);
rep->rr_len -= 28; /*sizeof *headerp;*/ rep->rr_len -= 28; /*sizeof *headerp;*/
status = rep->rr_len; status = rep->rr_len;
} }
/* Fix up the rpc results for upper layer */ /* Fix up the rpc results for upper layer */
rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
break; break;
case __constant_htonl(RDMA_NOMSG): case __constant_htonl(RDMA_NOMSG):
......
...@@ -71,6 +71,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; ...@@ -71,6 +71,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
int xprt_rdma_pad_optimize = 0;
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
...@@ -135,6 +136,14 @@ static ctl_table xr_tunables_table[] = { ...@@ -135,6 +136,14 @@ static ctl_table xr_tunables_table[] = {
.extra1 = &min_memreg, .extra1 = &min_memreg,
.extra2 = &max_memreg, .extra2 = &max_memreg,
}, },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "rdma_pad_optimize",
.data = &xprt_rdma_pad_optimize,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ {
.ctl_name = 0, .ctl_name = 0,
}, },
......
...@@ -280,6 +280,11 @@ struct rpcrdma_xprt { ...@@ -280,6 +280,11 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
/* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
extern int xprt_rdma_pad_optimize;
/* /*
* Interface Adapter calls - xprtrdma/verbs.c * Interface Adapter calls - xprtrdma/verbs.c
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment