Commit 34d16e42 authored by Tom Tucker

svcrdma: Use RPC reply map for RDMA_WRITE processing

Use the new svc_rdma_req_map data type for mapping the client side memory
to the server side memory. Move the DMA mapping to the context pointed to
by each WR individually so that it is unmapped after the WR completes.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
parent ab96dddb
...@@ -63,52 +63,44 @@ ...@@ -63,52 +63,44 @@
* SGE[2..sge_count-2] data from xdr->pages[] * SGE[2..sge_count-2] data from xdr->pages[]
* SGE[sge_count-1] data from xdr->tail. * SGE[sge_count-1] data from xdr->tail.
* *
* The max SGE we need is the length of the XDR / pagesize + one for
* head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
* reserves a page for both the request and the reply header, and this
* array is only concerned with the reply we are assured that we have
* one extra page for the RPCRDMA header.
*/ */
static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, static void xdr_to_sge(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr, struct xdr_buf *xdr,
struct ib_sge *sge, struct svc_rdma_req_map *vec)
int *sge_count)
{ {
/* Max we need is the length of the XDR / pagesize + one for
* head + one for tail + one for RPCRDMA header
*/
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
int sge_no; int sge_no;
u32 byte_count = xdr->len;
u32 sge_bytes; u32 sge_bytes;
u32 page_bytes; u32 page_bytes;
int page_off; u32 page_off;
int page_no; int page_no;
BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
/* Skip the first sge, this is for the RPCRDMA header */ /* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1; sge_no = 1;
/* Head SGE */ /* Head SGE */
sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
xdr->head[0].iov_base, vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
xdr->head[0].iov_len,
DMA_TO_DEVICE);
sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
byte_count -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_no++; sge_no++;
/* pages SGE */ /* pages SGE */
page_no = 0; page_no = 0;
page_bytes = xdr->page_len; page_bytes = xdr->page_len;
page_off = xdr->page_base; page_off = xdr->page_base;
while (byte_count && page_bytes) { while (page_bytes) {
sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); vec->sge[sge_no].iov_base =
sge[sge_no].addr = page_address(xdr->pages[page_no]) + page_off;
ib_dma_map_page(xprt->sc_cm_id->device, sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
xdr->pages[page_no], page_off,
sge_bytes, DMA_TO_DEVICE);
sge_bytes = min(sge_bytes, page_bytes);
byte_count -= sge_bytes;
page_bytes -= sge_bytes; page_bytes -= sge_bytes;
sge[sge_no].length = sge_bytes; vec->sge[sge_no].iov_len = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_no++; sge_no++;
page_no++; page_no++;
...@@ -116,36 +108,24 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, ...@@ -116,36 +108,24 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
} }
/* Tail SGE */ /* Tail SGE */
if (byte_count && xdr->tail[0].iov_len) { if (xdr->tail[0].iov_len) {
sge[sge_no].addr = vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
ib_dma_map_single(xprt->sc_cm_id->device, vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
xdr->tail[0].iov_base,
xdr->tail[0].iov_len,
DMA_TO_DEVICE);
sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
byte_count -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_no++; sge_no++;
} }
BUG_ON(sge_no > sge_max); BUG_ON(sge_no > sge_max);
BUG_ON(byte_count != 0); vec->count = sge_no;
*sge_count = sge_no;
return sge;
} }
/* Assumptions: /* Assumptions:
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/ */
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
u32 rmr, u64 to, u32 rmr, u64 to,
u32 xdr_off, int write_len, u32 xdr_off, int write_len,
struct ib_sge *xdr_sge, int sge_count) struct svc_rdma_req_map *vec)
{ {
struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct ib_send_wr write_wr; struct ib_send_wr write_wr;
struct ib_sge *sge; struct ib_sge *sge;
int xdr_sge_no; int xdr_sge_no;
...@@ -154,25 +134,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, ...@@ -154,25 +134,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
int sge_off; int sge_off;
int bc; int bc;
struct svc_rdma_op_ctxt *ctxt; struct svc_rdma_op_ctxt *ctxt;
int ret = 0;
BUG_ON(sge_count > RPCSVC_MAXPAGES); BUG_ON(vec->count > RPCSVC_MAXPAGES);
dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
"write_len=%d, xdr_sge=%p, sge_count=%d\n", "write_len=%d, vec->sge=%p, vec->count=%lu\n",
rmr, (unsigned long long)to, xdr_off, rmr, (unsigned long long)to, xdr_off,
write_len, xdr_sge, sge_count); write_len, vec->sge, vec->count);
ctxt = svc_rdma_get_context(xprt); ctxt = svc_rdma_get_context(xprt);
ctxt->count = 0; ctxt->direction = DMA_TO_DEVICE;
tmp_sge_ctxt = svc_rdma_get_context(xprt); sge = ctxt->sge;
sge = tmp_sge_ctxt->sge;
/* Find the SGE associated with xdr_off */ /* Find the SGE associated with xdr_off */
for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
xdr_sge_no++) { xdr_sge_no++) {
if (xdr_sge[xdr_sge_no].length > bc) if (vec->sge[xdr_sge_no].iov_len > bc)
break; break;
bc -= xdr_sge[xdr_sge_no].length; bc -= vec->sge[xdr_sge_no].iov_len;
} }
sge_off = bc; sge_off = bc;
...@@ -180,21 +158,27 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, ...@@ -180,21 +158,27 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_no = 0; sge_no = 0;
/* Copy the remaining SGE */ /* Copy the remaining SGE */
while (bc != 0 && xdr_sge_no < sge_count) { while (bc != 0 && xdr_sge_no < vec->count) {
sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
sge_bytes = min((size_t)bc, sge_bytes = min((size_t)bc,
(size_t)(xdr_sge[xdr_sge_no].length-sge_off)); (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
sge[sge_no].length = sge_bytes; sge[sge_no].length = sge_bytes;
sge[sge_no].addr =
ib_dma_map_single(xprt->sc_cm_id->device,
(void *)
vec->sge[xdr_sge_no].iov_base + sge_off,
sge_bytes, DMA_TO_DEVICE);
if (dma_mapping_error(sge[sge_no].addr))
goto err;
sge_off = 0; sge_off = 0;
sge_no++; sge_no++;
ctxt->count++;
xdr_sge_no++; xdr_sge_no++;
bc -= sge_bytes; bc -= sge_bytes;
} }
BUG_ON(bc != 0); BUG_ON(bc != 0);
BUG_ON(xdr_sge_no > sge_count); BUG_ON(xdr_sge_no > vec->count);
/* Prepare WRITE WR */ /* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr); memset(&write_wr, 0, sizeof write_wr);
...@@ -209,21 +193,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, ...@@ -209,21 +193,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
/* Post It */ /* Post It */
atomic_inc(&rdma_stat_write); atomic_inc(&rdma_stat_write);
if (svc_rdma_send(xprt, &write_wr)) { if (svc_rdma_send(xprt, &write_wr))
svc_rdma_put_context(ctxt, 1); goto err;
/* Fatal error, close transport */ return 0;
ret = -EIO; err:
} svc_rdma_put_context(ctxt, 0);
svc_rdma_put_context(tmp_sge_ctxt, 0); /* Fatal error, close transport */
return ret; return -EIO;
} }
static int send_write_chunks(struct svcxprt_rdma *xprt, static int send_write_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp, struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp, struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp, struct svc_rqst *rqstp,
struct ib_sge *sge, struct svc_rdma_req_map *vec)
int sge_count)
{ {
u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
int write_len; int write_len;
...@@ -269,8 +252,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, ...@@ -269,8 +252,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
rs_offset + chunk_off, rs_offset + chunk_off,
xdr_off, xdr_off,
this_write, this_write,
sge, vec);
sge_count);
if (ret) { if (ret) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret); ret);
...@@ -292,8 +274,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, ...@@ -292,8 +274,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp, struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp, struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp, struct svc_rqst *rqstp,
struct ib_sge *sge, struct svc_rdma_req_map *vec)
int sge_count)
{ {
u32 xfer_len = rqstp->rq_res.len; u32 xfer_len = rqstp->rq_res.len;
int write_len; int write_len;
...@@ -341,8 +322,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, ...@@ -341,8 +322,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
rs_offset + chunk_off, rs_offset + chunk_off,
xdr_off, xdr_off,
this_write, this_write,
sge, vec);
sge_count);
if (ret) { if (ret) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret); ret);
...@@ -380,7 +360,7 @@ static int send_reply(struct svcxprt_rdma *rdma, ...@@ -380,7 +360,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
struct page *page, struct page *page,
struct rpcrdma_msg *rdma_resp, struct rpcrdma_msg *rdma_resp,
struct svc_rdma_op_ctxt *ctxt, struct svc_rdma_op_ctxt *ctxt,
int sge_count, struct svc_rdma_req_map *vec,
int byte_count) int byte_count)
{ {
struct ib_send_wr send_wr; struct ib_send_wr send_wr;
...@@ -413,10 +393,15 @@ static int send_reply(struct svcxprt_rdma *rdma, ...@@ -413,10 +393,15 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
/* Determine how many of our SGE are to be transmitted */ /* Determine how many of our SGE are to be transmitted */
for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
sge_bytes = min((size_t)ctxt->sge[sge_no].length, sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
(size_t)byte_count);
byte_count -= sge_bytes; byte_count -= sge_bytes;
ctxt->sge[sge_no].addr =
ib_dma_map_single(rdma->sc_cm_id->device,
vec->sge[sge_no].iov_base,
sge_bytes, DMA_TO_DEVICE);
ctxt->sge[sge_no].length = sge_bytes;
ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
} }
BUG_ON(byte_count != 0); BUG_ON(byte_count != 0);
...@@ -428,8 +413,10 @@ static int send_reply(struct svcxprt_rdma *rdma, ...@@ -428,8 +413,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++; ctxt->count++;
rqstp->rq_respages[page_no] = NULL; rqstp->rq_respages[page_no] = NULL;
/* If there are more pages than SGE, terminate SGE list */
if (page_no+1 >= sge_no)
ctxt->sge[page_no+1].length = 0;
} }
BUG_ON(sge_no > rdma->sc_max_sge); BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr); memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND; ctxt->wr_op = IB_WR_SEND;
...@@ -473,20 +460,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ...@@ -473,20 +460,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
enum rpcrdma_proc reply_type; enum rpcrdma_proc reply_type;
int ret; int ret;
int inline_bytes; int inline_bytes;
struct ib_sge *sge;
int sge_count = 0;
struct page *res_page; struct page *res_page;
struct svc_rdma_op_ctxt *ctxt; struct svc_rdma_op_ctxt *ctxt;
struct svc_rdma_req_map *vec;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
/* Get the RDMA request header. */ /* Get the RDMA request header. */
rdma_argp = xdr_start(&rqstp->rq_arg); rdma_argp = xdr_start(&rqstp->rq_arg);
/* Build an SGE for the XDR */ /* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma); ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE; ctxt->direction = DMA_TO_DEVICE;
sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); vec = svc_rdma_get_req_map();
xdr_to_sge(rdma, &rqstp->rq_res, vec);
inline_bytes = rqstp->rq_res.len; inline_bytes = rqstp->rq_res.len;
...@@ -503,7 +490,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ...@@ -503,7 +490,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
/* Send any write-chunk data and build resp write-list */ /* Send any write-chunk data and build resp write-list */
ret = send_write_chunks(rdma, rdma_argp, rdma_resp, ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
rqstp, sge, sge_count); rqstp, vec);
if (ret < 0) { if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
ret); ret);
...@@ -513,7 +500,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ...@@ -513,7 +500,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
/* Send any reply-list data and update resp reply-list */ /* Send any reply-list data and update resp reply-list */
ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
rqstp, sge, sge_count); rqstp, vec);
if (ret < 0) { if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
ret); ret);
...@@ -521,11 +508,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ...@@ -521,11 +508,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
} }
inline_bytes -= ret; inline_bytes -= ret;
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
inline_bytes); inline_bytes);
svc_rdma_put_req_map(vec);
dprintk("svcrdma: send_reply returns %d\n", ret); dprintk("svcrdma: send_reply returns %d\n", ret);
return ret; return ret;
error: error:
svc_rdma_put_req_map(vec);
svc_rdma_put_context(ctxt, 0); svc_rdma_put_context(ctxt, 0);
put_page(res_page); put_page(res_page);
return ret; return ret;
......
...@@ -387,10 +387,13 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) ...@@ -387,10 +387,13 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
switch (ctxt->wr_op) { switch (ctxt->wr_op) {
case IB_WR_SEND: case IB_WR_SEND:
case IB_WR_RDMA_WRITE:
svc_rdma_put_context(ctxt, 1); svc_rdma_put_context(ctxt, 1);
break; break;
case IB_WR_RDMA_WRITE:
svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ: case IB_WR_RDMA_READ:
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment