Commit d40ace0c authored by Linus Torvalds

Merge branch 'for-2.6.26' of git://linux-nfs.org/~bfields/linux

* 'for-2.6.26' of git://linux-nfs.org/~bfields/linux: (25 commits)
  svcrdma: Verify read-list fits within RPCSVC_MAXPAGES
  svcrdma: Change svc_rdma_send_error return type to void
  svcrdma: Copy transport address and arm CQ before calling rdma_accept
  svcrdma: Set rqstp transport address in rdma_read_complete function
  svcrdma: Use ib verbs version of dma_unmap
  svcrdma: Cleanup queued, but unprocessed I/O in svc_rdma_free
  svcrdma: Move the QP and cm_id destruction to svc_rdma_free
  svcrdma: Add reference for each SQ/RQ WR
  svcrdma: Move destroy to kernel thread
  svcrdma: Shrink scope of spinlock on RQ CQ
  svcrdma: Use standard Linux lists for context cache
  svcrdma: Simplify RDMA_READ deferral buffer management
  svcrdma: Remove unused READ_DONE context flags bit
  svcrdma: Return error from rdma_read_xdr so caller knows to free context
  svcrdma: Fix error handling during listening endpoint creation
  svcrdma: Free context on post_recv error in send_reply
  svcrdma: Free context on ib_post_recv error
  svcrdma: Add put of connection ESTABLISHED reference in rdma_cma_handler
  svcrdma: Fix return value in svc_rdma_send
  svcrdma: Fix race with dto_tasklet in svc_rdma_send
  ...
parents e616c630 68432a03
@@ -419,9 +419,9 @@ static int do_probe_callback(void *data)
 out_release_client:
 	rpc_shutdown_client(client);
 out_err:
-	put_nfs4_client(clp);
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 		(int)clp->cl_name.len, clp->cl_name.data);
+	put_nfs4_client(clp);
 	return status;
 }
...
@@ -71,7 +71,8 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
-	struct svc_rdma_op_ctxt *next;
+	struct svc_rdma_op_ctxt *read_hdr;
+	struct list_head free_list;
 	struct xdr_buf arg;
 	struct list_head dto_q;
 	enum ib_wr_opcode wr_op;
@@ -85,7 +86,6 @@ struct svc_rdma_op_ctxt {
 	struct page *pages[RPCSVC_MAXPAGES];
 };
-#define RDMACTXT_F_READ_DONE	1
 #define RDMACTXT_F_LAST_CTXT	2

 struct svcxprt_rdma {
@@ -104,7 +104,8 @@ struct svcxprt_rdma {
 	struct ib_pd         *sc_pd;
-	struct svc_rdma_op_ctxt  *sc_ctxt_head;
+	atomic_t             sc_ctxt_used;
+	struct list_head     sc_ctxt_free;
 	int                  sc_ctxt_cnt;
 	int                  sc_ctxt_bump;
 	int                  sc_ctxt_max;
@@ -123,6 +124,7 @@ struct svcxprt_rdma {
 	struct list_head     sc_dto_q;	/* DTO tasklet I/O pending Q */
 	struct list_head     sc_read_complete_q;
 	spinlock_t           sc_read_complete_lock;
+	struct work_struct   sc_work;
 };
 /* sc_flags */
 #define RDMAXPRT_RQ_PENDING	1
@@ -164,8 +166,8 @@ extern int svc_rdma_sendto(struct svc_rqst *);
 /* svc_rdma_transport.c */
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
-extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
-			       enum rpcrdma_errcode);
+extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
+				enum rpcrdma_errcode);
 struct page *svc_rdma_get_page(void);
 extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
...
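Note: this header hunk carries the heart of the "Use standard Linux lists for context cache" and "Add reference for each SQ/RQ WR" commits above: the hand-rolled sc_ctxt_head/next chain becomes a standard list_head free list plus an atomic in-use count. A minimal sketch of the resulting get/put pattern; the sc_ctxt_lock spinlock name is an assumption here, since the real locking lives in svc_rdma_transport.c and is not part of this hunk:

/* Sketch only -- sc_ctxt_lock is an assumed lock name. */
static struct svc_rdma_op_ctxt *get_ctxt_sketch(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_op_ctxt *ctxt = NULL;

	spin_lock(&xprt->sc_ctxt_lock);
	if (!list_empty(&xprt->sc_ctxt_free)) {
		ctxt = list_entry(xprt->sc_ctxt_free.next,
				  struct svc_rdma_op_ctxt, free_list);
		list_del_init(&ctxt->free_list);
	}
	spin_unlock(&xprt->sc_ctxt_lock);
	if (ctxt)
		atomic_inc(&xprt->sc_ctxt_used);	/* one count per outstanding WR */
	return ctxt;
}

static void put_ctxt_sketch(struct svcxprt_rdma *xprt,
			    struct svc_rdma_op_ctxt *ctxt)
{
	spin_lock(&xprt->sc_ctxt_lock);
	list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
	spin_unlock(&xprt->sc_ctxt_lock);
	atomic_dec(&xprt->sc_ctxt_used);
}

With sc_ctxt_used tracking every outstanding SQ/RQ WR, svc_rdma_free can presumably wait for the count to drain before destroying the QP, which is what the new sc_work field ("Move destroy to kernel thread") supports.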
@@ -6,30 +6,9 @@

 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <net/sock.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <asm/ioctls.h>
-#include <linux/sunrpc/types.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc_xprt.h>
@@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	if (!(xprt->xpt_flags &
 	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
 		return;
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
-		return;

 	cpu = get_cpu();
 	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
...
@@ -278,7 +278,7 @@ static int ip_map_show(struct seq_file *m,
 	dom = im->m_client->h.name;

 	if (ipv6_addr_v4mapped(&addr)) {
-		seq_printf(m, "%s" NIPQUAD_FMT "%s\n",
+		seq_printf(m, "%s " NIPQUAD_FMT " %s\n",
 			   im->m_class,
 			   ntohl(addr.s6_addr32[3]) >> 24 & 0xff,
 			   ntohl(addr.s6_addr32[3]) >> 16 & 0xff,
@@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m,
 			   ntohl(addr.s6_addr32[3]) >> 0 & 0xff,
 			   dom);
 	} else {
-		seq_printf(m, "%s" NIP6_FMT "%s\n",
+		seq_printf(m, "%s " NIP6_FMT " %s\n",
 			   im->m_class, NIP6(addr), dom);
 	}
 	return 0;
...
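The added spaces matter because NIPQUAD_FMT expands to "%u.%u.%u.%u", so the old format string ran the class, address, and domain together in the seq_file output. A tiny userspace demonstration with illustrative values:

#include <stdio.h>

#define NIPQUAD_FMT "%u.%u.%u.%u"	/* as in the 2.6 kernel headers */

int main(void)
{
	/* old format: prints "nfsd192.168.0.1example.com" */
	printf("%s" NIPQUAD_FMT "%s\n", "nfsd", 192, 168, 0, 1, "example.com");
	/* new format: prints "nfsd 192.168.0.1 example.com" */
	printf("%s " NIPQUAD_FMT " %s\n", "nfsd", 192, 168, 0, 1, "example.com");
	return 0;
}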
@@ -260,11 +260,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
  * On our side, we need to read into a pagelist. The first page immediately
  * follows the RPC header.
  *
- * This function returns 1 to indicate success. The data is not yet in
+ * This function returns:
+ * 0 - No error and no read-list found.
+ *
+ * 1 - Successful read-list processing. The data is not yet in
  * the pagelist and therefore the RPC request must be deferred. The
  * I/O completion will enqueue the transport again and
  * svc_rdma_recvfrom will complete the request.
  *
+ * <0 - Error processing/posting read-list.
+ *
  * NOTE: The ctxt must not be touched after the last WR has been posted
  * because the I/O completion processing may occur on another
  * processor and free / modify the context. Ne touche pas!
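With the tri-state return value the caller has three distinct paths; a condensed sketch of the dispatch, mirroring the svc_rdma_recvfrom change later in this diff:

	/* Condensed from svc_rdma_recvfrom below; surrounding code elided. */
	ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
	if (ret > 0) {
		/* Read-list posted; defer until the RDMA_READ data arrives. */
		svc_xprt_received(xprt);
		return 0;
	}
	if (ret < 0) {
		/* Posting failed; the caller still owns ctxt and must free it. */
		svc_rdma_put_context(ctxt, 1);
		return 0;
	}
	/* ret == 0: no read-list; the request is already complete in rq_arg. */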
@@ -284,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	u64 sgl_offset;
 	struct rpcrdma_read_chunk *ch;
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_op_ctxt *head;
 	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
 	struct svc_rdma_op_ctxt *tmp_ch_ctxt;
 	struct chunk_sge *ch_sge_ary;
@@ -302,25 +306,19 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;

 	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
+	if (ch_count > RPCSVC_MAXPAGES)
+		return -EINVAL;
 	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
 				    sge, ch_sge_ary,
 				    ch_count, byte_count);
-	head = svc_rdma_get_context(xprt);
 	sgl_offset = 0;
 	ch_no = 0;

 	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
 	     ch->rc_discrim != 0; ch++, ch_no++) {
 next_sge:
-		if (!ctxt)
-			ctxt = head;
-		else {
-			ctxt->next = svc_rdma_get_context(xprt);
-			ctxt = ctxt->next;
-		}
-		ctxt->next = NULL;
+		ctxt = svc_rdma_get_context(xprt);
 		ctxt->direction = DMA_FROM_DEVICE;
-		clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

 		/* Prepare READ WR */
@@ -347,20 +345,15 @@ next_sge:
 			 * the client and the RPC needs to be enqueued.
 			 */
 			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-			ctxt->next = hdr_ctxt;
-			hdr_ctxt->next = head;
+			ctxt->read_hdr = hdr_ctxt;
 		}
 		/* Post the read */
 		err = svc_rdma_send(xprt, &read_wr);
 		if (err) {
-			printk(KERN_ERR "svcrdma: Error posting send = %d\n",
+			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
 			       err);
-			/*
-			 * Break the circular list so free knows when
-			 * to stop if the error happened to occur on
-			 * the last read
-			 */
-			ctxt->next = NULL;
+			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			svc_rdma_put_context(ctxt, 0);
 			goto out;
 		}
 		atomic_inc(&rdma_stat_read);
@@ -371,7 +364,7 @@ next_sge:
 			goto next_sge;
 		}
 		sgl_offset = 0;
-		err = 0;
+		err = 1;
 	}

 out:
@@ -389,25 +382,12 @@ next_sge:
 	while (rqstp->rq_resused)
 		rqstp->rq_respages[--rqstp->rq_resused] = NULL;

-	if (err) {
-		printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		/* Free the linked list of read contexts */
-		while (head != NULL) {
-			ctxt = head->next;
-			svc_rdma_put_context(head, 1);
-			head = ctxt;
-		}
-		return 0;
-	}
-
-	return 1;
+	return err;
 }
 static int rdma_read_complete(struct svc_rqst *rqstp,
-			      struct svc_rdma_op_ctxt *data)
+			      struct svc_rdma_op_ctxt *head)
 {
-	struct svc_rdma_op_ctxt *head = data->next;
 	int page_no;
 	int ret;
@@ -433,21 +413,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 	rqstp->rq_arg.len = head->arg.len;
 	rqstp->rq_arg.buflen = head->arg.buflen;

+	/* Free the context */
+	svc_rdma_put_context(head, 0);

 	/* XXX: What should this be? */
 	rqstp->rq_prot = IPPROTO_MAX;
+	svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
-
-	/*
-	 * Free the contexts we used to build the RDMA_READ. We have
-	 * to be careful here because the context list uses the same
-	 * next pointer used to chain the contexts associated with the
-	 * RDMA_READ
-	 */
-	data->next = NULL;	/* terminate circular list */
-	do {
-		data = head->next;
-		svc_rdma_put_context(head, 0);
-		head = data;
-	} while (head != NULL);

 	ret = rqstp->rq_arg.head[0].iov_len
 		+ rqstp->rq_arg.page_len
@@ -457,8 +428,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 	       ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
 	       rqstp->rq_arg.head[0].iov_len);

-	/* Indicate that we've consumed an RQ credit */
-	rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
 	svc_xprt_received(rqstp->rq_xprt);
 	return ret;
 }
@@ -480,13 +449,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	dprintk("svcrdma: rqstp=%p\n", rqstp);

-	/*
-	 * The rq_xprt_ctxt indicates if we've consumed an RQ credit
-	 * or not. It is used in the rdma xpo_release_rqst function to
-	 * determine whether or not to return an RQ WQE to the RQ.
-	 */
-	rqstp->rq_xprt_ctxt = NULL;
-
 	spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
 	if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
 		ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
@@ -537,21 +499,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	/* If the request is invalid, reply with an error */
 	if (len < 0) {
 		if (len == -ENOSYS)
-			(void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
+			svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
 		goto close_out;
 	}

-	/* Read read-list data. If we would need to wait, defer
-	 * it. Not that in this case, we don't return the RQ credit
-	 * until after the read completes.
-	 */
-	if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
+	/* Read read-list data. */
+	ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+	if (ret > 0) {
+		/* read-list posted, defer until data received from client. */
 		svc_xprt_received(xprt);
 		return 0;
 	}
-
-	/* Indicate we've consumed an RQ credit */
-	rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
+	if (ret < 0) {
+		/* Post of read-list failed, free context. */
+		svc_rdma_put_context(ctxt, 1);
+		return 0;
+	}

 	ret = rqstp->rq_arg.head[0].iov_len
 		+ rqstp->rq_arg.page_len
@@ -569,11 +532,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	return ret;

  close_out:
-	if (ctxt) {
+	if (ctxt)
 		svc_rdma_put_context(ctxt, 1);
-		/* Indicate we've consumed an RQ credit */
-		rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
-	}
 	dprintk("svcrdma: transport %p is closing\n", xprt);
 	/*
 	 * Set the close bit and enqueue it. svc_recv will see the
...
@@ -389,6 +389,17 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	int page_no;
 	int ret;

+	/* Post a recv buffer to handle another request. */
+	ret = svc_rdma_post_recv(rdma);
+	if (ret) {
+		printk(KERN_INFO
+		       "svcrdma: could not post a receive buffer, err=%d."
+		       "Closing transport %p.\n", ret, rdma);
+		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		svc_rdma_put_context(ctxt, 0);
+		return -ENOTCONN;
+	}
+
 	/* Prepare the context */
 	ctxt->pages[0] = page;
 	ctxt->count = 1;
...
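The receive is posted before the reply SEND so that the next request the client issues always finds an RQ WQE waiting. A sketch of that ordering contract as a hypothetical helper; reply_with_credit is not a real function in this patch, the real logic is inline in send_reply above:

/* Hypothetical helper restating the ordering send_reply now enforces. */
static int reply_with_credit(struct svcxprt_rdma *rdma,
			     struct svc_rdma_op_ctxt *ctxt)
{
	/* Replace the RQ WQE this request consumed before replying. */
	int ret = svc_rdma_post_recv(rdma);
	if (ret) {
		/* No receive credit: close rather than run the RQ dry. */
		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
		svc_rdma_put_context(ctxt, 0);
		return -ENOTCONN;	/* caller drops the reply */
	}
	/* ... build the SGE list and post the SEND WR as send_reply does ... */
	return 0;
}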