Commit e509be89 authored by Ralph Campbell's avatar Ralph Campbell Committed by Roland Dreier

IB/ipath: Fix many locking issues when switching to error state

The send DMA hardware queue voided a number of prior assumptions about
when a send is complete which led to completions being generated out of
order.  There were also a number of locking issues when switching the QP
to the error or reset states, and we implement the IB_QPS_SQD state.
Signed-off-by: default avatarRalph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent 53dc1ca1
......@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
{
struct ipath_qp *q, **qpp;
unsigned long flags;
int fnd = 0;
spin_lock_irqsave(&qpt->lock, flags);
......@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
*qpp = qp->next;
qp->next = NULL;
atomic_dec(&qp->refcount);
fnd = 1;
break;
}
}
spin_unlock_irqrestore(&qpt->lock, flags);
if (!fnd)
return;
free_qpn(qpt, qp->ibqp.qp_num);
wait_event(qp->wait, !atomic_read(&qp->refcount));
}
/**
* ipath_free_all_qps - remove all QPs from the table
* ipath_free_all_qps - check for QPs still in use
* @qpt: the QP table to empty
*
* There should not be any QPs still in use.
* Free memory for table.
*/
void ipath_free_all_qps(struct ipath_qp_table *qpt)
unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
{
unsigned long flags;
struct ipath_qp *qp, *nqp;
u32 n;
struct ipath_qp *qp;
u32 n, qp_inuse = 0;
spin_lock_irqsave(&qpt->lock, flags);
for (n = 0; n < qpt->max; n++) {
spin_lock_irqsave(&qpt->lock, flags);
qp = qpt->table[n];
qpt->table[n] = NULL;
spin_unlock_irqrestore(&qpt->lock, flags);
while (qp) {
nqp = qp->next;
free_qpn(qpt, qp->ibqp.qp_num);
if (!atomic_dec_and_test(&qp->refcount) ||
!ipath_destroy_qp(&qp->ibqp))
ipath_dbg("QP memory leak!\n");
qp = nqp;
}
for (; qp; qp = qp->next)
qp_inuse++;
}
spin_unlock_irqrestore(&qpt->lock, flags);
for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
if (qpt->map[n].page)
free_page((unsigned long)qpt->map[n].page);
}
free_page((unsigned long) qpt->map[n].page);
return qp_inuse;
}
/**
......@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
qp->s_busy = 0;
atomic_set(&qp->s_dma_busy, 0);
qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0;
qp->s_wqe = NULL;
qp->s_pkt_delay = 0;
qp->s_draining = 0;
qp->s_psn = 0;
qp->r_psn = 0;
qp->r_msn = 0;
......@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
qp->r_wrid_valid = 0;
qp->r_aflags = 0;
qp->r_flags = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
......@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
qp->s_wait_credit = 0;
memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
qp->r_head_ack_queue = 0;
qp->s_tail_ack_queue = 0;
......@@ -370,7 +359,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0;
}
qp->r_reuse_sge = 0;
}
/**
......@@ -402,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
/* Schedule the sending tasklet to drain the send work queue. */
if (qp->s_last != qp->s_head)
ipath_schedule_send(qp);
memset(&wc, 0, sizeof(wc));
wc.qp = &qp->ibqp;
wc.src_qp = 0;
wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = 0;
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
if (qp->r_wrid_valid) {
qp->r_wrid_valid = 0;
wc.opcode = IB_WC_RECV;
if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
wc.wr_id = qp->r_wr_id;
wc.opcode = IB_WC_RECV;
wc.status = err;
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
}
wc.status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
wc.wr_id = wqe->wr.wr_id;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
}
qp->s_cur = qp->s_tail = qp->s_head;
qp->s_hdrwords = 0;
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
if (qp->r_rq.wq) {
struct ipath_rwq *wq;
u32 head;
......@@ -450,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
tail = wq->tail;
if (tail >= qp->r_rq.size)
tail = 0;
wc.opcode = IB_WC_RECV;
while (tail != head) {
wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
if (++tail >= qp->r_rq.size)
......@@ -482,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_qp *qp = to_iqp(ibqp);
enum ib_qp_state cur_state, new_state;
unsigned long flags;
int lastwqe = 0;
int ret;
spin_lock_irqsave(&qp->s_lock, flags);
spin_lock_irq(&qp->s_lock);
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state;
......@@ -539,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
switch (new_state) {
case IB_QPS_RESET:
if (qp->state != IB_QPS_RESET) {
qp->state = IB_QPS_RESET;
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
if (!list_empty(&qp->piowait))
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
qp->s_flags &= ~IPATH_S_ANY_WAIT;
spin_unlock_irq(&qp->s_lock);
/* Stop the sending tasklet */
tasklet_kill(&qp->s_task);
wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
spin_lock_irq(&qp->s_lock);
}
ipath_reset_qp(qp, ibqp->qp_type);
break;
case IB_QPS_SQD:
qp->s_draining = qp->s_last != qp->s_cur;
qp->state = new_state;
break;
case IB_QPS_SQE:
if (qp->ibqp.qp_type == IB_QPT_RC)
goto inval;
qp->state = new_state;
break;
case IB_QPS_ERR:
lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
qp->state = new_state;
break;
}
if (attr_mask & IB_QP_PKEY_INDEX)
......@@ -601,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
qp->s_max_rd_atomic = attr->max_rd_atomic;
qp->state = new_state;
spin_unlock_irqrestore(&qp->s_lock, flags);
spin_unlock_irq(&qp->s_lock);
if (lastwqe) {
struct ib_event ev;
......@@ -616,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto bail;
inval:
spin_unlock_irqrestore(&qp->s_lock, flags);
spin_unlock_irq(&qp->s_lock);
ret = -EINVAL;
bail:
......@@ -647,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->pkey_index = qp->s_pkey_index;
attr->alt_pkey_index = 0;
attr->en_sqd_async_notify = 0;
attr->sq_draining = 0;
attr->sq_draining = qp->s_draining;
attr->max_rd_atomic = qp->s_max_rd_atomic;
attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
attr->min_rnr_timer = qp->r_min_rnr_timer;
......@@ -837,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait);
init_waitqueue_head(&qp->wait_dma);
tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
INIT_LIST_HEAD(&qp->piowait);
INIT_LIST_HEAD(&qp->timerwait);
......@@ -930,6 +924,7 @@ bail_ip:
else
vfree(qp->r_rq.wq);
ipath_free_qp(&dev->qp_table, qp);
free_qpn(&dev->qp_table, qp->ibqp.qp_num);
bail_qp:
kfree(qp);
bail_swq:
......@@ -951,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_ibdev *dev = to_idev(ibqp->device);
unsigned long flags;
spin_lock_irqsave(&qp->s_lock, flags);
qp->state = IB_QPS_ERR;
spin_unlock_irqrestore(&qp->s_lock, flags);
spin_lock(&dev->n_qps_lock);
dev->n_qps_allocated--;
spin_unlock(&dev->n_qps_lock);
/* Make sure HW and driver activity is stopped. */
spin_lock_irq(&qp->s_lock);
if (qp->state != IB_QPS_RESET) {
qp->state = IB_QPS_RESET;
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
if (!list_empty(&qp->piowait))
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
qp->s_flags &= ~IPATH_S_ANY_WAIT;
spin_unlock_irq(&qp->s_lock);
/* Stop the sending tasklet */
tasklet_kill(&qp->s_task);
wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
} else
spin_unlock_irq(&qp->s_lock);
/* Stop the sending tasklet. */
tasklet_kill(&qp->s_task);
ipath_free_qp(&dev->qp_table, qp);
if (qp->s_tx) {
atomic_dec(&qp->refcount);
if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
kfree(qp->s_tx->txreq.map_addr);
spin_lock_irq(&dev->pending_lock);
list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
spin_unlock_irq(&dev->pending_lock);
qp->s_tx = NULL;
}
/* Make sure the QP isn't on the timeout list. */
spin_lock_irqsave(&dev->pending_lock, flags);
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
if (!list_empty(&qp->piowait))
list_del_init(&qp->piowait);
if (qp->s_tx)
list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
spin_unlock_irqrestore(&dev->pending_lock, flags);
wait_event(qp->wait, !atomic_read(&qp->refcount));
/*
* Make sure that the QP is not in the QPN table so receive
* interrupts will discard packets for this QP. XXX Also remove QP
* from multicast table.
*/
if (atomic_read(&qp->refcount) != 0)
ipath_free_qp(&dev->qp_table, qp);
/* all user's cleaned up, mark it available */
free_qpn(&dev->qp_table, qp->ibqp.qp_num);
spin_lock(&dev->n_qps_lock);
dev->n_qps_allocated--;
spin_unlock(&dev->n_qps_lock);
if (qp->ip)
kref_put(&qp->ip->ref, ipath_release_mmap_info);
......@@ -1055,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
}
/* Restart sending if it was blocked due to lack of credits. */
if (qp->s_cur != qp->s_head &&
if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
qp->s_cur != qp->s_head &&
(qp->s_lsn == (u32) -1 ||
ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
qp->s_lsn + 1) <= 0))
tasklet_hi_schedule(&qp->s_task);
ipath_schedule_send(qp);
}
This diff is collapsed.
......@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
* ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
* @qp: the QP
*
* Called with the QP s_lock held and interrupts disabled.
* XXX Use a simple list for now. We might need a priority
* queue if we have lots of QPs waiting for RNR timeouts
* but that should be rare.
......@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
unsigned long flags;
spin_lock_irqsave(&dev->pending_lock, flags);
/* We already did a spin_lock_irqsave(), so just use spin_lock */
spin_lock(&dev->pending_lock);
if (list_empty(&dev->rnrwait))
list_add(&qp->timerwait, &dev->rnrwait);
else {
......@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
nqp->s_rnr_timeout -= qp->s_rnr_timeout;
list_add(&qp->timerwait, l);
}
spin_unlock_irqrestore(&dev->pending_lock, flags);
spin_unlock(&dev->pending_lock);
}
/**
......@@ -185,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
spin_lock_irqsave(&rq->lock, flags);
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
ret = 0;
goto unlock;
}
wq = rq->wq;
tail = wq->tail;
/* Validate tail before using it since it is user writable. */
......@@ -192,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
tail = 0;
do {
if (unlikely(tail == wq->head)) {
spin_unlock_irqrestore(&rq->lock, flags);
ret = 0;
goto bail;
goto unlock;
}
/* Make sure entry is read after head index is read. */
smp_rmb();
......@@ -207,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
wq->tail = tail;
ret = 1;
qp->r_wrid_valid = 1;
set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
if (handler) {
u32 n;
......@@ -234,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
goto bail;
}
}
unlock:
spin_unlock_irqrestore(&rq->lock, flags);
bail:
return ret;
}
......@@ -263,35 +268,59 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
atomic64_t *maddr;
enum ib_wc_status send_status;
/*
* Note that we check the responder QP state after
* checking the requester's state.
*/
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
if (!qp) {
dev->n_pkt_drops++;
return;
}
again:
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
sqp->s_rnr_timeout) {
spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
}
/* Return if we are already busy processing a work request. */
if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
goto unlock;
/* Get the next send request. */
if (sqp->s_last == sqp->s_head) {
/* Send work queue is empty. */
spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
sqp->s_flags |= IPATH_S_BUSY;
again:
if (sqp->s_last == sqp->s_head)
goto clr_busy;
wqe = get_swqe_ptr(sqp, sqp->s_last);
/* Return if it is not OK to start a new work reqeust. */
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
goto clr_busy;
/* We are in the error state, flush the work request. */
send_status = IB_WC_WR_FLUSH_ERR;
goto flush_send;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
* We increment s_cur to indicate s_last is in progress.
*/
wqe = get_swqe_ptr(sqp, sqp->s_last);
if (sqp->s_last == sqp->s_cur) {
if (++sqp->s_cur >= sqp->s_size)
sqp->s_cur = 0;
}
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
dev->n_pkt_drops++;
/*
* For RC, the requester would timeout and retry so
* shortcut the timeouts and just signal too many retries.
*/
if (sqp->ibqp.qp_type == IB_QPT_RC)
send_status = IB_WC_RETRY_EXC_ERR;
else
send_status = IB_WC_SUCCESS;
goto serr;
}
memset(&wc, 0, sizeof wc);
send_status = IB_WC_SUCCESS;
......@@ -396,8 +425,7 @@ again:
sqp->s_len -= len;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
wqe->wr.opcode == IB_WR_RDMA_READ)
if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
......@@ -417,6 +445,8 @@ again:
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
spin_lock_irqsave(&sqp->s_lock, flags);
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
ipath_send_complete(sqp, wqe, send_status);
goto again;
......@@ -437,11 +467,12 @@ rnr_nak:
sqp->s_rnr_retry--;
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
goto unlock;
goto clr_busy;
sqp->s_flags |= IPATH_S_WAITING;
dev->n_rnr_naks++;
sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
goto unlock;
goto clr_busy;
inv_err:
send_status = IB_WC_REM_INV_REQ_ERR;
......@@ -473,17 +504,19 @@ serr:
}
goto done;
}
clr_busy:
sqp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
if (atomic_dec_and_test(&qp->refcount))
if (qp && atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{
if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
qp->ibqp.qp_type == IB_QPT_SMI) {
qp->ibqp.qp_type == IB_QPT_SMI) {
unsigned long flags;
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
......@@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
* @dev: the device we ran out of buffers on
*
* Called when we run out of PIO buffers.
* If we are now in the error state, return zero to flush the
* send work request.
*/
static void ipath_no_bufs_available(struct ipath_qp *qp,
static int ipath_no_bufs_available(struct ipath_qp *qp,
struct ipath_ibdev *dev)
{
unsigned long flags;
int ret = 1;
/*
* Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
* could be called. If we are still in the tasklet function,
* tasklet_hi_schedule() will not call us until the next time
* tasklet_hi_schedule() is called.
* We leave the busy flag set so that another post send doesn't
* try to put the same QP on the piowait list again.
* could be called. Therefore, put QP on the piowait list before
* enabling the PIO avail interrupt.
*/
spin_lock_irqsave(&dev->pending_lock, flags);
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock_irqrestore(&dev->pending_lock, flags);
want_buffer(dev->dd, qp);
dev->n_piowait++;
spin_lock_irqsave(&qp->s_lock, flags);
if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
dev->n_piowait++;
qp->s_flags |= IPATH_S_WAITING;
qp->s_flags &= ~IPATH_S_BUSY;
spin_lock(&dev->pending_lock);
if (list_empty(&qp->piowait))
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock(&dev->pending_lock);
} else
ret = 0;
spin_unlock_irqrestore(&qp->s_lock, flags);
if (ret)
want_buffer(dev->dd, qp);
return ret;
}
/**
......@@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data)
struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int (*make_req)(struct ipath_qp *qp);
if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
goto bail;
unsigned long flags;
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) &&
qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
ipath_ruc_loopback(qp);
goto clear;
goto bail;
}
if (qp->ibqp.qp_type == IB_QPT_RC)
......@@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data)
else
make_req = ipath_make_ud_req;
spin_lock_irqsave(&qp->s_lock, flags);
/* Return if we are already busy processing a work request. */
if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
goto bail;
}
qp->s_flags |= IPATH_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, flags);
again:
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
......@@ -623,8 +677,8 @@ again:
*/
if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size)) {
ipath_no_bufs_available(qp, dev);
goto bail;
if (ipath_no_bufs_available(qp, dev))
goto bail;
}
dev->n_unicast_xmit++;
/* Record that we sent the packet and s_hdr is empty. */
......@@ -633,16 +687,20 @@ again:
if (make_req(qp))
goto again;
clear:
clear_bit(IPATH_S_BUSY, &qp->s_busy);
bail:;
}
/*
* This should be called with s_lock held.
*/
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
enum ib_wc_status status)
{
unsigned long flags;
u32 last;
u32 old_last, last;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
return;
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
......@@ -661,10 +719,14 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
status != IB_WC_SUCCESS);
}
spin_lock_irqsave(&qp->s_lock, flags);
last = qp->s_last;
old_last = last = qp->s_last;
if (++last >= qp->s_size)
last = 0;
qp->s_last = last;
spin_unlock_irqrestore(&qp->s_lock, flags);
if (qp->s_cur == old_last)
qp->s_cur = last;
if (qp->s_tail == old_last)
qp->s_tail = last;
if (qp->state == IB_QPS_SQD && last == qp->s_cur)
qp->s_draining = 0;
}
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
......@@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp)
{
struct ipath_other_headers *ohdr;
struct ipath_swqe *wqe;
unsigned long flags;
u32 hwords;
u32 bth0;
u32 len;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
int ret = 0;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head)
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_last);
ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
ohdr = &qp->s_hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
......@@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp)
qp->s_wqe = NULL;
switch (qp->s_state) {
default:
if (!(ib_ipath_state_ops[qp->state] &
IPATH_PROCESS_NEXT_SEND_OK))
goto bail;
/* Check if send work queue is empty. */
if (qp->s_cur == qp->s_head)
goto done;
goto bail;
/*
* Start a new request.
*/
......@@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
break;
default:
goto done;
goto bail;
}
break;
......@@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp)
ipath_make_ruc_header(to_idev(qp->ibqp.device),
qp, ohdr, bth0 | (qp->s_state << 24),
qp->s_next_psn++ & IPATH_PSN_MASK);
done:
ret = 1;
goto unlock;
done:
bail:
qp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
......@@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
*/
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
wc.imm_data = 0;
wc.wc_flags = 0;
memset(&wc, 0, sizeof wc);
/* Compare the PSN verses the expected PSN. */
if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
......@@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
if (qp->r_reuse_sge) {
qp->r_reuse_sge = 0;
if (qp->r_flags & IPATH_R_REUSE_SGE) {
qp->r_flags &= ~IPATH_R_REUSE_SGE;
qp->r_sge = qp->s_rdma_read_sge;
} else if (!ipath_get_rwqe(qp, 0)) {
dev->n_pkt_drops++;
......@@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4))) {
qp->r_reuse_sge = 1;
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) {
qp->r_reuse_sge = 1;
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
......@@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
qp->r_reuse_sge = 1;
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
......@@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
tlen -= (hdrsize + pad + 4);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len)) {
qp->r_reuse_sge = 1;
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
......@@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
......@@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
dev->n_pkt_drops++;
goto done;
}
if (qp->r_reuse_sge)
qp->r_reuse_sge = 0;
if (qp->r_flags & IPATH_R_REUSE_SGE)
qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 1)) {
dev->n_pkt_drops++;
goto done;
......
......@@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
u32 length;
qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
if (!qp) {
if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
dev->n_pkt_drops++;
goto send_comp;
goto done;
}
rsge.sg_list = NULL;
......@@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
* present on the wire.
*/
length = swqe->length;
memset(&wc, 0, sizeof wc);
wc.byte_len = length + sizeof(struct ib_grh);
if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = swqe->wr.ex.imm_data;
} else {
wc.wc_flags = 0;
wc.imm_data = 0;
}
/*
......@@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
}
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp = &qp->ibqp;
wc.src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */
......@@ -248,8 +245,7 @@ drop:
kfree(rsge.sg_list);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
send_comp:
ipath_send_complete(sqp, swqe, IB_WC_SUCCESS);
done:;
}
/**
......@@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
struct ipath_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct ipath_swqe *wqe;
unsigned long flags;
u32 nwords;
u32 extra_bytes;
u32 bth0;
......@@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp)
u16 lid;
int ret = 0;
if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)))
goto bail;
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head)
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_last);
ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
if (qp->s_cur == qp->s_head)
goto bail;
wqe = get_swqe_ptr(qp, qp->s_cur);
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
/* Construct the header. */
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
......@@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp)
dev->n_unicast_xmit++;
} else {
dev->n_unicast_xmit++;
lid = ah_attr->dlid &
~((1 << dev->dd->ipath_lmc) - 1);
lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
if (unlikely(lid == dev->dd->ipath_lid)) {
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
* it would be out of order.
* XXX Instead of waiting, we could queue a
* zero length descriptor so we get a callback.
*/
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags);
ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
}
......@@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp)
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
ret = 1;
goto unlock;
bail:
qp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
......@@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/*
* Get the next work request entry to find where to put the data.
*/
if (qp->r_reuse_sge)
qp->r_reuse_sge = 0;
if (qp->r_flags & IPATH_R_REUSE_SGE)
qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 0)) {
/*
* Count VL15 packets dropped due to no receive buffer.
......@@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
}
/* Silently drop packets which are too big. */
if (wc.byte_len > qp->r_len) {
qp->r_reuse_sge = 1;
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto bail;
}
......@@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
qp->r_wrid_valid = 0;
if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
goto bail;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
......
......@@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq);
int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq,
u32 counter);
void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq);
......
This diff is collapsed.
......@@ -74,6 +74,11 @@
#define IPATH_POST_RECV_OK 0x02
#define IPATH_PROCESS_RECV_OK 0x04
#define IPATH_PROCESS_SEND_OK 0x08
#define IPATH_PROCESS_NEXT_SEND_OK 0x10
#define IPATH_FLUSH_SEND 0x20
#define IPATH_FLUSH_RECV 0x40
#define IPATH_PROCESS_OR_FLUSH_SEND \
(IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
......@@ -353,12 +358,14 @@ struct ipath_qp {
struct ib_qp ibqp;
struct ipath_qp *next; /* link list for QPN hash table */
struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
struct list_head piowait; /* link for wait PIO buf */
struct list_head timerwait; /* link for waiting for timeouts */
struct ib_ah_attr remote_ah_attr;
struct ipath_ib_header s_hdr; /* next packet header to send */
atomic_t refcount;
wait_queue_head_t wait;
wait_queue_head_t wait_dma;
struct tasklet_struct s_task;
struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge;
......@@ -369,7 +376,7 @@ struct ipath_qp {
struct ipath_sge_state s_rdma_read_sge;
struct ipath_sge_state r_sge; /* current receive data */
spinlock_t s_lock;
unsigned long s_busy;
atomic_t s_dma_busy;
u16 s_pkt_delay;
u16 s_hdrwords; /* size of s_hdr in 32 bit words */
u32 s_cur_size; /* size of send packet in bytes */
......@@ -383,6 +390,7 @@ struct ipath_qp {
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
unsigned long r_aflags;
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
......@@ -394,8 +402,7 @@ struct ipath_qp {
u8 r_state; /* opcode of last packet received */
u8 r_nak_state; /* non-zero if NAK is pending */
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_reuse_sge; /* for UC receive errors */
u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
u8 r_flags;
u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
u8 r_head_ack_queue; /* index into s_ack_queue[] */
u8 qp_access_flags;
......@@ -404,13 +411,13 @@ struct ipath_qp {
u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */
u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */
u8 s_flags;
u8 s_dmult;
u8 s_draining;
u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
u32 remote_qpn;
......@@ -428,16 +435,39 @@ struct ipath_qp {
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
/* Bit definition for s_busy. */
#define IPATH_S_BUSY 0
/*
* Atomic bit definitions for r_aflags.
*/
#define IPATH_R_WRID_VALID 0
/*
* Bit definitions for r_flags.
*/
#define IPATH_R_REUSE_SGE 0x01
/*
* Bit definitions for s_flags.
*
* IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
* before processing the next SWQE
* IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
* before processing the next SWQE
* IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
* IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
* IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
* next send completion entry not via send DMA.
*/
#define IPATH_S_SIGNAL_REQ_WR 0x01
#define IPATH_S_FENCE_PENDING 0x02
#define IPATH_S_RDMAR_PENDING 0x04
#define IPATH_S_ACK_PENDING 0x08
#define IPATH_S_BUSY 0x10
#define IPATH_S_WAITING 0x20
#define IPATH_S_WAIT_SSN_CREDIT 0x40
#define IPATH_S_WAIT_DMA 0x80
#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
#define IPATH_PSN_CREDIT 512
......@@ -573,13 +603,11 @@ struct ipath_ibdev {
u32 n_rnr_naks;
u32 n_other_naks;
u32 n_timeouts;
u32 n_rc_stalls;
u32 n_pkt_drops;
u32 n_vl15_dropped;
u32 n_wqe_errs;
u32 n_rdma_dup_busy;
u32 n_piowait;
u32 n_no_piobuf;
u32 n_unaligned;
u32 port_cap_flags;
u32 pma_sample_start;
......@@ -657,6 +685,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
return container_of(ibdev, struct ipath_ibdev, ibdev);
}
/*
* This must be called with s_lock held.
*/
static inline void ipath_schedule_send(struct ipath_qp *qp)
{
if (qp->s_flags & IPATH_S_ANY_WAIT)
qp->s_flags &= ~IPATH_S_ANY_WAIT;
if (!(qp->s_flags & IPATH_S_BUSY))
tasklet_hi_schedule(&qp->s_task);
}
int ipath_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
......@@ -706,7 +745,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr);
void ipath_free_all_qps(struct ipath_qp_table *qpt);
unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment