Commit f56bcd80, authored by Eli Cohen, committed by Roland Dreier

IPoIB: Use separate CQ for UD send completions

Use a dedicated CQ for UD send completions. Also, do not arm the UD
send CQ, which reduces the number of interrupts generated.  This patch
further reduces overhead by not calling poll CQ for every posted send
WR -- it polls only when there are 16 or more outstanding work requests.
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 87528227
...@@ -95,6 +95,8 @@ enum { ...@@ -95,6 +95,8 @@ enum {
IPOIB_MCAST_FLAG_SENDONLY = 1, IPOIB_MCAST_FLAG_SENDONLY = 1,
IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
IPOIB_MCAST_FLAG_ATTACHED = 3, IPOIB_MCAST_FLAG_ATTACHED = 3,
MAX_SEND_CQE = 16,
}; };
#define IPOIB_OP_RECV (1ul << 31) #define IPOIB_OP_RECV (1ul << 31)
...@@ -285,7 +287,8 @@ struct ipoib_dev_priv { ...@@ -285,7 +287,8 @@ struct ipoib_dev_priv {
u16 pkey_index; u16 pkey_index;
struct ib_pd *pd; struct ib_pd *pd;
struct ib_mr *mr; struct ib_mr *mr;
struct ib_cq *cq; struct ib_cq *recv_cq;
struct ib_cq *send_cq;
struct ib_qp *qp; struct ib_qp *qp;
u32 qkey; u32 qkey;
...@@ -305,6 +308,7 @@ struct ipoib_dev_priv { ...@@ -305,6 +308,7 @@ struct ipoib_dev_priv {
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_send_wr tx_wr; struct ib_send_wr tx_wr;
unsigned tx_outstanding; unsigned tx_outstanding;
struct ib_wc send_wc[MAX_SEND_CQE];
struct ib_recv_wr rx_wr; struct ib_recv_wr rx_wr;
struct ib_sge rx_sge[IPOIB_UD_RX_SG]; struct ib_sge rx_sge[IPOIB_UD_RX_SG];
...@@ -662,7 +666,6 @@ static inline int ipoib_register_debugfs(void) { return 0; } ...@@ -662,7 +666,6 @@ static inline int ipoib_register_debugfs(void) { return 0; }
static inline void ipoib_unregister_debugfs(void) { } static inline void ipoib_unregister_debugfs(void) { }
#endif #endif
#define ipoib_printk(level, priv, format, arg...) \ #define ipoib_printk(level, priv, format, arg...) \
printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
#define ipoib_warn(priv, format, arg...) \ #define ipoib_warn(priv, format, arg...) \
......
...@@ -249,8 +249,8 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, ...@@ -249,8 +249,8 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = { struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler, .event_handler = ipoib_cm_rx_event_handler,
.send_cq = priv->cq, /* For drain WR */ .send_cq = priv->recv_cq, /* For drain WR */
.recv_cq = priv->cq, .recv_cq = priv->recv_cq,
.srq = priv->cm.srq, .srq = priv->cm.srq,
.cap.max_send_wr = 1, /* For drain WR */ .cap.max_send_wr = 1, /* For drain WR */
.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
...@@ -951,8 +951,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ ...@@ -951,8 +951,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = { struct ib_qp_init_attr attr = {
.send_cq = priv->cq, .send_cq = priv->recv_cq,
.recv_cq = priv->cq, .recv_cq = priv->recv_cq,
.srq = priv->cm.srq, .srq = priv->cm.srq,
.cap.max_send_wr = ipoib_sendq_size, .cap.max_send_wr = ipoib_sendq_size,
.cap.max_send_sge = 1, .cap.max_send_sge = 1,
......
...@@ -71,7 +71,7 @@ static int ipoib_set_coalesce(struct net_device *dev, ...@@ -71,7 +71,7 @@ static int ipoib_set_coalesce(struct net_device *dev,
coal->rx_max_coalesced_frames > 0xffff) coal->rx_max_coalesced_frames > 0xffff)
return -EINVAL; return -EINVAL;
ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames, ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
coal->rx_coalesce_usecs); coal->rx_coalesce_usecs);
if (ret && ret != -ENOSYS) { if (ret && ret != -ENOSYS) {
ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
......
...@@ -364,7 +364,6 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) ...@@ -364,7 +364,6 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned int wr_id = wc->wr_id; unsigned int wr_id = wc->wr_id;
struct ipoib_tx_buf *tx_req; struct ipoib_tx_buf *tx_req;
unsigned long flags;
ipoib_dbg_data(priv, "send completion: id %d, status: %d\n", ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
wr_id, wc->status); wr_id, wc->status);
...@@ -384,13 +383,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) ...@@ -384,13 +383,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
dev_kfree_skb_any(tx_req->skb); dev_kfree_skb_any(tx_req->skb);
spin_lock_irqsave(&priv->tx_lock, flags);
++priv->tx_tail; ++priv->tx_tail;
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) && netif_queue_stopped(dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(dev); netif_wake_queue(dev);
spin_unlock_irqrestore(&priv->tx_lock, flags);
if (wc->status != IB_WC_SUCCESS && if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR) wc->status != IB_WC_WR_FLUSH_ERR)
...@@ -399,6 +396,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) ...@@ -399,6 +396,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
wc->status, wr_id, wc->vendor_err); wc->status, wr_id, wc->vendor_err);
} }
/*
 * Poll the send CQ once, pulling at most MAX_SEND_CQE work completions
 * into priv->send_wc, and pass each one to ipoib_ib_handle_tx_wc().
 *
 * Returns nonzero when the poll yielded exactly MAX_SEND_CQE entries
 * (a full batch, so the caller may want to poll again), and 0 otherwise.
 */
static int poll_tx(struct ipoib_dev_priv *priv)
{
	int polled = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
	int idx;

	for (idx = 0; idx < polled; ++idx)
		ipoib_ib_handle_tx_wc(priv->dev, &priv->send_wc[idx]);

	return polled == MAX_SEND_CQE;
}
int ipoib_poll(struct napi_struct *napi, int budget) int ipoib_poll(struct napi_struct *napi, int budget)
{ {
struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
...@@ -414,7 +422,7 @@ poll_more: ...@@ -414,7 +422,7 @@ poll_more:
int max = (budget - done); int max = (budget - done);
t = min(IPOIB_NUM_WC, max); t = min(IPOIB_NUM_WC, max);
n = ib_poll_cq(priv->cq, t, priv->ibwc); n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
struct ib_wc *wc = priv->ibwc + i; struct ib_wc *wc = priv->ibwc + i;
...@@ -425,12 +433,8 @@ poll_more: ...@@ -425,12 +433,8 @@ poll_more:
ipoib_cm_handle_rx_wc(dev, wc); ipoib_cm_handle_rx_wc(dev, wc);
else else
ipoib_ib_handle_rx_wc(dev, wc); ipoib_ib_handle_rx_wc(dev, wc);
} else { } else
if (wc->wr_id & IPOIB_OP_CM) ipoib_cm_handle_tx_wc(priv->dev, wc);
ipoib_cm_handle_tx_wc(dev, wc);
else
ipoib_ib_handle_tx_wc(dev, wc);
}
} }
if (n != t) if (n != t)
...@@ -439,7 +443,7 @@ poll_more: ...@@ -439,7 +443,7 @@ poll_more:
if (done < budget) { if (done < budget) {
netif_rx_complete(dev, napi); netif_rx_complete(dev, napi);
if (unlikely(ib_req_notify_cq(priv->cq, if (unlikely(ib_req_notify_cq(priv->recv_cq,
IB_CQ_NEXT_COMP | IB_CQ_NEXT_COMP |
IB_CQ_REPORT_MISSED_EVENTS)) && IB_CQ_REPORT_MISSED_EVENTS)) &&
netif_rx_reschedule(dev, napi)) netif_rx_reschedule(dev, napi))
...@@ -562,12 +566,16 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -562,12 +566,16 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
address->last_send = priv->tx_head; address->last_send = priv->tx_head;
++priv->tx_head; ++priv->tx_head;
skb_orphan(skb);
if (++priv->tx_outstanding == ipoib_sendq_size) { if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev); netif_stop_queue(dev);
} }
} }
if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
poll_tx(priv);
} }
static void __ipoib_reap_ah(struct net_device *dev) static void __ipoib_reap_ah(struct net_device *dev)
...@@ -714,7 +722,7 @@ void ipoib_drain_cq(struct net_device *dev) ...@@ -714,7 +722,7 @@ void ipoib_drain_cq(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
int i, n; int i, n;
do { do {
n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc); n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
/* /*
* Convert any successful completions to flush * Convert any successful completions to flush
...@@ -729,14 +737,13 @@ void ipoib_drain_cq(struct net_device *dev) ...@@ -729,14 +737,13 @@ void ipoib_drain_cq(struct net_device *dev)
ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
else else
ipoib_ib_handle_rx_wc(dev, priv->ibwc + i); ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
} else { } else
if (priv->ibwc[i].wr_id & IPOIB_OP_CM) ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
else
ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
}
} }
} while (n == IPOIB_NUM_WC); } while (n == IPOIB_NUM_WC);
while (poll_tx(priv))
; /* nothing */
} }
int ipoib_ib_dev_stop(struct net_device *dev, int flush) int ipoib_ib_dev_stop(struct net_device *dev, int flush)
...@@ -826,7 +833,7 @@ timeout: ...@@ -826,7 +833,7 @@ timeout:
msleep(1); msleep(1);
} }
ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP); ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
return 0; return 0;
} }
......
...@@ -1298,7 +1298,8 @@ static int __init ipoib_init_module(void) ...@@ -1298,7 +1298,8 @@ static int __init ipoib_init_module(void)
ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
IPOIB_MIN_QUEUE_SIZE));
#ifdef CONFIG_INFINIBAND_IPOIB_CM #ifdef CONFIG_INFINIBAND_IPOIB_CM
ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
#endif #endif
......
...@@ -171,26 +171,33 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) ...@@ -171,26 +171,33 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_pd; goto out_free_pd;
} }
size = ipoib_sendq_size + ipoib_recvq_size + 1; size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev); ret = ipoib_cm_dev_init(dev);
if (!ret) { if (!ret) {
size += ipoib_sendq_size;
if (ipoib_cm_has_srq(dev)) if (ipoib_cm_has_srq(dev))
size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */ size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
else else
size += ipoib_recvq_size * ipoib_max_conn_qp; size += ipoib_recvq_size * ipoib_max_conn_qp;
} }
priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->cq)) { if (IS_ERR(priv->recv_cq)) {
printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
goto out_free_mr; goto out_free_mr;
} }
if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0);
goto out_free_cq; if (IS_ERR(priv->send_cq)) {
printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
goto out_free_recv_cq;
}
if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP))
goto out_free_send_cq;
init_attr.send_cq = priv->cq; init_attr.send_cq = priv->send_cq;
init_attr.recv_cq = priv->cq; init_attr.recv_cq = priv->recv_cq;
if (priv->hca_caps & IB_DEVICE_UD_TSO) if (priv->hca_caps & IB_DEVICE_UD_TSO)
init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
...@@ -201,7 +208,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) ...@@ -201,7 +208,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->qp = ib_create_qp(priv->pd, &init_attr); priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) { if (IS_ERR(priv->qp)) {
printk(KERN_WARNING "%s: failed to create QP\n", ca->name); printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
goto out_free_cq; goto out_free_send_cq;
} }
priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
...@@ -230,8 +237,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) ...@@ -230,8 +237,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
return 0; return 0;
out_free_cq: out_free_send_cq:
ib_destroy_cq(priv->cq); ib_destroy_cq(priv->send_cq);
out_free_recv_cq:
ib_destroy_cq(priv->recv_cq);
out_free_mr: out_free_mr:
ib_dereg_mr(priv->mr); ib_dereg_mr(priv->mr);
...@@ -254,8 +264,11 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) ...@@ -254,8 +264,11 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
} }
if (ib_destroy_cq(priv->cq)) if (ib_destroy_cq(priv->send_cq))
ipoib_warn(priv, "ib_cq_destroy failed\n"); ipoib_warn(priv, "ib_cq_destroy (send) failed\n");
if (ib_destroy_cq(priv->recv_cq))
ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
ipoib_cm_dev_cleanup(dev); ipoib_cm_dev_cleanup(dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment