Commit 1812063b authored by Michael S. Tsirkin, committed by Roland Dreier

IPoIB/cm: Improve small message bandwidth

Avoid the overhead of freeing/reallocating and mapping/unmapping for
DMA pages that have not been written to by hardware.
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent c9add6ec
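
The change works by sizing the receive path's unmap/remap work to what the hardware actually wrote: wc->byte_len tells the driver how many fragment pages beyond the head buffer were filled, a small message yields zero fragments, and any untouched pages are handed to the newly allocated skb with skb_fill_page_desc rather than being freed and reallocated. A minimal standalone sketch of the fragment-count arithmetic, with stand-in values for PAGE_SIZE and IPOIB_CM_HEAD_SIZE rather than the kernel's definitions:

#include <stdio.h>

/* Stand-in constants for illustration; the kernel derives these from the
 * platform page size and the IPoIB connected-mode buffer layout. */
#define PAGE_SIZE          4096u
#define PAGE_ALIGN(x)      (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define IPOIB_CM_HEAD_SIZE 4096u

/* Fragment pages actually written for a completion of byte_len bytes:
 * whatever spills past the head buffer, rounded up to whole pages.
 * A small message gives 0, so the fast path unmaps and reallocates
 * nothing beyond the head buffer. */
static unsigned int rx_frags_used(unsigned int byte_len)
{
        unsigned int head = byte_len < IPOIB_CM_HEAD_SIZE ?
                            byte_len : IPOIB_CM_HEAD_SIZE;

        return PAGE_ALIGN(byte_len - head) / PAGE_SIZE;
}

int main(void)
{
        printf("frags for 100 bytes:  %u\n", rx_frags_used(100));   /* 0 */
        printf("frags for 9000 bytes: %u\n", rx_frags_used(9000));  /* 2 */
        return 0;
}

With these stand-in sizes a 100-byte message never touches a fragment page, while a 9000-byte one dirties two; only the dirty pages are unmapped and replaced.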
drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -65,14 +65,14 @@ struct ipoib_cm_id {
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
                                struct ib_cm_event *event);
 
-static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv,
+static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
                                   u64 mapping[IPOIB_CM_RX_SG])
 {
         int i;
 
         ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
 
-        for (i = 0; i < IPOIB_CM_RX_SG - 1; ++i)
+        for (i = 0; i < frags; ++i)
                 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
 }
 
@@ -90,7 +90,8 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
         ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
         if (unlikely(ret)) {
                 ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-                ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping);
+                ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+                                      priv->cm.srq_ring[id].mapping);
                 dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
                 priv->cm.srq_ring[id].skb = NULL;
         }
@@ -98,7 +99,7 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
         return ret;
 }
 
-static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
+static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int frags,
                                  u64 mapping[IPOIB_CM_RX_SG])
 {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -107,7 +108,7 @@ static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
 
         skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
         if (unlikely(!skb))
-                return -ENOMEM;
+                return NULL;
 
         /*
          * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
@@ -119,10 +120,10 @@ static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
                                        DMA_FROM_DEVICE);
         if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
                 dev_kfree_skb_any(skb);
-                return -EIO;
+                return NULL;
         }
 
-        for (i = 0; i < IPOIB_CM_RX_SG - 1; i++) {
+        for (i = 0; i < frags; i++) {
                 struct page *page = alloc_page(GFP_ATOMIC);
 
                 if (!page)
@@ -136,7 +137,7 @@ static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
         }
 
         priv->cm.srq_ring[id].skb = skb;
-        return 0;
+        return skb;
 
 partial_error:
 
@@ -146,7 +147,7 @@ partial_error:
                 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
 
         dev_kfree_skb_any(skb);
-        return -ENOMEM;
+        return NULL;
 }
 
 static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
@@ -309,7 +310,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
 }
 /* Adjust length of skb with fragments to match received data */
 static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
-                          unsigned int length)
+                          unsigned int length, struct sk_buff *toskb)
 {
         int i, num_frags;
         unsigned int size;
@@ -326,7 +327,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
 
                 if (length == 0) {
                         /* don't need this page */
-                        __free_page(frag->page);
+                        skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE);
                         --skb_shinfo(skb)->nr_frags;
                 } else {
                         size = min(length, (unsigned) PAGE_SIZE);
@@ -344,10 +345,11 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
-        struct sk_buff *skb;
+        struct sk_buff *skb, *newskb;
         struct ipoib_cm_rx *p;
         unsigned long flags;
         u64 mapping[IPOIB_CM_RX_SG];
+        int frags;
 
         ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n",
                        wr_id, wc->opcode, wc->status);
@@ -383,7 +385,11 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                 }
         }
 
-        if (unlikely(ipoib_cm_alloc_rx_skb(dev, wr_id, mapping))) {
+        frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
+                                              (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
+
+        newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
+        if (unlikely(!newskb)) {
                 /*
                  * If we can't allocate a new RX buffer, dump
                  * this packet and reuse the old buffer.
@@ -393,13 +399,13 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                 goto repost;
         }
 
-        ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[wr_id].mapping);
-        memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, sizeof mapping);
+        ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping);
+        memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
 
         ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
                        wc->byte_len, wc->slid);
 
-        skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len);
+        skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
 
         skb->protocol = ((struct ipoib_header *) skb->data)->proto;
         skb->mac.raw = skb->data;
@@ -1193,7 +1199,8 @@ int ipoib_cm_dev_init(struct net_device *dev)
         priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
 
         for (i = 0; i < ipoib_recvq_size; ++i) {
-                if (ipoib_cm_alloc_rx_skb(dev, i, priv->cm.srq_ring[i].mapping)) {
+                if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1,
+                                           priv->cm.srq_ring[i].mapping)) {
                         ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
                         ipoib_cm_dev_cleanup(dev);
                         return -ENOMEM;
@@ -1228,7 +1235,8 @@ void ipoib_cm_dev_cleanup(struct net_device *dev)
                 return;
         for (i = 0; i < ipoib_recvq_size; ++i)
                 if (priv->cm.srq_ring[i].skb) {
-                        ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[i].mapping);
+                        ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+                                              priv->cm.srq_ring[i].mapping);
                         dev_kfree_skb_any(priv->cm.srq_ring[i].skb);
                         priv->cm.srq_ring[i].skb = NULL;
                 }