Commit 1993d683 authored by Roland Dreier

[IPoIB] Drop RX packets when out of memory

Change the way IPoIB handles RX packets when it can't allocate a new
receive skbuff.  If the allocation of a new receive skb fails, we now
drop the packet we just received and repost the original receive skb.
This means that the receive ring always stays full and we don't have
to monkey around with trying to schedule a refill task for later.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent cb0f0910
...@@ -100,7 +100,12 @@ struct ipoib_pseudoheader { ...@@ -100,7 +100,12 @@ struct ipoib_pseudoheader {
struct ipoib_mcast; struct ipoib_mcast;
struct ipoib_buf { struct ipoib_rx_buf {
struct sk_buff *skb;
dma_addr_t mapping;
};
struct ipoib_tx_buf {
struct sk_buff *skb; struct sk_buff *skb;
DECLARE_PCI_UNMAP_ADDR(mapping) DECLARE_PCI_UNMAP_ADDR(mapping)
}; };
...@@ -150,14 +155,14 @@ struct ipoib_dev_priv { ...@@ -150,14 +155,14 @@ struct ipoib_dev_priv {
unsigned int admin_mtu; unsigned int admin_mtu;
unsigned int mcast_mtu; unsigned int mcast_mtu;
struct ipoib_buf *rx_ring; struct ipoib_rx_buf *rx_ring;
spinlock_t tx_lock; spinlock_t tx_lock;
struct ipoib_buf *tx_ring; struct ipoib_tx_buf *tx_ring;
unsigned tx_head; unsigned tx_head;
unsigned tx_tail; unsigned tx_tail;
struct ib_sge tx_sge; struct ib_sge tx_sge;
struct ib_send_wr tx_wr; struct ib_send_wr tx_wr;
struct ib_wc ibwc[IPOIB_NUM_WC]; struct ib_wc ibwc[IPOIB_NUM_WC];
......
...@@ -95,57 +95,65 @@ void ipoib_free_ah(struct kref *kref) ...@@ -95,57 +95,65 @@ void ipoib_free_ah(struct kref *kref)
} }
} }
static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv, static int ipoib_ib_post_receive(struct net_device *dev, int id)
unsigned int wr_id,
dma_addr_t addr)
{ {
struct ib_sge list = { struct ipoib_dev_priv *priv = netdev_priv(dev);
.addr = addr, struct ib_sge list;
.length = IPOIB_BUF_SIZE, struct ib_recv_wr param;
.lkey = priv->mr->lkey,
};
struct ib_recv_wr param = {
.wr_id = wr_id | IPOIB_OP_RECV,
.sg_list = &list,
.num_sge = 1,
};
struct ib_recv_wr *bad_wr; struct ib_recv_wr *bad_wr;
int ret;
list.addr = priv->rx_ring[id].mapping;
list.length = IPOIB_BUF_SIZE;
list.lkey = priv->mr->lkey;
param.next = NULL;
param.wr_id = id | IPOIB_OP_RECV;
param.sg_list = &list;
param.num_sge = 1;
ret = ib_post_recv(priv->qp, &param, &bad_wr);
if (unlikely(ret)) {
ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
dma_unmap_single(priv->ca->dma_device,
priv->rx_ring[id].mapping,
IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
dev_kfree_skb_any(priv->rx_ring[id].skb);
priv->rx_ring[id].skb = NULL;
}
return ib_post_recv(priv->qp, &param, &bad_wr); return ret;
} }
static int ipoib_ib_post_receive(struct net_device *dev, int id) static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb; struct sk_buff *skb;
dma_addr_t addr; dma_addr_t addr;
int ret;
skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
if (!skb) { if (!skb)
ipoib_warn(priv, "failed to allocate receive buffer\n");
priv->rx_ring[id].skb = NULL;
return -ENOMEM; return -ENOMEM;
}
skb_reserve(skb, 4); /* 16 byte align IP header */ /*
priv->rx_ring[id].skb = skb; * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
* header. So we need 4 more bytes to get to 48 and align the
* IP header to a multiple of 16.
*/
skb_reserve(skb, 4);
addr = dma_map_single(priv->ca->dma_device, addr = dma_map_single(priv->ca->dma_device,
skb->data, IPOIB_BUF_SIZE, skb->data, IPOIB_BUF_SIZE,
DMA_FROM_DEVICE); DMA_FROM_DEVICE);
pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr); if (unlikely(dma_mapping_error(addr))) {
ret = ipoib_ib_receive(priv, id, addr);
if (ret) {
ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n",
id, ret);
dma_unmap_single(priv->ca->dma_device, addr,
IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
priv->rx_ring[id].skb = NULL; return -EIO;
} }
return ret; priv->rx_ring[id].skb = skb;
priv->rx_ring[id].mapping = addr;
return 0;
} }
static int ipoib_ib_post_receives(struct net_device *dev) static int ipoib_ib_post_receives(struct net_device *dev)
...@@ -154,6 +162,10 @@ static int ipoib_ib_post_receives(struct net_device *dev) ...@@ -154,6 +162,10 @@ static int ipoib_ib_post_receives(struct net_device *dev)
int i; int i;
for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
if (ipoib_alloc_rx_skb(dev, i)) {
ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
return -ENOMEM;
}
if (ipoib_ib_post_receive(dev, i)) { if (ipoib_ib_post_receive(dev, i)) {
ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
return -EIO; return -EIO;
...@@ -176,28 +188,36 @@ static void ipoib_ib_handle_wc(struct net_device *dev, ...@@ -176,28 +188,36 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
wr_id &= ~IPOIB_OP_RECV; wr_id &= ~IPOIB_OP_RECV;
if (wr_id < IPOIB_RX_RING_SIZE) { if (wr_id < IPOIB_RX_RING_SIZE) {
struct sk_buff *skb = priv->rx_ring[wr_id].skb; struct sk_buff *skb = priv->rx_ring[wr_id].skb;
dma_addr_t addr = priv->rx_ring[wr_id].mapping;
priv->rx_ring[wr_id].skb = NULL;
dma_unmap_single(priv->ca->dma_device, if (unlikely(wc->status != IB_WC_SUCCESS)) {
pci_unmap_addr(&priv->rx_ring[wr_id],
mapping),
IPOIB_BUF_SIZE,
DMA_FROM_DEVICE);
if (wc->status != IB_WC_SUCCESS) {
if (wc->status != IB_WC_WR_FLUSH_ERR) if (wc->status != IB_WC_WR_FLUSH_ERR)
ipoib_warn(priv, "failed recv event " ipoib_warn(priv, "failed recv event "
"(status=%d, wrid=%d vend_err %x)\n", "(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err); wc->status, wr_id, wc->vendor_err);
dma_unmap_single(priv->ca->dma_device, addr,
IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
priv->rx_ring[wr_id].skb = NULL;
return; return;
} }
/*
* If we can't allocate a new RX buffer, dump
* this packet and reuse the old buffer.
*/
if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
++priv->stats.rx_dropped;
goto repost;
}
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid); wc->byte_len, wc->slid);
dma_unmap_single(priv->ca->dma_device, addr,
IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
skb_put(skb, wc->byte_len); skb_put(skb, wc->byte_len);
skb_pull(skb, IB_GRH_BYTES); skb_pull(skb, IB_GRH_BYTES);
...@@ -220,8 +240,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev, ...@@ -220,8 +240,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
} }
/* repost receive */ repost:
if (ipoib_ib_post_receive(dev, wr_id)) if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
ipoib_warn(priv, "ipoib_ib_post_receive failed " ipoib_warn(priv, "ipoib_ib_post_receive failed "
"for buf %d\n", wr_id); "for buf %d\n", wr_id);
} else } else
...@@ -229,7 +249,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev, ...@@ -229,7 +249,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
wr_id); wr_id);
} else { } else {
struct ipoib_buf *tx_req; struct ipoib_tx_buf *tx_req;
unsigned long flags; unsigned long flags;
if (wr_id >= IPOIB_TX_RING_SIZE) { if (wr_id >= IPOIB_TX_RING_SIZE) {
...@@ -302,7 +322,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -302,7 +322,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_ah *address, u32 qpn) struct ipoib_ah *address, u32 qpn)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_buf *tx_req; struct ipoib_tx_buf *tx_req;
dma_addr_t addr; dma_addr_t addr;
if (skb->len > dev->mtu + INFINIBAND_ALEN) { if (skb->len > dev->mtu + INFINIBAND_ALEN) {
...@@ -468,7 +488,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) ...@@ -468,7 +488,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
struct ib_qp_attr qp_attr; struct ib_qp_attr qp_attr;
int attr_mask; int attr_mask;
unsigned long begin; unsigned long begin;
struct ipoib_buf *tx_req; struct ipoib_tx_buf *tx_req;
int i; int i;
/* Kill the existing QP and allocate a new one */ /* Kill the existing QP and allocate a new one */
......
...@@ -732,7 +732,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) ...@@ -732,7 +732,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
/* Allocate RX/TX "rings" to hold queued skbs */ /* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
GFP_KERNEL); GFP_KERNEL);
if (!priv->rx_ring) { if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
...@@ -740,9 +740,9 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) ...@@ -740,9 +740,9 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
goto out; goto out;
} }
memset(priv->rx_ring, 0, memset(priv->rx_ring, 0,
IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf));
priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
GFP_KERNEL); GFP_KERNEL);
if (!priv->tx_ring) { if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
...@@ -750,7 +750,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) ...@@ -750,7 +750,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
goto out_rx_ring_cleanup; goto out_rx_ring_cleanup;
} }
memset(priv->tx_ring, 0, memset(priv->tx_ring, 0,
IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf));
/* priv->tx_head & tx_tail are already 0 */ /* priv->tx_head & tx_tail are already 0 */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment