Commit e8224e4b authored by Yossi Etigin's avatar Yossi Etigin Committed by Roland Dreier

IPoIB: Fix deadlock on RTNL between bcast join comp and ipoib_stop()

Taking rtnl_lock in ipoib_mcast_join_complete() causes a deadlock with
ipoib_stop().  We avoid it by scheduling the piece of code that takes
the lock on ipoib_workqueue instead of executing it directly.  This
works because we only flush the ipoib_workqueue with the RTNL not held.

The deadlock happens because ipoib_stop() calls ipoib_ib_dev_down()
which calls ipoib_mcast_dev_flush(), which calls ipoib_mcast_free(),
which calls ipoib_mcast_leave(). The latter calls
ib_sa_free_multicast(), and this waits until the multicast completion
handler finishes.  This handler is ipoib_mcast_join_complete(), which
waits for the rtnl_lock(), which was already taken by ipoib_stop().

This bug was introduced in commit a77a57a1 ("IPoIB: Fix deadlock on
RTNL in ipoib_stop()").
Signed-off-by: default avatarYossi Etigin <yosefe@voltaire.com>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent 1941246d
...@@ -293,6 +293,7 @@ struct ipoib_dev_priv { ...@@ -293,6 +293,7 @@ struct ipoib_dev_priv {
struct delayed_work pkey_poll_task; struct delayed_work pkey_poll_task;
struct delayed_work mcast_task; struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light; struct work_struct flush_light;
struct work_struct flush_normal; struct work_struct flush_normal;
struct work_struct flush_heavy; struct work_struct flush_heavy;
...@@ -464,6 +465,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); ...@@ -464,6 +465,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev); void ipoib_dev_cleanup(struct net_device *dev);
void ipoib_mcast_join_task(struct work_struct *work); void ipoib_mcast_join_task(struct work_struct *work);
void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work); void ipoib_mcast_restart_task(struct work_struct *work);
......
...@@ -1075,6 +1075,7 @@ static void ipoib_setup(struct net_device *dev) ...@@ -1075,6 +1075,7 @@ static void ipoib_setup(struct net_device *dev)
INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
......
...@@ -366,6 +366,21 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) ...@@ -366,6 +366,21 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
return ret; return ret;
} }
void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
* removed.
*/
rtnl_lock();
netif_carrier_on(priv->dev);
rtnl_unlock();
}
static int ipoib_mcast_join_complete(int status, static int ipoib_mcast_join_complete(int status,
struct ib_sa_multicast *multicast) struct ib_sa_multicast *multicast)
{ {
...@@ -392,16 +407,12 @@ static int ipoib_mcast_join_complete(int status, ...@@ -392,16 +407,12 @@ static int ipoib_mcast_join_complete(int status,
&priv->mcast_task, 0); &priv->mcast_task, 0);
mutex_unlock(&mcast_mutex); mutex_unlock(&mcast_mutex);
if (mcast == priv->broadcast) {
/* /*
* Take RTNL lock here to avoid racing with * Defer carrier on work to ipoib_workqueue to avoid a
* ipoib_stop() and turning the carrier back * deadlock on rtnl_lock here.
* on while a device is being removed.
*/ */
rtnl_lock(); if (mcast == priv->broadcast)
netif_carrier_on(dev); queue_work(ipoib_workqueue, &priv->carrier_on_task);
rtnl_unlock();
}
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment