Commit b2220cad authored by Jay Vosburgh, committed by Jeff Garzik

bonding: refactor ARP active-backup monitor

	Refactor ARP monitor for active-backup mode.  The motivation for
this is to take care of locking issues in a clear manner (particularly to
correctly handle RTNL vs. the bonding locks).  Currently, the a-b ARP
monitor does not hold RTNL at all, but future changes will require RTNL
during ARP monitor failovers.

	Rather than using conditional locking, this patch instead breaks
up the ARP monitor into three discrete steps: inspection, commit changes,
and probe.  The inspection phase marks slaves that require link state
changes.  The commit phase is only called if inspection detects that
changes are needed, and is called with RTNL.  Lastly, the probe phase
issues the ARP probes that the inspection phase uses to determine link
state.
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
parent 7893b249
...@@ -1051,6 +1051,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) ...@@ -1051,6 +1051,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
} }
if (new_active) { if (new_active) {
new_active->jiffies = jiffies;
if (new_active->link == BOND_LINK_BACK) { if (new_active->link == BOND_LINK_BACK) {
if (USES_PRIMARY(bond->params.mode)) { if (USES_PRIMARY(bond->params.mode)) {
printk(KERN_INFO DRV_NAME printk(KERN_INFO DRV_NAME
...@@ -1062,7 +1064,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) ...@@ -1062,7 +1064,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
new_active->delay = 0; new_active->delay = 0;
new_active->link = BOND_LINK_UP; new_active->link = BOND_LINK_UP;
new_active->jiffies = jiffies;
if (bond->params.mode == BOND_MODE_8023AD) { if (bond->params.mode == BOND_MODE_8023AD) {
bond_3ad_handle_link_change(new_active, BOND_LINK_UP); bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
...@@ -2795,205 +2796,235 @@ out: ...@@ -2795,205 +2796,235 @@ out:
} }
/* /*
* When using arp monitoring in active-backup mode, this function is * Called to inspect slaves for active-backup mode ARP monitor link state
* called to determine if any backup slaves have went down or a new * changes. Sets new_link in slaves to specify what action should take
* current slave needs to be found. * place for the slave. Returns 0 if no changes are found, >0 if changes
* The backup slaves never generate traffic, they are considered up by merely * to link states must be committed.
* receiving traffic. If the current slave goes down, each backup slave will *
* be given the opportunity to tx/rx an arp before being taken down - this * Called with bond->lock held for read.
* prevents all slaves from being taken down due to the current slave not
* sending any traffic for the backups to receive. The arps are not necessarily
* necessary, any tx and rx traffic will keep the current slave up. While any
* rx traffic will keep the backup slaves up, the current slave is responsible
* for generating traffic to keep them up regardless of any other traffic they
* may have received.
* see loadbalance_arp_monitor for arp monitoring in load balancing mode
*/ */
void bond_activebackup_arp_mon(struct work_struct *work) static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
{ {
struct bonding *bond = container_of(work, struct bonding,
arp_work.work);
struct slave *slave; struct slave *slave;
int delta_in_ticks; int i, commit = 0;
int i;
read_lock(&bond->lock); bond_for_each_slave(bond, slave, i) {
slave->new_link = BOND_LINK_NOCHANGE;
delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
if (bond->kill_timers) { if (slave->link != BOND_LINK_UP) {
goto out; if (time_before_eq(jiffies, slave_last_rx(bond, slave) +
delta_in_ticks)) {
slave->new_link = BOND_LINK_UP;
commit++;
} }
if (bond->slave_cnt == 0) { continue;
goto re_arm;
} }
/* determine if any slave has come up or any backup slave has /*
* gone down * Give slaves 2*delta after being enslaved or made
* TODO: what about up/down delay in arp mode? it wasn't here before * active. This avoids bouncing, as the last receive
* so it can wait * times need a full ARP monitor cycle to be updated.
*/ */
bond_for_each_slave(bond, slave, i) { if (!time_after_eq(jiffies, slave->jiffies +
if (slave->link != BOND_LINK_UP) { 2 * delta_in_ticks))
if (time_before_eq(jiffies, continue;
slave_last_rx(bond, slave) + delta_in_ticks)) {
slave->link = BOND_LINK_UP;
write_lock_bh(&bond->curr_slave_lock);
if ((!bond->curr_active_slave) && /*
time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks)) { * Backup slave is down if:
bond_change_active_slave(bond, slave); * - No current_arp_slave AND
bond->current_arp_slave = NULL; * - more than 3*delta since last receive AND
} else if (bond->curr_active_slave != slave) { * - the bond has an IP address
/* this slave has just come up but we *
* already have a current slave; this * Note: a non-null current_arp_slave indicates
* can also happen if bond_enslave adds * the curr_active_slave went down and we are
* a new slave that is up while we are * searching for a new one; under this condition
* searching for a new slave * we only take the curr_active_slave down - this
* gives each slave a chance to tx/rx traffic
* before being taken out
*/ */
bond_set_slave_inactive_flags(slave); if (slave->state == BOND_STATE_BACKUP &&
bond->current_arp_slave = NULL; !bond->current_arp_slave &&
time_after(jiffies, slave_last_rx(bond, slave) +
3 * delta_in_ticks)) {
slave->new_link = BOND_LINK_DOWN;
commit++;
} }
bond_set_carrier(bond); /*
* Active slave is down if:
if (slave == bond->curr_active_slave) { * - more than 2*delta since transmitting OR
printk(KERN_INFO DRV_NAME * - (more than 2*delta since receive AND
": %s: %s is up and now the " * the bond has an IP address)
"active interface\n", */
bond->dev->name, if ((slave->state == BOND_STATE_ACTIVE) &&
slave->dev->name); (time_after_eq(jiffies, slave->dev->trans_start +
netif_carrier_on(bond->dev); 2 * delta_in_ticks) ||
} else { (time_after_eq(jiffies, slave_last_rx(bond, slave)
printk(KERN_INFO DRV_NAME + 2 * delta_in_ticks)))) {
": %s: backup interface %s is " slave->new_link = BOND_LINK_DOWN;
"now up\n", commit++;
bond->dev->name,
slave->dev->name);
} }
write_unlock_bh(&bond->curr_slave_lock);
} }
} else {
read_lock(&bond->curr_slave_lock); read_lock(&bond->curr_slave_lock);
if ((slave != bond->curr_active_slave) && /*
(!bond->current_arp_slave) && * Trigger a commit if the primary option setting has changed.
(time_after_eq(jiffies, slave_last_rx(bond, slave) + 3*delta_in_ticks))) {
/* a backup slave has gone down; three times
* the delta allows the current slave to be
* taken out before the backup slave.
* note: a non-null current_arp_slave indicates
* the curr_active_slave went down and we are
* searching for a new one; under this
* condition we only take the curr_active_slave
* down - this gives each slave a chance to
* tx/rx traffic before being taken out
*/ */
if (bond->primary_slave &&
(bond->primary_slave != bond->curr_active_slave) &&
(bond->primary_slave->link == BOND_LINK_UP))
commit++;
read_unlock(&bond->curr_slave_lock); read_unlock(&bond->curr_slave_lock);
slave->link = BOND_LINK_DOWN; return commit;
}
if (slave->link_failure_count < UINT_MAX) { /*
slave->link_failure_count++; * Called to commit link state changes noted by inspection step of
} * active-backup mode ARP monitor.
*
* Called with RTNL and bond->lock for read.
*/
static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
{
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
switch (slave->new_link) {
case BOND_LINK_NOCHANGE:
continue;
case BOND_LINK_UP:
write_lock_bh(&bond->curr_slave_lock);
if (!bond->curr_active_slave &&
time_before_eq(jiffies, slave->dev->trans_start +
delta_in_ticks)) {
slave->link = BOND_LINK_UP;
bond_change_active_slave(bond, slave);
bond->current_arp_slave = NULL;
printk(KERN_INFO DRV_NAME
": %s: %s is up and now the "
"active interface\n",
bond->dev->name, slave->dev->name);
} else if (bond->curr_active_slave != slave) {
/* this slave has just come up but we
* already have a current slave; this can
* also happen if bond_enslave adds a new
* slave that is up while we are searching
* for a new slave
*/
slave->link = BOND_LINK_UP;
bond_set_slave_inactive_flags(slave); bond_set_slave_inactive_flags(slave);
bond->current_arp_slave = NULL;
printk(KERN_INFO DRV_NAME printk(KERN_INFO DRV_NAME
": %s: backup interface %s is now down\n", ": %s: backup interface %s is now up\n",
bond->dev->name, bond->dev->name, slave->dev->name);
slave->dev->name);
} else {
read_unlock(&bond->curr_slave_lock);
}
}
} }
read_lock(&bond->curr_slave_lock); write_unlock_bh(&bond->curr_slave_lock);
slave = bond->curr_active_slave;
read_unlock(&bond->curr_slave_lock);
if (slave) {
/* if we have sent traffic in the past 2*arp_intervals but
* haven't xmit and rx traffic in that time interval, select
* a different slave. slave->jiffies is only updated when
* a slave first becomes the curr_active_slave - not necessarily
* after every arp; this ensures the slave has a full 2*delta
* before being taken out. if a primary is being used, check
* if it is up and needs to take over as the curr_active_slave
*/
if ((time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
(time_after_eq(jiffies, slave_last_rx(bond, slave) + 2*delta_in_ticks))) &&
time_after_eq(jiffies, slave->jiffies + 2*delta_in_ticks)) {
slave->link = BOND_LINK_DOWN; break;
if (slave->link_failure_count < UINT_MAX) { case BOND_LINK_DOWN:
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++; slave->link_failure_count++;
}
slave->link = BOND_LINK_DOWN;
if (slave == bond->curr_active_slave) {
printk(KERN_INFO DRV_NAME printk(KERN_INFO DRV_NAME
": %s: link status down for active interface " ": %s: link status down for active "
"%s, disabling it\n", "interface %s, disabling it\n",
bond->dev->name, bond->dev->name, slave->dev->name);
slave->dev->name);
bond_set_slave_inactive_flags(slave);
write_lock_bh(&bond->curr_slave_lock); write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond); bond_select_active_slave(bond);
slave = bond->curr_active_slave; if (bond->curr_active_slave)
bond->curr_active_slave->jiffies =
jiffies;
write_unlock_bh(&bond->curr_slave_lock); write_unlock_bh(&bond->curr_slave_lock);
bond->current_arp_slave = slave; bond->current_arp_slave = NULL;
if (slave) { } else if (slave->state == BOND_STATE_BACKUP) {
slave->jiffies = jiffies;
}
} else if ((bond->primary_slave) &&
(bond->primary_slave != slave) &&
(bond->primary_slave->link == BOND_LINK_UP)) {
/* at this point, slave is the curr_active_slave */
printk(KERN_INFO DRV_NAME printk(KERN_INFO DRV_NAME
": %s: changing from interface %s to primary " ": %s: backup interface %s is now down\n",
"interface %s\n", bond->dev->name, slave->dev->name);
bond->dev->name,
slave->dev->name,
bond->primary_slave->dev->name);
/* primary is up so switch to it */ bond_set_slave_inactive_flags(slave);
}
break;
default:
printk(KERN_ERR DRV_NAME
": %s: impossible: new_link %d on slave %s\n",
bond->dev->name, slave->new_link,
slave->dev->name);
}
}
/*
* No race with changes to primary via sysfs, as we hold rtnl.
*/
if (bond->primary_slave &&
(bond->primary_slave != bond->curr_active_slave) &&
(bond->primary_slave->link == BOND_LINK_UP)) {
write_lock_bh(&bond->curr_slave_lock); write_lock_bh(&bond->curr_slave_lock);
bond_change_active_slave(bond, bond->primary_slave); bond_change_active_slave(bond, bond->primary_slave);
write_unlock_bh(&bond->curr_slave_lock); write_unlock_bh(&bond->curr_slave_lock);
slave = bond->primary_slave;
slave->jiffies = jiffies;
} else {
bond->current_arp_slave = NULL;
} }
/* the current slave must tx an arp to ensure backup slaves bond_set_carrier(bond);
* rx traffic }
/*
* Send ARP probes for active-backup mode ARP monitor.
*
* Called with bond->lock held for read.
*/ */
if (slave && IS_UP(slave->dev)) static void bond_ab_arp_probe(struct bonding *bond)
bond_arp_send_all(bond, slave); {
struct slave *slave;
int i;
read_lock(&bond->curr_slave_lock);
if (bond->current_arp_slave && bond->curr_active_slave)
printk("PROBE: c_arp %s && cas %s BAD\n",
bond->current_arp_slave->dev->name,
bond->curr_active_slave->dev->name);
if (bond->curr_active_slave) {
bond_arp_send_all(bond, bond->curr_active_slave);
read_unlock(&bond->curr_slave_lock);
return;
} }
read_unlock(&bond->curr_slave_lock);
/* if we don't have a curr_active_slave, search for the next available /* if we don't have a curr_active_slave, search for the next available
* backup slave from the current_arp_slave and make it the candidate * backup slave from the current_arp_slave and make it the candidate
* for becoming the curr_active_slave * for becoming the curr_active_slave
*/ */
if (!slave) {
if (!bond->current_arp_slave) { if (!bond->current_arp_slave) {
bond->current_arp_slave = bond->first_slave; bond->current_arp_slave = bond->first_slave;
if (!bond->current_arp_slave)
return;
} }
if (bond->current_arp_slave) {
bond_set_slave_inactive_flags(bond->current_arp_slave); bond_set_slave_inactive_flags(bond->current_arp_slave);
/* search for next candidate */ /* search for next candidate */
...@@ -3016,22 +3047,48 @@ void bond_activebackup_arp_mon(struct work_struct *work) ...@@ -3016,22 +3047,48 @@ void bond_activebackup_arp_mon(struct work_struct *work)
*/ */
if (slave->link == BOND_LINK_UP) { if (slave->link == BOND_LINK_UP) {
slave->link = BOND_LINK_DOWN; slave->link = BOND_LINK_DOWN;
if (slave->link_failure_count < UINT_MAX) { if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++; slave->link_failure_count++;
}
bond_set_slave_inactive_flags(slave); bond_set_slave_inactive_flags(slave);
printk(KERN_INFO DRV_NAME printk(KERN_INFO DRV_NAME
": %s: backup interface %s is " ": %s: backup interface %s is now down.\n",
"now down.\n", bond->dev->name, slave->dev->name);
bond->dev->name,
slave->dev->name);
}
} }
} }
}
void bond_activebackup_arp_mon(struct work_struct *work)
{
struct bonding *bond = container_of(work, struct bonding,
arp_work.work);
int delta_in_ticks;
read_lock(&bond->lock);
if (bond->kill_timers)
goto out;
delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
if (bond->slave_cnt == 0)
goto re_arm;
if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
read_unlock(&bond->lock);
rtnl_lock();
read_lock(&bond->lock);
bond_ab_arp_commit(bond, delta_in_ticks);
read_unlock(&bond->lock);
rtnl_unlock();
read_lock(&bond->lock);
} }
bond_ab_arp_probe(bond);
re_arm: re_arm:
if (bond->params.arp_interval) { if (bond->params.arp_interval) {
queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
......
...@@ -158,6 +158,7 @@ struct slave { ...@@ -158,6 +158,7 @@ struct slave {
unsigned long jiffies; unsigned long jiffies;
unsigned long last_arp_rx; unsigned long last_arp_rx;
s8 link; /* one of BOND_LINK_XXXX */ s8 link; /* one of BOND_LINK_XXXX */
s8 new_link;
s8 state; /* one of BOND_STATE_XXXX */ s8 state; /* one of BOND_STATE_XXXX */
u32 original_flags; u32 original_flags;
u32 original_mtu; u32 original_mtu;
...@@ -169,6 +170,11 @@ struct slave { ...@@ -169,6 +170,11 @@ struct slave {
struct tlb_slave_info tlb_info; struct tlb_slave_info tlb_info;
}; };
/*
* Link pseudo-state only used internally by monitors
*/
#define BOND_LINK_NOCHANGE -1
/* /*
* Here are the locking policies for the two bonding locks: * Here are the locking policies for the two bonding locks:
* *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment