Commit 58411d1c authored by John Gregor's avatar John Gregor Committed by Roland Dreier

IB/ipath: Head of Line blocking vs forward progress of user apps

There's a conflict between our need to quiesce PSM-based applications
to avoid HoL blocking when the IB link goes down and the apps' desire
to remain running so that their quiescence timout mechanism can keep
running.

The compromise is to STOP the processes for a fixed period of time and
then alternate between CONT and STOP until the link is again active.

If there are poor interactions with subnet manager configuration at a
given site, the interval can be adjusted via a module paramter.
Signed-off-by: default avatarJohn Gregor <john.gregor@qlogic.com>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent 6be979d7
......@@ -66,6 +66,7 @@
#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */
#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */
#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */
#else /* _IPATH_DEBUGGING */
......
......@@ -330,6 +330,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
struct ipath_devdata *dd;
ssize_t ret = 0;
u64 val;
u32 l_state, lt_state; /* LinkState, LinkTrainingState */
if (count != sizeof(dp)) {
ret = -EINVAL;
......@@ -396,10 +397,17 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
ret = -ENODEV;
goto bail;
}
/* Check link state, but not if we have custom PBC */
val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT &&
val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) {
/*
* Want to skip check for l_state if using custom PBC,
* because we might be trying to force an SM packet out.
* first-cut, skip _all_ state checking in that case.
*/
val = ipath_ib_state(dd, dd->ipath_lastibcstat);
lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
(val != dd->ib_init && val != dd->ib_arm &&
val != dd->ib_active))) {
ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
dd->ipath_unit, (unsigned long long) val);
ret = -EINVAL;
......@@ -438,6 +446,8 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
ret = -EBUSY;
goto bail;
}
/* disarm it just to be extra sure */
ipath_disarm_piobufs(dd, pbufn, 1);
plen >>= 2; /* in dwords */
......
......@@ -77,6 +77,11 @@ unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
static unsigned ipath_hol_timeout_ms = 13000;
module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
MODULE_PARM_DESC(hol_timeout_ms,
"duration of user app suspension after link failure");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("QLogic <support@pathscale.com>");
MODULE_DESCRIPTION("QLogic InfiniPath driver");
......@@ -1670,11 +1675,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
"is %s\n", dd->ipath_unit,
what[linkcmd],
ipath_ibcstatus_str[
(ipath_read_kreg64
(dd, dd->ipath_kregs->kr_ibcstatus) >>
INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
/* flush all queued sends when going to DOWN to be sure that
* they don't block MAD packets */
if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN)
......@@ -1925,9 +1927,8 @@ static void ipath_run_led_override(unsigned long opaque)
*/
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
INFINIPATH_IBCS_LINKSTATE_MASK;
dd->ibcs_lts_mask;
lstate = (val >> dd->ibcs_ls_shift) & INFINIPATH_IBCS_LINKSTATE_MASK;
dd->ipath_f_setextled(dd, lstate, ltstate);
mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
......@@ -1988,6 +1989,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
ipath_dbg("Shutting down the device\n");
ipath_hol_up(dd); /* make sure user processes aren't suspended */
dd->ipath_flags |= IPATH_LINKUNK;
dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
IPATH_LINKINIT | IPATH_LINKARMED |
......@@ -2037,6 +2040,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
*/
dd->ipath_f_quiet_serdes(dd);
/* stop all the timers that might still be running */
del_timer_sync(&dd->ipath_hol_timer);
if (dd->ipath_stats_timer_active) {
del_timer_sync(&dd->ipath_stats_timer);
dd->ipath_stats_timer_active = 0;
......@@ -2252,6 +2257,114 @@ bail:
return ret;
}
/*
* send a signal to all the processes that have the driver open
* through the normal interfaces (i.e., everything other than diags
* interface). Returns number of signalled processes.
*/
static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
{
int i, sub, any = 0;
pid_t pid;
if (!dd->ipath_pd)
return 0;
for (i = 1; i < dd->ipath_cfgports; i++) {
if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
!dd->ipath_pd[i]->port_pid)
continue;
pid = dd->ipath_pd[i]->port_pid;
dev_info(&dd->pcidev->dev, "context %d in use "
"(PID %u), sending signal %d\n",
i, pid, sig);
kill_proc(pid, sig, 1);
any++;
for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
pid = dd->ipath_pd[i]->port_subpid[sub];
if (!pid)
continue;
dev_info(&dd->pcidev->dev, "sub-context "
"%d:%d in use (PID %u), sending "
"signal %d\n", i, sub, pid, sig);
kill_proc(pid, sig, 1);
any++;
}
}
return any;
}
static void ipath_hol_signal_down(struct ipath_devdata *dd)
{
if (ipath_signal_procs(dd, SIGSTOP))
ipath_dbg("Stopped some processes\n");
ipath_cancel_sends(dd, 1);
}
static void ipath_hol_signal_up(struct ipath_devdata *dd)
{
if (ipath_signal_procs(dd, SIGCONT))
ipath_dbg("Continued some processes\n");
}
/*
* link is down, stop any users processes, and flush pending sends
* to prevent HoL blocking, then start the HoL timer that
* periodically continues, then stop procs, so they can detect
* link down if they want, and do something about it.
* Timer may already be running, so use __mod_timer, not add_timer.
*/
void ipath_hol_down(struct ipath_devdata *dd)
{
dd->ipath_hol_state = IPATH_HOL_DOWN;
ipath_hol_signal_down(dd);
dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
dd->ipath_hol_timer.expires = jiffies +
msecs_to_jiffies(ipath_hol_timeout_ms);
__mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
}
/*
* link is up, continue any user processes, and ensure timer
* is a nop, if running. Let timer keep running, if set; it
* will nop when it sees the link is up
*/
void ipath_hol_up(struct ipath_devdata *dd)
{
ipath_hol_signal_up(dd);
dd->ipath_hol_state = IPATH_HOL_UP;
}
/*
* toggle the running/not running state of user proceses
* to prevent HoL blocking on chip resources, but still allow
* user processes to do link down special case handling.
* Should only be called via the timer
*/
void ipath_hol_event(unsigned long opaque)
{
struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
&& dd->ipath_hol_state != IPATH_HOL_UP) {
dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
ipath_dbg("Stopping processes\n");
ipath_hol_signal_down(dd);
} else { /* may do "extra" if also in ipath_hol_up() */
dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
ipath_dbg("Continuing processes\n");
ipath_hol_signal_up(dd);
}
if (dd->ipath_hol_state == IPATH_HOL_UP)
ipath_dbg("link's up, don't resched timer\n");
else {
dd->ipath_hol_timer.expires = jiffies +
msecs_to_jiffies(ipath_hol_timeout_ms);
__mod_timer(&dd->ipath_hol_timer,
dd->ipath_hol_timer.expires);
}
}
int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
{
u64 val;
......
......@@ -908,6 +908,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_stats_timer_active = 1;
}
/* Set up HoL state */
init_timer(&dd->ipath_hol_timer);
dd->ipath_hol_timer.function = ipath_hol_event;
dd->ipath_hol_timer.data = (unsigned long)dd;
dd->ipath_hol_state = IPATH_HOL_UP;
done:
if (!ret) {
*dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
......
This diff is collapsed.
......@@ -427,6 +427,11 @@ struct ipath_devdata {
unsigned long ipath_ureg_align; /* user register alignment */
/* HoL blocking / user app forward-progress state */
unsigned ipath_hol_state;
unsigned ipath_hol_next;
struct timer_list ipath_hol_timer;
/*
* Shadow copies of registers; size indicates read access size.
* Most of them are readonly, but some are write-only register,
......@@ -706,6 +711,13 @@ struct ipath_devdata {
u16 ipath_jint_max_packets; /* max packets across all ports */
};
/* ipath_hol_state values (stopping/starting user proc, send flushing) */
#define IPATH_HOL_UP 0
#define IPATH_HOL_DOWN 1
/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
#define IPATH_HOL_DOWNSTOP 0
#define IPATH_HOL_DOWNCONT 1
/* Private data for file operations */
struct ipath_filedata {
struct ipath_portdata *pd;
......@@ -775,6 +787,9 @@ int ipath_set_lid(struct ipath_devdata *, u32, u8);
int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
void ipath_enable_armlaunch(struct ipath_devdata *);
void ipath_disable_armlaunch(struct ipath_devdata *);
void ipath_hol_down(struct ipath_devdata *);
void ipath_hol_up(struct ipath_devdata *);
void ipath_hol_event(unsigned long);
/* for use in system calls, where we want to know device type, etc. */
#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
......@@ -830,6 +845,7 @@ void ipath_disable_armlaunch(struct ipath_devdata *);
/* Suppress heartbeat, even if turning off loopback */
#define IPATH_NO_HRTBT 0x1000000
#define IPATH_HAS_MULT_IB_SPEED 0x8000000
#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
/* Bits in GPIO for the added interrupts */
#define IPATH_GPIO_PORT0_BIT 2
......@@ -1029,6 +1045,21 @@ static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
dd->ibcs_lts_mask;
}
/*
* from contents of IBCStatus (or a saved copy), return logical link state
* combination of link state and linktraining state (down, active, init,
* arm, etc.
*/
static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
{
u32 ibs;
ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
dd->ibcs_lts_mask;
ibs |= (u32)(ibcs &
(INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
return ibs;
}
/*
* sysfs interface.
*/
......
......@@ -200,7 +200,6 @@
#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
/* kr_ibcstatus bits */
#define INFINIPATH_IBCS_LINKTRAININGSTATE_MASK 0xF
#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
#define INFINIPATH_IBCS_LINKSTATE_SHIFT 4
......@@ -221,30 +220,13 @@
#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c
#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e
#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f
/* link state machine states (shift by INFINIPATH_IBCS_LINKSTATE_SHIFT) */
/* link state machine states (shift by ibcs_ls_shift) */
#define INFINIPATH_IBCS_L_STATE_DOWN 0x0
#define INFINIPATH_IBCS_L_STATE_INIT 0x1
#define INFINIPATH_IBCS_L_STATE_ARM 0x2
#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3
#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4
/* combination link status states that we use with some frequency */
#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
<< INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
(INFINIPATH_IBCS_LINKSTATE_MASK \
<<INFINIPATH_IBCS_LINKSTATE_SHIFT))
#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
#define IPATH_IBSTATE_ARM ((INFINIPATH_IBCS_L_STATE_ARM \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
#define IPATH_IBSTATE_ACTIVE ((INFINIPATH_IBCS_L_STATE_ACTIVE \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
/* kr_extstatus bits */
#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment