Commit 78d1e02f authored by Dave Olson, committed by Roland Dreier

IB/ipath: Workaround problem of errormask register being overwritten

On some system hardware, we are seeing moderately common cases of the
chip errormask register being overwritten due to a chip bug in iba6120
that is triggered by a vendor-specific PCIe broadcast message.  This
patch merely checks periodically, and corrects it if needed (the
overwrite can cause us to not get error and hardware error
interrupts).  Also, make dd->ipath_errormask the one, true canonical
source for kr_errormask, and remove references to ipath_ignorederrs as
it is currently unused.
Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 3810f2a8
...@@ -851,13 +851,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ...@@ -851,13 +851,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
dd->ipath_hwerrmask); dd->ipath_hwerrmask);
dd->ipath_maskederrs = dd->ipath_ignorederrs;
/* clear all */ /* clear all */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
/* enable errors that are masked, at least this first time. */ /* enable errors that are masked, at least this first time. */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs); ~dd->ipath_maskederrs);
/* clear any interrups up to this point (ints still not enabled) */ dd->ipath_errormask = ipath_read_kreg64(dd,
dd->ipath_kregs->kr_errormask);
/* clear any interrupts up to this point (ints still not enabled) */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
/* /*
......
...@@ -517,10 +517,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) ...@@ -517,10 +517,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
/* /* don't report errors that are masked */
* don't report errors that are masked (includes those always
* ignored)
*/
errs &= ~dd->ipath_maskederrs; errs &= ~dd->ipath_maskederrs;
/* do these first, they are most important */ /* do these first, they are most important */
...@@ -566,19 +563,19 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) ...@@ -566,19 +563,19 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* ones on this particular interrupt, which also isn't great * ones on this particular interrupt, which also isn't great
*/ */
dd->ipath_maskederrs |= dd->ipath_lasterror | errs; dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
dd->ipath_errormask &= ~dd->ipath_maskederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs); dd->ipath_errormask);
s_iserr = ipath_decode_err(msg, sizeof msg, s_iserr = ipath_decode_err(msg, sizeof msg,
(dd->ipath_maskederrs & ~dd-> dd->ipath_maskederrs);
ipath_ignorederrs));
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & if (dd->ipath_maskederrs &
~(INFINIPATH_E_RRCVEGRFULL | ~(INFINIPATH_E_RRCVEGRFULL |
INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
ipath_dev_err(dd, "Temporarily disabling " ipath_dev_err(dd, "Temporarily disabling "
"error(s) %llx reporting; too frequent (%s)\n", "error(s) %llx reporting; too frequent (%s)\n",
(unsigned long long) (dd->ipath_maskederrs & (unsigned long long)dd->ipath_maskederrs,
~dd->ipath_ignorederrs), msg); msg);
else { else {
/* /*
* rcvegrfull and rcvhdrqfull are "normal", * rcvegrfull and rcvhdrqfull are "normal",
...@@ -793,6 +790,9 @@ void ipath_clear_freeze(struct ipath_devdata *dd) ...@@ -793,6 +790,9 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
/* disable error interrupts, to avoid confusion */ /* disable error interrupts, to avoid confusion */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
/* also disable interrupts; errormask is sometimes overwritten */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
/* /*
* clear all sends, because they have may been * clear all sends, because they have may been
* completed by usercode while in freeze mode, and * completed by usercode while in freeze mode, and
...@@ -817,7 +817,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd) ...@@ -817,7 +817,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
for (i = 0; i < dd->ipath_pioavregs; i++) { for (i = 0; i < dd->ipath_pioavregs; i++) {
/* deal with 6110 chip bug */ /* deal with 6110 chip bug */
im = i > 3 ? ((i&1) ? i-1 : i+1) : i; im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64))); val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im);
dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i] dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
= le64_to_cpu(val); = le64_to_cpu(val);
} }
...@@ -832,7 +832,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd) ...@@ -832,7 +832,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
E_SPKT_ERRS_IGNORE); E_SPKT_ERRS_IGNORE);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs); dd->ipath_errormask);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
} }
......
...@@ -261,18 +261,10 @@ struct ipath_devdata { ...@@ -261,18 +261,10 @@ struct ipath_devdata {
* limiting of hwerror reporting * limiting of hwerror reporting
*/ */
ipath_err_t ipath_lasthwerror; ipath_err_t ipath_lasthwerror;
/* /* errors masked because they occur too fast */
* errors masked because they occur too fast, also includes errors
* that are always ignored (ipath_ignorederrs)
*/
ipath_err_t ipath_maskederrs; ipath_err_t ipath_maskederrs;
/* time in jiffies at which to re-enable maskederrs */ /* time in jiffies at which to re-enable maskederrs */
unsigned long ipath_unmasktime; unsigned long ipath_unmasktime;
/*
* errors always ignored (masked), at least for a given
* chip/device, because they are wrong or not useful
*/
ipath_err_t ipath_ignorederrs;
/* count of egrfull errors, combined for all ports */ /* count of egrfull errors, combined for all ports */
u64 ipath_last_tidfull; u64 ipath_last_tidfull;
/* for ipath_qcheck() */ /* for ipath_qcheck() */
...@@ -436,6 +428,7 @@ struct ipath_devdata { ...@@ -436,6 +428,7 @@ struct ipath_devdata {
u64 ipath_lastibcstat; u64 ipath_lastibcstat;
/* hwerrmask shadow */ /* hwerrmask shadow */
ipath_err_t ipath_hwerrmask; ipath_err_t ipath_hwerrmask;
ipath_err_t ipath_errormask; /* errormask shadow */
/* interrupt config reg shadow */ /* interrupt config reg shadow */
u64 ipath_intconfig; u64 ipath_intconfig;
/* kr_sendpiobufbase value */ /* kr_sendpiobufbase value */
......
...@@ -196,6 +196,45 @@ static void ipath_qcheck(struct ipath_devdata *dd) ...@@ -196,6 +196,45 @@ static void ipath_qcheck(struct ipath_devdata *dd)
} }
} }
/**
 * ipath_chk_errormask - detect and repair an overwritten chip errormask
 * @dd: the infinipath device
 *
 * The chip's kr_errormask register can be overwritten behind the
 * driver's back, which can keep error and hardware error interrupts
 * from being delivered.  Compare the register against the
 * dd->ipath_errormask shadow and, if they differ, rewrite the register
 * from the shadow.
 *
 * Nothing is done while the shadow is zero or before the device has
 * the IPATH_INITTED flag set.
 */
static void ipath_chk_errormask(struct ipath_devdata *dd)
{
	/* running count of repairs; function-static, so not per-device */
	static u32 fixed;
	u32 ctrl;
	/*
	 * These hold the results of 64-bit register reads.  Keep them in
	 * 64-bit locals: with "unsigned long" the values would be truncated
	 * wherever long is 32 bits, making the comparison against the shadow
	 * fail spuriously and the logged values wrong.
	 */
	unsigned long long errormask;
	unsigned long long hwerrs;

	if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
		return;

	errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
	if (errormask == dd->ipath_errormask)
		return;

	fixed++;

	/* sample state before restoring the mask, for the decision below */
	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);

	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
			 dd->ipath_errormask);

	if ((hwerrs & dd->ipath_hwerrmask) ||
	    (ctrl & INFINIPATH_C_FREEZEMODE)) {
		/* force re-interrupt of pending events, just in case */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
		dev_info(&dd->pcidev->dev,
			 "errormask fixed(%u) %llx -> %llx, ctrl %x hwerr %llx\n",
			 fixed, errormask,
			 (unsigned long long)dd->ipath_errormask,
			 ctrl, hwerrs);
	} else {
		/* no enabled hardware error pending and not frozen */
		ipath_dbg("errormask fixed(%u) %llx -> %llx, no freeze\n",
			  fixed, errormask,
			  (unsigned long long)dd->ipath_errormask);
	}
}
/** /**
* ipath_get_faststats - get word counters from chip before they overflow * ipath_get_faststats - get word counters from chip before they overflow
* @opaque - contains a pointer to the infinipath device ipath_devdata * @opaque - contains a pointer to the infinipath device ipath_devdata
...@@ -251,14 +290,13 @@ void ipath_get_faststats(unsigned long opaque) ...@@ -251,14 +290,13 @@ void ipath_get_faststats(unsigned long opaque)
dd->ipath_lasterror = 0; dd->ipath_lasterror = 0;
if (dd->ipath_lasthwerror) if (dd->ipath_lasthwerror)
dd->ipath_lasthwerror = 0; dd->ipath_lasthwerror = 0;
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) if (dd->ipath_maskederrs
&& time_after(jiffies, dd->ipath_unmasktime)) { && time_after(jiffies, dd->ipath_unmasktime)) {
char ebuf[256]; char ebuf[256];
int iserr; int iserr;
iserr = ipath_decode_err(ebuf, sizeof ebuf, iserr = ipath_decode_err(ebuf, sizeof ebuf,
(dd->ipath_maskederrs & ~dd-> dd->ipath_maskederrs);
ipath_ignorederrs)); if (dd->ipath_maskederrs &
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
INFINIPATH_E_PKTERRS )) INFINIPATH_E_PKTERRS ))
ipath_dev_err(dd, "Re-enabling masked errors " ipath_dev_err(dd, "Re-enabling masked errors "
...@@ -278,9 +316,12 @@ void ipath_get_faststats(unsigned long opaque) ...@@ -278,9 +316,12 @@ void ipath_get_faststats(unsigned long opaque)
ipath_cdbg(ERRPKT, "Re-enabling packet" ipath_cdbg(ERRPKT, "Re-enabling packet"
" problem interrupt (%s)\n", ebuf); " problem interrupt (%s)\n", ebuf);
} }
dd->ipath_maskederrs = dd->ipath_ignorederrs;
/* re-enable masked errors */
dd->ipath_errormask |= dd->ipath_maskederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs); dd->ipath_errormask);
dd->ipath_maskederrs = 0;
} }
/* limit qfull messages to ~one per minute per port */ /* limit qfull messages to ~one per minute per port */
...@@ -294,6 +335,7 @@ void ipath_get_faststats(unsigned long opaque) ...@@ -294,6 +335,7 @@ void ipath_get_faststats(unsigned long opaque)
} }
} }
ipath_chk_errormask(dd);
done: done:
mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment