Commit 396e8e76 authored by Russ Anderson's avatar Russ Anderson Committed by Tony Luck

[IA64] Cache error recovery

Similar to memory error recovery, when a cache error is consumed
by a user process terminate the user instead of crashing the system.

Signed-off-by: Russ Anderson (rja@sgi.com)
Acked-by: default avatarHidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: default avatarTony Luck <tony.luck@intel.com>
parent 618b206f
...@@ -602,6 +602,8 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, ...@@ -602,6 +602,8 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
default: default:
break; break;
} }
} else if (psp->cc && !psp->bc) { /* Cache error */
status = recover_from_read_error(slidx, peidx, pbci, sos);
} }
return status; return status;
...@@ -645,13 +647,6 @@ recover_from_tlb_check(peidx_table_t *peidx) ...@@ -645,13 +647,6 @@ recover_from_tlb_check(peidx_table_t *peidx)
* Return value: * Return value:
* 1 on Success / 0 on Failure * 1 on Success / 0 on Failure
*/ */
/*
* Later we try to recover when below all conditions are satisfied.
* 1. Only one processor error section is exist.
* 2. BUS_CHECK is exist and the others are not exist.(Except TLB_CHECK)
* 3. The entry of BUS_CHECK_INFO is 1.
* 4. "External bus error" flag is set and the others are not set.
*/
static int static int
recover_from_processor_error(int platform, slidx_table_t *slidx, recover_from_processor_error(int platform, slidx_table_t *slidx,
...@@ -687,36 +682,31 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, ...@@ -687,36 +682,31 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
/* /*
* The cache check and bus check bits have four possible states * The cache check and bus check bits have four possible states
* cc bc * cc bc
* 0 0 Weird record, not recovered
* 1 0 Cache error, not recovered
* 0 1 I/O error, attempt recovery
* 1 1 Memory error, attempt recovery * 1 1 Memory error, attempt recovery
* 1 0 Cache error, attempt recovery
* 0 1 I/O error, attempt recovery
* 0 0 Other error type, not recovered
*/ */
if (psp->bc == 0 || pbci == NULL) if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL))
return fatal_mca("No bus check"); return fatal_mca("No cache or bus check");
/* /*
* Sorry, we cannot handle so many. * Cannot handle more than one bus check.
*/ */
if (peidx_bus_check_num(peidx) > 1) if (peidx_bus_check_num(peidx) > 1)
return fatal_mca("Too many bus checks"); return fatal_mca("Too many bus checks");
/*
* Well, here is only one bus error.
*/
if (pbci->ib) if (pbci->ib)
return fatal_mca("Internal Bus error"); return fatal_mca("Internal Bus error");
if (pbci->cc)
return fatal_mca("Cache-cache error");
if (pbci->eb && pbci->bsi > 0) if (pbci->eb && pbci->bsi > 0)
return fatal_mca("External bus check fatal status"); return fatal_mca("External bus check fatal status");
/* /*
* This is a local MCA and estimated as recoverble external bus error. * This is a local MCA and estimated as a recoverble error.
* (e.g. a load from poisoned memory)
* This means "there are some platform errors".
*/ */
if (platform) if (platform)
return recover_from_platform_error(slidx, peidx, pbci, sos); return recover_from_platform_error(slidx, peidx, pbci, sos);
/* /*
* On account of strange SAL error record, we cannot recover. * On account of strange SAL error record, we cannot recover.
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment