Commit a9ee4c8a authored by Kurt Hackel's avatar Kurt Hackel Committed by Mark Fasheh

ocfs2: better error handling during assert master message

handle errors during lock assert master by either killing self or other node
Signed-off-by: default avatarKurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: default avatarMark Fasheh <mark.fasheh@oracle.com>
parent a7f90d83
...@@ -1633,6 +1633,8 @@ again: ...@@ -1633,6 +1633,8 @@ again:
dlm_node_iter_init(nodemap, &iter); dlm_node_iter_init(nodemap, &iter);
while ((to = dlm_node_iter_next(&iter)) >= 0) { while ((to = dlm_node_iter_next(&iter)) >= 0) {
int r = 0; int r = 0;
struct dlm_master_list_entry *mle = NULL;
mlog(0, "sending assert master to %d (%.*s)\n", to, mlog(0, "sending assert master to %d (%.*s)\n", to,
namelen, lockname); namelen, lockname);
memset(&assert, 0, sizeof(assert)); memset(&assert, 0, sizeof(assert));
...@@ -1657,7 +1659,15 @@ again: ...@@ -1657,7 +1659,15 @@ again:
/* ok, something horribly messed. kill thyself. */ /* ok, something horribly messed. kill thyself. */
mlog(ML_ERROR,"during assert master of %.*s to %u, " mlog(ML_ERROR,"during assert master of %.*s to %u, "
"got %d.\n", namelen, lockname, to, r); "got %d.\n", namelen, lockname, to, r);
dlm_dump_lock_resources(dlm); spin_lock(&dlm->spinlock);
spin_lock(&dlm->master_lock);
if (dlm_find_mle(dlm, &mle, (char *)lockname,
namelen)) {
dlm_print_one_mle(mle);
__dlm_put_mle(mle);
}
spin_unlock(&dlm->master_lock);
spin_unlock(&dlm->spinlock);
BUG(); BUG();
} else if (r == EAGAIN) { } else if (r == EAGAIN) {
mlog(0, "%.*s: node %u create mles on other " mlog(0, "%.*s: node %u create mles on other "
...@@ -1922,12 +1932,12 @@ done: ...@@ -1922,12 +1932,12 @@ done:
kill: kill:
/* kill the caller! */ /* kill the caller! */
mlog(ML_ERROR, "Bad message received from another node. Dumping state "
"and killing the other node now! This node is OK and can continue.\n");
__dlm_print_one_lock_resource(res);
spin_unlock(&res->spinlock); spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock); spin_unlock(&dlm->spinlock);
dlm_lockres_put(res); dlm_lockres_put(res);
mlog(ML_ERROR, "Bad message received from another node. Dumping state "
"and killing the other node now! This node is OK and can continue.\n");
dlm_dump_lock_resources(dlm);
dlm_put(dlm); dlm_put(dlm);
return -EINVAL; return -EINVAL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment