Commit c8df412e authored by Kurt Hackel, committed by Mark Fasheh

ocfs2: special case recovery lock in dlmlock_remote()

If the previous master of the recovery lock dies, let calc_usage take it
down completely and let the caller completely redo the dlmlock() call.
Otherwise, there will never be an opportunity to re-master the lockres and
recovery won't be able to progress.
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
parent 36407488
...@@ -227,7 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, ...@@ -227,7 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
res->state &= ~DLM_LOCK_RES_IN_PROGRESS; res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
lock->lock_pending = 0; lock->lock_pending = 0;
if (status != DLM_NORMAL) { if (status != DLM_NORMAL) {
if (status != DLM_NOTQUEUED) { if (status == DLM_RECOVERING &&
dlm_is_recovery_lock(res->lockname.name,
res->lockname.len)) {
/* recovery lock was mastered by dead node.
* we need to have calc_usage shoot down this
* lockres and completely remaster it. */
mlog(0, "%s: recovery lock was owned by "
"dead node %u, remaster it now.\n",
dlm->name, res->owner);
} else if (status != DLM_NOTQUEUED) {
/* /*
* DO NOT call calc_usage, as this would unhash * DO NOT call calc_usage, as this would unhash
* the remote lockres before we ever get to use * the remote lockres before we ever get to use
...@@ -691,19 +700,23 @@ retry_lock: ...@@ -691,19 +700,23 @@ retry_lock:
msleep(100); msleep(100);
/* no waiting for dlm_reco_thread */ /* no waiting for dlm_reco_thread */
if (recovery) { if (recovery) {
if (status == DLM_RECOVERING) { if (status != DLM_RECOVERING)
goto retry_lock;
mlog(0, "%s: got RECOVERING " mlog(0, "%s: got RECOVERING "
"for $REOCVERY lock, master " "for $RECOVERY lock, master "
"was %u\n", dlm->name, "was %u\n", dlm->name,
res->owner); res->owner);
/* wait to see the node go down, then
* drop down and allow the lockres to
* get cleaned up. need to remaster. */
dlm_wait_for_node_death(dlm, res->owner, dlm_wait_for_node_death(dlm, res->owner,
DLM_NODE_DEATH_WAIT_MAX); DLM_NODE_DEATH_WAIT_MAX);
}
} else { } else {
dlm_wait_for_recovery(dlm); dlm_wait_for_recovery(dlm);
}
goto retry_lock; goto retry_lock;
} }
}
if (status != DLM_NORMAL) { if (status != DLM_NORMAL) {
lock->lksb->flags &= ~DLM_LKSB_GET_LVB; lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
......
...@@ -2314,6 +2314,10 @@ again: ...@@ -2314,6 +2314,10 @@ again:
mlog(0, "%s: reco master %u is ready to recover %u\n", mlog(0, "%s: reco master %u is ready to recover %u\n",
dlm->name, dlm->reco.new_master, dlm->reco.dead_node); dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
status = -EEXIST; status = -EEXIST;
} else if (ret == DLM_RECOVERING) {
mlog(0, "dlm=%s dlmlock says master node died (this=%u)\n",
dlm->name, dlm->node_num);
goto again;
} else { } else {
struct dlm_lock_resource *res; struct dlm_lock_resource *res;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment