Commit ef0c2bb0, authored by David Teigland, committed by Steven Whitehouse

[DLM] overlapping cancel and unlock

Full cancel and force-unlock support.  In the past, cancel and force-unlock
wouldn't work if there was another operation in progress on the lock.  Now,
both cancel and unlock-force can overlap an operation on a lock, meaning there
may be 2 or 3 operations in progress on a lock in parallel.  This support is
important not only because cancel and force-unlock are explicit operations
that an app can use, but both are used implicitly when a process exits while
holding locks.

Summary of changes:

- add-to and remove-from waiters functions were rewritten to handle situations
  with more than one remote operation outstanding on a lock

- validate_unlock_args detects when an overlapping cancel/unlock-force
  can be sent and when it needs to be delayed until a request/lookup
  reply is received

- processing request/lookup replies detects when cancel/unlock-force
  occurred during the op, and carries out the delayed cancel/unlock-force

- manipulation of the "waiters" (remote operation) state of a lock moved under
  the standard rsb mutex that protects all the other lock state

- the two recovery routines related to locks on the waiters list changed
  according to the way lkb's are now locked before accessing waiters state

- waiters recovery detects when lkb's being recovered have overlapping
  cancel/unlock-force, and may not recover such locks

- revert_lock (cancel) returns a value to distinguish cases where it did
  nothing vs cases where it actually did a cancel; the cancel completion ast
  should only be done when cancel did something

- orphaned locks put on new list so they can be found later for purging

- cancel must be called on a lock when making it an orphan

- flag user locks (ENDOFLIFE) at the end of their useful life (to the
  application) so we can return an error for any further cancel/unlock-force

- we weren't setting COMP/BAST ast flags if one was already set, so we'd lose
  either a completion or blocking ast

- clear an unread bast on a lock that's become unlocked

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
parent 03206727
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
******************************************************************************* *******************************************************************************
** **
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
** **
** This copyrighted material is made available to anyone wishing to use, ** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions ** modify, copy, or redistribute it subject to the terms and conditions
...@@ -210,6 +210,9 @@ struct dlm_args { ...@@ -210,6 +210,9 @@ struct dlm_args {
#define DLM_IFL_MSTCPY 0x00010000 #define DLM_IFL_MSTCPY 0x00010000
#define DLM_IFL_RESEND 0x00020000 #define DLM_IFL_RESEND 0x00020000
#define DLM_IFL_DEAD 0x00040000 #define DLM_IFL_DEAD 0x00040000
#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
#define DLM_IFL_USER 0x00000001 #define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002 #define DLM_IFL_ORPHAN 0x00000002
...@@ -230,8 +233,8 @@ struct dlm_lkb { ...@@ -230,8 +233,8 @@ struct dlm_lkb {
int8_t lkb_grmode; /* granted lock mode */ int8_t lkb_grmode; /* granted lock mode */
int8_t lkb_bastmode; /* requested mode */ int8_t lkb_bastmode; /* requested mode */
int8_t lkb_highbast; /* highest mode bast sent for */ int8_t lkb_highbast; /* highest mode bast sent for */
int8_t lkb_wait_type; /* type of reply waiting for */ int8_t lkb_wait_type; /* type of reply waiting for */
int8_t lkb_wait_count;
int8_t lkb_ast_type; /* type of ast queued for */ int8_t lkb_ast_type; /* type of ast queued for */
struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
...@@ -440,6 +443,9 @@ struct dlm_ls { ...@@ -440,6 +443,9 @@ struct dlm_ls {
struct mutex ls_waiters_mutex; struct mutex ls_waiters_mutex;
struct list_head ls_waiters; /* lkbs needing a reply */ struct list_head ls_waiters; /* lkbs needing a reply */
struct mutex ls_orphans_mutex;
struct list_head ls_orphans;
struct list_head ls_nodes; /* current nodes in ls */ struct list_head ls_nodes; /* current nodes in ls */
struct list_head ls_nodes_gone; /* dead node list, recovery */ struct list_head ls_nodes_gone; /* dead node list, recovery */
int ls_num_nodes; /* number of nodes in ls */ int ls_num_nodes; /* number of nodes in ls */
......
This diff is collapsed.
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
******************************************************************************* *******************************************************************************
** **
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
** **
** This copyrighted material is made available to anyone wishing to use, ** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions ** modify, copy, or redistribute it subject to the terms and conditions
...@@ -459,6 +459,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ...@@ -459,6 +459,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
INIT_LIST_HEAD(&ls->ls_waiters); INIT_LIST_HEAD(&ls->ls_waiters);
mutex_init(&ls->ls_waiters_mutex); mutex_init(&ls->ls_waiters_mutex);
INIT_LIST_HEAD(&ls->ls_orphans);
mutex_init(&ls->ls_orphans_mutex);
INIT_LIST_HEAD(&ls->ls_nodes); INIT_LIST_HEAD(&ls->ls_nodes);
INIT_LIST_HEAD(&ls->ls_nodes_gone); INIT_LIST_HEAD(&ls->ls_nodes_gone);
......
/* /*
* Copyright (C) 2006 Red Hat, Inc. All rights reserved. * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved.
* *
* This copyrighted material is made available to anyone wishing to use, * This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions * modify, copy, or redistribute it subject to the terms and conditions
...@@ -128,35 +128,30 @@ static void compat_output(struct dlm_lock_result *res, ...@@ -128,35 +128,30 @@ static void compat_output(struct dlm_lock_result *res,
} }
#endif #endif
/* we could possibly check if the cancel of an orphan has resulted in the lkb
being removed and then remove that lkb from the orphans list and free it */
void dlm_user_add_ast(struct dlm_lkb *lkb, int type) void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
{ {
struct dlm_ls *ls; struct dlm_ls *ls;
struct dlm_user_args *ua; struct dlm_user_args *ua;
struct dlm_user_proc *proc; struct dlm_user_proc *proc;
int remove_ownqueue = 0; int eol = 0, ast_type;
/* dlm_clear_proc_locks() sets ORPHAN/DEAD flag on each if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
lkb before dealing with it. We need to check this
flag before taking ls_clear_proc_locks mutex because if
it's set, dlm_clear_proc_locks() holds the mutex. */
if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) {
/* log_print("user_add_ast skip1 %x", lkb->lkb_flags); */
return; return;
}
ls = lkb->lkb_resource->res_ls; ls = lkb->lkb_resource->res_ls;
mutex_lock(&ls->ls_clear_proc_locks); mutex_lock(&ls->ls_clear_proc_locks);
/* If ORPHAN/DEAD flag is set, it means the process is dead so an ast /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed
lkb->ua so we can't try to use it. */ lkb->ua so we can't try to use it. This second check is necessary
for cases where a completion ast is received for an operation that
began before clear_proc_locks did its cancel/unlock. */
if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
/* log_print("user_add_ast skip2 %x", lkb->lkb_flags); */
goto out; goto out;
}
DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb);); DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb););
ua = (struct dlm_user_args *)lkb->lkb_astparam; ua = (struct dlm_user_args *)lkb->lkb_astparam;
...@@ -166,28 +161,42 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) ...@@ -166,28 +161,42 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
goto out; goto out;
spin_lock(&proc->asts_spin); spin_lock(&proc->asts_spin);
if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) {
ast_type = lkb->lkb_ast_type;
lkb->lkb_ast_type |= type;
if (!ast_type) {
kref_get(&lkb->lkb_ref); kref_get(&lkb->lkb_ref);
list_add_tail(&lkb->lkb_astqueue, &proc->asts); list_add_tail(&lkb->lkb_astqueue, &proc->asts);
lkb->lkb_ast_type |= type;
wake_up_interruptible(&proc->wait); wake_up_interruptible(&proc->wait);
} }
if (type == AST_COMP && (ast_type & AST_COMP))
/* noqueue requests that fail may need to be removed from the log_debug(ls, "ast overlap %x status %x %x",
proc's locks list, there should be a better way of detecting lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
this situation than checking all these things... */
/* Figure out if this lock is at the end of its life and no longer
if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV && available for the application to use. The lkb still exists until
ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) the final ast is read. A lock becomes EOL in three situations:
remove_ownqueue = 1; 1. a noqueue request fails with EAGAIN
2. an unlock completes with EUNLOCK
/* unlocks or cancels of waiting requests need to be removed from the 3. a cancel of a waiting request completes with ECANCEL
proc's unlocking list, again there must be a better way... */ An EOL lock needs to be removed from the process's list of locks.
And we can't allow any new operation on an EOL lock. This is
if (ua->lksb.sb_status == -DLM_EUNLOCK || not related to the lifetime of the lkb struct which is managed
entirely by refcount. */
if (type == AST_COMP &&
lkb->lkb_grmode == DLM_LOCK_IV &&
ua->lksb.sb_status == -EAGAIN)
eol = 1;
else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
(ua->lksb.sb_status == -DLM_ECANCEL && (ua->lksb.sb_status == -DLM_ECANCEL &&
lkb->lkb_grmode == DLM_LOCK_IV)) lkb->lkb_grmode == DLM_LOCK_IV))
remove_ownqueue = 1; eol = 1;
if (eol) {
lkb->lkb_ast_type &= ~AST_BAST;
lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
}
/* We want to copy the lvb to userspace when the completion /* We want to copy the lvb to userspace when the completion
ast is read if the status is 0, the lock has an lvb and ast is read if the status is 0, the lock has an lvb and
...@@ -204,12 +213,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) ...@@ -204,12 +213,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
spin_unlock(&proc->asts_spin); spin_unlock(&proc->asts_spin);
if (remove_ownqueue) { if (eol) {
spin_lock(&ua->proc->locks_spin); spin_lock(&ua->proc->locks_spin);
if (!list_empty(&lkb->lkb_ownqueue)) {
list_del_init(&lkb->lkb_ownqueue); list_del_init(&lkb->lkb_ownqueue);
spin_unlock(&ua->proc->locks_spin);
dlm_put_lkb(lkb); dlm_put_lkb(lkb);
} }
spin_unlock(&ua->proc->locks_spin);
}
out: out:
mutex_unlock(&ls->ls_clear_proc_locks); mutex_unlock(&ls->ls_clear_proc_locks);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment