Commit 8b0e7b2c authored by David Teigland, committed by Steven Whitehouse

[DLM] wait for config check during join [6/6]

Joining the lockspace should wait for the initial round of inter-node
config checks to complete before returning.  This way, if there's a
configuration mismatch between the joining node and the existing nodes,
the join can fail and return an error to the application.
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
parent 79d72b54
...@@ -472,6 +472,8 @@ struct dlm_ls { ...@@ -472,6 +472,8 @@ struct dlm_ls {
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result; int ls_uevent_result;
struct completion ls_members_done;
int ls_members_result;
struct miscdevice ls_device; struct miscdevice ls_device;
......
...@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in) ...@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
else else
kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
/* dlm_controld will see the uevent, do the necessary group management
and then write to sysfs to wake us */
error = wait_event_interruptible(ls->ls_uevent_wait, error = wait_event_interruptible(ls->ls_uevent_wait,
test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
if (error) if (error)
goto out; goto out;
error = ls->ls_uevent_result; error = ls->ls_uevent_result;
out: out:
if (error)
log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
error, ls->ls_uevent_result);
return error; return error;
} }
...@@ -490,6 +501,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ...@@ -490,6 +501,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
init_waitqueue_head(&ls->ls_uevent_wait); init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0; ls->ls_uevent_result = 0;
init_completion(&ls->ls_members_done);
ls->ls_members_result = -1;
ls->ls_recoverd_task = NULL; ls->ls_recoverd_task = NULL;
mutex_init(&ls->ls_recoverd_active); mutex_init(&ls->ls_recoverd_active);
...@@ -540,10 +553,21 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ...@@ -540,10 +553,21 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
/* let kobject handle freeing of ls if there's an error */ /* let kobject handle freeing of ls if there's an error */
do_unreg = 1; do_unreg = 1;
/* This uevent triggers dlm_controld in userspace to add us to the
group of nodes that are members of this lockspace (managed by the
cluster infrastructure.) Once it's done that, it tells us who the
current lockspace members are (via configfs) and then tells the
lockspace to start running (via sysfs) in dlm_ls_start(). */
error = do_uevent(ls, 1); error = do_uevent(ls, 1);
if (error) if (error)
goto out_stop; goto out_stop;
wait_for_completion(&ls->ls_members_done);
error = ls->ls_members_result;
if (error)
goto out_members;
dlm_create_debug_file(ls); dlm_create_debug_file(ls);
log_debug(ls, "join complete"); log_debug(ls, "join complete");
...@@ -551,6 +575,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ...@@ -551,6 +575,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
*lockspace = ls; *lockspace = ls;
return 0; return 0;
out_members:
do_uevent(ls, 0);
dlm_clear_members(ls);
kfree(ls->ls_node_array);
out_stop: out_stop:
dlm_recoverd_stop(ls); dlm_recoverd_stop(ls);
out_delist: out_delist:
...@@ -588,6 +616,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace, ...@@ -588,6 +616,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
error = new_lockspace(name, namelen, lockspace, flags, lvblen); error = new_lockspace(name, namelen, lockspace, flags, lvblen);
if (!error) if (!error)
ls_count++; ls_count++;
else if (!ls_count)
threads_stop();
out: out:
mutex_unlock(&ls_lock); mutex_unlock(&ls_lock);
return error; return error;
......
...@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) ...@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
*neg_out = neg; *neg_out = neg;
error = ping_members(ls); error = ping_members(ls);
if (!error || error == -EPROTO) {
/* new_lockspace() may be waiting to know if the config
is good or bad */
ls->ls_members_result = error;
complete(&ls->ls_members_done);
}
if (error) if (error)
goto out; goto out;
......
...@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) ...@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
log_error(ls, "version mismatch: %x nodeid %d: %x", log_error(ls, "version mismatch: %x nodeid %d: %x",
DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
rc->rc_header.h_version); rc->rc_header.h_version);
return -EINVAL; return -EPROTO;
} }
if (rf->rf_lvblen != ls->ls_lvblen || if (rf->rf_lvblen != ls->ls_lvblen ||
...@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) ...@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
ls->ls_lvblen, ls->ls_exflags, ls->ls_lvblen, ls->ls_exflags,
nodeid, rf->rf_lvblen, rf->rf_lsflags); nodeid, rf->rf_lvblen, rf->rf_lsflags);
return -EINVAL; return -EPROTO;
} }
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment