Commit 370eaf38 authored by Thomas Gleixner

futex: Revert "futex: Wake up waiter outside the hb->lock section"

This reverts commit 928686b7.

The patch was an optimization of the old futex wake code, where we woke
the waiter and then set q->lock_ptr to NULL. When the freshly woken
waiter preempted the waker, it ran into lock contention on q->lock_ptr,
aka hb->lock, which the waker still held.
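
Roughly, the old wake path looked like this (a simplified sketch from
memory of the pre-f1a11e code, not a verbatim copy; q->waiter and
plist_del() are assumptions about that era). The caller holds hb->lock
across the whole function, so a waiter that starts running immediately
contends on that lock; the now-reverted patch avoided this by collecting
the tasks on a wake list and waking them only after hb->lock had been
dropped.

    static void wake_futex(struct futex_q *q)   /* before commit f1a11e */
    {
            plist_del(&q->list, &q->list.plist);

            /*
             * The waiter may start running right here; it then blocks on
             * hb->lock, which the caller still holds.
             */
            wake_up(&q->waiter);

            /*
             * The waiter can free the futex_q as soon as this store is
             * visible, so it must come last.
             */
            smp_wmb();
            q->lock_ptr = NULL;
    }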

Commit f1a11e (futex: remove the wait queue) changed the wakeup logic
to set q->lock_ptr to NULL _before_ waking the task. It holds a
reference on the task struct of the to-be-woken task to avoid an exit
race.
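
The resulting ordering, which this revert restores in wake_futex(),
looks roughly like this (simplified sketch, not a verbatim copy;
get_task_struct()/plist_del() are filled in from the surrounding code
of that era):

    static void wake_futex(struct futex_q *q)   /* after f1a11e, after this revert */
    {
            struct task_struct *p = q->task;

            /* Hold a reference so p cannot exit between the steps below */
            get_task_struct(p);

            plist_del(&q->list, &q->list.plist);

            /*
             * Once lock_ptr is NULL the waiter may free the futex_q, so
             * the plist_del must be ordered before this store.
             */
            smp_wmb();
            q->lock_ptr = NULL;

            /* Wake only after lock_ptr is cleared, then drop the reference */
            wake_up_state(p, TASK_NORMAL);
            put_task_struct(p);
    }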

The combination of both patches resulted in a different race on -RT:

    A is blocked on futex
    B calls futex_wake
    B sets q(A)->lock_ptr to NULL and puts A on the wake list
    B is preempted
    ...
    A wakes up (e.g. timer, signal)
    A detects q->lock_ptr = NULL and returns
    A waits on a different futex

    B is scheduled back in
    B wakes A
    A sees a spurious wake up
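
The waiter side is what turns B's late wakeup into a spurious one:
unqueue_me() treats q->lock_ptr == NULL as "already woken by
futex_wake()", so A returns success in the timeline above although B's
wake_up_state() has not run yet. A simplified sketch of that check (not
the exact kernel code, which also retries when lock_ptr changes
underneath it):

    static int unqueue_me(struct futex_q *q)
    {
            spinlock_t *lock_ptr = q->lock_ptr;

            /* NULL: a futex_wake() already claimed this futex_q */
            if (!lock_ptr)
                    return 0;

            /* Timeout/signal wakeup: unqueue ourselves under hb->lock */
            spin_lock(lock_ptr);
            plist_del(&q->list, &q->list.plist);
            q->lock_ptr = NULL;
            spin_unlock(lock_ptr);

            return 1;
    }
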
Reported-by: Blaise Gassend <blaise@willowgarage.com>
Debugged-by: Darren Hart <dvhltc@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

parent a03d1035
@@ -1492,7 +1492,6 @@ struct task_struct {
 #endif
         struct list_head pi_state_list;
         struct futex_pi_state *pi_state_cache;
-        struct task_struct *futex_wakeup;
 #endif
 #ifdef CONFIG_PERF_COUNTERS
         struct perf_counter_context *perf_counter_ctxp;
@@ -1190,7 +1190,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
         INIT_LIST_HEAD(&p->pi_state_list);
         p->pi_state_cache = NULL;
-        p->futex_wakeup = NULL;
 #endif
         /*
          * sigaltstack should be cleared when sharing the same VM
@@ -713,7 +713,7 @@ retry:
  * The hash bucket lock must be held when this is called.
  * Afterwards, the futex_q must not be accessed.
  */
-static void wake_futex(struct task_struct **wake_list, struct futex_q *q)
+static void wake_futex(struct futex_q *q)
 {
         struct task_struct *p = q->task;
 
@@ -736,51 +736,8 @@ static void wake_futex(struct task_struct **wake_list, struct futex_q *q)
         smp_wmb();
         q->lock_ptr = NULL;
 
-        /*
-         * Atomically grab the task, if ->futex_wakeup is !0 already it means
-         * its already queued (either by us or someone else) and will get the
-         * wakeup due to that.
-         *
-         * This cmpxchg() implies a full barrier, which pairs with the write
-         * barrier implied by the wakeup in wake_futex_list().
-         */
-        if (cmpxchg(&p->futex_wakeup, 0, p) != 0) {
-                /*
-                 * It was already queued, drop the extra ref and we're done.
-                 */
-                put_task_struct(p);
-                return;
-        }
-
-        /*
-         * Put the task on our wakeup list by atomically switching it with
-         * the list head. (XXX its a local list, no possible concurrency,
-         * this could be written without cmpxchg).
-         */
-        do {
-                p->futex_wakeup = *wake_list;
-        } while (cmpxchg(wake_list, p->futex_wakeup, p) != p->futex_wakeup);
-}
-
-/*
- * For each task on the list, deliver the pending wakeup and release the
- * task reference obtained in wake_futex().
- */
-static void wake_futex_list(struct task_struct *head)
-{
-        while (head != &init_task) {
-                struct task_struct *next = head->futex_wakeup;
-
-                head->futex_wakeup = NULL;
-                /*
-                 * wake_up_state() implies a wmb() to pair with the queueing
-                 * in wake_futex() so as to not miss wakeups.
-                 */
-                wake_up_state(head, TASK_NORMAL);
-                put_task_struct(head);
-
-                head = next;
-        }
-}
+        wake_up_state(p, TASK_NORMAL);
+        put_task_struct(p);
 }
 
 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -894,7 +851,6 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
         struct futex_q *this, *next;
         struct plist_head *head;
         union futex_key key = FUTEX_KEY_INIT;
-        struct task_struct *wake_list = &init_task;
         int ret;
 
         if (!bitset)
@@ -919,7 +875,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
                         if (!(this->bitset & bitset))
                                 continue;
 
-                        wake_futex(&wake_list, this);
+                        wake_futex(this);
                         if (++ret >= nr_wake)
                                 break;
                 }
@@ -927,8 +883,6 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
         spin_unlock(&hb->lock);
         put_futex_key(fshared, &key);
-
-        wake_futex_list(wake_list);
 out:
         return ret;
 }
@@ -945,7 +899,6 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
         struct futex_hash_bucket *hb1, *hb2;
         struct plist_head *head;
         struct futex_q *this, *next;
-        struct task_struct *wake_list = &init_task;
         int ret, op_ret;
 
 retry:
@@ -996,7 +949,7 @@ retry_private:
         plist_for_each_entry_safe(this, next, head, list) {
                 if (match_futex (&this->key, &key1)) {
-                        wake_futex(&wake_list, this);
+                        wake_futex(this);
                         if (++ret >= nr_wake)
                                 break;
                 }
@@ -1008,7 +961,7 @@ retry_private:
                 op_ret = 0;
                 plist_for_each_entry_safe(this, next, head, list) {
                         if (match_futex (&this->key, &key2)) {
-                                wake_futex(&wake_list, this);
+                                wake_futex(this);
                                 if (++op_ret >= nr_wake2)
                                         break;
                         }
@@ -1021,8 +974,6 @@ out_put_keys:
         put_futex_key(fshared, &key2);
 out_put_key1:
         put_futex_key(fshared, &key1);
-
-        wake_futex_list(wake_list);
 out:
         return ret;
 }
@@ -1177,7 +1128,6 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
         struct futex_hash_bucket *hb1, *hb2;
         struct plist_head *head1;
         struct futex_q *this, *next;
-        struct task_struct *wake_list = &init_task;
         u32 curval2;
 
         if (requeue_pi) {
@@ -1322,7 +1272,7 @@ retry_private:
                  * woken by futex_unlock_pi().
                  */
                 if (++task_count <= nr_wake && !requeue_pi) {
-                        wake_futex(&wake_list, this);
+                        wake_futex(this);
                         continue;
                 }
@@ -1368,8 +1318,6 @@ out_put_keys:
         put_futex_key(fshared, &key2);
 out_put_key1:
         put_futex_key(fshared, &key1);
-
-        wake_futex_list(wake_list);
 out:
         if (pi_state != NULL)
                 free_pi_state(pi_state);