Commit 487ac708 authored by Thomas Gleixner

rtmutex: prevent missed wakeups

The sleeping locks implementation based on rtmutexes can miss wakeups
for two reasons:

1) The unconditional use of TASK_UNINTERRUPTIBLE for the blocking state

   Results in missed wakeups from wake_up_interruptible*()

   state = TASK_INTERRUPTIBLE;
   blocks_on_lock()
     state = TASK_UNINTERRUPTIBLE;
     schedule();
     ....
     acquires_lock();
     restore_state();

   Until the waiter has restored its state, wake_up_interruptible*()
   will fail.

2) The rtmutex wakeup intermediate state TASK_RUNNING_MUTEX

   Results in missed wakeups from wake_up*()

   waiter is woken by mutex wakeup
     waiter->state = TASK_RUNNING_MUTEX;
   ....
   acquires_lock();
   restore_state();

   Until the waiter has restored its state, wake_up*() will fail. (The
   sketch after this list models the wake-side check that drops both
   kinds of wakeups.)
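
Both cases come down to the state test on the wakeup side: try_to_wake_up()
delivers a wakeup only if the waiter's current state intersects the caller's
mask (TASK_INTERRUPTIBLE for wake_up_interruptible*(), TASK_NORMAL for plain
wake_up*()). Below is a small self-contained model of that test; the bit
values and the userspace framing are assumptions for illustration only, not
kernel code:

   #include <stdio.h>

   /* Hypothetical state/mask bit values, for illustration only; the real
    * definitions live in include/linux/sched.h (TASK_RUNNING_MUTEX is
    * RT-tree specific and its value is assumed here). */
   #define TASK_INTERRUPTIBLE      0x0001
   #define TASK_UNINTERRUPTIBLE    0x0002
   #define TASK_NORMAL             (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
   #define TASK_RUNNING_MUTEX      0x0010

   /* Condensed model of the state test in try_to_wake_up(): a wakeup is
    * only delivered when the waiter's state intersects the caller's mask. */
   static int wakeup_seen(unsigned int waiter_state, unsigned int wake_mask)
   {
           return (waiter_state & wake_mask) != 0;
   }

   int main(void)
   {
           /* case 1: blocked in TASK_UNINTERRUPTIBLE, woken by
            * wake_up_interruptible() -> mask test fails, wakeup lost */
           printf("case 1 delivered: %d\n",
                  wakeup_seen(TASK_UNINTERRUPTIBLE, TASK_INTERRUPTIBLE));

           /* case 2: intermediate TASK_RUNNING_MUTEX state, woken by plain
            * wake_up() (TASK_NORMAL) -> mask test fails, wakeup lost */
           printf("case 2 delivered: %d\n",
                  wakeup_seen(TASK_RUNNING_MUTEX, TASK_NORMAL));
           return 0;
   }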

Solution:

Instead of setting the state to TASK_RUNNING_MUTEX in the mutex wakeup
case, we logically OR TASK_RUNNING_MUTEX into the current waiter
state. This keeps the original bits (TASK_INTERRUPTIBLE /
TASK_UNINTERRUPTIBLE) intact, so real wakeups still succeed. When a
task blocks on a lock in state TASK_INTERRUPTIBLE and is woken up by a
real wakeup, we store TASK_RUNNING as the state to restore and can
safely block in TASK_UNINTERRUPTIBLE from that point on, avoiding
further wakeups which would only make us loop in the lock code.
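
For illustration, here is a small self-contained walk-through of the state
bits in that sequence; the numeric bit values and the userspace framing are
assumptions for the example only, the real change is in the
rt_set_current_blocked_state() and try_to_wake_up() hunks below:

   #include <stdio.h>

   /* Same hypothetical bit values as in the sketch above; illustration
    * only, not kernel code. */
   #define TASK_RUNNING            0x0000
   #define TASK_INTERRUPTIBLE      0x0001
   #define TASK_UNINTERRUPTIBLE    0x0002
   #define TASK_RUNNING_MUTEX      0x0010

   int main(void)
   {
           unsigned int state = TASK_INTERRUPTIBLE;  /* waiter blocks interruptibly */
           unsigned int saved_state = state;         /* restored once the lock is taken */

           /* rtmutex wakeup: OR in the intermediate bit instead of
            * overwriting, so the INTERRUPTIBLE bit stays visible. */
           state |= TASK_RUNNING_MUTEX;

           /* a real wake_up_interruptible() now matches the state ... */
           if (state & TASK_INTERRUPTIBLE)
                   state = TASK_RUNNING;

           /* ... the lock code remembers the real wakeup for the restore ... */
           if (state == TASK_RUNNING)
                   saved_state = TASK_RUNNING;

           /* ... and blocks uninterruptibly from here on; further wakeups
            * would only make it loop in the lock code. */
           state = TASK_UNINTERRUPTIBLE;

           printf("blocked state now: %#x, state to restore: %#x\n",
                  state, saved_state);
           return 0;
   }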

This also removes the extra TASK_RUNNING_MUTEX bits from the wakeup
side (TASK_ALL, wake_up_state(), default_wake_function()), as they are
no longer necessary.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 640f0c05
@@ -204,8 +204,7 @@ extern struct semaphore kernel_sem;
 
 /* Convenience macros for the sake of wake_up */
 #define TASK_NORMAL             (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
-#define TASK_ALL                (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED | \
-                                 TASK_RUNNING_MUTEX)
+#define TASK_ALL                (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
 
 /* get_task_state() */
 #define TASK_REPORT             (TASK_RUNNING | TASK_RUNNING_MUTEX | \
@@ -729,16 +729,32 @@ static int adaptive_wait(struct rt_mutex_waiter *waiter,
 /*
  * The state setting needs to preserve the original state and needs to
  * take care of non rtmutex wakeups.
+ *
+ * Called with rtmutex->wait_lock held to serialize against rtmutex
+ * wakeups().
  */
 static inline unsigned long
 rt_set_current_blocked_state(unsigned long saved_state)
 {
-        unsigned long state;
+        unsigned long state, block_state;
 
-        state = xchg(&current->state, TASK_UNINTERRUPTIBLE);
+        /*
+         * If state is TASK_INTERRUPTIBLE, then we set the state for
+         * blocking to TASK_INTERRUPTIBLE as well, otherwise we would
+         * miss real wakeups via wake_up_interruptible(). If such a
+         * wakeup happens we see the running state and preserve it in
+         * saved_state. Now we can ignore further wakeups as we will
+         * return in state running from our "spin" sleep.
+         */
+        if (saved_state == TASK_INTERRUPTIBLE)
+                block_state = TASK_INTERRUPTIBLE;
+        else
+                block_state = TASK_UNINTERRUPTIBLE;
+
+        state = xchg(&current->state, block_state);
         /*
          * Take care of non rtmutex wakeups. rtmutex wakeups
-         * set the state to TASK_RUNNING_MUTEX.
+         * or TASK_RUNNING_MUTEX to (UN)INTERRUPTIBLE.
          */
         if (state == TASK_RUNNING)
                 saved_state = TASK_RUNNING;
@@ -2530,8 +2530,16 @@ out_running:
         trace_sched_wakeup(rq, p, success);
         check_preempt_curr(rq, p, sync);
 
+        /*
+         * For a mutex wakeup we or TASK_RUNNING_MUTEX to the task
+         * state to preserve the original state, so a real wakeup
+         * still can see the (UN)INTERRUPTIBLE bits in the state check
+         * above. We dont have to worry about the | TASK_RUNNING_MUTEX
+         * here. The waiter is serialized by the mutex lock and nobody
+         * else can fiddle with p->state as we hold rq lock.
+         */
         if (mutex)
-                p->state = TASK_RUNNING_MUTEX;
+                p->state |= TASK_RUNNING_MUTEX;
         else
                 p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
@@ -2581,7 +2589,7 @@ EXPORT_SYMBOL(wake_up_process_mutex_sync);
 
 int wake_up_state(struct task_struct *p, unsigned int state)
 {
-        return try_to_wake_up(p, state | TASK_RUNNING_MUTEX, 0, 0);
+        return try_to_wake_up(p, state, 0, 0);
 }
 
 /*
@@ -5385,7 +5393,7 @@ need_resched_nonpreemptible:
         update_rq_clock(rq);
         clear_tsk_need_resched(prev);
 
-        if ((prev->state & ~TASK_RUNNING_MUTEX) &&
+        if (!(prev->state & TASK_RUNNING_MUTEX) && prev->state &&
             !(preempt_count() & PREEMPT_ACTIVE)) {
                 if (unlikely(signal_pending_state(prev->state, prev)))
                         prev->state = TASK_RUNNING;
@@ -5585,8 +5593,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
                           void *key)
 {
-        return try_to_wake_up(curr->private, mode | TASK_RUNNING_MUTEX,
-                              sync, 0);
+        return try_to_wake_up(curr->private, mode, sync, 0);
 }
 EXPORT_SYMBOL(default_wake_function);
 