Commit f65eda4f authored by Steven Rostedt, committed by Ingo Molnar

sched: pull RT tasks from overloaded runqueues

This patch adds the algorithm to pull tasks from RT overloaded runqueues.

When an RT pull is initiated, all overloaded runqueues are examined for
an RT task that is higher in priority than the highest-priority task queued
on the target runqueue. If such a task is found on another runqueue, it is
pulled to the target runqueue.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 4fd29176
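
The pull rule described above can be illustrated outside the kernel. The stand-alone C sketch below is a simplified model, not kernel code: struct fake_rq, should_pull() and the sample priority values are hypothetical. It keeps only two ingredients of the real check: a source runqueue counts as overloaded when it has more than one queued RT task, and a candidate is pulled only when its priority (numerically lower is better) beats the best task already queued on the target. The actual patch additionally checks CPU affinity and skips tasks that are currently running.

/*
 * Hypothetical user-space model of the pull decision; not kernel code.
 * Lower numeric value means higher RT priority, as in the scheduler.
 */
#include <stdio.h>

struct fake_rq {
        int rt_nr_running;   /* number of queued RT tasks on this runqueue */
        int highest_prio;    /* best (numerically lowest) prio among them */
};

/* Mirrors only the priority test: pull when src's best task preempts ours. */
static int should_pull(const struct fake_rq *target, const struct fake_rq *src)
{
        if (src->rt_nr_running <= 1)    /* not overloaded, nothing spare */
                return 0;
        return src->highest_prio < target->highest_prio;
}

int main(void)
{
        struct fake_rq target = { .rt_nr_running = 1, .highest_prio = 50 };
        struct fake_rq others[] = {
                { .rt_nr_running = 3, .highest_prio = 40 },  /* pull: higher prio */
                { .rt_nr_running = 2, .highest_prio = 60 },  /* skip: lower prio */
                { .rt_nr_running = 1, .highest_prio = 10 },  /* skip: not overloaded */
        };
        unsigned int i;

        for (i = 0; i < sizeof(others) / sizeof(others[0]); i++)
                printf("runqueue %u: pull? %s\n", i,
                       should_pull(&target, &others[i]) ? "yes" : "no");
        return 0;
}
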
@@ -3721,6 +3721,8 @@ need_resched_nonpreemptible:
                 switch_count = &prev->nvcsw;
         }
 
+        schedule_balance_rt(rq, prev);
+
         if (unlikely(!rq->nr_running))
                 idle_balance(cpu, rq);
 
@@ -179,8 +179,17 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
+static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
+{
+        if (!task_running(rq, p) &&
+            (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+                return 1;
+        return 0;
+}
+
 /* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
+                                                     int cpu)
 {
         struct rt_prio_array *array = &rq->rt.active;
         struct task_struct *next;
@@ -199,26 +208,36 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
         }
 
         queue = array->queue + idx;
+        BUG_ON(list_empty(queue));
+
         next = list_entry(queue->next, struct task_struct, run_list);
-        if (unlikely(next != rq->curr))
-                return next;
+        if (unlikely(pick_rt_task(rq, next, cpu)))
+                goto out;
 
         if (queue->next->next != queue) {
                 /* same prio task */
                 next = list_entry(queue->next->next, struct task_struct, run_list);
-                return next;
+                if (pick_rt_task(rq, next, cpu))
+                        goto out;
         }
 
+ retry:
         /* slower, but more flexible */
         idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-        if (unlikely(idx >= MAX_RT_PRIO)) {
-                WARN_ON(1); /* rt_nr_running was 2 and above! */
+        if (unlikely(idx >= MAX_RT_PRIO))
                 return NULL;
-        }
 
         queue = array->queue + idx;
-        next = list_entry(queue->next, struct task_struct, run_list);
+        BUG_ON(list_empty(queue));
+
+        list_for_each_entry(next, queue, run_list) {
+                if (pick_rt_task(rq, next, cpu))
+                        goto out;
+        }
+
+        goto retry;
 
+ out:
         return next;
 }
 
@@ -305,13 +324,15 @@ static int push_rt_task(struct rq *this_rq)
 
         assert_spin_locked(&this_rq->lock);
 
-        next_task = pick_next_highest_task_rt(this_rq);
+        next_task = pick_next_highest_task_rt(this_rq, -1);
         if (!next_task)
                 return 0;
 
  retry:
-        if (unlikely(next_task == this_rq->curr))
+        if (unlikely(next_task == this_rq->curr)) {
+                WARN_ON(1);
                 return 0;
+        }
 
         /*
          * It's possible that the next_task slipped in of
@@ -335,7 +356,7 @@ static int push_rt_task(struct rq *this_rq)
          * so it is possible that next_task has changed.
          * If it has, then try again.
          */
-        task = pick_next_highest_task_rt(this_rq);
+        task = pick_next_highest_task_rt(this_rq, -1);
         if (unlikely(task != next_task) && task && paranoid--) {
                 put_task_struct(next_task);
                 next_task = task;
@@ -378,6 +399,149 @@ static void push_rt_tasks(struct rq *rq)
                 ;
 }
 
+static int pull_rt_task(struct rq *this_rq)
+{
+        struct task_struct *next;
+        struct task_struct *p;
+        struct rq *src_rq;
+        cpumask_t *rto_cpumask;
+        int this_cpu = this_rq->cpu;
+        int cpu;
+        int ret = 0;
+
+        assert_spin_locked(&this_rq->lock);
+
+        /*
+         * If cpusets are used, and we have overlapping
+         * run queue cpusets, then this algorithm may not catch all.
+         * This is just the price you pay on trying to keep
+         * dirtying caches down on large SMP machines.
+         */
+        if (likely(!rt_overloaded()))
+                return 0;
+
+        next = pick_next_task_rt(this_rq);
+
+        rto_cpumask = rt_overload();
+
+        for_each_cpu_mask(cpu, *rto_cpumask) {
+                if (this_cpu == cpu)
+                        continue;
+
+                src_rq = cpu_rq(cpu);
+                if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
+                        /*
+                         * It is possible that overlapping cpusets
+                         * will miss clearing a non overloaded runqueue.
+                         * Clear it now.
+                         */
+                        if (double_lock_balance(this_rq, src_rq)) {
+                                /* unlocked our runqueue lock */
+                                struct task_struct *old_next = next;
+                                next = pick_next_task_rt(this_rq);
+                                if (next != old_next)
+                                        ret = 1;
+                        }
+                        if (likely(src_rq->rt.rt_nr_running <= 1))
+                                /*
+                                 * Small chance that this_rq->curr changed
+                                 * but it's really harmless here.
+                                 */
+                                rt_clear_overload(this_rq);
+                        else
+                                /*
+                                 * Heh, the src_rq is now overloaded, since
+                                 * we already have the src_rq lock, go straight
+                                 * to pulling tasks from it.
+                                 */
+                                goto try_pulling;
+                        spin_unlock(&src_rq->lock);
+                        continue;
+                }
+
+                /*
+                 * We can potentially drop this_rq's lock in
+                 * double_lock_balance, and another CPU could
+                 * steal our next task - hence we must cause
+                 * the caller to recalculate the next task
+                 * in that case:
+                 */
+                if (double_lock_balance(this_rq, src_rq)) {
+                        struct task_struct *old_next = next;
+                        next = pick_next_task_rt(this_rq);
+                        if (next != old_next)
+                                ret = 1;
+                }
+
+                /*
+                 * Are there still pullable RT tasks?
+                 */
+                if (src_rq->rt.rt_nr_running <= 1) {
+                        spin_unlock(&src_rq->lock);
+                        continue;
+                }
+
+ try_pulling:
+                p = pick_next_highest_task_rt(src_rq, this_cpu);
+
+                /*
+                 * Do we have an RT task that preempts
+                 * the to-be-scheduled task?
+                 */
+                if (p && (!next || (p->prio < next->prio))) {
+                        WARN_ON(p == src_rq->curr);
+                        WARN_ON(!p->se.on_rq);
+
+                        /*
+                         * There's a chance that p is higher in priority
+                         * than what's currently running on its cpu.
+                         * This is just that p is waking up and hasn't
+                         * had a chance to schedule. We only pull
+                         * p if it is lower in priority than the
+                         * current task on the run queue or
+                         * this_rq next task is lower in prio than
+                         * the current task on that rq.
+                         */
+                        if (p->prio < src_rq->curr->prio ||
+                            (next && next->prio < src_rq->curr->prio))
+                                goto bail;
+
+                        ret = 1;
+
+                        deactivate_task(src_rq, p, 0);
+                        set_task_cpu(p, this_cpu);
+                        activate_task(this_rq, p, 0);
+                        /*
+                         * We continue with the search, just in
+                         * case there's an even higher prio task
+                         * in another runqueue. (low likelihood
+                         * but possible)
+                         */
+
+                        /*
+                         * Update next so that we won't pick a task
+                         * on another cpu with a priority lower (or equal)
+                         * than the one we just picked.
+                         */
+                        next = p;
+
+                }
+ bail:
+                spin_unlock(&src_rq->lock);
+        }
+
+        return ret;
+}
+
+static void schedule_balance_rt(struct rq *rq,
+                                struct task_struct *prev)
+{
+        /* Try to pull RT tasks here if we lower this rq's prio */
+        if (unlikely(rt_task(prev)) &&
+            rq->rt.highest_prio > prev->prio)
+                pull_rt_task(rq);
+}
+
 static void schedule_tail_balance_rt(struct rq *rq)
 {
         /*
@@ -500,6 +664,7 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 }
 #else /* CONFIG_SMP */
 # define schedule_tail_balance_rt(rq)   do { } while (0)
+# define schedule_balance_rt(rq, prev)  do { } while (0)
 #endif /* CONFIG_SMP */
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)