Commit 43010659 authored by Peter Williams, committed by Ingo Molnar

sched: simplify move_tasks()

The move_tasks() function is currently multiplexed with two distinct
capabilities:

1. attempt to move a specified amount of weighted load from one run
queue to another; and
2. attempt to move a specified number of tasks from one run queue to
another.

The first of these capabilities is used in two places, load_balance()
and load_balance_newidle(), and in both of these cases the return value of
move_tasks() is used purely to decide if tasks/load were moved and no
notice of the actual number of tasks moved is taken.

The second capability is used in exactly one place,
active_load_balance(), to attempt to move exactly one task and, as
before, the return value is only used as an indicator of success or failure.

This multiplexing of move_tasks() was introduced, by me, as part of the
smpnice patches and was motivated by the fact that the alternative, one
function to move specified load and one to move a single task, would
have led to two functions of roughly the same complexity as the old
move_tasks() (or the new balance_tasks()).  However, the new modular
design of the new CFS scheduler allows a simpler solution to be adopted
and this patch addresses that solution by:

1. adding a new function, move_one_task(), to be used by
active_load_balance(); and
2. making move_tasks() a single purpose function that tries to move a
specified weighted load and returns 1 for success and 0 for failure.

One of the consequences of these changes is that neither move_one_task()
nor the new move_tasks() cares how many tasks sched_class.load_balance()
moves, and this enables the interface of sched_class.load_balance() to be
simplified by returning the amount of load moved as its result and removing
the load_moved pointer from the argument list.  This helps simplify the new
move_tasks() and slightly reduces the amount of work done in each of
sched_class.load_balance()'s implementations.

Further simplifications, e.g. changes to balance_tasks(), are possible
but (slightly) complicated by the special needs of load_balance_fair(),
so I've left them to a later patch (if this one gets accepted).

NB: Since move_tasks() gets called with two run queue locks held, even
small reductions in overhead are worthwhile.

[ mingo@elte.hu ]

this change also reduces code size nicely:

   text    data     bss     dec     hex filename
   39216    3618      24   42858    a76a sched.o.before
   39173    3618      24   42815    a73f sched.o.after
Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent f1a438d8
...@@ -866,11 +866,11 @@ struct sched_class { ...@@ -866,11 +866,11 @@ struct sched_class {
struct task_struct * (*pick_next_task) (struct rq *rq, u64 now); struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
int (*load_balance) (struct rq *this_rq, int this_cpu, unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
struct rq *busiest, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move, unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle, struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *total_load_moved); int *all_pinned);
void (*set_curr_task) (struct rq *rq); void (*set_curr_task) (struct rq *rq);
void (*task_tick) (struct rq *rq, struct task_struct *p); void (*task_tick) (struct rq *rq, struct task_struct *p);
......
...@@ -2231,32 +2231,49 @@ out: ...@@ -2231,32 +2231,49 @@ out:
} }
/* /*
* move_tasks tries to move up to max_nr_move tasks and max_load_move weighted * move_tasks tries to move up to max_load_move weighted load from busiest to
* load from busiest to this_rq, as part of a balancing operation within * this_rq, as part of a balancing operation within domain "sd".
* "domain". Returns the number of tasks moved. * Returns 1 if successful and 0 otherwise.
* *
* Called with both runqueues locked. * Called with both runqueues locked.
*/ */
static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle, struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned) int *all_pinned)
{ {
struct sched_class *class = sched_class_highest; struct sched_class *class = sched_class_highest;
unsigned long load_moved, total_nr_moved = 0, nr_moved; unsigned long total_load_moved = 0;
long rem_load_move = max_load_move;
do { do {
nr_moved = class->load_balance(this_rq, this_cpu, busiest, total_load_moved +=
max_nr_move, (unsigned long)rem_load_move, class->load_balance(this_rq, this_cpu, busiest,
sd, idle, all_pinned, &load_moved); ULONG_MAX, max_load_move - total_load_moved,
total_nr_moved += nr_moved; sd, idle, all_pinned);
max_nr_move -= nr_moved;
rem_load_move -= load_moved;
class = class->next; class = class->next;
} while (class && max_nr_move && rem_load_move > 0); } while (class && max_load_move > total_load_moved);
return total_nr_moved; return total_load_moved > 0;
}
/*
* move_one_task tries to move exactly one task from busiest to this_rq, as
* part of active balancing operations within "domain".
* Returns 1 if successful and 0 otherwise.
*
* Called with both runqueues locked.
*/
static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
struct sched_class *class;
for (class = sched_class_highest; class; class = class->next)
if (class->load_balance(this_rq, this_cpu, busiest,
1, ULONG_MAX, sd, idle, NULL))
return 1;
return 0;
} }
/* /*
...@@ -2588,11 +2605,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, ...@@ -2588,11 +2605,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
*/ */
#define MAX_PINNED_INTERVAL 512 #define MAX_PINNED_INTERVAL 512
static inline unsigned long minus_1_or_zero(unsigned long n)
{
return n > 0 ? n - 1 : 0;
}
/* /*
* Check this_cpu to ensure it is balanced within domain. Attempt to move * Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance. * tasks if there is an imbalance.
...@@ -2601,7 +2613,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -2601,7 +2613,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle, struct sched_domain *sd, enum cpu_idle_type idle,
int *balance) int *balance)
{ {
int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
struct sched_group *group; struct sched_group *group;
unsigned long imbalance; unsigned long imbalance;
struct rq *busiest; struct rq *busiest;
...@@ -2642,18 +2654,17 @@ redo: ...@@ -2642,18 +2654,17 @@ redo:
schedstat_add(sd, lb_imbalance[idle], imbalance); schedstat_add(sd, lb_imbalance[idle], imbalance);
nr_moved = 0; ld_moved = 0;
if (busiest->nr_running > 1) { if (busiest->nr_running > 1) {
/* /*
* Attempt to move tasks. If find_busiest_group has found * Attempt to move tasks. If find_busiest_group has found
* an imbalance but busiest->nr_running <= 1, the group is * an imbalance but busiest->nr_running <= 1, the group is
* still unbalanced. nr_moved simply stays zero, so it is * still unbalanced. ld_moved simply stays zero, so it is
* correctly treated as an imbalance. * correctly treated as an imbalance.
*/ */
local_irq_save(flags); local_irq_save(flags);
double_rq_lock(this_rq, busiest); double_rq_lock(this_rq, busiest);
nr_moved = move_tasks(this_rq, this_cpu, busiest, ld_moved = move_tasks(this_rq, this_cpu, busiest,
minus_1_or_zero(busiest->nr_running),
imbalance, sd, idle, &all_pinned); imbalance, sd, idle, &all_pinned);
double_rq_unlock(this_rq, busiest); double_rq_unlock(this_rq, busiest);
local_irq_restore(flags); local_irq_restore(flags);
...@@ -2661,7 +2672,7 @@ redo: ...@@ -2661,7 +2672,7 @@ redo:
/* /*
* some other cpu did the load balance for us. * some other cpu did the load balance for us.
*/ */
if (nr_moved && this_cpu != smp_processor_id()) if (ld_moved && this_cpu != smp_processor_id())
resched_cpu(this_cpu); resched_cpu(this_cpu);
/* All tasks on this runqueue were pinned by CPU affinity */ /* All tasks on this runqueue were pinned by CPU affinity */
...@@ -2673,7 +2684,7 @@ redo: ...@@ -2673,7 +2684,7 @@ redo:
} }
} }
if (!nr_moved) { if (!ld_moved) {
schedstat_inc(sd, lb_failed[idle]); schedstat_inc(sd, lb_failed[idle]);
sd->nr_balance_failed++; sd->nr_balance_failed++;
...@@ -2722,10 +2733,10 @@ redo: ...@@ -2722,10 +2733,10 @@ redo:
sd->balance_interval *= 2; sd->balance_interval *= 2;
} }
if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
return -1; return -1;
return nr_moved; return ld_moved;
out_balanced: out_balanced:
schedstat_inc(sd, lb_balanced[idle]); schedstat_inc(sd, lb_balanced[idle]);
...@@ -2757,7 +2768,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) ...@@ -2757,7 +2768,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
struct sched_group *group; struct sched_group *group;
struct rq *busiest = NULL; struct rq *busiest = NULL;
unsigned long imbalance; unsigned long imbalance;
int nr_moved = 0; int ld_moved = 0;
int sd_idle = 0; int sd_idle = 0;
int all_pinned = 0; int all_pinned = 0;
cpumask_t cpus = CPU_MASK_ALL; cpumask_t cpus = CPU_MASK_ALL;
...@@ -2792,12 +2803,11 @@ redo: ...@@ -2792,12 +2803,11 @@ redo:
schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance); schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance);
nr_moved = 0; ld_moved = 0;
if (busiest->nr_running > 1) { if (busiest->nr_running > 1) {
/* Attempt to move tasks */ /* Attempt to move tasks */
double_lock_balance(this_rq, busiest); double_lock_balance(this_rq, busiest);
nr_moved = move_tasks(this_rq, this_cpu, busiest, ld_moved = move_tasks(this_rq, this_cpu, busiest,
minus_1_or_zero(busiest->nr_running),
imbalance, sd, CPU_NEWLY_IDLE, imbalance, sd, CPU_NEWLY_IDLE,
&all_pinned); &all_pinned);
spin_unlock(&busiest->lock); spin_unlock(&busiest->lock);
...@@ -2809,7 +2819,7 @@ redo: ...@@ -2809,7 +2819,7 @@ redo:
} }
} }
if (!nr_moved) { if (!ld_moved) {
schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
...@@ -2817,7 +2827,7 @@ redo: ...@@ -2817,7 +2827,7 @@ redo:
} else } else
sd->nr_balance_failed = 0; sd->nr_balance_failed = 0;
return nr_moved; return ld_moved;
out_balanced: out_balanced:
schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]);
...@@ -2905,8 +2915,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) ...@@ -2905,8 +2915,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
if (likely(sd)) { if (likely(sd)) {
schedstat_inc(sd, alb_cnt); schedstat_inc(sd, alb_cnt);
if (move_tasks(target_rq, target_cpu, busiest_rq, 1, if (move_one_task(target_rq, target_cpu, busiest_rq,
ULONG_MAX, sd, CPU_IDLE, NULL)) sd, CPU_IDLE))
schedstat_inc(sd, alb_pushed); schedstat_inc(sd, alb_pushed);
else else
schedstat_inc(sd, alb_failed); schedstat_inc(sd, alb_failed);
......
...@@ -944,11 +944,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) ...@@ -944,11 +944,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
return p->prio; return p->prio;
} }
static int static unsigned long
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move, unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle, struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *total_load_moved) int *all_pinned)
{ {
struct cfs_rq *busy_cfs_rq; struct cfs_rq *busy_cfs_rq;
unsigned long load_moved, total_nr_moved = 0, nr_moved; unsigned long load_moved, total_nr_moved = 0, nr_moved;
...@@ -1006,9 +1006,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, ...@@ -1006,9 +1006,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
break; break;
} }
*total_load_moved = max_load_move - rem_load_move; return max_load_move - rem_load_move;
return total_nr_moved;
} }
/* /*
......
...@@ -37,11 +37,11 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now) ...@@ -37,11 +37,11 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
{ {
} }
static int static unsigned long
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move, unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle, struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *total_load_moved) int *all_pinned)
{ {
return 0; return 0;
} }
......
...@@ -172,15 +172,16 @@ static struct task_struct *load_balance_next_rt(void *arg) ...@@ -172,15 +172,16 @@ static struct task_struct *load_balance_next_rt(void *arg)
return p; return p;
} }
static int static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move, unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle, struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved) int *all_pinned)
{ {
int this_best_prio, best_prio, best_prio_seen = 0; int this_best_prio, best_prio, best_prio_seen = 0;
int nr_moved; int nr_moved;
struct rq_iterator rt_rq_iterator; struct rq_iterator rt_rq_iterator;
unsigned long load_moved;
best_prio = sched_find_first_bit(busiest->rt.active.bitmap); best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap); this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
...@@ -203,11 +204,11 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, ...@@ -203,11 +204,11 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
rt_rq_iterator.arg = busiest; rt_rq_iterator.arg = busiest;
nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
max_load_move, sd, idle, all_pinned, load_moved, max_load_move, sd, idle, all_pinned, &load_moved,
this_best_prio, best_prio, best_prio_seen, this_best_prio, best_prio, best_prio_seen,
&rt_rq_iterator); &rt_rq_iterator);
return nr_moved; return load_moved;
} }
static void task_tick_rt(struct rq *rq, struct task_struct *p) static void task_tick_rt(struct rq *rq, struct task_struct *p)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment