Commit 5969fe06, authored by Nick Piggin and committed by Linus Torvalds

[PATCH] sched: HT optimisation

If an idle sibling of an HT queue encounters a busy sibling, then perform
the load balancing of the higher-level domains as the non-idle variety.

Performance of multiprocessor HT systems with low numbers of tasks
(generally fewer than the number of virtual CPUs) can be significantly worse
than that of the exact same workloads running in non-HT mode.  This is largely
due to poor scheduling behaviour.

This patch improves the situation, making the performance gap far less
significant on one problematic test case (tbench).
Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent e17224bf
...@@ -1906,7 +1906,7 @@ out: ...@@ -1906,7 +1906,7 @@ out:
*/ */
static struct sched_group * static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu, find_busiest_group(struct sched_domain *sd, int this_cpu,
unsigned long *imbalance, enum idle_type idle) unsigned long *imbalance, enum idle_type idle, int *sd_idle)
{ {
struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
unsigned long max_load, avg_load, total_load, this_load, total_pwr; unsigned long max_load, avg_load, total_load, this_load, total_pwr;
...@@ -1931,6 +1931,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, ...@@ -1931,6 +1931,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
avg_load = 0; avg_load = 0;
for_each_cpu_mask(i, group->cpumask) { for_each_cpu_mask(i, group->cpumask) {
if (*sd_idle && !idle_cpu(i))
*sd_idle = 0;
/* Bias balancing toward cpus of our domain */ /* Bias balancing toward cpus of our domain */
if (local_group) if (local_group)
load = target_load(i, load_idx); load = target_load(i, load_idx);
...@@ -2074,10 +2077,14 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, ...@@ -2074,10 +2077,14 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
unsigned long imbalance; unsigned long imbalance;
int nr_moved, all_pinned = 0; int nr_moved, all_pinned = 0;
int active_balance = 0; int active_balance = 0;
int sd_idle = 0;
if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER)
sd_idle = 1;
schedstat_inc(sd, lb_cnt[idle]); schedstat_inc(sd, lb_cnt[idle]);
group = find_busiest_group(sd, this_cpu, &imbalance, idle); group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
if (!group) { if (!group) {
schedstat_inc(sd, lb_nobusyg[idle]); schedstat_inc(sd, lb_nobusyg[idle]);
goto out_balanced; goto out_balanced;
...@@ -2150,6 +2157,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, ...@@ -2150,6 +2157,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
sd->balance_interval *= 2; sd->balance_interval *= 2;
} }
if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER)
return -1;
return nr_moved; return nr_moved;
out_balanced: out_balanced:
...@@ -2161,6 +2170,8 @@ out_balanced: ...@@ -2161,6 +2170,8 @@ out_balanced:
(sd->balance_interval < sd->max_interval)) (sd->balance_interval < sd->max_interval))
sd->balance_interval *= 2; sd->balance_interval *= 2;
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
return -1;
return 0; return 0;
} }
...@@ -2178,9 +2189,13 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, ...@@ -2178,9 +2189,13 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
runqueue_t *busiest = NULL; runqueue_t *busiest = NULL;
unsigned long imbalance; unsigned long imbalance;
int nr_moved = 0; int nr_moved = 0;
int sd_idle = 0;
if (sd->flags & SD_SHARE_CPUPOWER)
sd_idle = 1;
schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE); group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
if (!group) { if (!group) {
schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
goto out_balanced; goto out_balanced;
...@@ -2205,15 +2220,19 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, ...@@ -2205,15 +2220,19 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
spin_unlock(&busiest->lock); spin_unlock(&busiest->lock);
} }
if (!nr_moved) if (!nr_moved) {
schedstat_inc(sd, lb_failed[NEWLY_IDLE]); schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
else if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
return -1;
} else
sd->nr_balance_failed = 0; sd->nr_balance_failed = 0;
return nr_moved; return nr_moved;
out_balanced: out_balanced:
schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
return -1;
sd->nr_balance_failed = 0; sd->nr_balance_failed = 0;
return 0; return 0;
} }
...@@ -2338,7 +2357,10 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, ...@@ -2338,7 +2357,10 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
if (j - sd->last_balance >= interval) { if (j - sd->last_balance >= interval) {
if (load_balance(this_cpu, this_rq, sd, idle)) { if (load_balance(this_cpu, this_rq, sd, idle)) {
/* We've pulled tasks over so no longer idle */ /* We've pulled tasks over so either we're no
* longer idle, or one of our SMT siblings is
* not idle.
*/
idle = NOT_IDLE; idle = NOT_IDLE;
} }
sd->last_balance += interval; sd->last_balance += interval;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment