Commit f7cf1cdd authored by Peter Zijlstra, committed by Thomas Gleixner

sched: remove reciprocal for cpu_power

It's a source of failures; also, now that cpu_power is dynamic, it's a
waste of time.

before:
<idle>-0   [000]   132.877936: find_busiest_group: avg_load: 0 group_load: 8241 power: 1 

after:
bash-1689  [001]   137.862151: find_busiest_group: avg_load: 10636288 group_load: 10387 power: 1

[ dino: backport to 31-rt ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Darren Hart <dvhltc@us.ibm.com>
Cc: John Kacur <jkacur@redhat.com>
[andreas.herrmann3@amd.com: remove include]
Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent e9265e74
...@@ -905,15 +905,9 @@ struct sched_group { ...@@ -905,15 +905,9 @@ struct sched_group {
/* /*
* CPU power of this group, SCHED_LOAD_SCALE being max power for a * CPU power of this group, SCHED_LOAD_SCALE being max power for a
* single CPU. This is read only (except for setup, hotplug CPU). * single CPU.
* Note : Never change cpu_power without recompute its reciprocal
*/ */
unsigned int __cpu_power; unsigned int cpu_power;
/*
* reciprocal value of cpu_power to avoid expensive divides
* (see include/linux/reciprocal_div.h)
*/
u32 reciprocal_cpu_power;
/* /*
* The CPUs this group covers. * The CPUs this group covers.
......
...@@ -137,30 +137,8 @@ ...@@ -137,30 +137,8 @@
*/ */
#define RUNTIME_INF ((u64)~0ULL) #define RUNTIME_INF ((u64)~0ULL)
#ifdef CONFIG_SMP
static void double_rq_lock(struct rq *rq1, struct rq *rq2); static void double_rq_lock(struct rq *rq1, struct rq *rq2);
/*
* Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
* Since cpu_power is a 'constant', we can use a reciprocal divide.
*/
static inline u32 sg_div_cpu_power(const struct sched_group *sg, u32 load)
{
return reciprocal_divide(load, sg->reciprocal_cpu_power);
}
/*
* Each time a sched group cpu_power is changed,
* we must compute its reciprocal value
*/
static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
{
sg->__cpu_power += val;
sg->reciprocal_cpu_power = reciprocal_value(sg->__cpu_power);
}
#endif
#define TASK_PREEMPTS_CURR(p, rq) \ #define TASK_PREEMPTS_CURR(p, rq) \
((p)->prio < (rq)->curr->prio) ((p)->prio < (rq)->curr->prio)
...@@ -2401,8 +2379,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) ...@@ -2401,8 +2379,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
} }
/* Adjust by relative CPU power of the group */ /* Adjust by relative CPU power of the group */
avg_load = sg_div_cpu_power(group, avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
avg_load * SCHED_LOAD_SCALE);
if (local_group) { if (local_group) {
this_load = avg_load; this_load = avg_load;
...@@ -3849,7 +3826,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) ...@@ -3849,7 +3826,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
unsigned long weight = cpumask_weight(sched_domain_span(sd)); unsigned long weight = cpumask_weight(sched_domain_span(sd));
unsigned long power = SCHED_LOAD_SCALE; unsigned long power = SCHED_LOAD_SCALE;
struct sched_group *sdg = sd->groups; struct sched_group *sdg = sd->groups;
unsigned long old = sdg->__cpu_power;
/* here we could scale based on cpufreq */ /* here we could scale based on cpufreq */
...@@ -3864,33 +3840,26 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) ...@@ -3864,33 +3840,26 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
if (!power) if (!power)
power = 1; power = 1;
if (power != old) { sdg->cpu_power = power;
sdg->__cpu_power = power;
sdg->reciprocal_cpu_power = reciprocal_value(power);
}
} }
static void update_group_power(struct sched_domain *sd, int cpu) static void update_group_power(struct sched_domain *sd, int cpu)
{ {
struct sched_domain *child = sd->child; struct sched_domain *child = sd->child;
struct sched_group *group, *sdg = sd->groups; struct sched_group *group, *sdg = sd->groups;
unsigned long power = sdg->__cpu_power;
if (!child) { if (!child) {
update_cpu_power(sd, cpu); update_cpu_power(sd, cpu);
return; return;
} }
sdg->__cpu_power = 0; sdg->cpu_power = 0;
group = child->groups; group = child->groups;
do { do {
sdg->__cpu_power += group->__cpu_power; sdg->cpu_power += group->cpu_power;
group = group->next; group = group->next;
} while (group != child->groups); } while (group != child->groups);
if (power != sdg->__cpu_power)
sdg->reciprocal_cpu_power = reciprocal_value(sdg->__cpu_power);
} }
/** /**
...@@ -3970,8 +3939,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, ...@@ -3970,8 +3939,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
} }
/* Adjust by relative CPU power of the group */ /* Adjust by relative CPU power of the group */
sgs->avg_load = sg_div_cpu_power(group, sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power;
sgs->group_load * SCHED_LOAD_SCALE);
/* /*
...@@ -3983,14 +3951,14 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, ...@@ -3983,14 +3951,14 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
* normalized nr_running number somewhere that negates * normalized nr_running number somewhere that negates
* the hierarchy? * the hierarchy?
*/ */
avg_load_per_task = sg_div_cpu_power(group, avg_load_per_task = (sum_avg_load_per_task * SCHED_LOAD_SCALE) /
sum_avg_load_per_task * SCHED_LOAD_SCALE); group->cpu_power;
if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
sgs->group_imb = 1; sgs->group_imb = 1;
sgs->group_capacity = sgs->group_capacity =
DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE); DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
} }
/** /**
...@@ -4032,7 +4000,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, ...@@ -4032,7 +4000,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
return; return;
sds->total_load += sgs.group_load; sds->total_load += sgs.group_load;
sds->total_pwr += group->__cpu_power; sds->total_pwr += group->cpu_power;
/* /*
* In case the child domain prefers tasks go to siblings * In case the child domain prefers tasks go to siblings
...@@ -4097,28 +4065,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, ...@@ -4097,28 +4065,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
* moving them. * moving them.
*/ */
pwr_now += sds->busiest->__cpu_power * pwr_now += sds->busiest->cpu_power *
min(sds->busiest_load_per_task, sds->max_load); min(sds->busiest_load_per_task, sds->max_load);
pwr_now += sds->this->__cpu_power * pwr_now += sds->this->cpu_power *
min(sds->this_load_per_task, sds->this_load); min(sds->this_load_per_task, sds->this_load);
pwr_now /= SCHED_LOAD_SCALE; pwr_now /= SCHED_LOAD_SCALE;
/* Amount of load we'd subtract */ /* Amount of load we'd subtract */
tmp = sg_div_cpu_power(sds->busiest, tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) /
sds->busiest_load_per_task * SCHED_LOAD_SCALE); sds->busiest->cpu_power;
if (sds->max_load > tmp) if (sds->max_load > tmp)
pwr_move += sds->busiest->__cpu_power * pwr_move += sds->busiest->cpu_power *
min(sds->busiest_load_per_task, sds->max_load - tmp); min(sds->busiest_load_per_task, sds->max_load - tmp);
/* Amount of load we'd add */ /* Amount of load we'd add */
if (sds->max_load * sds->busiest->__cpu_power < if (sds->max_load * sds->busiest->cpu_power <
sds->busiest_load_per_task * SCHED_LOAD_SCALE) sds->busiest_load_per_task * SCHED_LOAD_SCALE)
tmp = sg_div_cpu_power(sds->this, tmp = (sds->max_load * sds->busiest->cpu_power) /
sds->max_load * sds->busiest->__cpu_power); sds->this->cpu_power;
else else
tmp = sg_div_cpu_power(sds->this, tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) /
sds->busiest_load_per_task * SCHED_LOAD_SCALE); sds->this->cpu_power;
pwr_move += sds->this->__cpu_power * pwr_move += sds->this->cpu_power *
min(sds->this_load_per_task, sds->this_load + tmp); min(sds->this_load_per_task, sds->this_load + tmp);
pwr_move /= SCHED_LOAD_SCALE; pwr_move /= SCHED_LOAD_SCALE;
...@@ -4153,8 +4121,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, ...@@ -4153,8 +4121,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
sds->max_load - sds->busiest_load_per_task); sds->max_load - sds->busiest_load_per_task);
/* How much load to actually move to equalise the imbalance */ /* How much load to actually move to equalise the imbalance */
*imbalance = min(max_pull * sds->busiest->__cpu_power, *imbalance = min(max_pull * sds->busiest->cpu_power,
(sds->avg_load - sds->this_load) * sds->this->__cpu_power) (sds->avg_load - sds->this_load) * sds->this->cpu_power)
/ SCHED_LOAD_SCALE; / SCHED_LOAD_SCALE;
/* /*
...@@ -4289,7 +4257,7 @@ static unsigned long power_of(int cpu) ...@@ -4289,7 +4257,7 @@ static unsigned long power_of(int cpu)
if (!group) if (!group)
return SCHED_LOAD_SCALE; return SCHED_LOAD_SCALE;
return group->__cpu_power; return group->cpu_power;
} }
/* /*
...@@ -8226,7 +8194,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, ...@@ -8226,7 +8194,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
break; break;
} }
if (!group->__cpu_power) { if (!group->cpu_power) {
printk(KERN_CONT "\n"); printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: domain->cpu_power not " printk(KERN_ERR "ERROR: domain->cpu_power not "
"set\n"); "set\n");
...@@ -8250,9 +8218,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, ...@@ -8250,9 +8218,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
printk(KERN_CONT " %s", str); printk(KERN_CONT " %s", str);
if (group->__cpu_power != SCHED_LOAD_SCALE) { if (group->cpu_power != SCHED_LOAD_SCALE) {
printk(KERN_CONT " (__cpu_power = %d)", printk(KERN_CONT " (cpu_power = %d)",
group->__cpu_power); group->cpu_power);
} }
group = group->next; group = group->next;
...@@ -8537,7 +8505,7 @@ init_sched_build_groups(const struct cpumask *span, ...@@ -8537,7 +8505,7 @@ init_sched_build_groups(const struct cpumask *span,
continue; continue;
cpumask_clear(sched_group_cpus(sg)); cpumask_clear(sched_group_cpus(sg));
sg->__cpu_power = 0; sg->cpu_power = 0;
for_each_cpu(j, span) { for_each_cpu(j, span) {
if (group_fn(j, cpu_map, NULL, tmpmask) != group) if (group_fn(j, cpu_map, NULL, tmpmask) != group)
...@@ -8762,7 +8730,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) ...@@ -8762,7 +8730,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
continue; continue;
} }
sg_inc_cpu_power(sg, sd->groups->__cpu_power); sg->cpu_power += sd->groups->cpu_power;
} }
sg = sg->next; sg = sg->next;
} while (sg != group_head); } while (sg != group_head);
...@@ -8835,7 +8803,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) ...@@ -8835,7 +8803,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
child = sd->child; child = sd->child;
sd->groups->__cpu_power = 0; sd->groups->cpu_power = 0;
if (!child) { if (!child) {
power = SCHED_LOAD_SCALE; power = SCHED_LOAD_SCALE;
...@@ -8851,7 +8819,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) ...@@ -8851,7 +8819,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
power /= weight; power /= weight;
power >>= SCHED_LOAD_SHIFT; power >>= SCHED_LOAD_SHIFT;
} }
sg_inc_cpu_power(sd->groups, power); sd->groups->cpu_power += power;
return; return;
} }
...@@ -8860,7 +8828,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) ...@@ -8860,7 +8828,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
*/ */
group = child->groups; group = child->groups;
do { do {
sg_inc_cpu_power(sd->groups, group->__cpu_power); sd->groups->cpu_power += group->cpu_power;
group = group->next; group = group->next;
} while (group != child->groups); } while (group != child->groups);
} }
...@@ -9133,7 +9101,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, ...@@ -9133,7 +9101,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
sd = &per_cpu(node_domains, j).sd; sd = &per_cpu(node_domains, j).sd;
sd->groups = sg; sd->groups = sg;
} }
sg->__cpu_power = 0; sg->cpu_power = 0;
cpumask_copy(sched_group_cpus(sg), nodemask); cpumask_copy(sched_group_cpus(sg), nodemask);
sg->next = sg; sg->next = sg;
cpumask_or(covered, covered, nodemask); cpumask_or(covered, covered, nodemask);
...@@ -9160,7 +9128,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, ...@@ -9160,7 +9128,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
"Can not alloc domain group for node %d\n", j); "Can not alloc domain group for node %d\n", j);
goto error; goto error;
} }
sg->__cpu_power = 0; sg->cpu_power = 0;
cpumask_copy(sched_group_cpus(sg), tmpmask); cpumask_copy(sched_group_cpus(sg), tmpmask);
sg->next = prev->next; sg->next = prev->next;
cpumask_or(covered, covered, tmpmask); cpumask_or(covered, covered, tmpmask);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment