sched: try to deal with low capacity

When the capacity drops low, we want to migrate load away. Allow the
load-balancer to remove all tasks when we hit rock bottom.

[ dino: backport to 31-rt ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Darren Hart <dvhltc@us.ibm.com>
Cc: John Kacur <jkacur@redhat.com>
[ego@in.ibm.com: fix to update_sd_power_savings_stats]
Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

sched: try to deal with low capacity
When the capacity drops low, we want to migrate load away. Allow the
load-balancer to remove all tasks when we hit rock bottom.

[ dino: backport to 31-rt ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Darren Hart <dvhltc@us.ibm.com>
Cc: John Kacur <jkacur@redhat.com>
[ego@in.ibm.com: fix to update_sd_power_savings_stats]
Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
e9265e74 · Peter Zijlstra · Thomas Gleixner · 82e5a1bb · e9265e74
Commit e9265e74 authored Oct 22, 2009 by Peter Zijlstra Committed by Thomas Gleixner Oct 29, 2009
Show whitespace changes
Inline Side-by-side

Showing with 29 additions and 6 deletions

kernel/sched.c kernel/sched.c +29 -6

No files found.
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3749,7 +3749,7 @@ static inline void update_sd_power_savings_stats(struct sched_group *group,
 	 * capacity but still has some space to pick up some load
 	 * from other group and save more power
 	 */
-	if (sgs->sum_nr_running > sgs->group_capacity - 1)
+	if (sgs->sum_nr_running + 1 > sgs->group_capacity)
 		return;
 	if (sgs->sum_nr_running > sds->leader_nr_running ||
@@ -3989,8 +3989,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
 		sgs->group_imb = 1;
-	sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
+	sgs->group_capacity =
+		DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE);
 }
 /**
@@ -4040,7 +4040,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 		 * and move all the excess tasks away.
 		 */
 		if (prefer_sibling)
-			sgs.group_capacity = 1;
+			sgs.group_capacity = min(sgs.group_capacity, 1UL);
 		if (local_group) {
 			sds->this_load = sgs.avg_load;
@@ -4272,6 +4272,26 @@ ret:
 	return NULL;
 }
+static struct sched_group *group_of(int cpu)
+{
+	struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+	if (!sd)
+		return NULL;
+	return sd->groups;
+}
+static unsigned long power_of(int cpu)
+{
+	struct sched_group *group = group_of(cpu);
+	if (!group)
+		return SCHED_LOAD_SCALE;
+	return group->__cpu_power;
+}
 /*
 * find_busiest_queue - find the busiest runqueue among the cpus in group.
 */
@@ -4284,15 +4304,18 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 	int i;
 	for_each_cpu(i, sched_group_cpus(group)) {
+		unsigned long power = power_of(i);
+		unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
 		unsigned long wl;
 		if (!cpumask_test_cpu(i, cpus))
 			continue;
 		rq = cpu_rq(i);
-		wl = weighted_cpuload(i);
+		wl = weighted_cpuload(i) * SCHED_LOAD_SCALE;
+		wl /= power;
-		if (rq->nr_running == 1 && wl > imbalance)
+		if (capacity && rq->nr_running == 1 && wl > imbalance)
 			continue;
 		if (wl > max_load) {