Commit 82e5a1bb authored by Peter Zijlstra, committed by Thomas Gleixner

sched: scale down cpu_power due to RT tasks

Keep an average of the amount of time spent on RT tasks and use that
fraction to scale down the cpu_power for regular tasks.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Darren Hart <dvhltc@us.ibm.com>
Cc: John Kacur <jkacur@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 56e91477
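For orientation (this note and the sketch below are editorial, not part of the patch): update_curr_rt() now feeds every RT runtime delta into rq->rt_avg, sched_avg_update() halves that accumulator once per half of sysctl_sched_time_avg, and scale_rt_power() turns the non-RT share of the current window into a SCHED_LOAD_SCALE-relative factor that update_cpu_power() folds into cpu_power. A minimal user-space C sketch of the same arithmetic follows; the toy_* names, the fixed 500 ms half-period and the sample numbers are illustrative assumptions, not kernel code.

#include <stdint.h>
#include <stdio.h>

#define LOAD_SCALE    1024ULL             /* stands in for SCHED_LOAD_SCALE (1 << SCHED_LOAD_SHIFT) */
#define AVG_PERIOD_NS (500ULL * 1000000)  /* half of a 1s sched_time_avg, like sched_avg_period()   */

/* hypothetical stand-in for the rt_avg/age_stamp pair added to struct rq */
struct toy_rq {
        uint64_t clock;      /* current time, ns            */
        uint64_t age_stamp;  /* start of the current window */
        uint64_t rt_avg;     /* decayed RT runtime, ns      */
};

/* geometric decay as in sched_avg_update(): halve rt_avg once per elapsed half-period */
static void toy_avg_update(struct toy_rq *rq)
{
        while ((int64_t)(rq->clock - rq->age_stamp) > (int64_t)AVG_PERIOD_NS) {
                rq->age_stamp += AVG_PERIOD_NS;
                rq->rt_avg /= 2;
        }
}

/* same shape as scale_rt_power(): LOAD_SCALE-relative share of the window not eaten by RT */
static uint64_t toy_scale_rt_power(struct toy_rq *rq)
{
        uint64_t total, available;

        toy_avg_update(rq);
        total = AVG_PERIOD_NS + (rq->clock - rq->age_stamp);
        available = total - rq->rt_avg;

        if (total < LOAD_SCALE)
                total = LOAD_SCALE;
        total /= LOAD_SCALE;        /* equivalent of total >>= SCHED_LOAD_SHIFT */

        return available / total;
}

int main(void)
{
        /* made-up sample: 250 ms of RT runtime accounted, 500 ms into the current window */
        struct toy_rq rq = { .clock = 500000000ULL, .age_stamp = 0, .rt_avg = 250000000ULL };
        uint64_t power = LOAD_SCALE;

        /* mirror the tail of update_cpu_power(): scale power, floor at 1 */
        power = power * toy_scale_rt_power(&rq) / LOAD_SCALE;
        if (!power)
                power = 1;

        printf("scaled cpu_power: %llu of %llu\n",
               (unsigned long long)power, (unsigned long long)LOAD_SCALE);
        return 0;
}

With those numbers the effective window is 1 s (the 500 ms half-period plus 500 ms of elapsed time) and 250 ms of it was RT, so the sketch prints 768 of 1024: the CPU advertises roughly 75% of its nominal capacity to the load balancer.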
@@ -1915,6 +1915,7 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
@@ -673,6 +673,9 @@ struct rq {
         struct task_struct *migration_thread;
         struct list_head migration_queue;
+
+        u64 rt_avg;
+        u64 age_stamp;
 #endif
 
         /* calc_load related fields */
@@ -926,6 +929,14 @@ unsigned int sysctl_sched_shares_ratelimit = 250000;
  */
 unsigned int sysctl_sched_shares_thresh = 4;
 
+/*
+ * period over which we average the RT time consumption, measured
+ * in ms.
+ *
+ * default: 1s
+ */
+const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
+
 /*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
@@ -1370,12 +1381,37 @@ void wake_up_idle_cpu(int cpu)
 }
 #endif /* CONFIG_NO_HZ */
 
+static u64 sched_avg_period(void)
+{
+        return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
+}
+
+static void sched_avg_update(struct rq *rq)
+{
+        s64 period = sched_avg_period();
+
+        while ((s64)(rq->clock - rq->age_stamp) > period) {
+                rq->age_stamp += period;
+                rq->rt_avg /= 2;
+        }
+}
+
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+        rq->rt_avg += rt_delta;
+        sched_avg_update(rq);
+}
+
 #else /* !CONFIG_SMP */
 static void resched_task(struct task_struct *p)
 {
         assert_atomic_spin_locked(&task_rq(p)->lock);
         set_tsk_need_resched(p);
 }
+
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+}
 #endif /* CONFIG_SMP */
 
 #if BITS_PER_LONG == 32
@@ -3780,7 +3816,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
-unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
 {
         unsigned long weight = cpumask_weight(sched_domain_span(sd));
         unsigned long smt_gain = sd->smt_gain;
@@ -3790,6 +3826,24 @@ unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
         return smt_gain;
 }
 
+unsigned long scale_rt_power(int cpu)
+{
+        struct rq *rq = cpu_rq(cpu);
+        u64 total, available;
+
+        sched_avg_update(rq);
+
+        total = sched_avg_period() + (rq->clock - rq->age_stamp);
+        available = total - rq->rt_avg;
+
+        if (unlikely((s64)total < SCHED_LOAD_SCALE))
+                total = SCHED_LOAD_SCALE;
+
+        total >>= SCHED_LOAD_SHIFT;
+
+        return div_u64(available, total);
+}
+
 static void update_cpu_power(struct sched_domain *sd, int cpu)
 {
         unsigned long weight = cpumask_weight(sched_domain_span(sd));
@@ -3800,11 +3854,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
         /* here we could scale based on cpufreq */
 
         if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-                power *= arch_smt_gain(sd, cpu);
+                power *= arch_scale_smt_power(sd, cpu);
                 power >>= SCHED_LOAD_SHIFT;
         }
 
-        /* here we could scale based on RT time */
+        power *= scale_rt_power(cpu);
+        power >>= SCHED_LOAD_SHIFT;
+
+        if (!power)
+                power = 1;
 
         if (power != old) {
                 sdg->__cpu_power = power;
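A back-of-the-envelope reading of the scheduler-core hunks above (editorial, not from the patch): with total = sched_avg_period() + (rq->clock - rq->age_stamp), update_cpu_power() ends up with roughly cpu_power ≈ old_power * (total - rt_avg) / total, floored at 1. For a nominal power of SCHED_LOAD_SCALE = 1024 and a CPU spending 30% of its window in RT tasks, that is about 1024 * 0.7 ≈ 716, which is what gets stored in sdg->__cpu_power and seen by the load balancer.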
@@ -602,6 +602,8 @@ static void update_curr_rt(struct rq *rq)
         curr->se.exec_start = rq->clock;
         cpuacct_charge(curr, delta_exec);
 
+        sched_rt_avg_update(rq, delta_exec);
+
         if (!rt_bandwidth_enabled())
                 return;
@@ -926,8 +928,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
         if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
                 enqueue_pushable_task(rq, p);
-
-        inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -942,8 +942,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
         dequeue_rt_entity(rt_se);
 
         dequeue_pushable_task(rq, p);
-
-        dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
@@ -330,6 +330,14 @@ static struct ctl_table kern_table[] = {
                 .mode = 0644,
                 .proc_handler = &proc_dointvec,
         },
+        {
+                .ctl_name = CTL_UNNUMBERED,
+                .procname = "sched_time_avg",
+                .data = &sysctl_sched_time_avg,
+                .maxlen = sizeof(unsigned int),
+                .mode = 0644,
+                .proc_handler = &proc_dointvec,
+        },
         {
                 .ctl_name = CTL_UNNUMBERED,
                 .procname = "timer_migration",
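Usage note (editorial): since the entry is added to kern_table, the knob should show up as /proc/sys/kernel/sched_time_avg, in milliseconds, defaulting to MSEC_PER_SEC (1000). Because sched_avg_period() is half of that value, the default gives a 500 ms decay step; writing, say, 500 would halve rq->rt_avg every 250 ms, making the cpu_power reduction track RT bursts faster at the cost of a noisier average.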