Commit d0b27fa7 authored by Peter Zijlstra, committed by Ingo Molnar

sched: rt-group: synchronised bandwidth period

Various SMP balancing algorithms require that the bandwidth period
run in sync across the CPUs.

Possible improvements are moving the rt_bandwidth thing into root_domain
and keeping a span per rt_bandwidth which marks throttled cpus.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 57d3da29
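For orientation: the visible hunks below repeatedly refer to a shared bandwidth object, struct rt_bandwidth (rt_bandwidth.rt_runtime, def_rt_bandwidth, start_rt_bandwidth()), whose definition lives in the collapsed part of this diff (presumably kernel/sched.c). A minimal sketch of what such an object would need to hold, inferred only from the visible uses; the lock and the exact period/timer field names are assumptions:

/*
 * Sketch of the shared bandwidth object referenced by the hunks below.
 * rt_runtime is read directly by the visible code; the period value,
 * timer and lock are assumptions based on how start_rt_bandwidth() and
 * do_sched_rt_period_timer() are used.
 */
struct rt_bandwidth {
	spinlock_t	rt_runtime_lock;	/* protects runtime/period updates */
	ktime_t		rt_period;		/* length of one bandwidth period */
	u64		rt_runtime;		/* runtime allowed per period */
	struct hrtimer	rt_period_timer;	/* replenishes runtime, one per group */
};

/* One global instance backs the !CONFIG_RT_GROUP_SCHED case. */
static struct rt_bandwidth def_rt_bandwidth;

The point of the change is that one such timer per group replaces the per-rq rt_period_expire bookkeeping removed below, so every CPU's rt_rq is replenished at the same instant.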
@@ -1563,6 +1563,10 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
+int sched_rt_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+
 extern unsigned int sysctl_sched_compat_yield;
 
 #ifdef CONFIG_RT_MUTEXES
@@ -2052,6 +2056,9 @@ extern unsigned long sched_group_shares(struct task_group *tg);
 extern int sched_group_set_rt_runtime(struct task_group *tg,
 				      long rt_runtime_us);
 extern long sched_group_rt_runtime(struct task_group *tg);
+extern int sched_group_set_rt_period(struct task_group *tg,
+				     long rt_period_us);
+extern long sched_group_rt_period(struct task_group *tg);
 #endif
 #endif
This diff is collapsed.
@@ -62,7 +62,7 @@ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 	if (!rt_rq->tg)
 		return RUNTIME_INF;
 
-	return rt_rq->tg->rt_runtime;
+	return rt_rq->tg->rt_bandwidth.rt_runtime;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -127,14 +127,29 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
 	return p->prio != p->normal_prio;
 }
 
+#ifdef CONFIG_SMP
+static inline cpumask_t sched_rt_period_mask(void)
+{
+	return cpu_rq(smp_processor_id())->rd->span;
+}
+#else
+static inline cpumask_t sched_rt_period_mask(void)
+{
+	return cpu_online_map;
+}
+#endif
+
+static inline
+struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
+{
+	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
+}
+
 #else
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
-	if (sysctl_sched_rt_runtime == -1)
-		return RUNTIME_INF;
-
-	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
+	return def_rt_bandwidth.rt_runtime;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -173,8 +188,55 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
 {
 	return rt_rq->rt_throttled;
 }
+
+static inline cpumask_t sched_rt_period_mask(void)
+{
+	return cpu_online_map;
+}
+
+static inline
+struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
+{
+	return &cpu_rq(cpu)->rt;
+}
+
 #endif
 
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
+{
+	int i, idle = 1;
+	cpumask_t span;
+
+	if (rt_b->rt_runtime == RUNTIME_INF)
+		return 1;
+
+	span = sched_rt_period_mask();
+	for_each_cpu_mask(i, span) {
+		int enqueue = 0;
+		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
+		spin_lock(&rq->lock);
+		if (rt_rq->rt_time) {
+			u64 runtime = rt_b->rt_runtime;
+
+			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
+			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
+				rt_rq->rt_throttled = 0;
+				enqueue = 1;
+			}
+			if (rt_rq->rt_time || rt_rq->rt_nr_running)
+				idle = 0;
+		}
+
+		if (enqueue)
+			sched_rt_rq_enqueue(rt_rq);
+		spin_unlock(&rq->lock);
+	}
+
+	return idle;
+}
+
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
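The overrun argument above accounts for periods the timer may have missed: each elapsed period refunds one period's worth of runtime, hence the overrun*runtime subtraction. The hrtimer that drives this lives in the collapsed part of the diff; a plausible sketch of such a callback, assuming the rt_bandwidth layout sketched earlier (the name sched_rt_period_timer is an assumption):

/*
 * Sketch only: an hrtimer callback that forwards the timer by rt_period,
 * counts how many whole periods elapsed and hands that count to
 * do_sched_rt_period_timer().
 */
static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
	struct rt_bandwidth *rt_b =
		container_of(timer, struct rt_bandwidth, rt_period_timer);
	int idle = 0;

	for (;;) {
		/* advance the timer by whole periods, count the overrun */
		int overrun = hrtimer_forward(timer,
				hrtimer_cb_get_time(timer), rt_b->rt_period);

		if (!overrun)
			break;

		idle = do_sched_rt_period_timer(rt_b, overrun);
	}

	/* stop rearming once every rt_rq in the span has gone idle */
	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

Because one timer serves the whole span returned by sched_rt_period_mask(), the replenishment happens at the same moment on every CPU, which is what the commit message means by running the periods in sync.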
@@ -198,11 +260,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 		return rt_rq_throttled(rt_rq);
 
 	if (rt_rq->rt_time > runtime) {
-		struct rq *rq = rq_of_rt_rq(rt_rq);
-
-		rq->rt_throttled = 1;
 		rt_rq->rt_throttled = 1;
-
 		if (rt_rq_throttled(rt_rq)) {
 			sched_rt_rq_dequeue(rt_rq);
 			return 1;
@@ -212,29 +270,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	return 0;
 }
 
-static void update_sched_rt_period(struct rq *rq)
-{
-	struct rt_rq *rt_rq;
-	u64 period;
-
-	while (rq->clock > rq->rt_period_expire) {
-		period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
-		rq->rt_period_expire += period;
-
-		for_each_leaf_rt_rq(rt_rq, rq) {
-			u64 runtime = sched_rt_runtime(rt_rq);
-			rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
-			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
-				rt_rq->rt_throttled = 0;
-				sched_rt_rq_enqueue(rt_rq);
-			}
-		}
-
-		rq->rt_throttled = 0;
-	}
-}
-
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -284,6 +319,11 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 #ifdef CONFIG_RT_GROUP_SCHED
 	if (rt_se_boosted(rt_se))
 		rt_rq->rt_nr_boosted++;
+
+	if (rt_rq->tg)
+		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
+#else
+	start_rt_bandwidth(&def_rt_bandwidth);
 #endif
 }
@@ -307,7 +307,7 @@ static struct ctl_table kern_table[] = {
 		.data		= &sysctl_sched_rt_period,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &sched_rt_handler,
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
@@ -315,7 +315,7 @@ static struct ctl_table kern_table[] = {
 		.data		= &sysctl_sched_rt_runtime,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &sched_rt_handler,
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
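Both sched_rt_period_us and sched_rt_runtime_us now route through sched_rt_handler(), which is declared in the sched.h hunk above but implemented in the collapsed part of the diff. A plausible sketch of its shape: parse via proc_dointvec() as before, and on a successful write push the new globals into def_rt_bandwidth so the period timer and throttling code see them. The validation step and the exact propagation are assumptions; a real implementation would presumably also check the per-group runtimes against the new period.

/*
 * Sketch only, not the code from the collapsed hunk.  Converts the sysctl
 * values (microseconds, -1 meaning unlimited runtime) into the
 * def_rt_bandwidth fields assumed in the earlier struct sketch.
 */
int sched_rt_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	static DEFINE_MUTEX(mutex);
	int ret;

	mutex_lock(&mutex);
	ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);

	if (!ret && write) {
		/* propagate the new globals to the default bandwidth object */
		def_rt_bandwidth.rt_runtime = (sysctl_sched_rt_runtime < 0) ?
			RUNTIME_INF :
			(u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
		def_rt_bandwidth.rt_period =
			ns_to_ktime((u64)sysctl_sched_rt_period * NSEC_PER_USEC);
	}
	mutex_unlock(&mutex);

	return ret;
}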
@@ -191,7 +191,6 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
 void tick_nohz_stop_sched_tick(void)
 {
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
-	unsigned long rt_jiffies;
 	struct tick_sched *ts;
 	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
@@ -243,10 +242,6 @@ void tick_nohz_stop_sched_tick(void)
 	next_jiffies = get_next_timer_interrupt(last_jiffies);
 	delta_jiffies = next_jiffies - last_jiffies;
 
-	rt_jiffies = rt_needs_cpu(cpu);
-	if (rt_jiffies && rt_jiffies < delta_jiffies)
-		delta_jiffies = rt_jiffies;
-
 	if (rcu_needs_cpu(cpu))
 		delta_jiffies = 1;
 
 	/*
@@ -193,6 +193,33 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
 static struct kobj_attribute cpu_rt_runtime_attr =
 	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
 
+static ssize_t cpu_rt_period_show(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  char *buf)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+
+	return sprintf(buf, "%lu\n", sched_group_rt_period(up->tg));
+}
+
+static ssize_t cpu_rt_period_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t size)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+	unsigned long rt_period;
+	int rc;
+
+	sscanf(buf, "%lu", &rt_period);
+
+	rc = sched_group_set_rt_period(up->tg, rt_period);
+
+	return (rc ? rc : size);
+}
+
+static struct kobj_attribute cpu_rt_period_attr =
+	__ATTR(cpu_rt_period, 0644, cpu_rt_period_show, cpu_rt_period_store);
+
 #endif
 
 /* default attributes per uid directory */
@@ -202,6 +229,7 @@ static struct attribute *uids_attributes[] = {
 #endif
 #ifdef CONFIG_RT_GROUP_SCHED
 	&cpu_rt_runtime_attr.attr,
+	&cpu_rt_period_attr.attr,
 #endif
 	NULL
 };
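The new cpu_rt_period attribute sits next to cpu_rt_runtime in the per-user sysfs directory. A hypothetical user-space sketch of writing it; the /sys/kernel/uids/1000/ path and the uid are assumptions, and the value is in microseconds, as consumed by cpu_rt_period_store() above:

/* Hypothetical sketch: set a 500 ms RT period for uid 1000. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/uids/1000/cpu_rt_period", "w");

	if (!f) {
		perror("cpu_rt_period");
		return 1;
	}
	fprintf(f, "%lu\n", 500000UL);	/* 500000 us = 500 ms */
	fclose(f);
	return 0;
}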