Commit 5b6e135f authored by Ingo Molnar, committed by Thomas Gleixner

sched: mmdrop needs to be delayed on -rt
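
In short: on PREEMPT_RT the final mmdrop() should not do its heavy work from within the scheduler, so finish_task_switch() now hands the mm to a new helper, mmdrop_delayed(), which queues it on a per-CPU delayed_drop_list and wakes a per-CPU "desched/N" kthread; that thread performs the actual __mmdrop() later, in normal schedulable context. Condensed from the hunks below (not a separate API; all identifiers are the ones the patch introduces):

    /* New inline helper, declared next to mmdrop() below: */
    static inline void mmdrop_delayed(struct mm_struct *mm)
    {
            if (atomic_dec_and_test(&mm->mm_count))
                    __mmdrop_delayed(mm);   /* queues the mm on this CPU's
                                             * delayed_drop_list and wakes the
                                             * "desched/N" kthread */
    }

    /* Scheduler side, in finish_task_switch(): */
    if (mm)
            mmdrop_delayed(mm);             /* was: mmdrop(mm) */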

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 0a930ce9
@@ -247,6 +247,9 @@ struct mm_struct {
         /* Architecture-specific MM context */
         mm_context_t context;
 
+        /* realtime bits */
+        struct list_head delayed_drop;
+
         /* Swap token stuff */
         /*
          * Last value of global fault stamp as seen by this process.
...
@@ -2111,12 +2111,20 @@ extern struct mm_struct * mm_alloc(void);
 /* mmdrop drops the mm and the page tables */
 extern void __mmdrop(struct mm_struct *);
+extern void __mmdrop_delayed(struct mm_struct *);
+
 static inline void mmdrop(struct mm_struct * mm)
 {
         if (unlikely(atomic_dec_and_test(&mm->mm_count)))
                 __mmdrop(mm);
 }
+static inline void mmdrop_delayed(struct mm_struct * mm)
+{
+        if (atomic_dec_and_test(&mm->mm_count))
+                __mmdrop_delayed(mm);
+}
+
 /* mmput gets rid of the mappings and all user-space */
 extern void mmput(struct mm_struct *);
 
 /* Grab a reference to a task's mm, if it is not already going away */
...
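Callers that may drop the last mm_count reference from a context where __mmdrop() is unsafe on -rt are expected to switch to the new helper. A minimal caller sketch, mirroring the idle_task_exit() hunk further down (the function name here is hypothetical; only the two drop calls and the CONFIG_PREEMPT_RT guard come from the patch):

    /* Hypothetical example caller, for illustration only: */
    static void drop_borrowed_mm(struct mm_struct *mm)
    {
    #ifdef CONFIG_PREEMPT_RT
            mmdrop_delayed(mm);     /* freed later by the per-CPU desched thread */
    #else
            mmdrop(mm);             /* immediate __mmdrop() once mm_count hits 0 */
    #endif
    }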
@@ -38,6 +38,7 @@
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
 #include <linux/tracehook.h>
+#include <linux/interrupt.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
 #include <linux/task_io_accounting_ops.h>
@@ -48,6 +49,8 @@
 #include <linux/memcontrol.h>
 #include <linux/ftrace.h>
 #include <linux/profile.h>
+#include <linux/kthread.h>
+#include <linux/notifier.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
@@ -88,6 +91,14 @@ DEFINE_RWLOCK(tasklist_lock);  /* outer */
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 #endif
 
+/*
+ * Delayed mmdrop. In the PREEMPT_RT case we
+ * dont want to do this from the scheduling
+ * context.
+ */
+static DEFINE_PER_CPU(struct task_struct *, desched_task);
+static DEFINE_PER_CPU(struct list_head, delayed_drop_list);
+
 int nr_processes(void)
 {
         int cpu;
@@ -174,6 +185,8 @@ void __put_task_struct(struct task_struct *tsk)
 
 void __init fork_init(unsigned long mempages)
 {
+        int i;
+
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
@@ -204,6 +217,9 @@ void __init fork_init(unsigned long mempages)
         init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
         init_task.signal->rlim[RLIMIT_SIGPENDING] =
                 init_task.signal->rlim[RLIMIT_NPROC];
+
+        for (i = 0; i < NR_CPUS; i++)
+                INIT_LIST_HEAD(&per_cpu(delayed_drop_list, i));
 }
 
 int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
@@ -285,6 +301,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
         mm->locked_vm = 0;
         mm->mmap = NULL;
         mm->mmap_cache = NULL;
+        INIT_LIST_HEAD(&mm->delayed_drop);
         mm->free_area_cache = oldmm->mmap_base;
         mm->cached_hole_size = ~0UL;
         mm->map_count = 0;
@@ -1270,7 +1287,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                         attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
                         attach_pid(p, PIDTYPE_SID, task_session(current));
                         list_add_tail_rcu(&p->tasks, &init_task.tasks);
+                        preempt_disable();
                         __get_cpu_var(process_counts)++;
+                        preempt_enable();
                 }
                 attach_pid(p, PIDTYPE_PID, pid);
                 nr_threads++;
@@ -1748,3 +1767,138 @@ int unshare_files(struct files_struct **displaced)
         task_unlock(task);
         return 0;
 }
+
+static int mmdrop_complete(void)
+{
+        struct list_head *head;
+        int ret = 0;
+
+        head = &get_cpu_var(delayed_drop_list);
+        while (!list_empty(head)) {
+                struct mm_struct *mm = list_entry(head->next,
+                                struct mm_struct, delayed_drop);
+                list_del(&mm->delayed_drop);
+                put_cpu_var(delayed_drop_list);
+
+                __mmdrop(mm);
+                ret = 1;
+
+                head = &get_cpu_var(delayed_drop_list);
+        }
+        put_cpu_var(delayed_drop_list);
+
+        return ret;
+}
+
+/*
+ * We dont want to do complex work from the scheduler, thus
+ * we delay the work to a per-CPU worker thread:
+ */
+void __mmdrop_delayed(struct mm_struct *mm)
+{
+        struct task_struct *desched_task;
+        struct list_head *head;
+
+        head = &get_cpu_var(delayed_drop_list);
+        list_add_tail(&mm->delayed_drop, head);
+        desched_task = __get_cpu_var(desched_task);
+        if (desched_task)
+                wake_up_process(desched_task);
+        put_cpu_var(delayed_drop_list);
+}
+
+static void takeover_delayed_drop(int hotcpu)
+{
+        struct list_head *head = &per_cpu(delayed_drop_list, hotcpu);
+
+        while (!list_empty(head)) {
+                struct mm_struct *mm = list_entry(head->next,
+                                struct mm_struct, delayed_drop);
+
+                list_del(&mm->delayed_drop);
+                __mmdrop_delayed(mm);
+        }
+}
+
+static int desched_thread(void * __bind_cpu)
+{
+        set_user_nice(current, -10);
+        current->flags |= PF_NOFREEZE | PF_SOFTIRQ;
+
+        set_current_state(TASK_INTERRUPTIBLE);
+
+        while (!kthread_should_stop()) {
+
+                if (mmdrop_complete())
+                        continue;
+                schedule();
+
+                /*
+                 * This must be called from time to time on ia64, and is a
+                 * no-op on other archs. Used to be in cpu_idle(), but with
+                 * the new -rt semantics it can't stay there.
+                 */
+                check_pgt_cache();
+
+                set_current_state(TASK_INTERRUPTIBLE);
+        }
+        __set_current_state(TASK_RUNNING);
+        return 0;
+}
+
+static int __devinit cpu_callback(struct notifier_block *nfb,
+                                  unsigned long action,
+                                  void *hcpu)
+{
+        int hotcpu = (unsigned long)hcpu;
+        struct task_struct *p;
+
+        switch (action) {
+        case CPU_UP_PREPARE:
+
+                BUG_ON(per_cpu(desched_task, hotcpu));
+                INIT_LIST_HEAD(&per_cpu(delayed_drop_list, hotcpu));
+
+                p = kthread_create(desched_thread, hcpu, "desched/%d", hotcpu);
+                if (IS_ERR(p)) {
+                        printk("desched_thread for %i failed\n", hotcpu);
+                        return NOTIFY_BAD;
+                }
+                per_cpu(desched_task, hotcpu) = p;
+                kthread_bind(p, hotcpu);
+                break;
+        case CPU_ONLINE:
+
+                wake_up_process(per_cpu(desched_task, hotcpu));
+                break;
+#ifdef CONFIG_HOTPLUG_CPU
+        case CPU_UP_CANCELED:
+
+                /* Unbind so it can run. Fall thru. */
+                kthread_bind(per_cpu(desched_task, hotcpu), smp_processor_id());
+        case CPU_DEAD:
+
+                p = per_cpu(desched_task, hotcpu);
+                per_cpu(desched_task, hotcpu) = NULL;
+                kthread_stop(p);
+                takeover_delayed_drop(hotcpu);
+                takeover_tasklets(hotcpu);
+                break;
+#endif /* CONFIG_HOTPLUG_CPU */
+        }
+
+        return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata cpu_nfb = {
+        .notifier_call = cpu_callback
+};
+
+__init int spawn_desched_task(void)
+{
+        void *cpu = (void *)(long)smp_processor_id();
+
+        cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+        register_cpu_notifier(&cpu_nfb);
+
+        return 0;
+}
@@ -2938,8 +2938,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 #endif
 
         fire_sched_in_preempt_notifiers(current);
+        /*
+         * Delay the final freeing of the mm or task, so that we dont have
+         * to do complex work from within the scheduler:
+         */
         if (mm)
-                mmdrop(mm);
+                mmdrop_delayed(mm);
         if (unlikely(prev_state == TASK_DEAD)) {
                 /*
                  * Remove function-return probe instances associated with this
@@ -7573,7 +7577,11 @@ void idle_task_exit(void)
         if (mm != &init_mm)
                 switch_mm(mm, &init_mm, current);
+#ifdef CONFIG_PREEMPT_RT
+        mmdrop_delayed(mm);
+#else
         mmdrop(mm);
+#endif
 }
 
 /* called under rq->lock with disabled interrupts */
...