Commit e9888fb9 authored by Ingo Molnar, committed by Thomas Gleixner

rt: core implementation

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 94b3cbf2
...@@ -77,8 +77,8 @@
  * Are we doing bottom half or hardware interrupt processing?
  * Are we in a softirq context? Interrupt context?
  */
-#define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
+#define in_irq()		(hardirq_count() || (current->flags & PF_HARDIRQ))
+#define in_softirq()		(softirq_count() || (current->flags & PF_SOFTIRQ))
 #define in_interrupt()		(irq_count())
 /*
......
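The hardirq.h change above makes in_irq() and in_softirq() also report true when the current task carries PF_HARDIRQ or PF_SOFTIRQ: with threaded interrupt and softirq handling the handlers run in kernel threads where hardirq_count()/softirq_count() are zero, so a per-task flag has to mark the context instead. A minimal sketch of the idea, assuming a hypothetical threaded-handler loop (wait_for_pending_hardirq() and run_hardirq_handlers() are placeholders, not functions added by this commit):

static int irq_thread_sketch(void *data)
{
	struct irq_desc *desc = data;

	while (!kthread_should_stop()) {
		wait_for_pending_hardirq(desc);		/* placeholder: sleep until the irq fires */

		current->flags |= PF_HARDIRQ;		/* in_irq() now reports true in this thread */
		run_hardirq_handlers(desc);		/* placeholder: invoke the registered handlers */
		current->flags &= ~PF_HARDIRQ;
	}
	return 0;
}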
...@@ -124,7 +124,7 @@ extern int _cond_resched(void);
 # define might_resched() do { } while (0)
 #endif
-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT)
 void __might_sleep(char *file, int line);
 /**
  * might_sleep - annotation for functions that can sleep
...@@ -284,6 +284,12 @@ extern void printk_tick(void);
 extern void asmlinkage __attribute__((format(printf, 1, 2)))
 	early_printk(const char *fmt, ...);
+#ifdef CONFIG_PREEMPT_RT
+extern void zap_rt_locks(void);
+#else
+# define zap_rt_locks()	do { } while (0)
+#endif
 unsigned long int_sqrt(unsigned long);
 static inline void console_silent(void)
...@@ -313,6 +319,7 @@ extern int root_mountflags;
 /* Values used for system_state */
 extern enum system_states {
 	SYSTEM_BOOTING,
+	SYSTEM_BOOTING_SCHEDULER_OK,
 	SYSTEM_RUNNING,
 	SYSTEM_HALT,
 	SYSTEM_POWER_OFF,
......
...@@ -12,6 +12,7 @@
 #define SCHED_PROFILING 2
 #define SLEEP_PROFILING 3
 #define KVM_PROFILING 4
+#define PREEMPT_PROFILING 5
 struct proc_dir_entry;
 struct pt_regs;
...@@ -36,6 +37,8 @@ enum profile_type {
 	PROFILE_MUNMAP
 };
+extern int prof_pid;
 #ifdef CONFIG_PROFILING
 extern int prof_on __read_mostly;
......
...@@ -169,7 +169,18 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 				unsigned long index, unsigned long max_scan);
 unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
 				unsigned long index, unsigned long max_scan);
+/*
+ * On a mutex based kernel we can freely schedule within the radix code:
+ */
+#ifdef CONFIG_PREEMPT_RT
+static inline int radix_tree_preload(gfp_t gfp_mask)
+{
+	return 0;
+}
+#else
 int radix_tree_preload(gfp_t gfp_mask);
+#endif
 void radix_tree_init(void);
 void *radix_tree_tag_set(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
...@@ -189,7 +200,9 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
 static inline void radix_tree_preload_end(void)
 {
+#ifndef CONFIG_PREEMPT_RT
 	preempt_enable();
+#endif
 }
 #endif /* _LINUX_RADIX_TREE_H */
...@@ -50,6 +50,16 @@ extern void smp_send_stop(void);
  */
 extern void smp_send_reschedule(int cpu);
+
+/*
+ * trigger a reschedule on all other CPUs:
+ */
+extern void smp_send_reschedule_allbutself(void);
+
+/*
+ * trigger a reschedule on all other CPUs:
+ */
+extern void smp_send_reschedule_allbutself(void);
 /*
  * Prepare machine for booting other CPUs.
...@@ -142,6 +152,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
 			0;			\
 		})
 static inline void smp_send_reschedule(int cpu) { }
+static inline void smp_send_reschedule_allbutself(void) { }
 #define num_booting_cpus() 1
 #define smp_prepare_boot_cpu() do {} while (0)
 #define smp_call_function_mask(mask, func, info, wait) \
......
...@@ -45,7 +45,7 @@ static inline void cycle_kernel_lock(void)
 #define unlock_kernel()			do { } while(0)
 #define release_kernel_lock(task)	do { } while(0)
 #define cycle_kernel_lock()		do { } while(0)
-#define reacquire_kernel_lock(task)	0
+#define reacquire_kernel_lock(task)	do { } while(0)
 #define kernel_locked()			1
 #endif /* CONFIG_LOCK_KERNEL */
......
...@@ -195,6 +195,9 @@ __create_workqueue_key(const char *name, int singlethread,
 #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0)
 #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0)
+extern void set_workqueue_prio(struct workqueue_struct *wq, int policy,
+				int rt_priority, int nice);
 extern void destroy_workqueue(struct workqueue_struct *wq);
 extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
......
 choice
-	prompt "Preemption Model"
-	default PREEMPT_NONE
+	prompt "Preemption Mode"
+	default PREEMPT_RT

 config PREEMPT_NONE
 	bool "No Forced Preemption (Server)"
 	help
-	  This is the traditional Linux preemption model, geared towards
+	  This is the traditional Linux preemption model geared towards
 	  throughput. It will still provide good latencies most of the
-	  time, but there are no guarantees and occasional longer delays
+	  time but there are no guarantees and occasional long delays
 	  are possible.

 	  Select this option if you are building a kernel for a server or
...@@ -21,7 +20,7 @@ config PREEMPT_VOLUNTARY
 	help
 	  This option reduces the latency of the kernel by adding more
 	  "explicit preemption points" to the kernel code. These new
-	  preemption points have been selected to reduce the maximum
+	  preemption points have been selected to minimize the maximum
 	  latency of rescheduling, providing faster application reactions,
 	  at the cost of slightly lower throughput.
...@@ -33,25 +32,60 @@ config PREEMPT_VOLUNTARY
 	  Select this if you are building a kernel for a desktop system.

-config PREEMPT
+config PREEMPT_DESKTOP
 	bool "Preemptible Kernel (Low-Latency Desktop)"
 	help
 	  This option reduces the latency of the kernel by making
-	  all kernel code (that is not executing in a critical section)
+	  all kernel code that is not executing in a critical section
 	  preemptible. This allows reaction to interactive events by
 	  permitting a low priority process to be preempted involuntarily
 	  even if it is in kernel mode executing a system call and would
-	  otherwise not be about to reach a natural preemption point.
-	  This allows applications to run more 'smoothly' even when the
-	  system is under load, at the cost of slightly lower throughput
-	  and a slight runtime overhead to kernel code.
+	  otherwise not be about to reach a preemption point. This allows
+	  applications to run more 'smoothly' even when the system is
+	  under load, at the cost of slightly lower throughput and a
+	  slight runtime overhead to kernel code.
+
+	  (According to profiles, when this mode is selected then even
+	  during kernel-intense workloads the system is in an immediately
+	  preemptible state more than 50% of the time.)

 	  Select this if you are building a kernel for a desktop or
 	  embedded system with latency requirements in the milliseconds
 	  range.

+config PREEMPT_RT
+	bool "Complete Preemption (Real-Time)"
+	select PREEMPT_SOFTIRQS
+	select PREEMPT_HARDIRQS
+	select PREEMPT_RCU
+	select RT_MUTEXES
+	help
+	  This option further reduces the scheduling latency of the
+	  kernel by replacing almost every spinlock used by the kernel
+	  with preemptible mutexes and thus making all but the most
+	  critical kernel code involuntarily preemptible. The remaining
+	  handful of low-level non-preemptible codepaths are short and
+	  have a deterministic latency of a couple of tens of
+	  microseconds (depending on the hardware). This also allows
+	  applications to run more 'smoothly' even when the system is
+	  under load, at the cost of lower throughput and runtime
+	  overhead to kernel code.
+
+	  (According to profiles, when this mode is selected then even
+	  during kernel-intense workloads the system is in an immediately
+	  preemptible state more than 95% of the time.)
+
+	  Select this if you are building a kernel for a desktop,
+	  embedded or real-time system with guaranteed latency
+	  requirements of 100 usecs or lower.
+
 endchoice

+config PREEMPT
+	bool
+	default y
+	depends on PREEMPT_DESKTOP || PREEMPT_RT
+
 config PREEMPT_SOFTIRQS
 	bool "Thread Softirqs"
 	default n
...@@ -86,4 +120,3 @@ config PREEMPT_HARDIRQS
 	  runtime flags.

 	  Say N if you are unsure.
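The PREEMPT_RT help text above describes the core substitution: almost every spinlock becomes a preemptible, priority-inheriting mutex, and only a handful of locks kept "raw" retain the spinning, non-preemptible behaviour. As a rough sketch of what such a sleeping spinlock looks like, assuming only the in-kernel rtmutex API from <linux/rtmutex.h> (the *_sketch names are illustrative, not types or functions added by this commit):

#include <linux/rtmutex.h>

struct rt_spinlock_sketch {
	struct rt_mutex lock;		/* priority-inheriting sleeping lock */
};

static inline void rt_spin_lock_sketch(struct rt_spinlock_sketch *l)
{
	rt_mutex_lock(&l->lock);	/* may sleep, so the caller stays preemptible */
}

static inline void rt_spin_unlock_sketch(struct rt_spinlock_sketch *l)
{
	rt_mutex_unlock(&l->lock);
}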
...@@ -67,7 +67,9 @@ static void __unhash_process(struct task_struct *p)
 		detach_pid(p, PIDTYPE_SID);
 		list_del_rcu(&p->tasks);
+		preempt_disable();
 		__get_cpu_var(process_counts)--;
+		preempt_enable();
 	}
 	list_del_rcu(&p->thread_group);
 	list_del_init(&p->sibling);
...@@ -685,9 +687,11 @@ static void exit_mm(struct task_struct * tsk)
 	task_lock(tsk);
 	tsk->mm = NULL;
 	up_read(&mm->mmap_sem);
+	preempt_disable(); // FIXME
 	enter_lazy_tlb(mm, current);
 	/* We don't want this task to be frozen prematurely */
 	clear_freeze_flag(tsk);
+	preempt_enable();
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
...@@ -1009,14 +1013,17 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
-	preempt_disable();
+again:
+	local_irq_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
-	schedule();
-	BUG();
-	/* Avoid "noreturn function does return". */
-	for (;;)
-		cpu_relax();	/* For when BUG is null */
+	__schedule();
+	printk(KERN_ERR "BUG: dead task %s:%d back from the grave!\n",
+		current->comm, current->pid);
+	printk(KERN_ERR ".... flags: %08x, count: %d, state: %08lx\n",
+		current->flags, atomic_read(&current->usage), current->state);
+	printk(KERN_ERR ".... trying again ...\n");
+	goto again;
 }
 EXPORT_SYMBOL_GPL(do_exit);
...@@ -1476,6 +1483,9 @@ static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
 				int ptrace, struct task_struct *p)
 {
 	int ret = eligible_child(wo, p);
+
+	BUG_ON(!atomic_read(&p->usage));
+
 	if (!ret)
 		return ret;
......
...@@ -175,6 +175,16 @@ void __put_task_struct(struct task_struct *tsk)
 		free_task(tsk);
 }
+#ifdef CONFIG_PREEMPT_RT
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+	__put_task_struct(tsk);
+}
+#endif
 /*
  * macro override instead of weak attribute alias, to workaround
  * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
...@@ -1235,11 +1245,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * to ensure it is on a valid CPU (and if not, just force it back to
 	 * parent's CPU). This avoids alot of nasty races.
 	 */
+	preempt_disable();
 	p->cpus_allowed = current->cpus_allowed;
 	p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
 	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
 			!cpu_online(task_cpu(p))))
 		set_task_cpu(p, smp_processor_id());
+	preempt_enable();
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
......
...@@ -71,7 +71,7 @@ static int notifier_chain_unregister(struct notifier_block **nl,
  * @returns: notifier_call_chain returns the value returned by the
  * last notifier function called.
  */
-static int __kprobes notifier_call_chain(struct notifier_block **nl,
+static int __kprobes notrace notifier_call_chain(struct notifier_block **nl,
 					unsigned long val, void *v,
 					int nr_to_call, int *nr_calls)
 {
...@@ -217,7 +217,7 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
 	 * not yet working and interrupts must remain disabled. At
 	 * such times we must not call down_write().
 	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
+	if (unlikely(system_state < SYSTEM_RUNNING))
 		return notifier_chain_register(&nh->head, n);
 	down_write(&nh->rwsem);
......
...@@ -916,8 +916,9 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 	trace_sched_signal_send(sig, t);
+#ifdef CONFIG_SMP
 	assert_spin_locked(&t->sighand->siglock);
+#endif
 	if (!prepare_signal(sig, t, from_ancestor_ns))
 		return 0;
...@@ -1692,15 +1693,8 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
 		read_lock(&tasklist_lock);
 		if (may_ptrace_stop()) {
 			do_notify_parent_cldstop(current, CLD_TRAPPED);
-			/*
-			 * Don't want to allow preemption here, because
-			 * sys_ptrace() needs this task to be inactive.
-			 *
-			 * XXX: implement read_unlock_no_resched().
-			 */
-			preempt_disable();
 			read_unlock(&tasklist_lock);
-			preempt_enable_and_schedule();
+			schedule();
 		} else {
 			/*
 			 * By the time we got the lock, our tracer went away.
......
...@@ -20,6 +20,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/percpu.h>
...@@ -106,6 +107,8 @@ static void trigger_softirqs(void)
 	}
 }
+#ifndef CONFIG_PREEMPT_RT
+
 /*
  * This one is for softirq.c-internal use,
  * where hardirqs are disabled legitimately:
...@@ -207,6 +210,8 @@ void local_bh_enable_ip(unsigned long ip)
 }
 EXPORT_SYMBOL(local_bh_enable_ip);
+
+#endif
 /*
  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
  * and we fall back to softirqd after that.
...@@ -606,7 +611,7 @@ void tasklet_kill(struct tasklet_struct *t)
 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
 		do {
-			yield();
+			msleep(1);
 		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
 	}
 	tasklet_unlock_wait(t);
...@@ -1064,6 +1069,11 @@ int softirq_preemption = 1;
 EXPORT_SYMBOL(softirq_preemption);
+/*
+ * Real-Time Preemption depends on softirq threading:
+ */
+#ifndef CONFIG_PREEMPT_RT
+
 static int __init softirq_preempt_setup (char *str)
 {
 	if (!strncmp(str, "off", 3))
...@@ -1077,7 +1087,7 @@ static int __init softirq_preempt_setup (char *str)
 }
 __setup("softirq-preempt=", softirq_preempt_setup);
+#endif
 #endif
 #ifdef CONFIG_SMP
......
...@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/syscalls.h>
 #include <linux/kthread.h>
 #include <linux/hardirq.h>
 #include <linux/mempolicy.h>
...@@ -36,6 +37,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
+#include <asm/uaccess.h>
+
 /*
  * The per-CPU workqueue (if single thread, we always use the first
  * possible cpu).
...@@ -159,13 +162,14 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
  *
  * We queue the work to the CPU on which it was submitted, but if the CPU dies
  * it can be processed by another CPU.
+ *
+ * Especially no such guarantee on PREEMPT_RT.
  */
 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
-	int ret;
+	int ret = 0, cpu = raw_smp_processor_id();

-	ret = queue_work_on(get_cpu(), wq, work);
-	put_cpu();
+	ret = queue_work_on(cpu, wq, work);
 	return ret;
 }
...@@ -883,6 +887,49 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 	cwq->thread = NULL;
 }
+
+void set_workqueue_thread_prio(struct workqueue_struct *wq, int cpu,
+			       int policy, int rt_priority, int nice)
+{
+	struct sched_param param = { .sched_priority = rt_priority };
+	struct cpu_workqueue_struct *cwq;
+	mm_segment_t oldfs = get_fs();
+	struct task_struct *p;
+	unsigned long flags;
+	int ret;
+
+	cwq = per_cpu_ptr(wq->cpu_wq, cpu);
+	spin_lock_irqsave(&cwq->lock, flags);
+	p = cwq->thread;
+	spin_unlock_irqrestore(&cwq->lock, flags);
+
+	set_user_nice(p, nice);
+
+	set_fs(KERNEL_DS);
+	ret = sys_sched_setscheduler(p->pid, policy, &param);
+	set_fs(oldfs);
+
+	WARN_ON(ret);
+}
+
+void set_workqueue_prio(struct workqueue_struct *wq, int policy,
+			int rt_priority, int nice)
+{
+	int cpu;
+
+	/* We don't need the distraction of CPUs appearing and vanishing. */
+	get_online_cpus();
+	spin_lock(&workqueue_lock);
+	if (is_wq_single_threaded(wq))
+		set_workqueue_thread_prio(wq, 0, policy, rt_priority, nice);
+	else {
+		for_each_online_cpu(cpu)
+			set_workqueue_thread_prio(wq, cpu, policy,
+						  rt_priority, nice);
+	}
+	spin_unlock(&workqueue_lock);
+	put_online_cpus();
+}
+
 /**
  * destroy_workqueue - safely terminate a workqueue
  * @wq: target workqueue
...@@ -1015,4 +1062,5 @@ void __init init_workqueues(void)
 	hotcpu_notifier(workqueue_cpu_callback, 0);
 	keventd_wq = create_workqueue("events");
 	BUG_ON(!keventd_wq);
+	set_workqueue_prio(keventd_wq, SCHED_FIFO, 1, -20);
 }
...@@ -397,6 +397,8 @@ config DEBUG_RT_MUTEXES
 	help
 	 This allows rt mutex semantics violations and rt mutex related
 	 deadlocks (lockups) to be detected and reported automatically.
+	 When realtime preemption is enabled this includes spinlocks,
+	 rwlocks, mutexes and (rw)semaphores

 config DEBUG_PI_LIST
 	bool
...@@ -420,7 +422,7 @@ config DEBUG_SPINLOCK
 config DEBUG_MUTEXES
 	bool "Mutex debugging: basic checks"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && !PREEMPT_RT
 	help
 	 This feature allows mutex semantics violations to be detected and
 	 reported.
......
...@@ -34,7 +34,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
-lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+obj-$(CONFIG_PREEMPT_RT) += plist.o
+obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
......
...@@ -35,6 +35,8 @@ DEFINE_SEMAPHORE(kernel_sem);
  * about recursion, both due to the down() and due to the enabling of
  * preemption. schedule() will re-check the preemption flag after
  * reacquiring the semaphore.
+ *
+ * Called with interrupts disabled.
  */
 int __lockfunc __reacquire_kernel_lock(void)
 {
...@@ -67,11 +69,15 @@ void __lockfunc lock_kernel(void)
 	struct task_struct *task = current;
 	int depth = task->lock_depth + 1;

-	if (likely(!depth))
+	if (likely(!depth)) {
 		/*
 		 * No recursion worries - we set up lock_depth _after_
 		 */
 		down(&kernel_sem);
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+		current->last_kernel_lock = __builtin_return_address(0);
+#endif
+	}
 	task->lock_depth = depth;
 }
...@@ -82,8 +88,12 @@ void __lockfunc unlock_kernel(void)
 	BUG_ON(task->lock_depth < 0);

-	if (likely(--task->lock_depth < 0))
+	if (likely(--task->lock_depth < 0)) {
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+		current->last_kernel_lock = NULL;
+#endif
 		up(&kernel_sem);
+	}
 }
 EXPORT_SYMBOL(lock_kernel);
......
...@@ -158,7 +158,7 @@ static void init_shared_classes(void)
 	local_bh_disable();		\
 	local_irq_disable();		\
 	lockdep_softirq_enter();	\
-	WARN_ON(!in_softirq());
+	/* FIXME: preemptible softirqs. WARN_ON(!in_softirq()); */

 #define SOFTIRQ_EXIT()			\
 	lockdep_softirq_exit();		\
...@@ -549,6 +549,11 @@ GENERATE_TESTCASE(init_held_rsem)
 #undef E

+/*
+ * FIXME: turns these into raw-spinlock tests on -rt
+ */
+#ifndef CONFIG_PREEMPT_RT
+
 /*
  * locking an irq-safe lock with irqs enabled:
  */
...@@ -890,6 +895,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 #include "locking-selftest-softirq.h"
 // GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft)
+
+#endif /* !CONFIG_PREEMPT_RT */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
 # define I_RWLOCK(x)	lockdep_reset_lock(&rwlock_##x.dep_map)
...@@ -1179,6 +1186,7 @@ void locking_selftest(void)
 	/*
 	 * irq-context testcases:
 	 */
+#ifndef CONFIG_PREEMPT_RT
 	DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
 	DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
 	DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
...@@ -1188,6 +1196,7 @@ void locking_selftest(void)
 	DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
 //	DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
+#endif
 	if (unexpected_testcase_failures) {
 		printk("-----------------------------------------------------------------\n");
......
...@@ -157,12 +157,14 @@ radix_tree_node_alloc(struct radix_tree_root *root)
 		 * succeed in getting a node here (and never reach
 		 * kmem_cache_alloc)
 		 */
+		rtp = &get_cpu_var(radix_tree_preloads);
 		rtp = &__get_cpu_var(radix_tree_preloads);
 		if (rtp->nr) {
 			ret = rtp->nodes[rtp->nr - 1];
 			rtp->nodes[rtp->nr - 1] = NULL;
 			rtp->nr--;
 		}
+		put_cpu_var(radix_tree_preloads);
 	}
 	if (ret == NULL)
 		ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
...@@ -195,6 +197,8 @@ radix_tree_node_free(struct radix_tree_node *node)
 	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
 }
+
+#ifndef CONFIG_PREEMPT_RT
 /*
  * Load up this CPU's radix_tree_node buffer with sufficient objects to
  * ensure that the addition of a single element in the tree cannot fail. On
...@@ -227,6 +231,8 @@ out:
 }
 EXPORT_SYMBOL(radix_tree_preload);
+
+#endif
 /*
  * Return the maximum key which can be store into a
  * radix tree with height HEIGHT.
......