Commit ffdb5976 authored by Rusty Russell

Simplify stop_machine

stop_machine creates a kthread which creates kernel threads.  We can
create those threads directly and simplify things a little.  Some care
must be taken with CPU hotunplug, which has special needs, but that code
seems more robust than it was in the past.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
parent 5c2aed62
...@@ -17,13 +17,12 @@ ...@@ -17,13 +17,12 @@
* @data: the data ptr for the @fn() * @data: the data ptr for the @fn()
* @cpu: if @cpu == n, run @fn() on cpu n * @cpu: if @cpu == n, run @fn() on cpu n
* if @cpu == NR_CPUS, run @fn() on any cpu * if @cpu == NR_CPUS, run @fn() on any cpu
* if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then * if @cpu == ALL_CPUS, run @fn() on every online CPU.
* concurrently on all the other cpus
* *
* Description: This causes a thread to be scheduled on every other cpu, * Description: This causes a thread to be scheduled on every cpu,
* each of which disables interrupts, and finally interrupts are disabled * each of which disables interrupts. The result is that noone is
* on the current CPU. The result is that noone is holding a spinlock * holding a spinlock or inside any other preempt-disabled region when
* or inside any other preempt-disabled region when @fn() runs. * @fn() runs.
* *
* This can be thought of as a very heavy write lock, equivalent to * This can be thought of as a very heavy write lock, equivalent to
* grabbing every spinlock in the kernel. */ * grabbing every spinlock in the kernel. */
...@@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu); ...@@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
* @data: the data ptr for the @fn * @data: the data ptr for the @fn
* @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS. * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
* *
* Description: This is a special version of the above, which returns the * Description: This is a special version of the above, which assumes cpus
* thread which has run @fn(): kthread_stop will return the return value * won't come or go while it's being called. Used by hotplug cpu.
* of @fn(). Used by hotplug cpu.
*/ */
struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
unsigned int cpu);
#else #else
static inline int stop_machine_run(int (*fn)(void *), void *data, static inline int stop_machine_run(int (*fn)(void *), void *data,
......
...@@ -216,7 +216,6 @@ static int __ref take_cpu_down(void *_param) ...@@ -216,7 +216,6 @@ static int __ref take_cpu_down(void *_param)
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{ {
int err, nr_calls = 0; int err, nr_calls = 0;
struct task_struct *p;
cpumask_t old_allowed, tmp; cpumask_t old_allowed, tmp;
void *hcpu = (void *)(long)cpu; void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
...@@ -250,19 +249,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) ...@@ -250,19 +249,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
cpu_clear(cpu, tmp); cpu_clear(cpu, tmp);
set_cpus_allowed_ptr(current, &tmp); set_cpus_allowed_ptr(current, &tmp);
p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
if (IS_ERR(p) || cpu_online(cpu)) { if (err || cpu_online(cpu)) {
/* CPU didn't die: tell everyone. Can't complain. */ /* CPU didn't die: tell everyone. Can't complain. */
if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
hcpu) == NOTIFY_BAD) hcpu) == NOTIFY_BAD)
BUG(); BUG();
if (IS_ERR(p)) { goto out_allowed;
err = PTR_ERR(p);
goto out_allowed;
}
goto out_thread;
} }
/* Wait for it to sleep (leaving idle task). */ /* Wait for it to sleep (leaving idle task). */
...@@ -279,8 +274,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) ...@@ -279,8 +274,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
check_for_tasks(cpu); check_for_tasks(cpu);
out_thread:
err = kthread_stop(p);
out_allowed: out_allowed:
set_cpus_allowed_ptr(current, &old_allowed); set_cpus_allowed_ptr(current, &old_allowed);
out_release: out_release:
......
/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation. /* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
* GPL v2 and any later version. * GPL v2 and any later version.
*/ */
#include <linux/cpu.h> #include <linux/cpu.h>
...@@ -13,220 +13,177 @@ ...@@ -13,220 +13,177 @@
#include <asm/atomic.h> #include <asm/atomic.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
/* Since we effect priority and affinity (both of which are visible /* This controls the threads on each CPU. */
* to, and settable by outside processes) we do indirection via a
* kthread. */
/* Thread to stop each CPU in user context. */
enum stopmachine_state { enum stopmachine_state {
STOPMACHINE_WAIT, /* Dummy starting state for thread. */
STOPMACHINE_NONE,
/* Awaiting everyone to be scheduled. */
STOPMACHINE_PREPARE, STOPMACHINE_PREPARE,
/* Disable interrupts. */
STOPMACHINE_DISABLE_IRQ, STOPMACHINE_DISABLE_IRQ,
/* Run the function */
STOPMACHINE_RUN, STOPMACHINE_RUN,
/* Exit */
STOPMACHINE_EXIT, STOPMACHINE_EXIT,
}; };
static enum stopmachine_state state;
struct stop_machine_data { struct stop_machine_data {
int (*fn)(void *); int (*fn)(void *);
void *data; void *data;
struct completion done; int fnret;
int run_all; };
} smdata;
static enum stopmachine_state stopmachine_state; /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
static unsigned int stopmachine_num_threads; static unsigned int num_threads;
static atomic_t stopmachine_thread_ack; static atomic_t thread_ack;
static struct completion finished;
static DEFINE_MUTEX(lock);
static int stopmachine(void *cpu) static void set_state(enum stopmachine_state newstate)
{ {
int irqs_disabled = 0; /* Reset ack counter. */
int prepared = 0; atomic_set(&thread_ack, num_threads);
int ran = 0; smp_wmb();
cpumask_of_cpu_ptr(cpumask, (int)(long)cpu); state = newstate;
set_cpus_allowed_ptr(current, cpumask);
/* Ack: we are alive */
smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
atomic_inc(&stopmachine_thread_ack);
/* Simple state machine */
while (stopmachine_state != STOPMACHINE_EXIT) {
if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
&& !irqs_disabled) {
local_irq_disable();
hard_irq_disable();
irqs_disabled = 1;
/* Ack: irqs disabled. */
smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
} else if (stopmachine_state == STOPMACHINE_PREPARE
&& !prepared) {
/* Everyone is in place, hold CPU. */
preempt_disable();
prepared = 1;
smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
} else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
smdata.fn(smdata.data);
ran = 1;
smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
}
/* Yield in first stage: migration threads need to
* help our sisters onto their CPUs. */
if (!prepared && !irqs_disabled)
yield();
cpu_relax();
}
/* Ack: we are exiting. */
smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
if (irqs_disabled)
local_irq_enable();
if (prepared)
preempt_enable();
return 0;
} }
/* Change the thread state */ /* Last one to ack a state moves to the next state. */
static void stopmachine_set_state(enum stopmachine_state state) static void ack_state(void)
{ {
atomic_set(&stopmachine_thread_ack, 0); if (atomic_dec_and_test(&thread_ack)) {
smp_wmb(); /* If we're the last one to ack the EXIT, we're finished. */
stopmachine_state = state; if (state == STOPMACHINE_EXIT)
while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) complete(&finished);
cpu_relax(); else
set_state(state + 1);
}
} }
static int stop_machine(void) /* This is the actual thread which stops the CPU. It exits by itself rather
* than waiting for kthread_stop(), because it's easier for hotplug CPU. */
static int stop_cpu(struct stop_machine_data *smdata)
{ {
int i, ret = 0; enum stopmachine_state curstate = STOPMACHINE_NONE;
int uninitialized_var(ret);
atomic_set(&stopmachine_thread_ack, 0);
stopmachine_num_threads = 0;
stopmachine_state = STOPMACHINE_WAIT;
for_each_online_cpu(i) { /* Simple state machine */
if (i == raw_smp_processor_id()) do {
continue; /* Chill out and ensure we re-read stopmachine_state. */
ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
if (ret < 0)
break;
stopmachine_num_threads++;
}
/* Wait for them all to come to life. */
while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
yield();
cpu_relax(); cpu_relax();
} if (state != curstate) {
curstate = state;
/* If some failed, kill them all. */ switch (curstate) {
if (ret < 0) { case STOPMACHINE_DISABLE_IRQ:
stopmachine_set_state(STOPMACHINE_EXIT); local_irq_disable();
return ret; hard_irq_disable();
} break;
case STOPMACHINE_RUN:
/* Now they are all started, make them hold the CPUs, ready. */ /* |= allows error detection if functions on
preempt_disable(); * multiple CPUs. */
stopmachine_set_state(STOPMACHINE_PREPARE); smdata->fnret |= smdata->fn(smdata->data);
break;
/* Make them disable irqs. */ default:
local_irq_disable(); break;
hard_irq_disable(); }
stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); ack_state();
}
return 0; } while (curstate != STOPMACHINE_EXIT);
}
static void restart_machine(void)
{
stopmachine_set_state(STOPMACHINE_EXIT);
local_irq_enable(); local_irq_enable();
preempt_enable_no_resched(); do_exit(0);
} }
static void run_other_cpus(void) /* Callback for CPUs which aren't supposed to do anything. */
static int chill(void *unused)
{ {
stopmachine_set_state(STOPMACHINE_RUN); return 0;
} }
static int do_stop(void *_smdata) int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{ {
struct stop_machine_data *smdata = _smdata; int i, err;
int ret; struct stop_machine_data active, idle;
struct task_struct **threads;
active.fn = fn;
active.data = data;
active.fnret = 0;
idle.fn = chill;
idle.data = NULL;
/* If they don't care which cpu fn runs on, just pick one. */
if (cpu == NR_CPUS)
cpu = any_online_cpu(cpu_online_map);
/* This could be too big for stack on large machines. */
threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
if (!threads)
return -ENOMEM;
/* Set up initial state. */
mutex_lock(&lock);
init_completion(&finished);
num_threads = num_online_cpus();
set_state(STOPMACHINE_PREPARE);
ret = stop_machine(); for_each_online_cpu(i) {
if (ret == 0) { struct stop_machine_data *smdata;
ret = smdata->fn(smdata->data); struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
if (smdata->run_all)
run_other_cpus();
restart_machine();
}
/* We're done: you can kthread_stop us now */ if (cpu == ALL_CPUS || i == cpu)
complete(&smdata->done); smdata = &active;
else
smdata = &idle;
threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
i);
if (IS_ERR(threads[i])) {
err = PTR_ERR(threads[i]);
threads[i] = NULL;
goto kill_threads;
}
/* Wait for kthread_stop */ /* Place it onto correct cpu. */
set_current_state(TASK_INTERRUPTIBLE); kthread_bind(threads[i], i);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return ret;
}
struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, /* Make it highest prio. */
unsigned int cpu) if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
{ BUG();
static DEFINE_MUTEX(stopmachine_mutex); }
struct stop_machine_data smdata;
struct task_struct *p;
mutex_lock(&stopmachine_mutex); /* We've created all the threads. Wake them all: hold this CPU so one
* doesn't hit this CPU until we're ready. */
cpu = get_cpu();
for_each_online_cpu(i)
wake_up_process(threads[i]);
smdata.fn = fn; /* This will release the thread on our CPU. */
smdata.data = data; put_cpu();
smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0; wait_for_completion(&finished);
init_completion(&smdata.done); mutex_unlock(&lock);
smp_wmb(); /* make sure other cpus see smdata updates */ kfree(threads);
/* If they don't care which CPU fn runs on, bind to any online one. */ return active.fnret;
if (cpu == NR_CPUS || cpu == ALL_CPUS)
cpu = raw_smp_processor_id();
p = kthread_create(do_stop, &smdata, "kstopmachine"); kill_threads:
if (!IS_ERR(p)) { for_each_online_cpu(i)
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; if (threads[i])
kthread_stop(threads[i]);
mutex_unlock(&lock);
/* One high-prio thread per cpu. We'll do this one. */ kfree(threads);
sched_setscheduler_nocheck(p, SCHED_FIFO, &param); return err;
kthread_bind(p, cpu);
wake_up_process(p);
wait_for_completion(&smdata.done);
}
mutex_unlock(&stopmachine_mutex);
return p;
} }
int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{ {
struct task_struct *p;
int ret; int ret;
/* No CPUs can come up or down during this. */ /* No CPUs can come up or down during this. */
get_online_cpus(); get_online_cpus();
p = __stop_machine_run(fn, data, cpu); ret = __stop_machine_run(fn, data, cpu);
if (!IS_ERR(p))
ret = kthread_stop(p);
else
ret = PTR_ERR(p);
put_online_cpus(); put_online_cpus();
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment