Commit 0fdcbb46 authored by Lai Jiangshan, committed by James Toy

_cpu_down() changes the current task's affinity and then restores it at
the end.

It has two problems:

1) The recovery of the current task's cpus_allowed will fail under
   some conditions.

   # grep Cpus_allowed_list /proc/$$/status
   Cpus_allowed_list:      0-3

   # taskset -pc 2 $$
   pid 29075's current affinity list: 0-3
   pid 29075's new affinity list: 2

   # grep Cpus_allowed_list /proc/$$/status
   Cpus_allowed_list:      2

   # echo 0 > /sys/devices/system/cpu/cpu2/online

   # grep Cpus_allowed_list /proc/$$/status
   Cpus_allowed_list:      0

   Here, Cpus_allowed_list was "2" before the offline and should have
   become "0-1,3" (the remaining online CPUs) once cpu #2 went down.
   Instead it ends up as "0": _cpu_down() temporarily pinned the task to
   another online CPU and then could not restore the saved mask, because
   "2" no longer intersects the online CPUs.

   This "Cpus_allowed_list:      0" is incorrect.

2) If the current task is a userspace task, the user may change its
   CPU affinity while the CPU is being hot-unplugged, and that change
   is then silently overwritten when _cpu_down() restores the saved
   mask (see the illustration below).
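
For illustration only (not part of the patch): the race in 2) can be
provoked with a small userspace program, run as root, that triggers the
offline while its affinity is changed from another shell with taskset.
The CPU number and sysfs path below are just an example.

   #include <fcntl.h>
   #include <stdio.h>
   #include <unistd.h>

   int main(void)
   {
           int fd = open("/sys/devices/system/cpu/cpu2/online", O_WRONLY);

           if (fd < 0) {
                   perror("open");
                   return 1;
           }
           printf("pid %d: offlining cpu2 - change my affinity now "
                  "(taskset -pc ...)\n", (int)getpid());
           fflush(stdout);
           /* The write does not return until _cpu_down() finishes; with
            * the old code, an affinity change made in this window is
            * overwritten when _cpu_down() restores the mask it saved on
            * entry. */
           if (write(fd, "0", 1) != 1)
                   perror("write");
           close(fd);
           return 0;
   }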


Fix both problems by not touching the current task's affinity at all.
Instead, create a kernel thread to do the work; only that thread's
affinity ever needs to change.
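
In outline, the new flow looks like the condensed sketch below (notifier
calls and error paths trimmed; the real hunks follow after the sign-offs):
the caller packs its arguments and a completion into a parameter block,
spawns a kthread, and sleeps until the kthread reports the result.

   #include <linux/completion.h>
   #include <linux/err.h>
   #include <linux/kthread.h>

   struct cpu_down_param {
           unsigned long mod;
           unsigned int cpu;
           int ret;
           struct completion done;
   };

   static int _cpu_down_thread(void *_param)
   {
           struct cpu_down_param *param = _param;

           /* ... take param->cpu down; this thread's cpus_allowed can
            * be narrowed freely without disturbing the caller ... */
           param->ret = 0;
           complete(&param->done);
           return 0;
   }

   static int _cpu_down(unsigned int cpu, int tasks_frozen)
   {
           struct task_struct *k;
           struct cpu_down_param param = {
                   .mod = tasks_frozen ? CPU_TASKS_FROZEN : 0,
                   .cpu = cpu,
           };

           init_completion(&param.done);
           k = kthread_run(_cpu_down_thread, &param, "kcpu_down");
           if (IS_ERR(k))
                   return PTR_ERR(k);
           wait_for_completion(&param.done);
           return param.ret;
   }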
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent dfdcb38a
@@ -162,15 +162,17 @@ static inline void check_for_tasks(int cpu)
 	write_unlock_irq(&tasklist_lock);
 }
 
-struct take_cpu_down_param {
+struct cpu_down_param {
 	unsigned long mod;
-	void *hcpu;
+	unsigned int cpu;
+	int ret;
+	struct completion done;
 };
 
 /* Take this CPU down. */
 static int __ref take_cpu_down(void *_param)
 {
-	struct take_cpu_down_param *param = _param;
+	struct cpu_down_param *param = _param;
 	int err;
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
@@ -179,7 +181,7 @@ static int __ref take_cpu_down(void *_param)
 		return err;
 
 	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
-				param->hcpu);
+				(void *)(long)param->cpu);
 
 	/* Force idle task to run as soon as we yield: it should
 	   immediately notice cpu is offline and die quickly. */
@@ -187,26 +189,13 @@ static int __ref take_cpu_down(void *_param)
 	return 0;
 }
 
-/* Requires cpu_add_remove_lock to be held */
-static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+static int __ref _cpu_down_thread(void *_param)
 {
+	struct cpu_down_param *param = _param;
 	int err, nr_calls = 0;
-	cpumask_var_t old_allowed;
+	unsigned long mod = param->mod;
+	unsigned int cpu = param->cpu;
 	void *hcpu = (void *)(long)cpu;
-	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
-	struct take_cpu_down_param tcd_param = {
-		.mod = mod,
-		.hcpu = hcpu,
-	};
-
-	if (num_online_cpus() == 1)
-		return -EBUSY;
-
-	if (!cpu_online(cpu))
-		return -EINVAL;
-
-	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
-		return -ENOMEM;
 
 	cpu_hotplug_begin();
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -222,18 +211,16 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	}
 
 	/* Ensure that we are not runnable on dying cpu */
-	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current,
-		       cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
+	set_cpus_allowed_ptr(current, cpu_active_mask);
 
-	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+	err = __stop_machine(take_cpu_down, param, cpumask_of(cpu));
 	if (err) {
 		/* CPU didn't die: tell everyone. Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					    hcpu) == NOTIFY_BAD)
 			BUG();
-		goto out_allowed;
+		goto out_release;
 	}
 	BUG_ON(cpu_online(cpu));
 
@@ -251,8 +238,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	check_for_tasks(cpu);
 
-out_allowed:
-	set_cpus_allowed_ptr(current, old_allowed);
 out_release:
 	cpu_hotplug_done();
 	if (!err) {
@@ -260,8 +245,35 @@ out_release:
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 	}
-	free_cpumask_var(old_allowed);
-	return err;
+	param->ret = err;
+	complete(&param->done);
+	return 0;
+}
+
+/* Requires cpu_add_remove_lock to be held */
+static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+{
+	struct task_struct *k;
+	struct cpu_down_param param = {
+		.mod = tasks_frozen ? CPU_TASKS_FROZEN : 0,
+		.cpu = cpu,
+		.ret = 0,
+	};
+
+	if (num_online_cpus() == 1)
+		return -EBUSY;
+
+	if (!cpu_online(cpu))
+		return -EINVAL;
+
+	init_completion(&param.done);
+	k = kthread_run(_cpu_down_thread, &param, "kcpu_down");
+	if (IS_ERR(k))
+		return PTR_ERR(k);
+
+	wait_for_completion(&param.done);
+	return param.ret;
 }
 
 int __ref cpu_down(unsigned int cpu)