Commit d1b55138, authored by John Hawkes, committed by Linus Torvalds

[PATCH] cpusets: fix the "dynamic sched domains" bug

For a NUMA system with multiple CPUs per node, declaring a cpu-exclusive
cpuset that includes only some, but not all, of the CPUs in a node will mangle
the sched domain structures.

Signed-off-by: John Hawkes <hawkes@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 9c1cfda2
...@@ -4970,10 +4970,10 @@ static int cpu_to_phys_group(int cpu) ...@@ -4970,10 +4970,10 @@ static int cpu_to_phys_group(int cpu)
* gets dynamically allocated. * gets dynamically allocated.
*/ */
static DEFINE_PER_CPU(struct sched_domain, node_domains); static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group *sched_group_nodes[MAX_NUMNODES]; static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES]; static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
static int cpu_to_allnodes_group(int cpu) static int cpu_to_allnodes_group(int cpu)
{ {
...@@ -4988,6 +4988,21 @@ static int cpu_to_allnodes_group(int cpu) ...@@ -4988,6 +4988,21 @@ static int cpu_to_allnodes_group(int cpu)
void build_sched_domains(const cpumask_t *cpu_map) void build_sched_domains(const cpumask_t *cpu_map)
{ {
int i; int i;
#ifdef CONFIG_NUMA
struct sched_group **sched_group_nodes = NULL;
struct sched_group *sched_group_allnodes = NULL;
/*
* Allocate the per-node list of sched groups
*/
sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES,
GFP_ATOMIC);
if (!sched_group_nodes) {
printk(KERN_WARNING "Can not alloc sched group node list\n");
return;
}
sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
#endif
/* /*
* Set up domains for cpus specified by the cpu_map. * Set up domains for cpus specified by the cpu_map.
...@@ -5000,8 +5015,21 @@ void build_sched_domains(const cpumask_t *cpu_map) ...@@ -5000,8 +5015,21 @@ void build_sched_domains(const cpumask_t *cpu_map)
cpus_and(nodemask, nodemask, *cpu_map); cpus_and(nodemask, nodemask, *cpu_map);
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
if (num_online_cpus() if (cpus_weight(*cpu_map)
> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
if (!sched_group_allnodes) {
sched_group_allnodes
= kmalloc(sizeof(struct sched_group)
* MAX_NUMNODES,
GFP_KERNEL);
if (!sched_group_allnodes) {
printk(KERN_WARNING
"Can not alloc allnodes sched group\n");
break;
}
sched_group_allnodes_bycpu[i]
= sched_group_allnodes;
}
sd = &per_cpu(allnodes_domains, i); sd = &per_cpu(allnodes_domains, i);
*sd = SD_ALLNODES_INIT; *sd = SD_ALLNODES_INIT;
sd->span = *cpu_map; sd->span = *cpu_map;
...@@ -5065,6 +5093,7 @@ void build_sched_domains(const cpumask_t *cpu_map) ...@@ -5065,6 +5093,7 @@ void build_sched_domains(const cpumask_t *cpu_map)
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
/* Set up node groups */ /* Set up node groups */
if (sched_group_allnodes)
init_sched_build_groups(sched_group_allnodes, *cpu_map, init_sched_build_groups(sched_group_allnodes, *cpu_map,
&cpu_to_allnodes_group); &cpu_to_allnodes_group);
...@@ -5077,8 +5106,10 @@ void build_sched_domains(const cpumask_t *cpu_map) ...@@ -5077,8 +5106,10 @@ void build_sched_domains(const cpumask_t *cpu_map)
int j; int j;
cpus_and(nodemask, nodemask, *cpu_map); cpus_and(nodemask, nodemask, *cpu_map);
if (cpus_empty(nodemask)) if (cpus_empty(nodemask)) {
sched_group_nodes[i] = NULL;
continue; continue;
}
domainspan = sched_domain_node_span(i); domainspan = sched_domain_node_span(i);
cpus_and(domainspan, domainspan, *cpu_map); cpus_and(domainspan, domainspan, *cpu_map);
...@@ -5223,6 +5254,22 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map) ...@@ -5223,6 +5254,22 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
{ {
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
int i; int i;
int cpu;
for_each_cpu_mask(cpu, *cpu_map) {
struct sched_group *sched_group_allnodes
= sched_group_allnodes_bycpu[cpu];
struct sched_group **sched_group_nodes
= sched_group_nodes_bycpu[cpu];
if (sched_group_allnodes) {
kfree(sched_group_allnodes);
sched_group_allnodes_bycpu[cpu] = NULL;
}
if (!sched_group_nodes)
continue;
for (i = 0; i < MAX_NUMNODES; i++) { for (i = 0; i < MAX_NUMNODES; i++) {
cpumask_t nodemask = node_to_cpumask(i); cpumask_t nodemask = node_to_cpumask(i);
struct sched_group *oldsg, *sg = sched_group_nodes[i]; struct sched_group *oldsg, *sg = sched_group_nodes[i];
...@@ -5240,7 +5287,9 @@ next_sg: ...@@ -5240,7 +5287,9 @@ next_sg:
kfree(oldsg); kfree(oldsg);
if (oldsg != sched_group_nodes[i]) if (oldsg != sched_group_nodes[i])
goto next_sg; goto next_sg;
sched_group_nodes[i] = NULL; }
kfree(sched_group_nodes);
sched_group_nodes_bycpu[cpu] = NULL;
} }
#endif #endif
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment