Commit 9dfc6e68 authored by Christoph Lameter, committed by Pekka Enberg

SLUB: Use this_cpu operations in slub

Using per cpu allocations removes the need for the per cpu arrays in the
kmem_cache struct. These could get quite big if we have to support systems
with thousands of cpus. The use of this_cpu_xx operations results in:

1. The size of kmem_cache for SMP configurations shrinks since we will only
   need 1 pointer instead of NR_CPUS pointers. The same pointer can be used
   by all processors. This reduces the cache footprint of the allocator.

2. We can dynamically size kmem_cache according to the actual nodes in the
   system meaning less memory overhead for configurations that may potentially
   support up to 1k NUMA nodes / 4k cpus.

3. We can remove the fiddling with allocating and releasing of
   kmem_cache_cpu structures when bringing up and shutting down cpus. The cpu
   alloc logic will do it all for us. This removes some portions of the cpu
   hotplug functionality.

4. Fastpath performance increases since per cpu pointer lookups and
   address calculations are avoided.

V7-V8
- Convert missed get_cpu_slab() under CONFIG_SLUB_STATS
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
parent 55639353
...@@ -69,6 +69,7 @@ struct kmem_cache_order_objects { ...@@ -69,6 +69,7 @@ struct kmem_cache_order_objects {
* Slab cache management. * Slab cache management.
*/ */
struct kmem_cache { struct kmem_cache {
struct kmem_cache_cpu *cpu_slab;
/* Used for retriving partial slabs etc */ /* Used for retriving partial slabs etc */
unsigned long flags; unsigned long flags;
int size; /* The size of an object including meta data */ int size; /* The size of an object including meta data */
...@@ -104,11 +105,6 @@ struct kmem_cache { ...@@ -104,11 +105,6 @@ struct kmem_cache {
int remote_node_defrag_ratio; int remote_node_defrag_ratio;
struct kmem_cache_node *node[MAX_NUMNODES]; struct kmem_cache_node *node[MAX_NUMNODES];
#endif #endif
#ifdef CONFIG_SMP
struct kmem_cache_cpu *cpu_slab[NR_CPUS];
#else
struct kmem_cache_cpu cpu_slab;
#endif
}; };
/* /*
......
...@@ -242,15 +242,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) ...@@ -242,15 +242,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
#endif #endif
} }
static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
{
#ifdef CONFIG_SMP
return s->cpu_slab[cpu];
#else
return &s->cpu_slab;
#endif
}
/* Verify that a pointer has an address that is valid within a slab page */ /* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s, static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, const void *object) struct page *page, const void *object)
...@@ -1124,7 +1115,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) ...@@ -1124,7 +1115,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
if (!page) if (!page)
return NULL; return NULL;
stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); stat(this_cpu_ptr(s->cpu_slab), ORDER_FALLBACK);
} }
if (kmemcheck_enabled if (kmemcheck_enabled
...@@ -1422,7 +1413,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) ...@@ -1422,7 +1413,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
{ {
struct kmem_cache_node *n = get_node(s, page_to_nid(page)); struct kmem_cache_node *n = get_node(s, page_to_nid(page));
struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
__ClearPageSlubFrozen(page); __ClearPageSlubFrozen(page);
if (page->inuse) { if (page->inuse) {
...@@ -1454,7 +1445,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) ...@@ -1454,7 +1445,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
slab_unlock(page); slab_unlock(page);
} else { } else {
slab_unlock(page); slab_unlock(page);
stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); stat(__this_cpu_ptr(s->cpu_slab), FREE_SLAB);
discard_slab(s, page); discard_slab(s, page);
} }
} }
...@@ -1507,7 +1498,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) ...@@ -1507,7 +1498,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
*/ */
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{ {
struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
if (likely(c && c->page)) if (likely(c && c->page))
flush_slab(s, c); flush_slab(s, c);
...@@ -1673,7 +1664,7 @@ new_slab: ...@@ -1673,7 +1664,7 @@ new_slab:
local_irq_disable(); local_irq_disable();
if (new) { if (new) {
c = get_cpu_slab(s, smp_processor_id()); c = __this_cpu_ptr(s->cpu_slab);
stat(c, ALLOC_SLAB); stat(c, ALLOC_SLAB);
if (c->page) if (c->page)
flush_slab(s, c); flush_slab(s, c);
...@@ -1711,7 +1702,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, ...@@ -1711,7 +1702,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
void **object; void **object;
struct kmem_cache_cpu *c; struct kmem_cache_cpu *c;
unsigned long flags; unsigned long flags;
unsigned int objsize; unsigned long objsize;
gfpflags &= gfp_allowed_mask; gfpflags &= gfp_allowed_mask;
...@@ -1722,14 +1713,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, ...@@ -1722,14 +1713,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
return NULL; return NULL;
local_irq_save(flags); local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id()); c = __this_cpu_ptr(s->cpu_slab);
object = c->freelist;
objsize = c->objsize; objsize = c->objsize;
if (unlikely(!c->freelist || !node_match(c, node))) if (unlikely(!object || !node_match(c, node)))
object = __slab_alloc(s, gfpflags, node, addr, c); object = __slab_alloc(s, gfpflags, node, addr, c);
else { else {
object = c->freelist;
c->freelist = object[c->offset]; c->freelist = object[c->offset];
stat(c, ALLOC_FASTPATH); stat(c, ALLOC_FASTPATH);
} }
...@@ -1800,7 +1791,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, ...@@ -1800,7 +1791,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
void **object = (void *)x; void **object = (void *)x;
struct kmem_cache_cpu *c; struct kmem_cache_cpu *c;
c = get_cpu_slab(s, raw_smp_processor_id()); c = __this_cpu_ptr(s->cpu_slab);
stat(c, FREE_SLOWPATH); stat(c, FREE_SLOWPATH);
slab_lock(page); slab_lock(page);
...@@ -1872,7 +1863,7 @@ static __always_inline void slab_free(struct kmem_cache *s, ...@@ -1872,7 +1863,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
kmemleak_free_recursive(x, s->flags); kmemleak_free_recursive(x, s->flags);
local_irq_save(flags); local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id()); c = __this_cpu_ptr(s->cpu_slab);
kmemcheck_slab_free(s, object, c->objsize); kmemcheck_slab_free(s, object, c->objsize);
debug_check_no_locks_freed(object, c->objsize); debug_check_no_locks_freed(object, c->objsize);
if (!(s->flags & SLAB_DEBUG_OBJECTS)) if (!(s->flags & SLAB_DEBUG_OBJECTS))
...@@ -2095,130 +2086,28 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) ...@@ -2095,130 +2086,28 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
#endif #endif
} }
#ifdef CONFIG_SMP static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]);
/*
* Per cpu array for per cpu structures.
*
* The per cpu array places all kmem_cache_cpu structures from one processor
* close together meaning that it becomes possible that multiple per cpu
* structures are contained in one cacheline. This may be particularly
* beneficial for the kmalloc caches.
*
* A desktop system typically has around 60-80 slabs. With 100 here we are
* likely able to get per cpu structures for all caches from the array defined
* here. We must be able to cover all kmalloc caches during bootstrap.
*
* If the per cpu array is exhausted then fall back to kmalloc
* of individual cachelines. No sharing is possible then.
*/
#define NR_KMEM_CACHE_CPU 100
static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
kmem_cache_cpu);
static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
int cpu, gfp_t flags)
{
struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
if (c)
per_cpu(kmem_cache_cpu_free, cpu) =
(void *)c->freelist;
else {
/* Table overflow: So allocate ourselves */
c = kmalloc_node(
ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
flags, cpu_to_node(cpu));
if (!c)
return NULL;
}
init_kmem_cache_cpu(s, c);
return c;
}
static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
{
if (c < per_cpu(kmem_cache_cpu, cpu) ||
c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
kfree(c);
return;
}
c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
per_cpu(kmem_cache_cpu_free, cpu) = c;
}
static void free_kmem_cache_cpus(struct kmem_cache *s)
{
int cpu;
for_each_online_cpu(cpu) {
struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
if (c) {
s->cpu_slab[cpu] = NULL;
free_kmem_cache_cpu(c, cpu);
}
}
}
static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
{
int cpu;
for_each_online_cpu(cpu) {
struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
if (c) static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
continue;
c = alloc_kmem_cache_cpu(s, cpu, flags);
if (!c) {
free_kmem_cache_cpus(s);
return 0;
}
s->cpu_slab[cpu] = c;
}
return 1;
}
/*
* Initialize the per cpu array.
*/
static void init_alloc_cpu_cpu(int cpu)
{
int i;
if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
return;
for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
}
static void __init init_alloc_cpu(void)
{ {
int cpu; int cpu;
for_each_online_cpu(cpu) if (s < kmalloc_caches + SLUB_PAGE_SHIFT && s >= kmalloc_caches)
init_alloc_cpu_cpu(cpu); /*
} * Boot time creation of the kmalloc array. Use static per cpu data
* since the per cpu allocator is not available yet.
*/
s->cpu_slab = per_cpu_var(kmalloc_percpu) + (s - kmalloc_caches);
else
s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
#else if (!s->cpu_slab)
static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} return 0;
static inline void init_alloc_cpu(void) {}
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) for_each_possible_cpu(cpu)
{ init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
init_kmem_cache_cpu(s, &s->cpu_slab);
return 1; return 1;
} }
#endif
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
/* /*
...@@ -2609,9 +2498,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) ...@@ -2609,9 +2498,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
int node; int node;
flush_all(s); flush_all(s);
free_percpu(s->cpu_slab);
/* Attempt to free all objects */ /* Attempt to free all objects */
free_kmem_cache_cpus(s);
for_each_node_state(node, N_NORMAL_MEMORY) { for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n = get_node(s, node); struct kmem_cache_node *n = get_node(s, node);
...@@ -2760,7 +2648,19 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) ...@@ -2760,7 +2648,19 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
realsize = kmalloc_caches[index].objsize; realsize = kmalloc_caches[index].objsize;
text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
(unsigned int)realsize); (unsigned int)realsize);
s = kmalloc(kmem_size, flags & ~SLUB_DMA);
if (flags & __GFP_WAIT)
s = kmalloc(kmem_size, flags & ~SLUB_DMA);
else {
int i;
s = NULL;
for (i = 0; i < SLUB_PAGE_SHIFT; i++)
if (kmalloc_caches[i].size) {
s = kmalloc_caches + i;
break;
}
}
/* /*
* Must defer sysfs creation to a workqueue because we don't know * Must defer sysfs creation to a workqueue because we don't know
...@@ -3176,8 +3076,6 @@ void __init kmem_cache_init(void) ...@@ -3176,8 +3076,6 @@ void __init kmem_cache_init(void)
int i; int i;
int caches = 0; int caches = 0;
init_alloc_cpu();
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
/* /*
* Must first have the slab cache available for the allocations of the * Must first have the slab cache available for the allocations of the
...@@ -3261,8 +3159,10 @@ void __init kmem_cache_init(void) ...@@ -3261,8 +3159,10 @@ void __init kmem_cache_init(void)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
register_cpu_notifier(&slab_notifier); register_cpu_notifier(&slab_notifier);
kmem_size = offsetof(struct kmem_cache, cpu_slab) + #endif
nr_cpu_ids * sizeof(struct kmem_cache_cpu *); #ifdef CONFIG_NUMA
kmem_size = offsetof(struct kmem_cache, node) +
nr_node_ids * sizeof(struct kmem_cache_node *);
#else #else
kmem_size = sizeof(struct kmem_cache); kmem_size = sizeof(struct kmem_cache);
#endif #endif
...@@ -3365,7 +3265,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, ...@@ -3365,7 +3265,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
* per cpu structures * per cpu structures
*/ */
for_each_online_cpu(cpu) for_each_online_cpu(cpu)
get_cpu_slab(s, cpu)->objsize = s->objsize; per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize;
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
up_write(&slub_lock); up_write(&slub_lock);
...@@ -3422,11 +3322,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, ...@@ -3422,11 +3322,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
switch (action) { switch (action) {
case CPU_UP_PREPARE: case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN: case CPU_UP_PREPARE_FROZEN:
init_alloc_cpu_cpu(cpu);
down_read(&slub_lock); down_read(&slub_lock);
list_for_each_entry(s, &slab_caches, list) list_for_each_entry(s, &slab_caches, list)
s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
GFP_KERNEL);
up_read(&slub_lock); up_read(&slub_lock);
break; break;
...@@ -3436,13 +3334,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, ...@@ -3436,13 +3334,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
case CPU_DEAD_FROZEN: case CPU_DEAD_FROZEN:
down_read(&slub_lock); down_read(&slub_lock);
list_for_each_entry(s, &slab_caches, list) { list_for_each_entry(s, &slab_caches, list) {
struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
local_irq_save(flags); local_irq_save(flags);
__flush_cpu_slab(s, cpu); __flush_cpu_slab(s, cpu);
local_irq_restore(flags); local_irq_restore(flags);
free_kmem_cache_cpu(c, cpu);
s->cpu_slab[cpu] = NULL;
} }
up_read(&slub_lock); up_read(&slub_lock);
break; break;
...@@ -3928,7 +3822,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, ...@@ -3928,7 +3822,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
int cpu; int cpu;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
if (!c || c->node < 0) if (!c || c->node < 0)
continue; continue;
...@@ -4353,7 +4247,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) ...@@ -4353,7 +4247,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
return -ENOMEM; return -ENOMEM;
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
unsigned x = get_cpu_slab(s, cpu)->stat[si]; unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
data[cpu] = x; data[cpu] = x;
sum += x; sum += x;
...@@ -4376,7 +4270,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si) ...@@ -4376,7 +4270,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si)
int cpu; int cpu;
for_each_online_cpu(cpu) for_each_online_cpu(cpu)
get_cpu_slab(s, cpu)->stat[si] = 0; per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
} }
#define STAT_ATTR(si, text) \ #define STAT_ATTR(si, text) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment