Commit 50ed9354 authored by Nick Piggin, committed by Pekka Enberg

slqb: dynamic array allocations

Implement dynamic allocation for the SLQB per-cpu and per-node arrays. This
should have minimal runtime performance impact: although allocations now go
through an extra level of indirection, the pointer lives in the cache-hot
area of struct kmem_cache.
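
As a sketch of what that indirection looks like (illustrative only, not part
of the patch; the helper name is hypothetical), the per-cpu lookup becomes:

	/*
	 * Illustrative per-cpu lookup: one extra dereference on SMP, but
	 * the cpu_slab pointer sits in the cache-hot head of kmem_cache.
	 */
	static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s,
							  int cpu)
	{
	#ifdef CONFIG_SMP
		return s->cpu_slab[cpu];	/* dynamically allocated array */
	#else
		return &s->cpu_slab;		/* embedded, cache-hot on UP */
	#endif
	}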

It's not quite possible to use the dynamic percpu allocator for this:
firstly, that subsystem itself uses the slab allocator; secondly, it doesn't
have good support for per-node data. If those problems were addressed, we
could use it. For now, just implement a very simple allocator that is used
until the kmalloc caches are up (sketched below).
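
A userspace approximation of that bootstrap scheme (a sketch only, not the
kernel code; the real version is kmem_cache_dyn_array_alloc() in the diff
below, which switches to kzalloc() once slab_is_available()):

	/*
	 * Carve zeroed chunks out of one page at a time and never free
	 * them; good enough for the handful of boot-time caches.
	 */
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define PAGE_SIZE 4096UL

	static void *bootstrap_alloc(size_t size)
	{
		static char *page;	/* current bootstrap page */
		static size_t used;	/* bytes already handed out from it */

		if (size > PAGE_SIZE)
			return NULL;
		if (!page || used + size > PAGE_SIZE) {
			page = malloc(PAGE_SIZE); /* stands in for alloc_pages_exact() */
			if (!page)
				return NULL;
			used = 0;
		}
		memset(page + used, 0, size);
		used += size;
		return page + used - size;
	}

	int main(void)
	{
		/* e.g. a pointer array for 64 CPUs, as the patch allocates */
		void **cpu_array = bootstrap_alloc(64 * sizeof(void *));
		printf("array at %p\n", (void *)cpu_array);
		return 0;
	}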

On x86-64 with a NUMA MAXCPUS config, sizes look like this:
   text    data     bss     dec     hex filename
  29960  259565     100  289625   46b59 mm/slab.o
  34130  497130     696  531956   81df4 mm/slub.o
  24575 1634267  111136 1769978  1b01fa mm/slqb.o
  24845   13959     712   39516    9a5c mm/slqb.o + this patch

SLQB is now 2 orders of magnitude smaller than it was, and an order of
magnitude smaller than SLAB or SLUB (in total size -- text size has
always been smaller). So it should now be very suitable for distro-type
configs in this respect.

As a side-effect the UP version of cpu_slab (which is embedded directly
in the kmem_cache struct) moves up to the hot cachelines, so it need no
longer be cacheline aligned on UP. The overall result should be a
reduction in cacheline footprint on UP kernels.
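
For reference, the resulting struct kmem_cache layout, condensed from the
header hunks in the diff below (the "..." marks fields left unchanged):

	struct kmem_cache {
		unsigned long flags;
		int hiwater;	/* LIFO list high watermark */
		int freebatch;	/* LIFO freelist batch flush size */
	#ifdef CONFIG_SMP
		struct kmem_cache_cpu **cpu_slab;   /* dynamic per-cpu structures */
	#else
		struct kmem_cache_cpu cpu_slab;     /* now in the hot cachelines */
	#endif
		...
	#ifdef CONFIG_NUMA
		struct kmem_cache_node **node_slab; /* dynamic per-node structures */
	#endif
		...
	} ____cacheline_aligned;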
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
parent d895335b
@@ -111,7 +111,7 @@ struct kmem_cache_cpu {
 	struct kmlist rlist;
 	struct kmem_cache_list *remote_cache_list;
 #endif
-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;

 /*
  * Per-node, per-kmem_cache structure. Used for node-specific allocations.
@@ -128,10 +128,19 @@ struct kmem_cache {
 	unsigned long flags;
 	int hiwater;		/* LIFO list high watermark */
 	int freebatch;		/* LIFO freelist batch flush size */
+#ifdef CONFIG_SMP
+	struct kmem_cache_cpu **cpu_slab;	/* dynamic per-cpu structures */
+#else
+	struct kmem_cache_cpu cpu_slab;
+#endif
 	int objsize;		/* Size of object without meta data */
 	int offset;		/* Free pointer offset. */
 	int objects;		/* Number of objects in slab */
+#ifdef CONFIG_NUMA
+	struct kmem_cache_node **node_slab;	/* dynamic per-node structures */
+#endif
 	int size;		/* Size of object including meta data */
 	int order;		/* Allocation order */
 	gfp_t allocflags;	/* gfp flags to use on allocation */
@@ -148,15 +157,7 @@ struct kmem_cache {
 #ifdef CONFIG_SLQB_SYSFS
 	struct kobject kobj;	/* For sysfs */
 #endif
-#ifdef CONFIG_NUMA
-	struct kmem_cache_node *node[MAX_NUMNODES];
-#endif
-#ifdef CONFIG_SMP
-	struct kmem_cache_cpu *cpu_slab[NR_CPUS];
-#else
-	struct kmem_cache_cpu cpu_slab;
-#endif
-};
+} ____cacheline_aligned;

 /*
  * Kmalloc subsystem.
@@ -56,7 +56,6 @@ static inline void struct_slqb_page_wrong_size(void)

 #define PG_SLQB_BIT (1 << PG_slab)

-static int kmem_size __read_mostly;
 #ifdef CONFIG_NUMA
 static inline int slab_numa(struct kmem_cache *s)
 {
@@ -1329,7 +1328,7 @@ static noinline void *__slab_alloc_page(struct kmem_cache *s,
 #ifdef CONFIG_NUMA
 		struct kmem_cache_node *n;

-		n = s->node[slqb_page_to_nid(page)];
+		n = s->node_slab[slqb_page_to_nid(page)];
 		l = &n->list;
 		page->list = l;
@@ -1373,7 +1372,7 @@ static void *__remote_slab_alloc_node(struct kmem_cache *s,
 	struct kmem_cache_list *l;
 	void *object;

-	n = s->node[node];
+	n = s->node_slab[node];
 	if (unlikely(!n))	/* node has no memory */
 		return NULL;
 	l = &n->list;
@@ -1818,7 +1817,7 @@ static void init_kmem_cache_node(struct kmem_cache *s,
 }
 #endif

-/* Initial slabs. XXX: allocate dynamically (with bootmem maybe) */
+/* Initial slabs. */
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct kmem_cache_cpu, kmem_cache_cpus);
 #endif
@@ -1912,10 +1911,10 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n;

-		n = s->node[node];
+		n = s->node_slab[node];
 		if (n) {
 			kmem_cache_free(&kmem_node_cache, n);
-			s->node[node] = NULL;
+			s->node_slab[node] = NULL;
 		}
 	}
 }
@@ -1933,7 +1932,7 @@ static int alloc_kmem_cache_nodes(struct kmem_cache *s)
 			return 0;
 		}
 		init_kmem_cache_node(s, n);
-		s->node[node] = n;
+		s->node_slab[node] = n;
 	}
 	return 1;
 }
@@ -2069,13 +2068,56 @@ static int calculate_sizes(struct kmem_cache *s)
 	}
 }

+#ifdef CONFIG_SMP
+/*
+ * Per-cpu allocator can't be used because it always uses slab allocator,
+ * and it can't do per-node allocations.
+ */
+static void *kmem_cache_dyn_array_alloc(int ids)
+{
+	size_t size = sizeof(void *) * ids;
+
+	if (unlikely(!slab_is_available())) {
+		static void *nextmem;
+		void *ret;
+
+		/*
+		 * Special case for setting up initial caches. These will
+		 * never get freed by definition so we can do it rather
+		 * simply.
+		 */
+		if (!nextmem) {
+			nextmem = alloc_pages_exact(size, GFP_KERNEL);
+			if (!nextmem)
+				return NULL;
+		}
+		ret = nextmem;
+		nextmem = (void *)((unsigned long)ret + size);
+		if ((unsigned long)ret >> PAGE_SHIFT !=
+				(unsigned long)nextmem >> PAGE_SHIFT)
+			nextmem = NULL;
+		memset(ret, 0, size);
+		return ret;
+	} else {
+		return kzalloc(size, GFP_KERNEL);
+	}
+}
+
+static void kmem_cache_dyn_array_free(void *array)
+{
+	if (unlikely(!slab_is_available()))
+		return; /* error case without crashing here (will panic soon) */
+	kfree(array);
+}
+#endif
+
 static int kmem_cache_open(struct kmem_cache *s,
 		const char *name, size_t size, size_t align,
 		unsigned long flags, void (*ctor)(void *), int alloc)
 {
 	unsigned int left_over;

-	memset(s, 0, kmem_size);
+	memset(s, 0, sizeof(struct kmem_cache));
 	s->name = name;
 	s->ctor = ctor;
 	s->objsize = size;
@@ -2094,10 +2136,26 @@ static int kmem_cache_open(struct kmem_cache *s,
 		s->colour_range = 0;
 	}

+	/*
+	 * Protect all alloc_kmem_cache_cpus/nodes allocations with slqb_lock
+	 * to lock out hotplug, just in case (probably not strictly needed
+	 * here).
+	 */
 	down_write(&slqb_lock);
+#ifdef CONFIG_SMP
+	s->cpu_slab = kmem_cache_dyn_array_alloc(nr_cpu_ids);
+	if (!s->cpu_slab)
+		goto error_lock;
+# ifdef CONFIG_NUMA
+	s->node_slab = kmem_cache_dyn_array_alloc(nr_node_ids);
+	if (!s->node_slab)
+		goto error_cpu_array;
+# endif
+#endif
+
 	if (likely(alloc)) {
 		if (!alloc_kmem_cache_nodes(s))
-			goto error_lock;
+			goto error_node_array;

 		if (!alloc_kmem_cache_cpus(s))
 			goto error_nodes;
@@ -2111,6 +2169,14 @@ static int kmem_cache_open(struct kmem_cache *s,

 error_nodes:
 	free_kmem_cache_nodes(s);
+error_node_array:
+#ifdef CONFIG_NUMA
+	kmem_cache_dyn_array_free(s->node_slab);
+#endif
+error_cpu_array:
+#ifdef CONFIG_SMP
+	kmem_cache_dyn_array_free(s->cpu_slab);
+#endif
 error_lock:
 	up_write(&slqb_lock);
 error:
@@ -2152,7 +2218,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *ptr)
 	page = virt_to_head_slqb_page(ptr);
 	if (unlikely(!(page->flags & PG_SLQB_BIT)))
 		goto out;
-	if (unlikely(page->list->cache != s))
+	if (unlikely(page->list->cache != s)) /* XXX: ouch, racy */
 		goto out;
 	return 1;
 out:
@@ -2220,7 +2286,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		struct kmem_cache_node *n;
 		struct kmem_cache_list *l;

-		n = s->node[node];
+		n = s->node_slab[node];
 		if (!n)
 			continue;
 		l = &n->list;
@@ -2449,7 +2515,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
 		struct kmem_cache_node *n;
 		struct kmem_cache_list *l;

-		n = s->node[node];
+		n = s->node_slab[node];
 		if (!n)
 			continue;
 		l = &n->list;
@@ -2502,7 +2568,7 @@ static void kmem_cache_reap(void)
 		struct kmem_cache_node *n;
 		struct kmem_cache_list *l;

-		n = s->node[node];
+		n = s->node_slab[node];
 		if (!n)
 			continue;
 		l = &n->list;
@@ -2529,7 +2595,7 @@ static void cache_trim_worker(struct work_struct *w)
 	list_for_each_entry(s, &slab_caches, list) {
 #ifdef CONFIG_NUMA
 		int node = numa_node_id();
-		struct kmem_cache_node *n = s->node[node];
+		struct kmem_cache_node *n = s->node_slab[node];

 		if (n) {
 			struct kmem_cache_list *l = &n->list;
@@ -2618,7 +2684,7 @@ static int slab_mem_going_online_callback(void *arg)
		 * since memory is not yet available from the node that
		 * is brought up.
		 */
-		if (s->node[nid]) /* could be lefover from last online */
+		if (s->node_slab[nid]) /* could be lefover from last online */
 			continue;

 		n = kmem_cache_alloc(&kmem_node_cache, GFP_KERNEL);
 		if (!n) {
@@ -2626,7 +2692,7 @@ static int slab_mem_going_online_callback(void *arg)
 			goto out;
 		}
 		init_kmem_cache_node(s, n);
-		s->node[nid] = n;
+		s->node_slab[nid] = n;
 	}
 out:
 	up_write(&slqb_lock);
@@ -2673,15 +2739,8 @@ void __init kmem_cache_init(void)
	 * All the ifdefs are rather ugly here, but it's just the setup code,
	 * so it doesn't have to be too readable :)
	 */
-#ifdef CONFIG_SMP
-	kmem_size = offsetof(struct kmem_cache, cpu_slab) +
-			nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
-#else
-	kmem_size = sizeof(struct kmem_cache);
-#endif
-
 	kmem_cache_open(&kmem_cache_cache, "kmem_cache",
-			kmem_size, 0, flags, NULL, 0);
+			sizeof(struct kmem_cache), 0, flags, NULL, 0);
 #ifdef CONFIG_SMP
 	kmem_cache_open(&kmem_cpu_cache, "kmem_cache_cpu",
 			sizeof(struct kmem_cache_cpu), 0, flags, NULL, 0);
@@ -2719,15 +2778,15 @@ void __init kmem_cache_init(void)
 		n = &per_cpu(kmem_cache_nodes, i);
 		init_kmem_cache_node(&kmem_cache_cache, n);
-		kmem_cache_cache.node[i] = n;
+		kmem_cache_cache.node_slab[i] = n;

 		n = &per_cpu(kmem_cpu_nodes, i);
 		init_kmem_cache_node(&kmem_cpu_cache, n);
-		kmem_cpu_cache.node[i] = n;
+		kmem_cpu_cache.node_slab[i] = n;

 		n = &per_cpu(kmem_node_nodes, i);
 		init_kmem_cache_node(&kmem_node_cache, n);
-		kmem_node_cache.node[i] = n;
+		kmem_node_cache.node_slab[i] = n;
 	}
 #endif
@@ -2793,7 +2852,7 @@
 #endif

	/*
	 * smp_init() has not yet been called, so no worries about memory
-	 * ordering here (eg. slab_is_available vs numa_platform)
+	 * ordering with __slab_is_available.
	 */
 	__slab_is_available = 1;
 }
@@ -3036,7 +3095,7 @@ static void gather_stats(struct kmem_cache *s, struct stats_gather *stats)
 #ifdef CONFIG_NUMA
 	for_each_online_node(node) {
-		struct kmem_cache_node *n = s->node[node];
+		struct kmem_cache_node *n = s->node_slab[node];
 		struct kmem_cache_list *l = &n->list;
 		struct slqb_page *page;
 		unsigned long flags;