Commit cca448fe, authored by Yasunori Goto, committed by Linus Torvalds

[PATCH] wait_table and zonelist initializing for memory hotadd: wait_table initialization

Wait_table is initialized according to zone size at boot time.  But we cannot
know the maximum zone size when memory hotplug is enabled, because the size
can change later, and resizing the wait_table is hard.

So the kernel allocates and initializes wait_table at its maximum size.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 718127cc
...@@ -1727,6 +1727,7 @@ void __init build_all_zonelists(void) ...@@ -1727,6 +1727,7 @@ void __init build_all_zonelists(void)
*/ */
#define PAGES_PER_WAITQUEUE 256 #define PAGES_PER_WAITQUEUE 256
#ifndef CONFIG_MEMORY_HOTPLUG
static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
{ {
unsigned long size = 1; unsigned long size = 1;
...@@ -1745,6 +1746,29 @@ static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) ...@@ -1745,6 +1746,29 @@ static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
return max(size, 4UL); return max(size, 4UL);
} }
#else
/*
* A zone's size might be changed by hot-add, so it is not possible to determine
* a suitable size for its wait_table. So we use the maximum size now.
*
* The max wait table size = 4096 x sizeof(wait_queue_head_t). ie:
*
* i386 (preemption config) : 4096 x 16 = 64Kbyte.
* ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte.
* ia64, x86-64 (preemption) : 4096 x 24 = 96Kbyte.
*
* The maximum entries are prepared when a zone's memory is (512K + 256) pages
* or more by the traditional way. (See above). It equals:
*
* i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte.
* ia64(16K page size) : = ( 8G + 4M)byte.
* powerpc (64K page size) : = (32G +16M)byte.
*/
static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
{
	/*
	 * The zone may grow through hot-add, so the page count passed in is
	 * deliberately ignored and the maximum table size is always used.
	 */
	const unsigned long max_entries = 4096UL;

	return max_entries;
}
#endif
/* /*
* This is an integer logarithm so that shifts can be used later * This is an integer logarithm so that shifts can be used later
...@@ -2010,10 +2034,11 @@ void __init setup_per_cpu_pageset(void) ...@@ -2010,10 +2034,11 @@ void __init setup_per_cpu_pageset(void)
#endif #endif
static __meminit static __meminit
void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
{ {
int i; int i;
struct pglist_data *pgdat = zone->zone_pgdat; struct pglist_data *pgdat = zone->zone_pgdat;
size_t alloc_size;
/* /*
* The per-page waitqueue mechanism uses hashed waitqueues * The per-page waitqueue mechanism uses hashed waitqueues
...@@ -2023,12 +2048,32 @@ void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) ...@@ -2023,12 +2048,32 @@ void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
wait_table_hash_nr_entries(zone_size_pages); wait_table_hash_nr_entries(zone_size_pages);
zone->wait_table_bits = zone->wait_table_bits =
wait_table_bits(zone->wait_table_hash_nr_entries); wait_table_bits(zone->wait_table_hash_nr_entries);
zone->wait_table = (wait_queue_head_t *) alloc_size = zone->wait_table_hash_nr_entries
alloc_bootmem_node(pgdat, zone->wait_table_hash_nr_entries * sizeof(wait_queue_head_t);
* sizeof(wait_queue_head_t));
if (system_state == SYSTEM_BOOTING) {
zone->wait_table = (wait_queue_head_t *)
alloc_bootmem_node(pgdat, alloc_size);
} else {
/*
* This case means that a zone whose size was 0 gets new memory
* via memory hot-add.
* But it may be the case that a new node was hot-added. In
* this case vmalloc() will not be able to use this new node's
* memory - this wait_table must be initialized to use this new
* node itself as well.
* To use this new node's memory, further consideration will be
* necessary.
*/
zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size);
}
if (!zone->wait_table)
return -ENOMEM;
for(i = 0; i < zone->wait_table_hash_nr_entries; ++i) for(i = 0; i < zone->wait_table_hash_nr_entries; ++i)
init_waitqueue_head(zone->wait_table + i); init_waitqueue_head(zone->wait_table + i);
return 0;
} }
static __meminit void zone_pcp_init(struct zone *zone) static __meminit void zone_pcp_init(struct zone *zone)
...@@ -2055,8 +2100,10 @@ __meminit int init_currently_empty_zone(struct zone *zone, ...@@ -2055,8 +2100,10 @@ __meminit int init_currently_empty_zone(struct zone *zone,
unsigned long size) unsigned long size)
{ {
struct pglist_data *pgdat = zone->zone_pgdat; struct pglist_data *pgdat = zone->zone_pgdat;
int ret;
zone_wait_table_init(zone, size); ret = zone_wait_table_init(zone, size);
if (ret)
return ret;
pgdat->nr_zones = zone_idx(zone) + 1; pgdat->nr_zones = zone_idx(zone) + 1;
zone->zone_start_pfn = zone_start_pfn; zone->zone_start_pfn = zone_start_pfn;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment