Commit bdc8cb98, authored by Dave Hansen and committed by Linus Torvalds

[PATCH] memory hotplug locking: zone span seqlock

See the "fixup bad_range()" patch for more information, but this actually
creates the lock to protect things making assumptions about a zone's size
staying constant at runtime.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 208d54e5
...@@ -16,13 +16,36 @@ void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) ...@@ -16,13 +16,36 @@ void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
static inline static inline
void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
{ {
spin_lock_irqrestore(&pgdat->node_size_lock, *flags); spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
} }
static inline static inline
void pgdat_resize_init(struct pglist_data *pgdat) void pgdat_resize_init(struct pglist_data *pgdat)
{ {
spin_lock_init(&pgdat->node_size_lock); spin_lock_init(&pgdat->node_size_lock);
} }
/*
* Zone resizing functions
*/
/*
 * Open a read-side pass on zone->span_seqlock.
 *
 * Returns whatever read_seqbegin() hands back; the caller keeps that value
 * and passes it unchanged to zone_span_seqretry() after it has finished
 * reading the zone's span fields.
 */
static inline unsigned zone_span_seqbegin(struct zone *zone)
{
return read_seqbegin(&zone->span_seqlock);
}
/*
 * Close a read-side pass on zone->span_seqlock.
 *
 * @iv is the value previously returned by zone_span_seqbegin() for the same
 * zone.  Returns the result of read_seqretry(); readers are expected to
 * repeat their reads for as long as this is nonzero.
 */
static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
{
return read_seqretry(&zone->span_seqlock, iv);
}
/*
 * Take the write side of zone->span_seqlock.  Must be paired with
 * zone_span_writeunlock() on the same zone.
 */
static inline void zone_span_writelock(struct zone *zone)
{
write_seqlock(&zone->span_seqlock);
}
/*
 * Release the write side of zone->span_seqlock previously taken with
 * zone_span_writelock().
 */
static inline void zone_span_writeunlock(struct zone *zone)
{
write_sequnlock(&zone->span_seqlock);
}
/*
 * Initialise zone->span_seqlock.  Called once per zone during zone setup,
 * alongside the zone's spinlock initialisation.
 */
static inline void zone_seqlock_init(struct zone *zone)
{
seqlock_init(&zone->span_seqlock);
}
#else /* ! CONFIG_MEMORY_HOTPLUG */ #else /* ! CONFIG_MEMORY_HOTPLUG */
/* /*
* Stub functions for when hotplug is off * Stub functions for when hotplug is off
...@@ -30,5 +53,17 @@ void pgdat_resize_init(struct pglist_data *pgdat) ...@@ -30,5 +53,17 @@ void pgdat_resize_init(struct pglist_data *pgdat)
static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
static inline void pgdat_resize_init(struct pglist_data *pgdat) {} static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
#endif
/*
 * !CONFIG_MEMORY_HOTPLUG stub: struct zone has no span_seqlock in this
 * configuration, so there is nothing to sample.  Always returns 0.
 */
static inline unsigned zone_span_seqbegin(struct zone *zone)
{
return 0;
}
/*
 * !CONFIG_MEMORY_HOTPLUG stub: always returns 0, so a
 * do { ... } while (zone_span_seqretry(...)) reader loop runs exactly once.
 */
static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
{
return 0;
}
/*
 * !CONFIG_MEMORY_HOTPLUG stubs: the write-side lock/unlock and the
 * initialiser are empty so callers need no #ifdef of their own.
 */
static inline void zone_span_writelock(struct zone *zone) {}
static inline void zone_span_writeunlock(struct zone *zone) {}
static inline void zone_seqlock_init(struct zone *zone) {}
#endif /* ! CONFIG_MEMORY_HOTPLUG */
#endif /* __LINUX_MEMORY_HOTPLUG_H */ #endif /* __LINUX_MEMORY_HOTPLUG_H */
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/numa.h> #include <linux/numa.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/seqlock.h>
#include <asm/atomic.h> #include <asm/atomic.h>
/* Free memory management - zoned buddy allocator. */ /* Free memory management - zoned buddy allocator. */
...@@ -137,6 +138,10 @@ struct zone { ...@@ -137,6 +138,10 @@ struct zone {
* free areas of different sizes * free areas of different sizes
*/ */
spinlock_t lock; spinlock_t lock;
#ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */
seqlock_t span_seqlock;
#endif
struct free_area free_area[MAX_ORDER]; struct free_area free_area[MAX_ORDER];
...@@ -220,6 +225,16 @@ struct zone { ...@@ -220,6 +225,16 @@ struct zone {
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn; unsigned long zone_start_pfn;
/*
* zone_start_pfn, spanned_pages and present_pages are all
* protected by span_seqlock. It is a seqlock because it has
* to be read outside of zone->lock, and it is done in the main
* allocator path. But, it is written quite infrequently.
*
* The lock is declared along with zone->lock because it is
* frequently read in proximity to zone->lock. It's good to
* give them a chance of being in the same cacheline.
*/
unsigned long spanned_pages; /* total size, including holes */ unsigned long spanned_pages; /* total size, including holes */
unsigned long present_pages; /* amount of memory (excluding holes) */ unsigned long present_pages; /* amount of memory (excluding holes) */
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/cpuset.h> #include <linux/cpuset.h>
#include <linux/memory_hotplug.h>
#include <linux/nodemask.h> #include <linux/nodemask.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
...@@ -80,12 +81,19 @@ unsigned long __initdata nr_all_pages; ...@@ -80,12 +81,19 @@ unsigned long __initdata nr_all_pages;
static int page_outside_zone_boundaries(struct zone *zone, struct page *page) static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{ {
if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages) int ret = 0;
return 1; unsigned seq;
if (page_to_pfn(page) < zone->zone_start_pfn) unsigned long pfn = page_to_pfn(page);
return 1;
return 0; do {
seq = zone_span_seqbegin(zone);
if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
ret = 1;
else if (pfn < zone->zone_start_pfn)
ret = 1;
} while (zone_span_seqretry(zone, seq));
return ret;
} }
static int page_is_consistent(struct zone *zone, struct page *page) static int page_is_consistent(struct zone *zone, struct page *page)
...@@ -1980,6 +1988,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat, ...@@ -1980,6 +1988,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
zone->name = zone_names[j]; zone->name = zone_names[j];
spin_lock_init(&zone->lock); spin_lock_init(&zone->lock);
spin_lock_init(&zone->lru_lock); spin_lock_init(&zone->lru_lock);
zone_seqlock_init(zone);
zone->zone_pgdat = pgdat; zone->zone_pgdat = pgdat;
zone->free_pages = 0; zone->free_pages = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment