Commit a3f5c338 authored by Zou Nan hai, committed by Tony Luck

[IA64] min_low_pfn and max_low_pfn calculation fix

We have seen bad_pte_print when testing crashdump on an SN machine with a
recent 2.6.20 kernel.  There are tons of bad pte print (pfn < max_low_pfn)
reports when the crash kernel boots up, and all of the reported bad pages
are inside the initmem range.  That is because, if the crash kernel code and
data happen to be at the beginning of the 1st node, build_node_maps in
discontig.c will bypass reserved regions with filter_rsvd_memory.  Since
min_low_pfn is calculated in build_node_maps, in this case min_low_pfn
will be greater than the page frame numbers of the kernel code and data.

Because pages inside initmem are freed and reused later, we saw
pfn_valid check fail on those pages.

I think this can theoretically happen on a normal kernel.  When I checked
the min_low_pfn and max_low_pfn calculation in contig.c and discontig.c,
I found more issues than this.

1. The min_low_pfn and max_low_pfn calculation is inconsistent between
contig.c and discontig.c.
min_low_pfn is calculated as the first page number of the boot memmap in
contig.c (Why? Though this may work most of the time, I don't
think it is the right logic). It is calculated as the lowest physical
memory page number, bypassing reserved regions, in discontig.c.
max_low_pfn is calculated including reserved regions in contig.c. It is
calculated excluding reserved regions in discontig.c.

2. If the kernel code and data region happens to be at the beginning or the
end of physical memory, and the min_low_pfn and max_low_pfn calculation
bypasses the kernel code and data, pages in initmem will be reported as bad.

3. The initrd is also in the reserved regions; if it is at the beginning or
at the end of physical memory, the kernel will refuse to reuse the memory
because of the virt_addr_valid check in free_initrd_mem.

So it is better to fix and clean up those issues.
Calculate min_low_pfn and max_low_pfn in a consistent way.
Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
Acked-by: Jay Lan <jlan@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
parent be521466
...@@ -96,26 +96,6 @@ void show_mem(void) ...@@ -96,26 +96,6 @@ void show_mem(void)
/* physical address where the bootmem map is located */ /* physical address where the bootmem map is located */
unsigned long bootmap_start; unsigned long bootmap_start;
/**
* find_max_pfn - adjust the maximum page number callback
* @start: start of range (not referenced by this function)
* @end: end of range
* @arg: pointer to the unsigned long that holds the running maximum pfn
*
* Passed as a callback function to efi_memmap_walk() to determine the highest
* available page frame number in the system.
*
* Always returns 0.
*/
int
find_max_pfn (unsigned long start, unsigned long end, void *arg)
{
unsigned long *max_pfnp = arg, pfn;
/* derive the pfn from the last byte of the range (end - 1), relative to PAGE_OFFSET */
pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
/* the stored maximum is only ever raised, never lowered */
if (pfn > *max_pfnp)
*max_pfnp = pfn;
return 0;
}
/** /**
* find_bootmap_location - callback to find a memory area for the bootmap * find_bootmap_location - callback to find a memory area for the bootmap
* @start: start of region * @start: start of region
...@@ -177,9 +157,10 @@ find_memory (void) ...@@ -177,9 +157,10 @@ find_memory (void)
reserve_memory(); reserve_memory();
/* first find highest page frame number */ /* first find highest page frame number */
max_pfn = 0; min_low_pfn = ~0UL;
efi_memmap_walk(find_max_pfn, &max_pfn); max_low_pfn = 0;
efi_memmap_walk(find_max_min_low_pfn, NULL);
max_pfn = max_low_pfn;
/* how many bytes to cover all the pages */ /* how many bytes to cover all the pages */
bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT; bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
...@@ -189,7 +170,8 @@ find_memory (void) ...@@ -189,7 +170,8 @@ find_memory (void)
if (bootmap_start == ~0UL) if (bootmap_start == ~0UL)
panic("Cannot find %ld bytes for bootmap\n", bootmap_size); panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); bootmap_size = init_bootmem_node(NODE_DATA(0),
(bootmap_start >> PAGE_SHIFT), 0, max_pfn);
/* Free all available memory, then mark bootmem-map as being in use. */ /* Free all available memory, then mark bootmem-map as being in use. */
efi_memmap_walk(filter_rsvd_memory, free_bootmem); efi_memmap_walk(filter_rsvd_memory, free_bootmem);
......
...@@ -88,9 +88,6 @@ static int __init build_node_maps(unsigned long start, unsigned long len, ...@@ -88,9 +88,6 @@ static int __init build_node_maps(unsigned long start, unsigned long len,
bdp->node_low_pfn = max(epfn, bdp->node_low_pfn); bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
} }
min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);
return 0; return 0;
} }
...@@ -438,6 +435,7 @@ void __init find_memory(void) ...@@ -438,6 +435,7 @@ void __init find_memory(void)
/* These actually end up getting called by call_pernode_memory() */ /* These actually end up getting called by call_pernode_memory() */
efi_memmap_walk(filter_rsvd_memory, build_node_maps); efi_memmap_walk(filter_rsvd_memory, build_node_maps);
efi_memmap_walk(filter_rsvd_memory, find_pernode_space); efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
efi_memmap_walk(find_max_min_low_pfn, NULL);
for_each_online_node(node) for_each_online_node(node)
if (mem_data[node].bootmem_data.node_low_pfn) { if (mem_data[node].bootmem_data.node_low_pfn) {
......
...@@ -648,6 +648,22 @@ count_reserved_pages (u64 start, u64 end, void *arg) ...@@ -648,6 +648,22 @@ count_reserved_pages (u64 start, u64 end, void *arg)
return 0; return 0;
} }
/*
* find_max_min_low_pfn - callback that widens the global low-pfn bounds
* @start: start of range; converted with __pa() before use
* @end: end of range; the computation uses end - 1
* @arg: not referenced by this function
*
* Passed as a callback function to efi_memmap_walk().  For each range it
* lowers the global min_low_pfn to the range's starting pfn and raises the
* global max_low_pfn to the range's ending pfn when the range extends past
* the current bounds.  Both globals are only accumulated here, never reset.
*
* Always returns 0.
*/
int
find_max_min_low_pfn (unsigned long start, unsigned long end, void *arg)
{
unsigned long pfn_start, pfn_end;
#ifdef CONFIG_FLATMEM
/* CONFIG_FLATMEM: both bounds go through PAGE_ALIGN before the shift */
pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT;
pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT;
#else
/* otherwise: start uses GRANULEROUNDDOWN and end uses GRANULEROUNDUP */
pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT;
pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT;
#endif
/* fold this range into the running global minimum and maximum */
min_low_pfn = min(min_low_pfn, pfn_start);
max_low_pfn = max(max_low_pfn, pfn_end);
return 0;
}
/* /*
* Boot command-line option "nolwsys" can be used to disable the use of any light-weight * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
* system call handler. When this option is in effect, all fsyscalls will end up bubbling * system call handler. When this option is in effect, all fsyscalls will end up bubbling
......
...@@ -36,6 +36,7 @@ extern void reserve_memory (void); ...@@ -36,6 +36,7 @@ extern void reserve_memory (void);
extern void find_initrd (void); extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg); extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
extern void efi_memmap_init(unsigned long *, unsigned long *); extern void efi_memmap_init(unsigned long *, unsigned long *);
extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
extern unsigned long vmcore_find_descriptor_size(unsigned long address); extern unsigned long vmcore_find_descriptor_size(unsigned long address);
extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end); extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment