Commit 16abfa08 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

Fix sys_remap_file_pages BUG at highmem.c:15!

Gurudas Pai reports kernel BUG at arch/i386/mm/highmem.c:15! below
sys_remap_file_pages, while running Oracle database test on x86 in 6GB
RAM: kunmap thinks we're in_interrupt because the preempt count has
wrapped.

That's because __do_fault expected to unmap page_table, but one of its
two callers do_nonlinear_fault already unmapped it: let do_linear_fault
unmap it first too, and then there's no need to pass the page_table arg
down.

Why have we been so slow to notice this? Probably through forgetting
that the mapping_cap_account_dirty test means that sys_remap_file_pages
nowadays only goes the full nonlinear vma route on a few memory-backed
filesystems like ramfs, tmpfs and hugetlbfs.

[ It also depends on CONFIG_HIGHPTE, so it becomes even harder to
  trigger in practice. Many who have need of large memory have probably
  migrated to x86-64..

  Problem introduced by commit d0217ac0
  ("mm: fault feedback #1")                -- Linus ]
Signed-off-by: default avatarHugh Dickins <hugh@veritas.com>
Cc: gurudas pai <gurudas.pai@oracle.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 804b3f9a
...@@ -2307,13 +2307,14 @@ oom: ...@@ -2307,13 +2307,14 @@ oom:
* do not need to flush old virtual caches or the TLB. * do not need to flush old virtual caches or the TLB.
* *
* We enter with non-exclusive mmap_sem (to exclude vma changes, * We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked. * but allow concurrent faults), and pte neither mapped nor locked.
* We return with mmap_sem still held, but pte unmapped and unlocked. * We return with mmap_sem still held, but pte unmapped and unlocked.
*/ */
static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned long address, pmd_t *pmd,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte) pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{ {
pte_t *page_table;
spinlock_t *ptl; spinlock_t *ptl;
struct page *page; struct page *page;
pte_t entry; pte_t entry;
...@@ -2327,7 +2328,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2327,7 +2328,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
vmf.flags = flags; vmf.flags = flags;
vmf.page = NULL; vmf.page = NULL;
pte_unmap(page_table);
BUG_ON(vma->vm_flags & VM_PFNMAP); BUG_ON(vma->vm_flags & VM_PFNMAP);
if (likely(vma->vm_ops->fault)) { if (likely(vma->vm_ops->fault)) {
...@@ -2468,8 +2468,8 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2468,8 +2468,8 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
return __do_fault(mm, vma, address, page_table, pmd, pgoff, pte_unmap(page_table);
flags, orig_pte); return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
} }
...@@ -2552,9 +2552,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2552,9 +2552,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
} }
pgoff = pte_to_pgoff(orig_pte); pgoff = pte_to_pgoff(orig_pte);
return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
return __do_fault(mm, vma, address, page_table, pmd, pgoff,
flags, orig_pte);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment