Commit 538f8ea6, authored by Nick Piggin, committed by Linus Torvalds

mm: xip fix fault vs sparse page invalidate race

XIP has a race between sparse pages being inserted into page tables, and
sparse pages being zapped when it's time to put a non-sparse page in.

What can happen is that a process can be left with a dangling sparse page
in a MAP_SHARED mapping, while the rest of the world sees the non-sparse
version, i.e. data corruption.

Guard these operations with a seqlock, making fault-in-sparse-pages the
slowpath, and try-to-unmap-sparse-pages the fastpath.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jared Hulbert <jaredeh@gmail.com>
Acked-by: Carsten Otte <cotte@freenet.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 479db0bf
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include <linux/rmap.h> #include <linux/rmap.h>
#include <linux/mmu_notifier.h> #include <linux/mmu_notifier.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/seqlock.h>
#include <linux/mutex.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/io.h> #include <asm/io.h>
...@@ -22,22 +24,18 @@ ...@@ -22,22 +24,18 @@
* We do use our own empty page to avoid interference with other users * We do use our own empty page to avoid interference with other users
* of ZERO_PAGE(), such as /dev/zero * of ZERO_PAGE(), such as /dev/zero
*/ */
static DEFINE_MUTEX(xip_sparse_mutex);
static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
static struct page *__xip_sparse_page; static struct page *__xip_sparse_page;
/* called under xip_sparse_mutex */
static struct page *xip_sparse_page(void) static struct page *xip_sparse_page(void)
{ {
if (!__xip_sparse_page) { if (!__xip_sparse_page) {
struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
if (page) { if (page)
static DEFINE_SPINLOCK(xip_alloc_lock);
spin_lock(&xip_alloc_lock);
if (!__xip_sparse_page)
__xip_sparse_page = page; __xip_sparse_page = page;
else
__free_page(page);
spin_unlock(&xip_alloc_lock);
}
} }
return __xip_sparse_page; return __xip_sparse_page;
} }
...@@ -174,11 +172,16 @@ __xip_unmap (struct address_space * mapping, ...@@ -174,11 +172,16 @@ __xip_unmap (struct address_space * mapping,
pte_t pteval; pte_t pteval;
spinlock_t *ptl; spinlock_t *ptl;
struct page *page; struct page *page;
unsigned count;
int locked = 0;
count = read_seqcount_begin(&xip_sparse_seq);
page = __xip_sparse_page; page = __xip_sparse_page;
if (!page) if (!page)
return; return;
retry:
spin_lock(&mapping->i_mmap_lock); spin_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
mm = vma->vm_mm; mm = vma->vm_mm;
...@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping, ...@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping,
} }
} }
spin_unlock(&mapping->i_mmap_lock); spin_unlock(&mapping->i_mmap_lock);
if (locked) {
mutex_unlock(&xip_sparse_mutex);
} else if (read_seqcount_retry(&xip_sparse_seq, count)) {
mutex_lock(&xip_sparse_mutex);
locked = 1;
goto retry;
}
} }
/* /*
...@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
int error; int error;
/* XXX: are VM_FAULT_ codes OK? */ /* XXX: are VM_FAULT_ codes OK? */
again:
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (vmf->pgoff >= size) if (vmf->pgoff >= size)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
...@@ -245,6 +256,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -245,6 +256,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
__xip_unmap(mapping, vmf->pgoff); __xip_unmap(mapping, vmf->pgoff);
found: found:
printk("%s insert %lx@%lx\n", current->comm, (unsigned long)vmf->virtual_address, xip_pfn);
err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
xip_pfn); xip_pfn);
if (err == -ENOMEM) if (err == -ENOMEM)
...@@ -252,14 +264,34 @@ found: ...@@ -252,14 +264,34 @@ found:
BUG_ON(err); BUG_ON(err);
return VM_FAULT_NOPAGE; return VM_FAULT_NOPAGE;
} else { } else {
int err, ret = VM_FAULT_OOM;
mutex_lock(&xip_sparse_mutex);
write_seqcount_begin(&xip_sparse_seq);
error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
&xip_mem, &xip_pfn);
if (unlikely(!error)) {
write_seqcount_end(&xip_sparse_seq);
mutex_unlock(&xip_sparse_mutex);
goto again;
}
if (error != -ENODATA)
goto out;
/* not shared and writable, use xip_sparse_page() */ /* not shared and writable, use xip_sparse_page() */
page = xip_sparse_page(); page = xip_sparse_page();
if (!page) if (!page)
return VM_FAULT_OOM; goto out;
err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
page);
if (err == -ENOMEM)
goto out;
page_cache_get(page); ret = VM_FAULT_NOPAGE;
vmf->page = page; out:
return 0; write_seqcount_end(&xip_sparse_seq);
mutex_unlock(&xip_sparse_mutex);
return ret;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment