Commit d0217ac0 authored by Nick Piggin, committed by Linus Torvalds

mm: fault feedback #1

Change ->fault prototype.  We now return an int, which contains
VM_FAULT_xxx code in the low byte, and FAULT_RET_xxx code in the next byte.
The FAULT_RET_ code tells the VM whether a page was found, whether it has been
locked, and potentially other things.  This is not quite the way he wanted
it yet, but that's changed in the next patch (which requires changes to
arch code).

This means we no longer set VM_CAN_INVALIDATE in the vma in order to say
that a page is locked which requires filemap_nopage to go away (because we
can no longer remain backward compatible without that flag), but we were
going to do that anyway.

struct fault_data is renamed to struct vm_fault as Linus asked. address
is now a void __user * that we should firmly encourage drivers not to use
without really good reason.

The page is now returned via a page pointer in the vm_fault struct.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent ed2f2f9b
...@@ -135,26 +135,8 @@ Who: Greg Kroah-Hartman <gregkh@suse.de> ...@@ -135,26 +135,8 @@ Who: Greg Kroah-Hartman <gregkh@suse.de>
--------------------------- ---------------------------
What: filemap_nopage, filemap_populate
When: April 2007
Why: These legacy interfaces no longer have any callers in the kernel and
any functionality provided can be provided with filemap_fault. The
removal schedule is short because they are a big maintenance burden
and have some bugs.
Who: Nick Piggin <npiggin@suse.de>
---------------------------
What: vm_ops.populate, install_page
When: April 2007
Why: These legacy interfaces no longer have any callers in the kernel and
any functionality provided can be provided with vm_ops.fault.
Who: Nick Piggin <npiggin@suse.de>
---------------------------
What: vm_ops.nopage What: vm_ops.nopage
When: February 2008, provided in-kernel callers have been converted When: Soon, provided in-kernel callers have been converted
Why: This interface is replaced by vm_ops.fault, but it has been around Why: This interface is replaced by vm_ops.fault, but it has been around
forever, is used by a lot of drivers, and doesn't cost much to forever, is used by a lot of drivers, and doesn't cost much to
maintain. maintain.
......
...@@ -510,7 +510,7 @@ More details about quota locking can be found in fs/dquot.c. ...@@ -510,7 +510,7 @@ More details about quota locking can be found in fs/dquot.c.
prototypes: prototypes:
void (*open)(struct vm_area_struct*); void (*open)(struct vm_area_struct*);
void (*close)(struct vm_area_struct*); void (*close)(struct vm_area_struct*);
struct page *(*fault)(struct vm_area_struct*, struct fault_data *); int (*fault)(struct vm_area_struct*, struct vm_fault *);
struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *); struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
int (*page_mkwrite)(struct vm_area_struct *, struct page *); int (*page_mkwrite)(struct vm_area_struct *, struct page *);
......
...@@ -364,8 +364,6 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -364,8 +364,6 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
else else
vma->vm_ops = &gfs2_vm_ops_private; vma->vm_ops = &gfs2_vm_ops_private;
vma->vm_flags |= VM_CAN_INVALIDATE|VM_CAN_NONLINEAR;
gfs2_glock_dq_uninit(&i_gh); gfs2_glock_dq_uninit(&i_gh);
return error; return error;
......
...@@ -27,13 +27,12 @@ ...@@ -27,13 +27,12 @@
#include "trans.h" #include "trans.h"
#include "util.h" #include "util.h"
static struct page *gfs2_private_fault(struct vm_area_struct *vma, static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct fault_data *fdata)
{ {
struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host);
set_bit(GIF_PAGED, &ip->i_flags); set_bit(GIF_PAGED, &ip->i_flags);
return filemap_fault(vma, fdata); return filemap_fault(vma, vmf);
} }
static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
...@@ -104,55 +103,55 @@ out: ...@@ -104,55 +103,55 @@ out:
return error; return error;
} }
static struct page *gfs2_sharewrite_fault(struct vm_area_struct *vma, static int gfs2_sharewrite_fault(struct vm_area_struct *vma,
struct fault_data *fdata) struct vm_fault *vmf)
{ {
struct file *file = vma->vm_file; struct file *file = vma->vm_file;
struct gfs2_file *gf = file->private_data; struct gfs2_file *gf = file->private_data;
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
struct gfs2_holder i_gh; struct gfs2_holder i_gh;
struct page *result = NULL;
int alloc_required; int alloc_required;
int error; int error;
int ret = VM_FAULT_MINOR;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
if (error) if (error)
return NULL; goto out;
set_bit(GIF_PAGED, &ip->i_flags); set_bit(GIF_PAGED, &ip->i_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags); set_bit(GIF_SW_PAGED, &ip->i_flags);
error = gfs2_write_alloc_required(ip, error = gfs2_write_alloc_required(ip,
(u64)fdata->pgoff << PAGE_CACHE_SHIFT, (u64)vmf->pgoff << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE, &alloc_required); PAGE_CACHE_SIZE, &alloc_required);
if (error) { if (error) {
fdata->type = VM_FAULT_OOM; /* XXX: are these right? */ ret = VM_FAULT_OOM; /* XXX: are these right? */
goto out; goto out_unlock;
} }
set_bit(GFF_EXLOCK, &gf->f_flags); set_bit(GFF_EXLOCK, &gf->f_flags);
result = filemap_fault(vma, fdata); ret = filemap_fault(vma, vmf);
clear_bit(GFF_EXLOCK, &gf->f_flags); clear_bit(GFF_EXLOCK, &gf->f_flags);
if (!result) if (ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE))
goto out; goto out_unlock;
if (alloc_required) { if (alloc_required) {
error = alloc_page_backing(ip, result); /* XXX: do we need to drop page lock around alloc_page_backing?*/
error = alloc_page_backing(ip, vmf->page);
if (error) { if (error) {
if (vma->vm_flags & VM_CAN_INVALIDATE) if (ret & FAULT_RET_LOCKED)
unlock_page(result); unlock_page(vmf->page);
page_cache_release(result); page_cache_release(vmf->page);
fdata->type = VM_FAULT_OOM; ret = VM_FAULT_OOM;
result = NULL; goto out_unlock;
goto out;
} }
set_page_dirty(result); set_page_dirty(vmf->page);
} }
out: out_unlock:
gfs2_glock_dq_uninit(&i_gh); gfs2_glock_dq_uninit(&i_gh);
out:
return result; return ret;
} }
struct vm_operations_struct gfs2_vm_ops_private = { struct vm_operations_struct gfs2_vm_ops_private = {
......
...@@ -24,33 +24,35 @@ ...@@ -24,33 +24,35 @@
/* /*
* Fill in the supplied page for mmap * Fill in the supplied page for mmap
* XXX: how are we excluding truncate/invalidate here? Maybe need to lock
* page?
*/ */
static struct page* ncp_file_mmap_fault(struct vm_area_struct *area, static int ncp_file_mmap_fault(struct vm_area_struct *area,
struct fault_data *fdata) struct vm_fault *vmf)
{ {
struct file *file = area->vm_file; struct file *file = area->vm_file;
struct dentry *dentry = file->f_path.dentry; struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
struct page* page;
char *pg_addr; char *pg_addr;
unsigned int already_read; unsigned int already_read;
unsigned int count; unsigned int count;
int bufsize; int bufsize;
int pos; int pos; /* XXX: loff_t ? */
page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages /*
as long as recvmsg and memset works on it */ * ncpfs has nothing against high pages as long
if (!page) { * as recvmsg and memset works on it
fdata->type = VM_FAULT_OOM; */
return NULL; vmf->page = alloc_page(GFP_HIGHUSER);
} if (!vmf->page)
pg_addr = kmap(page); return VM_FAULT_OOM;
pos = fdata->pgoff << PAGE_SHIFT; pg_addr = kmap(vmf->page);
pos = vmf->pgoff << PAGE_SHIFT;
count = PAGE_SIZE; count = PAGE_SIZE;
if (fdata->address + PAGE_SIZE > area->vm_end) { if ((unsigned long)vmf->virtual_address + PAGE_SIZE > area->vm_end) {
WARN_ON(1); /* shouldn't happen? */ WARN_ON(1); /* shouldn't happen? */
count = area->vm_end - fdata->address; count = area->vm_end - (unsigned long)vmf->virtual_address;
} }
/* what we can read in one go */ /* what we can read in one go */
bufsize = NCP_SERVER(inode)->buffer_size; bufsize = NCP_SERVER(inode)->buffer_size;
...@@ -85,17 +87,16 @@ static struct page* ncp_file_mmap_fault(struct vm_area_struct *area, ...@@ -85,17 +87,16 @@ static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
if (already_read < PAGE_SIZE) if (already_read < PAGE_SIZE)
memset(pg_addr + already_read, 0, PAGE_SIZE - already_read); memset(pg_addr + already_read, 0, PAGE_SIZE - already_read);
flush_dcache_page(page); flush_dcache_page(vmf->page);
kunmap(page); kunmap(vmf->page);
/* /*
* If I understand ncp_read_kernel() properly, the above always * If I understand ncp_read_kernel() properly, the above always
* fetches from the network, here the analogue of disk. * fetches from the network, here the analogue of disk.
* -- wli * -- wli
*/ */
fdata->type = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT); count_vm_event(PGMAJFAULT);
return page; return VM_FAULT_MAJOR;
} }
static struct vm_operations_struct ncp_file_mmap = static struct vm_operations_struct ncp_file_mmap =
...@@ -124,7 +125,6 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -124,7 +125,6 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma)
return -EFBIG; return -EFBIG;
vma->vm_ops = &ncp_file_mmap; vma->vm_ops = &ncp_file_mmap;
vma->vm_flags |= VM_CAN_INVALIDATE;
file_accessed(file); file_accessed(file);
return 0; return 0;
} }
...@@ -60,30 +60,28 @@ static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) ...@@ -60,30 +60,28 @@ static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
return sigprocmask(SIG_SETMASK, oldset, NULL); return sigprocmask(SIG_SETMASK, oldset, NULL);
} }
static struct page *ocfs2_fault(struct vm_area_struct *area, static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
struct fault_data *fdata)
{ {
struct page *page = NULL;
sigset_t blocked, oldset; sigset_t blocked, oldset;
int ret; int error, ret;
mlog_entry("(area=%p, page offset=%lu)\n", area, fdata->pgoff); mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);
ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); error = ocfs2_vm_op_block_sigs(&blocked, &oldset);
if (ret < 0) { if (error < 0) {
fdata->type = VM_FAULT_SIGBUS; mlog_errno(error);
mlog_errno(ret); ret = VM_FAULT_SIGBUS;
goto out; goto out;
} }
page = filemap_fault(area, fdata); ret = filemap_fault(area, vmf);
ret = ocfs2_vm_op_unblock_sigs(&oldset); error = ocfs2_vm_op_unblock_sigs(&oldset);
if (ret < 0) if (error < 0)
mlog_errno(ret); mlog_errno(error);
out: out:
mlog_exit_ptr(page); mlog_exit_ptr(vmf->page);
return page; return ret;
} }
static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
...@@ -225,7 +223,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -225,7 +223,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level);
out: out:
vma->vm_ops = &ocfs2_file_vm_ops; vma->vm_ops = &ocfs2_file_vm_ops;
vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; vma->vm_flags |= VM_CAN_NONLINEAR;
return 0; return 0;
} }
...@@ -212,20 +212,18 @@ xfs_file_fsync( ...@@ -212,20 +212,18 @@ xfs_file_fsync(
} }
#ifdef CONFIG_XFS_DMAPI #ifdef CONFIG_XFS_DMAPI
STATIC struct page * STATIC int
xfs_vm_fault( xfs_vm_fault(
struct vm_area_struct *vma, struct vm_area_struct *vma,
struct fault_data *fdata) struct vm_fault *vmf)
{ {
struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
bhv_vnode_t *vp = vn_from_inode(inode); bhv_vnode_t *vp = vn_from_inode(inode);
ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) { if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0))
fdata->type = VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
return NULL; return filemap_fault(vma, vmf);
}
return filemap_fault(vma, fdata);
} }
#endif /* CONFIG_XFS_DMAPI */ #endif /* CONFIG_XFS_DMAPI */
...@@ -311,7 +309,7 @@ xfs_file_mmap( ...@@ -311,7 +309,7 @@ xfs_file_mmap(
struct vm_area_struct *vma) struct vm_area_struct *vma)
{ {
vma->vm_ops = &xfs_file_vm_ops; vma->vm_ops = &xfs_file_vm_ops;
vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; vma->vm_flags |= VM_CAN_NONLINEAR;
#ifdef CONFIG_XFS_DMAPI #ifdef CONFIG_XFS_DMAPI
if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI) if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
......
...@@ -168,12 +168,7 @@ extern unsigned int kobjsize(const void *objp); ...@@ -168,12 +168,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */
#define VM_CAN_INVALIDATE 0x08000000 /* The mapping may be invalidated, #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */
* eg. truncate or invalidate_inode_*.
* In this case, do_no_page must
* return with the page locked.
*/
#define VM_CAN_NONLINEAR 0x10000000 /* Has ->fault & does nonlinear pages */
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
...@@ -197,24 +192,44 @@ extern unsigned int kobjsize(const void *objp); ...@@ -197,24 +192,44 @@ extern unsigned int kobjsize(const void *objp);
*/ */
extern pgprot_t protection_map[16]; extern pgprot_t protection_map[16];
#define FAULT_FLAG_WRITE 0x01 #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
#define FAULT_FLAG_NONLINEAR 0x02 #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
#define FAULT_RET_NOPAGE 0x0100 /* ->fault did not return a page. This
* can be used if the handler installs
* their own pte.
*/
#define FAULT_RET_LOCKED 0x0200 /* ->fault locked the page, caller must
* unlock after installing the mapping.
* This is used by pagecache in
* particular, where the page lock is
* used to synchronise against truncate
* and invalidate. Mutually exclusive
* with FAULT_RET_NOPAGE.
*/
/* /*
* fault_data is filled in by the pagefault handler and passed to the * vm_fault is filled by the pagefault handler and passed to the vma's
* vma's ->fault function. That function is responsible for filling in * ->fault function. The vma's ->fault is responsible for returning the
* 'type', which is the type of fault if a page is returned, or the type * VM_FAULT_xxx type which occupies the lowest byte of the return code, ORed
* of error if NULL is returned. * with FAULT_RET_ flags that occupy the next byte and give details about
* how the fault was handled.
* *
* pgoff should be used in favour of address, if possible. If pgoff is * pgoff should be used in favour of virtual_address, if possible. If pgoff
* used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear
* nonlinear mapping support. * mapping support.
*/ */
struct fault_data { struct vm_fault {
unsigned long address; unsigned int flags; /* FAULT_FLAG_xxx flags */
pgoff_t pgoff; pgoff_t pgoff; /* Logical page offset based on vma */
unsigned int flags; void __user *virtual_address; /* Faulting virtual address */
int type;
struct page *page; /* ->fault handlers should return a
* page here, unless FAULT_RET_NOPAGE
* is set (which is also implied by
* VM_FAULT_OOM or SIGBUS).
*/
}; };
/* /*
...@@ -225,15 +240,11 @@ struct fault_data { ...@@ -225,15 +240,11 @@ struct fault_data {
struct vm_operations_struct { struct vm_operations_struct {
void (*open)(struct vm_area_struct * area); void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area);
struct page *(*fault)(struct vm_area_struct *vma, int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
struct fault_data *fdata);
struct page *(*nopage)(struct vm_area_struct *area, struct page *(*nopage)(struct vm_area_struct *area,
unsigned long address, int *type); unsigned long address, int *type);
unsigned long (*nopfn)(struct vm_area_struct *area, unsigned long (*nopfn)(struct vm_area_struct *area,
unsigned long address); unsigned long address);
int (*populate)(struct vm_area_struct *area, unsigned long address,
unsigned long len, pgprot_t prot, unsigned long pgoff,
int nonblock);
/* notification that a previously read-only page is about to become /* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */ * writable, if an error is returned it will cause a SIGBUS */
...@@ -700,8 +711,14 @@ static inline int page_mapped(struct page *page) ...@@ -700,8 +711,14 @@ static inline int page_mapped(struct page *page)
* Used to decide whether a process gets delivered SIGBUS or * Used to decide whether a process gets delivered SIGBUS or
* just gets major/minor fault counters bumped up. * just gets major/minor fault counters bumped up.
*/ */
#define VM_FAULT_OOM 0x00
#define VM_FAULT_SIGBUS 0x01 /*
* VM_FAULT_ERROR is set for the error cases, to make some tests simpler.
*/
#define VM_FAULT_ERROR 0x20
#define VM_FAULT_OOM (0x00 | VM_FAULT_ERROR)
#define VM_FAULT_SIGBUS (0x01 | VM_FAULT_ERROR)
#define VM_FAULT_MINOR 0x02 #define VM_FAULT_MINOR 0x02
#define VM_FAULT_MAJOR 0x03 #define VM_FAULT_MAJOR 0x03
...@@ -711,6 +728,11 @@ static inline int page_mapped(struct page *page) ...@@ -711,6 +728,11 @@ static inline int page_mapped(struct page *page)
*/ */
#define VM_FAULT_WRITE 0x10 #define VM_FAULT_WRITE 0x10
/*
* Mask of VM_FAULT_ flags
*/
#define VM_FAULT_MASK 0xff
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
extern void show_free_areas(void); extern void show_free_areas(void);
...@@ -793,8 +815,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, ...@@ -793,8 +815,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate(struct inode * inode, loff_t offset);
extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma,
...@@ -1135,11 +1155,7 @@ extern void truncate_inode_pages_range(struct address_space *, ...@@ -1135,11 +1155,7 @@ extern void truncate_inode_pages_range(struct address_space *,
loff_t lstart, loff_t lend); loff_t lstart, loff_t lend);
/* generic vm_area_ops exported for stackable file systems */ /* generic vm_area_ops exported for stackable file systems */
extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *); extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
extern struct page * __deprecated_for_modules
filemap_nopage(struct vm_area_struct *, unsigned long, int *);
extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *,
unsigned long, unsigned long, pgprot_t, unsigned long, int);
/* mm/page-writeback.c */ /* mm/page-writeback.c */
int write_one_page(struct page *page, int wait); int write_one_page(struct page *page, int wait);
......
...@@ -224,13 +224,12 @@ static void shm_close(struct vm_area_struct *vma) ...@@ -224,13 +224,12 @@ static void shm_close(struct vm_area_struct *vma)
mutex_unlock(&shm_ids(ns).mutex); mutex_unlock(&shm_ids(ns).mutex);
} }
static struct page *shm_fault(struct vm_area_struct *vma, static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct fault_data *fdata)
{ {
struct file *file = vma->vm_file; struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file); struct shm_file_data *sfd = shm_file_data(file);
return sfd->vm_ops->fault(vma, fdata); return sfd->vm_ops->fault(vma, vmf);
} }
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
......
...@@ -1302,8 +1302,8 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset) ...@@ -1302,8 +1302,8 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
/** /**
* filemap_fault - read in file data for page fault handling * filemap_fault - read in file data for page fault handling
* @vma: user vma (not used) * @vma: vma in which the fault was taken
* @fdata: the applicable fault_data * @vmf: struct vm_fault containing details of the fault
* *
* filemap_fault() is invoked via the vma operations vector for a * filemap_fault() is invoked via the vma operations vector for a
* mapped memory region to read in file data during a page fault. * mapped memory region to read in file data during a page fault.
...@@ -1312,7 +1312,7 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset) ...@@ -1312,7 +1312,7 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
* it in the page cache, and handles the special cases reasonably without * it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code. * having a lot of duplicated code.
*/ */
struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
int error; int error;
struct file *file = vma->vm_file; struct file *file = vma->vm_file;
...@@ -1322,13 +1322,12 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) ...@@ -1322,13 +1322,12 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
struct page *page; struct page *page;
unsigned long size; unsigned long size;
int did_readaround = 0; int did_readaround = 0;
int ret;
fdata->type = VM_FAULT_MINOR; ret = VM_FAULT_MINOR;
BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (fdata->pgoff >= size) if (vmf->pgoff >= size)
goto outside_data_content; goto outside_data_content;
/* If we don't want any read-ahead, don't bother */ /* If we don't want any read-ahead, don't bother */
...@@ -1342,18 +1341,18 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) ...@@ -1342,18 +1341,18 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
* For sequential accesses, we use the generic readahead logic. * For sequential accesses, we use the generic readahead logic.
*/ */
if (VM_SequentialReadHint(vma)) if (VM_SequentialReadHint(vma))
page_cache_readahead(mapping, ra, file, fdata->pgoff, 1); page_cache_readahead(mapping, ra, file, vmf->pgoff, 1);
/* /*
* Do we have something in the page cache already? * Do we have something in the page cache already?
*/ */
retry_find: retry_find:
page = find_lock_page(mapping, fdata->pgoff); page = find_lock_page(mapping, vmf->pgoff);
if (!page) { if (!page) {
unsigned long ra_pages; unsigned long ra_pages;
if (VM_SequentialReadHint(vma)) { if (VM_SequentialReadHint(vma)) {
handle_ra_miss(mapping, ra, fdata->pgoff); handle_ra_miss(mapping, ra, vmf->pgoff);
goto no_cached_page; goto no_cached_page;
} }
ra->mmap_miss++; ra->mmap_miss++;
...@@ -1370,7 +1369,7 @@ retry_find: ...@@ -1370,7 +1369,7 @@ retry_find:
* check did_readaround, as this is an inner loop. * check did_readaround, as this is an inner loop.
*/ */
if (!did_readaround) { if (!did_readaround) {
fdata->type = VM_FAULT_MAJOR; ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT); count_vm_event(PGMAJFAULT);
} }
did_readaround = 1; did_readaround = 1;
...@@ -1378,11 +1377,11 @@ retry_find: ...@@ -1378,11 +1377,11 @@ retry_find:
if (ra_pages) { if (ra_pages) {
pgoff_t start = 0; pgoff_t start = 0;
if (fdata->pgoff > ra_pages / 2) if (vmf->pgoff > ra_pages / 2)
start = fdata->pgoff - ra_pages / 2; start = vmf->pgoff - ra_pages / 2;
do_page_cache_readahead(mapping, file, start, ra_pages); do_page_cache_readahead(mapping, file, start, ra_pages);
} }
page = find_lock_page(mapping, fdata->pgoff); page = find_lock_page(mapping, vmf->pgoff);
if (!page) if (!page)
goto no_cached_page; goto no_cached_page;
} }
...@@ -1399,7 +1398,7 @@ retry_find: ...@@ -1399,7 +1398,7 @@ retry_find:
/* Must recheck i_size under page lock */ /* Must recheck i_size under page lock */
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (unlikely(fdata->pgoff >= size)) { if (unlikely(vmf->pgoff >= size)) {
unlock_page(page); unlock_page(page);
goto outside_data_content; goto outside_data_content;
} }
...@@ -1408,24 +1407,24 @@ retry_find: ...@@ -1408,24 +1407,24 @@ retry_find:
* Found the page and have a reference on it. * Found the page and have a reference on it.
*/ */
mark_page_accessed(page); mark_page_accessed(page);
return page; vmf->page = page;
return ret | FAULT_RET_LOCKED;
outside_data_content: outside_data_content:
/* /*
* An external ptracer can access pages that normally aren't * An external ptracer can access pages that normally aren't
* accessible.. * accessible..
*/ */
if (vma->vm_mm == current->mm) { if (vma->vm_mm == current->mm)
fdata->type = VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
return NULL;
}
/* Fall through to the non-read-ahead case */ /* Fall through to the non-read-ahead case */
no_cached_page: no_cached_page:
/* /*
* We're only likely to ever get here if MADV_RANDOM is in * We're only likely to ever get here if MADV_RANDOM is in
* effect. * effect.
*/ */
error = page_cache_read(file, fdata->pgoff); error = page_cache_read(file, vmf->pgoff);
/* /*
* The page we want has now been added to the page cache. * The page we want has now been added to the page cache.
...@@ -1441,15 +1440,13 @@ no_cached_page: ...@@ -1441,15 +1440,13 @@ no_cached_page:
* to schedule I/O. * to schedule I/O.
*/ */
if (error == -ENOMEM) if (error == -ENOMEM)
fdata->type = VM_FAULT_OOM; return VM_FAULT_OOM;
else return VM_FAULT_SIGBUS;
fdata->type = VM_FAULT_SIGBUS;
return NULL;
page_not_uptodate: page_not_uptodate:
/* IO error path */ /* IO error path */
if (!did_readaround) { if (!did_readaround) {
fdata->type = VM_FAULT_MAJOR; ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT); count_vm_event(PGMAJFAULT);
} }
...@@ -1468,206 +1465,10 @@ page_not_uptodate: ...@@ -1468,206 +1465,10 @@ page_not_uptodate:
/* Things didn't work out. Return zero to tell the mm layer so. */ /* Things didn't work out. Return zero to tell the mm layer so. */
shrink_readahead_size_eio(file, ra); shrink_readahead_size_eio(file, ra);
fdata->type = VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
return NULL;
} }
EXPORT_SYMBOL(filemap_fault); EXPORT_SYMBOL(filemap_fault);
/*
* filemap_nopage and filemap_populate are legacy exports that are not used
* in tree. Scheduled for removal.
*/
struct page *filemap_nopage(struct vm_area_struct *area,
unsigned long address, int *type)
{
struct page *page;
struct fault_data fdata;
fdata.address = address;
fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
+ area->vm_pgoff;
fdata.flags = 0;
page = filemap_fault(area, &fdata);
if (type)
*type = fdata.type;
return page;
}
EXPORT_SYMBOL(filemap_nopage);
static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
int nonblock)
{
struct address_space *mapping = file->f_mapping;
struct page *page;
int error;
/*
* Do we have something in the page cache already?
*/
retry_find:
page = find_get_page(mapping, pgoff);
if (!page) {
if (nonblock)
return NULL;
goto no_cached_page;
}
/*
* Ok, found a page in the page cache, now we need to check
* that it's up-to-date.
*/
if (!PageUptodate(page)) {
if (nonblock) {
page_cache_release(page);
return NULL;
}
goto page_not_uptodate;
}
success:
/*
* Found the page and have a reference on it.
*/
mark_page_accessed(page);
return page;
no_cached_page:
error = page_cache_read(file, pgoff);
/*
* The page we want has now been added to the page cache.
* In the unlikely event that someone removed it in the
* meantime, we'll just come back here and read it again.
*/
if (error >= 0)
goto retry_find;
/*
* An error return from page_cache_read can result if the
* system is low on memory, or a problem occurs while trying
* to schedule I/O.
*/
return NULL;
page_not_uptodate:
lock_page(page);
/* Did it get truncated while we waited for it? */
if (!page->mapping) {
unlock_page(page);
goto err;
}
/* Did somebody else get it up-to-date? */
if (PageUptodate(page)) {
unlock_page(page);
goto success;
}
error = mapping->a_ops->readpage(file, page);
if (!error) {
wait_on_page_locked(page);
if (PageUptodate(page))
goto success;
} else if (error == AOP_TRUNCATED_PAGE) {
page_cache_release(page);
goto retry_find;
}
/*
* Umm, take care of errors if the page isn't up-to-date.
* Try to re-read it _once_. We do this synchronously,
* because there really aren't any performance issues here
* and we need to check for errors.
*/
lock_page(page);
/* Somebody truncated the page on us? */
if (!page->mapping) {
unlock_page(page);
goto err;
}
/* Somebody else successfully read it in? */
if (PageUptodate(page)) {
unlock_page(page);
goto success;
}
ClearPageError(page);
error = mapping->a_ops->readpage(file, page);
if (!error) {
wait_on_page_locked(page);
if (PageUptodate(page))
goto success;
} else if (error == AOP_TRUNCATED_PAGE) {
page_cache_release(page);
goto retry_find;
}
/*
* Things didn't work out. Return zero to tell the
* mm layer so, possibly freeing the page cache page first.
*/
err:
page_cache_release(page);
return NULL;
}
int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
unsigned long len, pgprot_t prot, unsigned long pgoff,
int nonblock)
{
struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
unsigned long size;
struct mm_struct *mm = vma->vm_mm;
struct page *page;
int err;
if (!nonblock)
force_page_cache_readahead(mapping, vma->vm_file,
pgoff, len >> PAGE_CACHE_SHIFT);
repeat:
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
return -EINVAL;
page = filemap_getpage(file, pgoff, nonblock);
/* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as
* done in shmem_populate calling shmem_getpage */
if (!page && !nonblock)
return -ENOMEM;
if (page) {
err = install_page(mm, vma, addr, page, prot);
if (err) {
page_cache_release(page);
return err;
}
} else if (vma->vm_flags & VM_NONLINEAR) {
/* No page was found just because we can't read it in now (being
* here implies nonblock != 0), but the page may exist, so set
* the PTE to fault it in later. */
err = install_file_pte(mm, vma, addr, pgoff, prot);
if (err)
return err;
}
len -= PAGE_SIZE;
addr += PAGE_SIZE;
pgoff++;
if (len)
goto repeat;
return 0;
}
EXPORT_SYMBOL(filemap_populate);
struct vm_operations_struct generic_file_vm_ops = { struct vm_operations_struct generic_file_vm_ops = {
.fault = filemap_fault, .fault = filemap_fault,
}; };
...@@ -1682,7 +1483,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma) ...@@ -1682,7 +1483,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
return -ENOEXEC; return -ENOEXEC;
file_accessed(file); file_accessed(file);
vma->vm_ops = &generic_file_vm_ops; vma->vm_ops = &generic_file_vm_ops;
vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; vma->vm_flags |= VM_CAN_NONLINEAR;
return 0; return 0;
} }
......
...@@ -210,8 +210,7 @@ __xip_unmap (struct address_space * mapping, ...@@ -210,8 +210,7 @@ __xip_unmap (struct address_space * mapping,
* *
* This function is derived from filemap_fault, but used for execute in place * This function is derived from filemap_fault, but used for execute in place
*/ */
static struct page *xip_file_fault(struct vm_area_struct *area, static int xip_file_fault(struct vm_area_struct *area, struct vm_fault *vmf)
struct fault_data *fdata)
{ {
struct file *file = area->vm_file; struct file *file = area->vm_file;
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
...@@ -222,19 +221,15 @@ static struct page *xip_file_fault(struct vm_area_struct *area, ...@@ -222,19 +221,15 @@ static struct page *xip_file_fault(struct vm_area_struct *area,
/* XXX: are VM_FAULT_ codes OK? */ /* XXX: are VM_FAULT_ codes OK? */
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (fdata->pgoff >= size) { if (vmf->pgoff >= size)
fdata->type = VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
return NULL;
}
page = mapping->a_ops->get_xip_page(mapping, page = mapping->a_ops->get_xip_page(mapping,
fdata->pgoff*(PAGE_SIZE/512), 0); vmf->pgoff*(PAGE_SIZE/512), 0);
if (!IS_ERR(page)) if (!IS_ERR(page))
goto out; goto out;
if (PTR_ERR(page) != -ENODATA) { if (PTR_ERR(page) != -ENODATA)
fdata->type = VM_FAULT_OOM; return VM_FAULT_OOM;
return NULL;
}
/* sparse block */ /* sparse block */
if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
...@@ -242,26 +237,22 @@ static struct page *xip_file_fault(struct vm_area_struct *area, ...@@ -242,26 +237,22 @@ static struct page *xip_file_fault(struct vm_area_struct *area,
(!(mapping->host->i_sb->s_flags & MS_RDONLY))) { (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
/* maybe shared writable, allocate new block */ /* maybe shared writable, allocate new block */
page = mapping->a_ops->get_xip_page(mapping, page = mapping->a_ops->get_xip_page(mapping,
fdata->pgoff*(PAGE_SIZE/512), 1); vmf->pgoff*(PAGE_SIZE/512), 1);
if (IS_ERR(page)) { if (IS_ERR(page))
fdata->type = VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
return NULL;
}
/* unmap page at pgoff from all other vmas */ /* unmap page at pgoff from all other vmas */
__xip_unmap(mapping, fdata->pgoff); __xip_unmap(mapping, vmf->pgoff);
} else { } else {
/* not shared and writable, use xip_sparse_page() */ /* not shared and writable, use xip_sparse_page() */
page = xip_sparse_page(); page = xip_sparse_page();
if (!page) { if (!page)
fdata->type = VM_FAULT_OOM; return VM_FAULT_OOM;
return NULL;
}
} }
out: out:
fdata->type = VM_FAULT_MINOR;
page_cache_get(page); page_cache_get(page);
return page; vmf->page = page;
return VM_FAULT_MINOR;
} }
static struct vm_operations_struct xip_file_vm_ops = { static struct vm_operations_struct xip_file_vm_ops = {
......
...@@ -20,13 +20,14 @@ ...@@ -20,13 +20,14 @@
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep) unsigned long addr, pte_t *ptep)
{ {
pte_t pte = *ptep; pte_t pte = *ptep;
struct page *page = NULL;
if (pte_present(pte)) { if (pte_present(pte)) {
struct page *page;
flush_cache_page(vma, addr, pte_pfn(pte)); flush_cache_page(vma, addr, pte_pfn(pte));
pte = ptep_clear_flush(vma, addr, ptep); pte = ptep_clear_flush(vma, addr, ptep);
page = vm_normal_page(vma, addr, pte); page = vm_normal_page(vma, addr, pte);
...@@ -35,68 +36,21 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -35,68 +36,21 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
set_page_dirty(page); set_page_dirty(page);
page_remove_rmap(page, vma); page_remove_rmap(page, vma);
page_cache_release(page); page_cache_release(page);
update_hiwater_rss(mm);
dec_mm_counter(mm, file_rss);
} }
} else { } else {
if (!pte_file(pte)) if (!pte_file(pte))
free_swap_and_cache(pte_to_swp_entry(pte)); free_swap_and_cache(pte_to_swp_entry(pte));
pte_clear_not_present_full(mm, addr, ptep, 0); pte_clear_not_present_full(mm, addr, ptep, 0);
} }
return !!page;
} }
/*
 * Install a file page at the given user virtual address, zapping whatever
 * was mapped there before.
 *
 * Returns 0 on success, -ENOMEM when the pte cannot be obtained or the
 * page's mapcount is already above INT_MAX/2, and -EINVAL when the page
 * has no mapping or its index lies at or beyond the current i_size.
 */
int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long addr, struct page *page, pgprot_t prot)
{
	struct inode *inode;
	pgoff_t nr_pages;
	spinlock_t *ptl;
	pte_t *ptep;
	pte_t entry;
	int ret;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (!ptep)
		return -ENOMEM;

	/* i_size is sampled with the pte lock held. */
	inode = vma->vm_file->f_mapping->host;
	nr_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

	if (!page->mapping || page->index >= nr_pages) {
		/* This page may have been truncated. Tell the caller. */
		ret = -EINVAL;
	} else if (page_mapcount(page) > INT_MAX/2) {
		ret = -ENOMEM;
	} else {
		/*
		 * Bump file_rss unless zap_pte() reports that it removed a
		 * present page from this slot.
		 */
		if (pte_none(*ptep) || !zap_pte(mm, vma, addr, ptep))
			inc_mm_counter(mm, file_rss);

		flush_icache_page(vma, page);
		entry = mk_pte(page, prot);
		set_pte_at(mm, addr, ptep, entry);
		page_add_file_rmap(page);
		update_mmu_cache(vma, addr, entry);
		lazy_mmu_prot_update(entry);
		ret = 0;
	}

	pte_unmap_unlock(ptep, ptl);
	return ret;
}
EXPORT_SYMBOL(install_page);
/* /*
* Install a file pte to a given virtual memory address, release any * Install a file pte to a given virtual memory address, release any
* previously existing mapping. * previously existing mapping.
*/ */
int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long pgoff, pgprot_t prot) unsigned long addr, unsigned long pgoff, pgprot_t prot)
{ {
int err = -ENOMEM; int err = -ENOMEM;
...@@ -107,10 +61,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -107,10 +61,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pte) if (!pte)
goto out; goto out;
if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) { if (!pte_none(*pte))
update_hiwater_rss(mm); zap_pte(mm, vma, addr, pte);
dec_mm_counter(mm, file_rss);
}
set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff)); set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
/* /*
...@@ -208,8 +160,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, ...@@ -208,8 +160,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
goto out; goto out;
if ((!vma->vm_ops || !vma->vm_ops->populate) && if (!(vma->vm_flags & VM_CAN_NONLINEAR))
!(vma->vm_flags & VM_CAN_NONLINEAR))
goto out; goto out;
if (end <= start || start < vma->vm_start || end > vma->vm_end) if (end <= start || start < vma->vm_start || end > vma->vm_end)
...@@ -239,18 +190,14 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, ...@@ -239,18 +190,14 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
spin_unlock(&mapping->i_mmap_lock); spin_unlock(&mapping->i_mmap_lock);
} }
if (vma->vm_flags & VM_CAN_NONLINEAR) { err = populate_range(mm, vma, start, size, pgoff);
err = populate_range(mm, vma, start, size, pgoff); if (!err && !(flags & MAP_NONBLOCK)) {
if (!err && !(flags & MAP_NONBLOCK)) { if (unlikely(has_write_lock)) {
if (unlikely(has_write_lock)) { downgrade_write(&mm->mmap_sem);
downgrade_write(&mm->mmap_sem); has_write_lock = 0;
has_write_lock = 0;
}
make_pages_present(start, start+size);
} }
} else make_pages_present(start, start+size);
err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot, }
pgoff, flags & MAP_NONBLOCK);
/* /*
* We can't clear VM_NONLINEAR because we'd have to do * We can't clear VM_NONLINEAR because we'd have to do
......
...@@ -316,15 +316,14 @@ unsigned long hugetlb_total_pages(void) ...@@ -316,15 +316,14 @@ unsigned long hugetlb_total_pages(void)
* hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get * hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get
* this far. * this far.
*/ */
static struct page *hugetlb_nopage(struct vm_area_struct *vma, static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long address, int *unused)
{ {
BUG(); BUG();
return NULL; return 0;
} }
struct vm_operations_struct hugetlb_vm_ops = { struct vm_operations_struct hugetlb_vm_ops = {
.nopage = hugetlb_nopage, .fault = hugetlb_vm_op_fault,
}; };
static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
......
...@@ -1834,10 +1834,10 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma, ...@@ -1834,10 +1834,10 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
/* /*
* files that support invalidating or truncating portions of the * files that support invalidating or truncating portions of the
* file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and * file from under mmaped areas must have their ->fault function
* have their .nopage function return the page locked. * return a locked page (and FAULT_RET_LOCKED code). This provides
* synchronisation against concurrent unmapping here.
*/ */
BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
again: again:
restart_addr = vma->vm_truncate_count; restart_addr = vma->vm_truncate_count;
...@@ -2306,63 +2306,62 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2306,63 +2306,62 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte) pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{ {
spinlock_t *ptl; spinlock_t *ptl;
struct page *page, *faulted_page; struct page *page;
pte_t entry; pte_t entry;
int anon = 0; int anon = 0;
struct page *dirty_page = NULL; struct page *dirty_page = NULL;
struct fault_data fdata; struct vm_fault vmf;
int ret;
fdata.address = address & PAGE_MASK; vmf.virtual_address = (void __user *)(address & PAGE_MASK);
fdata.pgoff = pgoff; vmf.pgoff = pgoff;
fdata.flags = flags; vmf.flags = flags;
vmf.page = NULL;
pte_unmap(page_table); pte_unmap(page_table);
BUG_ON(vma->vm_flags & VM_PFNMAP); BUG_ON(vma->vm_flags & VM_PFNMAP);
if (likely(vma->vm_ops->fault)) { if (likely(vma->vm_ops->fault)) {
fdata.type = -1; ret = vma->vm_ops->fault(vma, &vmf);
faulted_page = vma->vm_ops->fault(vma, &fdata); if (unlikely(ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE)))
WARN_ON(fdata.type == -1); return (ret & VM_FAULT_MASK);
if (unlikely(!faulted_page))
return fdata.type;
} else { } else {
/* Legacy ->nopage path */ /* Legacy ->nopage path */
fdata.type = VM_FAULT_MINOR; ret = VM_FAULT_MINOR;
faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, vmf.page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
&fdata.type);
/* no page was available -- either SIGBUS or OOM */ /* no page was available -- either SIGBUS or OOM */
if (unlikely(faulted_page == NOPAGE_SIGBUS)) if (unlikely(vmf.page == NOPAGE_SIGBUS))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
else if (unlikely(faulted_page == NOPAGE_OOM)) else if (unlikely(vmf.page == NOPAGE_OOM))
return VM_FAULT_OOM; return VM_FAULT_OOM;
} }
/* /*
* For consistency in subsequent calls, make the faulted_page always * For consistency in subsequent calls, make the faulted page always
* locked. * locked.
*/ */
if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) if (unlikely(!(ret & FAULT_RET_LOCKED)))
lock_page(faulted_page); lock_page(vmf.page);
else else
BUG_ON(!PageLocked(faulted_page)); VM_BUG_ON(!PageLocked(vmf.page));
/* /*
* Should we do an early C-O-W break? * Should we do an early C-O-W break?
*/ */
page = faulted_page; page = vmf.page;
if (flags & FAULT_FLAG_WRITE) { if (flags & FAULT_FLAG_WRITE) {
if (!(vma->vm_flags & VM_SHARED)) { if (!(vma->vm_flags & VM_SHARED)) {
anon = 1; anon = 1;
if (unlikely(anon_vma_prepare(vma))) { if (unlikely(anon_vma_prepare(vma))) {
fdata.type = VM_FAULT_OOM; ret = VM_FAULT_OOM;
goto out; goto out;
} }
page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
if (!page) { if (!page) {
fdata.type = VM_FAULT_OOM; ret = VM_FAULT_OOM;
goto out; goto out;
} }
copy_user_highpage(page, faulted_page, address, vma); copy_user_highpage(page, vmf.page, address, vma);
} else { } else {
/* /*
* If the page will be shareable, see if the backing * If the page will be shareable, see if the backing
...@@ -2372,11 +2371,23 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2372,11 +2371,23 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (vma->vm_ops->page_mkwrite) { if (vma->vm_ops->page_mkwrite) {
unlock_page(page); unlock_page(page);
if (vma->vm_ops->page_mkwrite(vma, page) < 0) { if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
fdata.type = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
anon = 1; /* no anon but release faulted_page */ anon = 1; /* no anon but release vmf.page */
goto out_unlocked; goto out_unlocked;
} }
lock_page(page); lock_page(page);
/*
* XXX: this is not quite right (racy vs
* invalidate) to unlock and relock the page
* like this, however a better fix requires
* reworking page_mkwrite locking API, which
* is better done later.
*/
if (!page->mapping) {
ret = VM_FAULT_MINOR;
anon = 1; /* no anon but release vmf.page */
goto out;
}
} }
} }
...@@ -2427,16 +2438,16 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2427,16 +2438,16 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pte_unmap_unlock(page_table, ptl); pte_unmap_unlock(page_table, ptl);
out: out:
unlock_page(faulted_page); unlock_page(vmf.page);
out_unlocked: out_unlocked:
if (anon) if (anon)
page_cache_release(faulted_page); page_cache_release(vmf.page);
else if (dirty_page) { else if (dirty_page) {
set_page_dirty_balance(dirty_page); set_page_dirty_balance(dirty_page);
put_page(dirty_page); put_page(dirty_page);
} }
return fdata.type; return (ret & VM_FAULT_MASK);
} }
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
...@@ -2447,18 +2458,10 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2447,18 +2458,10 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); return __do_fault(mm, vma, address, page_table, pmd, pgoff,
flags, orig_pte);
} }
/*
 * Nonlinear fault entry: the caller supplies @pgoff directly.  Mark the
 * fault as FAULT_FLAG_NONLINEAR (plus FAULT_FLAG_WRITE for write accesses)
 * and hand everything on to __do_fault().
 */
static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access, pgoff_t pgoff, pte_t orig_pte)
{
	unsigned int flags = FAULT_FLAG_NONLINEAR;

	if (write_access)
		flags |= FAULT_FLAG_WRITE;

	return __do_fault(mm, vma, address, page_table, pmd, pgoff,
			flags, orig_pte);
}
/* /*
* do_no_pfn() tries to create a new page mapping for a page without * do_no_pfn() tries to create a new page mapping for a page without
...@@ -2519,17 +2522,19 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2519,17 +2522,19 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
* but allow concurrent faults), and pte mapped but not yet locked. * but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked. * We return with mmap_sem still held, but pte unmapped and unlocked.
*/ */
static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned long address, pte_t *page_table, pmd_t *pmd,
int write_access, pte_t orig_pte) int write_access, pte_t orig_pte)
{ {
unsigned int flags = FAULT_FLAG_NONLINEAR |
(write_access ? FAULT_FLAG_WRITE : 0);
pgoff_t pgoff; pgoff_t pgoff;
int err;
if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
return VM_FAULT_MINOR; return VM_FAULT_MINOR;
if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { if (unlikely(!(vma->vm_flags & VM_NONLINEAR) ||
!(vma->vm_flags & VM_CAN_NONLINEAR))) {
/* /*
* Page table corrupted: show pte and kill process. * Page table corrupted: show pte and kill process.
*/ */
...@@ -2539,18 +2544,8 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2539,18 +2544,8 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
pgoff = pte_to_pgoff(orig_pte); pgoff = pte_to_pgoff(orig_pte);
if (vma->vm_ops && vma->vm_ops->fault) return __do_fault(mm, vma, address, page_table, pmd, pgoff,
return do_nonlinear_fault(mm, vma, address, page_table, pmd, flags, orig_pte);
write_access, pgoff, orig_pte);
/* We can then assume vm->vm_ops && vma->vm_ops->populate */
err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
vma->vm_page_prot, pgoff, 0);
if (err == -ENOMEM)
return VM_FAULT_OOM;
if (err)
return VM_FAULT_SIGBUS;
return VM_FAULT_MAJOR;
} }
/* /*
...@@ -2588,7 +2583,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, ...@@ -2588,7 +2583,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
pte, pmd, write_access); pte, pmd, write_access);
} }
if (pte_file(entry)) if (pte_file(entry))
return do_file_page(mm, vma, address, return do_nonlinear_fault(mm, vma, address,
pte, pmd, write_access, entry); pte, pmd, write_access, entry);
return do_swap_page(mm, vma, address, return do_swap_page(mm, vma, address,
pte, pmd, write_access, entry); pte, pmd, write_access, entry);
......
...@@ -1341,10 +1341,10 @@ int in_gate_area_no_task(unsigned long addr) ...@@ -1341,10 +1341,10 @@ int in_gate_area_no_task(unsigned long addr)
return 0; return 0;
} }
struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
BUG(); BUG();
return NULL; return 0;
} }
/* /*
......
...@@ -1309,29 +1309,21 @@ failed: ...@@ -1309,29 +1309,21 @@ failed:
return error; return error;
} }
static struct page *shmem_fault(struct vm_area_struct *vma, static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct fault_data *fdata)
{ {
struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
struct page *page = NULL;
int error; int error;
int ret;
BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
return VM_FAULT_SIGBUS;
if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_FAULT, &ret);
fdata->type = VM_FAULT_SIGBUS; if (error)
return NULL; return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
}
error = shmem_getpage(inode, fdata->pgoff, &page,
SGP_FAULT, &fdata->type);
if (error) {
fdata->type = ((error == -ENOMEM)?VM_FAULT_OOM:VM_FAULT_SIGBUS);
return NULL;
}
mark_page_accessed(page); mark_page_accessed(vmf->page);
return page; return ret | FAULT_RET_LOCKED;
} }
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
...@@ -1378,7 +1370,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -1378,7 +1370,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{ {
file_accessed(file); file_accessed(file);
vma->vm_ops = &shmem_vm_ops; vma->vm_ops = &shmem_vm_ops;
vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; vma->vm_flags |= VM_CAN_NONLINEAR;
return 0; return 0;
} }
...@@ -2560,6 +2552,5 @@ int shmem_zero_setup(struct vm_area_struct *vma) ...@@ -2560,6 +2552,5 @@ int shmem_zero_setup(struct vm_area_struct *vma)
fput(vma->vm_file); fput(vma->vm_file);
vma->vm_file = file; vma->vm_file = file;
vma->vm_ops = &shmem_vm_ops; vma->vm_ops = &shmem_vm_ops;
vma->vm_flags |= VM_CAN_INVALIDATE;
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment