Commit 6aab341e authored by Linus Torvalds's avatar Linus Torvalds

mm: re-architect the VM_UNPAGED logic

This replaces the (in my opinion horrible) VM_UNMAPPED logic with very
explicit support for a "remapped page range" aka VM_PFNMAP.  It allows a
VM area to contain an arbitrary range of page table entries that the VM
never touches, and never considers to be normal pages.

Any user of "remap_pfn_range()" automatically gets this new
functionality, and doesn't even have to mark the pages reserved or
indeed mark them any other way.  It just works.  As a side effect, doing
mmap() on /dev/mem works for arbitrary ranges.

Sparc update from David in the next commit.
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 458af543
......@@ -145,8 +145,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma)
struct page *pg = virt_to_page(vdso32_kbase +
i*PAGE_SIZE);
struct page *upg = (vma && vma->vm_mm) ?
follow_page(vma->vm_mm, vma->vm_start +
i*PAGE_SIZE, 0)
follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
: NULL;
dump_one_vdso_page(pg, upg);
}
......@@ -157,8 +156,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma)
struct page *pg = virt_to_page(vdso64_kbase +
i*PAGE_SIZE);
struct page *upg = (vma && vma->vm_mm) ?
follow_page(vma->vm_mm, vma->vm_start +
i*PAGE_SIZE, 0)
follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
: NULL;
dump_one_vdso_page(pg, upg);
}
......
......@@ -591,7 +591,7 @@ static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
goto out_up;
if (vma->vm_flags & (VM_SHARED | VM_HUGETLB | VM_UNPAGED))
if (vma->vm_flags & (VM_SHARED | VM_HUGETLB))
break;
count = vma->vm_end - addr;
if (count > size)
......
......@@ -402,12 +402,11 @@ struct numa_maps {
/*
* Calculate numa node maps for a vma
*/
static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma)
static struct numa_maps *get_numa_maps(struct vm_area_struct *vma)
{
int i;
struct page *page;
unsigned long vaddr;
struct mm_struct *mm = vma->vm_mm;
int i;
struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
if (!md)
......@@ -420,7 +419,7 @@ static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma)
md->node[i] =0;
for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
page = follow_page(mm, vaddr, 0);
page = follow_page(vma, vaddr, 0);
if (page) {
int count = page_mapcount(page);
......
......@@ -145,7 +145,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
#define VM_GROWSUP 0x00000200
#define VM_SHM 0x00000000 /* Means nothing: delete it later */
#define VM_UNPAGED 0x00000400 /* Pages managed without map count */
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
#define VM_EXECUTABLE 0x00001000
......@@ -664,6 +664,7 @@ struct zap_details {
unsigned long truncate_count; /* Compare vm_truncate_count */
};
struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t);
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *);
unsigned long unmap_vmas(struct mmu_gather **tlb,
......@@ -953,7 +954,7 @@ unsigned long vmalloc_to_pfn(void *addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
struct page *follow_page(struct mm_struct *, unsigned long address,
struct page *follow_page(struct vm_area_struct *, unsigned long address,
unsigned int foll_flags);
#define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */
......
......@@ -27,24 +27,20 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page = NULL;
if (pte_present(pte)) {
unsigned long pfn = pte_pfn(pte);
flush_cache_page(vma, addr, pfn);
flush_cache_page(vma, addr, pte_pfn(pte));
pte = ptep_clear_flush(vma, addr, ptep);
if (unlikely(!pfn_valid(pfn))) {
print_bad_pte(vma, pte, addr);
goto out;
}
page = pfn_to_page(pfn);
page = vm_normal_page(vma, addr, pte);
if (page) {
if (pte_dirty(pte))
set_page_dirty(page);
page_remove_rmap(page);
page_cache_release(page);
}
} else {
if (!pte_file(pte))
free_swap_and_cache(pte_to_swp_entry(pte));
pte_clear(mm, addr, ptep);
}
out:
return !!page;
}
......@@ -65,8 +61,6 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t pte_val;
spinlock_t *ptl;
BUG_ON(vma->vm_flags & VM_UNPAGED);
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (!pud)
......@@ -122,8 +116,6 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t pte_val;
spinlock_t *ptl;
BUG_ON(vma->vm_flags & VM_UNPAGED);
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (!pud)
......
......@@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma,
unsigned long start, unsigned long end)
{
*prev = vma;
if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_UNPAGED))
if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
return -EINVAL;
if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
......
This diff is collapsed.
......@@ -189,17 +189,15 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
do {
unsigned long pfn;
struct page *page;
unsigned int nid;
if (!pte_present(*pte))
continue;
pfn = pte_pfn(*pte);
if (!pfn_valid(pfn)) {
print_bad_pte(vma, *pte, addr);
page = vm_normal_page(vma, addr, *pte);
if (!page)
continue;
}
nid = pfn_to_nid(pfn);
nid = page_to_nid(page);
if (!node_isset(nid, *nodes))
break;
} while (pte++, addr += PAGE_SIZE, addr != end);
......@@ -269,8 +267,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
first = find_vma(mm, start);
if (!first)
return ERR_PTR(-EFAULT);
if (first->vm_flags & VM_UNPAGED)
return ERR_PTR(-EACCES);
prev = NULL;
for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
if (!vma->vm_next && vma->vm_end < end)
......
......@@ -27,7 +27,6 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
again:
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
do {
unsigned long pfn;
struct page *page;
if (progress >= 64) {
......@@ -40,13 +39,9 @@ again:
continue;
if (!pte_maybe_dirty(*pte))
continue;
pfn = pte_pfn(*pte);
if (unlikely(!pfn_valid(pfn))) {
print_bad_pte(vma, *pte, addr);
page = vm_normal_page(vma, addr, *pte);
if (!page)
continue;
}
page = pfn_to_page(pfn);
if (ptep_clear_flush_dirty(vma, addr, pte) ||
page_test_and_clear_dirty(page))
set_page_dirty(page);
......@@ -97,9 +92,8 @@ static void msync_page_range(struct vm_area_struct *vma,
/* For hugepages we can't go walking the page table normally,
* but that's ok, hugetlbfs is memory based, so we don't need
* to do anything more on an msync().
* Can't do anything with VM_UNPAGED regions either.
*/
if (vma->vm_flags & (VM_HUGETLB|VM_UNPAGED))
if (vma->vm_flags & VM_HUGETLB)
return;
BUG_ON(addr >= end);
......
......@@ -1045,7 +1045,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
EXPORT_SYMBOL(find_vma);
struct page *follow_page(struct mm_struct *mm, unsigned long address,
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
unsigned int foll_flags)
{
return NULL;
......
......@@ -226,8 +226,6 @@ vma_address(struct page *page, struct vm_area_struct *vma)
/*
* At what user virtual address is page expected in vma? checking that the
* page matches the vma: currently only used on anon pages, by unuse_vma;
* and by extraordinary checks on anon pages in VM_UNPAGED vmas, taking
* care that an mmap of /dev/mem might window free and foreign pages.
*/
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
......@@ -614,7 +612,6 @@ static void try_to_unmap_cluster(unsigned long cursor,
struct page *page;
unsigned long address;
unsigned long end;
unsigned long pfn;
address = (vma->vm_start + cursor) & CLUSTER_MASK;
end = address + CLUSTER_SIZE;
......@@ -643,15 +640,8 @@ static void try_to_unmap_cluster(unsigned long cursor,
for (; address < end; pte++, address += PAGE_SIZE) {
if (!pte_present(*pte))
continue;
pfn = pte_pfn(*pte);
if (unlikely(!pfn_valid(pfn))) {
print_bad_pte(vma, *pte, address);
continue;
}
page = pfn_to_page(pfn);
BUG_ON(PageAnon(page));
page = vm_normal_page(vma, address, *pte);
BUG_ON(!page || PageAnon(page));
if (ptep_clear_flush_young(vma, address, pte))
continue;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment