Commit 9ba02e11, authored by David Miller, committed by James Toy

This is necessary to make the mmap ring buffer work properly on platforms
where D-cache aliasing is an issue.

vmalloc_user() ensures that the kernel side mapping is SHMLBA aligned, and
on platforms where D-cache aliasing matters, the presence of VM_SHARED will
similarly SHMLBA align the user side mapping.

Thus the kernel and the user will be writing to the same D-cache aliases
and we'll avoid inconsistencies and corruption.

The only trick with this change is that vfree() cannot be invoked from
interrupt context, and thus it's not allowed from RCU callbacks.

We deal with this by using schedule_work().

Since the ring buffer is now completely linear even on the kernel side,
several simplifications are probably now possible in the code where we add
entries to the ring.

With help from Peter Zijlstra.
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent eb7cc917
...@@ -507,6 +507,7 @@ struct file; ...@@ -507,6 +507,7 @@ struct file;
struct perf_mmap_data { struct perf_mmap_data {
struct rcu_head rcu_head; struct rcu_head rcu_head;
struct work_struct work;
int nr_pages; /* nr of data pages */ int nr_pages; /* nr of data pages */
int writable; /* are we writable */ int writable; /* are we writable */
int nr_locked; /* nr pages mlocked */ int nr_locked; /* nr pages mlocked */
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <linux/anon_inodes.h> #include <linux/anon_inodes.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
...@@ -2118,7 +2119,7 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -2118,7 +2119,7 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
goto unlock; goto unlock;
if (vmf->pgoff == 0) { if (vmf->pgoff == 0) {
vmf->page = virt_to_page(data->user_page); vmf->page = vmalloc_to_page(data->user_page);
} else { } else {
int nr = vmf->pgoff - 1; int nr = vmf->pgoff - 1;
...@@ -2128,7 +2129,7 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -2128,7 +2129,7 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (vmf->flags & FAULT_FLAG_WRITE) if (vmf->flags & FAULT_FLAG_WRITE)
goto unlock; goto unlock;
vmf->page = virt_to_page(data->data_pages[nr]); vmf->page = vmalloc_to_page(data->data_pages[nr]);
} }
get_page(vmf->page); get_page(vmf->page);
...@@ -2142,10 +2143,34 @@ unlock: ...@@ -2142,10 +2143,34 @@ unlock:
return ret; return ret;
} }
/*
 * Clear the ->mapping field of the struct page that backs the vmalloc'ed
 * address @addr.
 *
 * @addr: an address for which vmalloc_to_page() yields the backing page.
 */
static void perf_mmap_unmark_page(void *addr)
{
	struct page *pg;

	pg = vmalloc_to_page(addr);
	pg->mapping = NULL;
}
/*
 * Work handler that tears down a perf_mmap_data and its page buffer.
 *
 * @work: the work_struct embedded in the perf_mmap_data being freed.
 *
 * The buffer is released with vfree(), which cannot be invoked from
 * interrupt context; hence the teardown is run as scheduled work.
 */
static void perf_mmap_data_free_work(struct work_struct *work)
{
	struct perf_mmap_data *mmap_data =
		container_of(work, struct perf_mmap_data, work);
	/* user_page is the start of the buffer; the data pages follow it. */
	void *buf = mmap_data->user_page;
	int idx;

	/* Visit nr_pages + 1 pages: the user page plus every data page. */
	for (idx = 0; idx <= mmap_data->nr_pages; idx++)
		perf_mmap_unmark_page(buf + (idx * PAGE_SIZE));

	vfree(buf);
	kfree(mmap_data);
}
static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
{ {
struct perf_mmap_data *data; struct perf_mmap_data *data;
unsigned long size; unsigned long size;
void *all_buf;
int i; int i;
WARN_ON(atomic_read(&counter->mmap_count)); WARN_ON(atomic_read(&counter->mmap_count));
...@@ -2157,15 +2182,16 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) ...@@ -2157,15 +2182,16 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
if (!data) if (!data)
goto fail; goto fail;
data->user_page = (void *)get_zeroed_page(GFP_KERNEL); INIT_WORK(&data->work, perf_mmap_data_free_work);
if (!data->user_page)
goto fail_user_page;
for (i = 0; i < nr_pages; i++) { all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); if (!all_buf)
if (!data->data_pages[i]) goto fail_all_buf;
goto fail_data_pages;
} data->user_page = all_buf;
for (i = 0; i < nr_pages; i++)
data->data_pages[i] = all_buf + ((i + 1) * PAGE_SIZE);
data->nr_pages = nr_pages; data->nr_pages = nr_pages;
atomic_set(&data->lock, -1); atomic_set(&data->lock, -1);
...@@ -2174,39 +2200,19 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) ...@@ -2174,39 +2200,19 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
return 0; return 0;
fail_data_pages: fail_all_buf:
for (i--; i >= 0; i--)
free_page((unsigned long)data->data_pages[i]);
free_page((unsigned long)data->user_page);
fail_user_page:
kfree(data); kfree(data);
fail: fail:
return -ENOMEM; return -ENOMEM;
} }
static void perf_mmap_free_page(unsigned long addr)
{
struct page *page = virt_to_page((void *)addr);
page->mapping = NULL;
__free_page(page);
}
static void __perf_mmap_data_free(struct rcu_head *rcu_head) static void __perf_mmap_data_free(struct rcu_head *rcu_head)
{ {
struct perf_mmap_data *data; struct perf_mmap_data *data;
int i;
data = container_of(rcu_head, struct perf_mmap_data, rcu_head); data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
schedule_work(&data->work);
perf_mmap_free_page((unsigned long)data->user_page);
for (i = 0; i < data->nr_pages; i++)
perf_mmap_free_page((unsigned long)data->data_pages[i]);
kfree(data);
} }
static void perf_mmap_data_free(struct perf_counter *counter) static void perf_mmap_data_free(struct perf_counter *counter)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment