Commit f8f98a93 authored by Russell King

[PATCH] ARM: Fix Xscale copy_page implementation

The ARM copypage changes in 2.6.12-rc4-git1 removed the preempt locking
from the copypage functions which broke the XScale implementation.
This patch fixes the locking on XScale and removes the now unneeded
minicache code.
Signed-off-by: Russell King <rmk@arm.linux.org.uk>
Checked-by: Richard Purdie
parent 17d82fcc
...@@ -228,7 +228,6 @@ config CPU_SA1100 ...@@ -228,7 +228,6 @@ config CPU_SA1100
select CPU_CACHE_V4WB select CPU_CACHE_V4WB
select CPU_CACHE_VIVT select CPU_CACHE_VIVT
select CPU_TLB_V4WB select CPU_TLB_V4WB
select CPU_MINICACHE
# XScale # XScale
config CPU_XSCALE config CPU_XSCALE
...@@ -239,7 +238,6 @@ config CPU_XSCALE ...@@ -239,7 +238,6 @@ config CPU_XSCALE
select CPU_ABRT_EV5T select CPU_ABRT_EV5T
select CPU_CACHE_VIVT select CPU_CACHE_VIVT
select CPU_TLB_V4WBI select CPU_TLB_V4WBI
select CPU_MINICACHE
# ARMv6 # ARMv6
config CPU_V6 config CPU_V6
...@@ -345,11 +343,6 @@ config CPU_TLB_V4WBI ...@@ -345,11 +343,6 @@ config CPU_TLB_V4WBI
config CPU_TLB_V6 config CPU_TLB_V6
bool bool
config CPU_MINICACHE
bool
help
Processor has a minicache.
comment "Processor Features" comment "Processor Features"
config ARM_THUMB config ARM_THUMB
......
...@@ -31,8 +31,6 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o ...@@ -31,8 +31,6 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o
obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o
obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o
obj-$(CONFIG_CPU_MINICACHE) += minicache.o
obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o
obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o
obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o
......
/*
* linux/arch/arm/lib/copypage-xscale.S
*
* Copyright (C) 2001 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>
/*
* General note:
* We don't really want write-allocate cache behaviour for these functions
* since that will just eat through 8K of the cache.
*/
.text
.align 5
/*
* XScale optimised copy_user_page
* r0 = destination
* r1 = source
* r2 = virtual user address of ultimate destination page
*
* The source page may have some clean entries in the cache already, but we
* can safely ignore them - break_cow() will flush them out of the cache
* if we eventually end up using our copied page.
*
* What we could do is use the mini-cache to buffer reads from the source
* page. We rely on the mini-cache being smaller than one page, so we'll
* cycle through the complete cache anyway.
*/
ENTRY(xscale_mc_copy_user_page)
/* Save r4/r5 (both are overwritten below) and the return address. */
stmfd sp!, {r4, r5, lr}
/*
 * Park the destination in r5 across the call and hand the source
 * address to map_page_minicache in r0.
 */
mov r5, r0
mov r0, r1
bl map_page_minicache
/*
 * From here on r0 is whatever map_page_minicache returned (used as the
 * read pointer) and r1 is the destination again.
 */
mov r1, r5
/*
 * lr is the loop counter.  Each pass through the loop body copies
 * 64 bytes; starting at PAGE_SZ/64-1 and running until lr goes
 * negative gives PAGE_SZ/64 passes in total.
 */
mov lr, #PAGE_SZ/64-1
/*
 * Strangely enough, best performance is achieved
 * when prefetching destination as well. (NP)
 */
pld [r0, #0]
pld [r0, #32]
pld [r1, #0]
pld [r1, #32]
/* Label 1: prefetch the 64 bytes that the NEXT pass will touch. */
1: pld [r0, #64]
pld [r0, #96]
pld [r1, #64]
pld [r1, #96]
/*
 * Label 2: copy the first 32 bytes of this pass.  ip remembers the
 * address of the 32 bytes being written so they can be cleaned and
 * invalidated once the stores have been issued.
 */
2: ldrd r2, [r0], #8
ldrd r4, [r0], #8
mov ip, r1
strd r2, [r1], #8
ldrd r2, [r0], #8
strd r4, [r1], #8
ldrd r4, [r0], #8
strd r2, [r1], #8
strd r4, [r1], #8
mcr p15, 0, ip, c7, c10, 1 @ clean D line
/* Loads for the second 32 bytes are interleaved with the cache ops. */
ldrd r2, [r0], #8
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line
ldrd r4, [r0], #8
mov ip, r1
strd r2, [r1], #8
ldrd r2, [r0], #8
strd r4, [r1], #8
ldrd r4, [r0], #8
strd r2, [r1], #8
strd r4, [r1], #8
mcr p15, 0, ip, c7, c10, 1 @ clean D line
/* The flags set by subs are consumed by the two branches below. */
subs lr, lr, #1
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line
/*
 * lr > 0: more passes remain, go round again with prefetch.
 * lr == 0: final pass, enter at label 2 so the prefetches are skipped.
 * lr < 0: the whole page has been copied, fall through and return.
 */
bgt 1b
beq 2b
/* Restore r4/r5 and return by loading the saved lr into pc. */
ldmfd sp!, {r4, r5, pc}
.align 5
/*
* XScale optimised clear_user_page
* r0 = destination
* r1 = virtual user address of ultimate destination page
*/
ENTRY(xscale_mc_clear_user_page)
/*
 * r1 (the incoming user virtual address) is not used by this routine;
 * it is overwritten with the loop counter, one count per 32 bytes.
 */
mov r1, #PAGE_SZ/32
/* r2/r3 form the zero value written by every strd below. */
mov r2, #0
mov r3, #0
/*
 * Each iteration zeroes 32 bytes with four 8-byte stores.  ip keeps the
 * start address of those 32 bytes so they can be cleaned and then
 * invalidated after the stores have been issued.
 */
1: mov ip, r0
strd r2, [r0], #8
strd r2, [r0], #8
strd r2, [r0], #8
strd r2, [r0], #8
mcr p15, 0, ip, c7, c10, 1 @ clean D line
/* subs sets the flags tested by bne; the mcr in between leaves them alone. */
subs r1, r1, #1
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line
bne 1b
/* No stack frame was created, so return directly through lr. */
mov pc, lr
/*
 * Table of the two page routines defined in this file: the address of
 * the clear routine first, then the address of the copy routine.
 * NOTE(review): __INITDATA presumably places the table in the init data
 * section, so it is expected to be copied before init memory is freed -
 * confirm against the code that consumes this table.
 */
__INITDATA
.type xscale_mc_user_fns, #object
ENTRY(xscale_mc_user_fns)
.long xscale_mc_clear_user_page
.long xscale_mc_copy_user_page
/* Record the object's size as the distance from its label to here. */
.size xscale_mc_user_fns, . - xscale_mc_user_fns
/*
* linux/arch/arm/mm/copypage-xscale.c
*
* Copyright (C) 1995-2005 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This handles the mini data cache, as found on SA11x0 and XScale
* processors. When we copy a user page, we map it in such a way
* that accesses to this page will not touch the main data cache, but
* will be cached in the mini data cache. This prevents us thrashing
* the main data cache on page faults.
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
/*
* 0xffff8000 to 0xffffffff is reserved for any ARM architecture
* specific hacks for copying pages efficiently.
*/
#define COPYPAGE_MINICACHE 0xffff8000
#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
L_PTE_CACHEABLE)
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
static DEFINE_SPINLOCK(minicache_lock);
/*
* XScale mini-dcache optimised copy_user_page
*
* We flush the destination cache lines just before we write the data into the
* corresponding address. Since the Dcache is read-allocate, this removes the
* Dcache aliasing issue. The writes will be forwarded to the write buffer,
* and merged as appropriate.
*/
static void __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
/*
 * Copy one page from 'from' to 'to'.
 *
 * The function is naked, so the asm below is the entire function: it
 * saves r4, r5 and lr itself and returns by popping the saved lr into
 * pc.
 *
 * The template addresses the source and destination as r0 and r1
 * directly; the %0 and %1 operands are declared but never referenced.
 * NOTE(review): this presumably relies on the ARM procedure call
 * convention delivering 'from' in r0 and 'to' in r1 on entry - confirm.
 *
 * %2 (PAGE_SIZE / 64 - 1) is loaded into lr as the loop counter.  Each
 * pass of the loop copies 64 bytes as two 32-byte halves; after each
 * half the 32 bytes just written (address held in ip) are cleaned and
 * then invalidated.  "bgt 1b" repeats the loop with prefetch while lr
 * is still positive, "beq 2b" runs the final pass without the prefetch
 * instructions, and a negative lr falls through to the return, giving
 * PAGE_SIZE / 64 passes in total.
 *
 * Strangely enough, best performance is achieved
 * when prefetching destination as well. (NP)
 */
asm volatile(
"stmfd sp!, {r4, r5, lr} \n\
mov lr, %2 \n\
pld [r0, #0] \n\
pld [r0, #32] \n\
pld [r1, #0] \n\
pld [r1, #32] \n\
1: pld [r0, #64] \n\
pld [r0, #96] \n\
pld [r1, #64] \n\
pld [r1, #96] \n\
2: ldrd r2, [r0], #8 \n\
ldrd r4, [r0], #8 \n\
mov ip, r1 \n\
strd r2, [r1], #8 \n\
ldrd r2, [r0], #8 \n\
strd r4, [r1], #8 \n\
ldrd r4, [r0], #8 \n\
strd r2, [r1], #8 \n\
strd r4, [r1], #8 \n\
mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\
ldrd r2, [r0], #8 \n\
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\
ldrd r4, [r0], #8 \n\
mov ip, r1 \n\
strd r2, [r1], #8 \n\
ldrd r2, [r0], #8 \n\
strd r4, [r1], #8 \n\
ldrd r4, [r0], #8 \n\
strd r2, [r1], #8 \n\
strd r4, [r1], #8 \n\
mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\
subs lr, lr, #1 \n\
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\
bgt 1b \n\
beq 2b \n\
ldmfd sp!, {r4, r5, pc} "
:
/* Inputs only; the "I" constraint makes the pass count an immediate. */
: "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1));
}
/*
 * Copy the page at kernel address kfrom to kernel address kto.
 *
 * The source page is temporarily mapped at COPYPAGE_MINICACHE with
 * minicache_pgprot and read through that mapping.  minicache_lock is
 * held from before the mapping is installed until the copy has
 * finished, so only one copy uses the window at a time.  vaddr is not
 * used.
 */
void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
{
	/* Page frame number of the source page. */
	unsigned long src_pfn = __pa(kfrom) >> PAGE_SHIFT;

	spin_lock(&minicache_lock);

	/* Point the copy window at the source and drop any stale TLB entry. */
	set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(src_pfn, minicache_pgprot));
	flush_tlb_kernel_page(COPYPAGE_MINICACHE);

	/* Read through the window, write straight to the destination. */
	mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);

	spin_unlock(&minicache_lock);
}
/*
* XScale optimised clear_user_page
*/
void __attribute__((naked))
xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
/*
 * Zero one page starting at kaddr.
 *
 * The function is naked, so the asm below is the entire function and
 * returns by itself with "mov pc, lr".  The template addresses the
 * page through r0 directly.
 * NOTE(review): this presumably relies on the ARM procedure call
 * convention delivering kaddr in r0 on entry - confirm.
 *
 * r1 is overwritten with the loop counter %0 (PAGE_SIZE / 32), so the
 * vaddr argument is not used.  r2/r3 hold zero.  Each iteration writes
 * 32 bytes with four 8-byte stores, then cleans and invalidates the
 * 32 bytes just written (address held in ip).
 */
asm volatile(
"mov r1, %0 \n\
mov r2, #0 \n\
mov r3, #0 \n\
1: mov ip, r0 \n\
strd r2, [r0], #8 \n\
strd r2, [r0], #8 \n\
strd r2, [r0], #8 \n\
strd r2, [r0], #8 \n\
mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\
subs r1, r1, #1 \n\
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\
bne 1b \n\
mov pc, lr"
:
/* Single input: the iteration count, passed as an immediate. */
: "I" (PAGE_SIZE / 32));
}
/*
 * Page clear/copy operations for XScale using the mini data cache.
 * The table is marked __initdata.
 * NOTE(review): presumably its contents are copied elsewhere before
 * init memory is released - confirm against the consumer of this table.
 */
struct cpu_user_fns xscale_mc_user_fns __initdata = {
	.cpu_copy_user_page	= xscale_mc_copy_user_page,
	.cpu_clear_user_page	= xscale_mc_clear_user_page,
};
/*
* linux/arch/arm/mm/minicache.c
*
* Copyright (C) 2001 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This handles the mini data cache, as found on SA11x0 and XScale
* processors. When we copy a user page, we map it in such a way
* that accesses to this page will not touch the main data cache, but
* will be cached in the mini data cache. This prevents us thrashing
* the main data cache on page faults.
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
/*
* 0xffff8000 to 0xffffffff is reserved for any ARM architecture
* specific hacks for copying pages efficiently.
*/
#define minicache_address (0xffff8000)
#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
L_PTE_CACHEABLE)
static pte_t *minicache_pte;
/*
* Note that this is intended to be called only from the copy_user_page
* asm code; anything else will require special locking to prevent the
* mini-cache space being re-used. (Note: probably preempt unsafe).
*
* We rely on the fact that the minicache is 2K, and we'll be pushing
* 4K of data through it, so we don't actually have to specifically
* flush the minicache when we change the mapping.
*
* Note also: assert(PAGE_OFFSET <= virt < high_memory).
* Unsafe: preempt, kmap.
*/
unsigned long map_page_minicache(unsigned long virt)
{
	/* Page frame number backing the kernel virtual address 'virt'. */
	unsigned long pfn = __pa(virt) >> PAGE_SHIFT;

	/* Retarget the fixed window at that page and drop the old TLB entry. */
	set_pte(minicache_pte, pfn_pte(pfn, minicache_pgprot));
	flush_tlb_kernel_page(minicache_address);

	/* Callers access the page through the window, not through 'virt'. */
	return minicache_address;
}
/*
 * Allocate the page-table entry that backs minicache_address in init_mm
 * and remember it in minicache_pte.  Failure of either allocation is
 * treated as fatal.  Always returns 0.
 */
static int __init minicache_init(void)
{
	pmd_t *pmd;

	spin_lock(&init_mm.page_table_lock);

	pmd = pmd_alloc(&init_mm, pgd_offset_k(minicache_address),
			minicache_address);
	if (!pmd) {
		BUG();
	}

	minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address);
	if (!minicache_pte) {
		BUG();
	}

	spin_unlock(&init_mm.page_table_lock);

	return 0;
}

core_initcall(minicache_init);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment