Commit 25e5566e authored by David S. Miller

[SPARC64]: Fix missing load-twin usage in Niagara-1 memcpy.

When the source is not aligned modulo 8, we don't use
load-twins to pull the data in. This kills performance,
because normal loads allocate in the L1 cache (unlike
load-twin), so big memcpys wipe out the entire L1
D-cache.

We need to allocate a register window to implement this
properly, but that actually simplifies a lot of things
as a nice side-effect.
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 8cc8c28a
/* NGcopy_from_user.S: Niagara optimized copy from userspace. /* NGcopy_from_user.S: Niagara optimized copy from userspace.
* *
* Copyright (C) 2006 David S. Miller (davem@davemloft.net) * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/ */
#define EX_LD(x) \ #define EX_LD(x) \
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
.section .fixup; \ .section .fixup; \
.align 4; \ .align 4; \
99: wr %g0, ASI_AIUS, %asi;\ 99: wr %g0, ASI_AIUS, %asi;\
retl; \ ret; \
mov 1, %o0; \ restore %g0, 1, %o0; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, 99b; \ .word 98b, 99b; \
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest #define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest
#define LOAD_TWIN(addr_reg,dest0,dest1) \ #define LOAD_TWIN(addr_reg,dest0,dest1) \
ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0 ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0
#define EX_RETVAL(x) 0 #define EX_RETVAL(x) %g0
#ifdef __KERNEL__ #ifdef __KERNEL__
#define PREAMBLE \ #define PREAMBLE \
......
/* NGcopy_to_user.S: Niagara optimized copy to userspace. /* NGcopy_to_user.S: Niagara optimized copy to userspace.
* *
* Copyright (C) 2006 David S. Miller (davem@davemloft.net) * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/ */
#define EX_ST(x) \ #define EX_ST(x) \
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
.section .fixup; \ .section .fixup; \
.align 4; \ .align 4; \
99: wr %g0, ASI_AIUS, %asi;\ 99: wr %g0, ASI_AIUS, %asi;\
retl; \ ret; \
mov 1, %o0; \ restore %g0, 1, %o0; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, 99b; \ .word 98b, 99b; \
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#define FUNC_NAME NGcopy_to_user #define FUNC_NAME NGcopy_to_user
#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS #define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS
#define EX_RETVAL(x) 0 #define EX_RETVAL(x) %g0
#ifdef __KERNEL__ #ifdef __KERNEL__
/* Writing to %asi is _expensive_ so we hardcode it. /* Writing to %asi is _expensive_ so we hardcode it.
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment