Commit 007d77d0, authored by Chen, Kenneth W; committed by Tony Luck

[IA64] implement csum_ipv6_magic for ia64.

The asm version is 4.4 times faster than the generic C version and
10X smaller in code size.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
parent 5b4d5681
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
* in0: address of buffer to checksum (char *) * in0: address of buffer to checksum (char *)
* in1: length of the buffer (int) * in1: length of the buffer (int)
* *
* Copyright (C) 2002 Intel Corp. * Copyright (C) 2002, 2006 Intel Corp.
* Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
*/ */
#include <asm/asmmacro.h> #include <asm/asmmacro.h>
...@@ -25,6 +25,9 @@ ...@@ -25,6 +25,9 @@
/*
 * Symbolic names for the registers the routines in this file use for
 * their incoming arguments (in0..in4) and for their return value (ret0).
 */
#define in0	r32
#define in1	r33
#define in2	r34
#define in3	r35
#define in4	r36
#define ret0	r8
GLOBAL_ENTRY(ip_fast_csum) GLOBAL_ENTRY(ip_fast_csum)
...@@ -88,3 +91,51 @@ GLOBAL_ENTRY(ip_fast_csum) ...@@ -88,3 +91,51 @@ GLOBAL_ENTRY(ip_fast_csum)
mov b0=r34 mov b0=r34
br.ret.sptk.many b0 br.ret.sptk.many b0
END(ip_fast_csum) END(ip_fast_csum)
/*
 * csum_ipv6_magic - checksum over two IPv6 addresses plus len, proto and
 * an incoming partial sum, folded to 16 bits and complemented.
 *
 * Inputs (in the order of the C prototype: saddr, daddr, len, proto, csum):
 *	in0: address of the source address; four 32-bit words are read
 *	in1: address of the destination address; four 32-bit words are read
 *	in2: len (32 bits)
 *	in3: proto (only the low 16 bits are used)
 *	in4: csum (32 bits)
 * Output:
 *	r8:  one's complement of the 16-bit folded sum, in bits 0..15
 *
 * in0, in1, in2 and in4 are modified; r9-r11 and r15-r27 are scratch.
 *
 * len and csum are 32-bit values held in 64-bit registers.  The upper
 * halves of those registers are not guaranteed to be zero on entry, so
 * both are zero-extended before they take part in the 64-bit arithmetic.
 *
 * NOTE(review): the addresses are read with ld4, so they are presumably
 * expected to be 4-byte aligned - confirm against the callers.
 */
GLOBAL_ENTRY(csum_ipv6_magic)
	ld4	r20=[in0],4		// saddr word 0
	ld4	r21=[in1],4		// daddr word 0
	zxt4	in2=in2			// clear stale upper 32 bits of len
	;;
	ld4	r22=[in0],4		// saddr word 1
	ld4	r23=[in1],4		// daddr word 1
	dep	r15=in3,in2,32,16	// r15 = (proto & 0xffff) << 32 | len
	;;
	ld4	r24=[in0],4		// saddr word 2
	ld4	r25=[in1],4		// daddr word 2
	mux1	r15=r15,@rev		// reverse all eight bytes of r15
	;;
	ld4	r26=[in0],4		// saddr word 3
	ld4	r27=[in1],4		// daddr word 3
	shr.u	r15=r15,16		// r15 = bswap32(len) << 16 | bswap16(proto)
	add	r16=r20,r21
	add	r17=r22,r23
	zxt4	in4=in4			// clear stale upper 32 bits of csum
	;;
	add	r18=r24,r25
	add	r8=r16,r17
	;;
	add	r19=r26,r27
	add	r8=r8,r18
	;;
	add	r8=r8,r19
	add	r15=r15,in4
	;;
	add	r8=r8,r15		// r8 = full 64-bit sum, cannot overflow
	;;
	shr.u	r10=r8,32		// fold 64 -> at most 33 bits
	zxt4	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16		// fold again: at most 18 bits remain
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16		// third fold
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	// The third fold can still carry into bit 16 (0x1ffff -> 0x10000).
	// Fold once more so that carry is added back instead of being
	// dropped by the mask below; values that already fit are unchanged.
	shr.u	r10=r8,16
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	mov	r9=0xffff
	;;
	andcm	r8=r9,r8		// r8 = 0xffff & ~sum
	br.ret.sptk.many b0
END(csum_ipv6_magic)
...@@ -70,4 +70,10 @@ static inline __sum16 csum_fold(__wsum csum) ...@@ -70,4 +70,10 @@ static inline __sum16 csum_fold(__wsum csum)
return (__force __sum16)~sum; return (__force __sum16)~sum;
} }
/*
 * ia64 provides its own csum_ipv6_magic().
 * NOTE(review): _HAVE_ARCH_IPV6_CSUM presumably tells generic code that an
 * architecture-specific version exists - confirm against the generic header.
 */
#define _HAVE_ARCH_IPV6_CSUM	1

struct in6_addr;

extern unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
					   struct in6_addr *daddr,
					   __u32 len,
					   unsigned short proto,
					   unsigned int csum);
#endif /* _ASM_IA64_CHECKSUM_H */ #endif /* _ASM_IA64_CHECKSUM_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment