Commit f3b6eaf0 authored by Ingo Molnar

x86: memcpy, clean up

Impact: cleanup

Make this file more readable by bringing it more in line
with the usual kernel style.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent dd1ef4ec
/* Copyright 2002 Andi Kleen */ /* Copyright 2002 Andi Kleen */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/dwarf2.h>
/* /*
* memcpy - Copy a memory block. * memcpy - Copy a memory block.
...@@ -16,15 +17,22 @@ ...@@ -16,15 +17,22 @@
* rax original destination * rax original destination
*/ */
/*
 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
 *
 * Calls to this get patched into the kernel image via the
 * alternative instructions framework (see .altinstructions below):
 *
 * In:  rdi = destination, rsi = source, rdx = byte count
 * Out: rax = original destination
 * Clobbers: rcx, rdx, flags; advances rsi/rdi (REP semantics)
 */
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi, %rax		/* return value: original destination */

	movl %edx, %ecx
	shrl $3, %ecx		/* ecx = number of full quadwords */
	andl $7, %edx		/* edx = remaining tail bytes (0..7) */
	rep movsq		/* bulk copy, 8 bytes at a time */
	movl %edx, %ecx
	rep movsb		/* copy the tail byte-wise */
	ret
	CFI_ENDPROC
ENDPROC(memcpy_c)
/*
 * memcpy - Copy a memory block (generic, unrolled variant).
 *
 * In:  rdi = destination, rsi = source, rdx = byte count
 * Out: rax = original destination
 * Clobbers: rcx, r8, r9, r10, r11, flags; advances rsi/rdi
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC

	/*
	 * Put the number of full 64-byte blocks into %ecx.
	 * Tail portion is handled at the end:
	 */
	movq %rdi, %rax
	movl %edx, %ecx
	shrl   $6, %ecx
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	/*
	 * We decrement the loop index here - and the zero-flag is
	 * checked at the end of the loop (instructions inbetween do
	 * not change the zero flag):
	 */
	decl %ecx

	/*
	 * Move in blocks of 4x16 bytes:
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r8
	movq %r11, 0*8(%rdi)
	movq %r8, 1*8(%rdi)

	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r10
	movq %r9, 2*8(%rdi)
	movq %r10, 3*8(%rdi)

	movq 4*8(%rsi), %r11
	movq 5*8(%rsi), %r8
	movq %r11, 4*8(%rdi)
	movq %r8, 5*8(%rdi)

	movq 6*8(%rsi), %r9
	movq 7*8(%rsi), %r10
	movq %r9, 6*8(%rdi)
	movq %r10, 7*8(%rdi)

	leaq 64(%rsi), %rsi
	leaq 64(%rdi), %rdi

	jnz  .Lloop_64

.Lhandle_tail:
	/* Copy the remaining (count mod 64) bytes, 8 at a time first: */
	movl %edx, %ecx
	andl  $63, %ecx
	shrl   $3, %ecx
	jz   .Lhandle_7

	.p2align 4
.Lloop_8:
	decl %ecx
	movq (%rsi), %r8
	movq %r8, (%rdi)
	leaq 8(%rdi), %rdi
	leaq 8(%rsi), %rsi
	jnz  .Lloop_8

.Lhandle_7:
	/* Final 0..7 bytes, one at a time: */
	movl %edx, %ecx
	andl $7, %ecx
	jz .Lend

	.p2align 4
.Lloop_1:
	movb (%rsi), %r8b
	movb %r8b, (%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lend:
	ret
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
/*
 * Some CPUs run faster using the string copy instructions.
 * It is also a lot simpler. Use this when possible:
 *
 * The replacement below is a 2-byte short jump from the start of
 * memcpy() to memcpy_c(); the alternatives framework patches it in
 * on CPUs that have X86_FEATURE_REP_GOOD.
 */
	.section .altinstr_replacement, "ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous

	.section .altinstructions, "a"
	.align 8
	.quad memcpy				/* instruction to patch */
	.quad 1b				/* replacement bytes */
	.byte X86_FEATURE_REP_GOOD		/* required CPU feature */

	/*
	 * Replace only beginning, memcpy is used to apply alternatives,
	 * so it is silly to overwrite itself with nops - reboot is the
	 * only outcome...
	 */
	.byte 2b - 1b				/* original length */
	.byte 2b - 1b				/* replacement length */
	.previous
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment