Commit 8ed3d6a4 authored by Catalin Marinas

Thumb-2: Implement the unified arch/arm/lib functions

This patch adds ARM/Thumb-2 unified support for the arch/arm/lib/*
files.
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
parent 441b91ac
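
For context: the unified support leans on the ARM()/THUMB()/W() helpers from <asm/unified.h> and on Thumb-2 IT blocks. ARM() lines are assembled only for ARM kernels, THUMB() lines only under CONFIG_THUMB2_KERNEL, and W() forces a 32-bit ("wide") encoding. The it/itt/itett prefixes seen throughout conditionalise the following one to four instructions (one then/else letter per instruction) as Thumb-2 requires, and are harmless in ARM state with unified syntax. Roughly, the header boils down to this preprocessor sketch (not the verbatim kernel header):

#ifdef CONFIG_THUMB2_KERNEL
#define ARM(x...)                 /* dropped when building for Thumb-2 */
#define THUMB(x...)     x         /* kept only for Thumb-2 */
#define W(instr)        instr.w   /* force the 32-bit encoding */
#else
#define ARM(x...)       x
#define THUMB(x...)
#define W(instr)        instr
#endif
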
......@@ -25,6 +25,7 @@ along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -36,14 +37,19 @@ Boston, MA 02110-1301, USA. */
#define ah r1
#endif
.type __ashldi3, %function
ENTRY(__ashldi3)
.type __aeabi_llsl, %function
ENTRY(__aeabi_llsl)
subs r3, r2, #32
rsb ip, r2, #32
itett mi
movmi ah, ah, lsl r2
movpl ah, al, lsl r3
orrmi ah, ah, al, lsr ip
ARM( orrmi ah, ah, al, lsr ip )
THUMB( lsrmi r3, al, ip )
THUMB( orrmi ah, ah, r3 )
mov al, al, lsl r2
mov pc, lr
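
Restated as a C model, the hunk above implements the 64-bit left shift below (a sketch, assuming a shift count of 0..63). The split THUMB() sequence exists because Thumb-2 has no flexible second operand with a register-specified shift, so "orr ah, ah, al, lsr ip" must become a separate lsr plus orr:

unsigned long long llsl(unsigned long long v, unsigned int s)
{
	unsigned int al = (unsigned int)v;
	unsigned int ah = (unsigned int)(v >> 32);

	if (s < 32) {
		ah = (ah << s) | (s ? al >> (32 - s) : 0);	/* orrmi path */
		al <<= s;
	} else {
		ah = al << (s - 32);	/* movpl ah, al, lsl r3 */
		al = 0;			/* register lsl by >= 32 yields 0 on ARM */
	}
	return ((unsigned long long)ah << 32) | al;
}
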
......@@ -25,6 +25,7 @@ along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -36,14 +37,19 @@ Boston, MA 02110-1301, USA. */
#define ah r1
#endif
.type __ashrdi3, %function
ENTRY(__ashrdi3)
.type __aeabi_lasr, %function
ENTRY(__aeabi_lasr)
subs r3, r2, #32
rsb ip, r2, #32
itett mi
movmi al, al, lsr r2
movpl al, ah, asr r3
orrmi al, al, ah, lsl ip
ARM( orrmi al, al, ah, lsl ip )
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, asr r2
mov pc, lr
......@@ -10,6 +10,8 @@
* 27/03/03 Ian Molton Clean up CONFIG_CPU
*
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
......@@ -22,10 +24,12 @@
#define mask r7
#define offset r8
.type __backtrace, %function
ENTRY(__backtrace)
mov r1, #0x10
mov r0, fp
.type c_backtrace, %function
ENTRY(c_backtrace)
#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
......@@ -34,11 +38,16 @@ ENTRY(c_backtrace)
stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location...
tst r1, #0x10 @ 26 or 32-bit?
moveq mask, #0xfc000003
itte eq
ARM( moveq mask, #0xfc000003 )
THUMB( moveq mask, #0xfc000000 )
THUMB( orreq mask, #0x03 )
movne mask, #0
tst mask, r0
it ne
movne r0, #0
movs frame, r0
itt eq
1: moveq r0, #-2
ldmeqfd sp!, {r4 - r8, pc}
......@@ -59,6 +68,7 @@ ENTRY(c_backtrace)
mov r1, r1, lsr #10
ldr r3, .Ldsi+4
teq r1, r3
it eq
subeq save, save, #4
mov r0, save
bic r1, r2, mask
......@@ -70,6 +80,7 @@ ENTRY(c_backtrace)
mov r3, r1, lsr #10
ldr r2, .Ldsi+4
teq r3, r2 @ Check for stmia sp!, {args}
itt eq
addeq save, save, #4 @ next instruction
bleq .Ldumpstm
......@@ -78,12 +89,14 @@ ENTRY(c_backtrace)
mov r3, r1, lsr #10
ldr r2, .Ldsi
teq r3, r2
it eq
bleq .Ldumpstm
/*
* A zero next frame pointer means we're done.
*/
teq next, #0
it eq
ldmeqfd sp!, {r4 - r8, pc}
/*
......@@ -124,10 +137,13 @@ ENTRY(c_backtrace)
mov reg, #9
mov r7, #0
1: mov r3, #1
tst instr, r3, lsl reg
ARM( tst instr, r3, lsl reg )
THUMB( lsl r3, reg )
THUMB( tst instr, r3 )
beq 2f
add r7, r7, #1
teq r7, #4
itte eq
moveq r7, #0
moveq r3, #'\n'
movne r3, #' '
......@@ -138,6 +154,7 @@ ENTRY(c_backtrace)
2: subs reg, reg, #1
bpl 1b
teq r7, #0
itt ne
adrne r0, .Lcr
blne printk
mov r0, stack
......
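
For reference, the frame walk c_backtrace performs can be modelled in C along these lines (a sketch with a hypothetical walk_frames helper; offsets follow the APCS frame layout, and the real code additionally masks 26-bit PCs and decodes stmfd instructions to dump saved registers):

static void walk_frames(const unsigned long *fp)
{
	while (fp) {			/* a zero next fp ends the chain */
		unsigned long pc = fp[0];	/* [fp]       saved pc */
		unsigned long next = fp[-3];	/* [fp, #-12] caller's fp */

		(void)pc;	/* a real walker would print this */
		fp = (const unsigned long *)next;
	}
}
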
......@@ -20,7 +20,7 @@
mov pc, lr
.endm
.macro testop, instr, store
.macro testop, instr, store, cond=al
and r3, r0, #7 @ Get bit offset
mov r2, #1
add r1, r1, r0, lsr #3 @ Get byte offset
......@@ -34,11 +34,15 @@
#endif
1: ldrexb r2, [r1]
ands r0, r2, r3 @ save old value of bit
\instr r2, r2, r3 @ toggle bit
.ifnc \cond,al
it \cond
.endif
\instr r2, r2, r3 @ toggle bit
strexb ip, r2, [r1]
cmp ip, #0
bne 1b
cmp r0, #0
it ne
movne r0, #1
2: mov pc, lr
.endm
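
The testop macro above implements an atomic read-modify-write with ldrexb/strexb, retrying until the store-exclusive succeeds; the new cond argument lets callers conditionalise the modify step under an IT block. For the test_and_set case its semantics match this C sketch built on a GCC builtin (illustrative only, not the kernel implementation):

static inline int test_and_set_bit_c(int nr, volatile unsigned char *p)
{
	unsigned char mask = 1u << (nr & 7);
	unsigned char old = __atomic_fetch_or(p + (nr >> 3), mask,
					      __ATOMIC_RELAXED);
	return (old & mask) != 0;	/* asm normalises with movne r0, #1 */
}
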
......@@ -63,7 +67,7 @@
* Note: we can trivially conditionalise the store instruction
* to avoid dirtying the data cache.
*/
.macro testop, instr, store
.macro testop, instr, store, cond=al
add r1, r1, r0, lsr #3
and r3, r0, #7
mov r0, #1
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
......@@ -15,7 +17,9 @@
/* Purpose : Function to change a bit
* Prototype: int change_bit(int bit, void *addr)
*/
.type _change_bit_be, %function
ENTRY(_change_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
.type _change_bit_le, %function
ENTRY(_change_bit_le)
bitop eor
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -18,6 +20,7 @@
* : sz - number of bytes to clear
* Returns : number of bytes NOT cleared
*/
.type __clear_user, %function
ENTRY(__clear_user)
stmfd sp!, {r1, lr}
mov r2, #0
......@@ -26,22 +29,42 @@ ENTRY(__clear_user)
ands ip, r0, #3
beq 1f
cmp ip, #2
USER( strbt r2, [r0], #1)
USER( strlebt r2, [r0], #1)
USER( strltbt r2, [r0], #1)
ARM(USER( strbt r2, [r0], #1 ))
THUMB(USER( strbt r2, [r0] ))
THUMB( add r0, #1 )
ARM(USER( strlebt r2, [r0], #1 ))
itt le
THUMB(USER( strlebt r2, [r0] ))
THUMB( addle r0, #1 )
ARM(USER( strltbt r2, [r0], #1 ))
itt lt
THUMB(USER( strltbt r2, [r0] ))
THUMB( addlt r0, #1 )
rsb ip, ip, #4
sub r1, r1, ip @ 7 6 5 4 3 2 1
1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7
USER( strplt r2, [r0], #4)
USER( strplt r2, [r0], #4)
ARM(USER( strplt r2, [r0], #4 ))
ARM(USER( strplt r2, [r0], #4 ))
itttt pl
THUMB(USER( strplt r2, [r0] ))
THUMB(USER( strplt r2, [r0, #4] ))
THUMB( addpl r0, #8 )
bpl 1b
adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3
USER( strplt r2, [r0], #4)
ARM(USER( strplt r2, [r0], #4 ))
itt pl
THUMB(USER( strplt r2, [r0] ))
THUMB( addpl r0, #4 )
2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x
USER( strnebt r2, [r0], #1)
USER( strnebt r2, [r0], #1)
ARM(USER( strnebt r2, [r0], #1 ))
ARM(USER( strnebt r2, [r0], #1 ))
ittt ne
THUMB(USER( strnebt r2, [r0] ))
THUMB(USER( strnebt r2, [r0, #1] ))
THUMB( addne r0, #2 )
tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1
USER( strnebt r2, [r0], #1)
it ne
USER( strnebt r2, [r0] )
mov r0, #0
ldmfd sp!, {r1, pc}
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
......@@ -16,7 +18,9 @@
* Purpose : Function to clear a bit
* Prototype: int clear_bit(int bit, void *addr)
*/
.type _clear_bit_be, %function
ENTRY(_clear_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
.type _clear_bit_le, %function
ENTRY(_clear_bit_le)
bitop bic
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -33,8 +34,18 @@
* Number of bytes NOT copied.
*/
#ifndef CONFIG_THUMB2_KERNEL
#define LDR1W_SHIFT 0
#else
#define LDR1W_SHIFT 1
#endif
#define STR1W_SHIFT 0
.macro ldr1w ptr reg abort
100: ldrt \reg, [\ptr], #4
100:
ARM( ldrt \reg, [\ptr], #4 )
THUMB( ldrt \reg, [\ptr] )
THUMB( add.w \ptr, \ptr, #4 )
.section __ex_table, "a"
.long 100b, \abort
.previous
......@@ -53,14 +64,20 @@
.endm
.macro ldr1b ptr reg cond=al abort
100: ldr\cond\()bt \reg, [\ptr], #1
.ifnc \cond,al
itt \cond
.endif
100:
ARM( ldr\cond\()bt \reg, [\ptr], #1 )
THUMB( ldr\cond\()bt \reg, [\ptr] )
THUMB( add\cond \ptr, \ptr, #1 )
.section __ex_table, "a"
.long 100b, \abort
.previous
.endm
.macro str1w ptr reg abort
str \reg, [\ptr], #4
W(str) \reg, [\ptr], #4
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
......@@ -68,6 +85,9 @@
.endm
.macro str1b ptr reg cond=al abort
.ifnc \cond,al
it \cond
.endif
str\cond\()b \reg, [\ptr], #1
.endm
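
Each numbered local label above (100:) is registered in __ex_table so a fault on the user access can be redirected to the abort path instead of oopsing. The machinery amounts to the following sketch (search_fixup is a hypothetical helper; the real kernel keeps the table sorted and bisects it):

struct exception_table_entry {
	unsigned long insn;	/* address of the ldrt/strt that may fault */
	unsigned long fixup;	/* label such as 20f to resume at */
};

static unsigned long search_fixup(const struct exception_table_entry *tbl,
				  unsigned int n, unsigned long pc)
{
	for (unsigned int i = 0; i < n; i++)
		if (tbl[i].insn == pc)
			return tbl[i].fixup;
	return 0;	/* no fixup: genuine fault */
}
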
......@@ -83,6 +103,7 @@
.text
.type __copy_from_user, %function
ENTRY(__copy_from_user)
#include "copy_template.S"
......
......@@ -9,6 +9,8 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
......@@ -23,6 +25,7 @@
* Note that we probably achieve closer to the 100MB/s target with
* the core clock switching.
*/
.type copy_page, %function
ENTRY(copy_page)
stmfd sp!, {r4, lr} @ 2
PLD( pld [r1, #0] )
......@@ -39,8 +42,10 @@ ENTRY(copy_page)
ldmia r1!, {r3, r4, ip, lr} @ 4
subs r2, r2, #1 @ 1
stmia r0!, {r3, r4, ip, lr} @ 4
itt gt
ldmgtia r1!, {r3, r4, ip, lr} @ 4
bgt 1b @ 1
PLD( itt eq )
PLD( ldmeqia r1!, {r3, r4, ip, lr} )
PLD( beq 2b )
ldmfd sp!, {r4, pc} @ 3
......@@ -65,6 +65,13 @@
*
* Restore registers with the values previously saved with the
* 'preserv' macro. Called upon code termination.
*
* LDR1W_SHIFT
* STR1W_SHIFT
*
* Correction to be applied to the "ip" register when branching into
* the ldr1w or str1w instructions (some of these macros may expand to
* more than one 32-bit instruction in Thumb-2)
*/
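
The shift matters because ldrt has no post-indexed form in Thumb-2, so each ldr1w expands to two 32-bit instructions; the computed branch "add pc, pc, ip, lsl #LDR1W_SHIFT" scales the skip distance accordingly, and the ".rept (1 << LDR1W_SHIFT) - 1; W(nop)" padding keeps every slot the same size. The underlying trick is a computed jump into a run of identical copy slots, the same idea as Duff's device in C (sketch with hypothetical names):

static void copy_trailing_words(unsigned int *d, const unsigned int *s,
				unsigned int n)		/* n = 0..7 */
{
	switch (n) {		/* jump straight to the right slot */
	case 7: *d++ = *s++;	/* fall through */
	case 6: *d++ = *s++;	/* fall through */
	case 5: *d++ = *s++;	/* fall through */
	case 4: *d++ = *s++;	/* fall through */
	case 3: *d++ = *s++;	/* fall through */
	case 2: *d++ = *s++;	/* fall through */
	case 1: *d++ = *s++;	/* fall through */
	case 0: break;
	}
}
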
......@@ -107,9 +114,13 @@
5: ands ip, r2, #28
rsb ip, ip, #32
addne pc, pc, ip @ C is always clear here
it ne
addne pc, pc, ip, lsl #LDR1W_SHIFT @ C is always clear here
b 7f
6: nop
.rept (1 << LDR1W_SHIFT) - 1
W(nop)
.endr
ldr1w r1, r3, abort=20f
ldr1w r1, r4, abort=20f
ldr1w r1, r5, abort=20f
......@@ -118,9 +129,12 @@
ldr1w r1, r8, abort=20f
ldr1w r1, lr, abort=20f
add pc, pc, ip
add pc, pc, ip, lsl #STR1W_SHIFT
nop
nop
.rept (1 << STR1W_SHIFT) - 1
W(nop)
.endr
str1w r0, r3, abort=20f
str1w r0, r4, abort=20f
str1w r0, r5, abort=20f
......
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -33,8 +34,15 @@
* Number of bytes NOT copied.
*/
#define LDR1W_SHIFT 0
#ifndef CONFIG_THUMB2_KERNEL
#define STR1W_SHIFT 0
#else
#define STR1W_SHIFT 1
#endif
.macro ldr1w ptr reg abort
ldr \reg, [\ptr], #4
W(ldr) \reg, [\ptr], #4
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
......@@ -46,11 +54,17 @@
.endm
.macro ldr1b ptr reg cond=al abort
.ifnc \cond,al
it \cond
.endif
ldr\cond\()b \reg, [\ptr], #1
.endm
.macro str1w ptr reg abort
100: strt \reg, [\ptr], #4
100:
ARM( strt \reg, [\ptr], #4 )
THUMB( strt \reg, [\ptr] )
THUMB( add.w \ptr, \ptr, #4 )
.section __ex_table, "a"
.long 100b, \abort
.previous
......@@ -68,7 +82,13 @@
.endm
.macro str1b ptr reg cond=al abort
100: str\cond\()bt \reg, [\ptr], #1
.ifnc \cond,al
itt \cond
.endif
100:
ARM( str\cond\()bt \reg, [\ptr], #1 )
THUMB( str\cond\()bt \reg, [\ptr] )
THUMB( add\cond \ptr, \ptr, #1 )
.section __ex_table, "a"
.long 100b, \abort
.previous
......@@ -86,6 +106,7 @@
.text
.type __copy_to_user, %function
ENTRY(__copy_to_user)
#include "copy_template.S"
......
......@@ -7,11 +7,14 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.type __csum_ipv6_magic, %function
ENTRY(__csum_ipv6_magic)
str lr, [sp, #-4]!
adds ip, r2, r3
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -39,6 +41,7 @@ td3 .req lr
/* we must have at least one byte. */
tst buf, #1 @ odd address?
itttt ne
movne sum, sum, ror #8
ldrneb td0, [buf], #1
subne len, len, #1
......@@ -68,25 +71,30 @@ td3 .req lr
bne .Lless8_wordlp
.Lless8_byte: tst len, #1 @ odd number of bytes
itt ne
ldrneb td0, [buf], #1 @ include last byte
adcnes sum, sum, td0, put_byte_0 @ update checksum
.Ldone: adc r0, sum, #0 @ collect up the last carry
ldr td0, [sp], #4
tst td0, #1 @ check buffer alignment
it ne
movne r0, r0, ror #8 @ rotate checksum by 8 bits
ldr pc, [sp], #4 @ return
.Lnot_aligned: tst buf, #1 @ odd address
ittt ne
ldrneb td0, [buf], #1 @ make even
subne len, len, #1
adcnes sum, sum, td0, put_byte_1 @ update checksum
tst buf, #2 @ 32-bit aligned?
#if __LINUX_ARM_ARCH__ >= 4
itt ne
ldrneh td0, [buf], #2 @ make 32-bit aligned
subne len, len, #2
#else
itttt ne
ldrneb td0, [buf], #1
ldrneb ip, [buf], #1
subne len, len, #2
......@@ -96,19 +104,23 @@ td3 .req lr
orrne td0, ip, td0, lsl #8
#endif
#endif
it ne
adcnes sum, sum, td0 @ update checksum
mov pc, lr
.type csum_partial, %function
ENTRY(csum_partial)
stmfd sp!, {buf, lr}
cmp len, #8 @ Ensure that we have at least
blo .Lless8 @ 8 bytes to copy.
tst buf, #1
it ne
movne sum, sum, ror #8
adds sum, sum, #0 @ C = 0
tst buf, #3 @ Test destination alignment
it ne
blne .Lnot_aligned @ align destination, return here
1: bics ip, len, #31
......
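
csum_partial computes the usual 16-bit one's-complement sum, accumulating with adcs so carries wrap around, and rotating the partial sum by 8 bits when the buffer starts on an odd address. A byte-oriented C reference (a sketch, not bit-exact: the kernel keeps an unfolded 32-bit running sum):

static unsigned int csum16(const unsigned char *buf, int len, unsigned int sum)
{
	while (len > 1) {
		sum += buf[0] | (buf[1] << 8);	/* little-endian halfword */
		buf += 2;
		len -= 2;
	}
	if (len > 0)
		sum += buf[0];			/* trailing byte */
	while (sum >> 16)			/* fold end-around carries */
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}
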
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -18,13 +20,15 @@
*/
.macro save_regs
mov ip, sp
stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc}
sub fp, ip, #4
ARM( mov ip, sp )
ARM( stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc} )
ARM( sub fp, ip, #4 )
THUMB( stmfd sp!, {r1, r4 - r8, lr} )
.endm
.macro load_regs
ldmfd sp, {r1, r4 - r8, fp, sp, pc}
ARM( ldmfd sp, {r1, r4 - r8, fp, sp, pc} )
THUMB( ldmfd sp!, {r1, r4 - r8, pc} )
.endm
.macro load1b, reg1
......
......@@ -16,6 +16,8 @@
*
* Note that 'tst' and 'teq' preserve the carry flag.
*/
#include <asm/unified.h>
src .req r0
dst .req r1
......@@ -40,6 +42,7 @@ sum .req r3
adcs sum, sum, ip, put_byte_1 @ update checksum
strb ip, [dst], #1
tst dst, #2
it eq
moveq pc, lr @ dst is now 32bit aligned
.Ldst_16bit: load2b r8, ip
......@@ -94,6 +97,7 @@ FN_ENTRY
adds sum, sum, #0 @ C = 0
tst dst, #3 @ Test destination alignment
it ne
blne .Ldst_unaligned @ align destination, return here
/*
......@@ -147,6 +151,7 @@ FN_ENTRY
strb r5, [dst], #1
mov r5, r4, get_byte_2
.Lexit: tst len, #1
ittt ne
strneb r5, [dst], #1
andne r5, r5, #255
adcnes sum, sum, r5, put_byte_0
......@@ -160,6 +165,7 @@ FN_ENTRY
.Ldone: adc r0, sum, #0
ldr sum, [sp, #0] @ dst
tst sum, #1
it ne
movne r0, r0, ror #8
load_regs
......
......@@ -10,6 +10,8 @@
* 27/03/03 Ian Molton Clean up CONFIG_CPU
*
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>
......@@ -18,17 +20,22 @@
.text
.macro save_regs
mov ip, sp
stmfd sp!, {r1 - r2, r4 - r8, fp, ip, lr, pc}
sub fp, ip, #4
ARM( mov ip, sp )
ARM( stmfd sp!, {r1 - r2, r4 - r8, fp, ip, lr, pc} )
ARM( sub fp, ip, #4 )
THUMB( stmfd sp!, {r1, r2, r4 - r8, lr} )
.endm
.macro load_regs
ldmfd sp, {r1, r2, r4-r8, fp, sp, pc}
ARM( ldmfd sp, {r1, r2, r4-r8, fp, sp, pc} )
THUMB( ldmfd sp!, {r1, r2, r4 - r8, pc} )
.endm
.macro load1b, reg1
9999: ldrbt \reg1, [r0], $1
9999:
ARM( ldrbt \reg1, [r0], $1 )
THUMB( ldrbt \reg1, [r0] )
THUMB( add \reg1, $1 )
.section __ex_table, "a"
.align 3
.long 9999b, 6001f
......@@ -36,8 +43,14 @@
.endm
.macro load2b, reg1, reg2
9999: ldrbt \reg1, [r0], $1
9998: ldrbt \reg2, [r0], $1
9999:
ARM( ldrbt \reg1, [r0], $1 )
THUMB( ldrbt \reg1, [r0] )
THUMB( add \reg1, $1 )
9998:
ARM( ldrbt \reg2, [r0], $1 )
THUMB( ldrbt \reg2, [r0] )
THUMB( add \reg2, $1 )
.section __ex_table, "a"
.long 9999b, 6001f
.long 9998b, 6001f
......@@ -45,7 +58,10 @@
.endm
.macro load1l, reg1
9999: ldrt \reg1, [r0], $4
9999:
ARM( ldrt \reg1, [r0], $4 )
THUMB( ldrt \reg1, [r0] )
THUMB( add \reg1, $4 )
.section __ex_table, "a"
.align 3
.long 9999b, 6001f
......@@ -53,8 +69,14 @@
.endm
.macro load2l, reg1, reg2
9999: ldrt \reg1, [r0], $4
9998: ldrt \reg2, [r0], $4
9999:
ARM( ldrt \reg1, [r0], $4 )
THUMB( ldrt \reg1, [r0] )
THUMB( add \reg1, $4 )
9998:
ARM( ldrt \reg2, [r0], $4 )
THUMB( ldrt \reg2, [r0] )
THUMB( add \reg2, $4 )
.section __ex_table, "a"
.long 9999b, 6001f
.long 9998b, 6001f
......@@ -62,10 +84,22 @@
.endm
.macro load4l, reg1, reg2, reg3, reg4
9999: ldrt \reg1, [r0], $4
9998: ldrt \reg2, [r0], $4
9997: ldrt \reg3, [r0], $4
9996: ldrt \reg4, [r0], $4
9999:
ARM( ldrt \reg1, [r0], $4 )
THUMB( ldrt \reg1, [r0] )
THUMB( add \reg1, $4 )
9998:
ARM( ldrt \reg2, [r0], $4 )
THUMB( ldrt \reg2, [r0] )
THUMB( add \reg2, $4 )
9997:
ARM( ldrt \reg3, [r0], $4 )
THUMB( ldrt \reg3, [r0] )
THUMB( add \reg3, $4 )
9996:
ARM( ldrt \reg4, [r0], $4 )
THUMB( ldrt \reg4, [r0] )
THUMB( add \reg4, $4 )
.section __ex_table, "a"
.long 9999b, 6001f
.long 9998b, 6001f
......@@ -100,6 +134,7 @@
add r2, r2, r1
mov r0, #0 @ zero the buffer
6002: teq r2, r1
it ne
strneb r0, [r1], #1
bne 6002b
load_regs
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/param.h>
......@@ -21,9 +23,11 @@
* HZ <= 1000
*/
.type __udelay, %function
ENTRY(__udelay)
ldr r2, .LC1
mul r0, r2, r0
.type __const_udelay, %function
ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06
ldr r2, .LC0
ldr r2, [r2] @ max = 0x01ffffff
......@@ -31,6 +35,7 @@ ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06
mov r2, r2, lsr #10 @ max = 0x00007fff
mul r0, r2, r0 @ max = 2^32-1
movs r0, r0, lsr #6
it eq
moveq pc, lr
/*
......@@ -40,6 +45,7 @@ ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06
*/
@ Delay routine
.type __delay, %function
ENTRY(__delay)
subs r0, r0, #1
#if 0
......@@ -58,5 +64,6 @@ ENTRY(__delay)
movls pc, lr
subs r0, r0, #1
#endif
it hi
bhi __delay
mov pc, lr
......@@ -11,6 +11,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -43,6 +44,7 @@
* Clobbered regs: xl, ip
*/
.type __do_div64, %function
ENTRY(__do_div64)
@ Test for easy paths first.
......@@ -84,8 +86,10 @@ ENTRY(__do_div64)
@ The division loop for needed upper bit positions.
@ Break out early if dividend reaches 0.
2: cmp xh, yl
itt cs
orrcs yh, yh, ip
subcss xh, xh, yl
it ne
movnes ip, ip, lsr #1
mov yl, yl, lsr #1
bne 2b
......@@ -93,7 +97,9 @@ ENTRY(__do_div64)
@ See if we need to handle lower 32-bit result.
3: cmp xh, #0
mov yl, #0
it eq
cmpeq xl, r4
itt lo
movlo xh, xl
movlo pc, lr
......@@ -104,7 +110,9 @@ ENTRY(__do_div64)
4: movs xl, xl, lsl #1
adcs xh, xh, xh
beq 6f
it cc
cmpcc xh, r4
itt cs
5: orrcs yl, yl, ip
subcs xh, xh, r4
movs ip, ip, lsr #1
......@@ -116,6 +124,7 @@ ENTRY(__do_div64)
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
it eq
moveq pc, lr
@ We still have remainder bits in the low part. Bring them up.
......@@ -177,13 +186,16 @@ ENTRY(__do_div64)
mov yh, xh, lsr ip
mov yl, xl, lsr ip
rsb ip, ip, #32
orr yl, yl, xh, lsl ip
ARM( orr yl, yl, xh, lsl ip )
THUMB( lsl xh, xh, ip )
THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
mov pc, lr
@ eq -> division by 1: obvious enough...
9: moveq yl, xl
9: itttt eq
moveq yl, xl
moveq yh, xh
moveq xh, #0
moveq pc, lr
......
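
__do_div64 is a binary restoring division: align the divisor under the dividend, then subtract-and-shift while accumulating quotient bits (the unrolled 4-bits-per-iteration loop above is an optimisation of the same idea). A C sketch of the basic algorithm, assuming a non-zero divisor:

static unsigned long long div64_32(unsigned long long n, unsigned int base,
				   unsigned int *rem)	/* assumes base != 0 */
{
	unsigned long long d = base, q = 0, bit = 1;

	while (d < n && !(d >> 63)) {	/* align divisor under dividend */
		d <<= 1;
		bit <<= 1;
	}
	while (bit) {			/* subtract-and-shift */
		if (n >= d) {
			n -= d;
			q |= bit;
		}
		d >>= 1;
		bit >>= 1;
	}
	*rem = (unsigned int)n;
	return q;
}
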
......@@ -13,6 +13,8 @@
* also call with zero size.
* Reworked by rmk.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
......@@ -21,11 +23,15 @@
* Purpose : Find a 'zero' bit
* Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
*/
.type _find_first_zero_bit_le, %function
ENTRY(_find_first_zero_bit_le)
teq r1, #0
beq 3f
mov r2, #0
1: ldrb r3, [r0, r2, lsr #3]
1:
ARM( ldrb r3, [r0, r2, lsr #3] )
THUMB( lsr r3, r2, #3 )
THUMB( ldrb r3, [r0, r3] )
eors r3, r3, #0xff @ invert bits
bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
......@@ -38,12 +44,15 @@ ENTRY(_find_first_zero_bit_le)
* Purpose : Find next 'zero' bit
* Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
*/
.type _find_next_zero_bit_le, %function
ENTRY(_find_next_zero_bit_le)
teq r1, #0
beq 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ldrb r3, [r0, r2, lsr #3]
ARM( ldrb r3, [r0, r2, lsr #3] )
THUMB( lsr r3, r2, #3 )
THUMB( ldrb r3, [r0, r3] )
eor r3, r3, #0xff @ now looking for a 1 bit
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
......@@ -55,11 +64,15 @@ ENTRY(_find_next_zero_bit_le)
* Purpose : Find a 'one' bit
* Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit);
*/
.type _find_first_bit_le, %function
ENTRY(_find_first_bit_le)
teq r1, #0
beq 3f
mov r2, #0
1: ldrb r3, [r0, r2, lsr #3]
1:
ARM( ldrb r3, [r0, r2, lsr #3] )
THUMB( lsr r3, r2, #3 )
THUMB( ldrb r3, [r0, r3] )
movs r3, r3
bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
......@@ -72,12 +85,15 @@ ENTRY(_find_first_bit_le)
* Purpose : Find next 'one' bit
* Prototype: int find_next_bit(void *addr, unsigned int maxbit, int offset)
*/
.type _find_next_bit_le, %function
ENTRY(_find_next_bit_le)
teq r1, #0
beq 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ldrb r3, [r0, r2, lsr #3]
ARM( ldrb r3, [r0, r2, lsr #3] )
THUMB( lsr r3, r2, #3 )
THUMB( ldrb r3, [r0, r3] )
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
orr r2, r2, #7 @ if zero, then no bits here
......@@ -86,12 +102,15 @@ ENTRY(_find_next_bit_le)
#ifdef __ARMEB__
.type _find_first_zero_bit_be, %function
ENTRY(_find_first_zero_bit_be)
teq r1, #0
beq 3f
mov r2, #0
1: eor r3, r2, #0x18 @ big endian byte ordering
ldrb r3, [r0, r3, lsr #3]
ARM( ldrb r3, [r0, r3, lsr #3] )
THUMB( lsr r3, #3 )
THUMB( ldrb r3, [r0, r3] )
eors r3, r3, #0xff @ invert bits
bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
......@@ -100,13 +119,16 @@ ENTRY(_find_first_zero_bit_be)
3: mov r0, r1 @ no free bits
mov pc, lr
.type _find_next_zero_bit_be, %function
ENTRY(_find_next_zero_bit_be)
teq r1, #0
beq 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
ldrb r3, [r0, r3, lsr #3]
ARM( ldrb r3, [r0, r3, lsr #3] )
THUMB( lsr r3, #3 )
THUMB( ldrb r3, [r0, r3] )
eor r3, r3, #0xff @ now looking for a 1 bit
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
......@@ -114,12 +136,15 @@ ENTRY(_find_next_zero_bit_be)
add r2, r2, #1 @ align bit pointer
b 2b @ loop for next bit
.type _find_first_bit_be, %function
ENTRY(_find_first_bit_be)
teq r1, #0
beq 3f
mov r2, #0
1: eor r3, r2, #0x18 @ big endian byte ordering
ldrb r3, [r0, r3, lsr #3]
ARM( ldrb r3, [r0, r3, lsr #3] )
THUMB( lsr r3, #3 )
THUMB( ldrb r3, [r0, r3] )
movs r3, r3
bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
......@@ -128,13 +153,16 @@ ENTRY(_find_first_bit_be)
3: mov r0, r1 @ no free bits
mov pc, lr
.type _find_next_bit_be, %function
ENTRY(_find_next_bit_be)
teq r1, #0
beq 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
ldrb r3, [r0, r3, lsr #3]
ARM( ldrb r3, [r0, r3, lsr #3] )
THUMB( lsr r3, #3 )
THUMB( ldrb r3, [r0, r3] )
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
orr r2, r2, #7 @ if zero, then no bits here
......
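
All the findbit variants scan a byte at a time; the zero-bit searches invert each byte ("eors r3, r3, #0xff") so both cases reduce to finding the lowest set bit. A C sketch of the little-endian first-zero search (ignoring the final clamp when maxbit is not a multiple of 8):

static unsigned int find_first_zero_bit_c(const unsigned char *addr,
					  unsigned int maxbit)
{
	unsigned int bit;

	for (bit = 0; bit < maxbit; bit += 8) {
		unsigned int b = addr[bit >> 3] ^ 0xff;	/* invert bits */
		if (b)
			return bit + __builtin_ctz(b);	/* lowest zero bit */
	}
	return maxbit;		/* no zero bit found */
}
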
......@@ -26,19 +26,26 @@
* Note that ADDR_LIMIT is either 0 or 0xc0000000.
* Note also that it is intended that __get_user_bad is not global.
*/
#include <asm/unified.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/errno.h>
.global __get_user_1
.type __get_user_1, %function
__get_user_1:
1: ldrbt r2, [r0]
mov r0, #0
mov pc, lr
.global __get_user_2
.type __get_user_2, %function
__get_user_2:
2: ldrbt r2, [r0], #1
2:
ARM( ldrbt r2, [r0], #1 )
THUMB( ldrbt r2, [r0] )
THUMB( add r0, #1 )
3: ldrbt r3, [r0]
#ifndef __ARMEB__
orr r2, r2, r3, lsl #8
......@@ -49,11 +56,13 @@ __get_user_2:
mov pc, lr
.global __get_user_4
.type __get_user_4, %function
__get_user_4:
4: ldrt r2, [r0]
mov r0, #0
mov pc, lr
.type __get_user_bad, %function
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
......
......@@ -7,24 +7,31 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.Linsb_align: rsb ip, ip, #4
cmp ip, r2
it gt
movgt ip, r2
cmp ip, #2
ldrb r3, [r0]
strb r3, [r1], #1
itt ge
ldrgeb r3, [r0]
strgeb r3, [r1], #1
itt gt
ldrgtb r3, [r0]
strgtb r3, [r1], #1
subs r2, r2, ip
bne .Linsb_aligned
.type __raw_readsb, %function
ENTRY(__raw_readsb)
teq r2, #0 @ do we have to check for the zero len?
it eq
moveq pc, lr
ands ip, r1, #3
bne .Linsb_align
......@@ -72,6 +79,7 @@ ENTRY(__raw_readsb)
bpl .Linsb_16_lp
tst r2, #15
it eq
ldmeqfd sp!, {r4 - r6, pc}
.Linsb_no_16: tst r2, #8
......@@ -109,13 +117,16 @@ ENTRY(__raw_readsb)
str r3, [r1], #4
.Linsb_no_4: ands r2, r2, #3
it eq
ldmeqfd sp!, {r4 - r6, pc}
cmp r2, #2
ldrb r3, [r0]
strb r3, [r1], #1
itt ge
ldrgeb r3, [r0]
strgeb r3, [r1], #1
itt gt
ldrgtb r3, [r0]
strgtb r3, [r1]
......
......@@ -7,11 +7,15 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.type __raw_readsl, %function
ENTRY(__raw_readsl)
teq r2, #0 @ do we have to check for the zero len?
it eq
moveq pc, lr
ands ip, r1, #3
bne 3f
......@@ -28,9 +32,11 @@ ENTRY(__raw_readsl)
bpl 1b
ldmfd sp!, {r4, lr}
2: movs r2, r2, lsl #31
ittt cs
ldrcs r3, [r0, #0]
ldrcs ip, [r0, #0]
stmcsia r1!, {r3, ip}
itt ne
ldrne r3, [r0, #0]
strne r3, [r1, #0]
mov pc, lr
......@@ -48,6 +54,7 @@ ENTRY(__raw_readsl)
4: subs r2, r2, #1
mov ip, r3, pull #24
itttt ne
ldrne r3, [r0]
orrne ip, ip, r3, push #8
strne ip, [r1], #4
......@@ -56,6 +63,7 @@ ENTRY(__raw_readsl)
5: subs r2, r2, #1
mov ip, r3, pull #16
itttt ne
ldrne r3, [r0]
orrne ip, ip, r3, push #16
strne ip, [r1], #4
......@@ -64,6 +72,7 @@ ENTRY(__raw_readsl)
6: subs r2, r2, #1
mov ip, r3, pull #8
itttt ne
ldrne r3, [r0]
orrne ip, ip, r3, push #24
strne ip, [r1], #4
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -24,8 +26,10 @@
sub r2, r2, #1
strh ip, [r1], #2
.type __raw_readsw, %function
ENTRY(__raw_readsw)
teq r2, #0
it eq
moveq pc, lr
tst r1, #3
bne .Linsw_align
......@@ -76,7 +80,8 @@ ENTRY(__raw_readsw)
pack r3, r3, ip
str r3, [r1], #4
.Lno_insw_2: ldrneh r3, [r0]
.Lno_insw_2: itt ne
ldrneh r3, [r0]
strneh r3, [r1]
ldmfd sp!, {r4, r5, pc}
......@@ -94,6 +99,7 @@ ENTRY(__raw_readsw)
#endif
.Linsw_noalign: stmfd sp!, {r4, lr}
it cc
ldrccb ip, [r1, #-1]!
bcc 1f
......@@ -121,6 +127,7 @@ ENTRY(__raw_readsw)
3: tst r2, #1
strb ip, [r1], #1
itttt ne
ldrneh ip, [r0]
_BE_ONLY_( movne ip, ip, ror #8 )
strneb ip, [r1], #1
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -32,19 +34,24 @@
.Loutsb_align: rsb ip, ip, #4
cmp ip, r2
it gt
movgt ip, r2
cmp ip, #2
ldrb r3, [r1], #1
strb r3, [r0]
itt ge
ldrgeb r3, [r1], #1
strgeb r3, [r0]
itt gt
ldrgtb r3, [r1], #1
strgtb r3, [r0]
subs r2, r2, ip
bne .Loutsb_aligned
.type __raw_writesb, %function
ENTRY(__raw_writesb)
teq r2, #0 @ do we have to check for the zero len?
it eq
moveq pc, lr
ands ip, r1, #3
bne .Loutsb_align
......@@ -64,6 +71,7 @@ ENTRY(__raw_writesb)
bpl .Loutsb_16_lp
tst r2, #15
it eq
ldmeqfd sp!, {r4, r5, pc}
.Loutsb_no_16: tst r2, #8
......@@ -80,13 +88,16 @@ ENTRY(__raw_writesb)
outword r3
.Loutsb_no_4: ands r2, r2, #3
it eq
ldmeqfd sp!, {r4, r5, pc}
cmp r2, #2
ldrb r3, [r1], #1
strb r3, [r0]
itt ge
ldrgeb r3, [r1], #1
strgeb r3, [r0]
itt gt
ldrgtb r3, [r1]
strgtb r3, [r0]
......
......@@ -7,11 +7,15 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.type __raw_writesl, %function
ENTRY(__raw_writesl)
teq r2, #0 @ do we have to check for the zero len?
it eq
moveq pc, lr
ands ip, r1, #3
bne 3f
......@@ -28,10 +32,14 @@ ENTRY(__raw_writesl)
bpl 1b
ldmfd sp!, {r4, lr}
2: movs r2, r2, lsl #31
itt cs
ldmcsia r1!, {r3, ip}
strcs r3, [r0, #0]
it ne
ldrne r3, [r1, #0]
it cs
strcs ip, [r0, #0]
it ne
strne r3, [r0, #0]
mov pc, lr
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -29,8 +31,10 @@
sub r2, r2, #1
strh r3, [r0]
.type __raw_writesw, %function
ENTRY(__raw_writesw)
teq r2, #0
it eq
moveq pc, lr
ands r3, r1, #3
bne .Loutsw_align
......@@ -61,7 +65,8 @@ ENTRY(__raw_writesw)
ldr r3, [r1], #4
outword r3
.Lno_outsw_2: ldrneh r3, [r1]
.Lno_outsw_2: itt ne
ldrneh r3, [r1]
strneh r3, [r0]
ldmfd sp!, {r4, r5, pc}
......@@ -75,7 +80,11 @@ ENTRY(__raw_writesw)
#endif
.Loutsw_noalign:
ldr r3, [r1, -r3]!
ARM( ldr r3, [r1, -r3]! )
THUMB( rsb r3, r3, #0 )
THUMB( ldr r3, [r1, r3] )
THUMB( sub r1, r3 )
it cs
subcs r2, r2, #1
bcs 2f
subs r2, r2, #2
......@@ -91,6 +100,7 @@ ENTRY(__raw_writesw)
bpl 1b
tst r2, #1
3: movne ip, r3, lsr #8
3: itt ne
movne ip, r3, lsr #8
strneh ip, [r0]
mov pc, lr
......@@ -31,6 +31,7 @@ You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -56,6 +57,7 @@ Boston, MA 02111-1307, USA. */
@ at the left end of each 4 bit nibbles in the division loop
@ to save one loop in most cases.
tst \divisor, #0xe0000000
itte eq
moveq \divisor, \divisor, lsl #3
moveq \curbit, #8
movne \curbit, #1
......@@ -65,6 +67,7 @@ Boston, MA 02111-1307, USA. */
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1: cmp \divisor, #0x10000000
ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
movlo \curbit, \curbit, lsl #4
......@@ -73,6 +76,7 @@ Boston, MA 02111-1307, USA. */
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1: cmp \divisor, #0x80000000
ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
movlo \curbit, \curbit, lsl #1
......@@ -84,19 +88,25 @@ Boston, MA 02111-1307, USA. */
@ Division loop
1: cmp \dividend, \divisor
itt hs
subhs \dividend, \dividend, \divisor
orrhs \result, \result, \curbit
cmp \dividend, \divisor, lsr #1
itt hs
subhs \dividend, \dividend, \divisor, lsr #1
orrhs \result, \result, \curbit, lsr #1
cmp \dividend, \divisor, lsr #2
itt hs
subhs \dividend, \dividend, \divisor, lsr #2
orrhs \result, \result, \curbit, lsr #2
cmp \dividend, \divisor, lsr #3
itt hs
subhs \dividend, \dividend, \divisor, lsr #3
orrhs \result, \result, \curbit, lsr #3
cmp \dividend, #0 @ Early termination?
it ne
movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
it ne
movne \divisor, \divisor, lsr #4
bne 1b
......@@ -113,19 +123,24 @@ Boston, MA 02111-1307, USA. */
#else
cmp \divisor, #(1 << 16)
itt hs
movhs \divisor, \divisor, lsr #16
movhs \order, #16
it lo
movlo \order, #0
cmp \divisor, #(1 << 8)
itt hs
movhs \divisor, \divisor, lsr #8
addhs \order, \order, #8
cmp \divisor, #(1 << 4)
itt hs
movhs \divisor, \divisor, lsr #4
addhs \order, \order, #4
cmp \divisor, #(1 << 2)
ite hi
addhi \order, \order, #3
addls \order, \order, \divisor, lsr #1
......@@ -152,6 +167,7 @@ Boston, MA 02111-1307, USA. */
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1: cmp \divisor, #0x10000000
ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
addlo \order, \order, #4
......@@ -160,6 +176,7 @@ Boston, MA 02111-1307, USA. */
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1: cmp \divisor, #0x80000000
ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
addlo \order, \order, #1
......@@ -173,19 +190,25 @@ Boston, MA 02111-1307, USA. */
blt 2f
1: cmp \dividend, \divisor
it hs
subhs \dividend, \dividend, \divisor
cmp \dividend, \divisor, lsr #1
it hs
subhs \dividend, \dividend, \divisor, lsr #1
cmp \dividend, \divisor, lsr #2
it hs
subhs \dividend, \dividend, \divisor, lsr #2
cmp \dividend, \divisor, lsr #3
it hs
subhs \dividend, \dividend, \divisor, lsr #3
cmp \dividend, #1
mov \divisor, \divisor, lsr #4
it ge
subges \order, \order, #4
bge 1b
tst \order, #3
it ne
teqne \dividend, #0
beq 5f
......@@ -194,21 +217,27 @@ Boston, MA 02111-1307, USA. */
blt 4f
beq 3f
cmp \dividend, \divisor
it hs
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
3: cmp \dividend, \divisor
it hs
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
4: cmp \dividend, \divisor
it hs
subhs \dividend, \dividend, \divisor
5:
.endm
.type __udivsi3, %function
ENTRY(__udivsi3)
.type __aeabi_uidiv, %function
ENTRY(__aeabi_uidiv)
subs r2, r1, #1
it eq
moveq pc, lr
bcc Ldiv0
cmp r0, r1
......@@ -221,7 +250,8 @@ ENTRY(__aeabi_uidiv)
mov r0, r2
mov pc, lr
11: moveq r0, #1
11: ite eq
moveq r0, #1
movne r0, #0
mov pc, lr
......@@ -231,14 +261,19 @@ ENTRY(__aeabi_uidiv)
mov pc, lr
.type __umodsi3, %function
ENTRY(__umodsi3)
subs r2, r1, #1 @ compare divisor with 1
bcc Ldiv0
ite ne
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
it hi
tsthi r1, r2 @ see if divisor is power of 2
it eq
andeq r0, r0, r2
it ls
movls pc, lr
ARM_MOD_BODY r0, r1, r2, r3
......@@ -246,16 +281,20 @@ ENTRY(__umodsi3)
mov pc, lr
.type __divsi3, %function
ENTRY(__divsi3)
.type __aeabi_idiv, %function
ENTRY(__aeabi_idiv)
cmp r1, #0
eor ip, r0, r1 @ save the sign of the result.
beq Ldiv0
it mi
rsbmi r1, r1, #0 @ loops below use unsigned.
subs r2, r1, #1 @ division by 1 or -1 ?
beq 10f
movs r3, r0
it mi
rsbmi r3, r0, #0 @ positive dividend value
cmp r3, r1
bls 11f
......@@ -265,14 +304,18 @@ ENTRY(__aeabi_idiv)
ARM_DIV_BODY r3, r1, r0, r2
cmp ip, #0
it mi
rsbmi r0, r0, #0
mov pc, lr
10: teq ip, r0 @ same sign ?
it mi
rsbmi r0, r0, #0
mov pc, lr
11: movlo r0, #0
11: it lo
movlo r0, #0
itt eq
moveq r0, ip, asr #31
orreq r0, r0, #1
mov pc, lr
......@@ -281,32 +324,41 @@ ENTRY(__aeabi_idiv)
cmp ip, #0
mov r0, r3, lsr r2
it mi
rsbmi r0, r0, #0
mov pc, lr
.type __modsi3, %function
ENTRY(__modsi3)
cmp r1, #0
beq Ldiv0
it mi
rsbmi r1, r1, #0 @ loops below use unsigned.
movs ip, r0 @ preserve sign of dividend
it mi
rsbmi r0, r0, #0 @ if negative make positive
subs r2, r1, #1 @ compare divisor with 1
ite ne
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
it hi
tsthi r1, r2 @ see if divisor is power of 2
it eq
andeq r0, r0, r2
bls 10f
ARM_MOD_BODY r0, r1, r2, r3
10: cmp ip, #0
it mi
rsbmi r0, r0, #0
mov pc, lr
#ifdef CONFIG_AEABI
.type __aeabi_uidivmod, %function
ENTRY(__aeabi_uidivmod)
stmfd sp!, {r0, r1, ip, lr}
......@@ -316,6 +368,7 @@ ENTRY(__aeabi_uidivmod)
sub r1, r1, r3
mov pc, lr
.type __aeabi_idivmod, %function
ENTRY(__aeabi_idivmod)
stmfd sp!, {r0, r1, ip, lr}
......
......@@ -24,6 +24,7 @@ You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -36,14 +37,19 @@ Boston, MA 02110-1301, USA. */
#define ah r1
#endif
.type __lshrdi3, %function
ENTRY(__lshrdi3)
.type __aeabi_llsr, %function
ENTRY(__aeabi_llsr)
subs r3, r2, #32
rsb ip, r2, #32
itett mi
movmi al, al, lsr r2
movpl al, ah, lsr r3
orrmi al, al, ah, lsl ip
ARM( orrmi al, al, ah, lsl ip )
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, lsr r2
mov pc, lr
......@@ -9,11 +9,14 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.align 5
.type memchr, %function
ENTRY(memchr)
1: subs r2, r2, #1
bmi 2f
......@@ -21,5 +24,6 @@ ENTRY(memchr)
teq r3, r1
bne 1b
sub r0, r0, #1
2: movne r0, #0
2: it ne
movne r0, #0
mov pc, lr
......@@ -9,12 +9,16 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#define LDR1W_SHIFT 0
#define STR1W_SHIFT 0
.macro ldr1w ptr reg abort
ldr \reg, [\ptr], #4
W(ldr) \reg, [\ptr], #4
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
......@@ -26,11 +30,16 @@
.endm
.macro ldr1b ptr reg cond=al abort
.ifnc \cond,al
it \cond
ldr\cond\()b \reg, [\ptr], #1
.else
ldrb \reg, [\ptr], #1
.endif
.endm
.macro str1w ptr reg abort
str \reg, [\ptr], #4
W(str) \reg, [\ptr], #4
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
......@@ -38,7 +47,12 @@
.endm
.macro str1b ptr reg cond=al abort
.ifnc \cond,al
it \cond
str\cond\()b \reg, [\ptr], #1
.else
strb \reg, [\ptr], #1
.endif
.endm
.macro enter reg1 reg2
......@@ -53,6 +67,7 @@
/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
.type memcpy, %function
ENTRY(memcpy)
#include "copy_template.S"
......
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -34,10 +35,13 @@
* occurring in the opposite direction.
*/
.type memmove, %function
ENTRY(memmove)
subs ip, r0, r1
it hi
cmphi r2, ip
it ls
bls memcpy
stmfd sp!, {r0, r4, lr}
......@@ -79,46 +83,55 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
it ne
addne pc, pc, ip @ C is always clear here
b 7f
6: nop
ldr r3, [r1, #-4]!
ldr r4, [r1, #-4]!
ldr r5, [r1, #-4]!
ldr r6, [r1, #-4]!
ldr r7, [r1, #-4]!
ldr r8, [r1, #-4]!
ldr lr, [r1, #-4]!
W(ldr) r3, [r1, #-4]!
W(ldr) r4, [r1, #-4]!
W(ldr) r5, [r1, #-4]!
W(ldr) r6, [r1, #-4]!
W(ldr) r7, [r1, #-4]!
W(ldr) r8, [r1, #-4]!
W(ldr) lr, [r1, #-4]!
add pc, pc, ip
nop
nop
str r3, [r0, #-4]!
str r4, [r0, #-4]!
str r5, [r0, #-4]!
str r6, [r0, #-4]!
str r7, [r0, #-4]!
str r8, [r0, #-4]!
str lr, [r0, #-4]!
W(str) r3, [r0, #-4]!
W(str) r4, [r0, #-4]!
W(str) r5, [r0, #-4]!
W(str) r6, [r0, #-4]!
W(str) r7, [r0, #-4]!
W(str) r8, [r0, #-4]!
W(str) lr, [r0, #-4]!
CALGN( bcs 2b )
7: ldmfd sp!, {r5 - r8}
8: movs r2, r2, lsl #31
it ne
ldrneb r3, [r1, #-1]!
itt cs
ldrcsb r4, [r1, #-1]!
ldrcsb ip, [r1, #-1]
it ne
strneb r3, [r0, #-1]!
itt cs
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
ldmfd sp!, {r0, r4, pc}
9: cmp ip, #2
it gt
ldrgtb r3, [r1, #-1]!
it ge
ldrgeb r4, [r1, #-1]!
ldrb lr, [r1, #-1]!
it gt
strgtb r3, [r0, #-1]!
it ge
strgeb r4, [r0, #-1]!
subs r2, r2, ip
strb lr, [r0, #-1]!
......
......@@ -9,6 +9,8 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -19,7 +21,9 @@
1: subs r2, r2, #4 @ 1 do we have enough
blt 5f @ 1 bytes to align with?
cmp r3, #2 @ 1
it lt
strltb r1, [r0], #1 @ 1
it le
strleb r1, [r0], #1 @ 1
strb r1, [r0], #1 @ 1
add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
......@@ -28,6 +32,7 @@
* memzero again.
*/
.type memset, %function
ENTRY(memset)
ands r3, r0, #3 @ 1 unaligned?
bne 1b @ 1
......@@ -48,33 +53,41 @@ ENTRY(memset)
mov lr, r1
2: subs r2, r2, #64
itttt ge
stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
stmgeia r0!, {r1, r3, ip, lr}
stmgeia r0!, {r1, r3, ip, lr}
stmgeia r0!, {r1, r3, ip, lr}
bgt 2b
it eq
ldmeqfd sp!, {pc} @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r2, #32
itt ne
stmneia r0!, {r1, r3, ip, lr}
stmneia r0!, {r1, r3, ip, lr}
tst r2, #16
it ne
stmneia r0!, {r1, r3, ip, lr}
ldr lr, [sp], #4
4: tst r2, #8
it ne
stmneia r0!, {r1, r3}
tst r2, #4
it ne
strne r1, [r0], #4
/*
* When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
itt ne
strneb r1, [r0], #1
strneb r1, [r0], #1
tst r2, #1
it ne
strneb r1, [r0], #1
mov pc, lr
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -21,7 +23,9 @@
1: subs r1, r1, #4 @ 1 do we have enough
blt 5f @ 1 bytes to align with?
cmp r3, #2 @ 1
it lt
strltb r2, [r0], #1 @ 1
it le
strleb r2, [r0], #1 @ 1
strb r2, [r0], #1 @ 1
add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3))
......@@ -30,6 +34,7 @@
* memzero again.
*/
.type __memzero, %function
ENTRY(__memzero)
mov r2, #0 @ 1
ands r3, r0, #3 @ 1 unaligned?
......@@ -48,33 +53,41 @@ ENTRY(__memzero)
mov lr, r2 @ 1
3: subs r1, r1, #64 @ 1 write 32 bytes out per loop
itttt ge
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
bgt 3b @ 1
it eq
ldmeqfd sp!, {pc} @ 1/2 quick exit
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r1, #32 @ 1
itt ne
stmneia r0!, {r2, r3, ip, lr} @ 4
stmneia r0!, {r2, r3, ip, lr} @ 4
tst r1, #16 @ 1 16 bytes or more?
it ne
stmneia r0!, {r2, r3, ip, lr} @ 4
ldr lr, [sp], #4 @ 1
4: tst r1, #8 @ 1 8 bytes or more?
it ne
stmneia r0!, {r2, r3} @ 2
tst r1, #4 @ 1 4 bytes or more?
it ne
strne r2, [r0], #4 @ 1
/*
* When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
5: tst r1, #2 @ 1 2 bytes or more?
itt ne
strneb r2, [r0], #1 @ 1
strneb r2, [r0], #1 @ 1
tst r1, #1 @ 1 a byte left over
it ne
strneb r2, [r0], #1 @ 1
mov pc, lr @ 1
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -24,7 +25,9 @@
#define yh r3
#endif
.type __muldi3, %function
ENTRY(__muldi3)
.type __aeabi_lmul, %function
ENTRY(__aeabi_lmul)
mul xh, yl, xh
......
......@@ -26,42 +26,58 @@
* Note that ADDR_LIMIT is either 0 or 0xc0000000
* Note also that it is intended that __put_user_bad is not global.
*/
#include <asm/unified.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/errno.h>
.global __put_user_1
.type __put_user_1, %function
__put_user_1:
1: strbt r2, [r0]
mov r0, #0
mov pc, lr
.global __put_user_2
.type __put_user_2, %function
__put_user_2:
mov ip, r2, lsr #8
#ifndef __ARMEB__
2: strbt r2, [r0], #1
2:
ARM( strbt r2, [r0], #1 )
THUMB( strbt r2, [r0] )
THUMB( add r0, #1 )
3: strbt ip, [r0]
#else
2: strbt ip, [r0], #1
2:
ARM( strbt ip, [r0], #1 )
THUMB( strbt ip, [r0] )
THUMB( add r0, #1 )
3: strbt r2, [r0]
#endif
mov r0, #0
mov pc, lr
.global __put_user_4
.type __put_user_4, %function
__put_user_4:
4: strt r2, [r0]
mov r0, #0
mov pc, lr
.global __put_user_8
.type __put_user_8, %function
__put_user_8:
5: strt r2, [r0], #4
5:
ARM( strt r2, [r0], #4 )
THUMB( strt r2, [r0] )
THUMB( add r0, #4 )
6: strt r3, [r0]
mov r0, #0
mov pc, lr
.type __put_user_bad, %function
__put_user_bad:
mov r0, #-EFAULT
mov pc, lr
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
......@@ -16,7 +18,9 @@
* Purpose : Function to set a bit
* Prototype: int set_bit(int bit, void *addr)
*/
.type _set_bit_be, %function
ENTRY(_set_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
.type _set_bit_le, %function
ENTRY(_set_bit_le)
bitop orr
......@@ -12,6 +12,7 @@
*
* The reference implementation for this code is linux/lib/sha1.c
*/
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -24,6 +25,7 @@
* Note: the "in" ptr may be unaligned.
*/
.type sha_transform, %function
ENTRY(sha_transform)
stmfd sp!, {r4 - r8, lr}
......@@ -185,6 +187,7 @@ ENTRY(sha_transform)
ldmfd sp!, {r4 - r8, pc}
.align 2
.L_sha_K:
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
......@@ -193,9 +196,11 @@ ENTRY(sha_transform)
* void sha_init(__u32 *buf)
*/
.align 2
.L_sha_initial_digest:
.word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
.type sha_init, %function
ENTRY(sha_init)
str lr, [sp, #-4]!
......
......@@ -9,18 +9,23 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.align 5
.type strchr, %function
ENTRY(strchr)
and r1, r1, #0xff
1: ldrb r2, [r0], #1
teq r2, r1
it ne
teqne r2, #0
bne 1b
teq r2, r1
ite ne
movne r0, #0
subeq r0, r0, #1
mov pc, lr
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>
......@@ -20,10 +22,14 @@
* returns the number of characters copied (strlen of copied string),
* -EFAULT on exception, or "len" if we fill the whole buffer
*/
.type __strncpy_from_user, %function
ENTRY(__strncpy_from_user)
mov ip, r1
1: subs r2, r2, #1
USER( ldrplbt r3, [r1], #1)
ARM(USER( ldrplbt r3, [r1], #1 ))
itt pl
THUMB(USER( ldrplbt r3, [r1] ))
THUMB( addpl r1, #1 )
bmi 2f
strb r3, [r0], #1
teq r3, #0
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>
......@@ -20,10 +22,13 @@
* Returns : length of string *including terminator*
* or zero on exception, or n + 1 if too long
*/
.type __strnlen_user, %function
ENTRY(__strnlen_user)
mov r2, r0
1:
USER( ldrbt r3, [r0], #1)
ARM(USER( ldrbt r3, [r0], #1 ))
THUMB(USER( ldrbt r3, [r0] ))
THUMB( add r0, #1 )
teq r3, #0
beq 2f
subs r1, r1, #1
......
......@@ -9,15 +9,19 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.align 5
.type strrchr, %function
ENTRY(strrchr)
mov r3, #0
1: ldrb r2, [r0], #1
teq r2, r1
it eq
subeq r3, r0, #1
teq r2, #0
bne 1b
......
......@@ -7,12 +7,16 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
.text
.type _test_and_change_bit_be, %function
ENTRY(_test_and_change_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
.type _test_and_change_bit_le, %function
ENTRY(_test_and_change_bit_le)
testop eor, strb
......@@ -7,12 +7,16 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
.text
.type _test_and_clear_bit_be, %function
ENTRY(_test_and_clear_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
.type _test_and_clear_bit_le, %function
ENTRY(_test_and_clear_bit_le)
testop bicne, strneb
testop bicne, strneb, ne
......@@ -7,12 +7,16 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
.text
.type _test_and_set_bit_be, %function
ENTRY(_test_and_set_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
.type _test_and_set_bit_le, %function
ENTRY(_test_and_set_bit_le)
testop orreq, streqb
testop orreq, streqb, eq
......@@ -39,6 +39,7 @@ USER( strgtbt r3, [r0], #1) @ May fault
sub r2, r2, ip
b .Lc2u_dest_aligned
.type __copy_to_user, %function
ENTRY(__copy_to_user)
stmfd sp!, {r2, r4 - r7, lr}
cmp r2, #4
......@@ -302,6 +303,7 @@ USER( ldrgtbt r3, [r1], #1) @ May fault
sub r2, r2, ip
b .Lcfu_dest_aligned
.type __copy_from_user, %function
ENTRY(__copy_from_user)
stmfd sp!, {r0, r2, r4 - r7, lr}
cmp r2, #4
......
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -24,23 +25,33 @@
#define yh r3
#endif
.type __ucmpdi2, %function
ENTRY(__ucmpdi2)
cmp xh, yh
it eq
cmpeq xl, yl
it lo
movlo r0, #0
it eq
moveq r0, #1
it hi
movhi r0, #2
mov pc, lr
#ifdef CONFIG_AEABI
.type __aeabi_ulcmp, %function
ENTRY(__aeabi_ulcmp)
cmp xh, yh
it eq
cmpeq xl, yl
it lo
movlo r0, #-1
it eq
moveq r0, #0
it hi
movhi r0, #1
mov pc, lr
......
......@@ -73,6 +73,7 @@ ip_fast_csum(const void *iph, unsigned int ihl)
1: adcs %0, %0, %3 \n\
ldr %3, [%1], #4 \n\
tst %2, #15 @ do this carefully \n\
it ne \n\
subne %2, %2, #1 @ without destroying \n\
bne 1b @ the carry flag \n\
adcs %0, %0, %3 \n\
......
......@@ -12,6 +12,7 @@
* User space memory access functions
*/
#include <linux/sched.h>
#include <asm/unified.h>
#include <asm/errno.h>
#include <asm/memory.h>
#include <asm/domain.h>
......@@ -68,7 +69,7 @@ static inline void set_fs(mm_segment_t fs)
#define __addr_ok(addr) ({ \
unsigned long flag; \
__asm__("cmp %2, %0; movlo %0, #0" \
__asm__("cmp %2, %0; it lo; movlo %0, #0" \
: "=&r" (flag) \
: "0" (current_thread_info()->addr_limit), "r" (addr) \
: "cc"); \
......@@ -78,7 +79,7 @@ static inline void set_fs(mm_segment_t fs)
#define __range_ok(addr,size) ({ \
unsigned long flag, roksum; \
__chk_user_ptr(addr); \
__asm__("adds %1, %2, %3; sbcccs %1, %1, %0; movcc %0, #0" \
__asm__("adds %1, %2, %3; it cc; sbcccs %1, %1, %0; it cc; movcc %0, #0" \
: "=&r" (flag), "=&r" (roksum) \
: "r" (addr), "Ir" (size), "0" (current_thread_info()->addr_limit) \
: "cc"); \
......@@ -225,7 +226,7 @@ do { \
#define __get_user_asm_byte(x,addr,err) \
__asm__ __volatile__( \
"1: ldrbt %1,[%2],#0\n" \
"1: ldrbt %1,[%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......@@ -261,7 +262,7 @@ do { \
#define __get_user_asm_word(x,addr,err) \
__asm__ __volatile__( \
"1: ldrt %1,[%2],#0\n" \
"1: ldrt %1,[%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......@@ -306,7 +307,7 @@ do { \
#define __put_user_asm_byte(x,__pu_addr,err) \
__asm__ __volatile__( \
"1: strbt %1,[%2],#0\n" \
"1: strbt %1,[%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......@@ -339,7 +340,7 @@ do { \
#define __put_user_asm_word(x,__pu_addr,err) \
__asm__ __volatile__( \
"1: strt %1,[%2],#0\n" \
"1: strt %1,[%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......@@ -364,8 +365,10 @@ do { \
#define __put_user_asm_dword(x,__pu_addr,err) \
__asm__ __volatile__( \
"1: strt " __reg_oper1 ", [%1], #4\n" \
"2: strt " __reg_oper0 ", [%1], #0\n" \
ARM( "1: strt " __reg_oper1 ", [%1], #4\n" ) \
THUMB( "1: strt " __reg_oper1 ", [%1]\n" ) \
THUMB( " add %1, %1, #4\n" ) \
"2: strt " __reg_oper0 ", [%1]\n" \
"3:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......