Commit ac04c319 authored by Catalin Marinas's avatar Catalin Marinas

Thumb-2: Implement the unified arch/arm/lib functions

This patch adds the ARM/Thumb-2 unified support for the arch/arm/lib/*
files.
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
parent 530ca0d3
......@@ -25,6 +25,7 @@ along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -41,9 +42,12 @@ ENTRY(__aeabi_llsl)
subs r3, r2, #32
rsb ip, r2, #32
itett mi
movmi ah, ah, lsl r2
movpl ah, al, lsl r3
orrmi ah, ah, al, lsr ip
ARM( orrmi ah, ah, al, lsr ip )
THUMB( lsrmi r3, al, ip )
THUMB( orrmi ah, ah, r3 )
mov al, al, lsl r2
mov pc, lr
......
......@@ -25,6 +25,7 @@ along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -41,9 +42,12 @@ ENTRY(__aeabi_lasr)
subs r3, r2, #32
rsb ip, r2, #32
itett mi
movmi al, al, lsr r2
movpl al, ah, asr r3
orrmi al, al, ah, lsl ip
ARM( orrmi al, al, ah, lsl ip )
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, asr r2
mov pc, lr
......
......@@ -10,6 +10,8 @@
* 27/03/03 Ian Molton Clean up CONFIG_CPU
*
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
......@@ -38,7 +40,10 @@ ENDPROC(c_backtrace)
beq no_frame @ we have no stack frames
tst r1, #0x10 @ 26 or 32-bit mode?
moveq mask, #0xfc000003 @ mask for 26-bit
itte eq
ARM( moveq mask, #0xfc000003 )
THUMB( moveq mask, #0xfc000000 )
THUMB( orreq mask, #0x03 )
movne mask, #0 @ mask for 32-bit
1: stmfd sp!, {pc} @ calculate offset of PC stored
......@@ -73,6 +78,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions
1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists,
ldr r3, .Ldsi+4 @ adjust saved 'pc' back one
teq r3, r2, lsr #10 @ instruction
ite ne
subne r0, sv_pc, #4 @ allow for mov
subeq r0, sv_pc, #8 @ allow for mov + stmia
......@@ -84,6 +90,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions
ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists,
ldr r3, .Ldsi+4
teq r3, r1, lsr #10
ittt eq
ldreq r0, [frame, #-8] @ get sp
subeq r0, r0, #4 @ point at the last arg
bleq .Ldumpstm @ dump saved registers
......@@ -91,6 +98,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions
1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc}
ldr r3, .Ldsi @ instruction exists,
teq r3, r1, lsr #10
itt eq
subeq r0, frame, #16
bleq .Ldumpstm @ dump saved registers
......@@ -126,10 +134,13 @@ ENDPROC(c_backtrace)
mov reg, #10
mov r7, #0
1: mov r3, #1
tst instr, r3, lsl reg
ARM( tst instr, r3, lsl reg )
THUMB( lsl r3, reg )
THUMB( tst instr, r3 )
beq 2f
add r7, r7, #1
teq r7, #6
itte eq
moveq r7, #1
moveq r1, #'\n'
movne r1, #' '
......@@ -140,6 +151,7 @@ ENDPROC(c_backtrace)
2: subs reg, reg, #1
bpl 1b
teq r7, #0
itt ne
adrne r0, .Lcr
blne printk
ldmfd sp!, {instr, reg, stack, r7, pc}
......
......@@ -13,18 +13,22 @@
mov pc, lr
.endm
.macro testop, instr, store
.macro testop, instr, store, cond=al
and r3, r0, #7 @ Get bit offset
mov r2, #1
add r1, r1, r0, lsr #3 @ Get byte offset
mov r3, r2, lsl r3 @ create mask
1: ldrexb r2, [r1]
ands r0, r2, r3 @ save old value of bit
\instr r2, r2, r3 @ toggle bit
.ifnc \cond,al
it \cond
.endif
\instr r2, r2, r3 @ toggle bit
strexb ip, r2, [r1]
cmp ip, #0
bne 1b
cmp r0, #0
it ne
movne r0, #1
2: mov pc, lr
.endm
......@@ -49,7 +53,7 @@
* Note: we can trivially conditionalise the store instruction
* to avoid dirtying the data cache.
*/
.macro testop, instr, store
.macro testop, instr, store, cond=al
add r1, r1, r0, lsr #3
and r3, r0, #7
mov r0, #1
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -26,22 +28,42 @@ ENTRY(__clear_user)
ands ip, r0, #3
beq 1f
cmp ip, #2
USER( strbt r2, [r0], #1)
USER( strlebt r2, [r0], #1)
USER( strltbt r2, [r0], #1)
ARM(USER( strbt r2, [r0], #1 ))
THUMB(USER( strbt r2, [r0] ))
THUMB( add r0, #1 )
ARM(USER( strlebt r2, [r0], #1 ))
itt le
THUMB(USER( strlebt r2, [r0] ))
THUMB( addle r0, #1 )
ARM(USER( strltbt r2, [r0], #1 ))
itt lt
THUMB(USER( strltbt r2, [r0] ))
THUMB( addlt r0, #1 )
rsb ip, ip, #4
sub r1, r1, ip @ 7 6 5 4 3 2 1
1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7
USER( strplt r2, [r0], #4)
USER( strplt r2, [r0], #4)
ARM(USER( strplt r2, [r0], #4 ))
ARM(USER( strplt r2, [r0], #4 ))
itttt pl
THUMB(USER( strplt r2, [r0] ))
THUMB(USER( strplt r2, [r0, #4] ))
THUMB( addpl r0, #8 )
bpl 1b
adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3
USER( strplt r2, [r0], #4)
ARM(USER( strplt r2, [r0], #4 ))
itt pl
THUMB(USER( strplt r2, [r0] ))
THUMB( addpl r0, #4 )
2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x
USER( strnebt r2, [r0], #1)
USER( strnebt r2, [r0], #1)
ARM(USER( strnebt r2, [r0], #1 ))
ARM(USER( strnebt r2, [r0], #1 ))
ittt ne
THUMB(USER( strnebt r2, [r0] ))
THUMB(USER( strnebt r2, [r0, #1] ))
THUMB( addne r0, #2 )
tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1
USER( strnebt r2, [r0], #1)
it ne
USER( strnebt r2, [r0] )
mov r0, #0
ldmfd sp!, {r1, pc}
ENDPROC(__clear_user)
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
......
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -33,8 +34,18 @@
* Number of bytes NOT copied.
*/
#ifndef CONFIG_THUMB2_KERNEL
#define LDR1W_SHIFT 0
#else
#define LDR1W_SHIFT 1
#endif
#define STR1W_SHIFT 0
.macro ldr1w ptr reg abort
100: ldrt \reg, [\ptr], #4
100:
ARM( ldrt \reg, [\ptr], #4 )
THUMB( ldrt \reg, [\ptr] )
THUMB( add.w \ptr, \ptr, #4 )
.section __ex_table, "a"
.long 100b, \abort
.previous
......@@ -53,14 +64,20 @@
.endm
.macro ldr1b ptr reg cond=al abort
100: ldr\cond\()bt \reg, [\ptr], #1
.ifnc \cond,al
itt \cond
.endif
100:
ARM( ldr\cond\()bt \reg, [\ptr], #1 )
THUMB( ldr\cond\()bt \reg, [\ptr] )
THUMB( add\cond \ptr, \ptr, #1 )
.section __ex_table, "a"
.long 100b, \abort
.previous
.endm
.macro str1w ptr reg abort
str \reg, [\ptr], #4
W(str) \reg, [\ptr], #4
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
......@@ -68,6 +85,9 @@
.endm
.macro str1b ptr reg cond=al abort
.ifnc \cond,al
it \cond
.endif
str\cond\()b \reg, [\ptr], #1
.endm
......
......@@ -9,6 +9,8 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
......@@ -39,8 +41,10 @@ ENTRY(copy_page)
ldmia r1!, {r3, r4, ip, lr} @ 4
subs r2, r2, #1 @ 1
stmia r0!, {r3, r4, ip, lr} @ 4
itt gt
ldmgtia r1!, {r3, r4, ip, lr} @ 4
bgt 1b @ 1
PLD( itt eq )
PLD( ldmeqia r1!, {r3, r4, ip, lr} )
PLD( beq 2b )
ldmfd sp!, {r4, pc} @ 3
......
......@@ -65,6 +65,13 @@
*
* Restore registers with the values previously saved with the
* 'preserv' macro. Called upon code termination.
*
* LDR1W_SHIFT
* STR1W_SHIFT
*
* Correction to be applied to the "ip" register when branching into
* the ldr1w or str1w instructions (some of these macros may expand to
more than one 32bit instruction in Thumb-2)
*/
......@@ -107,9 +114,16 @@
5: ands ip, r2, #28
rsb ip, ip, #32
#if LDR1W_SHIFT > 0
lsl ip, ip, #LDR1W_SHIFT
#endif
it ne
addne pc, pc, ip @ C is always clear here
b 7f
6: nop
6:
.rept (1 << LDR1W_SHIFT)
W(nop)
.endr
ldr1w r1, r3, abort=20f
ldr1w r1, r4, abort=20f
ldr1w r1, r5, abort=20f
......@@ -118,9 +132,16 @@
ldr1w r1, r8, abort=20f
ldr1w r1, lr, abort=20f
#if LDR1W_SHIFT < STR1W_SHIFT
lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
#elif LDR1W_SHIFT > STR1W_SHIFT
lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
#endif
add pc, pc, ip
nop
nop
.rept (1 << STR1W_SHIFT)
W(nop)
.endr
str1w r0, r3, abort=20f
str1w r0, r4, abort=20f
str1w r0, r5, abort=20f
......
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -33,8 +34,15 @@
* Number of bytes NOT copied.
*/
#define LDR1W_SHIFT 0
#ifndef CONFIG_THUMB2_KERNEL
#define STR1W_SHIFT 0
#else
#define STR1W_SHIFT 1
#endif
.macro ldr1w ptr reg abort
ldr \reg, [\ptr], #4
W(ldr) \reg, [\ptr], #4
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
......@@ -46,11 +54,17 @@
.endm
.macro ldr1b ptr reg cond=al abort
.ifnc \cond,al
it \cond
.endif
ldr\cond\()b \reg, [\ptr], #1
.endm
.macro str1w ptr reg abort
100: strt \reg, [\ptr], #4
100:
ARM( strt \reg, [\ptr], #4 )
THUMB( strt \reg, [\ptr] )
THUMB( add.w \ptr, \ptr, #4 )
.section __ex_table, "a"
.long 100b, \abort
.previous
......@@ -68,7 +82,13 @@
.endm
.macro str1b ptr reg cond=al abort
100: str\cond\()bt \reg, [\ptr], #1
.ifnc \cond,al
itt \cond
.endif
100:
ARM( str\cond\()bt \reg, [\ptr], #1 )
THUMB( str\cond\()bt \reg, [\ptr] )
THUMB( add\cond \ptr, \ptr, #1 )
.section __ex_table, "a"
.long 100b, \abort
.previous
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -39,6 +41,7 @@ td3 .req lr
/* we must have at least one byte. */
tst buf, #1 @ odd address?
itttt ne
movne sum, sum, ror #8
ldrneb td0, [buf], #1
subne len, len, #1
......@@ -68,25 +71,30 @@ td3 .req lr
bne .Lless8_wordlp
.Lless8_byte: tst len, #1 @ odd number of bytes
itt ne
ldrneb td0, [buf], #1 @ include last byte
adcnes sum, sum, td0, put_byte_0 @ update checksum
.Ldone: adc r0, sum, #0 @ collect up the last carry
ldr td0, [sp], #4
tst td0, #1 @ check buffer alignment
it ne
movne r0, r0, ror #8 @ rotate checksum by 8 bits
ldr pc, [sp], #4 @ return
.Lnot_aligned: tst buf, #1 @ odd address
ittt ne
ldrneb td0, [buf], #1 @ make even
subne len, len, #1
adcnes sum, sum, td0, put_byte_1 @ update checksum
tst buf, #2 @ 32-bit aligned?
#if __LINUX_ARM_ARCH__ >= 4
itt ne
ldrneh td0, [buf], #2 @ make 32-bit aligned
subne len, len, #2
#else
itttt ne
ldrneb td0, [buf], #1
ldrneb ip, [buf], #1
subne len, len, #2
......@@ -96,6 +104,7 @@ td3 .req lr
orrne td0, ip, td0, lsl #8
#endif
#endif
it ne
adcnes sum, sum, td0 @ update checksum
mov pc, lr
......@@ -105,10 +114,12 @@ ENTRY(csum_partial)
blo .Lless8 @ 8 bytes to copy.
tst buf, #1
it ne
movne sum, sum, ror #8
adds sum, sum, #0 @ C = 0
tst buf, #3 @ Test destination alignment
it ne
blne .Lnot_aligned @ align destination, return here
1: bics ip, len, #31
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -18,13 +20,15 @@
*/
.macro save_regs
mov ip, sp
stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc}
sub fp, ip, #4
ARM( mov ip, sp )
ARM( stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc} )
ARM( sub fp, ip, #4 )
THUMB( stmfd sp!, {r1, r4 - r8, lr} )
.endm
.macro load_regs
ldmfd sp, {r1, r4 - r8, fp, sp, pc}
ARM( ldmfd sp, {r1, r4 - r8, fp, sp, pc} )
THUMB( ldmfd sp!, {r1, r4 - r8, pc} )
.endm
.macro load1b, reg1
......
......@@ -16,6 +16,8 @@
*
* Note that 'tst' and 'teq' preserve the carry flag.
*/
#include <asm/unified.h>
src .req r0
dst .req r1
......@@ -40,6 +42,7 @@ sum .req r3
adcs sum, sum, ip, put_byte_1 @ update checksum
strb ip, [dst], #1
tst dst, #2
it eq
moveq pc, lr @ dst is now 32bit aligned
.Ldst_16bit: load2b r8, ip
......@@ -94,6 +97,7 @@ FN_ENTRY
adds sum, sum, #0 @ C = 0
tst dst, #3 @ Test destination alignment
it ne
blne .Ldst_unaligned @ align destination, return here
/*
......@@ -147,6 +151,7 @@ FN_ENTRY
strb r5, [dst], #1
mov r5, r4, get_byte_2
.Lexit: tst len, #1
ittt ne
strneb r5, [dst], #1
andne r5, r5, #255
adcnes sum, sum, r5, put_byte_0
......@@ -160,6 +165,7 @@ FN_ENTRY
.Ldone: adc r0, sum, #0
ldr sum, [sp, #0] @ dst
tst sum, #1
it ne
movne r0, r0, ror #8
load_regs
......
......@@ -10,6 +10,8 @@
* 27/03/03 Ian Molton Clean up CONFIG_CPU
*
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>
......@@ -18,17 +20,22 @@
.text
.macro save_regs
mov ip, sp
stmfd sp!, {r1 - r2, r4 - r8, fp, ip, lr, pc}
sub fp, ip, #4
ARM( mov ip, sp )
ARM( stmfd sp!, {r1 - r2, r4 - r8, fp, ip, lr, pc} )
ARM( sub fp, ip, #4 )
THUMB( stmfd sp!, {r1, r2, r4 - r8, lr} )
.endm
.macro load_regs
ldmfd sp, {r1, r2, r4-r8, fp, sp, pc}
ARM( ldmfd sp, {r1, r2, r4-r8, fp, sp, pc} )
THUMB( ldmfd sp!, {r1, r2, r4 - r8, pc} )
.endm
.macro load1b, reg1
9999: ldrbt \reg1, [r0], $1
9999:
ARM( ldrbt \reg1, [r0], $1 )
THUMB( ldrbt \reg1, [r0] )
THUMB( add \reg1, $1 )
.section __ex_table, "a"
.align 3
.long 9999b, 6001f
......@@ -36,8 +43,14 @@
.endm
.macro load2b, reg1, reg2
9999: ldrbt \reg1, [r0], $1
9998: ldrbt \reg2, [r0], $1
9999:
ARM( ldrbt \reg1, [r0], $1 )
THUMB( ldrbt \reg1, [r0] )
THUMB( add \reg1, $1 )
9998:
ARM( ldrbt \reg2, [r0], $1 )
THUMB( ldrbt \reg2, [r0] )
THUMB( add \reg2, $1 )
.section __ex_table, "a"
.long 9999b, 6001f
.long 9998b, 6001f
......@@ -45,7 +58,10 @@
.endm
.macro load1l, reg1
9999: ldrt \reg1, [r0], $4
9999:
ARM( ldrt \reg1, [r0], $4 )
THUMB( ldrt \reg1, [r0] )
THUMB( add \reg1, $4 )
.section __ex_table, "a"
.align 3
.long 9999b, 6001f
......@@ -53,8 +69,14 @@
.endm
.macro load2l, reg1, reg2
9999: ldrt \reg1, [r0], $4
9998: ldrt \reg2, [r0], $4
9999:
ARM( ldrt \reg1, [r0], $4 )
THUMB( ldrt \reg1, [r0] )
THUMB( add \reg1, $4 )
9998:
ARM( ldrt \reg2, [r0], $4 )
THUMB( ldrt \reg2, [r0] )
THUMB( add \reg2, $4 )
.section __ex_table, "a"
.long 9999b, 6001f
.long 9998b, 6001f
......@@ -62,10 +84,22 @@
.endm
.macro load4l, reg1, reg2, reg3, reg4
9999: ldrt \reg1, [r0], $4
9998: ldrt \reg2, [r0], $4
9997: ldrt \reg3, [r0], $4
9996: ldrt \reg4, [r0], $4
9999:
ARM( ldrt \reg1, [r0], $4 )
THUMB( ldrt \reg1, [r0] )
THUMB( add \reg1, $4 )
9998:
ARM( ldrt \reg2, [r0], $4 )
THUMB( ldrt \reg2, [r0] )
THUMB( add \reg2, $4 )
9997:
ARM( ldrt \reg3, [r0], $4 )
THUMB( ldrt \reg3, [r0] )
THUMB( add \reg3, $4 )
9996:
ARM( ldrt \reg4, [r0], $4 )
THUMB( ldrt \reg4, [r0] )
THUMB( add \reg4, $4 )
.section __ex_table, "a"
.long 9999b, 6001f
.long 9998b, 6001f
......@@ -101,6 +135,7 @@
add r2, r2, r1
mov r0, #0 @ zero the buffer
6002: teq r2, r1
it ne
strneb r0, [r1], #1
bne 6002b
load_regs
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/param.h>
......@@ -31,6 +33,7 @@ ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06
mov r2, r2, lsr #10 @ max = 0x00007fff
mul r0, r2, r0 @ max = 2^32-1
movs r0, r0, lsr #6
it eq
moveq pc, lr
ENDPROC(__udelay)
ENDPROC(__const_udelay) @ 0 <= r0 <= 0x7fffff06
......@@ -60,6 +63,7 @@ ENTRY(__delay)
movls pc, lr
subs r0, r0, #1
#endif
it hi
bhi __delay
mov pc, lr
ENDPROC(__delay)
......@@ -11,6 +11,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -84,8 +85,10 @@ ENTRY(__do_div64)
@ The division loop for needed upper bit positions.
@ Break out early if dividend reaches 0.
2: cmp xh, yl
itt cs
orrcs yh, yh, ip
subcss xh, xh, yl
it ne
movnes ip, ip, lsr #1
mov yl, yl, lsr #1
bne 2b
......@@ -93,7 +96,9 @@ ENTRY(__do_div64)
@ See if we need to handle lower 32-bit result.
3: cmp xh, #0
mov yl, #0
it eq
cmpeq xl, r4
itt lo
movlo xh, xl
movlo pc, lr
......@@ -104,7 +109,9 @@ ENTRY(__do_div64)
4: movs xl, xl, lsl #1
adcs xh, xh, xh
beq 6f
it cc
cmpcc xh, r4
itt cs
5: orrcs yl, yl, ip
subcs xh, xh, r4
movs ip, ip, lsr #1
......@@ -116,6 +123,7 @@ ENTRY(__do_div64)
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
it eq
moveq pc, lr
@ We still have remainder bits in the low part. Bring them up.
......@@ -177,13 +185,16 @@ ENTRY(__do_div64)
mov yh, xh, lsr ip
mov yl, xl, lsr ip
rsb ip, ip, #32
orr yl, yl, xh, lsl ip
ARM( orr yl, yl, xh, lsl ip )
THUMB( lsl xh, xh, ip )
THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
mov pc, lr
@ eq -> division by 1: obvious enough...
9: moveq yl, xl
9: itttt eq
moveq yl, xl
moveq yh, xh
moveq xh, #0
moveq pc, lr
......
......@@ -13,6 +13,8 @@
* also call with zero size.
* Reworked by rmk.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
......@@ -25,7 +27,10 @@ ENTRY(_find_first_zero_bit_le)
teq r1, #0
beq 3f
mov r2, #0
1: ldrb r3, [r0, r2, lsr #3]
1:
ARM( ldrb r3, [r0, r2, lsr #3] )
THUMB( lsr r3, r2, #3 )
THUMB( ldrb r3, [r0, r3] )
eors r3, r3, #0xff @ invert bits
bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
......@@ -44,7 +49,9 @@ ENTRY(_find_next_zero_bit_le)
beq 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ldrb r3, [r0, r2, lsr #3]
ARM( ldrb r3, [r0, r2, lsr #3] )
THUMB( lsr r3, r2, #3 )
THUMB( ldrb r3, [r0, r3] )
eor r3, r3, #0xff @ now looking for a 1 bit
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
......@@ -61,7 +68,10 @@ ENTRY(_find_first_bit_le)
teq r1, #0
beq 3f
mov r2, #0
1: ldrb r3, [r0, r2, lsr #3]
1:
ARM( ldrb r3, [r0, r2, lsr #3] )
THUMB( lsr r3, r2, #3 )
THUMB( ldrb r3, [r0, r3] )
movs r3, r3
bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
......@@ -80,7 +90,9 @@ ENTRY(_find_next_bit_le)
beq 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ldrb r3, [r0, r2, lsr #3]
ARM( ldrb r3, [r0, r2, lsr #3] )
THUMB( lsr r3, r2, #3 )
THUMB( ldrb r3, [r0, r3] )
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
orr r2, r2, #7 @ if zero, then no bits here
......@@ -95,7 +107,9 @@ ENTRY(_find_first_zero_bit_be)
beq 3f
mov r2, #0
1: eor r3, r2, #0x18 @ big endian byte ordering
ldrb r3, [r0, r3, lsr #3]
ARM( ldrb r3, [r0, r3, lsr #3] )
THUMB( lsr r3, #3 )
THUMB( ldrb r3, [r0, r3] )
eors r3, r3, #0xff @ invert bits
bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
......@@ -111,7 +125,9 @@ ENTRY(_find_next_zero_bit_be)
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
ldrb r3, [r0, r3, lsr #3]
ARM( ldrb r3, [r0, r3, lsr #3] )
THUMB( lsr r3, #3 )
THUMB( ldrb r3, [r0, r3] )
eor r3, r3, #0xff @ now looking for a 1 bit
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
......@@ -125,7 +141,9 @@ ENTRY(_find_first_bit_be)
beq 3f
mov r2, #0
1: eor r3, r2, #0x18 @ big endian byte ordering
ldrb r3, [r0, r3, lsr #3]
ARM( ldrb r3, [r0, r3, lsr #3] )
THUMB( lsr r3, #3 )
THUMB( ldrb r3, [r0, r3] )
movs r3, r3
bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
......@@ -141,7 +159,9 @@ ENTRY(_find_next_bit_be)
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
ldrb r3, [r0, r3, lsr #3]
ARM( ldrb r3, [r0, r3, lsr #3] )
THUMB( lsr r3, #3 )
THUMB( ldrb r3, [r0, r3] )
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
orr r2, r2, #7 @ if zero, then no bits here
......
......@@ -26,6 +26,8 @@
* Note that ADDR_LIMIT is either 0 or 0xc0000000.
* Note also that it is intended that __get_user_bad is not global.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/errno.h>
......@@ -36,7 +38,10 @@ ENTRY(__get_user_1)
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
2: ldrbt r2, [r0], #1
2:
ARM( ldrbt r2, [r0], #1 )
THUMB( ldrbt r2, [r0] )
THUMB( add r0, #1 )
3: ldrbt r3, [r0]
#ifndef __ARMEB__
orr r2, r2, r3, lsl #8
......
......@@ -7,17 +7,22 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.Linsb_align: rsb ip, ip, #4
cmp ip, r2
it gt
movgt ip, r2
cmp ip, #2
ldrb r3, [r0]
strb r3, [r1], #1
itt ge
ldrgeb r3, [r0]
strgeb r3, [r1], #1
itt gt
ldrgtb r3, [r0]
strgtb r3, [r1], #1
subs r2, r2, ip
......@@ -25,6 +30,7 @@
ENTRY(__raw_readsb)
teq r2, #0 @ do we have to check for the zero len?
it eq
moveq pc, lr
ands ip, r1, #3
bne .Linsb_align
......@@ -72,6 +78,7 @@ ENTRY(__raw_readsb)
bpl .Linsb_16_lp
tst r2, #15
it eq
ldmeqfd sp!, {r4 - r6, pc}
.Linsb_no_16: tst r2, #8
......@@ -109,13 +116,16 @@ ENTRY(__raw_readsb)
str r3, [r1], #4
.Linsb_no_4: ands r2, r2, #3
it eq
ldmeqfd sp!, {r4 - r6, pc}
cmp r2, #2
ldrb r3, [r0]
strb r3, [r1], #1
itt ge
ldrgeb r3, [r0]
strgeb r3, [r1], #1
itt gt
ldrgtb r3, [r0]
strgtb r3, [r1]
......
......@@ -7,11 +7,14 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
ENTRY(__raw_readsl)
teq r2, #0 @ do we have to check for the zero len?
it eq
moveq pc, lr
ands ip, r1, #3
bne 3f
......@@ -28,9 +31,11 @@ ENTRY(__raw_readsl)
bpl 1b
ldmfd sp!, {r4, lr}
2: movs r2, r2, lsl #31
ittt cs
ldrcs r3, [r0, #0]
ldrcs ip, [r0, #0]
stmcsia r1!, {r3, ip}
itt ne
ldrne r3, [r0, #0]
strne r3, [r1, #0]
mov pc, lr
......@@ -48,6 +53,7 @@ ENTRY(__raw_readsl)
4: subs r2, r2, #1
mov ip, r3, pull #24
itttt ne
ldrne r3, [r0]
orrne ip, ip, r3, push #8
strne ip, [r1], #4
......@@ -56,6 +62,7 @@ ENTRY(__raw_readsl)
5: subs r2, r2, #1
mov ip, r3, pull #16
itttt ne
ldrne r3, [r0]
orrne ip, ip, r3, push #16
strne ip, [r1], #4
......@@ -64,6 +71,7 @@ ENTRY(__raw_readsl)
6: subs r2, r2, #1
mov ip, r3, pull #8
itttt ne
ldrne r3, [r0]
orrne ip, ip, r3, push #24
strne ip, [r1], #4
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -26,6 +28,7 @@
ENTRY(__raw_readsw)
teq r2, #0
it eq
moveq pc, lr
tst r1, #3
bne .Linsw_align
......@@ -76,7 +79,8 @@ ENTRY(__raw_readsw)
pack r3, r3, ip
str r3, [r1], #4
.Lno_insw_2: ldrneh r3, [r0]
.Lno_insw_2: itt ne
ldrneh r3, [r0]
strneh r3, [r1]
ldmfd sp!, {r4, r5, pc}
......@@ -94,6 +98,7 @@ ENTRY(__raw_readsw)
#endif
.Linsw_noalign: stmfd sp!, {r4, lr}
it cc
ldrccb ip, [r1, #-1]!
bcc 1f
......@@ -121,6 +126,7 @@ ENTRY(__raw_readsw)
3: tst r2, #1
strb ip, [r1], #1
itttt ne
ldrneh ip, [r0]
_BE_ONLY_( movne ip, ip, ror #8 )
strneb ip, [r1], #1
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -32,12 +34,15 @@
.Loutsb_align: rsb ip, ip, #4
cmp ip, r2
it gt
movgt ip, r2
cmp ip, #2
ldrb r3, [r1], #1
strb r3, [r0]
itt ge
ldrgeb r3, [r1], #1
strgeb r3, [r0]
itt gt
ldrgtb r3, [r1], #1
strgtb r3, [r0]
subs r2, r2, ip
......@@ -45,6 +50,7 @@
ENTRY(__raw_writesb)
teq r2, #0 @ do we have to check for the zero len?
it eq
moveq pc, lr
ands ip, r1, #3
bne .Loutsb_align
......@@ -64,6 +70,7 @@ ENTRY(__raw_writesb)
bpl .Loutsb_16_lp
tst r2, #15
it eq
ldmeqfd sp!, {r4, r5, pc}
.Loutsb_no_16: tst r2, #8
......@@ -80,13 +87,16 @@ ENTRY(__raw_writesb)
outword r3
.Loutsb_no_4: ands r2, r2, #3
it eq
ldmeqfd sp!, {r4, r5, pc}
cmp r2, #2
ldrb r3, [r1], #1
strb r3, [r0]
itt ge
ldrgeb r3, [r1], #1
strgeb r3, [r0]
itt gt
ldrgtb r3, [r1]
strgtb r3, [r0]
......
......@@ -7,11 +7,14 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
ENTRY(__raw_writesl)
teq r2, #0 @ do we have to check for the zero len?
it eq
moveq pc, lr
ands ip, r1, #3
bne 3f
......@@ -28,10 +31,14 @@ ENTRY(__raw_writesl)
bpl 1b
ldmfd sp!, {r4, lr}
2: movs r2, r2, lsl #31
itt cs
ldmcsia r1!, {r3, ip}
strcs r3, [r0, #0]
it ne
ldrne r3, [r1, #0]
it cs
strcs ip, [r0, #0]
it ne
strne r3, [r0, #0]
mov pc, lr
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -31,6 +33,7 @@
ENTRY(__raw_writesw)
teq r2, #0
it eq
moveq pc, lr
ands r3, r1, #3
bne .Loutsw_align
......@@ -61,7 +64,8 @@ ENTRY(__raw_writesw)
ldr r3, [r1], #4
outword r3
.Lno_outsw_2: ldrneh r3, [r1]
.Lno_outsw_2: itt ne
ldrneh r3, [r1]
strneh r3, [r0]
ldmfd sp!, {r4, r5, pc}
......@@ -75,7 +79,11 @@ ENTRY(__raw_writesw)
#endif
.Loutsw_noalign:
ldr r3, [r1, -r3]!
ARM( ldr r3, [r1, -r3]! )
THUMB( rsb r3, r3, #0 )
THUMB( ldr r3, [r1, r3] )
THUMB( sub r1, r3 )
it cs
subcs r2, r2, #1
bcs 2f
subs r2, r2, #2
......@@ -91,7 +99,8 @@ ENTRY(__raw_writesw)
bpl 1b
tst r2, #1
3: movne ip, r3, lsr #8
3: itt ne
movne ip, r3, lsr #8
strneh ip, [r0]
mov pc, lr
ENDPROC(__raw_writesw)
......@@ -31,6 +31,7 @@ You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -56,6 +57,7 @@ Boston, MA 02111-1307, USA. */
@ at the left end of each 4 bit nibbles in the division loop
@ to save one loop in most cases.
tst \divisor, #0xe0000000
itte eq
moveq \divisor, \divisor, lsl #3
moveq \curbit, #8
movne \curbit, #1
......@@ -65,6 +67,7 @@ Boston, MA 02111-1307, USA. */
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1: cmp \divisor, #0x10000000
ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
movlo \curbit, \curbit, lsl #4
......@@ -73,6 +76,7 @@ Boston, MA 02111-1307, USA. */
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1: cmp \divisor, #0x80000000
ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
movlo \curbit, \curbit, lsl #1
......@@ -84,19 +88,25 @@ Boston, MA 02111-1307, USA. */
@ Division loop
1: cmp \dividend, \divisor
itt hs
subhs \dividend, \dividend, \divisor
orrhs \result, \result, \curbit
cmp \dividend, \divisor, lsr #1
itt hs
subhs \dividend, \dividend, \divisor, lsr #1
orrhs \result, \result, \curbit, lsr #1
cmp \dividend, \divisor, lsr #2
itt hs
subhs \dividend, \dividend, \divisor, lsr #2
orrhs \result, \result, \curbit, lsr #2
cmp \dividend, \divisor, lsr #3
itt hs
subhs \dividend, \dividend, \divisor, lsr #3
orrhs \result, \result, \curbit, lsr #3
cmp \dividend, #0 @ Early termination?
it ne
movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
it ne
movne \divisor, \divisor, lsr #4
bne 1b
......@@ -113,19 +123,24 @@ Boston, MA 02111-1307, USA. */
#else
cmp \divisor, #(1 << 16)
itt hs
movhs \divisor, \divisor, lsr #16
movhs \order, #16
it lo
movlo \order, #0
cmp \divisor, #(1 << 8)
itt hs
movhs \divisor, \divisor, lsr #8
addhs \order, \order, #8
cmp \divisor, #(1 << 4)
itt hs
movhs \divisor, \divisor, lsr #4
addhs \order, \order, #4
cmp \divisor, #(1 << 2)
ite hi
addhi \order, \order, #3
addls \order, \order, \divisor, lsr #1
......@@ -152,6 +167,7 @@ Boston, MA 02111-1307, USA. */
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1: cmp \divisor, #0x10000000
ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
addlo \order, \order, #4
......@@ -160,6 +176,7 @@ Boston, MA 02111-1307, USA. */
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1: cmp \divisor, #0x80000000
ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
addlo \order, \order, #1
......@@ -173,19 +190,25 @@ Boston, MA 02111-1307, USA. */
blt 2f
1: cmp \dividend, \divisor
it hs
subhs \dividend, \dividend, \divisor
cmp \dividend, \divisor, lsr #1
it hs
subhs \dividend, \dividend, \divisor, lsr #1
cmp \dividend, \divisor, lsr #2
it hs
subhs \dividend, \dividend, \divisor, lsr #2
cmp \dividend, \divisor, lsr #3
it hs
subhs \dividend, \dividend, \divisor, lsr #3
cmp \dividend, #1
mov \divisor, \divisor, lsr #4
it ge
subges \order, \order, #4
bge 1b
tst \order, #3
it ne
teqne \dividend, #0
beq 5f
......@@ -194,12 +217,15 @@ Boston, MA 02111-1307, USA. */
blt 4f
beq 3f
cmp \dividend, \divisor
it hs
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
3: cmp \dividend, \divisor
it hs
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
4: cmp \dividend, \divisor
it hs
subhs \dividend, \dividend, \divisor
5:
.endm
......@@ -209,6 +235,7 @@ ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
subs r2, r1, #1
it eq
moveq pc, lr
bcc Ldiv0
cmp r0, r1
......@@ -221,7 +248,8 @@ ENTRY(__aeabi_uidiv)
mov r0, r2
mov pc, lr
11: moveq r0, #1
11: ite eq
moveq r0, #1
movne r0, #0
mov pc, lr
......@@ -237,10 +265,14 @@ ENTRY(__umodsi3)
subs r2, r1, #1 @ compare divisor with 1
bcc Ldiv0
ite ne
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
it hi
tsthi r1, r2 @ see if divisor is power of 2
it eq
andeq r0, r0, r2
it ls
movls pc, lr
ARM_MOD_BODY r0, r1, r2, r3
......@@ -255,10 +287,12 @@ ENTRY(__aeabi_idiv)
cmp r1, #0
eor ip, r0, r1 @ save the sign of the result.
beq Ldiv0
it mi
rsbmi r1, r1, #0 @ loops below use unsigned.
subs r2, r1, #1 @ division by 1 or -1 ?
beq 10f
movs r3, r0
it mi
rsbmi r3, r0, #0 @ positive dividend value
cmp r3, r1
bls 11f
......@@ -268,14 +302,18 @@ ENTRY(__aeabi_idiv)
ARM_DIV_BODY r3, r1, r0, r2
cmp ip, #0
it mi
rsbmi r0, r0, #0
mov pc, lr
10: teq ip, r0 @ same sign ?
it mi
rsbmi r0, r0, #0
mov pc, lr
11: movlo r0, #0
11: it lo
movlo r0, #0
itt eq
moveq r0, ip, asr #31
orreq r0, r0, #1
mov pc, lr
......@@ -284,6 +322,7 @@ ENTRY(__aeabi_idiv)
cmp ip, #0
mov r0, r3, lsr r2
it mi
rsbmi r0, r0, #0
mov pc, lr
......@@ -294,19 +333,25 @@ ENTRY(__modsi3)
cmp r1, #0
beq Ldiv0
it mi
rsbmi r1, r1, #0 @ loops below use unsigned.
movs ip, r0 @ preserve sign of dividend
it mi
rsbmi r0, r0, #0 @ if negative make positive
subs r2, r1, #1 @ compare divisor with 1
ite ne
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
it hi
tsthi r1, r2 @ see if divisor is power of 2
it eq
andeq r0, r0, r2
bls 10f
ARM_MOD_BODY r0, r1, r2, r3
10: cmp ip, #0
it mi
rsbmi r0, r0, #0
mov pc, lr
......
......@@ -24,6 +24,7 @@ You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -41,9 +42,12 @@ ENTRY(__aeabi_llsr)
subs r3, r2, #32
rsb ip, r2, #32
itett mi
movmi al, al, lsr r2
movpl al, ah, lsr r3
orrmi al, al, ah, lsl ip
ARM( orrmi al, al, ah, lsl ip )
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, lsr r2
mov pc, lr
......
......@@ -9,6 +9,8 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -21,6 +23,7 @@ ENTRY(memchr)
teq r3, r1
bne 1b
sub r0, r0, #1
2: movne r0, #0
2: it ne
movne r0, #0
mov pc, lr
ENDPROC(memchr)
......@@ -9,12 +9,16 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#define LDR1W_SHIFT 0
#define STR1W_SHIFT 0
.macro ldr1w ptr reg abort
ldr \reg, [\ptr], #4
W(ldr) \reg, [\ptr], #4
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
......@@ -26,11 +30,16 @@
.endm
.macro ldr1b ptr reg cond=al abort
.ifnc \cond,al
it \cond
ldr\cond\()b \reg, [\ptr], #1
.else
ldrb \reg, [\ptr], #1
.endif
.endm
.macro str1w ptr reg abort
str \reg, [\ptr], #4
W(str) \reg, [\ptr], #4
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
......@@ -38,7 +47,12 @@
.endm
.macro str1b ptr reg cond=al abort
.ifnc \cond,al
it \cond
str\cond\()b \reg, [\ptr], #1
.else
strb \reg, [\ptr], #1
.endif
.endm
.macro enter reg1 reg2
......
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -37,7 +38,9 @@
ENTRY(memmove)
subs ip, r0, r1
it hi
cmphi r2, ip
it ls
bls memcpy
stmfd sp!, {r0, r4, lr}
......@@ -79,46 +82,55 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
it ne
addne pc, pc, ip @ C is always clear here
b 7f
6: nop
ldr r3, [r1, #-4]!
ldr r4, [r1, #-4]!
ldr r5, [r1, #-4]!
ldr r6, [r1, #-4]!
ldr r7, [r1, #-4]!
ldr r8, [r1, #-4]!
ldr lr, [r1, #-4]!
W(ldr) r3, [r1, #-4]!
W(ldr) r4, [r1, #-4]!
W(ldr) r5, [r1, #-4]!
W(ldr) r6, [r1, #-4]!
W(ldr) r7, [r1, #-4]!
W(ldr) r8, [r1, #-4]!
W(ldr) lr, [r1, #-4]!
add pc, pc, ip
nop
nop
str r3, [r0, #-4]!
str r4, [r0, #-4]!
str r5, [r0, #-4]!
str r6, [r0, #-4]!
str r7, [r0, #-4]!
str r8, [r0, #-4]!
str lr, [r0, #-4]!
W(str) r3, [r0, #-4]!
W(str) r4, [r0, #-4]!
W(str) r5, [r0, #-4]!
W(str) r6, [r0, #-4]!
W(str) r7, [r0, #-4]!
W(str) r8, [r0, #-4]!
W(str) lr, [r0, #-4]!
CALGN( bcs 2b )
7: ldmfd sp!, {r5 - r8}
8: movs r2, r2, lsl #31
it ne
ldrneb r3, [r1, #-1]!
itt cs
ldrcsb r4, [r1, #-1]!
ldrcsb ip, [r1, #-1]
it ne
strneb r3, [r0, #-1]!
itt cs
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
ldmfd sp!, {r0, r4, pc}
9: cmp ip, #2
it gt
ldrgtb r3, [r1, #-1]!
it ge
ldrgeb r4, [r1, #-1]!
ldrb lr, [r1, #-1]!
it gt
strgtb r3, [r0, #-1]!
it ge
strgeb r4, [r0, #-1]!
subs r2, r2, ip
strb lr, [r0, #-1]!
......
......@@ -9,6 +9,8 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -19,7 +21,9 @@
1: subs r2, r2, #4 @ 1 do we have enough
blt 5f @ 1 bytes to align with?
cmp r3, #2 @ 1
it lt
strltb r1, [r0], #1 @ 1
it le
strleb r1, [r0], #1 @ 1
strb r1, [r0], #1 @ 1
add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
......@@ -48,34 +52,42 @@ ENTRY(memset)
mov lr, r1
2: subs r2, r2, #64
itttt ge
stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
stmgeia r0!, {r1, r3, ip, lr}
stmgeia r0!, {r1, r3, ip, lr}
stmgeia r0!, {r1, r3, ip, lr}
bgt 2b
it eq
ldmeqfd sp!, {pc} @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r2, #32
itt ne
stmneia r0!, {r1, r3, ip, lr}
stmneia r0!, {r1, r3, ip, lr}
tst r2, #16
it ne
stmneia r0!, {r1, r3, ip, lr}
ldr lr, [sp], #4
4: tst r2, #8
it ne
stmneia r0!, {r1, r3}
tst r2, #4
it ne
strne r1, [r0], #4
/*
* When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
itt ne
strneb r1, [r0], #1
strneb r1, [r0], #1
tst r2, #1
it ne
strneb r1, [r0], #1
mov pc, lr
ENDPROC(memset)
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -21,7 +23,9 @@
1: subs r1, r1, #4 @ 1 do we have enough
blt 5f @ 1 bytes to align with?
cmp r3, #2 @ 1
it lt
strltb r2, [r0], #1 @ 1
it le
strleb r2, [r0], #1 @ 1
strb r2, [r0], #1 @ 1
add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3))
......@@ -48,34 +52,42 @@ ENTRY(__memzero)
mov lr, r2 @ 1
3: subs r1, r1, #64 @ 1 write 32 bytes out per loop
itttt ge
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
bgt 3b @ 1
it eq
ldmeqfd sp!, {pc} @ 1/2 quick exit
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r1, #32 @ 1
itt ne
stmneia r0!, {r2, r3, ip, lr} @ 4
stmneia r0!, {r2, r3, ip, lr} @ 4
tst r1, #16 @ 1 16 bytes or more?
it ne
stmneia r0!, {r2, r3, ip, lr} @ 4
ldr lr, [sp], #4 @ 1
4: tst r1, #8 @ 1 8 bytes or more?
it ne
stmneia r0!, {r2, r3} @ 2
tst r1, #4 @ 1 4 bytes or more?
it ne
strne r2, [r0], #4 @ 1
/*
* When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
5: tst r1, #2 @ 1 2 bytes or more?
itt ne
strneb r2, [r0], #1 @ 1
strneb r2, [r0], #1 @ 1
tst r1, #1 @ 1 a byte left over
it ne
strneb r2, [r0], #1 @ 1
mov pc, lr @ 1
ENDPROC(__memzero)
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
......
......@@ -26,6 +26,8 @@
* Note that ADDR_LIMIT is either 0 or 0xc0000000
* Note also that it is intended that __put_user_bad is not global.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/errno.h>
......@@ -38,10 +40,16 @@ ENDPROC(__put_user_1)
ENTRY(__put_user_2)
mov ip, r2, lsr #8
#ifndef __ARMEB__
2: strbt r2, [r0], #1
2:
ARM( strbt r2, [r0], #1 )
THUMB( strbt r2, [r0] )
THUMB( add r0, #1 )
3: strbt ip, [r0]
#else
2: strbt ip, [r0], #1
2:
ARM( strbt ip, [r0], #1 )
THUMB( strbt ip, [r0] )
THUMB( add r0, #1 )
3: strbt r2, [r0]
#endif
mov r0, #0
......@@ -55,7 +63,10 @@ ENTRY(__put_user_4)
ENDPROC(__put_user_4)
ENTRY(__put_user_8)
5: strt r2, [r0], #4
5:
ARM( strt r2, [r0], #4 )
THUMB( strt r2, [r0] )
THUMB( add r0, #4 )
6: strt r3, [r0]
mov r0, #0
mov pc, lr
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
......
......@@ -12,6 +12,7 @@
*
* The reference implementation for this code is linux/lib/sha1.c
*/
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -187,6 +188,7 @@ ENTRY(sha_transform)
ENDPROC(sha_transform)
.align 2
.L_sha_K:
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
......@@ -195,6 +197,7 @@ ENDPROC(sha_transform)
* void sha_init(__u32 *buf)
*/
.align 2
.L_sha_initial_digest:
.word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
......
......@@ -9,6 +9,8 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -18,9 +20,11 @@ ENTRY(strchr)
and r1, r1, #0xff
1: ldrb r2, [r0], #1
teq r2, r1
it ne
teqne r2, #0
bne 1b
teq r2, r1
ite ne
movne r0, #0
subeq r0, r0, #1
mov pc, lr
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>
......@@ -23,7 +25,10 @@
ENTRY(__strncpy_from_user)
mov ip, r1
1: subs r2, r2, #1
USER( ldrplbt r3, [r1], #1)
ARM(USER( ldrplbt r3, [r1], #1 ))
itt pl
THUMB(USER( ldrplbt r3, [r1] ))
THUMB( addpl r1, #1 )
bmi 2f
strb r3, [r0], #1
teq r3, #0
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>
......@@ -23,7 +25,9 @@
ENTRY(__strnlen_user)
mov r2, r0
1:
USER( ldrbt r3, [r0], #1)
ARM(USER( ldrbt r3, [r0], #1 ))
THUMB(USER( ldrbt r3, [r0] ))
THUMB( add r0, #1 )
teq r3, #0
beq 2f
subs r1, r1, #1
......
......@@ -9,6 +9,8 @@
*
* ASM optimised string functions
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
......@@ -18,6 +20,7 @@ ENTRY(strrchr)
mov r3, #0
1: ldrb r2, [r0], #1
teq r2, r1
it eq
subeq r3, r0, #1
teq r2, #0
bne 1b
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
......
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
......@@ -15,6 +17,6 @@
ENTRY(_test_and_clear_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
ENTRY(_test_and_clear_bit_le)
testop bicne, strneb
testop bicne, strneb, ne
ENDPROC(_test_and_clear_bit_be)
ENDPROC(_test_and_clear_bit_le)
......@@ -7,6 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "bitops.h"
......@@ -15,6 +17,6 @@
ENTRY(_test_and_set_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
ENTRY(_test_and_set_bit_le)
testop orreq, streqb
testop orreq, streqb, eq
ENDPROC(_test_and_set_bit_be)
ENDPROC(_test_and_set_bit_le)
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/unified.h>
#include <linux/linkage.h>
......@@ -27,9 +28,13 @@
ENTRY(__ucmpdi2)
cmp xh, yh
it eq
cmpeq xl, yl
it lo
movlo r0, #0
it eq
moveq r0, #1
it hi
movhi r0, #2
mov pc, lr
......@@ -40,9 +45,13 @@ ENDPROC(__ucmpdi2)
ENTRY(__aeabi_ulcmp)
cmp xh, yh
it eq
cmpeq xl, yl
it lo
movlo r0, #-1
it eq
moveq r0, #0
it hi
movhi r0, #1
mov pc, lr
......
......@@ -73,6 +73,7 @@ ip_fast_csum(const void *iph, unsigned int ihl)
1: adcs %0, %0, %3 \n\
ldr %3, [%1], #4 \n\
tst %2, #15 @ do this carefully \n\
it ne \n\
subne %2, %2, #1 @ without destroying \n\
bne 1b @ the carry flag \n\
adcs %0, %0, %3 \n\
......
......@@ -12,6 +12,7 @@
* User space memory access functions
*/
#include <linux/sched.h>
#include <asm/unified.h>
#include <asm/errno.h>
#include <asm/memory.h>
#include <asm/domain.h>
......@@ -68,7 +69,7 @@ static inline void set_fs(mm_segment_t fs)
#define __addr_ok(addr) ({ \
unsigned long flag; \
__asm__("cmp %2, %0; movlo %0, #0" \
__asm__("cmp %2, %0; it lo; movlo %0, #0" \
: "=&r" (flag) \
: "0" (current_thread_info()->addr_limit), "r" (addr) \
: "cc"); \
......@@ -78,7 +79,7 @@ static inline void set_fs(mm_segment_t fs)
#define __range_ok(addr,size) ({ \
unsigned long flag, roksum; \
__chk_user_ptr(addr); \
__asm__("adds %1, %2, %3; sbcccs %1, %1, %0; movcc %0, #0" \
__asm__("adds %1, %2, %3; it cc; sbcccs %1, %1, %0; it cc; movcc %0, #0" \
: "=&r" (flag), "=&r" (roksum) \
: "r" (addr), "Ir" (size), "0" (current_thread_info()->addr_limit) \
: "cc"); \
......@@ -225,7 +226,7 @@ do { \
#define __get_user_asm_byte(x,addr,err) \
__asm__ __volatile__( \
"1: ldrbt %1,[%2],#0\n" \
"1: ldrbt %1,[%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......@@ -261,7 +262,7 @@ do { \
#define __get_user_asm_word(x,addr,err) \
__asm__ __volatile__( \
"1: ldrt %1,[%2],#0\n" \
"1: ldrt %1,[%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......@@ -306,7 +307,7 @@ do { \
#define __put_user_asm_byte(x,__pu_addr,err) \
__asm__ __volatile__( \
"1: strbt %1,[%2],#0\n" \
"1: strbt %1,[%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......@@ -339,7 +340,7 @@ do { \
#define __put_user_asm_word(x,__pu_addr,err) \
__asm__ __volatile__( \
"1: strt %1,[%2],#0\n" \
"1: strt %1,[%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......@@ -364,8 +365,10 @@ do { \
#define __put_user_asm_dword(x,__pu_addr,err) \
__asm__ __volatile__( \
"1: strt " __reg_oper1 ", [%1], #4\n" \
"2: strt " __reg_oper0 ", [%1], #0\n" \
ARM( "1: strt " __reg_oper1 ", [%1], #4\n" ) \
THUMB( "1: strt " __reg_oper1 ", [%1]\n" ) \
THUMB( " add %1, %1, #4\n" ) \
"2: strt " __reg_oper0 ", [%1]\n" \
"3:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment