ARMv7: Add VFPv3 support

This patch adds support for VFPv3 (the kernel currently supports VFPv2). The main differences are that VFPv3 has 32 double registers (compared to 16) and does not have the FPINST and FPINST2 registers.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

ARMv7: Add VFPv3 support
This patch adds support for VFPv3 (the kernel currently supports VFPv2). The main differences are that VFPv3 has 32 double registers (compared to 16) and does not have the FPINST and FPINST2 registers.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
37b95035 · Catalin Marinas · c90820f6 · 37b95035 · 37b95035 · 37b95035
Commit 37b95035 authored Jun 11, 2007 by Catalin Marinas
8 changed files
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -892,7 +892,7 @@ config FPE_FASTFPE
 config VFP
 	bool "VFP-format floating point maths"
-	depends on CPU_V6 || CPU_ARM926T
+	depends on CPU_V6 || CPU_ARM926T || CPU_V7
 	help
 	  Say Y to include VFP support code in the kernel. This is needed
 	  if your hardware includes a VFP unit.
@@ -902,6 +902,11 @@ config VFP
 	  Say N if your target does not have VFP hardware.
+config VFPv3
+	bool
+	depends on VFP
+	default y if CPU_V7
 endmenu
 menu "Userspace binary formats"

--- a/arch/arm/vfp/vfp.h
+++ b/arch/arm/vfp/vfp.h
@@ -265,7 +265,11 @@ struct vfp_double {
 * which returns (double)0.0.  This is useful for the compare with
 * zero instructions.
 */
+#ifdef CONFIG_VFPv3
+#define VFP_REG_ZERO	32
+#else
 #define VFP_REG_ZERO	16
+#endif
 extern u64 vfp_get_double(unsigned int reg);
 extern void vfp_put_double(u64 val, unsigned int reg);

--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -100,10 +100,12 @@ vfp_support_entry:
 	cmp	r4, #0
 	beq	no_old_VFP_process
 	VFPFMRX	r5, FPSCR		@ current status
+#ifndef CONFIG_VFPv3
 	VFPFMRX	r6, FPINST		@ FPINST (always there, rev0 onwards)
 	tst	r1, #FPEXC_FPV2		@ is there an FPINST2 to read?
 	VFPFMRX	r8, FPINST2, NE		@ FPINST2 if needed - avoids reading
 					@ nonexistant reg on rev0
+#endif
 	VFPFSTMIA r4 			@ save the working registers
 	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
 					@ and point r4 at the word at the
@@ -117,10 +119,12 @@ no_old_VFP_process:
 	VFPFLDMIA r10	 		@ reload the working registers while
 					@ FPEXC is in a safe state
 	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
+#ifndef CONFIG_VFPv3
 	tst	r1, #FPEXC_FPV2		@ is there an FPINST2 to write?
 	VFPFMXR	FPINST2, r8, NE		@ FPINST2 if needed - avoids writing
 					@ nonexistant reg on rev0
 	VFPFMXR	FPINST, r6
+#endif
 	VFPFMXR	FPSCR, r5		@ restore status
 check_for_exception:
@@ -175,10 +179,12 @@ vfp_save_state:
 	@ r1 - FPEXC
 	DBGSTR1	"save VFP state %p", r0
 	VFPFMRX	r2, FPSCR		@ current status
+#ifndef CONFIG_VFPv3
 	VFPFMRX	r3, FPINST		@ FPINST (always there, rev0 onwards)
 	tst	r1, #FPEXC_FPV2		@ is there an FPINST2 to read?
 	VFPFMRX	r12, FPINST2, NE	@ FPINST2 if needed - avoids reading
 					@ nonexistant reg on rev0
+#endif
 	VFPFSTMIA r0 			@ save the working registers
 	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
 	mov	pc, lr
@@ -217,8 +223,15 @@ vfp_get_double:
 	fmrrd	r0, r1, d\dr
 	mov	pc, lr
 	.endr
+#ifdef CONFIG_VFPv3
+	@ d16 - d31 registers: with opc1 = 3, c\dr addresses d(16 + \dr)
+	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	mrrc	p11, 3, r0, r1, c\dr	@ fmrrd	r0, r1, d(16 + \dr)
+	mov	pc, lr
+	.endr
+#endif
-	@ virtual register 16 for compare with zero
+	@ virtual register 16 (or 32 if VFPv3) for compare with zero
 	mov	r0, #0
 	mov	r1, #0
 	mov	pc, lr
@@ -231,3 +244,10 @@ vfp_put_double:
 	fmdrr	d\dr, r0, r1
 	mov	pc, lr
 	.endr
+#ifdef CONFIG_VFPv3
+	@ d16 - d31 registers; the u64 value is in r0/r1, as for d0 - d15 above
+	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	mcrr	p11, 3, r0, r1, c\dr	@ fmdrr	d(16 + \dr), r0, r1
+	mov	pc, lr
+	.endr
+#endif
--- a/arch/arm/vfp/vfpinstr.h
+++ b/arch/arm/vfp/vfpinstr.h
@@ -52,11 +52,11 @@
 #define FEXT_TO_IDX(inst)	((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
 #define vfp_get_sd(inst)	((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22)
-#define vfp_get_dd(inst)	((inst & 0x0000f000) >> 12)
+#define vfp_get_dd(inst)	((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18)
 #define vfp_get_sm(inst)	((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5)
-#define vfp_get_dm(inst)	((inst & 0x0000000f))
+#define vfp_get_dm(inst)	((inst & 0x0000000f) | (inst & (1 << 5)) >> 1)
 #define vfp_get_sn(inst)	((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
-#define vfp_get_dn(inst)	((inst & 0x000f0000) >> 16)
+#define vfp_get_dn(inst)	((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3)
 #define vfp_single(inst)	(((inst) & 0x0000f00) == 0xa00)

--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -124,13 +124,13 @@ void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
 	send_sig_info(SIGFPE, &info, current);
 }
-static void vfp_panic(char *reason)
+static void vfp_panic(char *reason, u32 inst)
 {
 	int i;
 	printk(KERN_ERR "VFP: Error: %s\n", reason);
 	printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n",
-		fmrx(FPEXC), fmrx(FPSCR), fmrx(FPINST));
+		fmrx(FPEXC), fmrx(FPSCR), inst);
 	for (i = 0; i < 32; i += 2)
 		printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n",
 		       i, vfp_get_float(i), i+1, vfp_get_float(i+1));
@@ -146,7 +146,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
 	pr_debug("VFP: raising exceptions %08x\n", exceptions);
 	if (exceptions == VFP_EXCEPTION_ERROR) {
-		vfp_panic("unhandled bounce");
+		vfp_panic("unhandled bounce", inst);
 		vfp_raise_sigfpe(0, regs);
 		return;
 	}
@@ -261,11 +261,16 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	 * FPEXC bounce reason, but this appears to be unreliable.
 	 * Emulate the bounced instruction instead.
 	 */
+#ifndef CONFIG_VFPv3
 	inst = fmrx(FPINST);
+#else
+	inst = trigger;
+#endif
 	exceptions = vfp_emulate_instruction(inst, fpscr, regs);
 	if (exceptions)
 		vfp_raise_exceptions(exceptions, inst, orig_fpscr, regs);
+#ifndef CONFIG_VFPv3
 	/*
 	 * If there isn't a second FP instruction, exit now.
 	 */
@@ -279,6 +284,9 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	barrier();
 	trigger = fmrx(FPINST2);
 	orig_fpscr = fpscr = fmrx(FPSCR);
+#else
+	return;
+#endif
 emulate:
 	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);

--- a/include/asm-arm/fpstate.h
+++ b/include/asm-arm/fpstate.h
@@ -24,17 +24,25 @@
 */
 struct vfp_hard_struct {
+#ifdef CONFIG_VFPv3
+	__u64 fpregs[32];
+#else
 	__u64 fpregs[16];
+#endif
 #if __LINUX_ARM_ARCH__ < 6
 	__u32 fpmx_state;
 #endif
 	__u32 fpexc;
 	__u32 fpscr;
 	/*
 	 * VFP implementation specific state
+	 *
+	 * These slots are kept for VFPv3 as well: vfphw.S saves and
+	 * restores FPEXC, FPSCR, FPINST and FPINST2 with a single
+	 * four-word stmia/ldmia, whether or not the registers exist.
 	 */
 	__u32 fpinst;
 	__u32 fpinst2;
 #ifdef CONFIG_SMP
 	__u32 cpu;
 #endif

--- a/include/asm-arm/vfp.h
+++ b/include/asm-arm/vfp.h
@@ -55,11 +55,13 @@
 #define FPSCR_IXC		(1<<4)
 #define FPSCR_IDC		(1<<7)
+#ifndef CONFIG_VFPv3
 /*
 * VFP9-S specific.
 */
 #define FPINST			cr9
 #define FPINST2			cr10
+#endif
 /* FPEXC bits */
 #define FPEXC_FPV2		(1<<28)

--- a/include/asm-arm/vfpmacros.h
+++ b/include/asm-arm/vfpmacros.h
@@ -20,6 +20,9 @@
 	LDC	p11, cr0, [\base],#33*4		    @ FLDMIAX \base!, {d0-d15}
 #else
 	LDC	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d0-d15}
+#endif
+#ifdef CONFIG_VFPv3
+	LDCL	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d16-d31}
 #endif
 	.endm
@@ -29,5 +32,8 @@
 	STC	p11, cr0, [\base],#33*4		    @ FSTMIAX \base!, {d0-d15}
 #else
 	STC	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d0-d15}
+#endif
+#ifdef CONFIG_VFPv3
+	STCL	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d16-d31}
 #endif
 	.endm