Commit e2a81baf authored by Jeremy Fitzhardinge, committed by Ingo Molnar

xen: support sysenter/sysexit if hypervisor does

64-bit Xen supports sysenter for 32-bit guests, so support its
use.  (sysenter is faster than int $0x80 in 32-on-64.)

sysexit is still not supported, so we fake it up using iret.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent aa380c82
...@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper) ...@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
ENDPROC(kernel_thread_helper) ENDPROC(kernel_thread_helper)
#ifdef CONFIG_XEN #ifdef CONFIG_XEN
/*
 * xen_sysenter_target: sysenter entry point used when running under Xen.
 *
 * Xen doesn't set %esp to be precisely what the normal sysenter
 * entrypoint expects, so fix it up before using the normal path:
 * discard the 5*4 bytes Xen left on the stack, then continue at
 * sysenter_past_esp.
 *
 * xen_enable_sysenter() registers this address with the hypervisor as
 * the CALLBACKTYPE_sysenter callback.
 */
ENTRY(xen_sysenter_target)
RING0_INT_FRAME
addl $5*4, %esp /* remove xen-provided frame */
jmp sysenter_past_esp
ENTRY(xen_hypervisor_callback) ENTRY(xen_hypervisor_callback)
CFI_STARTPROC CFI_STARTPROC
pushl $0 pushl $0
...@@ -1036,8 +1043,17 @@ ENTRY(xen_hypervisor_callback) ...@@ -1036,8 +1043,17 @@ ENTRY(xen_hypervisor_callback)
jae 1f jae 1f
call xen_iret_crit_fixup call xen_iret_crit_fixup
jmp 2f
1: cmpl $xen_sysexit_start_crit,%eax
jb 2f
cmpl $xen_sysexit_end_crit,%eax
jae 2f
jmp xen_sysexit_crit_fixup
1: mov %esp, %eax ENTRY(xen_do_upcall)
2: mov %esp, %eax
call xen_evtchn_do_upcall call xen_evtchn_do_upcall
jmp ret_from_intr jmp ret_from_intr
CFI_ENDPROC CFI_ENDPROC
......
...@@ -155,7 +155,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, ...@@ -155,7 +155,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
if (*ax == 1) if (*ax == 1)
maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
(1 << X86_FEATURE_ACPI) | /* disable ACPI */ (1 << X86_FEATURE_ACPI) | /* disable ACPI */
(1 << X86_FEATURE_SEP) | /* disable SEP */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */ (1 << X86_FEATURE_ACC)); /* thermal monitoring */
asm(XEN_EMULATE_PREFIX "cpuid" asm(XEN_EMULATE_PREFIX "cpuid"
...@@ -994,7 +993,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { ...@@ -994,7 +993,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.read_pmc = native_read_pmc, .read_pmc = native_read_pmc,
.iret = xen_iret, .iret = xen_iret,
.irq_enable_syscall_ret = NULL, /* never called */ .irq_enable_syscall_ret = xen_sysexit,
.load_tr_desc = paravirt_nop, .load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt, .set_ldt = xen_set_ldt,
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
#include <xen/interface/callback.h>
#include <xen/interface/physdev.h> #include <xen/interface/physdev.h>
#include <xen/features.h> #include <xen/features.h>
...@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void) ...@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void)
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
} }
/*
 * Register xen_sysenter_target with the hypervisor as the sysenter
 * callback for the calling CPU.
 *
 * If the boot CPU does not advertise SEP, or the hypervisor rejects the
 * registration, X86_FEATURE_SEP is cleared both in this CPU's cpu_data
 * and in boot_cpu_data.
 */
void xen_enable_sysenter(void)
{
	extern void xen_sysenter_target(void);

	/* Events are masked on entry, even though they get enabled immediately */
	static struct callback_register sysenter = {
		.type = CALLBACKTYPE_sysenter,
		.address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
		.flags = CALLBACKF_mask_events,
	};
	int cpu = smp_processor_id();

	/* Only attempt the hypercall when the boot CPU reports SEP. */
	if (boot_cpu_has(X86_FEATURE_SEP) &&
	    HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) == 0)
		return;

	/* No SEP, or registration failed: stop advertising sysenter. */
	clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
}
void __init xen_arch_setup(void) void __init xen_arch_setup(void)
{ {
struct physdev_set_iopl set_iopl; struct physdev_set_iopl set_iopl;
...@@ -82,6 +101,8 @@ void __init xen_arch_setup(void) ...@@ -82,6 +101,8 @@ void __init xen_arch_setup(void)
HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
__KERNEL_CS, (unsigned long)xen_failsafe_callback); __KERNEL_CS, (unsigned long)xen_failsafe_callback);
xen_enable_sysenter();
set_iopl.iopl = 1; set_iopl.iopl = 1;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
if (rc != 0) if (rc != 0)
......
...@@ -72,6 +72,7 @@ static __cpuinit void cpu_bringup_and_idle(void) ...@@ -72,6 +72,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
int cpu = smp_processor_id(); int cpu = smp_processor_id();
cpu_init(); cpu_init();
xen_enable_sysenter();
preempt_disable(); preempt_disable();
per_cpu(cpu_state, cpu) = CPU_ONLINE; per_cpu(cpu_state, cpu) = CPU_ONLINE;
......
...@@ -280,6 +280,62 @@ ENTRY(xen_iret_crit_fixup) ...@@ -280,6 +280,62 @@ ENTRY(xen_iret_crit_fixup)
2: ret 2: ret
/*
 * xen_sysexit: return to user mode on the sysenter path under Xen.
 *
 * Installed as xen_cpu_ops.irq_enable_syscall_ret.  On entry %ecx holds
 * the user esp and %edx the user eip.  The routine builds a five-word
 * iret frame on the stack, unmasks events and irets.  If an event is
 * already pending after unmasking, it masks events again, drops the
 * iret frame and jumps to xen_do_upcall instead of returning.
 *
 * The code between xen_sysexit_start_crit and xen_sysexit_end_crit is
 * the range that xen_hypervisor_callback tests for before jumping to
 * xen_sysexit_crit_fixup.
 */
ENTRY(xen_sysexit)
/* Store vcpu_info pointer for easy access. Do it this
way to avoid having to reload %fs */
#ifdef CONFIG_SMP
/* thread_info -> cpu number -> per-cpu offset -> xen_vcpu pointer */
GET_THREAD_INFO(%eax)
movl TI_cpu(%eax),%eax
movl __per_cpu_offset(,%eax,4),%eax
mov per_cpu__xen_vcpu(%eax),%eax
#else
movl per_cpu__xen_vcpu, %eax
#endif
/* We can't actually use sysexit in a pv guest,
so fake it up with iret */
pushl $__USER_DS /* user stack segment */
pushl %ecx /* user esp */
/* +2*4 compensates for the two words pushed just above */
pushl PT_EFLAGS+2*4(%esp) /* user eflags */
pushl $__USER_CS /* user code segment */
pushl %edx /* user eip */
xen_sysexit_start_crit:
/* Unmask events... */
movb $0, XEN_vcpu_info_mask(%eax)
/* ...and test for pending.
There's a preempt window here, but it doesn't
matter because we're within the critical section. */
testb $0xff, XEN_vcpu_info_pending(%eax)
/* If there's something pending, mask events again so we
can directly inject it back into the kernel. */
jnz 1f
/* Nothing pending: reload %eax (used above as the vcpu_info pointer)
from the PT_EAX slot; +5*4 skips the five-word iret frame. */
movl PT_EAX+5*4(%esp),%eax
2: iret
1: movb $1, XEN_vcpu_info_mask(%eax)
xen_sysexit_end_crit:
addl $5*4, %esp /* remove iret frame */
/* no need to re-save regs, but need to restore kernel %fs */
mov $__KERNEL_PERCPU, %eax
mov %eax, %fs
jmp xen_do_upcall
/* Exception table entry pairing the iret at label 2 with iret_exc */
.section __ex_table,"a"
.align 4
.long 2b,iret_exc
.previous
/* Exported so xen_hypervisor_callback can compare against the range */
.globl xen_sysexit_start_crit, xen_sysexit_end_crit
/*
xen_sysexit_crit_fixup: recover from an event that arrived inside the
xen_sysexit critical section.

xen_hypervisor_callback jumps here when the address it compares falls
in [xen_sysexit_start_crit, xen_sysexit_end_crit).

sysexit fixup is easy, since the old frame is still sitting there
on the stack. We just need to remove the new recursive
interrupt and return: pop PT_OLDESP bytes of the new frame plus the
five-word iret frame built by xen_sysexit, then continue at
xen_do_upcall.
*/
ENTRY(xen_sysexit_crit_fixup)
addl $PT_OLDESP+5*4, %esp /* remove frame+iret */
jmp xen_do_upcall
/* /*
Force an event check by making a hypercall, Force an event check by making a hypercall,
but preserve regs before making the call. but preserve regs before making the call.
......
...@@ -19,6 +19,7 @@ extern struct shared_info *HYPERVISOR_shared_info; ...@@ -19,6 +19,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
char * __init xen_memory_setup(void); char * __init xen_memory_setup(void);
void __init xen_arch_setup(void); void __init xen_arch_setup(void);
void __init xen_init_IRQ(void); void __init xen_init_IRQ(void);
void xen_enable_sysenter(void);
void xen_setup_timer(int cpu); void xen_setup_timer(int cpu);
void xen_setup_cpu_clockevents(void); void xen_setup_cpu_clockevents(void);
...@@ -64,4 +65,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void); ...@@ -64,4 +65,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
DECL_ASM(void, xen_restore_fl_direct, unsigned long); DECL_ASM(void, xen_restore_fl_direct, unsigned long);
void xen_iret(void); void xen_iret(void);
void xen_sysexit(void);
#endif /* XEN_OPS_H */ #endif /* XEN_OPS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment