Commit 6cd8e300 authored by Linus Torvalds

Merge branch 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
  KVM: Prevent overflow in largepages calculation
  KVM: Disable large pages on misaligned memory slots
  KVM: Add VT-x machine check support
  KVM: VMX: Rename rmode.active to rmode.vm86_active
  KVM: Move "exit due to NMI" handling into vmx_complete_interrupts()
  KVM: Disable CR8 intercept if tpr patching is active
  KVM: Do not migrate pending software interrupts.
  KVM: inject NMI after IRET from a previous NMI, not before.
  KVM: Always request IRQ/NMI window if an interrupt is pending
  KVM: Do not re-execute INTn instruction.
  KVM: skip_emulated_instruction() decode instruction if size is not known
  KVM: Remove irq_pending bitmap
  KVM: Do not allow interrupt injection from userspace if there is a pending event.
  KVM: Unprotect a page if #PF happens during NMI injection.
  KVM: s390: Verify memory in kvm run
  KVM: s390: Sanity check on validity intercept
  KVM: s390: Unlink vcpu on destroy - v2
  KVM: s390: optimize float int lock: spin_lock_bh --> spin_lock
  KVM: s390: use hrtimer for clock wakeup from idle - v2
  KVM: s390: Fix memory slot versus run - v3
  ...
parents ddbb8684 09f8ca74
@@ -371,6 +371,7 @@ struct kvm_vcpu_arch {
 int last_run_cpu;
 int vmm_tr_slot;
 int vm_tr_slot;
+int sn_rtc_tr_slot;
 #define KVM_MP_STATE_RUNNABLE 0
 #define KVM_MP_STATE_UNINITIALIZED 1
@@ -465,6 +466,7 @@ struct kvm_arch {
 unsigned long vmm_init_rr;
 int online_vcpus;
+int is_sn2;
 struct kvm_ioapic *vioapic;
 struct kvm_vm_stat stat;
@@ -472,6 +474,7 @@ struct kvm_arch {
 struct list_head assigned_dev_head;
 struct iommu_domain *iommu_domain;
+int iommu_flags;
 struct hlist_head irq_ack_notifier_list;
 unsigned long irq_sources_bitmap;
@@ -578,6 +581,8 @@ struct kvm_vmm_info{
 kvm_vmm_entry *vmm_entry;
 kvm_tramp_entry *tramp_entry;
 unsigned long vmm_ivt;
+unsigned long patch_mov_ar;
+unsigned long patch_mov_ar_sn2;
 };
 int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
@@ -585,7 +590,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 void kvm_sal_emul(struct kvm_vcpu *vcpu);
-static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
 #endif /* __ASSEMBLY__*/
 #endif
@@ -146,6 +146,8 @@
 #define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
 #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX)
 #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
+#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \
+ _PAGE_MA_UC)
 # ifndef __ASSEMBLY__
...
@@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = {
 .name = "IPI"
 };
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
 static struct irqaction resched_irqaction = {
 .handler = dummy_handler,
 .flags = IRQF_DISABLED,
...
@@ -23,7 +23,7 @@ if VIRTUALIZATION
 config KVM
 tristate "Kernel-based Virtual Machine (KVM) support"
-depends on HAVE_KVM && EXPERIMENTAL
+depends on HAVE_KVM && MODULES && EXPERIMENTAL
 # for device assignment:
 depends on PCI
 select PREEMPT_NOTIFIERS
...
@@ -21,6 +21,9 @@
 #include <linux/kvm_host.h>
 #include <linux/smp.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
 #include "vti.h"
 #include "misc.h"
@@ -188,12 +191,35 @@ static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
 return result;
 }
-static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
+/*
+ * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
+ * RTC is used instead. This function patches the ratios from SAL
+ * to match the RTC before providing them to the guest.
+ */
+static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
 {
+struct pal_freq_ratio *ratio;
+unsigned long sal_freq, sal_drift, factor;
+result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+ &sal_freq, &sal_drift);
+ratio = (struct pal_freq_ratio *)&result->v2;
+factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
+ sn_rtc_cycles_per_second;
+ratio->num = 3;
+ratio->den = factor;
+}
+static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
+{
 struct ia64_pal_retval result;
 PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
+if (vcpu->kvm->arch.is_sn2)
+ sn2_patch_itc_freq_ratios(&result);
 return result;
 }
...
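The factor above is simply 3 * sal_freq / sn_rtc_cycles_per_second rounded to the nearest integer, so the num/den pair handed to the guest describes an ITC that ticks at the SN2 RTC rate. A stand-alone sketch of that arithmetic, using made-up frequencies rather than real SAL/RTC values:

#include <stdio.h>

int main(void)
{
    /* Hypothetical example values; the real ones come from
     * ia64_sal_freq_base() and sn_rtc_cycles_per_second. */
    unsigned long sal_freq = 200000000UL;                 /* 200 MHz platform base */
    unsigned long sn_rtc_cycles_per_second = 50000000UL;  /* 50 MHz SN2 RTC */

    /* Same rounding as sn2_patch_itc_freq_ratios(): nearest integer of
     * 3 * sal_freq / rtc_freq. */
    unsigned long factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
        sn_rtc_cycles_per_second;

    /* The guest derives its ITC rate as base_freq * num / den. */
    printf("ratio 3/%lu -> guest ITC rate %lu Hz (RTC rate %lu Hz)\n",
           factor, sal_freq * 3 / factor, sn_rtc_cycles_per_second);
    return 0;
}

With these assumed numbers the program prints a 3/12 ratio, i.e. the emulated ITC runs at exactly the RTC frequency.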
@@ -20,6 +20,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+ int short_hand, int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+#define kvm_apic_present(x) (true)
 #endif
@@ -11,6 +11,7 @@
 #include <asm/asmmacro.h>
 #include <asm/processor.h>
+#include <asm/kvm_host.h>
 #include "vti.h"
 #include "asm-offsets.h"
@@ -140,6 +141,35 @@ GLOBAL_ENTRY(kvm_asm_mov_from_ar)
 ;;
 END(kvm_asm_mov_from_ar)
+/*
+ * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
+ * clock as it's source for emulating the ITC. This version will be
+ * copied on top of the original version if the host is determined to
+ * be an SN2.
+ */
+GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
+add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
+add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+extr.u r17=r25,6,7
+mov r24=b0
+;;
+ld8 r18=[r18]
+ld8 r19=[r19]
+addl r20=@gprel(asm_mov_to_reg),gp
+;;
+add r19=r19,r18
+shladd r17=r17,4,r20
+;;
+adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+st8 [r16] = r19
+mov b0=r17
+br.sptk.few b0
+;;
+END(kvm_asm_mov_from_ar_sn2)
 // mov r1=rr[r3]
 GLOBAL_ENTRY(kvm_asm_mov_from_rr)
...
@@ -652,20 +652,25 @@ void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
 unsigned long isr, unsigned long iim)
 {
 struct kvm_vcpu *v = current_vcpu;
+long psr;
 if (ia64_psr(regs)->cpl == 0) {
 /* Allow hypercalls only when cpl = 0. */
 if (iim == DOMN_PAL_REQUEST) {
+local_irq_save(psr);
 set_pal_call_data(v);
 vmm_transition(v);
 get_pal_call_result(v);
 vcpu_increment_iip(v);
+local_irq_restore(psr);
 return;
 } else if (iim == DOMN_SAL_REQUEST) {
+local_irq_save(psr);
 set_sal_call_data(v);
 vmm_transition(v);
 get_sal_call_result(v);
 vcpu_increment_iip(v);
+local_irq_restore(psr);
 return;
 }
 }
...
@@ -788,13 +788,29 @@ void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
 setfpreg(reg, val, regs); /* FIXME: handle NATs later*/
 }
+/*
+ * The Altix RTC is mapped specially here for the vmm module
+ */
+#define SN_RTC_BASE (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
+static long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
+if (kvm->arch.is_sn2)
+ return (*SN_RTC_BASE);
+else
+#endif
+ return ia64_getreg(_IA64_REG_AR_ITC);
+}
 /************************************************************************
 * lsapic timer
 ***********************************************************************/
 u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
 {
 unsigned long guest_itc;
-guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
+guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
 if (guest_itc >= VMX(vcpu, last_itc)) {
 VMX(vcpu, last_itc) = guest_itc;
@@ -809,7 +825,7 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 struct kvm_vcpu *v;
 struct kvm *kvm;
 int i;
-long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
+long itc_offset = val - kvm_get_itc(vcpu);
 unsigned long vitv = VCPU(vcpu, itv);
 kvm = (struct kvm *)KVM_VM_BASE;
...
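vcpu_get_itc() models the guest ITC as a per-vcpu offset added to whatever host counter kvm_get_itc() returns (AR.ITC normally, the memory-mapped Altix RTC on SN2), and remembers the largest value already handed out. A minimal user-space sketch of that bookkeeping; the names, the fake host counter and the behaviour when the counter appears to step backwards are illustrative assumptions, not the kernel code:

#include <stdio.h>

static unsigned long itc_offset;  /* guest ITC = host counter + offset */
static unsigned long last_itc;    /* largest value already given to the guest */

/* Fake, monotonically increasing host counter standing in for AR.ITC
 * or the SN2 RTC. */
static unsigned long read_host_counter(void)
{
    static unsigned long t = 5000;
    return t += 37;
}

static void set_guest_itc(unsigned long val)
{
    itc_offset = val - read_host_counter();  /* as in vcpu_set_itc() */
    last_itc = 0;
}

static unsigned long get_guest_itc(void)
{
    unsigned long guest_itc = itc_offset + read_host_counter();

    if (guest_itc >= last_itc)
        last_itc = guest_itc;   /* remember the new maximum */
    else
        guest_itc = last_itc;   /* assumed: never report time going backwards */
    return guest_itc;
}

int main(void)
{
    set_guest_itc(1000000);
    for (int i = 0; i < 3; i++)
        printf("guest ITC: %lu\n", get_guest_itc());
    return 0;
}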
@@ -30,6 +30,8 @@ MODULE_AUTHOR("Intel");
 MODULE_LICENSE("GPL");
 extern char kvm_ia64_ivt;
+extern char kvm_asm_mov_from_ar;
+extern char kvm_asm_mov_from_ar_sn2;
 extern fpswa_interface_t *vmm_fpswa_interface;
 long vmm_sanity = 1;
@@ -39,6 +41,8 @@ struct kvm_vmm_info vmm_info = {
 .vmm_entry = vmm_entry,
 .tramp_entry = vmm_trampoline,
 .vmm_ivt = (unsigned long)&kvm_ia64_ivt,
+.patch_mov_ar = (unsigned long)&kvm_asm_mov_from_ar,
+.patch_mov_ar_sn2 = (unsigned long)&kvm_asm_mov_from_ar_sn2,
 };
 static int __init kvm_vmm_init(void)
...
@@ -95,7 +95,7 @@ GLOBAL_ENTRY(kvm_vmm_panic)
 ;;
 srlz.i // guarantee that interruption collection is on
 ;;
-//(p15) ssm psr.i // restore psr.i
+(p15) ssm psr.i // restore psr.
 addl r14=@gprel(ia64_leave_hypervisor),gp
 ;;
 KVM_SAVE_REST
@@ -249,7 +249,7 @@ ENTRY(kvm_break_fault)
 ;;
 srlz.i // guarantee that interruption collection is on
 ;;
-//(p15)ssm psr.i // restore psr.i
+(p15)ssm psr.i // restore psr.i
 addl r14=@gprel(ia64_leave_hypervisor),gp
 ;;
 KVM_SAVE_REST
@@ -439,7 +439,7 @@ kvm_dispatch_vexirq:
 ;;
 srlz.i // guarantee that interruption collection is on
 ;;
-//(p15) ssm psr.i // restore psr.i
+(p15) ssm psr.i // restore psr.i
 adds r3=8,r2 // set up second base pointer
 ;;
 KVM_SAVE_REST
@@ -819,7 +819,7 @@ ENTRY(kvm_dtlb_miss_dispatch)
 ;;
 srlz.i // guarantee that interruption collection is on
 ;;
-//(p15) ssm psr.i // restore psr.i
+(p15) ssm psr.i // restore psr.i
 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
 ;;
 KVM_SAVE_REST
@@ -842,7 +842,7 @@ ENTRY(kvm_itlb_miss_dispatch)
 ;;
 srlz.i // guarantee that interruption collection is on
 ;;
-//(p15) ssm psr.i // restore psr.i
+(p15) ssm psr.i // restore psr.i
 addl r14=@gprel(ia64_leave_hypervisor),gp
 ;;
 KVM_SAVE_REST
@@ -871,7 +871,7 @@ ENTRY(kvm_dispatch_reflection)
 ;;
 srlz.i // guarantee that interruption collection is on
 ;;
-//(p15) ssm psr.i // restore psr.i
+(p15) ssm psr.i // restore psr.i
 addl r14=@gprel(ia64_leave_hypervisor),gp
 ;;
 KVM_SAVE_REST
@@ -898,7 +898,7 @@ ENTRY(kvm_dispatch_virtualization_fault)
 ;;
 srlz.i // guarantee that interruption collection is on
 ;;
-//(p15) ssm psr.i // restore psr.i
+(p15) ssm psr.i // restore psr.i
 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
 ;;
 KVM_SAVE_REST
@@ -920,7 +920,7 @@ ENTRY(kvm_dispatch_interrupt)
 ;;
 srlz.i
 ;;
-//(p15) ssm psr.i
+(p15) ssm psr.i
 addl r14=@gprel(ia64_leave_hypervisor),gp
 ;;
 KVM_SAVE_REST
@@ -1333,7 +1333,7 @@ hostret = r24
 ;;
 (p7) srlz.i
 ;;
-//(p6) ssm psr.i
+(p6) ssm psr.i
 ;;
 mov rp=rpsave
 mov ar.pfs=pfssave
...
@@ -254,7 +254,8 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte)
 "(p7) st8 [%2]=r9;;"
 "ssm psr.ic;;"
 "srlz.d;;"
-/* "ssm psr.i;;" Once interrupts in vmm open, need fix*/
+"ssm psr.i;;"
+"srlz.d;;"
 : "=r"(ret) : "r"(iha), "r"(pte):"memory");
 return ret;
...
@@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 return !!(v->arch.pending_exceptions);
 }
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+/* do real check here */
+return 1;
+}
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
 return !(v->arch.msr & MSR_WE);
...
@@ -13,6 +13,8 @@
 #ifndef ASM_KVM_HOST_H
 #define ASM_KVM_HOST_H
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <asm/debug.h>
 #include <asm/cpuid.h>
@@ -210,7 +212,8 @@ struct kvm_vcpu_arch {
 s390_fp_regs guest_fpregs;
 unsigned int guest_acrs[NUM_ACRS];
 struct kvm_s390_local_interrupt local_int;
-struct timer_list ckc_timer;
+struct hrtimer ckc_timer;
+struct tasklet_struct tasklet;
 union {
 cpuid_t cpu_id;
 u64 stidp_data;
...
@@ -154,17 +154,25 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 static int handle_validity(struct kvm_vcpu *vcpu)
 {
 int viwhy = vcpu->arch.sie_block->ipb >> 16;
+int rc;
 vcpu->stat.exit_validity++;
-if (viwhy == 0x37) {
-fault_in_pages_writeable((char __user *)
+if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
+ <= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
+rc = fault_in_pages_writeable((char __user *)
 vcpu->kvm->arch.guest_origin +
 vcpu->arch.sie_block->prefix,
-PAGE_SIZE);
-return 0;
-}
+2*PAGE_SIZE);
+if (rc)
+/* user will receive sigsegv, exit to user */
+rc = -ENOTSUPP;
+} else
+rc = -ENOTSUPP;
+if (rc)
 VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
 viwhy);
-return -ENOTSUPP;
+return rc;
 }
 static int handle_instruction(struct kvm_vcpu *vcpu)
...
@@ -12,6 +12,8 @@
 #include <asm/lowcore.h>
 #include <asm/uaccess.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/signal.h>
 #include "kvm-s390.h"
@@ -299,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 }
 if ((!rc) && atomic_read(&fi->active)) {
-spin_lock_bh(&fi->lock);
+spin_lock(&fi->lock);
 list_for_each_entry(inti, &fi->list, list)
 if (__interrupt_is_deliverable(vcpu, inti)) {
 rc = 1;
 break;
 }
-spin_unlock_bh(&fi->lock);
+spin_unlock(&fi->lock);
 }
 if ((!rc) && (vcpu->arch.sie_block->ckc <
@@ -318,6 +320,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 return rc;
 }
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+/* do real check here */
+return 1;
+}
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 return 0;
@@ -355,14 +363,12 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 return 0;
 }
-sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
-vcpu->arch.ckc_timer.expires = jiffies + sltime;
-add_timer(&vcpu->arch.ckc_timer);
-VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime);
+sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9;
+hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
+VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
-spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+spin_lock(&vcpu->arch.local_int.float_int->lock);
 spin_lock_bh(&vcpu->arch.local_int.lock);
 add_wait_queue(&vcpu->arch.local_int.wq, &wait);
 while (list_empty(&vcpu->arch.local_int.list) &&
@@ -371,33 +377,46 @@ no_timer:
 !signal_pending(current)) {
 set_current_state(TASK_INTERRUPTIBLE);
 spin_unlock_bh(&vcpu->arch.local_int.lock);
-spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+spin_unlock(&vcpu->arch.local_int.float_int->lock);
 vcpu_put(vcpu);
 schedule();
 vcpu_load(vcpu);
-spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+spin_lock(&vcpu->arch.local_int.float_int->lock);
 spin_lock_bh(&vcpu->arch.local_int.lock);
 }
 __unset_cpu_idle(vcpu);
 __set_current_state(TASK_RUNNING);
 remove_wait_queue(&vcpu->wq, &wait);
 spin_unlock_bh(&vcpu->arch.local_int.lock);
-spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
-del_timer(&vcpu->arch.ckc_timer);
+spin_unlock(&vcpu->arch.local_int.float_int->lock);
+hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
 return 0;
 }
-void kvm_s390_idle_wakeup(unsigned long data)
+void kvm_s390_tasklet(unsigned long parm)
 {
-struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
-spin_lock_bh(&vcpu->arch.local_int.lock);
+spin_lock(&vcpu->arch.local_int.lock);
 vcpu->arch.local_int.timer_due = 1;
 if (waitqueue_active(&vcpu->arch.local_int.wq))
 wake_up_interruptible(&vcpu->arch.local_int.wq);
-spin_unlock_bh(&vcpu->arch.local_int.lock);
+spin_unlock(&vcpu->arch.local_int.lock);
 }
+/*
+ * low level hrtimer wake routine. Because this runs in hardirq context
+ * we schedule a tasklet to do the real work.
+ */
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+struct kvm_vcpu *vcpu;
+vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+tasklet_schedule(&vcpu->arch.tasklet);
+return HRTIMER_NORESTART;
+}
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
@@ -436,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 if (atomic_read(&fi->active)) {
 do {
 deliver = 0;
-spin_lock_bh(&fi->lock);
+spin_lock(&fi->lock);
 list_for_each_entry_safe(inti, n, &fi->list, list) {
 if (__interrupt_is_deliverable(vcpu, inti)) {
 list_del(&inti->list);
@@ -447,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 }
 if (list_empty(&fi->list))
 atomic_set(&fi->active, 0);
-spin_unlock_bh(&fi->lock);
+spin_unlock(&fi->lock);
 if (deliver) {
 __do_deliver_interrupt(vcpu, inti);
 kfree(inti);
@@ -512,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 mutex_lock(&kvm->lock);
 fi = &kvm->arch.float_int;
-spin_lock_bh(&fi->lock);
+spin_lock(&fi->lock);
 list_add_tail(&inti->list, &fi->list);
 atomic_set(&fi->active, 1);
 sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
@@ -529,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 if (waitqueue_active(&li->wq))
 wake_up_interruptible(&li->wq);
 spin_unlock_bh(&li->lock);
-spin_unlock_bh(&fi->lock);
+spin_unlock(&fi->lock);
 mutex_unlock(&kvm->lock);
 return 0;
 }
...
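The new sltime computation converts a TOD-clock difference into nanoseconds for hrtimer_start(): one TOD-clock unit is 1/4096 of a microsecond, i.e. 125/512 ns, which is exactly what multiplying by 125 and shifting right by 9 computes. The wakeup path itself is split in two because the hrtimer callback runs in hardirq context: it only schedules the tasklet, which then takes the now non-_bh locks and wakes the vcpu. A small sketch of the unit conversion with an assumed delta (plain integers instead of kernel types):

#include <stdio.h>

/* Stand-in for the kernel expression ((ckc - now) * 125) >> 9:
 * 1 TOD unit = 1000/4096 ns = 125/512 ns. */
static unsigned long long tod_delta_to_ns(unsigned long long delta)
{
    return (delta * 125) >> 9;
}

int main(void)
{
    /* 4096 TOD units are exactly one microsecond. */
    printf("4096 TOD units -> %llu ns\n", tod_delta_to_ns(4096));

    /* An assumed clock-comparator delta of one second. */
    printf("one second of TOD units -> %llu ns\n",
           tod_delta_to_ns(4096ULL * 1000000));
    return 0;
}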
@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/hrtimer.h>
 #include <linux/init.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
@@ -195,6 +196,10 @@ out_nokvm:
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
+ (__u64) vcpu->arch.sie_block)
+vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
+smp_mb();
 free_page((unsigned long)(vcpu->arch.sie_block));
 kvm_vcpu_uninit(vcpu);
 kfree(vcpu);
@@ -283,8 +288,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
 vcpu->arch.sie_block->ecb = 2;
 vcpu->arch.sie_block->eca = 0xC1002001U;
-setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
+hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
 (unsigned long) vcpu);
+vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 get_cpu_id(&vcpu->arch.cpu_id);
 vcpu->arch.cpu_id.version = 0xff;
 return 0;
@@ -307,19 +314,21 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 vcpu->arch.sie_block->icpua = id;
 BUG_ON(!kvm->arch.sca);
-BUG_ON(kvm->arch.sca->cpu[id].sda);
+if (!kvm->arch.sca->cpu[id].sda)
 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+else
+BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 spin_lock_init(&vcpu->arch.local_int.lock);
 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
-spin_lock_bh(&kvm->arch.float_int.lock);
+spin_lock(&kvm->arch.float_int.lock);
 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
 init_waitqueue_head(&vcpu->arch.local_int.wq);
 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
-spin_unlock_bh(&kvm->arch.float_int.lock);
+spin_unlock(&kvm->arch.float_int.lock);
 rc = kvm_vcpu_init(vcpu, kvm, id);
 if (rc)
@@ -478,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 vcpu_load(vcpu);
+/* verify, that memory has been registered */
+if (!vcpu->kvm->arch.guest_memsize) {
+vcpu_put(vcpu);
+return -EINVAL;
+}
 if (vcpu->sigset_active)
 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
@@ -657,6 +672,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 struct kvm_memory_slot old,
 int user_alloc)
 {
+int i;
 /* A few sanity checks. We can have exactly one memory slot which has
 to start at guest virtual zero and which has to be located at a
 page boundary in userland and which has to end at a page boundary.
@@ -664,7 +681,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 vmas. It is okay to mmap() and munmap() stuff in this slot after
 doing this call at any time */
-if (mem->slot)
+if (mem->slot || kvm->arch.guest_memsize)
 return -EINVAL;
 if (mem->guest_phys_addr)
@@ -676,15 +693,39 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 if (mem->memory_size & (PAGE_SIZE - 1))
 return -EINVAL;
+if (!user_alloc)
+return -EINVAL;
+/* lock all vcpus */
+for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+if (!kvm->vcpus[i])
+continue;
+if (!mutex_trylock(&kvm->vcpus[i]->mutex))
+goto fail_out;
+}
 kvm->arch.guest_origin = mem->userspace_addr;
 kvm->arch.guest_memsize = mem->memory_size;
-/* FIXME: we do want to interrupt running CPUs and update their memory
-configuration now to avoid race conditions. But hey, changing the
-memory layout while virtual CPUs are running is usually bad
-programming practice. */
+/* update sie control blocks, and unlock all vcpus */
+for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+if (kvm->vcpus[i]) {
+kvm->vcpus[i]->arch.sie_block->gmsor =
+kvm->arch.guest_origin;
+kvm->vcpus[i]->arch.sie_block->gmslm =
+kvm->arch.guest_memsize +
+kvm->arch.guest_origin +
+VIRTIODESCSPACE - 1ul;
+mutex_unlock(&kvm->vcpus[i]->mutex);
+}
+}
 return 0;
+fail_out:
+for (; i >= 0; i--)
+mutex_unlock(&kvm->vcpus[i]->mutex);
+return -EINVAL;
 }
 void kvm_arch_flush_shadow(struct kvm *kvm)
...
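kvm_arch_set_memory_region() now takes every existing vcpu mutex with mutex_trylock() before touching guest_origin/guest_memsize and the SIE control blocks, and backs out the locks it already holds if any attempt fails. A generic user-space sketch of that all-or-nothing pattern (pthreads instead of vcpu mutexes; the array size, names and rollback details are invented, not the kernel's code):

#include <pthread.h>
#include <stdio.h>

#define NSLOTS 4

/* Stand-ins for kvm->vcpus[i]->mutex; slot 2 is intentionally absent,
 * like a vcpu that has not been created yet. */
static pthread_mutex_t locks[NSLOTS] = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};
static int present[NSLOTS] = { 1, 1, 0, 1 };

static int lock_all_or_none(void)
{
    int i;

    for (i = 0; i < NSLOTS; i++) {
        if (!present[i])
            continue;
        if (pthread_mutex_trylock(&locks[i]) != 0)
            goto fail;
    }
    return 0;

fail:
    /* Roll back only the slots locked so far. */
    for (i--; i >= 0; i--)
        if (present[i])
            pthread_mutex_unlock(&locks[i]);
    return -1;
}

static void unlock_all(void)
{
    for (int i = 0; i < NSLOTS; i++)
        if (present[i])
            pthread_mutex_unlock(&locks[i]);
}

int main(void)
{
    if (lock_all_or_none() == 0) {
        /* ...update shared state while every holder is excluded... */
        unlock_all();
        puts("updated under all locks");
    }
    return 0;
}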
@@ -14,6 +14,7 @@
 #ifndef ARCH_S390_KVM_S390_H
 #define ARCH_S390_KVM_S390_H
+#include <linux/hrtimer.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
@@ -41,7 +42,8 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
 }
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
-void kvm_s390_idle_wakeup(unsigned long data);
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
+void kvm_s390_tasklet(unsigned long parm);
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 int kvm_s390_inject_vm(struct kvm *kvm,
 struct kvm_s390_interrupt *s390int);
...
@@ -204,11 +204,11 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
 int cpus = 0;
 int n;
-spin_lock_bh(&fi->lock);
+spin_lock(&fi->lock);
 for (n = 0; n < KVM_MAX_VCPUS; n++)
 if (fi->local_int[n])
 cpus++;
-spin_unlock_bh(&fi->lock);
+spin_unlock(&fi->lock);
 /* deal with other level 3 hypervisors */
 if (stsi(mem, 3, 2, 2) == -ENOSYS)
...
@@ -52,7 +52,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
 if (cpu_addr >= KVM_MAX_VCPUS)
 return 3; /* not operational */
-spin_lock_bh(&fi->lock);
+spin_lock(&fi->lock);
 if (fi->local_int[cpu_addr] == NULL)
 rc = 3; /* not operational */
 else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
@@ -64,7 +64,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
 *reg |= SIGP_STAT_STOPPED;
 rc = 1; /* status stored */
 }
-spin_unlock_bh(&fi->lock);
+spin_unlock(&fi->lock);
 VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
 return rc;
@@ -86,7 +86,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
 inti->type = KVM_S390_INT_EMERGENCY;
-spin_lock_bh(&fi->lock);
+spin_lock(&fi->lock);
 li = fi->local_int[cpu_addr];
 if (li == NULL) {
 rc = 3; /* not operational */
@@ -102,7 +102,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
 spin_unlock_bh(&li->lock);
 rc = 0; /* order accepted */
 unlock:
-spin_unlock_bh(&fi->lock);
+spin_unlock(&fi->lock);
 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
 return rc;
 }
@@ -123,7 +123,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
 inti->type = KVM_S390_SIGP_STOP;
-spin_lock_bh(&fi->lock);
+spin_lock(&fi->lock);
 li = fi->local_int[cpu_addr];
 if (li == NULL) {
 rc = 3; /* not operational */
@@ -142,7 +142,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
 spin_unlock_bh(&li->lock);
 rc = 0; /* order accepted */
 unlock:
-spin_unlock_bh(&fi->lock);
+spin_unlock(&fi->lock);
 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
 return rc;
 }
@@ -188,7 +188,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 if (!inti)
 return 2; /* busy */
-spin_lock_bh(&fi->lock);
+spin_lock(&fi->lock);
 li = fi->local_int[cpu_addr];
 if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
@@ -220,7 +220,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 out_li:
 spin_unlock_bh(&li->lock);
 out_fi:
-spin_unlock_bh(&fi->lock);
+spin_unlock(&fi->lock);
 return rc;
 }
...
@@ -116,6 +116,8 @@
 #define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */
 #define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */
 #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
 #define X86_FEATURE_AES (4*32+25) /* AES instructions */
 #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
...
@@ -16,6 +16,7 @@
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_MSIX
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
...
@@ -185,6 +185,7 @@ union kvm_mmu_page_role {
 unsigned access:3;
 unsigned invalid:1;
 unsigned cr4_pge:1;
+unsigned nxe:1;
 };
 };
@@ -212,7 +213,6 @@ struct kvm_mmu_page {
 int multimapped; /* More than one parent_pte? */
 int root_count; /* Currently serving as active root */
 bool unsync;
-bool global;
 unsigned int unsync_children;
 union {
 u64 *parent_pte; /* !multimapped */
@@ -261,13 +261,11 @@ struct kvm_mmu {
 union kvm_mmu_page_role base_role;
 u64 *pae_root;
+u64 rsvd_bits_mask[2][4];
 };
 struct kvm_vcpu_arch {
 u64 host_tsc;
-int interrupt_window_open;
-unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
 /*
 * rip and regs accesses must go through
 * kvm_{register,rip}_{read,write} functions.
@@ -286,6 +284,7 @@ struct kvm_vcpu_arch {
 u64 shadow_efer;
 u64 apic_base;
 struct kvm_lapic *apic; /* kernel irqchip context */
+int32_t apic_arb_prio;
 int mp_state;
 int sipi_vector;
 u64 ia32_misc_enable_msr;
@@ -320,6 +319,8 @@ struct kvm_vcpu_arch {
 struct kvm_pio_request pio;
 void *pio_data;
+u8 event_exit_inst_len;
 struct kvm_queued_exception {
 bool pending;
 bool has_error_code;
@@ -329,11 +330,12 @@ struct kvm_vcpu_arch {
 struct kvm_queued_interrupt {
 bool pending;
+bool soft;
 u8 nr;
 } interrupt;
 struct {
-int active;
+int vm86_active;
 u8 save_iopl;
 struct kvm_save_segment {
 u16 selector;
@@ -356,9 +358,9 @@ struct kvm_vcpu_arch {
 unsigned int time_offset;
 struct page *time_page;
+bool singlestep; /* guest is single stepped by KVM */
 bool nmi_pending;
 bool nmi_injected;
-bool nmi_window_open;
 struct mtrr_state_type mtrr_state;
 u32 pat;
@@ -392,15 +394,14 @@ struct kvm_arch{
 */
 struct list_head active_mmu_pages;
 struct list_head assigned_dev_head;
-struct list_head oos_global_pages;
 struct iommu_domain *iommu_domain;
+int iommu_flags;
 struct kvm_pic *vpic;
 struct kvm_ioapic *vioapic;
 struct kvm_pit *vpit;
 struct hlist_head irq_ack_notifier_list;
 int vapics_in_nmi_mode;
-int round_robin_prev_vcpu;
 unsigned int tss_addr;
 struct page *apic_access_page;
@@ -423,7 +424,6 @@ struct kvm_vm_stat {
 u32 mmu_recycled;
 u32 mmu_cache_miss;
 u32 mmu_unsync;
-u32 mmu_unsync_global;
 u32 remote_tlb_flush;
 u32 lpages;
 };
@@ -443,7 +443,6 @@ struct kvm_vcpu_stat {
 u32 halt_exits;
 u32 halt_wakeup;
 u32 request_irq_exits;
-u32 request_nmi_exits;
 u32 irq_exits;
 u32 host_state_reload;
 u32 efer_reload;
@@ -511,20 +510,22 @@ struct kvm_x86_ops {
 void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
 void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
+u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 unsigned char *hypercall_addr);
-int (*get_irq)(struct kvm_vcpu *vcpu);
-void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+void (*set_irq)(struct kvm_vcpu *vcpu);
+void (*set_nmi)(struct kvm_vcpu *vcpu);
 void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 bool has_error_code, u32 error_code);
-bool (*exception_injected)(struct kvm_vcpu *vcpu);
-void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
-void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
-struct kvm_run *run);
+int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
+int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 int (*get_tdp_level)(void);
-int (*get_mt_mask_shift)(void);
+u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 };
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -538,7 +539,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 void kvm_mmu_set_base_ptes(u64 base_pte);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
+u64 dirty_mask, u64 nx_mask, u64 x_mask);
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -552,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 const void *val, int bytes);
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 gpa_t addr, unsigned long *ret);
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 extern bool tdp_enabled;
@@ -563,6 +565,7 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE (1 << 0)
 #define EMULTYPE_TRAP_UD (1 << 1)
+#define EMULTYPE_SKIP (1 << 2)
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
 unsigned long cr2, u16 error_code, int emulation_type);
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
@@ -638,7 +641,6 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
@@ -769,6 +771,8 @@ enum {
 #define HF_GIF_MASK (1 << 0)
 #define HF_HIF_MASK (1 << 1)
 #define HF_VINTR_MASK (1 << 2)
+#define HF_NMI_MASK (1 << 3)
+#define HF_IRET_MASK (1 << 4)
 /*
 * Hardware virtualization extension instructions may fault if a
@@ -791,5 +795,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 #endif /* _ASM_X86_KVM_HOST_H */
@@ -143,6 +143,9 @@ struct decode_cache {
 struct fetch_cache fetch;
 };
+#define X86_SHADOW_INT_MOV_SS 1
+#define X86_SHADOW_INT_STI 2
 struct x86_emulate_ctxt {
 /* Register state before/after emulation. */
 struct kvm_vcpu *vcpu;
@@ -152,6 +155,9 @@ struct x86_emulate_ctxt {
 int mode;
 u32 cs_base;
+/* interruptibility state, as a result of execution of STI or MOV SS */
+int interruptibility;
 /* decode cache */
 struct decode_cache decode;
 };
...
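X86_SHADOW_INT_MOV_SS and X86_SHADOW_INT_STI, together with the new interruptibility field, let the emulator report the one-instruction window after MOV SS or STI during which the CPU architecturally blocks interrupt delivery, so the hypervisor can hold back injection for one more instruction. A toy illustration of that idea only, with invented helper names and a deliberately simplified shadow lifetime, not the emulator's real control flow:

#include <stdio.h>

#define X86_SHADOW_INT_MOV_SS  1
#define X86_SHADOW_INT_STI     2

/* Invented stand-in for the emulation context's interruptibility field. */
static int interruptibility;

static void emulate_sti(void)    { interruptibility |= X86_SHADOW_INT_STI; }
static void emulate_mov_ss(void) { interruptibility |= X86_SHADOW_INT_MOV_SS; }

/* Checked once per emulated instruction: injection is allowed only when no
 * shadow bit is set, and the shadow expires after that one instruction. */
static int may_inject_irq_then_clear(void)
{
    int ok = (interruptibility == 0);
    interruptibility = 0;
    return ok;
}

int main(void)
{
    emulate_sti();
    printf("right after STI: inject allowed? %d\n", may_inject_irq_then_clear());
    printf("one instruction later: inject allowed? %d\n", may_inject_irq_then_clear());
    emulate_mov_ss();
    printf("right after MOV SS: inject allowed? %d\n", may_inject_irq_then_clear());
    return 0;
}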
@@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb {
 #define SVM_EVTINJ_VALID_ERR (1 << 11)
 #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
 #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
 #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
...
@@ -247,6 +247,7 @@ enum vmcs_field {
 #define EXIT_REASON_MSR_READ 31
 #define EXIT_REASON_MSR_WRITE 32
 #define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS 44
 #define EXIT_REASON_EPT_VIOLATION 48
...
@@ -420,6 +420,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 out2:
 atomic_dec(&mce_entry);
 }
+EXPORT_SYMBOL_GPL(do_machine_check);
 #ifdef CONFIG_X86_MCE_INTEL
 /***
...
@@ -27,6 +27,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/hardirq.h>
+#include <asm/timer.h>
 #define MMU_QUEUE_SIZE 1024
@@ -230,6 +231,9 @@ static void paravirt_ops_setup(void)
 pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
 pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
 }
+#ifdef CONFIG_X86_IO_APIC
+no_timer_check = 1;
+#endif
 }
 void __init kvm_guest_init(void)
...
@@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 ack_APIC_irq();
 inc_irq_stat(irq_resched_count);
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
 }
 void smp_call_function_interrupt(struct pt_regs *regs)
...
@@ -50,6 +50,9 @@ config KVM_INTEL
 Provides support for KVM on Intel processors equipped with the VT
 extensions.
+To compile this as a module, choose M here: the module
+will be called kvm-intel.
 config KVM_AMD
 tristate "KVM for AMD processors support"
 depends on KVM
@@ -57,6 +60,9 @@ config KVM_AMD
 Provides support for KVM on AMD processors equipped with the AMD-V
 (SVM) extensions.
+To compile this as a module, choose M here: the module
+will be called kvm-amd.
 config KVM_TRACE
 bool "KVM trace support"
 depends on KVM && SYSFS
...
@@ -14,7 +14,7 @@ endif
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-	i8254.o
+	i8254.o timer.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
...
...@@ -98,6 +98,37 @@ static int pit_get_gate(struct kvm *kvm, int channel) ...@@ -98,6 +98,37 @@ static int pit_get_gate(struct kvm *kvm, int channel)
return kvm->arch.vpit->pit_state.channels[channel].gate; return kvm->arch.vpit->pit_state.channels[channel].gate;
} }
static s64 __kpit_elapsed(struct kvm *kvm)
{
s64 elapsed;
ktime_t remaining;
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
/*
* The Counter does not stop when it reaches zero. In
* Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
* the highest count, either FFFF hex for binary counting
* or 9999 for BCD counting, and continues counting.
* Modes 2 and 3 are periodic; the Counter reloads
* itself with the initial count and continues counting
* from there.
*/
remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
elapsed = mod_64(elapsed, ps->pit_timer.period);
return elapsed;
}
static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c,
int channel)
{
if (channel == 0)
return __kpit_elapsed(kvm);
return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
}
static int pit_get_count(struct kvm *kvm, int channel) static int pit_get_count(struct kvm *kvm, int channel)
{ {
struct kvm_kpit_channel_state *c = struct kvm_kpit_channel_state *c =
...@@ -107,7 +138,7 @@ static int pit_get_count(struct kvm *kvm, int channel) ...@@ -107,7 +138,7 @@ static int pit_get_count(struct kvm *kvm, int channel)
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); t = kpit_elapsed(kvm, c, channel);
d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) { switch (c->mode) {
...@@ -137,7 +168,7 @@ static int pit_get_out(struct kvm *kvm, int channel) ...@@ -137,7 +168,7 @@ static int pit_get_out(struct kvm *kvm, int channel)
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); t = kpit_elapsed(kvm, c, channel);
d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) { switch (c->mode) {
...@@ -193,28 +224,6 @@ static void pit_latch_status(struct kvm *kvm, int channel) ...@@ -193,28 +224,6 @@ static void pit_latch_status(struct kvm *kvm, int channel)
} }
} }
static int __pit_timer_fn(struct kvm_kpit_state *ps)
{
struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
struct kvm_kpit_timer *pt = &ps->pit_timer;
if (!atomic_inc_and_test(&pt->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
if (!pt->reinject)
atomic_set(&pt->pending, 1);
if (vcpu0 && waitqueue_active(&vcpu0->wq))
wake_up_interruptible(&vcpu0->wq);
hrtimer_add_expires_ns(&pt->timer, pt->period);
pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
if (pt->period)
ps->channels[0].count_load_time = ktime_get();
return (pt->period == 0 ? 0 : 1);
}
int pit_has_pending_timer(struct kvm_vcpu *vcpu) int pit_has_pending_timer(struct kvm_vcpu *vcpu)
{ {
struct kvm_pit *pit = vcpu->kvm->arch.vpit; struct kvm_pit *pit = vcpu->kvm->arch.vpit;
...@@ -235,21 +244,6 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) ...@@ -235,21 +244,6 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
spin_unlock(&ps->inject_lock); spin_unlock(&ps->inject_lock);
} }
static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
{
struct kvm_kpit_state *ps;
int restart_timer = 0;
ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
restart_timer = __pit_timer_fn(ps);
if (restart_timer)
return HRTIMER_RESTART;
else
return HRTIMER_NORESTART;
}
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
{ {
struct kvm_pit *pit = vcpu->kvm->arch.vpit; struct kvm_pit *pit = vcpu->kvm->arch.vpit;
...@@ -263,15 +257,26 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) ...@@ -263,15 +257,26 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
hrtimer_start_expires(timer, HRTIMER_MODE_ABS); hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
} }
static void destroy_pit_timer(struct kvm_kpit_timer *pt) static void destroy_pit_timer(struct kvm_timer *pt)
{ {
pr_debug("pit: execute del timer!\n"); pr_debug("pit: execute del timer!\n");
hrtimer_cancel(&pt->timer); hrtimer_cancel(&pt->timer);
} }
static bool kpit_is_periodic(struct kvm_timer *ktimer)
{
struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
pit_timer);
return ps->is_periodic;
}
static struct kvm_timer_ops kpit_ops = {
.is_periodic = kpit_is_periodic,
};
static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
{ {
struct kvm_kpit_timer *pt = &ps->pit_timer; struct kvm_timer *pt = &ps->pit_timer;
s64 interval; s64 interval;
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
...@@ -280,8 +285,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) ...@@ -280,8 +285,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
/* TODO: the new value only takes effect after the counter is retriggered */ /* TODO: the new value only takes effect after the counter is retriggered */
hrtimer_cancel(&pt->timer); hrtimer_cancel(&pt->timer);
pt->period = (is_period == 0) ? 0 : interval; pt->period = interval;
pt->timer.function = pit_timer_fn; ps->is_periodic = is_period;
pt->timer.function = kvm_timer_fn;
pt->t_ops = &kpit_ops;
pt->kvm = ps->pit->kvm;
pt->vcpu_id = 0;
atomic_set(&pt->pending, 0); atomic_set(&pt->pending, 0);
ps->irq_ack = 1; ps->irq_ack = 1;
...@@ -298,23 +309,23 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) ...@@ -298,23 +309,23 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
/* /*
* Though spec said the state of 8254 is undefined after power-up, * The largest possible initial count is 0; this is equivalent
* seems some tricky OS like Windows XP depends on IRQ0 interrupt * to 2^16 for binary counting and 10^4 for BCD counting.
* when booting up.
* So here setting initialize rate for it, and not a specific number
*/ */
if (val == 0) if (val == 0)
val = 0x10000; val = 0x10000;
ps->channels[channel].count_load_time = ktime_get();
ps->channels[channel].count = val; ps->channels[channel].count = val;
if (channel != 0) if (channel != 0) {
ps->channels[channel].count_load_time = ktime_get();
return; return;
}
/* Two types of timer /* Two types of timer
* mode 1 is one shot, mode 2 is periodic, otherwise delete the timer */ * mode 1 is one shot, mode 2 is periodic, otherwise delete the timer */
switch (ps->channels[0].mode) { switch (ps->channels[0].mode) {
case 0:
case 1: case 1:
/* FIXME: enhance mode 4 precision */ /* FIXME: enhance mode 4 precision */
case 4: case 4:
......
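The count-to-period conversion performed when channel 0 is loaded (interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ) in create_pit_timer above) can be checked with a small standalone program: a reload value of 0, treated as 0x10000 per the comment, yields the familiar ~54.9 ms / 18.2 Hz PC timer tick. Constants are the usual i8254 values; this is a sketch, not the kernel function.

#include <stdint.h>
#include <stdio.h>

#define PIT_FREQ_HZ  1193182ULL
#define NSEC_PER_SEC 1000000000ULL

static uint64_t count_to_interval_ns(uint32_t val)
{
	if (val == 0)
		val = 0x10000;                       /* largest count: 2^16 */
	return val * NSEC_PER_SEC / PIT_FREQ_HZ;     /* kernel uses muldiv64() here */
}

int main(void)
{
	printf("count 0     -> %llu ns per period (~54.9 ms)\n",
	       (unsigned long long)count_to_interval_ns(0));
	printf("count 11932 -> %llu ns (~10 ms)\n",
	       (unsigned long long)count_to_interval_ns(11932));
	return 0;
}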
...@@ -3,15 +3,6 @@ ...@@ -3,15 +3,6 @@
#include "iodev.h" #include "iodev.h"
struct kvm_kpit_timer {
struct hrtimer timer;
int irq;
s64 period; /* unit: ns */
s64 scheduled;
atomic_t pending;
bool reinject;
};
struct kvm_kpit_channel_state { struct kvm_kpit_channel_state {
u32 count; /* can be 65536 */ u32 count; /* can be 65536 */
u16 latched_count; u16 latched_count;
...@@ -30,7 +21,8 @@ struct kvm_kpit_channel_state { ...@@ -30,7 +21,8 @@ struct kvm_kpit_channel_state {
struct kvm_kpit_state { struct kvm_kpit_state {
struct kvm_kpit_channel_state channels[3]; struct kvm_kpit_channel_state channels[3];
struct kvm_kpit_timer pit_timer; struct kvm_timer pit_timer;
bool is_periodic;
u32 speaker_data_on; u32 speaker_data_on;
struct mutex lock; struct mutex lock;
struct kvm_pit *pit; struct kvm_pit *pit;
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "irq.h" #include "irq.h"
#include "i8254.h" #include "i8254.h"
#include "x86.h"
/* /*
* check if there are pending timer events * check if there are pending timer events
...@@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) ...@@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{ {
struct kvm_pic *s; struct kvm_pic *s;
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.pending;
if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
if (kvm_apic_accept_pic_intr(v)) { if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm); /* PIC */ s = pic_irqchip(v->kvm); /* PIC */
...@@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) ...@@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
struct kvm_pic *s; struct kvm_pic *s;
int vector; int vector;
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.nr;
vector = kvm_get_apic_interrupt(v); /* APIC */ vector = kvm_get_apic_interrupt(v); /* APIC */
if (vector == -1) { if (vector == -1) {
if (kvm_apic_accept_pic_intr(v)) { if (kvm_apic_accept_pic_intr(v)) {
......
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
atomic_t pending; /* accumulated triggered timers */
bool reinject;
struct kvm_timer_ops *t_ops;
struct kvm *kvm;
int vcpu_id;
};
struct kvm_timer_ops {
bool (*is_periodic)(struct kvm_timer *);
};
enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
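A userspace model of the split this header introduces: device-specific state answers is_periodic() through an ops table, while the generic expiry path only decides whether to re-arm. All names below are invented for illustration; it is a sketch of the pattern, not the kernel implementation.

#include <stdbool.h>
#include <stdio.h>

struct model_timer;

struct model_timer_ops {
	bool (*is_periodic)(struct model_timer *);
};

struct model_timer {
	long long period_ns;
	int pending;                          /* accumulated expired ticks */
	const struct model_timer_ops *t_ops;
	bool periodic;                        /* stands in for kvm_kpit_state.is_periodic */
};

static bool model_is_periodic(struct model_timer *t)
{
	return t->periodic;
}

static const struct model_timer_ops model_ops = {
	.is_periodic = model_is_periodic,
};

/* generic expiry path, analogous to kvm_timer_fn(): returns true to re-arm */
static bool model_timer_fire(struct model_timer *t)
{
	t->pending++;                         /* record the expired tick */
	return t->t_ops->is_periodic(t);      /* only periodic timers re-arm */
}

int main(void)
{
	struct model_timer pit = {
		.period_ns = 10 * 1000 * 1000,
		.t_ops     = &model_ops,
		.periodic  = true,
	};

	bool rearm = model_timer_fire(&pit);
	printf("pending=%d, re-arm=%s\n", pit.pending, rearm ? "yes" : "no");
	return 0;
}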
...@@ -2,18 +2,15 @@ ...@@ -2,18 +2,15 @@
#define __KVM_X86_LAPIC_H #define __KVM_X86_LAPIC_H
#include "iodev.h" #include "iodev.h"
#include "kvm_timer.h"
#include <linux/kvm_host.h> #include <linux/kvm_host.h>
struct kvm_lapic { struct kvm_lapic {
unsigned long base_address; unsigned long base_address;
struct kvm_io_device dev; struct kvm_io_device dev;
struct { struct kvm_timer lapic_timer;
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count; u32 divide_count;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
struct page *regs_page; struct page *regs_page;
void *regs; void *regs;
...@@ -34,12 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); ...@@ -34,12 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
bool kvm_apic_present(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
......
...@@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu) ...@@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
return vcpu->arch.cr0 & X86_CR0_PG; return vcpu->arch.cr0 & X86_CR0_PG;
} }
static inline int is_present_pte(unsigned long pte)
{
return pte & PT_PRESENT_MASK;
}
#endif #endif
...@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, ...@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
gfn_t table_gfn; gfn_t table_gfn;
unsigned index, pt_access, pte_access; unsigned index, pt_access, pte_access;
gpa_t pte_gpa; gpa_t pte_gpa;
int rsvd_fault = 0;
pgprintk("%s: addr %lx\n", __func__, addr); pgprintk("%s: addr %lx\n", __func__, addr);
walk: walk:
...@@ -157,6 +158,10 @@ walk: ...@@ -157,6 +158,10 @@ walk:
if (!is_present_pte(pte)) if (!is_present_pte(pte))
goto not_present; goto not_present;
rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
if (rsvd_fault)
goto access_error;
if (write_fault && !is_writeble_pte(pte)) if (write_fault && !is_writeble_pte(pte))
if (user_fault || is_write_protection(vcpu)) if (user_fault || is_write_protection(vcpu))
goto access_error; goto access_error;
...@@ -209,7 +214,6 @@ walk: ...@@ -209,7 +214,6 @@ walk:
if (ret) if (ret)
goto walk; goto walk;
pte |= PT_DIRTY_MASK; pte |= PT_DIRTY_MASK;
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
walker->ptes[walker->level - 1] = pte; walker->ptes[walker->level - 1] = pte;
} }
...@@ -233,6 +237,8 @@ err: ...@@ -233,6 +237,8 @@ err:
walker->error_code |= PFERR_USER_MASK; walker->error_code |= PFERR_USER_MASK;
if (fetch_fault) if (fetch_fault)
walker->error_code |= PFERR_FETCH_MASK; walker->error_code |= PFERR_FETCH_MASK;
if (rsvd_fault)
walker->error_code |= PFERR_RSVD_MASK;
return 0; return 0;
} }
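The err: path above now also reports reserved-bit faults via PFERR_RSVD_MASK. Below is a standalone sketch of how such an error code is assembled from the individual fault flags; the bit positions follow the architectural x86 #PF error-code layout and are assumed to match the kernel's PFERR_*_MASK definitions rather than copied from them.

#include <stdint.h>
#include <stdio.h>

#define PFERR_PRESENT (1u << 0)
#define PFERR_WRITE   (1u << 1)
#define PFERR_USER    (1u << 2)
#define PFERR_RSVD    (1u << 3)
#define PFERR_FETCH   (1u << 4)

static uint32_t build_pf_error_code(int present, int write_fault,
				    int user_fault, int rsvd_fault,
				    int fetch_fault)
{
	uint32_t ec = 0;

	if (present)
		ec |= PFERR_PRESENT;
	if (write_fault)
		ec |= PFERR_WRITE;
	if (user_fault)
		ec |= PFERR_USER;
	if (rsvd_fault)
		ec |= PFERR_RSVD;     /* the new reserved-bit case added above */
	if (fetch_fault)
		ec |= PFERR_FETCH;
	return ec;
}

int main(void)
{
	/* a user-mode write that hit a reserved bit in a present PTE */
	printf("error code = 0x%x\n", build_pf_error_code(1, 1, 1, 1, 0));
	return 0;
}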
...@@ -262,8 +268,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, ...@@ -262,8 +268,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
kvm_get_pfn(pfn); kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, largepage, gpte & PT_DIRTY_MASK, NULL, largepage,
gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), gpte_to_gfn(gpte), pfn, true);
pfn, true);
} }
/* /*
...@@ -297,7 +302,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -297,7 +302,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
user_fault, write_fault, user_fault, write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK, gw->ptes[gw->level-1] & PT_DIRTY_MASK,
ptwrite, largepage, ptwrite, largepage,
gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
gw->gfn, pfn, false); gw->gfn, pfn, false);
break; break;
} }
...@@ -380,7 +384,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -380,7 +384,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
return r; return r;
/* /*
* Look up the shadow pte for the faulting address. * Look up the guest pte for the faulting address.
*/ */
r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
fetch_fault); fetch_fault);
...@@ -586,7 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ...@@ -586,7 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++; nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, is_dirty_pte(gpte), 0, gfn,
spte_to_pfn(sp->spt[i]), true, false); spte_to_pfn(sp->spt[i]), true, false);
} }
......
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hrtimer.h>
#include <asm/atomic.h>
#include "kvm_timer.h"
static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
{
int restart_timer = 0;
wait_queue_head_t *q = &vcpu->wq;
/* FIXME: this code should not know anything about vcpus */
if (!atomic_inc_and_test(&ktimer->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
if (!ktimer->reinject)
atomic_set(&ktimer->pending, 1);
if (waitqueue_active(q))
wake_up_interruptible(q);
if (ktimer->t_ops->is_periodic(ktimer)) {
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
restart_timer = 1;
}
return restart_timer;
}
enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
{
int restart_timer;
struct kvm_vcpu *vcpu;
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];
if (!vcpu)
return HRTIMER_NORESTART;
restart_timer = __kvm_timer_fn(vcpu, ktimer);
if (restart_timer)
return HRTIMER_RESTART;
else
return HRTIMER_NORESTART;
}
...@@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) ...@@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
vcpu->arch.exception.pending = false; vcpu->arch.exception.pending = false;
} }
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector) static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
bool soft)
{ {
vcpu->arch.interrupt.pending = true; vcpu->arch.interrupt.pending = true;
vcpu->arch.interrupt.soft = soft;
vcpu->arch.interrupt.nr = vector; vcpu->arch.interrupt.nr = vector;
} }
...@@ -19,4 +21,14 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) ...@@ -19,4 +21,14 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
vcpu->arch.interrupt.pending = false; vcpu->arch.interrupt.pending = false;
} }
static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
vcpu->arch.nmi_injected;
}
static inline bool kvm_exception_is_soft(unsigned int nr)
{
return (nr == BP_VECTOR) || (nr == OF_VECTOR);
}
#endif #endif
...@@ -59,13 +59,14 @@ ...@@ -59,13 +59,14 @@
#define SrcImm (5<<4) /* Immediate operand. */ #define SrcImm (5<<4) /* Immediate operand. */
#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
#define SrcOne (7<<4) /* Implied '1' */ #define SrcOne (7<<4) /* Implied '1' */
#define SrcMask (7<<4) #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */
#define SrcMask (0xf<<4)
/* Generic ModRM decode. */ /* Generic ModRM decode. */
#define ModRM (1<<7) #define ModRM (1<<8)
/* Destination is only written; never read. */ /* Destination is only written; never read. */
#define Mov (1<<8) #define Mov (1<<9)
#define BitOp (1<<9) #define BitOp (1<<10)
#define MemAbs (1<<10) /* Memory operand is absolute displacement */ #define MemAbs (1<<11) /* Memory operand is absolute displacement */
#define String (1<<12) /* String instruction (rep capable) */ #define String (1<<12) /* String instruction (rep capable) */
#define Stack (1<<13) /* Stack instruction (push/pop) */ #define Stack (1<<13) /* Stack instruction (push/pop) */
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
...@@ -76,6 +77,7 @@ ...@@ -76,6 +77,7 @@
#define Src2CL (1<<29) #define Src2CL (1<<29)
#define Src2ImmByte (2<<29) #define Src2ImmByte (2<<29)
#define Src2One (3<<29) #define Src2One (3<<29)
#define Src2Imm16 (4<<29)
#define Src2Mask (7<<29) #define Src2Mask (7<<29)
enum { enum {
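The widened SrcMask (now 0xf<<4) is what makes room for SrcImmUByte and pushes ModRM, Mov, BitOp and MemAbs up by one bit each. A standalone sketch of how the decoder tells the signed and unsigned byte immediates apart with that field; the flag values are copied from the #defines above, while the helper itself is illustrative.

#include <stdint.h>
#include <stdio.h>

#define SrcImmByte  (6u << 4)   /* 8-bit sign-extended immediate */
#define SrcImmUByte (8u << 4)   /* 8-bit unsigned immediate (new) */
#define SrcMask     (0xfu << 4)
#define ModRM       (1u << 8)   /* was 1<<7 before the field was widened */

static long decode_byte_imm(uint32_t d, uint8_t raw)
{
	/* mirrors the SrcImmByte/SrcImmUByte branch in the decoder */
	if ((d & SrcMask) == SrcImmByte)
		return (int8_t)raw;      /* sign-extend, e.g. short jcc displacements */
	return raw;                      /* zero-extend, e.g. in/out port numbers */
}

int main(void)
{
	uint32_t d = SrcImmUByte | ModRM;

	printf("0xf0 as SrcImmByte : %ld\n", decode_byte_imm(SrcImmByte, 0xf0));
	printf("0xf0 as SrcImmUByte: %ld\n", decode_byte_imm(d, 0xf0));
	return 0;
}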
...@@ -135,11 +137,11 @@ static u32 opcode_table[256] = { ...@@ -135,11 +137,11 @@ static u32 opcode_table[256] = {
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
/* 0x70 - 0x77 */ /* 0x70 - 0x77 */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
/* 0x78 - 0x7F */ /* 0x78 - 0x7F */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
/* 0x80 - 0x87 */ /* 0x80 - 0x87 */
Group | Group1_80, Group | Group1_81, Group | Group1_80, Group | Group1_81,
Group | Group1_82, Group | Group1_83, Group | Group1_82, Group | Group1_83,
...@@ -153,7 +155,8 @@ static u32 opcode_table[256] = { ...@@ -153,7 +155,8 @@ static u32 opcode_table[256] = {
/* 0x90 - 0x97 */ /* 0x90 - 0x97 */
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x98 - 0x9F */ /* 0x98 - 0x9F */
0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, 0, 0, SrcImm | Src2Imm16, 0,
ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
/* 0xA0 - 0xA7 */ /* 0xA0 - 0xA7 */
ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
...@@ -178,7 +181,8 @@ static u32 opcode_table[256] = { ...@@ -178,7 +181,8 @@ static u32 opcode_table[256] = {
0, ImplicitOps | Stack, 0, 0, 0, ImplicitOps | Stack, 0, 0,
ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
/* 0xC8 - 0xCF */ /* 0xC8 - 0xCF */
0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | Stack,
ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
/* 0xD0 - 0xD7 */ /* 0xD0 - 0xD7 */
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
...@@ -187,11 +191,11 @@ static u32 opcode_table[256] = { ...@@ -187,11 +191,11 @@ static u32 opcode_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE0 - 0xE7 */ /* 0xE0 - 0xE7 */
0, 0, 0, 0, 0, 0, 0, 0,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, ByteOp | SrcImmUByte, SrcImmUByte,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, ByteOp | SrcImmUByte, SrcImmUByte,
/* 0xE8 - 0xEF */ /* 0xE8 - 0xEF */
ImplicitOps | Stack, SrcImm | ImplicitOps, SrcImm | Stack, SrcImm | ImplicitOps,
ImplicitOps, SrcImmByte | ImplicitOps, SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */ /* 0xF0 - 0xF7 */
...@@ -230,10 +234,8 @@ static u32 twobyte_table[256] = { ...@@ -230,10 +234,8 @@ static u32 twobyte_table[256] = {
/* 0x70 - 0x7F */ /* 0x70 - 0x7F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x80 - 0x8F */ /* 0x80 - 0x8F */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
/* 0x90 - 0x9F */ /* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */ /* 0xA0 - 0xA7 */
...@@ -1044,10 +1046,14 @@ done_prefixes: ...@@ -1044,10 +1046,14 @@ done_prefixes:
} }
break; break;
case SrcImmByte: case SrcImmByte:
case SrcImmUByte:
c->src.type = OP_IMM; c->src.type = OP_IMM;
c->src.ptr = (unsigned long *)c->eip; c->src.ptr = (unsigned long *)c->eip;
c->src.bytes = 1; c->src.bytes = 1;
if ((c->d & SrcMask) == SrcImmByte)
c->src.val = insn_fetch(s8, 1, c->eip); c->src.val = insn_fetch(s8, 1, c->eip);
else
c->src.val = insn_fetch(u8, 1, c->eip);
break; break;
case SrcOne: case SrcOne:
c->src.bytes = 1; c->src.bytes = 1;
...@@ -1072,6 +1078,12 @@ done_prefixes: ...@@ -1072,6 +1078,12 @@ done_prefixes:
c->src2.bytes = 1; c->src2.bytes = 1;
c->src2.val = insn_fetch(u8, 1, c->eip); c->src2.val = insn_fetch(u8, 1, c->eip);
break; break;
case Src2Imm16:
c->src2.type = OP_IMM;
c->src2.ptr = (unsigned long *)c->eip;
c->src2.bytes = 2;
c->src2.val = insn_fetch(u16, 2, c->eip);
break;
case Src2One: case Src2One:
c->src2.bytes = 1; c->src2.bytes = 1;
c->src2.val = 1; c->src2.val = 1;
...@@ -1349,6 +1361,20 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, ...@@ -1349,6 +1361,20 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
return 0; return 0;
} }
void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
{
u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
/*
* an sti; sti; sequence only disables interrupts for the first
* instruction. So, if the last instruction, be it emulated or
* not, left the system with the INT_STI flag enabled, it
* means that the last instruction is an sti. We should not
* leave the flag on in this case. The same goes for mov ss.
*/
if (!(int_shadow & mask))
ctxt->interruptibility = mask;
}
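A userspace model of the rule toggle_interruptibility() encodes: an STI (or MOV SS) arms the one-instruction interrupt shadow only if the guest was not already inside it, so an sti; sti pair does not extend the window. The mask values below are illustrative stand-ins for X86_SHADOW_INT_STI / X86_SHADOW_INT_MOV_SS, and the context struct is invented for the sketch.

#include <stdio.h>

#define SHADOW_INT_MOV_SS 0x01
#define SHADOW_INT_STI    0x02

struct emu_ctxt {
	unsigned int prior_shadow;     /* shadow state before this instruction */
	unsigned int interruptibility; /* shadow to impose after it */
};

static void toggle_shadow(struct emu_ctxt *ctxt, unsigned int mask)
{
	/* only arm the shadow if the previous instruction did not already */
	if (!(ctxt->prior_shadow & mask))
		ctxt->interruptibility = mask;
}

int main(void)
{
	struct emu_ctxt first  = { .prior_shadow = 0 };
	struct emu_ctxt second = { .prior_shadow = SHADOW_INT_STI };

	toggle_shadow(&first, SHADOW_INT_STI);   /* first sti: shadow armed */
	toggle_shadow(&second, SHADOW_INT_STI);  /* second sti: shadow not re-armed */

	printf("first sti -> shadow %u, second sti -> shadow %u\n",
	       first.interruptibility, second.interruptibility);
	return 0;
}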
int int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{ {
...@@ -1360,6 +1386,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ...@@ -1360,6 +1386,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
int io_dir_in; int io_dir_in;
int rc = 0; int rc = 0;
ctxt->interruptibility = 0;
/* Shadow copy of register state. Committed on successful emulation. /* Shadow copy of register state. Committed on successful emulation.
* NOTE: we can copy them from vcpu as x86_decode_insn() doesn't * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
* modify them. * modify them.
...@@ -1531,13 +1559,10 @@ special_insn: ...@@ -1531,13 +1559,10 @@ special_insn:
return -1; return -1;
} }
return 0; return 0;
case 0x70 ... 0x7f: /* jcc (short) */ { case 0x70 ... 0x7f: /* jcc (short) */
int rel = insn_fetch(s8, 1, c->eip);
if (test_cc(c->b, ctxt->eflags)) if (test_cc(c->b, ctxt->eflags))
jmp_rel(c, rel); jmp_rel(c, c->src.val);
break; break;
}
case 0x80 ... 0x83: /* Grp1 */ case 0x80 ... 0x83: /* Grp1 */
switch (c->modrm_reg) { switch (c->modrm_reg) {
case 0: case 0:
...@@ -1609,6 +1634,9 @@ special_insn: ...@@ -1609,6 +1634,9 @@ special_insn:
int err; int err;
sel = c->src.val; sel = c->src.val;
if (c->modrm_reg == VCPU_SREG_SS)
toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
if (c->modrm_reg <= 5) { if (c->modrm_reg <= 5) {
type_bits = (c->modrm_reg == 1) ? 9 : 1; type_bits = (c->modrm_reg == 1) ? 9 : 1;
err = kvm_load_segment_descriptor(ctxt->vcpu, sel, err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
...@@ -1769,59 +1797,32 @@ special_insn: ...@@ -1769,59 +1797,32 @@ special_insn:
break; break;
case 0xe4: /* inb */ case 0xe4: /* inb */
case 0xe5: /* in */ case 0xe5: /* in */
port = insn_fetch(u8, 1, c->eip); port = c->src.val;
io_dir_in = 1; io_dir_in = 1;
goto do_io; goto do_io;
case 0xe6: /* outb */ case 0xe6: /* outb */
case 0xe7: /* out */ case 0xe7: /* out */
port = insn_fetch(u8, 1, c->eip); port = c->src.val;
io_dir_in = 0; io_dir_in = 0;
goto do_io; goto do_io;
case 0xe8: /* call (near) */ { case 0xe8: /* call (near) */ {
long int rel; long int rel = c->src.val;
switch (c->op_bytes) {
case 2:
rel = insn_fetch(s16, 2, c->eip);
break;
case 4:
rel = insn_fetch(s32, 4, c->eip);
break;
default:
DPRINTF("Call: Invalid op_bytes\n");
goto cannot_emulate;
}
c->src.val = (unsigned long) c->eip; c->src.val = (unsigned long) c->eip;
jmp_rel(c, rel); jmp_rel(c, rel);
c->op_bytes = c->ad_bytes;
emulate_push(ctxt); emulate_push(ctxt);
break; break;
} }
case 0xe9: /* jmp rel */ case 0xe9: /* jmp rel */
goto jmp; goto jmp;
case 0xea: /* jmp far */ { case 0xea: /* jmp far */
uint32_t eip; if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
uint16_t sel; VCPU_SREG_CS) < 0) {
switch (c->op_bytes) {
case 2:
eip = insn_fetch(u16, 2, c->eip);
break;
case 4:
eip = insn_fetch(u32, 4, c->eip);
break;
default:
DPRINTF("jmp far: Invalid op_bytes\n");
goto cannot_emulate;
}
sel = insn_fetch(u16, 2, c->eip);
if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) {
DPRINTF("jmp far: Failed to load CS descriptor\n"); DPRINTF("jmp far: Failed to load CS descriptor\n");
goto cannot_emulate; goto cannot_emulate;
} }
c->eip = eip; c->eip = c->src.val;
break; break;
}
case 0xeb: case 0xeb:
jmp: /* jmp rel short */ jmp: /* jmp rel short */
jmp_rel(c, c->src.val); jmp_rel(c, c->src.val);
...@@ -1865,6 +1866,7 @@ special_insn: ...@@ -1865,6 +1866,7 @@ special_insn:
c->dst.type = OP_NONE; /* Disable writeback. */ c->dst.type = OP_NONE; /* Disable writeback. */
break; break;
case 0xfb: /* sti */ case 0xfb: /* sti */
toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
ctxt->eflags |= X86_EFLAGS_IF; ctxt->eflags |= X86_EFLAGS_IF;
c->dst.type = OP_NONE; /* Disable writeback. */ c->dst.type = OP_NONE; /* Disable writeback. */
break; break;
...@@ -2039,28 +2041,11 @@ twobyte_insn: ...@@ -2039,28 +2041,11 @@ twobyte_insn:
if (!test_cc(c->b, ctxt->eflags)) if (!test_cc(c->b, ctxt->eflags))
c->dst.type = OP_NONE; /* no writeback */ c->dst.type = OP_NONE; /* no writeback */
break; break;
case 0x80 ... 0x8f: /* jnz rel, etc*/ { case 0x80 ... 0x8f: /* jnz rel, etc*/
long int rel;
switch (c->op_bytes) {
case 2:
rel = insn_fetch(s16, 2, c->eip);
break;
case 4:
rel = insn_fetch(s32, 4, c->eip);
break;
case 8:
rel = insn_fetch(s64, 8, c->eip);
break;
default:
DPRINTF("jnz: Invalid op_bytes\n");
goto cannot_emulate;
}
if (test_cc(c->b, ctxt->eflags)) if (test_cc(c->b, ctxt->eflags))
jmp_rel(c, rel); jmp_rel(c, c->src.val);
c->dst.type = OP_NONE; c->dst.type = OP_NONE;
break; break;
}
case 0xa3: case 0xa3:
bt: /* bt */ bt: /* bt */
c->dst.type = OP_NONE; c->dst.type = OP_NONE;
......