Commit 6de410c2 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (66 commits)
  KVM: Remove unused 'instruction_length'
  KVM: Don't require explicit indication of completion of mmio or pio
  KVM: Remove extraneous guest entry on mmio read
  KVM: SVM: Only save/restore MSRs when needed
  KVM: fix an if() condition
  KVM: VMX: Add lazy FPU support for VT
  KVM: VMX: Properly shadow the CR0 register in the vcpu struct
  KVM: Don't complain about cpu erratum AA15
  KVM: Lazy FPU support for SVM
  KVM: Allow passing 64-bit values to the emulated read/write API
  KVM: Per-vcpu statistics
  KVM: VMX: Avoid unnecessary vcpu_load()/vcpu_put() cycles
  KVM: MMU: Avoid heavy ASSERT at non debug mode.
  KVM: VMX: Only save/restore MSR_K6_STAR if necessary
  KVM: Fold drivers/kvm/kvm_vmx.h into drivers/kvm/vmx.c
  KVM: VMX: Don't switch 64-bit msrs for 32-bit guests
  KVM: VMX: Reduce unnecessary saving of host msrs
  KVM: Handle guest page faults when emulating mmio
  KVM: SVM: Report hardware exit reason to userspace instead of dmesg
  KVM: Retry sleeping allocation if atomic allocation fails
  ...
parents c6799ade 2ff81f70
...@@ -51,16 +51,19 @@ ...@@ -51,16 +51,19 @@
#define UNMAPPED_GVA (~(gpa_t)0) #define UNMAPPED_GVA (~(gpa_t)0)
#define KVM_MAX_VCPUS 1 #define KVM_MAX_VCPUS 1
#define KVM_ALIAS_SLOTS 4
#define KVM_MEMORY_SLOTS 4 #define KVM_MEMORY_SLOTS 4
#define KVM_NUM_MMU_PAGES 256 #define KVM_NUM_MMU_PAGES 256
#define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25 #define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40
#define FX_IMAGE_SIZE 512 #define FX_IMAGE_SIZE 512
#define FX_IMAGE_ALIGN 16 #define FX_IMAGE_ALIGN 16
#define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)
#define DE_VECTOR 0 #define DE_VECTOR 0
#define NM_VECTOR 7
#define DF_VECTOR 8 #define DF_VECTOR 8
#define TS_VECTOR 10 #define TS_VECTOR 10
#define NP_VECTOR 11 #define NP_VECTOR 11
...@@ -73,6 +76,8 @@ ...@@ -73,6 +76,8 @@
#define IOPL_SHIFT 12 #define IOPL_SHIFT 12
#define KVM_PIO_PAGE_OFFSET 1
/* /*
* Address types: * Address types:
* *
...@@ -106,6 +111,7 @@ struct kvm_pte_chain { ...@@ -106,6 +111,7 @@ struct kvm_pte_chain {
* bits 4:7 - page table level for this shadow (1-4) * bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests * bits 8:9 - page table quadrant for 2-level guests
* bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
* bits 17:18 - "access" - the user and writable bits of a huge page pde
*/ */
union kvm_mmu_page_role { union kvm_mmu_page_role {
unsigned word; unsigned word;
...@@ -115,6 +121,7 @@ union kvm_mmu_page_role { ...@@ -115,6 +121,7 @@ union kvm_mmu_page_role {
unsigned quadrant : 2; unsigned quadrant : 2;
unsigned pad_for_nice_hex_output : 6; unsigned pad_for_nice_hex_output : 6;
unsigned metaphysical : 1; unsigned metaphysical : 1;
unsigned hugepage_access : 2;
}; };
}; };
...@@ -133,7 +140,6 @@ struct kvm_mmu_page { ...@@ -133,7 +140,6 @@ struct kvm_mmu_page {
unsigned long slot_bitmap; /* One bit set per slot which has memory unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page. * in this shadow page.
*/ */
int global; /* Set if all ptes in this page are global */
int multimapped; /* More than one parent_pte? */ int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */ int root_count; /* Currently serving as active root */
union { union {
...@@ -219,6 +225,34 @@ enum { ...@@ -219,6 +225,34 @@ enum {
VCPU_SREG_LDTR, VCPU_SREG_LDTR,
}; };
/*
 * Bookkeeping for a port I/O request being emulated on behalf of a vcpu.
 * kvm_setup_pio() fills it in; pio_copy_data() and complete_pio() consume it.
 */
struct kvm_pio_request {
/* elements still outstanding; complete_pio() subtracts cur_count from it */
unsigned long count;
/* elements covered by the current transaction */
int cur_count;
/* guest pages referenced for string I/O; [1] is set only when two are needed */
struct page *guest_pages[2];
/* offset_in_page() of the guest address the string I/O starts at */
unsigned guest_page_offset;
/* non-zero for the IN direction (KVM_EXIT_IO_IN), zero for OUT */
int in;
/* size of one element in bytes */
int size;
/* non-zero for string I/O */
int string;
/* non-zero when the address moves downwards (delta is negated) */
int down;
/* non-zero when RCX must be decremented on completion */
int rep;
};
/*
 * Event counters. struct kvm_vcpu embeds one instance as its 'stat' member,
 * and the debugfs_entries table refers to the individual fields by offset.
 * NOTE(review): the exact event each counter tracks is implied only by its
 * name here - confirm against the increment sites.
 */
struct kvm_stat {
u32 pf_fixed;
u32 pf_guest;
u32 tlb_flush;
u32 invlpg;
u32 exits;
u32 io_exits;
u32 mmio_exits;
u32 signal_exits;
u32 irq_window_exits;
u32 halt_exits;
u32 request_irq_exits;
u32 irq_exits;
};
struct kvm_vcpu { struct kvm_vcpu {
struct kvm *kvm; struct kvm *kvm;
union { union {
...@@ -228,6 +262,8 @@ struct kvm_vcpu { ...@@ -228,6 +262,8 @@ struct kvm_vcpu {
struct mutex mutex; struct mutex mutex;
int cpu; int cpu;
int launched; int launched;
u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open; int interrupt_window_open;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
...@@ -266,6 +302,7 @@ struct kvm_vcpu { ...@@ -266,6 +302,7 @@ struct kvm_vcpu {
char fx_buf[FX_BUF_SIZE]; char fx_buf[FX_BUF_SIZE];
char *host_fx_image; char *host_fx_image;
char *guest_fx_image; char *guest_fx_image;
int fpu_active;
int mmio_needed; int mmio_needed;
int mmio_read_completed; int mmio_read_completed;
...@@ -273,6 +310,14 @@ struct kvm_vcpu { ...@@ -273,6 +310,14 @@ struct kvm_vcpu {
int mmio_size; int mmio_size;
unsigned char mmio_data[8]; unsigned char mmio_data[8];
gpa_t mmio_phys_addr; gpa_t mmio_phys_addr;
gva_t mmio_fault_cr2;
struct kvm_pio_request pio;
void *pio_data;
int sigset_active;
sigset_t sigset;
struct kvm_stat stat;
struct { struct {
int active; int active;
...@@ -284,6 +329,15 @@ struct kvm_vcpu { ...@@ -284,6 +329,15 @@ struct kvm_vcpu {
u32 ar; u32 ar;
} tr, es, ds, fs, gs; } tr, es, ds, fs, gs;
} rmode; } rmode;
int cpuid_nent;
struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
};
/*
 * One guest-physical alias window: frames in [base_gfn, base_gfn + npages)
 * are redirected to target_gfn + (gfn - base_gfn) by unalias_gfn().
 */
struct kvm_mem_alias {
/* first guest frame number of the aliased window */
gfn_t base_gfn;
/* window length in pages; zero is treated as "slot unused" when counting */
unsigned long npages;
/* guest frame number the start of the window is redirected to */
gfn_t target_gfn;
};
struct kvm_memory_slot { struct kvm_memory_slot {
...@@ -296,6 +350,8 @@ struct kvm_memory_slot { ...@@ -296,6 +350,8 @@ struct kvm_memory_slot {
struct kvm { struct kvm {
spinlock_t lock; /* protects everything except vcpus */ spinlock_t lock; /* protects everything except vcpus */
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
int nmemslots; int nmemslots;
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
/* /*
...@@ -312,22 +368,6 @@ struct kvm { ...@@ -312,22 +368,6 @@ struct kvm {
struct file *filp; struct file *filp;
}; };
struct kvm_stat {
u32 pf_fixed;
u32 pf_guest;
u32 tlb_flush;
u32 invlpg;
u32 exits;
u32 io_exits;
u32 mmio_exits;
u32 signal_exits;
u32 irq_window_exits;
u32 halt_exits;
u32 request_irq_exits;
u32 irq_exits;
};
struct descriptor_table { struct descriptor_table {
u16 limit; u16 limit;
unsigned long base; unsigned long base;
...@@ -358,10 +398,8 @@ struct kvm_arch_ops { ...@@ -358,10 +398,8 @@ struct kvm_arch_ops {
void (*set_segment)(struct kvm_vcpu *vcpu, void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg); struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu,
unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
...@@ -391,7 +429,6 @@ struct kvm_arch_ops { ...@@ -391,7 +429,6 @@ struct kvm_arch_ops {
unsigned char *hypercall_addr); unsigned char *hypercall_addr);
}; };
extern struct kvm_stat kvm_stat;
extern struct kvm_arch_ops *kvm_arch_ops; extern struct kvm_arch_ops *kvm_arch_ops;
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
...@@ -400,28 +437,29 @@ extern struct kvm_arch_ops *kvm_arch_ops; ...@@ -400,28 +437,29 @@ extern struct kvm_arch_ops *kvm_arch_ops;
int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module);
void kvm_exit_arch(void); void kvm_exit_arch(void);
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu); void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_setup(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot); void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
void kvm_mmu_zap_all(struct kvm_vcpu *vcpu);
hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1) #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_emulator_want_group7_invlpg(void); void kvm_emulator_want_group7_invlpg(void);
extern hpa_t bad_page_address; extern hpa_t bad_page_address;
static inline struct page *gfn_to_page(struct kvm_memory_slot *slot, gfn_t gfn) struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
{
return slot->phys_mem[gfn - slot->base_gfn];
}
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
...@@ -444,6 +482,10 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, ...@@ -444,6 +482,10 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value,
struct x86_emulate_ctxt; struct x86_emulate_ctxt;
int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int string, int down,
gva_t address, int rep, unsigned port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
int emulate_clts(struct kvm_vcpu *vcpu); int emulate_clts(struct kvm_vcpu *vcpu);
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
...@@ -493,12 +535,6 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, ...@@ -493,12 +535,6 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
return vcpu->mmu.page_fault(vcpu, gva, error_code); return vcpu->mmu.page_fault(vcpu, gva, error_code);
} }
static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : NULL;
}
static inline int is_long_mode(struct kvm_vcpu *vcpu) static inline int is_long_mode(struct kvm_vcpu *vcpu)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
......
...@@ -51,27 +51,27 @@ static DEFINE_SPINLOCK(kvm_lock); ...@@ -51,27 +51,27 @@ static DEFINE_SPINLOCK(kvm_lock);
static LIST_HEAD(vm_list); static LIST_HEAD(vm_list);
struct kvm_arch_ops *kvm_arch_ops; struct kvm_arch_ops *kvm_arch_ops;
struct kvm_stat kvm_stat;
EXPORT_SYMBOL_GPL(kvm_stat); #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
static struct kvm_stats_debugfs_item { static struct kvm_stats_debugfs_item {
const char *name; const char *name;
u32 *data; int offset;
struct dentry *dentry; struct dentry *dentry;
} debugfs_entries[] = { } debugfs_entries[] = {
{ "pf_fixed", &kvm_stat.pf_fixed }, { "pf_fixed", STAT_OFFSET(pf_fixed) },
{ "pf_guest", &kvm_stat.pf_guest }, { "pf_guest", STAT_OFFSET(pf_guest) },
{ "tlb_flush", &kvm_stat.tlb_flush }, { "tlb_flush", STAT_OFFSET(tlb_flush) },
{ "invlpg", &kvm_stat.invlpg }, { "invlpg", STAT_OFFSET(invlpg) },
{ "exits", &kvm_stat.exits }, { "exits", STAT_OFFSET(exits) },
{ "io_exits", &kvm_stat.io_exits }, { "io_exits", STAT_OFFSET(io_exits) },
{ "mmio_exits", &kvm_stat.mmio_exits }, { "mmio_exits", STAT_OFFSET(mmio_exits) },
{ "signal_exits", &kvm_stat.signal_exits }, { "signal_exits", STAT_OFFSET(signal_exits) },
{ "irq_window", &kvm_stat.irq_window_exits }, { "irq_window", STAT_OFFSET(irq_window_exits) },
{ "halt_exits", &kvm_stat.halt_exits }, { "halt_exits", STAT_OFFSET(halt_exits) },
{ "request_irq", &kvm_stat.request_irq_exits }, { "request_irq", STAT_OFFSET(request_irq_exits) },
{ "irq_exits", &kvm_stat.irq_exits }, { "irq_exits", STAT_OFFSET(irq_exits) },
{ NULL, NULL } { NULL }
}; };
static struct dentry *debugfs_dir; static struct dentry *debugfs_dir;
...@@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm *kvm) ...@@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm *kvm)
kvm_free_physmem_slot(&kvm->memslots[i], NULL); kvm_free_physmem_slot(&kvm->memslots[i], NULL);
} }
/*
 * Release every page still recorded in vcpu->pio.guest_pages[] and clear
 * the corresponding entry, so calling this again is harmless.
 */
static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
{
	struct page **slot = vcpu->pio.guest_pages;
	struct page **end = slot + 2;

	for (; slot != end; ++slot) {
		if (!*slot)
			continue;
		__free_page(*slot);
		*slot = NULL;
	}
}
static void kvm_free_vcpu(struct kvm_vcpu *vcpu) static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
{ {
if (!vcpu->vmcs) if (!vcpu->vmcs)
...@@ -355,6 +366,11 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu) ...@@ -355,6 +366,11 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
kvm_mmu_destroy(vcpu); kvm_mmu_destroy(vcpu);
vcpu_put(vcpu); vcpu_put(vcpu);
kvm_arch_ops->vcpu_free(vcpu); kvm_arch_ops->vcpu_free(vcpu);
free_page((unsigned long)vcpu->run);
vcpu->run = NULL;
free_page((unsigned long)vcpu->pio_data);
vcpu->pio_data = NULL;
free_pio_guest_pages(vcpu);
} }
static void kvm_free_vcpus(struct kvm *kvm) static void kvm_free_vcpus(struct kvm *kvm)
...@@ -404,12 +420,12 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) ...@@ -404,12 +420,12 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
u64 pdpte; u64 pdpte;
u64 *pdpt; u64 *pdpt;
int ret; int ret;
struct kvm_memory_slot *memslot; struct page *page;
spin_lock(&vcpu->kvm->lock); spin_lock(&vcpu->kvm->lock);
memslot = gfn_to_memslot(vcpu->kvm, pdpt_gfn); page = gfn_to_page(vcpu->kvm, pdpt_gfn);
/* FIXME: !memslot - emulate? 0xff? */ /* FIXME: !page - emulate? 0xff? */
pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0); pdpt = kmap_atomic(page, KM_USER0);
ret = 1; ret = 1;
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
...@@ -494,7 +510,6 @@ EXPORT_SYMBOL_GPL(set_cr0); ...@@ -494,7 +510,6 @@ EXPORT_SYMBOL_GPL(set_cr0);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{ {
kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
} }
EXPORT_SYMBOL_GPL(lmsw); EXPORT_SYMBOL_GPL(lmsw);
...@@ -830,7 +845,73 @@ out: ...@@ -830,7 +845,73 @@ out:
return r; return r;
} }
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) /*
* Set a new alias region. Aliases map a portion of physical memory into
* another portion. This is useful for memory windows, for example the PC
* VGA region.
*/
/*
 * Install (or clear, when memory_size is zero) the alias described by
 * @alias in slot alias->slot, recompute kvm->naliases, and zap all shadow
 * pages through vcpu 0.
 *
 * Returns 0 on success, -EINVAL if the size or guest address is not page
 * aligned, the slot index is out of range, or either address range wraps.
 */
static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
					 struct kvm_memory_alias *alias)
{
	struct kvm_mem_alias *entry;
	struct kvm_vcpu *vcpu = &kvm->vcpus[0];
	int used;

	/* General sanity checks */
	if (alias->memory_size & (PAGE_SIZE - 1))
		return -EINVAL;
	if (alias->guest_phys_addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (alias->slot >= KVM_ALIAS_SLOTS)
		return -EINVAL;
	/* Reject ranges whose end wraps around the address space. */
	if (alias->guest_phys_addr + alias->memory_size
	    < alias->guest_phys_addr)
		return -EINVAL;
	if (alias->target_phys_addr + alias->memory_size
	    < alias->target_phys_addr)
		return -EINVAL;

	spin_lock(&kvm->lock);

	entry = &kvm->aliases[alias->slot];
	entry->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
	entry->npages = alias->memory_size >> PAGE_SHIFT;
	entry->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;

	/* naliases is one past the highest slot that still has pages. */
	used = KVM_ALIAS_SLOTS;
	while (used > 0 && !kvm->aliases[used - 1].npages)
		--used;
	kvm->naliases = used;

	spin_unlock(&kvm->lock);

	/* The vcpu is loaded before kvm->lock is retaken for the zap. */
	vcpu_load(vcpu);
	spin_lock(&kvm->lock);
	kvm_mmu_zap_all(vcpu);
	spin_unlock(&kvm->lock);
	vcpu_put(vcpu);

	return 0;
}
/*
 * Translate @gfn through the alias table: the first of the kvm->naliases
 * entries whose window contains @gfn redirects it into the target range.
 * A frame covered by no alias is returned unchanged.
 */
static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
	int slot;

	for (slot = 0; slot < kvm->naliases; ++slot) {
		struct kvm_mem_alias *window = &kvm->aliases[slot];

		if (gfn < window->base_gfn)
			continue;
		if (gfn >= window->base_gfn + window->npages)
			continue;
		return window->target_gfn + gfn - window->base_gfn;
	}
	return gfn;
}
static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{ {
int i; int i;
...@@ -843,7 +924,24 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) ...@@ -843,7 +924,24 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
} }
return NULL; return NULL;
} }
EXPORT_SYMBOL_GPL(gfn_to_memslot);
/*
 * Look up the memory slot backing @gfn after resolving any alias.
 * Returns whatever __gfn_to_memslot() yields for the resolved frame.
 */
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	return __gfn_to_memslot(kvm, unalias_gfn(kvm, gfn));
}
/*
 * Resolve @gfn through the alias table and return the page recorded for it
 * in its memory slot, or NULL when no slot covers the resolved frame.
 */
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	gfn_t real_gfn = unalias_gfn(kvm, gfn);
	struct kvm_memory_slot *memslot = __gfn_to_memslot(kvm, real_gfn);

	return memslot ? memslot->phys_mem[real_gfn - memslot->base_gfn] : NULL;
}
EXPORT_SYMBOL_GPL(gfn_to_page);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn) void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
{ {
...@@ -871,7 +969,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) ...@@ -871,7 +969,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
} }
static int emulator_read_std(unsigned long addr, static int emulator_read_std(unsigned long addr,
unsigned long *val, void *val,
unsigned int bytes, unsigned int bytes,
struct x86_emulate_ctxt *ctxt) struct x86_emulate_ctxt *ctxt)
{ {
...@@ -883,20 +981,20 @@ static int emulator_read_std(unsigned long addr, ...@@ -883,20 +981,20 @@ static int emulator_read_std(unsigned long addr,
unsigned offset = addr & (PAGE_SIZE-1); unsigned offset = addr & (PAGE_SIZE-1);
unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
unsigned long pfn; unsigned long pfn;
struct kvm_memory_slot *memslot; struct page *page;
void *page; void *page_virt;
if (gpa == UNMAPPED_GVA) if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT; return X86EMUL_PROPAGATE_FAULT;
pfn = gpa >> PAGE_SHIFT; pfn = gpa >> PAGE_SHIFT;
memslot = gfn_to_memslot(vcpu->kvm, pfn); page = gfn_to_page(vcpu->kvm, pfn);
if (!memslot) if (!page)
return X86EMUL_UNHANDLEABLE; return X86EMUL_UNHANDLEABLE;
page = kmap_atomic(gfn_to_page(memslot, pfn), KM_USER0); page_virt = kmap_atomic(page, KM_USER0);
memcpy(data, page + offset, tocopy); memcpy(data, page_virt + offset, tocopy);
kunmap_atomic(page, KM_USER0); kunmap_atomic(page_virt, KM_USER0);
bytes -= tocopy; bytes -= tocopy;
data += tocopy; data += tocopy;
...@@ -907,7 +1005,7 @@ static int emulator_read_std(unsigned long addr, ...@@ -907,7 +1005,7 @@ static int emulator_read_std(unsigned long addr,
} }
static int emulator_write_std(unsigned long addr, static int emulator_write_std(unsigned long addr,
unsigned long val, const void *val,
unsigned int bytes, unsigned int bytes,
struct x86_emulate_ctxt *ctxt) struct x86_emulate_ctxt *ctxt)
{ {
...@@ -917,7 +1015,7 @@ static int emulator_write_std(unsigned long addr, ...@@ -917,7 +1015,7 @@ static int emulator_write_std(unsigned long addr,
} }
static int emulator_read_emulated(unsigned long addr, static int emulator_read_emulated(unsigned long addr,
unsigned long *val, void *val,
unsigned int bytes, unsigned int bytes,
struct x86_emulate_ctxt *ctxt) struct x86_emulate_ctxt *ctxt)
{ {
...@@ -945,37 +1043,37 @@ static int emulator_read_emulated(unsigned long addr, ...@@ -945,37 +1043,37 @@ static int emulator_read_emulated(unsigned long addr,
} }
static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
unsigned long val, int bytes) const void *val, int bytes)
{ {
struct kvm_memory_slot *m;
struct page *page; struct page *page;
void *virt; void *virt;
if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
return 0; return 0;
m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
if (!m) if (!page)
return 0; return 0;
page = gfn_to_page(m, gpa >> PAGE_SHIFT);
kvm_mmu_pre_write(vcpu, gpa, bytes); kvm_mmu_pre_write(vcpu, gpa, bytes);
mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
virt = kmap_atomic(page, KM_USER0); virt = kmap_atomic(page, KM_USER0);
memcpy(virt + offset_in_page(gpa), &val, bytes); memcpy(virt + offset_in_page(gpa), val, bytes);
kunmap_atomic(virt, KM_USER0); kunmap_atomic(virt, KM_USER0);
kvm_mmu_post_write(vcpu, gpa, bytes); kvm_mmu_post_write(vcpu, gpa, bytes);
return 1; return 1;
} }
static int emulator_write_emulated(unsigned long addr, static int emulator_write_emulated(unsigned long addr,
unsigned long val, const void *val,
unsigned int bytes, unsigned int bytes,
struct x86_emulate_ctxt *ctxt) struct x86_emulate_ctxt *ctxt)
{ {
struct kvm_vcpu *vcpu = ctxt->vcpu; struct kvm_vcpu *vcpu = ctxt->vcpu;
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA) if (gpa == UNMAPPED_GVA) {
kvm_arch_ops->inject_page_fault(vcpu, addr, 2);
return X86EMUL_PROPAGATE_FAULT; return X86EMUL_PROPAGATE_FAULT;
}
if (emulator_write_phys(vcpu, gpa, val, bytes)) if (emulator_write_phys(vcpu, gpa, val, bytes))
return X86EMUL_CONTINUE; return X86EMUL_CONTINUE;
...@@ -984,14 +1082,14 @@ static int emulator_write_emulated(unsigned long addr, ...@@ -984,14 +1082,14 @@ static int emulator_write_emulated(unsigned long addr,
vcpu->mmio_phys_addr = gpa; vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes; vcpu->mmio_size = bytes;
vcpu->mmio_is_write = 1; vcpu->mmio_is_write = 1;
memcpy(vcpu->mmio_data, &val, bytes); memcpy(vcpu->mmio_data, val, bytes);
return X86EMUL_CONTINUE; return X86EMUL_CONTINUE;
} }
static int emulator_cmpxchg_emulated(unsigned long addr, static int emulator_cmpxchg_emulated(unsigned long addr,
unsigned long old, const void *old,
unsigned long new, const void *new,
unsigned int bytes, unsigned int bytes,
struct x86_emulate_ctxt *ctxt) struct x86_emulate_ctxt *ctxt)
{ {
...@@ -1004,30 +1102,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr, ...@@ -1004,30 +1102,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
return emulator_write_emulated(addr, new, bytes, ctxt); return emulator_write_emulated(addr, new, bytes, ctxt);
} }
#ifdef CONFIG_X86_32
static int emulator_cmpxchg8b_emulated(unsigned long addr,
unsigned long old_lo,
unsigned long old_hi,
unsigned long new_lo,
unsigned long new_hi,
struct x86_emulate_ctxt *ctxt)
{
static int reported;
int r;
if (!reported) {
reported = 1;
printk(KERN_WARNING "kvm: emulating exchange8b as write\n");
}
r = emulator_write_emulated(addr, new_lo, 4, ctxt);
if (r != X86EMUL_CONTINUE)
return r;
return emulator_write_emulated(addr+4, new_hi, 4, ctxt);
}
#endif
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{ {
return kvm_arch_ops->get_segment_base(vcpu, seg); return kvm_arch_ops->get_segment_base(vcpu, seg);
...@@ -1042,7 +1116,6 @@ int emulate_clts(struct kvm_vcpu *vcpu) ...@@ -1042,7 +1116,6 @@ int emulate_clts(struct kvm_vcpu *vcpu)
{ {
unsigned long cr0; unsigned long cr0;
kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
cr0 = vcpu->cr0 & ~CR0_TS_MASK; cr0 = vcpu->cr0 & ~CR0_TS_MASK;
kvm_arch_ops->set_cr0(vcpu, cr0); kvm_arch_ops->set_cr0(vcpu, cr0);
return X86EMUL_CONTINUE; return X86EMUL_CONTINUE;
...@@ -1102,9 +1175,6 @@ struct x86_emulate_ops emulate_ops = { ...@@ -1102,9 +1175,6 @@ struct x86_emulate_ops emulate_ops = {
.read_emulated = emulator_read_emulated, .read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated, .write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated,
#ifdef CONFIG_X86_32
.cmpxchg8b_emulated = emulator_cmpxchg8b_emulated,
#endif
}; };
int emulate_instruction(struct kvm_vcpu *vcpu, int emulate_instruction(struct kvm_vcpu *vcpu,
...@@ -1116,6 +1186,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, ...@@ -1116,6 +1186,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
int r; int r;
int cs_db, cs_l; int cs_db, cs_l;
vcpu->mmio_fault_cr2 = cr2;
kvm_arch_ops->cache_regs(vcpu); kvm_arch_ops->cache_regs(vcpu);
kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
...@@ -1166,8 +1237,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu, ...@@ -1166,8 +1237,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
kvm_arch_ops->decache_regs(vcpu); kvm_arch_ops->decache_regs(vcpu);
kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags);
if (vcpu->mmio_is_write) if (vcpu->mmio_is_write) {
vcpu->mmio_needed = 0;
return EMULATE_DO_MMIO; return EMULATE_DO_MMIO;
}
return EMULATE_DONE; return EMULATE_DONE;
} }
...@@ -1177,7 +1250,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) ...@@ -1177,7 +1250,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
{ {
unsigned long nr, a0, a1, a2, a3, a4, a5, ret; unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
kvm_arch_ops->decache_regs(vcpu); kvm_arch_ops->cache_regs(vcpu);
ret = -KVM_EINVAL; ret = -KVM_EINVAL;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
if (is_long_mode(vcpu)) { if (is_long_mode(vcpu)) {
...@@ -1201,10 +1274,19 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) ...@@ -1201,10 +1274,19 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
} }
switch (nr) { switch (nr) {
default: default:
; run->hypercall.args[0] = a0;
run->hypercall.args[1] = a1;
run->hypercall.args[2] = a2;
run->hypercall.args[3] = a3;
run->hypercall.args[4] = a4;
run->hypercall.args[5] = a5;
run->hypercall.ret = ret;
run->hypercall.longmode = is_long_mode(vcpu);
kvm_arch_ops->decache_regs(vcpu);
return 0;
} }
vcpu->regs[VCPU_REGS_RAX] = ret; vcpu->regs[VCPU_REGS_RAX] = ret;
kvm_arch_ops->cache_regs(vcpu); kvm_arch_ops->decache_regs(vcpu);
return 1; return 1;
} }
EXPORT_SYMBOL_GPL(kvm_hypercall); EXPORT_SYMBOL_GPL(kvm_hypercall);
...@@ -1237,7 +1319,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, ...@@ -1237,7 +1319,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{ {
kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); kvm_arch_ops->decache_cr4_guest_bits(vcpu);
switch (cr) { switch (cr) {
case 0: case 0:
return vcpu->cr0; return vcpu->cr0;
...@@ -1442,6 +1524,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) ...@@ -1442,6 +1524,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
__FUNCTION__, data); __FUNCTION__, data);
break; break;
case MSR_IA32_MCG_STATUS:
printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
__FUNCTION__, data);
break;
case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE: case MSR_IA32_UCODE_WRITE:
case 0x200 ... 0x2ff: /* MTRRs */ case 0x200 ... 0x2ff: /* MTRRs */
...@@ -1478,6 +1564,8 @@ static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) ...@@ -1478,6 +1564,8 @@ static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
void kvm_resched(struct kvm_vcpu *vcpu) void kvm_resched(struct kvm_vcpu *vcpu)
{ {
if (!need_resched())
return;
vcpu_put(vcpu); vcpu_put(vcpu);
cond_resched(); cond_resched();
vcpu_load(vcpu); vcpu_load(vcpu);
...@@ -1502,29 +1590,250 @@ void save_msrs(struct vmx_msr_entry *e, int n) ...@@ -1502,29 +1590,250 @@ void save_msrs(struct vmx_msr_entry *e, int n)
} }
EXPORT_SYMBOL_GPL(save_msrs); EXPORT_SYMBOL_GPL(save_msrs);
/*
 * Emulate CPUID from the vcpu's cpuid_entries table.
 *
 * The function number is taken from RAX. An entry with exactly that
 * function wins; otherwise the entry with the highest function number in
 * the same range (bit 31 equal) is used. With no candidate at all,
 * RAX/RBX/RCX/RDX are left zeroed. The instruction is then skipped.
 */
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry *match = NULL;
	u32 leaf;
	int idx;

	kvm_arch_ops->cache_regs(vcpu);
	leaf = vcpu->regs[VCPU_REGS_RAX];

	/* Default answer: all four output registers zero. */
	vcpu->regs[VCPU_REGS_RAX] = 0;
	vcpu->regs[VCPU_REGS_RBX] = 0;
	vcpu->regs[VCPU_REGS_RCX] = 0;
	vcpu->regs[VCPU_REGS_RDX] = 0;

	for (idx = 0; idx < vcpu->cpuid_nent; ++idx) {
		struct kvm_cpuid_entry *cand = &vcpu->cpuid_entries[idx];

		if (cand->function == leaf) {
			match = cand;
			break;
		}
		/* One basic, one extended: not a usable fallback. */
		if ((cand->function ^ leaf) & 0x80000000)
			continue;
		if (!match || cand->function > match->function)
			match = cand;
	}

	if (match) {
		vcpu->regs[VCPU_REGS_RAX] = match->eax;
		vcpu->regs[VCPU_REGS_RBX] = match->ebx;
		vcpu->regs[VCPU_REGS_RCX] = match->ecx;
		vcpu->regs[VCPU_REGS_RDX] = match->edx;
	}

	kvm_arch_ops->decache_regs(vcpu);
	kvm_arch_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
/*
 * Copy the current string PIO transaction between vcpu->pio_data and the
 * guest pages recorded in vcpu->pio.guest_pages[].
 *
 * For the IN direction data flows from pio_data into the guest pages,
 * otherwise from the guest pages into pio_data. The guest pages are
 * released on every path, success or failure.
 *
 * Returns 0 on success, -ENOMEM if the guest pages cannot be mapped.
 */
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
	void *host_buf = vcpu->pio_data;
	void *mapping;
	void *guest_buf;
	unsigned len;
	int page_count = vcpu->pio.guest_pages[1] ? 2 : 1;
	int ret = -ENOMEM;

	/*
	 * NOTE(review): the vcpu is put around the mapping, presumably
	 * because vmap() may sleep - confirm.
	 */
	kvm_arch_ops->vcpu_put(vcpu);
	mapping = vmap(vcpu->pio.guest_pages, page_count, VM_READ|VM_WRITE,
		       PAGE_KERNEL);
	if (!mapping)
		goto out;

	guest_buf = mapping + vcpu->pio.guest_page_offset;
	len = vcpu->pio.size * vcpu->pio.cur_count;
	if (vcpu->pio.in)
		memcpy(guest_buf, host_buf, len);
	else
		memcpy(host_buf, guest_buf, len);
	vunmap(mapping);
	ret = 0;
out:
	kvm_arch_ops->vcpu_load(vcpu);
	free_pio_guest_pages(vcpu);
	return ret;
}
/*
 * Finish the current transaction of the PIO request stored in vcpu->pio.
 *
 * Non-string IN: copy io->size bytes from vcpu->pio_data into RAX.
 * String I/O: for IN, copy the data into the guest via pio_copy_data();
 * then adjust RCX (when rep is set) and advance RDI (IN) or RSI (OUT) by
 * the number of bytes handled, negated when 'down' is set.
 *
 * Afterwards the outstanding count is reduced by cur_count, and the
 * instruction is skipped once nothing is left.
 *
 * Returns 0 on success or the error returned by pio_copy_data().
 */
static int complete_pio(struct kvm_vcpu *vcpu)
{
struct kvm_pio_request *io = &vcpu->pio;
long delta;
int r;
kvm_arch_ops->cache_regs(vcpu);
if (!io->string) {
if (io->in)
memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
io->size);
} else {
if (io->in) {
r = pio_copy_data(vcpu);
if (r) {
/*
 * NOTE(review): this calls cache_regs() a second time on the
 * error path although no register was modified yet - confirm
 * that this is intended.
 */
kvm_arch_ops->cache_regs(vcpu);
return r;
}
}
/* delta starts as an element count and is scaled to bytes below */
delta = 1;
if (io->rep) {
delta *= io->cur_count;
/*
 * The size of the register should really depend on
 * current address size.
 */
vcpu->regs[VCPU_REGS_RCX] -= delta;
}
if (io->down)
delta = -delta;
delta *= io->size;
if (io->in)
vcpu->regs[VCPU_REGS_RDI] += delta;
else
vcpu->regs[VCPU_REGS_RSI] += delta;
}
kvm_arch_ops->decache_regs(vcpu);
io->count -= io->cur_count;
io->cur_count = 0;
/* only step past the instruction once every element has been handled */
if (!io->count)
kvm_arch_ops->skip_emulated_instruction(vcpu);
return 0;
}
/*
 * Prepare a port I/O exit for @vcpu and record the request in vcpu->pio.
 *
 * @run:     unused here; the exit is described through vcpu->run
 * @in:      non-zero for IN, zero for OUT
 * @size:    element size in bytes
 * @count:   number of elements requested
 * @string:  non-zero for string I/O
 * @down:    non-zero when the address moves downwards
 * @address: guest virtual address of the string buffer
 * @rep:     non-zero for a repeated string instruction
 * @port:    I/O port number
 *
 * Non-string I/O: the low 4 bytes of RAX are copied into vcpu->pio_data
 * and 0 is returned.
 *
 * String I/O: a zero count skips the instruction and returns 1. The
 * transaction is limited to the elements that fit in the current page;
 * when not even one fits, two guest pages are used for a single element.
 * Downward string I/O is not supported: a #GP is injected and 1 returned.
 * If a guest page cannot be looked up, a #GP is injected and 1 returned.
 * For OUT the data is copied out immediately and the result of
 * pio_copy_data() is returned; for IN 0 is returned.
 *
 * NOTE(review): presumably 1 means "resume the guest" and 0 means "exit to
 * userspace" - confirm against the callers.
 */
int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
		  int size, unsigned long count, int string, int down,
		  gva_t address, int rep, unsigned port)
{
	unsigned now, in_page;
	int i;
	int nr_pages = 1;
	struct page *page;

	vcpu->run->exit_reason = KVM_EXIT_IO;
	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
	vcpu->run->io.size = size;
	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
	vcpu->run->io.count = count;
	vcpu->run->io.port = port;
	vcpu->pio.count = count;
	vcpu->pio.cur_count = count;
	vcpu->pio.size = size;
	vcpu->pio.in = in;
	vcpu->pio.string = string;
	vcpu->pio.down = down;
	vcpu->pio.guest_page_offset = offset_in_page(address);
	vcpu->pio.rep = rep;

	if (!string) {
		kvm_arch_ops->cache_regs(vcpu);
		memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
		kvm_arch_ops->decache_regs(vcpu);
		return 0;
	}

	if (!count) {
		kvm_arch_ops->skip_emulated_instruction(vcpu);
		return 1;
	}

	/* Bytes available in the current page in the direction of travel. */
	if (!down)
		in_page = PAGE_SIZE - offset_in_page(address);
	else
		in_page = offset_in_page(address) + size;
	now = min(count, (unsigned long)in_page / size);
	if (!now) {
		/*
		 * String I/O straddles page boundary.  Pin two guest pages
		 * so that we satisfy atomicity constraints.  Do just one
		 * transaction to avoid complexity.
		 */
		nr_pages = 2;
		now = 1;
	}
	if (down) {
		/*
		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
		 */
		printk(KERN_ERR "kvm: guest string pio down\n");
		inject_gp(vcpu);
		return 1;
	}
	vcpu->run->io.count = now;
	vcpu->pio.cur_count = now;

	for (i = 0; i < nr_pages; ++i) {
		spin_lock(&vcpu->kvm->lock);
		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
		if (page)
			get_page(page);
		vcpu->pio.guest_pages[i] = page;
		spin_unlock(&vcpu->kvm->lock);
		if (!page) {
			inject_gp(vcpu);
			/* Drops any page already referenced in this loop. */
			free_pio_guest_pages(vcpu);
			return 1;
		}
	}

	if (!vcpu->pio.in)
		return pio_copy_data(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_setup_pio);
static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
int r; int r;
sigset_t sigsaved;
vcpu_load(vcpu); vcpu_load(vcpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
/* re-sync apic's tpr */ /* re-sync apic's tpr */
vcpu->cr8 = kvm_run->cr8; vcpu->cr8 = kvm_run->cr8;
if (kvm_run->emulated) { if (vcpu->pio.cur_count) {
kvm_arch_ops->skip_emulated_instruction(vcpu); r = complete_pio(vcpu);
kvm_run->emulated = 0; if (r)
goto out;
} }
if (kvm_run->mmio_completed) { if (vcpu->mmio_needed) {
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
vcpu->mmio_read_completed = 1; vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0;
r = emulate_instruction(vcpu, kvm_run,
vcpu->mmio_fault_cr2, 0);
if (r == EMULATE_DO_MMIO) {
/*
* Read-modify-write. Back to userspace.
*/
kvm_run->exit_reason = KVM_EXIT_MMIO;
r = 0;
goto out;
}
} }
vcpu->mmio_needed = 0; if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
kvm_arch_ops->cache_regs(vcpu);
vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
kvm_arch_ops->decache_regs(vcpu);
}
r = kvm_arch_ops->run(vcpu, kvm_run); r = kvm_arch_ops->run(vcpu, kvm_run);
out:
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
vcpu_put(vcpu); vcpu_put(vcpu);
return r; return r;
} }
...@@ -1633,7 +1942,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, ...@@ -1633,7 +1942,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
sregs->gdt.limit = dt.limit; sregs->gdt.limit = dt.limit;
sregs->gdt.base = dt.base; sregs->gdt.base = dt.base;
kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); kvm_arch_ops->decache_cr4_guest_bits(vcpu);
sregs->cr0 = vcpu->cr0; sregs->cr0 = vcpu->cr0;
sregs->cr2 = vcpu->cr2; sregs->cr2 = vcpu->cr2;
sregs->cr3 = vcpu->cr3; sregs->cr3 = vcpu->cr3;
...@@ -1665,16 +1974,6 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, ...@@ -1665,16 +1974,6 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
vcpu_load(vcpu); vcpu_load(vcpu);
set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
dt.limit = sregs->idt.limit; dt.limit = sregs->idt.limit;
dt.base = sregs->idt.base; dt.base = sregs->idt.base;
kvm_arch_ops->set_idt(vcpu, &dt); kvm_arch_ops->set_idt(vcpu, &dt);
...@@ -1694,10 +1993,10 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, ...@@ -1694,10 +1993,10 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
#endif #endif
vcpu->apic_base = sregs->apic_base; vcpu->apic_base = sregs->apic_base;
kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); kvm_arch_ops->decache_cr4_guest_bits(vcpu);
mmu_reset_needed |= vcpu->cr0 != sregs->cr0; mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0); kvm_arch_ops->set_cr0(vcpu, sregs->cr0);
mmu_reset_needed |= vcpu->cr4 != sregs->cr4; mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
kvm_arch_ops->set_cr4(vcpu, sregs->cr4); kvm_arch_ops->set_cr4(vcpu, sregs->cr4);
...@@ -1714,6 +2013,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, ...@@ -1714,6 +2013,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
if (vcpu->irq_pending[i]) if (vcpu->irq_pending[i])
__set_bit(i, &vcpu->irq_summary); __set_bit(i, &vcpu->irq_summary);
set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
vcpu_put(vcpu); vcpu_put(vcpu);
return 0; return 0;
...@@ -1887,6 +2196,36 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, ...@@ -1887,6 +2196,36 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
return r; return r;
} }
/*
 * Page-fault handler for mappings of a vcpu file.
 *
 * Only two page offsets within the file are backed:
 *   0                    - the page holding vcpu->run
 *   KVM_PIO_PAGE_OFFSET  - the page holding vcpu->pio_data
 * Any other offset yields NOPAGE_SIGBUS.
 *
 * *type is always set to VM_FAULT_MINOR.  On success the returned page
 * carries an extra reference taken with get_page().
 */
static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
				    unsigned long address,
				    int *type)
{
	struct kvm_vcpu *vcpu = vma->vm_file->private_data;
	unsigned long pgoff;
	struct page *page;
	void *kaddr;

	*type = VM_FAULT_MINOR;

	/* Page index of the faulting address within the file. */
	pgoff = vma->vm_pgoff + ((address - vma->vm_start) >> PAGE_SHIFT);

	if (pgoff == 0)
		kaddr = vcpu->run;
	else if (pgoff == KVM_PIO_PAGE_OFFSET)
		kaddr = vcpu->pio_data;
	else
		return NOPAGE_SIGBUS;

	page = virt_to_page(kaddr);
	get_page(page);
	return page;
}
/*
 * VMA operations for mappings of a vcpu file (installed by
 * kvm_vcpu_mmap()); faults are resolved by kvm_vcpu_nopage().
 */
static struct vm_operations_struct kvm_vcpu_vm_ops = {
	.nopage = kvm_vcpu_nopage,
};
/*
 * mmap handler for a vcpu file.  Nothing is mapped here; the VMA is only
 * pointed at kvm_vcpu_vm_ops, so pages are supplied by its .nopage
 * handler when they are first touched.  Always returns 0.
 */
static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_vcpu_vm_ops;
	return 0;
}
static int kvm_vcpu_release(struct inode *inode, struct file *filp) static int kvm_vcpu_release(struct inode *inode, struct file *filp)
{ {
struct kvm_vcpu *vcpu = filp->private_data; struct kvm_vcpu *vcpu = filp->private_data;
...@@ -1899,6 +2238,7 @@ static struct file_operations kvm_vcpu_fops = { ...@@ -1899,6 +2238,7 @@ static struct file_operations kvm_vcpu_fops = {
.release = kvm_vcpu_release, .release = kvm_vcpu_release,
.unlocked_ioctl = kvm_vcpu_ioctl, .unlocked_ioctl = kvm_vcpu_ioctl,
.compat_ioctl = kvm_vcpu_ioctl, .compat_ioctl = kvm_vcpu_ioctl,
.mmap = kvm_vcpu_mmap,
}; };
/* /*
...@@ -1947,6 +2287,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) ...@@ -1947,6 +2287,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
{ {
int r; int r;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
struct page *page;
r = -EINVAL; r = -EINVAL;
if (!valid_vcpu(n)) if (!valid_vcpu(n))
...@@ -1961,9 +2302,22 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) ...@@ -1961,9 +2302,22 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
return -EEXIST; return -EEXIST;
} }
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
r = -ENOMEM;
if (!page)
goto out_unlock;
vcpu->run = page_address(page);
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
r = -ENOMEM;
if (!page)
goto out_free_run;
vcpu->pio_data = page_address(page);
vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
FX_IMAGE_ALIGN); FX_IMAGE_ALIGN);
vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
vcpu->cr0 = 0x10;
r = kvm_arch_ops->vcpu_create(vcpu); r = kvm_arch_ops->vcpu_create(vcpu);
if (r < 0) if (r < 0)
...@@ -1990,11 +2344,107 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) ...@@ -1990,11 +2344,107 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
out_free_vcpus: out_free_vcpus:
kvm_free_vcpu(vcpu); kvm_free_vcpu(vcpu);
out_free_run:
free_page((unsigned long)vcpu->run);
vcpu->run = NULL;
out_unlock:
mutex_unlock(&vcpu->mutex); mutex_unlock(&vcpu->mutex);
out: out:
return r; return r;
} }
/*
 * Replace the vcpu's cpuid table with the entries supplied by userspace.
 *
 * @cpuid:   header already copied into kernel memory; only nent is used.
 * @entries: user pointer to cpuid->nent entries.
 *
 * Returns 0 on success, -E2BIG if nent exceeds KVM_MAX_CPUID_ENTRIES,
 * or -EFAULT if the entries cannot be copied from userspace.
 * vcpu->cpuid_nent is only updated after a successful copy.
 */
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
				    struct kvm_cpuid *cpuid,
				    struct kvm_cpuid_entry __user *entries)
{
	/* Bound nent before it is used to size the copy. */
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		return -E2BIG;

	if (copy_from_user(&vcpu->cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
		return -EFAULT;

	vcpu->cpuid_nent = cpuid->nent;
	return 0;
}
/*
 * Install or remove the signal mask that kvm_vcpu_ioctl_run() applies
 * while the vcpu runs.
 *
 * @sigset: mask to install, or NULL to deactivate the override.
 *          SIGKILL and SIGSTOP are removed from the caller's set (the
 *          set is modified in place) before it is stored.
 *
 * Always returns 0.
 */
static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
{
	if (!sigset) {
		vcpu->sigset_active = 0;
		return 0;
	}

	sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
	vcpu->sigset_active = 1;
	vcpu->sigset = *sigset;
	return 0;
}
/*
 * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
 * we have asm/x86/processor.h.
 *
 * Used below to interpret vcpu->guest_fx_image when translating to and
 * from struct kvm_fpu.
 */
struct fxsave {
	u16	cwd;		/* exchanged with kvm_fpu.fcw */
	u16	swd;		/* exchanged with kvm_fpu.fsw */
	u16	twd;		/* exchanged with kvm_fpu.ftwx */
	u16	fop;		/* exchanged with kvm_fpu.last_opcode */
	u64	rip;		/* exchanged with kvm_fpu.last_ip */
	u64	rdp;		/* exchanged with kvm_fpu.last_dp */
	u32	mxcsr;
	u32	mxcsr_mask;
	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
#ifdef CONFIG_X86_64
	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
#else
	u32	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
#endif
};
/*
 * Export the guest FPU state from vcpu->guest_fx_image into @fpu.
 *
 * The image is read through the struct fxsave layout.  The x87 register
 * area, control/status/tag words, last opcode/ip/dp and the XMM area are
 * copied; mxcsr and mxcsr_mask are not transferred.  The copy runs with
 * the vcpu loaded.  Always returns 0.
 */
static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	struct fxsave *image = (struct fxsave *)vcpu->guest_fx_image;

	vcpu_load(vcpu);

	/* Scalar control/status fields. */
	fpu->fcw = image->cwd;
	fpu->fsw = image->swd;
	fpu->ftwx = image->twd;
	fpu->last_opcode = image->fop;
	fpu->last_ip = image->rip;
	fpu->last_dp = image->rdp;

	/* Register files: st_space is 32 u32s, i.e. 128 bytes. */
	memcpy(fpu->fpr, image->st_space, sizeof image->st_space);
	memcpy(fpu->xmm, image->xmm_space, sizeof image->xmm_space);

	vcpu_put(vcpu);

	return 0;
}
/*
 * Load the guest FPU state in vcpu->guest_fx_image from @fpu.
 *
 * Mirror image of kvm_vcpu_ioctl_get_fpu(): the x87 register area,
 * control/status/tag words, last opcode/ip/dp and the XMM area are
 * written; mxcsr and mxcsr_mask in the image are left untouched.  The
 * copy runs with the vcpu loaded.  Always returns 0.
 */
static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	struct fxsave *image = (struct fxsave *)vcpu->guest_fx_image;

	vcpu_load(vcpu);

	/* Scalar control/status fields. */
	image->cwd = fpu->fcw;
	image->swd = fpu->fsw;
	image->twd = fpu->ftwx;
	image->fop = fpu->last_opcode;
	image->rip = fpu->last_ip;
	image->rdp = fpu->last_dp;

	/* Register files: st_space is 32 u32s, i.e. 128 bytes. */
	memcpy(image->st_space, fpu->fpr, sizeof image->st_space);
	memcpy(image->xmm_space, fpu->xmm, sizeof image->xmm_space);

	vcpu_put(vcpu);

	return 0;
}
static long kvm_vcpu_ioctl(struct file *filp, static long kvm_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg) unsigned int ioctl, unsigned long arg)
{ {
...@@ -2003,21 +2453,12 @@ static long kvm_vcpu_ioctl(struct file *filp, ...@@ -2003,21 +2453,12 @@ static long kvm_vcpu_ioctl(struct file *filp,
int r = -EINVAL; int r = -EINVAL;
switch (ioctl) { switch (ioctl) {
case KVM_RUN: { case KVM_RUN:
struct kvm_run kvm_run; r = -EINVAL;
if (arg)
r = -EFAULT;
if (copy_from_user(&kvm_run, argp, sizeof kvm_run))
goto out; goto out;
r = kvm_vcpu_ioctl_run(vcpu, &kvm_run); r = kvm_vcpu_ioctl_run(vcpu, vcpu->run);
if (r < 0 && r != -EINTR)
goto out;
if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) {
r = -EFAULT;
goto out;
}
break; break;
}
case KVM_GET_REGS: { case KVM_GET_REGS: {
struct kvm_regs kvm_regs; struct kvm_regs kvm_regs;
...@@ -2113,6 +2554,66 @@ static long kvm_vcpu_ioctl(struct file *filp, ...@@ -2113,6 +2554,66 @@ static long kvm_vcpu_ioctl(struct file *filp,
case KVM_SET_MSRS: case KVM_SET_MSRS:
r = msr_io(vcpu, argp, do_set_msr, 0); r = msr_io(vcpu, argp, do_set_msr, 0);
break; break;
case KVM_SET_CPUID: {
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
if (r)
goto out;
break;
}
case KVM_SET_SIGNAL_MASK: {
	struct kvm_signal_mask __user *sigmask_arg = argp;
	struct kvm_signal_mask kvm_sigmask;
	sigset_t sigset, *p;

	/*
	 * A NULL argument deactivates the signal-mask override; otherwise
	 * the header is read first so that the advertised length can be
	 * validated before the mask itself is copied in.
	 */
	p = NULL;
	if (argp) {
		r = -EFAULT;
		if (copy_from_user(&kvm_sigmask, argp,
				   sizeof kvm_sigmask))
			goto out;
		r = -EINVAL;
		if (kvm_sigmask.len != sizeof sigset)
			goto out;
		r = -EFAULT;
		if (copy_from_user(&sigset, sigmask_arg->sigset,
				   sizeof sigset))
			goto out;
		p = &sigset;
	}
	/*
	 * Pass p, not &sigset: with a NULL argument sigset is never
	 * written, and handing its address down would install
	 * uninitialised stack contents as the mask instead of turning
	 * the override off.
	 */
	r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
	break;
}
case KVM_GET_FPU: {
struct kvm_fpu fpu;
memset(&fpu, 0, sizeof fpu);
r = kvm_vcpu_ioctl_get_fpu(vcpu, &fpu);
if (r)
goto out;
r = -EFAULT;
if (copy_to_user(argp, &fpu, sizeof fpu))
goto out;
r = 0;
break;
}
case KVM_SET_FPU: {
struct kvm_fpu fpu;
r = -EFAULT;
if (copy_from_user(&fpu, argp, sizeof fpu))
goto out;
r = kvm_vcpu_ioctl_set_fpu(vcpu, &fpu);
if (r)
goto out;
r = 0;
break;
}
default: default:
; ;
} }
...@@ -2155,6 +2656,17 @@ static long kvm_vm_ioctl(struct file *filp, ...@@ -2155,6 +2656,17 @@ static long kvm_vm_ioctl(struct file *filp,
goto out; goto out;
break; break;
} }
case KVM_SET_MEMORY_ALIAS: {
struct kvm_memory_alias alias;
r = -EFAULT;
if (copy_from_user(&alias, argp, sizeof alias))
goto out;
r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
if (r)
goto out;
break;
}
default: default:
; ;
} }
...@@ -2168,15 +2680,11 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, ...@@ -2168,15 +2680,11 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
{ {
struct kvm *kvm = vma->vm_file->private_data; struct kvm *kvm = vma->vm_file->private_data;
unsigned long pgoff; unsigned long pgoff;
struct kvm_memory_slot *slot;
struct page *page; struct page *page;
*type = VM_FAULT_MINOR; *type = VM_FAULT_MINOR;
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
slot = gfn_to_memslot(kvm, pgoff); page = gfn_to_page(kvm, pgoff);
if (!slot)
return NOPAGE_SIGBUS;
page = gfn_to_page(slot, pgoff);
if (!page) if (!page)
return NOPAGE_SIGBUS; return NOPAGE_SIGBUS;
get_page(page); get_page(page);
...@@ -2248,13 +2756,19 @@ static long kvm_dev_ioctl(struct file *filp, ...@@ -2248,13 +2756,19 @@ static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg) unsigned int ioctl, unsigned long arg)
{ {
void __user *argp = (void __user *)arg; void __user *argp = (void __user *)arg;
int r = -EINVAL; long r = -EINVAL;
switch (ioctl) { switch (ioctl) {
case KVM_GET_API_VERSION: case KVM_GET_API_VERSION:
r = -EINVAL;
if (arg)
goto out;
r = KVM_API_VERSION; r = KVM_API_VERSION;
break; break;
case KVM_CREATE_VM: case KVM_CREATE_VM:
r = -EINVAL;
if (arg)
goto out;
r = kvm_dev_ioctl_create_vm(); r = kvm_dev_ioctl_create_vm();
break; break;
case KVM_GET_MSR_INDEX_LIST: { case KVM_GET_MSR_INDEX_LIST: {
...@@ -2284,6 +2798,18 @@ static long kvm_dev_ioctl(struct file *filp, ...@@ -2284,6 +2798,18 @@ static long kvm_dev_ioctl(struct file *filp,
r = 0; r = 0;
break; break;
} }
case KVM_CHECK_EXTENSION:
/*
* No extensions defined at present.
*/
r = 0;
break;
case KVM_GET_VCPU_MMAP_SIZE:
r = -EINVAL;
if (arg)
goto out;
r = 2 * PAGE_SIZE;
break;
default: default:
; ;
} }
...@@ -2299,7 +2825,7 @@ static struct file_operations kvm_chardev_ops = { ...@@ -2299,7 +2825,7 @@ static struct file_operations kvm_chardev_ops = {
}; };
static struct miscdevice kvm_dev = { static struct miscdevice kvm_dev = {
MISC_DYNAMIC_MINOR, KVM_MINOR,
"kvm", "kvm",
&kvm_chardev_ops, &kvm_chardev_ops,
}; };
...@@ -2385,14 +2911,39 @@ static struct notifier_block kvm_cpu_notifier = { ...@@ -2385,14 +2911,39 @@ static struct notifier_block kvm_cpu_notifier = {
.priority = 20, /* must be > scheduler priority */ .priority = 20, /* must be > scheduler priority */
}; };
static u64 stat_get(void *_offset)
{
unsigned offset = (long)_offset;
u64 total = 0;
struct kvm *kvm;
struct kvm_vcpu *vcpu;
int i;
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = &kvm->vcpus[i];
total += *(u32 *)((void *)vcpu + offset);
}
spin_unlock(&kvm_lock);
return total;
}
/*
 * Write callback for the statistics attribute.  Deliberately empty:
 * writes are ignored, so the counters can only be read through debugfs.
 */
static void stat_set(void *offset, u64 val)
{
}
/* File operations for one statistic: value printed as "%llu\n". */
DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n");
static __init void kvm_init_debug(void) static __init void kvm_init_debug(void)
{ {
struct kvm_stats_debugfs_item *p; struct kvm_stats_debugfs_item *p;
debugfs_dir = debugfs_create_dir("kvm", NULL); debugfs_dir = debugfs_create_dir("kvm", NULL);
for (p = debugfs_entries; p->name; ++p) for (p = debugfs_entries; p->name; ++p)
p->dentry = debugfs_create_u32(p->name, 0444, debugfs_dir, p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
p->data); (void *)(long)p->offset,
&stat_fops);
} }
static void kvm_exit_debug(void) static void kvm_exit_debug(void)
...@@ -2522,6 +3073,10 @@ static __init int kvm_init(void) ...@@ -2522,6 +3073,10 @@ static __init int kvm_init(void)
static struct page *bad_page; static struct page *bad_page;
int r; int r;
r = kvm_mmu_module_init();
if (r)
goto out4;
r = register_filesystem(&kvm_fs_type); r = register_filesystem(&kvm_fs_type);
if (r) if (r)
goto out3; goto out3;
...@@ -2550,6 +3105,8 @@ out: ...@@ -2550,6 +3105,8 @@ out:
out2: out2:
unregister_filesystem(&kvm_fs_type); unregister_filesystem(&kvm_fs_type);
out3: out3:
kvm_mmu_module_exit();
out4:
return r; return r;
} }
...@@ -2559,6 +3116,7 @@ static __exit void kvm_exit(void) ...@@ -2559,6 +3116,7 @@ static __exit void kvm_exit(void)
__free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
mntput(kvmfs_mnt); mntput(kvmfs_mnt);
unregister_filesystem(&kvm_fs_type); unregister_filesystem(&kvm_fs_type);
kvm_mmu_module_exit();
} }
module_init(kvm_init) module_init(kvm_init)
......
...@@ -9,17 +9,15 @@ ...@@ -9,17 +9,15 @@
#include "svm.h" #include "svm.h"
#include "kvm.h" #include "kvm.h"
static const u32 host_save_msrs[] = { static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
MSR_FS_BASE, MSR_GS_BASE, MSR_FS_BASE,
#endif #endif
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_IA32_DEBUGCTLMSR, /*MSR_IA32_LASTBRANCHFROMIP,
MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/
}; };
#define NR_HOST_SAVE_MSRS ARRAY_SIZE(host_save_msrs) #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
#define NUM_DB_REGS 4 #define NUM_DB_REGS 4
struct vcpu_svm { struct vcpu_svm {
...@@ -28,13 +26,12 @@ struct vcpu_svm { ...@@ -28,13 +26,12 @@ struct vcpu_svm {
struct svm_cpu_data *svm_data; struct svm_cpu_data *svm_data;
uint64_t asid_generation; uint64_t asid_generation;
unsigned long cr0;
unsigned long cr4;
unsigned long db_regs[NUM_DB_REGS]; unsigned long db_regs[NUM_DB_REGS];
u64 next_rip; u64 next_rip;
u64 host_msrs[NR_HOST_SAVE_MSRS]; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
u64 host_gs_base;
unsigned long host_cr2; unsigned long host_cr2;
unsigned long host_db_regs[NUM_DB_REGS]; unsigned long host_db_regs[NUM_DB_REGS];
unsigned long host_dr6; unsigned long host_dr6;
......
#ifndef __KVM_VMX_H
#define __KVM_VMX_H
#ifdef CONFIG_X86_64
/*
* avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt
* mechanism (cpu bug AA24)
*/
#define NR_BAD_MSRS 2
#else
#define NR_BAD_MSRS 0
#endif
#endif
...@@ -52,11 +52,15 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {} ...@@ -52,11 +52,15 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
static int dbg = 1; static int dbg = 1;
#endif #endif
#ifndef MMU_DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x) \ #define ASSERT(x) \
if (!(x)) { \ if (!(x)) { \
printk(KERN_WARNING "assertion failed %s:%d: %s\n", \ printk(KERN_WARNING "assertion failed %s:%d: %s\n", \
__FILE__, __LINE__, #x); \ __FILE__, __LINE__, #x); \
} }
#endif
#define PT64_PT_BITS 9 #define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
...@@ -159,6 +163,9 @@ struct kvm_rmap_desc { ...@@ -159,6 +163,9 @@ struct kvm_rmap_desc {
struct kvm_rmap_desc *more; struct kvm_rmap_desc *more;
}; };
static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static int is_write_protection(struct kvm_vcpu *vcpu) static int is_write_protection(struct kvm_vcpu *vcpu)
{ {
return vcpu->cr0 & CR0_WP_MASK; return vcpu->cr0 & CR0_WP_MASK;
...@@ -196,14 +203,15 @@ static int is_rmap_pte(u64 pte) ...@@ -196,14 +203,15 @@ static int is_rmap_pte(u64 pte)
} }
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
size_t objsize, int min) struct kmem_cache *base_cache, int min,
gfp_t gfp_flags)
{ {
void *obj; void *obj;
if (cache->nobjs >= min) if (cache->nobjs >= min)
return 0; return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) { while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
obj = kzalloc(objsize, GFP_NOWAIT); obj = kmem_cache_zalloc(base_cache, gfp_flags);
if (!obj) if (!obj)
return -ENOMEM; return -ENOMEM;
cache->objects[cache->nobjs++] = obj; cache->objects[cache->nobjs++] = obj;
...@@ -217,20 +225,35 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) ...@@ -217,20 +225,35 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
kfree(mc->objects[--mc->nobjs]); kfree(mc->objects[--mc->nobjs]);
} }
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
{ {
int r; int r;
r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
sizeof(struct kvm_pte_chain), 4); pte_chain_cache, 4, gfp_flags);
if (r) if (r)
goto out; goto out;
r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
sizeof(struct kvm_rmap_desc), 1); rmap_desc_cache, 1, gfp_flags);
out: out:
return r; return r;
} }
/*
 * Refill the vcpu's MMU object caches.
 *
 * A non-sleeping (GFP_NOWAIT) refill is tried first.  Only if that fails
 * is kvm->lock dropped and the vcpu put, so that the refill can be
 * retried with GFP_KERNEL; the vcpu is then reloaded and the lock
 * retaken.  The function therefore expects to be entered with
 * vcpu->kvm->lock held, and returns with it held.
 *
 * Returns the result of the last refill attempt.
 */
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
	int r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);

	/* Fast path: the atomic attempt was enough. */
	if (r >= 0)
		return r;

	spin_unlock(&vcpu->kvm->lock);
	kvm_arch_ops->vcpu_put(vcpu);

	r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);

	kvm_arch_ops->vcpu_load(vcpu);
	spin_lock(&vcpu->kvm->lock);

	return r;
}
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{ {
mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
...@@ -390,13 +413,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) ...@@ -390,13 +413,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{ {
struct kvm *kvm = vcpu->kvm; struct kvm *kvm = vcpu->kvm;
struct page *page; struct page *page;
struct kvm_memory_slot *slot;
struct kvm_rmap_desc *desc; struct kvm_rmap_desc *desc;
u64 *spte; u64 *spte;
slot = gfn_to_memslot(kvm, gfn); page = gfn_to_page(kvm, gfn);
BUG_ON(!slot); BUG_ON(!page);
page = gfn_to_page(slot, gfn);
while (page_private(page)) { while (page_private(page)) {
if (!(page_private(page) & 1)) if (!(page_private(page) & 1))
...@@ -417,6 +438,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) ...@@ -417,6 +438,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
} }
} }
#ifdef MMU_DEBUG
static int is_empty_shadow_page(hpa_t page_hpa) static int is_empty_shadow_page(hpa_t page_hpa)
{ {
u64 *pos; u64 *pos;
...@@ -431,15 +453,15 @@ static int is_empty_shadow_page(hpa_t page_hpa) ...@@ -431,15 +453,15 @@ static int is_empty_shadow_page(hpa_t page_hpa)
} }
return 1; return 1;
} }
#endif
static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{ {
struct kvm_mmu_page *page_head = page_header(page_hpa); struct kvm_mmu_page *page_head = page_header(page_hpa);
ASSERT(is_empty_shadow_page(page_hpa)); ASSERT(is_empty_shadow_page(page_hpa));
list_del(&page_head->link);
page_head->page_hpa = page_hpa; page_head->page_hpa = page_hpa;
list_add(&page_head->link, &vcpu->free_pages); list_move(&page_head->link, &vcpu->free_pages);
++vcpu->kvm->n_free_mmu_pages; ++vcpu->kvm->n_free_mmu_pages;
} }
...@@ -457,11 +479,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, ...@@ -457,11 +479,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
return NULL; return NULL;
page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
list_del(&page->link); list_move(&page->link, &vcpu->kvm->active_mmu_pages);
list_add(&page->link, &vcpu->kvm->active_mmu_pages);
ASSERT(is_empty_shadow_page(page->page_hpa)); ASSERT(is_empty_shadow_page(page->page_hpa));
page->slot_bitmap = 0; page->slot_bitmap = 0;
page->global = 1;
page->multimapped = 0; page->multimapped = 0;
page->parent_pte = parent_pte; page->parent_pte = parent_pte;
--vcpu->kvm->n_free_mmu_pages; --vcpu->kvm->n_free_mmu_pages;
...@@ -569,6 +589,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, ...@@ -569,6 +589,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gva_t gaddr, gva_t gaddr,
unsigned level, unsigned level,
int metaphysical, int metaphysical,
unsigned hugepage_access,
u64 *parent_pte) u64 *parent_pte)
{ {
union kvm_mmu_page_role role; union kvm_mmu_page_role role;
...@@ -582,6 +603,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, ...@@ -582,6 +603,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
role.glevels = vcpu->mmu.root_level; role.glevels = vcpu->mmu.root_level;
role.level = level; role.level = level;
role.metaphysical = metaphysical; role.metaphysical = metaphysical;
role.hugepage_access = hugepage_access;
if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
...@@ -669,10 +691,8 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, ...@@ -669,10 +691,8 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
if (!page->root_count) { if (!page->root_count) {
hlist_del(&page->hash_link); hlist_del(&page->hash_link);
kvm_mmu_free_page(vcpu, page->page_hpa); kvm_mmu_free_page(vcpu, page->page_hpa);
} else { } else
list_del(&page->link); list_move(&page->link, &vcpu->kvm->active_mmu_pages);
list_add(&page->link, &vcpu->kvm->active_mmu_pages);
}
} }
static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
...@@ -714,14 +734,12 @@ hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) ...@@ -714,14 +734,12 @@ hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{ {
struct kvm_memory_slot *slot;
struct page *page; struct page *page;
ASSERT((gpa & HPA_ERR_MASK) == 0); ASSERT((gpa & HPA_ERR_MASK) == 0);
slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
if (!slot) if (!page)
return gpa | HPA_ERR_MASK; return gpa | HPA_ERR_MASK;
page = gfn_to_page(slot, gpa >> PAGE_SHIFT);
return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
| (gpa & (PAGE_SIZE-1)); | (gpa & (PAGE_SIZE-1));
} }
...@@ -735,6 +753,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) ...@@ -735,6 +753,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
return gpa_to_hpa(vcpu, gpa); return gpa_to_hpa(vcpu, gpa);
} }
/*
 * Translate a guest virtual address into the host page that backs it.
 *
 * Returns NULL when the guest MMU has no translation for @gva
 * (UNMAPPED_GVA), or when no host page backs the resulting guest
 * physical address.  No reference is taken on the returned page; callers
 * that hold on to it must get_page() it themselves.
 */
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
{
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

	if (gpa == UNMAPPED_GVA)
		return NULL;
	/*
	 * Look the page up directly instead of going through gpa_to_hpa():
	 * that helper reports an unbacked gpa as (gpa | HPA_ERR_MASK), and
	 * feeding such a value to pfn_to_page() would produce a bogus,
	 * non-NULL page pointer.  For a backed gpa the result is the same
	 * page gpa_to_hpa() derives its address from.
	 */
	return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
}
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{ {
} }
...@@ -772,7 +799,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) ...@@ -772,7 +799,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
>> PAGE_SHIFT; >> PAGE_SHIFT;
new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
v, level - 1, v, level - 1,
1, &table[index]); 1, 0, &table[index]);
if (!new_table) { if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n"); pgprintk("nonpaging_map: ENOMEM\n");
return -ENOMEM; return -ENOMEM;
...@@ -804,10 +831,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) ...@@ -804,10 +831,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->mmu.pae_root[i]; hpa_t root = vcpu->mmu.pae_root[i];
ASSERT(VALID_PAGE(root)); if (root) {
root &= PT64_BASE_ADDR_MASK; ASSERT(VALID_PAGE(root));
page = page_header(root); root &= PT64_BASE_ADDR_MASK;
--page->root_count; page = page_header(root);
--page->root_count;
}
vcpu->mmu.pae_root[i] = INVALID_PAGE; vcpu->mmu.pae_root[i] = INVALID_PAGE;
} }
vcpu->mmu.root_hpa = INVALID_PAGE; vcpu->mmu.root_hpa = INVALID_PAGE;
...@@ -827,7 +856,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ...@@ -827,7 +856,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root)); ASSERT(!VALID_PAGE(root));
page = kvm_mmu_get_page(vcpu, root_gfn, 0, page = kvm_mmu_get_page(vcpu, root_gfn, 0,
PT64_ROOT_LEVEL, 0, NULL); PT64_ROOT_LEVEL, 0, 0, NULL);
root = page->page_hpa; root = page->page_hpa;
++page->root_count; ++page->root_count;
vcpu->mmu.root_hpa = root; vcpu->mmu.root_hpa = root;
...@@ -838,13 +867,17 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ...@@ -838,13 +867,17 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
hpa_t root = vcpu->mmu.pae_root[i]; hpa_t root = vcpu->mmu.pae_root[i];
ASSERT(!VALID_PAGE(root)); ASSERT(!VALID_PAGE(root));
if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) {
if (!is_present_pte(vcpu->pdptrs[i])) {
vcpu->mmu.pae_root[i] = 0;
continue;
}
root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT; root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
else if (vcpu->mmu.root_level == 0) } else if (vcpu->mmu.root_level == 0)
root_gfn = 0; root_gfn = 0;
page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, !is_paging(vcpu), PT32_ROOT_LEVEL, !is_paging(vcpu),
NULL); 0, NULL);
root = page->page_hpa; root = page->page_hpa;
++page->root_count; ++page->root_count;
vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
...@@ -903,7 +936,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) ...@@ -903,7 +936,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{ {
++kvm_stat.tlb_flush; ++vcpu->stat.tlb_flush;
kvm_arch_ops->tlb_flush(vcpu); kvm_arch_ops->tlb_flush(vcpu);
} }
...@@ -918,11 +951,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) ...@@ -918,11 +951,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu)
kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
} }
static void mark_pagetable_nonglobal(void *shadow_pte)
{
page_header(__pa(shadow_pte))->global = 0;
}
static inline void set_pte_common(struct kvm_vcpu *vcpu, static inline void set_pte_common(struct kvm_vcpu *vcpu,
u64 *shadow_pte, u64 *shadow_pte,
gpa_t gaddr, gpa_t gaddr,
...@@ -940,9 +968,6 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu, ...@@ -940,9 +968,6 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
*shadow_pte |= access_bits; *shadow_pte |= access_bits;
if (!(*shadow_pte & PT_GLOBAL_MASK))
mark_pagetable_nonglobal(shadow_pte);
if (is_error_hpa(paddr)) { if (is_error_hpa(paddr)) {
*shadow_pte |= gaddr; *shadow_pte |= gaddr;
*shadow_pte |= PT_SHADOW_IO_MARK; *shadow_pte |= PT_SHADOW_IO_MARK;
...@@ -1316,6 +1341,51 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) ...@@ -1316,6 +1341,51 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
} }
} }
void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
{
destroy_kvm_mmu(vcpu);
while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
struct kvm_mmu_page *page;
page = container_of(vcpu->kvm->active_mmu_pages.next,
struct kvm_mmu_page, link);
kvm_mmu_zap_page(vcpu, page);
}
mmu_free_memory_caches(vcpu);
kvm_arch_ops->tlb_flush(vcpu);
init_kvm_mmu(vcpu);
}
/*
 * Destroy the slab caches created by kvm_mmu_module_init().
 *
 * Safe to call with either cache missing (it is used on the init failure
 * path).  Each pointer is reset to NULL after its cache is destroyed so
 * that a repeated call cannot hand a stale pointer to
 * kmem_cache_destroy().
 */
void kvm_mmu_module_exit(void)
{
	if (pte_chain_cache) {
		kmem_cache_destroy(pte_chain_cache);
		pte_chain_cache = NULL;
	}
	if (rmap_desc_cache) {
		kmem_cache_destroy(rmap_desc_cache);
		rmap_desc_cache = NULL;
	}
}
/*
 * Create the slab caches used for pte chains and rmap descriptors.
 *
 * Returns 0 when both caches exist.  If either creation fails, whatever
 * was created is released through kvm_mmu_module_exit() and -ENOMEM is
 * returned.
 */
int kvm_mmu_module_init(void)
{
	pte_chain_cache = kmem_cache_create("kvm_pte_chain",
					    sizeof(struct kvm_pte_chain),
					    0, 0, NULL, NULL);
	if (pte_chain_cache) {
		rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
						    sizeof(struct kvm_rmap_desc),
						    0, 0, NULL, NULL);
		if (rmap_desc_cache)
			return 0;
	}

	/* One of the allocations failed: undo the partial setup. */
	kvm_mmu_module_exit();
	return -ENOMEM;
}
#ifdef AUDIT #ifdef AUDIT
static const char *audit_msg; static const char *audit_msg;
...@@ -1338,7 +1408,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, ...@@ -1338,7 +1408,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
u64 ent = pt[i]; u64 ent = pt[i];
if (!ent & PT_PRESENT_MASK) if (!(ent & PT_PRESENT_MASK))
continue; continue;
va = canonicalize(va); va = canonicalize(va);
...@@ -1360,7 +1430,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, ...@@ -1360,7 +1430,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
static void audit_mappings(struct kvm_vcpu *vcpu) static void audit_mappings(struct kvm_vcpu *vcpu)
{ {
int i; unsigned i;
if (vcpu->mmu.root_level == 4) if (vcpu->mmu.root_level == 4)
audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4); audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
......
...@@ -148,8 +148,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, ...@@ -148,8 +148,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
break; break;
} }
if (walker->level != 3 || is_long_mode(vcpu)) walker->inherited_ar &= walker->table[index];
walker->inherited_ar &= walker->table[index];
table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK); paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
kunmap_atomic(walker->table, KM_USER0); kunmap_atomic(walker->table, KM_USER0);
...@@ -248,6 +247,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -248,6 +247,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
u64 shadow_pte; u64 shadow_pte;
int metaphysical; int metaphysical;
gfn_t table_gfn; gfn_t table_gfn;
unsigned hugepage_access = 0;
if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
if (level == PT_PAGE_TABLE_LEVEL) if (level == PT_PAGE_TABLE_LEVEL)
...@@ -277,6 +277,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -277,6 +277,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (level - 1 == PT_PAGE_TABLE_LEVEL if (level - 1 == PT_PAGE_TABLE_LEVEL
&& walker->level == PT_DIRECTORY_LEVEL) { && walker->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1; metaphysical = 1;
hugepage_access = *guest_ent;
hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
hugepage_access >>= PT_WRITABLE_SHIFT;
table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT; >> PAGE_SHIFT;
} else { } else {
...@@ -284,7 +287,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -284,7 +287,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
table_gfn = walker->table_gfn[level - 2]; table_gfn = walker->table_gfn[level - 2];
} }
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
metaphysical, shadow_ent); metaphysical, hugepage_access,
shadow_ent);
shadow_addr = shadow_page->page_hpa; shadow_addr = shadow_page->page_hpa;
shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK; | PT_WRITABLE_MASK | PT_USER_MASK;
...@@ -444,7 +448,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -444,7 +448,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
if (is_io_pte(*shadow_pte)) if (is_io_pte(*shadow_pte))
return 1; return 1;
++kvm_stat.pf_fixed; ++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, "post page fault (fixed)"); kvm_mmu_audit(vcpu, "post page fault (fixed)");
return write_pt; return write_pt;
......
...@@ -44,6 +44,10 @@ MODULE_LICENSE("GPL"); ...@@ -44,6 +44,10 @@ MODULE_LICENSE("GPL");
#define KVM_EFER_LMA (1 << 10) #define KVM_EFER_LMA (1 << 10)
#define KVM_EFER_LME (1 << 8) #define KVM_EFER_LME (1 << 8)
/* Feature bits tested against svm_features through svm_has(). */
#define SVM_FEATURE_NPT  (1 << 0)
#define SVM_FEATURE_LBRV (1 << 1)
#define SVM_FEATURE_SVML (1 << 2)
/* Original misspelling, kept as an alias so existing users still build. */
#define SVM_DEATURE_SVML SVM_FEATURE_SVML
unsigned long iopm_base; unsigned long iopm_base;
unsigned long msrpm_base; unsigned long msrpm_base;
...@@ -59,15 +63,16 @@ struct kvm_ldttss_desc { ...@@ -59,15 +63,16 @@ struct kvm_ldttss_desc {
struct svm_cpu_data { struct svm_cpu_data {
int cpu; int cpu;
uint64_t asid_generation; u64 asid_generation;
uint32_t max_asid; u32 max_asid;
uint32_t next_asid; u32 next_asid;
struct kvm_ldttss_desc *tss_desc; struct kvm_ldttss_desc *tss_desc;
struct page *save_area; struct page *save_area;
}; };
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
/* EDX of cpuid leaf SVM_CPUID_FUNC, stored by svm_hardware_enable(). */
static uint32_t svm_features;
struct svm_init_data { struct svm_init_data {
int cpu; int cpu;
...@@ -82,6 +87,11 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; ...@@ -82,6 +87,11 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
#define MAX_INST_SIZE 15 #define MAX_INST_SIZE 15
/*
 * Test for SVM feature bits.  Returns the subset of @feat that is set in
 * svm_features, i.e. non-zero when at least one requested bit is present.
 */
static inline u32 svm_has(u32 feat)
{
	u32 present = svm_features;

	present &= feat;
	return present;
}
static unsigned get_addr_size(struct kvm_vcpu *vcpu) static unsigned get_addr_size(struct kvm_vcpu *vcpu)
{ {
struct vmcb_save_area *sa = &vcpu->svm->vmcb->save; struct vmcb_save_area *sa = &vcpu->svm->vmcb->save;
...@@ -203,13 +213,6 @@ static void inject_ud(struct kvm_vcpu *vcpu) ...@@ -203,13 +213,6 @@ static void inject_ud(struct kvm_vcpu *vcpu)
UD_VECTOR; UD_VECTOR;
} }
static void inject_db(struct kvm_vcpu *vcpu)
{
vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
SVM_EVTINJ_TYPE_EXEPT |
DB_VECTOR;
}
static int is_page_fault(uint32_t info) static int is_page_fault(uint32_t info)
{ {
info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
...@@ -309,6 +312,7 @@ static void svm_hardware_enable(void *garbage) ...@@ -309,6 +312,7 @@ static void svm_hardware_enable(void *garbage)
svm_data->asid_generation = 1; svm_data->asid_generation = 1;
svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
svm_data->next_asid = svm_data->max_asid + 1; svm_data->next_asid = svm_data->max_asid + 1;
svm_features = cpuid_edx(SVM_CPUID_FUNC);
asm volatile ( "sgdt %0" : "=m"(gdt_descr) ); asm volatile ( "sgdt %0" : "=m"(gdt_descr) );
gdt = (struct desc_struct *)gdt_descr.address; gdt = (struct desc_struct *)gdt_descr.address;
...@@ -459,7 +463,6 @@ static void init_vmcb(struct vmcb *vmcb) ...@@ -459,7 +463,6 @@ static void init_vmcb(struct vmcb *vmcb)
{ {
struct vmcb_control_area *control = &vmcb->control; struct vmcb_control_area *control = &vmcb->control;
struct vmcb_save_area *save = &vmcb->save; struct vmcb_save_area *save = &vmcb->save;
u64 tsc;
control->intercept_cr_read = INTERCEPT_CR0_MASK | control->intercept_cr_read = INTERCEPT_CR0_MASK |
INTERCEPT_CR3_MASK | INTERCEPT_CR3_MASK |
...@@ -511,12 +514,13 @@ static void init_vmcb(struct vmcb *vmcb) ...@@ -511,12 +514,13 @@ static void init_vmcb(struct vmcb *vmcb)
(1ULL << INTERCEPT_VMSAVE) | (1ULL << INTERCEPT_VMSAVE) |
(1ULL << INTERCEPT_STGI) | (1ULL << INTERCEPT_STGI) |
(1ULL << INTERCEPT_CLGI) | (1ULL << INTERCEPT_CLGI) |
(1ULL << INTERCEPT_SKINIT); (1ULL << INTERCEPT_SKINIT) |
(1ULL << INTERCEPT_MONITOR) |
(1ULL << INTERCEPT_MWAIT);
control->iopm_base_pa = iopm_base; control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = msrpm_base; control->msrpm_base_pa = msrpm_base;
rdtscll(tsc); control->tsc_offset = 0;
control->tsc_offset = -tsc;
control->int_ctl = V_INTR_MASKING_MASK; control->int_ctl = V_INTR_MASKING_MASK;
init_seg(&save->es); init_seg(&save->es);
...@@ -576,12 +580,15 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) ...@@ -576,12 +580,15 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
vcpu->svm->vmcb = page_address(page); vcpu->svm->vmcb = page_address(page);
memset(vcpu->svm->vmcb, 0, PAGE_SIZE); memset(vcpu->svm->vmcb, 0, PAGE_SIZE);
vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
vcpu->svm->cr0 = 0x00000010;
vcpu->svm->asid_generation = 0; vcpu->svm->asid_generation = 0;
memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs));
init_vmcb(vcpu->svm->vmcb); init_vmcb(vcpu->svm->vmcb);
fx_init(vcpu); fx_init(vcpu);
vcpu->fpu_active = 1;
vcpu->apic_base = 0xfee00000 |
/*for vcpu 0*/ MSR_IA32_APICBASE_BSP |
MSR_IA32_APICBASE_ENABLE;
return 0; return 0;
...@@ -602,11 +609,34 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) ...@@ -602,11 +609,34 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
static void svm_vcpu_load(struct kvm_vcpu *vcpu) static void svm_vcpu_load(struct kvm_vcpu *vcpu)
{ {
get_cpu(); int cpu, i;
cpu = get_cpu();
if (unlikely(cpu != vcpu->cpu)) {
u64 tsc_this, delta;
/*
* Make sure that the guest sees a monotonically
* increasing TSC.
*/
rdtscll(tsc_this);
delta = vcpu->host_tsc - tsc_this;
vcpu->svm->vmcb->control.tsc_offset += delta;
vcpu->cpu = cpu;
}
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
rdmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]);
} }
static void svm_vcpu_put(struct kvm_vcpu *vcpu) static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{ {
int i;
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]);
rdtscll(vcpu->host_tsc);
put_cpu(); put_cpu();
} }
...@@ -714,7 +744,7 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) ...@@ -714,7 +744,7 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
vcpu->svm->vmcb->save.gdtr.base = dt->base ; vcpu->svm->vmcb->save.gdtr.base = dt->base ;
} }
static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{ {
} }
...@@ -733,9 +763,15 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ...@@ -733,9 +763,15 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
} }
} }
#endif #endif
vcpu->svm->cr0 = cr0; if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) {
vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK | CR0_WP_MASK; vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
vcpu->fpu_active = 1;
}
vcpu->cr0 = cr0; vcpu->cr0 = cr0;
cr0 |= CR0_PG_MASK | CR0_WP_MASK;
cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK);
vcpu->svm->vmcb->save.cr0 = cr0;
} }
static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
...@@ -785,18 +821,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) ...@@ -785,18 +821,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
static void load_host_msrs(struct kvm_vcpu *vcpu) static void load_host_msrs(struct kvm_vcpu *vcpu)
{ {
int i; #ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base);
for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) #endif
wrmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]);
} }
static void save_host_msrs(struct kvm_vcpu *vcpu) static void save_host_msrs(struct kvm_vcpu *vcpu)
{ {
int i; #ifdef CONFIG_X86_64
rdmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base);
for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) #endif
rdmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]);
} }
static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data)
...@@ -890,7 +924,7 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -890,7 +924,7 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
case EMULATE_DONE: case EMULATE_DONE:
return 1; return 1;
case EMULATE_DO_MMIO: case EMULATE_DO_MMIO:
++kvm_stat.mmio_exits; ++vcpu->stat.mmio_exits;
kvm_run->exit_reason = KVM_EXIT_MMIO; kvm_run->exit_reason = KVM_EXIT_MMIO;
return 0; return 0;
case EMULATE_FAIL: case EMULATE_FAIL:
...@@ -904,6 +938,16 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -904,6 +938,16 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0; return 0;
} }
static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
if (!(vcpu->cr0 & CR0_TS_MASK))
vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK;
vcpu->fpu_active = 1;
return 1;
}
static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
/* /*
...@@ -981,7 +1025,7 @@ static int io_get_override(struct kvm_vcpu *vcpu, ...@@ -981,7 +1025,7 @@ static int io_get_override(struct kvm_vcpu *vcpu,
return 0; return 0;
} }
static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address)
{ {
unsigned long addr_mask; unsigned long addr_mask;
unsigned long *reg; unsigned long *reg;
...@@ -1025,38 +1069,38 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) ...@@ -1025,38 +1069,38 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address)
static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug?
int _in = io_info & SVM_IOIO_TYPE_MASK; int size, down, in, string, rep;
unsigned port;
unsigned long count;
gva_t address = 0;
++kvm_stat.io_exits; ++vcpu->stat.io_exits;
vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2;
kvm_run->exit_reason = KVM_EXIT_IO; in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
kvm_run->io.port = io_info >> 16; port = io_info >> 16;
kvm_run->io.direction = (_in) ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); string = (io_info & SVM_IOIO_STR_MASK) != 0;
kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; rep = (io_info & SVM_IOIO_REP_MASK) != 0;
kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; count = 1;
down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
if (kvm_run->io.string) { if (string) {
unsigned addr_mask; unsigned addr_mask;
addr_mask = io_adress(vcpu, _in, &kvm_run->io.address); addr_mask = io_adress(vcpu, in, &address);
if (!addr_mask) { if (!addr_mask) {
printk(KERN_DEBUG "%s: get io address failed\n", printk(KERN_DEBUG "%s: get io address failed\n",
__FUNCTION__); __FUNCTION__);
return 1; return 1;
} }
if (kvm_run->io.rep) { if (rep)
kvm_run->io.count count = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
= vcpu->regs[VCPU_REGS_RCX] & addr_mask; }
kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down,
& X86_EFLAGS_DF) != 0; address, rep, port);
}
} else
kvm_run->io.value = vcpu->svm->vmcb->save.rax;
return 0;
} }
static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
...@@ -1072,13 +1116,14 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1072,13 +1116,14 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1; return 1;
kvm_run->exit_reason = KVM_EXIT_HLT; kvm_run->exit_reason = KVM_EXIT_HLT;
++kvm_stat.halt_exits; ++vcpu->stat.halt_exits;
return 0; return 0;
} }
static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
vcpu->svm->vmcb->save.rip += 3; vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 3;
skip_emulated_instruction(vcpu);
return kvm_hypercall(vcpu, kvm_run); return kvm_hypercall(vcpu, kvm_run);
} }
...@@ -1098,8 +1143,8 @@ static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_r ...@@ -1098,8 +1143,8 @@ static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_r
static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2;
kvm_run->exit_reason = KVM_EXIT_CPUID; kvm_emulate_cpuid(vcpu);
return 0; return 1;
} }
static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
...@@ -1239,7 +1284,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu, ...@@ -1239,7 +1284,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu,
*/ */
if (kvm_run->request_interrupt_window && if (kvm_run->request_interrupt_window &&
!vcpu->irq_summary) { !vcpu->irq_summary) {
++kvm_stat.irq_window_exits; ++vcpu->stat.irq_window_exits;
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
return 0; return 0;
} }
...@@ -1267,6 +1312,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, ...@@ -1267,6 +1312,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
[SVM_EXIT_WRITE_DR5] = emulate_on_interception, [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
[SVM_EXIT_WRITE_DR7] = emulate_on_interception, [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
[SVM_EXIT_INTR] = nop_on_interception, [SVM_EXIT_INTR] = nop_on_interception,
[SVM_EXIT_NMI] = nop_on_interception, [SVM_EXIT_NMI] = nop_on_interception,
[SVM_EXIT_SMI] = nop_on_interception, [SVM_EXIT_SMI] = nop_on_interception,
...@@ -1288,6 +1334,8 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, ...@@ -1288,6 +1334,8 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
[SVM_EXIT_STGI] = invalid_op_interception, [SVM_EXIT_STGI] = invalid_op_interception,
[SVM_EXIT_CLGI] = invalid_op_interception, [SVM_EXIT_CLGI] = invalid_op_interception,
[SVM_EXIT_SKINIT] = invalid_op_interception, [SVM_EXIT_SKINIT] = invalid_op_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
[SVM_EXIT_MWAIT] = invalid_op_interception,
}; };
...@@ -1295,8 +1343,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1295,8 +1343,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
u32 exit_code = vcpu->svm->vmcb->control.exit_code; u32 exit_code = vcpu->svm->vmcb->control.exit_code;
kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT;
if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) &&
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR)
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
...@@ -1307,12 +1353,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1307,12 +1353,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (exit_code >= ARRAY_SIZE(svm_exit_handlers) if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| svm_exit_handlers[exit_code] == 0) { || svm_exit_handlers[exit_code] == 0) {
kvm_run->exit_reason = KVM_EXIT_UNKNOWN; kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n", kvm_run->hw.hardware_exit_reason = exit_code;
__FUNCTION__,
exit_code,
vcpu->svm->vmcb->save.rip,
vcpu->cr0,
vcpu->svm->vmcb->save.rflags);
return 0; return 0;
} }
...@@ -1461,8 +1502,10 @@ again: ...@@ -1461,8 +1502,10 @@ again:
load_db_regs(vcpu->svm->db_regs); load_db_regs(vcpu->svm->db_regs);
} }
fx_save(vcpu->host_fx_image); if (vcpu->fpu_active) {
fx_restore(vcpu->guest_fx_image); fx_save(vcpu->host_fx_image);
fx_restore(vcpu->guest_fx_image);
}
asm volatile ( asm volatile (
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
...@@ -1573,8 +1616,10 @@ again: ...@@ -1573,8 +1616,10 @@ again:
#endif #endif
: "cc", "memory" ); : "cc", "memory" );
fx_save(vcpu->guest_fx_image); if (vcpu->fpu_active) {
fx_restore(vcpu->host_fx_image); fx_save(vcpu->guest_fx_image);
fx_restore(vcpu->host_fx_image);
}
if ((vcpu->svm->vmcb->save.dr7 & 0xff)) if ((vcpu->svm->vmcb->save.dr7 & 0xff))
load_db_regs(vcpu->svm->host_db_regs); load_db_regs(vcpu->svm->host_db_regs);
...@@ -1606,8 +1651,9 @@ again: ...@@ -1606,8 +1651,9 @@ again:
vcpu->svm->next_rip = 0; vcpu->svm->next_rip = 0;
if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; kvm_run->fail_entry.hardware_entry_failure_reason
= vcpu->svm->vmcb->control.exit_code;
post_kvm_run_save(vcpu, kvm_run); post_kvm_run_save(vcpu, kvm_run);
return 0; return 0;
} }
...@@ -1615,14 +1661,16 @@ again: ...@@ -1615,14 +1661,16 @@ again:
r = handle_exit(vcpu, kvm_run); r = handle_exit(vcpu, kvm_run);
if (r > 0) { if (r > 0) {
if (signal_pending(current)) { if (signal_pending(current)) {
++kvm_stat.signal_exits; ++vcpu->stat.signal_exits;
post_kvm_run_save(vcpu, kvm_run); post_kvm_run_save(vcpu, kvm_run);
kvm_run->exit_reason = KVM_EXIT_INTR;
return -EINTR; return -EINTR;
} }
if (dm_request_for_irq_injection(vcpu, kvm_run)) { if (dm_request_for_irq_injection(vcpu, kvm_run)) {
++kvm_stat.request_irq_exits; ++vcpu->stat.request_irq_exits;
post_kvm_run_save(vcpu, kvm_run); post_kvm_run_save(vcpu, kvm_run);
kvm_run->exit_reason = KVM_EXIT_INTR;
return -EINTR; return -EINTR;
} }
kvm_resched(vcpu); kvm_resched(vcpu);
...@@ -1641,6 +1689,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) ...@@ -1641,6 +1689,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{ {
vcpu->svm->vmcb->save.cr3 = root; vcpu->svm->vmcb->save.cr3 = root;
force_new_asid(vcpu); force_new_asid(vcpu);
if (vcpu->fpu_active) {
vcpu->svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK;
vcpu->fpu_active = 0;
}
} }
static void svm_inject_page_fault(struct kvm_vcpu *vcpu, static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
...@@ -1649,7 +1703,7 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu, ...@@ -1649,7 +1703,7 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
{ {
uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info; uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info;
++kvm_stat.pf_guest; ++vcpu->stat.pf_guest;
if (is_page_fault(exit_int_info)) { if (is_page_fault(exit_int_info)) {
...@@ -1709,9 +1763,8 @@ static struct kvm_arch_ops svm_arch_ops = { ...@@ -1709,9 +1763,8 @@ static struct kvm_arch_ops svm_arch_ops = {
.get_segment = svm_get_segment, .get_segment = svm_get_segment,
.set_segment = svm_set_segment, .set_segment = svm_set_segment,
.get_cs_db_l_bits = svm_get_cs_db_l_bits, .get_cs_db_l_bits = svm_get_cs_db_l_bits,
.decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits, .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
.set_cr0 = svm_set_cr0, .set_cr0 = svm_set_cr0,
.set_cr0_no_modeswitch = svm_set_cr0,
.set_cr3 = svm_set_cr3, .set_cr3 = svm_set_cr3,
.set_cr4 = svm_set_cr4, .set_cr4 = svm_set_cr4,
.set_efer = svm_set_efer, .set_efer = svm_set_efer,
......
...@@ -44,6 +44,9 @@ enum { ...@@ -44,6 +44,9 @@ enum {
INTERCEPT_RDTSCP, INTERCEPT_RDTSCP,
INTERCEPT_ICEBP, INTERCEPT_ICEBP,
INTERCEPT_WBINVD, INTERCEPT_WBINVD,
INTERCEPT_MONITOR,
INTERCEPT_MWAIT,
INTERCEPT_MWAIT_COND,
}; };
...@@ -298,6 +301,9 @@ struct __attribute__ ((__packed__)) vmcb { ...@@ -298,6 +301,9 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EXIT_RDTSCP 0x087 #define SVM_EXIT_RDTSCP 0x087
#define SVM_EXIT_ICEBP 0x088 #define SVM_EXIT_ICEBP 0x088
#define SVM_EXIT_WBINVD 0x089 #define SVM_EXIT_WBINVD 0x089
#define SVM_EXIT_MONITOR 0x08a
#define SVM_EXIT_MWAIT 0x08b
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_NPF 0x400 #define SVM_EXIT_NPF 0x400
#define SVM_EXIT_ERR -1 #define SVM_EXIT_ERR -1
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include "kvm.h" #include "kvm.h"
#include "vmx.h" #include "vmx.h"
#include "kvm_vmx.h"
#include <linux/module.h> #include <linux/module.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/mm.h> #include <linux/mm.h>
...@@ -70,6 +69,10 @@ static struct kvm_vmx_segment_field { ...@@ -70,6 +69,10 @@ static struct kvm_vmx_segment_field {
VMX_SEGMENT_FIELD(LDTR), VMX_SEGMENT_FIELD(LDTR),
}; };
/*
* Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
* away by decrementing the array size.
*/
static const u32 vmx_msr_index[] = { static const u32 vmx_msr_index[] = {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
...@@ -78,6 +81,19 @@ static const u32 vmx_msr_index[] = { ...@@ -78,6 +81,19 @@ static const u32 vmx_msr_index[] = {
}; };
#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
#ifdef CONFIG_X86_64
static unsigned msr_offset_kernel_gs_base;
#define NR_64BIT_MSRS 4
/*
 * Avoid saving/loading MSR_SYSCALL_MASK and MSR_LSTAR through the
 * standard VT MSR save/load mechanism (cpu bug AA24).
 */
#define NR_BAD_MSRS 2
#else
#define NR_64BIT_MSRS 0
#define NR_BAD_MSRS 0
#endif
static inline int is_page_fault(u32 intr_info) static inline int is_page_fault(u32 intr_info)
{ {
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
...@@ -85,6 +101,13 @@ static inline int is_page_fault(u32 intr_info) ...@@ -85,6 +101,13 @@ static inline int is_page_fault(u32 intr_info)
(INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
} }
/*
 * Return non-zero when @intr_info describes a valid exception whose
 * vector is NM_VECTOR.  Only the type, vector and valid fields are
 * compared; all other bits of @intr_info are ignored.
 */
static inline int is_no_device(u32 intr_info)
{
	const u32 fields = INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			   INTR_INFO_VALID_MASK;
	const u32 nm_exception = INTR_TYPE_EXCEPTION | NM_VECTOR |
				 INTR_INFO_VALID_MASK;

	return (intr_info & fields) == nm_exception;
}
static inline int is_external_interrupt(u32 intr_info) static inline int is_external_interrupt(u32 intr_info)
{ {
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
...@@ -200,6 +223,16 @@ static void vmcs_write64(unsigned long field, u64 value) ...@@ -200,6 +223,16 @@ static void vmcs_write64(unsigned long field, u64 value)
#endif #endif
} }
/*
 * Read-modify-write helper: clear the bits of @mask in vmcs field @field.
 *
 * The mask is widened to unsigned long before it is complemented.
 * Complementing it as a 32-bit value would zero-extend the result and
 * also wipe bits 63:32 of the value returned by vmcs_readl() on 64-bit
 * hosts.
 */
static void vmcs_clear_bits(unsigned long field, u32 mask)
{
	vmcs_writel(field, vmcs_readl(field) & ~(unsigned long)mask);
}
/*
 * Read-modify-write helper: set the bits of @mask in vmcs field @field,
 * leaving every other bit of the field as read.
 */
static void vmcs_set_bits(unsigned long field, u32 mask)
{
	unsigned long value = vmcs_readl(field);

	value |= mask;
	vmcs_writel(field, value);
}
/* /*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes * Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken. * vcpu mutex is already taken.
...@@ -296,6 +329,44 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) ...@@ -296,6 +329,44 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
INTR_INFO_VALID_MASK); INTR_INFO_VALID_MASK);
} }
/*
 * Set up the vmcs to automatically save and restore system
 * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
 * mode, as fiddling with msrs is very expensive.
 *
 * The first nr_skip entries of guest_msrs/host_msrs are left out of the
 * areas handed to the vmcs; the counts written cover nr_good_msrs
 * entries starting at that offset.
 */
static void setup_msrs(struct kvm_vcpu *vcpu)
{
	int nr_skip, nr_good_msrs;

	nr_skip = is_long_mode(vcpu) ? NR_BAD_MSRS : NR_64BIT_MSRS;
	nr_good_msrs = vcpu->nmsrs - nr_skip;

	/*
	 * MSR_K6_STAR is only needed on long mode guests, and only
	 * if efer.sce is enabled.  When it is present but not needed,
	 * shrink the count by one so it is not switched.
	 */
	if (find_msr_entry(vcpu, MSR_K6_STAR)) {
		int star_needed = 0;

#ifdef CONFIG_X86_64
		star_needed = is_long_mode(vcpu) &&
			      (vcpu->shadow_efer & EFER_SCE);
#endif
		if (!star_needed)
			--nr_good_msrs;
	}

	/* Guest area is both loaded on entry and stored on exit. */
	vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
		    virt_to_phys(vcpu->guest_msrs + nr_skip));
	vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
		    virt_to_phys(vcpu->guest_msrs + nr_skip));
	/* Host area is loaded on exit. */
	vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
		    virt_to_phys(vcpu->host_msrs + nr_skip));

	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs);  /* 22.2.2 */
	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
}
/* /*
* reads and returns guest's timestamp counter "register" * reads and returns guest's timestamp counter "register"
* guest_tsc = host_tsc + tsc_offset -- 21.3 * guest_tsc = host_tsc + tsc_offset -- 21.3
...@@ -712,6 +783,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) ...@@ -712,6 +783,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
vmcs_write32(GUEST_CS_LIMIT, 0xffff); vmcs_write32(GUEST_CS_LIMIT, 0xffff);
if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
vmcs_writel(GUEST_CS_BASE, 0xf0000);
vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es); fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
...@@ -754,11 +827,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu) ...@@ -754,11 +827,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
#endif #endif
static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{ {
vcpu->cr0 &= KVM_GUEST_CR0_MASK;
vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK;
vcpu->cr4 &= KVM_GUEST_CR4_MASK; vcpu->cr4 &= KVM_GUEST_CR4_MASK;
vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
} }
...@@ -780,22 +850,11 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ...@@ -780,22 +850,11 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
} }
#endif #endif
vmcs_writel(CR0_READ_SHADOW, cr0); if (!(cr0 & CR0_TS_MASK)) {
vmcs_writel(GUEST_CR0, vcpu->fpu_active = 1;
(cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK);
vcpu->cr0 = cr0; }
}
/*
* Used when restoring the VM to avoid corrupting segment registers
*/
static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
{
if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
enter_rmode(vcpu);
vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0);
update_exception_bitmap(vcpu);
vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, vmcs_writel(GUEST_CR0,
(cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
...@@ -805,6 +864,12 @@ static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0) ...@@ -805,6 +864,12 @@ static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{ {
vmcs_writel(GUEST_CR3, cr3); vmcs_writel(GUEST_CR3, cr3);
if (!(vcpu->cr0 & CR0_TS_MASK)) {
vcpu->fpu_active = 0;
vmcs_set_bits(GUEST_CR0, CR0_TS_MASK);
vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
}
} }
static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
...@@ -835,6 +900,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) ...@@ -835,6 +900,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
msr->data = efer & ~EFER_LME; msr->data = efer & ~EFER_LME;
} }
setup_msrs(vcpu);
} }
#endif #endif
...@@ -878,7 +944,14 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, ...@@ -878,7 +944,14 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
vmcs_writel(sf->base, var->base); vmcs_writel(sf->base, var->base);
vmcs_write32(sf->limit, var->limit); vmcs_write32(sf->limit, var->limit);
vmcs_write16(sf->selector, var->selector); vmcs_write16(sf->selector, var->selector);
if (var->unusable) if (vcpu->rmode.active && var->s) {
/*
* Hack real-mode segments into vm86 compatibility.
*/
if (var->base == 0xffff0000 && var->selector == 0xf000)
vmcs_writel(sf->base, 0xf0000);
ar = 0xf3;
} else if (var->unusable)
ar = 1 << 16; ar = 1 << 16;
else { else {
ar = var->type & 15; ar = var->type & 15;
...@@ -933,9 +1006,9 @@ static int init_rmode_tss(struct kvm* kvm) ...@@ -933,9 +1006,9 @@ static int init_rmode_tss(struct kvm* kvm)
gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
char *page; char *page;
p1 = _gfn_to_page(kvm, fn++); p1 = gfn_to_page(kvm, fn++);
p2 = _gfn_to_page(kvm, fn++); p2 = gfn_to_page(kvm, fn++);
p3 = _gfn_to_page(kvm, fn); p3 = gfn_to_page(kvm, fn);
if (!p1 || !p2 || !p3) { if (!p1 || !p2 || !p3) {
kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__); kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__);
...@@ -991,7 +1064,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) ...@@ -991,7 +1064,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
struct descriptor_table dt; struct descriptor_table dt;
int i; int i;
int ret = 0; int ret = 0;
int nr_good_msrs;
extern asmlinkage void kvm_vmx_return(void); extern asmlinkage void kvm_vmx_return(void);
if (!init_rmode_tss(vcpu->kvm)) { if (!init_rmode_tss(vcpu->kvm)) {
...@@ -1136,23 +1208,17 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) ...@@ -1136,23 +1208,17 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->host_msrs[j].reserved = 0; vcpu->host_msrs[j].reserved = 0;
vcpu->host_msrs[j].data = data; vcpu->host_msrs[j].data = data;
vcpu->guest_msrs[j] = vcpu->host_msrs[j]; vcpu->guest_msrs[j] = vcpu->host_msrs[j];
#ifdef CONFIG_X86_64
if (index == MSR_KERNEL_GS_BASE)
msr_offset_kernel_gs_base = j;
#endif
++vcpu->nmsrs; ++vcpu->nmsrs;
} }
printk(KERN_DEBUG "kvm: msrs: %d\n", vcpu->nmsrs);
nr_good_msrs = vcpu->nmsrs - NR_BAD_MSRS; setup_msrs(vcpu);
vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS));
vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS));
vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
virt_to_phys(vcpu->host_msrs + NR_BAD_MSRS));
vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS, vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS,
(HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */ (HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
/* 22.2.1, 20.8.1 */ /* 22.2.1, 20.8.1 */
vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS, vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS,
...@@ -1164,7 +1230,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) ...@@ -1164,7 +1230,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
vmcs_writel(TPR_THRESHOLD, 0); vmcs_writel(TPR_THRESHOLD, 0);
#endif #endif
vmcs_writel(CR0_GUEST_HOST_MASK, KVM_GUEST_CR0_MASK); vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
vcpu->cr0 = 0x60000010; vcpu->cr0 = 0x60000010;
...@@ -1190,7 +1256,7 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) ...@@ -1190,7 +1256,7 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
u16 sp = vmcs_readl(GUEST_RSP); u16 sp = vmcs_readl(GUEST_RSP);
u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT); u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT);
if (sp > ss_limit || sp - 6 > sp) { if (sp > ss_limit || sp < 6 ) {
vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n", vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n",
__FUNCTION__, __FUNCTION__,
vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RSP),
...@@ -1330,6 +1396,15 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1330,6 +1396,15 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
asm ("int $2"); asm ("int $2");
return 1; return 1;
} }
if (is_no_device(intr_info)) {
vcpu->fpu_active = 1;
vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
if (!(vcpu->cr0 & CR0_TS_MASK))
vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
return 1;
}
error_code = 0; error_code = 0;
rip = vmcs_readl(GUEST_RIP); rip = vmcs_readl(GUEST_RIP);
if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
...@@ -1355,7 +1430,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1355,7 +1430,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
case EMULATE_DONE: case EMULATE_DONE:
return 1; return 1;
case EMULATE_DO_MMIO: case EMULATE_DO_MMIO:
++kvm_stat.mmio_exits; ++vcpu->stat.mmio_exits;
kvm_run->exit_reason = KVM_EXIT_MMIO; kvm_run->exit_reason = KVM_EXIT_MMIO;
return 0; return 0;
case EMULATE_FAIL: case EMULATE_FAIL:
...@@ -1384,7 +1459,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1384,7 +1459,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int handle_external_interrupt(struct kvm_vcpu *vcpu, static int handle_external_interrupt(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run) struct kvm_run *kvm_run)
{ {
++kvm_stat.irq_exits; ++vcpu->stat.irq_exits;
return 1; return 1;
} }
...@@ -1394,7 +1469,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1394,7 +1469,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0; return 0;
} }
static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count)
{ {
u64 inst; u64 inst;
gva_t rip; gva_t rip;
...@@ -1439,33 +1514,35 @@ static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) ...@@ -1439,33 +1514,35 @@ static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
done: done:
countr_size *= 8; countr_size *= 8;
*count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size));
//printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]);
return 1; return 1;
} }
static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
u64 exit_qualification; u64 exit_qualification;
int size, down, in, string, rep;
unsigned port;
unsigned long count;
gva_t address;
++kvm_stat.io_exits; ++vcpu->stat.io_exits;
exit_qualification = vmcs_read64(EXIT_QUALIFICATION); exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
kvm_run->exit_reason = KVM_EXIT_IO; in = (exit_qualification & 8) != 0;
if (exit_qualification & 8) size = (exit_qualification & 7) + 1;
kvm_run->io.direction = KVM_EXIT_IO_IN; string = (exit_qualification & 16) != 0;
else down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
kvm_run->io.direction = KVM_EXIT_IO_OUT; count = 1;
kvm_run->io.size = (exit_qualification & 7) + 1; rep = (exit_qualification & 32) != 0;
kvm_run->io.string = (exit_qualification & 16) != 0; port = exit_qualification >> 16;
kvm_run->io.string_down address = 0;
= (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; if (string) {
kvm_run->io.rep = (exit_qualification & 32) != 0; if (rep && !get_io_count(vcpu, &count))
kvm_run->io.port = exit_qualification >> 16;
if (kvm_run->io.string) {
if (!get_io_count(vcpu, &kvm_run->io.count))
return 1; return 1;
kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); address = vmcs_readl(GUEST_LINEAR_ADDRESS);
} else }
kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down,
return 0; address, rep, port);
} }
static void static void
...@@ -1514,6 +1591,15 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1514,6 +1591,15 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1; return 1;
}; };
break; break;
case 2: /* clts */
vcpu_load_rsp_rip(vcpu);
vcpu->fpu_active = 1;
vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
vcpu->cr0 &= ~CR0_TS_MASK;
vmcs_writel(CR0_READ_SHADOW, vcpu->cr0);
skip_emulated_instruction(vcpu);
return 1;
case 1: /*mov from cr*/ case 1: /*mov from cr*/
switch (cr) { switch (cr) {
case 3: case 3:
...@@ -1523,8 +1609,6 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1523,8 +1609,6 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
skip_emulated_instruction(vcpu); skip_emulated_instruction(vcpu);
return 1; return 1;
case 8: case 8:
printk(KERN_DEBUG "handle_cr: read CR8 "
"cpu erratum AA15\n");
vcpu_load_rsp_rip(vcpu); vcpu_load_rsp_rip(vcpu);
vcpu->regs[reg] = vcpu->cr8; vcpu->regs[reg] = vcpu->cr8;
vcpu_put_rsp_rip(vcpu); vcpu_put_rsp_rip(vcpu);
...@@ -1583,8 +1667,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1583,8 +1667,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
kvm_run->exit_reason = KVM_EXIT_CPUID; kvm_emulate_cpuid(vcpu);
return 0; return 1;
} }
static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
...@@ -1639,7 +1723,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, ...@@ -1639,7 +1723,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
if (kvm_run->request_interrupt_window && if (kvm_run->request_interrupt_window &&
!vcpu->irq_summary) { !vcpu->irq_summary) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
++kvm_stat.irq_window_exits; ++vcpu->stat.irq_window_exits;
return 0; return 0;
} }
return 1; return 1;
...@@ -1652,13 +1736,13 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1652,13 +1736,13 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1; return 1;
kvm_run->exit_reason = KVM_EXIT_HLT; kvm_run->exit_reason = KVM_EXIT_HLT;
++kvm_stat.halt_exits; ++vcpu->stat.halt_exits;
return 0; return 0;
} }
static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3); skip_emulated_instruction(vcpu);
return kvm_hypercall(vcpu, kvm_run); return kvm_hypercall(vcpu, kvm_run);
} }
...@@ -1699,7 +1783,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ...@@ -1699,7 +1783,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
exit_reason != EXIT_REASON_EXCEPTION_NMI ) exit_reason != EXIT_REASON_EXCEPTION_NMI )
printk(KERN_WARNING "%s: unexpected, valid vectoring info and " printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
"exit reason is 0x%x\n", __FUNCTION__, exit_reason); "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
if (exit_reason < kvm_vmx_max_exit_handlers if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason]) && kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
...@@ -1763,11 +1846,21 @@ again: ...@@ -1763,11 +1846,21 @@ again:
if (vcpu->guest_debug.enabled) if (vcpu->guest_debug.enabled)
kvm_guest_debug_pre(vcpu); kvm_guest_debug_pre(vcpu);
fx_save(vcpu->host_fx_image); if (vcpu->fpu_active) {
fx_restore(vcpu->guest_fx_image); fx_save(vcpu->host_fx_image);
fx_restore(vcpu->guest_fx_image);
}
/*
* Loading guest fpu may have cleared host cr0.ts
*/
vmcs_writel(HOST_CR0, read_cr0());
save_msrs(vcpu->host_msrs, vcpu->nmsrs); #ifdef CONFIG_X86_64
load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); if (is_long_mode(vcpu)) {
save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1);
load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
}
#endif
asm ( asm (
/* Store host registers */ /* Store host registers */
...@@ -1909,21 +2002,28 @@ again: ...@@ -1909,21 +2002,28 @@ again:
reload_tss(); reload_tss();
} }
++kvm_stat.exits; ++vcpu->stat.exits;
save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); #ifdef CONFIG_X86_64
load_msrs(vcpu->host_msrs, NR_BAD_MSRS); if (is_long_mode(vcpu)) {
save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
load_msrs(vcpu->host_msrs, NR_BAD_MSRS);
}
#endif
if (vcpu->fpu_active) {
fx_save(vcpu->guest_fx_image);
fx_restore(vcpu->host_fx_image);
}
fx_save(vcpu->guest_fx_image);
fx_restore(vcpu->host_fx_image);
vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
kvm_run->exit_type = 0;
if (fail) { if (fail) {
kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); kvm_run->fail_entry.hardware_entry_failure_reason
= vmcs_read32(VM_INSTRUCTION_ERROR);
r = 0; r = 0;
} else { } else {
/* /*
...@@ -1933,19 +2033,20 @@ again: ...@@ -1933,19 +2033,20 @@ again:
profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
vcpu->launched = 1; vcpu->launched = 1;
kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT;
r = kvm_handle_exit(kvm_run, vcpu); r = kvm_handle_exit(kvm_run, vcpu);
if (r > 0) { if (r > 0) {
/* Give scheduler a change to reschedule. */ /* Give scheduler a change to reschedule. */
if (signal_pending(current)) { if (signal_pending(current)) {
++kvm_stat.signal_exits; ++vcpu->stat.signal_exits;
post_kvm_run_save(vcpu, kvm_run); post_kvm_run_save(vcpu, kvm_run);
kvm_run->exit_reason = KVM_EXIT_INTR;
return -EINTR; return -EINTR;
} }
if (dm_request_for_irq_injection(vcpu, kvm_run)) { if (dm_request_for_irq_injection(vcpu, kvm_run)) {
++kvm_stat.request_irq_exits; ++vcpu->stat.request_irq_exits;
post_kvm_run_save(vcpu, kvm_run); post_kvm_run_save(vcpu, kvm_run);
kvm_run->exit_reason = KVM_EXIT_INTR;
return -EINTR; return -EINTR;
} }
...@@ -1969,7 +2070,7 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, ...@@ -1969,7 +2070,7 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
{ {
u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
++kvm_stat.pf_guest; ++vcpu->stat.pf_guest;
if (is_page_fault(vect_info)) { if (is_page_fault(vect_info)) {
printk(KERN_DEBUG "inject_page_fault: " printk(KERN_DEBUG "inject_page_fault: "
...@@ -2026,6 +2127,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) ...@@ -2026,6 +2127,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
vmcs_clear(vmcs); vmcs_clear(vmcs);
vcpu->vmcs = vmcs; vcpu->vmcs = vmcs;
vcpu->launched = 0; vcpu->launched = 0;
vcpu->fpu_active = 1;
return 0; return 0;
...@@ -2062,9 +2164,8 @@ static struct kvm_arch_ops vmx_arch_ops = { ...@@ -2062,9 +2164,8 @@ static struct kvm_arch_ops vmx_arch_ops = {
.get_segment = vmx_get_segment, .get_segment = vmx_get_segment,
.set_segment = vmx_set_segment, .set_segment = vmx_set_segment,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits, .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits, .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
.set_cr0 = vmx_set_cr0, .set_cr0 = vmx_set_cr0,
.set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch,
.set_cr3 = vmx_set_cr3, .set_cr3 = vmx_set_cr3,
.set_cr4 = vmx_set_cr4, .set_cr4 = vmx_set_cr4,
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
......
...@@ -833,8 +833,9 @@ done_prefixes: ...@@ -833,8 +833,9 @@ done_prefixes:
dst.ptr = (unsigned long *)cr2; dst.ptr = (unsigned long *)cr2;
dst.bytes = (d & ByteOp) ? 1 : op_bytes; dst.bytes = (d & ByteOp) ? 1 : op_bytes;
if (d & BitOp) { if (d & BitOp) {
dst.ptr += src.val / BITS_PER_LONG; unsigned long mask = ~(dst.bytes * 8 - 1);
dst.bytes = sizeof(long);
dst.ptr = (void *)dst.ptr + (src.val & mask) / 8;
} }
if (!(d & Mov) && /* optimisation - avoid slow emulated read */ if (!(d & Mov) && /* optimisation - avoid slow emulated read */
((rc = ops->read_emulated((unsigned long)dst.ptr, ((rc = ops->read_emulated((unsigned long)dst.ptr,
...@@ -1044,7 +1045,7 @@ done_prefixes: ...@@ -1044,7 +1045,7 @@ done_prefixes:
if ((rc = ops->write_std( if ((rc = ops->write_std(
register_address(ctxt->ss_base, register_address(ctxt->ss_base,
_regs[VCPU_REGS_RSP]), _regs[VCPU_REGS_RSP]),
dst.val, dst.bytes, ctxt)) != 0) &dst.val, dst.bytes, ctxt)) != 0)
goto done; goto done;
dst.val = dst.orig_val; /* skanky: disable writeback */ dst.val = dst.orig_val; /* skanky: disable writeback */
break; break;
...@@ -1077,12 +1078,12 @@ writeback: ...@@ -1077,12 +1078,12 @@ writeback:
case OP_MEM: case OP_MEM:
if (lock_prefix) if (lock_prefix)
rc = ops->cmpxchg_emulated((unsigned long)dst. rc = ops->cmpxchg_emulated((unsigned long)dst.
ptr, dst.orig_val, ptr, &dst.orig_val,
dst.val, dst.bytes, &dst.val, dst.bytes,
ctxt); ctxt);
else else
rc = ops->write_emulated((unsigned long)dst.ptr, rc = ops->write_emulated((unsigned long)dst.ptr,
dst.val, dst.bytes, &dst.val, dst.bytes,
ctxt); ctxt);
if (rc != 0) if (rc != 0)
goto done; goto done;
...@@ -1320,36 +1321,8 @@ twobyte_special_insn: ...@@ -1320,36 +1321,8 @@ twobyte_special_insn:
realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags); realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags);
break; break;
case 0xc7: /* Grp9 (cmpxchg8b) */ case 0xc7: /* Grp9 (cmpxchg8b) */
#if defined(__i386__)
{
unsigned long old_lo, old_hi;
if (((rc = ops->read_emulated(cr2 + 0, &old_lo, 4,
ctxt)) != 0)
|| ((rc = ops->read_emulated(cr2 + 4, &old_hi, 4,
ctxt)) != 0))
goto done;
if ((old_lo != _regs[VCPU_REGS_RAX])
|| (old_hi != _regs[VCPU_REGS_RDX])) {
_regs[VCPU_REGS_RAX] = old_lo;
_regs[VCPU_REGS_RDX] = old_hi;
_eflags &= ~EFLG_ZF;
} else if (ops->cmpxchg8b_emulated == NULL) {
rc = X86EMUL_UNHANDLEABLE;
goto done;
} else {
if ((rc = ops->cmpxchg8b_emulated(cr2, old_lo,
old_hi,
_regs[VCPU_REGS_RBX],
_regs[VCPU_REGS_RCX],
ctxt)) != 0)
goto done;
_eflags |= EFLG_ZF;
}
break;
}
#elif defined(CONFIG_X86_64)
{ {
unsigned long old, new; u64 old, new;
if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0) if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0)
goto done; goto done;
if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) || if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) ||
...@@ -1358,15 +1331,15 @@ twobyte_special_insn: ...@@ -1358,15 +1331,15 @@ twobyte_special_insn:
_regs[VCPU_REGS_RDX] = (u32) (old >> 32); _regs[VCPU_REGS_RDX] = (u32) (old >> 32);
_eflags &= ~EFLG_ZF; _eflags &= ~EFLG_ZF;
} else { } else {
new = (_regs[VCPU_REGS_RCX] << 32) | (u32) _regs[VCPU_REGS_RBX]; new = ((u64)_regs[VCPU_REGS_RCX] << 32)
if ((rc = ops->cmpxchg_emulated(cr2, old, | (u32) _regs[VCPU_REGS_RBX];
new, 8, ctxt)) != 0) if ((rc = ops->cmpxchg_emulated(cr2, &old,
&new, 8, ctxt)) != 0)
goto done; goto done;
_eflags |= EFLG_ZF; _eflags |= EFLG_ZF;
} }
break; break;
} }
#endif
} }
goto writeback; goto writeback;
......
...@@ -59,8 +59,7 @@ struct x86_emulate_ops { ...@@ -59,8 +59,7 @@ struct x86_emulate_ops {
* @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory. * @bytes: [IN ] Number of bytes to read from memory.
*/ */
int (*read_std)(unsigned long addr, int (*read_std)(unsigned long addr, void *val,
unsigned long *val,
unsigned int bytes, struct x86_emulate_ctxt * ctxt); unsigned int bytes, struct x86_emulate_ctxt * ctxt);
/* /*
...@@ -71,8 +70,7 @@ struct x86_emulate_ops { ...@@ -71,8 +70,7 @@ struct x86_emulate_ops {
* required). * required).
* @bytes: [IN ] Number of bytes to write to memory. * @bytes: [IN ] Number of bytes to write to memory.
*/ */
int (*write_std)(unsigned long addr, int (*write_std)(unsigned long addr, const void *val,
unsigned long val,
unsigned int bytes, struct x86_emulate_ctxt * ctxt); unsigned int bytes, struct x86_emulate_ctxt * ctxt);
/* /*
...@@ -82,7 +80,7 @@ struct x86_emulate_ops { ...@@ -82,7 +80,7 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to read from memory. * @bytes: [IN ] Number of bytes to read from memory.
*/ */
int (*read_emulated) (unsigned long addr, int (*read_emulated) (unsigned long addr,
unsigned long *val, void *val,
unsigned int bytes, unsigned int bytes,
struct x86_emulate_ctxt * ctxt); struct x86_emulate_ctxt * ctxt);
...@@ -94,7 +92,7 @@ struct x86_emulate_ops { ...@@ -94,7 +92,7 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to write to memory. * @bytes: [IN ] Number of bytes to write to memory.
*/ */
int (*write_emulated) (unsigned long addr, int (*write_emulated) (unsigned long addr,
unsigned long val, const void *val,
unsigned int bytes, unsigned int bytes,
struct x86_emulate_ctxt * ctxt); struct x86_emulate_ctxt * ctxt);
...@@ -107,29 +105,11 @@ struct x86_emulate_ops { ...@@ -107,29 +105,11 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to access using CMPXCHG. * @bytes: [IN ] Number of bytes to access using CMPXCHG.
*/ */
int (*cmpxchg_emulated) (unsigned long addr, int (*cmpxchg_emulated) (unsigned long addr,
unsigned long old, const void *old,
unsigned long new, const void *new,
unsigned int bytes, unsigned int bytes,
struct x86_emulate_ctxt * ctxt); struct x86_emulate_ctxt * ctxt);
/*
* cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
* emulated/special memory area.
* @addr: [IN ] Linear address to access.
* @old: [IN ] Value expected to be current at @addr.
* @new: [IN ] Value to write to @addr.
* NOTES:
* 1. This function is only ever called when emulating a real CMPXCHG8B.
* 2. This function is *never* called on x86/64 systems.
* 2. Not defining this function (i.e., specifying NULL) is equivalent
* to defining a function that always returns X86EMUL_UNHANDLEABLE.
*/
int (*cmpxchg8b_emulated) (unsigned long addr,
unsigned long old_lo,
unsigned long old_hi,
unsigned long new_lo,
unsigned long new_hi,
struct x86_emulate_ctxt * ctxt);
}; };
struct cpu_user_regs; struct cpu_user_regs;
......
...@@ -96,6 +96,7 @@ header-y += iso_fs.h ...@@ -96,6 +96,7 @@ header-y += iso_fs.h
header-y += ixjuser.h header-y += ixjuser.h
header-y += jffs2.h header-y += jffs2.h
header-y += keyctl.h header-y += keyctl.h
header-y += kvm.h
header-y += limits.h header-y += limits.h
header-y += lock_dlm_plock.h header-y += lock_dlm_plock.h
header-y += magic.h header-y += magic.h
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#include <asm/types.h> #include <asm/types.h>
#include <linux/ioctl.h> #include <linux/ioctl.h>
#define KVM_API_VERSION 4 #define KVM_API_VERSION 12
/* /*
* Architectural interrupt line count, and the size of the bitmap needed * Architectural interrupt line count, and the size of the bitmap needed
...@@ -33,37 +33,39 @@ struct kvm_memory_region { ...@@ -33,37 +33,39 @@ struct kvm_memory_region {
/* for kvm_memory_region::flags */ /* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL #define KVM_MEM_LOG_DIRTY_PAGES 1UL
struct kvm_memory_alias {
#define KVM_EXIT_TYPE_FAIL_ENTRY 1 __u32 slot; /* this has a different namespace than memory slots */
#define KVM_EXIT_TYPE_VM_EXIT 2 __u32 flags;
__u64 guest_phys_addr;
__u64 memory_size;
__u64 target_phys_addr;
};
enum kvm_exit_reason { enum kvm_exit_reason {
KVM_EXIT_UNKNOWN = 0, KVM_EXIT_UNKNOWN = 0,
KVM_EXIT_EXCEPTION = 1, KVM_EXIT_EXCEPTION = 1,
KVM_EXIT_IO = 2, KVM_EXIT_IO = 2,
KVM_EXIT_CPUID = 3, KVM_EXIT_HYPERCALL = 3,
KVM_EXIT_DEBUG = 4, KVM_EXIT_DEBUG = 4,
KVM_EXIT_HLT = 5, KVM_EXIT_HLT = 5,
KVM_EXIT_MMIO = 6, KVM_EXIT_MMIO = 6,
KVM_EXIT_IRQ_WINDOW_OPEN = 7, KVM_EXIT_IRQ_WINDOW_OPEN = 7,
KVM_EXIT_SHUTDOWN = 8, KVM_EXIT_SHUTDOWN = 8,
KVM_EXIT_FAIL_ENTRY = 9,
KVM_EXIT_INTR = 10,
}; };
/* for KVM_RUN */ /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run { struct kvm_run {
/* in */ /* in */
__u32 emulated; /* skip current instruction */
__u32 mmio_completed; /* mmio request completed */
__u8 request_interrupt_window; __u8 request_interrupt_window;
__u8 padding1[7]; __u8 padding1[7];
/* out */ /* out */
__u32 exit_type;
__u32 exit_reason; __u32 exit_reason;
__u32 instruction_length;
__u8 ready_for_interrupt_injection; __u8 ready_for_interrupt_injection;
__u8 if_flag; __u8 if_flag;
__u16 padding2; __u8 padding2[2];
/* in (pre_kvm_run), out (post_kvm_run) */ /* in (pre_kvm_run), out (post_kvm_run) */
__u64 cr8; __u64 cr8;
...@@ -72,29 +74,26 @@ struct kvm_run { ...@@ -72,29 +74,26 @@ struct kvm_run {
union { union {
/* KVM_EXIT_UNKNOWN */ /* KVM_EXIT_UNKNOWN */
struct { struct {
__u32 hardware_exit_reason; __u64 hardware_exit_reason;
} hw; } hw;
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
} fail_entry;
/* KVM_EXIT_EXCEPTION */ /* KVM_EXIT_EXCEPTION */
struct { struct {
__u32 exception; __u32 exception;
__u32 error_code; __u32 error_code;
} ex; } ex;
/* KVM_EXIT_IO */ /* KVM_EXIT_IO */
struct { struct kvm_io {
#define KVM_EXIT_IO_IN 0 #define KVM_EXIT_IO_IN 0
#define KVM_EXIT_IO_OUT 1 #define KVM_EXIT_IO_OUT 1
__u8 direction; __u8 direction;
__u8 size; /* bytes */ __u8 size; /* bytes */
__u8 string;
__u8 string_down;
__u8 rep;
__u8 pad;
__u16 port; __u16 port;
__u64 count; __u32 count;
union { __u64 data_offset; /* relative to kvm_run start */
__u64 address;
__u32 value;
};
} io; } io;
struct { struct {
} debug; } debug;
...@@ -105,6 +104,13 @@ struct kvm_run { ...@@ -105,6 +104,13 @@ struct kvm_run {
__u32 len; __u32 len;
__u8 is_write; __u8 is_write;
} mmio; } mmio;
/* KVM_EXIT_HYPERCALL */
struct {
__u64 args[6];
__u64 ret;
__u32 longmode;
__u32 pad;
} hypercall;
}; };
}; };
...@@ -118,6 +124,21 @@ struct kvm_regs { ...@@ -118,6 +124,21 @@ struct kvm_regs {
__u64 rip, rflags; __u64 rip, rflags;
}; };
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
__u16 fcw;
__u16 fsw;
__u8 ftwx; /* in fxsave format */
__u8 pad1;
__u16 last_opcode;
__u64 last_ip;
__u64 last_dp;
__u8 xmm[16][16];
__u32 mxcsr;
__u32 pad2;
};
struct kvm_segment { struct kvm_segment {
__u64 base; __u64 base;
__u32 limit; __u32 limit;
...@@ -210,38 +231,74 @@ struct kvm_dirty_log { ...@@ -210,38 +231,74 @@ struct kvm_dirty_log {
}; };
}; };
struct kvm_cpuid_entry {
__u32 function;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding;
};
/* for KVM_SET_CPUID */
struct kvm_cpuid {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry entries[0];
};
/* for KVM_SET_SIGNAL_MASK */
struct kvm_signal_mask {
__u32 len;
__u8 sigset[0];
};
#define KVMIO 0xAE #define KVMIO 0xAE
/* /*
* ioctls for /dev/kvm fds: * ioctls for /dev/kvm fds:
*/ */
#define KVM_GET_API_VERSION _IO(KVMIO, 1) #define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
#define KVM_CREATE_VM _IO(KVMIO, 2) /* returns a VM fd */ #define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */
#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list)
/*
* Check if a kvm extension is available. Argument is extension number,
* return is 1 (yes) or 0 (no, sorry).
*/
#define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03)
/*
* Get size for mmap(vcpu_fd)
*/
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
/* /*
* ioctls for VM fds * ioctls for VM fds
*/ */
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region) #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
/* /*
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
* a vcpu fd. * a vcpu fd.
*/ */
#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int) #define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
/* /*
* ioctls for vcpu fds * ioctls for vcpu fds
*/ */
#define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) #define KVM_RUN _IO(KVMIO, 0x80)
#define KVM_GET_REGS _IOR(KVMIO, 3, struct kvm_regs) #define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs)
#define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) #define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs)
#define KVM_GET_SREGS _IOR(KVMIO, 5, struct kvm_sregs) #define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs)
#define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs) #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
#define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation) #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
#define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) #define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
#define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
#define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask)
#define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu)
#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu)
#endif #endif
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#define TUN_MINOR 200 #define TUN_MINOR 200
#define HPET_MINOR 228 #define HPET_MINOR 228
#define KVM_MINOR 232
struct device; struct device;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment