Commit 7993ba43 authored by Avi Kivity's avatar Avi Kivity Committed by Linus Torvalds

[PATCH] KVM: MMU: Perform access checks in walk_addr()

Check pte permission bits in walk_addr(), instead of scattering the checks all
over the code.  This has the following benefits:

1. We no longer set the accessed bit for accessed which fail permission checks.
2. Setting the accessed bit is simplified.
3. Under some circumstances, we used to pretend a page fault was fixed when
   it would actually fail the access checks.  This caused an unnecessary
   vmexit.
4. The error code for guest page faults is now correct.

The fix helps netbsd further along booting, and allows kvm to pass the new mmu
testsuite.
Signed-off-by: default avatarAvi Kivity <avi@qumranet.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 6f00e68f
...@@ -992,16 +992,6 @@ static inline int fix_read_pf(u64 *shadow_ent) ...@@ -992,16 +992,6 @@ static inline int fix_read_pf(u64 *shadow_ent)
return 0; return 0;
} }
static int may_access(u64 pte, int write, int user)
{
if (user && !(pte & PT_USER_MASK))
return 0;
if (write && !(pte & PT_WRITABLE_MASK))
return 0;
return 1;
}
static void paging_free(struct kvm_vcpu *vcpu) static void paging_free(struct kvm_vcpu *vcpu)
{ {
nonpaging_free(vcpu); nonpaging_free(vcpu);
......
...@@ -63,13 +63,15 @@ struct guest_walker { ...@@ -63,13 +63,15 @@ struct guest_walker {
pt_element_t *ptep; pt_element_t *ptep;
pt_element_t inherited_ar; pt_element_t inherited_ar;
gfn_t gfn; gfn_t gfn;
u32 error_code;
}; };
/* /*
* Fetch a guest pte for a guest virtual address * Fetch a guest pte for a guest virtual address
*/ */
static void FNAME(walk_addr)(struct guest_walker *walker, static int FNAME(walk_addr)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, gva_t addr) struct kvm_vcpu *vcpu, gva_t addr,
int write_fault, int user_fault)
{ {
hpa_t hpa; hpa_t hpa;
struct kvm_memory_slot *slot; struct kvm_memory_slot *slot;
...@@ -86,7 +88,7 @@ static void FNAME(walk_addr)(struct guest_walker *walker, ...@@ -86,7 +88,7 @@ static void FNAME(walk_addr)(struct guest_walker *walker,
walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
root = *walker->ptep; root = *walker->ptep;
if (!(root & PT_PRESENT_MASK)) if (!(root & PT_PRESENT_MASK))
return; goto not_present;
--walker->level; --walker->level;
} }
#endif #endif
...@@ -111,11 +113,18 @@ static void FNAME(walk_addr)(struct guest_walker *walker, ...@@ -111,11 +113,18 @@ static void FNAME(walk_addr)(struct guest_walker *walker,
ASSERT(((unsigned long)walker->table & PAGE_MASK) == ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
((unsigned long)ptep & PAGE_MASK)); ((unsigned long)ptep & PAGE_MASK));
if (is_present_pte(*ptep) && !(*ptep & PT_ACCESSED_MASK))
*ptep |= PT_ACCESSED_MASK;
if (!is_present_pte(*ptep)) if (!is_present_pte(*ptep))
break; goto not_present;
if (write_fault && !is_writeble_pte(*ptep))
if (user_fault || is_write_protection(vcpu))
goto access_error;
if (user_fault && !(*ptep & PT_USER_MASK))
goto access_error;
if (!(*ptep & PT_ACCESSED_MASK))
*ptep |= PT_ACCESSED_MASK; /* avoid rmw */
if (walker->level == PT_PAGE_TABLE_LEVEL) { if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = (*ptep & PT_BASE_ADDR_MASK) walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
...@@ -146,6 +155,21 @@ static void FNAME(walk_addr)(struct guest_walker *walker, ...@@ -146,6 +155,21 @@ static void FNAME(walk_addr)(struct guest_walker *walker,
} }
walker->ptep = ptep; walker->ptep = ptep;
pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
return 1;
not_present:
walker->error_code = 0;
goto err;
access_error:
walker->error_code = PFERR_PRESENT_MASK;
err:
if (write_fault)
walker->error_code |= PFERR_WRITE_MASK;
if (user_fault)
walker->error_code |= PFERR_USER_MASK;
return 0;
} }
static void FNAME(release_walker)(struct guest_walker *walker) static void FNAME(release_walker)(struct guest_walker *walker)
...@@ -347,7 +371,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -347,7 +371,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
u32 error_code) u32 error_code)
{ {
int write_fault = error_code & PFERR_WRITE_MASK; int write_fault = error_code & PFERR_WRITE_MASK;
int pte_present = error_code & PFERR_PRESENT_MASK;
int user_fault = error_code & PFERR_USER_MASK; int user_fault = error_code & PFERR_USER_MASK;
struct guest_walker walker; struct guest_walker walker;
u64 *shadow_pte; u64 *shadow_pte;
...@@ -365,19 +388,19 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -365,19 +388,19 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
/* /*
* Look up the shadow pte for the faulting address. * Look up the shadow pte for the faulting address.
*/ */
FNAME(walk_addr)(&walker, vcpu, addr); r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault);
shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
/* /*
* The page is not mapped by the guest. Let the guest handle it. * The page is not mapped by the guest. Let the guest handle it.
*/ */
if (!shadow_pte) { if (!r) {
pgprintk("%s: not mapped\n", __FUNCTION__); pgprintk("%s: guest page fault\n", __FUNCTION__);
inject_page_fault(vcpu, addr, error_code); inject_page_fault(vcpu, addr, walker.error_code);
FNAME(release_walker)(&walker); FNAME(release_walker)(&walker);
return 0; return 0;
} }
shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__,
shadow_pte, *shadow_pte); shadow_pte, *shadow_pte);
...@@ -399,22 +422,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -399,22 +422,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
* mmio: emulate if accessible, otherwise its a guest fault. * mmio: emulate if accessible, otherwise its a guest fault.
*/ */
if (is_io_pte(*shadow_pte)) { if (is_io_pte(*shadow_pte)) {
if (may_access(*shadow_pte, write_fault, user_fault)) return 1;
return 1;
pgprintk("%s: io work, no access\n", __FUNCTION__);
inject_page_fault(vcpu, addr,
error_code | PFERR_PRESENT_MASK);
kvm_mmu_audit(vcpu, "post page fault (io)");
return 0;
}
/*
* pte not present, guest page fault.
*/
if (pte_present && !fixed && !write_pt) {
inject_page_fault(vcpu, addr, error_code);
kvm_mmu_audit(vcpu, "post page fault (guest)");
return 0;
} }
++kvm_stat.pf_fixed; ++kvm_stat.pf_fixed;
...@@ -429,7 +437,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) ...@@ -429,7 +437,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
pt_element_t guest_pte; pt_element_t guest_pte;
gpa_t gpa; gpa_t gpa;
FNAME(walk_addr)(&walker, vcpu, vaddr); FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0);
guest_pte = *walker.ptep; guest_pte = *walker.ptep;
FNAME(release_walker)(&walker); FNAME(release_walker)(&walker);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment