Commit 7e4e4056 authored by Joerg Roedel's avatar Joerg Roedel Committed by Avi Kivity

KVM: MMU: shadow support for 1gb pages

This patch adds support for shadow paging to the 1gb page table code in KVM.
With this code the guest can use 1gb pages even if the host does not support
them.

[ Marcelo: fix shadow page collision on pmd level if a guest 1gb page is mapped
           with 4kb ptes on host level ]
Signed-off-by: default avatarJoerg Roedel <joerg.roedel@amd.com>
Signed-off-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: default avatarAvi Kivity <avi@redhat.com>
parent e04da980
...@@ -315,7 +315,6 @@ struct kvm_vcpu_arch { ...@@ -315,7 +315,6 @@ struct kvm_vcpu_arch {
struct { struct {
gfn_t gfn; /* presumed gfn during guest pte update */ gfn_t gfn; /* presumed gfn during guest pte update */
pfn_t pfn; /* pfn corresponding to that gfn */ pfn_t pfn; /* pfn corresponding to that gfn */
int level;
unsigned long mmu_seq; unsigned long mmu_seq;
} update_pte; } update_pte;
......
...@@ -2478,12 +2478,9 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, ...@@ -2478,12 +2478,9 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
const void *new) const void *new)
{ {
if (sp->role.level != PT_PAGE_TABLE_LEVEL) { if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
if (vcpu->arch.update_pte.level == PT_PAGE_TABLE_LEVEL ||
sp->role.glevels == PT32_ROOT_LEVEL) {
++vcpu->kvm->stat.mmu_pde_zapped; ++vcpu->kvm->stat.mmu_pde_zapped;
return; return;
} }
}
++vcpu->kvm->stat.mmu_pte_updated; ++vcpu->kvm->stat.mmu_pte_updated;
if (sp->role.glevels == PT32_ROOT_LEVEL) if (sp->role.glevels == PT32_ROOT_LEVEL)
...@@ -2528,8 +2525,6 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -2528,8 +2525,6 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
u64 gpte = 0; u64 gpte = 0;
pfn_t pfn; pfn_t pfn;
vcpu->arch.update_pte.level = PT_PAGE_TABLE_LEVEL;
if (bytes != 4 && bytes != 8) if (bytes != 4 && bytes != 8)
return; return;
...@@ -2557,11 +2552,6 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -2557,11 +2552,6 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
return; return;
gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
if (is_large_pte(gpte) &&
(mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL)) {
gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
vcpu->arch.update_pte.level = PT_DIRECTORY_LEVEL;
}
vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb(); smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, gfn); pfn = gfn_to_pfn(vcpu->kvm, gfn);
......
...@@ -256,7 +256,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, ...@@ -256,7 +256,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
pt_element_t gpte; pt_element_t gpte;
unsigned pte_access; unsigned pte_access;
pfn_t pfn; pfn_t pfn;
int level = vcpu->arch.update_pte.level;
gpte = *(const pt_element_t *)pte; gpte = *(const pt_element_t *)pte;
if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
...@@ -275,7 +274,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, ...@@ -275,7 +274,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
return; return;
kvm_get_pfn(pfn); kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, level, gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
gpte_to_gfn(gpte), pfn, true); gpte_to_gfn(gpte), pfn, true);
} }
...@@ -284,7 +283,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, ...@@ -284,7 +283,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
*/ */
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw, struct guest_walker *gw,
int user_fault, int write_fault, int largepage, int user_fault, int write_fault, int hlevel,
int *ptwrite, pfn_t pfn) int *ptwrite, pfn_t pfn)
{ {
unsigned access = gw->pt_access; unsigned access = gw->pt_access;
...@@ -303,8 +302,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -303,8 +302,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
for_each_shadow_entry(vcpu, addr, iterator) { for_each_shadow_entry(vcpu, addr, iterator) {
level = iterator.level; level = iterator.level;
sptep = iterator.sptep; sptep = iterator.sptep;
if (level == PT_PAGE_TABLE_LEVEL if (iterator.level == hlevel) {
|| (largepage && level == PT_DIRECTORY_LEVEL)) {
mmu_set_spte(vcpu, sptep, access, mmu_set_spte(vcpu, sptep, access,
gw->pte_access & access, gw->pte_access & access,
user_fault, write_fault, user_fault, write_fault,
...@@ -323,12 +321,15 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -323,12 +321,15 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
kvm_flush_remote_tlbs(vcpu->kvm); kvm_flush_remote_tlbs(vcpu->kvm);
} }
if (level == PT_DIRECTORY_LEVEL if (level <= gw->level) {
&& gw->level == PT_DIRECTORY_LEVEL) { int delta = level - gw->level + 1;
direct = 1; direct = 1;
if (!is_dirty_gpte(gw->ptes[level - 1])) if (!is_dirty_gpte(gw->ptes[level - delta]))
access &= ~ACC_WRITE_MASK; access &= ~ACC_WRITE_MASK;
table_gfn = gpte_to_gfn(gw->ptes[level - 1]); table_gfn = gpte_to_gfn(gw->ptes[level - delta]);
/* advance table_gfn when emulating 1gb pages with 4k */
if (delta == 0)
table_gfn += PT_INDEX(addr, level);
} else { } else {
direct = 0; direct = 0;
table_gfn = gw->table_gfn[level - 2]; table_gfn = gw->table_gfn[level - 2];
...@@ -381,7 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -381,7 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
int write_pt = 0; int write_pt = 0;
int r; int r;
pfn_t pfn; pfn_t pfn;
int largepage = 0; int level = PT_PAGE_TABLE_LEVEL;
unsigned long mmu_seq; unsigned long mmu_seq;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
...@@ -407,15 +408,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -407,15 +408,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
return 0; return 0;
} }
if (walker.level == PT_DIRECTORY_LEVEL) { if (walker.level >= PT_DIRECTORY_LEVEL) {
gfn_t large_gfn; level = min(walker.level, mapping_level(vcpu, walker.gfn));
large_gfn = walker.gfn & walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
if (mapping_level(vcpu, large_gfn) == PT_DIRECTORY_LEVEL) {
walker.gfn = large_gfn;
largepage = 1;
}
} }
mmu_seq = vcpu->kvm->mmu_notifier_seq; mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb(); smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
...@@ -432,8 +429,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -432,8 +429,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
goto out_unlock; goto out_unlock;
kvm_mmu_free_some_pages(vcpu); kvm_mmu_free_some_pages(vcpu);
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
largepage, &write_pt, pfn); level, &write_pt, pfn);
pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
sptep, *sptep, write_pt); sptep, *sptep, write_pt);
...@@ -469,7 +465,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) ...@@ -469,7 +465,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
/* FIXME: properly handle invlpg on large guest pages */ /* FIXME: properly handle invlpg on large guest pages */
if (level == PT_PAGE_TABLE_LEVEL || if (level == PT_PAGE_TABLE_LEVEL ||
((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
struct kvm_mmu_page *sp = page_header(__pa(sptep)); struct kvm_mmu_page *sp = page_header(__pa(sptep));
pte_gpa = (sp->gfn << PAGE_SHIFT); pte_gpa = (sp->gfn << PAGE_SHIFT);
...@@ -599,7 +596,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ...@@ -599,7 +596,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++; nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_gpte(gpte), 0, gfn, is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
spte_to_pfn(sp->spt[i]), true, false); spte_to_pfn(sp->spt[i]), true, false);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment