Commit 3c726f8d authored by Benjamin Herrenschmidt's avatar Benjamin Herrenschmidt Committed by Linus Torvalds

[PATCH] ppc64: support 64k pages

Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel
base page size to 64K.  The resulting kernel still boots on any
hardware.  On current machines with 4K pages support only, the kernel
will maintain 16 "subpages" for each 64K page transparently.

Note that while real 64K capable HW has been tested, the current patch
will not enable it yet as such hardware is not released yet, and I'm
still verifying with the firmware architects the proper to get the
information from the newer hypervisors.
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent f912696a
......@@ -603,6 +603,15 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
config PPC_64K_PAGES
bool "64k page size"
help
This option changes the kernel logical page size to 64k. On machines
without processor support for 64k pages, the kernel will simulate
them by loading each individual 4k page on demand transparently,
while on hardware with such support, it will be used to map
normal application pages.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on PPC64 && SMP
......
......@@ -125,6 +125,9 @@ int main(void)
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_PPC_64K_PAGES
DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
#endif
#ifdef CONFIG_HUGETLB_PAGE
DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
......
......@@ -240,7 +240,7 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
{ /* Power5 */
{ /* Power5 GR */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003a0000,
.cpu_name = "POWER5 (gr)",
......@@ -255,7 +255,7 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
{ /* Power5 */
{ /* Power5 GS */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003b0000,
.cpu_name = "POWER5 (gs)",
......
This diff is collapsed.
......@@ -25,7 +25,7 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
.xRanges = {
{ .xPages = HvPagesToMap,
.xOffset = 0,
.xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT),
.xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT),
},
},
};
......@@ -554,12 +554,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SLB)) {
unsigned long sp_vsid = get_kernel_vsid(sp);
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
sp_vsid <<= SLB_VSID_SHIFT;
sp_vsid |= SLB_VSID_KERNEL;
if (cpu_has_feature(CPU_FTR_16M_PAGE))
sp_vsid |= SLB_VSID_L;
sp_vsid |= SLB_VSID_KERNEL | llp;
p->thread.ksp_vsid = sp_vsid;
}
......
......@@ -724,10 +724,10 @@ static inline char *find_flat_dt_string(u32 offset)
* used to extract the memory informations at boot before we can
* unflatten the tree
*/
static int __init scan_flat_dt(int (*it)(unsigned long node,
const char *uname, int depth,
void *data),
void *data)
int __init of_scan_flat_dt(int (*it)(unsigned long node,
const char *uname, int depth,
void *data),
void *data)
{
unsigned long p = ((unsigned long)initial_boot_params) +
initial_boot_params->off_dt_struct;
......@@ -784,8 +784,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
* This function can be used within scan_flattened_dt callback to get
* access to properties
*/
static void* __init get_flat_dt_prop(unsigned long node, const char *name,
unsigned long *size)
void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
unsigned long *size)
{
unsigned long p = node;
......@@ -1087,7 +1087,7 @@ void __init unflatten_device_tree(void)
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth, void *data)
{
char *type = get_flat_dt_prop(node, "device_type", NULL);
char *type = of_get_flat_dt_prop(node, "device_type", NULL);
u32 *prop;
unsigned long size = 0;
......@@ -1095,19 +1095,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
#ifdef CONFIG_PPC_PSERIES
/* On LPAR, look for the first ibm,pft-size property for the hash table size
*/
if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
u32 *pft_size;
pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL);
if (pft_size != NULL) {
/* pft_size[0] is the NUMA CEC cookie */
ppc64_pft_size = pft_size[1];
}
}
#endif
boot_cpuid = 0;
boot_cpuid_phys = 0;
if (initial_boot_params && initial_boot_params->version >= 2) {
......@@ -1117,8 +1104,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
} else {
/* Check if it's the boot-cpu, set it's hw index now */
if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) {
prop = get_flat_dt_prop(node, "reg", NULL);
if (of_get_flat_dt_prop(node,
"linux,boot-cpu", NULL) != NULL) {
prop = of_get_flat_dt_prop(node, "reg", NULL);
if (prop != NULL)
boot_cpuid_phys = *prop;
}
......@@ -1127,14 +1115,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#ifdef CONFIG_ALTIVEC
/* Check if we have a VMX and eventually update CPU features */
prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size);
prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size);
if (prop && (*prop) > 0) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
}
/* Same goes for Apple's "altivec" property */
prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL);
prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
if (prop) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
......@@ -1147,7 +1135,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* this by looking at the size of the ibm,ppc-interrupt-server#s
* property
*/
prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
&size);
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
if (prop && ((size / sizeof(u32)) > 1))
......@@ -1170,7 +1158,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
return 0;
/* get platform type */
prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL);
prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
if (prop == NULL)
return 0;
#ifdef CONFIG_PPC64
......@@ -1183,21 +1171,21 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
#ifdef CONFIG_PPC64
/* check if iommu is forced on or off */
if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
iommu_is_off = 1;
if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
iommu_force_on = 1;
#endif
lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL);
lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
if (lprop)
memory_limit = *lprop;
#ifdef CONFIG_PPC64
lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
if (lprop)
tce_alloc_start = *lprop;
lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
if (lprop)
tce_alloc_end = *lprop;
#endif
......@@ -1209,9 +1197,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
{
u64 *basep, *entryp;
basep = get_flat_dt_prop(node, "linux,rtas-base", NULL);
entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL);
prop = get_flat_dt_prop(node, "linux,rtas-size", NULL);
basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL);
if (basep && entryp && prop) {
rtas.base = *basep;
rtas.entry = *entryp;
......@@ -1232,11 +1220,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
if (depth != 0)
return 0;
prop = get_flat_dt_prop(node, "#size-cells", NULL);
prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
dt_root_size_cells = (prop == NULL) ? 1 : *prop;
DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
prop = get_flat_dt_prop(node, "#address-cells", NULL);
prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
......@@ -1271,7 +1259,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
static int __init early_init_dt_scan_memory(unsigned long node,
const char *uname, int depth, void *data)
{
char *type = get_flat_dt_prop(node, "device_type", NULL);
char *type = of_get_flat_dt_prop(node, "device_type", NULL);
cell_t *reg, *endp;
unsigned long l;
......@@ -1279,7 +1267,7 @@ static int __init early_init_dt_scan_memory(unsigned long node,
if (type == NULL || strcmp(type, "memory") != 0)
return 0;
reg = (cell_t *)get_flat_dt_prop(node, "reg", &l);
reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
if (reg == NULL)
return 0;
......@@ -1343,12 +1331,12 @@ void __init early_init_devtree(void *params)
* device-tree, including the platform type, initrd location and
* size, TCE reserve, and more ...
*/
scan_flat_dt(early_init_dt_scan_chosen, NULL);
of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
/* Scan memory nodes and rebuild LMBs */
lmb_init();
scan_flat_dt(early_init_dt_scan_root, NULL);
scan_flat_dt(early_init_dt_scan_memory, NULL);
of_scan_flat_dt(early_init_dt_scan_root, NULL);
of_scan_flat_dt(early_init_dt_scan_memory, NULL);
lmb_enforce_memory_limit(memory_limit);
lmb_analyze();
#ifdef CONFIG_PPC64
......@@ -1363,10 +1351,10 @@ void __init early_init_devtree(void *params)
DBG("Scanning CPUs ...\n");
/* Retreive hash table size from flattened tree plus other
* CPU related informations (altivec support, boot CPU ID, ...)
/* Retreive CPU related informations from the flat tree
* (altivec support, boot CPU ID, ...)
*/
scan_flat_dt(early_init_dt_scan_cpus, NULL);
of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
DBG(" <- early_init_devtree()\n");
}
......
......@@ -277,16 +277,21 @@ void __init early_setup(unsigned long dt_ptr)
DBG("Found, Initializing memory management...\n");
/*
* Initialize stab / SLB management
* Initialize the MMU Hash table and create the linear mapping
* of memory. Has to be done before stab/slb initialization as
* this is currently where the page size encoding is obtained
*/
if (!firmware_has_feature(FW_FEATURE_ISERIES))
stab_initialize(lpaca->stab_real);
htab_initialize();
/*
* Initialize the MMU Hash table and create the linear mapping
* of memory
* Initialize stab / SLB management except on iSeries
*/
htab_initialize();
if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
if (cpu_has_feature(CPU_FTR_SLB))
slb_initialize();
else
stab_initialize(lpaca->stab_real);
}
DBG(" <- early_setup()\n");
}
......@@ -552,10 +557,12 @@ static void __init irqstack_early_init(void)
* SLB misses on them.
*/
for_each_cpu(i) {
softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
THREAD_SIZE, 0x10000000));
hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
THREAD_SIZE, 0x10000000));
softirq_ctx[i] = (struct thread_info *)
__va(lmb_alloc_base(THREAD_SIZE,
THREAD_SIZE, 0x10000000));
hardirq_ctx[i] = (struct thread_info *)
__va(lmb_alloc_base(THREAD_SIZE,
THREAD_SIZE, 0x10000000));
}
}
#else
......@@ -583,8 +590,8 @@ static void __init emergency_stack_init(void)
limit = min(0x10000000UL, lmb.rmo_size);
for_each_cpu(i)
paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128,
limit)) + PAGE_SIZE;
paca[i].emergency_sp =
__va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
}
/*
......
......@@ -11,7 +11,7 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
_GLOBAL(copy_page)
_GLOBAL(copy_4K_page)
std r31,-8(1)
std r30,-16(1)
std r29,-24(1)
......
......@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
std r4,-16(r1)
std r5,-8(r1)
dcbt 0,r4
beq .Lcopy_page
beq .Lcopy_page_4K
andi. r6,r6,7
mtcrf 0x01,r5
blt cr1,.Lshort_copy
......@@ -366,7 +366,7 @@ _GLOBAL(__copy_tofrom_user)
* above (following the .Ldst_aligned label) but it runs slightly
* slower on POWER3.
*/
.Lcopy_page:
.Lcopy_page_4K:
std r31,-32(1)
std r30,-40(1)
std r29,-48(1)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pu = pud_offset(pg, addr);
if (!pud_none(*pu)) {
pm = pmd_offset(pu, addr);
#ifdef CONFIG_PPC_64K_PAGES
/* Currently, we use the normal PTE offset within full
* size PTE pages, thus our huge PTEs are scattered in
* the PTE page and we do waste some. We may change
* that in the future, but the current mecanism keeps
* things much simpler
*/
if (!pmd_none(*pm)) {
/* Note: pte_offset_* are all equivalent on
* ppc64 as we don't have HIGHMEM
*/
pt = pte_offset_kernel(pm, addr);
return pt;
}
#else /* CONFIG_PPC_64K_PAGES */
/* On 4k pages, we put huge PTEs in the PMD page */
pt = (pte_t *)pm;
BUG_ON(!pmd_none(*pm)
&& !(pte_present(*pt) && pte_huge(*pt)));
return pt;
#endif /* CONFIG_PPC_64K_PAGES */
}
}
......@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
if (pu) {
pm = pmd_alloc(mm, pu, addr);
if (pm) {
#ifdef CONFIG_PPC_64K_PAGES
/* See comment in huge_pte_offset. Note that if we ever
* want to put the page size in the PMD, we would have
* to open code our own pte_alloc* function in order
* to populate and set the size atomically
*/
pt = pte_alloc_map(mm, pm, addr);
#else /* CONFIG_PPC_64K_PAGES */
pt = (pte_t *)pm;
BUG_ON(!pmd_none(*pm)
&& !(pte_present(*pt) && pte_huge(*pt)));
#endif /* CONFIG_PPC_64K_PAGES */
return pt;
}
}
......@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
return NULL;
}
#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
int i;
if (pte_present(*ptep)) {
pte_clear(mm, addr, ptep);
/* We open-code pte_clear because we need to pass the right
* argument to hpte_update (huge / !huge)
*/
unsigned long old = pte_update(ptep, ~0UL);
if (old & _PAGE_HASHPTE)
hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
flush_tlb_pending();
}
for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
ptep++;
}
*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
int i;
if (old & _PAGE_HASHPTE)
hpte_update(mm, addr, old, 0);
for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
ptep[i] = __pte(0);
hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
*ptep = __pte(0);
return __pte(old);
}
......@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
int lastshift;
u16 areamask, curareas;
if (HPAGE_SHIFT == 0)
return -EINVAL;
if (len & ~HPAGE_MASK)
return -EINVAL;
......@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local)
{
pte_t *ptep;
unsigned long va, vpn;
pte_t old_pte, new_pte;
unsigned long rflags, prpn;
unsigned long old_pte, new_pte;
unsigned long va, rflags, pa;
long slot;
int err = 1;
spin_lock(&mm->page_table_lock);
ptep = huge_pte_offset(mm, ea);
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> HPAGE_SHIFT;
/*
* If no pte found or not present, send the problem up to
......@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
if (unlikely(!ptep || pte_none(*ptep)))
goto out;
/* BUG_ON(pte_bad(*ptep)); */
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
......@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
*/
old_pte = *ptep;
new_pte = old_pte;
rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
do {
old_pte = pte_val(*ptep);
if (old_pte & _PAGE_BUSY)
goto out;
new_pte = old_pte | _PAGE_BUSY |
_PAGE_ACCESSED | _PAGE_HASHPTE;
} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
old_pte, new_pte));
rflags = 0x2 | (!(new_pte & _PAGE_RW));
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
/* Check if pte already has an hpte (case 2) */
if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
if (unlikely(old_pte & _PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot;
hash = hpt_hash(vpn, 1);
if (pte_val(old_pte) & _PAGE_SECONDARY)
hash = hpt_hash(va, HPAGE_SHIFT);
if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
slot += (old_pte & _PAGE_F_GIX) >> 12;
if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
old_pte &= ~_PAGE_HPTEFLAGS;
}
if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
unsigned long hash = hpt_hash(vpn, 1);
if (likely(!(old_pte & _PAGE_HASHPTE))) {
unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
unsigned long hpte_group;
prpn = pte_pfn(old_pte);
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
repeat:
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
/* Update the linux pte with the HPTE slot */
pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
pte_val(new_pte) |= _PAGE_HASHPTE;
/* clear HPTE slot informations in new PTE */
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
/* Add in WIMG bits */
/* XXX We should store these in the pte */
/* --BenH: I think they are ... */
rflags |= _PAGE_COHERENT;
slot = ppc_md.hpte_insert(hpte_group, va, prpn,
HPTE_V_LARGE, rflags);
/* Insert into the hash table, primary slot */
slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
mmu_huge_psize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
pte_val(new_pte) |= _PAGE_SECONDARY;
new_pte |= _PAGE_F_SECOND;
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
slot = ppc_md.hpte_insert(hpte_group, va, prpn,
HPTE_V_LARGE |
slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
HPTE_V_SECONDARY,
rflags);
mmu_huge_psize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
......@@ -726,20 +744,18 @@ repeat:
if (unlikely(slot == -2))
panic("hash_huge_page: pte_insert failed\n");
pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
/*
* No need to use ldarx/stdcx here because all who
* might be updating the pte will hold the
* page_table_lock
*/
*ptep = new_pte;
new_pte |= (slot << 12) & _PAGE_F_GIX;
}
/*
* No need to use ldarx/stdcx here because all who
* might be updating the pte will hold the
* page_table_lock
*/
*ptep = __pte(new_pte & ~_PAGE_BUSY);
err = 0;
out:
spin_unlock(&mm->page_table_lock);
return err;
}
......@@ -188,12 +188,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
memset(addr, 0, kmem_cache_size(cache));
}
#ifdef CONFIG_PPC_64K_PAGES
static const int pgtable_cache_size[2] = {
PTE_TABLE_SIZE, PGD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
"pte_pmd_cache", "pgd_cache",
};
#else
static const int pgtable_cache_size[2] = {
PTE_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
"pgd_pte_cache", "pud_pmd_cache",
};
#endif /* CONFIG_PPC_64K_PAGES */
kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
......@@ -201,19 +210,14 @@ void pgtable_cache_init(void)
{
int i;
BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
int size = pgtable_cache_size[i];
const char *name = pgtable_cache_name[i];
pgtable_cache[i] = kmem_cache_create(name,
size, size,
SLAB_HWCACHE_ALIGN
| SLAB_MUST_HWCACHE_ALIGN,
SLAB_HWCACHE_ALIGN |
SLAB_MUST_HWCACHE_ALIGN,
zero_ctor,
NULL);
if (! pgtable_cache[i])
......
......@@ -61,6 +61,9 @@ int init_bootmem_done;
int mem_init_done;
unsigned long memory_limit;
extern void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long access, unsigned long trap);
/*
* This is called by /dev/mem to know if a given address has to
* be mapped non-cacheable or not
......@@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t pte)
{
/* handle i-cache coherency */
unsigned long pfn = pte_pfn(pte);
#ifdef CONFIG_PPC32
pmd_t *pmd;
#else
unsigned long vsid;
void *pgdir;
pte_t *ptep;
int local = 0;
cpumask_t tmp;
unsigned long flags;
#ifdef CONFIG_PPC_STD_MMU
unsigned long access = 0, trap;
#endif
unsigned long pfn = pte_pfn(pte);
/* handle i-cache coherency */
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
......@@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(pte) || address >= TASK_SIZE)
return;
#ifdef CONFIG_PPC32
if (Hash == 0)
return;
pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
if (!pmd_none(*pmd))
add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
#else
pgdir = vma->vm_mm->pgd;
if (pgdir == NULL)
return;
ptep = find_linux_pte(pgdir, address);
if (!ptep)
/* We try to figure out if we are coming from an instruction
* access fault and pass that down to __hash_page so we avoid
* double-faulting on execution of fresh text. We have to test
* for regs NULL since init will get here first thing at boot
*
* We also avoid filling the hash if not coming from a fault
*/
if (current->thread.regs == NULL)
return;
vsid = get_vsid(vma->vm_mm->context.id, address);
local_irq_save(flags);
tmp = cpumask_of_cpu(smp_processor_id());
if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
local = 1;
__hash_page(address, 0, vsid, ptep, 0x300, local);
local_irq_restore(flags);
#endif
#endif
trap = TRAP(current->thread.regs);
if (trap == 0x400)
access |= _PAGE_EXEC;
else if (trap != 0x300)
return;
hash_preload(vma->vm_mm, address, access, trap);
#endif /* CONFIG_PPC_STD_MMU */
}
......@@ -101,7 +101,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long vsid;
if (mem_init_done) {
pgdp = pgd_offset_k(ea);
......@@ -117,28 +116,15 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
} else {
unsigned long va, vpn, hash, hpteg;
/*
* If the mm subsystem is not fully up, we cannot create a
* linux page table entry for this mapping. Simply bolt an
* entry in the hardware page table.
*
*/
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0xFFFFFFF);
vpn = va >> PAGE_SHIFT;
hash = hpt_hash(vpn, 0);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
/* Panic if a pte grpup is full */
if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
HPTE_V_BOLTED,
_PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
== -1) {
panic("map_io_page: could not insert mapping");
}
if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
mmu_virtual_psize))
panic("Can't map bolted IO mapping");
}
return 0;
}
......
......@@ -178,6 +178,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
bat_addrs[index].phys = phys;
}
/*
* Preload a translation in the hash table
*/
void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long access, unsigned long trap)
{
pmd_t *pmd;
if (Hash == 0)
return;
pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
if (!pmd_none(*pmd))
add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
}
/*
* Initialize the hash table and patch the instructions in hashtable.S.
*/
......
......@@ -14,14 +14,32 @@
* 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
#include <linux/config.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif
extern void slb_allocate(unsigned long ea);
extern void slb_allocate_realmode(unsigned long ea);
extern void slb_allocate_user(unsigned long ea);
static void slb_allocate(unsigned long ea)
{
/* Currently, we do real mode for all SLBs including user, but
* that will change if we bring back dynamic VSIDs
*/
slb_allocate_realmode(ea);
}
static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
{
......@@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void)
{
/* If you change this make sure you change SLB_NUM_BOLTED
* appropriately too. */
unsigned long ksp_flags = SLB_VSID_KERNEL;
unsigned long linear_llp, virtual_llp, lflags, vflags;
unsigned long ksp_esid_data;
WARN_ON(!irqs_disabled());
if (cpu_has_feature(CPU_FTR_16M_PAGE))
ksp_flags |= SLB_VSID_L;
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
lflags = SLB_VSID_KERNEL | linear_llp;
vflags = SLB_VSID_KERNEL | virtual_llp;
ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
......@@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void)
/* Slot 2 - kernel stack */
"slbmte %2,%3\n"
"isync"
:: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)),
:: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
"r"(mk_esid_data(VMALLOCBASE, 1)),
"r"(mk_vsid_data(ksp_esid_data, ksp_flags)),
"r"(mk_vsid_data(ksp_esid_data, lflags)),
"r"(ksp_esid_data)
: "memory");
}
......@@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
get_paca()->slb_cache_ptr = 0;
get_paca()->context = mm->context;
#ifdef CONFIG_PPC_64K_PAGES
get_paca()->pgdir = mm->pgd;
#endif /* CONFIG_PPC_64K_PAGES */
/*
* preload some userspace segments into the SLB.
......@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
slb_allocate(unmapped_base);
}
static inline void patch_slb_encoding(unsigned int *insn_addr,
unsigned int immed)
{
/* Assume the instruction had a "0" immediate value, just
* "or" in the new value
*/
*insn_addr |= immed;
flush_icache_range((unsigned long)insn_addr, 4+
(unsigned long)insn_addr);
}
void slb_initialize(void)
{
unsigned long linear_llp, virtual_llp;
static int slb_encoding_inited;
extern unsigned int *slb_miss_kernel_load_linear;
extern unsigned int *slb_miss_kernel_load_virtual;
extern unsigned int *slb_miss_user_load_normal;
#ifdef CONFIG_HUGETLB_PAGE
extern unsigned int *slb_miss_user_load_huge;
unsigned long huge_llp;
huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
#endif
/* Prepare our SLB miss handler based on our page size */
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
if (!slb_encoding_inited) {
slb_encoding_inited = 1;
patch_slb_encoding(slb_miss_kernel_load_linear,
SLB_VSID_KERNEL | linear_llp);
patch_slb_encoding(slb_miss_kernel_load_virtual,
SLB_VSID_KERNEL | virtual_llp);
patch_slb_encoding(slb_miss_user_load_normal,
SLB_VSID_USER | virtual_llp);
DBG("SLB: linear LLP = %04x\n", linear_llp);
DBG("SLB: virtual LLP = %04x\n", virtual_llp);
#ifdef CONFIG_HUGETLB_PAGE
patch_slb_encoding(slb_miss_user_load_huge,
SLB_VSID_USER | huge_llp);
DBG("SLB: huge LLP = %04x\n", huge_llp);
#endif
}
/* On iSeries the bolted entries have already been set up by
* the hypervisor from the lparMap data in head.S */
#ifndef CONFIG_PPC_ISERIES
unsigned long flags = SLB_VSID_KERNEL;
{
unsigned long lflags, vflags;
/* Invalidate the entire SLB (even slot 0) & all the ERATS */
if (cpu_has_feature(CPU_FTR_16M_PAGE))
flags |= SLB_VSID_L;
lflags = SLB_VSID_KERNEL | linear_llp;
vflags = SLB_VSID_KERNEL | virtual_llp;
asm volatile("isync":::"memory");
asm volatile("slbmte %0,%0"::"r" (0) : "memory");
/* Invalidate the entire SLB (even slot 0) & all the ERATS */
asm volatile("isync":::"memory");
asm volatile("slbmte %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
create_slbe(KERNELBASE, flags, 0);
create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1);
create_slbe(KERNELBASE, lflags, 0);
/* VMALLOC space has 4K pages always for now */
create_slbe(VMALLOCBASE, vflags, 1);
/* We don't bolt the stack for the time being - we're in boot,
* so the stack is in the bolted segment. By the time it goes
* elsewhere, we'll call _switch() which will bolt in the new
* one. */
asm volatile("isync":::"memory");
#endif
}
#endif /* CONFIG_PPC_ISERIES */
get_paca()->stab_rr = SLB_NUM_BOLTED;
}
......@@ -18,61 +18,28 @@
#include <linux/config.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
/* void slb_allocate(unsigned long ea);
/* void slb_allocate_realmode(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
* r3 = faulting address, r13 = PACA
* r9, r10, r11 are clobbered by this function
* No other registers are examined or changed.
*/
_GLOBAL(slb_allocate)
/*
* First find a slot, round robin. Previously we tried to find
* a free slot first but that took too long. Unfortunately we
* dont have any LRU information to help us choose a slot.
*/
#ifdef CONFIG_PPC_ISERIES
/*
* On iSeries, the "bolted" stack segment can be cast out on
* shared processor switch so we need to check for a miss on
* it and restore it to the right slot.
*/
ld r9,PACAKSAVE(r13)
clrrdi r9,r9,28
clrrdi r11,r3,28
li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
cmpld r9,r11
beq 3f
#endif /* CONFIG_PPC_ISERIES */
ld r10,PACASTABRR(r13)
addi r10,r10,1
/* use a cpu feature mask if we ever change our slb size */
cmpldi r10,SLB_NUM_ENTRIES
blt+ 4f
li r10,SLB_NUM_BOLTED
4:
std r10,PACASTABRR(r13)
3:
/* r3 = faulting address, r10 = entry */
_GLOBAL(slb_allocate_realmode)
/* r3 = faulting address */
srdi r9,r3,60 /* get region */
srdi r3,r3,28 /* get esid */
srdi r10,r3,28 /* get esid */
cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */
oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
/* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
/* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
blt cr7,0f /* user or kernel? */
/* kernel address: proto-VSID = ESID */
......@@ -81,43 +48,161 @@ _GLOBAL(slb_allocate)
* top segment. That's ok, the scramble below will translate
* it to VSID 0, which is reserved as a bad VSID - one which
* will never have any pages in it. */
li r11,SLB_VSID_KERNEL
BEGIN_FTR_SECTION
bne cr7,9f
li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
b 9f
0: /* user address: proto-VSID = context<<15 | ESID */
srdi. r9,r3,USER_ESID_BITS
/* Check if hitting the linear mapping of the vmalloc/ioremap
* kernel space
*/
bne cr7,1f
/* Linear mapping encoding bits, the "li" instruction below will
* be patched by the kernel at boot
*/
_GLOBAL(slb_miss_kernel_load_linear)
li r11,0
b slb_finish_load
1: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below
* will be patched by the kernel at boot
*/
_GLOBAL(slb_miss_kernel_load_virtual)
li r11,0
b slb_finish_load
0: /* user address: proto-VSID = context << 15 | ESID. First check
* if the address is within the boundaries of the user region
*/
srdi. r9,r10,USER_ESID_BITS
bne- 8f /* invalid ea bits set */
/* Figure out if the segment contains huge pages */
#ifdef CONFIG_HUGETLB_PAGE
BEGIN_FTR_SECTION
b 1f
END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
lhz r9,PACAHIGHHTLBAREAS(r13)
srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
srd r9,r9,r11
lhz r11,PACALOWHTLBAREAS(r13)
srd r11,r11,r3
or r9,r9,r11
END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
srd r11,r11,r10
or. r9,r9,r11
beq 1f
_GLOBAL(slb_miss_user_load_huge)
li r11,0
b 2f
1:
#endif /* CONFIG_HUGETLB_PAGE */
li r11,SLB_VSID_USER
_GLOBAL(slb_miss_user_load_normal)
li r11,0
#ifdef CONFIG_HUGETLB_PAGE
BEGIN_FTR_SECTION
rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */
END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
#endif /* CONFIG_HUGETLB_PAGE */
2:
ld r9,PACACONTEXTID(r13)
rldimi r10,r9,USER_ESID_BITS,0
b slb_finish_load
8: /* invalid EA */
li r10,0 /* BAD_VSID */
li r11,SLB_VSID_USER /* flags don't much matter */
b slb_finish_load
#ifdef __DISABLED__
/* void slb_allocate_user(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
* r3 = faulting address, r13 = PACA
* r9, r10, r11 are clobbered by this function
* No other registers are examined or changed.
*
* It is called with translation enabled in order to be able to walk the
* page tables. This is not currently used.
*/
_GLOBAL(slb_allocate_user)
/* r3 = faulting address */
srdi r10,r3,28 /* get esid */
crset 4*cr7+lt /* set "user" flag for later */
/* check if we fit in the range covered by the pagetables*/
srdi. r9,r3,PGTABLE_EADDR_SIZE
crnot 4*cr0+eq,4*cr0+eq
beqlr
/* now we need to get to the page tables in order to get the page
* size encoding from the PMD. In the future, we'll be able to deal
* with 1T segments too by getting the encoding from the PGD instead
*/
ld r9,PACAPGDIR(r13)
cmpldi cr0,r9,0
beqlr
rlwinm r11,r10,8,25,28
ldx r9,r9,r11 /* get pgd_t */
cmpldi cr0,r9,0
beqlr
rlwinm r11,r10,3,17,28
ldx r9,r9,r11 /* get pmd_t */
cmpldi cr0,r9,0
beqlr
/* build vsid flags */
andi. r11,r9,SLB_VSID_LLP
ori r11,r11,SLB_VSID_USER
/* get context to calculate proto-VSID */
ld r9,PACACONTEXTID(r13)
rldimi r3,r9,USER_ESID_BITS,0
rldimi r10,r9,USER_ESID_BITS,0
/* fall through slb_finish_load */
#endif /* __DISABLED__ */
9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
ASM_VSID_SCRAMBLE(r3,r9)
rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */
/*
* Finish loading of an SLB entry and return
*
* r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
*/
slb_finish_load:
ASM_VSID_SCRAMBLE(r10,r9)
rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
/* r3 = EA, r11 = VSID data */
/*
* Find a slot, round robin. Previously we tried to find a
* free slot first but that took too long. Unfortunately we
* dont have any LRU information to help us choose a slot.
*/
#ifdef CONFIG_PPC_ISERIES
/*
* On iSeries, the "bolted" stack segment can be cast out on
* shared processor switch so we need to check for a miss on
* it and restore it to the right slot.
*/
ld r9,PACAKSAVE(r13)
clrrdi r9,r9,28
clrrdi r3,r3,28
li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
cmpld r9,r3
beq 3f
#endif /* CONFIG_PPC_ISERIES */
ld r10,PACASTABRR(r13)
addi r10,r10,1
/* use a cpu feature mask if we ever change our slb size */
cmpldi r10,SLB_NUM_ENTRIES
blt+ 4f
li r10,SLB_NUM_BOLTED
4:
std r10,PACASTABRR(r13)
3:
rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
/* r3 = ESID data, r11 = VSID data */
/*
* No need for an isync before or after this slbmte. The exception
......@@ -125,7 +210,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
*/
slbmte r11,r10
bgelr cr7 /* we're done for kernel addresses */
/* we're done for kernel addresses */
crclr 4*cr0+eq /* set result to "success" */
bgelr cr7
/* Update the slb cache */
lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
......@@ -143,9 +230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
li r3,SLB_CACHE_ENTRIES+1
2:
sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
crclr 4*cr0+eq /* set result to "success" */
blr
8: /* invalid EA */
li r3,0 /* BAD_VSID */
li r11,SLB_VSID_USER /* flags don't much matter */
b 9b
......@@ -26,7 +26,6 @@ struct stab_entry {
unsigned long vsid_data;
};
/* Both the segment table and SLB code uses the following cache */
#define NR_STAB_CACHE_ENTRIES 8
DEFINE_PER_CPU(long, stab_cache_ptr);
DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
......@@ -186,7 +185,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
/* Never flush the first entry. */
ste += 1;
for (entry = 1;
entry < (PAGE_SIZE / sizeof(struct stab_entry));
entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
entry++, ste++) {
unsigned long ea;
ea = ste->esid_data & ESID_MASK;
......@@ -200,6 +199,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
__get_cpu_var(stab_cache_ptr) = 0;
#ifdef CONFIG_PPC_64K_PAGES
get_paca()->pgdir = mm->pgd;
#endif /* CONFIG_PPC_64K_PAGES */
/* Now preload some entries for the new task */
if (test_tsk_thread_flag(tsk, TIF_32BIT))
unmapped_base = TASK_UNMAPPED_BASE_USER32;
......@@ -223,8 +226,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
asm volatile("sync" : : : "memory");
}
extern void slb_initialize(void);
/*
* Allocate segment tables for secondary CPUs. These must all go in
* the first (bolted) segment, so that do_stab_bolted won't get a
......@@ -243,18 +244,21 @@ void stabs_alloc(void)
if (cpu == 0)
continue; /* stab for CPU 0 is statically allocated */
newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT);
newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
1<<SID_SHIFT);
if (! newstab)
panic("Unable to allocate segment table for CPU %d.\n",
cpu);
newstab += KERNELBASE;
memset((void *)newstab, 0, PAGE_SIZE);
memset((void *)newstab, 0, HW_PAGE_SIZE);
paca[cpu].stab_addr = newstab;
paca[cpu].stab_real = virt_to_abs(newstab);
printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx "
"virtual, 0x%lx absolute\n",
cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
}
}
......@@ -267,13 +271,9 @@ void stab_initialize(unsigned long stab)
{
unsigned long vsid = get_kernel_vsid(KERNELBASE);
if (cpu_has_feature(CPU_FTR_SLB)) {
slb_initialize();
} else {
asm volatile("isync; slbia; isync":::"memory");
make_ste(stab, GET_ESID(KERNELBASE), vsid);
asm volatile("isync; slbia; isync":::"memory");
make_ste(stab, GET_ESID(KERNELBASE), vsid);
/* Order update */
asm volatile("sync":::"memory");
}
/* Order update */
asm volatile("sync":::"memory");
}
......@@ -21,6 +21,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/mm.h>
......@@ -30,7 +31,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <linux/highmem.h>
#include <asm/bug.h>
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
......@@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
* (if we remove it we should clear the _PTE_HPTEFLAGS bits).
*/
void hpte_update(struct mm_struct *mm, unsigned long addr,
unsigned long pte, int wrprot)
pte_t *ptep, unsigned long pte, int huge)
{
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
unsigned long vsid;
unsigned int psize = mmu_virtual_psize;
int i;
i = batch->index;
/* We mask the address for the base page size. Huge pages will
* have applied their own masking already
*/
addr &= PAGE_MASK;
/* Get page size (maybe move back to caller) */
if (huge) {
#ifdef CONFIG_HUGETLB_PAGE
psize = mmu_huge_psize;
#else
BUG();
#endif
}
/*
* This can happen when we are in the middle of a TLB batch and
* we encounter memory pressure (eg copy_page_range when it tries
* to allocate a new pte). If we have to reclaim memory and end
* up scanning and resetting referenced bits then our batch context
* will change mid stream.
*
* We also need to ensure only one page size is present in a given
* batch
*/
if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) {
if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
flush_tlb_pending();
i = 0;
}
if (i == 0) {
batch->mm = mm;
batch->large = pte_huge(pte);
batch->psize = psize;
}
if (addr < KERNELBASE) {
vsid = get_vsid(mm->context.id, addr);
......@@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
} else
vsid = get_kernel_vsid(addr);
batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
batch->pte[i] = __pte(pte);
batch->pte[i] = __real_pte(__pte(pte), ptep);
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
flush_tlb_pending();
......@@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
local = 1;
if (i == 1)
flush_hash_page(batch->vaddr[0], batch->pte[0], local);
flush_hash_page(batch->vaddr[0], batch->pte[0],
batch->psize, local);
else
flush_hash_range(i, local);
batch->index = 0;
......
......@@ -39,15 +39,16 @@ static inline void iSeries_hunlock(unsigned long slot)
spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
}
static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
unsigned long prpn, unsigned long vflags,
unsigned long rflags)
long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
unsigned long pa, unsigned long rflags,
unsigned long vflags, int psize)
{
unsigned long arpn;
long slot;
hpte_t lhpte;
int secondary = 0;
BUG_ON(psize != MMU_PAGE_4K);
/*
* The hypervisor tries both primary and secondary.
* If we are being called to insert in the secondary,
......@@ -59,8 +60,19 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
iSeries_hlock(hpte_group);
slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
BUG_ON(lhpte.v & HPTE_V_VALID);
slot = HvCallHpt_findValid(&lhpte, va >> HW_PAGE_SHIFT);
if (unlikely(lhpte.v & HPTE_V_VALID)) {
if (vflags & HPTE_V_BOLTED) {
HvCallHpt_setSwBits(slot, 0x10, 0);
HvCallHpt_setPp(slot, PP_RWXX);
iSeries_hunlock(hpte_group);
if (slot < 0)
return 0x8 | (slot & 7);
else
return slot & 7;
}
BUG();
}
if (slot == -1) { /* No available entry found in either group */
iSeries_hunlock(hpte_group);
......@@ -73,10 +85,9 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
slot &= 0x7fffffffffffffff;
}
arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
lhpte.v = hpte_encode_v(va, MMU_PAGE_4K) | vflags | HPTE_V_VALID;
lhpte.r = hpte_encode_r(phys_to_abs(pa), MMU_PAGE_4K) | rflags;
/* Now fill in the actual HPTE */
HvCallHpt_addValidate(slot, secondary, &lhpte);
......@@ -86,25 +97,6 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
return (secondary << 3) | (slot & 7);
}
long iSeries_hpte_bolt_or_insert(unsigned long hpte_group,
unsigned long va, unsigned long prpn, unsigned long vflags,
unsigned long rflags)
{
long slot;
hpte_t lhpte;
slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
if (lhpte.v & HPTE_V_VALID) {
/* Bolt the existing HPTE */
HvCallHpt_setSwBits(slot, 0x10, 0);
HvCallHpt_setPp(slot, PP_RWXX);
return 0;
}
return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags);
}
static unsigned long iSeries_hpte_getword0(unsigned long slot)
{
hpte_t hpte;
......@@ -150,15 +142,17 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
* bits 61..63 : PP2,PP1,PP0
*/
static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
unsigned long va, int large, int local)
unsigned long va, int psize, int local)
{
hpte_t hpte;
unsigned long avpn = va >> 23;
unsigned long want_v;
iSeries_hlock(slot);
HvCallHpt_get(&hpte, slot);
if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) {
want_v = hpte_encode_v(va, MMU_PAGE_4K);
if (HPTE_V_COMPARE(hpte.v, want_v) && (hpte.v & HPTE_V_VALID)) {
/*
* Hypervisor expects bits as NPPP, which is
* different from how they are mapped in our PP.
......@@ -210,14 +204,17 @@ static long iSeries_hpte_find(unsigned long vpn)
*
* No need to lock here because we should be the only user.
*/
static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
int psize)
{
unsigned long vsid,va,vpn;
long slot;
BUG_ON(psize != MMU_PAGE_4K);
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> PAGE_SHIFT;
vpn = va >> HW_PAGE_SHIFT;
slot = iSeries_hpte_find(vpn);
if (slot == -1)
panic("updateboltedpp: Could not find page to bolt\n");
......@@ -225,7 +222,7 @@ static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
}
static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
int large, int local)
int psize, int local)
{
unsigned long hpte_v;
unsigned long avpn = va >> 23;
......
......@@ -22,7 +22,7 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
while (len) {
hv_buf.addr = cur;
left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur;
left_this_page = ((cur & HW_PAGE_MASK) + HW_PAGE_SIZE) - cur;
if (left_this_page > len)
left_this_page = len;
hv_buf.len = left_this_page;
......@@ -30,6 +30,6 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
HvCall2(HvCallBaseWriteLogBuffer,
virt_to_abs(&hv_buf),
left_this_page);
cur = (cur & PAGE_MASK) + PAGE_SIZE;
cur = (cur & HW_PAGE_MASK) + HW_PAGE_SIZE;
}
}
......@@ -43,9 +43,12 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
u64 rc;
union tce_entry tce;
index <<= TCE_PAGE_FACTOR;
npages <<= TCE_PAGE_FACTOR;
while (npages--) {
tce.te_word = 0;
tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT;
tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> TCE_SHIFT;
if (tbl->it_type == TCE_VB) {
/* Virtual Bus */
......@@ -66,7 +69,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
rc);
index++;
uaddr += PAGE_SIZE;
uaddr += TCE_PAGE_SIZE;
}
}
......@@ -74,6 +77,9 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
{
u64 rc;
npages <<= TCE_PAGE_FACTOR;
index <<= TCE_PAGE_FACTOR;
while (npages--) {
rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0);
if (rc)
......@@ -83,27 +89,6 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
}
}
#ifdef CONFIG_PCI
/*
* This function compares the known tables to find an iommu_table
* that has already been built for hardware TCEs.
*/
static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
{
struct pci_dn *pdn;
list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
struct iommu_table *it = pdn->iommu_table;
if ((it != NULL) &&
(it->it_type == TCE_PCI) &&
(it->it_offset == tbl->it_offset) &&
(it->it_index == tbl->it_index) &&
(it->it_size == tbl->it_size))
return it;
}
return NULL;
}
/*
* Call Hv with the architected data structure to get TCE table info.
* info. Put the returned data into the Linux representation of the
......@@ -113,8 +98,10 @@ static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
* 2. TCE table per Bus.
* 3. TCE Table per IOA.
*/
static void iommu_table_getparms(struct pci_dn *pdn,
struct iommu_table* tbl)
void iommu_table_getparms_iSeries(unsigned long busno,
unsigned char slotno,
unsigned char virtbus,
struct iommu_table* tbl)
{
struct iommu_table_cb *parms;
......@@ -124,9 +111,9 @@ static void iommu_table_getparms(struct pci_dn *pdn,
memset(parms, 0, sizeof(*parms));
parms->itc_busno = pdn->busno;
parms->itc_slotno = pdn->LogicalSlot;
parms->itc_virtbus = 0;
parms->itc_busno = busno;
parms->itc_slotno = slotno;
parms->itc_virtbus = virtbus;
HvCallXm_getTceTableParms(iseries_hv_addr(parms));
......@@ -134,17 +121,40 @@ static void iommu_table_getparms(struct pci_dn *pdn,
panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
/* itc_size is in pages worth of table, it_size is in # of entries */
tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry);
tbl->it_size = ((parms->itc_size * TCE_PAGE_SIZE) /
sizeof(union tce_entry)) >> TCE_PAGE_FACTOR;
tbl->it_busno = parms->itc_busno;
tbl->it_offset = parms->itc_offset;
tbl->it_offset = parms->itc_offset >> TCE_PAGE_FACTOR;
tbl->it_index = parms->itc_index;
tbl->it_blocksize = 1;
tbl->it_type = TCE_PCI;
tbl->it_type = virtbus ? TCE_VB : TCE_PCI;
kfree(parms);
}
#ifdef CONFIG_PCI
/*
* This function compares the known tables to find an iommu_table
* that has already been built for hardware TCEs.
*/
static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
{
struct pci_dn *pdn;
list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
struct iommu_table *it = pdn->iommu_table;
if ((it != NULL) &&
(it->it_type == TCE_PCI) &&
(it->it_offset == tbl->it_offset) &&
(it->it_index == tbl->it_index) &&
(it->it_size == tbl->it_size))
return it;
}
return NULL;
}
void iommu_devnode_init_iSeries(struct device_node *dn)
{
struct iommu_table *tbl;
......@@ -152,7 +162,7 @@ void iommu_devnode_init_iSeries(struct device_node *dn)
tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
iommu_table_getparms(pdn, tbl);
iommu_table_getparms_iSeries(pdn->busno, pdn->LogicalSlot, 0, tbl);
/* Look for existing tce table */
pdn->iommu_table = iommu_table_find(tbl);
......
......@@ -320,11 +320,11 @@ static void __init iSeries_init_early(void)
*/
if (naca.xRamDisk) {
initrd_start = (unsigned long)__va(naca.xRamDisk);
initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE;
initrd_end = initrd_start + naca.xRamDiskSize * HW_PAGE_SIZE;
initrd_below_start_ok = 1; // ramdisk in kernel space
ROOT_DEV = Root_RAM0;
if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize)
rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024;
if (((rd_size * 1024) / HW_PAGE_SIZE) < naca.xRamDiskSize)
rd_size = (naca.xRamDiskSize * HW_PAGE_SIZE) / 1024;
} else
#endif /* CONFIG_BLK_DEV_INITRD */
{
......@@ -470,13 +470,14 @@ static void __init build_iSeries_Memory_Map(void)
*/
hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
hptSizePages = (u32)HvCallHpt_getHptPages();
hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT);
hptSizeChunks = hptSizePages >>
(MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT);
hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
printk("HPT absolute addr = %016lx, size = %dK\n",
chunk_to_addr(hptFirstChunk), hptSizeChunks * 256);
ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE);
ppc64_pft_size = __ilog2(hptSizePages * HW_PAGE_SIZE);
/*
* The actual hashed page table is in the hypervisor,
......@@ -629,7 +630,7 @@ static void __init iSeries_fixup_klimit(void)
*/
if (naca.xRamDisk)
klimit = KERNELBASE + (u64)naca.xRamDisk +
(naca.xRamDiskSize * PAGE_SIZE);
(naca.xRamDiskSize * HW_PAGE_SIZE);
else {
/*
* No ram disk was included - check and see if there
......
......@@ -30,41 +30,14 @@ static struct iommu_table vio_iommu_table;
static void __init iommu_vio_init(void)
{
struct iommu_table *t;
struct iommu_table_cb cb;
unsigned long cbp;
unsigned long itc_entries;
iommu_table_getparms_iSeries(255, 0, 0xff, &veth_iommu_table);
veth_iommu_table.it_size /= 2;
vio_iommu_table = veth_iommu_table;
vio_iommu_table.it_offset += veth_iommu_table.it_size;
cb.itc_busno = 255; /* Bus 255 is the virtual bus */
cb.itc_virtbus = 0xff; /* Ask for virtual bus */
cbp = virt_to_abs(&cb);
HvCallXm_getTceTableParms(cbp);
itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
veth_iommu_table.it_size = itc_entries / 2;
veth_iommu_table.it_busno = cb.itc_busno;
veth_iommu_table.it_offset = cb.itc_offset;
veth_iommu_table.it_index = cb.itc_index;
veth_iommu_table.it_type = TCE_VB;
veth_iommu_table.it_blocksize = 1;
t = iommu_init_table(&veth_iommu_table);
if (!t)
if (!iommu_init_table(&veth_iommu_table))
printk("Virtual Bus VETH TCE table failed.\n");
vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size;
vio_iommu_table.it_busno = cb.itc_busno;
vio_iommu_table.it_offset = cb.itc_offset +
veth_iommu_table.it_size;
vio_iommu_table.it_index = cb.itc_index;
vio_iommu_table.it_type = TCE_VB;
vio_iommu_table.it_blocksize = 1;
t = iommu_init_table(&vio_iommu_table);
if (!t)
if (!iommu_init_table(&vio_iommu_table))
printk("Virtual Bus VIO TCE table failed.\n");
}
......
......@@ -68,7 +68,8 @@ static DEFINE_SPINLOCK(statuslock);
* For each kind of event we allocate a buffer that is
* guaranteed not to cross a page boundary
*/
static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned;
static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256]
__attribute__((__aligned__(4096)));
static atomic_t event_buffer_available[VIO_MAX_SUBTYPES];
static int event_buffer_initialised;
......@@ -116,12 +117,12 @@ static int proc_viopath_show(struct seq_file *m, void *v)
HvLpEvent_Rc hvrc;
DECLARE_MUTEX_LOCKED(Semaphore);
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
if (!buf)
return 0;
memset(buf, 0, PAGE_SIZE);
memset(buf, 0, HW_PAGE_SIZE);
handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE,
handle = dma_map_single(iSeries_vio_dev, buf, HW_PAGE_SIZE,
DMA_FROM_DEVICE);
hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
......@@ -131,7 +132,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
viopath_sourceinst(viopath_hostLp),
viopath_targetinst(viopath_hostLp),
(u64)(unsigned long)&Semaphore, VIOVERSION << 16,
((u64)handle) << 32, PAGE_SIZE, 0, 0);
((u64)handle) << 32, HW_PAGE_SIZE, 0, 0);
if (hvrc != HvLpEvent_Rc_Good)
printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc);
......@@ -140,7 +141,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
vlanMap = HvLpConfig_getVirtualLanIndexMap();
buf[PAGE_SIZE-1] = '\0';
buf[HW_PAGE_SIZE-1] = '\0';
seq_printf(m, "%s", buf);
seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
......@@ -152,7 +153,8 @@ static int proc_viopath_show(struct seq_file *m, void *v)
e2a(xItExtVpdPanel.systemSerial[4]),
e2a(xItExtVpdPanel.systemSerial[5]));
dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE,
DMA_FROM_DEVICE);
kfree(buf);
return 0;
......
......@@ -19,7 +19,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define DEBUG
#undef DEBUG_LOW
#include <linux/config.h>
#include <linux/kernel.h>
......@@ -41,10 +41,10 @@
#include "plpar_wrappers.h"
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0)
#else
#define DBG(fmt...)
#define DBG_LOW(fmt...) do { } while(0)
#endif
/* in pSeries_hvCall.S */
......@@ -276,8 +276,9 @@ void vpa_init(int cpu)
}
long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long va, unsigned long prpn,
unsigned long vflags, unsigned long rflags)
unsigned long va, unsigned long pa,
unsigned long rflags, unsigned long vflags,
int psize)
{
unsigned long lpar_rc;
unsigned long flags;
......@@ -285,11 +286,28 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long hpte_v, hpte_r;
unsigned long dummy0, dummy1;
hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID;
if (vflags & HPTE_V_LARGE)
hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
if (!(vflags & HPTE_V_BOLTED))
DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
"rflags=%lx, vflags=%lx, psize=%d)\n",
hpte_group, va, pa, rflags, vflags, psize);
hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize) | rflags;
if (!(vflags & HPTE_V_BOLTED))
DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
#if 1
{
int i;
for (i=0;i<8;i++) {
unsigned long w0, w1;
plpar_pte_read(0, hpte_group, &w0, &w1);
BUG_ON (HPTE_V_COMPARE(hpte_v, w0)
&& (w0 & HPTE_V_VALID));
}
}
#endif
/* Now fill in the actual HPTE */
/* Set CEC cookie to 0 */
......@@ -299,23 +317,30 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Exact = 0 */
flags = 0;
/* XXX why is this here? - Anton */
/* Make pHyp happy */
if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
hpte_r &= ~_PAGE_COHERENT;
lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
hpte_r, &slot, &dummy0, &dummy1);
if (unlikely(lpar_rc == H_PTEG_Full))
if (unlikely(lpar_rc == H_PTEG_Full)) {
if (!(vflags & HPTE_V_BOLTED))
DBG_LOW(" full\n");
return -1;
}
/*
* Since we try and ioremap PHBs we don't own, the pte insert
* will fail. However we must catch the failure in hash_page
* or we will loop forever, so return -2 in this case.
*/
if (unlikely(lpar_rc != H_Success))
if (unlikely(lpar_rc != H_Success)) {
if (!(vflags & HPTE_V_BOLTED))
DBG_LOW(" lpar err %d\n", lpar_rc);
return -2;
}
if (!(vflags & HPTE_V_BOLTED))
DBG_LOW(" -> slot: %d\n", slot & 7);
/* Because of iSeries, we have to pass down the secondary
* bucket bit here as well
......@@ -340,10 +365,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
/* don't remove a bolted entry */
lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
(0x1UL << 4), &dummy1, &dummy2);
if (lpar_rc == H_Success)
return i;
BUG_ON(lpar_rc != H_Not_Found);
slot_offset++;
......@@ -371,20 +394,28 @@ static void pSeries_lpar_hptab_clear(void)
* We can probably optimize here and assume the high bits of newpp are
* already zero. For now I am paranoid.
*/
static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
unsigned long va, int large, int local)
static long pSeries_lpar_hpte_updatepp(unsigned long slot,
unsigned long newpp,
unsigned long va,
int psize, int local)
{
unsigned long lpar_rc;
unsigned long flags = (newpp & 7) | H_AVPN;
unsigned long avpn = va >> 23;
unsigned long want_v;
if (large)
avpn &= ~0x1UL;
want_v = hpte_encode_v(va, psize);
lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7));
DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ",
want_v & HPTE_V_AVPN, slot, flags, psize);
if (lpar_rc == H_Not_Found)
lpar_rc = plpar_pte_protect(flags, slot, want_v & HPTE_V_AVPN);
if (lpar_rc == H_Not_Found) {
DBG_LOW("not found !\n");
return -1;
}
DBG_LOW("ok\n");
BUG_ON(lpar_rc != H_Success);
......@@ -410,21 +441,22 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
return dword0;
}
static long pSeries_lpar_hpte_find(unsigned long vpn)
static long pSeries_lpar_hpte_find(unsigned long va, int psize)
{
unsigned long hash;
unsigned long i, j;
long slot;
unsigned long hpte_v;
unsigned long want_v, hpte_v;
hash = hpt_hash(vpn, 0);
hash = hpt_hash(va, mmu_psize_defs[psize].shift);
want_v = hpte_encode_v(va, psize);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
for (i = 0; i < HPTES_PER_GROUP; i++) {
hpte_v = pSeries_lpar_hpte_getword0(slot);
if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
if (HPTE_V_COMPARE(hpte_v, want_v)
&& (hpte_v & HPTE_V_VALID)
&& (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
......@@ -441,17 +473,15 @@ static long pSeries_lpar_hpte_find(unsigned long vpn)
}
static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
unsigned long ea)
unsigned long ea,
int psize)
{
unsigned long lpar_rc;
unsigned long vsid, va, vpn, flags;
long slot;
unsigned long lpar_rc, slot, vsid, va, flags;
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> PAGE_SHIFT;
slot = pSeries_lpar_hpte_find(vpn);
slot = pSeries_lpar_hpte_find(va, psize);
BUG_ON(slot == -1);
flags = newpp & 7;
......@@ -461,18 +491,18 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
}
static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
int large, int local)
int psize, int local)
{
unsigned long avpn = va >> 23;
unsigned long want_v;
unsigned long lpar_rc;
unsigned long dummy1, dummy2;
if (large)
avpn &= ~0x1UL;
lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
&dummy2);
DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d",
slot, va, psize, local);
want_v = hpte_encode_v(va, psize);
lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v & HPTE_V_AVPN,
&dummy1, &dummy2);
if (lpar_rc == H_Not_Found)
return;
......@@ -494,7 +524,8 @@ void pSeries_lpar_flush_hash_range(unsigned long number, int local)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
for (i = 0; i < number; i++)
flush_hash_page(batch->vaddr[i], batch->pte[i], local);
flush_hash_page(batch->vaddr[i], batch->pte[i],
batch->psize, local);
if (lock_tlbie)
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
......
......@@ -47,6 +47,10 @@ config ARCH_MAY_HAVE_PC_FDC
bool
default y
config PPC_STD_MMU
bool
default y
# We optimistically allocate largepages from the VM, so make the limit
# large enough (16MB). This badly named config option is actually
# max order + 1
......@@ -294,6 +298,15 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
config PPC_64K_PAGES
bool "64k page size"
help
This option changes the kernel logical page size to 64k. On machines
without processor support for 64k pages, the kernel will simulate
them by loading each individual 4k page on demand transparently,
while on hardware with such support, it will be used to map
normal application pages.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on SMP
......
......@@ -93,6 +93,9 @@ int main(void)
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_PPC_64K_PAGES
DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
#endif
#ifdef CONFIG_HUGETLB_PAGE
DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
......
This diff is collapsed.
......@@ -23,7 +23,7 @@
static union {
struct systemcfg data;
u8 page[PAGE_SIZE];
} systemcfg_store __page_aligned;
} systemcfg_store __attribute__((__section__(".data.page.aligned")));
struct systemcfg *systemcfg = &systemcfg_store.data;
EXPORT_SYMBOL(systemcfg);
......
......@@ -635,10 +635,10 @@ static inline char *find_flat_dt_string(u32 offset)
* used to extract the memory informations at boot before we can
* unflatten the tree
*/
static int __init scan_flat_dt(int (*it)(unsigned long node,
const char *uname, int depth,
void *data),
void *data)
int __init of_scan_flat_dt(int (*it)(unsigned long node,
const char *uname, int depth,
void *data),
void *data)
{
unsigned long p = ((unsigned long)initial_boot_params) +
initial_boot_params->off_dt_struct;
......@@ -695,8 +695,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
* This function can be used within scan_flattened_dt callback to get
* access to properties
*/
static void* __init get_flat_dt_prop(unsigned long node, const char *name,
unsigned long *size)
void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
unsigned long *size)
{
unsigned long p = node;
......@@ -996,7 +996,7 @@ void __init unflatten_device_tree(void)
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth, void *data)
{
char *type = get_flat_dt_prop(node, "device_type", NULL);
char *type = of_get_flat_dt_prop(node, "device_type", NULL);
u32 *prop;
unsigned long size;
......@@ -1004,17 +1004,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
/* On LPAR, look for the first ibm,pft-size property for the hash table size
*/
if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
u32 *pft_size;
pft_size = (u32 *)get_flat_dt_prop(node, "ibm,pft-size", NULL);
if (pft_size != NULL) {
/* pft_size[0] is the NUMA CEC cookie */
ppc64_pft_size = pft_size[1];
}
}
if (initial_boot_params && initial_boot_params->version >= 2) {
/* version 2 of the kexec param format adds the phys cpuid
* of booted proc.
......@@ -1023,8 +1012,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
boot_cpuid = 0;
} else {
/* Check if it's the boot-cpu, set it's hw index in paca now */
if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) {
u32 *prop = get_flat_dt_prop(node, "reg", NULL);
if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL)
!= NULL) {
u32 *prop = of_get_flat_dt_prop(node, "reg", NULL);
set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop);
boot_cpuid_phys = get_hard_smp_processor_id(0);
}
......@@ -1032,14 +1022,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#ifdef CONFIG_ALTIVEC
/* Check if we have a VMX and eventually update CPU features */
prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", NULL);
prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL);
if (prop && (*prop) > 0) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
}
/* Same goes for Apple's "altivec" property */
prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL);
prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
if (prop) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
......@@ -1051,7 +1041,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* this by looking at the size of the ibm,ppc-interrupt-server#s
* property
*/
prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
&size);
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
if (prop && ((size / sizeof(u32)) > 1))
......@@ -1072,26 +1062,26 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
return 0;
/* get platform type */
prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL);
prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
if (prop == NULL)
return 0;
systemcfg->platform = *prop;
/* check if iommu is forced on or off */
if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
iommu_is_off = 1;
if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
iommu_force_on = 1;
prop64 = (u64*)get_flat_dt_prop(node, "linux,memory-limit", NULL);
prop64 = (u64*)of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
if (prop64)
memory_limit = *prop64;
prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-start",NULL);
if (prop64)
tce_alloc_start = *prop64;
prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
if (prop64)
tce_alloc_end = *prop64;
......@@ -1102,9 +1092,12 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
{
u64 *basep, *entryp;
basep = (u64*)get_flat_dt_prop(node, "linux,rtas-base", NULL);
entryp = (u64*)get_flat_dt_prop(node, "linux,rtas-entry", NULL);
prop = (u32*)get_flat_dt_prop(node, "linux,rtas-size", NULL);
basep = (u64*)of_get_flat_dt_prop(node,
"linux,rtas-base", NULL);
entryp = (u64*)of_get_flat_dt_prop(node,
"linux,rtas-entry", NULL);
prop = (u32*)of_get_flat_dt_prop(node,
"linux,rtas-size", NULL);
if (basep && entryp && prop) {
rtas.base = *basep;
rtas.entry = *entryp;
......@@ -1125,11 +1118,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
if (depth != 0)
return 0;
prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL);
prop = (u32 *)of_get_flat_dt_prop(node, "#size-cells", NULL);
dt_root_size_cells = (prop == NULL) ? 1 : *prop;
DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL);
prop = (u32 *)of_get_flat_dt_prop(node, "#address-cells", NULL);
dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
......@@ -1161,7 +1154,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
static int __init early_init_dt_scan_memory(unsigned long node,
const char *uname, int depth, void *data)
{
char *type = get_flat_dt_prop(node, "device_type", NULL);
char *type = of_get_flat_dt_prop(node, "device_type", NULL);
cell_t *reg, *endp;
unsigned long l;
......@@ -1169,7 +1162,7 @@ static int __init early_init_dt_scan_memory(unsigned long node,
if (type == NULL || strcmp(type, "memory") != 0)
return 0;
reg = (cell_t *)get_flat_dt_prop(node, "reg", &l);
reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
if (reg == NULL)
return 0;
......@@ -1225,19 +1218,16 @@ void __init early_init_devtree(void *params)
/* Setup flat device-tree pointer */
initial_boot_params = params;
/* By default, hash size is not set */
ppc64_pft_size = 0;
/* Retreive various informations from the /chosen node of the
* device-tree, including the platform type, initrd location and
* size, TCE reserve, and more ...
*/
scan_flat_dt(early_init_dt_scan_chosen, NULL);
of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
/* Scan memory nodes and rebuild LMBs */
lmb_init();
scan_flat_dt(early_init_dt_scan_root, NULL);
scan_flat_dt(early_init_dt_scan_memory, NULL);
of_scan_flat_dt(early_init_dt_scan_root, NULL);
of_scan_flat_dt(early_init_dt_scan_memory, NULL);
lmb_enforce_memory_limit(memory_limit);
lmb_analyze();
systemcfg->physicalMemorySize = lmb_phys_mem_size();
......@@ -1253,26 +1243,8 @@ void __init early_init_devtree(void *params)
/* Retreive hash table size from flattened tree plus other
* CPU related informations (altivec support, boot CPU ID, ...)
*/
scan_flat_dt(early_init_dt_scan_cpus, NULL);
/* If hash size wasn't obtained above, we calculate it now based on
* the total RAM size
*/
if (ppc64_pft_size == 0) {
unsigned long rnd_mem_size, pteg_count;
/* round mem_size up to next power of 2 */
rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize);
if (rnd_mem_size < systemcfg->physicalMemorySize)
rnd_mem_size <<= 1;
/* # pages / 2 */
pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11);
ppc64_pft_size = __ilog2(pteg_count << 7);
}
of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
DBG("Hash pftSize: %x\n", (int)ppc64_pft_size);
DBG(" <- early_init_devtree()\n");
}
......
......@@ -101,6 +101,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000020000000000)
#define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0000040000000000)
#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000)
#define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0000100000000000)
#else
/* ensure on 32b processors the flags are available for compiling but
* don't do anything */
......@@ -116,6 +117,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0)
#define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0)
#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0)
#define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0)
#endif
#ifndef __ASSEMBLY__
......@@ -339,6 +341,7 @@ enum {
#ifdef __powerpc64__
CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |
CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_CELL |
CPU_FTR_CI_LARGE_PAGE |
#endif
0,
......
......@@ -74,6 +74,11 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn);
/* Creates table for an individual device node */
extern void iommu_devnode_init_iSeries(struct device_node *dn);
/* Get table parameters from HV */
extern void iommu_table_getparms_iSeries(unsigned long busno,
unsigned char slotno,
unsigned char virtbus,
struct iommu_table* tbl);
#endif /* CONFIG_PPC_ISERIES */
......
......@@ -47,20 +47,22 @@ struct machdep_calls {
#ifdef CONFIG_PPC64
void (*hpte_invalidate)(unsigned long slot,
unsigned long va,
int large,
int psize,
int local);
long (*hpte_updatepp)(unsigned long slot,
unsigned long newpp,
unsigned long va,
int large,
int pize,
int local);
void (*hpte_updateboltedpp)(unsigned long newpp,
unsigned long ea);
unsigned long ea,
int psize);
long (*hpte_insert)(unsigned long hpte_group,
unsigned long va,
unsigned long prpn,
unsigned long rflags,
unsigned long vflags,
unsigned long rflags);
int psize);
long (*hpte_remove)(unsigned long hpte_group);
void (*flush_hash_range)(unsigned long number, int local);
......
......@@ -178,6 +178,14 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
extern struct device_node *of_node_get(struct device_node *node);
extern void of_node_put(struct device_node *node);
/* For scanning the flat device-tree at boot time */
int __init of_scan_flat_dt(int (*it)(unsigned long node,
const char *uname, int depth,
void *data),
void *data);
void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
unsigned long *size);
/* For updating the device tree at runtime */
extern void of_attach_node(struct device_node *);
extern void of_detach_node(const struct device_node *);
......
......@@ -289,7 +289,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
#ifdef CONFIG_PPC64
static __inline__ unsigned long
__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new)
__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
{
unsigned long prev;
......
This diff is collapsed.
......@@ -31,9 +31,9 @@ struct mm_struct;
struct ppc64_tlb_batch {
unsigned long index;
struct mm_struct *mm;
pte_t pte[PPC64_TLB_BATCH_NR];
real_pte_t pte[PPC64_TLB_BATCH_NR];
unsigned long vaddr[PPC64_TLB_BATCH_NR];
unsigned int large;
unsigned int psize;
};
DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
......@@ -48,8 +48,9 @@ static inline void flush_tlb_pending(void)
put_cpu_var(ppc64_tlb_batch);
}
extern void flush_hash_page(unsigned long va, pte_t pte, int local);
void flush_hash_range(unsigned long number, int local);
extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
int local);
extern void flush_hash_range(unsigned long number, int local);
#else /* CONFIG_PPC64 */
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment