Commit d0f13e3c authored by Benjamin Herrenschmidt's avatar Benjamin Herrenschmidt Committed by Paul Mackerras

[POWERPC] Introduce address space "slices"

The basic issue is to be able to do what hugetlbfs does but with
different page sizes for some other special filesystems; more
specifically, my need is:

 - Huge pages

 - SPE local store mappings using 64K pages on a 4K base page size
kernel on Cell

 - Some special 4K segments in 64K-page kernels for mapping a dodgy
type of powerpc-specific infiniband hardware that requires 4K MMU
mappings for various reasons I won't explain here.

The main issues are:

 - To maintain/keep track of the page size per "segment" (as we can
only have one page size per segment on powerpc, which are 256MB
divisions of the address space).

 - To make sure special mappings stay within their allotted
"segments" (including MAP_FIXED crap)

 - To make sure everybody else doesn't mmap/brk/grow_stack into a
"segment" that is used for a special mapping

Some of the necessary mechanisms to handle that were present in the
hugetlbfs code, but mostly in ways not suitable for anything else.

The patch relies on some changes to the generic get_unmapped_area()
that just got merged.  It still hijacks hugetlb callbacks here or
there as the generic code hasn't been entirely cleaned up yet but
that shouldn't be a problem.

So what is a slice ?  Well, I re-used the mechanism used formerly by our
hugetlbfs implementation which divides the address space in
"meta-segments" which I called "slices".  The division is done using
256MB slices below 4G, and 1T slices above.  Thus the address space is
divided currently into 16 "low" slices and 16 "high" slices.  (Special
case: high slice 0 is the area between 4G and 1T).

Doing so simplifies significantly the tracking of segments and avoids
having to keep track of all the 256MB segments in the address space.

While I used the "concepts" of hugetlbfs, I mostly re-implemented
everything in a more generic way and "ported" hugetlbfs to it.

Slices can have an associated page size, which is encoded in the mmu
context and used by the SLB miss handler to set the segment sizes.  The
hash code currently doesn't care, it has a specific check for hugepages,
though I might add a mechanism to provide per-slice hash mapping
functions in the future.

The slice code provide a pair of "generic" get_unmapped_area() (bottomup
and topdown) functions that should work with any slice size.  There is
some trickiness here so I would appreciate people to have a look at the
implementation of these and let me know if I got something wrong.
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: default avatarPaul Mackerras <paulus@samba.org>
parent 16f1c746
......@@ -352,6 +352,11 @@ config PPC_STD_MMU_32
def_bool y
depends on PPC_STD_MMU && PPC32
config PPC_MM_SLICES
bool
default y if HUGETLB_PAGE
default n
config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
depends on PPC64
......
......@@ -122,12 +122,18 @@ int main(void)
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
#ifdef CONFIG_HUGETLB_PAGE
DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef CONFIG_PPC_MM_SLICES
DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
context.low_slices_psize));
DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
context.high_slices_psize));
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
#else
DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
#endif /* CONFIG_PPC_MM_SLICES */
DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
......
......@@ -18,4 +18,5 @@ obj-$(CONFIG_40x) += 4xx_mmu.o
obj-$(CONFIG_44x) += 44x_mmu.o
obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
......@@ -51,6 +51,7 @@
#include <asm/cputable.h>
#include <asm/abs_addr.h>
#include <asm/sections.h>
#include <asm/spu.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
......@@ -601,8 +602,13 @@ static void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
{
if (mm->context.user_psize == MMU_PAGE_4K)
return;
#ifdef CONFIG_PPC_MM_SLICES
slice_set_user_psize(mm, MMU_PAGE_4K);
#else /* CONFIG_PPC_MM_SLICES */
mm->context.user_psize = MMU_PAGE_4K;
mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp;
#endif /* CONFIG_PPC_MM_SLICES */
#ifdef CONFIG_SPE_BASE
spu_flush_all_slbs(mm);
#endif
......@@ -670,11 +676,14 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
#ifdef CONFIG_HUGETLB_PAGE
/* Handle hugepage regions */
if (unlikely(in_hugepage_area(mm->context, ea))) {
if (HPAGE_SHIFT &&
unlikely(get_slice_psize(mm, ea) == mmu_huge_psize)) {
DBG_LOW(" -> huge page !\n");
return hash_huge_page(mm, access, ea, vsid, local, trap);
}
#endif /* CONFIG_HUGETLB_PAGE */
/* Get PTE and page size from page tables */
ptep = find_linux_pte(pgdir, ea);
......@@ -770,10 +779,13 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long flags;
int local = 0;
/* We don't want huge pages prefaulted for now
*/
if (unlikely(in_hugepage_area(mm->context, ea)))
BUG_ON(REGION_ID(ea) != USER_REGION_ID);
#ifdef CONFIG_PPC_MM_SLICES
/* We only prefault standard pages for now */
if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize));
return;
#endif
DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
" trap=%lx\n", mm, mm->pgd, ea, access, trap);
......
This diff is collapsed.
......@@ -28,6 +28,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
int index;
int err;
int new_context = (mm->context.id == 0);
again:
if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
......@@ -50,9 +51,18 @@ again:
}
mm->context.id = index;
#ifdef CONFIG_PPC_MM_SLICES
/* The old code would re-promote on fork, we don't do that
* when using slices as it could cause problem promoting slices
* that have been forced down to 4K
*/
if (new_context)
slice_set_user_psize(mm, mmu_virtual_psize);
#else
mm->context.user_psize = mmu_virtual_psize;
mm->context.sllp = SLB_VSID_USER |
mmu_psize_defs[mmu_virtual_psize].sllp;
#endif
return 0;
}
......
......@@ -198,12 +198,6 @@ void slb_initialize(void)
static int slb_encoding_inited;
extern unsigned int *slb_miss_kernel_load_linear;
extern unsigned int *slb_miss_kernel_load_io;
#ifdef CONFIG_HUGETLB_PAGE
extern unsigned int *slb_miss_user_load_huge;
unsigned long huge_llp;
huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
#endif
/* Prepare our SLB miss handler based on our page size */
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
......@@ -220,11 +214,6 @@ void slb_initialize(void)
DBG("SLB: linear LLP = %04x\n", linear_llp);
DBG("SLB: io LLP = %04x\n", io_llp);
#ifdef CONFIG_HUGETLB_PAGE
patch_slb_encoding(slb_miss_user_load_huge,
SLB_VSID_USER | huge_llp);
DBG("SLB: huge LLP = %04x\n", huge_llp);
#endif
}
get_paca()->stab_rr = SLB_NUM_BOLTED;
......
......@@ -82,31 +82,45 @@ _GLOBAL(slb_miss_kernel_load_io)
srdi. r9,r10,USER_ESID_BITS
bne- 8f /* invalid ea bits set */
/* Figure out if the segment contains huge pages */
#ifdef CONFIG_HUGETLB_PAGE
BEGIN_FTR_SECTION
b 1f
END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
/* when using slices, we extract the psize off the slice bitmaps
* and then we need to get the sllp encoding off the mmu_psize_defs
* array.
*
* XXX This is a bit inefficient especially for the normal case,
* so we should try to implement a fast path for the standard page
* size using the old sllp value so we avoid the array. We cannot
* really do dynamic patching unfortunately as processes might flip
* between 4k and 64k standard page size
*/
#ifdef CONFIG_PPC_MM_SLICES
cmpldi r10,16
lhz r9,PACALOWHTLBAREAS(r13)
mr r11,r10
/* Get the slice index * 4 in r11 and matching slice size mask in r9 */
ld r9,PACALOWSLICESPSIZE(r13)
sldi r11,r10,2
blt 5f
ld r9,PACAHIGHSLICEPSIZE(r13)
srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
andi. r11,r11,0x3c
lhz r9,PACAHIGHHTLBAREAS(r13)
srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
5: srd r9,r9,r11
andi. r9,r9,1
beq 1f
_GLOBAL(slb_miss_user_load_huge)
li r11,0
b 2f
1:
#endif /* CONFIG_HUGETLB_PAGE */
5: /* Extract the psize and multiply to get an array offset */
srd r9,r9,r11
andi. r9,r9,0xf
mulli r9,r9,MMUPSIZEDEFSIZE
/* Now get to the array and obtain the sllp
*/
ld r11,PACATOC(r13)
ld r11,mmu_psize_defs@got(r11)
add r11,r11,r9
ld r11,MMUPSIZESLLP(r11)
ori r11,r11,SLB_VSID_USER
#else
/* paca context sllp already contains the SLB_VSID_USER bits */
lhz r11,PACACONTEXTSLLP(r13)
2:
#endif /* CONFIG_PPC_MM_SLICES */
ld r9,PACACONTEXTID(r13)
rldimi r10,r9,USER_ESID_BITS,0
b slb_finish_load
......
This diff is collapsed.
......@@ -144,12 +144,11 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
switch(REGION_ID(ea)) {
case USER_REGION_ID:
#ifdef CONFIG_HUGETLB_PAGE
if (in_hugepage_area(mm->context, ea))
psize = mmu_huge_psize;
else
#endif
#ifdef CONFIG_PPC_MM_SLICES
psize = get_slice_psize(mm, ea);
#else
psize = mm->context.user_psize;
#endif
vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
SLB_VSID_USER;
break;
......
......@@ -351,9 +351,12 @@ typedef unsigned long mm_context_id_t;
typedef struct {
mm_context_id_t id;
u16 user_psize; /* page size index */
u16 sllp; /* SLB entry page size encoding */
#ifdef CONFIG_HUGETLB_PAGE
u16 low_htlb_areas, high_htlb_areas;
#ifdef CONFIG_PPC_MM_SLICES
u64 low_slices_psize; /* SLB page size encodings */
u64 high_slices_psize; /* 4 bits per slice for now */
#else
u16 sllp; /* SLB page size encoding */
#endif
unsigned long vdso_base;
} mm_context_t;
......
......@@ -83,8 +83,8 @@ struct paca_struct {
mm_context_t context;
u16 vmalloc_sllp;
u16 slb_cache[SLB_CACHE_ENTRIES];
u16 slb_cache_ptr;
u16 slb_cache[SLB_CACHE_ENTRIES];
/*
* then miscellaneous read-write fields
......
......@@ -88,57 +88,55 @@ extern unsigned int HPAGE_SHIFT;
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_HUGETLB_PAGE
#ifdef CONFIG_PPC_MM_SLICES
#define HTLB_AREA_SHIFT 40
#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT)
#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT)
#define SLICE_LOW_SHIFT 28
#define SLICE_HIGH_SHIFT 40
#define LOW_ESID_MASK(addr, len) \
(((1U << (GET_ESID(min((addr)+(len)-1, 0x100000000UL))+1)) \
- (1U << GET_ESID(min((addr), 0x100000000UL)))) & 0xffff)
#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
- (1U << GET_HTLB_AREA(addr))) & 0xffff)
#define SLICE_LOW_TOP (0x100000000ul)
#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
#define SLICE_NUM_HIGH (PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
#define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
#define ARCH_HAS_SETCLEAR_HUGE_PTE
#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
#define touches_hugepage_low_range(mm, addr, len) \
(((addr) < 0x100000000UL) \
&& (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas))
#define touches_hugepage_high_range(mm, addr, len) \
((((addr) + (len)) > 0x100000000UL) \
&& (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas))
#define __within_hugepage_low_range(addr, len, segmask) \
( (((addr)+(len)) <= 0x100000000UL) \
&& ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)))
#define within_hugepage_low_range(addr, len) \
__within_hugepage_low_range((addr), (len), \
current->mm->context.low_htlb_areas)
#define __within_hugepage_high_range(addr, len, zonemask) \
( ((addr) >= 0x100000000UL) \
&& ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)))
#define within_hugepage_high_range(addr, len) \
__within_hugepage_high_range((addr), (len), \
current->mm->context.high_htlb_areas)
#define is_hugepage_only_range(mm, addr, len) \
(touches_hugepage_high_range((mm), (addr), (len)) || \
touches_hugepage_low_range((mm), (addr), (len)))
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#ifndef __ASSEMBLY__
struct slice_mask {
u16 low_slices;
u16 high_slices;
};
struct mm_struct;
#define in_hugepage_area(context, addr) \
(cpu_has_feature(CPU_FTR_16M_PAGE) && \
( ( (addr) >= 0x100000000UL) \
? ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) \
: ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) )
extern unsigned long slice_get_unmapped_area(unsigned long addr,
unsigned long len,
unsigned long flags,
unsigned int psize,
int topdown,
int use_cache);
#else /* !CONFIG_HUGETLB_PAGE */
extern unsigned int get_slice_psize(struct mm_struct *mm,
unsigned long addr);
#define in_hugepage_area(mm, addr) 0
extern void slice_init_context(struct mm_struct *mm, unsigned int psize);
extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
extern int is_hugepage_only_range(struct mm_struct *m,
unsigned long addr,
unsigned long len);
#endif /* __ASSEMBLY__ */
#else
#define slice_init()
#endif /* CONFIG_PPC_MM_SLICES */
#ifdef CONFIG_HUGETLB_PAGE
#define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
#define ARCH_HAS_SETCLEAR_HUGE_PTE
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif /* !CONFIG_HUGETLB_PAGE */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment