Commit 4e416953 authored by Balbir Singh's avatar Balbir Singh Committed by Linus Torvalds

memory controller: soft limit reclaim on contention

Implement reclaim from groups over their soft limit

Permit reclaim from memory cgroups on contention (via the direct reclaim
path).

memory cgroup soft limit reclaim finds the group that exceeds its soft
limit by the largest number of pages and reclaims pages from it and then
reinserts the cgroup into its correct place in the rbtree.

Add additional checks to mem_cgroup_hierarchical_reclaim() to detect long
loops in case all swap is turned off.  The code has been refactored and
the loop check (loop < 2) has been enhanced for soft limits.  For soft
limits, we try to do more targetted reclaim.  Instead of bailing out after
two loops, the routine now reclaims memory proportional to the size by
which the soft limit is exceeded.  The proportion has been empirically
determined.

[akpm@linux-foundation.org: build fix]
[kamezawa.hiroyu@jp.fujitsu.com: fix softlimit css refcnt handling]
[nishimura@mxp.nes.nec.co.jp: refcount of the "victim" should be decremented before exiting the loop]
Signed-off-by: default avatarBalbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: default avatarKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: default avatarKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: default avatarDaisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 75822b44
...@@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void) ...@@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void)
extern bool mem_cgroup_oom_called(struct task_struct *task); extern bool mem_cgroup_oom_called(struct task_struct *task);
void mem_cgroup_update_mapped_file_stat(struct page *page, int val); void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask, int nid,
int zid);
#else /* CONFIG_CGROUP_MEM_RES_CTLR */ #else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct mem_cgroup; struct mem_cgroup;
...@@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page, ...@@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page,
{ {
} }
static inline
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask, int nid, int zid)
{
return 0;
}
#endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* CONFIG_CGROUP_MEM_CONT */
#endif /* _LINUX_MEMCONTROL_H */ #endif /* _LINUX_MEMCONTROL_H */
......
...@@ -217,6 +217,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, ...@@ -217,6 +217,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap, gfp_t gfp_mask, bool noswap,
unsigned int swappiness); unsigned int swappiness);
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
unsigned int swappiness,
struct zone *zone,
int nid);
extern int __isolate_lru_page(struct page *page, int mode, int file); extern int __isolate_lru_page(struct page *page, int mode, int file);
extern unsigned long shrink_all_memory(unsigned long nr_pages); extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness; extern int vm_swappiness;
......
This diff is collapsed.
...@@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, ...@@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#ifdef CONFIG_CGROUP_MEM_RES_CTLR #ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
unsigned int swappiness,
struct zone *zone, int nid)
{
struct scan_control sc = {
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = !noswap,
.swap_cluster_max = SWAP_CLUSTER_MAX,
.swappiness = swappiness,
.order = 0,
.mem_cgroup = mem,
.isolate_pages = mem_cgroup_isolate_pages,
};
nodemask_t nm = nodemask_of_node(nid);
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
sc.nodemask = &nm;
sc.nr_reclaimed = 0;
sc.nr_scanned = 0;
/*
* NOTE: Although we can get the priority field, using it
* here is not a good idea, since it limits the pages we can scan.
* if we don't reclaim here, the shrink_zone from balance_pgdat
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
shrink_zone(0, zone, &sc);
return sc.nr_reclaimed;
}
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
gfp_t gfp_mask, gfp_t gfp_mask,
bool noswap, bool noswap,
unsigned int swappiness) unsigned int swappiness)
{ {
struct zonelist *zonelist;
struct scan_control sc = { struct scan_control sc = {
.may_writepage = !laptop_mode, .may_writepage = !laptop_mode,
.may_unmap = 1, .may_unmap = 1,
...@@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, ...@@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
.isolate_pages = mem_cgroup_isolate_pages, .isolate_pages = mem_cgroup_isolate_pages,
.nodemask = NULL, /* we don't care the placement */ .nodemask = NULL, /* we don't care the placement */
}; };
struct zonelist *zonelist;
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
...@@ -1974,6 +2007,7 @@ loop_again: ...@@ -1974,6 +2007,7 @@ loop_again:
for (i = 0; i <= end_zone; i++) { for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i; struct zone *zone = pgdat->node_zones + i;
int nr_slab; int nr_slab;
int nid, zid;
if (!populated_zone(zone)) if (!populated_zone(zone))
continue; continue;
...@@ -1988,6 +2022,15 @@ loop_again: ...@@ -1988,6 +2022,15 @@ loop_again:
temp_priority[i] = priority; temp_priority[i] = priority;
sc.nr_scanned = 0; sc.nr_scanned = 0;
note_zone_scanning_priority(zone, priority); note_zone_scanning_priority(zone, priority);
nid = pgdat->node_id;
zid = zone_idx(zone);
/*
* Call soft limit reclaim before calling shrink_zone.
* For now we ignore the return value
*/
mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
nid, zid);
/* /*
* We put equal pressure on every zone, unless one * We put equal pressure on every zone, unless one
* zone has way too many pages free already. * zone has way too many pages free already.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment