Commit b9c565d5, authored by Hugh Dickins, committed by Linus Torvalds

memcg: remove clear_page_cgroup and atomics

Remove clear_page_cgroup: it's an unhelpful helper, see for example how
mem_cgroup_uncharge_page had to unlock_page_cgroup just in order to call it
(serious races from that?  I'm not sure).

Once that's gone, you can see it's pointless for page_cgroup's ref_cnt to be
atomic: it's always manipulated under lock_page_cgroup, except where
force_empty unilaterally reset it to 0 (and how does uncharge's
atomic_dec_and_test protect against that?).

Simplify this page_cgroup locking: if you've got the lock and the pc is
attached, then the ref_cnt must be positive: VM_BUG_ONs to check that, and to
check that pc->page matches page (we're on the way to finding why sometimes it
doesn't, but this patch doesn't fix that).
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d5b69e38
...@@ -161,8 +161,7 @@ struct page_cgroup { ...@@ -161,8 +161,7 @@ struct page_cgroup {
struct list_head lru; /* per cgroup LRU list */ struct list_head lru; /* per cgroup LRU list */
struct page *page; struct page *page;
struct mem_cgroup *mem_cgroup; struct mem_cgroup *mem_cgroup;
atomic_t ref_cnt; /* Helpful when pages move b/w */ int ref_cnt; /* cached, mapped, migrating */
/* mapped and cached states */
int flags; int flags;
}; };
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
...@@ -283,27 +282,6 @@ static void unlock_page_cgroup(struct page *page) ...@@ -283,27 +282,6 @@ static void unlock_page_cgroup(struct page *page)
bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
} }
/*
* Clear page->page_cgroup member under lock_page_cgroup().
* If given "pc" value is different from one page->page_cgroup,
* page->cgroup is not cleared.
* Returns a value of page->page_cgroup at lock taken.
* A can can detect failure of clearing by following
* clear_page_cgroup(page, pc) == pc
*/
static struct page_cgroup *clear_page_cgroup(struct page *page,
struct page_cgroup *pc)
{
struct page_cgroup *ret;
/* lock and clear */
lock_page_cgroup(page);
ret = page_get_page_cgroup(page);
if (likely(ret == pc))
page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);
return ret;
}
static void __mem_cgroup_remove_list(struct page_cgroup *pc) static void __mem_cgroup_remove_list(struct page_cgroup *pc)
{ {
int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
...@@ -555,16 +533,13 @@ retry: ...@@ -555,16 +533,13 @@ retry:
* the page has already been accounted. * the page has already been accounted.
*/ */
if (pc) { if (pc) {
if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) { VM_BUG_ON(pc->page != page);
/* this page is under being uncharged ? */ VM_BUG_ON(pc->ref_cnt <= 0);
unlock_page_cgroup(page);
cpu_relax(); pc->ref_cnt++;
goto retry;
} else {
unlock_page_cgroup(page); unlock_page_cgroup(page);
goto done; goto done;
} }
}
unlock_page_cgroup(page); unlock_page_cgroup(page);
pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
...@@ -612,7 +587,7 @@ retry: ...@@ -612,7 +587,7 @@ retry:
congestion_wait(WRITE, HZ/10); congestion_wait(WRITE, HZ/10);
} }
atomic_set(&pc->ref_cnt, 1); pc->ref_cnt = 1;
pc->mem_cgroup = mem; pc->mem_cgroup = mem;
pc->page = page; pc->page = page;
pc->flags = PAGE_CGROUP_FLAG_ACTIVE; pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
...@@ -683,24 +658,24 @@ void mem_cgroup_uncharge_page(struct page *page) ...@@ -683,24 +658,24 @@ void mem_cgroup_uncharge_page(struct page *page)
if (!pc) if (!pc)
goto unlock; goto unlock;
if (atomic_dec_and_test(&pc->ref_cnt)) { VM_BUG_ON(pc->page != page);
page = pc->page; VM_BUG_ON(pc->ref_cnt <= 0);
mz = page_cgroup_zoneinfo(pc);
/* if (--(pc->ref_cnt) == 0) {
* get page->cgroup and clear it under lock. page_assign_page_cgroup(page, NULL);
* force_empty can drop page->cgroup without checking refcnt.
*/
unlock_page_cgroup(page); unlock_page_cgroup(page);
if (clear_page_cgroup(page, pc) == pc) {
mem = pc->mem_cgroup; mem = pc->mem_cgroup;
css_put(&mem->css); css_put(&mem->css);
res_counter_uncharge(&mem->res, PAGE_SIZE); res_counter_uncharge(&mem->res, PAGE_SIZE);
mz = page_cgroup_zoneinfo(pc);
spin_lock_irqsave(&mz->lru_lock, flags); spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_remove_list(pc); __mem_cgroup_remove_list(pc);
spin_unlock_irqrestore(&mz->lru_lock, flags); spin_unlock_irqrestore(&mz->lru_lock, flags);
kfree(pc); kfree(pc);
} return;
lock_page_cgroup(page);
} }
unlock: unlock:
...@@ -714,14 +689,13 @@ unlock: ...@@ -714,14 +689,13 @@ unlock:
int mem_cgroup_prepare_migration(struct page *page) int mem_cgroup_prepare_migration(struct page *page)
{ {
struct page_cgroup *pc; struct page_cgroup *pc;
int ret = 0;
lock_page_cgroup(page); lock_page_cgroup(page);
pc = page_get_page_cgroup(page); pc = page_get_page_cgroup(page);
if (pc && atomic_inc_not_zero(&pc->ref_cnt)) if (pc)
ret = 1; pc->ref_cnt++;
unlock_page_cgroup(page); unlock_page_cgroup(page);
return ret; return pc != NULL;
} }
void mem_cgroup_end_migration(struct page *page) void mem_cgroup_end_migration(struct page *page)
...@@ -740,15 +714,17 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage) ...@@ -740,15 +714,17 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
struct mem_cgroup_per_zone *mz; struct mem_cgroup_per_zone *mz;
unsigned long flags; unsigned long flags;
retry: lock_page_cgroup(page);
pc = page_get_page_cgroup(page); pc = page_get_page_cgroup(page);
if (!pc) if (!pc) {
unlock_page_cgroup(page);
return; return;
}
mz = page_cgroup_zoneinfo(pc); page_assign_page_cgroup(page, NULL);
if (clear_page_cgroup(page, pc) != pc) unlock_page_cgroup(page);
goto retry;
mz = page_cgroup_zoneinfo(pc);
spin_lock_irqsave(&mz->lru_lock, flags); spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_remove_list(pc); __mem_cgroup_remove_list(pc);
spin_unlock_irqrestore(&mz->lru_lock, flags); spin_unlock_irqrestore(&mz->lru_lock, flags);
...@@ -794,16 +770,20 @@ retry: ...@@ -794,16 +770,20 @@ retry:
while (--count && !list_empty(list)) { while (--count && !list_empty(list)) {
pc = list_entry(list->prev, struct page_cgroup, lru); pc = list_entry(list->prev, struct page_cgroup, lru);
page = pc->page; page = pc->page;
/* Avoid race with charge */ lock_page_cgroup(page);
atomic_set(&pc->ref_cnt, 0); if (page_get_page_cgroup(page) == pc) {
if (clear_page_cgroup(page, pc) == pc) { page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);
css_put(&mem->css); css_put(&mem->css);
res_counter_uncharge(&mem->res, PAGE_SIZE); res_counter_uncharge(&mem->res, PAGE_SIZE);
__mem_cgroup_remove_list(pc); __mem_cgroup_remove_list(pc);
kfree(pc); kfree(pc);
} else /* being uncharged ? ...do relax */ } else {
/* racing uncharge: let page go then retry */
unlock_page_cgroup(page);
break; break;
} }
}
spin_unlock_irqrestore(&mz->lru_lock, flags); spin_unlock_irqrestore(&mz->lru_lock, flags);
if (!list_empty(list)) { if (!list_empty(list)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment