Commit 5d337b91, authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] swap: swap_lock replace list+device

The idea of a swap_device_lock per device, and a swap_list_lock over them all,
is appealing; but in practice almost every holder of swap_device_lock must
already hold swap_list_lock, which defeats the purpose of the split.

The only exceptions have been swap_duplicate, valid_swaphandles and an
untrodden path in try_to_unuse (plus a few places added in this series).
valid_swaphandles doesn't show up high in profiles, but swap_duplicate does
demand attention.  However, with the hold time in get_swap_pages so much
reduced, I've not yet found a load and set of swap device priorities to show
even swap_duplicate benefitting from the split.  Certainly the split is mere
overhead in the common case of a single swap device.

So, replace swap_list_lock and swap_device_lock by spinlock_t swap_lock
(generally we seem to prefer an _ in the name, and not hide in a macro).

If someone can show a regression in swap_duplicate, then probably we should
add a hashlock for the swap_map entries alone (shorts being non-atomic), so as
to help the case of the single swap device too.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 048c27fd
...@@ -83,19 +83,18 @@ single address space optimization, so that the zap_page_range (from ...@@ -83,19 +83,18 @@ single address space optimization, so that the zap_page_range (from
vmtruncate) does not lose sending ipi's to cloned threads that might vmtruncate) does not lose sending ipi's to cloned threads that might
be spawned underneath it and go to user mode to drag in pte's into tlbs. be spawned underneath it and go to user mode to drag in pte's into tlbs.
swap_list_lock/swap_device_lock swap_lock
------------------------------- --------------
The swap devices are chained in priority order from the "swap_list" header. The swap devices are chained in priority order from the "swap_list" header.
The "swap_list" is used for the round-robin swaphandle allocation strategy. The "swap_list" is used for the round-robin swaphandle allocation strategy.
The #free swaphandles is maintained in "nr_swap_pages". These two together The #free swaphandles is maintained in "nr_swap_pages". These two together
are protected by the swap_list_lock. are protected by the swap_lock.
The swap_device_lock, which is per swap device, protects the reference The swap_lock also protects all the device reference counts on the
counts on the corresponding swaphandles, maintained in the "swap_map" corresponding swaphandles, maintained in the "swap_map" array, and the
array, and the "highest_bit" and "lowest_bit" fields. "highest_bit" and "lowest_bit" fields.
Both of these are spinlocks, and are never acquired from intr level. The The swap_lock is a spinlock, and is never acquired from intr level.
locking hierarchy is swap_list_lock -> swap_device_lock.
To prevent races between swap space deletion or async readahead swapins To prevent races between swap space deletion or async readahead swapins
deciding whether a swap handle is being used, ie worthy of being read in deciding whether a swap handle is being used, ie worthy of being read in
......
...@@ -121,7 +121,7 @@ enum { ...@@ -121,7 +121,7 @@ enum {
*/ */
struct swap_info_struct { struct swap_info_struct {
unsigned int flags; unsigned int flags;
spinlock_t sdev_lock; int prio; /* swap priority */
struct file *swap_file; struct file *swap_file;
struct block_device *bdev; struct block_device *bdev;
struct list_head extent_list; struct list_head extent_list;
...@@ -135,7 +135,6 @@ struct swap_info_struct { ...@@ -135,7 +135,6 @@ struct swap_info_struct {
unsigned int pages; unsigned int pages;
unsigned int max; unsigned int max;
unsigned int inuse_pages; unsigned int inuse_pages;
int prio; /* swap priority */
int next; /* next entry on swap list */ int next; /* next entry on swap list */
}; };
...@@ -221,13 +220,7 @@ extern int can_share_swap_page(struct page *); ...@@ -221,13 +220,7 @@ extern int can_share_swap_page(struct page *);
extern int remove_exclusive_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *);
struct backing_dev_info; struct backing_dev_info;
extern struct swap_list_t swap_list; extern spinlock_t swap_lock;
extern spinlock_t swaplock;
#define swap_list_lock() spin_lock(&swaplock)
#define swap_list_unlock() spin_unlock(&swaplock)
#define swap_device_lock(p) spin_lock(&p->sdev_lock)
#define swap_device_unlock(p) spin_unlock(&p->sdev_lock)
/* linux/mm/thrash.c */ /* linux/mm/thrash.c */
extern struct mm_struct * swap_token_mm; extern struct mm_struct * swap_token_mm;
......
...@@ -54,9 +54,8 @@ ...@@ -54,9 +54,8 @@
* *
* ->i_mmap_lock (vmtruncate) * ->i_mmap_lock (vmtruncate)
* ->private_lock (__free_pte->__set_page_dirty_buffers) * ->private_lock (__free_pte->__set_page_dirty_buffers)
* ->swap_list_lock * ->swap_lock (exclusive_swap_page, others)
* ->swap_device_lock (exclusive_swap_page, others) * ->mapping->tree_lock
* ->mapping->tree_lock
* *
* ->i_sem * ->i_sem
* ->i_mmap_lock (truncate->unmap_mapping_range) * ->i_mmap_lock (truncate->unmap_mapping_range)
...@@ -86,7 +85,7 @@ ...@@ -86,7 +85,7 @@
* ->page_table_lock (anon_vma_prepare and various) * ->page_table_lock (anon_vma_prepare and various)
* *
* ->page_table_lock * ->page_table_lock
* ->swap_device_lock (try_to_unmap_one) * ->swap_lock (try_to_unmap_one)
* ->private_lock (try_to_unmap_one) * ->private_lock (try_to_unmap_one)
* ->tree_lock (try_to_unmap_one) * ->tree_lock (try_to_unmap_one)
* ->zone.lru_lock (follow_page->mark_page_accessed) * ->zone.lru_lock (follow_page->mark_page_accessed)
......
...@@ -34,9 +34,8 @@ ...@@ -34,9 +34,8 @@
* anon_vma->lock * anon_vma->lock
* mm->page_table_lock * mm->page_table_lock
* zone->lru_lock (in mark_page_accessed) * zone->lru_lock (in mark_page_accessed)
* swap_list_lock (in swap_free etc's swap_info_get) * swap_lock (in swap_duplicate, swap_info_get)
* mmlist_lock (in mmput, drain_mmlist and others) * mmlist_lock (in mmput, drain_mmlist and others)
* swap_device_lock (in swap_duplicate, swap_info_get)
* mapping->private_lock (in __set_page_dirty_buffers) * mapping->private_lock (in __set_page_dirty_buffers)
* inode_lock (in set_page_dirty's __mark_inode_dirty) * inode_lock (in set_page_dirty's __mark_inode_dirty)
* sb_lock (within inode_lock in fs/fs-writeback.c) * sb_lock (within inode_lock in fs/fs-writeback.c)
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment