Commit 9d0243bc authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] drop-pagecache

Add /proc/sys/vm/drop_caches.  When written to, this will cause the kernel to
discard as much pagecache and/or reclaimable slab objects as it can.  This
operation requires root permissions.

It won't drop dirty data, so the user should run `sync' first.

Caveats:

a) Holds inode_lock for exorbitant amounts of time.

b) Needs to be taught about NUMA nodes: propagate these all the way through
   so the discarding can be controlled on a per-node basis.

This is a debugging feature: useful for getting consistent results between
filesystem benchmarks.  We could possibly put it under a config option, but
it's less than 300 bytes.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent bec6b0c8
...@@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent ...@@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent
unnecessary page faults in thrashing situation. The unit of the value is unnecessary page faults in thrashing situation. The unit of the value is
second. The value would be useful to tune thrashing behavior. second. The value would be useful to tune thrashing behavior.
drop_caches
-----------
Writing to this will cause the kernel to drop clean caches, dentries and
inodes from memory, causing that memory to become free.
To free pagecache:
echo 1 > /proc/sys/vm/drop_caches
To free dentries and inodes:
echo 2 > /proc/sys/vm/drop_caches
To free pagecache, dentries and inodes:
echo 3 > /proc/sys/vm/drop_caches
As this is a non-destructive operation and dirty objects are not freeable, the
user should run `sync' first.
2.5 /proc/sys/dev - Device specific parameters 2.5 /proc/sys/dev - Device specific parameters
---------------------------------------------- ----------------------------------------------
......
...@@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm: ...@@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm:
- min_free_kbytes - min_free_kbytes
- laptop_mode - laptop_mode
- block_dump - block_dump
- drop_caches
============================================================== ==============================================================
dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
block_dump, swap_token_timeout: block_dump, swap_token_timeout, drop_caches:
See Documentation/filesystems/proc.txt See Documentation/filesystems/proc.txt
......
...@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ...@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
ioprio.o pnode.o ioprio.o pnode.o drop_caches.o
obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_INOTIFY) += inotify.o
obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_EPOLL) += eventpoll.o
......
/*
* Implement the manual drop-all-pagecache function
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include <linux/sysctl.h>
#include <linux/gfp.h>
/*
 * A global variable is a bit ugly, but it keeps the code simple.
 *
 * Backing store for the "drop_caches" sysctl.  After a write,
 * drop_caches_sysctl_handler() tests it as a bitmask: bit 0 (value 1)
 * triggers drop_pagecache(), bit 1 (value 2) triggers drop_slab().
 */
int sysctl_drop_caches;
/*
 * Walk every inode on @sb's s_inodes list and invalidate its pagecache
 * mapping, skipping inodes whose i_state has I_FREEING or I_WILL_FREE set.
 *
 * The whole walk runs with inode_lock held.
 */
static void drop_pagecache_sb(struct super_block *sb)
{
	struct inode *ino;

	spin_lock(&inode_lock);
	list_for_each_entry(ino, &sb->s_inodes, i_sb_list) {
		if (!(ino->i_state & (I_FREEING|I_WILL_FREE)))
			invalidate_inode_pages(ino->i_mapping);
	}
	spin_unlock(&inode_lock);
}
/*
 * Call drop_pagecache_sb() on every superblock in the global super_blocks
 * list that has a root (sb->s_root set).
 *
 * sb_lock is held while walking the list, but is dropped around the
 * per-superblock work.  s_count is bumped before dropping it and released
 * again through __put_super_and_need_restart() once sb_lock is retaken.
 */
void drop_pagecache(void)
{
struct super_block *sb;
spin_lock(&sb_lock);
restart:
list_for_each_entry(sb, &super_blocks, s_list) {
/* Take a reference before releasing sb_lock. */
sb->s_count++;
spin_unlock(&sb_lock);
/* s_umount is held for reading across the s_root check and the drop. */
down_read(&sb->s_umount);
if (sb->s_root)
drop_pagecache_sb(sb);
up_read(&sb->s_umount);
spin_lock(&sb_lock);
/*
 * Drop our reference.  A non-zero return means the walk must start
 * over from the head of the list (presumably because the list may
 * have changed while sb_lock was released -- confirm against
 * __put_super_and_need_restart()).
 */
if (__put_super_and_need_restart(sb))
goto restart;
}
spin_unlock(&sb_lock);
}
/*
 * Keep calling shrink_slab() until a single pass reports 10 or fewer
 * objects shrunk.  Every pass uses the same fixed arguments
 * (scanned = 1000, GFP_KERNEL, lru_pages = 1000).
 */
void drop_slab(void)
{
	for (;;) {
		int shrunk = shrink_slab(1000, GFP_KERNEL, 1000);

		if (shrunk <= 10)
			break;
	}
}
/*
 * proc handler for the "drop_caches" sysctl.
 *
 * The integer transfer itself is delegated to proc_dointvec_minmax(); the
 * vm_table entry points .data at sysctl_drop_caches.  After a successful
 * write, the stored value is treated as a bitmask:
 *   bit 0 (1) - drop_pagecache()
 *   bit 1 (2) - drop_slab()
 *
 * Returns 0 on success.  If proc_dointvec_minmax() fails, its error is
 * returned and nothing is dropped, so a rejected write neither acts on the
 * previous (stale) value of sysctl_drop_caches nor reports success to the
 * writer.
 */
int drop_caches_sysctl_handler(ctl_table *table, int write,
	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
	if (ret)
		return ret;

	if (write) {
		if (sysctl_drop_caches & 1)
			drop_pagecache();
		if (sysctl_drop_caches & 2)
			drop_slab();
	}
	return 0;
}
...@@ -1036,5 +1036,12 @@ int in_gate_area_no_task(unsigned long addr); ...@@ -1036,5 +1036,12 @@ int in_gate_area_no_task(unsigned long addr);
/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
#define OOM_DISABLE -17 #define OOM_DISABLE -17
int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
unsigned long lru_pages);
void drop_pagecache(void);
void drop_slab(void);
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */ #endif /* _LINUX_MM_H */
...@@ -180,6 +180,7 @@ enum ...@@ -180,6 +180,7 @@ enum
VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
}; };
......
...@@ -68,6 +68,7 @@ extern int min_free_kbytes; ...@@ -68,6 +68,7 @@ extern int min_free_kbytes;
extern int printk_ratelimit_jiffies; extern int printk_ratelimit_jiffies;
extern int printk_ratelimit_burst; extern int printk_ratelimit_burst;
extern int pid_max_min, pid_max_max; extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
int unknown_nmi_panic; int unknown_nmi_panic;
...@@ -774,6 +775,15 @@ static ctl_table vm_table[] = { ...@@ -774,6 +775,15 @@ static ctl_table vm_table[] = {
.proc_handler = &lowmem_reserve_ratio_sysctl_handler, .proc_handler = &lowmem_reserve_ratio_sysctl_handler,
.strategy = &sysctl_intvec, .strategy = &sysctl_intvec,
}, },
{
.ctl_name = VM_DROP_PAGECACHE,
.procname = "drop_caches",
.data = &sysctl_drop_caches,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = drop_caches_sysctl_handler,
.strategy = &sysctl_intvec,
},
{ {
.ctl_name = VM_MIN_FREE_KBYTES, .ctl_name = VM_MIN_FREE_KBYTES,
.procname = "min_free_kbytes", .procname = "min_free_kbytes",
......
...@@ -249,7 +249,6 @@ unlock: ...@@ -249,7 +249,6 @@ unlock:
break; break;
} }
pagevec_release(&pvec); pagevec_release(&pvec);
cond_resched();
} }
return ret; return ret;
} }
......
...@@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker); ...@@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker);
* *
* Returns the number of slab objects which we shrunk. * Returns the number of slab objects which we shrunk.
*/ */
static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages)
unsigned long lru_pages)
{ {
struct shrinker *shrinker; struct shrinker *shrinker;
int ret = 0; int ret = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment