Commit 195cf453, authored by Bron Gondwana, committed by Linus Torvalds

mm/page-writeback: highmem_is_dirtyable option

Add vm.highmem_is_dirtyable toggle

A 32 bit machine with HIGHMEM64 enabled running DCC has an MMAPed file of
approximately 2Gb size which contains a hash format that is written
randomly by the dbclean process.  On 2.6.16 this process took a few
minutes.  With lowmem only accounting of dirty ratios, this takes about 12
hours of 100% disk IO, all random writes.

Include a toggle in /proc/sys/vm/highmem_is_dirtyable which can be set to 1 to
add the highmem back to the total available memory count.

[akpm@linux-foundation.org: Fix the CONFIG_DETECT_SOFTLOCKUP=y build]
Signed-off-by: Bron Gondwana <brong@fastmail.fm>
Cc: Ethan Solomita <solo@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: WU Fengguang <wfg@mail.ustc.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 3dfa5721
...@@ -1315,6 +1315,21 @@ for writeout by the pdflush daemons. It is expressed in 100'ths of a second. ...@@ -1315,6 +1315,21 @@ for writeout by the pdflush daemons. It is expressed in 100'ths of a second.
Data which has been dirty in-memory for longer than this interval will be Data which has been dirty in-memory for longer than this interval will be
written out next time a pdflush daemon wakes up. written out next time a pdflush daemon wakes up.
highmem_is_dirtyable
--------------------
Only present if CONFIG_HIGHMEM is set.
This defaults to 0 (false), meaning that the ratios set above are calculated
as a percentage of lowmem only. This protects against excessive scanning
in page reclaim, swapping and general VM distress.
Setting this to 1 can be useful on 32 bit machines where you want to make
random changes within an MMAPed file that is larger than your available
lowmem without causing large quantities of random IO. It is safe if the
behavior of all programs running on the machine is known and memory will
not be otherwise stressed.
legacy_va_layout legacy_va_layout
---------------- ----------------
......
...@@ -22,6 +22,7 @@ Currently, these files are in /proc/sys/vm: ...@@ -22,6 +22,7 @@ Currently, these files are in /proc/sys/vm:
- dirty_background_ratio - dirty_background_ratio
- dirty_expire_centisecs - dirty_expire_centisecs
- dirty_writeback_centisecs - dirty_writeback_centisecs
- highmem_is_dirtyable (only if CONFIG_HIGHMEM set)
- max_map_count - max_map_count
- min_free_kbytes - min_free_kbytes
- laptop_mode - laptop_mode
...@@ -40,9 +41,9 @@ Currently, these files are in /proc/sys/vm: ...@@ -40,9 +41,9 @@ Currently, these files are in /proc/sys/vm:
============================================================== ==============================================================
dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, dirty_writeback_centisecs, highmem_is_dirtyable,
block_dump, swap_token_timeout, drop-caches, vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout,
hugepages_treat_as_movable: drop-caches, hugepages_treat_as_movable:
See Documentation/filesystems/proc.txt See Documentation/filesystems/proc.txt
......
...@@ -100,6 +100,7 @@ extern int dirty_background_ratio; ...@@ -100,6 +100,7 @@ extern int dirty_background_ratio;
extern int vm_dirty_ratio; extern int vm_dirty_ratio;
extern int dirty_writeback_interval; extern int dirty_writeback_interval;
extern int dirty_expire_interval; extern int dirty_expire_interval;
extern int vm_highmem_is_dirtyable;
extern int block_dump; extern int block_dump;
extern int laptop_mode; extern int laptop_mode;
......
...@@ -84,8 +84,11 @@ extern int sysctl_stat_interval; ...@@ -84,8 +84,11 @@ extern int sysctl_stat_interval;
extern int latencytop_enabled; extern int latencytop_enabled;
/* Constants used for minimum and maximum */ /* Constants used for minimum and maximum */
#ifdef CONFIG_DETECT_SOFTLOCKUP #if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
static int one = 1; static int one = 1;
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
static int sixty = 60; static int sixty = 60;
#endif #endif
...@@ -1150,6 +1153,19 @@ static struct ctl_table vm_table[] = { ...@@ -1150,6 +1153,19 @@ static struct ctl_table vm_table[] = {
.extra1 = &zero, .extra1 = &zero,
}, },
#endif #endif
#ifdef CONFIG_HIGHMEM
{
.ctl_name = CTL_UNNUMBERED,
.procname = "highmem_is_dirtyable",
.data = &vm_highmem_is_dirtyable,
.maxlen = sizeof(vm_highmem_is_dirtyable),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
.extra2 = &one,
},
#endif
/* /*
* NOTE: do not add new entries to this table unless you have read * NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt * Documentation/sysctl/ctl_unnumbered.txt
......
...@@ -68,6 +68,12 @@ static inline long sync_writeback_pages(void) ...@@ -68,6 +68,12 @@ static inline long sync_writeback_pages(void)
*/ */
int dirty_background_ratio = 5; int dirty_background_ratio = 5;
/*
* free highmem will not be subtracted from the total free memory
* for calculating free ratios if vm_highmem_is_dirtyable is true
*/
int vm_highmem_is_dirtyable;
/* /*
* The generator of dirty data starts writeback at this percentage * The generator of dirty data starts writeback at this percentage
*/ */
...@@ -287,7 +293,10 @@ static unsigned long determine_dirtyable_memory(void) ...@@ -287,7 +293,10 @@ static unsigned long determine_dirtyable_memory(void)
x = global_page_state(NR_FREE_PAGES) x = global_page_state(NR_FREE_PAGES)
+ global_page_state(NR_INACTIVE) + global_page_state(NR_INACTIVE)
+ global_page_state(NR_ACTIVE); + global_page_state(NR_ACTIVE);
x -= highmem_dirtyable_memory(x);
if (!vm_highmem_is_dirtyable)
x -= highmem_dirtyable_memory(x);
return x + 1; /* Ensure that we never return 0 */ return x + 1; /* Ensure that we never return 0 */
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment