Commit b00dc3ad authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

[PATCH] tmpfs: fix mount mpol nodelist parsing

I've been dissatisfied with the mpol_nodelist mount option which was
added to tmpfs earlier in -rc.  Replace it by mpol=policy:nodelist.

And it was broken: a nodelist is a comma-separated list of numbers and
ranges; the mount options are a comma-separated list of token=values.
Whoops, blindly strsep'ing on commas doesn't work so well: since we've
no numeric tokens, and unlikely to add them, use that to distinguish.

Move the mpol= parsing to shmem_parse_mpol under CONFIG_NUMA, reject
all its options as invalid if not NUMA.  /proc shows MPOL_PREFERRED
as "prefer", so use that name for the policy instead of "preferred".

Enforce that mpol=default has no nodelist; that mpol=prefer has one
node only; that mpol=bind has a nodelist; but let mpol=interleave use
node_online_map if no nodelist given.  Describe this in tmpfs.txt.
Signed-off-by: default avatarHugh Dickins <hugh@veritas.com>
Acked-by: default avatarRobin Holt <holt@sgi.com>
Acked-by: default avatarAndi Kleen <ak@suse.de>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 808c783e
......@@ -79,15 +79,18 @@ that instance in a system with many cpus making intensive use of it.
tmpfs has a mount option to set the NUMA memory allocation policy for
all files in that instance:
mpol=interleave prefers to allocate memory from each node in turn
all files in that instance (if CONFIG_NUMA is enabled) - which can be
adjusted on the fly via 'mount -o remount ...'
mpol=default prefers to allocate memory from the local node
mpol=bind prefers to allocate from mpol_nodelist
mpol=preferred prefers to allocate from first node in mpol_nodelist
mpol=prefer:Node prefers to allocate memory from the given Node
mpol=bind:NodeList allocates memory only from nodes in NodeList
mpol=interleave prefers to allocate from each node in turn
mpol=interleave:NodeList allocates from each node of NodeList in turn
The following mount option is used in conjunction with mpol=interleave,
mpol=bind or mpol=preferred:
mpol_nodelist: nodelist suitable for parsing with nodelist_parse.
NodeList format is a comma-separated list of decimal numbers and ranges,
a range being two hyphen-separated decimal numbers, the smallest and
largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15
To specify the initial root directory you can use the following mount
......@@ -109,4 +112,4 @@ RAM/SWAP in 10240 inodes and it is only accessible by root.
Author:
Christoph Rohland <cr@sap.com>, 1.12.01
Updated:
Hugh Dickins <hugh@veritas.com>, 13 March 2005
Hugh Dickins <hugh@veritas.com>, 19 February 2006
......@@ -45,6 +45,7 @@
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
#include <asm/pgtable.h>
......@@ -874,6 +875,51 @@ redirty:
}
#ifdef CONFIG_NUMA
static int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
{
char *nodelist = strchr(value, ':');
int err = 1;
if (nodelist) {
/* NUL-terminate policy string */
*nodelist++ = '\0';
if (nodelist_parse(nodelist, *policy_nodes))
goto out;
}
if (!strcmp(value, "default")) {
*policy = MPOL_DEFAULT;
/* Don't allow a nodelist */
if (!nodelist)
err = 0;
} else if (!strcmp(value, "prefer")) {
*policy = MPOL_PREFERRED;
/* Insist on a nodelist of one node only */
if (nodelist) {
char *rest = nodelist;
while (isdigit(*rest))
rest++;
if (!*rest)
err = 0;
}
} else if (!strcmp(value, "bind")) {
*policy = MPOL_BIND;
/* Insist on a nodelist */
if (nodelist)
err = 0;
} else if (!strcmp(value, "interleave")) {
*policy = MPOL_INTERLEAVE;
/* Default to nodes online if no nodelist */
if (!nodelist)
*policy_nodes = node_online_map;
err = 0;
}
out:
/* Restore string for error message */
if (nodelist)
*--nodelist = ':';
return err;
}
static struct page *shmem_swapin_async(struct shared_policy *p,
swp_entry_t entry, unsigned long idx)
{
......@@ -926,6 +972,11 @@ shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
return page;
}
#else
static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
{
return 1;
}
static inline struct page *
shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
{
......@@ -1859,7 +1910,23 @@ static int shmem_parse_options(char *options, int *mode, uid_t *uid,
{
char *this_char, *value, *rest;
while ((this_char = strsep(&options, ",")) != NULL) {
while (options != NULL) {
this_char = options;
for (;;) {
/*
* NUL-terminate this option: unfortunately,
* mount options form a comma-separated list,
* but mpol's nodelist may also contain commas.
*/
options = strchr(options, ',');
if (options == NULL)
break;
options++;
if (!isdigit(*options)) {
options[-1] = '\0';
break;
}
}
if (!*this_char)
continue;
if ((value = strchr(this_char,'=')) != NULL) {
......@@ -1910,18 +1977,8 @@ static int shmem_parse_options(char *options, int *mode, uid_t *uid,
if (*rest)
goto bad_val;
} else if (!strcmp(this_char,"mpol")) {
if (!strcmp(value,"default"))
*policy = MPOL_DEFAULT;
else if (!strcmp(value,"preferred"))
*policy = MPOL_PREFERRED;
else if (!strcmp(value,"bind"))
*policy = MPOL_BIND;
else if (!strcmp(value,"interleave"))
*policy = MPOL_INTERLEAVE;
else
if (shmem_parse_mpol(value,policy,policy_nodes))
goto bad_val;
} else if (!strcmp(this_char,"mpol_nodelist")) {
nodelist_parse(value, *policy_nodes);
} else {
printk(KERN_ERR "tmpfs: Bad mount option %s\n",
this_char);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment