Commit 14b395e3 authored by Linus Torvalds

Merge branch 'for-2.6.27' of git://linux-nfs.org/~bfields/linux

* 'for-2.6.27' of git://linux-nfs.org/~bfields/linux: (51 commits)
  nfsd: nfs4xdr.c do-while is not a compound statement
  nfsd: Use C99 initializers in fs/nfsd/nfs4xdr.c
  lockd: Pass "struct sockaddr *" to new failover-by-IP function
  lockd: get host reference in nlmsvc_create_block() instead of callers
  lockd: minor svclock.c style fixes
  lockd: eliminate duplicate nlmsvc_lookup_host call from nlmsvc_lock
  lockd: eliminate duplicate nlmsvc_lookup_host call from nlmsvc_testlock
  lockd: nlm_release_host() checks for NULL, caller needn't
  file lock: reorder struct file_lock to save space on 64 bit builds
  nfsd: take file and mnt write in nfs4_upgrade_open
  nfsd: document open share bit tracking
  nfsd: tabulate nfs4 xdr encoding functions
  nfsd: dprint operation names
  svcrdma: Change WR context get/put to use the kmem cache
  svcrdma: Create a kmem cache for the WR contexts
  svcrdma: Add flush_scheduled_work to module exit function
  svcrdma: Limit ORD based on client's advertised IRD
  svcrdma: Remove unused wait q from svcrdma_xprt structure
  svcrdma: Remove unneeded spin locks from __svc_rdma_free
  svcrdma: Add dma map count and WARN_ON
  ...
parents 734b397c 5108b276
......@@ -5,7 +5,7 @@
################################################################################
Author: NetApp and Open Grid Computing
Date: April 15, 2008
Date: May 29, 2008
Table of Contents
~~~~~~~~~~~~~~~~~
......@@ -60,16 +60,18 @@ Installation
The procedures described in this document have been tested with
distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
- Install nfs-utils-1.1.1 or greater on the client
- Install nfs-utils-1.1.2 or greater on the client
An NFS/RDMA mount point can only be obtained by using the mount.nfs
command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs
you are using, type:
An NFS/RDMA mount point can be obtained by using the mount.nfs command in
nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
version with support for NFS/RDMA mounts, but for various reasons we
recommend using nfs-utils-1.1.2 or greater). To see which version of
mount.nfs you are using, type:
> /sbin/mount.nfs -V
$ /sbin/mount.nfs -V
If the version is less than 1.1.1 or the command does not exist,
then you will need to install the latest version of nfs-utils.
If the version is less than 1.1.2 or the command does not exist,
you should install the latest version of nfs-utils.
Download the latest package from:
......@@ -77,22 +79,33 @@ Installation
Uncompress the package and follow the installation instructions.
If you will not be using GSS and NFSv4, the installation process
can be simplified by disabling these features when running configure:
If you will not need the idmapper and gssd executables (you do not need
these to create an NFS/RDMA enabled mount command), the installation
process can be simplified by disabling these features when running
configure:
> ./configure --disable-gss --disable-nfsv4
$ ./configure --disable-gss --disable-nfsv4
For more information on this see the package's README and INSTALL files.
To build nfs-utils you will need the tcp_wrappers package installed. For
more information on this see the package's README and INSTALL files.
After building the nfs-utils package, there will be a mount.nfs binary in
the utils/mount directory. This binary can be used to initiate NFS v2, v3,
or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4.
The standard technique is to create a symlink called mount.nfs4 to mount.nfs.
or v4 mounts. To initiate a v4 mount, the binary must be called
mount.nfs4. The standard technique is to create a symlink called
mount.nfs4 to mount.nfs.
NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed
This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
$ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
In this location, mount.nfs will be invoked automatically for NFS mounts
by the system mount command.
NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
on the NFS client machine. You do not need this specific version of
nfs-utils on the server. Furthermore, only the mount.nfs command from
nfs-utils-1.1.1 is needed on the client.
nfs-utils-1.1.2 is needed on the client.
- Install a Linux kernel with NFS/RDMA
......@@ -156,8 +169,8 @@ Check RDMA and NFS Setup
this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
card:
> modprobe ib_mthca
> modprobe ib_ipoib
$ modprobe ib_mthca
$ modprobe ib_ipoib
If you are using InfiniBand, make sure there is a Subnet Manager (SM)
running on the network. If your IB switch has an embedded SM, you can
......@@ -166,7 +179,7 @@ Check RDMA and NFS Setup
If an SM is running on your network, you should see the following:
> cat /sys/class/infiniband/driverX/ports/1/state
$ cat /sys/class/infiniband/driverX/ports/1/state
4: ACTIVE
where driverX is mthca0, ipath5, ehca3, etc.
......@@ -174,10 +187,10 @@ Check RDMA and NFS Setup
To further test the InfiniBand software stack, use IPoIB (this
assumes you have two IB hosts named host1 and host2):
host1> ifconfig ib0 a.b.c.x
host2> ifconfig ib0 a.b.c.y
host1> ping a.b.c.y
host2> ping a.b.c.x
host1$ ifconfig ib0 a.b.c.x
host2$ ifconfig ib0 a.b.c.y
host1$ ping a.b.c.y
host2$ ping a.b.c.x
For other device types, follow the appropriate procedures.
......@@ -202,11 +215,11 @@ NFS/RDMA Setup
/vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
/vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
The IP address(es) is(are) the client's IPoIB address for an InfiniBand HCA or the
client's iWARP address(es) for an RNIC.
The IP address(es) is(are) the client's IPoIB address for an InfiniBand
HCA or the client's iWARP address(es) for an RNIC.
NOTE: The "insecure" option must be used because the NFS/RDMA client does not
use a reserved port.
NOTE: The "insecure" option must be used because the NFS/RDMA client does
not use a reserved port.
Each time a machine boots:
......@@ -214,43 +227,45 @@ NFS/RDMA Setup
For InfiniBand using a Mellanox adapter:
> modprobe ib_mthca
> modprobe ib_ipoib
> ifconfig ib0 a.b.c.d
$ modprobe ib_mthca
$ modprobe ib_ipoib
$ ifconfig ib0 a.b.c.d
NOTE: use unique addresses for the client and server
- Start the NFS server
If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
load the RDMA transport module:
If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
kernel config), load the RDMA transport module:
> modprobe svcrdma
$ modprobe svcrdma
Regardless of how the server was built (module or built-in), start the server:
Regardless of how the server was built (module or built-in), start the
server:
> /etc/init.d/nfs start
$ /etc/init.d/nfs start
or
> service nfs start
$ service nfs start
Instruct the server to listen on the RDMA transport:
> echo rdma 2050 > /proc/fs/nfsd/portlist
$ echo rdma 2050 > /proc/fs/nfsd/portlist
- On the client system
If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
load the RDMA client module:
If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
kernel config), load the RDMA client module:
> modprobe xprtrdma.ko
$ modprobe xprtrdma.ko
Regardless of how the client was built (module or built-in), issue the mount.nfs command:
Regardless of how the client was built (module or built-in), use this
command to mount the NFS/RDMA server:
> /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050
$ mount -o rdma,port=2050 <IPoIB-server-name-or-address>:/<export> /mnt
To verify that the mount is using RDMA, run "cat /proc/mounts" and check the
"proto" field for the given mount.
To verify that the mount is using RDMA, run "cat /proc/mounts" and check
the "proto" field for the given mount.
Congratulations! You're using NFS/RDMA!
......@@ -50,7 +50,7 @@ EXPORT_SYMBOL(nlmsvc_ops);
static DEFINE_MUTEX(nlmsvc_mutex);
static unsigned int nlmsvc_users;
static struct task_struct *nlmsvc_task;
static struct svc_serv *nlmsvc_serv;
static struct svc_rqst *nlmsvc_rqst;
int nlmsvc_grace_period;
unsigned long nlmsvc_timeout;
......@@ -194,20 +194,11 @@ lockd(void *vrqstp)
svc_process(rqstp);
}
flush_signals(current);
if (nlmsvc_ops)
nlmsvc_invalidate_all();
nlm_shutdown_hosts();
unlock_kernel();
nlmsvc_task = NULL;
nlmsvc_serv = NULL;
/* Exit the RPC thread */
svc_exit_thread(rqstp);
return 0;
}
......@@ -254,16 +245,15 @@ int
lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
{
struct svc_serv *serv;
struct svc_rqst *rqstp;
int error = 0;
mutex_lock(&nlmsvc_mutex);
/*
* Check whether we're already up and running.
*/
if (nlmsvc_serv) {
if (nlmsvc_rqst) {
if (proto)
error = make_socks(nlmsvc_serv, proto);
error = make_socks(nlmsvc_rqst->rq_server, proto);
goto out;
}
......@@ -288,9 +278,10 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
/*
* Create the kernel thread and wait for it to start.
*/
rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
if (IS_ERR(rqstp)) {
error = PTR_ERR(rqstp);
nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
if (IS_ERR(nlmsvc_rqst)) {
error = PTR_ERR(nlmsvc_rqst);
nlmsvc_rqst = NULL;
printk(KERN_WARNING
"lockd_up: svc_rqst allocation failed, error=%d\n",
error);
......@@ -298,16 +289,15 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
}
svc_sock_update_bufs(serv);
nlmsvc_serv = rqstp->rq_server;
nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name);
nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name);
if (IS_ERR(nlmsvc_task)) {
error = PTR_ERR(nlmsvc_task);
svc_exit_thread(nlmsvc_rqst);
nlmsvc_task = NULL;
nlmsvc_serv = NULL;
nlmsvc_rqst = NULL;
printk(KERN_WARNING
"lockd_up: kthread_run failed, error=%d\n", error);
svc_exit_thread(rqstp);
goto destroy_and_out;
}
......@@ -346,6 +336,9 @@ lockd_down(void)
BUG();
}
kthread_stop(nlmsvc_task);
svc_exit_thread(nlmsvc_rqst);
nlmsvc_task = NULL;
nlmsvc_rqst = NULL;
out:
mutex_unlock(&nlmsvc_mutex);
}
......
......@@ -58,8 +58,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
if (host)
nlm_release_host(host);
nlm_release_host(host);
if (error)
return error;
return nlm_lck_denied_nolocks;
......@@ -100,7 +99,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Now check for conflicting locks */
resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
else
......@@ -146,7 +145,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
#endif
/* Now try to lock the file */
resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
argp->block, &argp->cookie);
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
......
......@@ -129,9 +129,9 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
{
if(a->len != b->len)
if (a->len != b->len)
return 0;
if(memcmp(a->data,b->data,a->len))
if (memcmp(a->data, b->data, a->len))
return 0;
return 1;
}
......@@ -180,6 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
struct nlm_block *block;
struct nlm_rqst *call = NULL;
nlm_get_host(host);
call = nlm_alloc_call(host);
if (call == NULL)
return NULL;
......@@ -358,10 +359,10 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
*/
__be32
nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
struct nlm_host *host, struct nlm_lock *lock, int wait,
struct nlm_cookie *cookie)
{
struct nlm_block *block = NULL;
struct nlm_host *host;
int error;
__be32 ret;
......@@ -373,11 +374,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
(long long)lock->fl.fl_end,
wait);
/* Create host handle for callback */
host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
if (host == NULL)
return nlm_lck_denied_nolocks;
/* Lock file against concurrent access */
mutex_lock(&file->f_mutex);
/* Get existing block (in case client is busy-waiting)
......@@ -385,8 +381,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
*/
block = nlmsvc_lookup_block(file, lock);
if (block == NULL) {
block = nlmsvc_create_block(rqstp, nlm_get_host(host), file,
lock, cookie);
block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
ret = nlm_lck_denied_nolocks;
if (block == NULL)
goto out;
......@@ -417,7 +412,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
lock->fl.fl_flags &= ~FL_SLEEP;
dprintk("lockd: vfs_lock_file returned %d\n", error);
switch(error) {
switch (error) {
case 0:
ret = nlm_granted;
goto out;
......@@ -450,7 +445,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
out:
mutex_unlock(&file->f_mutex);
nlmsvc_release_block(block);
nlm_release_host(host);
dprintk("lockd: nlmsvc_lock returned %u\n", ret);
return ret;
}
......@@ -460,8 +454,8 @@ out:
*/
__be32
nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *lock, struct nlm_lock *conflock,
struct nlm_cookie *cookie)
struct nlm_host *host, struct nlm_lock *lock,
struct nlm_lock *conflock, struct nlm_cookie *cookie)
{
struct nlm_block *block = NULL;
int error;
......@@ -479,16 +473,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
if (block == NULL) {
struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
struct nlm_host *host;
if (conf == NULL)
return nlm_granted;
/* Create host handle for callback */
host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
if (host == NULL) {
kfree(conf);
return nlm_lck_denied_nolocks;
}
block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
if (block == NULL) {
kfree(conf);
......@@ -897,7 +884,7 @@ nlmsvc_retry_blocked(void)
if (block->b_when == NLM_NEVER)
break;
if (time_after(block->b_when,jiffies)) {
if (time_after(block->b_when, jiffies)) {
timeout = block->b_when - jiffies;
break;
}
......
......@@ -87,8 +87,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
if (host)
nlm_release_host(host);
nlm_release_host(host);
if (error)
return error;
return nlm_lck_denied_nolocks;
......@@ -129,7 +128,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Now check for conflicting locks */
resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
else
......@@ -176,7 +175,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
#endif
/* Now try to lock the file */
resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
argp->block, &argp->cookie));
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
......
......@@ -373,13 +373,16 @@ nlmsvc_free_host_resources(struct nlm_host *host)
}
}
/*
* Remove all locks held for clients
/**
* nlmsvc_invalidate_all - remove all locks held for clients
*
* Release all locks held by NFS clients.
*
*/
void
nlmsvc_invalidate_all(void)
{
/* Release all locks held by NFS clients.
/*
* Previously, the code would call
* nlmsvc_free_host_resources for each client in
* turn, which is about as inefficient as it gets.
......@@ -396,6 +399,12 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
return sb == file->f_file->f_path.mnt->mnt_sb;
}
/**
* nlmsvc_unlock_all_by_sb - release locks held on this file system
* @sb: super block
*
* Release all locks held by clients accessing this file system.
*/
int
nlmsvc_unlock_all_by_sb(struct super_block *sb)
{
......@@ -409,17 +418,22 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
static int
nlmsvc_match_ip(void *datap, struct nlm_host *host)
{
__be32 *server_addr = datap;
return host->h_saddr.sin_addr.s_addr == *server_addr;
return nlm_cmp_addr(&host->h_saddr, datap);
}
/**
* nlmsvc_unlock_all_by_ip - release local locks by IP address
* @server_addr: server's IP address as seen by clients
*
* Release all locks held by clients accessing this host
* via the passed in IP address.
*/
int
nlmsvc_unlock_all_by_ip(__be32 server_addr)
nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr)
{
int ret;
ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL);
return ret ? -EIO : 0;
ret = nlm_traverse_files(server_addr, nlmsvc_match_ip, NULL);
return ret ? -EIO : 0;
}
EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip);
......@@ -35,7 +35,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
fh.fh_export = NULL;
exp_readlock();
nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp);
nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
fh_put(&fh);
rqstp->rq_client = NULL;
exp_readunlock();
......
......@@ -40,7 +40,8 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
fh = fh_copy(&resp->fh, &argp->fh);
if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
......@@ -107,7 +108,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
fh = fh_copy(&resp->fh, &argp->fh);
nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
if (!nfserr) {
nfserr = nfserrno( nfsd_set_posix_acl(
......@@ -134,7 +135,7 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
}
/*
......
......@@ -36,7 +36,8 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
__be32 nfserr = 0;
fh = fh_copy(&resp->fh, &argp->fh);
if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
......@@ -101,7 +102,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
__be32 nfserr = 0;
fh = fh_copy(&resp->fh, &argp->fh);
nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
if (!nfserr) {
nfserr = nfserrno( nfsd_set_posix_acl(
......
......@@ -63,7 +63,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
......@@ -242,7 +242,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
attr = &argp->attrs;
/* Get the directory inode */
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE);
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
if (nfserr)
RETURN_STATUS(nfserr);
......@@ -558,7 +558,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
resp->f_maxfilesize = ~(u32) 0;
resp->f_properties = NFS3_FSF_DEFAULT;
nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
/* Check special features of the file system. May request
* different read/write sizes for file systems known to have
......@@ -597,7 +597,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
resp->p_case_insensitive = 0;
resp->p_case_preserving = 1;
nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
if (nfserr == 0) {
struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
......
......@@ -71,11 +71,11 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
return nfserr_inval;
if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
accmode |= MAY_READ;
accmode |= NFSD_MAY_READ;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
accmode |= (MAY_WRITE | MAY_TRUNC);
accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
if (open->op_share_deny & NFS4_SHARE_DENY_WRITE)
accmode |= MAY_WRITE;
accmode |= NFSD_MAY_WRITE;
status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
......@@ -126,7 +126,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
&resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
if (!created)
status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
status = do_open_permission(rqstp, current_fh, open,
NFSD_MAY_NOP);
out:
fh_put(&resfh);
......@@ -157,7 +158,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
(open->op_iattr.ia_size == 0);
status = do_open_permission(rqstp, current_fh, open, MAY_OWNER_OVERRIDE);
status = do_open_permission(rqstp, current_fh, open,
NFSD_MAY_OWNER_OVERRIDE);
return status;
}
......@@ -186,7 +188,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len;
memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh,
rp->rp_openfh_len);
status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
dprintk("nfsd4_open: replay failed"
" restoring previous filehandle\n");
......@@ -285,7 +287,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
putfh->pf_fhlen);
return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
}
static __be32
......@@ -363,7 +365,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fh_init(&resfh, NFS4_FHSIZE);
status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, MAY_CREATE);
status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR,
NFSD_MAY_CREATE);
if (status == nfserr_symlink)
status = nfserr_notdir;
if (status)
......@@ -445,7 +448,7 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
__be32 status;
status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
return status;
......@@ -730,7 +733,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
int count;
__be32 status;
status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
return status;
......@@ -843,10 +846,13 @@ struct nfsd4_operation {
#define ALLOWED_WITHOUT_FH 1
/* GETATTR and ops not listed as returning NFS4ERR_MOVED: */
#define ALLOWED_ON_ABSENT_FS 2
char *op_name;
};
static struct nfsd4_operation nfsd4_ops[];
static inline char *nfsd4_op_name(unsigned opnum);
/*
* COMPOUND call.
*/
......@@ -888,7 +894,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum);
dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
resp->opcnt, args->opcnt, op->opnum,
nfsd4_op_name(op->opnum));
/*
* The XDR decode routines may have pre-set op->status;
......@@ -952,126 +960,170 @@ encode_op:
out:
nfsd4_release_compoundargs(args);
cstate_free(cstate);
dprintk("nfsv4 compound returned %d\n", ntohl(status));
return status;
}
static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
[OP_ACCESS] = {
.op_func = (nfsd4op_func)nfsd4_access,
.op_name = "OP_ACCESS",
},
[OP_CLOSE] = {
.op_func = (nfsd4op_func)nfsd4_close,
.op_name = "OP_CLOSE",
},
[OP_COMMIT] = {
.op_func = (nfsd4op_func)nfsd4_commit,
.op_name = "OP_COMMIT",
},
[OP_CREATE] = {
.op_func = (nfsd4op_func)nfsd4_create,
.op_name = "OP_CREATE",
},
[OP_DELEGRETURN] = {
.op_func = (nfsd4op_func)nfsd4_delegreturn,
.op_name = "OP_DELEGRETURN",
},
[OP_GETATTR] = {
.op_func = (nfsd4op_func)nfsd4_getattr,
.op_flags = ALLOWED_ON_ABSENT_FS,
.op_name = "OP_GETATTR",
},
[OP_GETFH] = {
.op_func = (nfsd4op_func)nfsd4_getfh,
.op_name = "OP_GETFH",
},
[OP_LINK] = {
.op_func = (nfsd4op_func)nfsd4_link,
.op_name = "OP_LINK",
},
[OP_LOCK] = {
.op_func = (nfsd4op_func)nfsd4_lock,
.op_name = "OP_LOCK",
},
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
.op_name = "OP_LOCKT",
},
[OP_LOCKU] = {
.op_func = (nfsd4op_func)nfsd4_locku,
.op_name = "OP_LOCKU",
},
[OP_LOOKUP] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
.op_name = "OP_LOOKUP",
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
.op_name = "OP_LOOKUPP",
},
[OP_NVERIFY] = {
.op_func = (nfsd4op_func)nfsd4_nverify,
.op_name = "OP_NVERIFY",
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
.op_name = "OP_OPEN",
},
[OP_OPEN_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_open_confirm,
.op_name = "OP_OPEN_CONFIRM",
},
[OP_OPEN_DOWNGRADE] = {
.op_func = (nfsd4op_func)nfsd4_open_downgrade,
.op_name = "OP_OPEN_DOWNGRADE",
},
[OP_PUTFH] = {
.op_func = (nfsd4op_func)nfsd4_putfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_PUTFH",
},
[OP_PUTPUBFH] = {
/* unsupported; just for future reference: */
/* unsupported, just for future reference: */
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_PUTPUBFH",
},
[OP_PUTROOTFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_PUTROOTFH",
},
[OP_READ] = {
.op_func = (nfsd4op_func)nfsd4_read,
.op_name = "OP_READ",
},
[OP_READDIR] = {
.op_func = (nfsd4op_func)nfsd4_readdir,
.op_name = "OP_READDIR",
},
[OP_READLINK] = {
.op_func = (nfsd4op_func)nfsd4_readlink,
.op_name = "OP_READLINK",
},
[OP_REMOVE] = {
.op_func = (nfsd4op_func)nfsd4_remove,
.op_name = "OP_REMOVE",
},
[OP_RENAME] = {
.op_name = "OP_RENAME",
.op_func = (nfsd4op_func)nfsd4_rename,
},
[OP_RENEW] = {
.op_func = (nfsd4op_func)nfsd4_renew,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_RENEW",
},
[OP_RESTOREFH] = {
.op_func = (nfsd4op_func)nfsd4_restorefh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_RESTOREFH",
},
[OP_SAVEFH] = {
.op_func = (nfsd4op_func)nfsd4_savefh,
.op_name = "OP_SAVEFH",
},
[OP_SECINFO] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
.op_name = "OP_SECINFO",
},
[OP_SETATTR] = {
.op_func = (nfsd4op_func)nfsd4_setattr,
.op_name = "OP_SETATTR",
},
[OP_SETCLIENTID] = {
.op_func = (nfsd4op_func)nfsd4_setclientid,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_SETCLIENTID",
},
[OP_SETCLIENTID_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_SETCLIENTID_CONFIRM",
},
[OP_VERIFY] = {
.op_func = (nfsd4op_func)nfsd4_verify,
.op_name = "OP_VERIFY",
},
[OP_WRITE] = {
.op_func = (nfsd4op_func)nfsd4_write,
.op_name = "OP_WRITE",
},
[OP_RELEASE_LOCKOWNER] = {
.op_func = (nfsd4op_func)nfsd4_release_lockowner,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_RELEASE_LOCKOWNER",
},
};
static inline char *
nfsd4_op_name(unsigned opnum)
{
if (opnum < ARRAY_SIZE(nfsd4_ops))
return nfsd4_ops[opnum].op_name;
return "unknown_operation";
}
#define nfs4svc_decode_voidargs NULL
#define nfs4svc_release_void NULL
#define nfsd4_voidres nfsd4_voidargs
......
......@@ -1173,6 +1173,24 @@ static inline int deny_valid(u32 x)
return x <= NFS4_SHARE_DENY_BOTH;
}
/*
* We store the NONE, READ, WRITE, and BOTH bits separately in the
* st_{access,deny}_bmap field of the stateid, in order to track not
* only what share bits are currently in force, but also what
* combinations of share bits previous opens have used. This allows us
* to enforce the recommendation of rfc 3530 14.2.19 that the server
* return an error if the client attempts to downgrade to a combination
* of share bits not explicable by closing some of its previous opens.
*
* XXX: This enforcement is actually incomplete, since we don't keep
* track of access/deny bit combinations; so, e.g., we allow:
*
* OPEN allow read, deny write
* OPEN allow both, deny none
* DOWNGRADE allow read, deny none
*
* which we should reject.
*/
static void
set_access(unsigned int *access, unsigned long bmap) {
int i;
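/*
 * Illustrative sketch only -- not part of this patch.  It restates the idea
 * in the comment above with hypothetical helpers: each OPEN records the
 * exact share_access combination it used as a bit in the bitmap, and a
 * later downgrade is acceptable only to a combination some earlier open
 * actually set (NFS4_SHARE_ACCESS_READ/WRITE/BOTH are 1/2/3).
 */
static inline void example_record_share(unsigned long *bmap, u32 share_bits)
{
	__set_bit(share_bits, bmap);
}

static inline int example_downgrade_ok(unsigned long bmap, u32 share_bits)
{
	return test_bit(share_bits, &bmap);
}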
......@@ -1570,6 +1588,10 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
int err = get_write_access(inode);
if (err)
return nfserrno(err);
err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
if (err)
return nfserrno(err);
file_take_write(filp);
}
status = nfsd4_truncate(rqstp, cur_fh, open);
if (status) {
......@@ -1579,8 +1601,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
}
/* remember the open */
filp->f_mode |= open->op_share_access;
set_bit(open->op_share_access, &stp->st_access_bmap);
set_bit(open->op_share_deny, &stp->st_deny_bmap);
__set_bit(open->op_share_access, &stp->st_access_bmap);
__set_bit(open->op_share_deny, &stp->st_deny_bmap);
return nfs_ok;
}
......@@ -1722,9 +1744,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
/* Stateid was not found, this is a new OPEN */
int flags = 0;
if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
flags |= MAY_READ;
flags |= NFSD_MAY_READ;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
flags |= MAY_WRITE;
flags |= NFSD_MAY_WRITE;
status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
if (status)
goto out;
......@@ -2610,7 +2632,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
if ((status = fh_verify(rqstp, &cstate->current_fh,
S_IFREG, MAY_LOCK))) {
S_IFREG, NFSD_MAY_LOCK))) {
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
......@@ -3249,12 +3271,14 @@ nfs4_state_shutdown(void)
nfs4_unlock_state();
}
/*
* user_recovery_dirname is protected by the nfsd_mutex since it's only
* accessed when nfsd is starting.
*/
static void
nfs4_set_recdir(char *recdir)
{
nfs4_lock_state();
strcpy(user_recovery_dirname, recdir);
nfs4_unlock_state();
}
/*
......@@ -3278,6 +3302,12 @@ nfs4_reset_recoverydir(char *recdir)
return status;
}
char *
nfs4_recoverydir(void)
{
return user_recovery_dirname;
}
/*
* Called when leasetime is changed.
*
......@@ -3286,11 +3316,12 @@ nfs4_reset_recoverydir(char *recdir)
* we start to register any changes in lease time. If the administrator
* really wants to change the lease time *now*, they can go ahead and bring
* nfsd down and then back up again after changing the lease time.
*
* user_lease_time is protected by nfsd_mutex since it's only really accessed
* when nfsd is starting
*/
void
nfs4_reset_lease(time_t leasetime)
{
lock_kernel();
user_lease_time = leasetime;
unlock_kernel();
}
......@@ -310,9 +310,12 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
{
__be32 server_ip;
char *fo_path, c;
struct sockaddr_in sin = {
.sin_family = AF_INET,
};
int b1, b2, b3, b4;
char c;
char *fo_path;
/* sanity check */
if (size == 0)
......@@ -326,11 +329,13 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
return -EINVAL;
/* get ipv4 address */
if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
if (sscanf(fo_path, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) != 4)
return -EINVAL;
server_ip = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
return -EINVAL;
sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
return nlmsvc_unlock_all_by_ip(server_ip);
return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin);
}
static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
......@@ -450,22 +455,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
int i;
int rv;
int len;
int npools = nfsd_nrpools();
int npools;
int *nthreads;
mutex_lock(&nfsd_mutex);
npools = nfsd_nrpools();
if (npools == 0) {
/*
* NFS is shut down. The admin can start it by
* writing to the threads file but NOT the pool_threads
* file, sorry. Report zero threads.
*/
mutex_unlock(&nfsd_mutex);
strcpy(buf, "0\n");
return strlen(buf);
}
nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
rv = -ENOMEM;
if (nthreads == NULL)
return -ENOMEM;
goto out_free;
if (size > 0) {
for (i = 0; i < npools; i++) {
......@@ -496,14 +505,16 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
mesg += len;
}
mutex_unlock(&nfsd_mutex);
return (mesg-buf);
out_free:
kfree(nthreads);
mutex_unlock(&nfsd_mutex);
return rv;
}
static ssize_t write_versions(struct file *file, char *buf, size_t size)
static ssize_t __write_versions(struct file *file, char *buf, size_t size)
{
/*
* Format:
......@@ -566,14 +577,23 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
return len;
}
static ssize_t write_ports(struct file *file, char *buf, size_t size)
static ssize_t write_versions(struct file *file, char *buf, size_t size)
{
ssize_t rv;
mutex_lock(&nfsd_mutex);
rv = __write_versions(file, buf, size);
mutex_unlock(&nfsd_mutex);
return rv;
}
static ssize_t __write_ports(struct file *file, char *buf, size_t size)
{
if (size == 0) {
int len = 0;
lock_kernel();
if (nfsd_serv)
len = svc_xprt_names(nfsd_serv, buf, 0);
unlock_kernel();
return len;
}
/* Either a single 'fd' number is written, in which
......@@ -603,9 +623,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
/* Decrease the count, but don't shutdown the
* the service
*/
lock_kernel();
nfsd_serv->sv_nrthreads--;
unlock_kernel();
}
return err < 0 ? err : 0;
}
......@@ -614,10 +632,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int len = 0;
if (!toclose)
return -ENOMEM;
lock_kernel();
if (nfsd_serv)
len = svc_sock_names(buf, nfsd_serv, toclose);
unlock_kernel();
if (len >= 0)
lockd_down();
kfree(toclose);
......@@ -655,7 +671,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
if (port == 0)
return -EINVAL;
lock_kernel();
if (nfsd_serv) {
xprt = svc_find_xprt(nfsd_serv, transport,
AF_UNSPEC, port);
......@@ -666,13 +681,23 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
} else
err = -ENOTCONN;
}
unlock_kernel();
return err < 0 ? err : 0;
}
}
return -EINVAL;
}
static ssize_t write_ports(struct file *file, char *buf, size_t size)
{
ssize_t rv;
mutex_lock(&nfsd_mutex);
rv = __write_ports(file, buf, size);
mutex_unlock(&nfsd_mutex);
return rv;
}
int nfsd_max_blksize;
static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
......@@ -691,13 +716,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
if (bsize > NFSSVC_MAXBLKSIZE)
bsize = NFSSVC_MAXBLKSIZE;
bsize &= ~(1024-1);
lock_kernel();
mutex_lock(&nfsd_mutex);
if (nfsd_serv && nfsd_serv->sv_nrthreads) {
unlock_kernel();
mutex_unlock(&nfsd_mutex);
return -EBUSY;
}
nfsd_max_blksize = bsize;
unlock_kernel();
mutex_unlock(&nfsd_mutex);
}
return sprintf(buf, "%d\n", nfsd_max_blksize);
}
......@@ -705,16 +730,17 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
#ifdef CONFIG_NFSD_V4
extern time_t nfs4_leasetime(void);
static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
{
/* if size > 10 seconds, call
* nfs4_reset_lease() then write out the new lease (seconds) as reply
*/
char *mesg = buf;
int rv;
int rv, lease;
if (size > 0) {
int lease;
if (nfsd_serv)
return -EBUSY;
rv = get_int(&mesg, &lease);
if (rv)
return rv;
......@@ -726,24 +752,52 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
return strlen(buf);
}
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
{
ssize_t rv;
mutex_lock(&nfsd_mutex);
rv = __write_leasetime(file, buf, size);
mutex_unlock(&nfsd_mutex);
return rv;
}
extern char *nfs4_recoverydir(void);
static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
{
char *mesg = buf;
char *recdir;
int len, status;
if (size == 0 || size > PATH_MAX || buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
if (size > 0) {
if (nfsd_serv)
return -EBUSY;
if (size > PATH_MAX || buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
recdir = mesg;
len = qword_get(&mesg, recdir, size);
if (len <= 0)
return -EINVAL;
recdir = mesg;
len = qword_get(&mesg, recdir, size);
if (len <= 0)
return -EINVAL;
status = nfs4_reset_recoverydir(recdir);
status = nfs4_reset_recoverydir(recdir);
}
sprintf(buf, "%s\n", nfs4_recoverydir());
return strlen(buf);
}
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
{
ssize_t rv;
mutex_lock(&nfsd_mutex);
rv = __write_recoverydir(file, buf, size);
mutex_unlock(&nfsd_mutex);
return rv;
}
#endif
/*----------------------------------------------------------------------------*/
......
......@@ -176,9 +176,24 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp));
error = nfsd_setuser_and_check_port(rqstp, exp);
if (error)
goto out;
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
/* Elevate privileges so that the lack of 'r' or 'x'
* permission on some parent directory will
* not stop exportfs_decode_fh from being able
* to reconnect a directory into the dentry cache.
* The same problem can affect "SUBTREECHECK" exports,
* but as nfsd_acceptable depends on correct
* access control settings being in effect, we cannot
* fix that case easily.
*/
current->cap_effective =
cap_raise_nfsd_set(current->cap_effective,
current->cap_permitted);
} else {
error = nfsd_setuser_and_check_port(rqstp, exp);
if (error)
goto out;
}
/*
* Look up the dentry using the NFS file handle.
......@@ -215,6 +230,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
goto out;
}
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
error = nfsd_setuser_and_check_port(rqstp, exp);
if (error) {
dput(dentry);
goto out;
}
}
if (S_ISDIR(dentry->d_inode->i_mode) &&
(dentry->d_flags & DCACHE_DISCONNECTED)) {
printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
......@@ -279,7 +302,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
if (error)
goto out;
if (!(access & MAY_LOCK)) {
if (!(access & NFSD_MAY_LOCK)) {
/*
* pseudoflavor restrictions are not enforced on NLM,
* which clients virtually always use auth_sys for,
......
......@@ -65,7 +65,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
return nfsd_return_attrs(nfserr, resp);
}
......@@ -215,11 +215,11 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
SVCFH_fmt(dirfhp), argp->len, argp->name);
/* First verify the parent file handle */
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC);
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
if (nfserr)
goto done; /* must fh_put dirfhp even on error */
/* Check for MAY_WRITE in nfsd_create if necessary */
/* Check for NFSD_MAY_WRITE in nfsd_create if necessary */
nfserr = nfserr_acces;
if (!argp->len)
......@@ -281,7 +281,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
nfserr = nfsd_permission(rqstp,
newfhp->fh_export,
newfhp->fh_dentry,
MAY_WRITE|MAY_LOCAL_ACCESS);
NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
if (nfserr && nfserr != nfserr_rofs)
goto out_unlock;
}
......@@ -614,6 +614,7 @@ nfserrno (int errno)
#endif
{ nfserr_stale, -ESTALE },
{ nfserr_jukebox, -ETIMEDOUT },
{ nfserr_jukebox, -ERESTARTSYS },
{ nfserr_dropit, -EAGAIN },
{ nfserr_dropit, -ENOMEM },
{ nfserr_badname, -ESRCH },
......
......@@ -21,6 +21,7 @@
#include <linux/smp_lock.h>
#include <linux/freezer.h>
#include <linux/fs_struct.h>
#include <linux/kthread.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/stats.h>
......@@ -36,28 +37,38 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC
/* these signals will be delivered to an nfsd thread
* when handling a request
*/
#define ALLOWED_SIGS (sigmask(SIGKILL))
/* these signals will be delivered to an nfsd thread
* when not handling a request. i.e. when waiting
*/
#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
/* if the last thread dies with SIGHUP, then the exports table is
* left unchanged ( like 2.4-{0-9} ). Any other signal will clear
* the exports table (like 2.2).
*/
#define SIG_NOCLEAN SIGHUP
extern struct svc_program nfsd_program;
static void nfsd(struct svc_rqst *rqstp);
static int nfsd(void *vrqstp);
struct timeval nfssvc_boot;
struct svc_serv *nfsd_serv;
static atomic_t nfsd_busy;
static unsigned long nfsd_last_call;
static DEFINE_SPINLOCK(nfsd_call_lock);
/*
* nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
* of the svc_serv struct. In particular, ->sv_nrthreads but also to some
* extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
*
* If (outside the lock) nfsd_serv is non-NULL, then it must point to a
* properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
* of nfsd threads must exist and each must be listed in ->sp_all_threads in each
* entry of ->sv_pools[].
*
* Transitions of the thread count between zero and non-zero are of particular
* interest since the svc_serv needs to be created and initialized at that
* point, or freed.
*
* Finally, the nfsd_mutex also protects some of the global variables that are
* accessed when nfsd starts and that are settable via the write_* routines in
* nfsctl.c. In particular:
*
* user_recovery_dirname
* user_lease_time
* nfsd_versions
*/
DEFINE_MUTEX(nfsd_mutex);
struct svc_serv *nfsd_serv;
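/*
 * Sketch only, not part of this patch: the access pattern the comment above
 * calls for.  Readers of nfsd_serv outside the startup path take nfsd_mutex
 * first, exactly as the reworked nfsd_nrthreads() and the __write_*()
 * helpers in nfsctl.c do.  The function name here is hypothetical.
 */
static int example_nfsd_thread_count(void)
{
	int n = 0;

	mutex_lock(&nfsd_mutex);
	if (nfsd_serv)
		n = nfsd_serv->sv_nrthreads;
	mutex_unlock(&nfsd_mutex);
	return n;
}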
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static struct svc_version * nfsd_acl_version[] = {
......@@ -145,13 +156,14 @@ int nfsd_vers(int vers, enum vers_op change)
int nfsd_nrthreads(void)
{
if (nfsd_serv == NULL)
return 0;
else
return nfsd_serv->sv_nrthreads;
int rv = 0;
mutex_lock(&nfsd_mutex);
if (nfsd_serv)
rv = nfsd_serv->sv_nrthreads;
mutex_unlock(&nfsd_mutex);
return rv;
}
static int killsig; /* signal that was used to kill last nfsd */
static void nfsd_last_thread(struct svc_serv *serv)
{
/* When last nfsd thread exits we need to do some clean-up */
......@@ -162,11 +174,9 @@ static void nfsd_last_thread(struct svc_serv *serv)
nfsd_racache_shutdown();
nfs4_state_shutdown();
printk(KERN_WARNING "nfsd: last server has exited\n");
if (killsig != SIG_NOCLEAN) {
printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
nfsd_export_flush();
}
printk(KERN_WARNING "nfsd: last server has exited, flushing export "
"cache\n");
nfsd_export_flush();
}
void nfsd_reset_versions(void)
......@@ -190,13 +200,14 @@ void nfsd_reset_versions(void)
}
}
int nfsd_create_serv(void)
{
int err = 0;
lock_kernel();
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv) {
svc_get(nfsd_serv);
unlock_kernel();
return 0;
}
if (nfsd_max_blksize == 0) {
......@@ -217,13 +228,11 @@ int nfsd_create_serv(void)
}
atomic_set(&nfsd_busy, 0);
nfsd_serv = svc_create_pooled(&nfsd_program,
nfsd_max_blksize,
nfsd_last_thread,
nfsd, SIG_NOCLEAN, THIS_MODULE);
nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
nfsd_last_thread, nfsd, THIS_MODULE);
if (nfsd_serv == NULL)
err = -ENOMEM;
unlock_kernel();
do_gettimeofday(&nfssvc_boot); /* record boot time */
return err;
}
......@@ -282,6 +291,8 @@ int nfsd_set_nrthreads(int n, int *nthreads)
int tot = 0;
int err = 0;
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv == NULL || n <= 0)
return 0;
......@@ -316,7 +327,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
nthreads[0] = 1;
/* apply the new numbers */
lock_kernel();
svc_get(nfsd_serv);
for (i = 0; i < n; i++) {
err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
......@@ -325,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
break;
}
svc_destroy(nfsd_serv);
unlock_kernel();
return err;
}
......@@ -334,8 +343,8 @@ int
nfsd_svc(unsigned short port, int nrservs)
{
int error;
lock_kernel();
mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
error = -EINVAL;
if (nrservs <= 0)
......@@ -363,7 +372,7 @@ nfsd_svc(unsigned short port, int nrservs)
failure:
svc_destroy(nfsd_serv); /* Release server */
out:
unlock_kernel();
mutex_unlock(&nfsd_mutex);
return error;
}
......@@ -391,18 +400,17 @@ update_thread_usage(int busy_threads)
/*
* This is the NFS server kernel thread
*/
static void
nfsd(struct svc_rqst *rqstp)
static int
nfsd(void *vrqstp)
{
struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
struct fs_struct *fsp;
int err;
sigset_t shutdown_mask, allowed_mask;
int err, preverr = 0;
/* Lock module and set up kernel thread */
lock_kernel();
daemonize("nfsd");
mutex_lock(&nfsd_mutex);
/* After daemonize() this kernel thread shares current->fs
/* At this point, the thread shares current->fs
* with the init process. We need to create files with a
* umask of 0 instead of init's umask. */
fsp = copy_fs_struct(current->fs);
......@@ -414,14 +422,17 @@ nfsd(struct svc_rqst *rqstp)
current->fs = fsp;
current->fs->umask = 0;
siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
siginitsetinv(&allowed_mask, ALLOWED_SIGS);
/*
* thread is spawned with all signals set to SIG_IGN, re-enable
* the ones that will bring down the thread
*/
allow_signal(SIGKILL);
allow_signal(SIGHUP);
allow_signal(SIGINT);
allow_signal(SIGQUIT);
nfsdstats.th_cnt++;
rqstp->rq_task = current;
unlock_kernel();
mutex_unlock(&nfsd_mutex);
/*
* We want less throttling in balance_dirty_pages() so that nfs to
......@@ -435,26 +446,30 @@ nfsd(struct svc_rqst *rqstp)
* The main request loop
*/
for (;;) {
/* Block all but the shutdown signals */
sigprocmask(SIG_SETMASK, &shutdown_mask, NULL);
/*
* Find a socket with data available and call its
* recvfrom routine.
*/
while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
;
if (err < 0)
if (err == -EINTR)
break;
else if (err < 0) {
if (err != preverr) {
printk(KERN_WARNING "%s: unexpected error "
"from svc_recv (%d)\n", __func__, -err);
preverr = err;
}
schedule_timeout_uninterruptible(HZ);
continue;
}
update_thread_usage(atomic_read(&nfsd_busy));
atomic_inc(&nfsd_busy);
/* Lock the export hash tables for reading. */
exp_readlock();
/* Process request with signals blocked. */
sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
svc_process(rqstp);
/* Unlock export hash tables */
......@@ -463,22 +478,10 @@ nfsd(struct svc_rqst *rqstp)
atomic_dec(&nfsd_busy);
}
if (err != -EINTR) {
printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
} else {
unsigned int signo;
for (signo = 1; signo <= _NSIG; signo++)
if (sigismember(&current->pending.signal, signo) &&
!sigismember(&current->blocked, signo))
break;
killsig = signo;
}
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);
lock_kernel();
mutex_lock(&nfsd_mutex);
nfsdstats.th_cnt --;
out:
......@@ -486,8 +489,9 @@ out:
svc_exit_thread(rqstp);
/* Release module */
unlock_kernel();
mutex_unlock(&nfsd_mutex);
module_put_and_exit(0);
return 0;
}
static __be32 map_new_errors(u32 vers, __be32 nfserr)
......
......@@ -918,12 +918,12 @@ struct file_lock {
struct list_head fl_link; /* doubly linked list of all locks */
struct list_head fl_block; /* circular list of blocked processes */
fl_owner_t fl_owner;
unsigned char fl_flags;
unsigned char fl_type;
unsigned int fl_pid;
struct pid *fl_nspid;
wait_queue_head_t fl_wait;
struct file *fl_file;
unsigned char fl_flags;
unsigned char fl_type;
loff_t fl_start;
loff_t fl_end;
......
......@@ -200,10 +200,12 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
* Server-side lock handling
*/
__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
struct nlm_lock *, int, struct nlm_cookie *);
struct nlm_host *, struct nlm_lock *, int,
struct nlm_cookie *);
__be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *);
struct nlm_host *, struct nlm_lock *,
struct nlm_lock *, struct nlm_cookie *);
__be32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
unsigned long nlmsvc_retry_blocked(void);
void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
......@@ -224,7 +226,7 @@ void nlmsvc_invalidate_all(void);
* Cluster failover support
*/
int nlmsvc_unlock_all_by_sb(struct super_block *sb);
int nlmsvc_unlock_all_by_ip(__be32 server_addr);
int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
......
......@@ -65,9 +65,6 @@
#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010
#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020
#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040
#define NFS4_ACE_OWNER 0x00000080
#define NFS4_ACE_GROUP 0x00000100
#define NFS4_ACE_EVERYONE 0x00000200
#define NFS4_ACE_READ_DATA 0x00000001
#define NFS4_ACE_LIST_DIRECTORY 0x00000001
......
......@@ -28,20 +28,20 @@
#define NFSD_SUPPORTED_MINOR_VERSION 0
/*
* Special flags for nfsd_permission. These must be different from MAY_READ,
* MAY_WRITE, and MAY_EXEC.
* Flags for nfsd_permission
*/
#define MAY_NOP 0
#define MAY_SATTR 8
#define MAY_TRUNC 16
#define MAY_LOCK 32
#define MAY_OWNER_OVERRIDE 64
#define MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
#if (MAY_SATTR | MAY_TRUNC | MAY_LOCK | MAY_OWNER_OVERRIDE | MAY_LOCAL_ACCESS) & (MAY_READ | MAY_WRITE | MAY_EXEC)
# error "please use a different value for MAY_SATTR or MAY_TRUNC or MAY_LOCK or MAY_LOCAL_ACCESS or MAY_OWNER_OVERRIDE."
#endif
#define MAY_CREATE (MAY_EXEC|MAY_WRITE)
#define MAY_REMOVE (MAY_EXEC|MAY_WRITE|MAY_TRUNC)
#define NFSD_MAY_NOP 0
#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */
#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */
#define NFSD_MAY_READ 4 /* == MAY_READ */
#define NFSD_MAY_SATTR 8
#define NFSD_MAY_TRUNC 16
#define NFSD_MAY_LOCK 32
#define NFSD_MAY_OWNER_OVERRIDE 64
#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
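/*
 * Usage sketch (illustrative, not from this patch): callers build an access
 * mask from the flags above and pass it to fh_verify()/nfsd_permission(),
 * much as do_open_permission() in nfs4proc.c does.  The helper name is
 * hypothetical.
 */
static inline int example_open_accmode(int want_read, int want_write)
{
	int accmode = NFSD_MAY_NOP;

	if (want_read)
		accmode |= NFSD_MAY_READ;
	if (want_write)
		accmode |= NFSD_MAY_WRITE | NFSD_MAY_TRUNC;
	return accmode;
}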
/*
* Callback function for readdir
......@@ -54,6 +54,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
extern struct svc_program nfsd_program;
extern struct svc_version nfsd_version2, nfsd_version3,
nfsd_version4;
extern struct mutex nfsd_mutex;
extern struct svc_serv *nfsd_serv;
extern struct seq_operations nfs_exports_op;
......
......@@ -98,8 +98,6 @@ struct nfs4_callback {
u32 cb_ident;
/* RPC client info */
atomic_t cb_set; /* successful CB_NULL call */
struct rpc_program cb_program;
struct rpc_stat cb_stat;
struct rpc_clnt * cb_client;
};
......
......@@ -51,6 +51,9 @@ struct krb5_ctx {
extern spinlock_t krb5_seq_lock;
/* The length of the Kerberos GSS token header */
#define GSS_KRB5_TOK_HDR_LEN (16)
#define KG_TOK_MIC_MSG 0x0101
#define KG_TOK_WRAP_MSG 0x0201
......
......@@ -22,7 +22,7 @@
/*
* This is the RPC server thread function prototype
*/
typedef void (*svc_thread_fn)(struct svc_rqst *);
typedef int (*svc_thread_fn)(void *);
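/*
 * Sketch (not part of this patch) of a thread function matching the new
 * prototype: the kthread infrastructure passes the svc_rqst as the void *
 * argument and the function returns an int exit code.  Error handling is
 * deliberately minimal here; see nfsd() in fs/nfsd/nfssvc.c for the real
 * loop.
 */
static int example_svc_thread(void *vrqstp)
{
	struct svc_rqst *rqstp = vrqstp;
	int err;

	for (;;) {
		err = svc_recv(rqstp, 60 * 60 * HZ);
		if (err == -EINTR)
			break;
		if (err < 0)
			continue;	/* -EAGAIN etc.: just retry */
		svc_process(rqstp);
	}
	svc_exit_thread(rqstp);
	return 0;
}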
/*
*
......@@ -80,7 +80,6 @@ struct svc_serv {
struct module * sv_module; /* optional module to count when
* adding threads */
svc_thread_fn sv_function; /* main function for threads */
int sv_kill_signal; /* signal to kill threads */
};
/*
......@@ -388,8 +387,8 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
void (*shutdown)(struct svc_serv*),
svc_thread_fn, int sig, struct module *);
void (*shutdown)(struct svc_serv*), svc_thread_fn,
struct module *);
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
void svc_destroy(struct svc_serv *);
int svc_process(struct svc_rqst *);
......
......@@ -72,7 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
*/
struct svc_rdma_op_ctxt {
struct svc_rdma_op_ctxt *read_hdr;
struct list_head free_list;
int hdr_count;
struct xdr_buf arg;
struct list_head dto_q;
enum ib_wr_opcode wr_op;
......@@ -86,6 +86,31 @@ struct svc_rdma_op_ctxt {
struct page *pages[RPCSVC_MAXPAGES];
};
/*
* NFS requests are mapped on the client side by the chunk lists in
* the RPCRDMA header. During the fetching of the RPC from the client
* and the writing of the reply to the client, the memory in the
* client and the memory in the server must be mapped as contiguous
* vaddr/len for access by the hardware. These data structures keep
* these mappings.
*
* For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
* 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
* 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
* mapping of the reply.
*/
struct svc_rdma_chunk_sge {
int start; /* sge no for this chunk */
int count; /* sge count for this chunk */
};
struct svc_rdma_req_map {
unsigned long count;
union {
struct kvec sge[RPCSVC_MAXPAGES];
struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
};
};
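/*
 * Illustrative sketch (not in this patch): recording one read-list chunk's
 * position via the 'ch' view of the union above, while a reply path would
 * fill the 'sge' view instead.  The helper name is hypothetical.
 */
static inline void example_map_chunk(struct svc_rdma_req_map *map,
				     int sge_start, int sge_count)
{
	map->ch[map->count].start = sge_start;
	map->ch[map->count].count = sge_count;
	map->count++;
}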
#define RDMACTXT_F_LAST_CTXT 2
struct svcxprt_rdma {
......@@ -93,7 +118,6 @@ struct svcxprt_rdma {
struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
struct list_head sc_accept_q; /* Conn. waiting accept */
int sc_ord; /* RDMA read limit */
wait_queue_head_t sc_read_wait;
int sc_max_sge;
int sc_sq_depth; /* Depth of SQ */
......@@ -104,12 +128,8 @@ struct svcxprt_rdma {
struct ib_pd *sc_pd;
atomic_t sc_dma_used;
atomic_t sc_ctxt_used;
struct list_head sc_ctxt_free;
int sc_ctxt_cnt;
int sc_ctxt_bump;
int sc_ctxt_max;
spinlock_t sc_ctxt_lock;
struct list_head sc_rq_dto_q;
spinlock_t sc_rq_dto_lock;
struct ib_qp *sc_qp;
......@@ -173,6 +193,8 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern struct svc_xprt_class svc_rdma_class;
......
......@@ -5,12 +5,12 @@
obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o
gss_mech_switch.o svcauth_gss.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
gss_krb5_seqnum.o gss_krb5_wrap.o
gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o
obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
......
......@@ -83,8 +83,6 @@ out:
return ret;
}
EXPORT_SYMBOL(krb5_encrypt);
u32
krb5_decrypt(
struct crypto_blkcipher *tfm,
......@@ -118,8 +116,6 @@ out:
return ret;
}
EXPORT_SYMBOL(krb5_decrypt);
static int
checksummer(struct scatterlist *sg, void *data)
{
......@@ -161,8 +157,6 @@ out:
return err ? GSS_S_FAILURE : 0;
}
EXPORT_SYMBOL(make_checksum);
struct encryptor_desc {
u8 iv[8]; /* XXX hard-coded blocksize */
struct blkcipher_desc desc;
......@@ -262,8 +256,6 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
return ret;
}
EXPORT_SYMBOL(gss_encrypt_xdr_buf);
struct decryptor_desc {
u8 iv[8]; /* XXX hard-coded blocksize */
struct blkcipher_desc desc;
......@@ -334,5 +326,3 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
}
EXPORT_SYMBOL(gss_decrypt_xdr_buf);
......@@ -78,7 +78,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
unsigned char *ptr, *krb5_hdr, *msg_start;
unsigned char *ptr, *msg_start;
s32 now;
u32 seq_send;
......@@ -87,36 +87,36 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
now = get_seconds();
token->len = g_token_size(&ctx->mech_used, 24);
token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
ptr = token->data;
g_make_token_header(&ctx->mech_used, 24, &ptr);
g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
*ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff);
*ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff);
/* ptr now at header described in rfc 1964, section 1.2.1: */
ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
krb5_hdr = ptr - 2;
msg_start = krb5_hdr + 24;
msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
*(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(krb5_hdr + 4, 0xff, 4);
*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(ptr + 4, 0xff, 4);
if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum))
if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
return GSS_S_FAILURE;
if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
md5cksum.data, md5cksum.len))
return GSS_S_FAILURE;
memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8);
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send++;
spin_unlock(&krb5_seq_lock);
if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
seq_send, krb5_hdr + 16, krb5_hdr + 8))
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
ptr + 8))
return GSS_S_FAILURE;
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
......
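For orientation, the offsets used above follow the per-message token header of RFC 1964, section 1.2.1, as this code reads it; GSS_KRB5_TOK_HDR_LEN evidently covers the 16 bytes up to the checksum:

	/* byte offsets relative to ptr (start of the krb5 token header) */
	/*  ptr[0..1]   TOK_ID     (KG_TOK_MIC_MSG or KG_TOK_WRAP_MSG)   */
	/*  ptr[2..3]   SGN_ALG    (SGN_ALG_DES_MAC_MD5)                 */
	/*  ptr[4..5]   SEAL_ALG   (SEAL_ALG_NONE for MIC, SEAL_ALG_DES  */
	/*              for wrap)                                        */
	/*  ptr[6..7]   filler     (0xff 0xff)                           */
	/*  ptr[8..15]  SND_SEQ    (encrypted sequence number)           */
	/*  ptr[16..23] SGN_CKSUM  (last 8 bytes of the encrypted MD5)   */
	/* hence the checksum goes at ptr + GSS_KRB5_TOK_HDR_LEN and the */
	/* message text starts at ptr + GSS_KRB5_TOK_HDR_LEN + 8         */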
......@@ -92,30 +92,30 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
read_token->len))
return GSS_S_DEFECTIVE_TOKEN;
if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
(*ptr++ != ( KG_TOK_MIC_MSG &0xff)) )
if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
(ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
signalg = ptr[0] + (ptr[1] << 8);
signalg = ptr[2] + (ptr[3] << 8);
if (signalg != SGN_ALG_DES_MAC_MD5)
return GSS_S_DEFECTIVE_TOKEN;
sealalg = ptr[2] + (ptr[3] << 8);
sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != SEAL_ALG_NONE)
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum))
if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
return GSS_S_FAILURE;
if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
return GSS_S_FAILURE;
if (memcmp(md5cksum.data + 8, ptr + 14, 8))
if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
......@@ -127,7 +127,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
/* do sequencing checks */
if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum))
if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
return GSS_S_FAILURE;
if ((ctx->initiate && direction != 0xff) ||
......
......@@ -87,8 +87,8 @@ out:
return 0;
}
static inline void
make_confounder(char *p, int blocksize)
static void
make_confounder(char *p, u32 conflen)
{
static u64 i = 0;
u64 *q = (u64 *)p;
......@@ -102,8 +102,22 @@ make_confounder(char *p, int blocksize)
* uniqueness would mean worrying about atomicity and rollover, and I
* don't care enough. */
BUG_ON(blocksize != 8);
*q = i++;
/* initialize to random value */
if (i == 0) {
i = random32();
i = (i << 32) | random32();
}
switch (conflen) {
case 16:
*q++ = i++;
/* fall through */
case 8:
*q++ = i++;
break;
default:
BUG();
}
}
/* Assumptions: the head and tail of inbuf are ours to play with.
......@@ -122,7 +136,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
int blocksize = 0, plainlen;
unsigned char *ptr, *krb5_hdr, *msg_start;
unsigned char *ptr, *msg_start;
s32 now;
int headlen;
struct page **tmp_pages;
......@@ -149,26 +163,26 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
buf->len += headlen;
BUG_ON((buf->len - offset - headlen) % blocksize);
g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr);
g_make_token_header(&kctx->mech_used,
GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
*ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff);
*ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff);
/* ptr now at header described in rfc 1964, section 1.2.1: */
ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
krb5_hdr = ptr - 2;
msg_start = krb5_hdr + 24;
msg_start = ptr + 24;
*(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(krb5_hdr + 4, 0xff, 4);
*(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES);
*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(ptr + 4, 0xff, 4);
*(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
make_confounder(msg_start, blocksize);
/* XXXJBF: UGH!: */
tmp_pages = buf->pages;
buf->pages = pages;
if (make_checksum("md5", krb5_hdr, 8, buf,
if (make_checksum("md5", ptr, 8, buf,
offset + headlen - blocksize, &md5cksum))
return GSS_S_FAILURE;
buf->pages = tmp_pages;
......@@ -176,7 +190,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
md5cksum.data, md5cksum.len))
return GSS_S_FAILURE;
memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8);
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
spin_lock(&krb5_seq_lock);
seq_send = kctx->seq_send++;
......@@ -185,7 +199,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
seq_send, krb5_hdr + 16, krb5_hdr + 8)))
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
return GSS_S_FAILURE;
if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
......@@ -219,38 +233,38 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
buf->len - offset))
return GSS_S_DEFECTIVE_TOKEN;
if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
(*ptr++ != (KG_TOK_WRAP_MSG &0xff)) )
if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
(ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
/* get the sign and seal algorithms */
signalg = ptr[0] + (ptr[1] << 8);
signalg = ptr[2] + (ptr[3] << 8);
if (signalg != SGN_ALG_DES_MAC_MD5)
return GSS_S_DEFECTIVE_TOKEN;
sealalg = ptr[2] + (ptr[3] << 8);
sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != SEAL_ALG_DES)
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
if (gss_decrypt_xdr_buf(kctx->enc, buf,
ptr + 22 - (unsigned char *)buf->head[0].iov_base))
ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
return GSS_S_DEFECTIVE_TOKEN;
if (make_checksum("md5", ptr - 2, 8, buf,
ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
if (make_checksum("md5", ptr, 8, buf,
ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
return GSS_S_FAILURE;
if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
md5cksum.data, md5cksum.len))
return GSS_S_FAILURE;
if (memcmp(md5cksum.data + 8, ptr + 14, 8))
if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
......@@ -262,8 +276,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
/* do sequencing checks */
if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
&seqnum))
if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
&direction, &seqnum))
return GSS_S_BAD_SIG;
if ((kctx->initiate && direction != 0xff) ||
......@@ -274,7 +288,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
* better to copy and encrypt at the same time. */
blocksize = crypto_blkcipher_blocksize(kctx->enc);
data_start = ptr + 22 + blocksize;
data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
orig_start = buf->head[0].iov_base + offset;
data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
memmove(orig_start, data_start, data_len);
......
......@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
......@@ -291,15 +292,14 @@ svc_pool_map_put(void)
/*
* Set the current thread's cpus_allowed mask so that it
* Set the given thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
*
* Returns 1 and fills in oldmask iff a cpumask was applied.
*/
static inline int
svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
static inline void
svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
{
struct svc_pool_map *m = &svc_pool_map;
unsigned int node = m->pool_to[pidx];
/*
* The caller checks for sv_nrpools > 1, which
......@@ -307,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
*/
BUG_ON(m->count == 0);
switch (m->mode)
{
default:
return 0;
switch (m->mode) {
case SVC_POOL_PERCPU:
{
unsigned int cpu = m->pool_to[pidx];
*oldmask = current->cpus_allowed;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
return 1;
set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
break;
}
case SVC_POOL_PERNODE:
{
unsigned int node = m->pool_to[pidx];
node_to_cpumask_ptr(nodecpumask, node);
*oldmask = current->cpus_allowed;
set_cpus_allowed_ptr(current, nodecpumask);
return 1;
set_cpus_allowed_ptr(task, nodecpumask);
break;
}
}
}
......@@ -443,7 +434,7 @@ EXPORT_SYMBOL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
void (*shutdown)(struct svc_serv *serv),
svc_thread_fn func, int sig, struct module *mod)
svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
......@@ -452,7 +443,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
if (serv != NULL) {
serv->sv_function = func;
serv->sv_kill_signal = sig;
serv->sv_module = mod;
}
......@@ -461,7 +451,8 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
EXPORT_SYMBOL(svc_create_pooled);
/*
* Destroy an RPC service. Should be called with the BKL held
* Destroy an RPC service. Should be called with appropriate locking to
* protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
*/
void
svc_destroy(struct svc_serv *serv)
......@@ -577,46 +568,6 @@ out_enomem:
}
EXPORT_SYMBOL(svc_prepare_thread);
/*
* Create a thread in the given pool. Caller must hold BKL.
* On a NUMA or SMP machine, with a multi-pool serv, the thread
* will be restricted to run on the cpus belonging to the pool.
*/
static int
__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
struct svc_pool *pool)
{
struct svc_rqst *rqstp;
int error = -ENOMEM;
int have_oldmask = 0;
cpumask_t uninitialized_var(oldmask);
rqstp = svc_prepare_thread(serv, pool);
if (IS_ERR(rqstp)) {
error = PTR_ERR(rqstp);
goto out;
}
if (serv->sv_nrpools > 1)
have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
error = kernel_thread((int (*)(void *)) func, rqstp, 0);
if (have_oldmask)
set_cpus_allowed(current, oldmask);
if (error < 0)
goto out_thread;
svc_sock_update_bufs(serv);
error = 0;
out:
return error;
out_thread:
svc_exit_thread(rqstp);
goto out;
}
/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
*/
......@@ -674,7 +625,7 @@ found_pool:
* of threads the given number. If `pool' is non-NULL, applies
* only to threads in that pool, otherwise round-robins between
* all pools. Must be called with a svc_get() reference and
* the BKL held.
* the BKL or another lock to protect access to svc_serv fields.
*
* Destroying threads relies on the service threads filling in
* rqstp->rq_task, which only the nfs ones do. Assumes the serv
......@@ -686,7 +637,9 @@ found_pool:
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
struct task_struct *victim;
struct svc_rqst *rqstp;
struct task_struct *task;
struct svc_pool *chosen_pool;
int error = 0;
unsigned int state = serv->sv_nrthreads-1;
......@@ -702,18 +655,34 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
/* create new threads */
while (nrservs > 0) {
nrservs--;
chosen_pool = choose_pool(serv, pool, &state);
rqstp = svc_prepare_thread(serv, chosen_pool);
if (IS_ERR(rqstp)) {
error = PTR_ERR(rqstp);
break;
}
__module_get(serv->sv_module);
error = __svc_create_thread(serv->sv_function, serv,
choose_pool(serv, pool, &state));
if (error < 0) {
task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
if (IS_ERR(task)) {
error = PTR_ERR(task);
module_put(serv->sv_module);
svc_exit_thread(rqstp);
break;
}
rqstp->rq_task = task;
if (serv->sv_nrpools > 1)
svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
svc_sock_update_bufs(serv);
wake_up_process(task);
}
/* destroy old threads */
while (nrservs < 0 &&
(victim = choose_victim(serv, pool, &state)) != NULL) {
send_sig(serv->sv_kill_signal, victim, 1);
(task = choose_victim(serv, pool, &state)) != NULL) {
send_sig(SIGINT, task, 1);
nrservs++;
}
......@@ -722,7 +691,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
EXPORT_SYMBOL(svc_set_num_threads);
/*
* Called from a server thread as it's exiting. Caller must hold BKL.
* Called from a server thread as it's exiting. Caller must hold the BKL or
* the "service mutex", whichever is appropriate for the service.
*/
void
svc_exit_thread(struct svc_rqst *rqstp)
......
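The move from kernel_thread() to the kthread API in svc_set_num_threads() follows the usual create-then-wake pattern: the thread is created stopped, its cpu affinity is set before it ever runs, and only then is it woken. A minimal sketch of that pattern with illustrative names (my_service_fn, start_one_thread and cpu are not part of the patch):

	static int my_service_fn(void *arg)
	{
		/* loop serving requests; the sunrpc threads above are
		 * later asked to exit with send_sig(SIGINT, ...) */
		return 0;
	}

	static int start_one_thread(void *arg, int cpu)
	{
		struct task_struct *task;

		task = kthread_create(my_service_fn, arg, "my-svc");
		if (IS_ERR(task))
			return PTR_ERR(task);
		/* restrict the thread before its first run, as the
		 * SVC_POOL_PERCPU case does above */
		set_cpus_allowed_ptr(task, &cpumask_of_cpu(cpu));
		wake_up_process(task);
		return 0;
	}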
......@@ -69,6 +69,10 @@ atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
/* Temporary NFS request map and context caches */
struct kmem_cache *svc_rdma_map_cachep;
struct kmem_cache *svc_rdma_ctxt_cachep;
/*
* This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file
......@@ -236,11 +240,14 @@ static ctl_table svcrdma_root_table[] = {
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
flush_scheduled_work();
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
svc_unreg_xprt_class(&svc_rdma_class);
kmem_cache_destroy(svc_rdma_map_cachep);
kmem_cache_destroy(svc_rdma_ctxt_cachep);
}
int svc_rdma_init(void)
......@@ -255,9 +262,37 @@ int svc_rdma_init(void)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
/* Create the temporary map cache */
svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
sizeof(struct svc_rdma_req_map),
0,
SLAB_HWCACHE_ALIGN,
NULL);
if (!svc_rdma_map_cachep) {
printk(KERN_INFO "Could not allocate map cache.\n");
goto err0;
}
/* Create the temporary context cache */
svc_rdma_ctxt_cachep =
kmem_cache_create("svc_rdma_ctxt_cache",
sizeof(struct svc_rdma_op_ctxt),
0,
SLAB_HWCACHE_ALIGN,
NULL);
if (!svc_rdma_ctxt_cachep) {
printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
goto err1;
}
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
return 0;
err1:
kmem_cache_destroy(svc_rdma_map_cachep);
err0:
unregister_sysctl_table(svcrdma_table_header);
return -ENOMEM;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
......
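The svc_rdma_get_req_map()/svc_rdma_put_req_map() helpers themselves live in svc_rdma_transport.c, which is not shown here. A plausible sketch, assuming they are thin wrappers around the svc_rdma_map_cachep created above (the callers in rdma_read_xdr below do not check for NULL, so the real allocation presumably cannot fail):

	struct svc_rdma_req_map *svc_rdma_get_req_map(void)
	{
		struct svc_rdma_req_map *map;

		/* assumption: __GFP_NOFAIL so callers need no NULL check */
		map = kmem_cache_alloc(svc_rdma_map_cachep,
				       GFP_KERNEL | __GFP_NOFAIL);
		map->count = 0;
		return map;
	}

	void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
	{
		kmem_cache_free(svc_rdma_map_cachep, map);
	}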
......@@ -112,11 +112,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
rqstp->rq_arg.tail[0].iov_len = 0;
}
struct chunk_sge {
int start; /* sge no for this chunk */
int count; /* sge count for this chunk */
};
/* Encode a read-chunk-list as an array of IB SGE
*
* Assumptions:
......@@ -134,8 +129,8 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
struct rpcrdma_msg *rmsgp,
struct ib_sge *sge,
struct chunk_sge *ch_sge_ary,
struct svc_rdma_req_map *rpl_map,
struct svc_rdma_req_map *chl_map,
int ch_count,
int byte_count)
{
......@@ -156,22 +151,18 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
head->arg.pages = &head->pages[head->count];
head->sge[0].length = head->count; /* save count of hdr pages */
head->hdr_count = head->count; /* save count of hdr pages */
head->arg.page_base = 0;
head->arg.page_len = ch_bytes;
head->arg.len = rqstp->rq_arg.len + ch_bytes;
head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
head->count++;
ch_sge_ary[0].start = 0;
chl_map->ch[0].start = 0;
while (byte_count) {
rpl_map->sge[sge_no].iov_base =
page_address(rqstp->rq_arg.pages[page_no]) + page_off;
sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
sge[sge_no].addr =
ib_dma_map_page(xprt->sc_cm_id->device,
rqstp->rq_arg.pages[page_no],
page_off, sge_bytes,
DMA_FROM_DEVICE);
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
rpl_map->sge[sge_no].iov_len = sge_bytes;
/*
* Don't bump head->count here because the same page
* may be used by multiple SGE.
......@@ -187,11 +178,11 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
* SGE, move to the next SGE
*/
if (ch_bytes == 0) {
ch_sge_ary[ch_no].count =
sge_no - ch_sge_ary[ch_no].start;
chl_map->ch[ch_no].count =
sge_no - chl_map->ch[ch_no].start;
ch_no++;
ch++;
ch_sge_ary[ch_no].start = sge_no;
chl_map->ch[ch_no].start = sge_no;
ch_bytes = ch->rc_target.rs_length;
/* If bytes remaining account for next chunk */
if (byte_count) {
......@@ -220,18 +211,25 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
return sge_no;
}
static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt,
struct ib_sge *sge,
static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
struct svc_rdma_op_ctxt *ctxt,
struct kvec *vec,
u64 *sgl_offset,
int count)
{
int i;
ctxt->count = count;
ctxt->direction = DMA_FROM_DEVICE;
for (i = 0; i < count; i++) {
ctxt->sge[i].addr = sge[i].addr;
ctxt->sge[i].length = sge[i].length;
*sgl_offset = *sgl_offset + sge[i].length;
atomic_inc(&xprt->sc_dma_used);
ctxt->sge[i].addr =
ib_dma_map_single(xprt->sc_cm_id->device,
vec[i].iov_base, vec[i].iov_len,
DMA_FROM_DEVICE);
ctxt->sge[i].length = vec[i].iov_len;
ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
*sgl_offset = *sgl_offset + vec[i].iov_len;
}
}
......@@ -282,34 +280,29 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
struct ib_send_wr read_wr;
int err = 0;
int ch_no;
struct ib_sge *sge;
int ch_count;
int byte_count;
int sge_count;
u64 sgl_offset;
struct rpcrdma_read_chunk *ch;
struct svc_rdma_op_ctxt *ctxt = NULL;
struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct svc_rdma_op_ctxt *tmp_ch_ctxt;
struct chunk_sge *ch_sge_ary;
struct svc_rdma_req_map *rpl_map;
struct svc_rdma_req_map *chl_map;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
if (!ch)
return 0;
/* Allocate temporary contexts to keep SGE */
BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge));
tmp_sge_ctxt = svc_rdma_get_context(xprt);
sge = tmp_sge_ctxt->sge;
tmp_ch_ctxt = svc_rdma_get_context(xprt);
ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
/* Allocate temporary reply and chunk maps */
rpl_map = svc_rdma_get_req_map();
chl_map = svc_rdma_get_req_map();
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL;
sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
sge, ch_sge_ary,
rpl_map, chl_map,
ch_count, byte_count);
sgl_offset = 0;
ch_no = 0;
......@@ -331,14 +324,15 @@ next_sge:
read_wr.wr.rdma.remote_addr =
get_unaligned(&(ch->rc_target.rs_offset)) +
sgl_offset;
read_wr.sg_list = &sge[ch_sge_ary[ch_no].start];
read_wr.sg_list = ctxt->sge;
read_wr.num_sge =
rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count);
rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
rdma_set_ctxt_sge(xprt, ctxt,
&rpl_map->sge[chl_map->ch[ch_no].start],
&sgl_offset,
read_wr.num_sge);
if (((ch+1)->rc_discrim == 0) &&
(read_wr.num_sge == ch_sge_ary[ch_no].count)) {
(read_wr.num_sge == chl_map->ch[ch_no].count)) {
/*
* Mark the last RDMA_READ with a bit to
* indicate all RPC data has been fetched from
......@@ -358,9 +352,9 @@ next_sge:
}
atomic_inc(&rdma_stat_read);
if (read_wr.num_sge < ch_sge_ary[ch_no].count) {
ch_sge_ary[ch_no].count -= read_wr.num_sge;
ch_sge_ary[ch_no].start += read_wr.num_sge;
if (read_wr.num_sge < chl_map->ch[ch_no].count) {
chl_map->ch[ch_no].count -= read_wr.num_sge;
chl_map->ch[ch_no].start += read_wr.num_sge;
goto next_sge;
}
sgl_offset = 0;
......@@ -368,8 +362,8 @@ next_sge:
}
out:
svc_rdma_put_context(tmp_sge_ctxt, 0);
svc_rdma_put_context(tmp_ch_ctxt, 0);
svc_rdma_put_req_map(rpl_map);
svc_rdma_put_req_map(chl_map);
/* Detach arg pages. svc_recv will replenish them */
for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
......@@ -399,7 +393,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_pages[page_no] = head->pages[page_no];
}
/* Point rq_arg.pages past header */
rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length];
rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
......