Commit 36bdfc8b authored by Greg Banks, committed by Linus Torvalds

[PATCH] knfsd: move tempsock aging to a timer

Following are 11 patches from Greg Banks which combine to make knfsd more
Numa-aware.  They reduce hitting on 'global' data structures, and create some
data-structures that can be node-local.

knfsd threads are bound to a particular node, and the thread to handle a new
request is chosen from the threads that are attached to the node that received
the interrupt.

The distribution of threads across nodes can be controlled by a new file in
the 'nfsd' filesystem, though the default approach of an even spread is
probably fine for most sites.

Some (old) numbers that show the efficacy of these patches: N == number of
NICs == number of CPUs == number of clients.  Number of NUMA nodes == N/2

N	Throughput, MiB/s	CPU usage, % (max=N*100)
	Before	After		Before	After
	---	------	----		-----	-----
	4	312	435		350	228
	6	500	656		501	418
	8	562	804		690	589

This patch:

Move the aging of RPC/TCP connection sockets from the main svc_recv() loop to
a timer which uses a mark-and-sweep algorithm every 6 minutes.  This reduces
the amount of work that needs to be done in the main RPC loop and the length
of time we need to hold the (effectively global) svc_serv->sv_lock.

[akpm@osdl.org: cleanup]
Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 4a3ae42d
...@@ -40,6 +40,7 @@ struct svc_serv { ...@@ -40,6 +40,7 @@ struct svc_serv {
struct list_head sv_permsocks; /* all permanent sockets */ struct list_head sv_permsocks; /* all permanent sockets */
struct list_head sv_tempsocks; /* all temporary sockets */ struct list_head sv_tempsocks; /* all temporary sockets */
int sv_tmpcnt; /* count of temporary sockets */ int sv_tmpcnt; /* count of temporary sockets */
struct timer_list sv_temptimer; /* timer for aging temporary sockets */
char * sv_name; /* service name */ char * sv_name; /* service name */
......
...@@ -31,6 +31,8 @@ struct svc_sock { ...@@ -31,6 +31,8 @@ struct svc_sock {
#define SK_DEAD 6 /* socket closed */ #define SK_DEAD 6 /* socket closed */
#define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */ #define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */
#define SK_DEFERRED 8 /* request on sk_deferred */ #define SK_DEFERRED 8 /* request on sk_deferred */
#define SK_OLD 9 /* used for temp socket aging mark+sweep */
#define SK_DETACHED 10 /* detached from tempsocks list */
int sk_reserved; /* space on outq that is reserved */ int sk_reserved; /* space on outq that is reserved */
......
...@@ -59,6 +59,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize, ...@@ -59,6 +59,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize,
INIT_LIST_HEAD(&serv->sv_sockets); INIT_LIST_HEAD(&serv->sv_sockets);
INIT_LIST_HEAD(&serv->sv_tempsocks); INIT_LIST_HEAD(&serv->sv_tempsocks);
INIT_LIST_HEAD(&serv->sv_permsocks); INIT_LIST_HEAD(&serv->sv_permsocks);
init_timer(&serv->sv_temptimer);
spin_lock_init(&serv->sv_lock); spin_lock_init(&serv->sv_lock);
/* Remove any stale portmap registrations */ /* Remove any stale portmap registrations */
...@@ -87,6 +88,8 @@ svc_destroy(struct svc_serv *serv) ...@@ -87,6 +88,8 @@ svc_destroy(struct svc_serv *serv)
} else } else
printk("svc_destroy: no threads for serv=%p!\n", serv); printk("svc_destroy: no threads for serv=%p!\n", serv);
del_timer_sync(&serv->sv_temptimer);
while (!list_empty(&serv->sv_tempsocks)) { while (!list_empty(&serv->sv_tempsocks)) {
svsk = list_entry(serv->sv_tempsocks.next, svsk = list_entry(serv->sv_tempsocks.next,
struct svc_sock, struct svc_sock,
......
...@@ -74,6 +74,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); ...@@ -74,6 +74,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp); static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req); static struct cache_deferred_req *svc_defer(struct cache_req *req);
/*
 * Interval, in seconds, between runs of the temporary-socket aging
 * timer (it is multiplied by HZ wherever the timer is armed).
 *
 * Apparently the "standard" is that clients close idle connections
 * after 5 minutes and servers after 6 minutes, hence 6 minutes here:
 * http://www.connectathon.org/talks96/nfstcp.pdf
 */
static int svc_conn_age_period = 6 * 60;
/* /*
* Queue up an idle server thread. Must have serv->sv_lock held. * Queue up an idle server thread. Must have serv->sv_lock held.
* Note: this is really a stack rather than a queue, so that we only * Note: this is really a stack rather than a queue, so that we only
...@@ -1220,24 +1227,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout) ...@@ -1220,24 +1227,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
return -EINTR; return -EINTR;
spin_lock_bh(&serv->sv_lock); spin_lock_bh(&serv->sv_lock);
if (!list_empty(&serv->sv_tempsocks)) { if ((svsk = svc_sock_dequeue(serv)) != NULL) {
svsk = list_entry(serv->sv_tempsocks.next,
struct svc_sock, sk_list);
/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
* 6 minutes
* http://www.connectathon.org/talks96/nfstcp.pdf
*/
if (get_seconds() - svsk->sk_lastrecv < 6*60
|| test_bit(SK_BUSY, &svsk->sk_flags))
svsk = NULL;
}
if (svsk) {
set_bit(SK_BUSY, &svsk->sk_flags);
set_bit(SK_CLOSE, &svsk->sk_flags);
rqstp->rq_sock = svsk;
svsk->sk_inuse++;
} else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
rqstp->rq_sock = svsk; rqstp->rq_sock = svsk;
svsk->sk_inuse++; svsk->sk_inuse++;
rqstp->rq_reserved = serv->sv_bufsz; rqstp->rq_reserved = serv->sv_bufsz;
...@@ -1282,13 +1272,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout) ...@@ -1282,13 +1272,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
return -EAGAIN; return -EAGAIN;
} }
svsk->sk_lastrecv = get_seconds(); svsk->sk_lastrecv = get_seconds();
if (test_bit(SK_TEMP, &svsk->sk_flags)) { clear_bit(SK_OLD, &svsk->sk_flags);
/* push active sockets to end of list */
spin_lock_bh(&serv->sv_lock);
if (!list_empty(&svsk->sk_list))
list_move_tail(&svsk->sk_list, &serv->sv_tempsocks);
spin_unlock_bh(&serv->sv_lock);
}
rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024;
rqstp->rq_chandle.defer = svc_defer; rqstp->rq_chandle.defer = svc_defer;
...@@ -1347,6 +1331,58 @@ svc_send(struct svc_rqst *rqstp) ...@@ -1347,6 +1331,58 @@ svc_send(struct svc_rqst *rqstp)
return len; return len;
} }
/*
 * Timer callback that ages a service's temporary sockets with a
 * mark-and-sweep scheme.
 *
 * @closure: the owning struct svc_serv, cast to unsigned long.
 *
 * Each run walks serv->sv_tempsocks under serv->sv_lock.  A socket whose
 * SK_OLD bit was clear is merely marked (the bit is set) and left alone.
 * A socket that was already marked on an earlier run, and is neither
 * referenced (sk_inuse) nor SK_BUSY, is moved to a private list and
 * flagged SK_CLOSE and SK_DETACHED.  After the lock is dropped, those
 * sockets are enqueued so that a server thread can close them.
 *
 * If the lock cannot be taken immediately the timer is re-armed for one
 * second later; otherwise it is re-armed for svc_conn_age_period seconds.
 */
static void
svc_age_temp_sockets(unsigned long closure)
{
	struct svc_serv *serv = (struct svc_serv *)closure;
	struct list_head *pos, *tmp;
	struct svc_sock *svsk;
	LIST_HEAD(to_be_aged);

	dprintk("svc_age_temp_sockets\n");

	/* Never spin here: if the lock is contended, just retry shortly. */
	if (!spin_trylock_bh(&serv->sv_lock)) {
		/* busy, try again 1 sec later */
		dprintk("svc_age_temp_sockets: busy\n");
		mod_timer(&serv->sv_temptimer, jiffies + HZ);
		return;
	}

	list_for_each_safe(pos, tmp, &serv->sv_tempsocks) {
		svsk = list_entry(pos, struct svc_sock, sk_list);

		/*
		 * Mark phase: test_and_set_bit() always leaves SK_OLD set.
		 * Sweep phase: only sockets that were already marked and are
		 * currently idle get collected.
		 */
		if (test_and_set_bit(SK_OLD, &svsk->sk_flags) &&
		    !svsk->sk_inuse &&
		    !test_bit(SK_BUSY, &svsk->sk_flags)) {
			/* hold the socket while it sits on our private list */
			svsk->sk_inuse++;
			list_move(pos, &to_be_aged);
			set_bit(SK_CLOSE, &svsk->sk_flags);
			set_bit(SK_DETACHED, &svsk->sk_flags);
		}
	}

	spin_unlock_bh(&serv->sv_lock);

	/* Drain the private list without holding sv_lock. */
	for (pos = to_be_aged.next; pos != &to_be_aged; pos = to_be_aged.next) {
		/* fiddling the sk_list node is safe 'cos we're SK_DETACHED */
		list_del_init(pos);
		svsk = list_entry(pos, struct svc_sock, sk_list);

		dprintk("queuing svsk %p for closing, %lu seconds old\n",
			svsk, get_seconds() - svsk->sk_lastrecv);

		/* a thread will dequeue and close it soon */
		svc_sock_enqueue(svsk);
		/* presumably balances the sk_inuse++ taken during the sweep */
		svc_sock_put(svsk);
	}

	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}
/* /*
* Initialize socket for RPC use and create svc_sock struct * Initialize socket for RPC use and create svc_sock struct
* XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
...@@ -1400,6 +1436,13 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, ...@@ -1400,6 +1436,13 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
set_bit(SK_TEMP, &svsk->sk_flags); set_bit(SK_TEMP, &svsk->sk_flags);
list_add(&svsk->sk_list, &serv->sv_tempsocks); list_add(&svsk->sk_list, &serv->sv_tempsocks);
serv->sv_tmpcnt++; serv->sv_tmpcnt++;
if (serv->sv_temptimer.function == NULL) {
/* setup timer to age temp sockets */
setup_timer(&serv->sv_temptimer, svc_age_temp_sockets,
(unsigned long)serv);
mod_timer(&serv->sv_temptimer,
jiffies + svc_conn_age_period * HZ);
}
} else { } else {
clear_bit(SK_TEMP, &svsk->sk_flags); clear_bit(SK_TEMP, &svsk->sk_flags);
list_add(&svsk->sk_list, &serv->sv_permsocks); list_add(&svsk->sk_list, &serv->sv_permsocks);
...@@ -1513,6 +1556,7 @@ svc_delete_socket(struct svc_sock *svsk) ...@@ -1513,6 +1556,7 @@ svc_delete_socket(struct svc_sock *svsk)
spin_lock_bh(&serv->sv_lock); spin_lock_bh(&serv->sv_lock);
if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
list_del_init(&svsk->sk_list); list_del_init(&svsk->sk_list);
list_del_init(&svsk->sk_ready); list_del_init(&svsk->sk_ready);
if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment