Commit 295ff7ed authored by Arnaldo Carvalho de Melo, committed by David S. Miller

[TIMEWAIT]: Introduce inet_timewait_death_row

This groups all of the tables and variables associated with the TCP timewait
scheduling/recycling/killing code, which can now be isolated from the
TCP-specific code and used by other transport protocols, such as DCCP.

Next changeset will move this code to net/ipv4/inet_timewait_sock.c
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0b4e03bf
...@@ -19,13 +19,69 @@ ...@@ -19,13 +19,69 @@
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/timer.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/workqueue.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/tcp_states.h> #include <net/tcp_states.h>
#include <asm/atomic.h> #include <asm/atomic.h>
struct inet_hashinfo;
/*
* log2 of the number of slots in the short-time timewait calendar
* (the twcal_row[] array of struct inet_timewait_death_row).
*/
#define INET_TWDR_RECYCLE_SLOTS_LOG 5
#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG)
/*
* If time > 4sec, it is "slow" path, no recycling is required,
* so that we select tick to get range about 4 seconds.
*
* Each branch below corresponds to k = ceil(log2(HZ)) and defines
* INET_TWDR_RECYCLE_TICK as k + 2 - INET_TWDR_RECYCLE_SLOTS_LOG, so that
* INET_TWDR_RECYCLE_SLOTS << INET_TWDR_RECYCLE_TICK == 1 << (k + 2) jiffies,
* i.e. at least 4 and less than 8 seconds for any HZ in that branch.
*
* NOTE(review): presumably the tick is applied as a shift count on jiffies
* by the scheduling code, which is not part of this header -- confirm.
*/
#if HZ <= 16 || HZ > 4096
# error Unsupported: HZ <= 16 or HZ > 4096
#elif HZ <= 32
# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 64
# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 128
# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 256
# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 512
# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 1024
# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 2048
# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#else
# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#endif
/* TIME_WAIT reaping mechanism. */
/* Number of entries in the cells[] array of struct inet_timewait_death_row. */
#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
/*
* NOTE(review): presumably an upper bound on how many timewait sockets are
* reaped in one pass; the consumer is not visible in this header -- confirm.
*/
#define INET_TWDR_TWKILL_QUOTA 100
/*
* Groups the tables and variables used by the timewait
* scheduling/recycling/killing code, so that one instance can be declared
* per transport protocol (TCP declares its instance as tcp_death_row).
*
* NOTE(review): the per-field notes marked "presumably" are inferred from
* names and types only; the code that uses them is not in this header.
*/
struct inet_timewait_death_row {
/* Short-time timewait calendar */
/* Presumably the calendar slot currently being processed -- confirm. */
int twcal_hand;
/* Presumably the jiffies value associated with twcal_hand -- confirm. */
int twcal_jiffie;
/* Timer belonging to the short-time calendar. */
struct timer_list twcal_timer;
/* One list head per calendar slot (INET_TWDR_RECYCLE_SLOTS entries). */
struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS];
/* Presumably serializes access to this structure -- confirm. */
spinlock_t death_lock;
/* Timewait socket count; reported as "tw" in the TCP sockstat line. */
int tw_count;
/* Presumably the interval of tw_timer -- confirm unit and users. */
int period;
/* Presumably a bitmask of cells[] slots left for twkill_work -- confirm. */
u32 thread_slots;
/* Work item for the reaper; the handler is not visible in this header. */
struct work_struct twkill_work;
/* Timer for the reaper that operates on cells[]. */
struct timer_list tw_timer;
/* Presumably the current index into cells[] -- confirm. */
int slot;
/* One list head per reaper slot (INET_TWDR_TWKILL_SLOTS entries). */
struct hlist_head cells[INET_TWDR_TWKILL_SLOTS];
/* Hash tables of the protocol that owns this death row. */
struct inet_hashinfo *hashinfo;
/* For TCP, backs the "tcp_tw_recycle" sysctl. */
int sysctl_tw_recycle;
/* For TCP, backs the "tcp_max_tw_buckets" sysctl. */
int sysctl_max_tw_buckets;
};
#if (BITS_PER_LONG == 64) #if (BITS_PER_LONG == 64)
#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
#else #else
...@@ -33,7 +89,6 @@ ...@@ -33,7 +89,6 @@
#endif #endif
struct inet_bind_bucket; struct inet_bind_bucket;
struct inet_hashinfo;
/* /*
* This is a TIME_WAIT sock. It works around the memory consumption * This is a TIME_WAIT sock. It works around the memory consumption
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/percpu.h> #include <linux/percpu.h>
#include <net/inet_connection_sock.h> #include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <net/inet_hashtables.h> #include <net/inet_hashtables.h>
#include <net/checksum.h> #include <net/checksum.h>
#include <net/request_sock.h> #include <net/request_sock.h>
...@@ -42,9 +43,9 @@ ...@@ -42,9 +43,9 @@
extern struct inet_hashinfo tcp_hashinfo; extern struct inet_hashinfo tcp_hashinfo;
extern atomic_t tcp_orphan_count; extern atomic_t tcp_orphan_count;
extern int tcp_tw_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo); extern void tcp_time_wait(struct sock *sk, int state, int timeo);
extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
struct inet_timewait_death_row *twdr);
#define MAX_TCP_HEADER (128 + MAX_HEADER) #define MAX_TCP_HEADER (128 + MAX_HEADER)
...@@ -148,33 +149,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); ...@@ -148,33 +149,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw);
* timestamps. It must be less than * timestamps. It must be less than
* minimal timewait lifetime. * minimal timewait lifetime.
*/ */
#define TCP_TW_RECYCLE_SLOTS_LOG 5
#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)
/* If time > 4sec, it is "slow" path, no recycling is required,
so that we select tick to get range about 4 seconds.
*/
#if HZ <= 16 || HZ > 4096
# error Unsupported: HZ <= 16 or HZ > 4096
#elif HZ <= 32
# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 64
# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 128
# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 256
# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 512
# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 1024
# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 2048
# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
#else
# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
#endif
/* /*
* TCP option * TCP option
*/ */
...@@ -209,12 +183,13 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); ...@@ -209,12 +183,13 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw);
#define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_CORK 2 /* Socket is corked */
#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */
extern struct inet_timewait_death_row tcp_death_row;
/* sysctl variables for tcp */ /* sysctl variables for tcp */
extern int sysctl_tcp_timestamps; extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack; extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_tw_recycle;
extern int sysctl_tcp_keepalive_time; extern int sysctl_tcp_keepalive_time;
extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_keepalive_intvl; extern int sysctl_tcp_keepalive_intvl;
...@@ -229,7 +204,6 @@ extern int sysctl_tcp_stdurg; ...@@ -229,7 +204,6 @@ extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_max_tw_buckets;
extern int sysctl_tcp_fack; extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering; extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn; extern int sysctl_tcp_ecn;
......
...@@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) ...@@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq); socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
tcp_tw_count, atomic_read(&tcp_sockets_allocated), tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated)); atomic_read(&tcp_memory_allocated));
seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
......
...@@ -259,7 +259,7 @@ ctl_table ipv4_table[] = { ...@@ -259,7 +259,7 @@ ctl_table ipv4_table[] = {
{ {
.ctl_name = NET_TCP_MAX_TW_BUCKETS, .ctl_name = NET_TCP_MAX_TW_BUCKETS,
.procname = "tcp_max_tw_buckets", .procname = "tcp_max_tw_buckets",
.data = &sysctl_tcp_max_tw_buckets, .data = &tcp_death_row.sysctl_max_tw_buckets,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec .proc_handler = &proc_dointvec
...@@ -363,7 +363,7 @@ ctl_table ipv4_table[] = { ...@@ -363,7 +363,7 @@ ctl_table ipv4_table[] = {
{ {
.ctl_name = NET_TCP_TW_RECYCLE, .ctl_name = NET_TCP_TW_RECYCLE,
.procname = "tcp_tw_recycle", .procname = "tcp_tw_recycle",
.data = &sysctl_tcp_tw_recycle, .data = &tcp_death_row.sysctl_tw_recycle,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec .proc_handler = &proc_dointvec
......
...@@ -2109,12 +2109,12 @@ void __init tcp_init(void) ...@@ -2109,12 +2109,12 @@ void __init tcp_init(void)
if (order >= 4) { if (order >= 4) {
sysctl_local_port_range[0] = 32768; sysctl_local_port_range[0] = 32768;
sysctl_local_port_range[1] = 61000; sysctl_local_port_range[1] = 61000;
sysctl_tcp_max_tw_buckets = 180000; tcp_death_row.sysctl_max_tw_buckets = 180000;
sysctl_tcp_max_orphans = 4096 << (order - 4); sysctl_tcp_max_orphans = 4096 << (order - 4);
sysctl_max_syn_backlog = 1024; sysctl_max_syn_backlog = 1024;
} else if (order < 3) { } else if (order < 3) {
sysctl_local_port_range[0] = 1024 * (3 - order); sysctl_local_port_range[0] = 1024 * (3 - order);
sysctl_tcp_max_tw_buckets >>= (3 - order); tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
sysctl_tcp_max_orphans >>= (3 - order); sysctl_tcp_max_orphans >>= (3 - order);
sysctl_max_syn_backlog = 128; sysctl_max_syn_backlog = 128;
} }
......
...@@ -199,7 +199,7 @@ unique: ...@@ -199,7 +199,7 @@ unique:
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) { } else if (tw) {
/* Silly. Should hash-dance instead... */ /* Silly. Should hash-dance instead... */
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw); inet_twsk_put(tw);
...@@ -291,7 +291,7 @@ ok: ...@@ -291,7 +291,7 @@ ok:
spin_unlock(&head->lock); spin_unlock(&head->lock);
if (tw) { if (tw) {
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);;
inet_twsk_put(tw); inet_twsk_put(tw);
} }
...@@ -366,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ...@@ -366,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->write_seq = 0; tp->write_seq = 0;
} }
if (sysctl_tcp_tw_recycle && if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
struct inet_peer *peer = rt_get_peer(rt); struct inet_peer *peer = rt_get_peer(rt);
...@@ -965,7 +965,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ...@@ -965,7 +965,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* are made in the function processing timewait state. * are made in the function processing timewait state.
*/ */
if (tmp_opt.saw_tstamp && if (tmp_opt.saw_tstamp &&
sysctl_tcp_tw_recycle && tcp_death_row.sysctl_tw_recycle &&
(dst = inet_csk_route_req(sk, req)) != NULL && (dst = inet_csk_route_req(sk, req)) != NULL &&
(peer = rt_get_peer((struct rtable *)dst)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
peer->v4daddr == saddr) { peer->v4daddr == saddr) {
...@@ -1305,7 +1305,8 @@ do_time_wait: ...@@ -1305,7 +1305,8 @@ do_time_wait:
ntohs(th->dest), ntohs(th->dest),
inet_iif(skb)); inet_iif(skb));
if (sk2) { if (sk2) {
tcp_tw_deschedule((struct inet_timewait_sock *)sk); inet_twsk_deschedule((struct inet_timewait_sock *)sk,
&tcp_death_row);
inet_twsk_put((struct inet_timewait_sock *)sk); inet_twsk_put((struct inet_timewait_sock *)sk);
sk = sk2; sk = sk2;
goto process; goto process;
......
This diff is collapsed.
...@@ -521,7 +521,7 @@ unique: ...@@ -521,7 +521,7 @@ unique:
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) { } else if (tw) {
/* Silly. Should hash-dance instead... */ /* Silly. Should hash-dance instead... */
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw); inet_twsk_put(tw);
...@@ -611,7 +611,7 @@ ok: ...@@ -611,7 +611,7 @@ ok:
spin_unlock(&head->lock); spin_unlock(&head->lock);
if (tw) { if (tw) {
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);
inet_twsk_put(tw); inet_twsk_put(tw);
} }
...@@ -1820,8 +1820,9 @@ do_time_wait: ...@@ -1820,8 +1820,9 @@ do_time_wait:
sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
if (sk2 != NULL) { if (sk2 != NULL) {
tcp_tw_deschedule((struct inet_timewait_sock *)sk); struct inet_timewait_sock *tw = inet_twsk(sk);
inet_twsk_put((struct inet_timewait_sock *)sk); inet_twsk_deschedule(tw, &tcp_death_row);
inet_twsk_put(tw);
sk = sk2; sk = sk2;
goto process; goto process;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment