Commit 295f7324, authored by Arnaldo Carvalho de Melo, committed by David S. Miller

[ICSK]: Introduce reqsk_queue_prune from code in tcp_synack_timer

With this we're very close to getting all of the current TCP
refactorings in my dccp-2.6 tree merged, next changeset will export
some functions needed by the current DCCP code and then dccp-2.6.git
will be born!
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0a5578cf
...@@ -270,7 +270,7 @@ struct tcp_sock { ...@@ -270,7 +270,7 @@ struct tcp_sock {
__u8 frto_counter; /* Number of new acks after RTO */ __u8 frto_counter; /* Number of new acks after RTO */
__u8 nonagle; /* Disable Nagle algorithm? */ __u8 nonagle; /* Disable Nagle algorithm? */
__u8 defer_accept; /* User waits for some data after accept() */ /* ONE BYTE HOLE, TRY TO PACK */
/* RTT measurement */ /* RTT measurement */
__u32 srtt; /* smoothed round trip time << 3 */ __u32 srtt; /* smoothed round trip time << 3 */
......
...@@ -239,4 +239,6 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk, ...@@ -239,4 +239,6 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
reqsk_free(req); reqsk_free(req);
} }
extern void inet_csk_listen_stop(struct sock *sk);
#endif /* _INET_CONNECTION_SOCK_H */ #endif /* _INET_CONNECTION_SOCK_H */
...@@ -97,6 +97,7 @@ struct listen_sock { ...@@ -97,6 +97,7 @@ struct listen_sock {
* *
* @rskq_accept_head - FIFO head of established children * @rskq_accept_head - FIFO head of established children
* @rskq_accept_tail - FIFO tail of established children * @rskq_accept_tail - FIFO tail of established children
* @rskq_defer_accept - User waits for some data after accept()
* @syn_wait_lock - serializer * @syn_wait_lock - serializer
* *
* %syn_wait_lock is necessary only to avoid proc interface having to grab the main * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
...@@ -112,6 +113,8 @@ struct request_sock_queue { ...@@ -112,6 +113,8 @@ struct request_sock_queue {
struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_head;
struct request_sock *rskq_accept_tail; struct request_sock *rskq_accept_tail;
rwlock_t syn_wait_lock; rwlock_t syn_wait_lock;
u8 rskq_defer_accept;
/* 3 bytes hole, try to pack */
struct listen_sock *listen_opt; struct listen_sock *listen_opt;
}; };
...@@ -255,4 +258,8 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, ...@@ -255,4 +258,8 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
write_unlock(&queue->syn_wait_lock); write_unlock(&queue->syn_wait_lock);
} }
extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
const unsigned long interval, const unsigned long timeout,
const unsigned long max_rto, int max_retries);
#endif /* _REQUEST_SOCK_H */ #endif /* _REQUEST_SOCK_H */
...@@ -423,7 +423,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, ...@@ -423,7 +423,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
size_t len, int nonblock, size_t len, int nonblock,
int flags, int *addr_len); int flags, int *addr_len);
extern int tcp_listen_start(struct sock *sk); extern int inet_csk_listen_start(struct sock *sk,
const int nr_table_entries);
extern void tcp_parse_options(struct sk_buff *skb, extern void tcp_parse_options(struct sk_buff *skb,
struct tcp_options_received *opt_rx, struct tcp_options_received *opt_rx,
......
...@@ -52,6 +52,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, ...@@ -52,6 +52,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
rwlock_init(&queue->syn_wait_lock); rwlock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = queue->rskq_accept_head = NULL; queue->rskq_accept_head = queue->rskq_accept_head = NULL;
queue->rskq_defer_accept = 0;
lopt->nr_table_entries = nr_table_entries; lopt->nr_table_entries = nr_table_entries;
write_lock_bh(&queue->syn_wait_lock); write_lock_bh(&queue->syn_wait_lock);
......
...@@ -99,6 +99,7 @@ ...@@ -99,6 +99,7 @@
#include <net/arp.h> #include <net/arp.h>
#include <net/route.h> #include <net/route.h>
#include <net/ip_fib.h> #include <net/ip_fib.h>
#include <net/inet_connection_sock.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <net/udp.h> #include <net/udp.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
......
...@@ -495,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start); ...@@ -495,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
* This routine closes sockets which have been at least partially * This routine closes sockets which have been at least partially
* opened, but not yet accepted. * opened, but not yet accepted.
*/ */
static void inet_csk_listen_stop(struct sock *sk) void inet_csk_listen_stop(struct sock *sk)
{ {
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock *acc_req; struct request_sock *acc_req;
...@@ -1947,15 +1947,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, ...@@ -1947,15 +1947,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
break; break;
case TCP_DEFER_ACCEPT: case TCP_DEFER_ACCEPT:
tp->defer_accept = 0; icsk->icsk_accept_queue.rskq_defer_accept = 0;
if (val > 0) { if (val > 0) {
/* Translate value in seconds to number of /* Translate value in seconds to number of
* retransmits */ * retransmits */
while (tp->defer_accept < 32 && while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
val > ((TCP_TIMEOUT_INIT / HZ) << val > ((TCP_TIMEOUT_INIT / HZ) <<
tp->defer_accept)) icsk->icsk_accept_queue.rskq_defer_accept))
tp->defer_accept++; icsk->icsk_accept_queue.rskq_defer_accept++;
tp->defer_accept++; icsk->icsk_accept_queue.rskq_defer_accept++;
} }
break; break;
...@@ -2058,6 +2058,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info); ...@@ -2058,6 +2058,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info);
int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int __user *optlen) int __user *optlen)
{ {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
int val, len; int val, len;
...@@ -2095,7 +2096,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, ...@@ -2095,7 +2096,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
break; break;
case TCP_SYNCNT: case TCP_SYNCNT:
val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries; val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
break; break;
case TCP_LINGER2: case TCP_LINGER2:
val = tp->linger2; val = tp->linger2;
...@@ -2103,8 +2104,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, ...@@ -2103,8 +2104,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
val = (val ? : sysctl_tcp_fin_timeout) / HZ; val = (val ? : sysctl_tcp_fin_timeout) / HZ;
break; break;
case TCP_DEFER_ACCEPT: case TCP_DEFER_ACCEPT:
val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) << val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
(tp->defer_accept - 1)); ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
break; break;
case TCP_WINDOW_CLAMP: case TCP_WINDOW_CLAMP:
val = tp->window_clamp; val = tp->window_clamp;
...@@ -2125,7 +2126,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, ...@@ -2125,7 +2126,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
return 0; return 0;
} }
case TCP_QUICKACK: case TCP_QUICKACK:
val = !inet_csk(sk)->icsk_ack.pingpong; val = !icsk->icsk_ack.pingpong;
break; break;
case TCP_CONGESTION: case TCP_CONGESTION:
......
...@@ -3831,6 +3831,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -3831,6 +3831,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_parse_options(skb, &tp->rx_opt, 0); tcp_parse_options(skb, &tp->rx_opt, 0);
if (th->ack) { if (th->ack) {
struct inet_connection_sock *icsk;
/* rfc793: /* rfc793:
* "If the state is SYN-SENT then * "If the state is SYN-SENT then
* first check the ACK bit * first check the ACK bit
...@@ -3956,7 +3957,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -3956,7 +3957,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
sk_wake_async(sk, 0, POLL_OUT); sk_wake_async(sk, 0, POLL_OUT);
} }
if (sk->sk_write_pending || tp->defer_accept || inet_csk(sk)->icsk_ack.pingpong) { icsk = inet_csk(sk);
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) {
/* Save one ACK. Data will be ready after /* Save one ACK. Data will be ready after
* several ticks, if write_pending is set. * several ticks, if write_pending is set.
* *
...@@ -3965,8 +3970,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -3965,8 +3970,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* to stand against the temptation 8) --ANK * to stand against the temptation 8) --ANK
*/ */
inet_csk_schedule_ack(sk); inet_csk_schedule_ack(sk);
inet_csk(sk)->icsk_ack.lrcvtime = tcp_time_stamp; icsk->icsk_ack.lrcvtime = tcp_time_stamp;
inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; icsk->icsk_ack.ato = TCP_ATO_MIN;
tcp_incr_quickack(sk); tcp_incr_quickack(sk);
tcp_enter_quickack_mode(sk); tcp_enter_quickack_mode(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
......
...@@ -787,9 +787,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, ...@@ -787,9 +787,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
does sequence test, SYN is truncated, and thus we consider does sequence test, SYN is truncated, and thus we consider
it a bare ACK. it a bare ACK.
If tp->defer_accept, we silently drop this bare ACK. Otherwise, If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
we create an established connection. Both ends (listening sockets) bare ACK. Otherwise, we create an established connection. Both
accept the new incoming connection and try to talk to each other. 8-) ends (listening sockets) accept the new incoming connection and try
to talk to each other. 8-)
Note: This case is both harmless, and rare. Possibility is about the Note: This case is both harmless, and rare. Possibility is about the
same as us discovering intelligent life on another plant tomorrow. same as us discovering intelligent life on another plant tomorrow.
...@@ -856,7 +857,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, ...@@ -856,7 +857,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
return NULL; return NULL;
/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1; inet_rsk(req)->acked = 1;
return NULL; return NULL;
} }
......
...@@ -424,16 +424,12 @@ out_unlock: ...@@ -424,16 +424,12 @@ out_unlock:
sock_put(sk); sock_put(sk);
} }
/* void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
* Timer for listening sockets const unsigned long interval, const unsigned long timeout,
*/ const unsigned long max_rto, int max_retries)
static void tcp_synack_timer(struct sock *sk)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(parent);
struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = queue->listen_opt;
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
int thresh = max_retries; int thresh = max_retries;
unsigned long now = jiffies; unsigned long now = jiffies;
struct request_sock **reqp, *req; struct request_sock **reqp, *req;
...@@ -470,10 +466,10 @@ static void tcp_synack_timer(struct sock *sk) ...@@ -470,10 +466,10 @@ static void tcp_synack_timer(struct sock *sk)
} }
} }
if (tp->defer_accept) if (queue->rskq_defer_accept)
max_retries = tp->defer_accept; max_retries = queue->rskq_defer_accept;
budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL)); budget = 2 * (lopt->nr_table_entries / (timeout / interval));
i = lopt->clock_hand; i = lopt->clock_hand;
do { do {
...@@ -482,20 +478,19 @@ static void tcp_synack_timer(struct sock *sk) ...@@ -482,20 +478,19 @@ static void tcp_synack_timer(struct sock *sk)
if (time_after_eq(now, req->expires)) { if (time_after_eq(now, req->expires)) {
if ((req->retrans < thresh || if ((req->retrans < thresh ||
(inet_rsk(req)->acked && req->retrans < max_retries)) (inet_rsk(req)->acked && req->retrans < max_retries))
&& !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) { && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
unsigned long timeo; unsigned long timeo;
if (req->retrans++ == 0) if (req->retrans++ == 0)
lopt->qlen_young--; lopt->qlen_young--;
timeo = min((TCP_TIMEOUT_INIT << req->retrans), timeo = min((timeout << req->retrans), max_rto);
TCP_RTO_MAX);
req->expires = now + timeo; req->expires = now + timeo;
reqp = &req->dl_next; reqp = &req->dl_next;
continue; continue;
} }
/* Drop this request */ /* Drop this request */
inet_csk_reqsk_queue_unlink(sk, req, reqp); inet_csk_reqsk_queue_unlink(parent, req, reqp);
reqsk_queue_removed(&icsk->icsk_accept_queue, req); reqsk_queue_removed(&icsk->icsk_accept_queue, req);
reqsk_free(req); reqsk_free(req);
continue; continue;
...@@ -503,14 +498,29 @@ static void tcp_synack_timer(struct sock *sk) ...@@ -503,14 +498,29 @@ static void tcp_synack_timer(struct sock *sk)
reqp = &req->dl_next; reqp = &req->dl_next;
} }
i = (i+1)&(TCP_SYNQ_HSIZE-1); i = (i + 1) & (lopt->nr_table_entries - 1);
} while (--budget > 0); } while (--budget > 0);
lopt->clock_hand = i; lopt->clock_hand = i;
if (lopt->qlen) if (lopt->qlen)
inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); inet_csk_reset_keepalive_timer(parent, interval);
}
EXPORT_SYMBOL_GPL(reqsk_queue_prune);
/*
* Timer for listening sockets
*/
static void tcp_synack_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
} }
void tcp_set_keepalive(struct sock *sk, int val) void tcp_set_keepalive(struct sock *sk, int val)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment