Commit a019d6fe authored by Arnaldo Carvalho de Melo, committed by David S. Miller

[ICSK]: Move generalised functions from tcp to inet_connection_sock

This also improves reqsk_queue_prune and renames it to
inet_csk_reqsk_queue_prune, as it deals with both inet_connection_sock
and inet_request_sock objects, not just with request_sock ones, and
thus belongs with the inet_connection_sock code rather than with
request_sock.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 7c657876
@@ -239,6 +239,13 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
 	reqsk_free(req);
 }
 
+extern void inet_csk_reqsk_queue_prune(struct sock *parent,
+				       const unsigned long interval,
+				       const unsigned long timeout,
+				       const unsigned long max_rto);
+
+extern void inet_csk_destroy_sock(struct sock *sk);
+extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries);
 extern void inet_csk_listen_stop(struct sock *sk);
 
 #endif /* _INET_CONNECTION_SOCK_H */
@@ -258,8 +258,4 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
 	write_unlock(&queue->syn_wait_lock);
 }
 
-extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
-			      const unsigned long interval, const unsigned long timeout,
-			      const unsigned long max_rto, int max_retries);
-
 #endif /* _REQUEST_SOCK_H */
@@ -423,9 +423,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
 			size_t len, int nonblock,
 			int flags, int *addr_len);
 
-extern int inet_csk_listen_start(struct sock *sk,
-				 const int nr_table_entries);
-
 extern void tcp_parse_options(struct sk_buff *skb,
 			      struct tcp_options_received *opt_rx,
 			      int estab);
@@ -861,9 +858,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
 	tp->snd_wl1 = seq;
 }
 
-extern void inet_csk_destroy_sock(struct sock *sk);
-
 /*
  * Calculate(/check) TCP checksum
  */
......
@@ -220,11 +220,7 @@ out:
  */
 static void dccp_response_timer(struct sock *sk)
 {
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */;
-
-	reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
-			  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries);
+	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
 }
 
 static void dccp_keepalive_timer(unsigned long data)
......
@@ -23,6 +23,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/xfrm.h>
 
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -398,8 +399,100 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 	inet_csk_reqsk_queue_added(sk, timeout);
 }
 
+/* Only thing we need from tcp.h */
+extern int sysctl_tcp_synack_retries;
+
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
+void inet_csk_reqsk_queue_prune(struct sock *parent,
+				const unsigned long interval,
+				const unsigned long timeout,
+				const unsigned long max_rto)
+{
+	struct inet_connection_sock *icsk = inet_csk(parent);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+	struct listen_sock *lopt = queue->listen_opt;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
+	unsigned long now = jiffies;
+	struct request_sock **reqp, *req;
+	int i, budget;
+
+	if (lopt == NULL || lopt->qlen == 0)
+		return;
+
+	/* Normally all the openreqs are young and become mature
+	 * (i.e. converted to established socket) for first timeout.
+	 * If synack was not acknowledged for 3 seconds, it means
+	 * one of the following things: synack was lost, ack was lost,
+	 * rtt is high or nobody planned to ack (i.e. synflood).
+	 * When server is a bit loaded, queue is populated with old
+	 * open requests, reducing effective size of queue.
+	 * When server is well loaded, queue size reduces to zero
+	 * after several minutes of work. It is not synflood,
+	 * it is normal operation. The solution is pruning
+	 * too old entries overriding normal timeout, when
+	 * situation becomes dangerous.
+	 *
+	 * Essentially, we reserve half of room for young
+	 * embrions; and abort old ones without pity, if old
+	 * ones are about to clog our table.
+	 */
+	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
+		int young = (lopt->qlen_young<<1);
+
+		while (thresh > 2) {
+			if (lopt->qlen < young)
+				break;
+			thresh--;
+			young <<= 1;
+		}
+	}
+
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
+	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
+	i = lopt->clock_hand;
+
+	do {
+		reqp=&lopt->syn_table[i];
+		while ((req = *reqp) != NULL) {
+			if (time_after_eq(now, req->expires)) {
+				if ((req->retrans < thresh ||
+				     (inet_rsk(req)->acked && req->retrans < max_retries))
+				    && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
+					unsigned long timeo;
+
+					if (req->retrans++ == 0)
+						lopt->qlen_young--;
+					timeo = min((timeout << req->retrans), max_rto);
+					req->expires = now + timeo;
+					reqp = &req->dl_next;
+					continue;
+				}
+
+				/* Drop this request */
+				inet_csk_reqsk_queue_unlink(parent, req, reqp);
+				reqsk_queue_removed(queue, req);
+				reqsk_free(req);
+				continue;
+			}
+			reqp = &req->dl_next;
+		}
+
+		i = (i + 1) & (lopt->nr_table_entries - 1);
+
+	} while (--budget > 0);
+
+	lopt->clock_hand = i;
+
+	if (lopt->qlen)
+		inet_csk_reset_keepalive_timer(parent, interval);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
+
 struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 			    const unsigned int __nocast priority)
 {
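
[Editor's note] The "reserve half of room for young embrions" heuristic above is easier to see outside the pointer-chasing. Below is a minimal userspace sketch of just the threshold computation; prune_thresh() is illustrative only, not a kernel function, and the inputs in main() are made up:

#include <stdio.h>

/* Standalone model of the retransmit-threshold decay in
 * inet_csk_reqsk_queue_prune(): once the SYN queue is more than
 * half full, each doubling of old entries relative to young ones
 * lowers the retransmission threshold by one, down to a floor of 2.
 */
static int prune_thresh(int qlen, int qlen_young, int max_qlen_log,
			int max_retries)
{
	int thresh = max_retries;

	if (qlen >> (max_qlen_log - 1)) {	/* queue more than half full? */
		int young = qlen_young << 1;

		while (thresh > 2) {
			if (qlen < young)
				break;
			thresh--;
			young <<= 1;
		}
	}
	return thresh;
}

int main(void)
{
	/* 1024-slot table (max_qlen_log = 10), default of 5 SYN/ACK retries */
	printf("%d\n", prune_thresh(100, 80, 10, 5));  /* below half full: 5 */
	printf("%d\n", prune_thresh(600, 200, 10, 5)); /* 600 >= 400: 4 */
	printf("%d\n", prune_thresh(600, 50, 10, 5));  /* 600 >= 100, 200, 400: 2 */
	return 0;
}

The budget term, 2 * (nr_table_entries / (timeout / interval)), spreads a full sweep of the hash table over roughly half the initial timeout: with TCP's values of the day (interval HZ/5, initial timeout 3*HZ), timeout / interval is 15, so each timer tick inspects 2 * (nr_table_entries / 15) buckets and the clock hand covers the whole table in about 1.5 seconds.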
@@ -424,3 +517,124 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_clone);
+
+/*
+ * At this point, there should be no process reference to this
+ * socket, and thus no user references at all.  Therefore we
+ * can assume the socket waitqueue is inactive and nobody will
+ * try to jump onto it.
+ */
+void inet_csk_destroy_sock(struct sock *sk)
+{
+	BUG_TRAP(sk->sk_state == TCP_CLOSE);
+	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
+
+	/* It cannot be in hash table! */
+	BUG_TRAP(sk_unhashed(sk));
+
+	/* If it has not 0 inet_sk(sk)->num, it must be bound */
+	BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
+
+	sk->sk_prot->destroy(sk);
+
+	sk_stream_kill_queues(sk);
+
+	xfrm_sk_free_policy(sk);
+
+	sk_refcnt_debug_release(sk);
+
+	atomic_dec(sk->sk_prot->orphan_count);
+	sock_put(sk);
+}
+
+EXPORT_SYMBOL(inet_csk_destroy_sock);
+
+int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
+
+	if (rc != 0)
+		return rc;
+
+	sk->sk_max_ack_backlog = 0;
+	sk->sk_ack_backlog = 0;
+	inet_csk_delack_init(sk);
+
+	/* There is race window here: we announce ourselves listening,
+	 * but this transition is still not validated by get_port().
+	 * It is OK, because this socket enters to hash table only
+	 * after validation is complete.
+	 */
+	sk->sk_state = TCP_LISTEN;
+	if (!sk->sk_prot->get_port(sk, inet->num)) {
+		inet->sport = htons(inet->num);
+
+		sk_dst_reset(sk);
+		sk->sk_prot->hash(sk);
+
+		return 0;
+	}
+
+	sk->sk_state = TCP_CLOSE;
+	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
+	return -EADDRINUSE;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_start);
+
+/*
+ *	This routine closes sockets which have been at least partially
+ *	opened, but not yet accepted.
+ */
+void inet_csk_listen_stop(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock *acc_req;
+	struct request_sock *req;
+
+	inet_csk_delete_keepalive_timer(sk);
+
+	/* make all the listen_opt local to us */
+	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+
+	/* Following specs, it would be better either to send FIN
+	 * (and enter FIN-WAIT-1, it is normal close)
+	 * or to send active reset (abort).
+	 * Certainly, it is pretty dangerous while synflood, but it is
+	 * bad justification for our negligence 8)
+	 * To be honest, we are not able to make either
+	 * of the variants now.			--ANK
+	 */
+	reqsk_queue_destroy(&icsk->icsk_accept_queue);
+
+	while ((req = acc_req) != NULL) {
+		struct sock *child = req->sk;
+
+		acc_req = req->dl_next;
+
+		local_bh_disable();
+		bh_lock_sock(child);
+		BUG_TRAP(!sock_owned_by_user(child));
+		sock_hold(child);
+
+		sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+		sock_orphan(child);
+
+		atomic_inc(sk->sk_prot->orphan_count);
+
+		inet_csk_destroy_sock(child);
+
+		bh_unlock_sock(child);
+		local_bh_enable();
+		sock_put(child);
+
+		sk_acceptq_removed(sk);
+		__reqsk_free(req);
+	}
+	BUG_TRAP(!sk->sk_ack_backlog);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
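
[Editor's note] The listen lifecycle moved here wholesale. A sketch of how an address family's listen() path would drive these helpers, modelled on the AF_INET code of this era but not copied from it; example_listen() and its error handling are simplified, and the caller supplies sk and backlog:

/* Hypothetical caller of the moved helpers. */
static int example_listen(struct sock *sk, int backlog)
{
	int err = 0;

	lock_sock(sk);
	if (sk->sk_state == TCP_CLOSE) {
		/* Allocate the SYN table and enter TCP_LISTEN;
		 * fails with -EADDRINUSE if get_port() loses the race.
		 */
		err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
out:
	release_sock(sk);
	return err;
}

inet_csk_listen_stop() is the mirror image: the protocol's close path calls it once the socket leaves TCP_LISTEN, and it disposes of every half-open and not-yet-accepted child before the listener itself is destroyed.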
@@ -456,96 +456,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 	return put_user(answ, (int __user *)arg);
 }
 
-int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
-
-	if (rc != 0)
-		return rc;
-
-	sk->sk_max_ack_backlog = 0;
-	sk->sk_ack_backlog = 0;
-	inet_csk_delack_init(sk);
-
-	/* There is race window here: we announce ourselves listening,
-	 * but this transition is still not validated by get_port().
-	 * It is OK, because this socket enters to hash table only
-	 * after validation is complete.
-	 */
-	sk->sk_state = TCP_LISTEN;
-	if (!sk->sk_prot->get_port(sk, inet->num)) {
-		inet->sport = htons(inet->num);
-
-		sk_dst_reset(sk);
-		sk->sk_prot->hash(sk);
-
-		return 0;
-	}
-
-	sk->sk_state = TCP_CLOSE;
-	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
-	return -EADDRINUSE;
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_start);
-
-/*
- *	This routine closes sockets which have been at least partially
- *	opened, but not yet accepted.
- */
-void inet_csk_listen_stop(struct sock *sk)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct request_sock *acc_req;
-	struct request_sock *req;
-
-	inet_csk_delete_keepalive_timer(sk);
-
-	/* make all the listen_opt local to us */
-	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
-
-	/* Following specs, it would be better either to send FIN
-	 * (and enter FIN-WAIT-1, it is normal close)
-	 * or to send active reset (abort).
-	 * Certainly, it is pretty dangerous while synflood, but it is
-	 * bad justification for our negligence 8)
-	 * To be honest, we are not able to make either
-	 * of the variants now.			--ANK
-	 */
-	reqsk_queue_destroy(&icsk->icsk_accept_queue);
-
-	while ((req = acc_req) != NULL) {
-		struct sock *child = req->sk;
-
-		acc_req = req->dl_next;
-
-		local_bh_disable();
-		bh_lock_sock(child);
-		BUG_TRAP(!sock_owned_by_user(child));
-		sock_hold(child);
-
-		sk->sk_prot->disconnect(child, O_NONBLOCK);
-
-		sock_orphan(child);
-
-		atomic_inc(sk->sk_prot->orphan_count);
-
-		inet_csk_destroy_sock(child);
-
-		bh_unlock_sock(child);
-		local_bh_enable();
-		sock_put(child);
-
-		sk_acceptq_removed(sk);
-		__reqsk_free(req);
-	}
-	BUG_TRAP(!sk->sk_ack_backlog);
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
-
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
@@ -1559,35 +1469,6 @@ void tcp_shutdown(struct sock *sk, int how)
 	}
 }
 
-/*
- * At this point, there should be no process reference to this
- * socket, and thus no user references at all.  Therefore we
- * can assume the socket waitqueue is inactive and nobody will
- * try to jump onto it.
- */
-void inet_csk_destroy_sock(struct sock *sk)
-{
-	BUG_TRAP(sk->sk_state == TCP_CLOSE);
-	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
-
-	/* It cannot be in hash table! */
-	BUG_TRAP(sk_unhashed(sk));
-
-	/* If it has not 0 inet_sk(sk)->num, it must be bound */
-	BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
-
-	sk->sk_prot->destroy(sk);
-
-	sk_stream_kill_queues(sk);
-
-	xfrm_sk_free_policy(sk);
-
-	sk_refcnt_debug_release(sk);
-
-	atomic_dec(sk->sk_prot->orphan_count);
-	sock_put(sk);
-}
-
 void tcp_close(struct sock *sk, long timeout)
 {
 	struct sk_buff *skb;
@@ -2258,7 +2139,6 @@ void __init tcp_init(void)
 }
 
 EXPORT_SYMBOL(tcp_close);
-EXPORT_SYMBOL(inet_csk_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
 EXPORT_SYMBOL(tcp_getsockopt);
 EXPORT_SYMBOL(tcp_ioctl);
......
@@ -424,103 +424,14 @@ out_unlock:
 	sock_put(sk);
 }
 
-void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
-		       const unsigned long interval, const unsigned long timeout,
-		       const unsigned long max_rto, int max_retries)
-{
-	struct inet_connection_sock *icsk = inet_csk(parent);
-	struct listen_sock *lopt = queue->listen_opt;
-	int thresh = max_retries;
-	unsigned long now = jiffies;
-	struct request_sock **reqp, *req;
-	int i, budget;
-
-	if (lopt == NULL || lopt->qlen == 0)
-		return;
-
-	/* Normally all the openreqs are young and become mature
-	 * (i.e. converted to established socket) for first timeout.
-	 * If synack was not acknowledged for 3 seconds, it means
-	 * one of the following things: synack was lost, ack was lost,
-	 * rtt is high or nobody planned to ack (i.e. synflood).
-	 * When server is a bit loaded, queue is populated with old
-	 * open requests, reducing effective size of queue.
-	 * When server is well loaded, queue size reduces to zero
-	 * after several minutes of work. It is not synflood,
-	 * it is normal operation. The solution is pruning
-	 * too old entries overriding normal timeout, when
-	 * situation becomes dangerous.
-	 *
-	 * Essentially, we reserve half of room for young
-	 * embrions; and abort old ones without pity, if old
-	 * ones are about to clog our table.
-	 */
-	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
-		int young = (lopt->qlen_young<<1);
-
-		while (thresh > 2) {
-			if (lopt->qlen < young)
-				break;
-			thresh--;
-			young <<= 1;
-		}
-	}
-
-	if (queue->rskq_defer_accept)
-		max_retries = queue->rskq_defer_accept;
-
-	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
-	i = lopt->clock_hand;
-
-	do {
-		reqp=&lopt->syn_table[i];
-		while ((req = *reqp) != NULL) {
-			if (time_after_eq(now, req->expires)) {
-				if ((req->retrans < thresh ||
-				     (inet_rsk(req)->acked && req->retrans < max_retries))
-				    && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
-					unsigned long timeo;
-
-					if (req->retrans++ == 0)
-						lopt->qlen_young--;
-					timeo = min((timeout << req->retrans), max_rto);
-					req->expires = now + timeo;
-					reqp = &req->dl_next;
-					continue;
-				}
-
-				/* Drop this request */
-				inet_csk_reqsk_queue_unlink(parent, req, reqp);
-				reqsk_queue_removed(&icsk->icsk_accept_queue, req);
-				reqsk_free(req);
-				continue;
-			}
-			reqp = &req->dl_next;
-		}
-
-		i = (i + 1) & (lopt->nr_table_entries - 1);
-
-	} while (--budget > 0);
-
-	lopt->clock_hand = i;
-
-	if (lopt->qlen)
-		inet_csk_reset_keepalive_timer(parent, interval);
-}
-
-EXPORT_SYMBOL_GPL(reqsk_queue_prune);
-
 /*
  *	Timer for listening sockets
  */
 static void tcp_synack_timer(struct sock *sk)
 {
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
-
-	reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
-			  TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
+	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
+				   TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 }
 
 void tcp_set_keepalive(struct sock *sk, int val)
void tcp_set_keepalive(struct sock *sk, int val) void tcp_set_keepalive(struct sock *sk, int val)
......
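
[Editor's note] With max_retries now derived inside inet_csk_reqsk_queue_prune() from icsk_syn_retries and sysctl_tcp_synack_retries (which also retires DCCP's FIXME above), a protocol's SYN-queue timer reduces to a single call. A sketch for a hypothetical third protocol, where the MYPROTO_* timing constants are placeholders rather than real kernel symbols:

/* Hypothetical SYN-queue timer for another connection-oriented
 * protocol; MYPROTO_SYNQ_INTERVAL, MYPROTO_TIMEOUT_INIT and
 * MYPROTO_RTO_MAX are made-up constants for illustration.
 */
static void myproto_synack_timer(struct sock *sk)
{
	inet_csk_reqsk_queue_prune(sk, MYPROTO_SYNQ_INTERVAL,
				   MYPROTO_TIMEOUT_INIT, MYPROTO_RTO_MAX);
}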