Commit b000cd37, authored by Vitaliy Gusev, committed by David S. Miller

[TCP]: Fix never pruned tcp out-of-order queue.

tcp_prune_queue() doesn't prune the out-of-order queue at all.
Therefore sk_rmem_schedule() can fail while the out-of-order queue is
left unpruned. This can lead to a TCP deadlock state if the following
two conditions hold:

1. There is a sequence hole between the last segment received in
   order and the segments enqueued to the out-of-order queue.

2. Size of all segments in the out-of-order queue is more than tcp_mem[2].
Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 066a3b5b
...@@ -3841,8 +3841,26 @@ static void tcp_ofo_queue(struct sock *sk) ...@@ -3841,8 +3841,26 @@ static void tcp_ofo_queue(struct sock *sk)
} }
} }
static void tcp_prune_ofo_queue(struct sock *sk);
static int tcp_prune_queue(struct sock *sk); static int tcp_prune_queue(struct sock *sk);
/*
 * Try to get `size` bytes of receive memory scheduled for the socket,
 * pruning queues in increasingly destructive steps if the first attempt
 * does not succeed.
 *
 * Returns 0 when the memory could be scheduled, or -1 when
 * tcp_prune_queue() reported failure or when sk_rmem_schedule() still
 * fails after the out-of-order queue has been purged.
 */
static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
{
	/* Fast path: within sk_rcvbuf and the schedule request succeeds. */
	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    sk_rmem_schedule(sk, size))
		return 0;

	/* First fallback: the general prune pass. */
	if (tcp_prune_queue(sk) < 0)
		return -1;

	if (sk_rmem_schedule(sk, size))
		return 0;

	/*
	 * Last resort: tcp_prune_queue() did not make the schedule request
	 * succeed, so throw away the out-of-order queue and retry once more.
	 */
	tcp_prune_ofo_queue(sk);

	return sk_rmem_schedule(sk, size) ? 0 : -1;
}
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{ {
struct tcphdr *th = tcp_hdr(skb); struct tcphdr *th = tcp_hdr(skb);
...@@ -3892,12 +3910,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) ...@@ -3892,12 +3910,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (eaten <= 0) { if (eaten <= 0) {
queue_and_out: queue_and_out:
if (eaten < 0 && if (eaten < 0 &&
(atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || tcp_try_rmem_schedule(sk, skb->truesize))
!sk_rmem_schedule(sk, skb->truesize))) {
if (tcp_prune_queue(sk) < 0 ||
!sk_rmem_schedule(sk, skb->truesize))
goto drop; goto drop;
}
skb_set_owner_r(skb, sk); skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb); __skb_queue_tail(&sk->sk_receive_queue, skb);
} }
...@@ -3966,12 +3981,8 @@ drop: ...@@ -3966,12 +3981,8 @@ drop:
TCP_ECN_check_ce(tp, skb); TCP_ECN_check_ce(tp, skb);
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || if (tcp_try_rmem_schedule(sk, skb->truesize))
!sk_rmem_schedule(sk, skb->truesize)) {
if (tcp_prune_queue(sk) < 0 ||
!sk_rmem_schedule(sk, skb->truesize))
goto drop; goto drop;
}
/* Disable header prediction. */ /* Disable header prediction. */
tp->pred_flags = 0; tp->pred_flags = 0;
...@@ -4198,6 +4209,28 @@ static void tcp_collapse_ofo_queue(struct sock *sk) ...@@ -4198,6 +4209,28 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
} }
} }
/*
 * Discard everything held in the socket's out-of-order queue.
 *
 * Does nothing when the queue is already empty. Otherwise it bumps the
 * LINUX_MIB_OFOPRUNED counter, purges the queue, resets the SACK state
 * when rx_opt.sack_ok is set, and finally calls sk_mem_reclaim().
 */
static void tcp_prune_ofo_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Empty queue: nothing to purge, count or reclaim. */
	if (skb_queue_empty(&tp->out_of_order_queue))
		return;

	NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
	__skb_queue_purge(&tp->out_of_order_queue);

	/*
	 * The purged segments may have been reported via SACK, so the SACK
	 * state is reset. A conforming SACK implementation does the same on
	 * a timeout-based retransmit. With the connection in this bad a
	 * state, only its integrity matters, not its performance.
	 */
	if (tp->rx_opt.sack_ok)
		tcp_sack_reset(&tp->rx_opt);

	sk_mem_reclaim(sk);
}
/* Reduce allocated memory if we can, trying to get /* Reduce allocated memory if we can, trying to get
* the socket within its memory limits again. * the socket within its memory limits again.
* *
...@@ -4231,20 +4264,7 @@ static int tcp_prune_queue(struct sock *sk) ...@@ -4231,20 +4264,7 @@ static int tcp_prune_queue(struct sock *sk)
/* Collapsing did not help, destructive actions follow. /* Collapsing did not help, destructive actions follow.
* This must not ever occur. */ * This must not ever occur. */
/* First, purge the out_of_order queue. */ tcp_prune_ofo_queue(sk);
if (!skb_queue_empty(&tp->out_of_order_queue)) {
NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
__skb_queue_purge(&tp->out_of_order_queue);
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
* is in a sad state like this, we care only about integrity
* of the connection not performance.
*/
if (tcp_is_sack(tp))
tcp_sack_reset(&tp->rx_opt);
sk_mem_reclaim(sk);
}
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment