Commit 1240d137 authored by Eric Dumazet, committed by David S. Miller

ipv4: udp: Optimise multicast reception

UDP multicast rx path is a bit complex and can hold a spinlock
for a long time.

Using a small (32 or 64 entries) stack of socket pointers can help
to perform expensive operations (skb_clone(), udp_queue_rcv_skb())
outside of the lock, in most cases.

It's also a base for a future RCU conversion of multicast reception.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Lucian Adrian Grijincu <lgrijincu@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent fddc17de
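The diff below applies this idea to __udp4_lib_mcast_deliver(). For orientation, here is a rough userspace-only C sketch of the same "collect under the lock, deliver outside the lock" pattern; all names (subscriber, deliver_msg, mcast_deliver) are illustrative rather than kernel APIs, and the full-stack flush that the real patch performs while still holding the lock is omitted for brevity:

/*
 * Minimal userspace sketch (not kernel code) of the batching pattern:
 * walk a list under a lock, collect matching entries into a small
 * on-stack array, pin them, drop the lock, then do the expensive
 * per-entry work unlocked.
 */
#include <pthread.h>
#include <stdio.h>

#define STACK_SIZE 32

struct subscriber {
	struct subscriber *next;
	int refcount;		/* protected by list_lock in this sketch */
	int id;
};

static struct subscriber *sub_list;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* "Expensive" per-subscriber work, done without holding list_lock. */
static void deliver_msg(struct subscriber *s, const char *msg)
{
	printf("deliver to %d: %s\n", s->id, msg);
}

static void flush_stack(struct subscriber **stack, unsigned int count,
			const char *msg)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		deliver_msg(stack[i], msg);
}

static void mcast_deliver(const char *msg)
{
	struct subscriber *stack[STACK_SIZE], *s;
	unsigned int i, count = 0;

	pthread_mutex_lock(&list_lock);
	for (s = sub_list; s && count < STACK_SIZE; s = s->next)
		stack[count++] = s;
	/* Pin the collected entries so they stay valid once the lock drops. */
	for (i = 0; i < count; i++)
		stack[i]->refcount++;
	pthread_mutex_unlock(&list_lock);

	/* Slow path runs with no lock held, as in the patch. */
	flush_stack(stack, count, msg);

	pthread_mutex_lock(&list_lock);
	for (i = 0; i < count; i++)
		stack[i]->refcount--;
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	struct subscriber a = { .next = NULL, .refcount = 0, .id = 1 };
	struct subscriber b = { .next = &a, .refcount = 0, .id = 2 };

	sub_list = &b;
	mcast_deliver("hello");
	return 0;
}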
@@ -1329,49 +1329,73 @@ drop:
 	return -1;
 }
 
+static void flush_stack(struct sock **stack, unsigned int count,
+			struct sk_buff *skb, unsigned int final)
+{
+	unsigned int i;
+	struct sk_buff *skb1 = NULL;
+
+	for (i = 0; i < count; i++) {
+		if (likely(skb1 == NULL))
+			skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
+
+		if (skb1 && udp_queue_rcv_skb(stack[i], skb1) <= 0)
+			skb1 = NULL;
+	}
+	if (unlikely(skb1))
+		kfree_skb(skb1);
+}
+
 /*
  *	Multicasts and broadcasts go to each listener.
  *
- *	Note: called only from the BH handler context,
- *	so we don't need to lock the hashes.
+ *	Note: called only from the BH handler context.
  */
 static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 				    struct udphdr *uh,
 				    __be32 saddr, __be32 daddr,
 				    struct udp_table *udptable)
 {
-	struct sock *sk;
+	struct sock *sk, *stack[256 / sizeof(struct sock *)];
 	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
 	int dif;
+	unsigned int i, count = 0;
 
 	spin_lock(&hslot->lock);
 	sk = sk_nulls_head(&hslot->head);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
-	if (sk) {
-		struct sock *sknext = NULL;
-
-		do {
-			struct sk_buff *skb1 = skb;
-
-			sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
-						   daddr, uh->source, saddr,
-						   dif);
-			if (sknext)
-				skb1 = skb_clone(skb, GFP_ATOMIC);
-
-			if (skb1) {
-				int ret = udp_queue_rcv_skb(sk, skb1);
-				if (ret > 0)
-					/* we should probably re-process instead
-					 * of dropping packets here. */
-					kfree_skb(skb1);
-			}
-			sk = sknext;
-		} while (sknext);
-	} else
-		consume_skb(skb);
-	spin_unlock(&hslot->lock);
+	while (sk) {
+		stack[count++] = sk;
+		sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
+				       daddr, uh->source, saddr, dif);
+		if (unlikely(count == ARRAY_SIZE(stack))) {
+			if (!sk)
+				break;
+			flush_stack(stack, count, skb, ~0);
+			count = 0;
+		}
+	}
+	/*
+	 * before releasing chain lock, we must take a reference on sockets
+	 */
+	for (i = 0; i < count; i++)
+		sock_hold(stack[i]);
+
+	spin_unlock(&hslot->lock);
+
+	/*
+	 * do the slow work with no lock held
+	 */
+	if (count) {
+		flush_stack(stack, count, skb, count - 1);
+
+		for (i = 0; i < count; i++)
+			sock_put(stack[i]);
+	} else {
+		kfree_skb(skb);
+	}
 	return 0;
 }