Commit 86911732, authored by Herbert Xu and committed by David S. Miller

gro: Avoid copying headers of unmerged packets

Unfortunately simplicity isn't always the best.  The fraginfo
interface turned out to be suboptimal.  The problem was quite
obvious.  For every packet, we have to copy the headers from
the frags structure into skb->head, even though for 99% of the
packets this part is immediately thrown away after the merge.

LRO didn't have this problem because it directly read the headers
from the frags structure.

This patch attempts to address this by creating an interface
that allows GRO to access the headers in the first frag without
having to copy it.  Because all drivers that use frags place the
headers in the first frag this optimisation should be enough.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5d0d9be8
...@@ -984,6 +984,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, ...@@ -984,6 +984,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
void netif_napi_del(struct napi_struct *napi); void netif_napi_del(struct napi_struct *napi);
struct napi_gro_cb { struct napi_gro_cb {
/* This indicates where we are processing relative to skb->data. */
int data_offset;
/* This is non-zero if the packet may be of the same flow. */ /* This is non-zero if the packet may be of the same flow. */
int same_flow; int same_flow;
...@@ -1087,6 +1090,29 @@ extern int dev_restart(struct net_device *dev); ...@@ -1087,6 +1090,29 @@ extern int dev_restart(struct net_device *dev);
#ifdef CONFIG_NETPOLL_TRAP #ifdef CONFIG_NETPOLL_TRAP
extern int netpoll_trap(void); extern int netpoll_trap(void);
#endif #endif
extern void *skb_gro_header(struct sk_buff *skb, unsigned int hlen);
extern int skb_gro_receive(struct sk_buff **head,
struct sk_buff *skb);
/*
 * Report how far GRO processing has advanced into the packet, i.e. the
 * data_offset value stored in the skb's GRO control block (measured
 * relative to skb->data).
 */
static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
{
	unsigned int gro_off = NAPI_GRO_CB(skb)->data_offset;

	return gro_off;
}
/*
 * Number of bytes of the packet that lie beyond the current GRO offset:
 * the total skb length minus what GRO has already stepped over.
 */
static inline unsigned int skb_gro_len(const struct sk_buff *skb)
{
	unsigned int consumed = NAPI_GRO_CB(skb)->data_offset;

	return skb->len - consumed;
}
static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
{
NAPI_GRO_CB(skb)->data_offset += len;
}
static inline void skb_gro_reset_offset(struct sk_buff *skb)
{
NAPI_GRO_CB(skb)->data_offset = 0;
}
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type, unsigned short type,
......
...@@ -1687,8 +1687,6 @@ extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, ...@@ -1687,8 +1687,6 @@ extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
int shiftlen); int shiftlen);
extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
extern int skb_gro_receive(struct sk_buff **head,
struct sk_buff *skb);
static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
int len, void *buffer) int len, void *buffer)
......
...@@ -98,6 +98,8 @@ drop: ...@@ -98,6 +98,8 @@ drop:
int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
unsigned int vlan_tci, struct sk_buff *skb) unsigned int vlan_tci, struct sk_buff *skb)
{ {
skb_gro_reset_offset(skb);
return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb); return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
} }
EXPORT_SYMBOL(vlan_gro_receive); EXPORT_SYMBOL(vlan_gro_receive);
......
...@@ -215,6 +215,13 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) ...@@ -215,6 +215,13 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
} }
static inline void *skb_gro_mac_header(struct sk_buff *skb)
{
return skb_headlen(skb) ? skb_mac_header(skb) :
page_address(skb_shinfo(skb)->frags[0].page) +
skb_shinfo(skb)->frags[0].page_offset;
}
/* Device list insertion */ /* Device list insertion */
static int list_netdevice(struct net_device *dev) static int list_netdevice(struct net_device *dev)
{ {
...@@ -2350,7 +2357,6 @@ static int napi_gro_complete(struct sk_buff *skb) ...@@ -2350,7 +2357,6 @@ static int napi_gro_complete(struct sk_buff *skb)
out: out:
skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_size = 0;
__skb_push(skb, -skb_network_offset(skb));
return netif_receive_skb(skb); return netif_receive_skb(skb);
} }
...@@ -2368,6 +2374,25 @@ void napi_gro_flush(struct napi_struct *napi) ...@@ -2368,6 +2374,25 @@ void napi_gro_flush(struct napi_struct *napi)
} }
EXPORT_SYMBOL(napi_gro_flush); EXPORT_SYMBOL(napi_gro_flush);
/*
 * skb_gro_header - get a readable pointer to the header GRO is parsing
 * @skb:  packet being processed
 * @hlen: number of header bytes needed, counted from the current GRO offset
 *
 * Returns a pointer to @hlen contiguous bytes located at the current GRO
 * offset, or NULL if the slow path's pskb_may_pull() fails.
 *
 * Three cases are handled:
 *  1. The header lies entirely in the linear area: point into skb->data.
 *  2. The header lies entirely in the first page fragment, and that page
 *     is not in high memory: point straight into the page, avoiding a copy.
 *  3. Anything else (no fragments, fragment too short, highmem page, or a
 *     header that straddles the linear area and the first fragment): fall
 *     back to pskb_may_pull() and point into skb->data.
 */
void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
{
	unsigned int offset = skb_gro_offset(skb);
	unsigned int headlen = skb_headlen(skb);

	/* From here on hlen is the end of the header relative to skb->data. */
	hlen += offset;

	if (hlen <= headlen)
		return skb->data + offset;

	/*
	 * offset < headlen means the header begins in the linear area but
	 * ends in the fragment, so it is not contiguous and must be pulled.
	 */
	if (unlikely(offset < headlen ||
		     !skb_shinfo(skb)->nr_frags ||
		     skb_shinfo(skb)->frags[0].size <= hlen - headlen ||
		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;

	/*
	 * The GRO offset is relative to skb->data and therefore includes the
	 * linear area; subtract headlen to obtain the position inside the
	 * first fragment.
	 */
	return page_address(skb_shinfo(skb)->frags[0].page) +
	       skb_shinfo(skb)->frags[0].page_offset +
	       (offset - headlen);
}
EXPORT_SYMBOL(skb_gro_header);
int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{ {
struct sk_buff **pp = NULL; struct sk_buff **pp = NULL;
...@@ -2388,11 +2413,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) ...@@ -2388,11 +2413,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) { list_for_each_entry_rcu(ptype, head, list) {
struct sk_buff *p; struct sk_buff *p;
void *mac;
if (ptype->type != type || ptype->dev || !ptype->gro_receive) if (ptype->type != type || ptype->dev || !ptype->gro_receive)
continue; continue;
skb_reset_network_header(skb); skb_set_network_header(skb, skb_gro_offset(skb));
mac = skb_gro_mac_header(skb);
mac_len = skb->network_header - skb->mac_header; mac_len = skb->network_header - skb->mac_header;
skb->mac_len = mac_len; skb->mac_len = mac_len;
NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->same_flow = 0;
...@@ -2406,8 +2433,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) ...@@ -2406,8 +2433,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
continue; continue;
if (p->mac_len != mac_len || if (p->mac_len != mac_len ||
memcmp(skb_mac_header(p), skb_mac_header(skb), memcmp(skb_mac_header(p), mac, mac_len))
mac_len))
NAPI_GRO_CB(p)->same_flow = 0; NAPI_GRO_CB(p)->same_flow = 0;
} }
...@@ -2434,13 +2460,11 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) ...@@ -2434,13 +2460,11 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
if (same_flow) if (same_flow)
goto ok; goto ok;
if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS)
__skb_push(skb, -skb_network_offset(skb));
goto normal; goto normal;
}
NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->count = 1;
skb_shinfo(skb)->gso_size = skb->len; skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list; skb->next = napi->gro_list;
napi->gro_list = skb; napi->gro_list = skb;
ret = GRO_HELD; ret = GRO_HELD;
...@@ -2488,6 +2512,8 @@ EXPORT_SYMBOL(napi_skb_finish); ...@@ -2488,6 +2512,8 @@ EXPORT_SYMBOL(napi_skb_finish);
int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{ {
skb_gro_reset_offset(skb);
return napi_skb_finish(__napi_gro_receive(napi, skb), skb); return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
} }
EXPORT_SYMBOL(napi_gro_receive); EXPORT_SYMBOL(napi_gro_receive);
...@@ -2506,6 +2532,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, ...@@ -2506,6 +2532,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
{ {
struct net_device *dev = napi->dev; struct net_device *dev = napi->dev;
struct sk_buff *skb = napi->skb; struct sk_buff *skb = napi->skb;
struct ethhdr *eth;
napi->skb = NULL; napi->skb = NULL;
...@@ -2525,13 +2552,23 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, ...@@ -2525,13 +2552,23 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
skb->len += info->len; skb->len += info->len;
skb->truesize += info->len; skb->truesize += info->len;
if (!pskb_may_pull(skb, ETH_HLEN)) { skb_reset_mac_header(skb);
skb_gro_reset_offset(skb);
eth = skb_gro_header(skb, sizeof(*eth));
if (!eth) {
napi_reuse_skb(napi, skb); napi_reuse_skb(napi, skb);
skb = NULL; skb = NULL;
goto out; goto out;
} }
skb->protocol = eth_type_trans(skb, dev); skb_gro_pull(skb, sizeof(*eth));
/*
* This works because the only protocols we care about don't require
* special handling. We'll fix it up properly at the end.
*/
skb->protocol = eth->h_proto;
skb->ip_summed = info->ip_summed; skb->ip_summed = info->ip_summed;
skb->csum = info->csum; skb->csum = info->csum;
...@@ -2544,11 +2581,22 @@ EXPORT_SYMBOL(napi_fraginfo_skb); ...@@ -2544,11 +2581,22 @@ EXPORT_SYMBOL(napi_fraginfo_skb);
int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
{ {
int err = NET_RX_SUCCESS; int err = NET_RX_SUCCESS;
int may;
switch (ret) { switch (ret) {
case GRO_NORMAL: case GRO_NORMAL:
case GRO_HELD:
may = pskb_may_pull(skb, skb_gro_offset(skb));
BUG_ON(!may);
skb->protocol = eth_type_trans(skb, napi->dev);
if (ret == GRO_NORMAL)
return netif_receive_skb(skb); return netif_receive_skb(skb);
skb_gro_pull(skb, -ETH_HLEN);
break;
case GRO_DROP: case GRO_DROP:
err = NET_RX_DROP; err = NET_RX_DROP;
/* fall through */ /* fall through */
......
...@@ -2584,17 +2584,21 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) ...@@ -2584,17 +2584,21 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
struct sk_buff *p = *head; struct sk_buff *p = *head;
struct sk_buff *nskb; struct sk_buff *nskb;
unsigned int headroom; unsigned int headroom;
unsigned int hlen = p->data - skb_mac_header(p); unsigned int len = skb_gro_len(skb);
unsigned int len = skb->len;
if (hlen + p->len + len >= 65536) if (p->len + len >= 65536)
return -E2BIG; return -E2BIG;
if (skb_shinfo(p)->frag_list) if (skb_shinfo(p)->frag_list)
goto merge; goto merge;
else if (!skb_headlen(p) && !skb_headlen(skb) && else if (skb_headlen(skb) <= skb_gro_offset(skb) &&
skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <=
MAX_SKB_FRAGS) { MAX_SKB_FRAGS) {
skb_shinfo(skb)->frags[0].page_offset +=
skb_gro_offset(skb) - skb_headlen(skb);
skb_shinfo(skb)->frags[0].size -=
skb_gro_offset(skb) - skb_headlen(skb);
memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
skb_shinfo(skb)->frags, skb_shinfo(skb)->frags,
skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
...@@ -2611,7 +2615,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) ...@@ -2611,7 +2615,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
} }
headroom = skb_headroom(p); headroom = skb_headroom(p);
nskb = netdev_alloc_skb(p->dev, headroom); nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
if (unlikely(!nskb)) if (unlikely(!nskb))
return -ENOMEM; return -ENOMEM;
...@@ -2619,12 +2623,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) ...@@ -2619,12 +2623,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
nskb->mac_len = p->mac_len; nskb->mac_len = p->mac_len;
skb_reserve(nskb, headroom); skb_reserve(nskb, headroom);
__skb_put(nskb, skb_gro_offset(p));
skb_set_mac_header(nskb, -hlen); skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
skb_set_network_header(nskb, skb_network_offset(p)); skb_set_network_header(nskb, skb_network_offset(p));
skb_set_transport_header(nskb, skb_transport_offset(p)); skb_set_transport_header(nskb, skb_transport_offset(p));
memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); __skb_pull(p, skb_gro_offset(p));
memcpy(skb_mac_header(nskb), skb_mac_header(p),
p->data - skb_mac_header(p));
*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
skb_shinfo(nskb)->frag_list = p; skb_shinfo(nskb)->frag_list = p;
......
...@@ -1253,10 +1253,10 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, ...@@ -1253,10 +1253,10 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
int proto; int proto;
int id; int id;
if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) iph = skb_gro_header(skb, sizeof(*iph));
if (unlikely(!iph))
goto out; goto out;
iph = ip_hdr(skb);
proto = iph->protocol & (MAX_INET_PROTOS - 1); proto = iph->protocol & (MAX_INET_PROTOS - 1);
rcu_read_lock(); rcu_read_lock();
...@@ -1270,7 +1270,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, ...@@ -1270,7 +1270,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto out_unlock; goto out_unlock;
flush = ntohs(iph->tot_len) != skb->len || flush = ntohs(iph->tot_len) != skb_gro_len(skb) ||
iph->frag_off != htons(IP_DF); iph->frag_off != htons(IP_DF);
id = ntohs(iph->id); id = ntohs(iph->id);
...@@ -1298,8 +1298,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, ...@@ -1298,8 +1298,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
} }
NAPI_GRO_CB(skb)->flush |= flush; NAPI_GRO_CB(skb)->flush |= flush;
__skb_pull(skb, sizeof(*iph)); skb_gro_pull(skb, sizeof(*iph));
skb_reset_transport_header(skb); skb_set_transport_header(skb, skb_gro_offset(skb));
pp = ops->gro_receive(head, skb); pp = ops->gro_receive(head, skb);
......
...@@ -2481,19 +2481,19 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) ...@@ -2481,19 +2481,19 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
unsigned int mss = 1; unsigned int mss = 1;
int flush = 1; int flush = 1;
if (!pskb_may_pull(skb, sizeof(*th))) th = skb_gro_header(skb, sizeof(*th));
if (unlikely(!th))
goto out; goto out;
th = tcp_hdr(skb);
thlen = th->doff * 4; thlen = th->doff * 4;
if (thlen < sizeof(*th)) if (thlen < sizeof(*th))
goto out; goto out;
if (!pskb_may_pull(skb, thlen)) th = skb_gro_header(skb, thlen);
if (unlikely(!th))
goto out; goto out;
th = tcp_hdr(skb); skb_gro_pull(skb, thlen);
__skb_pull(skb, thlen);
flags = tcp_flag_word(th); flags = tcp_flag_word(th);
...@@ -2521,10 +2521,10 @@ found: ...@@ -2521,10 +2521,10 @@ found:
flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; flush |= th->ack_seq != th2->ack_seq || th->window != th2->window;
flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th));
total = p->len; total = skb_gro_len(p);
mss = skb_shinfo(p)->gso_size; mss = skb_shinfo(p)->gso_size;
flush |= skb->len > mss || skb->len <= 0; flush |= skb_gro_len(skb) > mss || !skb_gro_len(skb);
flush |= ntohl(th2->seq) + total != ntohl(th->seq); flush |= ntohl(th2->seq) + total != ntohl(th->seq);
if (flush || skb_gro_receive(head, skb)) { if (flush || skb_gro_receive(head, skb)) {
...@@ -2537,7 +2537,7 @@ found: ...@@ -2537,7 +2537,7 @@ found:
tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
out_check_final: out_check_final:
flush = skb->len < mss; flush = skb_gro_len(skb) < mss;
flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
TCP_FLAG_SYN | TCP_FLAG_FIN); TCP_FLAG_SYN | TCP_FLAG_FIN);
......
...@@ -2355,7 +2355,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) ...@@ -2355,7 +2355,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
switch (skb->ip_summed) { switch (skb->ip_summed) {
case CHECKSUM_COMPLETE: case CHECKSUM_COMPLETE:
if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
skb->csum)) { skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY; skb->ip_summed = CHECKSUM_UNNECESSARY;
break; break;
......
...@@ -799,24 +799,34 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, ...@@ -799,24 +799,34 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
int proto; int proto;
__wsum csum; __wsum csum;
if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) iph = skb_gro_header(skb, sizeof(*iph));
if (unlikely(!iph))
goto out; goto out;
iph = ipv6_hdr(skb); skb_gro_pull(skb, sizeof(*iph));
__skb_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb));
flush += ntohs(iph->payload_len) != skb->len; flush += ntohs(iph->payload_len) != skb_gro_len(skb);
rcu_read_lock(); rcu_read_lock();
proto = ipv6_gso_pull_exthdrs(skb, iph->nexthdr); proto = iph->nexthdr;
iph = ipv6_hdr(skb);
IPV6_GRO_CB(skb)->proto = proto;
ops = rcu_dereference(inet6_protos[proto]); ops = rcu_dereference(inet6_protos[proto]);
if (!ops || !ops->gro_receive) {
__pskb_pull(skb, skb_gro_offset(skb));
proto = ipv6_gso_pull_exthdrs(skb, proto);
skb_gro_pull(skb, -skb_transport_offset(skb));
skb_reset_transport_header(skb);
__skb_push(skb, skb_gro_offset(skb));
if (!ops || !ops->gro_receive) if (!ops || !ops->gro_receive)
goto out_unlock; goto out_unlock;
iph = ipv6_hdr(skb);
}
IPV6_GRO_CB(skb)->proto = proto;
flush--; flush--;
skb_reset_transport_header(skb);
nlen = skb_network_header_len(skb); nlen = skb_network_header_len(skb);
for (p = *head; p; p = p->next) { for (p = *head; p; p = p->next) {
......
...@@ -948,7 +948,7 @@ struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) ...@@ -948,7 +948,7 @@ struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb)
switch (skb->ip_summed) { switch (skb->ip_summed) {
case CHECKSUM_COMPLETE: case CHECKSUM_COMPLETE:
if (!tcp_v6_check(skb->len, &iph->saddr, &iph->daddr, if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
skb->csum)) { skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY; skb->ip_summed = CHECKSUM_UNNECESSARY;
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment